3 * Copyright (c) 1998-2002
6 * Permission to use, copy, modify, distribute and sell this software
7 * and its documentation for any purpose is hereby granted without fee,
8 * provided that the above copyright notice appear in all copies and
9 * that both that copyright notice and this permission notice appear
10 * in supporting documentation. Dr John Maddock makes no representations
11 * about the suitability of this software for any purpose.
12 * It is provided "as is" without express or implied warranty.
17 * LOCATION: see http://www.boost.org for most recent version.
18 * FILE c_regex_traits.cpp
19 * VERSION see <boost/version.hpp>
20 * DESCRIPTION: Implements the c_regex_traits<charT> traits class
23 #define BOOST_REGEX_SOURCE
25 #include <boost/config.hpp>
28 # pragma warning(disable: 4702)
36 #include <boost/cregex.hpp>
37 #include <boost/regex/regex_traits.hpp>
38 #include <boost/regex/detail/regex_synch.hpp>
39 #include <boost/regex/detail/regex_cstring.hpp>
40 #include <boost/scoped_array.hpp>
42 #include "primary_transform.hpp"
45 #if defined(BOOST_HAS_NL_TYPES_H)
49 // Fixes a very strange bug in Comeau 4.2.45.2 that would otherwise result in
50 // an instantiation loop
// The workaround is compiled only for Comeau (__COMO__) up to version 4245.
51 #if defined(__COMO__) && __COMO_VERSION__ <= 4245
// Intentionally empty: the function has no statements; presumably its mere
// presence is what avoids the compiler problem described above.
52 void c_regex_adopted_no_longer_needed_loop_shutter_upper() { }
// Table of character-class bitmask ids, one per built-in class name.
// c_traits_base::do_lookup_class() returns re_char_class_id[i] for the name
// it matches at index i, so the order of the entries here must stay in step
// with re_char_class_names. Fourteen entries are listed, which matches
// re_classes_max.
60 boost::uint_fast32_t re_char_class_id[] = {
61 boost::re_detail::c_traits_base::char_class_alnum,
62 boost::re_detail::c_traits_base::char_class_alpha,
63 boost::re_detail::c_traits_base::char_class_cntrl,
64 boost::re_detail::c_traits_base::char_class_digit,
65 boost::re_detail::c_traits_base::char_class_graph,
66 boost::re_detail::c_traits_base::char_class_lower,
67 boost::re_detail::c_traits_base::char_class_print,
68 boost::re_detail::c_traits_base::char_class_punct,
69 boost::re_detail::c_traits_base::char_class_space,
70 boost::re_detail::c_traits_base::char_class_upper,
71 boost::re_detail::c_traits_base::char_class_xdigit,
72 boost::re_detail::c_traits_base::char_class_blank,
73 boost::re_detail::c_traits_base::char_class_word,
74 boost::re_detail::c_traits_base::char_class_unicode,
// Built-in class names; do_lookup_class() compares the requested name against
// these with strcmp. (The initialiser list is not visible in this excerpt.)
77 const char* re_char_class_names[] = {
// Last LC_CTYPE locale name recorded by re_update_classes().
94 std::string* re_cls_name;
// Array of re_classes_max strings, allocated in re_init_classes().
95 std::string* pclasses;
// Counter tested by re_init_classes() and re_free_classes().
96 unsigned int classes_count = 0;
// Upper bound used by the class-name loops in this file.
97 const unsigned int re_classes_max = 14;
// Pairs a collating-element name with its value; instances are stored in
// *pcoll_names. (Member declarations are not visible in this excerpt.)
102 struct collate_name_t
// Initialises name from the pointer pair (p1, p2) and value from (p3, p4).
// re_update_collate() appears to pass the begin/end pointers of two
// whitespace-delimited tokens.
106 collate_name_t(const char* p1, const char* p2, const char* p3, const char* p4)
107 : name(p1, p2), value(p3, p4) {}
// Last LC_COLLATE locale name recorded by re_update_collate().
110 std::string* re_coll_name;
// Custom collating-element names: appended to in re_update_collate() and
// searched in c_traits_base::do_lookup_collate().
111 std::list<collate_name_t>* pcoll_names;
// Counter tested by re_init_collate() and re_free_collate().
112 unsigned int collate_count = 0;
// Default the message-id base to 0 unless the build already defines it.
116 #ifndef BOOST_RE_MESSAGE_BASE
117 #define BOOST_RE_MESSAGE_BASE 0
// Message-catalogue handle; (nl_catd)-1 is the "no catalogue open" value that
// the rest of this file tests for.
120 #if defined(BOOST_HAS_NL_TYPES_H)
121 nl_catd message_cat = (nl_catd)-1;
// Counter tested by re_message_init() and re_message_free().
124 unsigned int message_count = 0;
// Locale name that re_message_update() compares with the current LC_MESSAGES
// setting.
125 std::string* mess_locale;
// Cache of customised error strings indexed by error id. Entries are filled
// by re_get_error_str() using re_strdup and released with re_strfree.
// (The initialiser list is not visible in this excerpt.)
127 BOOST_REGEX_DECL char* re_custom_error_messages[] = {
// Fall back to LC_CTYPE on platforms that do not define LC_MESSAGES.
153 #if !defined(LC_MESSAGES)
154 #define LC_MESSAGES LC_CTYPE
// Counter checked in c_regex_traits<char>::m_free().
160 unsigned int entry_count = 0;
// Last LC_CTYPE and LC_COLLATE locale names recorded by
// c_regex_traits<char>::update().
162 std::string* ctype_name;
163 std::string* collate_name;
// Size of the per-character lookup tables: one slot per unsigned char value.
166 map_size = UCHAR_MAX + 1
// Forward declaration; the definition appears further down this file.
169 std::size_t BOOST_REGEX_CALL _re_get_message(char* buf, std::size_t len, std::size_t id);
// Wide-character state; compiled only when BOOST_NO_WREGEX is not defined.
171 #ifndef BOOST_NO_WREGEX
// Wide reference characters assigned from collating-element lookups in
// c_regex_traits<wchar_t>::update() and used by toi(wchar_t).
173 BOOST_REGEX_DECL wchar_t re_zero_w;
174 BOOST_REGEX_DECL wchar_t re_ten_w;
// Counter checked in c_regex_traits<wchar_t>::m_free().
176 unsigned int nlsw_count = 0;
// Locale name that c_regex_traits<wchar_t>::update() compares with the
// current LC_CTYPE setting.
177 std::string* wlocale_name = 0;
// Wide-character syntax table; filled in update() and searched by
// syntax_type().
185 std::list<syntax_map_t>* syntax;
// Wide-character overload: fetches message `id` as narrow text and widens it
// into buf (capacity len).
187 std::size_t BOOST_REGEX_CALL re_get_message(wchar_t* buf, std::size_t len, std::size_t id)
// A null buffer with zero length asks _re_get_message for the required size.
189 std::size_t size = _re_get_message(static_cast<char*>(0), 0, id);
// Temporary narrow buffer, released automatically by scoped_array.
192 boost::scoped_array<char> cb(new char[size]);
193 _re_get_message(cb.get(), size, id);
// size is overwritten with strwiden's result for the caller's buffer.
194 size = boost::c_regex_traits<wchar_t>::strwiden(buf, len, cb.get());
// Narrow overload: forwards unchanged to _re_get_message, so callers can use
// the same name for both character types.
199 inline std::size_t BOOST_REGEX_CALL re_get_message(char* buf, std::size_t len, std::size_t id)
201 return _re_get_message(buf, len, id);
// Creates the shared class-name storage. The allocations below are reached
// only when classes_count is zero.
204 void BOOST_REGEX_CALL re_init_classes()
207 if(classes_count == 0)
// "xxxxxxxx" is presumably a placeholder that never equals a real locale
// name, so the first re_update_classes() call sees a mismatch.
209 re_cls_name = new std::string("xxxxxxxx");
210 #ifndef BOOST_NO_EXCEPTIONS
213 pclasses = new std::string[re_classes_max];
// NOTE(review): BOOST_REGEX_NOEH_ASSERT presumably checks the pointer in
// builds without exception handling; confirm against its definition.
214 BOOST_REGEX_NOEH_ASSERT(pclasses)
215 #ifndef BOOST_NO_EXCEPTIONS
// Counterpart of re_init_classes(): decrements classes_count and takes the
// guarded branch (not visible in this excerpt) when it reaches zero.
// NOTE(review): classes_count is unsigned, so an unmatched call would wrap
// around rather than go negative.
227 void BOOST_REGEX_CALL re_free_classes()
230 if(--classes_count == 0)
// Refreshes the class-name data when the LC_CTYPE locale differs from the one
// recorded in *re_cls_name.
237 void BOOST_REGEX_CALL re_update_classes()
// setlocale(category, 0) only queries the current locale name.
240 if(*re_cls_name != std::setlocale(LC_CTYPE, 0))
242 *re_cls_name = std::setlocale(LC_CTYPE, 0);
245 for(i = 0; i < re_classes_max; ++i)
// Class-name messages use ids 300 + i.
// NOTE(review): 256 is presumably the size of buf, whose declaration is not
// visible in this excerpt.
247 re_get_message(buf, 256, i+300);
// Creates the shared collating-name storage. The allocations below are
// reached only when collate_count is zero.
253 void BOOST_REGEX_CALL re_init_collate()
256 if(collate_count == 0)
// "xxxxxxxx" is presumably a placeholder that never equals a real locale
// name, so the first re_update_collate() call sees a mismatch.
258 re_coll_name = new std::string("xxxxxxxx");
259 #ifndef BOOST_NO_EXCEPTIONS
262 pcoll_names = new std::list<collate_name_t>();
// NOTE(review): BOOST_REGEX_NOEH_ASSERT presumably checks the pointer in
// builds without exception handling; confirm against its definition.
263 BOOST_REGEX_NOEH_ASSERT(pcoll_names)
264 #ifndef BOOST_NO_EXCEPTIONS
// Counterpart of re_init_collate(): decrements collate_count and takes the
// guarded branch (not visible in this excerpt) when it reaches zero.
// NOTE(review): collate_count is unsigned, so an unmatched call would wrap
// around rather than go negative.
276 void BOOST_REGEX_CALL re_free_collate()
279 if(--collate_count == 0)
// Rebuilds *pcoll_names from the message source when the LC_COLLATE locale
// differs from the one recorded in *re_coll_name.
286 void BOOST_REGEX_CALL re_update_collate()
289 if(*re_coll_name != std::setlocale(LC_COLLATE, 0))
291 *re_coll_name = std::setlocale(LC_COLLATE, 0);
// Collating-name messages start at id 400.
293 unsigned int i = 400;
294 re_get_message(buf, 256, i);
// NOTE(review): the doubled ';' is a stray empty declaration.
297 char* p1, *p2, *p3, *p4;;
// Each message appears to be scanned as two whitespace-delimited tokens:
// skip blanks to find a token start, then advance to the next blank.
// The casts to unsigned char keep the isspace argument in its valid range.
299 while(*p1 && std::isspace((unsigned char)*p1))++p1;
301 while(*p2 && !std::isspace((unsigned char)*p2))++p2;
303 while(*p3 && std::isspace((unsigned char)*p3))++p3;
305 while(*p4 && !std::isspace((unsigned char)*p4))++p4;
306 pcoll_names->push_back(collate_name_t(p1, p2, p3, p4));
// Fetch another message; the enclosing loop header and the update of i are
// not visible in this excerpt.
308 re_get_message(buf, 256, i);
// Retrieves message `id` into buf (capacity len). A message catalogue, when
// one is open, is consulted first; otherwise the built-in default is used.
313 std::size_t BOOST_REGEX_CALL _re_get_message(char* buf, std::size_t len, std::size_t id)
316 // get the customised message if any:
317 #if defined(BOOST_HAS_NL_TYPES_H)
318 if(message_cat != (nl_catd)-1)
// The last argument is the default returned by catgets on failure; passing 0
// means m is null when the catalogue has no such message.
320 const char* m = catgets(message_cat, 0, id, 0);
// +1 accounts for the terminating NUL.
323 std::size_t size = std::strlen(m) + 1;
333 // now get the default message if any:
334 return boost::re_detail::re_get_default_message(buf, len, id);
// Creates the message-locale string when message_count is zero.
337 void BOOST_REGEX_CALL re_message_init()
340 if(message_count == 0)
// Presumably a placeholder that never equals a real locale name, so the
// first re_message_update() call sees a mismatch.
342 mess_locale = new std::string("xxxxxxxxxxxxxxxx");
// Re-synchronises the message catalogue and the cached error strings with the
// current LC_MESSAGES locale.
347 void BOOST_REGEX_CALL re_message_update()
351 // called whenever the global locale changes:
353 std::string l(std::setlocale(LC_MESSAGES, 0));
354 if(*mess_locale != l)
357 #if defined(BOOST_HAS_NL_TYPES_H)
// Close any catalogue that is currently open and reset the handle.
358 if(message_cat != (nl_catd)-1)
360 catclose(message_cat);
361 message_cat = (nl_catd)-1;
// Only try to open a catalogue when a non-empty name has been configured.
363 if(*boost::re_detail::c_traits_base::get_catalogue())
365 message_cat = catopen(boost::re_detail::c_traits_base::get_catalogue(), 0);
// With exceptions enabled a failed catopen is reported via std::runtime_error;
// the assertion below covers the other configuration.
366 #ifndef BOOST_NO_EXCEPTIONS
367 if(message_cat == (nl_catd)-1)
369 std::string m("Unable to open message catalog: ");
370 throw std::runtime_error(m + boost::re_detail::c_traits_base::get_catalogue());
373 BOOST_REGEX_NOEH_ASSERT(message_cat != (nl_catd)-1);
// Drop every cached custom error string; re_get_error_str() refetches an
// entry when it finds a null slot.
377 for(int i = 0; i < boost::REG_E_UNKNOWN; ++i)
379 if(re_custom_error_messages[i])
381 boost::re_detail::re_strfree(re_custom_error_messages[i]);
382 re_custom_error_messages[i] = 0;
// Releases the message resources once message_count is zero. (The statement
// that changes message_count is not visible in this excerpt.)
388 void BOOST_REGEX_CALL re_message_free()
392 if(message_count == 0)
394 #if defined(BOOST_HAS_NL_TYPES_H)
// Close the catalogue if one is open.
395 if(message_cat != (nl_catd)-1)
396 catclose(message_cat);
// Free every cached custom error string and null its slot.
399 for(int i = 0; i < boost::REG_E_UNKNOWN; ++i)
401 if(re_custom_error_messages[i])
403 boost::re_detail::re_strfree(re_custom_error_messages[i]);
404 re_custom_error_messages[i] = 0;
// Returns the text for error code `id`, caching customised strings in
// re_custom_error_messages.
// NOTE(review): id indexes the arrays directly and no range check is visible
// in this excerpt; confirm callers only pass valid codes.
411 const char* BOOST_REGEX_CALL re_get_error_str(unsigned int id)
// In threaded builds the shared cache is accessed under p_re_lock.
414 #ifdef BOOST_HAS_THREADS
415 boost::re_detail::cs_guard g(*boost::re_detail::p_re_lock);
// A null slot means the string has not been fetched yet.
417 if(re_custom_error_messages[id] == 0)
// Error messages use message ids 200 + id.
420 _re_get_message(buf, 256, id + 200);
// Keep a private copy; it is released with re_strfree elsewhere in this file.
423 re_custom_error_messages[id] = boost::re_detail::re_strdup(buf);
// The conditions selecting between the returns below are not visible here.
424 return re_custom_error_messages[id];
426 return boost::re_detail::re_default_error_messages[id];
428 return re_custom_error_messages[id];
// Name of the message catalogue to open; an empty string (the initial state)
// means no catalogue is configured.
436 char c_traits_base::regex_message_catalogue[BOOST_REGEX_MAX_PATH] = {0};
// Returns the error text for `id` as a std::string copy of the C string
// supplied by re_get_error_str().
438 std::string BOOST_REGEX_CALL c_traits_base::error_string(unsigned id)
440 return re_get_error_str(id);
// Refreshes the narrow reference characters from collating-element lookups.
443 void BOOST_REGEX_CALL c_traits_base::do_update_collate()
448 const char* p = "zero";
// p+4 is one past the last character of "zero".
449 if(c_regex_traits<char>::lookup_collatename(s, p, p+4))
// The looked-up element is expected to be a single character.
451 jm_assert(s.size() == 1);
452 re_zero = *s.c_str();
// Second lookup uses a three-character name; the line that reassigns p is not
// visible in this excerpt.
458 if(c_regex_traits<char>::lookup_collatename(s, p, p+3))
460 jm_assert(s.size() == 1);
// Rebuilds the narrow-character lookup tables: syntax_map, class_map and
// lower_case_map.
467 void BOOST_REGEX_CALL c_traits_base::do_update_ctype()
470 // start by updating the syntax map:
472 char buf[map_size+2];
// Every character starts out classified as syntax_char.
473 std::memset(syntax_map, syntax_char, map_size);
474 for(i = 1; i < syntax_max; ++i)
// Syntax type i is described by message id i + 100.
477 re_get_message(static_cast<char*>(buf), map_size, i+100);
// The cast to unsigned char keeps the table index non-negative.
480 syntax_map[(unsigned char)*ptr] = (unsigned char)i;
484 // now update the character class map,
485 // and lower case map:
// NOTE(review): class_map is declared as unsigned short[map_size], but this
// clears only map_size bytes. If unsigned short is wider than one byte, the
// tail of the table keeps its old bits, and the |= updates below then add to
// them. sizeof(class_map) looks like the intended length; confirm.
486 std::memset(class_map, 0, map_size);
487 for(i = 0; i < map_size; ++i)
// The test guarding each assignment below is not visible in this excerpt.
490 class_map[i] |= char_class_alpha;
492 class_map[i] |= char_class_cntrl;
494 class_map[i] |= char_class_digit;
496 class_map[i] |= char_class_lower;
498 class_map[i] |= char_class_upper;
500 class_map[i] |= char_class_punct;
502 class_map[i] |= char_class_space;
504 class_map[i] |= char_class_xdigit;
// '_' is flagged explicitly; space and tab are the characters flagged as
// blank here.
506 class_map['_'] |= char_class_underscore;
507 class_map[' '] |= char_class_blank;
508 class_map['\t'] |= char_class_blank;
// Precompute the lower-case form of every unsigned char value.
509 for(i = 0; i < map_size; ++i)
511 lower_case_map[i] = (char)std::tolower(i);
// Maps a class name to its bitmask id from re_char_class_id.
516 boost::uint_fast32_t BOOST_REGEX_CALL c_traits_base::do_lookup_class(const char* p)
// First pass; its comparison is not visible in this excerpt.
520 for(i = 0; i < re_classes_max; ++i)
524 return re_char_class_id[i];
// Second pass: exact strcmp match against the built-in names.
527 for(i = 0; i < re_classes_max; ++i)
529 if(std::strcmp(re_char_class_names[i], p) == 0)
531 return re_char_class_id[i];
// Looks up collating-element name `p`. The custom list is searched first,
// then the default table.
537 bool BOOST_REGEX_CALL c_traits_base::do_lookup_collate(std::string& buf, const char* p)
540 std::list<collate_name_t>::iterator first, last;
541 first = pcoll_names->begin();
542 last = pcoll_names->end();
// Linear search of the custom names; on a match the stored value is copied
// to buf.
545 if((*first).name == p)
547 buf = (*first).value;
// Fall back to the default collating names.
553 bool result = re_detail::re_lookup_def_collate_name(buf, p);
// A failed lookup of a one-character name gets special handling; the body of
// this branch is not visible in this excerpt.
554 if((result == 0) && (std::strlen(p) == 1))
// Stores a new message-catalogue name in regex_message_catalogue.
562 std::string BOOST_REGEX_CALL c_traits_base::set_message_catalogue(const std::string& l)
// True when l plus its terminating NUL would not fit in the fixed buffer;
// the action taken in that case is not visible in this excerpt.
564 if(sizeof(regex_message_catalogue) <= l.size())
// Remember the previous name before overwriting it.
566 std::string old(regex_message_catalogue);
// Safe only because of the size test above.
567 std::strcpy(regex_message_catalogue, l.c_str());
// Storage for the static lookup tables filled by do_update_ctype(), each with
// one slot per unsigned char value.
// Syntax type of each character.
571 unsigned char c_traits_base::syntax_map[map_size];
// Character-class bitmask of each character.
572 unsigned short c_traits_base::class_map[map_size];
// Result of std::tolower for each character.
573 char c_traits_base::lower_case_map[map_size];
575 } // namespace re_detail
577 #ifndef BOOST_NO_WREGEX
// Wide-character collating-name lookup: narrows the name, calls the narrow
// lookup in the base class, then widens the result.
578 bool BOOST_REGEX_CALL c_regex_traits<wchar_t>::lookup_collatename(std::basic_string<wchar_t>& out, const wchar_t* first, const wchar_t* last)
// Copy [first, last) so a NUL-terminated string is available for strnarrow.
581 std::basic_string<wchar_t> s(first, last);
// A null buffer with zero length asks strnarrow for the required size.
582 std::size_t len = strnarrow(static_cast<char*>(0), 0, s.c_str());
583 scoped_array<char> buf(new char[len]);
584 strnarrow(buf.get(), len, s.c_str());
586 bool result = base_type::do_lookup_collate(t_out, buf.get());
// An empty result is treated as "not found".
587 if(t_out.size() == 0) result = false;
// Same size-then-convert pattern for widening the narrow result.
592 len = strwiden(static_cast<wchar_t*>(0), 0, t_out.c_str());
593 scoped_array<wchar_t> wb(new wchar_t[len]);
594 strwiden(wb.get(), len, t_out.c_str());
// Appends a single NUL character to out; the condition guarding this line is
// not visible in this excerpt.
598 out.append(1, (wchar_t)0);
// Static instance of the narrow traits class. m_free() takes its address so
// that the linker keeps it, and update() passes it to find_sort_syntax().
604 c_regex_traits<char> c_regex_traits<char>::i;
// Initialises the shared state used by the narrow-character traits.
606 void BOOST_REGEX_CALL c_regex_traits<char>::init()
// Threaded builds initialise threading support and then take p_re_lock
// (cs_guard is presumably a scoped lock; confirm against its definition).
609 #ifdef BOOST_HAS_THREADS
610 re_detail::re_init_threads();
611 re_detail::cs_guard g(*re_detail::p_re_lock);
613 // just keep track of entry_count
// The "xxxxxxxxxxxxxxxx" strings are presumably placeholders that never equal
// a real locale name, so the first update() call sees a mismatch.
616 ctype_name = new std::string("xxxxxxxxxxxxxxxx");
617 #ifndef BOOST_NO_EXCEPTIONS
620 collate_name = new std::string("xxxxxxxxxxxxxxxx");
621 BOOST_REGEX_NOEH_ASSERT(collate_name)
622 #ifndef BOOST_NO_EXCEPTIONS
// Brings the narrow-character state in line with the current C locale.
637 void BOOST_REGEX_CALL c_regex_traits<char>::update()
640 #ifdef BOOST_HAS_THREADS
641 re_detail::cs_guard g(*re_detail::p_re_lock);
// Collation data is refreshed only when LC_COLLATE has changed; the refresh
// call itself is not visible in this excerpt.
644 if(*collate_name != std::setlocale(LC_COLLATE, 0))
647 *collate_name = std::setlocale(LC_COLLATE, 0);
// Likewise for LC_CTYPE.
649 if(*ctype_name != std::setlocale(LC_CTYPE, 0))
652 *ctype_name = std::setlocale(LC_CTYPE, 0);
// Determine how sort keys are laid out; find_sort_syntax also writes
// sort_delim through the second argument.
654 sort_type = re_detail::find_sort_syntax(&i, &sort_delim);
// Releases the narrow-character traits state; the statements inside the
// branches are not visible in this excerpt.
657 void BOOST_REGEX_CALL c_regex_traits<char>::m_free()
660 #ifdef BOOST_HAS_THREADS
661 re_detail::cs_guard g(*re_detail::p_re_lock);
667 // add reference to static member here to ensure
668 // that the linker includes it in the .exe:
// The address comparison is always true; it exists only for the reason given
// in the comment above.
669 if((entry_count == 0) && (0 != &c_regex_traits<char>::i))
// Counterpart of the re_init_threads() call in init().
674 #ifdef BOOST_HAS_THREADS
676 re_detail::re_free_threads();
// Computes the collation sort key of `in` with std::strxfrm.
680 void BOOST_REGEX_CALL c_regex_traits<char>::transform(std::string& out, const std::string& in)
// A null destination with size 0 only asks for the required key length.
683 std::size_t n = std::strxfrm(0, in.c_str(), 0);
// (size_t)-1 is treated as failure; the handling is not visible here.
684 if(n == (std::size_t)(-1))
// n + 1 leaves room for the terminating NUL.
689 scoped_array<char> buf(new char[n+1]);
690 n = std::strxfrm(buf.get(), in.c_str(), n+1);
691 if(n == (std::size_t)(-1))
// Produces a sort key for `in`, trimmed according to sort_type. (The lines
// that fill `out` and open the switch are not visible in this excerpt.)
699 void BOOST_REGEX_CALL c_regex_traits<char>::transform_primary(std::string& out, const std::string& in)
704 case re_detail::sort_C:
705 case re_detail::sort_unknown:
707 case re_detail::sort_fixed:
// sort_delim is used as a position here: everything from it onwards is
// erased.
// NOTE(review): the wchar_t version guards this with
// "(unsigned)sort_delim < out.size()". Here a position beyond out.size()
// makes std::string::erase throw std::out_of_range; confirm whether the
// guard is missing.
708 out.erase((int)sort_delim);
710 case re_detail::sort_delim:
// sort_delim is used as a delimiter character here: scan for it, ignoring a
// delimiter that is the last character.
711 for(unsigned int i = 0; i < out.size(); ++i)
713 if((out[i] == sort_delim) && (i+1 < out.size()))
// Static members set by update() via find_sort_syntax() and read by
// transform_primary().
722 unsigned c_regex_traits<char>::sort_type;
// Used either as a position or as a delimiter character, depending on
// sort_type.
723 char c_regex_traits<char>::sort_delim;
// Converts a single digit character to its numeric value, or -1 when c is
// neither a decimal nor a hexadecimal digit.
726 int BOOST_REGEX_CALL c_regex_traits<char>::toi(char c)
// Decimal digit; its return statement is not visible in this excerpt.
728 if(is_class(c, char_class_digit))
730 if(is_class(c, char_class_xdigit))
// Hex letter: offset from re_ten after translate(…, true) is applied to both.
731 return 10 + translate(c, true) - translate(re_ten, true);
732 return -1; // error!!
// Parses a number from [first, last) in the given radix. `first` is taken by
// reference; the statement that advances it is not visible in this excerpt.
735 int BOOST_REGEX_CALL c_regex_traits<char>::toi(const char*& first, const char* last, int radix)
740 // if radix is less than zero, then restrict
741 // return value to charT. NB assumes sizeof(charT) <= sizeof(int)
// Limit derived from the bit width of the character type.
743 maxval = 1u << (sizeof(*first) * CHAR_BIT - 1);
// Otherwise the limit is the largest unsigned int.
750 maxval = (unsigned int)-1;
754 unsigned int result = 0;
// Radices above 10 accept hexadecimal digits; others accept decimal only.
755 unsigned int type = (radix > 10) ? char_class_xdigit : char_class_digit;
// Stop at the end of input, at the first non-digit, or once result exceeds
// maxval.
756 while((first != last) && is_class(*first, type) && (result <= maxval))
759 result += toi(*first);
765 #ifndef BOOST_NO_WREGEX
// Returns the syntax type recorded for wide character c by searching the
// *syntax list linearly; the fallback for characters not in the list is not
// visible in this excerpt.
767 unsigned int BOOST_REGEX_CALL c_regex_traits<wchar_t>::syntax_type(size_type c)
770 std::list<syntax_map_t>::const_iterator first, last;
771 first = syntax->begin();
772 last = syntax->end();
// The stored character is cast to uchar_type before the comparison.
775 if((uchar_type)(*first).c == c)
776 return (*first).type;
// Initialises the shared state used by the wide-character traits.
782 void BOOST_REGEX_CALL c_regex_traits<wchar_t>::init()
// NOTE(review): here re_init_threads() is called outside the
// BOOST_HAS_THREADS guard, whereas c_regex_traits<char>::init() calls it
// inside the guard; confirm which is intended.
785 re_detail::re_init_threads();
786 #ifdef BOOST_HAS_THREADS
787 re_detail::cs_guard g(*re_detail::p_re_lock);
// Presumably a placeholder that never equals a real locale name, so the
// first update() call sees a mismatch.
794 wlocale_name = new std::string("xxxxxxxxxxxxxxxx");
795 #ifndef BOOST_NO_EXCEPTIONS
798 syntax = new std::list<syntax_map_t>();
799 BOOST_REGEX_NOEH_ASSERT(syntax)
800 #ifndef BOOST_NO_EXCEPTIONS
// Wide-character collating-name lookup used by update(): narrows the name,
// calls the base-class narrow lookup, then widens the result. It follows the
// same conversion steps as lookup_collatename() in this file.
812 bool BOOST_REGEX_CALL c_regex_traits<wchar_t>::do_lookup_collate(std::basic_string<wchar_t>& out, const wchar_t* first, const wchar_t* last)
// Copy [first, last) so a NUL-terminated string is available for strnarrow.
815 std::basic_string<wchar_t> s(first, last);
// A null buffer with zero length asks strnarrow for the required size.
816 std::size_t len = strnarrow(static_cast<char*>(0), 0, s.c_str());
817 scoped_array<char> buf(new char[len]);
818 strnarrow(buf.get(), len, s.c_str());
820 bool result = base_type::do_lookup_collate(t_out, buf.get());
// Same size-then-convert pattern for widening the narrow result.
823 len = strwiden(static_cast<wchar_t*>(0), 0, t_out.c_str());
824 scoped_array<wchar_t> wb(new wchar_t[len]);
825 strwiden(wb.get(), len, t_out.c_str());
// Brings the wide-character state in line with the current C locale.
832 void BOOST_REGEX_CALL c_regex_traits<wchar_t>::update()
835 #ifdef BOOST_HAS_THREADS
836 re_detail::cs_guard g(*re_detail::p_re_lock);
// The work below is guarded by a comparison with the LC_CTYPE locale name.
841 std::string l(std::setlocale(LC_CTYPE, 0));
842 if(*wlocale_name != l)
845 std::basic_string<wchar_t> s;
846 const wchar_t* p = L"zero";
// p+4 is one past the last character of L"zero".
847 if(do_lookup_collate(s, p, p+4))
// The looked-up element is expected to be a single character.
849 jm_assert(s.size() == 1);
850 re_zero_w = *s.c_str();
// Second lookup uses a three-character name; the line that reassigns p is not
// visible in this excerpt.
856 if(do_lookup_collate(s, p, p+3))
858 jm_assert(s.size() == 1);
859 re_ten_w = *s.c_str();
// Rebuild the wide syntax list: syntax type i is described by message id
// i + 100.
868 for(i = 1; i < syntax_max; ++i)
// NOTE(review): 256 is presumably the element count of buf, whose declaration
// is not visible in this excerpt.
871 re_get_message(static_cast<wchar_t*>(buf), 256, i+100);
876 syntax->push_back(sm);
// Determine how sort keys are laid out; find_sort_syntax also writes
// sort_delim through the second argument.
879 sort_type = re_detail::find_sort_syntax(&init_, &sort_delim);
// Releases the wide-character traits state; the statements inside the
// branches are not visible in this excerpt.
883 void BOOST_REGEX_CALL c_regex_traits<wchar_t>::m_free()
886 #ifdef BOOST_HAS_THREADS
887 re_detail::cs_guard g(*re_detail::p_re_lock);
893 // add reference to static member here to ensure
894 // that the linker includes it in the .exe:
// The address comparison is always true; it exists only for the reason given
// in the comment above.
895 if((nlsw_count == 0) && (0 != &c_regex_traits<wchar_t>::init_))
// Counterpart of the re_init_threads() call in init().
901 #ifdef BOOST_HAS_THREADS
903 re_detail::re_free_threads();
// Tests whether wide character c belongs to any of the classes in bitmask f.
907 bool BOOST_REGEX_CALL c_regex_traits<wchar_t>::do_iswclass(wchar_t c, boost::uint_fast32_t f)
// Table lookup path; the condition selecting it is not visible in this
// excerpt.
911 return BOOST_REGEX_MAKE_BOOL(re_detail::wide_unicode_classes[(uchar_type)c] & f);
// Otherwise each requested class bit is paired with the matching <cwctype>
// predicate; the statement taken on a match is not visible here.
912 if((f & char_class_alpha) && std::iswalpha(c))
914 if((f & char_class_cntrl) && std::iswcntrl(c))
916 if((f & char_class_digit) && std::iswdigit(c))
918 if((f & char_class_lower) && std::iswlower(c))
920 if((f & char_class_punct) && std::iswpunct(c))
922 if((f & char_class_space) && std::iswspace(c))
924 if((f & char_class_upper) && std::iswupper(c))
926 if((f & char_class_xdigit) && std::iswxdigit(c))
// The unicode class has no library predicate and is handled separately.
928 if(f & char_class_unicode)
// Computes the collation sort key of `in` with std::wcsxfrm.
933 void BOOST_REGEX_CALL c_regex_traits<wchar_t>::transform(std::basic_string<wchar_t>& out, const std::basic_string<wchar_t>& in)
// Two alternative definitions of n follow; the preprocessor condition that
// selects between them is not visible in this excerpt.
// Normal path: a null destination with size 0 asks for the required length.
937 std::size_t n = std::wcsxfrm(0, in.c_str(), 0);
939 // broken wcsxfrm under VC6 doesn't check size of
940 // output buffer, we have no choice but to guess!
941 std::size_t n = 100 * in.size();
// Both (size_t)-1 and 0 take the guarded branch, whose body is not visible.
943 if((n == (std::size_t)(-1)) || (n == 0))
// n + 1 leaves room for the terminating NUL.
948 scoped_array<wchar_t> buf(new wchar_t[n+1]);
949 n = std::wcsxfrm(buf.get(), in.c_str(), n+1);
950 if(n == (std::size_t)(-1))
// Produces a sort key for `in`, trimmed according to sort_type. (The lines
// that fill `out` and open the switch are not visible in this excerpt.)
958 void BOOST_REGEX_CALL c_regex_traits<wchar_t>::transform_primary(std::basic_string<wchar_t>& out, const std::basic_string<wchar_t>& in)
963 case re_detail::sort_C:
964 case re_detail::sort_unknown:
966 case re_detail::sort_fixed:
// sort_delim is used as a position here; the size check keeps erase() from
// being called with a position at or beyond the end of the string.
967 if((unsigned)sort_delim < out.size())
968 out.erase((int)sort_delim);
970 case re_detail::sort_delim:
// sort_delim is used as a delimiter character here: scan for it, ignoring a
// delimiter that is the last character.
971 for(unsigned int i = 0; i < out.size(); ++i)
973 if((out[i] == sort_delim) && (i+1 < out.size()))
// Static members set by update() via find_sort_syntax() and read by
// transform_primary().
982 unsigned c_regex_traits<wchar_t>::sort_type;
// Used either as a position or as a delimiter character, depending on
// sort_type.
983 wchar_t c_regex_traits<wchar_t>::sort_delim;
// Converts a single wide digit character to its numeric value, or -1 when c
// is neither a decimal nor a hexadecimal digit.
986 int BOOST_REGEX_CALL c_regex_traits<wchar_t>::toi(wchar_t c)
// Decimal digit: offset from the locale's zero character.
988 if(is_class(c, char_class_digit))
989 return c - re_zero_w;
// Hex letter: offset from re_ten_w after translate(…, true) is applied to
// both.
990 if(is_class(c, char_class_xdigit))
991 return 10 + translate(c, true) - translate(re_ten_w, true);
992 return -1; // error!!
// Parses a number from [first, last) in the given radix. `first` is taken by
// reference; the statement that advances it is not visible in this excerpt.
995 int BOOST_REGEX_CALL c_regex_traits<wchar_t>::toi(const wchar_t*& first, const wchar_t* last, int radix)
1000 // if radix is less than zero, then restrict
1001 // return value to charT. NB assumes sizeof(charT) <= sizeof(int)
// Limit derived from the bit width of the character type.
1003 maxval = 1u << (sizeof(*first) * CHAR_BIT - 1);
// Otherwise the limit is the largest unsigned int.
1010 maxval = (unsigned int)-1;
1014 unsigned int result = 0;
// Radices above 10 accept hexadecimal digits; others accept decimal only.
1015 unsigned int type = (radix > 10) ? char_class_xdigit : char_class_digit;
// Stop at the end of input, at the first non-digit, or once result exceeds
// maxval.
1016 while((first != last) && is_class(*first, type) && (result <= maxval))
1019 result += toi(*first);
// Looks up a wide class name by narrowing it and calling the narrow lookup
// do_lookup_class().
1025 boost::uint_fast32_t BOOST_REGEX_CALL c_regex_traits<wchar_t>::lookup_classname(const wchar_t* first, const wchar_t* last)
// Copy [first, last) so a NUL-terminated string is available for strnarrow.
1027 std::basic_string<wchar_t> s(first, last);
// A null buffer with zero length asks strnarrow for the required size.
1028 std::size_t len = strnarrow(static_cast<char*>(0), 0, s.c_str());
1029 scoped_array<char> buf(new char[len]);
1030 strnarrow(buf.get(), len, s.c_str());
1031 boost::uint_fast32_t result = do_lookup_class(buf.get());
// Static instance of the wide traits class. m_free() takes its address so
// that the linker keeps it, and update() passes it to find_sort_syntax().
1035 c_regex_traits<wchar_t> c_regex_traits<wchar_t>::init_;
// Converts the wide string s2 to a narrow string in s1 (capacity len) using
// std::wcstombs. Callers in this file first pass a null buffer with len 0 to
// learn the required size.
1037 std::size_t BOOST_REGEX_CALL c_regex_traits<wchar_t>::strnarrow(char *s1, std::size_t len, const wchar_t *s2)
1039 BOOST_RE_GUARD_STACK
// Size estimate: one narrow character per wide character, plus the NUL.
// NOTE(review): a multibyte encoding can need more than one byte per wide
// character, so this estimate may be too small; confirm.
1040 std::size_t size = std::wcslen(s2) + 1;
// The check that uses `size` is not visible in this excerpt.
1043 return std::wcstombs(s1, s2, len);
// Converts the narrow string s2 to a wide string in s1 (capacity len) using
// std::mbstowcs. Callers in this file first pass a null buffer with len 0 to
// learn the required size.
1046 std::size_t BOOST_REGEX_CALL c_regex_traits<wchar_t>::strwiden(wchar_t *s1, std::size_t len, const char *s2)
1048 BOOST_RE_GUARD_STACK
// Size estimate: one wide character per narrow character, plus the NUL.
1049 std::size_t size = std::strlen(s2) + 1;
// The check that uses `size`, and the final return, are not visible in this
// excerpt.
1052 size = std::mbstowcs(s1, s2, len);
1057 #endif // BOOST_NO_WREGEX
1059 } // namespace boost