3 * Copyright (c) 1998-2002
6 * Permission to use, copy, modify, distribute and sell this software
7 * and its documentation for any purpose is hereby granted without fee,
8 * provided that the above copyright notice appear in all copies and
9 * that both that copyright notice and this permission notice appear
10 * in supporting documentation. Dr John Maddock makes no representations
11 * about the suitability of this software for any purpose.
12 * It is provided "as is" without express or implied warranty.
17 * LOCATION: see http://www.boost.org for most recent version.
18 * FILE c_regex_traits.cpp
19 * VERSION see <boost/version.hpp>
20 * DESCRIPTION: Implements the c_regex_traits<charT> traits class
23 #define BOOST_REGEX_SOURCE
30 #include <boost/cregex.hpp>
31 #include <boost/regex/regex_traits.hpp>
32 #include <boost/regex/detail/regex_synch.hpp>
33 #include <boost/regex/detail/regex_cstring.hpp>
34 #include <boost/scoped_array.hpp>
36 #include "primary_transform.hpp"
39 #if defined(BOOST_HAS_NL_TYPES_H)
// Workaround symbol for the Comeau front end. The function body is empty and it
// is only compiled when __COMO__ is defined and __COMO_VERSION__ <= 4245.
43 // Fixes a very strange bug in Comeau 4.2.45.2 that would otherwise result in
44 // an instantiation loop
45 #if defined(__COMO__) && __COMO_VERSION__ <= 4245
46 void c_regex_adopted_no_longer_needed_loop_shutter_upper() { }
// Class-id table: one c_traits_base::char_class_* mask per supported class name.
// do_lookup_class returns re_char_class_id[i] for the index i at which a name
// matched, so the order here has to stay in step with re_char_class_names.
// Fourteen entries are listed, which matches re_classes_max.
54 boost::uint_fast32_t re_char_class_id[] = {
55 boost::re_detail::c_traits_base::char_class_alnum,
56 boost::re_detail::c_traits_base::char_class_alpha,
57 boost::re_detail::c_traits_base::char_class_cntrl,
58 boost::re_detail::c_traits_base::char_class_digit,
59 boost::re_detail::c_traits_base::char_class_graph,
60 boost::re_detail::c_traits_base::char_class_lower,
61 boost::re_detail::c_traits_base::char_class_print,
62 boost::re_detail::c_traits_base::char_class_punct,
63 boost::re_detail::c_traits_base::char_class_space,
64 boost::re_detail::c_traits_base::char_class_upper,
65 boost::re_detail::c_traits_base::char_class_xdigit,
66 boost::re_detail::c_traits_base::char_class_blank,
67 boost::re_detail::c_traits_base::char_class_word,
68 boost::re_detail::c_traits_base::char_class_unicode,
// Built-in class names; do_lookup_class compares them to the requested name
// with std::strcmp and uses the matching index into re_char_class_id.
// NOTE(review): the initialiser entries are not visible in this view;
// presumably one name per id above, in the same order - confirm.
71 const char* re_char_class_names[] = {
// State for the locale-specific class names, set up by re_init_classes:
//  - re_cls_name: LC_CTYPE locale name last seen by re_update_classes.
//  - pclasses: array of re_classes_max strings allocated in re_init_classes.
//  - classes_count: tested against zero in re_init_classes and decremented in
//    re_free_classes.
//  - re_classes_max: number of class slots; bounds the loops in
//    re_update_classes and do_lookup_class.
88 std::string* re_cls_name;
89 std::string* pclasses;
90 unsigned int classes_count = 0;
91 const unsigned int re_classes_max = 14;
// Builds an entry from two pointer pairs: member name is initialised from
// (p1, p2) and member value from (p3, p4).
// NOTE(review): the member declarations are outside this view; presumably both
// are std::string constructed from a [begin, end) character range - confirm.
100 collate_name_t(const char* p1, const char* p2, const char* p3, const char* p4)
101 : name(p1, p2), value(p3, p4) {}
// State for locale-specific collating names, set up by re_init_collate:
//  - re_coll_name: LC_COLLATE locale name last seen by re_update_collate.
//  - pcoll_names: list of name/value pairs appended to by re_update_collate and
//    searched by c_traits_base::do_lookup_collate.
//  - collate_count: tested against zero in re_init_collate and decremented in
//    re_free_collate.
104 std::string* re_coll_name;
105 std::list<collate_name_t>* pcoll_names;
106 unsigned int collate_count = 0;
// BOOST_RE_MESSAGE_BASE defaults to 0 unless the build already defines it.
// NOTE(review): its use is not visible in this view.
110 #ifndef BOOST_RE_MESSAGE_BASE
111 #define BOOST_RE_MESSAGE_BASE 0
// Handle of the open message catalogue. The value (nl_catd)-1 is what the
// code below tests for to mean that no catalogue is open.
114 #if defined(BOOST_HAS_NL_TYPES_H)
115 nl_catd message_cat = (nl_catd)-1;
// message_count is tested against zero by re_message_init and re_message_free;
// mess_locale holds the LC_MESSAGES locale name that re_message_update
// compares against the current one.
118 unsigned int message_count = 0;
119 std::string* mess_locale;
// Cache of error message strings indexed by error id. re_get_error_str stores
// a re_strdup copy in an empty slot; re_message_update and re_message_free
// release slots with re_strfree and reset them to 0.
// NOTE(review): the initialiser list is not visible in this view.
121 BOOST_REGEX_DECL char* re_custom_error_messages[] = {
// When LC_MESSAGES is not defined it is mapped to LC_CTYPE, so that the
// std::setlocale(LC_MESSAGES, 0) query in re_message_update still compiles.
147 #if !defined(LC_MESSAGES)
148 #define LC_MESSAGES LC_CTYPE
// State for c_regex_traits<char>:
//  - entry_count: NOTE(review) presumably counts init/m_free pairs; the only
//    visible reference is a comment in c_regex_traits<char>::init - confirm.
//  - ctype_name / collate_name: locale names allocated in init and compared
//    with the current LC_CTYPE / LC_COLLATE names in update.
154 unsigned int entry_count = 0;
156 std::string* ctype_name;
157 std::string* collate_name;
// Number of entries in syntax_map, class_map and lower_case_map: one per
// possible unsigned char value.
160 map_size = UCHAR_MAX + 1
// State for c_regex_traits<wchar_t>, compiled only without BOOST_NO_WREGEX.
163 #ifndef BOOST_NO_WREGEX
// Reference characters assigned in c_regex_traits<wchar_t>::update from
// collating-name lookups and used by toi(wchar_t) for digit arithmetic.
165 BOOST_REGEX_DECL wchar_t re_zero_w;
166 BOOST_REGEX_DECL wchar_t re_ten_w;
// NOTE(review): nlsw_count is not referenced on any visible line; presumably an
// init/m_free counter like the others - confirm.
168 unsigned int nlsw_count = 0;
// LC_CTYPE locale name last seen by the wide update; allocated in the wide init.
169 std::string* wlocale_name = 0;
// Syntax entries appended by the wide update and scanned by syntax_type.
177 std::list<syntax_map_t>* syntax;
// Forward declaration so the re_get_message overloads below can call it
// before its definition further down.
182 std::size_t BOOST_REGEX_CALL _re_get_message(char* buf, std::size_t len, std::size_t id);
// Generic overload: fetches message id as narrow characters and widens it.
//   buf - destination buffer, len - its capacity, id - message id.
// The value _re_get_message returns for a null buffer of length 0 is used as
// the size of a scratch char array; the text is fetched into that array and
// then converted into buf with c_regex_traits<wchar_t>::strwiden.
// NOTE(review): strwiden is always taken from the wchar_t traits, so charT is
// presumably wchar_t in practice - confirm.
// NOTE(review): lines between these statements, including the return, are not
// visible in this view.
184 template <class charT>
185 std::size_t BOOST_REGEX_CALL re_get_message(charT* buf, std::size_t len, std::size_t id)
187 std::size_t size = _re_get_message(static_cast<char*>(0), 0, id);
190 boost::scoped_array<char> cb(new char[size]);
191 _re_get_message(cb.get(), size, id);
192 size = boost::c_regex_traits<wchar_t>::strwiden(buf, len, cb.get());
// Narrow overload: no conversion is needed, so it forwards straight to
// _re_get_message and returns its result.
196 inline std::size_t BOOST_REGEX_CALL re_get_message(char* buf, std::size_t len, std::size_t id)
198 return _re_get_message(buf, len, id);
// Allocates the class-name state when classes_count is zero: re_cls_name
// starts as a placeholder string and pclasses as an array of re_classes_max
// strings.
// NOTE(review): the placeholder presumably never equals a real locale name, so
// the first re_update_classes call always refreshes - confirm.
// NOTE(review): the code guarded by BOOST_NO_EXCEPTIONS and the counter
// increment are not visible in this view.
201 void BOOST_REGEX_CALL re_init_classes()
204 if(classes_count == 0)
206 re_cls_name = new std::string("xxxxxxxx");
207 #ifndef BOOST_NO_EXCEPTIONS
210 pclasses = new std::string[re_classes_max];
211 BOOST_REGEX_NOEH_ASSERT(pclasses)
212 #ifndef BOOST_NO_EXCEPTIONS
// Decrements classes_count and takes its branch when the count reaches zero.
// NOTE(review): the branch body is not visible; presumably it releases
// re_cls_name and pclasses - confirm.
224 void BOOST_REGEX_CALL re_free_classes()
227 if(--classes_count == 0)
// Compares the cached locale name with the current LC_CTYPE locale name,
// stores the current name, and requests messages 300 .. 300+re_classes_max-1
// into a 256-character buffer.
// NOTE(review): braces are not visible; presumably the assignment and the loop
// only run when the names differ, and each message is stored in pclasses[i]
// - confirm.
234 void BOOST_REGEX_CALL re_update_classes()
237 if(*re_cls_name != std::setlocale(LC_CTYPE, 0))
239 *re_cls_name = std::setlocale(LC_CTYPE, 0);
242 for(i = 0; i < re_classes_max; ++i)
244 re_get_message(buf, 256, i+300);
// Allocates the collating-name state when collate_count is zero: re_coll_name
// starts as a placeholder string and pcoll_names as an empty list.
// NOTE(review): the code guarded by BOOST_NO_EXCEPTIONS and the counter
// increment are not visible in this view.
250 void BOOST_REGEX_CALL re_init_collate()
253 if(collate_count == 0)
255 re_coll_name = new std::string("xxxxxxxx");
256 #ifndef BOOST_NO_EXCEPTIONS
259 pcoll_names = new std::list<collate_name_t>();
260 BOOST_REGEX_NOEH_ASSERT(pcoll_names)
261 #ifndef BOOST_NO_EXCEPTIONS
// Decrements collate_count and takes its branch when the count reaches zero.
// NOTE(review): the branch body is not visible; presumably it releases
// re_coll_name and pcoll_names - confirm.
273 void BOOST_REGEX_CALL re_free_collate()
276 if(--collate_count == 0)
// Refreshes the collating-name list for the current LC_COLLATE locale.
// Messages are read starting at id 400 into a 256-character buffer. Each one
// is split into two whitespace-separated tokens, [p1, p2) and [p3, p4), which
// are appended to pcoll_names as a collate_name_t.
// NOTE(review): the loop header, the initial pointer assignments and the
// increment of i are not visible; presumably the loop ends on an empty
// message - confirm.
283 void BOOST_REGEX_CALL re_update_collate()
286 if(*re_coll_name != std::setlocale(LC_COLLATE, 0))
288 *re_coll_name = std::setlocale(LC_COLLATE, 0);
290 unsigned int i = 400;
291 re_get_message(buf, 256, i);
294 char* p1, *p2, *p3, *p4;;
// The unsigned char casts keep std::isspace well defined for negative chars.
// Skip leading whitespace to find the start of the first token.
296 while(*p1 && std::isspace((unsigned char)*p1))++p1;
// Advance to the end of the first token.
298 while(*p2 && !std::isspace((unsigned char)*p2))++p2;
// Skip the separator to find the start of the second token.
300 while(*p3 && std::isspace((unsigned char)*p3))++p3;
// Advance to the end of the second token.
302 while(*p4 && !std::isspace((unsigned char)*p4))++p4;
303 pcoll_names->push_back(collate_name_t(p1, p2, p3, p4));
305 re_get_message(buf, 256, i);
// Copies message id into buf (capacity len) and returns a size.
// With BOOST_HAS_NL_TYPES_H and an open catalogue, the text is first looked up
// with catgets using a null default string; otherwise, or as a fallback, the
// built-in text comes from re_get_default_message.
// NOTE(review): the null check on m, the copy into buf and the early return
// are not visible in this view.
310 std::size_t BOOST_REGEX_CALL _re_get_message(char* buf, std::size_t len, std::size_t id)
313 // get the customised message if any:
314 #if defined(BOOST_HAS_NL_TYPES_H)
315 if(message_cat != (nl_catd)-1)
317 const char* m = catgets(message_cat, 0, id, 0);
// Size includes the terminating null character.
320 std::size_t size = std::strlen(m) + 1;
330 // now get the default message if any:
331 return boost::re_detail::re_get_default_message(buf, len, id);
// Allocates mess_locale with a placeholder value when message_count is zero.
// NOTE(review): the counter increment is not visible in this view.
334 void BOOST_REGEX_CALL re_message_init()
337 if(message_count == 0)
339 mess_locale = new std::string("xxxxxxxxxxxxxxxx");
// Re-synchronises the message state with the current LC_MESSAGES locale.
// When the locale name differs from the cached one: any open catalogue is
// closed and the handle reset, the catalogue named by
// c_traits_base::get_catalogue() is opened if that name is non-empty, and all
// cached error strings are released so that re_get_error_str fetches them
// again on demand.
// Throws std::runtime_error if the catalogue cannot be opened and exceptions
// are enabled; BOOST_REGEX_NOEH_ASSERT checks the same condition.
// NOTE(review): std::setlocale can return a null pointer on failure, and
// constructing std::string from null is undefined; presumably a query with a
// valid category never fails - confirm.
344 void BOOST_REGEX_CALL re_message_update()
348 // called whenever the global locale changes:
350 std::string l(std::setlocale(LC_MESSAGES, 0));
351 if(*mess_locale != l)
354 #if defined(BOOST_HAS_NL_TYPES_H)
355 if(message_cat != (nl_catd)-1)
357 catclose(message_cat);
358 message_cat = (nl_catd)-1;
360 if(*boost::re_detail::c_traits_base::get_catalogue())
362 message_cat = catopen(boost::re_detail::c_traits_base::get_catalogue(), 0);
363 #ifndef BOOST_NO_EXCEPTIONS
364 if(message_cat == (nl_catd)-1)
366 std::string m("Unable to open message catalog: ");
367 throw std::runtime_error(m + boost::re_detail::c_traits_base::get_catalogue());
370 BOOST_REGEX_NOEH_ASSERT(message_cat != (nl_catd)-1);
// Drop every cached message; the texts belong to the previous locale.
374 for(int i = 0; i < boost::REG_E_UNKNOWN; ++i)
376 if(re_custom_error_messages[i])
378 boost::re_detail::re_strfree(re_custom_error_messages[i]);
379 re_custom_error_messages[i] = 0;
// Releases the message state once message_count is zero: closes the open
// catalogue, if any, and frees every cached error string.
// NOTE(review): the decrement of message_count is not visible in this view.
// NOTE(review): unlike re_message_update, no reset of message_cat to
// (nl_catd)-1 is visible after catclose; if none exists on the hidden lines, a
// later re-initialisation would see a stale handle - confirm.
385 void BOOST_REGEX_CALL re_message_free()
389 if(message_count == 0)
391 #if defined(BOOST_HAS_NL_TYPES_H)
392 if(message_cat != (nl_catd)-1)
393 catclose(message_cat);
396 for(int i = 0; i < boost::REG_E_UNKNOWN; ++i)
398 if(re_custom_error_messages[i])
400 boost::re_detail::re_strfree(re_custom_error_messages[i]);
401 re_custom_error_messages[i] = 0;
// Returns the message text for error id, caching it in
// re_custom_error_messages. With BOOST_HAS_THREADS the whole lookup runs under
// the regex lock. On a cache miss the text for message id + 200 is fetched
// into a 256-character buffer and a re_strdup copy is stored in the slot.
// One hidden branch returns the built-in re_default_error_messages entry.
// NOTE(review): the conditions selecting between the three returns are not
// visible, and no bounds check on id is visible either - confirm callers only
// pass valid ids.
408 const char* BOOST_REGEX_CALL re_get_error_str(unsigned int id)
411 #ifdef BOOST_HAS_THREADS
412 boost::re_detail::cs_guard g(*boost::re_detail::p_re_lock);
414 if(re_custom_error_messages[id] == 0)
417 _re_get_message(buf, 256, id + 200);
420 re_custom_error_messages[id] = boost::re_detail::re_strdup(buf);
421 return re_custom_error_messages[id];
423 return boost::re_detail::re_default_error_messages[id];
425 return re_custom_error_messages[id];
// Storage for the message catalogue name, zero-initialised so it starts as
// an empty string; written by set_message_catalogue.
433 char c_traits_base::regex_message_catalogue[BOOST_REGEX_MAX_PATH] = {0};
// Returns the text for error id as a std::string built from the pointer that
// re_get_error_str returns.
435 std::string BOOST_REGEX_CALL c_traits_base::error_string(unsigned id)
437 return re_get_error_str(id);
// Refreshes the reference characters used for digit arithmetic. The collating
// name zero is looked up and, when found, asserted to be a single character
// and stored in re_zero. A second, three-character name is then looked up
// the same way.
// NOTE(review): the reassignment of p and the store after the second lookup
// are not visible; presumably the name is ten and the target is re_ten
// - confirm.
440 void BOOST_REGEX_CALL c_traits_base::do_update_collate()
445 const char* p = "zero";
446 if(c_regex_traits<char>::lookup_collatename(s, p, p+4))
448 jm_assert(s.size() == 1);
449 re_zero = *s.c_str();
455 if(c_regex_traits<char>::lookup_collatename(s, p, p+3))
457 jm_assert(s.size() == 1);
// Rebuilds the three per-character lookup tables: syntax_map, class_map and
// lower_case_map.
464 void BOOST_REGEX_CALL c_traits_base::do_update_ctype()
467 // start by updating the syntax map:
// buf is two characters larger than the length passed to re_get_message.
469 char buf[map_size+2];
// Every character starts out as syntax_char.
470 std::memset(syntax_map, syntax_char, map_size);
// Message i + 100 lists the characters that have syntax type i.
471 for(i = 1; i < syntax_max; ++i)
474 re_get_message(static_cast<char*>(buf), map_size, i+100);
// The unsigned char cast keeps the index non-negative.
477 syntax_map[(unsigned char)*ptr] = (unsigned char)i;
481 // now update the character class map,
482 // and lower case map:
483 std::memset(class_map, 0, map_size);
// NOTE(review): the condition guarding each flag below is not visible;
// presumably the matching std::is* classification of i - confirm.
484 for(i = 0; i < map_size; ++i)
487 class_map[i] |= char_class_alpha;
489 class_map[i] |= char_class_cntrl;
491 class_map[i] |= char_class_digit;
493 class_map[i] |= char_class_lower;
495 class_map[i] |= char_class_upper;
497 class_map[i] |= char_class_punct;
499 class_map[i] |= char_class_space;
501 class_map[i] |= char_class_xdigit;
// Flags set from fixed character values rather than per-character conditions.
503 class_map['_'] |= char_class_underscore;
504 class_map[' '] |= char_class_blank;
505 class_map['\t'] |= char_class_blank;
// Lower-case mapping for every unsigned char value, via std::tolower.
506 for(i = 0; i < map_size; ++i)
508 lower_case_map[i] = (char)std::tolower(i);
// Maps the class name p to its class-id mask from re_char_class_id.
// Two passes over the re_classes_max slots are made; the second compares p
// with the built-in names in re_char_class_names using std::strcmp.
// NOTE(review): the condition of the first pass is not visible; presumably it
// compares p with the locale-specific names in pclasses - confirm. The return
// value when nothing matches is not visible either.
513 boost::uint_fast32_t BOOST_REGEX_CALL c_traits_base::do_lookup_class(const char* p)
517 for(i = 0; i < re_classes_max; ++i)
521 return re_char_class_id[i];
524 for(i = 0; i < re_classes_max; ++i)
526 if(std::strcmp(re_char_class_names[i], p) == 0)
528 return re_char_class_id[i];
// Looks up collating name p and writes its value to buf.
// The locale-specific list pcoll_names is searched first; an entry whose name
// equals p supplies buf. Otherwise the built-in table is consulted through
// re_lookup_def_collate_name.
// NOTE(review): the loop header and the returns are not visible. The final
// test singles out a failed lookup of a one-character name; presumably that
// character is then used as its own value - confirm.
534 bool BOOST_REGEX_CALL c_traits_base::do_lookup_collate(std::string& buf, const char* p)
537 std::list<collate_name_t>::iterator first, last;
538 first = pcoll_names->begin();
539 last = pcoll_names->end();
542 if((*first).name == p)
544 buf = (*first).value;
550 bool result = re_detail::re_lookup_def_collate_name(buf, p);
551 if((result == 0) && (std::strlen(p) == 1))
// Replaces the stored message catalogue name with l.
// The previous name is captured in old before the copy.
// NOTE(review): the statement taken when l does not fit and the final return
// are not visible; presumably the function returns early without copying and
// otherwise returns old - confirm. With such an early return the strcpy
// cannot overflow, because l.size() plus the terminator fits in the array.
559 std::string BOOST_REGEX_CALL c_traits_base::set_message_catalogue(const std::string& l)
561 if(sizeof(regex_message_catalogue) <= l.size())
563 std::string old(regex_message_catalogue);
564 std::strcpy(regex_message_catalogue, l.c_str());
// Definitions of the static lookup tables filled by do_update_ctype; each has
// map_size entries, one per unsigned char value.
568 unsigned char c_traits_base::syntax_map[map_size];
569 unsigned short c_traits_base::class_map[map_size];
570 char c_traits_base::lower_case_map[map_size];
572 } // namespace re_detail
574 #ifndef BOOST_NO_WREGEX
// Wide-character collating-name lookup for the name in [first, last).
// The name is narrowed with strnarrow, resolved through the base class
// do_lookup_collate, and the narrow result widened again with strwiden.
// An empty result is treated as a failed lookup.
// NOTE(review): the assignment to out, the branch that appends a null
// character, and the return are not fully visible in this view.
575 bool BOOST_REGEX_CALL c_regex_traits<wchar_t>::lookup_collatename(std::basic_string<wchar_t>& out, const wchar_t* first, const wchar_t* last)
578 std::basic_string<wchar_t> s(first, last);
// The value returned for a null buffer of length 0 sizes the narrow buffer.
579 std::size_t len = strnarrow(static_cast<char*>(0), 0, s.c_str());
580 scoped_array<char> buf(new char[len]);
581 strnarrow(buf.get(), len, s.c_str());
583 bool result = base_type::do_lookup_collate(t_out, buf.get());
584 if(t_out.size() == 0) result = false;
// Same two-step sizing for the wide result.
589 len = strwiden(static_cast<wchar_t*>(0), 0, t_out.c_str());
590 scoped_array<wchar_t> wb(new wchar_t[len]);
591 strwiden(wb.get(), len, t_out.c_str());
595 out.append(1, (wchar_t)0);
// Static traits instance; its address is passed to find_sort_syntax in
// c_regex_traits<char>::update.
601 c_regex_traits<char> c_regex_traits<char>::i;
// Sets up the narrow traits state. With BOOST_HAS_THREADS the thread support
// is initialised and the regex lock held for the rest of the function.
// ctype_name and collate_name are allocated with placeholder values.
// NOTE(review): the test on entry_count, the calls into the other init
// helpers and the code guarded by BOOST_NO_EXCEPTIONS are not visible.
603 void BOOST_REGEX_CALL c_regex_traits<char>::init()
606 #ifdef BOOST_HAS_THREADS
607 re_detail::re_init_threads();
608 re_detail::cs_guard g(*re_detail::p_re_lock);
610 // just keep track of entry_count
613 ctype_name = new std::string("xxxxxxxxxxxxxxxx");
614 #ifndef BOOST_NO_EXCEPTIONS
617 collate_name = new std::string("xxxxxxxxxxxxxxxx");
618 BOOST_REGEX_NOEH_ASSERT(collate_name)
619 #ifndef BOOST_NO_EXCEPTIONS
// Brings the narrow traits state in line with the current C locale, under
// the regex lock when BOOST_HAS_THREADS is defined. The cached LC_COLLATE and
// LC_CTYPE names are compared with the current ones and refreshed, then the
// sort-key syntax is detected again with find_sort_syntax.
// NOTE(review): the update calls made when a name differs are not visible;
// presumably do_update_collate and do_update_ctype - confirm.
634 void BOOST_REGEX_CALL c_regex_traits<char>::update()
637 #ifdef BOOST_HAS_THREADS
638 re_detail::cs_guard g(*re_detail::p_re_lock);
641 if(*collate_name != std::setlocale(LC_COLLATE, 0))
644 *collate_name = std::setlocale(LC_COLLATE, 0);
646 if(*ctype_name != std::setlocale(LC_CTYPE, 0))
649 *ctype_name = std::setlocale(LC_CTYPE, 0);
651 sort_type = re_detail::find_sort_syntax(&i, &sort_delim);
// Tears down the narrow traits state. With BOOST_HAS_THREADS the work runs
// under the regex lock and re_free_threads is called afterwards.
// NOTE(review): the release of the state itself is not visible, nor is how
// the lock guard is released before re_free_threads - confirm.
654 void BOOST_REGEX_CALL c_regex_traits<char>::m_free()
657 #ifdef BOOST_HAS_THREADS
658 re_detail::cs_guard g(*re_detail::p_re_lock);
669 #ifdef BOOST_HAS_THREADS
671 re_detail::re_free_threads();
// Writes the locale sort key of in to out using std::strxfrm.
// NOTE(review): the handling of the (size_t)(-1) results and the assignment
// to out are not visible; presumably out falls back to in on failure
// - confirm.
675 void BOOST_REGEX_CALL c_regex_traits<char>::transform(std::string& out, const std::string& in)
// A null destination with length 0 only asks for the key length.
678 std::size_t n = std::strxfrm(0, in.c_str(), 0);
679 if(n == (std::size_t)(-1))
// One extra element for the terminating null character.
684 scoped_array<char> buf(new char[n+1]);
685 n = std::strxfrm(buf.get(), in.c_str(), n+1);
686 if(n == (std::size_t)(-1))
// Reduces a sort key to its primary part according to sort_type.
//   sort_C / sort_unknown: no case body is visible.
//   sort_fixed: the key is cut at the fixed position held in sort_delim.
//   sort_delim: the key is scanned for the delimiter character sort_delim.
// NOTE(review): the statements producing out and the body of the delimiter
// branch are not visible in this view.
// NOTE(review): the wide version guards its erase with a check that
// sort_delim is smaller than out.size(); no such guard is visible here.
// std::string::erase throws std::out_of_range when the position exceeds
// size(), so this branch may throw for short keys - confirm against the
// hidden lines.
694 void BOOST_REGEX_CALL c_regex_traits<char>::transform_primary(std::string& out, const std::string& in)
699 case re_detail::sort_C:
700 case re_detail::sort_unknown:
702 case re_detail::sort_fixed:
703 out.erase((int)sort_delim);
705 case re_detail::sort_delim:
706 for(unsigned int i = 0; i < out.size(); ++i)
// The i+1 bound ensures at least one character follows the delimiter.
708 if((out[i] == sort_delim) && (i+1 < out.size()))
// Static sort-key description for the narrow traits, set in update from
// find_sort_syntax and read by transform_primary.
717 unsigned c_regex_traits<char>::sort_type;
718 char c_regex_traits<char>::sort_delim;
// Converts a single digit character to its numeric value, or -1 if c is
// neither a decimal nor a hexadecimal digit. Hexadecimal letters are valued
// as 10 plus the distance between the translated c and the translated re_ten.
// NOTE(review): the return for decimal digits is not visible; presumably
// c - re_zero as in the wide version - confirm.
721 int BOOST_REGEX_CALL c_regex_traits<char>::toi(char c)
723 if(is_class(c, char_class_digit))
725 if(is_class(c, char_class_xdigit))
726 return 10 + translate(c, true) - translate(re_ten, true);
727 return -1; // error!!
// Parses a number from [first, last), advancing first past the digits used.
// Hexadecimal digits are accepted when radix is above 10, otherwise only
// decimal digits. Parsing stops at the first character outside that class or
// once result exceeds maxval.
// NOTE(review): the branch conditions around the two maxval assignments, the
// multiplication by radix, the increment of first and the return are not
// visible in this view.
730 int BOOST_REGEX_CALL c_regex_traits<char>::toi(const char*& first, const char* last, int radix)
735 // if radix is less than zero, then restrict
736 // return value to charT. NB assumes sizeof(charT) <= sizeof(int)
// Limit derived from the bit width of the character type.
738 maxval = 1u << (sizeof(*first) * CHAR_BIT - 1);
// Largest unsigned int value.
745 maxval = (unsigned int)-1;
749 unsigned int result = 0;
750 unsigned int type = (radix > 10) ? char_class_xdigit : char_class_digit;
751 while((first != last) && is_class(*first, type) && (result <= maxval))
754 result += toi(*first);
760 #ifndef BOOST_NO_WREGEX
// Returns the syntax type recorded for character c by scanning the syntax
// list for an entry whose character equals c.
// NOTE(review): the loop header and the value returned when no entry matches
// are not visible in this view.
762 unsigned int BOOST_REGEX_CALL c_regex_traits<wchar_t>::syntax_type(size_type c)
765 std::list<syntax_map_t>::const_iterator first, last;
766 first = syntax->begin();
767 last = syntax->end();
770 if((uchar_type)(*first).c == c)
771 return (*first).type;
// Sets up the wide traits state: wlocale_name is allocated with a
// placeholder value and syntax as an empty list, under the regex lock when
// BOOST_HAS_THREADS is defined.
// NOTE(review): re_init_threads appears before the BOOST_HAS_THREADS guard
// here, whereas c_regex_traits<char>::init calls it inside the guard; confirm
// that re_init_threads is declared in builds without thread support.
// NOTE(review): the counter handling and the code guarded by
// BOOST_NO_EXCEPTIONS are not visible in this view.
777 void BOOST_REGEX_CALL c_regex_traits<wchar_t>::init()
780 re_detail::re_init_threads();
781 #ifdef BOOST_HAS_THREADS
782 re_detail::cs_guard g(*re_detail::p_re_lock);
789 wlocale_name = new std::string("xxxxxxxxxxxxxxxx");
790 #ifndef BOOST_NO_EXCEPTIONS
793 syntax = new std::list<syntax_map_t>();
794 BOOST_REGEX_NOEH_ASSERT(syntax)
795 #ifndef BOOST_NO_EXCEPTIONS
// Wide wrapper around the base class do_lookup_collate: the name in
// [first, last) is narrowed, looked up, and the narrow result widened again.
// NOTE(review): the assignment to out and the return are not visible.
807 bool BOOST_REGEX_CALL c_regex_traits<wchar_t>::do_lookup_collate(std::basic_string<wchar_t>& out, const wchar_t* first, const wchar_t* last)
810 std::basic_string<wchar_t> s(first, last);
// The value returned for a null buffer of length 0 sizes the narrow buffer.
811 std::size_t len = strnarrow(static_cast<char*>(0), 0, s.c_str());
812 scoped_array<char> buf(new char[len]);
813 strnarrow(buf.get(), len, s.c_str());
815 bool result = base_type::do_lookup_collate(t_out, buf.get());
// Same two-step sizing for the wide result.
818 len = strwiden(static_cast<wchar_t*>(0), 0, t_out.c_str());
819 scoped_array<wchar_t> wb(new wchar_t[len]);
820 strwiden(wb.get(), len, t_out.c_str());
// Brings the wide traits state in line with the current LC_CTYPE locale,
// under the regex lock when BOOST_HAS_THREADS is defined.
// When the locale name differs from wlocale_name: the reference characters
// re_zero_w and re_ten_w are refreshed from collating-name lookups, and the
// syntax list is refilled from messages 101 .. 100+syntax_max-1.
// Finally the sort-key syntax is detected again with find_sort_syntax.
// NOTE(review): the reassignment of p before the second lookup, the clearing
// of the syntax list and the construction of sm are not visible.
827 void BOOST_REGEX_CALL c_regex_traits<wchar_t>::update()
830 #ifdef BOOST_HAS_THREADS
831 re_detail::cs_guard g(*re_detail::p_re_lock);
836 std::string l(std::setlocale(LC_CTYPE, 0));
837 if(*wlocale_name != l)
840 std::basic_string<wchar_t> s;
841 const wchar_t* p = L"zero";
842 if(do_lookup_collate(s, p, p+4))
844 jm_assert(s.size() == 1);
845 re_zero_w = *s.c_str();
851 if(do_lookup_collate(s, p, p+3))
853 jm_assert(s.size() == 1);
854 re_ten_w = *s.c_str();
863 for(i = 1; i < syntax_max; ++i)
866 re_get_message(static_cast<wchar_t*>(buf), 256, i+100);
871 syntax->push_back(sm);
874 sort_type = re_detail::find_sort_syntax(&init_, &sort_delim);
// Tears down the wide traits state. With BOOST_HAS_THREADS the work runs
// under the regex lock and re_free_threads is called afterwards.
// NOTE(review): the release of the state itself is not visible in this view.
878 void BOOST_REGEX_CALL c_regex_traits<wchar_t>::m_free()
881 #ifdef BOOST_HAS_THREADS
882 re_detail::cs_guard g(*re_detail::p_re_lock);
894 #ifdef BOOST_HAS_THREADS
896 re_detail::re_free_threads();
// Tests whether wide character c belongs to any class selected in mask f.
// One path answers from the wide_unicode_classes table; the remaining tests
// pair each class flag with the matching std::isw* classification.
// NOTE(review): the condition selecting the table path and the returns after
// each test are not visible; presumably the table covers a limited range of
// c and each satisfied test returns true - confirm.
900 bool BOOST_REGEX_CALL c_regex_traits<wchar_t>::do_iswclass(wchar_t c, boost::uint_fast32_t f)
904 return BOOST_REGEX_MAKE_BOOL(re_detail::wide_unicode_classes[(uchar_type)c] & f);
905 if((f & char_class_alpha) && std::iswalpha(c))
907 if((f & char_class_cntrl) && std::iswcntrl(c))
909 if((f & char_class_digit) && std::iswdigit(c))
911 if((f & char_class_lower) && std::iswlower(c))
913 if((f & char_class_punct) && std::iswpunct(c))
915 if((f & char_class_space) && std::iswspace(c))
917 if((f & char_class_upper) && std::iswupper(c))
919 if((f & char_class_xdigit) && std::iswxdigit(c))
921 if(f & char_class_unicode)
// Writes the locale sort key of in to out using std::wcsxfrm.
// NOTE(review): the two declarations of n belong to alternative preprocessor
// branches whose directives are not visible. The handling of the failure
// results and the assignment to out are not visible either.
926 void BOOST_REGEX_CALL c_regex_traits<wchar_t>::transform(std::basic_string<wchar_t>& out, const std::basic_string<wchar_t>& in)
// A null destination with length 0 only asks for the key length.
930 std::size_t n = std::wcsxfrm(0, in.c_str(), 0);
932 // broken wcsxfrm under VC6 doesn't check size of
933 // output buffer, we have no choice but to guess!
934 std::size_t n = 100 * in.size();
936 if((n == (std::size_t)(-1)) || (n == 0))
// One extra element for the terminating null character.
941 scoped_array<wchar_t> buf(new wchar_t[n+1]);
942 n = std::wcsxfrm(buf.get(), in.c_str(), n+1);
943 if(n == (std::size_t)(-1))
// Reduces a wide sort key to its primary part according to sort_type.
//   sort_C / sort_unknown: no case body is visible.
//   sort_fixed: the key is cut at the position held in sort_delim, but only
//               when that position lies inside the key.
//   sort_delim: the key is scanned for the delimiter character sort_delim.
// NOTE(review): the statements producing out and the body of the delimiter
// branch are not visible in this view.
951 void BOOST_REGEX_CALL c_regex_traits<wchar_t>::transform_primary(std::basic_string<wchar_t>& out, const std::basic_string<wchar_t>& in)
956 case re_detail::sort_C:
957 case re_detail::sort_unknown:
959 case re_detail::sort_fixed:
// Guard keeps erase from being called with a position past the end.
960 if((unsigned)sort_delim < out.size())
961 out.erase((int)sort_delim);
963 case re_detail::sort_delim:
964 for(unsigned int i = 0; i < out.size(); ++i)
// The i+1 bound ensures at least one character follows the delimiter.
966 if((out[i] == sort_delim) && (i+1 < out.size()))
// Static sort-key description for the wide traits, set in update from
// find_sort_syntax and read by transform_primary.
975 unsigned c_regex_traits<wchar_t>::sort_type;
976 wchar_t c_regex_traits<wchar_t>::sort_delim;
// Converts a single wide digit character to its numeric value.
// Decimal digits are valued by their distance from re_zero_w; hexadecimal
// letters as 10 plus the distance between the translated c and the translated
// re_ten_w. Returns -1 for any other character.
979 int BOOST_REGEX_CALL c_regex_traits<wchar_t>::toi(wchar_t c)
981 if(is_class(c, char_class_digit))
982 return c - re_zero_w;
983 if(is_class(c, char_class_xdigit))
984 return 10 + translate(c, true) - translate(re_ten_w, true);
985 return -1; // error!!
// Parses a number from [first, last), advancing first past the digits used.
// Hexadecimal digits are accepted when radix is above 10, otherwise only
// decimal digits. Parsing stops at the first character outside that class or
// once result exceeds maxval.
// NOTE(review): the branch conditions around the two maxval assignments, the
// multiplication by radix, the increment of first and the return are not
// visible in this view.
988 int BOOST_REGEX_CALL c_regex_traits<wchar_t>::toi(const wchar_t*& first, const wchar_t* last, int radix)
993 // if radix is less than zero, then restrict
994 // return value to charT. NB assumes sizeof(charT) <= sizeof(int)
// Limit derived from the bit width of the character type.
996 maxval = 1u << (sizeof(*first) * CHAR_BIT - 1);
// Largest unsigned int value.
1003 maxval = (unsigned int)-1;
1007 unsigned int result = 0;
1008 unsigned int type = (radix > 10) ? char_class_xdigit : char_class_digit;
1009 while((first != last) && is_class(*first, type) && (result <= maxval))
1012 result += toi(*first);
// Maps the wide class name in [first, last) to its class-id mask by narrowing
// the name and passing it to do_lookup_class.
// NOTE(review): the return statement is not visible; presumably result is
// returned - confirm.
1018 boost::uint_fast32_t BOOST_REGEX_CALL c_regex_traits<wchar_t>::lookup_classname(const wchar_t* first, const wchar_t* last)
1020 std::basic_string<wchar_t> s(first, last);
// The value returned for a null buffer of length 0 sizes the narrow buffer.
1021 std::size_t len = strnarrow(static_cast<char*>(0), 0, s.c_str());
1022 scoped_array<char> buf(new char[len]);
1023 strnarrow(buf.get(), len, s.c_str());
1024 boost::uint_fast32_t result = do_lookup_class(buf.get());
// Static traits instance; its address is passed to find_sort_syntax in
// c_regex_traits<wchar_t>::update.
1028 c_regex_traits<wchar_t> c_regex_traits<wchar_t>::init_;
// Converts the wide string s2 to multibyte characters in s1 (capacity len)
// with std::wcstombs and returns its result.
// NOTE(review): the test between these lines is not visible; callers pass a
// null buffer with length 0 and use the result as a buffer size, so presumably
// size is returned when len is too small - confirm.
// NOTE(review): size counts wide characters plus the terminator, but a wide
// character can convert to more than one byte; presumably this is only exact
// for single-byte encodings - confirm.
1030 std::size_t BOOST_REGEX_CALL c_regex_traits<wchar_t>::strnarrow(char *s1, std::size_t len, const wchar_t *s2)
1032 BOOST_RE_GUARD_STACK
1033 std::size_t size = std::wcslen(s2) + 1;
1036 return std::wcstombs(s1, s2, len);
// Converts the multibyte string s2 to wide characters in s1 (capacity len)
// with std::mbstowcs.
// NOTE(review): the test between these lines and the return are not visible;
// callers pass a null buffer with length 0 and use the result as a buffer
// size, so presumably size is returned when len is too small - confirm.
// size is the byte length of s2 plus the terminator.
1039 std::size_t BOOST_REGEX_CALL c_regex_traits<wchar_t>::strwiden(wchar_t *s1, std::size_t len, const char *s2)
1041 BOOST_RE_GUARD_STACK
1042 std::size_t size = std::strlen(s2) + 1;
1045 size = std::mbstowcs(s1, s2, len);
1050 #endif // BOOST_NO_WREGEX
1052 } // namespace boost