6 * Use, modification and distribution are subject to the
\r
7 * Boost Software License, Version 1.0. (See accompanying file
\r
8 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
\r
13 * LOCATION: see http://www.boost.org for most recent version.
\r
15 * VERSION see <boost/version.hpp>
\r
16 * DESCRIPTION: Unicode regular expressions on top of the ICU Library.
\r
19 #ifndef BOOST_REGEX_ICU_HPP
\r
20 #define BOOST_REGEX_ICU_HPP
\r
22 #include <unicode/utypes.h>
\r
23 #include <unicode/uchar.h>
\r
24 #include <unicode/coll.h>
\r
25 #include <boost/regex.hpp>
\r
26 #include <boost/regex/pending/unicode_iterator.hpp>
\r
27 #include <boost/mpl/int_fwd.hpp>
\r
33 namespace re_detail{
\r
36 // Implementation details:
\r
38 class BOOST_REGEX_DECL icu_regex_traits_implementation
\r
40 typedef UChar32 char_type;
\r
41 typedef std::size_t size_type;
\r
42 typedef std::vector<char_type> string_type;
\r
43 typedef U_NAMESPACE_QUALIFIER Locale locale_type;
\r
44 typedef boost::uint_least32_t char_class_type;
\r
46 icu_regex_traits_implementation(const U_NAMESPACE_QUALIFIER Locale& l)
\r
49 UErrorCode success = U_ZERO_ERROR;
\r
50 m_collator.reset(U_NAMESPACE_QUALIFIER Collator::createInstance(l, success));
\r
51 if(U_SUCCESS(success) == 0)
\r
53 m_collator->setStrength(U_NAMESPACE_QUALIFIER Collator::IDENTICAL);
\r
54 success = U_ZERO_ERROR;
\r
55 m_primary_collator.reset(U_NAMESPACE_QUALIFIER Collator::createInstance(l, success));
\r
56 if(U_SUCCESS(success) == 0)
\r
58 m_primary_collator->setStrength(U_NAMESPACE_QUALIFIER Collator::PRIMARY);
\r
60 U_NAMESPACE_QUALIFIER Locale getloc()const
\r
64 string_type do_transform(const char_type* p1, const char_type* p2, const U_NAMESPACE_QUALIFIER Collator* pcoll) const;
\r
65 string_type transform(const char_type* p1, const char_type* p2) const
\r
67 return do_transform(p1, p2, m_collator.get());
\r
69 string_type transform_primary(const char_type* p1, const char_type* p2) const
\r
71 return do_transform(p1, p2, m_primary_collator.get());
\r
76 std::runtime_error e("Could not initialize ICU resources");
\r
77 boost::throw_exception(e);
\r
79 U_NAMESPACE_QUALIFIER Locale m_locale; // The ICU locale that we're using
\r
80 boost::scoped_ptr< U_NAMESPACE_QUALIFIER Collator> m_collator; // The full collation object
\r
81 boost::scoped_ptr< U_NAMESPACE_QUALIFIER Collator> m_primary_collator; // The primary collation object
\r
84 inline boost::shared_ptr<icu_regex_traits_implementation> get_icu_regex_traits_implementation(const U_NAMESPACE_QUALIFIER Locale& loc)
\r
86 return boost::shared_ptr<icu_regex_traits_implementation>(new icu_regex_traits_implementation(loc));
\r
91 class BOOST_REGEX_DECL icu_regex_traits
\r
94 typedef UChar32 char_type;
\r
95 typedef std::size_t size_type;
\r
96 typedef std::vector<char_type> string_type;
\r
97 typedef U_NAMESPACE_QUALIFIER Locale locale_type;
\r
98 #ifdef BOOST_NO_INT64_T
\r
99 typedef std::bitset<64> char_class_type;
\r
101 typedef boost::uint64_t char_class_type;
\r
104 struct boost_extensions_tag{};
\r
107 : m_pimpl(re_detail::get_icu_regex_traits_implementation(U_NAMESPACE_QUALIFIER Locale()))
\r
110 static size_type length(const char_type* p);
\r
112 ::boost::regex_constants::syntax_type syntax_type(char_type c)const
\r
114 return ((c < 0x7f) && (c > 0)) ? re_detail::get_default_syntax_type(static_cast<char>(c)) : regex_constants::syntax_char;
\r
116 ::boost::regex_constants::escape_syntax_type escape_syntax_type(char_type c) const
\r
118 return ((c < 0x7f) && (c > 0)) ? re_detail::get_default_escape_syntax_type(static_cast<char>(c)) : regex_constants::syntax_char;
\r
120 char_type translate(char_type c) const
\r
124 char_type translate_nocase(char_type c) const
\r
126 return ::u_tolower(c);
\r
128 char_type translate(char_type c, bool icase) const
\r
130 return icase ? translate_nocase(c) : translate(c);
\r
132 char_type tolower(char_type c) const
\r
134 return ::u_tolower(c);
\r
136 char_type toupper(char_type c) const
\r
138 return ::u_toupper(c);
\r
140 string_type transform(const char_type* p1, const char_type* p2) const
\r
142 return m_pimpl->transform(p1, p2);
\r
144 string_type transform_primary(const char_type* p1, const char_type* p2) const
\r
146 return m_pimpl->transform_primary(p1, p2);
\r
148 char_class_type lookup_classname(const char_type* p1, const char_type* p2) const;
\r
149 string_type lookup_collatename(const char_type* p1, const char_type* p2) const;
\r
150 bool isctype(char_type c, char_class_type f) const;
\r
151 int toi(const char_type*& p1, const char_type* p2, int radix)const
\r
153 return re_detail::global_toi(p1, p2, radix, *this);
\r
155 int value(char_type c, int radix)const
\r
157 return u_digit(c, static_cast< ::int8_t>(radix));
\r
159 locale_type imbue(locale_type l)
\r
161 locale_type result(m_pimpl->getloc());
\r
162 m_pimpl = re_detail::get_icu_regex_traits_implementation(l);
\r
165 locale_type getloc()const
\r
167 return locale_type();
\r
169 std::string error_string(::boost::regex_constants::error_type n) const
\r
171 return re_detail::get_default_error_string(n);
\r
174 icu_regex_traits(const icu_regex_traits&);
\r
175 icu_regex_traits& operator=(const icu_regex_traits&);
\r
178 // define the bitmasks offsets we need for additional character properties:
\r
181 offset_blank = U_CHAR_CATEGORY_COUNT,
\r
182 offset_space = U_CHAR_CATEGORY_COUNT+1,
\r
183 offset_xdigit = U_CHAR_CATEGORY_COUNT+2,
\r
184 offset_underscore = U_CHAR_CATEGORY_COUNT+3,
\r
185 offset_unicode = U_CHAR_CATEGORY_COUNT+4,
\r
186 offset_any = U_CHAR_CATEGORY_COUNT+5,
\r
187 offset_ascii = U_CHAR_CATEGORY_COUNT+6
\r
191 // and now the masks:
\r
193 static const char_class_type mask_blank;
\r
194 static const char_class_type mask_space;
\r
195 static const char_class_type mask_xdigit;
\r
196 static const char_class_type mask_underscore;
\r
197 static const char_class_type mask_unicode;
\r
198 static const char_class_type mask_any;
\r
199 static const char_class_type mask_ascii;
\r
201 static char_class_type lookup_icu_mask(const ::UChar32* p1, const ::UChar32* p2);
\r
203 boost::shared_ptr< ::boost::re_detail::icu_regex_traits_implementation> m_pimpl;
\r
206 } // namespace boost
\r
209 // template instances:
\r
211 #define BOOST_REGEX_CHAR_T UChar32
\r
212 #undef BOOST_REGEX_TRAITS_T
\r
213 #define BOOST_REGEX_TRAITS_T , icu_regex_traits
\r
214 #define BOOST_REGEX_ICU_INSTANCES
\r
215 #ifdef BOOST_REGEX_ICU_INSTANTIATE
\r
216 # define BOOST_REGEX_INSTANTIATE
\r
218 #include <boost/regex/v4/instances.hpp>
\r
219 #undef BOOST_REGEX_CHAR_T
\r
220 #undef BOOST_REGEX_TRAITS_T
\r
221 #undef BOOST_REGEX_ICU_INSTANCES
\r
222 #ifdef BOOST_REGEX_INSTANTIATE
\r
223 # undef BOOST_REGEX_INSTANTIATE
\r
229 typedef basic_regex< ::UChar32, icu_regex_traits> u32regex;
\r
230 typedef match_results<const ::UChar32*> u32match;
\r
231 typedef match_results<const ::UChar*> u16match;
\r
234 // Construction of 32-bit regex types from UTF-8 and UTF-16 primitives:
\r
236 namespace re_detail{
\r
238 #if !defined(BOOST_NO_MEMBER_TEMPLATES) && !defined(__IBMCPP__)
\r
239 template <class InputIterator>
\r
240 inline u32regex do_make_u32regex(InputIterator i,
\r
242 boost::regex_constants::syntax_option_type opt,
\r
243 const boost::mpl::int_<1>*)
\r
245 typedef boost::u8_to_u32_iterator<InputIterator, UChar32> conv_type;
\r
246 return u32regex(conv_type(i), conv_type(j), opt);
\r
249 template <class InputIterator>
\r
250 inline u32regex do_make_u32regex(InputIterator i,
\r
252 boost::regex_constants::syntax_option_type opt,
\r
253 const boost::mpl::int_<2>*)
\r
255 typedef boost::u16_to_u32_iterator<InputIterator, UChar32> conv_type;
\r
256 return u32regex(conv_type(i), conv_type(j), opt);
\r
259 template <class InputIterator>
\r
260 inline u32regex do_make_u32regex(InputIterator i,
\r
262 boost::regex_constants::syntax_option_type opt,
\r
263 const boost::mpl::int_<4>*)
\r
265 return u32regex(i, j, opt);
\r
268 template <class InputIterator>
\r
269 inline u32regex do_make_u32regex(InputIterator i,
\r
271 boost::regex_constants::syntax_option_type opt,
\r
272 const boost::mpl::int_<1>*)
\r
274 typedef boost::u8_to_u32_iterator<InputIterator, UChar32> conv_type;
\r
275 typedef std::vector<UChar32> vector_type;
\r
277 conv_type a(i), b(j);
\r
284 return u32regex(&*v.begin(), v.size(), opt);
\r
285 return u32regex(static_cast<UChar32 const*>(0), static_cast<u32regex::size_type>(0), opt);
\r
288 template <class InputIterator>
\r
289 inline u32regex do_make_u32regex(InputIterator i,
\r
291 boost::regex_constants::syntax_option_type opt,
\r
292 const boost::mpl::int_<2>*)
\r
294 typedef boost::u16_to_u32_iterator<InputIterator, UChar32> conv_type;
\r
295 typedef std::vector<UChar32> vector_type;
\r
297 conv_type a(i), b(j);
\r
304 return u32regex(&*v.begin(), v.size(), opt);
\r
305 return u32regex(static_cast<UChar32 const*>(0), static_cast<u32regex::size_type>(0), opt);
\r
308 template <class InputIterator>
\r
309 inline u32regex do_make_u32regex(InputIterator i,
\r
311 boost::regex_constants::syntax_option_type opt,
\r
312 const boost::mpl::int_<4>*)
\r
314 typedef std::vector<UCHAR32> vector_type;
\r
318 v.push_back((UCHAR32)(*i));
\r
322 return u32regex(&*v.begin(), v.size(), opt);
\r
323 return u32regex(static_cast<UChar32 const*>(0), static_cast<u32regex::size_type>(0), opt);
\r
329 // Construction from an iterator pair:
\r
331 template <class InputIterator>
\r
332 inline u32regex make_u32regex(InputIterator i,
\r
334 boost::regex_constants::syntax_option_type opt)
\r
336 return re_detail::do_make_u32regex(i, j, opt, static_cast<boost::mpl::int_<sizeof(*i)> const*>(0));
\r
339 // construction from UTF-8 nul-terminated strings:
\r
341 inline u32regex make_u32regex(const char* p, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl)
\r
343 return re_detail::do_make_u32regex(p, p + std::strlen(p), opt, static_cast<boost::mpl::int_<1> const*>(0));
\r
345 inline u32regex make_u32regex(const unsigned char* p, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl)
\r
347 return re_detail::do_make_u32regex(p, p + std::strlen(reinterpret_cast<const char*>(p)), opt, static_cast<boost::mpl::int_<1> const*>(0));
\r
350 // construction from UTF-16 nul-terminated strings:
\r
352 #ifndef BOOST_NO_WREGEX
\r
353 inline u32regex make_u32regex(const wchar_t* p, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl)
\r
355 return re_detail::do_make_u32regex(p, p + std::wcslen(p), opt, static_cast<boost::mpl::int_<sizeof(wchar_t)> const*>(0));
\r
358 #if !defined(U_WCHAR_IS_UTF16) && (U_SIZEOF_WCHAR_T != 2)
\r
359 inline u32regex make_u32regex(const UChar* p, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl)
\r
361 return re_detail::do_make_u32regex(p, p + u_strlen(p), opt, static_cast<boost::mpl::int_<2> const*>(0));
\r
365 // construction from basic_string class-template:
\r
367 template<class C, class T, class A>
\r
368 inline u32regex make_u32regex(const std::basic_string<C, T, A>& s, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl)
\r
370 return re_detail::do_make_u32regex(s.begin(), s.end(), opt, static_cast<boost::mpl::int_<sizeof(C)> const*>(0));
\r
373 // Construction from ICU string type:
\r
375 inline u32regex make_u32regex(const UnicodeString& s, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl)
\r
377 return re_detail::do_make_u32regex(s.getBuffer(), s.getBuffer() + s.length(), opt, static_cast<boost::mpl::int_<2> const*>(0));
\r
381 // regex_match overloads that widen the character type as appropriate:
\r
383 namespace re_detail{
\r
384 template<class MR1, class MR2>
\r
385 void copy_results(MR1& out, MR2 const& in)
\r
387 // copy results from an adapted MR2 match_results:
\r
388 out.set_size(in.size(), in.prefix().first.base(), in.suffix().second.base());
\r
389 out.set_base(in.base().base());
\r
390 for(int i = 0; i < (int)in.size(); ++i)
\r
394 out.set_first(in[i].first.base(), i);
\r
395 out.set_second(in[i].second.base(), i);
\r
400 template <class BidiIterator, class Allocator>
\r
401 inline bool do_regex_match(BidiIterator first, BidiIterator last,
\r
402 match_results<BidiIterator, Allocator>& m,
\r
403 const u32regex& e,
\r
404 match_flag_type flags,
\r
405 boost::mpl::int_<4> const*)
\r
407 return ::boost::regex_match(first, last, m, e, flags);
\r
409 template <class BidiIterator, class Allocator>
\r
410 bool do_regex_match(BidiIterator first, BidiIterator last,
\r
411 match_results<BidiIterator, Allocator>& m,
\r
412 const u32regex& e,
\r
413 match_flag_type flags,
\r
414 boost::mpl::int_<2> const*)
\r
416 typedef u16_to_u32_iterator<BidiIterator, UChar32> conv_type;
\r
417 typedef match_results<conv_type> match_type;
\r
418 typedef typename match_type::allocator_type alloc_type;
\r
420 bool result = ::boost::regex_match(conv_type(first), conv_type(last), what, e, flags);
\r
421 // copy results across to m:
\r
422 if(result) copy_results(m, what);
\r
425 template <class BidiIterator, class Allocator>
\r
426 bool do_regex_match(BidiIterator first, BidiIterator last,
\r
427 match_results<BidiIterator, Allocator>& m,
\r
428 const u32regex& e,
\r
429 match_flag_type flags,
\r
430 boost::mpl::int_<1> const*)
\r
432 typedef u8_to_u32_iterator<BidiIterator, UChar32> conv_type;
\r
433 typedef match_results<conv_type> match_type;
\r
434 typedef typename match_type::allocator_type alloc_type;
\r
436 bool result = ::boost::regex_match(conv_type(first), conv_type(last), what, e, flags);
\r
437 // copy results across to m:
\r
438 if(result) copy_results(m, what);
\r
441 } // namespace re_detail
\r
443 template <class BidiIterator, class Allocator>
\r
444 inline bool u32regex_match(BidiIterator first, BidiIterator last,
\r
445 match_results<BidiIterator, Allocator>& m,
\r
446 const u32regex& e,
\r
447 match_flag_type flags = match_default)
\r
449 return re_detail::do_regex_match(first, last, m, e, flags, static_cast<mpl::int_<sizeof(*first)> const*>(0));
\r
451 inline bool u32regex_match(const UChar* p,
\r
452 match_results<const UChar*>& m,
\r
453 const u32regex& e,
\r
454 match_flag_type flags = match_default)
\r
456 return re_detail::do_regex_match(p, p+u_strlen(p), m, e, flags, static_cast<mpl::int_<2> const*>(0));
\r
458 #if !defined(U_WCHAR_IS_UTF16) && (U_SIZEOF_WCHAR_T != 2) && !defined(BOOST_NO_WREGEX)
\r
459 inline bool u32regex_match(const wchar_t* p,
\r
460 match_results<const wchar_t*>& m,
\r
461 const u32regex& e,
\r
462 match_flag_type flags = match_default)
\r
464 return re_detail::do_regex_match(p, p+std::wcslen(p), m, e, flags, static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
\r
467 inline bool u32regex_match(const char* p,
\r
468 match_results<const char*>& m,
\r
469 const u32regex& e,
\r
470 match_flag_type flags = match_default)
\r
472 return re_detail::do_regex_match(p, p+std::strlen(p), m, e, flags, static_cast<mpl::int_<1> const*>(0));
\r
474 inline bool u32regex_match(const unsigned char* p,
\r
475 match_results<const unsigned char*>& m,
\r
476 const u32regex& e,
\r
477 match_flag_type flags = match_default)
\r
479 return re_detail::do_regex_match(p, p+std::strlen((const char*)p), m, e, flags, static_cast<mpl::int_<1> const*>(0));
\r
481 inline bool u32regex_match(const std::string& s,
\r
482 match_results<std::string::const_iterator>& m,
\r
483 const u32regex& e,
\r
484 match_flag_type flags = match_default)
\r
486 return re_detail::do_regex_match(s.begin(), s.end(), m, e, flags, static_cast<mpl::int_<1> const*>(0));
\r
488 #ifndef BOOST_NO_STD_WSTRING
\r
489 inline bool u32regex_match(const std::wstring& s,
\r
490 match_results<std::wstring::const_iterator>& m,
\r
491 const u32regex& e,
\r
492 match_flag_type flags = match_default)
\r
494 return re_detail::do_regex_match(s.begin(), s.end(), m, e, flags, static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
\r
497 inline bool u32regex_match(const UnicodeString& s,
\r
498 match_results<const UChar*>& m,
\r
499 const u32regex& e,
\r
500 match_flag_type flags = match_default)
\r
502 return re_detail::do_regex_match(s.getBuffer(), s.getBuffer() + s.length(), m, e, flags, static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
\r
505 // regex_match overloads that do not return what matched:
\r
507 template <class BidiIterator>
\r
508 inline bool u32regex_match(BidiIterator first, BidiIterator last,
\r
509 const u32regex& e,
\r
510 match_flag_type flags = match_default)
\r
512 match_results<BidiIterator> m;
\r
513 return re_detail::do_regex_match(first, last, m, e, flags, static_cast<mpl::int_<sizeof(*first)> const*>(0));
\r
515 inline bool u32regex_match(const UChar* p,
\r
516 const u32regex& e,
\r
517 match_flag_type flags = match_default)
\r
519 match_results<const UChar*> m;
\r
520 return re_detail::do_regex_match(p, p+u_strlen(p), m, e, flags, static_cast<mpl::int_<2> const*>(0));
\r
522 #if !defined(U_WCHAR_IS_UTF16) && (U_SIZEOF_WCHAR_T != 2) && !defined(BOOST_NO_WREGEX)
\r
523 inline bool u32regex_match(const wchar_t* p,
\r
524 const u32regex& e,
\r
525 match_flag_type flags = match_default)
\r
527 match_results<const wchar_t*> m;
\r
528 return re_detail::do_regex_match(p, p+std::wcslen(p), m, e, flags, static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
\r
531 inline bool u32regex_match(const char* p,
\r
532 const u32regex& e,
\r
533 match_flag_type flags = match_default)
\r
535 match_results<const char*> m;
\r
536 return re_detail::do_regex_match(p, p+std::strlen(p), m, e, flags, static_cast<mpl::int_<1> const*>(0));
\r
538 inline bool u32regex_match(const unsigned char* p,
\r
539 const u32regex& e,
\r
540 match_flag_type flags = match_default)
\r
542 match_results<const unsigned char*> m;
\r
543 return re_detail::do_regex_match(p, p+std::strlen((const char*)p), m, e, flags, static_cast<mpl::int_<1> const*>(0));
\r
545 inline bool u32regex_match(const std::string& s,
\r
546 const u32regex& e,
\r
547 match_flag_type flags = match_default)
\r
549 match_results<std::string::const_iterator> m;
\r
550 return re_detail::do_regex_match(s.begin(), s.end(), m, e, flags, static_cast<mpl::int_<1> const*>(0));
\r
552 #ifndef BOOST_NO_STD_WSTRING
\r
553 inline bool u32regex_match(const std::wstring& s,
\r
554 const u32regex& e,
\r
555 match_flag_type flags = match_default)
\r
557 match_results<std::wstring::const_iterator> m;
\r
558 return re_detail::do_regex_match(s.begin(), s.end(), m, e, flags, static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
\r
561 inline bool u32regex_match(const UnicodeString& s,
\r
562 const u32regex& e,
\r
563 match_flag_type flags = match_default)
\r
565 match_results<const UChar*> m;
\r
566 return re_detail::do_regex_match(s.getBuffer(), s.getBuffer() + s.length(), m, e, flags, static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
\r
570 // regex_search overloads that widen the character type as appropriate:
\r
572 namespace re_detail{
\r
573 template <class BidiIterator, class Allocator>
\r
574 inline bool do_regex_search(BidiIterator first, BidiIterator last,
\r
575 match_results<BidiIterator, Allocator>& m,
\r
576 const u32regex& e,
\r
577 match_flag_type flags,
\r
579 boost::mpl::int_<4> const*)
\r
581 return ::boost::regex_search(first, last, m, e, flags, base);
\r
583 template <class BidiIterator, class Allocator>
\r
584 bool do_regex_search(BidiIterator first, BidiIterator last,
\r
585 match_results<BidiIterator, Allocator>& m,
\r
586 const u32regex& e,
\r
587 match_flag_type flags,
\r
589 boost::mpl::int_<2> const*)
\r
591 typedef u16_to_u32_iterator<BidiIterator, UChar32> conv_type;
\r
592 typedef match_results<conv_type> match_type;
\r
593 typedef typename match_type::allocator_type alloc_type;
\r
595 bool result = ::boost::regex_search(conv_type(first), conv_type(last), what, e, flags, conv_type(base));
\r
596 // copy results across to m:
\r
597 if(result) copy_results(m, what);
\r
600 template <class BidiIterator, class Allocator>
\r
601 bool do_regex_search(BidiIterator first, BidiIterator last,
\r
602 match_results<BidiIterator, Allocator>& m,
\r
603 const u32regex& e,
\r
604 match_flag_type flags,
\r
606 boost::mpl::int_<1> const*)
\r
608 typedef u8_to_u32_iterator<BidiIterator, UChar32> conv_type;
\r
609 typedef match_results<conv_type> match_type;
\r
610 typedef typename match_type::allocator_type alloc_type;
\r
612 bool result = ::boost::regex_search(conv_type(first), conv_type(last), what, e, flags, conv_type(base));
\r
613 // copy results across to m:
\r
614 if(result) copy_results(m, what);
\r
619 template <class BidiIterator, class Allocator>
\r
620 inline bool u32regex_search(BidiIterator first, BidiIterator last,
\r
621 match_results<BidiIterator, Allocator>& m,
\r
622 const u32regex& e,
\r
623 match_flag_type flags = match_default)
\r
625 return re_detail::do_regex_search(first, last, m, e, flags, first, static_cast<mpl::int_<sizeof(*first)> const*>(0));
\r
627 template <class BidiIterator, class Allocator>
\r
628 inline bool u32regex_search(BidiIterator first, BidiIterator last,
\r
629 match_results<BidiIterator, Allocator>& m,
\r
630 const u32regex& e,
\r
631 match_flag_type flags,
\r
634 return re_detail::do_regex_search(first, last, m, e, flags, base, static_cast<mpl::int_<sizeof(*first)> const*>(0));
\r
636 inline bool u32regex_search(const UChar* p,
\r
637 match_results<const UChar*>& m,
\r
638 const u32regex& e,
\r
639 match_flag_type flags = match_default)
\r
641 return re_detail::do_regex_search(p, p+u_strlen(p), m, e, flags, p, static_cast<mpl::int_<2> const*>(0));
\r
643 #if !defined(U_WCHAR_IS_UTF16) && (U_SIZEOF_WCHAR_T != 2) && !defined(BOOST_NO_WREGEX)
\r
644 inline bool u32regex_search(const wchar_t* p,
\r
645 match_results<const wchar_t*>& m,
\r
646 const u32regex& e,
\r
647 match_flag_type flags = match_default)
\r
649 return re_detail::do_regex_search(p, p+std::wcslen(p), m, e, flags, p, static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
\r
652 inline bool u32regex_search(const char* p,
\r
653 match_results<const char*>& m,
\r
654 const u32regex& e,
\r
655 match_flag_type flags = match_default)
\r
657 return re_detail::do_regex_search(p, p+std::strlen(p), m, e, flags, p, static_cast<mpl::int_<1> const*>(0));
\r
659 inline bool u32regex_search(const unsigned char* p,
\r
660 match_results<const unsigned char*>& m,
\r
661 const u32regex& e,
\r
662 match_flag_type flags = match_default)
\r
664 return re_detail::do_regex_search(p, p+std::strlen((const char*)p), m, e, flags, p, static_cast<mpl::int_<1> const*>(0));
\r
666 inline bool u32regex_search(const std::string& s,
\r
667 match_results<std::string::const_iterator>& m,
\r
668 const u32regex& e,
\r
669 match_flag_type flags = match_default)
\r
671 return re_detail::do_regex_search(s.begin(), s.end(), m, e, flags, s.begin(), static_cast<mpl::int_<1> const*>(0));
\r
673 #ifndef BOOST_NO_STD_WSTRING
\r
674 inline bool u32regex_search(const std::wstring& s,
\r
675 match_results<std::wstring::const_iterator>& m,
\r
676 const u32regex& e,
\r
677 match_flag_type flags = match_default)
\r
679 return re_detail::do_regex_search(s.begin(), s.end(), m, e, flags, s.begin(), static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
\r
682 inline bool u32regex_search(const UnicodeString& s,
\r
683 match_results<const UChar*>& m,
\r
684 const u32regex& e,
\r
685 match_flag_type flags = match_default)
\r
687 return re_detail::do_regex_search(s.getBuffer(), s.getBuffer() + s.length(), m, e, flags, s.getBuffer(), static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
\r
689 template <class BidiIterator>
\r
690 inline bool u32regex_search(BidiIterator first, BidiIterator last,
\r
691 const u32regex& e,
\r
692 match_flag_type flags = match_default)
\r
694 match_results<BidiIterator> m;
\r
695 return re_detail::do_regex_search(first, last, m, e, flags, first, static_cast<mpl::int_<sizeof(*first)> const*>(0));
\r
697 inline bool u32regex_search(const UChar* p,
\r
698 const u32regex& e,
\r
699 match_flag_type flags = match_default)
\r
701 match_results<const UChar*> m;
\r
702 return re_detail::do_regex_search(p, p+u_strlen(p), m, e, flags, p, static_cast<mpl::int_<2> const*>(0));
\r
704 #if !defined(U_WCHAR_IS_UTF16) && (U_SIZEOF_WCHAR_T != 2) && !defined(BOOST_NO_WREGEX)
\r
705 inline bool u32regex_search(const wchar_t* p,
\r
706 const u32regex& e,
\r
707 match_flag_type flags = match_default)
\r
709 match_results<const wchar_t*> m;
\r
710 return re_detail::do_regex_search(p, p+std::wcslen(p), m, e, flags, p, static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
\r
713 inline bool u32regex_search(const char* p,
\r
714 const u32regex& e,
\r
715 match_flag_type flags = match_default)
\r
717 match_results<const char*> m;
\r
718 return re_detail::do_regex_search(p, p+std::strlen(p), m, e, flags, p, static_cast<mpl::int_<1> const*>(0));
\r
720 inline bool u32regex_search(const unsigned char* p,
\r
721 const u32regex& e,
\r
722 match_flag_type flags = match_default)
\r
724 match_results<const unsigned char*> m;
\r
725 return re_detail::do_regex_search(p, p+std::strlen((const char*)p), m, e, flags, p, static_cast<mpl::int_<1> const*>(0));
\r
727 inline bool u32regex_search(const std::string& s,
\r
728 const u32regex& e,
\r
729 match_flag_type flags = match_default)
\r
731 match_results<std::string::const_iterator> m;
\r
732 return re_detail::do_regex_search(s.begin(), s.end(), m, e, flags, s.begin(), static_cast<mpl::int_<1> const*>(0));
\r
734 #ifndef BOOST_NO_STD_WSTRING
\r
735 inline bool u32regex_search(const std::wstring& s,
\r
736 const u32regex& e,
\r
737 match_flag_type flags = match_default)
\r
739 match_results<std::wstring::const_iterator> m;
\r
740 return re_detail::do_regex_search(s.begin(), s.end(), m, e, flags, s.begin(), static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
\r
743 inline bool u32regex_search(const UnicodeString& s,
\r
744 const u32regex& e,
\r
745 match_flag_type flags = match_default)
\r
747 match_results<const UChar*> m;
\r
748 return re_detail::do_regex_search(s.getBuffer(), s.getBuffer() + s.length(), m, e, flags, s.getBuffer(), static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
\r
752 // overloads for regex_replace with utf-8 and utf-16 data types:
\r
754 namespace re_detail{
\r
756 inline std::pair< boost::u8_to_u32_iterator<I>, boost::u8_to_u32_iterator<I> >
\r
757 make_utf32_seq(I i, I j, mpl::int_<1> const*)
\r
759 return std::pair< boost::u8_to_u32_iterator<I>, boost::u8_to_u32_iterator<I> >(boost::u8_to_u32_iterator<I>(i), boost::u8_to_u32_iterator<I>(j));
\r
762 inline std::pair< boost::u16_to_u32_iterator<I>, boost::u16_to_u32_iterator<I> >
\r
763 make_utf32_seq(I i, I j, mpl::int_<2> const*)
\r
765 return std::pair< boost::u16_to_u32_iterator<I>, boost::u16_to_u32_iterator<I> >(boost::u16_to_u32_iterator<I>(i), boost::u16_to_u32_iterator<I>(j));
\r
768 inline std::pair< I, I >
\r
769 make_utf32_seq(I i, I j, mpl::int_<4> const*)
\r
771 return std::pair< I, I >(i, j);
\r
773 template <class charT>
\r
774 inline std::pair< boost::u8_to_u32_iterator<const charT*>, boost::u8_to_u32_iterator<const charT*> >
\r
775 make_utf32_seq(const charT* p, mpl::int_<1> const*)
\r
777 return std::pair< boost::u8_to_u32_iterator<const charT*>, boost::u8_to_u32_iterator<const charT*> >(boost::u8_to_u32_iterator<const charT*>(p), boost::u8_to_u32_iterator<const charT*>(p+std::strlen((const char*)p)));
\r
779 template <class charT>
\r
780 inline std::pair< boost::u16_to_u32_iterator<const charT*>, boost::u16_to_u32_iterator<const charT*> >
\r
781 make_utf32_seq(const charT* p, mpl::int_<2> const*)
\r
783 return std::pair< boost::u16_to_u32_iterator<const charT*>, boost::u16_to_u32_iterator<const charT*> >(boost::u16_to_u32_iterator<const charT*>(p), boost::u16_to_u32_iterator<const charT*>(p+u_strlen((const UChar*)p)));
\r
785 template <class charT>
\r
786 inline std::pair< const charT*, const charT* >
\r
787 make_utf32_seq(const charT* p, mpl::int_<4> const*)
\r
789 return std::pair< const charT*, const charT* >(p, p+icu_regex_traits::length((UChar32 const*)p));
\r
791 template <class OutputIterator>
\r
792 inline OutputIterator make_utf32_out(OutputIterator o, mpl::int_<4> const*)
\r
796 template <class OutputIterator>
\r
797 inline utf16_output_iterator<OutputIterator> make_utf32_out(OutputIterator o, mpl::int_<2> const*)
\r
801 template <class OutputIterator>
\r
802 inline utf8_output_iterator<OutputIterator> make_utf32_out(OutputIterator o, mpl::int_<1> const*)
\r
807 template <class OutputIterator, class I1, class I2>
\r
808 OutputIterator do_regex_replace(OutputIterator out,
\r
809 std::pair<I1, I1> const& in,
\r
810 const u32regex& e,
\r
811 const std::pair<I2, I2>& fmt,
\r
812 match_flag_type flags
\r
815 // unfortunately we have to copy the format string in order to pass in onward:
\r
816 std::vector<UChar32> f;
\r
817 #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
\r
818 f.assign(fmt.first, fmt.second);
\r
821 I2 pos = fmt.first;
\r
822 while(pos != fmt.second)
\r
823 f.push_back(*pos++);
\r
826 regex_iterator<I1, UChar32, icu_regex_traits> i(in.first, in.second, e, flags);
\r
827 regex_iterator<I1, UChar32, icu_regex_traits> j;
\r
830 if(!(flags & regex_constants::format_no_copy))
\r
831 out = re_detail::copy(in.first, in.second, out);
\r
835 I1 last_m = in.first;
\r
838 if(!(flags & regex_constants::format_no_copy))
\r
839 out = re_detail::copy(i->prefix().first, i->prefix().second, out);
\r
841 out = ::boost::re_detail::regex_format_imp(out, *i, &*f.begin(), &*f.begin() + f.size(), flags, e.get_traits());
\r
843 out = ::boost::re_detail::regex_format_imp(out, *i, static_cast<UChar32 const*>(0), static_cast<UChar32 const*>(0), flags, e.get_traits());
\r
844 last_m = (*i)[0].second;
\r
845 if(flags & regex_constants::format_first_only)
\r
849 if(!(flags & regex_constants::format_no_copy))
\r
850 out = re_detail::copy(last_m, in.second, out);
\r
854 template <class BaseIterator>
\r
855 inline const BaseIterator& extract_output_base(const BaseIterator& b)
\r
859 template <class BaseIterator>
\r
860 inline BaseIterator extract_output_base(const utf8_output_iterator<BaseIterator>& b)
\r
864 template <class BaseIterator>
\r
865 inline BaseIterator extract_output_base(const utf16_output_iterator<BaseIterator>& b)
\r
871 template <class OutputIterator, class BidirectionalIterator, class charT>
\r
872 inline OutputIterator u32regex_replace(OutputIterator out,
\r
873 BidirectionalIterator first,
\r
874 BidirectionalIterator last,
\r
875 const u32regex& e,
\r
877 match_flag_type flags = match_default)
\r
879 return re_detail::extract_output_base
\r
880 #if BOOST_WORKAROUND(BOOST_MSVC, <= 1300)
\r
884 re_detail::do_regex_replace(
\r
885 re_detail::make_utf32_out(out, static_cast<mpl::int_<sizeof(*first)> const*>(0)),
\r
886 re_detail::make_utf32_seq(first, last, static_cast<mpl::int_<sizeof(*first)> const*>(0)),
\r
888 re_detail::make_utf32_seq(fmt, static_cast<mpl::int_<sizeof(*fmt)> const*>(0)),
\r
893 template <class OutputIterator, class Iterator, class charT>
\r
894 inline OutputIterator u32regex_replace(OutputIterator out,
\r
897 const u32regex& e,
\r
898 const std::basic_string<charT>& fmt,
\r
899 match_flag_type flags = match_default)
\r
901 return re_detail::extract_output_base
\r
902 #if BOOST_WORKAROUND(BOOST_MSVC, <= 1300)
\r
906 re_detail::do_regex_replace(
\r
907 re_detail::make_utf32_out(out, static_cast<mpl::int_<sizeof(*first)> const*>(0)),
\r
908 re_detail::make_utf32_seq(first, last, static_cast<mpl::int_<sizeof(*first)> const*>(0)),
\r
910 re_detail::make_utf32_seq(fmt.begin(), fmt.end(), static_cast<mpl::int_<sizeof(charT)> const*>(0)),
\r
915 template <class OutputIterator, class Iterator>
\r
916 inline OutputIterator u32regex_replace(OutputIterator out,
\r
919 const u32regex& e,
\r
920 const UnicodeString& fmt,
\r
921 match_flag_type flags = match_default)
\r
923 return re_detail::extract_output_base
\r
924 #if BOOST_WORKAROUND(BOOST_MSVC, <= 1300)
\r
928 re_detail::do_regex_replace(
\r
929 re_detail::make_utf32_out(out, static_cast<mpl::int_<sizeof(*first)> const*>(0)),
\r
930 re_detail::make_utf32_seq(first, last, static_cast<mpl::int_<sizeof(*first)> const*>(0)),
\r
932 re_detail::make_utf32_seq(fmt.getBuffer(), fmt.getBuffer() + fmt.length(), static_cast<mpl::int_<2> const*>(0)),
\r
937 template <class charT>
\r
938 std::basic_string<charT> u32regex_replace(const std::basic_string<charT>& s,
\r
939 const u32regex& e,
\r
941 match_flag_type flags = match_default)
\r
943 std::basic_string<charT> result;
\r
944 re_detail::string_out_iterator<std::basic_string<charT> > i(result);
\r
945 u32regex_replace(i, s.begin(), s.end(), e, fmt, flags);
\r
949 template <class charT>
\r
950 std::basic_string<charT> u32regex_replace(const std::basic_string<charT>& s,
\r
951 const u32regex& e,
\r
952 const std::basic_string<charT>& fmt,
\r
953 match_flag_type flags = match_default)
\r
955 std::basic_string<charT> result;
\r
956 re_detail::string_out_iterator<std::basic_string<charT> > i(result);
\r
957 u32regex_replace(i, s.begin(), s.end(), e, fmt.c_str(), flags);
\r
961 namespace re_detail{
\r
963 class unicode_string_out_iterator
\r
965 UnicodeString* out;
\r
967 unicode_string_out_iterator(UnicodeString& s) : out(&s) {}
\r
968 unicode_string_out_iterator& operator++() { return *this; }
\r
969 unicode_string_out_iterator& operator++(int) { return *this; }
\r
970 unicode_string_out_iterator& operator*() { return *this; }
\r
971 unicode_string_out_iterator& operator=(UChar v)
\r
976 typedef std::ptrdiff_t difference_type;
\r
977 typedef UChar value_type;
\r
978 typedef value_type* pointer;
\r
979 typedef value_type& reference;
\r
980 typedef std::output_iterator_tag iterator_category;
\r
985 inline UnicodeString u32regex_replace(const UnicodeString& s,
\r
986 const u32regex& e,
\r
988 match_flag_type flags = match_default)
\r
990 UnicodeString result;
\r
991 re_detail::unicode_string_out_iterator i(result);
\r
992 u32regex_replace(i, s.getBuffer(), s.getBuffer()+s.length(), e, fmt, flags);
\r
996 inline UnicodeString u32regex_replace(const UnicodeString& s,
\r
997 const u32regex& e,
\r
998 const UnicodeString& fmt,
\r
999 match_flag_type flags = match_default)
\r
1001 UnicodeString result;
\r
1002 re_detail::unicode_string_out_iterator i(result);
\r
1003 re_detail::do_regex_replace(
\r
1004 re_detail::make_utf32_out(i, static_cast<mpl::int_<2> const*>(0)),
\r
1005 re_detail::make_utf32_seq(s.getBuffer(), s.getBuffer()+s.length(), static_cast<mpl::int_<2> const*>(0)),
\r
1007 re_detail::make_utf32_seq(fmt.getBuffer(), fmt.getBuffer() + fmt.length(), static_cast<mpl::int_<2> const*>(0)),
\r
1012 } // namespace boost.
\r
1014 #include <boost/regex/v4/u32regex_iterator.hpp>
\r
1015 #include <boost/regex/v4/u32regex_token_iterator.hpp>
\r