6 * Use, modification and distribution are subject to the
\r
7 * Boost Software License, Version 1.0. (See accompanying file
\r
8 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
\r
13 * LOCATION: see http://www.boost.org for most recent version.
\r
15 * VERSION see <boost/version.hpp>
\r
16 * DESCRIPTION: Unicode regular expressions on top of the ICU Library.
\r
19 #ifndef BOOST_REGEX_ICU_HPP
\r
20 #define BOOST_REGEX_ICU_HPP
\r
22 #include <unicode/utypes.h>
\r
23 #include <unicode/uchar.h>
\r
24 #include <unicode/coll.h>
\r
25 #include <boost/regex.hpp>
\r
26 #include <boost/regex/pending/unicode_iterator.hpp>
\r
27 #include <boost/mpl/int_fwd.hpp>
\r
33 namespace re_detail{
\r
36 // Implementation details:
\r
38 class BOOST_REGEX_DECL icu_regex_traits_implementation
\r
40 typedef UChar32 char_type;
\r
41 typedef std::size_t size_type;
\r
42 typedef std::vector<char_type> string_type;
\r
43 typedef U_NAMESPACE_QUALIFIER Locale locale_type;
\r
44 typedef boost::uint_least32_t char_class_type;
\r
46 icu_regex_traits_implementation(const U_NAMESPACE_QUALIFIER Locale& l)
\r
49 UErrorCode success = U_ZERO_ERROR;
\r
50 m_collator.reset(U_NAMESPACE_QUALIFIER Collator::createInstance(l, success));
\r
51 if(U_SUCCESS(success) == 0)
\r
53 m_collator->setStrength(U_NAMESPACE_QUALIFIER Collator::IDENTICAL);
\r
54 success = U_ZERO_ERROR;
\r
55 m_primary_collator.reset(U_NAMESPACE_QUALIFIER Collator::createInstance(l, success));
\r
56 if(U_SUCCESS(success) == 0)
\r
58 m_primary_collator->setStrength(U_NAMESPACE_QUALIFIER Collator::PRIMARY);
\r
60 U_NAMESPACE_QUALIFIER Locale getloc()const
\r
64 string_type do_transform(const char_type* p1, const char_type* p2, const U_NAMESPACE_QUALIFIER Collator* pcoll) const;
\r
65 string_type transform(const char_type* p1, const char_type* p2) const
\r
67 return do_transform(p1, p2, m_collator.get());
\r
69 string_type transform_primary(const char_type* p1, const char_type* p2) const
\r
71 return do_transform(p1, p2, m_primary_collator.get());
\r
76 std::runtime_error e("Could not initialize ICU resources");
\r
77 boost::throw_exception(e);
\r
79 U_NAMESPACE_QUALIFIER Locale m_locale; // The ICU locale that we're using
\r
80 boost::scoped_ptr< U_NAMESPACE_QUALIFIER Collator> m_collator; // The full collation object
\r
81 boost::scoped_ptr< U_NAMESPACE_QUALIFIER Collator> m_primary_collator; // The primary collation object
\r
84 inline boost::shared_ptr<icu_regex_traits_implementation> get_icu_regex_traits_implementation(const U_NAMESPACE_QUALIFIER Locale& loc)
\r
86 return boost::shared_ptr<icu_regex_traits_implementation>(new icu_regex_traits_implementation(loc));
\r
91 class BOOST_REGEX_DECL icu_regex_traits
\r
94 typedef UChar32 char_type;
\r
95 typedef std::size_t size_type;
\r
96 typedef std::vector<char_type> string_type;
\r
97 typedef U_NAMESPACE_QUALIFIER Locale locale_type;
\r
98 #ifdef BOOST_NO_INT64_T
\r
99 typedef std::bitset<64> char_class_type;
\r
101 typedef boost::uint64_t char_class_type;
\r
104 struct boost_extensions_tag{};
\r
107 : m_pimpl(re_detail::get_icu_regex_traits_implementation(U_NAMESPACE_QUALIFIER Locale()))
\r
110 static size_type length(const char_type* p);
\r
112 ::boost::regex_constants::syntax_type syntax_type(char_type c)const
\r
114 return ((c < 0x7f) && (c > 0)) ? re_detail::get_default_syntax_type(static_cast<char>(c)) : regex_constants::syntax_char;
\r
116 ::boost::regex_constants::escape_syntax_type escape_syntax_type(char_type c) const
\r
118 return ((c < 0x7f) && (c > 0)) ? re_detail::get_default_escape_syntax_type(static_cast<char>(c)) : regex_constants::syntax_char;
\r
120 char_type translate(char_type c) const
\r
124 char_type translate_nocase(char_type c) const
\r
126 return ::u_tolower(c);
\r
128 char_type translate(char_type c, bool icase) const
\r
130 return icase ? translate_nocase(c) : translate(c);
\r
132 char_type tolower(char_type c) const
\r
134 return ::u_tolower(c);
\r
136 char_type toupper(char_type c) const
\r
138 return ::u_toupper(c);
\r
140 string_type transform(const char_type* p1, const char_type* p2) const
\r
142 return m_pimpl->transform(p1, p2);
\r
144 string_type transform_primary(const char_type* p1, const char_type* p2) const
\r
146 return m_pimpl->transform_primary(p1, p2);
\r
148 char_class_type lookup_classname(const char_type* p1, const char_type* p2) const;
\r
149 string_type lookup_collatename(const char_type* p1, const char_type* p2) const;
\r
150 bool isctype(char_type c, char_class_type f) const;
\r
151 int toi(const char_type*& p1, const char_type* p2, int radix)const
\r
153 return re_detail::global_toi(p1, p2, radix, *this);
\r
155 int value(char_type c, int radix)const
\r
157 return u_digit(c, static_cast< ::int8_t>(radix));
\r
159 locale_type imbue(locale_type l)
\r
161 locale_type result(m_pimpl->getloc());
\r
162 m_pimpl = re_detail::get_icu_regex_traits_implementation(l);
\r
165 locale_type getloc()const
\r
167 return locale_type();
\r
169 std::string error_string(::boost::regex_constants::error_type n) const
\r
171 return re_detail::get_default_error_string(n);
\r
174 icu_regex_traits(const icu_regex_traits&);
\r
175 icu_regex_traits& operator=(const icu_regex_traits&);
\r
178 // define the bitmasks offsets we need for additional character properties:
\r
181 offset_blank = U_CHAR_CATEGORY_COUNT,
\r
182 offset_space = U_CHAR_CATEGORY_COUNT+1,
\r
183 offset_xdigit = U_CHAR_CATEGORY_COUNT+2,
\r
184 offset_underscore = U_CHAR_CATEGORY_COUNT+3,
\r
185 offset_unicode = U_CHAR_CATEGORY_COUNT+4,
\r
186 offset_any = U_CHAR_CATEGORY_COUNT+5,
\r
187 offset_ascii = U_CHAR_CATEGORY_COUNT+6,
\r
188 offset_horizontal = U_CHAR_CATEGORY_COUNT+7,
\r
189 offset_vertical = U_CHAR_CATEGORY_COUNT+8
\r
193 // and now the masks:
\r
195 static const char_class_type mask_blank;
\r
196 static const char_class_type mask_space;
\r
197 static const char_class_type mask_xdigit;
\r
198 static const char_class_type mask_underscore;
\r
199 static const char_class_type mask_unicode;
\r
200 static const char_class_type mask_any;
\r
201 static const char_class_type mask_ascii;
\r
202 static const char_class_type mask_horizontal;
\r
203 static const char_class_type mask_vertical;
\r
205 static char_class_type lookup_icu_mask(const ::UChar32* p1, const ::UChar32* p2);
\r
207 boost::shared_ptr< ::boost::re_detail::icu_regex_traits_implementation> m_pimpl;
\r
210 } // namespace boost
\r
213 // template instances:
\r
215 #define BOOST_REGEX_CHAR_T UChar32
\r
216 #undef BOOST_REGEX_TRAITS_T
\r
217 #define BOOST_REGEX_TRAITS_T , icu_regex_traits
\r
218 #define BOOST_REGEX_ICU_INSTANCES
\r
219 #ifdef BOOST_REGEX_ICU_INSTANTIATE
\r
220 # define BOOST_REGEX_INSTANTIATE
\r
222 #include <boost/regex/v4/instances.hpp>
\r
223 #undef BOOST_REGEX_CHAR_T
\r
224 #undef BOOST_REGEX_TRAITS_T
\r
225 #undef BOOST_REGEX_ICU_INSTANCES
\r
226 #ifdef BOOST_REGEX_INSTANTIATE
\r
227 # undef BOOST_REGEX_INSTANTIATE
\r
233 typedef basic_regex< ::UChar32, icu_regex_traits> u32regex;
\r
234 typedef match_results<const ::UChar32*> u32match;
\r
235 typedef match_results<const ::UChar*> u16match;
\r
238 // Construction of 32-bit regex types from UTF-8 and UTF-16 primitives:
\r
240 namespace re_detail{
\r
242 #if !defined(BOOST_NO_MEMBER_TEMPLATES) && !defined(__IBMCPP__)
\r
243 template <class InputIterator>
\r
244 inline u32regex do_make_u32regex(InputIterator i,
\r
246 boost::regex_constants::syntax_option_type opt,
\r
247 const boost::mpl::int_<1>*)
\r
249 typedef boost::u8_to_u32_iterator<InputIterator, UChar32> conv_type;
\r
250 return u32regex(conv_type(i), conv_type(j), opt);
\r
253 template <class InputIterator>
\r
254 inline u32regex do_make_u32regex(InputIterator i,
\r
256 boost::regex_constants::syntax_option_type opt,
\r
257 const boost::mpl::int_<2>*)
\r
259 typedef boost::u16_to_u32_iterator<InputIterator, UChar32> conv_type;
\r
260 return u32regex(conv_type(i), conv_type(j), opt);
\r
263 template <class InputIterator>
\r
264 inline u32regex do_make_u32regex(InputIterator i,
\r
266 boost::regex_constants::syntax_option_type opt,
\r
267 const boost::mpl::int_<4>*)
\r
269 return u32regex(i, j, opt);
\r
272 template <class InputIterator>
\r
273 inline u32regex do_make_u32regex(InputIterator i,
\r
275 boost::regex_constants::syntax_option_type opt,
\r
276 const boost::mpl::int_<1>*)
\r
278 typedef boost::u8_to_u32_iterator<InputIterator, UChar32> conv_type;
\r
279 typedef std::vector<UChar32> vector_type;
\r
281 conv_type a(i), b(j);
\r
288 return u32regex(&*v.begin(), v.size(), opt);
\r
289 return u32regex(static_cast<UChar32 const*>(0), static_cast<u32regex::size_type>(0), opt);
\r
292 template <class InputIterator>
\r
293 inline u32regex do_make_u32regex(InputIterator i,
\r
295 boost::regex_constants::syntax_option_type opt,
\r
296 const boost::mpl::int_<2>*)
\r
298 typedef boost::u16_to_u32_iterator<InputIterator, UChar32> conv_type;
\r
299 typedef std::vector<UChar32> vector_type;
\r
301 conv_type a(i), b(j);
\r
308 return u32regex(&*v.begin(), v.size(), opt);
\r
309 return u32regex(static_cast<UChar32 const*>(0), static_cast<u32regex::size_type>(0), opt);
\r
312 template <class InputIterator>
\r
313 inline u32regex do_make_u32regex(InputIterator i,
\r
315 boost::regex_constants::syntax_option_type opt,
\r
316 const boost::mpl::int_<4>*)
\r
318 typedef std::vector<UChar32> vector_type;
\r
322 v.push_back((UChar32)(*i));
\r
326 return u32regex(&*v.begin(), v.size(), opt);
\r
327 return u32regex(static_cast<UChar32 const*>(0), static_cast<u32regex::size_type>(0), opt);
\r
333 // Construction from an iterator pair:
\r
335 template <class InputIterator>
\r
336 inline u32regex make_u32regex(InputIterator i,
\r
338 boost::regex_constants::syntax_option_type opt)
\r
340 return re_detail::do_make_u32regex(i, j, opt, static_cast<boost::mpl::int_<sizeof(*i)> const*>(0));
\r
343 // construction from UTF-8 nul-terminated strings:
\r
345 inline u32regex make_u32regex(const char* p, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl)
\r
347 return re_detail::do_make_u32regex(p, p + std::strlen(p), opt, static_cast<boost::mpl::int_<1> const*>(0));
\r
349 inline u32regex make_u32regex(const unsigned char* p, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl)
\r
351 return re_detail::do_make_u32regex(p, p + std::strlen(reinterpret_cast<const char*>(p)), opt, static_cast<boost::mpl::int_<1> const*>(0));
\r
354 // construction from UTF-16 nul-terminated strings:
\r
356 #ifndef BOOST_NO_WREGEX
\r
357 inline u32regex make_u32regex(const wchar_t* p, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl)
\r
359 return re_detail::do_make_u32regex(p, p + std::wcslen(p), opt, static_cast<boost::mpl::int_<sizeof(wchar_t)> const*>(0));
\r
362 #if !defined(U_WCHAR_IS_UTF16) && (U_SIZEOF_WCHAR_T != 2)
\r
363 inline u32regex make_u32regex(const UChar* p, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl)
\r
365 return re_detail::do_make_u32regex(p, p + u_strlen(p), opt, static_cast<boost::mpl::int_<2> const*>(0));
\r
369 // construction from basic_string class-template:
\r
371 template<class C, class T, class A>
\r
372 inline u32regex make_u32regex(const std::basic_string<C, T, A>& s, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl)
\r
374 return re_detail::do_make_u32regex(s.begin(), s.end(), opt, static_cast<boost::mpl::int_<sizeof(C)> const*>(0));
\r
377 // Construction from ICU string type:
\r
379 inline u32regex make_u32regex(const UnicodeString& s, boost::regex_constants::syntax_option_type opt = boost::regex_constants::perl)
\r
381 return re_detail::do_make_u32regex(s.getBuffer(), s.getBuffer() + s.length(), opt, static_cast<boost::mpl::int_<2> const*>(0));
\r
385 // regex_match overloads that widen the character type as appropriate:
\r
387 namespace re_detail{
\r
388 template<class MR1, class MR2>
\r
389 void copy_results(MR1& out, MR2 const& in)
\r
391 // copy results from an adapted MR2 match_results:
\r
392 out.set_size(in.size(), in.prefix().first.base(), in.suffix().second.base());
\r
393 out.set_base(in.base().base());
\r
394 for(int i = 0; i < (int)in.size(); ++i)
\r
398 out.set_first(in[i].first.base(), i);
\r
399 out.set_second(in[i].second.base(), i);
\r
404 template <class BidiIterator, class Allocator>
\r
405 inline bool do_regex_match(BidiIterator first, BidiIterator last,
\r
406 match_results<BidiIterator, Allocator>& m,
\r
407 const u32regex& e,
\r
408 match_flag_type flags,
\r
409 boost::mpl::int_<4> const*)
\r
411 return ::boost::regex_match(first, last, m, e, flags);
\r
413 template <class BidiIterator, class Allocator>
\r
414 bool do_regex_match(BidiIterator first, BidiIterator last,
\r
415 match_results<BidiIterator, Allocator>& m,
\r
416 const u32regex& e,
\r
417 match_flag_type flags,
\r
418 boost::mpl::int_<2> const*)
\r
420 typedef u16_to_u32_iterator<BidiIterator, UChar32> conv_type;
\r
421 typedef match_results<conv_type> match_type;
\r
422 typedef typename match_type::allocator_type alloc_type;
\r
424 bool result = ::boost::regex_match(conv_type(first), conv_type(last), what, e, flags);
\r
425 // copy results across to m:
\r
426 if(result) copy_results(m, what);
\r
429 template <class BidiIterator, class Allocator>
\r
430 bool do_regex_match(BidiIterator first, BidiIterator last,
\r
431 match_results<BidiIterator, Allocator>& m,
\r
432 const u32regex& e,
\r
433 match_flag_type flags,
\r
434 boost::mpl::int_<1> const*)
\r
436 typedef u8_to_u32_iterator<BidiIterator, UChar32> conv_type;
\r
437 typedef match_results<conv_type> match_type;
\r
438 typedef typename match_type::allocator_type alloc_type;
\r
440 bool result = ::boost::regex_match(conv_type(first), conv_type(last), what, e, flags);
\r
441 // copy results across to m:
\r
442 if(result) copy_results(m, what);
\r
445 } // namespace re_detail
\r
447 template <class BidiIterator, class Allocator>
\r
448 inline bool u32regex_match(BidiIterator first, BidiIterator last,
\r
449 match_results<BidiIterator, Allocator>& m,
\r
450 const u32regex& e,
\r
451 match_flag_type flags = match_default)
\r
453 return re_detail::do_regex_match(first, last, m, e, flags, static_cast<mpl::int_<sizeof(*first)> const*>(0));
\r
455 inline bool u32regex_match(const UChar* p,
\r
456 match_results<const UChar*>& m,
\r
457 const u32regex& e,
\r
458 match_flag_type flags = match_default)
\r
460 return re_detail::do_regex_match(p, p+u_strlen(p), m, e, flags, static_cast<mpl::int_<2> const*>(0));
\r
462 #if !defined(U_WCHAR_IS_UTF16) && (U_SIZEOF_WCHAR_T != 2) && !defined(BOOST_NO_WREGEX)
\r
463 inline bool u32regex_match(const wchar_t* p,
\r
464 match_results<const wchar_t*>& m,
\r
465 const u32regex& e,
\r
466 match_flag_type flags = match_default)
\r
468 return re_detail::do_regex_match(p, p+std::wcslen(p), m, e, flags, static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
\r
471 inline bool u32regex_match(const char* p,
\r
472 match_results<const char*>& m,
\r
473 const u32regex& e,
\r
474 match_flag_type flags = match_default)
\r
476 return re_detail::do_regex_match(p, p+std::strlen(p), m, e, flags, static_cast<mpl::int_<1> const*>(0));
\r
478 inline bool u32regex_match(const unsigned char* p,
\r
479 match_results<const unsigned char*>& m,
\r
480 const u32regex& e,
\r
481 match_flag_type flags = match_default)
\r
483 return re_detail::do_regex_match(p, p+std::strlen((const char*)p), m, e, flags, static_cast<mpl::int_<1> const*>(0));
\r
485 inline bool u32regex_match(const std::string& s,
\r
486 match_results<std::string::const_iterator>& m,
\r
487 const u32regex& e,
\r
488 match_flag_type flags = match_default)
\r
490 return re_detail::do_regex_match(s.begin(), s.end(), m, e, flags, static_cast<mpl::int_<1> const*>(0));
\r
492 #ifndef BOOST_NO_STD_WSTRING
\r
493 inline bool u32regex_match(const std::wstring& s,
\r
494 match_results<std::wstring::const_iterator>& m,
\r
495 const u32regex& e,
\r
496 match_flag_type flags = match_default)
\r
498 return re_detail::do_regex_match(s.begin(), s.end(), m, e, flags, static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
\r
501 inline bool u32regex_match(const UnicodeString& s,
\r
502 match_results<const UChar*>& m,
\r
503 const u32regex& e,
\r
504 match_flag_type flags = match_default)
\r
506 return re_detail::do_regex_match(s.getBuffer(), s.getBuffer() + s.length(), m, e, flags, static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
\r
509 // regex_match overloads that do not return what matched:
\r
511 template <class BidiIterator>
\r
512 inline bool u32regex_match(BidiIterator first, BidiIterator last,
\r
513 const u32regex& e,
\r
514 match_flag_type flags = match_default)
\r
516 match_results<BidiIterator> m;
\r
517 return re_detail::do_regex_match(first, last, m, e, flags, static_cast<mpl::int_<sizeof(*first)> const*>(0));
\r
519 inline bool u32regex_match(const UChar* p,
\r
520 const u32regex& e,
\r
521 match_flag_type flags = match_default)
\r
523 match_results<const UChar*> m;
\r
524 return re_detail::do_regex_match(p, p+u_strlen(p), m, e, flags, static_cast<mpl::int_<2> const*>(0));
\r
526 #if !defined(U_WCHAR_IS_UTF16) && (U_SIZEOF_WCHAR_T != 2) && !defined(BOOST_NO_WREGEX)
\r
527 inline bool u32regex_match(const wchar_t* p,
\r
528 const u32regex& e,
\r
529 match_flag_type flags = match_default)
\r
531 match_results<const wchar_t*> m;
\r
532 return re_detail::do_regex_match(p, p+std::wcslen(p), m, e, flags, static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
\r
535 inline bool u32regex_match(const char* p,
\r
536 const u32regex& e,
\r
537 match_flag_type flags = match_default)
\r
539 match_results<const char*> m;
\r
540 return re_detail::do_regex_match(p, p+std::strlen(p), m, e, flags, static_cast<mpl::int_<1> const*>(0));
\r
542 inline bool u32regex_match(const unsigned char* p,
\r
543 const u32regex& e,
\r
544 match_flag_type flags = match_default)
\r
546 match_results<const unsigned char*> m;
\r
547 return re_detail::do_regex_match(p, p+std::strlen((const char*)p), m, e, flags, static_cast<mpl::int_<1> const*>(0));
\r
549 inline bool u32regex_match(const std::string& s,
\r
550 const u32regex& e,
\r
551 match_flag_type flags = match_default)
\r
553 match_results<std::string::const_iterator> m;
\r
554 return re_detail::do_regex_match(s.begin(), s.end(), m, e, flags, static_cast<mpl::int_<1> const*>(0));
\r
556 #ifndef BOOST_NO_STD_WSTRING
\r
557 inline bool u32regex_match(const std::wstring& s,
\r
558 const u32regex& e,
\r
559 match_flag_type flags = match_default)
\r
561 match_results<std::wstring::const_iterator> m;
\r
562 return re_detail::do_regex_match(s.begin(), s.end(), m, e, flags, static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
\r
565 inline bool u32regex_match(const UnicodeString& s,
\r
566 const u32regex& e,
\r
567 match_flag_type flags = match_default)
\r
569 match_results<const UChar*> m;
\r
570 return re_detail::do_regex_match(s.getBuffer(), s.getBuffer() + s.length(), m, e, flags, static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
\r
574 // regex_search overloads that widen the character type as appropriate:
\r
576 namespace re_detail{
\r
577 template <class BidiIterator, class Allocator>
\r
578 inline bool do_regex_search(BidiIterator first, BidiIterator last,
\r
579 match_results<BidiIterator, Allocator>& m,
\r
580 const u32regex& e,
\r
581 match_flag_type flags,
\r
583 boost::mpl::int_<4> const*)
\r
585 return ::boost::regex_search(first, last, m, e, flags, base);
\r
587 template <class BidiIterator, class Allocator>
\r
588 bool do_regex_search(BidiIterator first, BidiIterator last,
\r
589 match_results<BidiIterator, Allocator>& m,
\r
590 const u32regex& e,
\r
591 match_flag_type flags,
\r
593 boost::mpl::int_<2> const*)
\r
595 typedef u16_to_u32_iterator<BidiIterator, UChar32> conv_type;
\r
596 typedef match_results<conv_type> match_type;
\r
597 typedef typename match_type::allocator_type alloc_type;
\r
599 bool result = ::boost::regex_search(conv_type(first), conv_type(last), what, e, flags, conv_type(base));
\r
600 // copy results across to m:
\r
601 if(result) copy_results(m, what);
\r
604 template <class BidiIterator, class Allocator>
\r
605 bool do_regex_search(BidiIterator first, BidiIterator last,
\r
606 match_results<BidiIterator, Allocator>& m,
\r
607 const u32regex& e,
\r
608 match_flag_type flags,
\r
610 boost::mpl::int_<1> const*)
\r
612 typedef u8_to_u32_iterator<BidiIterator, UChar32> conv_type;
\r
613 typedef match_results<conv_type> match_type;
\r
614 typedef typename match_type::allocator_type alloc_type;
\r
616 bool result = ::boost::regex_search(conv_type(first), conv_type(last), what, e, flags, conv_type(base));
\r
617 // copy results across to m:
\r
618 if(result) copy_results(m, what);
\r
623 template <class BidiIterator, class Allocator>
\r
624 inline bool u32regex_search(BidiIterator first, BidiIterator last,
\r
625 match_results<BidiIterator, Allocator>& m,
\r
626 const u32regex& e,
\r
627 match_flag_type flags = match_default)
\r
629 return re_detail::do_regex_search(first, last, m, e, flags, first, static_cast<mpl::int_<sizeof(*first)> const*>(0));
\r
631 template <class BidiIterator, class Allocator>
\r
632 inline bool u32regex_search(BidiIterator first, BidiIterator last,
\r
633 match_results<BidiIterator, Allocator>& m,
\r
634 const u32regex& e,
\r
635 match_flag_type flags,
\r
638 return re_detail::do_regex_search(first, last, m, e, flags, base, static_cast<mpl::int_<sizeof(*first)> const*>(0));
\r
640 inline bool u32regex_search(const UChar* p,
\r
641 match_results<const UChar*>& m,
\r
642 const u32regex& e,
\r
643 match_flag_type flags = match_default)
\r
645 return re_detail::do_regex_search(p, p+u_strlen(p), m, e, flags, p, static_cast<mpl::int_<2> const*>(0));
\r
647 #if !defined(U_WCHAR_IS_UTF16) && (U_SIZEOF_WCHAR_T != 2) && !defined(BOOST_NO_WREGEX)
\r
648 inline bool u32regex_search(const wchar_t* p,
\r
649 match_results<const wchar_t*>& m,
\r
650 const u32regex& e,
\r
651 match_flag_type flags = match_default)
\r
653 return re_detail::do_regex_search(p, p+std::wcslen(p), m, e, flags, p, static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
\r
656 inline bool u32regex_search(const char* p,
\r
657 match_results<const char*>& m,
\r
658 const u32regex& e,
\r
659 match_flag_type flags = match_default)
\r
661 return re_detail::do_regex_search(p, p+std::strlen(p), m, e, flags, p, static_cast<mpl::int_<1> const*>(0));
\r
663 inline bool u32regex_search(const unsigned char* p,
\r
664 match_results<const unsigned char*>& m,
\r
665 const u32regex& e,
\r
666 match_flag_type flags = match_default)
\r
668 return re_detail::do_regex_search(p, p+std::strlen((const char*)p), m, e, flags, p, static_cast<mpl::int_<1> const*>(0));
\r
670 inline bool u32regex_search(const std::string& s,
\r
671 match_results<std::string::const_iterator>& m,
\r
672 const u32regex& e,
\r
673 match_flag_type flags = match_default)
\r
675 return re_detail::do_regex_search(s.begin(), s.end(), m, e, flags, s.begin(), static_cast<mpl::int_<1> const*>(0));
\r
677 #ifndef BOOST_NO_STD_WSTRING
\r
678 inline bool u32regex_search(const std::wstring& s,
\r
679 match_results<std::wstring::const_iterator>& m,
\r
680 const u32regex& e,
\r
681 match_flag_type flags = match_default)
\r
683 return re_detail::do_regex_search(s.begin(), s.end(), m, e, flags, s.begin(), static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
\r
686 inline bool u32regex_search(const UnicodeString& s,
\r
687 match_results<const UChar*>& m,
\r
688 const u32regex& e,
\r
689 match_flag_type flags = match_default)
\r
691 return re_detail::do_regex_search(s.getBuffer(), s.getBuffer() + s.length(), m, e, flags, s.getBuffer(), static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
\r
693 template <class BidiIterator>
\r
694 inline bool u32regex_search(BidiIterator first, BidiIterator last,
\r
695 const u32regex& e,
\r
696 match_flag_type flags = match_default)
\r
698 match_results<BidiIterator> m;
\r
699 return re_detail::do_regex_search(first, last, m, e, flags, first, static_cast<mpl::int_<sizeof(*first)> const*>(0));
\r
701 inline bool u32regex_search(const UChar* p,
\r
702 const u32regex& e,
\r
703 match_flag_type flags = match_default)
\r
705 match_results<const UChar*> m;
\r
706 return re_detail::do_regex_search(p, p+u_strlen(p), m, e, flags, p, static_cast<mpl::int_<2> const*>(0));
\r
708 #if !defined(U_WCHAR_IS_UTF16) && (U_SIZEOF_WCHAR_T != 2) && !defined(BOOST_NO_WREGEX)
\r
709 inline bool u32regex_search(const wchar_t* p,
\r
710 const u32regex& e,
\r
711 match_flag_type flags = match_default)
\r
713 match_results<const wchar_t*> m;
\r
714 return re_detail::do_regex_search(p, p+std::wcslen(p), m, e, flags, p, static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
\r
717 inline bool u32regex_search(const char* p,
\r
718 const u32regex& e,
\r
719 match_flag_type flags = match_default)
\r
721 match_results<const char*> m;
\r
722 return re_detail::do_regex_search(p, p+std::strlen(p), m, e, flags, p, static_cast<mpl::int_<1> const*>(0));
\r
724 inline bool u32regex_search(const unsigned char* p,
\r
725 const u32regex& e,
\r
726 match_flag_type flags = match_default)
\r
728 match_results<const unsigned char*> m;
\r
729 return re_detail::do_regex_search(p, p+std::strlen((const char*)p), m, e, flags, p, static_cast<mpl::int_<1> const*>(0));
\r
731 inline bool u32regex_search(const std::string& s,
\r
732 const u32regex& e,
\r
733 match_flag_type flags = match_default)
\r
735 match_results<std::string::const_iterator> m;
\r
736 return re_detail::do_regex_search(s.begin(), s.end(), m, e, flags, s.begin(), static_cast<mpl::int_<1> const*>(0));
\r
738 #ifndef BOOST_NO_STD_WSTRING
\r
739 inline bool u32regex_search(const std::wstring& s,
\r
740 const u32regex& e,
\r
741 match_flag_type flags = match_default)
\r
743 match_results<std::wstring::const_iterator> m;
\r
744 return re_detail::do_regex_search(s.begin(), s.end(), m, e, flags, s.begin(), static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
\r
747 inline bool u32regex_search(const UnicodeString& s,
\r
748 const u32regex& e,
\r
749 match_flag_type flags = match_default)
\r
751 match_results<const UChar*> m;
\r
752 return re_detail::do_regex_search(s.getBuffer(), s.getBuffer() + s.length(), m, e, flags, s.getBuffer(), static_cast<mpl::int_<sizeof(wchar_t)> const*>(0));
\r
756 // overloads for regex_replace with utf-8 and utf-16 data types:
\r
758 namespace re_detail{
\r
760 inline std::pair< boost::u8_to_u32_iterator<I>, boost::u8_to_u32_iterator<I> >
\r
761 make_utf32_seq(I i, I j, mpl::int_<1> const*)
\r
763 return std::pair< boost::u8_to_u32_iterator<I>, boost::u8_to_u32_iterator<I> >(boost::u8_to_u32_iterator<I>(i), boost::u8_to_u32_iterator<I>(j));
\r
766 inline std::pair< boost::u16_to_u32_iterator<I>, boost::u16_to_u32_iterator<I> >
\r
767 make_utf32_seq(I i, I j, mpl::int_<2> const*)
\r
769 return std::pair< boost::u16_to_u32_iterator<I>, boost::u16_to_u32_iterator<I> >(boost::u16_to_u32_iterator<I>(i), boost::u16_to_u32_iterator<I>(j));
\r
772 inline std::pair< I, I >
\r
773 make_utf32_seq(I i, I j, mpl::int_<4> const*)
\r
775 return std::pair< I, I >(i, j);
\r
777 template <class charT>
\r
778 inline std::pair< boost::u8_to_u32_iterator<const charT*>, boost::u8_to_u32_iterator<const charT*> >
\r
779 make_utf32_seq(const charT* p, mpl::int_<1> const*)
\r
781 return std::pair< boost::u8_to_u32_iterator<const charT*>, boost::u8_to_u32_iterator<const charT*> >(boost::u8_to_u32_iterator<const charT*>(p), boost::u8_to_u32_iterator<const charT*>(p+std::strlen((const char*)p)));
\r
783 template <class charT>
\r
784 inline std::pair< boost::u16_to_u32_iterator<const charT*>, boost::u16_to_u32_iterator<const charT*> >
\r
785 make_utf32_seq(const charT* p, mpl::int_<2> const*)
\r
787 return std::pair< boost::u16_to_u32_iterator<const charT*>, boost::u16_to_u32_iterator<const charT*> >(boost::u16_to_u32_iterator<const charT*>(p), boost::u16_to_u32_iterator<const charT*>(p+u_strlen((const UChar*)p)));
\r
789 template <class charT>
\r
790 inline std::pair< const charT*, const charT* >
\r
791 make_utf32_seq(const charT* p, mpl::int_<4> const*)
\r
793 return std::pair< const charT*, const charT* >(p, p+icu_regex_traits::length((UChar32 const*)p));
\r
795 template <class OutputIterator>
\r
796 inline OutputIterator make_utf32_out(OutputIterator o, mpl::int_<4> const*)
\r
800 template <class OutputIterator>
\r
801 inline utf16_output_iterator<OutputIterator> make_utf32_out(OutputIterator o, mpl::int_<2> const*)
\r
805 template <class OutputIterator>
\r
806 inline utf8_output_iterator<OutputIterator> make_utf32_out(OutputIterator o, mpl::int_<1> const*)
\r
811 template <class OutputIterator, class I1, class I2>
\r
812 OutputIterator do_regex_replace(OutputIterator out,
\r
813 std::pair<I1, I1> const& in,
\r
814 const u32regex& e,
\r
815 const std::pair<I2, I2>& fmt,
\r
816 match_flag_type flags
\r
819 // unfortunately we have to copy the format string in order to pass in onward:
\r
820 std::vector<UChar32> f;
\r
821 #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
\r
822 f.assign(fmt.first, fmt.second);
\r
825 I2 pos = fmt.first;
\r
826 while(pos != fmt.second)
\r
827 f.push_back(*pos++);
\r
830 regex_iterator<I1, UChar32, icu_regex_traits> i(in.first, in.second, e, flags);
\r
831 regex_iterator<I1, UChar32, icu_regex_traits> j;
\r
834 if(!(flags & regex_constants::format_no_copy))
\r
835 out = re_detail::copy(in.first, in.second, out);
\r
839 I1 last_m = in.first;
\r
842 if(!(flags & regex_constants::format_no_copy))
\r
843 out = re_detail::copy(i->prefix().first, i->prefix().second, out);
\r
845 out = ::boost::re_detail::regex_format_imp(out, *i, &*f.begin(), &*f.begin() + f.size(), flags, e.get_traits());
\r
847 out = ::boost::re_detail::regex_format_imp(out, *i, static_cast<UChar32 const*>(0), static_cast<UChar32 const*>(0), flags, e.get_traits());
\r
848 last_m = (*i)[0].second;
\r
849 if(flags & regex_constants::format_first_only)
\r
853 if(!(flags & regex_constants::format_no_copy))
\r
854 out = re_detail::copy(last_m, in.second, out);
\r
858 template <class BaseIterator>
\r
859 inline const BaseIterator& extract_output_base(const BaseIterator& b)
\r
863 template <class BaseIterator>
\r
864 inline BaseIterator extract_output_base(const utf8_output_iterator<BaseIterator>& b)
\r
868 template <class BaseIterator>
\r
869 inline BaseIterator extract_output_base(const utf16_output_iterator<BaseIterator>& b)
\r
875 template <class OutputIterator, class BidirectionalIterator, class charT>
\r
876 inline OutputIterator u32regex_replace(OutputIterator out,
\r
877 BidirectionalIterator first,
\r
878 BidirectionalIterator last,
\r
879 const u32regex& e,
\r
881 match_flag_type flags = match_default)
\r
883 return re_detail::extract_output_base
\r
884 #if BOOST_WORKAROUND(BOOST_MSVC, <= 1300)
\r
888 re_detail::do_regex_replace(
\r
889 re_detail::make_utf32_out(out, static_cast<mpl::int_<sizeof(*first)> const*>(0)),
\r
890 re_detail::make_utf32_seq(first, last, static_cast<mpl::int_<sizeof(*first)> const*>(0)),
\r
892 re_detail::make_utf32_seq(fmt, static_cast<mpl::int_<sizeof(*fmt)> const*>(0)),
\r
897 template <class OutputIterator, class Iterator, class charT>
\r
898 inline OutputIterator u32regex_replace(OutputIterator out,
\r
901 const u32regex& e,
\r
902 const std::basic_string<charT>& fmt,
\r
903 match_flag_type flags = match_default)
\r
905 return re_detail::extract_output_base
\r
906 #if BOOST_WORKAROUND(BOOST_MSVC, <= 1300)
\r
910 re_detail::do_regex_replace(
\r
911 re_detail::make_utf32_out(out, static_cast<mpl::int_<sizeof(*first)> const*>(0)),
\r
912 re_detail::make_utf32_seq(first, last, static_cast<mpl::int_<sizeof(*first)> const*>(0)),
\r
914 re_detail::make_utf32_seq(fmt.begin(), fmt.end(), static_cast<mpl::int_<sizeof(charT)> const*>(0)),
\r
919 template <class OutputIterator, class Iterator>
\r
920 inline OutputIterator u32regex_replace(OutputIterator out,
\r
923 const u32regex& e,
\r
924 const UnicodeString& fmt,
\r
925 match_flag_type flags = match_default)
\r
927 return re_detail::extract_output_base
\r
928 #if BOOST_WORKAROUND(BOOST_MSVC, <= 1300)
\r
932 re_detail::do_regex_replace(
\r
933 re_detail::make_utf32_out(out, static_cast<mpl::int_<sizeof(*first)> const*>(0)),
\r
934 re_detail::make_utf32_seq(first, last, static_cast<mpl::int_<sizeof(*first)> const*>(0)),
\r
936 re_detail::make_utf32_seq(fmt.getBuffer(), fmt.getBuffer() + fmt.length(), static_cast<mpl::int_<2> const*>(0)),
\r
941 template <class charT>
\r
942 std::basic_string<charT> u32regex_replace(const std::basic_string<charT>& s,
\r
943 const u32regex& e,
\r
945 match_flag_type flags = match_default)
\r
947 std::basic_string<charT> result;
\r
948 re_detail::string_out_iterator<std::basic_string<charT> > i(result);
\r
949 u32regex_replace(i, s.begin(), s.end(), e, fmt, flags);
\r
953 template <class charT>
\r
954 std::basic_string<charT> u32regex_replace(const std::basic_string<charT>& s,
\r
955 const u32regex& e,
\r
956 const std::basic_string<charT>& fmt,
\r
957 match_flag_type flags = match_default)
\r
959 std::basic_string<charT> result;
\r
960 re_detail::string_out_iterator<std::basic_string<charT> > i(result);
\r
961 u32regex_replace(i, s.begin(), s.end(), e, fmt.c_str(), flags);
\r
965 namespace re_detail{
\r
967 class unicode_string_out_iterator
\r
969 UnicodeString* out;
\r
971 unicode_string_out_iterator(UnicodeString& s) : out(&s) {}
\r
972 unicode_string_out_iterator& operator++() { return *this; }
\r
973 unicode_string_out_iterator& operator++(int) { return *this; }
\r
974 unicode_string_out_iterator& operator*() { return *this; }
\r
975 unicode_string_out_iterator& operator=(UChar v)
\r
980 typedef std::ptrdiff_t difference_type;
\r
981 typedef UChar value_type;
\r
982 typedef value_type* pointer;
\r
983 typedef value_type& reference;
\r
984 typedef std::output_iterator_tag iterator_category;
\r
989 inline UnicodeString u32regex_replace(const UnicodeString& s,
\r
990 const u32regex& e,
\r
992 match_flag_type flags = match_default)
\r
994 UnicodeString result;
\r
995 re_detail::unicode_string_out_iterator i(result);
\r
996 u32regex_replace(i, s.getBuffer(), s.getBuffer()+s.length(), e, fmt, flags);
\r
1000 inline UnicodeString u32regex_replace(const UnicodeString& s,
\r
1001 const u32regex& e,
\r
1002 const UnicodeString& fmt,
\r
1003 match_flag_type flags = match_default)
\r
1005 UnicodeString result;
\r
1006 re_detail::unicode_string_out_iterator i(result);
\r
1007 re_detail::do_regex_replace(
\r
1008 re_detail::make_utf32_out(i, static_cast<mpl::int_<2> const*>(0)),
\r
1009 re_detail::make_utf32_seq(s.getBuffer(), s.getBuffer()+s.length(), static_cast<mpl::int_<2> const*>(0)),
\r
1011 re_detail::make_utf32_seq(fmt.getBuffer(), fmt.getBuffer() + fmt.length(), static_cast<mpl::int_<2> const*>(0)),
\r
1016 } // namespace boost.
\r
1018 #include <boost/regex/v4/u32regex_iterator.hpp>
\r
1019 #include <boost/regex/v4/u32regex_token_iterator.hpp>
\r