3 * Copyright (c) 1998-2004 John Maddock
4 * Copyright 2011 Garmin Ltd. or its subsidiaries
6 * Distributed under the Boost Software License, Version 1.0.
7 * (See accompanying file LICENSE_1_0.txt or copy at
8 * http://www.boost.org/LICENSE_1_0.txt)
13 * LOCATION: see http://www.boost.org/ for most recent version.
14 * FILE basic_regex.hpp
15 * VERSION see <boost/version.hpp>
16 * DESCRIPTION: Declares template class basic_regex.
19 #ifndef BOOST_REGEX_V4_BASIC_REGEX_HPP
20 #define BOOST_REGEX_V4_BASIC_REGEX_HPP
22 #include <boost/type_traits/is_same.hpp>
23 #include <boost/functional/hash.hpp>
27 #pragma warning(disable: 4103)
29 #ifdef BOOST_HAS_ABI_HEADERS
30 # include BOOST_ABI_PREFIX
39 #pragma warning(disable : 4251 4231 4800)
41 #pragma warning(disable : 4660)
48 // forward declaration, we will need this one later:
// basic_regex_implementation::assign() creates a basic_regex_parser<charT, traits>
// from a regex_data pointer and calls its parse() member, so the template has to
// be declared before that point.
50 template <class charT, class traits>
51 class basic_regex_parser;
//
// bubble_down_one:
// Single insertion step: given a range [first, last) whose elements other than
// the final one are already ordered by operator<, moves the final element
// towards the front until its predecessor is no longer greater than it.
//
// I must be a random access iterator (the code uses last - 1 and next - 1) and
// its value type must provide operator< and a member swap().
// An empty range is left untouched.
//
template <class I>
void bubble_down_one(I first, I last)
{
   if(first != last)
   {
      // Start at the most recently appended element.
      I next = last - 1;
      while((next != first) && (*next < *(next-1)))
      {
         (next-1)->swap(*next);
         // Follow the element we just moved one slot towards the front.
         --next;
      }
   }
}
67 template <class Iterator>
68 inline int hash_value_from_capture_name(Iterator i, Iterator j)
70 std::size_t r = boost::hash_range(i, j);
71 r %= ((std::numeric_limits<int>::max)() - 10001);
73 return static_cast<int>(r);
// Registry of named sub-expressions. Each entry is a `name` record carrying an
// `index` and an integer `hash`; the entries are stored in the vector m_sub_names.
// NOTE(review): this listing is abridged (braces, member declarations and some
// statements are not visible), so the notes below cover only what is shown.
76 class named_subexpressions
// Builds a `name` record from the character range [i, j): the hash is taken from
// hash_value_from_capture_name(i, j).
81 template <class charT>
82 name(const charT* i, const charT* j, int idx)
85 hash = hash_value_from_capture_name(i, j);
// Records are ordered and compared by hash alone; the index takes no part, so two
// records with the same hash compare equal.
93 bool operator < (const name& other)const
95 return hash < other.hash;
97 bool operator == (const name& other)const
99 return hash == other.hash;
// Exchanges both fields with `other`; bubble_down_one() calls this member swap.
101 void swap(name& other)
103 std::swap(index, other.index);
104 std::swap(hash, other.hash);
// Iterator over the stored records, and the iterator pair returned by equal_range().
108 typedef std::vector<name>::const_iterator const_iterator;
109 typedef std::pair<const_iterator, const_iterator> range_type;
111 named_subexpressions(){}
// Appends a record for the name [i, j) with the given index, then runs
// bubble_down_one() over the whole vector - presumably to restore the hash
// ordering that the binary searches below depend on.
113 template <class charT>
114 void set_name(const charT* i, const charT* j, int index)
116 m_sub_names.push_back(name(i, j, index));
117 bubble_down_one(m_sub_names.begin(), m_sub_names.end());
// Looks a name up by its characters: std::lower_bound locates the first record
// not less than the probe `t`, and the test below checks for an exact hash match.
// The construction of `t` and the returned values are not visible in this listing.
119 template <class charT>
120 int get_id(const charT* i, const charT* j)const
123 typename std::vector<name>::const_iterator pos = std::lower_bound(m_sub_names.begin(), m_sub_names.end(), t);
124 if((pos != m_sub_names.end()) && (*pos == t))
// Returns the [first, last) run of records that compare equal to the probe `t`
// (construction of `t` not visible here), i.e. all records sharing its hash.
130 template <class charT>
131 range_type equal_range(const charT* i, const charT* j)const
134 return std::equal_range(m_sub_names.begin(), m_sub_names.end(), t);
// Same lookup as the character-range overload of get_id(), but the caller supplies
// a value `h` directly - presumably an already computed hash; TODO confirm.
136 int get_id(int h)const
139 std::vector<name>::const_iterator pos = std::lower_bound(m_sub_names.begin(), m_sub_names.end(), t);
140 if((pos != m_sub_names.end()) && (*pos == t))
// equal_range() counterpart for a caller-supplied value `h`.
146 range_type equal_range(int h)const
149 return std::equal_range(m_sub_names.begin(), m_sub_names.end(), t);
// The records themselves; searched with lower_bound/equal_range above, so they
// need to stay ordered by hash.
152 std::vector<name> m_sub_names;
157 // represents the data we wish to expose to the matching algorithms.
159 template <class charT, class traits>
160 struct regex_data : public named_subexpressions
162 typedef regex_constants::syntax_option_type flag_type;
163 typedef std::size_t size_type;
165 regex_data(const ::boost::shared_ptr<
166 ::boost::regex_traits_wrapper<traits> >& t)
167 : m_ptraits(t), m_expression(0), m_expression_len(0) {}
169 : m_ptraits(new ::boost::regex_traits_wrapper<traits>()), m_expression(0), m_expression_len(0) {}
172 ::boost::regex_traits_wrapper<traits>
173 > m_ptraits; // traits class instance
174 flag_type m_flags; // flags with which we were compiled
175 int m_status; // error code (0 implies OK).
176 const charT* m_expression; // the original expression
177 std::ptrdiff_t m_expression_len; // the length of the original expression
178 size_type m_mark_count; // the number of marked sub-expressions
179 re_detail::re_syntax_base* m_first_state; // the first state of the machine
180 unsigned m_restart_type; // search optimisation type
181 unsigned char m_startmap[1 << CHAR_BIT]; // which characters can start a match
182 unsigned int m_can_be_null; // whether we can match a null string
183 re_detail::raw_storage m_data; // the buffer in which our states are constructed
184 typename traits::char_class_type m_word_mask; // mask used to determine if a character is a word character
187 std::size_t, std::size_t> > m_subs; // Position of sub-expressions within the *string*.
188 bool m_has_recursions; // whether we have recursive expressions;
191 // class basic_regex_implementation
192 // pimpl implementation class for basic_regex.
// Implementation object held by basic_regex through a shared_ptr. It adds
// compilation (assign) and read-only accessors on top of the regex_data fields
// it inherits.
// NOTE(review): this listing is abridged (braces and a few statements are not
// visible), so the notes below cover only what is shown.
194 template <class charT, class traits>
195 class basic_regex_implementation
196 : public regex_data<charT, traits>
199 typedef regex_constants::syntax_option_type flag_type;
200 typedef std::ptrdiff_t difference_type;
201 typedef std::size_t size_type;
202 typedef typename traits::locale_type locale_type;
203 typedef const charT* const_iterator;
// Default construction, or construction sharing an existing traits object `t`
// (handed straight to the regex_data base).
205 basic_regex_implementation(){}
206 basic_regex_implementation(const ::boost::shared_ptr<
207 ::boost::regex_traits_wrapper<traits> >& t)
208 : regex_data<charT, traits>(t) {}
// Compiles the expression [arg_first, arg_last): a basic_regex_parser is given a
// pointer to the regex_data base of *this and its parse() is called with the
// range and the flags `f` (the declaration of `f` is not visible here).
209 void assign(const charT* arg_first,
210 const charT* arg_last,
213 regex_data<charT, traits>* pdat = this;
214 basic_regex_parser<charT, traits> parser(pdat);
215 parser.parse(arg_first, arg_last, f);
// Locale handling is delegated to the shared traits object.
218 locale_type BOOST_REGEX_CALL imbue(locale_type l)
220 return this->m_ptraits->imbue(l);
222 locale_type BOOST_REGEX_CALL getloc()const
224 return this->m_ptraits->getloc();
// Copy of the original expression text. `result` is only filled in when
// m_status is 0, so it stays empty for a failed compile.
226 std::basic_string<charT> BOOST_REGEX_CALL str()const
228 std::basic_string<charT> result;
229 if(this->m_status == 0)
230 result = std::basic_string<charT>(this->m_expression, this->m_expression_len);
// Raw pointer to the stored expression, returned regardless of m_status.
233 const_iterator BOOST_REGEX_CALL expression()const
235 return this->m_expression;
// Source text of the n-th recorded sub-expression: the two offsets stored in
// m_subs are turned into pointers into the expression. m_subs.at(n) is used for
// the lookup - assuming m_subs is a std::vector, an out-of-range n throws.
237 std::pair<const_iterator, const_iterator> BOOST_REGEX_CALL subexpression(std::size_t n)const
239 const std::pair<std::size_t, std::size_t>& pi = this->m_subs.at(n);
240 std::pair<const_iterator, const_iterator> p(expression() + pi.first, expression() + pi.second);
// begin()/end() delimit the expression text; both yield a null pointer when
// m_status is non-zero.
245 const_iterator BOOST_REGEX_CALL begin()const
247 return (this->m_status ? 0 : this->m_expression);
249 const_iterator BOOST_REGEX_CALL end()const
251 return (this->m_status ? 0 : this->m_expression + this->m_expression_len);
253 flag_type BOOST_REGEX_CALL flags()const
255 return this->m_flags;
// Length of the stored expression (m_expression_len converted to size_type).
257 size_type BOOST_REGEX_CALL size()const
259 return this->m_expression_len;
261 int BOOST_REGEX_CALL status()const
263 return this->m_status;
// Reports one less than the stored count - presumably because the stored count
// also includes the whole-match group; TODO confirm against the parser.
265 size_type BOOST_REGEX_CALL mark_count()const
267 return this->m_mark_count - 1;
// Plain read accessors for the compiled state machine, used by the matcher.
269 const re_detail::re_syntax_base* get_first_state()const
271 return this->m_first_state;
273 unsigned get_restart_type()const
275 return this->m_restart_type;
277 const unsigned char* get_map()const
279 return this->m_startmap;
281 const ::boost::regex_traits_wrapper<traits>& get_traits()const
283 return *(this->m_ptraits);
// m_can_be_null is an unsigned int; it is converted to bool on return.
285 bool can_be_null()const
287 return this->m_can_be_null;
// Exposes *this as its regex_data base (an explicit upcast).
289 const regex_data<charT, traits>& get_data()const
291 basic_regex_implementation<charT, traits> const* p = this;
292 return *static_cast<const regex_data<charT, traits>*>(p);
296 } // namespace re_detail
298 // class basic_regex:
299 // represents the compiled
300 // regular expression:
// basic_regex is a handle around a shared implementation object (m_pimpl).
// Copying a basic_regex copies the shared_ptr only, and every read accessor
// below copes with a null m_pimpl by returning a default value.
// NOTE(review): this listing is abridged (braces, access specifiers and a number
// of statements are not visible), so the notes below cover only what is shown.
// The default for `traits` is only supplied when BOOST_REGEX_NO_FWD is defined.
303 #ifdef BOOST_REGEX_NO_FWD
304 template <class charT, class traits = regex_traits<charT> >
306 template <class charT, class traits >
308 class basic_regex : public regbase
312 typedef std::size_t traits_size_type;
313 typedef typename traits::string_type traits_string_type;
314 typedef charT char_type;
315 typedef traits traits_type;
// Container-style typedefs; iteration is over the expression text via const charT*.
317 typedef charT value_type;
318 typedef charT& reference;
319 typedef const charT& const_reference;
320 typedef const charT* const_iterator;
321 typedef const_iterator iterator;
322 typedef std::ptrdiff_t difference_type;
323 typedef std::size_t size_type;
324 typedef regex_constants::syntax_option_type flag_type;
326 // placeholder for actual locale type used by the
327 // traits class to localise *this.
328 typedef typename traits::locale_type locale_type;
// Constructors. Bodies of the pointer-taking overloads are not visible in this
// listing; the default constructor leaves m_pimpl null.
331 explicit basic_regex(){}
332 explicit basic_regex(const charT* p, flag_type f = regex_constants::normal)
336 basic_regex(const charT* p1, const charT* p2, flag_type f = regex_constants::normal)
340 basic_regex(const charT* p, size_type len, flag_type f)
// Copy construction shares the implementation object with `that`.
344 basic_regex(const basic_regex& that)
345 : m_pimpl(that.m_pimpl) {}
347 basic_regex& BOOST_REGEX_CALL operator=(const basic_regex& that)
351 basic_regex& BOOST_REGEX_CALL operator=(const charT* ptr)
// assign() family. Copy-assign shares the implementation; the pointer overloads
// all funnel into assign(p1, p2, f), which calls do_assign().
358 basic_regex& assign(const basic_regex& that)
360 m_pimpl = that.m_pimpl;
// Null-terminated string: the length is obtained from traits::length().
363 basic_regex& assign(const charT* p, flag_type f = regex_constants::normal)
365 return assign(p, p + traits::length(p), f);
367 basic_regex& assign(const charT* p, size_type len, flag_type f)
369 return assign(p, p + len, f);
// Declared here, defined out of line after the class.
372 basic_regex& do_assign(const charT* p1,
376 basic_regex& assign(const charT* p1,
378 flag_type f = regex_constants::normal)
380 return do_assign(p1, p2, f);
// With member templates available, the std::basic_string overloads accept any
// string traits/allocator; the non-template overloads further down are
// presumably the fallback branch (the #else is not visible in this listing).
382 #if !defined(BOOST_NO_MEMBER_TEMPLATES)
384 template <class ST, class SA>
385 unsigned int BOOST_REGEX_CALL set_expression(const std::basic_string<charT, ST, SA>& p, flag_type f = regex_constants::normal)
387 return set_expression(p.data(), p.data() + p.size(), f);
390 template <class ST, class SA>
391 explicit basic_regex(const std::basic_string<charT, ST, SA>& p, flag_type f = regex_constants::normal)
// Arbitrary iterator range: the characters are first copied into a
// traits::string_type so that a contiguous [pointer, pointer) range can be
// assigned. The second assign() passes two null pointers; its guarding condition
// is not visible here - presumably it handles an empty sequence.
396 template <class InputIterator>
397 basic_regex(InputIterator arg_first, InputIterator arg_last, flag_type f = regex_constants::normal)
399 typedef typename traits::string_type seq_type;
400 seq_type a(arg_first, arg_last);
402 assign(static_cast<const charT*>(&*a.begin()), static_cast<const charT*>(&*a.begin() + a.size()), f);
404 assign(static_cast<const charT*>(0), static_cast<const charT*>(0), f);
// Assigning from a string always uses regex_constants::normal flags.
407 template <class ST, class SA>
408 basic_regex& BOOST_REGEX_CALL operator=(const std::basic_string<charT, ST, SA>& p)
410 return assign(p.data(), p.data() + p.size(), regex_constants::normal);
413 template <class string_traits, class A>
414 basic_regex& BOOST_REGEX_CALL assign(
415 const std::basic_string<charT, string_traits, A>& s,
416 flag_type f = regex_constants::normal)
418 return assign(s.data(), s.data() + s.size(), f);
// Iterator-range assign(): same copy-then-assign approach as the iterator
// constructor above, including the null-pointer call whose condition is not
// visible in this listing.
421 template <class InputIterator>
422 basic_regex& BOOST_REGEX_CALL assign(InputIterator arg_first,
423 InputIterator arg_last,
424 flag_type f = regex_constants::normal)
426 typedef typename traits::string_type seq_type;
427 seq_type a(arg_first, arg_last);
430 const charT* p1 = &*a.begin();
431 const charT* p2 = &*a.begin() + a.size();
432 return assign(p1, p2, f);
434 return assign(static_cast<const charT*>(0), static_cast<const charT*>(0), f);
// Non-template std::basic_string<charT> counterparts of the overloads above.
437 unsigned int BOOST_REGEX_CALL set_expression(const std::basic_string<charT>& p, flag_type f = regex_constants::normal)
439 return set_expression(p.data(), p.data() + p.size(), f);
442 basic_regex(const std::basic_string<charT>& p, flag_type f = regex_constants::normal)
447 basic_regex& BOOST_REGEX_CALL operator=(const std::basic_string<charT>& p)
449 return assign(p.data(), p.data() + p.size(), regex_constants::normal);
452 basic_regex& BOOST_REGEX_CALL assign(
453 const std::basic_string<charT>& s,
454 flag_type f = regex_constants::normal)
456 return assign(s.data(), s.data() + s.size(), f);
// Locale support: imbue() is defined out of line after the class; getloc()
// yields a default constructed locale_type when there is no implementation.
463 locale_type BOOST_REGEX_CALL imbue(locale_type l);
464 locale_type BOOST_REGEX_CALL getloc()const
466 return m_pimpl.get() ? m_pimpl->getloc() : locale_type();
470 // retained for backwards compatibility only, "flags"
471 // is now the preferred name:
472 flag_type BOOST_REGEX_CALL getflags()const
// Flags the expression was compiled with, or 0 when there is no implementation.
476 flag_type BOOST_REGEX_CALL flags()const
478 return m_pimpl.get() ? m_pimpl->flags() : 0;
// Copy of the expression text, or an empty string when there is no implementation.
482 std::basic_string<charT> BOOST_REGEX_CALL str()const
484 return m_pimpl.get() ? m_pimpl->str() : std::basic_string<charT>();
487 // begin, end, subexpression:
// Throws std::logic_error (via boost::throw_exception); the condition guarding
// the throw is not visible in this listing.
488 std::pair<const_iterator, const_iterator> BOOST_REGEX_CALL subexpression(std::size_t n)const
491 boost::throw_exception(std::logic_error("Can't access subexpressions in an invalid regex."));
492 return m_pimpl->subexpression(n);
// Null pointers are returned when there is no implementation.
494 const_iterator BOOST_REGEX_CALL begin()const
496 return (m_pimpl.get() ? m_pimpl->begin() : 0);
498 const_iterator BOOST_REGEX_CALL end()const
500 return (m_pimpl.get() ? m_pimpl->end() : 0);
// Exchanges the implementation pointers only; declared non-throwing.
504 void BOOST_REGEX_CALL swap(basic_regex& that)throw()
506 m_pimpl.swap(that.m_pimpl);
510 size_type BOOST_REGEX_CALL size()const
512 return (m_pimpl.get() ? m_pimpl->size() : 0);
516 size_type BOOST_REGEX_CALL max_size()const
// True when there is no implementation or when its status() is non-zero; the
// length of the expression text is not what is tested.
522 bool BOOST_REGEX_CALL empty()const
524 return (m_pimpl.get() ? 0 != m_pimpl->status() : true);
527 size_type BOOST_REGEX_CALL mark_count()const
529 return (m_pimpl.get() ? m_pimpl->mark_count() : 0);
// Status of the implementation, or regex_constants::error_empty when there is
// none (the signature of this accessor is not visible in this listing).
534 return (m_pimpl.get() ? m_pimpl->status() : regex_constants::error_empty);
// Three-way comparison. The visible steps compare, in order: implementation
// pointer identity, status(), flags(), and finally the expression text. The
// values returned for the pointer checks are not visible in this listing.
537 int BOOST_REGEX_CALL compare(const basic_regex& that) const
539 if(m_pimpl.get() == that.m_pimpl.get())
543 if(!that.m_pimpl.get())
545 if(status() != that.status())
546 return status() - that.status();
547 if(flags() != that.flags())
548 return flags() - that.flags();
549 return str().compare(that.str());
// All relational operators are expressed through compare().
551 bool BOOST_REGEX_CALL operator==(const basic_regex& e)const
553 return compare(e) == 0;
555 bool BOOST_REGEX_CALL operator != (const basic_regex& e)const
557 return compare(e) != 0;
559 bool BOOST_REGEX_CALL operator<(const basic_regex& e)const
561 return compare(e) < 0;
563 bool BOOST_REGEX_CALL operator>(const basic_regex& e)const
565 return compare(e) > 0;
567 bool BOOST_REGEX_CALL operator<=(const basic_regex& e)const
569 return compare(e) <= 0;
571 bool BOOST_REGEX_CALL operator>=(const basic_regex& e)const
573 return compare(e) >= 0;
577 // The following are deprecated as public interfaces
578 // but are available for compatibility with earlier versions.
// Null unless an implementation exists and its status() is 0.
579 const charT* BOOST_REGEX_CALL expression()const
581 return (m_pimpl.get() && !m_pimpl->status() ? m_pimpl->expression() : 0);
// set_expression() always adds regex_constants::no_except to the flags before
// assigning; the value it returns is not visible in this listing.
583 unsigned int BOOST_REGEX_CALL set_expression(const charT* p1, const charT* p2, flag_type f = regex_constants::normal)
585 assign(p1, p2, f | regex_constants::no_except);
588 unsigned int BOOST_REGEX_CALL set_expression(const charT* p, flag_type f = regex_constants::normal)
590 assign(p, f | regex_constants::no_except);
593 unsigned int BOOST_REGEX_CALL error_code()const
598 // private access methods:
// Unlike the public accessors, these assert that an implementation exists and
// then forward to it without a null fallback.
600 const re_detail::re_syntax_base* get_first_state()const
602 BOOST_ASSERT(0 != m_pimpl.get());
603 return m_pimpl->get_first_state();
605 unsigned get_restart_type()const
607 BOOST_ASSERT(0 != m_pimpl.get());
608 return m_pimpl->get_restart_type();
610 const unsigned char* get_map()const
612 BOOST_ASSERT(0 != m_pimpl.get());
613 return m_pimpl->get_map();
615 const ::boost::regex_traits_wrapper<traits>& get_traits()const
617 BOOST_ASSERT(0 != m_pimpl.get());
618 return m_pimpl->get_traits();
620 bool can_be_null()const
622 BOOST_ASSERT(0 != m_pimpl.get());
623 return m_pimpl->can_be_null();
625 const re_detail::regex_data<charT, traits>& get_data()const
627 BOOST_ASSERT(0 != m_pimpl.get());
628 return m_pimpl->get_data();
630 boost::shared_ptr<re_detail::named_subexpressions > get_named_subs()const
// The shared implementation; null for a default constructed basic_regex.
636 shared_ptr<re_detail::basic_regex_implementation<charT, traits> > m_pimpl;
640 // out of line members;
641 // these are the only members that mutate the basic_regex object,
642 // and are designed to provide the strong exception guarantee
643 // (in the event of a throw, the state of the object remains unchanged).
// Compiles [p1, p2) with flags f into a brand new implementation object held in
// the local `temp`.
// NOTE(review): this listing is abridged - the remaining parameters, the
// condition selecting between the two allocations, and everything after
// temp->assign() are not visible.
645 template <class charT, class traits>
646 basic_regex<charT, traits>& basic_regex<charT, traits>::do_assign(const charT* p1,
650 shared_ptr<re_detail::basic_regex_implementation<charT, traits> > temp;
// Fresh implementation with its own default traits object.
653 temp = shared_ptr<re_detail::basic_regex_implementation<charT, traits> >(new re_detail::basic_regex_implementation<charT, traits>());
// Fresh implementation that reuses the traits object of the current one, so a
// previously imbued locale carries over.
657 temp = shared_ptr<re_detail::basic_regex_implementation<charT, traits> >(new re_detail::basic_regex_implementation<charT, traits>(m_pimpl->m_ptraits));
// The parse runs on `temp` only; m_pimpl is not modified by any visible line, so
// a throw from here leaves *this as it was.
659 temp->assign(p1, p2, f);
// Imbues the locale `l` into a newly created, default constructed implementation
// object and keeps the value returned by its imbue() in `result`.
// NOTE(review): the statements that install `temp` and return are not visible in
// this listing. Since a new implementation is created, any previously compiled
// expression is presumably discarded - confirm against the full source.
664 template <class charT, class traits>
665 typename basic_regex<charT, traits>::locale_type BOOST_REGEX_CALL basic_regex<charT, traits>::imbue(locale_type l)
667 shared_ptr<re_detail::basic_regex_implementation<charT, traits> > temp(new re_detail::basic_regex_implementation<charT, traits>());
668 locale_type result = temp->imbue(l);
// Non-member swap for two basic_regex objects of the same type.
// NOTE(review): the body is not visible in this listing; presumably it forwards
// to the member basic_regex::swap() - confirm against the full source.
676 template <class charT, class traits>
677 void swap(basic_regex<charT, traits>& e1, basic_regex<charT, traits>& e2)
// Stream insertion: writes the expression text (e.str()) to the stream and
// returns the stream.
// When BOOST_NO_STD_LOCALE is not defined, the generic overload works with any
// std::basic_ostream whose character type matches the regex.
682 #ifndef BOOST_NO_STD_LOCALE
683 template <class charT, class traits, class traits2>
684 std::basic_ostream<charT, traits>&
685 operator << (std::basic_ostream<charT, traits>& os,
686 const basic_regex<charT, traits2>& e)
688 return (os << e.str());
// Narrow-character only overload for std::ostream - presumably the alternative
// branch of the conditional above (the #else is not visible in this listing).
691 template <class traits>
692 std::ostream& operator << (std::ostream& os, const basic_regex<char, traits>& e)
694 return (os << e.str());
699 // class reg_expression:
700 // this is provided for backwards compatibility only,
701 // it is deprecated, do not use!
703 #ifdef BOOST_REGEX_NO_FWD
704 template <class charT, class traits = regex_traits<charT> >
706 template <class charT, class traits >
708 class reg_expression : public basic_regex<charT, traits>
711 typedef typename basic_regex<charT, traits>::flag_type flag_type;
712 typedef typename basic_regex<charT, traits>::size_type size_type;
713 explicit reg_expression(){}
714 explicit reg_expression(const charT* p, flag_type f = regex_constants::normal)
715 : basic_regex<charT, traits>(p, f){}
716 reg_expression(const charT* p1, const charT* p2, flag_type f = regex_constants::normal)
717 : basic_regex<charT, traits>(p1, p2, f){}
718 reg_expression(const charT* p, size_type len, flag_type f)
719 : basic_regex<charT, traits>(p, len, f){}
720 reg_expression(const reg_expression& that)
721 : basic_regex<charT, traits>(that) {}
723 reg_expression& BOOST_REGEX_CALL operator=(const reg_expression& that)
725 return this->assign(that);
728 #if !defined(BOOST_NO_MEMBER_TEMPLATES)
729 template <class ST, class SA>
730 explicit reg_expression(const std::basic_string<charT, ST, SA>& p, flag_type f = regex_constants::normal)
731 : basic_regex<charT, traits>(p, f)
735 template <class InputIterator>
736 reg_expression(InputIterator arg_first, InputIterator arg_last, flag_type f = regex_constants::normal)
737 : basic_regex<charT, traits>(arg_first, arg_last, f)
741 template <class ST, class SA>
742 reg_expression& BOOST_REGEX_CALL operator=(const std::basic_string<charT, ST, SA>& p)
748 explicit reg_expression(const std::basic_string<charT>& p, flag_type f = regex_constants::normal)
749 : basic_regex<charT, traits>(p, f)
753 reg_expression& BOOST_REGEX_CALL operator=(const std::basic_string<charT>& p)
763 #pragma warning (pop)
769 #pragma warning(push)
770 #pragma warning(disable: 4103)
772 #ifdef BOOST_HAS_ABI_HEADERS
773 # include BOOST_ABI_SUFFIX