6 * Use, modification and distribution are subject to the
7 * Boost Software License, Version 1.0. (See accompanying file
8 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
13 * LOCATION: see http://www.boost.org for most recent version.
14 * FILE basic_regex_parser.hpp
15 * VERSION see <boost/version.hpp>
16 * DESCRIPTION: Declares template class basic_regex_parser.
19 #ifndef BOOST_REGEX_V4_BASIC_REGEX_PARSER_HPP
20 #define BOOST_REGEX_V4_BASIC_REGEX_PARSER_HPP
24 #pragma warning(disable: 4103)
26 #ifdef BOOST_HAS_ABI_HEADERS
27 # include BOOST_ABI_PREFIX
34 namespace BOOST_REGEX_DETAIL_NS{
38 #pragma warning(disable:4244 4800)
// Tag-dispatched helper (mpl::false_ overload): returns the largest
// boost::intmax_t value, or INT_MAX when std::numeric_limits<boost::intmax_t>
// reports that it is not specialized.
41 inline boost::intmax_t umax(mpl::false_ const&)
43 // Get out clause here, just in case numeric_limits is unspecialized:
44 return std::numeric_limits<boost::intmax_t>::is_specialized ? (std::numeric_limits<boost::intmax_t>::max)() : INT_MAX;
// Tag-dispatched helper (mpl::true_ overload): returns the largest std::size_t
// value, converted to the boost::intmax_t return type.
46 inline boost::intmax_t umax(mpl::true_ const&)
48 return (std::numeric_limits<std::size_t>::max)();
// Entry point: selects the mpl::true_ overload when boost::intmax_t has at
// least as many value digits as std::size_t, and the mpl::false_ overload
// otherwise.
51 inline boost::intmax_t umax()
53 return umax(mpl::bool_<std::numeric_limits<boost::intmax_t>::digits >= std::numeric_limits<std::size_t>::digits>());
// Regular-expression parser built on top of basic_regex_creator<charT, traits>.
// It declares the parse() entry point, the fail() error reporters, one parse_*
// member per syntactic construct, and the bookkeeping members used while
// walking the expression text.
56 template <class charT, class traits>
57 class basic_regex_parser : public basic_regex_creator<charT, traits>
60 basic_regex_parser(regex_data<charT, traits>* data);
61 void parse(const charT* p1, const charT* p2, unsigned flags);
// Error reporters: the two-argument form looks the message up from the traits,
// the four-argument form takes an explicit message and a start position.
62 void fail(regex_constants::error_type error_code, std::ptrdiff_t position);
63 void fail(regex_constants::error_type error_code, std::ptrdiff_t position, std::string message, std::ptrdiff_t start_pos);
// Convenience overload: forwards to the four-argument fail(), passing
// position as start_pos as well.
64 void fail(regex_constants::error_type error_code, std::ptrdiff_t position, const std::string& message)
66 fail(error_code, position, message, position);
71 bool parse_extended();
73 bool parse_open_paren();
74 bool parse_basic_escape();
75 bool parse_extended_escape();
76 bool parse_match_any();
// low defaults to 0 and high to the largest std::size_t value.
77 bool parse_repeat(std::size_t low = 0, std::size_t high = (std::numeric_limits<std::size_t>::max)());
78 bool parse_repeat_range(bool isbasic);
82 void parse_set_literal(basic_char_set<charT, traits>& char_set);
83 bool parse_inner_set(basic_char_set<charT, traits>& char_set);
85 bool parse_perl_extension();
86 bool parse_perl_verb();
87 bool match_verb(const char*);
88 bool add_emacs_code(bool negate);
89 bool unwind_alts(std::ptrdiff_t last_paren_start);
90 digraph<charT> get_next_set_literal(basic_char_set<charT, traits>& char_set);
91 charT unescape_character();
92 regex_constants::syntax_option_type parse_options();
// Pointer-to-member type for the top-level parser routine (see m_parser_proc).
95 typedef bool (basic_regex_parser::*parser_proc_type)();
96 typedef typename traits::string_type string_type;
97 typedef typename traits::char_class_type char_class_type;
98 parser_proc_type m_parser_proc; // the main parser to use
99 const charT* m_base; // the start of the string being parsed
100 const charT* m_end; // the end of the string being parsed
101 const charT* m_position; // our current parser position
102 unsigned m_mark_count; // how many sub-expressions we have
103 int m_mark_reset; // used to indicate that we're inside a (?|...) block.
104 unsigned m_max_mark; // largest mark count seen inside a (?|...) block.
105 std::ptrdiff_t m_paren_start; // where the last seen ')' began (where repeats are inserted).
106 std::ptrdiff_t m_alt_insert_point; // where to insert the next alternative
107 bool m_has_case_change; // true if somewhere in the current block the case has changed
108 unsigned m_recursion_count; // How many times we've called parse_all.
// Two alternative declarations of m_alt_jumps follow: std::vector<long> for
// 32-bit x86 MSVC builds (guarded by a static assertion that long can hold a
// pointer-sized value), std::vector<std::ptrdiff_t> for the other declaration.
109 #if defined(BOOST_MSVC) && defined(_M_IX86)
110 // This is an ugly warning suppression workaround (for warnings *inside* std::vector
111 // that can not otherwise be suppressed)...
112 BOOST_STATIC_ASSERT(sizeof(long) >= sizeof(void*));
113 std::vector<long> m_alt_jumps; // list of alternative in the current scope.
115 std::vector<std::ptrdiff_t> m_alt_jumps; // list of alternative in the current scope.
// NOTE(review): copy assignment and copy construction are declared here with
// no definition visible in this file; presumably this is to make the parser
// non-copyable. Confirm the access specifier in the full source.
118 basic_regex_parser& operator=(const basic_regex_parser&);
119 basic_regex_parser(const basic_regex_parser&);
// Constructor: forwards data to the basic_regex_creator base and initialises
// the parser bookkeeping. The mark counters, insertion points and recursion
// count start at 0, m_has_case_change starts false, and m_mark_reset starts
// at -1.
122 template <class charT, class traits>
123 basic_regex_parser<charT, traits>::basic_regex_parser(regex_data<charT, traits>* data)
124 : basic_regex_creator<charT, traits>(data), m_mark_count(0), m_mark_reset(-1), m_max_mark(0), m_paren_start(0), m_alt_insert_point(0), m_has_case_change(false), m_recursion_count(0)
// Top-level entry point: parses an expression and finalizes the compiled
// state machine.
//
//   p1      - start of the expression text; stored as m_base and used as the
//             initial m_position.
//   p2      - passed together with p1 to finalize(); presumably the end of
//             the expression text (confirm against the full source).
//   l_flags - syntax option flags. The bits under regbase::main_option_type
//             choose between parse_extended, parse_basic and parse_literal;
//             any other combination is reported as error_unknown.
//
// Errors are reported through fail(). If m_pdata->m_status is set after
// parsing, the function gives up before filling in the sub-expression count.
128 template <class charT, class traits>
129 void basic_regex_parser<charT, traits>::parse(const charT* p1, const charT* p2, unsigned l_flags)
131 // pass l_flags on to base class:
134 m_position = m_base = p1;
136 // empty strings are errors:
139 ((l_flags & regbase::main_option_type) != regbase::perl_syntax_group)
140 || (l_flags & regbase::no_empty_expressions)
144 fail(regex_constants::error_empty, 0);
147 // select which parser to use:
148 switch(l_flags & regbase::main_option_type)
150 case regbase::perl_syntax_group:
152 m_parser_proc = &basic_regex_parser<charT, traits>::parse_extended;
154 // Add a leading paren with index zero to give recursions a target:
156 re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
158 br->icase = this->flags() & regbase::icase;
161 case regbase::basic_syntax_group:
162 m_parser_proc = &basic_regex_parser<charT, traits>::parse_basic;
164 case regbase::literal:
165 m_parser_proc = &basic_regex_parser<charT, traits>::parse_literal;
168 // Ooops, someone has managed to set more than one of the main option flags,
169 // so this must be an error:
170 fail(regex_constants::error_unknown, 0, "An invalid combination of regular expression syntax flags was used.");
174 // parse all our characters:
175 bool result = parse_all();
177 // Unwind our alternatives:
180 // reset l_flags as a global scope (?imsx) may have altered them:
181 this->flags(l_flags);
182 // if we haven't gobbled up all the characters then we must
183 // have had an unexpected ')' :
// (message text corrected: "openening" -> "opening")
186 fail(regex_constants::error_paren, ::boost::BOOST_REGEX_DETAIL_NS::distance(m_base, m_position), "Found a closing ) with no corresponding opening parenthesis.");
189 // if an error has been set then give up now:
190 if(this->m_pdata->m_status)
192 // fill in our sub-expression count:
// One more than m_mark_count is stored as the sub-expression count.
193 this->m_pdata->m_mark_count = 1 + m_mark_count;
194 this->finalize(p1, p2);
// Reports an error using the traits' message for error_code: looks up
// error_string(error_code) on m_pdata->m_ptraits and forwards to the
// message-taking overload of fail().
197 template <class charT, class traits>
198 void basic_regex_parser<charT, traits>::fail(regex_constants::error_type error_code, std::ptrdiff_t position)
200 // get the error message:
201 std::string message = this->m_pdata->m_ptraits->error_string(error_code);
202 fail(error_code, position, message);
// Records a parse error and stops further parsing.
//
//   error_code - stored in m_pdata->m_status only if no status is set yet.
//   position   - offset of the error within the expression text.
//   message    - base message; taken by value because it is extended below.
//   start_pos  - start of the fragment to quote; when equal to position it is
//                replaced by a point up to 10 characters before position.
//
// When exceptions are available and regex_constants::no_except is not set, a
// boost::regex_error is constructed from the message, code and position.
205 template <class charT, class traits>
206 void basic_regex_parser<charT, traits>::fail(regex_constants::error_type error_code, std::ptrdiff_t position, std::string message, std::ptrdiff_t start_pos)
208 if(0 == this->m_pdata->m_status) // update the error code if not already set
209 this->m_pdata->m_status = error_code;
210 m_position = m_end; // don't bother parsing anything else
212 #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
214 // Augment error message with the regular expression text:
// The quoted window is clamped to [0, m_end - m_base] and extends at most 10
// characters either side of position.
216 if(start_pos == position)
217 start_pos = (std::max)(static_cast<std::ptrdiff_t>(0), position - static_cast<std::ptrdiff_t>(10));
218 std::ptrdiff_t end_pos = (std::min)(position + static_cast<std::ptrdiff_t>(10), static_cast<std::ptrdiff_t>(m_end - m_base));
219 if(error_code != regex_constants::error_empty)
// "fragment" wording is used when the window does not cover the whole text.
221 if((start_pos != 0) || (end_pos != (m_end - m_base)))
222 message += " The error occurred while parsing the regular expression fragment: '";
224 message += " The error occurred while parsing the regular expression: '";
225 if(start_pos != end_pos)
// Text before the error, a ">>>HERE>>>" marker, then the text after it.
227 message += std::string(m_base + start_pos, m_base + position);
228 message += ">>>HERE>>>";
229 message += std::string(m_base + position, m_base + end_pos);
235 #ifndef BOOST_NO_EXCEPTIONS
236 if(0 == (this->flags() & regex_constants::no_except))
238 boost::regex_error e(message, error_code, position);
242 (void)position; // suppress warnings.
// Drives the selected parser routine (m_parser_proc) repeatedly while it keeps
// returning true and m_position has not reached m_end. Every call increments
// m_recursion_count; once it exceeds 400 the parse fails with error_complexity.
246 template <class charT, class traits>
247 bool basic_regex_parser<charT, traits>::parse_all()
249 if (++m_recursion_count > 400)
251 // exceeded internal limits
252 fail(boost::regex_constants::error_complexity, m_position - m_base, "Exceeded nested brace limit.");
255 while(result && (m_position != m_end))
257 result = (this->*m_parser_proc)();
264 #pragma warning(push)
265 #pragma warning(disable:4702)
// Parses one element of a basic-syntax expression by dispatching on the syntax
// type of the character at m_position. Characters with no special handling
// are passed to parse_literal().
267 template <class charT, class traits>
268 bool basic_regex_parser<charT, traits>::parse_basic()
270 switch(this->m_traits.syntax_type(*m_position))
272 case regex_constants::syntax_escape:
273 return parse_basic_escape();
274 case regex_constants::syntax_dot:
275 return parse_match_any();
276 case regex_constants::syntax_caret:
278 this->append_state(syntax_element_start_line);
280 case regex_constants::syntax_dollar:
282 this->append_state(syntax_element_end_line);
284 case regex_constants::syntax_star:
// '*' is a literal when there is no previous state or the previous state is
// a start-of-line assertion; otherwise it is a repeat with default bounds.
285 if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line))
286 return parse_literal();
290 return parse_repeat();
292 case regex_constants::syntax_plus:
// '+' is only a repeat operator when regbase::emacs_ex is set.
293 if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line) || !(this->flags() & regbase::emacs_ex))
294 return parse_literal();
298 return parse_repeat(1);
300 case regex_constants::syntax_question:
// '?' follows the same rule as '+': a repeat only under regbase::emacs_ex.
301 if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line) || !(this->flags() & regbase::emacs_ex))
302 return parse_literal();
306 return parse_repeat(0, 1);
308 case regex_constants::syntax_open_set:
310 case regex_constants::syntax_newline:
311 if(this->flags() & regbase::newline_alt)
314 return parse_literal();
316 return parse_literal();
// Parses one element of an extended/Perl-syntax expression by dispatching on
// the syntax type of the character at m_position. A repeat operator found at
// the very start of the expression is reported as error_badrepeat.
321 template <class charT, class traits>
322 bool basic_regex_parser<charT, traits>::parse_extended()
325 switch(this->m_traits.syntax_type(*m_position))
327 case regex_constants::syntax_open_mark:
328 return parse_open_paren();
329 case regex_constants::syntax_close_mark:
331 case regex_constants::syntax_escape:
332 return parse_extended_escape();
333 case regex_constants::syntax_dot:
334 return parse_match_any();
335 case regex_constants::syntax_caret:
// With no_mod_m set, '^' selects the buffer-start state instead of start-of-line.
338 (this->flags() & regex_constants::no_mod_m ? syntax_element_buffer_start : syntax_element_start_line));
340 case regex_constants::syntax_dollar:
// With no_mod_m set, '$' selects the buffer-end state instead of end-of-line.
343 (this->flags() & regex_constants::no_mod_m ? syntax_element_buffer_end : syntax_element_end_line));
345 case regex_constants::syntax_star:
346 if(m_position == this->m_base)
348 fail(regex_constants::error_badrepeat, 0, "The repeat operator \"*\" cannot start a regular expression.");
352 return parse_repeat();
353 case regex_constants::syntax_question:
354 if(m_position == this->m_base)
356 fail(regex_constants::error_badrepeat, 0, "The repeat operator \"?\" cannot start a regular expression.");
360 return parse_repeat(0,1);
361 case regex_constants::syntax_plus:
362 if(m_position == this->m_base)
364 fail(regex_constants::error_badrepeat, 0, "The repeat operator \"+\" cannot start a regular expression.");
368 return parse_repeat(1);
369 case regex_constants::syntax_open_brace:
371 return parse_repeat_range(false);
372 case regex_constants::syntax_close_brace:
// A stray '}' is an error when no_perl_ex is set; otherwise it is a literal.
373 if((this->flags() & regbase::no_perl_ex) == regbase::no_perl_ex)
375 fail(regex_constants::error_brace, this->m_position - this->m_base, "Found a closing repetition operator } with no corresponding {.");
378 result = parse_literal();
380 case regex_constants::syntax_or:
382 case regex_constants::syntax_open_set:
384 case regex_constants::syntax_newline:
385 if(this->flags() & regbase::newline_alt)
388 return parse_literal();
389 case regex_constants::syntax_hash:
391 // If we have a mod_x flag set, then skip until
392 // we get to a newline character:
395 & (regbase::no_perl_ex|regbase::mod_x))
// Consumes characters up to and including the first separator, or to m_end.
398 while((m_position != m_end) && !is_separator(*m_position++)){}
403 result = parse_literal();
// Appends the character at m_position to the state machine as a literal.
// NOTE(review): the condition below is only partly visible here. It combines a
// test on the main_option_type/mod_x/no_perl_ex flags with a check that the
// character is not classified as space by m_mask_space.
412 template <class charT, class traits>
413 bool basic_regex_parser<charT, traits>::parse_literal()
415 // append this as a literal provided it's not a space character
416 // or the perl option regbase::mod_x is not set:
419 & (regbase::main_option_type|regbase::mod_x|regbase::no_perl_ex))
421 || !this->m_traits.isctype(*m_position, this->m_mask_space))
422 this->append_literal(*m_position);
// Parses a parenthesised group. After skipping '(' it hands Perl-style "(?"
// and "(*" forms to parse_perl_extension() / parse_perl_verb() when the flags
// allow them. Otherwise it appends a startmark state, parses the group body,
// unwinds pending alternatives, restores the saved per-scope settings, and
// appends the matching endmark state.
427 template <class charT, class traits>
428 bool basic_regex_parser<charT, traits>::parse_open_paren()
431 // skip the '(' and error check:
433 if(++m_position == m_end)
435 fail(regex_constants::error_paren, m_position - m_base);
439 // begin by checking for a perl-style (?...) extension:
// Allowed for Perl syntax without no_perl_ex, or basic syntax with emacs_ex.
442 ((this->flags() & (regbase::main_option_type | regbase::no_perl_ex)) == 0)
443 || ((this->flags() & (regbase::main_option_type | regbase::emacs_ex)) == (regbase::basic_syntax_group|regbase::emacs_ex))
446 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question)
447 return parse_perl_extension();
448 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_star)
449 return parse_perl_verb();
452 // update our mark count, and append the required state:
// A mark id is only allocated when regbase::nosubs is not set.
455 if(0 == (this->flags() & regbase::nosubs))
457 markid = ++m_mark_count;
// The two variants below record the offset of the '(' (position - 1); they
// differ only in using std::distance versus pointer subtraction.
458 #ifndef BOOST_NO_STD_DISTANCE
459 if(this->flags() & regbase::save_subexpression_location)
460 this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>(std::distance(m_base, m_position) - 1, 0));
462 if(this->flags() & regbase::save_subexpression_location)
463 this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>((m_position - m_base) - 1, 0));
466 re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
468 pb->icase = this->flags() & regbase::icase;
// Kept as an offset rather than a pointer; used later for unwind_alts() and
// stored in m_paren_start.
469 std::ptrdiff_t last_paren_start = this->getoffset(pb);
470 // back up insertion point for alternations, and set new point:
471 std::ptrdiff_t last_alt_point = m_alt_insert_point;
472 this->m_pdata->m_data.align();
473 m_alt_insert_point = this->m_pdata->m_data.size();
475 // back up the current flags in case we have a nested (?imsx) group:
477 regex_constants::syntax_option_type opts = this->flags();
478 bool old_case_change = m_has_case_change;
479 m_has_case_change = false; // no changes to this scope as yet...
481 // Back up branch reset data in case we have a nested (?|...)
483 int mark_reset = m_mark_reset;
486 // now recursively add more states, this will terminate when we get to a
491 // Unwind pushed alternatives:
493 if(0 == unwind_alts(last_paren_start))
498 if(m_has_case_change)
500 // the case has changed in one or more of the alternatives
501 // within the scoped (...) block: we have to add a state
502 // to reset the case sensitivity:
503 static_cast<re_case*>(
504 this->append_state(syntax_element_toggle_case, sizeof(re_case))
505 )->icase = opts & regbase::icase;
508 m_has_case_change = old_case_change;
510 // restore branch reset:
512 m_mark_reset = mark_reset;
514 // we either have a ')' or we have run out of characters prematurely:
516 if(m_position == m_end)
518 this->fail(regex_constants::error_paren, ::boost::BOOST_REGEX_DETAIL_NS::distance(m_base, m_end));
521 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
// Record where the sub-expression ends (second member of the pair pushed above).
523 #ifndef BOOST_NO_STD_DISTANCE
524 if(markid && (this->flags() & regbase::save_subexpression_location))
525 this->m_pdata->m_subs.at(markid - 1).second = std::distance(m_base, m_position);
527 if(markid && (this->flags() & regbase::save_subexpression_location))
528 this->m_pdata->m_subs.at(markid - 1).second = (m_position - m_base);
532 // append closing parenthesis state:
534 pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
536 pb->icase = this->flags() & regbase::icase;
537 this->m_paren_start = last_paren_start;
539 // restore the alternate insertion point:
541 this->m_alt_insert_point = last_alt_point;
543 // allow backrefs to this mark:
// m_backrefs is a bit mask, so only mark ids that fit in an unsigned are recorded.
545 if((markid > 0) && (markid < sizeof(unsigned) * CHAR_BIT))
546 this->m_backrefs |= 1u << (markid - 1);
// Parses a backslash escape in basic syntax. It advances past the backslash
// and dispatches on the escape syntax type of the next character. Several
// escapes are only special when a flag enables them (bk_plus_qm, bk_vbar,
// emacs_ex) or does not disable them (no_intervals); otherwise the character
// is handled by parse_literal().
551 template <class charT, class traits>
552 bool basic_regex_parser<charT, traits>::parse_basic_escape()
// A trailing backslash with nothing after it is reported as error_paren.
554 if(++m_position == m_end)
556 fail(regex_constants::error_paren, m_position - m_base);
560 switch(this->m_traits.escape_syntax_type(*m_position))
562 case regex_constants::syntax_open_mark:
563 return parse_open_paren();
564 case regex_constants::syntax_close_mark:
566 case regex_constants::syntax_plus:
567 if(this->flags() & regex_constants::bk_plus_qm)
570 return parse_repeat(1);
573 return parse_literal();
574 case regex_constants::syntax_question:
575 if(this->flags() & regex_constants::bk_plus_qm)
578 return parse_repeat(0, 1);
581 return parse_literal();
582 case regex_constants::syntax_open_brace:
583 if(this->flags() & regbase::no_intervals)
584 return parse_literal();
586 return parse_repeat_range(true);
587 case regex_constants::syntax_close_brace:
588 if(this->flags() & regbase::no_intervals)
589 return parse_literal();
590 fail(regex_constants::error_brace, this->m_position - this->m_base, "Found a closing repetition operator } with no corresponding {.");
592 case regex_constants::syntax_or:
593 if(this->flags() & regbase::bk_vbar)
596 result = parse_literal();
598 case regex_constants::syntax_digit:
599 return parse_backref();
// The assertion escapes below append their state only under emacs_ex;
// otherwise the character is parsed as a literal.
600 case regex_constants::escape_type_start_buffer:
601 if(this->flags() & regbase::emacs_ex)
604 this->append_state(syntax_element_buffer_start);
607 result = parse_literal();
609 case regex_constants::escape_type_end_buffer:
610 if(this->flags() & regbase::emacs_ex)
613 this->append_state(syntax_element_buffer_end);
616 result = parse_literal();
618 case regex_constants::escape_type_word_assert:
619 if(this->flags() & regbase::emacs_ex)
622 this->append_state(syntax_element_word_boundary);
625 result = parse_literal();
627 case regex_constants::escape_type_not_word_assert:
628 if(this->flags() & regbase::emacs_ex)
631 this->append_state(syntax_element_within_word);
634 result = parse_literal();
636 case regex_constants::escape_type_left_word:
637 if(this->flags() & regbase::emacs_ex)
640 this->append_state(syntax_element_word_start);
643 result = parse_literal();
645 case regex_constants::escape_type_right_word:
646 if(this->flags() & regbase::emacs_ex)
649 this->append_state(syntax_element_word_end);
652 result = parse_literal();
655 if(this->flags() & regbase::emacs_ex)
// Builds a character set from the word-class mask and appends it; a null
// result from append_set() is reported as error_ctype.
665 basic_char_set<charT, traits> char_set;
668 char_set.add_class(this->m_word_mask);
669 if(0 == this->append_set(char_set))
671 fail(regex_constants::error_ctype, m_position - m_base);
681 return add_emacs_code(negate);
684 // not supported yet:
685 fail(regex_constants::error_escape, m_position - m_base, "The \\c and \\C escape sequences are not supported by POSIX basic regular expressions: try the Perl syntax instead.");
691 result = parse_literal();
// Parses a backslash escape in extended/Perl syntax, dispatching on the escape
// syntax type of the character after the backslash. It handles character-class
// escapes, back-references, zero-width assertions, property escapes, and the
// Perl-only forms guarded by the main_option_type/no_perl_ex flag test. Escapes
// it does not recognise are appended as the literal returned by
// unescape_character().
697 template <class charT, class traits>
698 bool basic_regex_parser<charT, traits>::parse_extended_escape()
701 if(m_position == m_end)
703 fail(regex_constants::error_escape, m_position - m_base, "Incomplete escape sequence found.");
706 bool negate = false; // in case this is a character class escape: \w \d etc
707 switch(this->m_traits.escape_syntax_type(*m_position))
709 case regex_constants::escape_type_not_class:
712 case regex_constants::escape_type_class:
// Shared landing point: the Perl-only cases below jump here when their flag
// test fails, so the character is retried as a single-character class name.
714 escape_type_class_jump:
715 typedef typename traits::char_class_type m_type;
716 m_type m = this->m_traits.lookup_classname(m_position, m_position+1);
719 basic_char_set<charT, traits> char_set;
722 char_set.add_class(m);
723 if(0 == this->append_set(char_set))
725 fail(regex_constants::error_ctype, m_position - m_base);
732 // not a class, just a regular unknown escape:
734 this->append_literal(unescape_character());
737 case regex_constants::syntax_digit:
738 return parse_backref();
739 case regex_constants::escape_type_left_word:
741 this->append_state(syntax_element_word_start);
743 case regex_constants::escape_type_right_word:
745 this->append_state(syntax_element_word_end);
747 case regex_constants::escape_type_start_buffer:
749 this->append_state(syntax_element_buffer_start);
751 case regex_constants::escape_type_end_buffer:
753 this->append_state(syntax_element_buffer_end);
755 case regex_constants::escape_type_word_assert:
757 this->append_state(syntax_element_word_boundary);
759 case regex_constants::escape_type_not_word_assert:
761 this->append_state(syntax_element_within_word);
763 case regex_constants::escape_type_Z:
765 this->append_state(syntax_element_soft_buffer_end);
767 case regex_constants::escape_type_Q:
769 case regex_constants::escape_type_C:
770 return parse_match_any();
771 case regex_constants::escape_type_X:
773 this->append_state(syntax_element_combining);
775 case regex_constants::escape_type_G:
777 this->append_state(syntax_element_restart_continue);
779 case regex_constants::escape_type_not_property:
782 case regex_constants::escape_type_property:
786 if(m_position == m_end)
788 fail(regex_constants::error_escape, m_position - m_base, "Incomplete property escape found.");
791 // maybe have \p{ddd}
792 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
794 const charT* base = m_position;
795 // skip forward until we find enclosing brace:
796 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
798 if(m_position == m_end)
800 fail(regex_constants::error_escape, m_position - m_base, "Closing } missing from property escape sequence.");
// The name looked up is the text between the braces: ++base skips '{' and
// m_position++ steps past '}' after supplying the end of the range.
803 m = this->m_traits.lookup_classname(++base, m_position++);
807 m = this->m_traits.lookup_classname(m_position, m_position+1);
812 basic_char_set<charT, traits> char_set;
815 char_set.add_class(m);
816 if(0 == this->append_set(char_set))
818 fail(regex_constants::error_ctype, m_position - m_base);
823 fail(regex_constants::error_ctype, m_position - m_base, "Escape sequence was neither a valid property nor a valid character class name.");
826 case regex_constants::escape_type_reset_start_mark:
// Perl syntax only (no main option bits and no no_perl_ex).
827 if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
829 re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
831 pb->icase = this->flags() & regbase::icase;
832 this->m_pdata->m_data.align();
836 goto escape_type_class_jump;
837 case regex_constants::escape_type_line_ending:
838 if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
// The string from get_escape_R_string() is parsed in place of the escape:
// the parser pointers are saved, parse_all() runs over that string, and
// m_position then resumes just after the escape character.
840 const charT* e = get_escape_R_string<charT>();
841 const charT* old_position = m_position;
842 const charT* old_end = m_end;
843 const charT* old_base = m_base;
846 m_end = e + traits::length(e);
847 bool r = parse_all();
848 m_position = ++old_position;
853 goto escape_type_class_jump;
854 case regex_constants::escape_type_extended_backref:
855 if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
857 bool have_brace = false;
858 bool negative = false;
859 static const char* incomplete_message = "Incomplete \\g escape found.";
860 if(++m_position == m_end)
862 fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
865 // maybe have \g{ddd}
866 regex_constants::syntax_type syn = this->m_traits.syntax_type(*m_position);
867 regex_constants::syntax_type syn_end = 0;
// Three opening delimiters are accepted; syn_end records the matching closer.
868 if((syn == regex_constants::syntax_open_brace)
869 || (syn == regex_constants::escape_type_left_word)
870 || (syn == regex_constants::escape_type_end_buffer))
872 if(++m_position == m_end)
874 fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
880 case regex_constants::syntax_open_brace:
881 syn_end = regex_constants::syntax_close_brace;
883 case regex_constants::escape_type_left_word:
884 syn_end = regex_constants::escape_type_right_word;
887 syn_end = regex_constants::escape_type_end_buffer;
// A leading '-' sets negative; it must be followed by more input.
891 negative = (*m_position == static_cast<charT>('-'));
892 if((negative) && (++m_position == m_end))
894 fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
897 const charT* pc = m_position;
898 boost::intmax_t i = this->m_traits.toi(pc, m_end, 10);
899 if((i < 0) && syn_end)
901 // Check for a named capture, get the leftmost one if there is more than one:
902 const charT* base = m_position;
903 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != syn_end))
907 i = hash_value_from_capture_name(base, m_position);
// Converts a relative reference into an absolute mark index, counting back
// from the current mark count.
911 i = 1 + m_mark_count - i;
// Accepts either a small index whose bit is set in m_backrefs, or a value
// above 10000 whose id from m_pdata->get_id() maps to a set bit.
912 if(((i > 0) && (i < std::numeric_limits<unsigned>::digits) && (i - 1 < static_cast<boost::intmax_t>(sizeof(unsigned) * CHAR_BIT)) && (this->m_backrefs & (1u << (i-1)))) || ((i > 10000) && (this->m_pdata->get_id(i) > 0) && (this->m_pdata->get_id(i)-1 < static_cast<boost::intmax_t>(sizeof(unsigned) * CHAR_BIT)) && (this->m_backrefs & (1u << (this->m_pdata->get_id(i)-1)))))
915 re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
917 pb->icase = this->flags() & regbase::icase;
921 fail(regex_constants::error_backref, m_position - m_base);
927 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != syn_end))
929 fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
936 goto escape_type_class_jump;
937 case regex_constants::escape_type_control_v:
938 if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
939 goto escape_type_class_jump;
942 this->append_literal(unescape_character());
// Appends a syntax_element_wild state and sets its mask: force_not_newline
// when regbase::no_mod_s is set, else force_newline when regbase::mod_s is
// set, else dont_care.
948 template <class charT, class traits>
949 bool basic_regex_parser<charT, traits>::parse_match_any()
952 // we have a '.' that can match any character:
955 static_cast<re_dot*>(
956 this->append_state(syntax_element_wild, sizeof(re_dot))
957 )->mask = static_cast<unsigned char>(this->flags() & regbase::no_mod_s
958 ? BOOST_REGEX_DETAIL_NS::force_not_newline
959 : this->flags() & regbase::mod_s ?
960 BOOST_REGEX_DETAIL_NS::force_newline : BOOST_REGEX_DETAIL_NS::dont_care);
// Wraps the most recently parsed element in a repeat.
//
//   low, high - repeat bounds supplied by the caller.
//
// The function first looks for a trailing '?' and, for Perl syntax only, a
// trailing '+' (possessive). It then decides where the repeat state must be
// inserted, inserts it, appends the back jump, and for a possessive repeat
// brackets the whole construct with startmark/jump/endmark states. Elements
// that cannot be repeated are reported as error_badrepeat.
964 template <class charT, class traits>
965 bool basic_regex_parser<charT, traits>::parse_repeat(std::size_t low, std::size_t high)
// (sic) the flag is spelled "pocessive" in the code; it means possessive.
968 bool pocessive = false;
969 std::size_t insert_point;
971 // when we get to here we may have a non-greedy ? mark still to come:
973 if((m_position != m_end)
975 (0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
976 || ((regbase::basic_syntax_group|regbase::emacs_ex) == (this->flags() & (regbase::main_option_type | regbase::emacs_ex)))
980 // OK we have a perl or emacs regex, check for a '?':
// In mod_x mode, whitespace before the '?' is skipped first.
981 if ((this->flags() & (regbase::main_option_type | regbase::mod_x | regbase::no_perl_ex)) == regbase::mod_x)
984 while ((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
987 if((m_position != m_end) && (this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question))
992 // for perl regexes only check for possessive ++ repeats.
993 if((m_position != m_end)
994 && (0 == (this->flags() & regbase::main_option_type))
995 && (this->m_traits.syntax_type(*m_position) == regex_constants::syntax_plus))
1001 if(0 == this->m_last_state)
1003 fail(regex_constants::error_badrepeat, ::boost::BOOST_REGEX_DETAIL_NS::distance(m_base, m_position), "Nothing to repeat.");
1006 if(this->m_last_state->type == syntax_element_endmark)
1008 // insert a repeat before the '(' matching the last ')':
1009 insert_point = this->m_paren_start;
1011 else if((this->m_last_state->type == syntax_element_literal) && (static_cast<re_literal*>(this->m_last_state)->length > 1))
1013 // the last state was a literal with more than one character, split it in two:
1014 re_literal* lit = static_cast<re_literal*>(this->m_last_state);
// The literal's characters are read from immediately after the re_literal
// header; take the last one so that only it is repeated.
1015 charT c = (static_cast<charT*>(static_cast<void*>(lit+1)))[lit->length - 1];
1017 // now append new state:
1018 lit = static_cast<re_literal*>(this->append_state(syntax_element_literal, sizeof(re_literal) + sizeof(charT)));
1020 (static_cast<charT*>(static_cast<void*>(lit+1)))[0] = c;
1021 insert_point = this->getoffset(this->m_last_state);
1025 // repeat the last state whatever it was, need to add some error checking here:
1026 switch(this->m_last_state->type)
1028 case syntax_element_start_line:
1029 case syntax_element_end_line:
1030 case syntax_element_word_boundary:
1031 case syntax_element_within_word:
1032 case syntax_element_word_start:
1033 case syntax_element_word_end:
1034 case syntax_element_buffer_start:
1035 case syntax_element_buffer_end:
1036 case syntax_element_alt:
1037 case syntax_element_soft_buffer_end:
1038 case syntax_element_restart_continue:
1039 case syntax_element_jump:
1040 case syntax_element_startmark:
1041 case syntax_element_backstep:
1042 // can't legally repeat any of the above:
1043 fail(regex_constants::error_badrepeat, m_position - m_base);
1049 insert_point = this->getoffset(this->m_last_state);
1052 // OK we now know what to repeat, so insert the repeat around it:
1054 re_repeat* rep = static_cast<re_repeat*>(this->insert_state(insert_point, syntax_element_rep, re_repeater_size));
1057 rep->greedy = greedy;
1058 rep->leading = false;
1059 // store our repeater position for later:
// An offset is kept and the address is re-fetched via getaddress() after the
// append below rather than reusing the rep pointer.
1060 std::ptrdiff_t rep_off = this->getoffset(rep);
1061 // and append a back jump to the repeat:
1062 re_jump* jmp = static_cast<re_jump*>(this->append_state(syntax_element_jump, sizeof(re_jump)));
1063 jmp->alt.i = rep_off - this->getoffset(jmp);
1064 this->m_pdata->m_data.align();
1065 // now fill in the alt jump for the repeat:
1066 rep = static_cast<re_repeat*>(this->getaddress(rep_off));
1067 rep->alt.i = this->m_pdata->m_data.size() - rep_off;
1069 // If the repeat is possessive then bracket the repeat with a (?>...)
1070 // independent sub-expression construct:
1074 if(m_position != m_end)
1077 // Check for illegal following quantifier, we have to do this here, because
1078 // the extra states we insert below circumvents our usual error checking :-(
1080 bool contin = false;
1083 if ((this->flags() & (regbase::main_option_type | regbase::mod_x | regbase::no_perl_ex)) == regbase::mod_x)
1086 while ((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
1089 if (m_position != m_end)
1091 switch (this->m_traits.syntax_type(*m_position))
1093 case regex_constants::syntax_star:
1094 case regex_constants::syntax_plus:
1095 case regex_constants::syntax_question:
1096 case regex_constants::syntax_open_brace:
1097 fail(regex_constants::error_badrepeat, m_position - m_base);
1099 case regex_constants::syntax_open_mark:
1100 // Do we have a comment? If so we need to skip it here...
// The three characters checked are '(', a question-mark type and a hash type.
1101 if ((m_position + 2 < m_end) && this->m_traits.syntax_type(*(m_position + 1)) == regex_constants::syntax_question
1102 && this->m_traits.syntax_type(*(m_position + 2)) == regex_constants::syntax_hash)
1104 while ((m_position != m_end)
1105 && (this->m_traits.syntax_type(*m_position++) != regex_constants::syntax_close_mark)) {
// Bracket the repeat: startmark and jump inserted before it, endmark appended after.
1117 re_brace* pb = static_cast<re_brace*>(this->insert_state(insert_point, syntax_element_startmark, sizeof(re_brace)));
1119 pb->icase = this->flags() & regbase::icase;
1120 jmp = static_cast<re_jump*>(this->insert_state(insert_point + sizeof(re_brace), syntax_element_jump, sizeof(re_jump)));
1121 this->m_pdata->m_data.align();
1122 jmp->alt.i = this->m_pdata->m_data.size() - this->getoffset(jmp);
1123 pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
1125 pb->icase = this->flags() & regbase::icase;
// Parses the bounds of a brace-delimited repeat count -- a minimum, optionally
// followed by a comma and a maximum -- and forwards them to parse_repeat().
//
// NOTE(review): this listing omits some lines of the function (brace-only
// lines, loop bodies and a few statements), so the comments below describe
// only what the visible statements do.
//
// isbasic is not referenced by any visible line; presumably it selects the
// basic-syntax form of the closing brace -- TODO confirm against the full file.
//
// Visible return paths: the result of parse_literal() when the text is re-read
// as a literal '{', and the result of parse_repeat(min, max) at the end.
1130 template <class charT, class traits>
1131 bool basic_regex_parser<charT, traits>::parse_repeat_range(bool isbasic)
// Message shared by every error_brace failure reported below.
1133 static const char* incomplete_message = "Missing } in quantified repetition.";
1135 // parse a repeat-range:
1137 std::size_t min, max;
// Loop over leading characters classified as whitespace (m_mask_space).
1140 while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
// Ran out of input before reaching the first bound.
1142 if(this->m_position == this->m_end)
// When any main_option_type bit or no_perl_ex is set in flags() the problem is
// reported as error_brace; the literal-'{' fallback is described just below.
1144 if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
1146 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1149 // Treat the opening '{' as a literal character, rewind to start of error:
1151 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1152 return parse_literal();
// Read the first bound as a base-10 number.
1155 v = this->m_traits.toi(m_position, m_end, 10);
// A negative value or one above umax() is rejected
// (presumably toi() yields a negative value when no digits are present -- TODO confirm).
1157 if((v < 0) || (v > umax()))
1159 if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
1161 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1164 // Treat the opening '{' as a literal character, rewind to start of error:
1166 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1167 return parse_literal();
// Whitespace after the first bound.
1169 while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
1171 if(this->m_position == this->m_end)
1173 if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
1175 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1178 // Treat the opening '{' as a literal character, rewind to start of error:
1180 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1181 return parse_literal();
// The first bound has been range-checked above, so it becomes the minimum.
1183 min = static_cast<std::size_t>(v);
1184 // see if we have a comma:
1185 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_comma)
1187 // move on and error check:
1190 while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
1192 if(this->m_position == this->m_end)
1194 if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
1196 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1199 // Treat the opening '{' as a literal character, rewind to start of error:
1201 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1202 return parse_literal();
1204 // get the value if any:
1205 v = this->m_traits.toi(m_position, m_end, 10);
// A second bound outside [0, umax()) is replaced by the largest std::size_t value.
1206 max = ((v >= 0) && (v < umax())) ? (std::size_t)v : (std::numeric_limits<std::size_t>::max)();
1210 // no comma, max = min:
// Whitespace before the closing brace.
1214 while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
1216 // OK now check trailing }:
1217 if(this->m_position == this->m_end)
1219 if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
1221 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1224 // Treat the opening '{' as a literal character, rewind to start of error:
1226 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1227 return parse_literal();
// NOTE(review): the condition enclosing this escape test is not visible in this
// listing; presumably it applies only when isbasic is set -- TODO confirm.
1231 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_escape)
1234 if(this->m_position == this->m_end)
1236 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1242 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
// The current character is tested for being the closing brace.
1246 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_brace)
1250 // Treat the opening '{' as a literal character, rewind to start of error:
1252 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1253 return parse_literal();
1256 // finally go and add the repeat, unless error:
1260 // Backtrack to error location:
// Steps backwards while the current character is classified by m_word_mask,
// then reports error_badbrace at that position.
1262 while(this->m_traits.isctype(*m_position, this->m_word_mask)) --m_position;
1264 fail(regex_constants::error_badbrace, m_position - m_base);
1267 return parse_repeat(min, max);
// Handles an alternation operator: appends a trailing jump for the alternative
// that has just ended, inserts an alt state at m_alt_insert_point, and records
// the jump's offset in m_alt_jumps.
//
// NOTE(review): this listing omits some lines of the function (brace-only
// lines and a few statements); comments describe only the visible statements.
1270 template <class charT, class traits>
1271 bool basic_regex_parser<charT, traits>::parse_alt()
1274 // error check: if there have been no previous states,
1275 // or if the last state was a '(' then error:
// NOTE(review): the operators joining the three tests below are not visible in
// this listing, so how they combine must be checked against the full file.
1278 ((this->m_last_state == 0) || (this->m_last_state->type == syntax_element_startmark))
1281 ((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group)
1283 ((this->flags() & regbase::no_empty_expressions) == 0)
1287 fail(regex_constants::error_empty, this->m_position - this->m_base, "A regular expression cannot start with the alternation operator |.");
1291 // Reset mark count if required:
// Remember the highest mark count seen so far, then restart the count from
// m_mark_reset when that value is non-negative.
1293 if(m_max_mark < m_mark_count)
1294 m_max_mark = m_mark_count;
1295 if(m_mark_reset >= 0)
1296 m_mark_count = m_mark_reset;
1300 // we need to append a trailing jump:
1302 re_syntax_base* pj = this->append_state(BOOST_REGEX_DETAIL_NS::syntax_element_jump, sizeof(re_jump));
// Keep the jump as an offset rather than a pointer.
1303 std::ptrdiff_t jump_offset = this->getoffset(pj);
1305 // now insert the alternative:
1307 re_alt* palt = static_cast<re_alt*>(this->insert_state(this->m_alt_insert_point, syntax_element_alt, re_alt_size));
// The recorded jump offset is moved on by the size of the inserted alt state
// (presumably because the insertion shifts the jump -- TODO confirm).
1308 jump_offset += re_alt_size;
1309 this->m_pdata->m_data.align();
// The alt state's target is the aligned end of the data, stored relative to
// the alt state itself.
1310 palt->alt.i = this->m_pdata->m_data.size() - this->getoffset(palt);
1312 // update m_alt_insert_point so that the next alternate gets
1313 // inserted at the start of the second of the two we've just created:
1315 this->m_alt_insert_point = this->m_pdata->m_data.size();
1317 // the start of this alternative must have a case changes state
1318 // if the current block has messed around with case changes:
1320 if(m_has_case_change)
1322 static_cast<re_case*>(
1323 this->append_state(syntax_element_toggle_case, sizeof(re_case))
1324 )->icase = this->m_icase;
1327 // push the alternative onto our stack, a recursive
1328 // implementation here is easier to understand (and faster
1329 // as it happens), but causes all kinds of stack overflow problems
1330 // on programs with small stacks (COM+).
1332 m_alt_jumps.push_back(jump_offset);
// Parses a bracketed character set: collects its members into a basic_char_set
// and passes that set to append_set().
//
// NOTE(review): this listing omits some lines of the function (brace-only
// lines, position increments, break/return statements); comments describe
// only the visible statements.
//
// The visible final return yields true when m_position has not reached m_end.
1336 template <class charT, class traits>
1337 bool basic_regex_parser<charT, traits>::parse_set()
1339 static const char* incomplete_message = "Character set declaration starting with [ terminated prematurely - either no ] was found or the set had no content.";
// Nothing at all follows the opening bracket.
1341 if(m_position == m_end)
1343 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1346 basic_char_set<charT, traits> char_set;
1348 const charT* base = m_position; // where the '[' was
1349 const charT* item_base = m_position; // where the '[' or '^' was
// Classify each character in turn by its syntax type.
1351 while(m_position != m_end)
1353 switch(this->m_traits.syntax_type(*m_position))
1355 case regex_constants::syntax_caret:
// A caret is special only at `base`; item_base is then updated. A caret
// anywhere else goes through parse_set_literal().
1356 if(m_position == base)
1360 item_base = m_position;
1363 parse_set_literal(char_set);
1365 case regex_constants::syntax_close_set:
// A close-set character located at item_base is handed to parse_set_literal()
// rather than ending the set.
1366 if(m_position == item_base)
1368 parse_set_literal(char_set);
// append_set() returning 0 is reported as error_ctype.
1374 if(0 == this->append_set(char_set))
1376 fail(regex_constants::error_ctype, m_position - m_base);
1381 case regex_constants::syntax_open_set:
// A nested '[' is delegated to parse_inner_set().
1382 if(parse_inner_set(char_set))
1385 case regex_constants::syntax_escape:
1388 // look ahead and see if this is a character class shortcut
1392 if(this->m_traits.escape_syntax_type(*m_position)
1393 == regex_constants::escape_type_class)
// The single character at m_position is looked up as a class name.
1395 char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1);
1398 char_set.add_class(m);
1403 else if(this->m_traits.escape_syntax_type(*m_position)
1404 == regex_constants::escape_type_not_class)
1406 // negated character class:
1407 char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1);
1410 char_set.add_negated_class(m);
1415 // not a character class, just a regular escape:
1417 parse_set_literal(char_set);
// Remaining visible path: the character goes through parse_set_literal().
1421 parse_set_literal(char_set);
1425 return m_position != m_end;
// Parses a '[' that appears inside a character set and adds what it denotes to
// char_set: a collating element, a character class or an equivalence class.
//
// NOTE(review): this listing omits some lines of the function (brace-only
// lines, position increments, break/return statements and a few declarations);
// comments describe only the visible statements.
1428 template <class charT, class traits>
1429 bool basic_regex_parser<charT, traits>::parse_inner_set(basic_char_set<charT, traits>& char_set)
1431 static const char* incomplete_message = "Character class declaration starting with [ terminated prematurely - either no ] was found or the set had no content.";
1433 // we have either a character class [:name:]
1434 // a collating element [.name.]
1435 // or an equivalence class [=name=]
// Step past the '[' and fail if nothing follows it.
1437 if(m_end == ++m_position)
1439 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
// The character after the '[' selects which form is being parsed.
1442 switch(this->m_traits.syntax_type(*m_position))
1444 case regex_constants::syntax_dot:
1446 // a collating element is treated as a literal:
1449 parse_set_literal(char_set);
1451 case regex_constants::syntax_colon:
1453 // check that character classes are actually enabled:
// With the basic syntax group and no_char_classes both selected, the text is
// handed to parse_set_literal() instead.
1454 if((this->flags() & (regbase::main_option_type | regbase::no_char_classes))
1455 == (regbase::basic_syntax_group | regbase::no_char_classes))
1458 parse_set_literal(char_set);
1462 if(m_end == ++m_position)
1464 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
// name_first marks where the class name begins.
1467 const charT* name_first = m_position;
1468 // skip at least one character, then find the matching ':]'
1469 if(m_end == ++m_position)
1471 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1474 while((m_position != m_end)
1475 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_colon))
// name_last marks the position where the scan for the closing colon stopped.
1477 const charT* name_last = m_position;
1478 if(m_end == m_position)
1480 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
// The character after the closing colon must be the close-set character.
1483 if((m_end == ++m_position)
1484 || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
1486 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1490 // check for negated class:
1492 bool negated = false;
1493 if(this->m_traits.syntax_type(*name_first) == regex_constants::syntax_caret)
1498 typedef typename traits::char_class_type m_type;
1499 m_type m = this->m_traits.lookup_classname(name_first, name_last);
// NOTE(review): the condition enclosing the special-case test below is not
// visible in this listing -- confirm against the full file.
1502 if(char_set.empty() && (name_last - name_first == 1))
1504 // maybe a special case:
// A one-character name in an otherwise empty set, directly followed by the
// close-set character, may denote a word-boundary assertion instead of a class.
1506 if( (m_position != m_end)
1507 && (this->m_traits.syntax_type(*m_position)
1508 == regex_constants::syntax_close_set))
1510 if(this->m_traits.escape_syntax_type(*name_first)
1511 == regex_constants::escape_type_left_word)
1514 this->append_state(syntax_element_word_start);
1517 if(this->m_traits.escape_syntax_type(*name_first)
1518 == regex_constants::escape_type_right_word)
1521 this->append_state(syntax_element_word_end);
1526 fail(regex_constants::error_ctype, name_first - m_base);
// Add the class, or its negation, to the set.
1529 if(negated == false)
1530 char_set.add_class(m);
1532 char_set.add_negated_class(m);
1536 case regex_constants::syntax_equal:
1539 if(m_end == ++m_position)
1541 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1544 const charT* name_first = m_position;
1545 // skip at least one character, then find the matching '=]'
1546 if(m_end == ++m_position)
1548 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1551 while((m_position != m_end)
1552 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal))
1554 const charT* name_last = m_position;
1555 if(m_end == m_position)
1557 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1560 if((m_end == ++m_position)
1561 || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
1563 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
// The name is resolved through lookup_collatename(); an empty result or one
// longer than two characters is reported as error_collate.
1566 string_type m = this->m_traits.lookup_collatename(name_first, name_last);
1567 if((0 == m.size()) || (m.size() > 2))
1569 fail(regex_constants::error_collate, name_first - m_base);
// NOTE(review): `d` is declared and filled in lines not visible in this listing.
1578 char_set.add_equivalent(d);
// Remaining visible path: the text goes through parse_set_literal().
1584 parse_set_literal(char_set);
// Reads one literal from inside a character set and adds it to char_set,
// either as a single item or, when a dash and a second literal follow, as a
// range.
//
// NOTE(review): this listing omits some lines of the function (brace-only
// lines, position adjustments and return statements); comments describe only
// the visible statements.
1590 template <class charT, class traits>
1591 void basic_regex_parser<charT, traits>::parse_set_literal(basic_char_set<charT, traits>& char_set)
// First (or only) item of the literal.
1593 digraph<charT> start_range(get_next_set_literal(char_set));
// Reaching the end of input here means the set was never closed.
1594 if(m_end == m_position)
1596 fail(regex_constants::error_brack, m_position - m_base);
// A dash may introduce a range.
1599 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash)
1602 if(m_end == ++m_position)
1604 fail(regex_constants::error_brack, m_position - m_base);
// Only when the dash is not directly followed by the close-set character is a
// range end read and the range added.
1607 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set)
1609 digraph<charT> end_range = get_next_set_literal(char_set);
1610 char_set.add_range(start_range, end_range);
// A further dash straight after the range is examined here; error_range is
// reported below.
1611 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash)
1613 if(m_end == ++m_position)
1615 fail(regex_constants::error_brack, m_position - m_base);
1618 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_set)
1624 fail(regex_constants::error_range, m_position - m_base);
// The first item is added on its own.
1631 char_set.add_single(start_range);
// Reads the next item inside a character set and returns it as a
// digraph<charT>: a plain character, an escaped character, or a named
// collating element of one or two characters.
//
// NOTE(review): this listing omits some lines of the function (brace-only
// lines, position adjustments, break/return statements); comments describe
// only the visible statements.
1634 template <class charT, class traits>
1635 digraph<charT> basic_regex_parser<charT, traits>::get_next_set_literal(basic_char_set<charT, traits>& char_set)
1637 digraph<charT> result;
1638 switch(this->m_traits.syntax_type(*m_position))
1640 case regex_constants::syntax_dash:
// A dash in a non-empty set is checked against the end of the set;
// error_range is reported below.
1641 if(!char_set.empty())
1643 // see if we are at the end of the set:
1644 if((++m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
1646 fail(regex_constants::error_range, m_position - m_base);
// Take the current character as the literal and step past it.
1651 result.first = *m_position++;
1653 case regex_constants::syntax_escape:
1654 // check to see if escapes are supported first:
// With no_escape_in_lists set, the escape character itself is the literal;
// the visible alternative decodes it with unescape_character().
1655 if(this->flags() & regex_constants::no_escape_in_lists)
1657 result = *m_position++;
1661 result = unescape_character();
1663 case regex_constants::syntax_open_set:
1665 if(m_end == ++m_position)
1667 fail(regex_constants::error_collate, m_position - m_base);
// The character after '[' is tested for a dot; when it is not one, that
// character is stored in result.first.
1670 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_dot)
1673 result.first = *m_position;
1677 if(m_end == ++m_position)
1679 fail(regex_constants::error_collate, m_position - m_base);
// name_first marks where the collating element's name begins.
1682 const charT* name_first = m_position;
1683 // skip at least one character, then find the matching '.]'
1684 if(m_end == ++m_position)
1686 fail(regex_constants::error_collate, name_first - m_base);
1689 while((m_position != m_end)
1690 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_dot))
// name_last marks the position where the scan for the closing dot stopped.
1692 const charT* name_last = m_position;
1693 if(m_end == m_position)
1695 fail(regex_constants::error_collate, name_first - m_base);
// The character after the closing dot must be the close-set character.
1698 if((m_end == ++m_position)
1699 || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
1701 fail(regex_constants::error_collate, name_first - m_base);
// Resolve the name; an empty result or one longer than two characters is
// reported as error_collate.
1705 string_type s = this->m_traits.lookup_collatename(name_first, name_last);
1706 if(s.empty() || (s.size() > 2))
1708 fail(regex_constants::error_collate, name_first - m_base);
// Copy the resolved characters into the digraph
// (the guard on the second assignment is not visible in this listing).
1711 result.first = s[0];
1713 result.second = s[1];
// Remaining visible path: the current character is the literal.
1719 result = *m_position++;
1725 // does a value fit in the specified charT type?
// Overload taking mpl::true_: v is accepted when shifting it right by the
// bit width of charT (sizeof(charT) * CHAR_BIT) leaves zero.
1727 template <class charT>
1728 bool valid_value(charT, boost::intmax_t v, const mpl::true_&)
1730 return (v >> (sizeof(charT) * CHAR_BIT)) == 0;
// Overload taking mpl::false_: performs no check and accepts every value.
1732 template <class charT>
1733 bool valid_value(charT, boost::intmax_t, const mpl::false_&)
1735 return true; // v will always fit in a charT
// Entry point: picks the checking overload (mpl::true_) when charT is smaller
// than boost::intmax_t, and the always-true overload (mpl::false_) otherwise.
// c is used only to carry the character type.
1737 template <class charT>
1738 bool valid_value(charT c, boost::intmax_t v)
1740 return valid_value(c, v, mpl::bool_<(sizeof(charT) < sizeof(boost::intmax_t))>());
// Decodes the escape sequence at m_position into a single character of type
// charT. Most cases store the decoded character in `result`; the octal case
// returns its value directly.
//
// NOTE(review): this listing omits some lines of the function (brace-only
// lines, preprocessor guards, position adjustments, break/return statements
// and parts of some conditions); comments describe only the visible statements.
1743 template <class charT, class traits>
1744 charT basic_regex_parser<charT, traits>::unescape_character()
1747 #pragma warning(push)
1748 #pragma warning(disable:4127)
// An escape with nothing after it cannot be decoded.
1751 if(m_position == m_end)
1753 fail(regex_constants::error_escape, m_position - m_base, "Escape sequence terminated prematurely.");
// Dispatch on the escape type of the current character.
1756 switch(this->m_traits.escape_syntax_type(*m_position))
1758 case regex_constants::escape_type_control_a:
1759 result = charT('\a');
1761 case regex_constants::escape_type_e:
1764 case regex_constants::escape_type_control_f:
1765 result = charT('\f');
1767 case regex_constants::escape_type_control_n:
1768 result = charT('\n');
1770 case regex_constants::escape_type_control_r:
1771 result = charT('\r');
1773 case regex_constants::escape_type_control_t:
1774 result = charT('\t');
1776 case regex_constants::escape_type_control_v:
1777 result = charT('\v');
1779 case regex_constants::escape_type_word_assert:
// In this function the word-assert escape decodes to the backspace character.
1780 result = charT('\b');
1782 case regex_constants::escape_type_ascii_control:
1784 if(m_position == m_end)
1786 // Rewind to start of escape:
1788 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1789 fail(regex_constants::error_escape, m_position - m_base, "ASCII escape sequence terminated prematurely.");
// The control character is the current character's value modulo 32.
1792 result = static_cast<charT>(*m_position % 32);
1794 case regex_constants::escape_type_hex:
1796 if(m_position == m_end)
1798 // Rewind to start of escape:
1800 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1801 fail(regex_constants::error_escape, m_position - m_base, "Hexadecimal escape sequence terminated prematurely.");
1804 // maybe have \x{ddd}
1805 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
1808 if(m_position == m_end)
1810 // Rewind to start of escape:
1812 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1813 fail(regex_constants::error_escape, m_position - m_base, "Missing } in hexadecimal escape sequence.");
// Braced form: read hex digits up to the end of input.
1816 boost::intmax_t i = this->m_traits.toi(m_position, m_end, 16);
// Visible rejection tests: input exhausted, value above the maximum of charT
// (when numeric_limits is specialized for it), or no closing brace.
1817 if((m_position == m_end)
1819 || ((std::numeric_limits<charT>::is_specialized) && (i > (boost::intmax_t)(std::numeric_limits<charT>::max)()))
1820 || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
1822 // Rewind to start of escape:
1824 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1825 fail(regex_constants::error_badbrace, m_position - m_base, "Hexadecimal escape sequence was invalid.");
// Unbraced form: at most two characters are read as hex digits.
1833 std::ptrdiff_t len = (std::min)(static_cast<std::ptrdiff_t>(2), static_cast<std::ptrdiff_t>(m_end - m_position));
1834 boost::intmax_t i = this->m_traits.toi(m_position, m_position + len, 16);
// The value must also pass valid_value() for charT.
1836 || !valid_value(charT(0), i))
1838 // Rewind to start of escape:
1840 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1841 fail(regex_constants::error_escape, m_position - m_base, "Escape sequence did not encode a valid character.");
1847 case regex_constants::syntax_digit:
1849 // an octal escape sequence, the first character must be a zero
1850 // followed by up to 3 octal digits:
// len: at most four characters, limited by the distance to the end of input.
1851 std::ptrdiff_t len = (std::min)(::boost::BOOST_REGEX_DETAIL_NS::distance(m_position, m_end), static_cast<std::ptrdiff_t>(4));
// The first digit is read through the copy bp, so toi() is not handed
// m_position itself.
1852 const charT* bp = m_position;
1853 boost::intmax_t val = this->m_traits.toi(bp, bp + 1, 8);
1856 // Rewind to start of escape:
1858 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1859 // Oops not an octal escape after all:
1860 fail(regex_constants::error_escape, m_position - m_base, "Invalid octal escape sequence.");
// Read the whole octal value and reject negatives or values above the maximum
// of charT.
1863 val = this->m_traits.toi(m_position, m_position + len, 8);
1864 if((val < 0) || (val > (boost::intmax_t)(std::numeric_limits<charT>::max)()))
1866 // Rewind to start of escape:
1868 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1869 fail(regex_constants::error_escape, m_position - m_base, "Octal escape sequence is invalid.");
1872 return static_cast<charT>(val);
1874 case regex_constants::escape_type_named_char:
1877 if(m_position == m_end)
1879 // Rewind to start of escape:
1881 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1882 fail(regex_constants::error_escape, m_position - m_base);
1885 // maybe have \N{name}
1886 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
// base remembers the position of the opening brace.
1888 const charT* base = m_position;
1889 // skip forward until we find enclosing brace:
1890 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
1892 if(m_position == m_end)
1894 // Rewind to start of escape:
1896 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1897 fail(regex_constants::error_escape, m_position - m_base);
// Look up the name between the braces; ++base steps past the opening brace and
// m_position++ steps past the closing one.
1900 string_type s = this->m_traits.lookup_collatename(++base, m_position++);
1903 // Rewind to start of escape:
1905 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1906 fail(regex_constants::error_collate, m_position - m_base);
1914 // fall through is a failure:
1915 // Rewind to start of escape:
1917 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1918 fail(regex_constants::error_escape, m_position - m_base);
// Remaining visible path: the escaped character stands for itself.
1922 result = *m_position;
1928 #pragma warning(pop)
// Handles an escape followed by a digit: emits either a literal (when the
// sequence is really an octal escape) or a back-reference state, and reports
// error_backref otherwise.
//
// NOTE(review): this listing omits some lines of the function (brace-only
// lines, position adjustments, return statements); comments describe only the
// visible statements.
1932 template <class charT, class traits>
1933 bool basic_regex_parser<charT, traits>::parse_backref()
1935 BOOST_ASSERT(m_position != m_end);
// Read one digit through the copy pc, so toi() is not handed m_position itself.
1936 const charT* pc = m_position;
1937 boost::intmax_t i = this->m_traits.toi(pc, pc + 1, 10);
// A leading zero, or Perl syntax with back-references disabled (no_bk_refs),
// means this is not a back-reference.
1938 if((i == 0) || (((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group) && (this->flags() & regbase::no_bk_refs)))
1940 // not a backref at all but an octal escape sequence:
1941 charT c = unescape_character();
1942 this->append_literal(c);
// Accept the reference only when bit (i-1) is set in m_backrefs.
1944 else if((i > 0) && (this->m_backrefs & (1u << (i-1))))
1947 re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
// Record the case-sensitivity in force for this state.
1949 pb->icase = this->flags() & regbase::icase;
1953 // Rewind to start of escape:
1955 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1956 fail(regex_constants::error_backref, m_position - m_base);
// Parses a \Q...\E quoted sequence and appends the characters between the two
// escapes as literals.
//
// NOTE(review): this listing omits some lines of the function (brace-only
// lines, preprocessor guards, loop headers, position adjustments, break/return
// statements and the declaration of `end`); comments describe only the visible
// statements.
1962 template <class charT, class traits>
1963 bool basic_regex_parser<charT, traits>::parse_QE()
1966 #pragma warning(push)
1967 #pragma warning(disable:4127)
1970 // parse a \Q...\E sequence:
1972 ++m_position; // skip the Q
// start marks the first quoted character.
1973 const charT* start = m_position;
// Scan forward to the next escape character or the end of input.
1977 while((m_position != m_end)
1978 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape))
1980 if(m_position == m_end)
1982 // a \Q...\E sequence may terminate with the end of the expression:
// An escape character that is the last character of the input is an error.
1986 if(++m_position == m_end) // skip the escape
1988 fail(regex_constants::error_escape, m_position - m_base, "Unterminated \\Q...\\E sequence.");
1991 // check to see if it's a \E:
1992 if(this->m_traits.escape_syntax_type(*m_position) == regex_constants::escape_type_E)
// `end` is set two characters back from m_position
// (presumably m_position has already moved past the E -- TODO confirm).
1995 end = m_position - 2;
1998 // otherwise go round again:
2001 // now add all the character between the two escapes as literals:
2005 this->append_literal(*start);
2010 #pragma warning(pop)
2014 template <class charT, class traits>
2015 bool basic_regex_parser<charT, traits>::parse_perl_extension()
2017 if(++m_position == m_end)
2019 // Rewind to start of (? sequence:
2021 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2022 fail(regex_constants::error_perl_extension, m_position - m_base);
2026 // treat comments as a special case, as these
2027 // are the only ones that don't start with a leading
2030 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_hash)
2032 while((m_position != m_end)
2033 && (this->m_traits.syntax_type(*m_position++) != regex_constants::syntax_close_mark))
2038 // backup some state, and prepare the way:
2041 std::ptrdiff_t jump_offset = 0;
2042 re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
2043 pb->icase = this->flags() & regbase::icase;
2044 std::ptrdiff_t last_paren_start = this->getoffset(pb);
2045 // back up insertion point for alternations, and set new point:
2046 std::ptrdiff_t last_alt_point = m_alt_insert_point;
2047 this->m_pdata->m_data.align();
2048 m_alt_insert_point = this->m_pdata->m_data.size();
2049 std::ptrdiff_t expected_alt_point = m_alt_insert_point;
2050 bool restore_flags = true;
2051 regex_constants::syntax_option_type old_flags = this->flags();
2052 bool old_case_change = m_has_case_change;
2053 m_has_case_change = false;
2055 int mark_reset = m_mark_reset;
2056 int max_mark = m_max_mark;
2058 m_max_mark = m_mark_count;
2061 // select the actual extension used:
2063 switch(this->m_traits.syntax_type(*m_position))
2065 case regex_constants::syntax_or:
2066 m_mark_reset = m_mark_count;
2068 case regex_constants::syntax_colon:
2070 // a non-capturing mark:
2072 pb->index = markid = 0;
2075 case regex_constants::syntax_digit:
2078 // a recursive subexpression:
2080 v = this->m_traits.toi(m_position, m_end, 10);
2081 if((v < 0) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2083 // Rewind to start of (? sequence:
2085 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2086 fail(regex_constants::error_perl_extension, m_position - m_base, "The recursive sub-expression refers to an invalid marking group, or is unterminated.");
2090 pb->index = markid = 0;
2091 re_recurse* pr = static_cast<re_recurse*>(this->append_state(syntax_element_recurse, sizeof(re_recurse)));
2094 static_cast<re_case*>(
2095 this->append_state(syntax_element_toggle_case, sizeof(re_case))
2096 )->icase = this->flags() & regbase::icase;
2099 case regex_constants::syntax_plus:
2101 // A forward-relative recursive subexpression:
2104 v = this->m_traits.toi(m_position, m_end, 10);
2105 if((v <= 0) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2107 // Rewind to start of (? sequence:
2109 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2110 fail(regex_constants::error_perl_extension, m_position - m_base, "An invalid or unterminated recursive sub-expression.");
2113 if ((std::numeric_limits<boost::intmax_t>::max)() - m_mark_count < v)
2115 fail(regex_constants::error_perl_extension, m_position - m_base, "An invalid or unterminated recursive sub-expression.");
2119 goto insert_recursion;
2120 case regex_constants::syntax_dash:
2122 // Possibly a backward-relative recursive subexpression:
2125 v = this->m_traits.toi(m_position, m_end, 10);
2129 // Oops not a relative recursion at all, but a (?-imsx) group:
2130 goto option_group_jump;
2132 v = m_mark_count + 1 - v;
2135 // Rewind to start of (? sequence:
2137 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2138 fail(regex_constants::error_perl_extension, m_position - m_base, "An invalid or unterminated recursive sub-expression.");
2141 goto insert_recursion;
2142 case regex_constants::syntax_equal:
2143 pb->index = markid = -1;
2145 jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
2146 this->m_pdata->m_data.align();
2147 m_alt_insert_point = this->m_pdata->m_data.size();
2149 case regex_constants::syntax_not:
2150 pb->index = markid = -2;
2152 jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
2153 this->m_pdata->m_data.align();
2154 m_alt_insert_point = this->m_pdata->m_data.size();
2156 case regex_constants::escape_type_left_word:
2158 // a lookbehind assertion:
2159 if(++m_position == m_end)
2161 // Rewind to start of (? sequence:
2163 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2164 fail(regex_constants::error_perl_extension, m_position - m_base);
2167 regex_constants::syntax_type t = this->m_traits.syntax_type(*m_position);
2168 if(t == regex_constants::syntax_not)
2169 pb->index = markid = -2;
2170 else if(t == regex_constants::syntax_equal)
2171 pb->index = markid = -1;
2174 // Probably a named capture which also starts (?< :
2177 goto named_capture_jump;
2180 jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
2181 this->append_state(syntax_element_backstep, sizeof(re_brace));
2182 this->m_pdata->m_data.align();
2183 m_alt_insert_point = this->m_pdata->m_data.size();
2186 case regex_constants::escape_type_right_word:
2188 // an independent sub-expression:
2190 pb->index = markid = -3;
2192 jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
2193 this->m_pdata->m_data.align();
2194 m_alt_insert_point = this->m_pdata->m_data.size();
2196 case regex_constants::syntax_open_mark:
2198 // a conditional expression:
2199 pb->index = markid = -4;
2200 if(++m_position == m_end)
2202 // Rewind to start of (? sequence:
2204 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2205 fail(regex_constants::error_perl_extension, m_position - m_base);
2208 v = this->m_traits.toi(m_position, m_end, 10);
2209 if(m_position == m_end)
2211 // Rewind to start of (? sequence:
2213 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2214 fail(regex_constants::error_perl_extension, m_position - m_base);
2217 if(*m_position == charT('R'))
2219 if(++m_position == m_end)
2221 // Rewind to start of (? sequence:
2223 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2224 fail(regex_constants::error_perl_extension, m_position - m_base);
2227 if(*m_position == charT('&'))
2229 const charT* base = ++m_position;
2230 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2232 if(m_position == m_end)
2234 // Rewind to start of (? sequence:
2236 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2237 fail(regex_constants::error_perl_extension, m_position - m_base);
2240 v = -static_cast<int>(hash_value_from_capture_name(base, m_position));
2244 v = -this->m_traits.toi(m_position, m_end, 10);
2246 re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
2247 br->index = v < 0 ? (v - 1) : 0;
2248 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
2250 // Rewind to start of (? sequence:
2252 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2253 fail(regex_constants::error_perl_extension, m_position - m_base);
2256 if(++m_position == m_end)
2258 // Rewind to start of (? sequence:
2260 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2261 fail(regex_constants::error_perl_extension, m_position - m_base);
2265 else if((*m_position == charT('\'')) || (*m_position == charT('<')))
2267 const charT* base = ++m_position;
2268 while((m_position != m_end) && (*m_position != charT('>')) && (*m_position != charT('\'')))
2270 if(m_position == m_end)
2272 // Rewind to start of (? sequence:
2274 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2275 fail(regex_constants::error_perl_extension, m_position - m_base);
2278 v = static_cast<int>(hash_value_from_capture_name(base, m_position));
2279 re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
2281 if(((*m_position != charT('>')) && (*m_position != charT('\''))) || (++m_position == m_end))
2283 // Rewind to start of (? sequence:
2285 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2286 fail(regex_constants::error_perl_extension, m_position - m_base, "Unterminated named capture.");
2289 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
2291 // Rewind to start of (? sequence:
2293 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2294 fail(regex_constants::error_perl_extension, m_position - m_base);
2297 if(++m_position == m_end)
2299 // Rewind to start of (? sequence:
2301 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2302 fail(regex_constants::error_perl_extension, m_position - m_base);
2306 else if(*m_position == charT('D'))
2308 const char* def = "DEFINE";
2309 while(*def && (m_position != m_end) && (*m_position == charT(*def)))
2310 ++m_position, ++def;
2311 if((m_position == m_end) || *def)
2313 // Rewind to start of (? sequence:
2315 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2316 fail(regex_constants::error_perl_extension, m_position - m_base);
2319 re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
2320 br->index = 9999; // special magic value!
2321 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
2323 // Rewind to start of (? sequence:
2325 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2326 fail(regex_constants::error_perl_extension, m_position - m_base);
2329 if(++m_position == m_end)
2331 // Rewind to start of (? sequence:
2333 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2334 fail(regex_constants::error_perl_extension, m_position - m_base);
2340 re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
2342 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
2344 // Rewind to start of (? sequence:
2346 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2347 fail(regex_constants::error_perl_extension, m_position - m_base);
2350 if(++m_position == m_end)
2352 // Rewind to start of (? sequence:
2354 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2355 fail(regex_constants::error_perl_extension, m_position - m_base);
2361 // verify that we have a lookahead or lookbehind assert:
2362 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_question)
2364 // Rewind to start of (? sequence:
2366 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2367 fail(regex_constants::error_perl_extension, m_position - m_base);
2370 if(++m_position == m_end)
2372 // Rewind to start of (? sequence:
2374 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2375 fail(regex_constants::error_perl_extension, m_position - m_base);
2378 if(this->m_traits.syntax_type(*m_position) == regex_constants::escape_type_left_word)
2380 if(++m_position == m_end)
2382 // Rewind to start of (? sequence:
2384 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2385 fail(regex_constants::error_perl_extension, m_position - m_base);
2388 if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal)
2389 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not))
2391 // Rewind to start of (? sequence:
2393 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2394 fail(regex_constants::error_perl_extension, m_position - m_base);
2401 if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal)
2402 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not))
2404 // Rewind to start of (? sequence:
2406 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2407 fail(regex_constants::error_perl_extension, m_position - m_base);
2415 case regex_constants::syntax_close_mark:
2416 // Rewind to start of (? sequence:
2418 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2419 fail(regex_constants::error_perl_extension, m_position - m_base);
2421 case regex_constants::escape_type_end_buffer:
2423 name_delim = *m_position;
2426 if(0 == (this->flags() & regbase::nosubs))
2428 markid = ++m_mark_count;
2429 #ifndef BOOST_NO_STD_DISTANCE
2430 if(this->flags() & regbase::save_subexpression_location)
2431 this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>(std::distance(m_base, m_position) - 2, 0));
2433 if(this->flags() & regbase::save_subexpression_location)
2434 this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>((m_position - m_base) - 2, 0));
2438 const charT* base = ++m_position;
2439 if(m_position == m_end)
2441 // Rewind to start of (? sequence:
2443 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2444 fail(regex_constants::error_perl_extension, m_position - m_base);
2447 while((m_position != m_end) && (*m_position != name_delim))
2449 if(m_position == m_end)
2451 // Rewind to start of (? sequence:
2453 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2454 fail(regex_constants::error_perl_extension, m_position - m_base);
2457 this->m_pdata->set_name(base, m_position, markid);
2462 if(*m_position == charT('R'))
2466 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
2468 // Rewind to start of (? sequence:
2470 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2471 fail(regex_constants::error_perl_extension, m_position - m_base);
2474 goto insert_recursion;
2476 if(*m_position == charT('&'))
2479 const charT* base = m_position;
2480 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2482 if(m_position == m_end)
2484 // Rewind to start of (? sequence:
2486 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2487 fail(regex_constants::error_perl_extension, m_position - m_base);
2490 v = static_cast<int>(hash_value_from_capture_name(base, m_position));
2491 goto insert_recursion;
2493 if(*m_position == charT('P'))
2496 if(m_position == m_end)
2498 // Rewind to start of (? sequence:
2500 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2501 fail(regex_constants::error_perl_extension, m_position - m_base);
2504 if(*m_position == charT('>'))
2507 const charT* base = m_position;
2508 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2510 if(m_position == m_end)
2512 // Rewind to start of (? sequence:
2514 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2515 fail(regex_constants::error_perl_extension, m_position - m_base);
2518 v = static_cast<int>(hash_value_from_capture_name(base, m_position));
2519 goto insert_recursion;
2523 // lets assume that we have a (?imsx) group and try and parse it:
2526 regex_constants::syntax_option_type opts = parse_options();
2527 if(m_position == m_end)
2529 // Rewind to start of (? sequence:
2531 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2532 fail(regex_constants::error_perl_extension, m_position - m_base);
2535 // make a note of whether we have a case change:
2536 m_has_case_change = ((opts & regbase::icase) != (this->flags() & regbase::icase));
2537 pb->index = markid = 0;
2538 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark)
2540 // update flags and carry on as normal:
2542 restore_flags = false;
2543 old_case_change |= m_has_case_change; // defer end of scope by one ')'
2545 else if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_colon)
2547 // update flags and carry on until the matching ')' is found:
2553 // Rewind to start of (? sequence:
2555 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2556 fail(regex_constants::error_perl_extension, m_position - m_base);
2560 // finally append a case change state if we need it:
2561 if(m_has_case_change)
2563 static_cast<re_case*>(
2564 this->append_state(syntax_element_toggle_case, sizeof(re_case))
2565 )->icase = opts & regbase::icase;
2570 // now recursively add more states, this will terminate when we get to a
2575 // Unwind alternatives:
2577 if(0 == unwind_alts(last_paren_start))
2579 // Rewind to start of (? sequence:
2581 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2582 fail(regex_constants::error_perl_extension, m_position - m_base, "Invalid alternation operators within (?...) block.");
2586 // we either have a ')' or we have run out of characters prematurely:
2588 if(m_position == m_end)
2590 // Rewind to start of (? sequence:
2592 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2593 this->fail(regex_constants::error_paren, ::boost::BOOST_REGEX_DETAIL_NS::distance(m_base, m_end));
2596 BOOST_ASSERT(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark);
2599 // restore the flags:
2603 // append a case change state if we need it:
2604 if(m_has_case_change)
2606 static_cast<re_case*>(
2607 this->append_state(syntax_element_toggle_case, sizeof(re_case))
2608 )->icase = old_flags & regbase::icase;
2610 this->flags(old_flags);
2613 // set up the jump pointer if we have one:
2617 this->m_pdata->m_data.align();
2618 re_jump* jmp = static_cast<re_jump*>(this->getaddress(jump_offset));
2619 jmp->alt.i = this->m_pdata->m_data.size() - this->getoffset(jmp);
2620 if((this->m_last_state == jmp) && (markid != -2))
2622 // Oops... we didn't have anything inside the assertion.
2623 // Note we don't get here for negated forward lookahead as (?!)
2624 // does have some uses.
2625 // Rewind to start of (? sequence:
2627 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2628 fail(regex_constants::error_perl_extension, m_position - m_base, "Invalid or empty zero width assertion.");
2633 // verify that if this is conditional expression, that we do have
2634 // an alternative, if not add one:
2638 re_syntax_base* b = this->getaddress(expected_alt_point);
2639 // Make sure we have exactly one alternative following this state:
2640 if(b->type != syntax_element_alt)
2642 re_alt* alt = static_cast<re_alt*>(this->insert_state(expected_alt_point, syntax_element_alt, sizeof(re_alt)));
2643 alt->alt.i = this->m_pdata->m_data.size() - this->getoffset(alt);
2645 else if(((std::ptrdiff_t)this->m_pdata->m_data.size() > (static_cast<re_alt*>(b)->alt.i + this->getoffset(b))) && (static_cast<re_alt*>(b)->alt.i > 0) && this->getaddress(static_cast<re_alt*>(b)->alt.i, b)->type == syntax_element_alt)
2647 // Can't have seen more than one alternative:
2648 // Rewind to start of (? sequence:
2650 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2651 fail(regex_constants::error_bad_pattern, m_position - m_base, "More than one alternation operator | was encountered inside a conditional expression.");
2656 // We must *not* have seen an alternative inside a (DEFINE) block:
2657 b = this->getaddress(b->next.i, b);
2658 if((b->type == syntax_element_assert_backref) && (static_cast<re_brace*>(b)->index == 9999))
2660 // Rewind to start of (? sequence:
2662 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2663 fail(regex_constants::error_bad_pattern, m_position - m_base, "Alternation operators are not allowed inside a DEFINE block.");
2667 // check for invalid repetition of next state:
2668 b = this->getaddress(expected_alt_point);
2669 b = this->getaddress(static_cast<re_alt*>(b)->next.i, b);
2670 if((b->type != syntax_element_assert_backref)
2671 && (b->type != syntax_element_startmark))
2673 // Rewind to start of (? sequence:
2675 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2676 fail(regex_constants::error_badrepeat, m_position - m_base, "A repetition operator cannot be applied to a zero-width assertion.");
2681 // append closing parenthesis state:
2683 pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
2685 pb->icase = this->flags() & regbase::icase;
2686 this->m_paren_start = last_paren_start;
2688 // restore the alternate insertion point:
2690 this->m_alt_insert_point = last_alt_point;
2692 // and the case change data:
2694 m_has_case_change = old_case_change;
2696 // And the mark_reset data:
2698 if(m_max_mark > m_mark_count)
2700 m_mark_count = m_max_mark;
2702 m_mark_reset = mark_reset;
2703 m_max_mark = max_mark;
2708 #ifndef BOOST_NO_STD_DISTANCE
2709 if(this->flags() & regbase::save_subexpression_location)
2710 this->m_pdata->m_subs.at(markid - 1).second = std::distance(m_base, m_position) - 1;
2712 if(this->flags() & regbase::save_subexpression_location)
2713 this->m_pdata->m_subs.at(markid - 1).second = (m_position - m_base) - 1;
2716 // allow backrefs to this mark:
2718 if(markid < (int)(sizeof(unsigned) * CHAR_BIT))
2719 this->m_backrefs |= 1u << (markid - 1);
// match_verb: compares the characters of `verb` (a narrow C string) against
// the pattern text at m_position, advancing m_position as it goes.
//
// Both failure paths below walk m_position backwards until it sits on an
// open-mark character and then report error_perl_extension at that offset
// (m_position - m_base).
//
// NOTE(review): the loop header and the return statements are not visible in
// this listing; presumably false is returned after each fail() and true once
// every character of `verb` has been matched -- confirm against the full file.
2724 template <class charT, class traits>
2725 bool basic_regex_parser<charT, traits>::match_verb(const char* verb)
// Current pattern character differs from the expected verb character:
2729 if(static_cast<charT>(*verb) != *m_position)
2731 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2732 fail(regex_constants::error_perl_extension, m_position - m_base);
// Stepping forward landed on the end of the pattern, which is also an error:
2735 if(++m_position == m_end)
2738 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2739 fail(regex_constants::error_perl_extension, m_position - m_base);
// parse_perl_verb: parses a "(*...)" verb sequence (see the "(* sequence"
// rewind comments below) and appends the corresponding state.
//
// Every error path has the same shape: m_position is walked back to the
// opening mark of the sequence and error_perl_extension is reported at that
// offset.
//
// NOTE(review): the dispatch on the first letter of the verb is not visible in
// this listing; only the remainder of each name is handed to match_verb().
// The full names are presumably FAIL, ACCEPT, COMMIT, PRUNE, SKIP and THEN --
// confirm against the full file.  The return statements are not visible
// either.
2747 template <class charT, class traits>
2748 bool basic_regex_parser<charT, traits>::parse_perl_verb()
// Step forward; running out of pattern here is an error:
2750 if(++m_position == m_end)
2752 // Rewind to start of (* sequence:
2754 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2755 fail(regex_constants::error_perl_extension, m_position - m_base);
2761 if(++m_position == m_end)
2763 // Rewind to start of (* sequence:
2765 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2766 fail(regex_constants::error_perl_extension, m_position - m_base);
// Either the sequence closes immediately or the rest of the name is "AIL";
// in both cases a closing mark must follow, then a fail state is appended:
2769 if((this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark) || match_verb("AIL"))
2771 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2773 // Rewind to start of (* sequence:
2775 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2776 fail(regex_constants::error_perl_extension, m_position - m_base);
2780 this->append_state(syntax_element_fail);
2785 if(++m_position == m_end)
2787 // Rewind to start of (* sequence:
2789 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2790 fail(regex_constants::error_perl_extension, m_position - m_base);
// Remainder "CCEPT" followed by a closing mark appends an accept state:
2793 if(match_verb("CCEPT"))
2795 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2797 // Rewind to start of (* sequence:
2799 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2800 fail(regex_constants::error_perl_extension, m_position - m_base);
2804 this->append_state(syntax_element_accept);
2809 if(++m_position == m_end)
2811 // Rewind to start of (* sequence:
2813 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2814 fail(regex_constants::error_perl_extension, m_position - m_base);
// Remainder "OMMIT" followed by a closing mark appends a commit state whose
// action is commit_commit, and sets m_disable_match_any on the regex data:
2817 if(match_verb("OMMIT"))
2819 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2821 // Rewind to start of (* sequence:
2823 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2824 fail(regex_constants::error_perl_extension, m_position - m_base);
2828 static_cast<re_commit*>(this->append_state(syntax_element_commit, sizeof(re_commit)))->action = commit_commit;
2829 this->m_pdata->m_disable_match_any = true;
2834 if(++m_position == m_end)
2836 // Rewind to start of (* sequence:
2838 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2839 fail(regex_constants::error_perl_extension, m_position - m_base);
// Remainder "RUNE": same commit state type, but with action commit_prune:
2842 if(match_verb("RUNE"))
2844 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2846 // Rewind to start of (* sequence:
2848 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2849 fail(regex_constants::error_perl_extension, m_position - m_base);
2853 static_cast<re_commit*>(this->append_state(syntax_element_commit, sizeof(re_commit)))->action = commit_prune;
2854 this->m_pdata->m_disable_match_any = true;
2859 if(++m_position == m_end)
2861 // Rewind to start of (* sequence:
2863 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2864 fail(regex_constants::error_perl_extension, m_position - m_base);
// Remainder "KIP": same commit state type, but with action commit_skip:
2867 if(match_verb("KIP"))
2869 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2871 // Rewind to start of (* sequence:
2873 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2874 fail(regex_constants::error_perl_extension, m_position - m_base);
2878 static_cast<re_commit*>(this->append_state(syntax_element_commit, sizeof(re_commit)))->action = commit_skip;
2879 this->m_pdata->m_disable_match_any = true;
2884 if(++m_position == m_end)
2886 // Rewind to start of (* sequence:
2888 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2889 fail(regex_constants::error_perl_extension, m_position - m_base);
// Remainder "HEN" followed by a closing mark appends a then state and also
// sets m_disable_match_any:
2892 if(match_verb("HEN"))
2894 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2896 // Rewind to start of (* sequence:
2898 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2899 fail(regex_constants::error_perl_extension, m_position - m_base);
2903 this->append_state(syntax_element_then);
2904 this->m_pdata->m_disable_match_any = true;
// Final error path; presumably reached when none of the verbs above was
// recognised (the enclosing control flow is not visible here):
2909 // Rewind to start of (* sequence:
2911 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2912 fail(regex_constants::error_perl_extension, m_position - m_base);
// add_emacs_code: builds a character set for an emacs style syntax-code
// escape and appends it to the state machine via append_set().
//
// Reports error_escape if the pattern ends right after the escape, and
// error_ctype if the code is rejected or append_set() returns 0.
//
// NOTE(review): the case labels that choose between the groups of
// add_class()/add_single() calls below are not visible in this listing, and
// neither is any use of `negate` (presumably it inverts the set for the \S
// form) -- confirm against the full file.
2916 template <class charT, class traits>
2917 bool basic_regex_parser<charT, traits>::add_emacs_code(bool negate)
2920 // parses an emacs style \sx or \Sx construct.
2922 if(++m_position == m_end)
2924 // Rewind to start of sequence:
2926 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
2927 fail(regex_constants::error_escape, m_position - m_base);
2930 basic_char_set<charT, traits> char_set;
// Class name "punct", spelled out in charT so that it can be passed to
// lookup_classname() as an iterator range further down:
2934 static const charT s_punct[5] = { 'p', 'u', 'n', 'c', 't', };
// Group: the class mask held in m_mask_space.
2940 char_set.add_class(this->m_mask_space);
// Group: the class mask held in m_word_mask.
2943 char_set.add_class(this->m_word_mask);
// Group: the individual characters $ & * + - _ < >
2946 char_set.add_single(digraph<charT>(charT('$')));
2947 char_set.add_single(digraph<charT>(charT('&')));
2948 char_set.add_single(digraph<charT>(charT('*')));
2949 char_set.add_single(digraph<charT>(charT('+')));
2950 char_set.add_single(digraph<charT>(charT('-')));
2951 char_set.add_single(digraph<charT>(charT('_')));
2952 char_set.add_single(digraph<charT>(charT('<')));
2953 char_set.add_single(digraph<charT>(charT('>')));
// Group: whatever class the traits object returns for the name "punct".
2956 char_set.add_class(this->m_traits.lookup_classname(s_punct, s_punct+5));
// Group: opening brackets ( [ {
2959 char_set.add_single(digraph<charT>(charT('(')));
2960 char_set.add_single(digraph<charT>(charT('[')));
2961 char_set.add_single(digraph<charT>(charT('{')));
// Group: closing brackets ) ] }
2964 char_set.add_single(digraph<charT>(charT(')')));
2965 char_set.add_single(digraph<charT>(charT(']')));
2966 char_set.add_single(digraph<charT>(charT('}')));
// Group: quote characters " ' `
2969 char_set.add_single(digraph<charT>(charT('"')));
2970 char_set.add_single(digraph<charT>(charT('\'')));
2971 char_set.add_single(digraph<charT>(charT('`')));
// Group: the characters ' , #
2974 char_set.add_single(digraph<charT>(charT('\'')));
2975 char_set.add_single(digraph<charT>(charT(',')));
2976 char_set.add_single(digraph<charT>(charT('#')));
// Group: the single character ;
2979 char_set.add_single(digraph<charT>(charT(';')));
// Group: newline and form feed.
2982 char_set.add_single(digraph<charT>(charT('\n')));
2983 char_set.add_single(digraph<charT>(charT('\f')));
// Error path; presumably the fall-back for an unrecognised syntax code:
2986 fail(regex_constants::error_ctype, m_position - m_base);
// A zero result from append_set() is reported as error_ctype as well:
2989 if(0 == this->append_set(char_set))
2991 fail(regex_constants::error_ctype, m_position - m_base);
// parse_options: reads the option letters of a (?imsx-imsx) group and
// produces the resulting option set, starting from the parser's current
// flags().
//
// If the pattern ends while the letters are being read, m_position is
// rewound to the opening mark and error_paren is reported.
//
// NOTE(review): the loop and the case labels mapping each letter to a flag
// are not visible in this listing; the order of the assignments below
// suggests s, m, i, x -- confirm against the full file.  The return statement
// is not visible either; presumably `f` is returned.
2998 template <class charT, class traits>
2999 regex_constants::syntax_option_type basic_regex_parser<charT, traits>::parse_options()
3001 // we have a (?imsx-imsx) group, convert it into a set of flags:
3002 regex_constants::syntax_option_type f = this->flags();
3003 bool breakout = false;
// Letters seen before any '-': these switch options on.
// mod_s is set and its explicit "off" marker no_mod_s is cleared:
3009 f |= regex_constants::mod_s;
3010 f &= ~regex_constants::no_mod_s;
// Only the no_mod_m marker is cleared here:
3013 f &= ~regex_constants::no_mod_m;
3016 f |= regex_constants::icase;
3019 f |= regex_constants::mod_x;
3025 if(++m_position == m_end)
3027 // Rewind to start of (? sequence:
3029 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
3030 fail(regex_constants::error_paren, m_position - m_base);
// A '-' introduces the letters that switch options off:
3038 if(*m_position == static_cast<charT>('-'))
3040 if(++m_position == m_end)
3042 // Rewind to start of (? sequence:
3044 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
3045 fail(regex_constants::error_paren, m_position - m_base);
// Mirror image of the assignments above: mod_s is cleared and no_mod_s set,
// no_mod_m is set, icase and mod_x are cleared.
3053 f &= ~regex_constants::mod_s;
3054 f |= regex_constants::no_mod_s;
3057 f |= regex_constants::no_mod_m;
3060 f &= ~regex_constants::icase;
3063 f &= ~regex_constants::mod_x;
3069 if(++m_position == m_end)
3071 // Rewind to start of (? sequence:
3073 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
3074 fail(regex_constants::error_paren, m_position - m_base);
// unwind_alts: finishes the alternatives opened since `last_paren_start`.
//
// First it checks for an alternative with nothing after it; that is reported
// as error_empty.  Then every pending entry of m_alt_jumps that lies beyond
// `last_paren_start` is popped and its jump state is patched to point at the
// current end of the state data.
//
// NOTE(review): the return statements are not visible in this listing; the
// call site in this file tests the result against 0, so presumably false
// means failure and true means success -- confirm against the full file.
3083 template <class charT, class traits>
3084 bool basic_regex_parser<charT, traits>::unwind_alts(std::ptrdiff_t last_paren_start)
3087 // If we didn't actually add any states after the last
3088 // alternative then that's an error:
// The test combines: the alternative insertion point equals the current data
// size, a pending jump exists beyond last_paren_start, and two checks on the
// syntax flags (perl syntax group, no_empty_expressions).
// NOTE(review): the operators joining the flag checks to the rest of the
// condition are not visible here.
3090 if((this->m_alt_insert_point == static_cast<std::ptrdiff_t>(this->m_pdata->m_data.size()))
3091 && m_alt_jumps.size() && (m_alt_jumps.back() > last_paren_start)
3094 ((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group)
3096 ((this->flags() & regbase::no_empty_expressions) == 0)
3100 fail(regex_constants::error_empty, this->m_position - this->m_base, "Can't terminate a sub-expression with an alternation operator |.");
3104 // Fix up our alternatives:
3106 while(m_alt_jumps.size() && (m_alt_jumps.back() > last_paren_start))
3109 // fix up the jump to point to the end of the states
3110 // that we've just added:
3112 std::ptrdiff_t jump_offset = m_alt_jumps.back();
3113 m_alt_jumps.pop_back();
// align() is called before getaddress() and before size() is read for the
// new jump distance:
3114 this->m_pdata->m_data.align();
3115 re_jump* jmp = static_cast<re_jump*>(this->getaddress(jump_offset));
// The recorded offset is expected to refer to a jump state:
3116 BOOST_ASSERT(jmp->type == syntax_element_jump);
// Distance from the jump state itself to the end of the data:
3117 jmp->alt.i = this->m_pdata->m_data.size() - jump_offset;
3123 #pragma warning(pop)
3126 } // namespace BOOST_REGEX_DETAIL_NS
3127 } // namespace boost
3130 #pragma warning(push)
3131 #pragma warning(disable: 4103)
3133 #ifdef BOOST_HAS_ABI_HEADERS
3134 # include BOOST_ABI_SUFFIX
3137 #pragma warning(pop)