6 * Use, modification and distribution are subject to the
7 * Boost Software License, Version 1.0. (See accompanying file
8 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
13 * LOCATION: see http://www.boost.org for most recent version.
14 * FILE basic_regex_parser.hpp
15 * VERSION see <boost/version.hpp>
16 * DESCRIPTION: Declares template class basic_regex_parser.
19 #ifndef BOOST_REGEX_V4_BASIC_REGEX_PARSER_HPP
20 #define BOOST_REGEX_V4_BASIC_REGEX_PARSER_HPP
24 #pragma warning(disable: 4103)
26 #ifdef BOOST_HAS_ABI_HEADERS
27 # include BOOST_ABI_PREFIX
34 namespace BOOST_REGEX_DETAIL_NS{
38 #pragma warning(disable:4244 4800)
// Overload chosen by umax() below when boost::intmax_t has fewer value digits
// than std::size_t: returns the largest boost::intmax_t, or INT_MAX if
// std::numeric_limits is not specialized for boost::intmax_t.
41 inline boost::intmax_t umax(mpl::false_ const&)
43 // Get out clause here, just in case numeric_limits is unspecialized:
44 return std::numeric_limits<boost::intmax_t>::is_specialized ? (std::numeric_limits<boost::intmax_t>::max)() : INT_MAX;
// Overload chosen by umax() below when boost::intmax_t has at least as many
// value digits as std::size_t: returns the largest std::size_t value,
// converted to boost::intmax_t.
46 inline boost::intmax_t umax(mpl::true_ const&)
48 return (std::numeric_limits<std::size_t>::max)();
// Picks one of the two overloads above at compile time by comparing the digit
// counts of boost::intmax_t and std::size_t. The result is used in this file
// as the upper bound when range-checking parsed repeat counts.
51 inline boost::intmax_t umax()
53 return umax(mpl::bool_<std::numeric_limits<boost::intmax_t>::digits >= std::numeric_limits<std::size_t>::digits>());
// Regular-expression parser. Walks the pattern text between m_base and m_end
// and, through the basic_regex_creator base class, appends the states that
// make up the compiled expression. The syntax-specific entry point is chosen
// at run time in parse() and stored in m_parser_proc.
56 template <class charT, class traits>
57 class basic_regex_parser : public basic_regex_creator<charT, traits>
60 basic_regex_parser(regex_data<charT, traits>* data);
// Parses the pattern [p1, p2) using the syntax selected by flags.
61 void parse(const charT* p1, const charT* p2, unsigned flags);
// Error reporting. The two-argument form obtains its message from the traits
// class; the four-argument form does the actual work.
62 void fail(regex_constants::error_type error_code, std::ptrdiff_t position);
63 void fail(regex_constants::error_type error_code, std::ptrdiff_t position, std::string message, std::ptrdiff_t start_pos);
// Convenience form: passes position as start_pos, which makes the
// four-argument overload choose its own excerpt start.
64 void fail(regex_constants::error_type error_code, std::ptrdiff_t position, const std::string& message)
66 fail(error_code, position, message, position);
// Per-construct parsing routines; each returns bool.
71 bool parse_extended();
73 bool parse_open_paren();
74 bool parse_basic_escape();
75 bool parse_extended_escape();
76 bool parse_match_any();
// The defaults for low and high are 0 and the largest std::size_t.
77 bool parse_repeat(std::size_t low = 0, std::size_t high = (std::numeric_limits<std::size_t>::max)());
78 bool parse_repeat_range(bool isbasic);
82 void parse_set_literal(basic_char_set<charT, traits>& char_set);
83 bool parse_inner_set(basic_char_set<charT, traits>& char_set);
85 bool parse_perl_extension();
86 bool parse_perl_verb();
87 bool match_verb(const char*);
88 bool add_emacs_code(bool negate);
89 bool unwind_alts(std::ptrdiff_t last_paren_start);
90 digraph<charT> get_next_set_literal(basic_char_set<charT, traits>& char_set);
91 charT unescape_character();
92 regex_constants::syntax_option_type parse_options();
// Pointer-to-member type for the syntax-specific parser called by parse_all().
95 typedef bool (basic_regex_parser::*parser_proc_type)();
96 typedef typename traits::string_type string_type;
97 typedef typename traits::char_class_type char_class_type;
98 parser_proc_type m_parser_proc; // the main parser to use
99 const charT* m_base; // the start of the string being parsed
100 const charT* m_end; // the end of the string being parsed
101 const charT* m_position; // our current parser position
102 unsigned m_mark_count; // how many sub-expressions we have
103 int m_mark_reset; // used to indicate that we're inside a (?|...) block.
104 unsigned m_max_mark; // largest mark count seen inside a (?|...) block.
105 std::ptrdiff_t m_paren_start; // offset of the start-mark state of the most recently closed (...) group (where repeats are inserted).
106 std::ptrdiff_t m_alt_insert_point; // where to insert the next alternative
107 bool m_has_case_change; // true if somewhere in the current block the case has changed
108 #if defined(BOOST_MSVC) && defined(_M_IX86)
109 // This is an ugly warning suppression workaround (for warnings *inside* std::vector
110 // that can not otherwise be suppressed)...
111 BOOST_STATIC_ASSERT(sizeof(long) >= sizeof(void*));
112 std::vector<long> m_alt_jumps; // list of alternative in the current scope.
114 std::vector<std::ptrdiff_t> m_alt_jumps; // list of alternative in the current scope.
// Copy operations are declared only; no definition is visible in this file.
// NOTE(review): presumably intended to make the parser non-copyable - confirm.
117 basic_regex_parser& operator=(const basic_regex_parser&);
118 basic_regex_parser(const basic_regex_parser&);
// Constructs a parser that builds into *data (forwarded to basic_regex_creator).
// Counters start at zero, m_mark_reset at -1 and m_has_case_change at false.
// The input pointers and m_parser_proc are not in this initializer list;
// m_base, m_position and m_parser_proc are assigned in parse().
121 template <class charT, class traits>
122 basic_regex_parser<charT, traits>::basic_regex_parser(regex_data<charT, traits>* data)
123 : basic_regex_creator<charT, traits>(data), m_mark_count(0), m_mark_reset(-1), m_max_mark(0), m_paren_start(0), m_alt_insert_point(0), m_has_case_change(false)
// Parses the pattern [p1, p2) according to l_flags.
//
// Steps visible here:
//  - remember the start of the input in m_base / m_position;
//  - report error_empty for an empty pattern (per the condition below this
//    applies when the syntax is not Perl, or no_empty_expressions is set);
//  - select the syntax-specific parser from the main_option_type bits; for
//    Perl syntax a leading start-mark state is appended first; any other bit
//    combination is reported as error_unknown;
//  - run parse_all(), then restore l_flags, which a global (?imsx) may have
//    changed;
//  - report error_paren if parsing stopped before the end of the input;
//  - when no error status is set, store the sub-expression count
//    (1 + m_mark_count) and call finalize().
// All errors go through fail().
127 template <class charT, class traits>
128 void basic_regex_parser<charT, traits>::parse(const charT* p1, const charT* p2, unsigned l_flags)
130 // pass l_flags on to base class:
133 m_position = m_base = p1;
135 // empty strings are errors:
138 ((l_flags & regbase::main_option_type) != regbase::perl_syntax_group)
139 || (l_flags & regbase::no_empty_expressions)
143 fail(regex_constants::error_empty, 0);
146 // select which parser to use:
147 switch(l_flags & regbase::main_option_type)
149 case regbase::perl_syntax_group:
151 m_parser_proc = &basic_regex_parser<charT, traits>::parse_extended;
153 // Add a leading paren with index zero to give recursions a target:
155 re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
157 br->icase = this->flags() & regbase::icase;
160 case regbase::basic_syntax_group:
161 m_parser_proc = &basic_regex_parser<charT, traits>::parse_basic;
163 case regbase::literal:
164 m_parser_proc = &basic_regex_parser<charT, traits>::parse_literal;
167 // Ooops, someone has managed to set more than one of the main option flags,
168 // so this must be an error:
169 fail(regex_constants::error_unknown, 0, "An invalid combination of regular expression syntax flags was used.");
173 // parse all our characters:
174 bool result = parse_all();
176 // Unwind our alternatives:
179 // reset l_flags as a global scope (?imsx) may have altered them:
180 this->flags(l_flags);
181 // if we haven't gobbled up all the characters then we must
182 // have had an unexpected ')' :
185 fail(regex_constants::error_paren, ::boost::BOOST_REGEX_DETAIL_NS::distance(m_base, m_position), "Found a closing ) with no corresponding openening parenthesis.");
188 // if an error has been set then give up now:
189 if(this->m_pdata->m_status)
191 // fill in our sub-expression count:
192 this->m_pdata->m_mark_count = 1 + m_mark_count;
193 this->finalize(p1, p2);
// Reports error_code at the given offset from the start of the pattern.
// The message text comes from the traits class (error_string) and is passed
// on to the overload that takes a message.
196 template <class charT, class traits>
197 void basic_regex_parser<charT, traits>::fail(regex_constants::error_type error_code, std::ptrdiff_t position)
199 // get the error message:
200 std::string message = this->m_pdata->m_ptraits->error_string(error_code);
201 fail(error_code, position, message);
// Records an error and stops the parse.
//
// error_code  stored in m_pdata->m_status only if no earlier error is recorded.
// position    offset of the error from m_base; marks where ">>>HERE>>>" is
//             inserted and is passed to boost::regex_error.
// message     taken by value because it is extended below with an excerpt of
//             the pattern (not done for error_empty).
// start_pos   where the excerpt starts; if equal to position the excerpt
//             starts up to 10 characters before position. The excerpt ends at
//             most 10 characters after position, clamped to the pattern end.
//
// Parsing is stopped by moving m_position to m_end. When exceptions are
// enabled and regex_constants::no_except is not set, a boost::regex_error is
// built from the message.
// NOTE(review): presumably it is then raised in the lines that follow - confirm.
204 template <class charT, class traits>
205 void basic_regex_parser<charT, traits>::fail(regex_constants::error_type error_code, std::ptrdiff_t position, std::string message, std::ptrdiff_t start_pos)
207 if(0 == this->m_pdata->m_status) // update the error code if not already set
208 this->m_pdata->m_status = error_code;
209 m_position = m_end; // don't bother parsing anything else
211 #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
213 // Augment error message with the regular expression text:
215 if(start_pos == position)
216 start_pos = (std::max)(static_cast<std::ptrdiff_t>(0), position - static_cast<std::ptrdiff_t>(10));
217 std::ptrdiff_t end_pos = (std::min)(position + static_cast<std::ptrdiff_t>(10), static_cast<std::ptrdiff_t>(m_end - m_base));
218 if(error_code != regex_constants::error_empty)
// "fragment" wording is used when the excerpt does not cover the whole pattern.
220 if((start_pos != 0) || (end_pos != (m_end - m_base)))
221 message += " The error occurred while parsing the regular expression fragment: '";
223 message += " The error occurred while parsing the regular expression: '";
224 if(start_pos != end_pos)
226 message += std::string(m_base + start_pos, m_base + position);
227 message += ">>>HERE>>>";
228 message += std::string(m_base + position, m_base + end_pos);
234 #ifndef BOOST_NO_EXCEPTIONS
235 if(0 == (this->flags() & regex_constants::no_except))
237 boost::regex_error e(message, error_code, position);
241 (void)position; // suppress warnings.
// Drives the parse: calls the selected parser (m_parser_proc) repeatedly
// until it reports failure or m_position reaches m_end.
245 template <class charT, class traits>
246 bool basic_regex_parser<charT, traits>::parse_all()
249 while(result && (m_position != m_end))
251 result = (this->*m_parser_proc)();
257 #pragma warning(push)
258 #pragma warning(disable:4702)
// Parses one element of a basic-syntax expression, dispatching on the syntax
// type of the character at m_position.
//
// Notable rules visible below:
//  - a star is a literal when nothing precedes it or when the previous state
//    is a start-of-line assertion; otherwise it is a repeat;
//  - plus and question are repeats only when regbase::emacs_ex is set (and
//    the same "something to repeat" rule holds); otherwise they are literals;
//  - a newline is tested against regbase::newline_alt; the visible fallback
//    is parse_literal().
// Anything not handled specially is parsed as a literal.
260 template <class charT, class traits>
261 bool basic_regex_parser<charT, traits>::parse_basic()
263 switch(this->m_traits.syntax_type(*m_position))
265 case regex_constants::syntax_escape:
266 return parse_basic_escape();
267 case regex_constants::syntax_dot:
268 return parse_match_any();
269 case regex_constants::syntax_caret:
271 this->append_state(syntax_element_start_line);
273 case regex_constants::syntax_dollar:
275 this->append_state(syntax_element_end_line);
277 case regex_constants::syntax_star:
278 if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line))
279 return parse_literal();
283 return parse_repeat();
285 case regex_constants::syntax_plus:
286 if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line) || !(this->flags() & regbase::emacs_ex))
287 return parse_literal();
291 return parse_repeat(1);
293 case regex_constants::syntax_question:
294 if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line) || !(this->flags() & regbase::emacs_ex))
295 return parse_literal();
299 return parse_repeat(0, 1);
301 case regex_constants::syntax_open_set:
303 case regex_constants::syntax_newline:
304 if(this->flags() & regbase::newline_alt)
307 return parse_literal();
309 return parse_literal();
// Parses one element of an extended/Perl-syntax expression, dispatching on
// the syntax type of the character at m_position. parse() installs this
// routine for regbase::perl_syntax_group.
//
// Notable rules visible below:
//  - caret and dollar append buffer start/end assertions when no_mod_m is
//    set, and line start/end assertions otherwise;
//  - star, question and plus at the very start of the pattern are reported
//    as error_badrepeat; elsewhere they become repeats (0..max, 0..1, 1..max);
//  - a closing brace with no opening brace is an error when all bits of
//    regbase::no_perl_ex are set; otherwise it is parsed as a literal;
//  - a hash is tested against the no_perl_ex / mod_x bits: per the comment
//    below, with mod_x in effect the text up to and including the next
//    separator character is skipped; otherwise it is a literal.
314 template <class charT, class traits>
315 bool basic_regex_parser<charT, traits>::parse_extended()
318 switch(this->m_traits.syntax_type(*m_position))
320 case regex_constants::syntax_open_mark:
321 return parse_open_paren();
322 case regex_constants::syntax_close_mark:
324 case regex_constants::syntax_escape:
325 return parse_extended_escape();
326 case regex_constants::syntax_dot:
327 return parse_match_any();
328 case regex_constants::syntax_caret:
331 (this->flags() & regex_constants::no_mod_m ? syntax_element_buffer_start : syntax_element_start_line));
333 case regex_constants::syntax_dollar:
336 (this->flags() & regex_constants::no_mod_m ? syntax_element_buffer_end : syntax_element_end_line));
338 case regex_constants::syntax_star:
339 if(m_position == this->m_base)
341 fail(regex_constants::error_badrepeat, 0, "The repeat operator \"*\" cannot start a regular expression.");
345 return parse_repeat();
346 case regex_constants::syntax_question:
347 if(m_position == this->m_base)
349 fail(regex_constants::error_badrepeat, 0, "The repeat operator \"?\" cannot start a regular expression.");
353 return parse_repeat(0,1);
354 case regex_constants::syntax_plus:
355 if(m_position == this->m_base)
357 fail(regex_constants::error_badrepeat, 0, "The repeat operator \"+\" cannot start a regular expression.");
361 return parse_repeat(1);
362 case regex_constants::syntax_open_brace:
364 return parse_repeat_range(false);
365 case regex_constants::syntax_close_brace:
366 if((this->flags() & regbase::no_perl_ex) == regbase::no_perl_ex)
368 fail(regex_constants::error_brace, this->m_position - this->m_base, "Found a closing repetition operator } with no corresponding {.");
371 result = parse_literal();
373 case regex_constants::syntax_or:
375 case regex_constants::syntax_open_set:
377 case regex_constants::syntax_newline:
378 if(this->flags() & regbase::newline_alt)
381 return parse_literal();
382 case regex_constants::syntax_hash:
384 // If we have a mod_x flag set, then skip until
385 // we get to a newline character:
388 & (regbase::no_perl_ex|regbase::mod_x))
// The post-increment consumes the separator character as well.
391 while((m_position != m_end) && !is_separator(*m_position++)){}
396 result = parse_literal();
// Appends the character at m_position as a literal, except that (per the
// comment and condition below) whitespace characters are not appended when
// the Perl mod_x option is in effect.
405 template <class charT, class traits>
406 bool basic_regex_parser<charT, traits>::parse_literal()
408 // append this as a literal provided it's not a space character
409 // or the perl option regbase::mod_x is not set:
412 & (regbase::main_option_type|regbase::mod_x|regbase::no_perl_ex))
414 || !this->m_traits.isctype(*m_position, this->m_mask_space))
415 this->append_literal(*m_position);
// Parses a parenthesised group, starting at its opening parenthesis.
//
//  - Reports error_paren if the pattern ends right after the parenthesis.
//  - For Perl syntax with Perl extensions enabled, or basic syntax with
//    emacs_ex, a following question mark hands over to parse_perl_extension()
//    and a following star to parse_perl_verb().
//  - Otherwise, unless regbase::nosubs is set, a new mark id is taken from
//    m_mark_count; with save_subexpression_location the start offset of the
//    group is recorded in m_pdata->m_subs.
//  - A start-mark state is appended, and the alternation insert point, the
//    current flags, m_has_case_change and m_mark_reset are saved so nested
//    constructs can change them.
//  - After the group body: pending alternatives are unwound; if case
//    sensitivity changed inside the group a toggle-case state restores the
//    saved setting; the saved values are put back; running out of input is
//    reported as error_paren; the end offset is recorded when locations are
//    being saved; an end-mark state is appended.
//  - m_paren_start is set to the offset of this group's start-mark so that a
//    following repeat can be inserted in front of the whole group.
//  - The mark is made available to back-references through the m_backrefs
//    bit mask, which only has room for mark ids below the bit width of
//    unsigned.
420 template <class charT, class traits>
421 bool basic_regex_parser<charT, traits>::parse_open_paren()
424 // skip the '(' and error check:
426 if(++m_position == m_end)
428 fail(regex_constants::error_paren, m_position - m_base);
432 // begin by checking for a perl-style (?...) extension:
435 ((this->flags() & (regbase::main_option_type | regbase::no_perl_ex)) == 0)
436 || ((this->flags() & (regbase::main_option_type | regbase::emacs_ex)) == (regbase::basic_syntax_group|regbase::emacs_ex))
439 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question)
440 return parse_perl_extension();
441 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_star)
442 return parse_perl_verb();
445 // update our mark count, and append the required state:
448 if(0 == (this->flags() & regbase::nosubs))
450 markid = ++m_mark_count;
451 #ifndef BOOST_NO_STD_DISTANCE
452 if(this->flags() & regbase::save_subexpression_location)
// The "- 1" makes the recorded start refer to the opening parenthesis,
// which has already been skipped.
453 this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>(std::distance(m_base, m_position) - 1, 0));
455 if(this->flags() & regbase::save_subexpression_location)
456 this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>((m_position - m_base) - 1, 0));
459 re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
461 pb->icase = this->flags() & regbase::icase;
462 std::ptrdiff_t last_paren_start = this->getoffset(pb);
463 // back up insertion point for alternations, and set new point:
464 std::ptrdiff_t last_alt_point = m_alt_insert_point;
465 this->m_pdata->m_data.align();
466 m_alt_insert_point = this->m_pdata->m_data.size();
468 // back up the current flags in case we have a nested (?imsx) group:
470 regex_constants::syntax_option_type opts = this->flags();
471 bool old_case_change = m_has_case_change;
472 m_has_case_change = false; // no changes to this scope as yet...
474 // Back up branch reset data in case we have a nested (?|...)
476 int mark_reset = m_mark_reset;
479 // now recursively add more states, this will terminate when we get to a
484 // Unwind pushed alternatives:
486 if(0 == unwind_alts(last_paren_start))
491 if(m_has_case_change)
493 // the case has changed in one or more of the alternatives
494 // within the scoped (...) block: we have to add a state
495 // to reset the case sensitivity:
496 static_cast<re_case*>(
497 this->append_state(syntax_element_toggle_case, sizeof(re_case))
498 )->icase = opts & regbase::icase;
501 m_has_case_change = old_case_change;
503 // restore branch reset:
505 m_mark_reset = mark_reset;
507 // we either have a ')' or we have run out of characters prematurely:
509 if(m_position == m_end)
511 this->fail(regex_constants::error_paren, ::boost::BOOST_REGEX_DETAIL_NS::distance(m_base, m_end));
514 BOOST_ASSERT(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark);
515 #ifndef BOOST_NO_STD_DISTANCE
516 if(markid && (this->flags() & regbase::save_subexpression_location))
517 this->m_pdata->m_subs.at(markid - 1).second = std::distance(m_base, m_position);
519 if(markid && (this->flags() & regbase::save_subexpression_location))
520 this->m_pdata->m_subs.at(markid - 1).second = (m_position - m_base);
524 // append closing parenthesis state:
526 pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
528 pb->icase = this->flags() & regbase::icase;
529 this->m_paren_start = last_paren_start;
531 // restore the alternate insertion point:
533 this->m_alt_insert_point = last_alt_point;
535 // allow backrefs to this mark:
// The upper bound keeps the shift below within the width of unsigned.
537 if((markid > 0) && (markid < sizeof(unsigned) * CHAR_BIT))
538 this->m_backrefs |= 1u << (markid - 1);
// Handles an escape sequence in a basic-syntax expression: moves past the
// escape character and dispatches on the escape syntax type of the character
// that follows.
//
// Visible rules:
//  - an escaped open mark starts a group (parse_open_paren());
//  - escaped plus / question are repeats (1..max and 0..1) only when
//    bk_plus_qm is set, otherwise literals;
//  - escaped braces are literals when no_intervals is set; otherwise the
//    open brace starts a repeat range and a stray close brace is error_brace;
//  - an escaped alternation character is tested against bk_vbar; the
//    visible fallback is a literal;
//  - a digit is parsed as a back-reference;
//  - buffer start/end and word assertions are tested against emacs_ex, with
//    a literal as the visible fallback;
//  - further emacs_ex-only escapes build a word-class character set or call
//    add_emacs_code(); an unsupported escape is reported as error_escape
//    with a message naming \c and \C;
//  - anything else is parsed as a literal.
543 template <class charT, class traits>
544 bool basic_regex_parser<charT, traits>::parse_basic_escape()
546 if(++m_position == m_end)
// NOTE(review): a pattern ending in an escape character is reported here as
// error_paren, whereas parse_extended_escape() reports the same condition as
// error_escape - confirm whether this is intended.
548 fail(regex_constants::error_paren, m_position - m_base);
552 switch(this->m_traits.escape_syntax_type(*m_position))
554 case regex_constants::syntax_open_mark:
555 return parse_open_paren();
556 case regex_constants::syntax_close_mark:
558 case regex_constants::syntax_plus:
559 if(this->flags() & regex_constants::bk_plus_qm)
562 return parse_repeat(1);
565 return parse_literal();
566 case regex_constants::syntax_question:
567 if(this->flags() & regex_constants::bk_plus_qm)
570 return parse_repeat(0, 1);
573 return parse_literal();
574 case regex_constants::syntax_open_brace:
575 if(this->flags() & regbase::no_intervals)
576 return parse_literal();
578 return parse_repeat_range(true);
579 case regex_constants::syntax_close_brace:
580 if(this->flags() & regbase::no_intervals)
581 return parse_literal();
582 fail(regex_constants::error_brace, this->m_position - this->m_base, "Found a closing repetition operator } with no corresponding {.");
584 case regex_constants::syntax_or:
585 if(this->flags() & regbase::bk_vbar)
588 result = parse_literal();
590 case regex_constants::syntax_digit:
591 return parse_backref();
592 case regex_constants::escape_type_start_buffer:
593 if(this->flags() & regbase::emacs_ex)
596 this->append_state(syntax_element_buffer_start);
599 result = parse_literal();
601 case regex_constants::escape_type_end_buffer:
602 if(this->flags() & regbase::emacs_ex)
605 this->append_state(syntax_element_buffer_end);
608 result = parse_literal();
610 case regex_constants::escape_type_word_assert:
611 if(this->flags() & regbase::emacs_ex)
614 this->append_state(syntax_element_word_boundary);
617 result = parse_literal();
619 case regex_constants::escape_type_not_word_assert:
620 if(this->flags() & regbase::emacs_ex)
623 this->append_state(syntax_element_within_word);
626 result = parse_literal();
628 case regex_constants::escape_type_left_word:
629 if(this->flags() & regbase::emacs_ex)
632 this->append_state(syntax_element_word_start);
635 result = parse_literal();
637 case regex_constants::escape_type_right_word:
638 if(this->flags() & regbase::emacs_ex)
641 this->append_state(syntax_element_word_end);
644 result = parse_literal();
647 if(this->flags() & regbase::emacs_ex)
657 basic_char_set<charT, traits> char_set;
660 char_set.add_class(this->m_word_mask);
661 if(0 == this->append_set(char_set))
663 fail(regex_constants::error_ctype, m_position - m_base);
673 return add_emacs_code(negate);
676 // not supported yet:
677 fail(regex_constants::error_escape, m_position - m_base, "The \\c and \\C escape sequences are not supported by POSIX basic regular expressions: try the Perl syntax instead.");
683 result = parse_literal();
// Handles an escape sequence in an extended/Perl-syntax expression by
// dispatching on the escape syntax type of the character at m_position.
//
// Visible cases:
//  - end of input: error_escape ("Incomplete escape sequence found.");
//  - class / not-class escapes: the single character is looked up as a
//    class name; if it is one, a character set containing that class is
//    appended, otherwise the escape is an ordinary unescaped literal;
//  - digit: back-reference (parse_backref());
//  - word start/end, buffer start/end, word boundary / within-word, soft
//    buffer end, combining and restart-continue: the matching state is
//    appended;
//  - escape_type_C: handled like a dot (parse_match_any());
//  - property / not-property: the class name is either the braced text or
//    the single following character; an unknown name is error_ctype;
//  - reset-start-mark, line-ending, extended back-reference and control-v
//    escapes get special treatment only for Perl syntax with Perl extensions
//    enabled; otherwise they go to the class-escape handling via
//    escape_type_class_jump;
//  - the line-ending escape temporarily points the parser at the string
//    from get_escape_R_string() and parses that with parse_all();
//  - the extended back-reference accepts an optional delimiter pair, an
//    optional leading '-' (relative reference) and either a number or, when
//    delimited, a capture name that is hashed;
//  - anything else is appended as an unescaped literal.
689 template <class charT, class traits>
690 bool basic_regex_parser<charT, traits>::parse_extended_escape()
693 if(m_position == m_end)
695 fail(regex_constants::error_escape, m_position - m_base, "Incomplete escape sequence found.");
698 bool negate = false; // in case this is a character class escape: \w \d etc
699 switch(this->m_traits.escape_syntax_type(*m_position))
701 case regex_constants::escape_type_not_class:
704 case regex_constants::escape_type_class:
// Target of the gotos below for escapes that fall back to class handling.
706 escape_type_class_jump:
707 typedef typename traits::char_class_type m_type;
708 m_type m = this->m_traits.lookup_classname(m_position, m_position+1);
711 basic_char_set<charT, traits> char_set;
714 char_set.add_class(m);
715 if(0 == this->append_set(char_set))
717 fail(regex_constants::error_ctype, m_position - m_base);
724 // not a class, just a regular unknown escape:
726 this->append_literal(unescape_character());
729 case regex_constants::syntax_digit:
730 return parse_backref();
731 case regex_constants::escape_type_left_word:
733 this->append_state(syntax_element_word_start);
735 case regex_constants::escape_type_right_word:
737 this->append_state(syntax_element_word_end);
739 case regex_constants::escape_type_start_buffer:
741 this->append_state(syntax_element_buffer_start);
743 case regex_constants::escape_type_end_buffer:
745 this->append_state(syntax_element_buffer_end);
747 case regex_constants::escape_type_word_assert:
749 this->append_state(syntax_element_word_boundary);
751 case regex_constants::escape_type_not_word_assert:
753 this->append_state(syntax_element_within_word);
755 case regex_constants::escape_type_Z:
757 this->append_state(syntax_element_soft_buffer_end);
759 case regex_constants::escape_type_Q:
761 case regex_constants::escape_type_C:
762 return parse_match_any();
763 case regex_constants::escape_type_X:
765 this->append_state(syntax_element_combining);
767 case regex_constants::escape_type_G:
769 this->append_state(syntax_element_restart_continue);
771 case regex_constants::escape_type_not_property:
774 case regex_constants::escape_type_property:
778 if(m_position == m_end)
780 fail(regex_constants::error_escape, m_position - m_base, "Incomplete property escape found.");
783 // maybe have \p{ddd}
784 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
786 const charT* base = m_position;
787 // skip forward until we find enclosing brace:
788 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
790 if(m_position == m_end)
792 fail(regex_constants::error_escape, m_position - m_base, "Closing } missing from property escape sequence.");
// ++base skips the opening brace; m_position++ steps over the closing one
// after its current value has been used as the end of the name.
795 m = this->m_traits.lookup_classname(++base, m_position++);
799 m = this->m_traits.lookup_classname(m_position, m_position+1);
804 basic_char_set<charT, traits> char_set;
807 char_set.add_class(m);
808 if(0 == this->append_set(char_set))
810 fail(regex_constants::error_ctype, m_position - m_base);
815 fail(regex_constants::error_ctype, m_position - m_base, "Escape sequence was neither a valid property nor a valid character class name.");
818 case regex_constants::escape_type_reset_start_mark:
819 if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
821 re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
823 pb->icase = this->flags() & regbase::icase;
824 this->m_pdata->m_data.align();
828 goto escape_type_class_jump;
829 case regex_constants::escape_type_line_ending:
830 if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
832 const charT* e = get_escape_R_string<charT>();
// Save the real input range so it can be put back after parsing e.
833 const charT* old_position = m_position;
834 const charT* old_end = m_end;
835 const charT* old_base = m_base;
838 m_end = e + traits::length(e);
839 bool r = parse_all();
840 m_position = ++old_position;
845 goto escape_type_class_jump;
846 case regex_constants::escape_type_extended_backref:
847 if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
849 bool have_brace = false;
850 bool negative = false;
851 static const char* incomplete_message = "Incomplete \\g escape found.";
852 if(++m_position == m_end)
854 fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
857 // maybe have \g{ddd}
858 regex_constants::syntax_type syn = this->m_traits.syntax_type(*m_position);
// syn_end stays 0 when the reference has no opening delimiter.
859 regex_constants::syntax_type syn_end = 0;
860 if((syn == regex_constants::syntax_open_brace)
861 || (syn == regex_constants::escape_type_left_word)
862 || (syn == regex_constants::escape_type_end_buffer))
864 if(++m_position == m_end)
866 fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
872 case regex_constants::syntax_open_brace:
873 syn_end = regex_constants::syntax_close_brace;
875 case regex_constants::escape_type_left_word:
876 syn_end = regex_constants::escape_type_right_word;
879 syn_end = regex_constants::escape_type_end_buffer;
883 negative = (*m_position == static_cast<charT>('-'));
884 if((negative) && (++m_position == m_end))
886 fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
889 const charT* pc = m_position;
890 boost::intmax_t i = this->m_traits.toi(pc, m_end, 10);
// A negative result means no number was read; with a delimiter present the
// text up to the closing delimiter is treated as a capture name.
891 if((i < 0) && syn_end)
893 // Check for a named capture, get the leftmost one if there is more than one:
894 const charT* base = m_position;
895 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != syn_end))
899 i = hash_value_from_capture_name(base, m_position);
// Converts a relative reference into an absolute mark index.
903 i = 1 + m_mark_count - i;
// NOTE(review): the shift counts (i-1) and get_id(i)-1 are not range-checked
// against the bit width of unsigned here, unlike the guard applied where
// m_backrefs bits are set in parse_open_paren() - confirm that out-of-range
// indices cannot reach this expression.
904 if(((i > 0) && (this->m_backrefs & (1u << (i-1)))) || ((i > 10000) && (this->m_pdata->get_id(i) > 0) && (this->m_backrefs & (1u << (this->m_pdata->get_id(i)-1)))))
907 re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
909 pb->icase = this->flags() & regbase::icase;
913 fail(regex_constants::error_backref, m_position - m_base);
919 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != syn_end))
921 fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
928 goto escape_type_class_jump;
929 case regex_constants::escape_type_control_v:
930 if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
931 goto escape_type_class_jump;
934 this->append_literal(unescape_character());
// Appends a wildcard state for a "match any character" element.
// The state's mask records how newlines are treated: force_not_newline when
// regbase::no_mod_s is set, force_newline when regbase::mod_s is set, and
// dont_care otherwise.
940 template <class charT, class traits>
941 bool basic_regex_parser<charT, traits>::parse_match_any()
944 // we have a '.' that can match any character:
947 static_cast<re_dot*>(
948 this->append_state(syntax_element_wild, sizeof(re_dot))
949 )->mask = static_cast<unsigned char>(this->flags() & regbase::no_mod_s
950 ? BOOST_REGEX_DETAIL_NS::force_not_newline
951 : this->flags() & regbase::mod_s ?
952 BOOST_REGEX_DETAIL_NS::force_newline : BOOST_REGEX_DETAIL_NS::dont_care);
// Wraps the most recently parsed item in a repeat.
//
// low, high   repetition bounds supplied by the caller.
//             NOTE(review): the statements that store them are not among
//             the lines shown here - confirm against the full source.
//
// Outline of the visible logic:
//  - For Perl syntax, or basic syntax with emacs_ex, a following question
//    mark is checked for (non-greedy marker per the comment below); for Perl
//    syntax only, a following plus is checked for (possessive repeat).
//  - With nothing parsed yet, error_badrepeat ("Nothing to repeat.").
//  - What gets repeated:
//      * after an end-mark, the whole group: the repeat is inserted at
//        m_paren_start;
//      * after a literal longer than one character, only its last
//        character, which is moved into a new literal state of its own;
//      * otherwise the last state, unless it is an assertion, alt, jump,
//        start-mark or backstep state, which is error_badrepeat.
//  - A repeat state is inserted in front of the item and a jump back to it
//    is appended behind; the repeat's alt offset points past the jump.
//  - For a possessive repeat a directly following quantifier is rejected,
//    and the construct is bracketed by start-mark/jump ... end-mark states,
//    as for an independent sub-expression.
956 template <class charT, class traits>
957 bool basic_regex_parser<charT, traits>::parse_repeat(std::size_t low, std::size_t high)
960 bool pocessive = false;
961 std::size_t insert_point;
963 // when we get to here we may have a non-greedy ? mark still to come:
965 if((m_position != m_end)
967 (0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
968 || ((regbase::basic_syntax_group|regbase::emacs_ex) == (this->flags() & (regbase::main_option_type | regbase::emacs_ex)))
972 // OK we have a perl or emacs regex, check for a '?':
973 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question)
978 // for perl regexes only check for possessive ++ repeats.
979 if((m_position != m_end)
980 && (0 == (this->flags() & regbase::main_option_type))
981 && (this->m_traits.syntax_type(*m_position) == regex_constants::syntax_plus))
987 if(0 == this->m_last_state)
989 fail(regex_constants::error_badrepeat, ::boost::BOOST_REGEX_DETAIL_NS::distance(m_base, m_position), "Nothing to repeat.");
992 if(this->m_last_state->type == syntax_element_endmark)
994 // insert a repeat before the '(' matching the last ')':
995 insert_point = this->m_paren_start;
997 else if((this->m_last_state->type == syntax_element_literal) && (static_cast<re_literal*>(this->m_last_state)->length > 1))
999 // the last state was a literal with more than one character, split it in two:
1000 re_literal* lit = static_cast<re_literal*>(this->m_last_state);
// The literal's characters are stored directly after the re_literal header.
1001 charT c = (static_cast<charT*>(static_cast<void*>(lit+1)))[lit->length - 1];
1003 // now append new state:
1004 lit = static_cast<re_literal*>(this->append_state(syntax_element_literal, sizeof(re_literal) + sizeof(charT)));
1006 (static_cast<charT*>(static_cast<void*>(lit+1)))[0] = c;
1007 insert_point = this->getoffset(this->m_last_state);
1011 // repeat the last state whatever it was, need to add some error checking here:
1012 switch(this->m_last_state->type)
1014 case syntax_element_start_line:
1015 case syntax_element_end_line:
1016 case syntax_element_word_boundary:
1017 case syntax_element_within_word:
1018 case syntax_element_word_start:
1019 case syntax_element_word_end:
1020 case syntax_element_buffer_start:
1021 case syntax_element_buffer_end:
1022 case syntax_element_alt:
1023 case syntax_element_soft_buffer_end:
1024 case syntax_element_restart_continue:
1025 case syntax_element_jump:
1026 case syntax_element_startmark:
1027 case syntax_element_backstep:
1028 // can't legally repeat any of the above:
1029 fail(regex_constants::error_badrepeat, m_position - m_base);
1035 insert_point = this->getoffset(this->m_last_state);
1038 // OK we now know what to repeat, so insert the repeat around it:
1040 re_repeat* rep = static_cast<re_repeat*>(this->insert_state(insert_point, syntax_element_rep, re_repeater_size));
1043 rep->greedy = greedy;
1044 rep->leading = false;
1045 // store our repeater position for later:
1046 std::ptrdiff_t rep_off = this->getoffset(rep);
1047 // and append a back jump to the repeat:
1048 re_jump* jmp = static_cast<re_jump*>(this->append_state(syntax_element_jump, sizeof(re_jump)));
1049 jmp->alt.i = rep_off - this->getoffset(jmp);
1050 this->m_pdata->m_data.align();
1051 // now fill in the alt jump for the repeat:
// rep is looked up again from its saved offset rather than reusing the
// earlier pointer.
1052 rep = static_cast<re_repeat*>(this->getaddress(rep_off));
1053 rep->alt.i = this->m_pdata->m_data.size() - rep_off;
1055 // If the repeat is possessive then bracket the repeat with a (?>...)
1056 // independent sub-expression construct:
1060 if(m_position != m_end)
1063 // Check for illegal following quantifier, we have to do this here, because
1064 // the extra states we insert below circumvents our usual error checking :-(
1066 switch(this->m_traits.syntax_type(*m_position))
1068 case regex_constants::syntax_star:
1069 case regex_constants::syntax_plus:
1070 case regex_constants::syntax_question:
1071 case regex_constants::syntax_open_brace:
1072 fail(regex_constants::error_badrepeat, m_position - m_base);
1076 re_brace* pb = static_cast<re_brace*>(this->insert_state(insert_point, syntax_element_startmark, sizeof(re_brace)));
1078 pb->icase = this->flags() & regbase::icase;
1079 jmp = static_cast<re_jump*>(this->insert_state(insert_point + sizeof(re_brace), syntax_element_jump, sizeof(re_jump)));
1080 this->m_pdata->m_data.align();
1081 jmp->alt.i = this->m_pdata->m_data.size() - this->getoffset(jmp);
1082 pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
1084 pb->icase = this->flags() & regbase::icase;
// Parses a bounded repeat written as a brace-delimited range: a minimum,
// optionally a comma and a maximum, with whitespace allowed around the
// numbers, then the closing brace. On success, calls parse_repeat(min, max).
//
// isbasic   passed as true from parse_basic_escape() and false from
//           parse_extended().
//           NOTE(review): presumably selects the form whose closing brace is
//           itself escaped; the test on it is not among the lines shown.
//
// Handling of malformed input, repeated at each step:
//  - if any of the main_option_type / no_perl_ex flag bits are set, the
//    problem is reported as error_brace;
//  - otherwise the parser rewinds to the opening brace and parses it as a
//    literal character.
// Values:
//  - the minimum must lie in 0..umax();
//  - a maximum that is missing or out of range becomes the largest
//    std::size_t;
//  - without a comma the maximum equals the minimum (per the comment below).
// The final check before parse_repeat() reports error_badbrace.
// NOTE(review): presumably when min exceeds max - the condition is not shown.
1089 template <class charT, class traits>
1090 bool basic_regex_parser<charT, traits>::parse_repeat_range(bool isbasic)
1092 static const char* incomplete_message = "Missing } in quantified repetition.";
1094 // parse a repeat-range:
1096 std::size_t min, max;
1099 while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
1101 if(this->m_position == this->m_end)
1103 if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
1105 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1108 // Treat the opening '{' as a literal character, rewind to start of error:
1110 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1111 return parse_literal();
1114 v = this->m_traits.toi(m_position, m_end, 10);
1116 if((v < 0) || (v > umax()))
1118 if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
1120 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1123 // Treat the opening '{' as a literal character, rewind to start of error:
1125 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1126 return parse_literal();
1128 while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
1130 if(this->m_position == this->m_end)
1132 if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
1134 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1137 // Treat the opening '{' as a literal character, rewind to start of error:
1139 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1140 return parse_literal();
1142 min = static_cast<std::size_t>(v);
1143 // see if we have a comma:
1144 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_comma)
1146 // move on and error check:
1149 while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
1151 if(this->m_position == this->m_end)
1153 if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
1155 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1158 // Treat the opening '{' as a literal character, rewind to start of error:
1160 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1161 return parse_literal();
1163 // get the value if any:
1164 v = this->m_traits.toi(m_position, m_end, 10);
// A missing (negative) or too-large value means "no upper limit".
1165 max = ((v >= 0) && (v < umax())) ? (std::size_t)v : (std::numeric_limits<std::size_t>::max)();
1169 // no comma, max = min:
1173 while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
1175 // OK now check trailing }:
1176 if(this->m_position == this->m_end)
1178 if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
1180 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1183 // Treat the opening '{' as a literal character, rewind to start of error:
1185 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1186 return parse_literal();
1190 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_escape)
1193 if(this->m_position == this->m_end)
1195 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1201 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1205 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_brace)
1209 // Treat the opening '{' as a literal character, rewind to start of error:
1211 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1212 return parse_literal();
1215 // finally go and add the repeat, unless error:
1219 // Backtrack to error location:
1221 while(this->m_traits.isctype(*m_position, this->m_word_mask)) --m_position;
1223 fail(regex_constants::error_badbrace, m_position - m_base);
1226 return parse_repeat(min, max);
// Handles an alternation operator in the expression being parsed.
// Reports error_empty when nothing usable precedes the operator (no previous
// state, or the previous state is a start-mark); that check also consults the
// syntax group and the no_empty_expressions flag. Otherwise it appends a
// trailing jump state, inserts an alt state at m_alt_insert_point, and
// records the jump's offset in m_alt_jumps.
1229 template <class charT, class traits>
1230 bool basic_regex_parser<charT, traits>::parse_alt()
1233 // error check: if there have been no previous states,
1234 // or if the last state was a '(' then error:
1237 ((this->m_last_state == 0) || (this->m_last_state->type == syntax_element_startmark))
1240 ((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group)
1242 ((this->flags() & regbase::no_empty_expressions) == 0)
1246 fail(regex_constants::error_empty, this->m_position - this->m_base, "A regular expression cannot start with the alternation operator |.");
1250 // Reset mark count if required:
// m_max_mark keeps the largest mark count seen so far; when m_mark_reset is
// non-negative the mark count restarts from it for the next alternative.
1252 if(m_max_mark < m_mark_count)
1253 m_max_mark = m_mark_count;
1254 if(m_mark_reset >= 0)
1255 m_mark_count = m_mark_reset;
1259 // we need to append a trailing jump:
// The jump is tracked by offset (not by pointer) from here on.
1261 re_syntax_base* pj = this->append_state(BOOST_REGEX_DETAIL_NS::syntax_element_jump, sizeof(re_jump));
1262 std::ptrdiff_t jump_offset = this->getoffset(pj);
1264 // now insert the alternative:
// NOTE(review): jump_offset is bumped by re_alt_size, presumably because the
// alt state is inserted ahead of the jump and displaces it - confirm against
// insert_state().
1266 re_alt* palt = static_cast<re_alt*>(this->insert_state(this->m_alt_insert_point, syntax_element_alt, re_alt_size));
1267 jump_offset += re_alt_size;
1268 this->m_pdata->m_data.align();
// The alt's target is stored relative to the alt state itself: the distance
// from palt to the current (aligned) end of the state data.
1269 palt->alt.i = this->m_pdata->m_data.size() - this->getoffset(palt);
1271 // update m_alt_insert_point so that the next alternate gets
1272 // inserted at the start of the second of the two we've just created:
1274 this->m_alt_insert_point = this->m_pdata->m_data.size();
1276 // the start of this alternative must carry a toggle-case state
1277 // if the current block has messed around with case changes:
1279 if(m_has_case_change)
1281 static_cast<re_case*>(
1282 this->append_state(syntax_element_toggle_case, sizeof(re_case))
1283 )->icase = this->m_icase;
1286 // push the alternative onto our stack, a recursive
1287 // implementation here is easier to understand (and faster
1288 // as it happens), but causes all kinds of stack overflow problems
1289 // on programs with small stacks (COM+).
1291 m_alt_jumps.push_back(jump_offset);
// Parses a bracketed character set, accumulating its members in a local
// basic_char_set and handing the result to append_set().
// Fails with error_brack if the input ends immediately, and with error_ctype
// if append_set() returns 0. The final return value is false when the input
// was exhausted (m_position == m_end), true otherwise.
1295 template <class charT, class traits>
1296 bool basic_regex_parser<charT, traits>::parse_set()
1298 static const char* incomplete_message = "Character set declaration starting with [ terminated prematurely - either no ] was found or the set had no content.";
1300 if(m_position == m_end)
1302 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1305 basic_char_set<charT, traits> char_set;
// base never changes; item_base moves past a leading '^' so that a ']'
// directly after '[' or '[^' can be recognised as a literal below.
1307 const charT* base = m_position; // where the '[' was
1308 const charT* item_base = m_position; // where the '[' or '^' was
1310 while(m_position != m_end)
1312 switch(this->m_traits.syntax_type(*m_position))
1314 case regex_constants::syntax_caret:
// A caret is only special at the very start of the set; anywhere else it
// is parsed as an ordinary set literal.
1315 if(m_position == base)
1319 item_base = m_position;
1322 parse_set_literal(char_set);
1324 case regex_constants::syntax_close_set:
// A close-set character in first position is a literal member, not the
// terminator.
1325 if(m_position == item_base)
1327 parse_set_literal(char_set);
1333 if(0 == this->append_set(char_set))
1335 fail(regex_constants::error_ctype, m_position - m_base);
1340 case regex_constants::syntax_open_set:
// Nested '[' : delegate to parse_inner_set().
1341 if(parse_inner_set(char_set))
1344 case regex_constants::syntax_escape:
1347 // look ahead and see if this is a character class shortcut
1351 if(this->m_traits.escape_syntax_type(*m_position)
1352 == regex_constants::escape_type_class)
// The class is looked up from the single escape character itself.
1354 char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1);
1357 char_set.add_class(m);
1362 else if(this->m_traits.escape_syntax_type(*m_position)
1363 == regex_constants::escape_type_not_class)
1365 // negated character class:
1366 char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1);
1369 char_set.add_negated_class(m);
1374 // not a character class, just a regular escape:
1376 parse_set_literal(char_set);
1380 parse_set_literal(char_set);
1384 return m_position != m_end;
// Parses a '[' encountered inside a character set and adds the result to
// char_set. Dispatches on the character after the '[':
//   dot   - collating element, handled as a set literal;
//   colon - character class name, looked up with lookup_classname();
//   equal - equivalence class, looked up with lookup_collatename();
//   other - handled as a set literal.
// Premature end of input is reported as error_brack, an unusable class name
// as error_ctype, and an unusable equivalence name as error_collate.
1387 template <class charT, class traits>
1388 bool basic_regex_parser<charT, traits>::parse_inner_set(basic_char_set<charT, traits>& char_set)
1390 static const char* incomplete_message = "Character class declaration starting with [ terminated prematurely - either no ] was found or the set had no content.";
1392 // we have either a character class [:name:]
1393 // a collating element [.name.]
1394 // or an equivalence class [=name=]
1396 if(m_end == ++m_position)
1398 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1401 switch(this->m_traits.syntax_type(*m_position))
1403 case regex_constants::syntax_dot:
1405 // a collating element is treated as a literal:
1408 parse_set_literal(char_set);
1410 case regex_constants::syntax_colon:
1412 // check that character classes are actually enabled:
// With the basic syntax group and no_char_classes set, the text is parsed
// as a plain set literal instead.
1413 if((this->flags() & (regbase::main_option_type | regbase::no_char_classes))
1414 == (regbase::basic_syntax_group | regbase::no_char_classes))
1417 parse_set_literal(char_set);
1421 if(m_end == ++m_position)
1423 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1426 const charT* name_first = m_position;
1427 // skip at least one character, then find the matching ':]'
1428 if(m_end == ++m_position)
1430 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1433 while((m_position != m_end)
1434 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_colon))
// [name_first, name_last) is the candidate class name.
1436 const charT* name_last = m_position;
1437 if(m_end == m_position)
1439 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
// The closing colon must be followed directly by the close-set character.
1442 if((m_end == ++m_position)
1443 || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
1445 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1449 // check for negated class:
1451 bool negated = false;
1452 if(this->m_traits.syntax_type(*name_first) == regex_constants::syntax_caret)
1457 typedef typename traits::char_class_type m_type;
1458 m_type m = this->m_traits.lookup_classname(name_first, name_last);
// Special case: a one-character name in an otherwise empty set that is
// closed immediately may denote a word-boundary assertion rather than a
// class; the character's escape type selects word start or word end.
1461 if(char_set.empty() && (name_last - name_first == 1))
1463 // maybe a special case:
1465 if( (m_position != m_end)
1466 && (this->m_traits.syntax_type(*m_position)
1467 == regex_constants::syntax_close_set))
1469 if(this->m_traits.escape_syntax_type(*name_first)
1470 == regex_constants::escape_type_left_word)
1473 this->append_state(syntax_element_word_start);
1476 if(this->m_traits.escape_syntax_type(*name_first)
1477 == regex_constants::escape_type_right_word)
1480 this->append_state(syntax_element_word_end);
// The error position reported is the start of the class name.
1485 fail(regex_constants::error_ctype, name_first - m_base);
1488 if(negated == false)
1489 char_set.add_class(m);
1491 char_set.add_negated_class(m);
1495 case regex_constants::syntax_equal:
1498 if(m_end == ++m_position)
1500 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1503 const charT* name_first = m_position;
1504 // skip at least one character, then find the matching '=]'
1505 if(m_end == ++m_position)
1507 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1510 while((m_position != m_end)
1511 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal))
1513 const charT* name_last = m_position;
1514 if(m_end == m_position)
1516 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1519 if((m_end == ++m_position)
1520 || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
1522 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
// Only collating names that resolve to one or two characters are accepted.
1525 string_type m = this->m_traits.lookup_collatename(name_first, name_last);
1526 if((0 == m.size()) || (m.size() > 2))
1528 fail(regex_constants::error_collate, name_first - m_base);
1537 char_set.add_equivalent(d);
1543 parse_set_literal(char_set);
// Parses one member of a character set: either a single literal or a range
// "a-b". The first literal is read with get_next_set_literal(); if a dash
// follows and the character after it is not the close-set character, a second
// literal is read and the pair is added with add_range(). Otherwise the first
// literal is added with add_single().
// Running out of input is reported as error_brack; a dash directly after a
// completed range that is not followed by the close-set character is reported
// as error_range.
1549 template <class charT, class traits>
1550 void basic_regex_parser<charT, traits>::parse_set_literal(basic_char_set<charT, traits>& char_set)
1552 digraph<charT> start_range(get_next_set_literal(char_set));
1553 if(m_end == m_position)
1555 fail(regex_constants::error_brack, m_position - m_base);
1558 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash)
1561 if(m_end == ++m_position)
1563 fail(regex_constants::error_brack, m_position - m_base);
// A dash followed by the close-set character is not a range operator.
1566 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set)
1568 digraph<charT> end_range = get_next_set_literal(char_set);
1569 char_set.add_range(start_range, end_range);
// Look at what follows the range: another dash is only acceptable when it
// is the last character before the close-set character.
1570 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash)
1572 if(m_end == ++m_position)
1574 fail(regex_constants::error_brack, m_position - m_base);
1577 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_set)
1583 fail(regex_constants::error_range, m_position - m_base);
1590 char_set.add_single(start_range);
// Reads the next literal inside a character set and returns it as a digraph
// (result.first, plus result.second for a two-character collating element).
// Dispatches on the syntax type of the current character:
//   dash     - taken literally; when the set already has content it must be
//              followed by the close-set character, else error_range;
//   escape   - taken literally when no_escape_in_lists is set, otherwise
//              decoded by unescape_character();
//   open set - a "[.name.]" collating element resolved through
//              lookup_collatename(); failures report error_collate;
//   other    - the character itself.
1593 template <class charT, class traits>
1594 digraph<charT> basic_regex_parser<charT, traits>::get_next_set_literal(basic_char_set<charT, traits>& char_set)
1596 digraph<charT> result;
1597 switch(this->m_traits.syntax_type(*m_position))
1599 case regex_constants::syntax_dash:
1600 if(!char_set.empty())
1602 // see if we are at the end of the set:
1603 if((++m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
1605 fail(regex_constants::error_range, m_position - m_base);
1610 result.first = *m_position++;
1612 case regex_constants::syntax_escape:
1613 // check to see if escapes are supported first:
1614 if(this->flags() & regex_constants::no_escape_in_lists)
1616 result = *m_position++;
1620 result = unescape_character();
1622 case regex_constants::syntax_open_set:
1624 if(m_end == ++m_position)
1626 fail(regex_constants::error_collate, m_position - m_base);
// Anything other than a dot after '[' is not a collating element.
1629 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_dot)
1632 result.first = *m_position;
1636 if(m_end == ++m_position)
1638 fail(regex_constants::error_collate, m_position - m_base);
1641 const charT* name_first = m_position;
1642 // skip at least one character, then find the matching '.]'
1643 if(m_end == ++m_position)
1645 fail(regex_constants::error_collate, name_first - m_base);
1648 while((m_position != m_end)
1649 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_dot))
// [name_first, name_last) is the collating element name.
1651 const charT* name_last = m_position;
1652 if(m_end == m_position)
1654 fail(regex_constants::error_collate, name_first - m_base);
// The closing dot must be followed directly by the close-set character.
1657 if((m_end == ++m_position)
1658 || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
1660 fail(regex_constants::error_collate, name_first - m_base);
// Only names that resolve to one or two characters are accepted.
1664 string_type s = this->m_traits.lookup_collatename(name_first, name_last);
1665 if(s.empty() || (s.size() > 2))
1667 fail(regex_constants::error_collate, name_first - m_base);
1670 result.first = s[0];
1672 result.second = s[1];
1678 result = *m_position++;
1684 // does a value fit in the specified charT type?
// Overload selected when charT is narrower than boost::intmax_t: v fits when
// shifting it right by the bit width of charT leaves zero, i.e. no bits are
// set at or above that width.
1686 template <class charT>
1687 bool valid_value(charT, boost::intmax_t v, const mpl::true_&)
1689 return (v >> (sizeof(charT) * CHAR_BIT)) == 0;
// Overload selected when charT is at least as wide as boost::intmax_t: no
// range check is performed.
1691 template <class charT>
1692 bool valid_value(charT, boost::intmax_t, const mpl::false_&)
1694 return true; // v will always fit in a charT
// Entry point: reports whether v can be represented in charT. Dispatches at
// compile time on whether sizeof(charT) is smaller than
// sizeof(boost::intmax_t); the charT argument only carries the type.
1696 template <class charT>
1697 bool valid_value(charT c, boost::intmax_t v)
1699 return valid_value(c, v, mpl::bool_<(sizeof(charT) < sizeof(boost::intmax_t))>());
// Decodes the escape sequence at m_position into a single charT.
// Dispatches on escape_syntax_type() of the current character: fixed control
// characters, an ASCII control escape, hexadecimal (plain or brace-delimited),
// octal, and named characters resolved with lookup_collatename(). Any other
// character is returned as itself.
// On a malformed sequence the position is first rewound to the escape
// character so that the error location points at the start of the escape,
// then fail() is called (error_escape, error_badbrace or error_collate).
1702 template <class charT, class traits>
1703 charT basic_regex_parser<charT, traits>::unescape_character()
1706 #pragma warning(push)
1707 #pragma warning(disable:4127)
1710 if(m_position == m_end)
1712 fail(regex_constants::error_escape, m_position - m_base, "Escape sequence terminated prematurely.");
1715 switch(this->m_traits.escape_syntax_type(*m_position))
1717 case regex_constants::escape_type_control_a:
1718 result = charT('\a');
1720 case regex_constants::escape_type_e:
1723 case regex_constants::escape_type_control_f:
1724 result = charT('\f');
1726 case regex_constants::escape_type_control_n:
1727 result = charT('\n');
1729 case regex_constants::escape_type_control_r:
1730 result = charT('\r');
1732 case regex_constants::escape_type_control_t:
1733 result = charT('\t');
1735 case regex_constants::escape_type_control_v:
1736 result = charT('\v');
1738 case regex_constants::escape_type_word_assert:
// In this context the word-assert escape yields the backspace character.
1739 result = charT('\b');
1741 case regex_constants::escape_type_ascii_control:
1743 if(m_position == m_end)
1745 // Rewind to start of escape:
1747 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1748 fail(regex_constants::error_escape, m_position - m_base, "ASCII escape sequence terminated prematurely.");
// The control code is the following character reduced modulo 32.
1751 result = static_cast<charT>(*m_position % 32);
1753 case regex_constants::escape_type_hex:
1755 if(m_position == m_end)
1757 // Rewind to start of escape:
1759 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1760 fail(regex_constants::error_escape, m_position - m_base, "Hexadecimal escape sequence terminated prematurely.");
1763 // maybe have \x{ddd}
1764 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
1767 if(m_position == m_end)
1769 // Rewind to start of escape:
1771 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1772 fail(regex_constants::error_escape, m_position - m_base, "Missing } in hexadecimal escape sequence.");
// Braced form: read hex digits up to m_end, then require a closing brace and
// a value no larger than the maximum of charT (when numeric_limits knows it).
1775 boost::intmax_t i = this->m_traits.toi(m_position, m_end, 16);
1776 if((m_position == m_end)
1778 || ((std::numeric_limits<charT>::is_specialized) && (i > (boost::intmax_t)(std::numeric_limits<charT>::max)()))
1779 || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
1781 // Rewind to start of escape:
1783 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1784 fail(regex_constants::error_badbrace, m_position - m_base, "Hexadecimal escape sequence was invalid.");
// Unbraced form: at most two hex digits are consumed, fewer if the input
// ends sooner.
1792 std::ptrdiff_t len = (std::min)(static_cast<std::ptrdiff_t>(2), static_cast<std::ptrdiff_t>(m_end - m_position));
1793 boost::intmax_t i = this->m_traits.toi(m_position, m_position + len, 16);
1795 || !valid_value(charT(0), i))
1797 // Rewind to start of escape:
1799 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1800 fail(regex_constants::error_escape, m_position - m_base, "Escape sequence did not encode a valid character.");
1806 case regex_constants::syntax_digit:
1808 // an octal escape sequence, the first character must be a zero
1809 // followed by up to 3 octal digits:
// len caps the sequence at four characters or the remaining input. The first
// digit is probed through a copy (bp) so m_position is not advanced by the
// probe.
1810 std::ptrdiff_t len = (std::min)(::boost::BOOST_REGEX_DETAIL_NS::distance(m_position, m_end), static_cast<std::ptrdiff_t>(4));
1811 const charT* bp = m_position;
1812 boost::intmax_t val = this->m_traits.toi(bp, bp + 1, 8);
1815 // Rewind to start of escape:
1817 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1818 // Oops not an octal escape after all:
1819 fail(regex_constants::error_escape, m_position - m_base, "Invalid octal escape sequence.");
// Now parse the whole sequence for real, advancing m_position.
1822 val = this->m_traits.toi(m_position, m_position + len, 8);
1823 if((val < 0) || (val > (boost::intmax_t)(std::numeric_limits<charT>::max)()))
1825 // Rewind to start of escape:
1827 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1828 fail(regex_constants::error_escape, m_position - m_base, "Octal escape sequence is invalid.");
1831 return static_cast<charT>(val);
1833 case regex_constants::escape_type_named_char:
1836 if(m_position == m_end)
1838 // Rewind to start of escape:
1840 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1841 fail(regex_constants::error_escape, m_position - m_base);
1844 // maybe have \N{name}
1845 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
1847 const charT* base = m_position;
1848 // skip forward until we find enclosing brace:
1849 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
1851 if(m_position == m_end)
1853 // Rewind to start of escape:
1855 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1856 fail(regex_constants::error_escape, m_position - m_base);
// The name is the text strictly between the braces: ++base steps over the
// opening brace, and m_position++ passes the closing brace's position as the
// end of the name before moving beyond it.
1859 string_type s = this->m_traits.lookup_collatename(++base, m_position++);
1862 // Rewind to start of escape:
1864 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1865 fail(regex_constants::error_collate, m_position - m_base);
1873 // fall through is a failure:
1874 // Rewind to start of escape:
1876 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1877 fail(regex_constants::error_escape, m_position - m_base);
// Any other escaped character stands for itself.
1881 result = *m_position;
1887 #pragma warning(pop)
// Parses an escaped digit, which is either a back-reference or an octal
// escape. Requires m_position != m_end on entry (asserted).
// - Digit 0, or Perl syntax with no_bk_refs set: decoded by
//   unescape_character() and appended as a literal.
// - A positive digit whose bit (i-1) is set in m_backrefs: a backref state is
//   appended.
// - Anything else: the position is rewound to the escape character and
//   error_backref is reported.
1891 template <class charT, class traits>
1892 bool basic_regex_parser<charT, traits>::parse_backref()
1894 BOOST_ASSERT(m_position != m_end);
// Only a single digit is examined, through a copy so m_position stays put.
1895 const charT* pc = m_position;
1896 boost::intmax_t i = this->m_traits.toi(pc, pc + 1, 10);
1897 if((i == 0) || (((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group) && (this->flags() & regbase::no_bk_refs)))
1899 // not a backref at all but an octal escape sequence:
1900 charT c = unescape_character();
1901 this->append_literal(c);
1903 else if((i > 0) && (this->m_backrefs & (1u << (i-1))))
1906 re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
// The state records the case-insensitivity flag in effect at this point.
1908 pb->icase = this->flags() & regbase::icase;
1912 // Rewind to start of escape:
1914 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1915 fail(regex_constants::error_backref, m_position - m_base);
// Parses a \Q...\E quoted sequence: everything between the two escapes is
// appended to the expression as literal characters.
// The sequence may also end at the end of the expression without a closing
// \E. An escape character that is the very last character of the input is
// reported as error_escape.
1921 template <class charT, class traits>
1922 bool basic_regex_parser<charT, traits>::parse_QE()
1925 #pragma warning(push)
1926 #pragma warning(disable:4127)
1929 // parse a \Q...\E sequence:
1931 ++m_position; // skip the Q
1932 const charT* start = m_position;
// Scan forward to the next escape character (or the end of the input).
1936 while((m_position != m_end)
1937 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape))
1939 if(m_position == m_end)
1941 // a \Q...\E sequence may terminate with the end of the expression:
1945 if(++m_position == m_end) // skip the escape
1947 fail(regex_constants::error_escape, m_position - m_base, "Unterminated \\Q...\\E sequence.");
1950 // check to see if it's a \E:
1951 if(this->m_traits.escape_syntax_type(*m_position) == regex_constants::escape_type_E)
// end is set two characters back from m_position so that the escape and the
// E are excluded from the literal text.
1954 end = m_position - 2;
1957 // otherwise go round again:
1960 // now add all the character between the two escapes as literals:
1964 this->append_literal(*start);
1969 #pragma warning(pop)
1973 template <class charT, class traits>
1974 bool basic_regex_parser<charT, traits>::parse_perl_extension()
1976 if(++m_position == m_end)
1978 // Rewind to start of (? sequence:
1980 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
1981 fail(regex_constants::error_perl_extension, m_position - m_base);
1985 // treat comments as a special case, as these
1986 // are the only ones that don't start with a leading
1989 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_hash)
1991 while((m_position != m_end)
1992 && (this->m_traits.syntax_type(*m_position++) != regex_constants::syntax_close_mark))
1997 // backup some state, and prepare the way:
2000 std::ptrdiff_t jump_offset = 0;
2001 re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
2002 pb->icase = this->flags() & regbase::icase;
2003 std::ptrdiff_t last_paren_start = this->getoffset(pb);
2004 // back up insertion point for alternations, and set new point:
2005 std::ptrdiff_t last_alt_point = m_alt_insert_point;
2006 this->m_pdata->m_data.align();
2007 m_alt_insert_point = this->m_pdata->m_data.size();
2008 std::ptrdiff_t expected_alt_point = m_alt_insert_point;
2009 bool restore_flags = true;
2010 regex_constants::syntax_option_type old_flags = this->flags();
2011 bool old_case_change = m_has_case_change;
2012 m_has_case_change = false;
2014 int mark_reset = m_mark_reset;
2015 int max_mark = m_max_mark;
2017 m_max_mark = m_mark_count;
2020 // select the actual extension used:
2022 switch(this->m_traits.syntax_type(*m_position))
2024 case regex_constants::syntax_or:
2025 m_mark_reset = m_mark_count;
2027 case regex_constants::syntax_colon:
2029 // a non-capturing mark:
2031 pb->index = markid = 0;
2034 case regex_constants::syntax_digit:
2037 // a recursive subexpression:
2039 v = this->m_traits.toi(m_position, m_end, 10);
2040 if((v < 0) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2042 // Rewind to start of (? sequence:
2044 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2045 fail(regex_constants::error_perl_extension, m_position - m_base, "The recursive sub-expression refers to an invalid marking group, or is unterminated.");
2049 pb->index = markid = 0;
2050 re_recurse* pr = static_cast<re_recurse*>(this->append_state(syntax_element_recurse, sizeof(re_recurse)));
2053 static_cast<re_case*>(
2054 this->append_state(syntax_element_toggle_case, sizeof(re_case))
2055 )->icase = this->flags() & regbase::icase;
2058 case regex_constants::syntax_plus:
2060 // A forward-relative recursive subexpression:
2063 v = this->m_traits.toi(m_position, m_end, 10);
2064 if((v <= 0) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2066 // Rewind to start of (? sequence:
2068 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2069 fail(regex_constants::error_perl_extension, m_position - m_base, "An invalid or unterminated recursive sub-expression.");
2073 goto insert_recursion;
2074 case regex_constants::syntax_dash:
2076 // Possibly a backward-relative recursive subexpression:
2079 v = this->m_traits.toi(m_position, m_end, 10);
2083 // Oops not a relative recursion at all, but a (?-imsx) group:
2084 goto option_group_jump;
2086 v = m_mark_count + 1 - v;
2089 // Rewind to start of (? sequence:
2091 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2092 fail(regex_constants::error_perl_extension, m_position - m_base, "An invalid or unterminated recursive sub-expression.");
2095 goto insert_recursion;
2096 case regex_constants::syntax_equal:
2097 pb->index = markid = -1;
2099 jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
2100 this->m_pdata->m_data.align();
2101 m_alt_insert_point = this->m_pdata->m_data.size();
2103 case regex_constants::syntax_not:
2104 pb->index = markid = -2;
2106 jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
2107 this->m_pdata->m_data.align();
2108 m_alt_insert_point = this->m_pdata->m_data.size();
2110 case regex_constants::escape_type_left_word:
2112 // a lookbehind assertion:
2113 if(++m_position == m_end)
2115 // Rewind to start of (? sequence:
2117 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2118 fail(regex_constants::error_perl_extension, m_position - m_base);
2121 regex_constants::syntax_type t = this->m_traits.syntax_type(*m_position);
2122 if(t == regex_constants::syntax_not)
2123 pb->index = markid = -2;
2124 else if(t == regex_constants::syntax_equal)
2125 pb->index = markid = -1;
2128 // Probably a named capture which also starts (?< :
2131 goto named_capture_jump;
2134 jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
2135 this->append_state(syntax_element_backstep, sizeof(re_brace));
2136 this->m_pdata->m_data.align();
2137 m_alt_insert_point = this->m_pdata->m_data.size();
2140 case regex_constants::escape_type_right_word:
2142 // an independent sub-expression:
2144 pb->index = markid = -3;
2146 jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
2147 this->m_pdata->m_data.align();
2148 m_alt_insert_point = this->m_pdata->m_data.size();
2150 case regex_constants::syntax_open_mark:
2152 // a conditional expression:
2153 pb->index = markid = -4;
2154 if(++m_position == m_end)
2156 // Rewind to start of (? sequence:
2158 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2159 fail(regex_constants::error_perl_extension, m_position - m_base);
2162 v = this->m_traits.toi(m_position, m_end, 10);
2163 if(m_position == m_end)
2165 // Rewind to start of (? sequence:
2167 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2168 fail(regex_constants::error_perl_extension, m_position - m_base);
2171 if(*m_position == charT('R'))
2173 if(++m_position == m_end)
2175 // Rewind to start of (? sequence:
2177 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2178 fail(regex_constants::error_perl_extension, m_position - m_base);
2181 if(*m_position == charT('&'))
2183 const charT* base = ++m_position;
2184 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2186 if(m_position == m_end)
2188 // Rewind to start of (? sequence:
2190 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2191 fail(regex_constants::error_perl_extension, m_position - m_base);
2194 v = -static_cast<int>(hash_value_from_capture_name(base, m_position));
2198 v = -this->m_traits.toi(m_position, m_end, 10);
2200 re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
2201 br->index = v < 0 ? (v - 1) : 0;
2202 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
2204 // Rewind to start of (? sequence:
2206 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2207 fail(regex_constants::error_perl_extension, m_position - m_base);
2210 if(++m_position == m_end)
2212 // Rewind to start of (? sequence:
2214 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2215 fail(regex_constants::error_perl_extension, m_position - m_base);
2219 else if((*m_position == charT('\'')) || (*m_position == charT('<')))
2221 const charT* base = ++m_position;
2222 while((m_position != m_end) && (*m_position != charT('>')) && (*m_position != charT('\'')))
2224 if(m_position == m_end)
2226 // Rewind to start of (? sequence:
2228 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2229 fail(regex_constants::error_perl_extension, m_position - m_base);
2232 v = static_cast<int>(hash_value_from_capture_name(base, m_position));
2233 re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
2235 if(((*m_position != charT('>')) && (*m_position != charT('\''))) || (++m_position == m_end))
2237 // Rewind to start of (? sequence:
2239 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2240 fail(regex_constants::error_perl_extension, m_position - m_base, "Unterminated named capture.");
2243 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
2245 // Rewind to start of (? sequence:
2247 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2248 fail(regex_constants::error_perl_extension, m_position - m_base);
2251 if(++m_position == m_end)
2253 // Rewind to start of (? sequence:
2255 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2256 fail(regex_constants::error_perl_extension, m_position - m_base);
2260 else if(*m_position == charT('D'))
2262 const char* def = "DEFINE";
2263 while(*def && (m_position != m_end) && (*m_position == charT(*def)))
2264 ++m_position, ++def;
2265 if((m_position == m_end) || *def)
2267 // Rewind to start of (? sequence:
2269 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2270 fail(regex_constants::error_perl_extension, m_position - m_base);
2273 re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
2274 br->index = 9999; // special magic value!
2275 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
2277 // Rewind to start of (? sequence:
2279 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2280 fail(regex_constants::error_perl_extension, m_position - m_base);
2283 if(++m_position == m_end)
2285 // Rewind to start of (? sequence:
2287 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2288 fail(regex_constants::error_perl_extension, m_position - m_base);
2294 re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
2296 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
2298 // Rewind to start of (? sequence:
2300 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2301 fail(regex_constants::error_perl_extension, m_position - m_base);
2304 if(++m_position == m_end)
2306 // Rewind to start of (? sequence:
2308 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2309 fail(regex_constants::error_perl_extension, m_position - m_base);
2315 // verify that we have a lookahead or lookbehind assert:
2316 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_question)
2318 // Rewind to start of (? sequence:
2320 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2321 fail(regex_constants::error_perl_extension, m_position - m_base);
2324 if(++m_position == m_end)
2326 // Rewind to start of (? sequence:
2328 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2329 fail(regex_constants::error_perl_extension, m_position - m_base);
2332 if(this->m_traits.syntax_type(*m_position) == regex_constants::escape_type_left_word)
2334 if(++m_position == m_end)
2336 // Rewind to start of (? sequence:
2338 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2339 fail(regex_constants::error_perl_extension, m_position - m_base);
2342 if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal)
2343 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not))
2345 // Rewind to start of (? sequence:
2347 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2348 fail(regex_constants::error_perl_extension, m_position - m_base);
2355 if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal)
2356 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not))
2358 // Rewind to start of (? sequence:
2360 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2361 fail(regex_constants::error_perl_extension, m_position - m_base);
2369 case regex_constants::syntax_close_mark:
2370 // Rewind to start of (? sequence:
2372 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2373 fail(regex_constants::error_perl_extension, m_position - m_base);
2375 case regex_constants::escape_type_end_buffer:
2377 name_delim = *m_position;
2380 if(0 == (this->flags() & regbase::nosubs))
2382 markid = ++m_mark_count;
2383 #ifndef BOOST_NO_STD_DISTANCE
2384 if(this->flags() & regbase::save_subexpression_location)
2385 this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>(std::distance(m_base, m_position) - 2, 0));
2387 if(this->flags() & regbase::save_subexpression_location)
2388 this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>((m_position - m_base) - 2, 0));
2392 const charT* base = ++m_position;
2393 if(m_position == m_end)
2395 // Rewind to start of (? sequence:
2397 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2398 fail(regex_constants::error_perl_extension, m_position - m_base);
2401 while((m_position != m_end) && (*m_position != name_delim))
2403 if(m_position == m_end)
2405 // Rewind to start of (? sequence:
2407 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2408 fail(regex_constants::error_perl_extension, m_position - m_base);
2411 this->m_pdata->set_name(base, m_position, markid);
2416 if(*m_position == charT('R'))
2420 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
2422 // Rewind to start of (? sequence:
2424 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2425 fail(regex_constants::error_perl_extension, m_position - m_base);
2428 goto insert_recursion;
2430 if(*m_position == charT('&'))
2433 const charT* base = m_position;
2434 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2436 if(m_position == m_end)
2438 // Rewind to start of (? sequence:
2440 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2441 fail(regex_constants::error_perl_extension, m_position - m_base);
2444 v = static_cast<int>(hash_value_from_capture_name(base, m_position));
2445 goto insert_recursion;
2447 if(*m_position == charT('P'))
2450 if(m_position == m_end)
2452 // Rewind to start of (? sequence:
2454 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2455 fail(regex_constants::error_perl_extension, m_position - m_base);
2458 if(*m_position == charT('>'))
2461 const charT* base = m_position;
2462 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2464 if(m_position == m_end)
2466 // Rewind to start of (? sequence:
2468 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2469 fail(regex_constants::error_perl_extension, m_position - m_base);
2472 v = static_cast<int>(hash_value_from_capture_name(base, m_position));
2473 goto insert_recursion;
2477 // let's assume that we have a (?imsx) group and try to parse it:
2480 regex_constants::syntax_option_type opts = parse_options();
2481 if(m_position == m_end)
2483 // Rewind to start of (? sequence:
2485 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2486 fail(regex_constants::error_perl_extension, m_position - m_base);
2489 // make a note of whether we have a case change:
2490 m_has_case_change = ((opts & regbase::icase) != (this->flags() & regbase::icase));
2491 pb->index = markid = 0;
2492 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark)
2494 // update flags and carry on as normal:
2496 restore_flags = false;
2497 old_case_change |= m_has_case_change; // defer end of scope by one ')'
2499 else if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_colon)
2501 // update flags and carry on until the matching ')' is found:
2507 // Rewind to start of (? sequence:
2509 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2510 fail(regex_constants::error_perl_extension, m_position - m_base);
2514 // finally append a case change state if we need it:
2515 if(m_has_case_change)
2517 static_cast<re_case*>(
2518 this->append_state(syntax_element_toggle_case, sizeof(re_case))
2519 )->icase = opts & regbase::icase;
2524 // now recursively add more states, this will terminate when we get to a
2529 // Unwind alternatives:
2531 if(0 == unwind_alts(last_paren_start))
2533 // Rewind to start of (? sequence:
2535 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2536 fail(regex_constants::error_perl_extension, m_position - m_base, "Invalid alternation operators within (?...) block.");
2540 // we either have a ')' or we have run out of characters prematurely:
2542 if(m_position == m_end)
2544 // Rewind to start of (? sequence:
2546 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2547 this->fail(regex_constants::error_paren, ::boost::BOOST_REGEX_DETAIL_NS::distance(m_base, m_end));
2550 BOOST_ASSERT(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark);
2553 // restore the flags:
2557 // append a case change state if we need it:
2558 if(m_has_case_change)
2560 static_cast<re_case*>(
2561 this->append_state(syntax_element_toggle_case, sizeof(re_case))
2562 )->icase = old_flags & regbase::icase;
2564 this->flags(old_flags);
2567 // set up the jump pointer if we have one:
2571 this->m_pdata->m_data.align();
2572 re_jump* jmp = static_cast<re_jump*>(this->getaddress(jump_offset));
2573 jmp->alt.i = this->m_pdata->m_data.size() - this->getoffset(jmp);
2574 if((this->m_last_state == jmp) && (markid != -2))
2576 // Oops... we didn't have anything inside the assertion.
2577 // Note we don't get here for negated forward lookahead as (?!)
2578 // does have some uses.
2579 // Rewind to start of (? sequence:
2581 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2582 fail(regex_constants::error_perl_extension, m_position - m_base, "Invalid or empty zero width assertion.");
2587 // verify that if this is conditional expression, that we do have
2588 // an alternative, if not add one:
2592 re_syntax_base* b = this->getaddress(expected_alt_point);
2593 // Make sure we have exactly one alternative following this state:
2594 if(b->type != syntax_element_alt)
2596 re_alt* alt = static_cast<re_alt*>(this->insert_state(expected_alt_point, syntax_element_alt, sizeof(re_alt)));
2597 alt->alt.i = this->m_pdata->m_data.size() - this->getoffset(alt);
2599 else if(this->getaddress(static_cast<re_alt*>(b)->alt.i, b)->type == syntax_element_alt)
2601 // Can't have seen more than one alternative:
2602 // Rewind to start of (? sequence:
2604 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2605 fail(regex_constants::error_bad_pattern, m_position - m_base, "More than one alternation operator | was encountered inside a conditional expression.");
2610 // We must *not* have seen an alternative inside a (DEFINE) block:
2611 b = this->getaddress(b->next.i, b);
2612 if((b->type == syntax_element_assert_backref) && (static_cast<re_brace*>(b)->index == 9999))
2614 // Rewind to start of (? sequence:
2616 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2617 fail(regex_constants::error_bad_pattern, m_position - m_base, "Alternation operators are not allowed inside a DEFINE block.");
2621 // check for invalid repetition of next state:
2622 b = this->getaddress(expected_alt_point);
2623 b = this->getaddress(static_cast<re_alt*>(b)->next.i, b);
2624 if((b->type != syntax_element_assert_backref)
2625 && (b->type != syntax_element_startmark))
2627 // Rewind to start of (? sequence:
2629 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2630 fail(regex_constants::error_badrepeat, m_position - m_base, "A repetition operator cannot be applied to a zero-width assertion.");
2635 // append closing parenthesis state:
2637 pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
2639 pb->icase = this->flags() & regbase::icase;
2640 this->m_paren_start = last_paren_start;
2642 // restore the alternate insertion point:
2644 this->m_alt_insert_point = last_alt_point;
2646 // and the case change data:
2648 m_has_case_change = old_case_change;
2650 // And the mark_reset data:
2652 if(m_max_mark > m_mark_count)
2654 m_mark_count = m_max_mark;
2656 m_mark_reset = mark_reset;
2657 m_max_mark = max_mark;
2662 #ifndef BOOST_NO_STD_DISTANCE
2663 if(this->flags() & regbase::save_subexpression_location)
2664 this->m_pdata->m_subs.at(markid - 1).second = std::distance(m_base, m_position) - 1;
2666 if(this->flags() & regbase::save_subexpression_location)
2667 this->m_pdata->m_subs.at(markid - 1).second = (m_position - m_base) - 1;
2670 // allow backrefs to this mark:
2672 if(markid < (int)(sizeof(unsigned) * CHAR_BIT))
2673 this->m_backrefs |= 1u << (markid - 1);
// match_verb: checks the pattern text at m_position against the narrow
// string `verb`, comparing each verb character (cast to charT) with the
// character under m_position.
//
// NOTE(review): the embedded line numbering skips values inside this function
// (2680-2682, 2684, 2687-2688, 2690-2691, 2694 onwards), so the loop header,
// braces and return statements are not visible in this extract. The comments
// below describe only the statements that are visible.
2678 template <class charT, class traits>
2679 bool basic_regex_parser<charT, traits>::match_verb(const char* verb)
// Current verb character differs from the pattern character: walk m_position
// backwards until it sits on an open-mark character, then call fail() with
// error_perl_extension and that offset from m_base.
2683 if(static_cast<charT>(*verb) != *m_position)
2685 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2686 fail(regex_constants::error_perl_extension, m_position - m_base);
// Advance to the next pattern character; reaching m_end here is handled the
// same way (rewind to the open mark, then fail with error_perl_extension).
// NOTE(review): m_position == m_end when this branch is entered and the
// rewind loop dereferences m_position; line 2691 is not visible here, so
// confirm that m_position is stepped back before the loop runs.
2689 if(++m_position == m_end)
2692 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2693 fail(regex_constants::error_perl_extension, m_position - m_base);
// parse_perl_verb: parses a "(*" verb sequence (see the "(* sequence" rewind
// comments below) and appends the corresponding state to the machine.
//
// The visible calls are match_verb("AIL"), "CCEPT", "OMMIT", "RUNE", "KIP"
// and "HEN"; these are presumably the tails of FAIL / ACCEPT / COMMIT /
// PRUNE / SKIP / THEN, selected by a switch on the leading letter.
// NOTE(review): that switch, its case labels, the braces and the return
// statements are not visible in this extract (the embedded numbering has
// gaps), so confirm against the full file.
//
// Every error path has the same shape: walk m_position back to the nearest
// open-mark character and call fail() with error_perl_extension and the
// offset of that character from m_base.
2701 template <class charT, class traits>
2702 bool basic_regex_parser<charT, traits>::parse_perl_verb()
// Step past the current character; running out of input is an error.
2704 if(++m_position == m_end)
2706 // Rewind to start of (* sequence:
2708 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2709 fail(regex_constants::error_perl_extension, m_position - m_base);
// First visible verb branch: step past the current character again.
2715 if(++m_position == m_end)
2717 // Rewind to start of (* sequence:
2719 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2720 fail(regex_constants::error_perl_extension, m_position - m_base);
// Accepted when a close mark follows immediately, or when the remaining
// characters match "AIL".
2723 if((this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark) || match_verb("AIL"))
// Whichever spelling matched, a close mark must be at m_position now.
2725 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2727 // Rewind to start of (* sequence:
2729 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2730 fail(regex_constants::error_perl_extension, m_position - m_base);
// Emit the fail state for this verb.
2734 this->append_state(syntax_element_fail);
// Next visible verb branch: tail "CCEPT".
2739 if(++m_position == m_end)
2741 // Rewind to start of (* sequence:
2743 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2744 fail(regex_constants::error_perl_extension, m_position - m_base);
2747 if(match_verb("CCEPT"))
// The verb must be terminated by a close mark.
2749 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2751 // Rewind to start of (* sequence:
2753 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2754 fail(regex_constants::error_perl_extension, m_position - m_base);
// Emit the accept state for this verb.
2758 this->append_state(syntax_element_accept);
// Next visible verb branch: tail "OMMIT".
2763 if(++m_position == m_end)
2765 // Rewind to start of (* sequence:
2767 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2768 fail(regex_constants::error_perl_extension, m_position - m_base);
2771 if(match_verb("OMMIT"))
// The verb must be terminated by a close mark.
2773 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2775 // Rewind to start of (* sequence:
2777 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2778 fail(regex_constants::error_perl_extension, m_position - m_base);
// Emit a commit state tagged commit_commit, and set m_disable_match_any on
// the shared regex data.
2782 static_cast<re_commit*>(this->append_state(syntax_element_commit, sizeof(re_commit)))->action = commit_commit;
2783 this->m_pdata->m_disable_match_any = true;
// Next visible verb branch: tail "RUNE".
2788 if(++m_position == m_end)
2790 // Rewind to start of (* sequence:
2792 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2793 fail(regex_constants::error_perl_extension, m_position - m_base);
2796 if(match_verb("RUNE"))
// The verb must be terminated by a close mark.
2798 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2800 // Rewind to start of (* sequence:
2802 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2803 fail(regex_constants::error_perl_extension, m_position - m_base);
// Emit a commit state tagged commit_prune, and set m_disable_match_any.
2807 static_cast<re_commit*>(this->append_state(syntax_element_commit, sizeof(re_commit)))->action = commit_prune;
2808 this->m_pdata->m_disable_match_any = true;
// Next visible verb branch: tail "KIP".
2813 if(++m_position == m_end)
2815 // Rewind to start of (* sequence:
2817 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2818 fail(regex_constants::error_perl_extension, m_position - m_base);
2821 if(match_verb("KIP"))
// The verb must be terminated by a close mark.
2823 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2825 // Rewind to start of (* sequence:
2827 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2828 fail(regex_constants::error_perl_extension, m_position - m_base);
// Emit a commit state tagged commit_skip, and set m_disable_match_any.
2832 static_cast<re_commit*>(this->append_state(syntax_element_commit, sizeof(re_commit)))->action = commit_skip;
2833 this->m_pdata->m_disable_match_any = true;
// Last visible verb branch: tail "HEN".
2838 if(++m_position == m_end)
2840 // Rewind to start of (* sequence:
2842 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2843 fail(regex_constants::error_perl_extension, m_position - m_base);
2846 if(match_verb("HEN"))
// The verb must be terminated by a close mark.
2848 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2850 // Rewind to start of (* sequence:
2852 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2853 fail(regex_constants::error_perl_extension, m_position - m_base);
// Emit the then state, and set m_disable_match_any.
2857 this->append_state(syntax_element_then);
2858 this->m_pdata->m_disable_match_any = true;
// add_emacs_code: builds a character set for an emacs-style \sx / \Sx escape
// and appends it to the state machine via append_set().
//
// NOTE(review): the switch on the syntax-code character and its case labels
// are not visible in this extract (the embedded numbering has gaps), so which
// code letter selects which group of additions below cannot be confirmed from
// here. The use of `negate` is also not visible; presumably it negates
// char_set on a missing line - TODO confirm.
2866 template <class charT, class traits>
2867 bool basic_regex_parser<charT, traits>::add_emacs_code(bool negate)
2870 // parses an emacs style \sx or \Sx construct.
// Step onto the syntax-code character; if the pattern ends here, rewind to
// the escape character and fail with error_escape at its offset.
2872 if(++m_position == m_end)
2874 // Rewind to start of sequence:
2876 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
2877 fail(regex_constants::error_escape, m_position - m_base);
// Set under construction.
2880 basic_char_set<charT, traits> char_set;
// Class name "punct" as a charT array, looked up through the traits below.
2884 static const charT s_punct[5] = { 'p', 'u', 'n', 'c', 't', };
// Group: the class held in m_mask_space.
2890 char_set.add_class(this->m_mask_space);
// Group: the class held in m_word_mask.
2893 char_set.add_class(this->m_word_mask);
// Group: the literal characters $ & * + - _ < >.
2896 char_set.add_single(digraph<charT>(charT('$')));
2897 char_set.add_single(digraph<charT>(charT('&')));
2898 char_set.add_single(digraph<charT>(charT('*')));
2899 char_set.add_single(digraph<charT>(charT('+')));
2900 char_set.add_single(digraph<charT>(charT('-')));
2901 char_set.add_single(digraph<charT>(charT('_')));
2902 char_set.add_single(digraph<charT>(charT('<')));
2903 char_set.add_single(digraph<charT>(charT('>')));
// Group: whatever class the traits return for the name "punct".
2906 char_set.add_class(this->m_traits.lookup_classname(s_punct, s_punct+5));
// Group: opening brackets ( [ {.
2909 char_set.add_single(digraph<charT>(charT('(')));
2910 char_set.add_single(digraph<charT>(charT('[')));
2911 char_set.add_single(digraph<charT>(charT('{')));
// Group: closing brackets ) ] }.
2914 char_set.add_single(digraph<charT>(charT(')')));
2915 char_set.add_single(digraph<charT>(charT(']')));
2916 char_set.add_single(digraph<charT>(charT('}')));
// Group: quote characters " ' `.
2919 char_set.add_single(digraph<charT>(charT('"')));
2920 char_set.add_single(digraph<charT>(charT('\'')));
2921 char_set.add_single(digraph<charT>(charT('`')));
// Group: the characters ' , #.
2924 char_set.add_single(digraph<charT>(charT('\'')));
2925 char_set.add_single(digraph<charT>(charT(',')));
2926 char_set.add_single(digraph<charT>(charT('#')));
// Group: the character ;.
2929 char_set.add_single(digraph<charT>(charT(';')));
// Group: newline and form feed.
2932 char_set.add_single(digraph<charT>(charT('\n')));
2933 char_set.add_single(digraph<charT>(charT('\f')));
// Presumably the default branch for an unrecognised code letter: fail with
// error_ctype at the current offset - TODO confirm.
2936 fail(regex_constants::error_ctype, m_position - m_base);
// append_set() returning 0 is also reported as error_ctype.
2939 if(0 == this->append_set(char_set))
2941 fail(regex_constants::error_ctype, m_position - m_base);
// parse_options: reads an inline option group and returns the resulting
// syntax_option_type, starting from the parser's current flags.
//
// The first run of visible statements turns options on; after a '-' the
// second run turns them off. Reaching m_end at any of the visible advance
// points rewinds m_position to the open-mark character and calls fail() with
// error_paren.
//
// NOTE(review): the loop and switch structure, the case labels, and the uses
// of `breakout` are not visible in this extract (the embedded numbering has
// gaps). The option letters are presumably those named in the "(?imsx-imsx)"
// comment - TODO confirm the letter-to-flag mapping against the full file.
2948 template <class charT, class traits>
2949 regex_constants::syntax_option_type basic_regex_parser<charT, traits>::parse_options()
2951 // we have a (?imsx-imsx) group, convert it into a set of flags:
2952 regex_constants::syntax_option_type f = this->flags();
2953 bool breakout = false;
// Turning on: set mod_s and clear its explicit-off counterpart no_mod_s.
2959 f |= regex_constants::mod_s;
2960 f &= ~regex_constants::no_mod_s;
// Turning on: clear no_mod_m.
2963 f &= ~regex_constants::no_mod_m;
// Turning on: set icase.
2966 f |= regex_constants::icase;
// Turning on: set mod_x.
2969 f |= regex_constants::mod_x;
// Advance to the next option character; running out of input is an error.
2975 if(++m_position == m_end)
2977 // Rewind to start of (? sequence:
2979 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2980 fail(regex_constants::error_paren, m_position - m_base);
// A '-' introduces the options to switch off.
2988 if(*m_position == static_cast<charT>('-'))
2990 if(++m_position == m_end)
2992 // Rewind to start of (? sequence:
2994 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2995 fail(regex_constants::error_paren, m_position - m_base);
// Turning off: clear mod_s and set no_mod_s.
3003 f &= ~regex_constants::mod_s;
3004 f |= regex_constants::no_mod_s;
// Turning off: set no_mod_m.
3007 f |= regex_constants::no_mod_m;
// Turning off: clear icase.
3010 f &= ~regex_constants::icase;
// Turning off: clear mod_x.
3013 f &= ~regex_constants::mod_x;
// Advance to the next option character; running out of input is an error.
3019 if(++m_position == m_end)
3021 // Rewind to start of (? sequence:
3023 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
3024 fail(regex_constants::error_paren, m_position - m_base);
// unwind_alts: resolves the pending alternative jumps recorded in m_alt_jumps
// whose offsets are greater than `last_paren_start`.
//
// It first checks for an alternative with nothing after it, and calls fail()
// with error_empty if that is found. It then pops each pending jump in turn
// and sets its target to the current end of the state data.
//
// NOTE(review): the return statements are not visible in this extract.
// Another part of this file tests `0 == unwind_alts(last_paren_start)` and
// treats that as failure, so a false return presumably signals an error -
// TODO confirm.
3033 template <class charT, class traits>
3034 bool basic_regex_parser<charT, traits>::unwind_alts(std::ptrdiff_t last_paren_start)
3037 // If we didn't actually add any states after the last
3038 // alternative then that's an error:
// Visible operands of the test: the alternative insert point equals the
// current data size, a pending jump exists beyond last_paren_start, and two
// flag tests (perl syntax group; no_empty_expressions not set).
// NOTE(review): the connectives between the two flag tests (lines 3042-3043,
// 3045, 3047 onwards) are not visible here, so how they combine cannot be
// stated from this extract.
3040 if((this->m_alt_insert_point == static_cast<std::ptrdiff_t>(this->m_pdata->m_data.size()))
3041 && m_alt_jumps.size() && (m_alt_jumps.back() > last_paren_start)
3044 ((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group)
3046 ((this->flags() & regbase::no_empty_expressions) == 0)
3050 fail(regex_constants::error_empty, this->m_position - this->m_base, "Can't terminate a sub-expression with an alternation operator |.");
3054 // Fix up our alternatives:
// Process pending jumps from the most recently recorded backwards, stopping
// at the first one at or before last_paren_start.
3056 while(m_alt_jumps.size() && (m_alt_jumps.back() > last_paren_start))
3059 // fix up the jump to point to the end of the states
3060 // that we've just added:
3062 std::ptrdiff_t jump_offset = m_alt_jumps.back();
3063 m_alt_jumps.pop_back();
// Align the data before taking its size, so the stored distance is measured
// to the aligned end of the data.
3064 this->m_pdata->m_data.align();
3065 re_jump* jmp = static_cast<re_jump*>(this->getaddress(jump_offset));
3066 BOOST_ASSERT(jmp->type == syntax_element_jump);
// The target is stored as a distance from the jump's own offset to the
// current end of the data.
3067 jmp->alt.i = this->m_pdata->m_data.size() - jump_offset;
3073 #pragma warning(pop)
3076 } // namespace BOOST_REGEX_DETAIL_NS
3077 } // namespace boost
3080 #pragma warning(push)
3081 #pragma warning(disable: 4103)
3083 #ifdef BOOST_HAS_ABI_HEADERS
3084 # include BOOST_ABI_SUFFIX
3087 #pragma warning(pop)