6 * Use, modification and distribution are subject to the
7 * Boost Software License, Version 1.0. (See accompanying file
8 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
13 * LOCATION: see http://www.boost.org for most recent version.
14 * FILE basic_regex_parser.hpp
15 * VERSION see <boost/version.hpp>
16 * DESCRIPTION: Declares template class basic_regex_parser.
19 #ifndef BOOST_REGEX_V4_BASIC_REGEX_PARSER_HPP
20 #define BOOST_REGEX_V4_BASIC_REGEX_PARSER_HPP
24 #pragma warning(disable: 4103)
26 #ifdef BOOST_HAS_ABI_HEADERS
27 # include BOOST_ABI_PREFIX
34 namespace BOOST_REGEX_DETAIL_NS{
38 #pragma warning(disable:4244 4800)
// umax overload chosen when boost::intmax_t has fewer value digits than
// std::size_t: the limit is the largest boost::intmax_t.
41 inline boost::intmax_t umax(mpl::false_ const&)
43 // Get out clause here, just in case numeric_limits is unspecialized:
// (INT_MAX is used as the fallback limit in that case.)
44 return std::numeric_limits<boost::intmax_t>::is_specialized ? (std::numeric_limits<boost::intmax_t>::max)() : INT_MAX;
// umax overload chosen when boost::intmax_t has at least as many value digits
// as std::size_t: the limit is the largest std::size_t, returned as boost::intmax_t.
46 inline boost::intmax_t umax(mpl::true_ const&)
48 return (std::numeric_limits<std::size_t>::max)();
// Returns the limit described above, picking the overload at compile time by
// comparing numeric_limits<>::digits of boost::intmax_t and std::size_t.
51 inline boost::intmax_t umax()
53 return umax(mpl::bool_<std::numeric_limits<boost::intmax_t>::digits >= std::numeric_limits<std::size_t>::digits>());
// basic_regex_parser: walks the text of a regular expression and emits the
// corresponding states via the append_state()/insert_state() calls used by the
// member definitions (presumably inherited from basic_regex_creator — confirm).
// charT is the pattern's character type; traits provides the syntax
// classification (syntax_type / escape_syntax_type) the parser dispatches on.
56 template <class charT, class traits>
57 class basic_regex_parser : public basic_regex_creator<charT, traits>
// Constructs a parser; data is forwarded to the basic_regex_creator base.
60 basic_regex_parser(regex_data<charT, traits>* data);
// Parses the pattern beginning at p1 (p2 is forwarded to finalize() together
// with p1) using the option bits in flags.
61 void parse(const charT* p1, const charT* p2, unsigned flags);
// Error reporting. position is an offset from the start of the pattern.
// The two-argument form looks the message up from the traits; the
// three-argument form forwards to the four-argument form with
// start_pos == position.
62 void fail(regex_constants::error_type error_code, std::ptrdiff_t position);
63 void fail(regex_constants::error_type error_code, std::ptrdiff_t position, std::string message, std::ptrdiff_t start_pos);
64 void fail(regex_constants::error_type error_code, std::ptrdiff_t position, const std::string& message)
66 fail(error_code, position, message, position);
// Individual parsing routines. Each returns bool; parse_all() keeps calling
// the selected routine while it returns true.
71 bool parse_extended();
73 bool parse_open_paren();
74 bool parse_basic_escape();
75 bool parse_extended_escape();
76 bool parse_match_any();
// The default bounds (0, max size_t) are what the '*' operator uses;
// '+' is parsed as parse_repeat(1) and '?' as parse_repeat(0, 1).
77 bool parse_repeat(std::size_t low = 0, std::size_t high = (std::numeric_limits<std::size_t>::max)());
// isbasic is true when called from parse_basic_escape and false when called
// from parse_extended.
78 bool parse_repeat_range(bool isbasic);
82 void parse_set_literal(basic_char_set<charT, traits>& char_set);
83 bool parse_inner_set(basic_char_set<charT, traits>& char_set);
85 bool parse_perl_extension();
86 bool parse_perl_verb();
87 bool match_verb(const char*);
88 bool add_emacs_code(bool negate);
89 bool unwind_alts(std::ptrdiff_t last_paren_start);
90 digraph<charT> get_next_set_literal(basic_char_set<charT, traits>& char_set);
91 charT unescape_character();
92 regex_constants::syntax_option_type parse_options();
// Pointer-to-member type for the top-level parsing routine chosen in parse().
95 typedef bool (basic_regex_parser::*parser_proc_type)();
96 typedef typename traits::string_type string_type;
97 typedef typename traits::char_class_type char_class_type;
98 parser_proc_type m_parser_proc; // the main parser to use
99 const charT* m_base; // the start of the string being parsed
100 const charT* m_end; // the end of the string being parsed
101 const charT* m_position; // our current parser position
102 unsigned m_mark_count; // how many sub-expressions we have
103 int m_mark_reset; // used to indicate that we're inside a (?|...) block.
104 unsigned m_max_mark; // largest mark count seen inside a (?|...) block.
105 std::ptrdiff_t m_paren_start; // where the last seen ')' began (where repeats are inserted).
106 std::ptrdiff_t m_alt_insert_point; // where to insert the next alternative
107 bool m_has_case_change; // true if somewhere in the current block the case has changed
// m_alt_jumps has two alternative declarations: with MSVC targeting 32-bit x86
// it is a vector<long> (the static assert checks that long is at least as
// large as a pointer); the other declaration uses std::ptrdiff_t.
108 #if defined(BOOST_MSVC) && defined(_M_IX86)
109 // This is an ugly warning suppression workaround (for warnings *inside* std::vector
110 // that can not otherwise be suppressed)...
111 BOOST_STATIC_ASSERT(sizeof(long) >= sizeof(void*));
112 std::vector<long> m_alt_jumps; // list of alternative in the current scope.
114 std::vector<std::ptrdiff_t> m_alt_jumps; // list of alternative in the current scope.
// Copy operations are only declared here.
// NOTE(review): presumably private and left undefined to forbid copying — confirm.
117 basic_regex_parser& operator=(const basic_regex_parser&);
118 basic_regex_parser(const basic_regex_parser&);
// Constructor: hands data to the basic_regex_creator base and resets the
// sub-expression bookkeeping (counts and insertion points to 0, no case change
// seen). m_mark_reset starts at -1.
// NOTE(review): -1 presumably means "not inside a (?|...) block" — confirm.
// The parser procedure and the m_base / m_position pointers are not
// initialised here; parse() assigns them.
121 template <class charT, class traits>
122 basic_regex_parser<charT, traits>::basic_regex_parser(regex_data<charT, traits>* data)
123 : basic_regex_creator<charT, traits>(data), m_mark_count(0), m_mark_reset(-1), m_max_mark(0), m_paren_start(0), m_alt_insert_point(0), m_has_case_change(false)
// Top-level entry point: parses a whole regular expression.
//   p1      - start of the pattern; becomes m_base and the initial m_position.
//   p2      - end of the pattern; forwarded with p1 to finalize().
//   l_flags - syntax option bits; the bits selected by regbase::main_option_type
//             choose which parsing routine is used.
// Errors are reported through fail(). On success the sub-expression count is
// stored in m_pdata->m_mark_count and finalize() is called.
127 template <class charT, class traits>
128 void basic_regex_parser<charT, traits>::parse(const charT* p1, const charT* p2, unsigned l_flags)
130 // pass l_flags on to base class:
133 m_position = m_base = p1;
135 // empty strings are errors:
// (the visible part of the test fails with error_empty when Perl syntax is
// not selected or no_empty_expressions is set)
138 ((l_flags & regbase::main_option_type) != regbase::perl_syntax_group)
139 || (l_flags & regbase::no_empty_expressions)
143 fail(regex_constants::error_empty, 0);
146 // select which parser to use:
147 switch(l_flags & regbase::main_option_type)
149 case regbase::perl_syntax_group:
151 m_parser_proc = &basic_regex_parser<charT, traits>::parse_extended;
153 // Add a leading paren with index zero to give recursions a target:
155 re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
157 br->icase = this->flags() & regbase::icase;
160 case regbase::basic_syntax_group:
161 m_parser_proc = &basic_regex_parser<charT, traits>::parse_basic;
163 case regbase::literal:
164 m_parser_proc = &basic_regex_parser<charT, traits>::parse_literal;
167 // Ooops, someone has managed to set more than one of the main option flags,
168 // so this must be an error:
169 fail(regex_constants::error_unknown, 0, "An invalid combination of regular expression syntax flags was used.");
173 // parse all our characters:
174 bool result = parse_all();
176 // Unwind our alternatives:
179 // reset l_flags as a global scope (?imsx) may have altered them:
180 this->flags(l_flags);
181 // if we haven't gobbled up all the characters then we must
182 // have had an unexpected ')' :
185 fail(regex_constants::error_paren, ::boost::BOOST_REGEX_DETAIL_NS::distance(m_base, m_position), "Found a closing ) with no corresponding openening parenthesis.");
188 // if an error has been set then give up now:
189 if(this->m_pdata->m_status)
191 // fill in our sub-expression count:
// (one more than m_mark_count)
192 this->m_pdata->m_mark_count = 1 + m_mark_count;
193 this->finalize(p1, p2);
// Reports an error using the message the traits object associates with
// error_code.
//   error_code - the kind of error.
//   position   - offset of the error from the start of the pattern.
// Forwards to the overload that takes an explicit message.
196 template <class charT, class traits>
197 void basic_regex_parser<charT, traits>::fail(regex_constants::error_type error_code, std::ptrdiff_t position)
199 // get the error message:
200 std::string message = this->m_pdata->m_ptraits->error_string(error_code);
201 fail(error_code, position, message);
// Records an error and stops the parse.
//   error_code - stored in m_pdata->m_status unless a status is already set.
//   position   - offset of the error from m_base.
//   message    - human-readable description; taken by value because it is
//                extended below with an excerpt of the pattern.
//   start_pos  - offset where the excerpt should begin; passing the same value
//                as position requests the default of up to 10 characters
//                before position.
// When regex_constants::no_except is not set in the flags (and exceptions are
// enabled) a boost::regex_error is built from message, error_code and position.
204 template <class charT, class traits>
205 void basic_regex_parser<charT, traits>::fail(regex_constants::error_type error_code, std::ptrdiff_t position, std::string message, std::ptrdiff_t start_pos)
207 if(0 == this->m_pdata->m_status) // update the error code if not already set
208 this->m_pdata->m_status = error_code;
209 m_position = m_end; // don't bother parsing anything else
211 #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
213 // Augment error message with the regular expression text:
// The excerpt runs from start_pos to at most 10 characters past position,
// clamped to the bounds of the pattern.
215 if(start_pos == position)
216 start_pos = (std::max)(static_cast<std::ptrdiff_t>(0), position - static_cast<std::ptrdiff_t>(10));
217 std::ptrdiff_t end_pos = (std::min)(position + static_cast<std::ptrdiff_t>(10), static_cast<std::ptrdiff_t>(m_end - m_base));
// No excerpt is added for error_empty.
218 if(error_code != regex_constants::error_empty)
// Say "fragment" when the excerpt does not cover the whole pattern.
220 if((start_pos != 0) || (end_pos != (m_end - m_base)))
221 message += " The error occurred while parsing the regular expression fragment: '";
223 message += " The error occurred while parsing the regular expression: '";
224 if(start_pos != end_pos)
// Text before the error, a marker, then the text after the error.
226 message += std::string(m_base + start_pos, m_base + position);
227 message += ">>>HERE>>>";
228 message += std::string(m_base + position, m_base + end_pos);
234 #ifndef BOOST_NO_EXCEPTIONS
235 if(0 == (this->flags() & regex_constants::no_except))
237 boost::regex_error e(message, error_code, position);
241 (void)position; // suppress warnings.
// Repeatedly invokes the parsing routine selected in parse() (m_parser_proc)
// until it returns false or m_position reaches m_end.
245 template <class charT, class traits>
246 bool basic_regex_parser<charT, traits>::parse_all()
249 while(result && (m_position != m_end))
251 result = (this->*m_parser_proc)();
257 #pragma warning(push)
258 #pragma warning(disable:4702)
// Parses one element of a POSIX-basic style expression, dispatching on the
// syntax type of the character at m_position. Characters with no special
// handling here are appended as literals via parse_literal().
260 template <class charT, class traits>
261 bool basic_regex_parser<charT, traits>::parse_basic()
263 switch(this->m_traits.syntax_type(*m_position))
265 case regex_constants::syntax_escape:
266 return parse_basic_escape();
267 case regex_constants::syntax_dot:
268 return parse_match_any();
269 case regex_constants::syntax_caret:
271 this->append_state(syntax_element_start_line);
273 case regex_constants::syntax_dollar:
275 this->append_state(syntax_element_end_line);
// '*' is a literal when there is nothing before it to repeat (no previous
// state, or the previous state is a start-of-line assertion).
277 case regex_constants::syntax_star:
278 if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line))
279 return parse_literal();
283 return parse_repeat();
// '+' and '?' act as repeat operators only when regbase::emacs_ex is set
// and there is something to repeat; otherwise they are literals.
285 case regex_constants::syntax_plus:
286 if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line) || !(this->flags() & regbase::emacs_ex))
287 return parse_literal();
291 return parse_repeat(1);
293 case regex_constants::syntax_question:
294 if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line) || !(this->flags() & regbase::emacs_ex))
295 return parse_literal();
299 return parse_repeat(0, 1);
301 case regex_constants::syntax_open_set:
// A newline gets special treatment when regbase::newline_alt is set,
// otherwise it is a literal.
303 case regex_constants::syntax_newline:
304 if(this->flags() & regbase::newline_alt)
307 return parse_literal();
309 return parse_literal();
// Parses one element of an extended/Perl style expression, dispatching on the
// syntax type of the character at m_position. This is the routine parse()
// selects for regbase::perl_syntax_group.
314 template <class charT, class traits>
315 bool basic_regex_parser<charT, traits>::parse_extended()
318 switch(this->m_traits.syntax_type(*m_position))
320 case regex_constants::syntax_open_mark:
321 return parse_open_paren();
322 case regex_constants::syntax_close_mark:
324 case regex_constants::syntax_escape:
325 return parse_extended_escape();
326 case regex_constants::syntax_dot:
327 return parse_match_any();
// '^' and '$' are buffer anchors when no_mod_m is set and line anchors
// otherwise.
328 case regex_constants::syntax_caret:
331 (this->flags() & regex_constants::no_mod_m ? syntax_element_buffer_start : syntax_element_start_line));
333 case regex_constants::syntax_dollar:
336 (this->flags() & regex_constants::no_mod_m ? syntax_element_buffer_end : syntax_element_end_line));
// A repeat operator as the very first character of the pattern is rejected
// with error_badrepeat.
338 case regex_constants::syntax_star:
339 if(m_position == this->m_base)
341 fail(regex_constants::error_badrepeat, 0, "The repeat operator \"*\" cannot start a regular expression.");
345 return parse_repeat();
346 case regex_constants::syntax_question:
347 if(m_position == this->m_base)
349 fail(regex_constants::error_badrepeat, 0, "The repeat operator \"?\" cannot start a regular expression.");
353 return parse_repeat(0,1);
354 case regex_constants::syntax_plus:
355 if(m_position == this->m_base)
357 fail(regex_constants::error_badrepeat, 0, "The repeat operator \"+\" cannot start a regular expression.");
361 return parse_repeat(1);
362 case regex_constants::syntax_open_brace:
364 return parse_repeat_range(false);
// A stray '}' is an error only when no_perl_ex is set; otherwise it is a
// literal.
365 case regex_constants::syntax_close_brace:
366 if((this->flags() & regbase::no_perl_ex) == regbase::no_perl_ex)
368 fail(regex_constants::error_brace, this->m_position - this->m_base, "Found a closing repetition operator } with no corresponding {.");
371 result = parse_literal();
373 case regex_constants::syntax_or:
375 case regex_constants::syntax_open_set:
377 case regex_constants::syntax_newline:
378 if(this->flags() & regbase::newline_alt)
381 return parse_literal();
382 case regex_constants::syntax_hash:
384 // If we have a mod_x flag set, then skip until
385 // we get to a newline character:
388 & (regbase::no_perl_ex|regbase::mod_x))
// Consume characters up to and including the first separator (or to the end
// of the pattern).
391 while((m_position != m_end) && !is_separator(*m_position++)){}
396 result = parse_literal();
// Handles the character at m_position as an ordinary literal. The character is
// passed to append_literal() unless the flag test below (involving
// regbase::mod_x) and the whitespace check both say it should be skipped.
405 template <class charT, class traits>
406 bool basic_regex_parser<charT, traits>::parse_literal()
408 // append this as a literal provided it's not a space character
409 // or the perl option regbase::mod_x is not set:
412 & (regbase::main_option_type|regbase::mod_x|regbase::no_perl_ex))
414 || !this->m_traits.isctype(*m_position, this->m_mask_space))
415 this->append_literal(*m_position);
// Parses a parenthesised group. After skipping the '(' it either hands off to
// parse_perl_extension() / parse_perl_verb() for "(?" and "(*" (only in the
// syntax modes tested below), or treats the group as a marked sub-expression:
// a startmark state is appended, the enclosing scope's alternation insertion
// point, flags, case-change flag and branch-reset value are saved, the group
// contents are parsed, and finally an endmark state is appended and the saved
// values restored. Reaching the end of the pattern before the closing ')' is
// reported as error_paren.
420 template <class charT, class traits>
421 bool basic_regex_parser<charT, traits>::parse_open_paren()
424 // skip the '(' and error check:
426 if(++m_position == m_end)
428 fail(regex_constants::error_paren, m_position - m_base);
432 // begin by checking for a perl-style (?...) extension:
435 ((this->flags() & (regbase::main_option_type | regbase::no_perl_ex)) == 0)
436 || ((this->flags() & (regbase::main_option_type | regbase::emacs_ex)) == (regbase::basic_syntax_group|regbase::emacs_ex))
439 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question)
440 return parse_perl_extension();
441 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_star)
442 return parse_perl_verb();
445 // update our mark count, and append the required state:
// (the group gets a new mark id only when regbase::nosubs is not set)
448 if(0 == (this->flags() & regbase::nosubs))
450 markid = ++m_mark_count;
// With save_subexpression_location the offset of the '(' is recorded now;
// the matching end offset is filled in once the ')' has been found.
451 #ifndef BOOST_NO_STD_DISTANCE
452 if(this->flags() & regbase::save_subexpression_location)
453 this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>(std::distance(m_base, m_position) - 1, 0));
455 if(this->flags() & regbase::save_subexpression_location)
456 this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>((m_position - m_base) - 1, 0));
459 re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
461 pb->icase = this->flags() & regbase::icase;
// Remember the startmark's offset; it later becomes m_paren_start.
462 std::ptrdiff_t last_paren_start = this->getoffset(pb);
463 // back up insertion point for alternations, and set new point:
464 std::ptrdiff_t last_alt_point = m_alt_insert_point;
465 this->m_pdata->m_data.align();
466 m_alt_insert_point = this->m_pdata->m_data.size();
468 // back up the current flags in case we have a nested (?imsx) group:
470 regex_constants::syntax_option_type opts = this->flags();
471 bool old_case_change = m_has_case_change;
472 m_has_case_change = false; // no changes to this scope as yet...
474 // Back up branch reset data in case we have a nested (?|...)
476 int mark_reset = m_mark_reset;
479 // now recursively add more states, this will terminate when we get to a
484 // Unwind pushed alternatives:
486 if(0 == unwind_alts(last_paren_start))
491 if(m_has_case_change)
493 // the case has changed in one or more of the alternatives
494 // within the scoped (...) block: we have to add a state
495 // to reset the case sensitivity:
496 static_cast<re_case*>(
497 this->append_state(syntax_element_toggle_case, sizeof(re_case))
498 )->icase = opts & regbase::icase;
501 m_has_case_change = old_case_change;
503 // restore branch reset:
505 m_mark_reset = mark_reset;
507 // we either have a ')' or we have run out of characters prematurely:
509 if(m_position == m_end)
511 this->fail(regex_constants::error_paren, ::boost::BOOST_REGEX_DETAIL_NS::distance(m_base, m_end));
514 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
// Record where this sub-expression ends (only for groups that were given a
// mark id).
516 #ifndef BOOST_NO_STD_DISTANCE
517 if(markid && (this->flags() & regbase::save_subexpression_location))
518 this->m_pdata->m_subs.at(markid - 1).second = std::distance(m_base, m_position);
520 if(markid && (this->flags() & regbase::save_subexpression_location))
521 this->m_pdata->m_subs.at(markid - 1).second = (m_position - m_base);
525 // append closing parenthesis state:
527 pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
529 pb->icase = this->flags() & regbase::icase;
530 this->m_paren_start = last_paren_start;
532 // restore the alternate insertion point:
534 this->m_alt_insert_point = last_alt_point;
536 // allow backrefs to this mark:
// (m_backrefs is a bitmask, so only mark ids below the bit width of
// unsigned are recorded)
538 if((markid > 0) && (markid < sizeof(unsigned) * CHAR_BIT))
539 this->m_backrefs |= 1u << (markid - 1);
// Handles an escape sequence in POSIX-basic syntax: advances past the escape
// character and dispatches on escape_syntax_type of the following character.
// Several escapes are only honoured when a particular flag is set
// (bk_plus_qm, bk_vbar, emacs_ex) or cleared (no_intervals); otherwise the
// character is handled by parse_literal().
544 template <class charT, class traits>
545 bool basic_regex_parser<charT, traits>::parse_basic_escape()
// NOTE(review): a pattern ending right after the escape character is reported
// as error_paren here, whereas parse_extended_escape reports error_escape for
// its end-of-pattern check — confirm this is intended.
547 if(++m_position == m_end)
549 fail(regex_constants::error_paren, m_position - m_base);
553 switch(this->m_traits.escape_syntax_type(*m_position))
555 case regex_constants::syntax_open_mark:
556 return parse_open_paren();
557 case regex_constants::syntax_close_mark:
// Escaped '+' and '?' are repeat operators only with bk_plus_qm.
559 case regex_constants::syntax_plus:
560 if(this->flags() & regex_constants::bk_plus_qm)
563 return parse_repeat(1);
566 return parse_literal();
567 case regex_constants::syntax_question:
568 if(this->flags() & regex_constants::bk_plus_qm)
571 return parse_repeat(0, 1);
574 return parse_literal();
// Escaped braces form a bounded repeat unless no_intervals is set.
575 case regex_constants::syntax_open_brace:
576 if(this->flags() & regbase::no_intervals)
577 return parse_literal();
579 return parse_repeat_range(true);
580 case regex_constants::syntax_close_brace:
581 if(this->flags() & regbase::no_intervals)
582 return parse_literal();
583 fail(regex_constants::error_brace, this->m_position - this->m_base, "Found a closing repetition operator } with no corresponding {.");
585 case regex_constants::syntax_or:
586 if(this->flags() & regbase::bk_vbar)
589 result = parse_literal();
591 case regex_constants::syntax_digit:
592 return parse_backref();
// The following assertions are recognised only with emacs_ex; without it the
// character is treated as a literal.
593 case regex_constants::escape_type_start_buffer:
594 if(this->flags() & regbase::emacs_ex)
597 this->append_state(syntax_element_buffer_start);
600 result = parse_literal();
602 case regex_constants::escape_type_end_buffer:
603 if(this->flags() & regbase::emacs_ex)
606 this->append_state(syntax_element_buffer_end);
609 result = parse_literal();
611 case regex_constants::escape_type_word_assert:
612 if(this->flags() & regbase::emacs_ex)
615 this->append_state(syntax_element_word_boundary);
618 result = parse_literal();
620 case regex_constants::escape_type_not_word_assert:
621 if(this->flags() & regbase::emacs_ex)
624 this->append_state(syntax_element_within_word);
627 result = parse_literal();
629 case regex_constants::escape_type_left_word:
630 if(this->flags() & regbase::emacs_ex)
633 this->append_state(syntax_element_word_start);
636 result = parse_literal();
638 case regex_constants::escape_type_right_word:
639 if(this->flags() & regbase::emacs_ex)
642 this->append_state(syntax_element_word_end);
645 result = parse_literal();
// Remaining escapes: with emacs_ex some of them build a character set from
// the word class or go through add_emacs_code(); anything else is a literal.
648 if(this->flags() & regbase::emacs_ex)
658 basic_char_set<charT, traits> char_set;
661 char_set.add_class(this->m_word_mask);
662 if(0 == this->append_set(char_set))
664 fail(regex_constants::error_ctype, m_position - m_base);
674 return add_emacs_code(negate);
677 // not supported yet:
678 fail(regex_constants::error_escape, m_position - m_base, "The \\c and \\C escape sequences are not supported by POSIX basic regular expressions: try the Perl syntax instead.");
684 result = parse_literal();
// Handles an escape sequence in extended/Perl syntax by dispatching on
// escape_syntax_type of the character at m_position. Covers character-class
// escapes, assertions, property escapes, back-references and a group of
// escapes that are only special in pure Perl mode (tested as
// "no main_option_type bits and no no_perl_ex"); when that test fails those
// escapes jump to the character-class handling at escape_type_class_jump.
// Anything unrecognised is appended as the literal produced by
// unescape_character().
690 template <class charT, class traits>
691 bool basic_regex_parser<charT, traits>::parse_extended_escape()
694 if(m_position == m_end)
696 fail(regex_constants::error_escape, m_position - m_base, "Incomplete escape sequence found.");
699 bool negate = false; // in case this is a character class escape: \w \d etc
700 switch(this->m_traits.escape_syntax_type(*m_position))
702 case regex_constants::escape_type_not_class:
705 case regex_constants::escape_type_class:
707 escape_type_class_jump:
708 typedef typename traits::char_class_type m_type;
// Look the single escape character up as a class name.
709 m_type m = this->m_traits.lookup_classname(m_position, m_position+1);
712 basic_char_set<charT, traits> char_set;
715 char_set.add_class(m);
716 if(0 == this->append_set(char_set))
718 fail(regex_constants::error_ctype, m_position - m_base);
725 // not a class, just a regular unknown escape:
727 this->append_literal(unescape_character());
730 case regex_constants::syntax_digit:
731 return parse_backref();
// Zero-width assertions: each appends the matching state.
732 case regex_constants::escape_type_left_word:
734 this->append_state(syntax_element_word_start);
736 case regex_constants::escape_type_right_word:
738 this->append_state(syntax_element_word_end);
740 case regex_constants::escape_type_start_buffer:
742 this->append_state(syntax_element_buffer_start);
744 case regex_constants::escape_type_end_buffer:
746 this->append_state(syntax_element_buffer_end);
748 case regex_constants::escape_type_word_assert:
750 this->append_state(syntax_element_word_boundary);
752 case regex_constants::escape_type_not_word_assert:
754 this->append_state(syntax_element_within_word);
756 case regex_constants::escape_type_Z:
758 this->append_state(syntax_element_soft_buffer_end);
760 case regex_constants::escape_type_Q:
762 case regex_constants::escape_type_C:
763 return parse_match_any();
764 case regex_constants::escape_type_X:
766 this->append_state(syntax_element_combining);
768 case regex_constants::escape_type_G:
770 this->append_state(syntax_element_restart_continue);
// Property escapes: the property name is either enclosed in braces or is the
// single character that follows.
772 case regex_constants::escape_type_not_property:
775 case regex_constants::escape_type_property:
779 if(m_position == m_end)
781 fail(regex_constants::error_escape, m_position - m_base, "Incomplete property escape found.");
784 // maybe have \p{ddd}
785 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
787 const charT* base = m_position;
788 // skip forward until we find enclosing brace:
789 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
791 if(m_position == m_end)
793 fail(regex_constants::error_escape, m_position - m_base, "Closing } missing from property escape sequence.");
// The name is the text between the braces; m_position is left just past
// the closing brace.
796 m = this->m_traits.lookup_classname(++base, m_position++);
800 m = this->m_traits.lookup_classname(m_position, m_position+1);
805 basic_char_set<charT, traits> char_set;
808 char_set.add_class(m);
809 if(0 == this->append_set(char_set))
811 fail(regex_constants::error_ctype, m_position - m_base);
816 fail(regex_constants::error_ctype, m_position - m_base, "Escape sequence was neither a valid property nor a valid character class name.");
// Reset-start-mark escape: in pure Perl mode a startmark state is appended.
819 case regex_constants::escape_type_reset_start_mark:
820 if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
822 re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
824 pb->icase = this->flags() & regbase::icase;
825 this->m_pdata->m_data.align();
829 goto escape_type_class_jump;
// Line-ending escape: in pure Perl mode the parser is temporarily pointed at
// the replacement pattern returned by get_escape_R_string(), which is parsed
// with parse_all(); afterwards parsing resumes just past the escape.
830 case regex_constants::escape_type_line_ending:
831 if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
833 const charT* e = get_escape_R_string<charT>();
834 const charT* old_position = m_position;
835 const charT* old_end = m_end;
836 const charT* old_base = m_base;
839 m_end = e + traits::length(e);
840 bool r = parse_all();
841 m_position = ++old_position;
846 goto escape_type_class_jump;
// Extended back-reference (\g): accepts a number, optionally preceded by '-',
// optionally wrapped in a delimiter pair whose closing half is syn_end. In
// the delimited form a non-numeric body is treated as a capture name.
847 case regex_constants::escape_type_extended_backref:
848 if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
850 bool have_brace = false;
851 bool negative = false;
852 static const char* incomplete_message = "Incomplete \\g escape found.";
853 if(++m_position == m_end)
855 fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
858 // maybe have \g{ddd}
859 regex_constants::syntax_type syn = this->m_traits.syntax_type(*m_position);
860 regex_constants::syntax_type syn_end = 0;
861 if((syn == regex_constants::syntax_open_brace)
862 || (syn == regex_constants::escape_type_left_word)
863 || (syn == regex_constants::escape_type_end_buffer))
865 if(++m_position == m_end)
867 fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
873 case regex_constants::syntax_open_brace:
874 syn_end = regex_constants::syntax_close_brace;
876 case regex_constants::escape_type_left_word:
877 syn_end = regex_constants::escape_type_right_word;
880 syn_end = regex_constants::escape_type_end_buffer;
884 negative = (*m_position == static_cast<charT>('-'));
885 if((negative) && (++m_position == m_end))
887 fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
890 const charT* pc = m_position;
891 boost::intmax_t i = this->m_traits.toi(pc, m_end, 10);
892 if((i < 0) && syn_end)
894 // Check for a named capture, get the leftmost one if there is more than one:
895 const charT* base = m_position;
896 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != syn_end))
900 i = hash_value_from_capture_name(base, m_position);
// Convert a relative reference into an absolute mark index.
904 i = 1 + m_mark_count - i;
// Accept the reference only if the target mark is already recorded in the
// m_backrefs bitmask: either directly by index, or (for values above 10000,
// i.e. name hashes) via the id returned by m_pdata->get_id(i).
905 if(((i > 0) && (i < std::numeric_limits<unsigned>::digits) && (i - 1 < static_cast<boost::intmax_t>(sizeof(unsigned) * CHAR_BIT)) && (this->m_backrefs & (1u << (i-1)))) || ((i > 10000) && (this->m_pdata->get_id(i) > 0) && (this->m_pdata->get_id(i)-1 < static_cast<boost::intmax_t>(sizeof(unsigned) * CHAR_BIT)) && (this->m_backrefs & (1u << (this->m_pdata->get_id(i)-1)))))
908 re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
910 pb->icase = this->flags() & regbase::icase;
914 fail(regex_constants::error_backref, m_position - m_base);
// A delimited form must be closed by the matching delimiter.
920 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != syn_end))
922 fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
929 goto escape_type_class_jump;
930 case regex_constants::escape_type_control_v:
931 if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
932 goto escape_type_class_jump;
935 this->append_literal(unescape_character());
// Appends a wildcard (syntax_element_wild) state. Its mask records how the
// wildcard treats newlines: force_not_newline when regbase::no_mod_s is set,
// otherwise force_newline when regbase::mod_s is set, otherwise dont_care.
941 template <class charT, class traits>
942 bool basic_regex_parser<charT, traits>::parse_match_any()
945 // we have a '.' that can match any character:
948 static_cast<re_dot*>(
949 this->append_state(syntax_element_wild, sizeof(re_dot))
950 )->mask = static_cast<unsigned char>(this->flags() & regbase::no_mod_s
951 ? BOOST_REGEX_DETAIL_NS::force_not_newline
952 : this->flags() & regbase::mod_s ?
953 BOOST_REGEX_DETAIL_NS::force_newline : BOOST_REGEX_DETAIL_NS::dont_care);
// Wraps the most recently parsed element in a repeat.
//   low, high - the repeat bounds.
// Steps visible below: look for a trailing '?' (Perl or emacs syntax) or a
// trailing '+' (Perl only) modifier; work out where the repeated element
// starts (the whole group if the last state is an endmark, only the last
// character if the last state is a multi-character literal, otherwise the
// last state itself); insert a syntax_element_rep state there and append a
// jump back to it; finally, for a possessive repeat, bracket the construct
// with additional startmark / jump / endmark states.
// Fails with error_badrepeat when there is nothing to repeat or the last
// state is one of the non-repeatable kinds listed in the switch.
957 template <class charT, class traits>
958 bool basic_regex_parser<charT, traits>::parse_repeat(std::size_t low, std::size_t high)
961 bool pocessive = false;
962 std::size_t insert_point;
964 // when we get to here we may have a non-greedy ? mark still to come:
966 if((m_position != m_end)
968 (0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
969 || ((regbase::basic_syntax_group|regbase::emacs_ex) == (this->flags() & (regbase::main_option_type | regbase::emacs_ex)))
973 // OK we have a perl or emacs regex, check for a '?':
974 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question)
979 // for perl regexes only check for possessive ++ repeats.
980 if((m_position != m_end)
981 && (0 == (this->flags() & regbase::main_option_type))
982 && (this->m_traits.syntax_type(*m_position) == regex_constants::syntax_plus))
988 if(0 == this->m_last_state)
990 fail(regex_constants::error_badrepeat, ::boost::BOOST_REGEX_DETAIL_NS::distance(m_base, m_position), "Nothing to repeat.");
993 if(this->m_last_state->type == syntax_element_endmark)
995 // insert a repeat before the '(' matching the last ')':
996 insert_point = this->m_paren_start;
998 else if((this->m_last_state->type == syntax_element_literal) && (static_cast<re_literal*>(this->m_last_state)->length > 1))
1000 // the last state was a literal with more than one character, split it in two:
1001 re_literal* lit = static_cast<re_literal*>(this->m_last_state);
// The literal's characters are stored immediately after the re_literal
// header; fetch the last one.
1002 charT c = (static_cast<charT*>(static_cast<void*>(lit+1)))[lit->length - 1];
1004 // now append new state:
1005 lit = static_cast<re_literal*>(this->append_state(syntax_element_literal, sizeof(re_literal) + sizeof(charT)));
1007 (static_cast<charT*>(static_cast<void*>(lit+1)))[0] = c;
1008 insert_point = this->getoffset(this->m_last_state);
1012 // repeat the last state whatever it was, need to add some error checking here:
1013 switch(this->m_last_state->type)
1015 case syntax_element_start_line:
1016 case syntax_element_end_line:
1017 case syntax_element_word_boundary:
1018 case syntax_element_within_word:
1019 case syntax_element_word_start:
1020 case syntax_element_word_end:
1021 case syntax_element_buffer_start:
1022 case syntax_element_buffer_end:
1023 case syntax_element_alt:
1024 case syntax_element_soft_buffer_end:
1025 case syntax_element_restart_continue:
1026 case syntax_element_jump:
1027 case syntax_element_startmark:
1028 case syntax_element_backstep:
1029 // can't legally repeat any of the above:
1030 fail(regex_constants::error_badrepeat, m_position - m_base);
1036 insert_point = this->getoffset(this->m_last_state);
1039 // OK we now know what to repeat, so insert the repeat around it:
1041 re_repeat* rep = static_cast<re_repeat*>(this->insert_state(insert_point, syntax_element_rep, re_repeater_size));
1044 rep->greedy = greedy;
1045 rep->leading = false;
1046 // store our repeater position for later:
// (an offset is kept and the pointer re-fetched via getaddress() after the
// append below)
1047 std::ptrdiff_t rep_off = this->getoffset(rep);
1048 // and append a back jump to the repeat:
1049 re_jump* jmp = static_cast<re_jump*>(this->append_state(syntax_element_jump, sizeof(re_jump)));
1050 jmp->alt.i = rep_off - this->getoffset(jmp);
1051 this->m_pdata->m_data.align();
1052 // now fill in the alt jump for the repeat:
1053 rep = static_cast<re_repeat*>(this->getaddress(rep_off));
1054 rep->alt.i = this->m_pdata->m_data.size() - rep_off;
1056 // If the repeat is possessive then bracket the repeat with a (?>...)
1057 // independent sub-expression construct:
1061 if(m_position != m_end)
1064 // Check for illegal following quantifier, we have to do this here, because
1065 // the extra states we insert below circumvents our usual error checking :-(
1067 switch(this->m_traits.syntax_type(*m_position))
1069 case regex_constants::syntax_star:
1070 case regex_constants::syntax_plus:
1071 case regex_constants::syntax_question:
1072 case regex_constants::syntax_open_brace:
1073 fail(regex_constants::error_badrepeat, m_position - m_base);
1077 re_brace* pb = static_cast<re_brace*>(this->insert_state(insert_point, syntax_element_startmark, sizeof(re_brace)));
1079 pb->icase = this->flags() & regbase::icase;
1080 jmp = static_cast<re_jump*>(this->insert_state(insert_point + sizeof(re_brace), syntax_element_jump, sizeof(re_jump)));
1081 this->m_pdata->m_data.align();
1082 jmp->alt.i = this->m_pdata->m_data.size() - this->getoffset(jmp);
1083 pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
1085 pb->icase = this->flags() & regbase::icase;
// Parses a bounded repeat such as "{min}", "{min,}" or "{min,max}" and passes
// the two bounds on to parse_repeat().
// Whenever the input ends early or a bound is unusable, one of two things
// happens: fail() is called with error_brace, or the position is rewound to
// the opening '{' and that brace is parsed as an ordinary literal.
// isbasic: NOTE(review) the test that uses this flag is not visible here;
// presumably it selects the form whose closing brace is escaped - confirm.
1090 template <class charT, class traits>
1091 bool basic_regex_parser<charT, traits>::parse_repeat_range(bool isbasic)
1093 static const char* incomplete_message = "Missing } in quantified repetition.";
1095 // parse a repeat-range:
1097 std::size_t min, max;
// skip any whitespace in front of the lower bound:
1100 while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
1102 if(this->m_position == this->m_end)
// if any main_option_type or no_perl_ex bit is set the unterminated range is
// reported as an error (the same test is repeated at each check below):
1104 if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
1106 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1109 // Treat the opening '{' as a literal character, rewind to start of error:
1111 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1112 return parse_literal();
// read the lower bound as a base-10 integer:
1115 v = this->m_traits.toi(m_position, m_end, 10);
// a negative value, or one larger than umax(), cannot be used as a bound:
1117 if((v < 0) || (v > umax()))
1119 if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
1121 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1124 // Treat the opening '{' as a literal character, rewind to start of error:
1126 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1127 return parse_literal();
// skip whitespace after the lower bound:
1129 while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
1131 if(this->m_position == this->m_end)
1133 if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
1135 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1138 // Treat the opening '{' as a literal character, rewind to start of error:
1140 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1141 return parse_literal();
1143 min = static_cast<std::size_t>(v);
1144 // see if we have a comma:
1145 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_comma)
1147 // move on and error check:
1150 while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
1152 if(this->m_position == this->m_end)
1154 if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
1156 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1159 // Treat the opening '{' as a literal character, rewind to start of error:
1161 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1162 return parse_literal();
1164 // get the value if any:
1165 v = this->m_traits.toi(m_position, m_end, 10);
// an upper bound that is negative or not below umax() is replaced by the
// largest std::size_t value:
1166 max = ((v >= 0) && (v < umax())) ? (std::size_t)v : (std::numeric_limits<std::size_t>::max)();
1170 // no comma, max = min:
// skip whitespace in front of the closing brace:
1174 while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
1176 // OK now check trailing }:
1177 if(this->m_position == this->m_end)
1179 if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
1181 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1184 // Treat the opening '{' as a literal character, rewind to start of error:
1186 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1187 return parse_literal();
// an escape character at this point is examined before the closing brace:
1191 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_escape)
1194 if(this->m_position == this->m_end)
1196 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1202 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1206 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_brace)
1210 // Treat the opening '{' as a literal character, rewind to start of error:
1212 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1213 return parse_literal();
1216 // finally go and add the repeat, unless error:
1220 // Backtrack to error location:
// step backwards over characters that match m_word_mask:
1222 while(this->m_traits.isctype(*m_position, this->m_word_mask)) --m_position;
1224 fail(regex_constants::error_badbrace, m_position - m_base);
1227 return parse_repeat(min, max);
// Handles the alternation operator.
// The visible steps are: report an empty alternative where that is an error,
// update the mark counters, append a jump state that terminates the current
// alternative, insert an alt state at m_alt_insert_point, and record the
// jump's offset in m_alt_jumps.
1230 template <class charT, class traits>
1231 bool basic_regex_parser<charT, traits>::parse_alt()
1234 // error check: if there have been no previous states,
1235 // or if the last state was a '(' then error:
1238 ((this->m_last_state == 0) || (this->m_last_state->type == syntax_element_startmark))
1241 ((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group)
1243 ((this->flags() & regbase::no_empty_expressions) == 0)
1247 fail(regex_constants::error_empty, this->m_position - this->m_base, "A regular expression cannot start with the alternation operator |.");
1251 // Reset mark count if required:
// remember the highest mark count reached so far, then go back to the reset
// value when one is active (m_mark_reset >= 0):
1253 if(m_max_mark < m_mark_count)
1254 m_max_mark = m_mark_count;
1255 if(m_mark_reset >= 0)
1256 m_mark_count = m_mark_reset;
1260 // we need to append a trailing jump:
1262 re_syntax_base* pj = this->append_state(BOOST_REGEX_DETAIL_NS::syntax_element_jump, sizeof(re_jump));
1263 std::ptrdiff_t jump_offset = this->getoffset(pj);
1265 // now insert the alternative:
1267 re_alt* palt = static_cast<re_alt*>(this->insert_state(this->m_alt_insert_point, syntax_element_alt, re_alt_size));
// the jump offset was taken before the alt state was inserted, so it is
// advanced by the size of that state (presumably inserted in front of the
// jump - confirm against insert_state):
1268 jump_offset += re_alt_size;
1269 this->m_pdata->m_data.align();
// alt.i holds the distance from the alt state to the current end of the data:
1270 palt->alt.i = this->m_pdata->m_data.size() - this->getoffset(palt);
1272 // update m_alt_insert_point so that the next alternate gets
1273 // inserted at the start of the second of the two we've just created:
1275 this->m_alt_insert_point = this->m_pdata->m_data.size();
1277 // the start of this alternative must have a case change state
1278 // if the current block has messed around with case changes:
1280 if(m_has_case_change)
1282 static_cast<re_case*>(
1283 this->append_state(syntax_element_toggle_case, sizeof(re_case))
1284 )->icase = this->m_icase;
1287 // push the alternative onto our stack, a recursive
1288 // implementation here is easier to understand (and faster
1289 // as it happens), but causes all kinds of stack overflow problems
1290 // on programs with small stacks (COM+).
1292 m_alt_jumps.push_back(jump_offset);
// Parses a bracketed character set: the items found are collected in a
// basic_char_set, which is handed to append_set() once the closing ']' is
// reached.  The final return is true only if the input was not exhausted.
1296 template <class charT, class traits>
1297 bool basic_regex_parser<charT, traits>::parse_set()
1299 static const char* incomplete_message = "Character set declaration starting with [ terminated prematurely - either no ] was found or the set had no content.";
1301 if(m_position == m_end)
1303 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1306 basic_char_set<charT, traits> char_set;
1308 const charT* base = m_position; // where the '[' was
1309 const charT* item_base = m_position; // where the '[' or '^' was
// examine one item per iteration, dispatching on its syntax type:
1311 while(m_position != m_end)
1313 switch(this->m_traits.syntax_type(*m_position))
1315 case regex_constants::syntax_caret:
// a caret at the recorded start position is treated specially and moves
// item_base; NOTE(review) the special handling itself is not visible here.
// Any other caret is added as a literal:
1316 if(m_position == base)
1320 item_base = m_position;
1323 parse_set_literal(char_set);
1325 case regex_constants::syntax_close_set:
// a ']' found at item_base (i.e. as the first item) is added as a literal:
1326 if(m_position == item_base)
1328 parse_set_literal(char_set);
// append_set() returning 0 is reported as error_ctype:
1334 if(0 == this->append_set(char_set))
1336 fail(regex_constants::error_ctype, m_position - m_base);
1341 case regex_constants::syntax_open_set:
1342 if(parse_inner_set(char_set))
1345 case regex_constants::syntax_escape:
1348 // look ahead and see if this is a character class shortcut
1352 if(this->m_traits.escape_syntax_type(*m_position)
1353 == regex_constants::escape_type_class)
// the single escape character is used as the class name for the lookup:
1355 char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1);
1358 char_set.add_class(m);
1363 else if(this->m_traits.escape_syntax_type(*m_position)
1364 == regex_constants::escape_type_not_class)
1366 // negated character class:
1367 char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1);
1370 char_set.add_negated_class(m);
1375 // not a character class, just a regular escape:
1377 parse_set_literal(char_set);
1381 parse_set_literal(char_set);
1385 return m_position != m_end;
// Handles a '[' encountered inside a character set.  The position is first
// advanced past that '[', then the following character selects the branch:
// '.', ':' or '=' as listed below, with any other case added as a literal.
// char_set: the set being built; classes, equivalents and literals are added
// to it.
1388 template <class charT, class traits>
1389 bool basic_regex_parser<charT, traits>::parse_inner_set(basic_char_set<charT, traits>& char_set)
1391 static const char* incomplete_message = "Character class declaration starting with [ terminated prematurely - either no ] was found or the set had no content.";
1393 // we have either a character class [:name:]
1394 // a collating element [.name.]
1395 // or an equivalence class [=name=]
1397 if(m_end == ++m_position)
1399 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1402 switch(this->m_traits.syntax_type(*m_position))
1404 case regex_constants::syntax_dot:
1406 // a collating element is treated as a literal:
1409 parse_set_literal(char_set);
1411 case regex_constants::syntax_colon:
1413 // check that character classes are actually enabled:
// the opening bracket is taken as a literal only when the flags select
// basic_syntax_group together with no_char_classes:
1414 if((this->flags() & (regbase::main_option_type | regbase::no_char_classes))
1415 == (regbase::basic_syntax_group | regbase::no_char_classes))
1418 parse_set_literal(char_set);
1422 if(m_end == ++m_position)
1424 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1427 const charT* name_first = m_position;
1428 // skip at least one character, then find the matching ':]'
1429 if(m_end == ++m_position)
1431 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1434 while((m_position != m_end)
1435 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_colon))
1437 const charT* name_last = m_position;
1438 if(m_end == m_position)
1440 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
// the closing ':' must be followed directly by ']':
1443 if((m_end == ++m_position)
1444 || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
1446 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1450 // check for negated class:
1452 bool negated = false;
1453 if(this->m_traits.syntax_type(*name_first) == regex_constants::syntax_caret)
1458 typedef typename traits::char_class_type m_type;
1459 m_type m = this->m_traits.lookup_classname(name_first, name_last);
// a one-character name in an otherwise empty set may be a word-boundary
// marker rather than a class (presumably the [[:<:]] / [[:>:]] forms):
1462 if(char_set.empty() && (name_last - name_first == 1))
1464 // maybe a special case:
1466 if( (m_position != m_end)
1467 && (this->m_traits.syntax_type(*m_position)
1468 == regex_constants::syntax_close_set))
1470 if(this->m_traits.escape_syntax_type(*name_first)
1471 == regex_constants::escape_type_left_word)
1474 this->append_state(syntax_element_word_start);
1477 if(this->m_traits.escape_syntax_type(*name_first)
1478 == regex_constants::escape_type_right_word)
1481 this->append_state(syntax_element_word_end);
1486 fail(regex_constants::error_ctype, name_first - m_base);
1489 if(negated == false)
1490 char_set.add_class(m);
1492 char_set.add_negated_class(m);
1496 case regex_constants::syntax_equal:
1499 if(m_end == ++m_position)
1501 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1504 const charT* name_first = m_position;
1505 // skip at least one character, then find the matching '=]'
1506 if(m_end == ++m_position)
1508 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1511 while((m_position != m_end)
1512 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal))
1514 const charT* name_last = m_position;
1515 if(m_end == m_position)
1517 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
// the closing '=' must be followed directly by ']':
1520 if((m_end == ++m_position)
1521 || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
1523 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
// the name must resolve to exactly one or two characters:
1526 string_type m = this->m_traits.lookup_collatename(name_first, name_last);
1527 if((0 == m.size()) || (m.size() > 2))
1529 fail(regex_constants::error_collate, name_first - m_base);
1538 char_set.add_equivalent(d);
1544 parse_set_literal(char_set);
// Adds the next item of a character set to char_set: either a range
// "start-end" via add_range(), or a single literal via add_single().
// Running out of input is reported as error_brack; a dash directly after a
// completed range that is not followed by ']' is reported as error_range.
1550 template <class charT, class traits>
1551 void basic_regex_parser<charT, traits>::parse_set_literal(basic_char_set<charT, traits>& char_set)
1553 digraph<charT> start_range(get_next_set_literal(char_set));
1554 if(m_end == m_position)
1556 fail(regex_constants::error_brack, m_position - m_base);
// a dash after the first literal may introduce a range:
1559 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash)
1562 if(m_end == ++m_position)
1564 fail(regex_constants::error_brack, m_position - m_base);
// it is only a range when the dash is not immediately followed by ']':
1567 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set)
1569 digraph<charT> end_range = get_next_set_literal(char_set);
1570 char_set.add_range(start_range, end_range);
// look at a dash that directly follows the range just added:
1571 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash)
1573 if(m_end == ++m_position)
1575 fail(regex_constants::error_brack, m_position - m_base);
1578 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_set)
1584 fail(regex_constants::error_range, m_position - m_base);
1591 char_set.add_single(start_range);
// Reads the next literal item inside a character set and returns it as a
// digraph.  A dash, an escape, a "[.name.]" collating element and a plain
// character are each handled in their own branch.
// char_set: only queried with empty() in the visible code.
1594 template <class charT, class traits>
1595 digraph<charT> basic_regex_parser<charT, traits>::get_next_set_literal(basic_char_set<charT, traits>& char_set)
1597 digraph<charT> result;
1598 switch(this->m_traits.syntax_type(*m_position))
1600 case regex_constants::syntax_dash:
// in a non-empty set a dash is only accepted here when ']' follows it:
1601 if(!char_set.empty())
1603 // see if we are at the end of the set:
1604 if((++m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
1606 fail(regex_constants::error_range, m_position - m_base);
1611 result.first = *m_position++;
1613 case regex_constants::syntax_escape:
1614 // check to see if escapes are supported first:
// with no_escape_in_lists set, the escape character itself is the literal:
1615 if(this->flags() & regex_constants::no_escape_in_lists)
1617 result = *m_position++;
1621 result = unescape_character();
1623 case regex_constants::syntax_open_set:
1625 if(m_end == ++m_position)
1627 fail(regex_constants::error_collate, m_position - m_base);
// anything other than '.' after the '[' means this is not a collating
// element, and a single character is returned instead:
1630 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_dot)
1633 result.first = *m_position;
1637 if(m_end == ++m_position)
1639 fail(regex_constants::error_collate, m_position - m_base);
1642 const charT* name_first = m_position;
1643 // skip at least one character, then find the matching '.]'
1644 if(m_end == ++m_position)
1646 fail(regex_constants::error_collate, name_first - m_base);
1649 while((m_position != m_end)
1650 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_dot))
1652 const charT* name_last = m_position;
1653 if(m_end == m_position)
1655 fail(regex_constants::error_collate, name_first - m_base);
// the closing '.' must be followed directly by ']':
1658 if((m_end == ++m_position)
1659 || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
1661 fail(regex_constants::error_collate, name_first - m_base);
// the name must resolve to one or two characters, which are stored in the
// first and second members of the result:
1665 string_type s = this->m_traits.lookup_collatename(name_first, name_last);
1666 if(s.empty() || (s.size() > 2))
1668 fail(regex_constants::error_collate, name_first - m_base);
1671 result.first = s[0];
1673 result.second = s[1];
1679 result = *m_position++;
1685 // does a value fit in the specified charT type?
// The first parameter of each overload is unnamed or merely forwarded; it
// only serves to deduce charT.
// Overload for the mpl::true_ tag: v fits when no bits are left after
// shifting it right by the bit width of charT.
1687 template <class charT>
1688 bool valid_value(charT, boost::intmax_t v, const mpl::true_&)
1690 return (v >> (sizeof(charT) * CHAR_BIT)) == 0;
// Overload for the mpl::false_ tag: no check is performed.
1692 template <class charT>
1693 bool valid_value(charT, boost::intmax_t, const mpl::false_&)
1695 return true; // v will always fit in a charT
// Entry point: dispatches to the checking overload only when charT is
// narrower than boost::intmax_t.
1697 template <class charT>
1698 bool valid_value(charT c, boost::intmax_t v)
1700 return valid_value(c, v, mpl::bool_<(sizeof(charT) < sizeof(boost::intmax_t))>());
// Converts the escape sequence at m_position into the single character it
// denotes, dispatching on the escape type of the current character.
// Malformed sequences are reported through fail(), in most cases after
// m_position has been rewound to the escape character that started them.
1703 template <class charT, class traits>
1704 charT basic_regex_parser<charT, traits>::unescape_character()
1707 #pragma warning(push)
1708 #pragma warning(disable:4127)
1711 if(m_position == m_end)
1713 fail(regex_constants::error_escape, m_position - m_base, "Escape sequence terminated prematurely.");
1716 switch(this->m_traits.escape_syntax_type(*m_position))
1718 case regex_constants::escape_type_control_a:
1719 result = charT('\a');
1721 case regex_constants::escape_type_e:
1724 case regex_constants::escape_type_control_f:
1725 result = charT('\f');
1727 case regex_constants::escape_type_control_n:
1728 result = charT('\n');
1730 case regex_constants::escape_type_control_r:
1731 result = charT('\r');
1733 case regex_constants::escape_type_control_t:
1734 result = charT('\t');
1736 case regex_constants::escape_type_control_v:
1737 result = charT('\v');
1739 case regex_constants::escape_type_word_assert:
1740 result = charT('\b');
1742 case regex_constants::escape_type_ascii_control:
1744 if(m_position == m_end)
1746 // Rewind to start of escape:
1748 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1749 fail(regex_constants::error_escape, m_position - m_base, "ASCII escape sequence terminated prematurely.");
// the control character is the following character's value modulo 32:
1752 result = static_cast<charT>(*m_position % 32);
1754 case regex_constants::escape_type_hex:
1756 if(m_position == m_end)
1758 // Rewind to start of escape:
1760 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1761 fail(regex_constants::error_escape, m_position - m_base, "Hexadecimal escape sequence terminated prematurely.");
1764 // maybe have \x{ddd}
1765 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
1768 if(m_position == m_end)
1770 // Rewind to start of escape:
1772 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1773 fail(regex_constants::error_escape, m_position - m_base, "Missing } in hexadecimal escape sequence.");
// braced form: read base-16 digits up to the end of the input; the value
// must not exceed the charT maximum (when numeric_limits is specialized) and
// must be followed by the closing brace:
1776 boost::intmax_t i = this->m_traits.toi(m_position, m_end, 16);
1777 if((m_position == m_end)
1779 || ((std::numeric_limits<charT>::is_specialized) && (i > (boost::intmax_t)(std::numeric_limits<charT>::max)()))
1780 || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
1782 // Rewind to start of escape:
1784 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1785 fail(regex_constants::error_badbrace, m_position - m_base, "Hexadecimal escape sequence was invalid.");
// unbraced form: at most two characters are offered to the base-16 parser:
1793 std::ptrdiff_t len = (std::min)(static_cast<std::ptrdiff_t>(2), static_cast<std::ptrdiff_t>(m_end - m_position));
1794 boost::intmax_t i = this->m_traits.toi(m_position, m_position + len, 16);
1796 || !valid_value(charT(0), i))
1798 // Rewind to start of escape:
1800 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1801 fail(regex_constants::error_escape, m_position - m_base, "Escape sequence did not encode a valid character.");
1807 case regex_constants::syntax_digit:
1809 // an octal escape sequence, the first character must be a zero
1810 // followed by up to 3 octal digits:
// len is limited to four characters or to whatever input remains:
1811 std::ptrdiff_t len = (std::min)(::boost::BOOST_REGEX_DETAIL_NS::distance(m_position, m_end), static_cast<std::ptrdiff_t>(4));
// the first character is parsed on its own through a copy of the position:
1812 const charT* bp = m_position;
1813 boost::intmax_t val = this->m_traits.toi(bp, bp + 1, 8);
1816 // Rewind to start of escape:
1818 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1819 // Oops not an octal escape after all:
1820 fail(regex_constants::error_escape, m_position - m_base, "Invalid octal escape sequence.");
1823 val = this->m_traits.toi(m_position, m_position + len, 8);
1824 if((val < 0) || (val > (boost::intmax_t)(std::numeric_limits<charT>::max)()))
1826 // Rewind to start of escape:
1828 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1829 fail(regex_constants::error_escape, m_position - m_base, "Octal escape sequence is invalid.");
1832 return static_cast<charT>(val);
1834 case regex_constants::escape_type_named_char:
1837 if(m_position == m_end)
1839 // Rewind to start of escape:
1841 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1842 fail(regex_constants::error_escape, m_position - m_base);
1845 // maybe have \N{name}
1846 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
1848 const charT* base = m_position;
1849 // skip forward until we find enclosing brace:
1850 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
1852 if(m_position == m_end)
1854 // Rewind to start of escape:
1856 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1857 fail(regex_constants::error_escape, m_position - m_base);
// look up the text between the braces; ++base steps over the opening brace
// and m_position++ leaves the position just past the closing one:
1860 string_type s = this->m_traits.lookup_collatename(++base, m_position++);
1863 // Rewind to start of escape:
1865 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1866 fail(regex_constants::error_collate, m_position - m_base);
1874 // fall through is a failure:
1875 // Rewind to start of escape:
1877 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1878 fail(regex_constants::error_escape, m_position - m_base);
// here the character at the current position is itself the result:
1882 result = *m_position;
1888 #pragma warning(pop)
// Handles a digit at the current position (asserted not to be at the end of
// the input) that may be a back-reference.  Exactly one character is parsed
// as a base-10 number, and then one of three things happens:
//  - the value is 0, or the flags select perl_syntax_group together with
//    no_bk_refs: the text is unescaped and appended as a literal;
//  - bit (i-1) of m_backrefs is set: a backref state is appended;
//  - otherwise: the position is rewound to the escape character and
//    error_backref is reported.
1892 template <class charT, class traits>
1893 bool basic_regex_parser<charT, traits>::parse_backref()
1895 BOOST_ASSERT(m_position != m_end);
// parse through a copy so that m_position itself is not advanced here:
1896 const charT* pc = m_position;
1897 boost::intmax_t i = this->m_traits.toi(pc, pc + 1, 10);
1898 if((i == 0) || (((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group) && (this->flags() & regbase::no_bk_refs)))
1900 // not a backref at all but an octal escape sequence:
1901 charT c = unescape_character();
1902 this->append_literal(c);
1904 else if((i > 0) && (this->m_backrefs & (1u << (i-1))))
1907 re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
1909 pb->icase = this->flags() & regbase::icase;
1913 // Rewind to start of escape:
1915 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1916 fail(regex_constants::error_backref, m_position - m_base);
// Parses a quoted sequence introduced by \Q and closed by \E: the input is
// scanned for the terminating escape, after which the characters in between
// are appended one at a time as literals.
1922 template <class charT, class traits>
1923 bool basic_regex_parser<charT, traits>::parse_QE()
1926 #pragma warning(push)
1927 #pragma warning(disable:4127)
1930 // parse a \Q...\E sequence:
1932 ++m_position; // skip the Q
1933 const charT* start = m_position;
// advance to the next escape character, or to the end of the input:
1937 while((m_position != m_end)
1938 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape))
1940 if(m_position == m_end)
1942 // a \Q...\E sequence may terminate with the end of the expression:
1946 if(++m_position == m_end) // skip the escape
1948 fail(regex_constants::error_escape, m_position - m_base, "Unterminated \\Q...\\E sequence.");
1951 // check to see if it's a \E:
1952 if(this->m_traits.escape_syntax_type(*m_position) == regex_constants::escape_type_E)
// the quoted text ends two characters before the current position:
1955 end = m_position - 2;
1958 // otherwise go round again:
1961 // now add all the character between the two escapes as literals:
1965 this->append_literal(*start);
1970 #pragma warning(pop)
1974 template <class charT, class traits>
1975 bool basic_regex_parser<charT, traits>::parse_perl_extension()
1977 if(++m_position == m_end)
1979 // Rewind to start of (? sequence:
1981 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
1982 fail(regex_constants::error_perl_extension, m_position - m_base);
1986 // treat comments as a special case, as these
1987 // are the only ones that don't start with a leading
1990 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_hash)
1992 while((m_position != m_end)
1993 && (this->m_traits.syntax_type(*m_position++) != regex_constants::syntax_close_mark))
1998 // backup some state, and prepare the way:
2001 std::ptrdiff_t jump_offset = 0;
2002 re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
2003 pb->icase = this->flags() & regbase::icase;
2004 std::ptrdiff_t last_paren_start = this->getoffset(pb);
2005 // back up insertion point for alternations, and set new point:
2006 std::ptrdiff_t last_alt_point = m_alt_insert_point;
2007 this->m_pdata->m_data.align();
2008 m_alt_insert_point = this->m_pdata->m_data.size();
2009 std::ptrdiff_t expected_alt_point = m_alt_insert_point;
2010 bool restore_flags = true;
2011 regex_constants::syntax_option_type old_flags = this->flags();
2012 bool old_case_change = m_has_case_change;
2013 m_has_case_change = false;
2015 int mark_reset = m_mark_reset;
2016 int max_mark = m_max_mark;
2018 m_max_mark = m_mark_count;
2021 // select the actual extension used:
2023 switch(this->m_traits.syntax_type(*m_position))
2025 case regex_constants::syntax_or:
2026 m_mark_reset = m_mark_count;
2028 case regex_constants::syntax_colon:
2030 // a non-capturing mark:
2032 pb->index = markid = 0;
2035 case regex_constants::syntax_digit:
2038 // a recursive subexpression:
2040 v = this->m_traits.toi(m_position, m_end, 10);
2041 if((v < 0) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2043 // Rewind to start of (? sequence:
2045 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2046 fail(regex_constants::error_perl_extension, m_position - m_base, "The recursive sub-expression refers to an invalid marking group, or is unterminated.");
2050 pb->index = markid = 0;
2051 re_recurse* pr = static_cast<re_recurse*>(this->append_state(syntax_element_recurse, sizeof(re_recurse)));
2054 static_cast<re_case*>(
2055 this->append_state(syntax_element_toggle_case, sizeof(re_case))
2056 )->icase = this->flags() & regbase::icase;
2059 case regex_constants::syntax_plus:
2061 // A forward-relative recursive subexpression:
2064 v = this->m_traits.toi(m_position, m_end, 10);
2065 if((v <= 0) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2067 // Rewind to start of (? sequence:
2069 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2070 fail(regex_constants::error_perl_extension, m_position - m_base, "An invalid or unterminated recursive sub-expression.");
2074 goto insert_recursion;
2075 case regex_constants::syntax_dash:
2077 // Possibly a backward-relative recursive subexpression:
2080 v = this->m_traits.toi(m_position, m_end, 10);
2084 // Oops not a relative recursion at all, but a (?-imsx) group:
2085 goto option_group_jump;
2087 v = m_mark_count + 1 - v;
2090 // Rewind to start of (? sequence:
2092 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2093 fail(regex_constants::error_perl_extension, m_position - m_base, "An invalid or unterminated recursive sub-expression.");
2096 goto insert_recursion;
2097 case regex_constants::syntax_equal:
2098 pb->index = markid = -1;
2100 jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
2101 this->m_pdata->m_data.align();
2102 m_alt_insert_point = this->m_pdata->m_data.size();
2104 case regex_constants::syntax_not:
2105 pb->index = markid = -2;
2107 jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
2108 this->m_pdata->m_data.align();
2109 m_alt_insert_point = this->m_pdata->m_data.size();
2111 case regex_constants::escape_type_left_word:
2113 // a lookbehind assertion:
2114 if(++m_position == m_end)
2116 // Rewind to start of (? sequence:
2118 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2119 fail(regex_constants::error_perl_extension, m_position - m_base);
2122 regex_constants::syntax_type t = this->m_traits.syntax_type(*m_position);
2123 if(t == regex_constants::syntax_not)
2124 pb->index = markid = -2;
2125 else if(t == regex_constants::syntax_equal)
2126 pb->index = markid = -1;
2129 // Probably a named capture which also starts (?< :
2132 goto named_capture_jump;
2135 jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
2136 this->append_state(syntax_element_backstep, sizeof(re_brace));
2137 this->m_pdata->m_data.align();
2138 m_alt_insert_point = this->m_pdata->m_data.size();
2141 case regex_constants::escape_type_right_word:
2143 // an independent sub-expression:
2145 pb->index = markid = -3;
2147 jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
2148 this->m_pdata->m_data.align();
2149 m_alt_insert_point = this->m_pdata->m_data.size();
2151 case regex_constants::syntax_open_mark:
2153 // a conditional expression:
2154 pb->index = markid = -4;
2155 if(++m_position == m_end)
2157 // Rewind to start of (? sequence:
2159 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2160 fail(regex_constants::error_perl_extension, m_position - m_base);
2163 v = this->m_traits.toi(m_position, m_end, 10);
2164 if(m_position == m_end)
2166 // Rewind to start of (? sequence:
2168 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2169 fail(regex_constants::error_perl_extension, m_position - m_base);
2172 if(*m_position == charT('R'))
2174 if(++m_position == m_end)
2176 // Rewind to start of (? sequence:
2178 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2179 fail(regex_constants::error_perl_extension, m_position - m_base);
2182 if(*m_position == charT('&'))
2184 const charT* base = ++m_position;
2185 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2187 if(m_position == m_end)
2189 // Rewind to start of (? sequence:
2191 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2192 fail(regex_constants::error_perl_extension, m_position - m_base);
2195 v = -static_cast<int>(hash_value_from_capture_name(base, m_position));
2199 v = -this->m_traits.toi(m_position, m_end, 10);
2201 re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
2202 br->index = v < 0 ? (v - 1) : 0;
2203 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
2205 // Rewind to start of (? sequence:
2207 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2208 fail(regex_constants::error_perl_extension, m_position - m_base);
2211 if(++m_position == m_end)
2213 // Rewind to start of (? sequence:
2215 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2216 fail(regex_constants::error_perl_extension, m_position - m_base);
2220 else if((*m_position == charT('\'')) || (*m_position == charT('<')))
2222 const charT* base = ++m_position;
2223 while((m_position != m_end) && (*m_position != charT('>')) && (*m_position != charT('\'')))
2225 if(m_position == m_end)
2227 // Rewind to start of (? sequence:
2229 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2230 fail(regex_constants::error_perl_extension, m_position - m_base);
2233 v = static_cast<int>(hash_value_from_capture_name(base, m_position));
2234 re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
2236 if(((*m_position != charT('>')) && (*m_position != charT('\''))) || (++m_position == m_end))
2238 // Rewind to start of (? sequence:
2240 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2241 fail(regex_constants::error_perl_extension, m_position - m_base, "Unterminated named capture.");
2244 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
2246 // Rewind to start of (? sequence:
2248 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2249 fail(regex_constants::error_perl_extension, m_position - m_base);
2252 if(++m_position == m_end)
2254 // Rewind to start of (? sequence:
2256 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2257 fail(regex_constants::error_perl_extension, m_position - m_base);
2261 else if(*m_position == charT('D'))
2263 const char* def = "DEFINE";
2264 while(*def && (m_position != m_end) && (*m_position == charT(*def)))
2265 ++m_position, ++def;
2266 if((m_position == m_end) || *def)
2268 // Rewind to start of (? sequence:
2270 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2271 fail(regex_constants::error_perl_extension, m_position - m_base);
2274 re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
2275 br->index = 9999; // special magic value!
2276 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
2278 // Rewind to start of (? sequence:
2280 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2281 fail(regex_constants::error_perl_extension, m_position - m_base);
2284 if(++m_position == m_end)
2286 // Rewind to start of (? sequence:
2288 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2289 fail(regex_constants::error_perl_extension, m_position - m_base);
2295 re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
2297 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
2299 // Rewind to start of (? sequence:
2301 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2302 fail(regex_constants::error_perl_extension, m_position - m_base);
2305 if(++m_position == m_end)
2307 // Rewind to start of (? sequence:
2309 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2310 fail(regex_constants::error_perl_extension, m_position - m_base);
2316 // verify that we have a lookahead or lookbehind assert:
2317 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_question)
2319 // Rewind to start of (? sequence:
2321 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2322 fail(regex_constants::error_perl_extension, m_position - m_base);
2325 if(++m_position == m_end)
2327 // Rewind to start of (? sequence:
2329 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2330 fail(regex_constants::error_perl_extension, m_position - m_base);
2333 if(this->m_traits.syntax_type(*m_position) == regex_constants::escape_type_left_word)
2335 if(++m_position == m_end)
2337 // Rewind to start of (? sequence:
2339 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2340 fail(regex_constants::error_perl_extension, m_position - m_base);
2343 if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal)
2344 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not))
2346 // Rewind to start of (? sequence:
2348 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2349 fail(regex_constants::error_perl_extension, m_position - m_base);
2356 if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal)
2357 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not))
2359 // Rewind to start of (? sequence:
2361 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2362 fail(regex_constants::error_perl_extension, m_position - m_base);
2370 case regex_constants::syntax_close_mark:
2371 // Rewind to start of (? sequence:
2373 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2374 fail(regex_constants::error_perl_extension, m_position - m_base);
2376 case regex_constants::escape_type_end_buffer:
2378 name_delim = *m_position;
2381 if(0 == (this->flags() & regbase::nosubs))
2383 markid = ++m_mark_count;
2384 #ifndef BOOST_NO_STD_DISTANCE
2385 if(this->flags() & regbase::save_subexpression_location)
2386 this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>(std::distance(m_base, m_position) - 2, 0));
2388 if(this->flags() & regbase::save_subexpression_location)
2389 this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>((m_position - m_base) - 2, 0));
2393 const charT* base = ++m_position;
2394 if(m_position == m_end)
2396 // Rewind to start of (? sequence:
2398 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2399 fail(regex_constants::error_perl_extension, m_position - m_base);
2402 while((m_position != m_end) && (*m_position != name_delim))
2404 if(m_position == m_end)
2406 // Rewind to start of (? sequence:
2408 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2409 fail(regex_constants::error_perl_extension, m_position - m_base);
2412 this->m_pdata->set_name(base, m_position, markid);
2417 if(*m_position == charT('R'))
2421 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
2423 // Rewind to start of (? sequence:
2425 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2426 fail(regex_constants::error_perl_extension, m_position - m_base);
2429 goto insert_recursion;
2431 if(*m_position == charT('&'))
2434 const charT* base = m_position;
2435 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2437 if(m_position == m_end)
2439 // Rewind to start of (? sequence:
2441 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2442 fail(regex_constants::error_perl_extension, m_position - m_base);
2445 v = static_cast<int>(hash_value_from_capture_name(base, m_position));
2446 goto insert_recursion;
2448 if(*m_position == charT('P'))
2451 if(m_position == m_end)
2453 // Rewind to start of (? sequence:
2455 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2456 fail(regex_constants::error_perl_extension, m_position - m_base);
2459 if(*m_position == charT('>'))
2462 const charT* base = m_position;
2463 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2465 if(m_position == m_end)
2467 // Rewind to start of (? sequence:
2469 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2470 fail(regex_constants::error_perl_extension, m_position - m_base);
2473 v = static_cast<int>(hash_value_from_capture_name(base, m_position));
2474 goto insert_recursion;
2478 // lets assume that we have a (?imsx) group and try and parse it:
2481 regex_constants::syntax_option_type opts = parse_options();
2482 if(m_position == m_end)
2484 // Rewind to start of (? sequence:
2486 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2487 fail(regex_constants::error_perl_extension, m_position - m_base);
2490 // make a note of whether we have a case change:
2491 m_has_case_change = ((opts & regbase::icase) != (this->flags() & regbase::icase));
2492 pb->index = markid = 0;
2493 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark)
2495 // update flags and carry on as normal:
2497 restore_flags = false;
2498 old_case_change |= m_has_case_change; // defer end of scope by one ')'
2500 else if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_colon)
2502 // update flags and carry on until the matching ')' is found:
2508 // Rewind to start of (? sequence:
2510 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2511 fail(regex_constants::error_perl_extension, m_position - m_base);
2515 // finally append a case change state if we need it:
2516 if(m_has_case_change)
2518 static_cast<re_case*>(
2519 this->append_state(syntax_element_toggle_case, sizeof(re_case))
2520 )->icase = opts & regbase::icase;
2525 // now recursively add more states, this will terminate when we get to a
2530 // Unwind alternatives:
2532 if(0 == unwind_alts(last_paren_start))
2534 // Rewind to start of (? sequence:
2536 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2537 fail(regex_constants::error_perl_extension, m_position - m_base, "Invalid alternation operators within (?...) block.");
2541 // we either have a ')' or we have run out of characters prematurely:
2543 if(m_position == m_end)
2545 // Rewind to start of (? sequence:
2547 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2548 this->fail(regex_constants::error_paren, ::boost::BOOST_REGEX_DETAIL_NS::distance(m_base, m_end));
2551 BOOST_ASSERT(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark);
2554 // restore the flags:
2558 // append a case change state if we need it:
2559 if(m_has_case_change)
2561 static_cast<re_case*>(
2562 this->append_state(syntax_element_toggle_case, sizeof(re_case))
2563 )->icase = old_flags & regbase::icase;
2565 this->flags(old_flags);
2568 // set up the jump pointer if we have one:
2572 this->m_pdata->m_data.align();
2573 re_jump* jmp = static_cast<re_jump*>(this->getaddress(jump_offset));
2574 jmp->alt.i = this->m_pdata->m_data.size() - this->getoffset(jmp);
2575 if((this->m_last_state == jmp) && (markid != -2))
2577 // Oops... we didn't have anything inside the assertion.
2578 // Note we don't get here for negated forward lookahead as (?!)
2579 // does have some uses.
2580 // Rewind to start of (? sequence:
2582 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2583 fail(regex_constants::error_perl_extension, m_position - m_base, "Invalid or empty zero width assertion.");
2588 // verify that if this is conditional expression, that we do have
2589 // an alternative, if not add one:
2593 re_syntax_base* b = this->getaddress(expected_alt_point);
2594 // Make sure we have exactly one alternative following this state:
2595 if(b->type != syntax_element_alt)
2597 re_alt* alt = static_cast<re_alt*>(this->insert_state(expected_alt_point, syntax_element_alt, sizeof(re_alt)));
2598 alt->alt.i = this->m_pdata->m_data.size() - this->getoffset(alt);
2600 else if(((std::ptrdiff_t)this->m_pdata->m_data.size() > (static_cast<re_alt*>(b)->alt.i + this->getoffset(b))) && (static_cast<re_alt*>(b)->alt.i > 0) && this->getaddress(static_cast<re_alt*>(b)->alt.i, b)->type == syntax_element_alt)
2602 // Can't have seen more than one alternative:
2603 // Rewind to start of (? sequence:
2605 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2606 fail(regex_constants::error_bad_pattern, m_position - m_base, "More than one alternation operator | was encountered inside a conditional expression.");
2611 // We must *not* have seen an alternative inside a (DEFINE) block:
2612 b = this->getaddress(b->next.i, b);
2613 if((b->type == syntax_element_assert_backref) && (static_cast<re_brace*>(b)->index == 9999))
2615 // Rewind to start of (? sequence:
2617 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2618 fail(regex_constants::error_bad_pattern, m_position - m_base, "Alternation operators are not allowed inside a DEFINE block.");
2622 // check for invalid repetition of next state:
2623 b = this->getaddress(expected_alt_point);
2624 b = this->getaddress(static_cast<re_alt*>(b)->next.i, b);
2625 if((b->type != syntax_element_assert_backref)
2626 && (b->type != syntax_element_startmark))
2628 // Rewind to start of (? sequence:
2630 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2631 fail(regex_constants::error_badrepeat, m_position - m_base, "A repetition operator cannot be applied to a zero-width assertion.");
2636 // append closing parenthesis state:
2638 pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
2640 pb->icase = this->flags() & regbase::icase;
2641 this->m_paren_start = last_paren_start;
2643 // restore the alternate insertion point:
2645 this->m_alt_insert_point = last_alt_point;
2647 // and the case change data:
2649 m_has_case_change = old_case_change;
2651 // And the mark_reset data:
2653 if(m_max_mark > m_mark_count)
2655 m_mark_count = m_max_mark;
2657 m_mark_reset = mark_reset;
2658 m_max_mark = max_mark;
2663 #ifndef BOOST_NO_STD_DISTANCE
2664 if(this->flags() & regbase::save_subexpression_location)
2665 this->m_pdata->m_subs.at(markid - 1).second = std::distance(m_base, m_position) - 1;
2667 if(this->flags() & regbase::save_subexpression_location)
2668 this->m_pdata->m_subs.at(markid - 1).second = (m_position - m_base) - 1;
2671 // allow backrefs to this mark:
2673 if(markid < (int)(sizeof(unsigned) * CHAR_BIT))
2674 this->m_backrefs |= 1u << (markid - 1);
// Compares the narrow string `verb` against the pattern text at m_position.
// The callers in parse_perl_verb pass the tail of a verb name ("AIL",
// "CCEPT", "OMMIT", ...) and test the result in an `if`.
// Both visible failure paths report error_perl_extension at the offset of
// the nearest preceding open-mark character.
// NOTE(review): this extract omits the braces, the loop over `verb` and the
// return statements; presumably a non-zero result means every character
// matched -- confirm against the full file.
2679 template <class charT, class traits>
2680 bool basic_regex_parser<charT, traits>::match_verb(const char* verb)
// Current verb character (widened to charT) differs from the pattern character:
2684 if(static_cast<charT>(*verb) != *m_position)
// walk back until an open-mark character is found, then report the error there.
2686 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2687 fail(regex_constants::error_perl_extension, m_position - m_base);
// Advance in the pattern; reaching m_end here is an error as well.
2690 if(++m_position == m_end)
2693 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2694 fail(regex_constants::error_perl_extension, m_position - m_base);
// Parses a "(*" verb sequence (see the "Rewind to start of (* sequence"
// comments below) and appends the corresponding state to the state machine.
//
// Verb tails checked via match_verb, with the state each one appends:
//   "AIL"   -> syntax_element_fail
//   "CCEPT" -> syntax_element_accept
//   "OMMIT" -> syntax_element_commit, action = commit_commit
//   "RUNE"  -> syntax_element_commit, action = commit_prune
//   "KIP"   -> syntax_element_commit, action = commit_skip
//   "HEN"   -> syntax_element_then
// In the visible lines the commit/prune/skip/then branches also set
// m_pdata->m_disable_match_any to true.
//
// Every visible error path walks m_position back to the nearest open-mark
// character and calls fail() with error_perl_extension at that offset.
// NOTE(review): the dispatch on the verb's first letter, the braces and the
// return statements are not visible in this extract; presumably the tails
// above complete FAIL, ACCEPT, COMMIT, PRUNE, SKIP and THEN -- confirm
// against the full file.
2702 template <class charT, class traits>
2703 bool basic_regex_parser<charT, traits>::parse_perl_verb()
// Step past the current character; running out of pattern is an error.
2705 if(++m_position == m_end)
2707 // Rewind to start of (* sequence:
2709 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2710 fail(regex_constants::error_perl_extension, m_position - m_base);
2716 if(++m_position == m_end)
2718 // Rewind to start of (* sequence:
2720 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2721 fail(regex_constants::error_perl_extension, m_position - m_base);
// This branch is taken either when a close mark follows immediately or when
// the tail "AIL" matches.
2724 if((this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark) || match_verb("AIL"))
// The verb must be terminated by a close mark.
2726 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2728 // Rewind to start of (* sequence:
2730 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2731 fail(regex_constants::error_perl_extension, m_position - m_base);
2735 this->append_state(syntax_element_fail);
2740 if(++m_position == m_end)
2742 // Rewind to start of (* sequence:
2744 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2745 fail(regex_constants::error_perl_extension, m_position - m_base);
2748 if(match_verb("CCEPT"))
2750 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2752 // Rewind to start of (* sequence:
2754 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2755 fail(regex_constants::error_perl_extension, m_position - m_base);
2759 this->append_state(syntax_element_accept);
2764 if(++m_position == m_end)
2766 // Rewind to start of (* sequence:
2768 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2769 fail(regex_constants::error_perl_extension, m_position - m_base);
2772 if(match_verb("OMMIT"))
2774 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2776 // Rewind to start of (* sequence:
2778 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2779 fail(regex_constants::error_perl_extension, m_position - m_base);
// The commit-family verbs share one state type and differ only in `action`.
2783 static_cast<re_commit*>(this->append_state(syntax_element_commit, sizeof(re_commit)))->action = commit_commit;
2784 this->m_pdata->m_disable_match_any = true;
2789 if(++m_position == m_end)
2791 // Rewind to start of (* sequence:
2793 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2794 fail(regex_constants::error_perl_extension, m_position - m_base);
2797 if(match_verb("RUNE"))
2799 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2801 // Rewind to start of (* sequence:
2803 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2804 fail(regex_constants::error_perl_extension, m_position - m_base);
2808 static_cast<re_commit*>(this->append_state(syntax_element_commit, sizeof(re_commit)))->action = commit_prune;
2809 this->m_pdata->m_disable_match_any = true;
2814 if(++m_position == m_end)
2816 // Rewind to start of (* sequence:
2818 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2819 fail(regex_constants::error_perl_extension, m_position - m_base);
2822 if(match_verb("KIP"))
2824 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2826 // Rewind to start of (* sequence:
2828 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2829 fail(regex_constants::error_perl_extension, m_position - m_base);
2833 static_cast<re_commit*>(this->append_state(syntax_element_commit, sizeof(re_commit)))->action = commit_skip;
2834 this->m_pdata->m_disable_match_any = true;
2839 if(++m_position == m_end)
2841 // Rewind to start of (* sequence:
2843 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2844 fail(regex_constants::error_perl_extension, m_position - m_base);
2847 if(match_verb("HEN"))
2849 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2851 // Rewind to start of (* sequence:
2853 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2854 fail(regex_constants::error_perl_extension, m_position - m_base);
2858 this->append_state(syntax_element_then);
2859 this->m_pdata->m_disable_match_any = true;
// Final error path of the function, reported like the ones above.
2864 // Rewind to start of (* sequence:
2866 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2867 fail(regex_constants::error_perl_extension, m_position - m_base);
// Builds a character set for an emacs style \sx or \Sx construct and appends
// it to the state machine via append_set().
//
// Errors reported in the visible lines:
//   - error_escape if the pattern ends right after the current character
//     (reported at the preceding escape character);
//   - error_ctype on the path shown after the character groups below;
//   - error_ctype if append_set() returns 0.
// NOTE(review): this extract omits the braces, the switch that selects which
// group of characters/classes below is added, the use of `negate`, and the
// return statements -- confirm against the full file before relying on any
// mapping from code letter to group.
2871 template <class charT, class traits>
2872 bool basic_regex_parser<charT, traits>::add_emacs_code(bool negate)
2875 // parses an emacs style \sx or \Sx construct.
2877 if(++m_position == m_end)
2879 // Rewind to start of sequence:
2881 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
2882 fail(regex_constants::error_escape, m_position - m_base);
// Set under construction; filled by one of the groups below.
2885 basic_char_set<charT, traits> char_set;
// Class name "punct" as a charT array, passed to lookup_classname() below.
2889 static const charT s_punct[5] = { 'p', 'u', 'n', 'c', 't', };
// Whole-class groups: the parser's space mask and word mask.
2895 char_set.add_class(this->m_mask_space);
2898 char_set.add_class(this->m_word_mask);
// Group of individual characters: $ & * + - _ < >
2901 char_set.add_single(digraph<charT>(charT('$')));
2902 char_set.add_single(digraph<charT>(charT('&')));
2903 char_set.add_single(digraph<charT>(charT('*')));
2904 char_set.add_single(digraph<charT>(charT('+')));
2905 char_set.add_single(digraph<charT>(charT('-')));
2906 char_set.add_single(digraph<charT>(charT('_')));
2907 char_set.add_single(digraph<charT>(charT('<')));
2908 char_set.add_single(digraph<charT>(charT('>')));
// Class resolved by name ("punct") through the traits object.
2911 char_set.add_class(this->m_traits.lookup_classname(s_punct, s_punct+5));
// Opening brackets: ( [ {
2914 char_set.add_single(digraph<charT>(charT('(')));
2915 char_set.add_single(digraph<charT>(charT('[')));
2916 char_set.add_single(digraph<charT>(charT('{')));
// Closing brackets: ) ] }
2919 char_set.add_single(digraph<charT>(charT(')')));
2920 char_set.add_single(digraph<charT>(charT(']')));
2921 char_set.add_single(digraph<charT>(charT('}')));
// Quote characters: " ' `
2924 char_set.add_single(digraph<charT>(charT('"')));
2925 char_set.add_single(digraph<charT>(charT('\'')));
2926 char_set.add_single(digraph<charT>(charT('`')));
// Group: ' , #
2929 char_set.add_single(digraph<charT>(charT('\'')));
2930 char_set.add_single(digraph<charT>(charT(',')));
2931 char_set.add_single(digraph<charT>(charT('#')));
// Single character: ;
2934 char_set.add_single(digraph<charT>(charT(';')));
// Group: newline and form feed.
2937 char_set.add_single(digraph<charT>(charT('\n')));
2938 char_set.add_single(digraph<charT>(charT('\f')));
// NOTE(review): presumably the fall-back for an unrecognised code letter -- confirm.
2941 fail(regex_constants::error_ctype, m_position - m_base);
// append_set() returning 0 is treated as failure to create the set state.
2944 if(0 == this->append_set(char_set))
2946 fail(regex_constants::error_ctype, m_position - m_base);
// Converts a (?imsx-imsx) option group into a set of syntax option flags,
// starting from the parser's current flags.
//
// Flag updates visible before the '-' separator:
//   set mod_s and clear no_mod_s; clear no_mod_m; set icase; set mod_x.
// Flag updates visible after the '-' separator:
//   clear mod_s and set no_mod_s; set no_mod_m; clear icase; clear mod_x.
// If the pattern ends while options are being read, the position is walked
// back to the nearest open-mark character and error_paren is reported there.
// NOTE(review): this extract omits the loops, the switch/case labels tying
// each update to an option letter, the use of `breakout` and the return
// statement; presumably the accumulated value `f` is returned -- confirm.
2953 template <class charT, class traits>
2954 regex_constants::syntax_option_type basic_regex_parser<charT, traits>::parse_options()
2956 // we have a (?imsx-imsx) group, convert it into a set of flags:
2957 regex_constants::syntax_option_type f = this->flags();
2958 bool breakout = false;
// Options being switched on:
2964 f |= regex_constants::mod_s;
2965 f &= ~regex_constants::no_mod_s;
2968 f &= ~regex_constants::no_mod_m;
2971 f |= regex_constants::icase;
2974 f |= regex_constants::mod_x;
// Advance to the next option character; end of pattern is an error.
2980 if(++m_position == m_end)
2982 // Rewind to start of (? sequence:
2984 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2985 fail(regex_constants::error_paren, m_position - m_base);
// A '-' introduces the options being switched off.
2993 if(*m_position == static_cast<charT>('-'))
2995 if(++m_position == m_end)
2997 // Rewind to start of (? sequence:
2999 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
3000 fail(regex_constants::error_paren, m_position - m_base);
// Inverse of the updates above:
3008 f &= ~regex_constants::mod_s;
3009 f |= regex_constants::no_mod_s;
3012 f |= regex_constants::no_mod_m;
3015 f &= ~regex_constants::icase;
3018 f &= ~regex_constants::mod_x;
3024 if(++m_position == m_end)
3026 // Rewind to start of (? sequence:
3028 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
3029 fail(regex_constants::error_paren, m_position - m_base);
// Finishes the alternatives opened since `last_paren_start`.
//
// Step 1 (visible check): if no state was added after the last alternative
// (m_alt_insert_point equals the current size of the state data) and there is
// a pending jump newer than last_paren_start, error_empty is reported with
// the message below. The check also consults the perl_syntax_group and
// no_empty_expressions flags.
// Step 2: every pending entry of m_alt_jumps greater than last_paren_start is
// popped and its jump state is patched to point at the current end of the
// state data.
// NOTE(review): this extract omits the braces, the operators combining the
// two flag tests with the rest of the condition, and the return statements.
// A caller visible earlier in the file treats a result equal to 0 as
// failure -- confirm the exact return values.
3038 template <class charT, class traits>
3039 bool basic_regex_parser<charT, traits>::unwind_alts(std::ptrdiff_t last_paren_start)
3042 // If we didn't actually add any states after the last
3043 // alternative then that's an error:
3045 if((this->m_alt_insert_point == static_cast<std::ptrdiff_t>(this->m_pdata->m_data.size()))
3046 && m_alt_jumps.size() && (m_alt_jumps.back() > last_paren_start)
3049 ((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group)
3051 ((this->flags() & regbase::no_empty_expressions) == 0)
3055 fail(regex_constants::error_empty, this->m_position - this->m_base, "Can't terminate a sub-expression with an alternation operator |.");
3059 // Fix up our alternatives:
3061 while(m_alt_jumps.size() && (m_alt_jumps.back() > last_paren_start))
3064 // fix up the jump to point to the end of the states
3065 // that we've just added:
3067 std::ptrdiff_t jump_offset = m_alt_jumps.back();
3068 m_alt_jumps.pop_back();
// Align the state data before measuring its size, so the stored offset refers
// to the aligned end.
3069 this->m_pdata->m_data.align();
3070 re_jump* jmp = static_cast<re_jump*>(this->getaddress(jump_offset));
3071 BOOST_ASSERT(jmp->type == syntax_element_jump);
// alt.i is stored relative to the jump state's own offset.
3072 jmp->alt.i = this->m_pdata->m_data.size() - jump_offset;
3078 #pragma warning(pop)
3081 } // namespace BOOST_REGEX_DETAIL_NS
3082 } // namespace boost
3085 #pragma warning(push)
3086 #pragma warning(disable: 4103)
3088 #ifdef BOOST_HAS_ABI_HEADERS
3089 # include BOOST_ABI_SUFFIX
3092 #pragma warning(pop)