6 * Use, modification and distribution are subject to the
7 * Boost Software License, Version 1.0. (See accompanying file
8 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
13 * LOCATION: see http://www.boost.org for most recent version.
14 * FILE basic_regex_parser.hpp
15 * VERSION see <boost/version.hpp>
16 * DESCRIPTION: Declares template class basic_regex_parser.
19 #ifndef BOOST_REGEX_V4_BASIC_REGEX_PARSER_HPP
20 #define BOOST_REGEX_V4_BASIC_REGEX_PARSER_HPP
24 #pragma warning(disable: 4103)
26 #ifdef BOOST_HAS_ABI_HEADERS
27 # include BOOST_ABI_PREFIX
34 namespace BOOST_REGEX_DETAIL_NS{
38 #pragma warning(disable:4244 4800)
// Regular-expression parser. Derives from basic_regex_creator<charT, traits>
// and declares the parse entry point, the error-reporting overloads, the
// routines that handle individual syntactic constructs, and the parser state.
41 template <class charT, class traits>
42 class basic_regex_parser : public basic_regex_creator<charT, traits>
// Construction, the parse entry point and error reporting:
45 basic_regex_parser(regex_data<charT, traits>* data);
46 void parse(const charT* p1, const charT* p2, unsigned flags);
47 void fail(regex_constants::error_type error_code, std::ptrdiff_t position);
48 void fail(regex_constants::error_type error_code, std::ptrdiff_t position, std::string message, std::ptrdiff_t start_pos);
// Convenience overload: forwards to the four-argument fail() with
// start_pos equal to position.
49 void fail(regex_constants::error_type error_code, std::ptrdiff_t position, const std::string& message)
51 fail(error_code, position, message, position);
// Routines that parse individual constructs of the expression:
56 bool parse_extended();
58 bool parse_open_paren();
59 bool parse_basic_escape();
60 bool parse_extended_escape();
61 bool parse_match_any();
// With the default arguments the repeat is unbounded: low = 0 and
// high = the largest std::size_t value.
62 bool parse_repeat(std::size_t low = 0, std::size_t high = (std::numeric_limits<std::size_t>::max)());
63 bool parse_repeat_range(bool isbasic);
67 void parse_set_literal(basic_char_set<charT, traits>& char_set);
68 bool parse_inner_set(basic_char_set<charT, traits>& char_set);
70 bool parse_perl_extension();
71 bool parse_perl_verb();
72 bool match_verb(const char*);
73 bool add_emacs_code(bool negate);
74 bool unwind_alts(std::ptrdiff_t last_paren_start);
75 digraph<charT> get_next_set_literal(basic_char_set<charT, traits>& char_set);
76 charT unescape_character();
77 regex_constants::syntax_option_type parse_options();
// Pointer-to-member type used to select the top-level parsing routine:
80 typedef bool (basic_regex_parser::*parser_proc_type)();
81 typedef typename traits::string_type string_type;
82 typedef typename traits::char_class_type char_class_type;
83 parser_proc_type m_parser_proc; // the main parser to use
84 const charT* m_base; // the start of the string being parsed
85 const charT* m_end; // the end of the string being parsed
86 const charT* m_position; // our current parser position
87 unsigned m_mark_count; // how many sub-expressions we have
88 int m_mark_reset; // used to indicate that we're inside a (?|...) block.
89 unsigned m_max_mark; // largest mark count seen inside a (?|...) block.
90 std::ptrdiff_t m_paren_start; // where the last seen ')' began (where repeats are inserted).
91 std::ptrdiff_t m_alt_insert_point; // where to insert the next alternative
92 bool m_has_case_change; // true if somewhere in the current block the case has changed
93 #if defined(BOOST_MSVC) && defined(_M_IX86)
94 // This is an ugly warning suppression workaround (for warnings *inside* std::vector
95 // that can not otherwise be suppressed)...
96 BOOST_STATIC_ASSERT(sizeof(long) >= sizeof(void*));
97 std::vector<long> m_alt_jumps; // list of alternatives in the current scope.
99 std::vector<std::ptrdiff_t> m_alt_jumps; // list of alternatives in the current scope.
// Copy assignment and copy construction are declared here without a body.
102 basic_regex_parser& operator=(const basic_regex_parser&);
103 basic_regex_parser(const basic_regex_parser&);
// Constructor: passes data on to the basic_regex_creator base class and
// initialises the parser counters: no marks seen, m_mark_reset of -1,
// insertion offsets at zero and no case change recorded.
106 template <class charT, class traits>
107 basic_regex_parser<charT, traits>::basic_regex_parser(regex_data<charT, traits>* data)
108 : basic_regex_creator<charT, traits>(data), m_mark_count(0), m_mark_reset(-1), m_max_mark(0), m_paren_start(0), m_alt_insert_point(0), m_has_case_change(false)
// Parses the expression whose text starts at p1, using the syntax options in
// l_flags. The parsing routine (parse_extended, parse_basic or parse_literal)
// is chosen from (l_flags & regbase::main_option_type) and driven by
// parse_all(). Problems are reported through fail(). When no error status has
// been recorded, the sub-expression count is stored in m_pdata and
// finalize(p1, p2) is called.
//   p1      - start of the expression text (becomes m_base and m_position)
//   p2      - forwarded to finalize() together with p1
//   l_flags - syntax option flags
112 template <class charT, class traits>
113 void basic_regex_parser<charT, traits>::parse(const charT* p1, const charT* p2, unsigned l_flags)
115 // pass l_flags on to base class:
118 m_position = m_base = p1;
120 // empty strings are errors:
123 ((l_flags & regbase::main_option_type) != regbase::perl_syntax_group)
124 || (l_flags & regbase::no_empty_expressions)
128 fail(regex_constants::error_empty, 0);
131 // select which parser to use:
132 switch(l_flags & regbase::main_option_type)
134 case regbase::perl_syntax_group:
136 m_parser_proc = &basic_regex_parser<charT, traits>::parse_extended;
138 // Add a leading paren with index zero to give recursions a target:
140 re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
142 br->icase = this->flags() & regbase::icase;
145 case regbase::basic_syntax_group:
146 m_parser_proc = &basic_regex_parser<charT, traits>::parse_basic;
148 case regbase::literal:
149 m_parser_proc = &basic_regex_parser<charT, traits>::parse_literal;
152 // Ooops, someone has managed to set more than one of the main option flags,
153 // so this must be an error:
154 fail(regex_constants::error_unknown, 0, "An invalid combination of regular expression syntax flags was used.");
158 // parse all our characters:
159 bool result = parse_all();
161 // Unwind our alternatives:
164 // reset l_flags as a global scope (?imsx) may have altered them:
165 this->flags(l_flags);
166 // if we haven't gobbled up all the characters then we must
167 // have had an unexpected ')' :
// NOTE(review): the message text below misspells the word opening; it is a
// runtime string, so it is deliberately left unchanged here.
170 fail(regex_constants::error_paren, ::boost::BOOST_REGEX_DETAIL_NS::distance(m_base, m_position), "Found a closing ) with no corresponding openening parenthesis.");
173 // if an error has been set then give up now:
174 if(this->m_pdata->m_status)
176 // fill in our sub-expression count:
// (one more than m_mark_count)
177 this->m_pdata->m_mark_count = 1 + m_mark_count;
178 this->finalize(p1, p2);
// Reports an error using the default text for error_code: the message is
// obtained from the traits object via error_string() and then passed to the
// fail() overload that takes a message.
//   error_code - the error being reported
//   position   - offset of the error within the expression
181 template <class charT, class traits>
182 void basic_regex_parser<charT, traits>::fail(regex_constants::error_type error_code, std::ptrdiff_t position)
184 // get the error message:
185 std::string message = this->m_pdata->m_ptraits->error_string(error_code);
186 fail(error_code, position, message);
// Records a parse error and stops further parsing.
//   error_code - stored in m_pdata->m_status unless a status is already set
//   position   - offset of the error relative to m_base
//   message    - description of the error; may be extended below with the
//                text of the expression surrounding the error
//   start_pos  - offset at which the quoted text should begin; when it equals
//                position a window starting up to 10 characters earlier is used
// Unless the no_except flag is set, a boost::regex_error is built from the
// final message (when exceptions are available).
189 template <class charT, class traits>
190 void basic_regex_parser<charT, traits>::fail(regex_constants::error_type error_code, std::ptrdiff_t position, std::string message, std::ptrdiff_t start_pos)
192 if(0 == this->m_pdata->m_status) // update the error code if not already set
193 this->m_pdata->m_status = error_code;
194 m_position = m_end; // don't bother parsing anything else
196 #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
198 // Augment error message with the regular expression text:
// The quoted window is clamped to the range [0, m_end - m_base].
200 if(start_pos == position)
201 start_pos = (std::max)(static_cast<std::ptrdiff_t>(0), position - static_cast<std::ptrdiff_t>(10));
202 std::ptrdiff_t end_pos = (std::min)(position + static_cast<std::ptrdiff_t>(10), static_cast<std::ptrdiff_t>(m_end - m_base));
203 if(error_code != regex_constants::error_empty)
// Say fragment when the window does not cover the whole expression:
205 if((start_pos != 0) || (end_pos != (m_end - m_base)))
206 message += " The error occurred while parsing the regular expression fragment: '";
208 message += " The error occurred while parsing the regular expression: '";
209 if(start_pos != end_pos)
// Text before the error, a marker, then the text after the error:
211 message += std::string(m_base + start_pos, m_base + position);
212 message += ">>>HERE>>>";
213 message += std::string(m_base + position, m_base + end_pos);
219 #ifndef BOOST_NO_EXCEPTIONS
220 if(0 == (this->flags() & regex_constants::no_except))
222 boost::regex_error e(message, error_code, position);
226 (void)position; // suppress warnings.
// Repeatedly invokes the selected parsing routine (m_parser_proc) while the
// previous call reported success and m_position has not reached m_end.
230 template <class charT, class traits>
231 bool basic_regex_parser<charT, traits>::parse_all()
234 while(result && (m_position != m_end))
236 result = (this->*m_parser_proc)();
242 #pragma warning(push)
243 #pragma warning(disable:4702)
// Parsing routine used when regbase::basic_syntax_group is selected: handles
// the element at m_position by dispatching on the syntax type of the current
// character.
245 template <class charT, class traits>
246 bool basic_regex_parser<charT, traits>::parse_basic()
248 switch(this->m_traits.syntax_type(*m_position))
250 case regex_constants::syntax_escape:
251 return parse_basic_escape();
252 case regex_constants::syntax_dot:
253 return parse_match_any();
254 case regex_constants::syntax_caret:
256 this->append_state(syntax_element_start_line);
258 case regex_constants::syntax_dollar:
260 this->append_state(syntax_element_end_line);
262 case regex_constants::syntax_star:
// A star with no preceding state, or directly after a start-of-line
// state, is taken as a literal character rather than a repeat.
263 if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line))
264 return parse_literal();
268 return parse_repeat();
270 case regex_constants::syntax_plus:
// As for star, and additionally a literal unless the emacs_ex flag is set.
271 if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line) || !(this->flags() & regbase::emacs_ex))
272 return parse_literal();
276 return parse_repeat(1);
278 case regex_constants::syntax_question:
// Same rule as for plus; when treated as a repeat it is 0 or 1 times.
279 if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line) || !(this->flags() & regbase::emacs_ex))
280 return parse_literal();
284 return parse_repeat(0, 1);
286 case regex_constants::syntax_open_set:
288 case regex_constants::syntax_newline:
289 if(this->flags() & regbase::newline_alt)
292 return parse_literal();
294 return parse_literal();
// Parsing routine used when regbase::perl_syntax_group is selected: handles
// the element at m_position by dispatching on the syntax type of the current
// character.
299 template <class charT, class traits>
300 bool basic_regex_parser<charT, traits>::parse_extended()
303 switch(this->m_traits.syntax_type(*m_position))
305 case regex_constants::syntax_open_mark:
306 return parse_open_paren();
307 case regex_constants::syntax_close_mark:
309 case regex_constants::syntax_escape:
310 return parse_extended_escape();
311 case regex_constants::syntax_dot:
312 return parse_match_any();
313 case regex_constants::syntax_caret:
// With no_mod_m set the caret is a buffer-start assertion, otherwise
// a start-of-line assertion.
316 (this->flags() & regex_constants::no_mod_m ? syntax_element_buffer_start : syntax_element_start_line));
318 case regex_constants::syntax_dollar:
// With no_mod_m set the dollar is a buffer-end assertion, otherwise
// an end-of-line assertion.
321 (this->flags() & regex_constants::no_mod_m ? syntax_element_buffer_end : syntax_element_end_line));
323 case regex_constants::syntax_star:
// A repeat operator as the very first character is an error:
324 if(m_position == this->m_base)
326 fail(regex_constants::error_badrepeat, 0, "The repeat operator \"*\" cannot start a regular expression.");
330 return parse_repeat();
331 case regex_constants::syntax_question:
332 if(m_position == this->m_base)
334 fail(regex_constants::error_badrepeat, 0, "The repeat operator \"?\" cannot start a regular expression.");
338 return parse_repeat(0,1);
339 case regex_constants::syntax_plus:
340 if(m_position == this->m_base)
342 fail(regex_constants::error_badrepeat, 0, "The repeat operator \"+\" cannot start a regular expression.");
346 return parse_repeat(1);
347 case regex_constants::syntax_open_brace:
349 return parse_repeat_range(false);
350 case regex_constants::syntax_close_brace:
// An unmatched closing brace is an error only when all bits of
// no_perl_ex are set; otherwise it is parsed as a literal.
351 if((this->flags() & regbase::no_perl_ex) == regbase::no_perl_ex)
353 fail(regex_constants::error_brace, this->m_position - this->m_base, "Found a closing repetition operator } with no corresponding {.");
356 result = parse_literal();
358 case regex_constants::syntax_or:
360 case regex_constants::syntax_open_set:
362 case regex_constants::syntax_newline:
363 if(this->flags() & regbase::newline_alt)
366 return parse_literal();
367 case regex_constants::syntax_hash:
369 // If we have a mod_x flag set, then skip until
370 // we get to a newline character:
373 & (regbase::no_perl_ex|regbase::mod_x))
// The post-increment also consumes the separator that ends the loop.
376 while((m_position != m_end) && !is_separator(*m_position++)){}
381 result = parse_literal();
// Handles the character at m_position as a literal: it is passed to
// append_literal() subject to the flag and whitespace test described in the
// comment below.
390 template <class charT, class traits>
391 bool basic_regex_parser<charT, traits>::parse_literal()
393 // append this as a literal provided it's not a space character
394 // or the perl option regbase::mod_x is not set:
397 & (regbase::main_option_type|regbase::mod_x|regbase::no_perl_ex))
399 || !this->m_traits.isctype(*m_position, this->m_mask_space))
400 this->append_literal(*m_position);
// Parses a parenthesised group starting at an opening parenthesis. Perl-style
// (?...) extensions and (*...) verbs are handed to parse_perl_extension() and
// parse_perl_verb(). Otherwise a startmark state is appended, the state that
// belongs to the enclosing scope (alternative insertion point, flags,
// case-change flag, branch-reset value) is saved and later restored, and an
// endmark state is appended once the closing parenthesis is reached.
405 template <class charT, class traits>
406 bool basic_regex_parser<charT, traits>::parse_open_paren()
409 // skip the '(' and error check:
411 if(++m_position == m_end)
413 fail(regex_constants::error_paren, m_position - m_base);
417 // begin by checking for a perl-style (?...) extension:
// The check applies to Perl syntax without no_perl_ex, and to basic
// syntax combined with emacs_ex.
420 ((this->flags() & (regbase::main_option_type | regbase::no_perl_ex)) == 0)
421 || ((this->flags() & (regbase::main_option_type | regbase::emacs_ex)) == (regbase::basic_syntax_group|regbase::emacs_ex))
424 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question)
425 return parse_perl_extension();
426 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_star)
427 return parse_perl_verb();
430 // update our mark count, and append the required state:
// A new mark index is allocated only when the nosubs flag is not set.
433 if(0 == (this->flags() & regbase::nosubs))
435 markid = ++m_mark_count;
436 #ifndef BOOST_NO_STD_DISTANCE
// Record the offset of the opening parenthesis (one before m_position);
// the second member of the pair is filled in further down.
437 if(this->flags() & regbase::save_subexpression_location)
438 this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>(std::distance(m_base, m_position) - 1, 0));
440 if(this->flags() & regbase::save_subexpression_location)
441 this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>((m_position - m_base) - 1, 0));
444 re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
446 pb->icase = this->flags() & regbase::icase;
// Keep the offset of the startmark state rather than the pointer pb.
447 std::ptrdiff_t last_paren_start = this->getoffset(pb);
448 // back up insertion point for alternations, and set new point:
449 std::ptrdiff_t last_alt_point = m_alt_insert_point;
450 this->m_pdata->m_data.align();
451 m_alt_insert_point = this->m_pdata->m_data.size();
453 // back up the current flags in case we have a nested (?imsx) group:
455 regex_constants::syntax_option_type opts = this->flags();
456 bool old_case_change = m_has_case_change;
457 m_has_case_change = false; // no changes to this scope as yet...
459 // Back up branch reset data in case we have a nested (?|...)
461 int mark_reset = m_mark_reset;
464 // now recursively add more states, this will terminate when we get to a
469 // Unwind pushed alternatives:
471 if(0 == unwind_alts(last_paren_start))
476 if(m_has_case_change)
478 // the case has changed in one or more of the alternatives
479 // within the scoped (...) block: we have to add a state
480 // to reset the case sensitivity:
481 static_cast<re_case*>(
482 this->append_state(syntax_element_toggle_case, sizeof(re_case))
483 )->icase = opts & regbase::icase;
486 m_has_case_change = old_case_change;
488 // restore branch reset:
490 m_mark_reset = mark_reset;
492 // we either have a ')' or we have run out of characters prematurely:
494 if(m_position == m_end)
496 this->fail(regex_constants::error_paren, ::boost::BOOST_REGEX_DETAIL_NS::distance(m_base, m_end));
499 BOOST_ASSERT(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark);
500 #ifndef BOOST_NO_STD_DISTANCE
// Store the offset of the closing parenthesis for this mark:
501 if(markid && (this->flags() & regbase::save_subexpression_location))
502 this->m_pdata->m_subs.at(markid - 1).second = std::distance(m_base, m_position);
504 if(markid && (this->flags() & regbase::save_subexpression_location))
505 this->m_pdata->m_subs.at(markid - 1).second = (m_position - m_base);
509 // append closing parenthesis state:
511 pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
513 pb->icase = this->flags() & regbase::icase;
// Remember where this group started so that a following repeat can be
// inserted in front of it (see the use of m_paren_start in parse_repeat).
514 this->m_paren_start = last_paren_start;
516 // restore the alternate insertion point:
518 this->m_alt_insert_point = last_alt_point;
520 // allow backrefs to this mark:
// m_backrefs is used as a bit mask, so only mark indices smaller than the
// number of bits in unsigned are recorded.
522 if((markid > 0) && (markid < sizeof(unsigned) * CHAR_BIT))
523 this->m_backrefs |= 1u << (markid - 1);
// Handles an escape sequence in a basic-syntax expression by dispatching on
// the escape syntax type of the character at m_position. Several escapes are
// only special when a particular flag is set (bk_plus_qm, bk_vbar, emacs_ex)
// or cleared (no_intervals); otherwise the character is parsed as a literal.
528 template <class charT, class traits>
529 bool basic_regex_parser<charT, traits>::parse_basic_escape()
533 switch(this->m_traits.escape_syntax_type(*m_position))
535 case regex_constants::syntax_open_mark:
536 return parse_open_paren();
537 case regex_constants::syntax_close_mark:
539 case regex_constants::syntax_plus:
// An escaped plus is a one-or-more repeat only when bk_plus_qm is set:
540 if(this->flags() & regex_constants::bk_plus_qm)
543 return parse_repeat(1);
546 return parse_literal();
547 case regex_constants::syntax_question:
// An escaped question mark is a zero-or-one repeat only when bk_plus_qm is set:
548 if(this->flags() & regex_constants::bk_plus_qm)
551 return parse_repeat(0, 1);
554 return parse_literal();
555 case regex_constants::syntax_open_brace:
// Escaped braces are plain literals when no_intervals is set:
556 if(this->flags() & regbase::no_intervals)
557 return parse_literal();
559 return parse_repeat_range(true);
560 case regex_constants::syntax_close_brace:
561 if(this->flags() & regbase::no_intervals)
562 return parse_literal();
563 fail(regex_constants::error_brace, this->m_position - this->m_base, "Found a closing repetition operator } with no corresponding {.");
565 case regex_constants::syntax_or:
566 if(this->flags() & regbase::bk_vbar)
569 result = parse_literal();
571 case regex_constants::syntax_digit:
572 return parse_backref();
// The assertion escapes below append their state only when emacs_ex is
// set; the other visible path parses the character as a literal.
573 case regex_constants::escape_type_start_buffer:
574 if(this->flags() & regbase::emacs_ex)
577 this->append_state(syntax_element_buffer_start);
580 result = parse_literal();
582 case regex_constants::escape_type_end_buffer:
583 if(this->flags() & regbase::emacs_ex)
586 this->append_state(syntax_element_buffer_end);
589 result = parse_literal();
591 case regex_constants::escape_type_word_assert:
592 if(this->flags() & regbase::emacs_ex)
595 this->append_state(syntax_element_word_boundary);
598 result = parse_literal();
600 case regex_constants::escape_type_not_word_assert:
601 if(this->flags() & regbase::emacs_ex)
604 this->append_state(syntax_element_within_word);
607 result = parse_literal();
609 case regex_constants::escape_type_left_word:
610 if(this->flags() & regbase::emacs_ex)
613 this->append_state(syntax_element_word_start);
616 result = parse_literal();
618 case regex_constants::escape_type_right_word:
619 if(this->flags() & regbase::emacs_ex)
622 this->append_state(syntax_element_word_end);
625 result = parse_literal();
628 if(this->flags() & regbase::emacs_ex)
// Build a character set from the word class and append it; a null
// result from append_set is reported as error_ctype.
638 basic_char_set<charT, traits> char_set;
641 char_set.add_class(this->m_word_mask);
642 if(0 == this->append_set(char_set))
644 fail(regex_constants::error_ctype, m_position - m_base);
654 return add_emacs_code(negate);
657 // not supported yet:
658 fail(regex_constants::error_escape, m_position - m_base, "The \\c and \\C escape sequences are not supported by POSIX basic regular expressions: try the Perl syntax instead.");
664 result = parse_literal();
// Handles an escape sequence in an extended or Perl-syntax expression by
// dispatching on the escape syntax type of the character at m_position.
// Covers character-class escapes, back-references, zero-width assertions,
// property escapes (including the braced form), and several escapes that are
// only special when the flags select Perl syntax without no_perl_ex; when
// they are not special, control jumps to the character-class handling.
// An escape with nothing after it is reported as error_escape.
670 template <class charT, class traits>
671 bool basic_regex_parser<charT, traits>::parse_extended_escape()
674 if(m_position == m_end)
676 fail(regex_constants::error_escape, m_position - m_base, "Incomplete escape sequence found.");
679 bool negate = false; // in case this is a character class escape: \w \d etc
680 switch(this->m_traits.escape_syntax_type(*m_position))
682 case regex_constants::escape_type_not_class:
685 case regex_constants::escape_type_class:
// Target of the goto statements further down: look the single character
// up as a class name.
687 escape_type_class_jump:
688 typedef typename traits::char_class_type m_type;
689 m_type m = this->m_traits.lookup_classname(m_position, m_position+1);
692 basic_char_set<charT, traits> char_set;
695 char_set.add_class(m);
696 if(0 == this->append_set(char_set))
698 fail(regex_constants::error_ctype, m_position - m_base);
705 // not a class, just a regular unknown escape:
707 this->append_literal(unescape_character());
710 case regex_constants::syntax_digit:
711 return parse_backref();
// Zero-width assertions: each appends the corresponding state.
712 case regex_constants::escape_type_left_word:
714 this->append_state(syntax_element_word_start);
716 case regex_constants::escape_type_right_word:
718 this->append_state(syntax_element_word_end);
720 case regex_constants::escape_type_start_buffer:
722 this->append_state(syntax_element_buffer_start);
724 case regex_constants::escape_type_end_buffer:
726 this->append_state(syntax_element_buffer_end);
728 case regex_constants::escape_type_word_assert:
730 this->append_state(syntax_element_word_boundary);
732 case regex_constants::escape_type_not_word_assert:
734 this->append_state(syntax_element_within_word);
736 case regex_constants::escape_type_Z:
738 this->append_state(syntax_element_soft_buffer_end);
740 case regex_constants::escape_type_Q:
742 case regex_constants::escape_type_C:
743 return parse_match_any();
744 case regex_constants::escape_type_X:
746 this->append_state(syntax_element_combining);
748 case regex_constants::escape_type_G:
750 this->append_state(syntax_element_restart_continue);
752 case regex_constants::escape_type_not_property:
755 case regex_constants::escape_type_property:
759 if(m_position == m_end)
761 fail(regex_constants::error_escape, m_position - m_base, "Incomplete property escape found.");
764 // maybe have \p{ddd}
765 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
767 const charT* base = m_position;
768 // skip forward until we find enclosing brace:
769 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
771 if(m_position == m_end)
773 fail(regex_constants::error_escape, m_position - m_base, "Closing } missing from property escape sequence.");
// Look up the text between the braces: ++base skips the opening brace
// and the post-increment moves m_position past the closing brace.
776 m = this->m_traits.lookup_classname(++base, m_position++);
// Unbraced form: the name is the single character at m_position.
780 m = this->m_traits.lookup_classname(m_position, m_position+1);
785 basic_char_set<charT, traits> char_set;
788 char_set.add_class(m);
789 if(0 == this->append_set(char_set))
791 fail(regex_constants::error_ctype, m_position - m_base);
796 fail(regex_constants::error_ctype, m_position - m_base, "Escape sequence was neither a valid property nor a valid character class name.");
799 case regex_constants::escape_type_reset_start_mark:
// Special only for Perl syntax without no_perl_ex:
800 if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
802 re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
804 pb->icase = this->flags() & regbase::icase;
805 this->m_pdata->m_data.align();
809 goto escape_type_class_jump;
810 case regex_constants::escape_type_line_ending:
811 if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
// Temporarily parse the replacement expression returned by
// get_escape_R_string() in place of the input, saving the current
// position, end and base so they can be restored afterwards.
813 const charT* e = get_escape_R_string<charT>();
814 const charT* old_position = m_position;
815 const charT* old_end = m_end;
816 const charT* old_base = m_base;
819 m_end = e + traits::length(e);
820 bool r = parse_all();
821 m_position = ++old_position;
826 goto escape_type_class_jump;
827 case regex_constants::escape_type_extended_backref:
828 if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
830 bool have_brace = false;
831 bool negative = false;
832 static const char* incomplete_message = "Incomplete \\g escape found.";
833 if(++m_position == m_end)
835 fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
838 // maybe have \g{ddd}
839 regex_constants::syntax_type syn = this->m_traits.syntax_type(*m_position);
// syn_end is the expected closing delimiter; zero means no delimiter.
840 regex_constants::syntax_type syn_end = 0;
841 if((syn == regex_constants::syntax_open_brace)
842 || (syn == regex_constants::escape_type_left_word)
843 || (syn == regex_constants::escape_type_end_buffer))
845 if(++m_position == m_end)
847 fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
853 case regex_constants::syntax_open_brace:
854 syn_end = regex_constants::syntax_close_brace;
856 case regex_constants::escape_type_left_word:
857 syn_end = regex_constants::escape_type_right_word;
860 syn_end = regex_constants::escape_type_end_buffer;
// A leading minus sign sets negative (see the relative index below).
864 negative = (*m_position == static_cast<charT>('-'));
865 if((negative) && (++m_position == m_end))
867 fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
870 const charT* pc = m_position;
871 int i = this->m_traits.toi(pc, m_end, 10);
872 if((i < 0) && syn_end)
874 // Check for a named capture, get the leftmost one if there is more than one:
875 const charT* base = m_position;
876 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != syn_end))
880 i = hash_value_from_capture_name(base, m_position);
// Relative index computed backwards from the current mark count:
884 i = 1 + m_mark_count - i;
// NOTE(review): the shifts below are not range-checked against the number
// of bits in unsigned, unlike the guarded update of m_backrefs in
// parse_open_paren; confirm that larger indices cannot reach this point.
885 if(((i > 0) && (this->m_backrefs & (1u << (i-1)))) || ((i > 10000) && (this->m_pdata->get_id(i) > 0) && (this->m_backrefs & (1u << (this->m_pdata->get_id(i)-1)))))
888 re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
890 pb->icase = this->flags() & regbase::icase;
894 fail(regex_constants::error_backref, m_position - m_base);
// The reference must be terminated by the expected closing delimiter:
900 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != syn_end))
902 fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
909 goto escape_type_class_jump;
910 case regex_constants::escape_type_control_v:
911 if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
912 goto escape_type_class_jump;
915 this->append_literal(unescape_character());
// Appends a wildcard state (syntax_element_wild) and sets its newline mask
// from the flags: force_not_newline when no_mod_s is set, otherwise
// force_newline when mod_s is set, otherwise dont_care.
921 template <class charT, class traits>
922 bool basic_regex_parser<charT, traits>::parse_match_any()
925 // we have a '.' that can match any character:
928 static_cast<re_dot*>(
929 this->append_state(syntax_element_wild, sizeof(re_dot))
930 )->mask = static_cast<unsigned char>(this->flags() & regbase::no_mod_s
931 ? BOOST_REGEX_DETAIL_NS::force_not_newline
932 : this->flags() & regbase::mod_s ?
933 BOOST_REGEX_DETAIL_NS::force_newline : BOOST_REGEX_DETAIL_NS::dont_care);
// Wraps the most recently appended state (or the last parenthesised group,
// or the last character of a multi-character literal) in a repeat.
//   low  - repeat count lower bound, as supplied by the caller
//   high - repeat count upper bound, as supplied by the caller
// A syntax_element_rep state is inserted in front of the repeated item and a
// jump back to it is appended after the item. Reports error_badrepeat when
// there is nothing to repeat or the last state cannot be repeated.
937 template <class charT, class traits>
938 bool basic_regex_parser<charT, traits>::parse_repeat(std::size_t low, std::size_t high)
941 bool pocessive = false;
942 std::size_t insert_point;
944 // when we get to here we may have a non-greedy ? mark still to come:
946 if((m_position != m_end)
948 (0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
949 || ((regbase::basic_syntax_group|regbase::emacs_ex) == (this->flags() & (regbase::main_option_type | regbase::emacs_ex)))
953 // OK we have a perl or emacs regex, check for a '?':
954 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question)
959 // for perl regexes only check for possessive ++ repeats.
960 if((m_position != m_end)
961 && (0 == (this->flags() & regbase::main_option_type))
962 && (this->m_traits.syntax_type(*m_position) == regex_constants::syntax_plus))
968 if(0 == this->m_last_state)
970 fail(regex_constants::error_badrepeat, ::boost::BOOST_REGEX_DETAIL_NS::distance(m_base, m_position), "Nothing to repeat.");
973 if(this->m_last_state->type == syntax_element_endmark)
975 // insert a repeat before the '(' matching the last ')':
976 insert_point = this->m_paren_start;
978 else if((this->m_last_state->type == syntax_element_literal) && (static_cast<re_literal*>(this->m_last_state)->length > 1))
980 // the last state was a literal with more than one character, split it in two:
981 re_literal* lit = static_cast<re_literal*>(this->m_last_state);
// The characters of the literal are read from the memory directly after the
// re_literal header; c is the last one.
982 charT c = (static_cast<charT*>(static_cast<void*>(lit+1)))[lit->length - 1];
984 // now append new state:
985 lit = static_cast<re_literal*>(this->append_state(syntax_element_literal, sizeof(re_literal) + sizeof(charT)));
987 (static_cast<charT*>(static_cast<void*>(lit+1)))[0] = c;
988 insert_point = this->getoffset(this->m_last_state);
992 // repeat the last state whatever it was, need to add some error checking here:
993 switch(this->m_last_state->type)
995 case syntax_element_start_line:
996 case syntax_element_end_line:
997 case syntax_element_word_boundary:
998 case syntax_element_within_word:
999 case syntax_element_word_start:
1000 case syntax_element_word_end:
1001 case syntax_element_buffer_start:
1002 case syntax_element_buffer_end:
1003 case syntax_element_alt:
1004 case syntax_element_soft_buffer_end:
1005 case syntax_element_restart_continue:
1006 case syntax_element_jump:
1007 case syntax_element_startmark:
1008 case syntax_element_backstep:
1009 // can't legally repeat any of the above:
1010 fail(regex_constants::error_badrepeat, m_position - m_base);
1016 insert_point = this->getoffset(this->m_last_state);
1019 // OK we now know what to repeat, so insert the repeat around it:
1021 re_repeat* rep = static_cast<re_repeat*>(this->insert_state(insert_point, syntax_element_rep, re_repeater_size));
1024 rep->greedy = greedy;
1025 rep->leading = false;
1026 // store our repeater position for later:
1027 std::ptrdiff_t rep_off = this->getoffset(rep);
1028 // and append a back jump to the repeat:
1029 re_jump* jmp = static_cast<re_jump*>(this->append_state(syntax_element_jump, sizeof(re_jump)));
// The jump target is stored as an offset relative to the jump state.
1030 jmp->alt.i = rep_off - this->getoffset(jmp);
1031 this->m_pdata->m_data.align();
1032 // now fill in the alt jump for the repeat:
// rep is looked up again from its saved offset before being written to.
1033 rep = static_cast<re_repeat*>(this->getaddress(rep_off));
1034 rep->alt.i = this->m_pdata->m_data.size() - rep_off;
1036 // If the repeat is possessive then bracket the repeat with a (?>...)
1037 // independent sub-expression construct:
1041 if(m_position != m_end)
1044 // Check for illegal following quantifier, we have to do this here, because
1045 // the extra states we insert below circumvents our usual error checking :-(
1047 switch(this->m_traits.syntax_type(*m_position))
1049 case regex_constants::syntax_star:
1050 case regex_constants::syntax_plus:
1051 case regex_constants::syntax_question:
1052 case regex_constants::syntax_open_brace:
1053 fail(regex_constants::error_badrepeat, m_position - m_base);
// Insert a startmark and a jump in front of the repeat, then append the
// matching endmark after it.
1057 re_brace* pb = static_cast<re_brace*>(this->insert_state(insert_point, syntax_element_startmark, sizeof(re_brace)));
1059 pb->icase = this->flags() & regbase::icase;
1060 jmp = static_cast<re_jump*>(this->insert_state(insert_point + sizeof(re_brace), syntax_element_jump, sizeof(re_jump)));
1061 this->m_pdata->m_data.align();
1062 jmp->alt.i = this->m_pdata->m_data.size() - this->getoffset(jmp);
1063 pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
1065 pb->icase = this->flags() & regbase::icase;
// Parses a bounded repeat written with braces: a minimum, optionally followed
// by a comma and a maximum, and then hands the bounds to parse_repeat().
//   isbasic - true when called for an escaped brace from parse_basic_escape(),
//             false when called from parse_extended()
// Whitespace between the parts is skipped. When the input ends or is
// malformed, the result depends on the flags: if any of main_option_type or
// no_perl_ex is set, error_brace is reported; otherwise the parser rewinds to
// the opening brace and treats it as a literal character.
1070 template <class charT, class traits>
1071 bool basic_regex_parser<charT, traits>::parse_repeat_range(bool isbasic)
1073 static const char* incomplete_message = "Missing } in quantified repetition.";
1075 // parse a repeat-range:
1077 std::size_t min, max;
1080 while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
1082 if(this->m_position == this->m_end)
1084 if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
1086 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1089 // Treat the opening '{' as a literal character, rewind to start of error:
1091 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1092 return parse_literal();
// Read the first number in base 10:
1095 v = this->m_traits.toi(m_position, m_end, 10);
1099 if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
1101 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1104 // Treat the opening '{' as a literal character, rewind to start of error:
1106 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1107 return parse_literal();
1109 while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
1111 if(this->m_position == this->m_end)
1113 if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
1115 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1118 // Treat the opening '{' as a literal character, rewind to start of error:
1120 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1121 return parse_literal();
1124 // see if we have a comma:
1125 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_comma)
1127 // move on and error check:
1130 while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
1132 if(this->m_position == this->m_end)
1134 if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
1136 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1139 // Treat the opening '{' as a literal character, rewind to start of error:
1141 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1142 return parse_literal();
1144 // get the value if any:
// A negative result from toi is taken to mean no upper bound was given,
// so max becomes the largest std::size_t value.
1145 v = this->m_traits.toi(m_position, m_end, 10);
1146 max = (v >= 0) ? (std::size_t)v : (std::numeric_limits<std::size_t>::max)();
1150 // no comma, max = min:
1154 while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
1156 // OK now check trailing }:
1157 if(this->m_position == this->m_end)
1159 if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
1161 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1164 // Treat the opening '{' as a literal character, rewind to start of error:
1166 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1167 return parse_literal();
1171 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_escape)
1174 if(this->m_position == this->m_end)
1176 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1182 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1186 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_brace)
1190 // Treat the opening '{' as a literal character, rewind to start of error:
1192 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1193 return parse_literal();
1196 // finally go and add the repeat, unless error:
1200 // Backtrack to error location:
1202 while(this->m_traits.isctype(*m_position, this->m_word_mask)) --m_position;
1204 fail(regex_constants::error_badbrace, m_position - m_base);
1207 return parse_repeat(min, max);
// parse_alt: handles an alternation operator. From the visible statements it
// (1) reports error_empty when the emptiness check trips, (2) updates the
// mark counters, (3) appends a jump state and inserts an alt state at
// m_alt_insert_point, (4) optionally appends a toggle_case state, and
// (5) records the jump's offset in m_alt_jumps for later processing.
1210 template <class charT, class traits>
1211 bool basic_regex_parser<charT, traits>::parse_alt()
1214 // error check: if there have been no previous states,
1215 // or if the last state was a '(' then error:
// The tests below look at the previous state (none, or a startmark) and at the
// perl_syntax_group / no_empty_expressions flag bits.
// NOTE(review): how the three tests are combined is not visible here - confirm.
1218 ((this->m_last_state == 0) || (this->m_last_state->type == syntax_element_startmark))
1221 ((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group)
1223 ((this->flags() & regbase::no_empty_expressions) == 0)
1227 fail(regex_constants::error_empty, this->m_position - this->m_base, "A regular expression cannot start with the alternation operator |.");
1231 // Reset mark count if required:
// m_max_mark keeps the largest mark count seen so far; when m_mark_reset is
// non-negative the running count is wound back to it.
1233 if(m_max_mark < m_mark_count)
1234 m_max_mark = m_mark_count;
1235 if(m_mark_reset >= 0)
1236 m_mark_count = m_mark_reset;
1240 // we need to append a trailing jump:
1242 re_syntax_base* pj = this->append_state(BOOST_REGEX_DETAIL_NS::syntax_element_jump, sizeof(re_jump));
1243 std::ptrdiff_t jump_offset = this->getoffset(pj);
1245 // now insert the alternative:
// The alt state is inserted at m_alt_insert_point and the saved jump offset is
// advanced by re_alt_size (presumably because the insertion shifts the jump).
1247 re_alt* palt = static_cast<re_alt*>(this->insert_state(this->m_alt_insert_point, syntax_element_alt, re_alt_size));
1248 jump_offset += re_alt_size;
1249 this->m_pdata->m_data.align();
// alt.i is the distance from the alt state to the current end of the data.
1250 palt->alt.i = this->m_pdata->m_data.size() - this->getoffset(palt);
1252 // update m_alt_insert_point so that the next alternate gets
1253 // inserted at the start of the second of the two we've just created:
1255 this->m_alt_insert_point = this->m_pdata->m_data.size();
1257 // the start of this alternative must have a case-change state
1258 // if the current block has messed around with case changes:
1260 if(m_has_case_change)
1262 static_cast<re_case*>(
1263 this->append_state(syntax_element_toggle_case, sizeof(re_case))
1264 )->icase = this->m_icase;
1267 // push the alternative onto our stack, a recursive
1268 // implementation here is easier to understand (and faster
1269 // as it happens), but causes all kinds of stack overflow problems
1270 // on programs with small stacks (COM+).
1272 m_alt_jumps.push_back(jump_offset);
// parse_set: parses a bracketed character set. Items are accumulated in a
// local basic_char_set and handed to append_set() once the closing ']' is
// reached. The final visible statement returns whether m_position stopped
// short of m_end.
1276 template <class charT, class traits>
1277 bool basic_regex_parser<charT, traits>::parse_set()
1279 static const char* incomplete_message = "Character set declaration starting with [ terminated prematurely - either no ] was found or the set had no content.";
// Nothing follows the '[': report error_brack.
1281 if(m_position == m_end)
1283 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1286 basic_char_set<charT, traits> char_set;
1288 const charT* base = m_position; // where the '[' was
1289 const charT* item_base = m_position; // where the '[' or '^' was
1291 while(m_position != m_end)
// Dispatch on the syntax category of the current character.
1293 switch(this->m_traits.syntax_type(*m_position))
1295 case regex_constants::syntax_caret:
// A caret in the very first position is special and moves item_base;
// a caret elsewhere is handed to parse_set_literal.
1296 if(m_position == base)
1300 item_base = m_position;
1303 parse_set_literal(char_set);
1305 case regex_constants::syntax_close_set:
// A ']' sitting exactly at item_base is parsed as a literal member.
1306 if(m_position == item_base)
1308 parse_set_literal(char_set);
// append_set returning 0 is reported as error_ctype.
1314 if(0 == this->append_set(char_set))
1316 fail(regex_constants::error_ctype, m_position - m_base);
1321 case regex_constants::syntax_open_set:
// A nested '[' may introduce a class, collating element or equivalence class.
1322 if(parse_inner_set(char_set))
1325 case regex_constants::syntax_escape:
1328 // look ahead and see if this is a character class shortcut
1332 if(this->m_traits.escape_syntax_type(*m_position)
1333 == regex_constants::escape_type_class)
// The class is looked up from the single escape character.
1335 char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1);
1338 char_set.add_class(m);
1343 else if(this->m_traits.escape_syntax_type(*m_position)
1344 == regex_constants::escape_type_not_class)
1346 // negated character class:
1347 char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1);
1350 char_set.add_negated_class(m);
1355 // not a character class, just a regular escape:
1357 parse_set_literal(char_set);
// Any other character is an ordinary set member (or the start of a range).
1361 parse_set_literal(char_set);
1365 return m_position != m_end;
// parse_inner_set: called when a '[' is met inside a character set. Looks at
// the character after the '[' and handles a collating element ([.name.]),
// a character class ([:name:]) or an equivalence class ([=name=]); anything
// else is passed to parse_set_literal. Premature end of input is reported as
// error_brack with incomplete_message.
1368 template <class charT, class traits>
1369 bool basic_regex_parser<charT, traits>::parse_inner_set(basic_char_set<charT, traits>& char_set)
1371 static const char* incomplete_message = "Character class declaration starting with [ terminated prematurely - either no ] was found or the set had no content.";
1373 // we have either a character class [:name:]
1374 // a collating element [.name.]
1375 // or an equivalence class [=name=]
// Step past the '[' and make sure something follows it.
1377 if(m_end == ++m_position)
1379 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1382 switch(this->m_traits.syntax_type(*m_position))
1384 case regex_constants::syntax_dot:
1386 // a collating element is treated as a literal:
1389 parse_set_literal(char_set);
1391 case regex_constants::syntax_colon:
1393 // check that character classes are actually enabled:
// Basic syntax with no_char_classes set: the text is parsed as a literal.
1394 if((this->flags() & (regbase::main_option_type | regbase::no_char_classes))
1395 == (regbase::basic_syntax_group | regbase::no_char_classes))
1398 parse_set_literal(char_set);
// Step past the ':'; the class name starts here.
1402 if(m_end == ++m_position)
1404 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1407 const charT* name_first = m_position;
1408 // skip at least one character, then find the matching ':]'
1409 if(m_end == ++m_position)
1411 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1414 while((m_position != m_end)
1415 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_colon))
// name_last is one past the final character of the class name.
1417 const charT* name_last = m_position;
1418 if(m_end == m_position)
1420 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
// The closing ':' must be followed immediately by ']'.
1423 if((m_end == ++m_position)
1424 || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
1426 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1430 // check for negated class:
1432 bool negated = false;
1433 if(this->m_traits.syntax_type(*name_first) == regex_constants::syntax_caret)
1438 typedef typename traits::char_class_type m_type;
1439 m_type m = this->m_traits.lookup_classname(name_first, name_last);
// Special handling for a one-character name in a set that is still empty.
1442 if(char_set.empty() && (name_last - name_first == 1))
1444 // maybe a special case:
1446 if( (m_position != m_end)
1447 && (this->m_traits.syntax_type(*m_position)
1448 == regex_constants::syntax_close_set))
// A name character classified as left_word / right_word becomes a
// word_start / word_end state instead of a set member.
1450 if(this->m_traits.escape_syntax_type(*name_first)
1451 == regex_constants::escape_type_left_word)
1454 this->append_state(syntax_element_word_start);
1457 if(this->m_traits.escape_syntax_type(*name_first)
1458 == regex_constants::escape_type_right_word)
1461 this->append_state(syntax_element_word_end);
// NOTE(review): presumably reached when the class lookup produced nothing - confirm.
1466 fail(regex_constants::error_ctype, name_first - m_base);
1469 if(negated == false)
1470 char_set.add_class(m);
1472 char_set.add_negated_class(m);
1476 case regex_constants::syntax_equal:
// Equivalence class: same scanning scheme as above, but delimited by '='.
1479 if(m_end == ++m_position)
1481 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1484 const charT* name_first = m_position;
1485 // skip at least one character, then find the matching '=]'
1486 if(m_end == ++m_position)
1488 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1491 while((m_position != m_end)
1492 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal))
1494 const charT* name_last = m_position;
1495 if(m_end == m_position)
1497 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1500 if((m_end == ++m_position)
1501 || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
1503 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
// The name must resolve to a collating element of one or two characters.
1506 string_type m = this->m_traits.lookup_collatename(name_first, name_last);
1507 if((0 == m.size()) || (m.size() > 2))
1509 fail(regex_constants::error_collate, name_first - m_base);
1518 char_set.add_equivalent(d);
// Any other character after '[' is parsed as a literal.
1524 parse_set_literal(char_set);
// parse_set_literal: reads one set member via get_next_set_literal and adds
// it to char_set, either as a single item or, when followed by a dash and a
// second literal, as a range. Running out of input is reported as
// error_brack; a dash directly after a completed range that is not followed
// by ']' is reported as error_range.
1530 template <class charT, class traits>
1531 void basic_regex_parser<charT, traits>::parse_set_literal(basic_char_set<charT, traits>& char_set)
1533 digraph<charT> start_range(get_next_set_literal(char_set));
// The set cannot end straight after a literal.
1534 if(m_end == m_position)
1536 fail(regex_constants::error_brack, m_position - m_base);
// A dash after the first literal may introduce a range.
1539 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash)
1542 if(m_end == ++m_position)
1544 fail(regex_constants::error_brack, m_position - m_base);
// Only treat it as a range when the dash is not immediately followed by ']'.
1547 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set)
1549 digraph<charT> end_range = get_next_set_literal(char_set);
1550 char_set.add_range(start_range, end_range);
// NOTE(review): m_position is dereferenced on the next line without an m_end
// comparison after get_next_set_literal - confirm this cannot read past m_end.
1551 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash)
1553 if(m_end == ++m_position)
1555 fail(regex_constants::error_brack, m_position - m_base);
// A dash after a range is only tolerated when ']' follows it.
1558 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_set)
1564 fail(regex_constants::error_range, m_position - m_base);
// Not a range: the literal is added on its own.
1571 char_set.add_single(start_range);
// get_next_set_literal: extracts the next literal inside a character set and
// returns it as a digraph. Handles a dash, an escape sequence (unless
// no_escape_in_lists is set), a [.name.] collating element, and ordinary
// characters. Collating-element problems are reported as error_collate.
1575 template <class charT, class traits>
1576 digraph<charT> basic_regex_parser<charT, traits>::get_next_set_literal(basic_char_set<charT, traits>& char_set)
1578 digraph<charT> result;
1579 switch(this->m_traits.syntax_type(*m_position))
1580 case regex_constants::syntax_dash:
// A dash in a non-empty set must be the last thing before ']'.
1581 if(!char_set.empty())
1583 // see if we are at the end of the set:
1584 if((++m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
1586 fail(regex_constants::error_range, m_position - m_base);
1591 result.first = *m_position++;
1593 case regex_constants::syntax_escape:
1594 // check to see if escapes are supported first:
// With no_escape_in_lists the escape character is taken as itself.
1595 if(this->flags() & regex_constants::no_escape_in_lists)
1597 result = *m_position++;
1601 result = unescape_character();
1603 case regex_constants::syntax_open_set:
// Look at the character after the '['.
1605 if(m_end == ++m_position)
1607 fail(regex_constants::error_collate, m_position - m_base);
// Not followed by '.': not a collating element.
1610 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_dot)
1613 result.first = *m_position;
// Step past the '.'; the collating element name starts here.
1617 if(m_end == ++m_position)
1619 fail(regex_constants::error_collate, m_position - m_base);
1622 const charT* name_first = m_position;
1623 // skip at least one character, then find the matching '.]'
1624 if(m_end == ++m_position)
1626 fail(regex_constants::error_collate, name_first - m_base);
1629 while((m_position != m_end)
1630 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_dot))
// name_last is one past the final character of the name.
1632 const charT* name_last = m_position;
1633 if(m_end == m_position)
1635 fail(regex_constants::error_collate, name_first - m_base);
// The closing '.' must be followed immediately by ']'.
1638 if((m_end == ++m_position)
1639 || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
1641 fail(regex_constants::error_collate, name_first - m_base);
// The name must resolve to one or two characters.
1645 string_type s = this->m_traits.lookup_collatename(name_first, name_last);
1646 if(s.empty() || (s.size() > 2))
1648 fail(regex_constants::error_collate, name_first - m_base);
1651 result.first = s[0];
// NOTE(review): presumably only taken for a two-character result - confirm.
1653 result.second = s[1];
// Any other character is taken as itself.
1659 result = *m_position++;
1665 // does a value fit in the specified charT type?
// Tag-dispatched helper. The two-argument overload at the bottom selects one
// of the three-argument overloads according to whether charT is narrower
// than int.
// Overload for charT narrower than int: v fits when no bits remain after
// shifting out sizeof(charT) * CHAR_BIT bits.
1667 template <class charT>
1668 bool valid_value(charT, int v, const mpl::true_&)
1670 return (v >> (sizeof(charT) * CHAR_BIT)) == 0;
// Overload for charT at least as wide as int.
1672 template <class charT>
1673 bool valid_value(charT, int, const mpl::false_&)
1675 return true; // v will always fit in a charT
// Entry point: the charT argument only carries the type.
1677 template <class charT>
1678 bool valid_value(charT c, int v)
1680 return valid_value(c, v, mpl::bool_<(sizeof(charT) < sizeof(int))>());
// unescape_character: translates the escape sequence at m_position into a
// single charT. Covers the simple control escapes, ASCII control escapes,
// hexadecimal escapes (two-digit form and \x{ddd}), octal escapes and named
// characters (\N{name}). On a malformed sequence m_position is walked back
// to the escape character so the error is reported at the start of the
// escape.
// NOTE(review): the rewind loops have no visible lower bound; they appear to
// rely on an escape character preceding m_position - confirm.
1683 template <class charT, class traits>
1684 charT basic_regex_parser<charT, traits>::unescape_character()
1687 #pragma warning(push)
1688 #pragma warning(disable:4127)
// An escape character with nothing after it is an error.
1691 if(m_position == m_end)
1693 fail(regex_constants::error_escape, m_position - m_base, "Escape sequence terminated prematurely.");
1696 switch(this->m_traits.escape_syntax_type(*m_position))
// Simple escapes map directly onto the matching C character literal.
1698 case regex_constants::escape_type_control_a:
1699 result = charT('\a');
1701 case regex_constants::escape_type_e:
1704 case regex_constants::escape_type_control_f:
1705 result = charT('\f');
1707 case regex_constants::escape_type_control_n:
1708 result = charT('\n');
1710 case regex_constants::escape_type_control_r:
1711 result = charT('\r');
1713 case regex_constants::escape_type_control_t:
1714 result = charT('\t');
1716 case regex_constants::escape_type_control_v:
1717 result = charT('\v');
1719 case regex_constants::escape_type_word_assert:
1720 result = charT('\b');
1722 case regex_constants::escape_type_ascii_control:
1724 if(m_position == m_end)
1726 // Rewind to start of escape:
1728 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1729 fail(regex_constants::error_escape, m_position - m_base, "ASCII escape sequence terminated prematurely.");
// The control code is the character's value modulo 32.
1732 result = static_cast<charT>(*m_position % 32);
1734 case regex_constants::escape_type_hex:
1736 if(m_position == m_end)
1738 // Rewind to start of escape:
1740 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1741 fail(regex_constants::error_escape, m_position - m_base, "Hexadecimal escape sequence terminated prematurely.");
1744 // maybe have \x{ddd}
1745 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
1748 if(m_position == m_end)
1750 // Rewind to start of escape:
1752 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1753 fail(regex_constants::error_escape, m_position - m_base, "Missing } in hexadecimal escape sequence.");
// Parse the hex digits; rejected when input ends, when the value exceeds
// charT's maximum (if numeric_limits is specialized), or when no '}' follows.
1756 int i = this->m_traits.toi(m_position, m_end, 16);
1757 if((m_position == m_end)
1759 || ((std::numeric_limits<charT>::is_specialized) && (i > (int)(std::numeric_limits<charT>::max)()))
1760 || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
1762 // Rewind to start of escape:
1764 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1765 fail(regex_constants::error_badbrace, m_position - m_base, "Hexadecimal escape sequence was invalid.");
// Unbraced form: at most two characters are handed to toi.
1773 std::ptrdiff_t len = (std::min)(static_cast<std::ptrdiff_t>(2), static_cast<std::ptrdiff_t>(m_end - m_position));
1774 int i = this->m_traits.toi(m_position, m_position + len, 16);
1776 || !valid_value(charT(0), i))
1778 // Rewind to start of escape:
1780 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1781 fail(regex_constants::error_escape, m_position - m_base, "Escape sequence did not encode a valid character.");
1787 case regex_constants::syntax_digit:
1789 // an octal escape sequence, the first character must be a zero
1790 // followed by up to 3 octal digits:
1791 std::ptrdiff_t len = (std::min)(::boost::BOOST_REGEX_DETAIL_NS::distance(m_position, m_end), static_cast<std::ptrdiff_t>(4));
// Probe the first digit through a copy so m_position is not advanced yet.
1792 const charT* bp = m_position;
1793 int val = this->m_traits.toi(bp, bp + 1, 8);
1796 // Rewind to start of escape:
1798 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1799 // Oops not an octal escape after all:
1800 fail(regex_constants::error_escape, m_position - m_base, "Invalid octal escape sequence.");
// Now parse the whole sequence, at most len characters.
1803 val = this->m_traits.toi(m_position, m_position + len, 8);
1806 // Rewind to start of escape:
1808 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1809 fail(regex_constants::error_escape, m_position - m_base, "Octal escape sequence is invalid.");
1812 return static_cast<charT>(val);
1814 case regex_constants::escape_type_named_char:
1817 if(m_position == m_end)
1819 // Rewind to start of escape:
1821 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1822 fail(regex_constants::error_escape, m_position - m_base);
1825 // maybe have \N{name}
1826 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
1828 const charT* base = m_position;
1829 // skip forward until we find enclosing brace:
1830 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
1832 if(m_position == m_end)
1834 // Rewind to start of escape:
1836 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1837 fail(regex_constants::error_escape, m_position - m_base);
// Look up the text between the braces: ++base skips the '{', and the
// post-increment moves m_position past the '}' after its old value is used.
1840 string_type s = this->m_traits.lookup_collatename(++base, m_position++);
1843 // Rewind to start of escape:
1845 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1846 fail(regex_constants::error_collate, m_position - m_base);
1854 // fall through is a failure:
1855 // Rewind to start of escape:
1857 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1858 fail(regex_constants::error_escape, m_position - m_base);
// Any other escaped character stands for itself.
1862 result = *m_position;
1868 #pragma warning(pop)
// parse_backref: handles an escape followed by a digit. Depending on the
// digit and the flags it is either treated as an octal escape (appended as a
// literal), turned into a backref state, or reported as error_backref.
1872 template <class charT, class traits>
1873 bool basic_regex_parser<charT, traits>::parse_backref()
1875 BOOST_ASSERT(m_position != m_end);
// Read a single decimal digit through a copy; m_position is not moved here.
1876 const charT* pc = m_position;
1877 int i = this->m_traits.toi(pc, pc + 1, 10);
// Digit 0, or Perl syntax with no_bk_refs set: not a back-reference.
1878 if((i == 0) || (((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group) && (this->flags() & regbase::no_bk_refs)))
1880 // not a backref at all but an octal escape sequence:
1881 charT c = unescape_character();
1882 this->append_literal(c);
// Accepted only when bit (i-1) of m_backrefs is set.
1884 else if((i > 0) && (this->m_backrefs & (1u << (i-1))))
1887 re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
// The state records whether the icase flag is currently set.
1889 pb->icase = this->flags() & regbase::icase;
1893 // Rewind to start of escape:
1895 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1896 fail(regex_constants::error_backref, m_position - m_base);
// parse_QE: parses a \Q...\E quoted sequence. Scans forward from just after
// the Q for an escape character followed by an E-type escape, then appends
// the enclosed characters as literals. The sequence may also be terminated
// by the end of the expression.
1902 template <class charT, class traits>
1903 bool basic_regex_parser<charT, traits>::parse_QE()
1906 #pragma warning(push)
1907 #pragma warning(disable:4127)
1910 // parse a \Q...\E sequence:
1912 ++m_position; // skip the Q
// start marks the first quoted character.
1913 const charT* start = m_position;
// Advance to the next escape character, or to the end of input.
1917 while((m_position != m_end)
1918 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape))
1920 if(m_position == m_end)
1922 // a \Q...\E sequence may terminate with the end of the expression:
// An escape character that is the very last character is an error.
1926 if(++m_position == m_end) // skip the escape
1928 fail(regex_constants::error_escape, m_position - m_base, "Unterminated \\Q...\\E sequence.");
1931 // check to see if it's a \E:
1932 if(this->m_traits.escape_syntax_type(*m_position) == regex_constants::escape_type_E)
// end is placed two characters behind m_position.
1935 end = m_position - 2;
1938 // otherwise go round again:
1941 // now add all the character between the two escapes as literals:
1945 this->append_literal(*start);
1950 #pragma warning(pop)
1954 template <class charT, class traits>
1955 bool basic_regex_parser<charT, traits>::parse_perl_extension()
1957 if(++m_position == m_end)
1959 // Rewind to start of (? sequence:
1961 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
1962 fail(regex_constants::error_perl_extension, m_position - m_base);
1966 // treat comments as a special case, as these
1967 // are the only ones that don't start with a leading
1970 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_hash)
1972 while((m_position != m_end)
1973 && (this->m_traits.syntax_type(*m_position++) != regex_constants::syntax_close_mark))
1978 // backup some state, and prepare the way:
1981 std::ptrdiff_t jump_offset = 0;
1982 re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
1983 pb->icase = this->flags() & regbase::icase;
1984 std::ptrdiff_t last_paren_start = this->getoffset(pb);
1985 // back up insertion point for alternations, and set new point:
1986 std::ptrdiff_t last_alt_point = m_alt_insert_point;
1987 this->m_pdata->m_data.align();
1988 m_alt_insert_point = this->m_pdata->m_data.size();
1989 std::ptrdiff_t expected_alt_point = m_alt_insert_point;
1990 bool restore_flags = true;
1991 regex_constants::syntax_option_type old_flags = this->flags();
1992 bool old_case_change = m_has_case_change;
1993 m_has_case_change = false;
1995 int mark_reset = m_mark_reset;
1996 int max_mark = m_max_mark;
1998 m_max_mark = m_mark_count;
2001 // select the actual extension used:
2003 switch(this->m_traits.syntax_type(*m_position))
2005 case regex_constants::syntax_or:
2006 m_mark_reset = m_mark_count;
2008 case regex_constants::syntax_colon:
2010 // a non-capturing mark:
2012 pb->index = markid = 0;
2015 case regex_constants::syntax_digit:
2018 // a recursive subexpression:
2020 v = this->m_traits.toi(m_position, m_end, 10);
2021 if((v < 0) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2023 // Rewind to start of (? sequence:
2025 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2026 fail(regex_constants::error_perl_extension, m_position - m_base, "The recursive sub-expression refers to an invalid marking group, or is unterminated.");
2030 pb->index = markid = 0;
2031 re_recurse* pr = static_cast<re_recurse*>(this->append_state(syntax_element_recurse, sizeof(re_recurse)));
2034 static_cast<re_case*>(
2035 this->append_state(syntax_element_toggle_case, sizeof(re_case))
2036 )->icase = this->flags() & regbase::icase;
2039 case regex_constants::syntax_plus:
2041 // A forward-relative recursive subexpression:
2044 v = this->m_traits.toi(m_position, m_end, 10);
2045 if((v <= 0) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2047 // Rewind to start of (? sequence:
2049 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2050 fail(regex_constants::error_perl_extension, m_position - m_base, "An invalid or unterminated recursive sub-expression.");
2054 goto insert_recursion;
2055 case regex_constants::syntax_dash:
2057 // Possibly a backward-relative recursive subexpression:
2060 v = this->m_traits.toi(m_position, m_end, 10);
2064 // Oops not a relative recursion at all, but a (?-imsx) group:
2065 goto option_group_jump;
2067 v = m_mark_count + 1 - v;
2070 // Rewind to start of (? sequence:
2072 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2073 fail(regex_constants::error_perl_extension, m_position - m_base, "An invalid or unterminated recursive sub-expression.");
2076 goto insert_recursion;
2077 case regex_constants::syntax_equal:
2078 pb->index = markid = -1;
2080 jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
2081 this->m_pdata->m_data.align();
2082 m_alt_insert_point = this->m_pdata->m_data.size();
2084 case regex_constants::syntax_not:
2085 pb->index = markid = -2;
2087 jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
2088 this->m_pdata->m_data.align();
2089 m_alt_insert_point = this->m_pdata->m_data.size();
2091 case regex_constants::escape_type_left_word:
2093 // a lookbehind assertion:
2094 if(++m_position == m_end)
2096 // Rewind to start of (? sequence:
2098 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2099 fail(regex_constants::error_perl_extension, m_position - m_base);
2102 regex_constants::syntax_type t = this->m_traits.syntax_type(*m_position);
2103 if(t == regex_constants::syntax_not)
2104 pb->index = markid = -2;
2105 else if(t == regex_constants::syntax_equal)
2106 pb->index = markid = -1;
2109 // Probably a named capture which also starts (?< :
2112 goto named_capture_jump;
2115 jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
2116 this->append_state(syntax_element_backstep, sizeof(re_brace));
2117 this->m_pdata->m_data.align();
2118 m_alt_insert_point = this->m_pdata->m_data.size();
2121 case regex_constants::escape_type_right_word:
2123 // an independent sub-expression:
2125 pb->index = markid = -3;
2127 jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
2128 this->m_pdata->m_data.align();
2129 m_alt_insert_point = this->m_pdata->m_data.size();
2131 case regex_constants::syntax_open_mark:
2133 // a conditional expression:
2134 pb->index = markid = -4;
2135 if(++m_position == m_end)
2137 // Rewind to start of (? sequence:
2139 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2140 fail(regex_constants::error_perl_extension, m_position - m_base);
2143 v = this->m_traits.toi(m_position, m_end, 10);
2144 if(m_position == m_end)
2146 // Rewind to start of (? sequence:
2148 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2149 fail(regex_constants::error_perl_extension, m_position - m_base);
2152 if(*m_position == charT('R'))
2154 if(++m_position == m_end)
2156 // Rewind to start of (? sequence:
2158 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2159 fail(regex_constants::error_perl_extension, m_position - m_base);
2162 if(*m_position == charT('&'))
2164 const charT* base = ++m_position;
2165 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2167 if(m_position == m_end)
2169 // Rewind to start of (? sequence:
2171 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2172 fail(regex_constants::error_perl_extension, m_position - m_base);
2175 v = -static_cast<int>(hash_value_from_capture_name(base, m_position));
2179 v = -this->m_traits.toi(m_position, m_end, 10);
2181 re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
2182 br->index = v < 0 ? (v - 1) : 0;
2183 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
2185 // Rewind to start of (? sequence:
2187 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2188 fail(regex_constants::error_perl_extension, m_position - m_base);
2191 if(++m_position == m_end)
2193 // Rewind to start of (? sequence:
2195 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2196 fail(regex_constants::error_perl_extension, m_position - m_base);
2200 else if((*m_position == charT('\'')) || (*m_position == charT('<')))
2202 const charT* base = ++m_position;
2203 while((m_position != m_end) && (*m_position != charT('>')) && (*m_position != charT('\'')))
2205 if(m_position == m_end)
2207 // Rewind to start of (? sequence:
2209 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2210 fail(regex_constants::error_perl_extension, m_position - m_base);
2213 v = static_cast<int>(hash_value_from_capture_name(base, m_position));
2214 re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
2216 if(((*m_position != charT('>')) && (*m_position != charT('\''))) || (++m_position == m_end))
2218 // Rewind to start of (? sequence:
2220 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2221 fail(regex_constants::error_perl_extension, m_position - m_base, "Unterminated named capture.");
2224 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
2226 // Rewind to start of (? sequence:
2228 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2229 fail(regex_constants::error_perl_extension, m_position - m_base);
2232 if(++m_position == m_end)
2234 // Rewind to start of (? sequence:
2236 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2237 fail(regex_constants::error_perl_extension, m_position - m_base);
2241 else if(*m_position == charT('D'))
2243 const char* def = "DEFINE";
2244 while(*def && (m_position != m_end) && (*m_position == charT(*def)))
2245 ++m_position, ++def;
2246 if((m_position == m_end) || *def)
2248 // Rewind to start of (? sequence:
2250 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2251 fail(regex_constants::error_perl_extension, m_position - m_base);
2254 re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
2255 br->index = 9999; // special magic value!
2256 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
2258 // Rewind to start of (? sequence:
2260 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2261 fail(regex_constants::error_perl_extension, m_position - m_base);
2264 if(++m_position == m_end)
2266 // Rewind to start of (? sequence:
2268 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2269 fail(regex_constants::error_perl_extension, m_position - m_base);
2275 re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
2277 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
2279 // Rewind to start of (? sequence:
2281 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2282 fail(regex_constants::error_perl_extension, m_position - m_base);
2285 if(++m_position == m_end)
2287 // Rewind to start of (? sequence:
2289 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2290 fail(regex_constants::error_perl_extension, m_position - m_base);
2296 // verify that we have a lookahead or lookbehind assert:
2297 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_question)
2299 // Rewind to start of (? sequence:
2301 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2302 fail(regex_constants::error_perl_extension, m_position - m_base);
2305 if(++m_position == m_end)
2307 // Rewind to start of (? sequence:
2309 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2310 fail(regex_constants::error_perl_extension, m_position - m_base);
2313 if(this->m_traits.syntax_type(*m_position) == regex_constants::escape_type_left_word)
2315 if(++m_position == m_end)
2317 // Rewind to start of (? sequence:
2319 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2320 fail(regex_constants::error_perl_extension, m_position - m_base);
2323 if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal)
2324 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not))
2326 // Rewind to start of (? sequence:
2328 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2329 fail(regex_constants::error_perl_extension, m_position - m_base);
2336 if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal)
2337 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not))
2339 // Rewind to start of (? sequence:
2341 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2342 fail(regex_constants::error_perl_extension, m_position - m_base);
2350 case regex_constants::syntax_close_mark:
2351 // Rewind to start of (? sequence:
2353 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2354 fail(regex_constants::error_perl_extension, m_position - m_base);
2356 case regex_constants::escape_type_end_buffer:
2358 name_delim = *m_position;
2361 if(0 == (this->flags() & regbase::nosubs))
2363 markid = ++m_mark_count;
2364 #ifndef BOOST_NO_STD_DISTANCE
2365 if(this->flags() & regbase::save_subexpression_location)
2366 this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>(std::distance(m_base, m_position) - 2, 0));
2368 if(this->flags() & regbase::save_subexpression_location)
2369 this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>((m_position - m_base) - 2, 0));
2373 const charT* base = ++m_position;
2374 if(m_position == m_end)
2376 // Rewind to start of (? sequence:
2378 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2379 fail(regex_constants::error_perl_extension, m_position - m_base);
2382 while((m_position != m_end) && (*m_position != name_delim))
2384 if(m_position == m_end)
2386 // Rewind to start of (? sequence:
2388 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2389 fail(regex_constants::error_perl_extension, m_position - m_base);
2392 this->m_pdata->set_name(base, m_position, markid);
2397 if(*m_position == charT('R'))
2401 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
2403 // Rewind to start of (? sequence:
2405 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2406 fail(regex_constants::error_perl_extension, m_position - m_base);
2409 goto insert_recursion;
2411 if(*m_position == charT('&'))
2414 const charT* base = m_position;
2415 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2417 if(m_position == m_end)
2419 // Rewind to start of (? sequence:
2421 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2422 fail(regex_constants::error_perl_extension, m_position - m_base);
2425 v = static_cast<int>(hash_value_from_capture_name(base, m_position));
2426 goto insert_recursion;
2428 if(*m_position == charT('P'))
2431 if(m_position == m_end)
2433 // Rewind to start of (? sequence:
2435 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2436 fail(regex_constants::error_perl_extension, m_position - m_base);
2439 if(*m_position == charT('>'))
2442 const charT* base = m_position;
2443 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2445 if(m_position == m_end)
2447 // Rewind to start of (? sequence:
2449 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2450 fail(regex_constants::error_perl_extension, m_position - m_base);
2453 v = static_cast<int>(hash_value_from_capture_name(base, m_position));
2454 goto insert_recursion;
2458 // let's assume that we have a (?imsx) group and try to parse it:
2461 regex_constants::syntax_option_type opts = parse_options();
2462 if(m_position == m_end)
2464 // Rewind to start of (? sequence:
2466 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2467 fail(regex_constants::error_perl_extension, m_position - m_base);
2470 // make a note of whether we have a case change:
2471 m_has_case_change = ((opts & regbase::icase) != (this->flags() & regbase::icase));
2472 pb->index = markid = 0;
2473 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark)
2475 // update flags and carry on as normal:
2477 restore_flags = false;
2478 old_case_change |= m_has_case_change; // defer end of scope by one ')'
2480 else if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_colon)
2482 // update flags and carry on until the matching ')' is found:
2488 // Rewind to start of (? sequence:
2490 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2491 fail(regex_constants::error_perl_extension, m_position - m_base);
2495 // finally append a case change state if we need it:
2496 if(m_has_case_change)
2498 static_cast<re_case*>(
2499 this->append_state(syntax_element_toggle_case, sizeof(re_case))
2500 )->icase = opts & regbase::icase;
2505 // now recursively add more states, this will terminate when we get to a
2510 // Unwind alternatives:
2512 if(0 == unwind_alts(last_paren_start))
2514 // Rewind to start of (? sequence:
2516 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2517 fail(regex_constants::error_perl_extension, m_position - m_base, "Invalid alternation operators within (?...) block.");
2521 // we either have a ')' or we have run out of characters prematurely:
2523 if(m_position == m_end)
2525 // Rewind to start of (? sequence:
2527 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2528 this->fail(regex_constants::error_paren, ::boost::BOOST_REGEX_DETAIL_NS::distance(m_base, m_end));
2531 BOOST_ASSERT(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark);
2534 // restore the flags:
2538 // append a case change state if we need it:
2539 if(m_has_case_change)
2541 static_cast<re_case*>(
2542 this->append_state(syntax_element_toggle_case, sizeof(re_case))
2543 )->icase = old_flags & regbase::icase;
2545 this->flags(old_flags);
2548 // set up the jump pointer if we have one:
2552 this->m_pdata->m_data.align();
2553 re_jump* jmp = static_cast<re_jump*>(this->getaddress(jump_offset));
2554 jmp->alt.i = this->m_pdata->m_data.size() - this->getoffset(jmp);
2555 if((this->m_last_state == jmp) && (markid != -2))
2557 // Oops... we didn't have anything inside the assertion.
2558 // Note we don't get here for negated forward lookahead as (?!)
2559 // does have some uses.
2560 // Rewind to start of (? sequence:
2562 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2563 fail(regex_constants::error_perl_extension, m_position - m_base, "Invalid or empty zero width assertion.");
2568 // verify that if this is conditional expression, that we do have
2569 // an alternative, if not add one:
2573 re_syntax_base* b = this->getaddress(expected_alt_point);
2574 // Make sure we have exactly one alternative following this state:
2575 if(b->type != syntax_element_alt)
2577 re_alt* alt = static_cast<re_alt*>(this->insert_state(expected_alt_point, syntax_element_alt, sizeof(re_alt)));
2578 alt->alt.i = this->m_pdata->m_data.size() - this->getoffset(alt);
2580 else if(this->getaddress(static_cast<re_alt*>(b)->alt.i, b)->type == syntax_element_alt)
2582 // Can't have seen more than one alternative:
2583 // Rewind to start of (? sequence:
2585 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2586 fail(regex_constants::error_bad_pattern, m_position - m_base, "More than one alternation operator | was encountered inside a conditional expression.");
2591 // We must *not* have seen an alternative inside a (DEFINE) block:
2592 b = this->getaddress(b->next.i, b);
2593 if((b->type == syntax_element_assert_backref) && (static_cast<re_brace*>(b)->index == 9999))
2595 // Rewind to start of (? sequence:
2597 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2598 fail(regex_constants::error_bad_pattern, m_position - m_base, "Alternation operators are not allowed inside a DEFINE block.");
2602 // check for invalid repetition of next state:
2603 b = this->getaddress(expected_alt_point);
2604 b = this->getaddress(static_cast<re_alt*>(b)->next.i, b);
2605 if((b->type != syntax_element_assert_backref)
2606 && (b->type != syntax_element_startmark))
2608 // Rewind to start of (? sequence:
2610 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2611 fail(regex_constants::error_badrepeat, m_position - m_base, "A repetition operator cannot be applied to a zero-width assertion.");
2616 // append closing parenthesis state:
2618 pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
2620 pb->icase = this->flags() & regbase::icase;
2621 this->m_paren_start = last_paren_start;
2623 // restore the alternate insertion point:
2625 this->m_alt_insert_point = last_alt_point;
2627 // and the case change data:
2629 m_has_case_change = old_case_change;
2631 // And the mark_reset data:
2633 if(m_max_mark > m_mark_count)
2635 m_mark_count = m_max_mark;
2637 m_mark_reset = mark_reset;
2638 m_max_mark = max_mark;
2643 #ifndef BOOST_NO_STD_DISTANCE
2644 if(this->flags() & regbase::save_subexpression_location)
2645 this->m_pdata->m_subs.at(markid - 1).second = std::distance(m_base, m_position) - 1;
2647 if(this->flags() & regbase::save_subexpression_location)
2648 this->m_pdata->m_subs.at(markid - 1).second = (m_position - m_base) - 1;
2651 // allow backrefs to this mark:
2653 if(markid < (int)(sizeof(unsigned) * CHAR_BIT))
2654 this->m_backrefs |= 1u << (markid - 1);
// match_verb: checks the pattern text at m_position against the narrow
// string `verb`, casting each verb character to charT before comparing.
// On a mismatch, or if advancing m_position reaches m_end, m_position is
// moved back to the nearest preceding character whose syntax type is
// syntax_open_mark and error_perl_extension is reported through fail() at
// that offset from m_base.
// NOTE(review): the loop over `verb`, the braces and the return statements
// are not visible in this excerpt -- confirm the exact return values against
// the full file.
2659 template <class charT, class traits>
2660 bool basic_regex_parser<charT, traits>::match_verb(const char* verb)
// The current verb character differs from the current pattern character:
// rewind to the open-mark character so the reported error offset is the
// start of the group.
2664 if(static_cast<charT>(*verb) != *m_position)
2666 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2667 fail(regex_constants::error_perl_extension, m_position - m_base);
// Step past the matched character; reaching the end of the pattern at this
// point is reported as the same error, again at the start of the group.
2670 if(++m_position == m_end)
2673 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2674 fail(regex_constants::error_perl_extension, m_position - m_base);
// parse_perl_verb: parses a "(*...)" verb group (the existing comments below
// refer to it as the "(* sequence") and appends the matching state:
//   "AIL" tail or bare close mark -> syntax_element_fail
//   "CCEPT" tail                  -> syntax_element_accept
//   "OMMIT" tail                  -> syntax_element_commit, action commit_commit
//   "RUNE" tail                   -> syntax_element_commit, action commit_prune
//   "KIP" tail                    -> syntax_element_commit, action commit_skip
//   "HEN" tail                    -> syntax_element_then
// Every verb except the fail and accept forms also sets
// m_pdata->m_disable_match_any to true.
// Each verb must be followed by a close mark; otherwise, or if the pattern
// ends early, m_position is rewound to the open mark and
// error_perl_extension is reported via fail().
// NOTE(review): the code that dispatches on the first letter of the verb
// (presumably F/A/C/P/S/T), the braces and the return statements are not
// visible in this excerpt -- confirm against the full file.
2682 template <class charT, class traits>
2683 bool basic_regex_parser<charT, traits>::parse_perl_verb()
// Advance m_position; running out of pattern here is an error.
2685 if(++m_position == m_end)
2687 // Rewind to start of (* sequence:
2689 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2690 fail(regex_constants::error_perl_extension, m_position - m_base);
// Advance again before examining the rest of the verb.
2696 if(++m_position == m_end)
2698 // Rewind to start of (* sequence:
2700 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2701 fail(regex_constants::error_perl_extension, m_position - m_base);
// Accept either an immediate close mark or the remaining letters "AIL".
2704 if((this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark) || match_verb("AIL"))
// The verb must be terminated by a close mark.
2706 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2708 // Rewind to start of (* sequence:
2710 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2711 fail(regex_constants::error_perl_extension, m_position - m_base);
2715 this->append_state(syntax_element_fail);
// Next verb: advance, then require the remaining letters "CCEPT".
2720 if(++m_position == m_end)
2722 // Rewind to start of (* sequence:
2724 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2725 fail(regex_constants::error_perl_extension, m_position - m_base);
2728 if(match_verb("CCEPT"))
2730 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2732 // Rewind to start of (* sequence:
2734 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2735 fail(regex_constants::error_perl_extension, m_position - m_base);
2739 this->append_state(syntax_element_accept);
// Next verb: advance, then require the remaining letters "OMMIT".
2744 if(++m_position == m_end)
2746 // Rewind to start of (* sequence:
2748 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2749 fail(regex_constants::error_perl_extension, m_position - m_base);
2752 if(match_verb("OMMIT"))
2754 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2756 // Rewind to start of (* sequence:
2758 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2759 fail(regex_constants::error_perl_extension, m_position - m_base);
// Append a commit state tagged commit_commit and set the disable-match-any flag.
2763 static_cast<re_commit*>(this->append_state(syntax_element_commit, sizeof(re_commit)))->action = commit_commit;
2764 this->m_pdata->m_disable_match_any = true;
// Next verb: advance, then require the remaining letters "RUNE".
2769 if(++m_position == m_end)
2771 // Rewind to start of (* sequence:
2773 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2774 fail(regex_constants::error_perl_extension, m_position - m_base);
2777 if(match_verb("RUNE"))
2779 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2781 // Rewind to start of (* sequence:
2783 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2784 fail(regex_constants::error_perl_extension, m_position - m_base);
// Append a commit state tagged commit_prune and set the disable-match-any flag.
2788 static_cast<re_commit*>(this->append_state(syntax_element_commit, sizeof(re_commit)))->action = commit_prune;
2789 this->m_pdata->m_disable_match_any = true;
// Next verb: advance, then require the remaining letters "KIP".
2794 if(++m_position == m_end)
2796 // Rewind to start of (* sequence:
2798 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2799 fail(regex_constants::error_perl_extension, m_position - m_base);
2802 if(match_verb("KIP"))
2804 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2806 // Rewind to start of (* sequence:
2808 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2809 fail(regex_constants::error_perl_extension, m_position - m_base);
// Append a commit state tagged commit_skip and set the disable-match-any flag.
2813 static_cast<re_commit*>(this->append_state(syntax_element_commit, sizeof(re_commit)))->action = commit_skip;
2814 this->m_pdata->m_disable_match_any = true;
// Next verb: advance, then require the remaining letters "HEN".
2819 if(++m_position == m_end)
2821 // Rewind to start of (* sequence:
2823 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2824 fail(regex_constants::error_perl_extension, m_position - m_base);
2827 if(match_verb("HEN"))
2829 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2831 // Rewind to start of (* sequence:
2833 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2834 fail(regex_constants::error_perl_extension, m_position - m_base);
// Append a "then" state and set the disable-match-any flag.
2838 this->append_state(syntax_element_then);
2839 this->m_pdata->m_disable_match_any = true;
// add_emacs_code: builds a basic_char_set for an emacs-style syntax-class
// escape and appends it with append_set(). The set is filled with character
// classes (m_mask_space, m_word_mask, the class looked up for the name
// "punct") and/or individual characters, depending on the code character.
// Errors reported via fail():
//   error_escape - the pattern ends immediately after the escape; the
//                  position is first rewound to the escape character.
//   error_ctype  - reported at the current position, both by a standalone
//                  fail() call (presumably the unrecognised-code branch) and
//                  when append_set() returns 0.
// NOTE(review): the switch on the code character, its case labels, the use
// of the `negate` parameter and the return statements are not visible in
// this excerpt -- confirm against the full file.
2847 template <class charT, class traits>
2848 bool basic_regex_parser<charT, traits>::add_emacs_code(bool negate)
2851 // parses an emacs style \sx or \Sx construct.
2853 if(++m_position == m_end)
2855 // Rewind to start of sequence:
2857 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
2858 fail(regex_constants::error_escape, m_position - m_base);
2861 basic_char_set<charT, traits> char_set;
// Class name "punct" as a charT array (five characters, no terminator);
// it is passed to lookup_classname() as the range [s_punct, s_punct+5).
2865 static const charT s_punct[5] = { 'p', 'u', 'n', 'c', 't', };
// Each group of additions below presumably belongs to one case of a switch
// on the code character; the case labels are not visible here.
// Whitespace class:
2871 char_set.add_class(this->m_mask_space);
// Word class:
2874 char_set.add_class(this->m_word_mask);
// A fixed list of symbol characters:
2877 char_set.add_single(digraph<charT>(charT('$')));
2878 char_set.add_single(digraph<charT>(charT('&')));
2879 char_set.add_single(digraph<charT>(charT('*')));
2880 char_set.add_single(digraph<charT>(charT('+')));
2881 char_set.add_single(digraph<charT>(charT('-')));
2882 char_set.add_single(digraph<charT>(charT('_')));
2883 char_set.add_single(digraph<charT>(charT('<')));
2884 char_set.add_single(digraph<charT>(charT('>')));
// The traits' "punct" class:
2887 char_set.add_class(this->m_traits.lookup_classname(s_punct, s_punct+5));
// Opening bracket characters:
2890 char_set.add_single(digraph<charT>(charT('(')));
2891 char_set.add_single(digraph<charT>(charT('[')));
2892 char_set.add_single(digraph<charT>(charT('{')));
// Closing bracket characters:
2895 char_set.add_single(digraph<charT>(charT(')')));
2896 char_set.add_single(digraph<charT>(charT(']')));
2897 char_set.add_single(digraph<charT>(charT('}')));
// Quote characters:
2900 char_set.add_single(digraph<charT>(charT('"')));
2901 char_set.add_single(digraph<charT>(charT('\'')));
2902 char_set.add_single(digraph<charT>(charT('`')));
// Apostrophe, comma and hash:
2905 char_set.add_single(digraph<charT>(charT('\'')));
2906 char_set.add_single(digraph<charT>(charT(',')));
2907 char_set.add_single(digraph<charT>(charT('#')));
// Semicolon:
2910 char_set.add_single(digraph<charT>(charT(';')));
// Newline and form feed:
2913 char_set.add_single(digraph<charT>(charT('\n')));
2914 char_set.add_single(digraph<charT>(charT('\f')));
// Presumably the default branch for an unrecognised code character:
2917 fail(regex_constants::error_ctype, m_position - m_base);
// Append the finished set; a zero result from append_set() is an error.
2920 if(0 == this->append_set(char_set))
2922 fail(regex_constants::error_ctype, m_position - m_base);
// parse_options: reads the option letters of a (?imsx-imsx) group and
// computes the resulting option flags, starting from the current flags().
// Before a '-' the visible updates turn options on (set mod_s and clear
// no_mod_s, clear no_mod_m, set icase, set mod_x); after a '-' they do the
// opposite (clear mod_s and set no_mod_s, set no_mod_m, clear icase,
// clear mod_x).
// If the pattern ends while options are being read, m_position is rewound to
// the open mark and error_paren is reported via fail().
// NOTE(review): the loops, the switch/case labels that map letters to these
// updates (presumably s, m, i, x in that order), the handling of `breakout`
// and the return statement (presumably returning `f`) are not visible in
// this excerpt -- confirm against the full file.
2929 template <class charT, class traits>
2930 regex_constants::syntax_option_type basic_regex_parser<charT, traits>::parse_options()
2932 // we have a (?imsx-imsx) group, convert it into a set of flags:
2933 regex_constants::syntax_option_type f = this->flags();
2934 bool breakout = false;
// Options being switched on:
2940 f |= regex_constants::mod_s;
2941 f &= ~regex_constants::no_mod_s;
2944 f &= ~regex_constants::no_mod_m;
2947 f |= regex_constants::icase;
2950 f |= regex_constants::mod_x;
// Advance to the next character; end of pattern inside the group is an error.
2956 if(++m_position == m_end)
2958 // Rewind to start of (? sequence:
2960 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2961 fail(regex_constants::error_paren, m_position - m_base);
// A '-' introduces the options to switch off.
2969 if(*m_position == static_cast<charT>('-'))
2971 if(++m_position == m_end)
2973 // Rewind to start of (? sequence:
2975 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2976 fail(regex_constants::error_paren, m_position - m_base);
// Options being switched off:
2984 f &= ~regex_constants::mod_s;
2985 f |= regex_constants::no_mod_s;
2988 f |= regex_constants::no_mod_m;
2991 f &= ~regex_constants::icase;
2994 f &= ~regex_constants::mod_x;
// Advance to the next character; end of pattern inside the group is an error.
3000 if(++m_position == m_end)
3002 // Rewind to start of (? sequence:
3004 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
3005 fail(regex_constants::error_paren, m_position - m_base);
// unwind_alts: finalises the alternatives recorded in m_alt_jumps whose
// offsets are greater than `last_paren_start`.
// It first checks for an alternative with no states after it (the alt insert
// point equals the current size of the state data) and reports error_empty
// via fail() in that case. The check also involves the perl_syntax_group and
// no_empty_expressions flags.
// It then pops each qualifying jump offset and patches that jump's alt.i to
// the distance from the jump to the (aligned) end of the state data.
// NOTE(review): the operators joining the flag tests into the first
// condition, the braces and the return statements are not visible in this
// excerpt -- confirm the exact condition and the returned values against the
// full file.
3014 template <class charT, class traits>
3015 bool basic_regex_parser<charT, traits>::unwind_alts(std::ptrdiff_t last_paren_start)
3018 // If we didn't actually add any states after the last
3019 // alternative then that's an error:
3021 if((this->m_alt_insert_point == static_cast<std::ptrdiff_t>(this->m_pdata->m_data.size()))
3022 && m_alt_jumps.size() && (m_alt_jumps.back() > last_paren_start)
3025 ((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group)
3027 ((this->flags() & regbase::no_empty_expressions) == 0)
3031 fail(regex_constants::error_empty, this->m_position - this->m_base, "Can't terminate a sub-expression with an alternation operator |.");
3035 // Fix up our alternatives:
// Only jumps recorded after last_paren_start are processed, most recent first.
3037 while(m_alt_jumps.size() && (m_alt_jumps.back() > last_paren_start))
3040 // fix up the jump to point to the end of the states
3041 // that we've just added:
3043 std::ptrdiff_t jump_offset = m_alt_jumps.back();
3044 m_alt_jumps.pop_back();
// Align the state data before taking its size, so the target is an aligned end position.
3045 this->m_pdata->m_data.align();
3046 re_jump* jmp = static_cast<re_jump*>(this->getaddress(jump_offset));
3047 BOOST_ASSERT(jmp->type == syntax_element_jump);
// Store the target as an offset relative to the jump state itself.
3048 jmp->alt.i = this->m_pdata->m_data.size() - jump_offset;
3054 #pragma warning(pop)
3057 } // namespace BOOST_REGEX_DETAIL_NS
3058 } // namespace boost
3061 #pragma warning(push)
3062 #pragma warning(disable: 4103)
3064 #ifdef BOOST_HAS_ABI_HEADERS
3065 # include BOOST_ABI_SUFFIX
3068 #pragma warning(pop)