6 * Use, modification and distribution are subject to the
7 * Boost Software License, Version 1.0. (See accompanying file
8 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
13 * LOCATION: see http://www.boost.org for most recent version.
14 * FILE perl_matcher_recursive.hpp
15 * VERSION see <boost/version.hpp>
16 * DESCRIPTION: Definitions of perl_matcher member functions that are
17 * specific to the recursive implementation.
20 #ifndef BOOST_REGEX_V4_PERL_MATCHER_RECURSIVE_HPP
21 #define BOOST_REGEX_V4_PERL_MATCHER_RECURSIVE_HPP
25 #pragma warning(disable: 4103)
27 #ifdef BOOST_HAS_ABI_HEADERS
28 # include BOOST_ABI_PREFIX
36 #pragma warning(disable: 4800)
//
// backup_subex: keeps a copy of one sub-expression of a match_results
// object together with its index, so that the sub-expression can be
// written back later with restore().
// NOTE(review): the class header and the declaration of "index" are not
// visible in this excerpt.
//
42 template <class BidiIterator>
// the saved copy of the sub-expression:
46 sub_match<BidiIterator> sub;
// remember index i and copy w[i] (the second constructor argument is
// passed as false; its meaning is defined by sub_match, not shown here):
49 backup_subex(const match_results<BidiIterator, A>& w, int i)
50 : index(i), sub(w[i], false) {}
// write the saved first/second iterators and matched flag back into
// slot "index" of w; both setters also receive (index == 0):
52 void restore(match_results<BidiIterator, A>& w)
54 w.set_first(sub.first, index, index == 0);
55 w.set_second(sub.second, index, sub.matched, index == 0);
// read-only access to the saved sub-expression:
57 const sub_match<BidiIterator>& get() { return sub; }
//
// match_all_states: selects the handler for the current state from a
// static table of member-function pointers and raises
// regex_constants::error_complexity once state_count exceeds
// max_state_count.
// NOTE(review): the control flow surrounding the visible statements
// (including the call through "proc") is not part of this excerpt.
//
60 template <class BidiIterator, class Allocator, class traits>
61 bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states()
// one handler per state type; the lookup further down indexes this table
// with pstate->type, so the entry order has to follow that field's values:
63 static matcher_proc_type const s_match_vtable[30] =
65 (&perl_matcher<BidiIterator, Allocator, traits>::match_startmark),
66 &perl_matcher<BidiIterator, Allocator, traits>::match_endmark,
67 &perl_matcher<BidiIterator, Allocator, traits>::match_literal,
68 &perl_matcher<BidiIterator, Allocator, traits>::match_start_line,
69 &perl_matcher<BidiIterator, Allocator, traits>::match_end_line,
70 &perl_matcher<BidiIterator, Allocator, traits>::match_wild,
71 &perl_matcher<BidiIterator, Allocator, traits>::match_match,
72 &perl_matcher<BidiIterator, Allocator, traits>::match_word_boundary,
73 &perl_matcher<BidiIterator, Allocator, traits>::match_within_word,
74 &perl_matcher<BidiIterator, Allocator, traits>::match_word_start,
75 &perl_matcher<BidiIterator, Allocator, traits>::match_word_end,
76 &perl_matcher<BidiIterator, Allocator, traits>::match_buffer_start,
77 &perl_matcher<BidiIterator, Allocator, traits>::match_buffer_end,
78 &perl_matcher<BidiIterator, Allocator, traits>::match_backref,
79 &perl_matcher<BidiIterator, Allocator, traits>::match_long_set,
80 &perl_matcher<BidiIterator, Allocator, traits>::match_set,
81 &perl_matcher<BidiIterator, Allocator, traits>::match_jump,
82 &perl_matcher<BidiIterator, Allocator, traits>::match_alt,
83 &perl_matcher<BidiIterator, Allocator, traits>::match_rep,
84 &perl_matcher<BidiIterator, Allocator, traits>::match_combining,
85 &perl_matcher<BidiIterator, Allocator, traits>::match_soft_buffer_end,
86 &perl_matcher<BidiIterator, Allocator, traits>::match_restart_continue,
87 // Although this next line *should* be evaluated at compile time, in practice
88 // some compilers (VC++) emit run-time initialisation which breaks thread
89 // safety, so use a dispatch function instead:
90 //(::boost::is_random_access_iterator<BidiIterator>::value ? &perl_matcher<BidiIterator, Allocator, traits>::match_dot_repeat_fast : &perl_matcher<BidiIterator, Allocator, traits>::match_dot_repeat_slow),
91 &perl_matcher<BidiIterator, Allocator, traits>::match_dot_repeat_dispatch,
92 &perl_matcher<BidiIterator, Allocator, traits>::match_char_repeat,
93 &perl_matcher<BidiIterator, Allocator, traits>::match_set_repeat,
94 &perl_matcher<BidiIterator, Allocator, traits>::match_long_set_repeat,
95 &perl_matcher<BidiIterator, Allocator, traits>::match_backstep,
96 &perl_matcher<BidiIterator, Allocator, traits>::match_assert_backref,
97 &perl_matcher<BidiIterator, Allocator, traits>::match_toggle_case,
98 &perl_matcher<BidiIterator, Allocator, traits>::match_recursion,
// give up with a complexity error once state_count has grown past
// max_state_count:
101 if(state_count > max_state_count)
102 raise_error(traits_inst, regex_constants::error_complexity);
// pick the handler that belongs to the current state's type:
105 matcher_proc_type proc = s_match_vtable[pstate->type];
// note a partial match when match_partial is set and we are at the end
// of the input but not at search_base (presumably evaluated after a
// handler has failed; the enclosing condition is not visible here):
109 if((m_match_flags & match_partial) && (position == last) && (position != search_base))
110 m_has_partial_match = true;
//
// match_startmark: handles a re_brace state that opens a group. The
// visible branches deal with forward lookahead assertions, independent
// sub-expressions, conditional expressions, the \K escape and ordinary
// marked sub-expressions (index > 0).
// NOTE(review): the selection between these branches (presumably on
// "index") and the function's return statements are not visible in this
// excerpt.
//
117 template <class BidiIterator, class Allocator, class traits>
118 bool perl_matcher<BidiIterator, Allocator, traits>::match_startmark()
// pick up the group index and adopt the case sensitivity stored in the state:
120 int index = static_cast<const re_brace*>(pstate)->index;
121 icase = static_cast<const re_brace*>(pstate)->icase;
126 pstate = pstate->next.p;
131 // forward lookahead assert:
// remember the position: it is restored below, so the assertion itself
// consumes no input:
132 BidiIterator old_position(position);
// the state to carry on from afterwards is found through the jump that
// follows this state:
133 const re_syntax_base* next_pstate = static_cast<const re_jump*>(pstate->next.p)->alt.p->next.p;
// step over that jump and run the assertion body:
134 pstate = pstate->next.p->next.p;
135 r = match_all_states();
136 pstate = next_pstate;
137 position = old_position;
// true when the body matched but index is not -1, or the body failed but
// index is not -2 (the statement guarded by this test is not visible here):
138 if((r && (index != -1)) || (!r && (index != -2)))
146 // independent sub-expression:
// flag that we are inside an independent sub-expression while its body
// runs, then put the previous flag value back:
147 bool old_independent = m_independent;
148 m_independent = true;
149 const re_syntax_base* next_pstate = static_cast<const re_jump*>(pstate->next.p)->alt.p->next.p;
150 pstate = pstate->next.p->next.p;
151 r = match_all_states();
152 pstate = next_pstate;
153 m_independent = old_independent;
154 #ifdef BOOST_REGEX_MATCH_EXTRA
155 if(r && (m_match_flags & match_extra))
158 // our captures have been stored in *m_presult
159 // we need to unpack them, and insert them
160 // back in the right order when we unwind the stack:
// work on a copy of the results and empty the live capture lists:
163 match_results<BidiIterator, Allocator> tm(*m_presult);
164 for(i = 0; i < tm.size(); ++i)
165 (*m_presult)[i].get_captures().clear();
166 // match everything else:
167 r = match_all_states();
168 // now place the stored captures back:
169 for(i = 0; i < tm.size(); ++i)
171 typedef typename sub_match<BidiIterator>::capture_sequence_type seq;
// s1 is the live capture list, s2 the list saved in the copy:
172 seq& s1 = (*m_presult)[i].get_captures();
173 const seq& s2 = tm[i].captures();
185 // conditional expression:
186 const re_alt* alt = static_cast<const re_alt*>(pstate->next.p);
187 BOOST_ASSERT(alt->type == syntax_element_alt);
188 pstate = alt->next.p;
// the condition is either a back-reference test...
189 if(pstate->type == syntax_element_assert_backref)
191 if(!match_assert_backref())
197 // zero width assertion, have to match this recursively:
198 BOOST_ASSERT(pstate->type == syntax_element_startmark);
// an index of -2 marks the assertion as negated:
199 bool negated = static_cast<const re_brace*>(pstate)->index == -2;
200 BidiIterator saved_position = position;
201 const re_syntax_base* next_pstate = static_cast<const re_jump*>(pstate->next.p)->alt.p->next.p;
202 pstate = pstate->next.p->next.p;
203 bool res = match_all_states();
// the assertion is zero width, so go back to where we started:
204 position = saved_position;
208 pstate = next_pstate;
216 // Reset start of $0, since we have a \K escape
// keep a backup of sub-expression 0 before moving its start:
217 backup_subex<BidiIterator> sub(*m_presult, 0);
218 m_presult->set_first(position, 0, true);
219 pstate = pstate->next.p;
220 r = match_all_states();
// put the saved $0 back (the guarding condition is not visible here):
222 sub.restore(*m_presult);
// ordinary marked sub-expression:
227 BOOST_ASSERT(index > 0);
// sub-expressions are only tracked when match_nosubs is not set:
228 if((m_match_flags & match_nosubs) == 0)
// back up the current value of this sub-expression, then record the
// current position as its start:
230 backup_subex<BidiIterator> sub(*m_presult, index);
231 m_presult->set_first(position, index);
232 pstate = pstate->next.p;
233 r = match_all_states();
// put the backed-up value back (the guarding condition is not visible here):
235 sub.restore(*m_presult);
236 #ifdef BOOST_REGEX_MATCH_EXTRA
238 // we have a match, push the capture information onto the stack:
// only done when the backed-up sub-expression had matched and
// match_extra is requested:
240 else if(sub.get().matched && (match_extra & m_match_flags))
241 ((*m_presult)[index]).get_captures().push_back(sub.get());
246 pstate = pstate->next.p;
//
// match_alt: handles an alternation state (re_alt): works out which of
// the two branches may be taken and tries them; fails when neither can.
// NOTE(review): the conditions that choose between the statements below
// are not visible in this excerpt.
//
254 template <class BidiIterator, class Allocator, class traits>
255 bool perl_matcher<BidiIterator, Allocator, traits>::match_alt()
257 bool take_first, take_second;
258 const re_alt* jmp = static_cast<const re_alt*>(pstate);
260 // find out which of these two alternatives we need to take:
// decide from the can_be_null flags (presumably used when there is no
// character to look at; the selecting condition is not visible here):
263 take_first = jmp->can_be_null & mask_take;
264 take_second = jmp->can_be_null & mask_skip;
// decide from the current character and the state's start map:
268 take_first = can_start(*position, jmp->_map, (unsigned char)mask_take);
269 take_second = can_start(*position, jmp->_map, (unsigned char)mask_skip);
274 // we can take the first alternative,
275 // see if we need to push next alternative:
// remember the position and the second alternative before trying the first:
278 BidiIterator oldposition(position);
279 const re_syntax_base* old_pstate = jmp->alt.p;
280 pstate = pstate->next.p;
281 if(!match_all_states())
// first alternative failed: go back to where we started:
284 position = oldposition;
288 pstate = pstate->next.p;
296 return false; // neither option is possible
//
// match_rep: handles a general repeat state (re_repeat). While fewer than
// rep->min repeats have been taken the repeat is taken; otherwise taking
// the repeat and skipping it are tried as described by the comments on
// the branches below.
// NOTE(review): braces, counter updates and some conditions of this
// function are not visible in this excerpt.
//
299 template <class BidiIterator, class Allocator, class traits>
300 bool perl_matcher<BidiIterator, Allocator, traits>::match_rep()
303 #pragma warning(push)
304 #pragma warning(disable:4127 4244)
306 const re_repeat* rep = static_cast<const re_repeat*>(pstate);
308 // Always copy the repeat count, so that the state is restored
309 // when we exit this scope:
311 repeater_count<BidiIterator> r(rep->state_id, &next_count, position);
313 // If we've had at least one repeat already, and the last one
314 // matched the NULL string then set the repeat count to
317 next_count->check_null_repeat(position, rep->max);
319 // find out which of these two alternatives we need to take:
320 bool take_first, take_second;
// decide from the can_be_null flags (presumably used when there is no
// character to look at; the selecting condition is not visible here):
323 take_first = rep->can_be_null & mask_take;
324 take_second = rep->can_be_null & mask_skip;
// decide from the current character and the repeat's start map:
328 take_first = can_start(*position, rep->_map, (unsigned char)mask_take);
329 take_second = can_start(*position, rep->_map, (unsigned char)mask_skip);
// fewer repeats than the minimum so far:
332 if(next_count->get_count() < rep->min)
334 // we must take the repeat:
337 // increase the counter:
339 pstate = rep->next.p;
340 return match_all_states();
// greedy only when the repeat itself is greedy and either match_any is
// not requested or we are inside an independent sub-expression:
344 bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent);
347 // try and take the repeat if we can:
348 if((next_count->get_count() < rep->max) && take_first)
350 // store position in case we fail:
351 BidiIterator pos = position;
352 // increase the counter:
354 pstate = rep->next.p;
355 if(match_all_states())
357 // failed repeat, reset position and fall through for alternative:
365 return false; // can't take anything, fail...
369 // try and skip the repeat if we can:
372 // store position in case we fail:
373 BidiIterator pos = position;
375 if(match_all_states())
377 // failed alternative, reset position and fall through for repeat:
380 if((next_count->get_count() < rep->max) && take_first)
382 // increase the counter:
384 pstate = rep->next.p;
385 return match_all_states();
//
// match_dot_repeat_slow: repeat of a single state (rep->next.p) matched
// one repeat at a time; match_dot_repeat_fast falls back to this
// function.
// NOTE(review): the loop bodies, the declaration of "count" and the
// return statements are not visible in this excerpt.
//
394 template <class BidiIterator, class Allocator, class traits>
395 bool perl_matcher<BidiIterator, Allocator, traits>::match_dot_repeat_slow()
398 #pragma warning(push)
399 #pragma warning(disable:4127)
402 const re_repeat* rep = static_cast<const re_repeat*>(pstate);
// the state that follows the repeat state, i.e. the one being repeated:
403 re_syntax_base* psingle = rep->next.p;
404 // match compulsory repeats first:
405 while(count < rep->min)
// greedy only when the repeat itself is greedy and either match_any is
// not requested or we are inside an independent sub-expression:
412 bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent);
// keep going while fewer than rep->max repeats have been taken:
416 while(count < rep->max)
423 if((rep->leading) && (count < rep->max))
// pass on how many repeats were taken beyond the minimum:
426 return backtrack_till_match(count - rep->min);
430 // non-greedy, keep trying till we get a match:
431 BidiIterator save_pos;
// leading repeat with no upper bound (max == UINT_MAX):
434 if((rep->leading) && (rep->max == UINT_MAX))
439 if(match_all_states())
// no further repeats are allowed once the maximum is reached:
441 if(count >= rep->max)
//
// match_dot_repeat_fast: repeat of a dot state that skips ahead with
// std::advance instead of matching one character at a time. Defers to
// match_dot_repeat_slow() when match_not_dot_null is set or when the
// dot's mask has no bit in common with match_any_mask.
// NOTE(review): braces and several statements of this function are not
// visible in this excerpt.
//
455 template <class BidiIterator, class Allocator, class traits>
456 bool perl_matcher<BidiIterator, Allocator, traits>::match_dot_repeat_fast()
459 #pragma warning(push)
460 #pragma warning(disable:4127)
// cases the fast path does not handle:
462 if(m_match_flags & match_not_dot_null)
463 return match_dot_repeat_slow();
464 if((static_cast<const re_dot*>(pstate->next.p)->mask & match_any_mask) == 0)
465 return match_dot_repeat_slow();
467 // start by working out how much we can skip:
469 const re_repeat* rep = static_cast<const re_repeat*>(pstate);
471 #pragma warning(push)
472 #pragma warning(disable:4267)
// greedy only when the repeat itself is greedy and either match_any is
// not requested or we are inside an independent sub-expression:
474 bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent);
// take rep->max (greedy) or rep->min (non-greedy) characters, but never
// more than remain between position and last:
475 std::size_t count = (std::min)(static_cast<std::size_t>(::boost::re_detail::distance(position, last)), static_cast<std::size_t>(greedy ? rep->max : rep->min));
479 return false; // not enough text left to match
// skip over the counted characters in one step:
481 std::advance(position, count);
485 if((rep->leading) && (count < rep->max) && greedy)
// pass on how many repeats were taken beyond the minimum:
488 return backtrack_till_match(count - rep->min);
490 // non-greedy, keep trying till we get a match:
491 BidiIterator save_pos;
// loop while input and repeats remain and the current character cannot
// start what follows the repeat:
494 while((position != last) && (count < rep->max) && !can_start(*position, rep->_map, mask_skip))
// leading repeat with no upper bound (max == UINT_MAX):
499 if((rep->leading) && (rep->max == UINT_MAX))
504 if(match_all_states())
// no further repeats are allowed once the maximum is reached:
506 if(count >= rep->max)
// move one character past the saved position and continue from there:
510 position = ++save_pos;
//
// match_char_repeat: repeat of a single literal character. The literal
// following the repeat state is asserted to have length 1.
// NOTE(review): braces and several statements of this function are not
// visible in this excerpt.
//
518 template <class BidiIterator, class Allocator, class traits>
519 bool perl_matcher<BidiIterator, Allocator, traits>::match_char_repeat()
522 #pragma warning(push)
523 #pragma warning(disable:4127)
524 #pragma warning(disable:4267)
527 #pragma option push -w-8008 -w-8066 -w-8004
529 const re_repeat* rep = static_cast<const re_repeat*>(pstate);
530 BOOST_ASSERT(1 == static_cast<const re_literal*>(rep->next.p)->length);
// the character to match is read from the storage that directly follows
// the re_literal object:
531 const char_type what = *reinterpret_cast<const char_type*>(static_cast<const re_literal*>(rep->next.p) + 1);
533 // start by working out how much we can skip:
// greedy only when the repeat itself is greedy and either match_any is
// not requested or we are inside an independent sub-expression:
535 bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent);
536 std::size_t count, desired;
// random access iterators: the limit is computed up front from the
// distance to the end of the input:
537 if(::boost::is_random_access_iterator<BidiIterator>::value)
541 (std::size_t)(greedy ? rep->max : rep->min),
542 (std::size_t)::boost::re_detail::distance(position, last));
// scan comparing case-insensitively:
547 while(--desired && (traits_inst.translate_nocase(*position) == what))
// scan comparing with the plain translation:
554 while(--desired && (traits_inst.translate(*position) == what))
// "desired" was counted down during the scan; take the difference:
559 count = count - desired;
// other iterators: count matching characters one by one:
564 desired = greedy ? rep->max : rep->min;
565 while((count < desired) && (position != last) && (traits_inst.translate(*position, icase) == what))
571 if((rep->leading) && (count < rep->max) && greedy)
// pass on how many repeats were taken beyond the minimum:
577 return backtrack_till_match(count - rep->min);
579 // non-greedy, keep trying till we get a match:
580 BidiIterator save_pos;
// loop while input and repeats remain and the current character cannot
// start what follows the repeat:
583 while((position != last) && (count < rep->max) && !can_start(*position, rep->_map, mask_skip))
585 if((traits_inst.translate(*position, icase) == what))
591 return false; // couldn't repeat even though it was the only option
// leading repeat with no upper bound (max == UINT_MAX):
593 if((rep->leading) && (rep->max == UINT_MAX))
598 if(match_all_states())
// no further repeats are allowed once the maximum is reached:
600 if(count >= rep->max)
// check whether the current character is another occurrence of "what":
605 if(traits_inst.translate(*position, icase) == what)
//
// match_set_repeat: repeat of a character set (re_set). Membership is
// tested by indexing the set's _map with the translated character cast
// to unsigned char.
// NOTE(review): braces, the declaration of "count" and several statements
// of this function are not visible in this excerpt.
//
623 template <class BidiIterator, class Allocator, class traits>
624 bool perl_matcher<BidiIterator, Allocator, traits>::match_set_repeat()
627 #pragma warning(push)
628 #pragma warning(disable:4127)
631 #pragma option push -w-8008 -w-8066 -w-8004
633 const re_repeat* rep = static_cast<const re_repeat*>(pstate);
// lookup table of the set that follows the repeat state:
634 const unsigned char* map = static_cast<const re_set*>(rep->next.p)->_map;
637 // start by working out how much we can skip:
// greedy only when the repeat itself is greedy and either match_any is
// not requested or we are inside an independent sub-expression:
639 bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent);
640 std::size_t desired = greedy ? rep->max : rep->min;
// random access iterators: work out the furthest allowed end first, scan
// up to it and take the count from the distance covered:
641 if(::boost::is_random_access_iterator<BidiIterator>::value)
643 BidiIterator end = position;
644 std::advance(end, (std::min)((std::size_t)::boost::re_detail::distance(position, last), desired));
645 BidiIterator origin(position);
646 while((position != end) && map[static_cast<unsigned char>(traits_inst.translate(*position, icase))])
650 count = (unsigned)::boost::re_detail::distance(origin, position);
// other iterators: count matching characters one by one:
654 while((count < desired) && (position != last) && map[static_cast<unsigned char>(traits_inst.translate(*position, icase))])
660 if((rep->leading) && (count < rep->max) && greedy)
// pass on how many repeats were taken beyond the minimum:
666 return backtrack_till_match(count - rep->min);
668 // non-greedy, keep trying till we get a match:
669 BidiIterator save_pos;
// loop while input and repeats remain and the current character cannot
// start what follows the repeat:
672 while((position != last) && (count < rep->max) && !can_start(*position, rep->_map, mask_skip))
674 if(map[static_cast<unsigned char>(traits_inst.translate(*position, icase))])
680 return false; // couldn't repeat even though it was the only option
// leading repeat with no upper bound (max == UINT_MAX):
682 if((rep->leading) && (rep->max == UINT_MAX))
687 if(match_all_states())
// no further repeats are allowed once the maximum is reached:
689 if(count >= rep->max)
// check whether the current character is a member of the set:
694 if(map[static_cast<unsigned char>(traits_inst.translate(*position, icase))])
//
// match_long_set_repeat: repeat of a long character set (re_set_long).
// A character counts as matched when re_is_set_member() returns an
// iterator different from the position it was given.
// NOTE(review): braces, the declaration of "count" and several statements
// of this function are not visible in this excerpt.
//
712 template <class BidiIterator, class Allocator, class traits>
713 bool perl_matcher<BidiIterator, Allocator, traits>::match_long_set_repeat()
716 #pragma warning(push)
717 #pragma warning(disable:4127)
720 #pragma option push -w-8008 -w-8066 -w-8004
722 typedef typename traits::char_class_type char_class_type;
723 const re_repeat* rep = static_cast<const re_repeat*>(pstate);
// the set being repeated is the state that follows the repeat state:
724 const re_set_long<char_class_type>* set = static_cast<const re_set_long<char_class_type>*>(pstate->next.p);
727 // start by working out how much we can skip:
// greedy only when the repeat itself is greedy and either match_any is
// not requested or we are inside an independent sub-expression:
729 bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent);
730 std::size_t desired = greedy ? rep->max : rep->min;
// random access iterators: work out the furthest allowed end first, scan
// up to it and take the count from the distance covered:
731 if(::boost::is_random_access_iterator<BidiIterator>::value)
733 BidiIterator end = position;
734 std::advance(end, (std::min)((std::size_t)::boost::re_detail::distance(position, last), desired));
735 BidiIterator origin(position);
736 while((position != end) && (position != re_is_set_member(position, last, set, re.get_data(), icase)))
740 count = (unsigned)::boost::re_detail::distance(origin, position);
// other iterators: count matching characters one by one:
744 while((count < desired) && (position != last) && (position != re_is_set_member(position, last, set, re.get_data(), icase)))
750 if((rep->leading) && (count < rep->max) && greedy)
// pass on how many repeats were taken beyond the minimum:
756 return backtrack_till_match(count - rep->min);
758 // non-greedy, keep trying till we get a match:
759 BidiIterator save_pos;
// loop while input and repeats remain and the current character cannot
// start what follows the repeat:
762 while((position != last) && (count < rep->max) && !can_start(*position, rep->_map, mask_skip))
764 if(position != re_is_set_member(position, last, set, re.get_data(), icase))
770 return false; // couldn't repeat even though it was the only option
// leading repeat with no upper bound (max == UINT_MAX):
772 if((rep->leading) && (rep->max == UINT_MAX))
777 if(match_all_states())
// no further repeats are allowed once the maximum is reached:
779 if(count >= rep->max)
// check whether the current character is a member of the set:
784 if(position != re_is_set_member(position, last, set, re.get_data(), icase))
//
// backtrack_till_match: called by the repeat matchers in this file with
// count set to the number of repeats taken beyond rep->min. Steps the
// position backwards and retries match_all_states().
// NOTE(review): the loops, the updates of "count" and the return
// statements are not visible in this excerpt.
//
802 template <class BidiIterator, class Allocator, class traits>
803 bool perl_matcher<BidiIterator, Allocator, traits>::backtrack_till_match(std::size_t count)
806 #pragma warning(push)
807 #pragma warning(disable:4127)
// note a partial match when match_partial is set and we start at the end
// of the input:
809 if((m_match_flags & match_partial) && (position == last))
810 m_has_partial_match = true;
812 const re_repeat* rep = static_cast<const re_repeat*>(pstate);
// "backtrack" tracks the position that is stepped back from:
813 BidiIterator backtrack = position;
// case where the repeat's can_be_null flags include mask_skip:
816 if(rep->can_be_null & mask_skip)
819 if(match_all_states())
// step back one character:
824 position = --backtrack;
// while repeats remain to give up and the current character cannot start
// what follows the repeat:
832 while(count && !can_start(*position, rep->_map, mask_skip))
839 backtrack = position;
840 if(match_all_states())
// step back one character:
844 position = --backtrack;
//
// match_recursion: handles a syntax_element_recurse state. Pushes a
// recursion_info record (return address, copy of the current results,
// current repeater stack, index of the target group) onto
// recursion_stack, then continues matching at the target of the recursion.
// NOTE(review): braces and some statements of this function, including
// its return statements, are not visible in this excerpt.
//
853 template <class BidiIterator, class Allocator, class traits>
854 bool perl_matcher<BidiIterator, Allocator, traits>::match_recursion()
856 BOOST_ASSERT(pstate->type == syntax_element_recurse);
858 // Set new call stack:
// reserve room for 50 entries the first time the stack is used:
860 if(recursion_stack.capacity() == 0)
862 recursion_stack.reserve(50);
864 recursion_stack.push_back(recursion_info<results_type>());
// matching resumes at the state after the recurse state:
865 recursion_stack.back().preturn_address = pstate->next.p;
866 recursion_stack.back().results = *m_presult;
867 recursion_stack.back().repeater_stack = next_count;
// jump to the target of the recursion and record its group index:
868 pstate = static_cast<const re_jump*>(pstate)->alt.p;
869 recursion_stack.back().idx = static_cast<const re_brace*>(pstate)->index;
871 repeater_count<BidiIterator>* saved = next_count;
872 repeater_count<BidiIterator> r(&next_count); // resets all repeat counts since we're recursing and starting fresh on those
874 bool result = match_all_states();
// undo the push: restore the repeater stack and the results, then drop
// the record (the guarding condition is not visible here):
879 next_count = recursion_stack.back().repeater_stack;
880 *m_presult = recursion_stack.back().results;
881 recursion_stack.pop_back();
//
// match_endmark: handles a re_brace state that closes a group. Records
// the end of the sub-expression unless match_nosubs is set, and returns
// from a pending recursion when the closing group is the one the
// innermost recursion entered.
// NOTE(review): braces, some conditions and the return statements of
// this function are not visible in this excerpt.
//
887 template <class BidiIterator, class Allocator, class traits>
888 bool perl_matcher<BidiIterator, Allocator, traits>::match_endmark()
// pick up the group index and adopt the case sensitivity stored in the state:
890 int index = static_cast<const re_brace*>(pstate)->index;
891 icase = static_cast<const re_brace*>(pstate)->icase;
// sub-expressions are only tracked when match_nosubs is not set:
894 if((m_match_flags & match_nosubs) == 0)
896 m_presult->set_second(position, index);
898 if(!recursion_stack.empty())
// this group is the one the innermost recursion entered:
900 if(index == recursion_stack.back().idx)
// pop the record and continue at its return address with the repeater
// stack and results it saved:
902 recursion_info<results_type> saved = recursion_stack.back();
903 recursion_stack.pop_back();
904 pstate = saved.preturn_address;
905 repeater_count<BidiIterator>* saved_count = next_count;
906 next_count = saved.repeater_stack;
907 *m_presult = saved.results;
908 if(!match_all_states())
// the continuation failed: put the record and the repeater stack back:
910 recursion_stack.push_back(saved);
911 next_count = saved_count;
917 else if((index < 0) && (index != -4))
919 // matched forward lookahead:
// move on to the next state unless pstate is already null:
923 pstate = pstate ? pstate->next.p : 0;
//
// match_match: handles the state that marks the end of the expression.
// With a recursion pending it returns to the caller's state instead;
// otherwise it evaluates match_not_null, match_all and
// match_not_initial_null, records the end of the match and sets
// m_has_found_match.
// NOTE(review): braces and the return statements of this function are
// not visible in this excerpt.
//
927 template <class BidiIterator, class Allocator, class traits>
928 bool perl_matcher<BidiIterator, Allocator, traits>::match_match()
930 if(!recursion_stack.empty())
// a recursion that ends here must have targeted group 0:
932 BOOST_ASSERT(0 == recursion_stack.back().idx);
// continue at the return address with the results saved in the record:
933 const re_syntax_base* saved_state = pstate = recursion_stack.back().preturn_address;
934 *m_presult = recursion_stack.back().results;
935 recursion_stack.pop_back();
936 if(!match_all_states())
// the continuation failed: push a record again with the same return
// address and the current results:
938 recursion_stack.push_back(recursion_info<results_type>());
939 recursion_stack.back().preturn_address = saved_state;
940 recursion_stack.back().results = *m_presult;
// match_not_null: detects a match that ends where $0 starts, i.e. an
// empty match:
945 if((m_match_flags & match_not_null) && (position == (*m_presult)[0].first))
// match_all: detects a match that stops before the end of the input:
947 if((m_match_flags & match_all) && (position != last))
// match_not_initial_null: detects a match that ends at search_base:
949 if((m_match_flags & regex_constants::match_not_initial_null) && (position == search_base))
// record the end of the match:
951 m_presult->set_second(position);
953 m_has_found_match = true;
// with all match_posix bits set, offer this match to m_result:
954 if((m_match_flags & match_posix) == match_posix)
956 m_result.maybe_assign(*m_presult);
957 if((m_match_flags & match_any) == 0)
960 #ifdef BOOST_REGEX_MATCH_EXTRA
961 if(match_extra & m_match_flags)
// append every matched sub-expression to its own capture list:
963 for(unsigned i = 0; i < m_presult->size(); ++i)
964 if((*m_presult)[i].matched)
965 ((*m_presult)[i]).get_captures().push_back((*m_presult)[i]);
973 } // namespace re_detail
980 #pragma warning(push)
981 #pragma warning(disable: 4103)
983 #ifdef BOOST_HAS_ABI_HEADERS
984 # include BOOST_ABI_SUFFIX