6 * Use, modification and distribution are subject to the
7 * Boost Software License, Version 1.0. (See accompanying file
8 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
13 * LOCATION: see http://www.boost.org for most recent version.
14 * FILE perl_matcher_recursive.hpp
15 * VERSION see <boost/version.hpp>
16 * DESCRIPTION: Definitions of perl_matcher member functions that are
17 * specific to the recursive implementation.
20 #ifndef BOOST_REGEX_V4_PERL_MATCHER_RECURSIVE_HPP
21 #define BOOST_REGEX_V4_PERL_MATCHER_RECURSIVE_HPP
25 #pragma warning(disable: 4103)
27 #ifdef BOOST_HAS_ABI_HEADERS
28 # include BOOST_ABI_PREFIX
36 #pragma warning(disable: 4800)
// backup_subex: keeps a copy of one sub-expression of a match_results object,
// taken at construction, so that restore() can write it back later.
42 template <class BidiIterator>
46 sub_match<BidiIterator> sub;
// Records the sub-expression index i together with a copy of w[i].
49 backup_subex(const match_results<BidiIterator, A>& w, int i)
50 : index(i), sub(w[i], false) {}
// Writes the saved first/second iterators and the saved matched flag back into
// w at the recorded index; the trailing argument is true only for index 0.
52 void restore(match_results<BidiIterator, A>& w)
54 w.set_first(sub.first, index, index == 0);
55 w.set_second(sub.second, index, sub.matched, index == 0);
// Read-only access to the saved sub-expression.
57 const sub_match<BidiIterator>& get() { return sub; }
// match_all_states: state-machine driver. Looks up the handler for the current
// state in a static table of 30 member-function pointers, using pstate->type as
// the index, and raises error_complexity once state_count exceeds
// max_state_count.
60 template <class BidiIterator, class Allocator, class traits>
61 bool perl_matcher<BidiIterator, Allocator, traits>::match_all_states()
// NOTE(review): because the table is indexed by pstate->type, the entry order
// presumably has to mirror the syntax element type enumeration - confirm
// before reordering or inserting entries.
63 static matcher_proc_type const s_match_vtable[30] =
65 (&perl_matcher<BidiIterator, Allocator, traits>::match_startmark),
66 &perl_matcher<BidiIterator, Allocator, traits>::match_endmark,
67 &perl_matcher<BidiIterator, Allocator, traits>::match_literal,
68 &perl_matcher<BidiIterator, Allocator, traits>::match_start_line,
69 &perl_matcher<BidiIterator, Allocator, traits>::match_end_line,
70 &perl_matcher<BidiIterator, Allocator, traits>::match_wild,
71 &perl_matcher<BidiIterator, Allocator, traits>::match_match,
72 &perl_matcher<BidiIterator, Allocator, traits>::match_word_boundary,
73 &perl_matcher<BidiIterator, Allocator, traits>::match_within_word,
74 &perl_matcher<BidiIterator, Allocator, traits>::match_word_start,
75 &perl_matcher<BidiIterator, Allocator, traits>::match_word_end,
76 &perl_matcher<BidiIterator, Allocator, traits>::match_buffer_start,
77 &perl_matcher<BidiIterator, Allocator, traits>::match_buffer_end,
78 &perl_matcher<BidiIterator, Allocator, traits>::match_backref,
79 &perl_matcher<BidiIterator, Allocator, traits>::match_long_set,
80 &perl_matcher<BidiIterator, Allocator, traits>::match_set,
81 &perl_matcher<BidiIterator, Allocator, traits>::match_jump,
82 &perl_matcher<BidiIterator, Allocator, traits>::match_alt,
83 &perl_matcher<BidiIterator, Allocator, traits>::match_rep,
84 &perl_matcher<BidiIterator, Allocator, traits>::match_combining,
85 &perl_matcher<BidiIterator, Allocator, traits>::match_soft_buffer_end,
86 &perl_matcher<BidiIterator, Allocator, traits>::match_restart_continue,
87 // Although this next line *should* be evaluated at compile time, in practice
88 // some compilers (VC++) emit run-time initialisation which breaks thread
89 // safety, so use a dispatch function instead:
90 //(::boost::is_random_access_iterator<BidiIterator>::value ? &perl_matcher<BidiIterator, Allocator, traits>::match_dot_repeat_fast : &perl_matcher<BidiIterator, Allocator, traits>::match_dot_repeat_slow),
91 &perl_matcher<BidiIterator, Allocator, traits>::match_dot_repeat_dispatch,
92 &perl_matcher<BidiIterator, Allocator, traits>::match_char_repeat,
93 &perl_matcher<BidiIterator, Allocator, traits>::match_set_repeat,
94 &perl_matcher<BidiIterator, Allocator, traits>::match_long_set_repeat,
95 &perl_matcher<BidiIterator, Allocator, traits>::match_backstep,
96 &perl_matcher<BidiIterator, Allocator, traits>::match_assert_backref,
97 &perl_matcher<BidiIterator, Allocator, traits>::match_toggle_case,
98 &perl_matcher<BidiIterator, Allocator, traits>::match_recursion,
// Complexity guard: give up with error_complexity once too many states have
// been visited.
101 if(state_count > max_state_count)
102 raise_error(traits_inst, regex_constants::error_complexity);
// Fetch the handler for the current state.
105 matcher_proc_type proc = s_match_vtable[pstate->type];
// With match_partial set, note a partial match when the input has been
// consumed to its end and the position has moved away from search_base.
109 if((m_match_flags & match_partial) && (position == last) && (position != search_base))
110 m_has_partial_match = true;
// match_startmark: handles an opening-bracket state (re_brace). It reads the
// group index and case-sensitivity flag from the state and then, according to
// the index, treats the group as a lookahead assertion, an independent
// sub-expression, a conditional expression, a \K reset of $0, or an ordinary
// capturing group. Each special form is matched by a nested call to
// match_all_states().
// NOTE(review): the labels that select between these branches are not part of
// this view; only the index values tested explicitly below are certain.
117 template <class BidiIterator, class Allocator, class traits>
118 bool perl_matcher<BidiIterator, Allocator, traits>::match_startmark()
120 int index = static_cast<const re_brace*>(pstate)->index;
121 icase = static_cast<const re_brace*>(pstate)->icase;
126 pstate = pstate->next.p;
131 // forward lookahead assert:
// The assertion is zero width: the position is saved before the nested match
// and put back afterwards, and matching resumes at next_pstate.
132 BidiIterator old_position(position);
133 const re_syntax_base* next_pstate = static_cast<const re_jump*>(pstate->next.p)->alt.p->next.p;
134 pstate = pstate->next.p->next.p;
135 r = match_all_states();
136 pstate = next_pstate;
137 position = old_position;
// True when the nested result r does not agree with the kind of assertion
// (the test pairs a successful match with index != -1 and a failed match
// with index != -2).
138 if((r && (index != -1)) || (!r && (index != -2)))
146 // independent sub-expression:
// m_independent is forced to true for the nested match and restored afterwards.
147 bool old_independent = m_independent;
148 m_independent = true;
149 const re_syntax_base* next_pstate = static_cast<const re_jump*>(pstate->next.p)->alt.p->next.p;
150 pstate = pstate->next.p->next.p;
151 r = match_all_states();
152 pstate = next_pstate;
153 m_independent = old_independent;
154 #ifdef BOOST_REGEX_MATCH_EXTRA
155 if(r && (m_match_flags & match_extra))
158 // our captures have been stored in *m_presult
159 // we need to unpack them, and insert them
160 // back in the right order when we unwind the stack:
// tm is a copy of the results taken before the capture lists in *m_presult
// are cleared.
163 match_results<BidiIterator, Allocator> tm(*m_presult);
164 for(i = 0; i < tm.size(); ++i)
165 (*m_presult)[i].get_captures().clear();
166 // match everything else:
167 r = match_all_states();
168 // now place the stored captures back:
169 for(i = 0; i < tm.size(); ++i)
171 typedef typename sub_match<BidiIterator>::capture_sequence_type seq;
172 seq& s1 = (*m_presult)[i].get_captures();
173 const seq& s2 = tm[i].captures();
185 // conditional expression:
186 const re_alt* alt = static_cast<const re_alt*>(pstate->next.p);
187 BOOST_ASSERT(alt->type == syntax_element_alt);
188 pstate = alt->next.p;
// The condition is either a back-reference assertion, tested directly, or a
// nested zero-width assertion that has to be matched recursively.
189 if(pstate->type == syntax_element_assert_backref)
191 if(!match_assert_backref())
197 // zero width assertion, have to match this recursively:
198 BOOST_ASSERT(pstate->type == syntax_element_startmark);
// An index of -2 marks the assertion as negated.
199 bool negated = static_cast<const re_brace*>(pstate)->index == -2;
200 BidiIterator saved_position = position;
201 const re_syntax_base* next_pstate = static_cast<const re_jump*>(pstate->next.p)->alt.p->next.p;
202 pstate = pstate->next.p->next.p;
203 bool res = match_all_states();
204 position = saved_position;
208 pstate = next_pstate;
216 // Reset start of $0, since we have a \K escape
// Sub-expression 0 is backed up first so that sub.restore() can put it back.
217 backup_subex<BidiIterator> sub(*m_presult, 0);
218 m_presult->set_first(position, 0, true);
219 pstate = pstate->next.p;
220 r = match_all_states();
222 sub.restore(*m_presult);
// Ordinary capturing group: the index must be positive.
227 BOOST_ASSERT(index > 0);
// Sub-expression bookkeeping is performed only when match_nosubs is not set.
228 if((m_match_flags & match_nosubs) == 0)
230 backup_subex<BidiIterator> sub(*m_presult, index);
231 m_presult->set_first(position, index);
232 pstate = pstate->next.p;
233 r = match_all_states();
235 sub.restore(*m_presult);
236 #ifdef BOOST_REGEX_MATCH_EXTRA
238 // we have a match, push the capture information onto the stack:
240 else if(sub.get().matched && (match_extra & m_match_flags))
241 ((*m_presult)[index]).get_captures().push_back(sub.get());
246 pstate = pstate->next.p;
// match_alt: handles an alternation state (re_alt). It decides which of the two
// branches can be tried: the one that follows this state (take_first) and the
// one reached through jmp->alt.p (take_second). When the first branch fails,
// the saved position is restored.
254 template <class BidiIterator, class Allocator, class traits>
255 bool perl_matcher<BidiIterator, Allocator, traits>::match_alt()
257 bool take_first, take_second;
258 const re_alt* jmp = static_cast<const re_alt*>(pstate);
260 // find out which of these two alternatives we need to take:
// Either the can_be_null masks or the start map (looked up with the current
// character) supplies the answer.
// NOTE(review): the condition choosing between the two forms is not in this
// view - presumably "position == last"; confirm.
263 take_first = jmp->can_be_null & mask_take;
264 take_second = jmp->can_be_null & mask_skip;
268 take_first = can_start(*position, jmp->_map, (unsigned char)mask_take);
269 take_second = can_start(*position, jmp->_map, (unsigned char)mask_skip);
274 // we can take the first alternative,
275 // see if we need to push next alternative:
// Remember where we are and where the second alternative starts, so that a
// failed first alternative can be undone.
278 BidiIterator oldposition(position);
279 const re_syntax_base* old_pstate = jmp->alt.p;
280 pstate = pstate->next.p;
281 if(!match_all_states())
284 position = oldposition;
288 pstate = pstate->next.p;
296 return false; // neither option is possible
// match_rep: handles a general repeat state (re_repeat). A scoped
// repeater_count tracks the iteration count; while the count is below rep->min
// the repeat must be taken, after that the greedy and non-greedy cases differ
// in whether the repeat body or the continuation is tried first.
299 template <class BidiIterator, class Allocator, class traits>
300 bool perl_matcher<BidiIterator, Allocator, traits>::match_rep()
303 #pragma warning(push)
304 #pragma warning(disable:4127 4244)
306 const re_repeat* rep = static_cast<const re_repeat*>(pstate);
308 // Always copy the repeat count, so that the state is restored
309 // when we exit this scope:
311 repeater_count<BidiIterator> r(rep->state_id, &next_count, position);
313 // If we've had at least one repeat already, and the last one
314 // matched the NULL string then set the repeat count to
317 next_count->check_null_repeat(position, rep->max);
319 // find out which of these two alternatives we need to take:
320 bool take_first, take_second;
// take_first: the repeat body may be entered; take_second: the repeat may be
// skipped. Computed from the can_be_null masks or from the start map.
// NOTE(review): the selecting condition is not in this view - presumably
// "position == last"; confirm.
323 take_first = rep->can_be_null & mask_take;
324 take_second = rep->can_be_null & mask_skip;
328 take_first = can_start(*position, rep->_map, (unsigned char)mask_take);
329 take_second = can_start(*position, rep->_map, (unsigned char)mask_skip);
332 if(next_count->get_count() < rep->min)
334 // we must take the repeat:
337 // increase the counter:
339 pstate = rep->next.p;
340 return match_all_states();
// The repeat is treated as greedy only if the state says so and either
// match_any is not requested or we are inside an independent sub-expression.
344 bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent);
347 // try and take the repeat if we can:
348 if((next_count->get_count() < rep->max) && take_first)
350 // store position in case we fail:
351 BidiIterator pos = position;
352 // increase the counter:
354 pstate = rep->next.p;
355 if(match_all_states())
357 // failed repeat, reset position and fall through for alternative:
365 return false; // can't take anything, fail...
369 // try and skip the repeat if we can:
372 // store position in case we fail:
373 BidiIterator pos = position;
375 if(match_all_states())
377 // failed alternative, reset position and fall through for repeat:
380 if((next_count->get_count() < rep->max) && take_first)
382 // increase the counter:
384 pstate = rep->next.p;
385 return match_all_states();
// match_dot_repeat_slow: repeat of a single state (psingle = rep->next.p).
// Compulsory repeats up to rep->min are consumed first; a greedy repeat then
// continues towards rep->max and hands over to backtrack_till_match(), while a
// non-greedy repeat tries the continuation via match_all_states() as it goes.
394 template <class BidiIterator, class Allocator, class traits>
395 bool perl_matcher<BidiIterator, Allocator, traits>::match_dot_repeat_slow()
398 #pragma warning(push)
399 #pragma warning(disable:4127)
402 const re_repeat* rep = static_cast<const re_repeat*>(pstate);
403 re_syntax_base* psingle = rep->next.p;
404 // match compulsory repeats first:
405 while(count < rep->min)
// Greedy only if the state says so and either match_any is not requested or
// we are inside an independent sub-expression.
412 bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent);
416 while(count < rep->max)
423 if((rep->leading) && (count < rep->max))
// Hand the number of optional repeats taken to the backtracking helper.
426 return backtrack_till_match(count - rep->min);
430 // non-greedy, keep trying till we get a match:
431 BidiIterator save_pos;
434 if((rep->leading) && (rep->max == UINT_MAX))
439 if(match_all_states())
441 if(count >= rep->max)
// match_dot_repeat_fast: variant of the dot repeat that skips ahead with
// std::advance instead of stepping one state at a time. It defers to
// match_dot_repeat_slow() when match_not_dot_null is set or when the dot
// state's mask has no bit in common with match_any_mask.
455 template <class BidiIterator, class Allocator, class traits>
456 bool perl_matcher<BidiIterator, Allocator, traits>::match_dot_repeat_fast()
459 #pragma warning(push)
460 #pragma warning(disable:4127)
462 if(m_match_flags & match_not_dot_null)
463 return match_dot_repeat_slow();
464 if((static_cast<const re_dot*>(pstate->next.p)->mask & match_any_mask) == 0)
465 return match_dot_repeat_slow();
467 // start by working out how much we can skip:
469 const re_repeat* rep = static_cast<const re_repeat*>(pstate);
471 #pragma warning(push)
472 #pragma warning(disable:4267)
474 bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent);
// count = the smaller of the remaining input length and the target repeat
// count (rep->max when greedy, rep->min otherwise).
475 std::size_t count = (std::min)(static_cast<std::size_t>(::boost::re_detail::distance(position, last)), static_cast<std::size_t>(greedy ? rep->max : rep->min));
479 return false; // not enough text left to match
481 std::advance(position, count);
485 if((rep->leading) && (count < rep->max) && greedy)
// Hand the number of optional repeats taken to the backtracking helper.
488 return backtrack_till_match(count - rep->min);
490 // non-greedy, keep trying till we get a match:
491 BidiIterator save_pos;
// Loop while input remains, the maximum has not been reached, and the
// continuation cannot start at the current character.
494 while((position != last) && (count < rep->max) && !can_start(*position, rep->_map, mask_skip))
499 if((rep->leading) && (rep->max == UINT_MAX))
504 if(match_all_states())
506 if(count >= rep->max)
// Continue from one character past the saved position.
510 position = ++save_pos;
// match_char_repeat: repeat of a single literal character. The literal state
// that follows the repeat must have length 1; its character is read from the
// storage located immediately after the re_literal object. Input characters
// are compared after traits translation.
518 template <class BidiIterator, class Allocator, class traits>
519 bool perl_matcher<BidiIterator, Allocator, traits>::match_char_repeat()
522 #pragma warning(push)
523 #pragma warning(disable:4127)
524 #pragma warning(disable:4267)
527 #pragma option push -w-8008 -w-8066 -w-8004
529 const re_repeat* rep = static_cast<const re_repeat*>(pstate);
530 BOOST_ASSERT(1 == static_cast<const re_literal*>(rep->next.p)->length);
531 const char_type what = *reinterpret_cast<const char_type*>(static_cast<const re_literal*>(rep->next.p) + 1);
533 // start by working out how much we can skip:
535 bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent);
536 std::size_t count, desired;
// Random-access iterators: the target count is capped by the remaining input
// length up front, so the scanning loops only count "desired" down.
537 if(::boost::is_random_access_iterator<BidiIterator>::value)
541 (std::size_t)(greedy ? rep->max : rep->min),
542 (std::size_t)::boost::re_detail::distance(position, last));
547 while(--desired && (traits_inst.translate_nocase(*position) == what))
554 while(--desired && (traits_inst.translate(*position) == what))
// count becomes its previous value minus whatever is left in desired.
559 count = count - desired;
// Other iterators: scan while comparing against "last" on every step.
564 desired = greedy ? rep->max : rep->min;
565 while((count < desired) && (position != last) && (traits_inst.translate(*position, icase) == what))
571 if((rep->leading) && (count < rep->max) && greedy)
// Hand the number of optional repeats taken to the backtracking helper.
577 return backtrack_till_match(count - rep->min);
579 // non-greedy, keep trying till we get a match:
580 BidiIterator save_pos;
583 while((position != last) && (count < rep->max) && !can_start(*position, rep->_map, mask_skip))
585 if((traits_inst.translate(*position, icase) == what))
591 return false; // couldn't repeat even though it was the only option
593 if((rep->leading) && (rep->max == UINT_MAX))
598 if(match_all_states())
600 if(count >= rep->max)
605 if(traits_inst.translate(*position, icase) == what)
// match_set_repeat: repeat of a character set that is stored as a lookup map
// (re_set::_map). A character belongs to the set when the map entry indexed by
// its translated value, cast to unsigned char, is non-zero.
623 template <class BidiIterator, class Allocator, class traits>
624 bool perl_matcher<BidiIterator, Allocator, traits>::match_set_repeat()
627 #pragma warning(push)
628 #pragma warning(disable:4127)
631 #pragma option push -w-8008 -w-8066 -w-8004
633 const re_repeat* rep = static_cast<const re_repeat*>(pstate);
634 const unsigned char* map = static_cast<const re_set*>(rep->next.p)->_map;
637 // start by working out how much we can skip:
639 bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent);
640 std::size_t desired = greedy ? rep->max : rep->min;
641 if(::boost::is_random_access_iterator<BidiIterator>::value)
643 BidiIterator end = position;
644 // Move end forward by "desired", preferably without using distance or advance if we can
645 // as these can be slow for some iterator types.
// len is 0 when desired is the largest std::size_t value, otherwise the
// number of characters remaining between position and last.
646 std::size_t len = (desired == (std::numeric_limits<std::size_t>::max)()) ? 0u : ::boost::re_detail::distance(position, last);
650 std::advance(end, desired);
651 BidiIterator origin(position);
652 while((position != end) && map[static_cast<unsigned char>(traits_inst.translate(*position, icase))])
// The number of characters consumed is the distance moved from origin.
656 count = (unsigned)::boost::re_detail::distance(origin, position);
660 while((count < desired) && (position != last) && map[static_cast<unsigned char>(traits_inst.translate(*position, icase))])
666 if((rep->leading) && (count < rep->max) && greedy)
// Hand the number of optional repeats taken to the backtracking helper.
672 return backtrack_till_match(count - rep->min);
674 // non-greedy, keep trying till we get a match:
675 BidiIterator save_pos;
678 while((position != last) && (count < rep->max) && !can_start(*position, rep->_map, mask_skip))
680 if(map[static_cast<unsigned char>(traits_inst.translate(*position, icase))])
686 return false; // couldn't repeat even though it was the only option
688 if((rep->leading) && (rep->max == UINT_MAX))
693 if(match_all_states())
695 if(count >= rep->max)
700 if(map[static_cast<unsigned char>(traits_inst.translate(*position, icase))])
// match_long_set_repeat: repeat of a "long" character set (re_set_long).
// Membership is tested with re_is_set_member(); a return value different from
// the position passed in is treated as "the character is in the set".
718 template <class BidiIterator, class Allocator, class traits>
719 bool perl_matcher<BidiIterator, Allocator, traits>::match_long_set_repeat()
722 #pragma warning(push)
723 #pragma warning(disable:4127)
726 #pragma option push -w-8008 -w-8066 -w-8004
728 typedef typename traits::char_class_type char_class_type;
729 const re_repeat* rep = static_cast<const re_repeat*>(pstate);
730 const re_set_long<char_class_type>* set = static_cast<const re_set_long<char_class_type>*>(pstate->next.p);
733 // start by working out how much we can skip:
735 bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent);
736 std::size_t desired = greedy ? rep->max : rep->min;
737 if(::boost::is_random_access_iterator<BidiIterator>::value)
739 BidiIterator end = position;
740 // Move end forward by "desired", preferably without using distance or advance if we can
741 // as these can be slow for some iterator types.
// len is 0 when desired is the largest std::size_t value, otherwise the
// number of characters remaining between position and last.
742 std::size_t len = (desired == (std::numeric_limits<std::size_t>::max)()) ? 0u : ::boost::re_detail::distance(position, last);
746 std::advance(end, desired);
747 BidiIterator origin(position);
748 while((position != end) && (position != re_is_set_member(position, last, set, re.get_data(), icase)))
// The number of characters consumed is the distance moved from origin.
752 count = (unsigned)::boost::re_detail::distance(origin, position);
756 while((count < desired) && (position != last) && (position != re_is_set_member(position, last, set, re.get_data(), icase)))
762 if((rep->leading) && (count < rep->max) && greedy)
// Hand the number of optional repeats taken to the backtracking helper.
768 return backtrack_till_match(count - rep->min);
770 // non-greedy, keep trying till we get a match:
771 BidiIterator save_pos;
774 while((position != last) && (count < rep->max) && !can_start(*position, rep->_map, mask_skip))
776 if(position != re_is_set_member(position, last, set, re.get_data(), icase))
782 return false; // couldn't repeat even though it was the only option
784 if((rep->leading) && (rep->max == UINT_MAX))
789 if(match_all_states())
791 if(count >= rep->max)
796 if(position != re_is_set_member(position, last, set, re.get_data(), icase))
// backtrack_till_match: called by the greedy repeat matchers with the number of
// optional repeats that were consumed. It tries the continuation with
// match_all_states() and, on failure, steps the position back one character at
// a time (position = --backtrack).
// Parameter count: how many characters may still be given back.
814 template <class BidiIterator, class Allocator, class traits>
815 bool perl_matcher<BidiIterator, Allocator, traits>::backtrack_till_match(std::size_t count)
818 #pragma warning(push)
819 #pragma warning(disable:4127)
// With match_partial set, reaching the end of the input counts as a partial
// match.
821 if((m_match_flags & match_partial) && (position == last))
822 m_has_partial_match = true;
824 const re_repeat* rep = static_cast<const re_repeat*>(pstate);
825 BidiIterator backtrack = position;
// Tested when the continuation is flagged as able to match the null string
// (rep->can_be_null & mask_skip).
// NOTE(review): the enclosing condition is not in this view - confirm when
// this branch is taken.
828 if(rep->can_be_null & mask_skip)
831 if(match_all_states())
836 position = --backtrack;
// Loop while there are still characters to give back and the continuation
// cannot start at the current character.
844 while(count && !can_start(*position, rep->_map, mask_skip))
851 backtrack = position;
852 if(match_all_states())
856 position = --backtrack;
// match_recursion: handles a recursion state (syntax_element_recurse). It pushes
// a recursion_info record holding the return address (pstate->next.p), a copy of
// the current results and the current repeat-count pointer, jumps to the target
// given by alt.p, and matches from there with a fresh repeater_count.
865 template <class BidiIterator, class Allocator, class traits>
866 bool perl_matcher<BidiIterator, Allocator, traits>::match_recursion()
868 BOOST_ASSERT(pstate->type == syntax_element_recurse);
870 // Set new call stack:
// Reserve room for 50 entries the first time the stack is used.
872 if(recursion_stack.capacity() == 0)
874 recursion_stack.reserve(50);
876 recursion_stack.push_back(recursion_info<results_type>());
877 recursion_stack.back().preturn_address = pstate->next.p;
878 recursion_stack.back().results = *m_presult;
879 recursion_stack.back().repeater_stack = next_count;
880 pstate = static_cast<const re_jump*>(pstate)->alt.p;
// idx records the index of the brace state that the recursion jumps to.
881 recursion_stack.back().idx = static_cast<const re_brace*>(pstate)->index;
883 repeater_count<BidiIterator>* saved = next_count;
884 repeater_count<BidiIterator> r(&next_count); // resets all repeat counts since we're recursing and starting fresh on those
886 bool result = match_all_states();
// Put back the repeat-count pointer and the results saved in the top stack
// entry, then drop that entry.
891 next_count = recursion_stack.back().repeater_stack;
892 *m_presult = recursion_stack.back().results;
893 recursion_stack.pop_back();
// match_endmark: handles a closing-bracket state (re_brace). It reads the group
// index and case flag, records the end of the sub-expression unless
// match_nosubs is set, and, when the index equals the idx of the innermost
// recursion, returns from that recursion to its saved return address.
899 template <class BidiIterator, class Allocator, class traits>
900 bool perl_matcher<BidiIterator, Allocator, traits>::match_endmark()
902 int index = static_cast<const re_brace*>(pstate)->index;
903 icase = static_cast<const re_brace*>(pstate)->icase;
906 if((m_match_flags & match_nosubs) == 0)
908 m_presult->set_second(position, index);
910 if(!recursion_stack.empty())
912 if(index == recursion_stack.back().idx)
// Pop the recursion record and continue at its return address with the saved
// repeat-count pointer and results.
914 recursion_info<results_type> saved = recursion_stack.back();
915 recursion_stack.pop_back();
916 pstate = saved.preturn_address;
917 repeater_count<BidiIterator>* saved_count = next_count;
918 next_count = saved.repeater_stack;
919 *m_presult = saved.results;
// If the continuation fails, put the recursion record and the repeat-count
// pointer back as they were.
920 if(!match_all_states())
922 recursion_stack.push_back(saved);
923 next_count = saved_count;
929 else if((index < 0) && (index != -4))
931 // matched forward lookahead:
// Advance to the next state, leaving a null pstate as null.
935 pstate = pstate ? pstate->next.p : 0;
// match_match: handles the final "match" state. Inside a recursion (which must
// have idx 0) it returns to the saved return address instead of finishing.
// Otherwise it checks the match_not_null, match_all and match_not_initial_null
// flags, records the end of the whole match and marks it as found.
939 template <class BidiIterator, class Allocator, class traits>
940 bool perl_matcher<BidiIterator, Allocator, traits>::match_match()
942 if(!recursion_stack.empty())
944 BOOST_ASSERT(0 == recursion_stack.back().idx);
945 const re_syntax_base* saved_state = pstate = recursion_stack.back().preturn_address;
946 *m_presult = recursion_stack.back().results;
947 recursion_stack.pop_back();
// If the continuation fails, push a recursion record again with the same
// return address and the current results.
948 if(!match_all_states())
950 recursion_stack.push_back(recursion_info<results_type>());
951 recursion_stack.back().preturn_address = saved_state;
952 recursion_stack.back().results = *m_presult;
// match_not_null: tests for an empty match (position equals the start of $0).
957 if((m_match_flags & match_not_null) && (position == (*m_presult)[0].first))
// match_all: tests whether any input is left unconsumed.
959 if((m_match_flags & match_all) && (position != last))
// match_not_initial_null: tests whether the position is still at search_base.
961 if((m_match_flags & regex_constants::match_not_initial_null) && (position == search_base))
963 m_presult->set_second(position);
965 m_has_found_match = true;
// With match_posix, the candidate is offered to m_result via maybe_assign().
966 if((m_match_flags & match_posix) == match_posix)
968 m_result.maybe_assign(*m_presult);
969 if((m_match_flags & match_any) == 0)
972 #ifdef BOOST_REGEX_MATCH_EXTRA
// With match_extra, every matched sub-expression is appended to its own
// capture list.
973 if(match_extra & m_match_flags)
975 for(unsigned i = 0; i < m_presult->size(); ++i)
976 if((*m_presult)[i].matched)
977 ((*m_presult)[i]).get_captures().push_back((*m_presult)[i]);
985 } // namespace re_detail
992 #pragma warning(push)
993 #pragma warning(disable: 4103)
995 #ifdef BOOST_HAS_ABI_HEADERS
996 # include BOOST_ABI_SUFFIX