6 * Use, modification and distribution are subject to the
7 * Boost Software License, Version 1.0. (See accompanying file
8 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
13 * LOCATION: see http://www.boost.org for most recent version.
14 * FILE perl_matcher_common.cpp
15 * VERSION see <boost/version.hpp>
16 * DESCRIPTION: Definitions of perl_matcher member functions that are
17 * common to both the recursive and non-recursive versions.
20 #ifndef BOOST_REGEX_V4_PERL_MATCHER_COMMON_HPP
21 #define BOOST_REGEX_V4_PERL_MATCHER_COMMON_HPP
23 #ifdef BOOST_HAS_ABI_HEADERS
24 # include BOOST_ABI_PREFIX
28 # pragma option push -w-8008 -w-8066
// Constructs a matcher over the input range [first, end) for expression e;
// the caller's match_results object `what` is kept as m_result.
// Raises std::invalid_argument through boost::throw_exception on the
// precondition failure described below (e is not a valid regex).
34 template <class BidiIterator, class Allocator, class traits, class Allocator2>
35 perl_matcher<BidiIterator, Allocator, traits, Allocator2>::perl_matcher(BidiIterator first, BidiIterator end,
36 match_results<BidiIterator, Allocator>& what,
37 const reg_expression<char_type, traits, Allocator2>& e,
// position starts at the beginning of the input; next_count and rep_obj
// are each initialised with the other's address.
39 : m_result(what), base(first), last(end),
40 position(first), re(e), traits_inst(e.get_traits()),
41 next_count(&rep_obj), rep_obj(&next_count)
// Iterator category, used below to select an estimate_max_state_count overload.
43 typedef typename regex_iterator_traits<BidiIterator>::iterator_category category;
47 // precondition failure: e is not a valid regex.
48 std::invalid_argument ex("Invalid regular expression object");
49 boost::throw_exception(ex);
// Cache the case-insensitivity bit of the expression's flags.
53 icase = re.flags() & regex_constants::icase;
54 estimate_max_state_count(static_cast<category*>(0));
// Neither match_perl nor match_posix was requested, so the matching rules
// are derived from the expression's own flags (perlex/literal => match_perl).
55 if(!(m_match_flags & (match_perl|match_posix)))
57 if((re.flags() & regex_constants::perlex) || (re.flags() & regex_constants::literal))
58 m_match_flags |= match_perl;
60 m_match_flags |= match_posix;
// In POSIX mode results are built in a separately allocated match_results
// (m_temp_match) and m_presult points at it; the other assignment points
// m_presult directly at m_result.
62 if(m_match_flags & match_posix)
64 m_temp_match.reset(new match_results<BidiIterator, Allocator>());
65 m_presult = m_temp_match.get();
68 m_presult = &m_result;
69 #ifdef BOOST_REGEX_NON_RECURSIVE
// Overload selected for random access iterators: derives max_state_count
// from the length of the input (dist) and a value based on re.size().
75 template <class BidiIterator, class Allocator, class traits, class Allocator2>
76 void perl_matcher<BidiIterator, Allocator, traits, Allocator2>::estimate_max_state_count(std::random_access_iterator_tag*)
78 difference_type dist = boost::re_detail::distance(base, last);
79 traits_size_type states = static_cast<traits_size_type>(re.size());
// lim is the largest value the estimate may take; comparing dist against
// lim / states detects inputs for which 100000 + states * dist would
// exceed it, in which case lim itself is used.
81 difference_type lim = (std::numeric_limits<difference_type>::max)() - 100000 - states;
82 if(dist > (difference_type)(lim / states))
83 max_state_count = lim;
85 max_state_count = 100000 + states * dist;
// Fallback overload taking void*: with no way to measure the input, the
// fixed limit BOOST_REGEX_MAX_STATE_COUNT is used.
87 template <class BidiIterator, class Allocator, class traits, class Allocator2>
88 void perl_matcher<BidiIterator, Allocator, traits, Allocator2>::estimate_max_state_count(void*)
90 // we don't know how long the sequence is:
91 max_state_count = BOOST_REGEX_MAX_STATE_COUNT;
94 #ifdef BOOST_REGEX_HAS_MS_STACK_GUARD
// Invokes the member function `proc` on this object and returns its result.
// The __except filter below selects only EXCEPTION_STACK_OVERFLOW; in that
// case the stack guard page is reset and the overflow is reported through
// raise_error with REG_E_MEMORY.
95 template <class BidiIterator, class Allocator, class traits, class Allocator2>
96 bool perl_matcher<BidiIterator, Allocator, traits, Allocator2>::protected_call(
97 protected_proc_type proc)
100 return (this->*proc)();
101 }__except(EXCEPTION_STACK_OVERFLOW == GetExceptionCode())
103 reset_stack_guard_page();
105 // we only get here after a stack overflow:
106 raise_error<traits>(traits_inst, REG_E_MEMORY);
107 // and we never really get here at all:
// Public entry point for matching. When BOOST_REGEX_HAS_MS_STACK_GUARD is
// defined the real work (match_imp) is run through protected_call.
112 template <class BidiIterator, class Allocator, class traits, class Allocator2>
113 bool perl_matcher<BidiIterator, Allocator, traits, Allocator2>::match()
115 #ifdef BOOST_REGEX_HAS_MS_STACK_GUARD
116 return protected_call(&perl_matcher<BidiIterator, Allocator, traits, Allocator2>::match_imp);
// Implementation behind match(): prepares the result object, runs
// match_prefix() and reports whether sub-expression 0 of m_result ends
// exactly at `last`.
122 template <class BidiIterator, class Allocator, class traits, class Allocator2>
123 bool perl_matcher<BidiIterator, Allocator, traits, Allocator2>::match_imp()
125 // initialise our stack if we are non-recursive:
126 #ifdef BOOST_REGEX_NON_RECURSIVE
127 save_state_init init(&m_stack_base, &m_backup_state);
128 used_block_count = BOOST_REGEX_MAX_BLOCKS;
129 #if !defined(BOOST_NO_EXCEPTIONS)
134 // reset our state machine:
// match_all is tested in match_match() together with position != last.
138 m_match_flags |= regex_constants::match_all;
// Only one sub-match slot is requested when match_nosubs is set,
// otherwise one per re.mark_count().
139 m_presult->set_size((m_match_flags & match_nosubs) ? 1 : re.mark_count(), base, last);
140 m_presult->set_base(base);
// In POSIX mode m_presult is a separate object (see constructor), so the
// freshly sized state is copied into m_result as well.
141 if(m_match_flags & match_posix)
142 m_result = *m_presult;
143 verify_options(re.flags(), m_match_flags);
144 if(0 == match_prefix())
146 return m_result[0].second == last;
148 #if defined(BOOST_REGEX_NON_RECURSIVE) && !defined(BOOST_NO_EXCEPTIONS)
152 // unwind all pushed states, apart from anything else this
153 // ensures that all the states are correctly destructed
154 // not just the memory freed.
155 while(unwind(true)){}
// Public entry point for searching. When BOOST_REGEX_HAS_MS_STACK_GUARD is
// defined the real work (find_imp) is run through protected_call.
161 template <class BidiIterator, class Allocator, class traits, class Allocator2>
162 bool perl_matcher<BidiIterator, Allocator, traits, Allocator2>::find()
164 #ifdef BOOST_REGEX_HAS_MS_STACK_GUARD
165 return protected_call(&perl_matcher<BidiIterator, Allocator, traits, Allocator2>::find_imp);
// Implementation behind find(): prepares (or re-primes) the matcher state
// and dispatches to one of the find_restart_* routines, returning that
// routine's result.
171 template <class BidiIterator, class Allocator, class traits, class Allocator2>
172 bool perl_matcher<BidiIterator, Allocator, traits, Allocator2>::find_imp()
// Dispatch table indexed by the `type` value computed further down; the
// entry order must therefore agree with the numeric restart type values.
174 static matcher_proc_type const s_find_vtable[7] =
176 &perl_matcher<BidiIterator, Allocator, traits, Allocator2>::find_restart_any,
177 &perl_matcher<BidiIterator, Allocator, traits, Allocator2>::find_restart_word,
178 &perl_matcher<BidiIterator, Allocator, traits, Allocator2>::find_restart_line,
179 &perl_matcher<BidiIterator, Allocator, traits, Allocator2>::find_restart_buf,
180 &perl_matcher<BidiIterator, Allocator, traits, Allocator2>::match_prefix,
181 &perl_matcher<BidiIterator, Allocator, traits, Allocator2>::find_restart_lit,
182 &perl_matcher<BidiIterator, Allocator, traits, Allocator2>::find_restart_lit,
185 // initialise our stack if we are non-recursive:
186 #ifdef BOOST_REGEX_NON_RECURSIVE
187 save_state_init init(&m_stack_base, &m_backup_state);
188 used_block_count = BOOST_REGEX_MAX_BLOCKS;
189 #if !defined(BOOST_NO_EXCEPTIONS)
// match_init not yet set: this is the first search with this object, so
// start from the first state and size the results against `base`.
195 if((m_match_flags & regex_constants::match_init) == 0)
197 // reset our state machine:
200 pstate = access::first(re);
201 m_presult->set_size((m_match_flags & match_nosubs) ? 1 : re.mark_count(), base, last);
202 m_presult->set_base(base);
203 m_match_flags |= regex_constants::match_init;
// Resume searching from the end of the match recorded in m_result.
208 search_base = position = m_result[0].second;
209 // If last match was null and match_not_null was not set then increment
210 // our start position, otherwise we go into an infinite loop:
211 if(((m_match_flags & match_not_null) == 0) && (m_result.length() == 0))
219 m_presult->set_size((m_match_flags & match_nosubs) ? 1 : re.mark_count(), search_base, last);
// The search no longer starts at `base`, so flag that earlier characters
// are available.
220 if(base != search_base)
221 m_match_flags |= match_prev_avail;
223 if(m_match_flags & match_posix)
225 m_result.set_size(re.mark_count(), base, last);
226 m_result.set_base(base);
229 verify_options(re.flags(), m_match_flags);
230 // find out what kind of expression we have:
231 unsigned type = (m_match_flags & match_continuous) ?
232 static_cast<unsigned int>(regbase::restart_continue)
233 : static_cast<unsigned int>(access::restart_type(re));
235 // call the appropriate search routine:
236 matcher_proc_type proc = s_find_vtable[type];
237 return (this->*proc)();
239 #if defined(BOOST_REGEX_NON_RECURSIVE) && !defined(BOOST_NO_EXCEPTIONS)
243 // unwind all pushed states, apart from anything else this
244 // ensures that all the states are correctly destructed
245 // not just the memory freed.
246 while(unwind(true)){}
// Attempts a match starting at the current position, beginning from the
// first state of the expression. Returns m_has_found_match; when no match
// was found, position is restored from `restart`.
252 template <class BidiIterator, class Allocator, class traits, class Allocator2>
253 bool perl_matcher<BidiIterator, Allocator, traits, Allocator2>::match_prefix()
255 m_has_partial_match = false;
256 m_has_found_match = false;
257 pstate = access::first(re);
258 m_presult->set_first(position);
// No full match, but a partial one was seen and the caller asked for
// match_partial: report it, with sub-expression 0 ending at `last`.
261 if(!m_has_found_match && m_has_partial_match && (m_match_flags & match_partial))
263 m_has_found_match = true;
264 m_presult->set_second(last, 0, false);
267 #ifdef BOOST_REGEX_MATCH_EXTRA
268 if(m_has_found_match && (match_extra & m_match_flags))
271 // we have a match, reverse the capture information:
273 for(unsigned i = 0; i < m_presult->size(); ++i)
275 typename sub_match<BidiIterator>::capture_sequence_type & seq = ((*m_presult)[i]).get_captures();
276 std::reverse(seq.begin(), seq.end());
280 if(!m_has_found_match)
281 position = restart; // reset search position
282 return m_has_found_match;
// Handles the state that closes a marked sub-expression: the current state
// is read as a re_brace, and its index identifies the sub-expression.
285 template <class BidiIterator, class Allocator, class traits, class Allocator2>
286 bool perl_matcher<BidiIterator, Allocator, traits, Allocator2>::match_endmark()
288 int index = static_cast<const re_brace*>(pstate)->index;
// Record the end of the sub-expression unless match_nosubs is set.
291 if((m_match_flags & match_nosubs) == 0)
292 m_presult->set_second(position, index);
296 // matched forward lookahead:
300 pstate = pstate->next.p;
// Handles a literal-string state: compares the input at position, one
// translated character at a time, with the characters stored for the state.
304 template <class BidiIterator, class Allocator, class traits, class Allocator2>
305 bool perl_matcher<BidiIterator, Allocator, traits, Allocator2>::match_literal()
307 unsigned int len = static_cast<const re_literal*>(pstate)->length;
// The literal's characters are read from the memory immediately following
// the re_literal object itself.
308 const char_type* what = reinterpret_cast<const char_type*>(static_cast<const re_literal*>(pstate) + 1);
310 // compare string with what we stored in
312 for(unsigned int i = 0; i < len; ++i, ++position)
// Running out of input or a character mismatch ends the comparison.
314 if((position == last) || (traits_inst.translate(*position, icase) != what[i]))
317 pstate = pstate->next.p;
// Handles a start-of-line assertion state; pstate is advanced to the next
// state on the paths where the assertion holds.
321 template <class BidiIterator, class Allocator, class traits, class Allocator2>
322 bool perl_matcher<BidiIterator, Allocator, traits, Allocator2>::match_start_line()
// No previous character is flagged as available: the assertion holds
// unless match_not_bol is set.
326 if((m_match_flags & match_prev_avail) == 0)
328 if((m_match_flags & match_not_bol) == 0)
330 pstate = pstate->next.p;
336 else if(m_match_flags & match_single_line)
339 // check the previous value character:
340 BidiIterator t(position);
// A separator in *t qualifies, except when *t is '\r' and the current
// character is '\n' (i.e. between the two halves of one line break).
344 if(traits_inst.is_separator(*t) && !((*t == '\r') && (*position == '\n')) )
346 pstate = pstate->next.p;
350 else if(traits_inst.is_separator(*t))
352 pstate = pstate->next.p;
// Handles an end-of-line assertion state; pstate is advanced to the next
// state on the paths where the assertion holds.
358 template <class BidiIterator, class Allocator, class traits, class Allocator2>
359 bool perl_matcher<BidiIterator, Allocator, traits, Allocator2>::match_end_line()
363 if(m_match_flags & match_single_line)
365 // we're not yet at the end so *first is always valid:
366 if(traits_inst.is_separator(*position))
// A previous character can only be inspected when we are past `base` or
// the caller flagged match_prev_avail.
368 if((position != base) || (m_match_flags & match_prev_avail))
370 // check that we're not in the middle of \r\n sequence
371 BidiIterator t(position);
373 if((*t == '\r') && (*position == '\n'))
378 pstate = pstate->next.p;
// Remaining case: the assertion holds unless match_not_eol is set.
382 else if((m_match_flags & match_not_eol) == 0)
384 pstate = pstate->next.p;
// Handles the wildcard state. Two flag-controlled exclusions are tested:
// separators when match_not_dot_newline is set, and the null character
// when match_not_dot_null is set.
390 template <class BidiIterator, class Allocator, class traits, class Allocator2>
391 bool perl_matcher<BidiIterator, Allocator, traits, Allocator2>::match_wild()
395 if(traits_inst.is_separator(*position) && (m_match_flags & match_not_dot_newline))
397 if((*position == char_type(0)) && (m_match_flags & match_not_dot_null))
399 pstate = pstate->next.p;
// Handles the final (accepting) state: after three flag-dependent checks
// it records the end of the match in *m_presult and sets m_has_found_match.
404 template <class BidiIterator, class Allocator, class traits, class Allocator2>
405 bool perl_matcher<BidiIterator, Allocator, traits, Allocator2>::match_match()
// match_not_null: detect an empty match (position still at the match start).
407 if((m_match_flags & match_not_null) && (position == (*m_presult)[0].first))
// match_all: detect a match that stops short of the end of the input.
409 if((m_match_flags & match_all) && (position != last))
// match_not_initial_null: detect a match ending at the search start.
411 if((m_match_flags & regex_constants::match_not_initial_null) && (position == search_base))
413 m_presult->set_second(position);
415 m_has_found_match = true;
// POSIX rules without match_any: offer this candidate to m_result via
// maybe_assign.
416 if((m_match_flags & (match_posix|match_any)) == match_posix)
418 m_result.maybe_assign(*m_presult);
421 #ifdef BOOST_REGEX_MATCH_EXTRA
422 if(match_extra & m_match_flags)
// Append each matched sub-expression to its own capture list.
424 for(unsigned i = 0; i < m_presult->size(); ++i)
425 if((*m_presult)[i].matched)
426 ((*m_presult)[i]).get_captures().push_back((*m_presult)[i]);
// Handles a word-boundary assertion state. b first records the
// word-character status for the next character and is later XOR-ed with a
// second word-character test, so it ends up true when the two differ.
432 template <class BidiIterator, class Allocator, class traits, class Allocator2>
433 bool perl_matcher<BidiIterator, Allocator, traits, Allocator2>::match_word_boundary()
435 bool b; // indicates whether next character is a word character
438 // prev and this character must be opposites:
// Under this preprocessor condition is_class is called through the traits
// type rather than through traits_inst.
439 #if defined(BOOST_REGEX_USE_C_LOCALE) && defined(__GNUC__) && (__GNUC__ == 2) && (__GNUC_MINOR__ < 95)
440 b = traits::is_class(*position, traits::char_class_word);
442 b = traits_inst.is_class(*position, traits::char_class_word);
447 b = (m_match_flags & match_not_eow) ? true : false;
// At the very start with no previous character available, match_not_bow
// decides the outcome.
449 if((position == base) && ((m_match_flags & match_prev_avail) == 0))
451 if(m_match_flags & match_not_bow)
459 b ^= traits_inst.is_class(*position, traits::char_class_word);
464 pstate = pstate->next.p;
467 return false; // no match if we get to here...
// Handles a within-word assertion state, which (per the comment below)
// requires both the previous and the current character to be word
// characters.
470 template <class BidiIterator, class Allocator, class traits, class Allocator2>
471 bool perl_matcher<BidiIterator, Allocator, traits, Allocator2>::match_within_word()
475 // both prev and this character must be traits::char_class_word:
476 if(traits_inst.is_class(*position, traits::char_class_word))
// Detects the start of the buffer with no previous character available.
479 if((position == base) && ((m_match_flags & match_prev_avail) == 0))
484 b = traits_inst.is_class(*position, traits::char_class_word);
489 pstate = pstate->next.p;
// Handles a start-of-word assertion state: the next character must be a
// word character and the previous one (when there is one) must not be.
// Returns false on each failing check; advances pstate on success.
496 template <class BidiIterator, class Allocator, class traits, class Allocator2>
497 bool perl_matcher<BidiIterator, Allocator, traits, Allocator2>::match_word_start()
500 return false; // can't be starting a word if we're already at the end of input
501 if(!traits_inst.is_class(*position, traits::char_class_word))
502 return false; // next character isn't a word character
503 if((position == base) && ((m_match_flags & match_prev_avail) == 0))
505 if(m_match_flags & match_not_bow)
506 return false; // no previous input
510 // otherwise inside buffer:
511 BidiIterator t(position);
513 if(traits_inst.is_class(*t, traits::char_class_word))
514 return false; // previous character not non-word
516 // OK we have a match:
517 pstate = pstate->next.p;
// Handles an end-of-word assertion state: the previous character must be a
// word character and the next one (when there is one) must not be.
// Returns false on each failing check; advances pstate and returns true
// otherwise.
521 template <class BidiIterator, class Allocator, class traits, class Allocator2>
522 bool perl_matcher<BidiIterator, Allocator, traits, Allocator2>::match_word_end()
524 if((position == base) && ((m_match_flags & match_prev_avail) == 0))
525 return false; // start of buffer can't be end of word
526 BidiIterator t(position);
528 if(traits_inst.is_class(*t, traits::char_class_word) == false)
529 return false; // previous character wasn't a word character
533 if(m_match_flags & match_not_eow)
534 return false; // end of buffer but not end of word
538 // otherwise inside buffer:
539 if(traits_inst.is_class(*position, traits::char_class_word))
540 return false; // next character is a word character
542 pstate = pstate->next.p;
543 return true; // if we fall through to here then we've succeeded
// Handles a start-of-buffer assertion state: the test below detects the
// failing case (position is not `base`, or match_not_bob is set);
// otherwise pstate is advanced.
546 template <class BidiIterator, class Allocator, class traits, class Allocator2>
547 bool perl_matcher<BidiIterator, Allocator, traits, Allocator2>::match_buffer_start()
549 if((position != base) || (m_match_flags & match_not_bob))
552 pstate = pstate->next.p;
// Handles an end-of-buffer assertion state: the test below detects the
// failing case (position is not `last`, or match_not_eob is set);
// otherwise pstate is advanced.
556 template <class BidiIterator, class Allocator, class traits, class Allocator2>
557 bool perl_matcher<BidiIterator, Allocator, traits, Allocator2>::match_buffer_end()
559 if((position != last) || (m_match_flags & match_not_eob))
562 pstate = pstate->next.p;
// Handles a back-reference state: the input at position is compared with
// the text previously captured for the sub-expression whose index is
// stored in the current (re_brace) state.
566 template <class BidiIterator, class Allocator, class traits, class Allocator2>
567 bool perl_matcher<BidiIterator, Allocator, traits, Allocator2>::match_backref()
569 // compare with what we previously matched:
// i and j are the start and end of the referenced capture.
570 BidiIterator i = (*m_presult)[static_cast<const re_brace*>(pstate)->index].first;
571 BidiIterator j = (*m_presult)[static_cast<const re_brace*>(pstate)->index].second;
// Both sides are passed through translate() with the icase setting;
// running out of input or a differing character is the failing case.
574 if((position == last) || (traits_inst.translate(*position, icase) != traits_inst.translate(*i, icase)))
579 pstate = pstate->next.p;
// Handles a long character-set state (re_set_long) by delegating the
// membership test to re_is_set_member, which returns an iterator t.
583 template <class BidiIterator, class Allocator, class traits, class Allocator2>
584 bool perl_matcher<BidiIterator, Allocator, traits, Allocator2>::match_long_set()
586 // let the traits class do the work:
589 BidiIterator t = re_is_set_member(position, last, static_cast<const re_set_long*>(pstate), re);
592 pstate = pstate->next.p;
// Handles a character-set state (re_set): the translated current
// character, cast to traits_uchar_type, indexes the state's _map table.
599 template <class BidiIterator, class Allocator, class traits, class Allocator2>
600 bool perl_matcher<BidiIterator, Allocator, traits, Allocator2>::match_set()
604 if(static_cast<const re_set*>(pstate)->_map[(traits_uchar_type)traits_inst.translate(*position, icase)])
606 pstate = pstate->next.p;
// Handles a jump state: execution continues at the state stored in the
// re_jump's alt pointer rather than at next.
613 template <class BidiIterator, class Allocator, class traits, class Allocator2>
614 bool perl_matcher<BidiIterator, Allocator, traits, Allocator2>::match_jump()
616 pstate = static_cast<const re_jump*>(pstate)->alt.p;
// Handles the combining-character state. The current character is tested
// with traits_inst.is_combining, and a loop then runs while position has
// not reached `last` and the character there is a combining character.
620 template <class BidiIterator, class Allocator, class traits, class Allocator2>
621 bool perl_matcher<BidiIterator, Allocator, traits, Allocator2>::match_combining()
625 if(traits_inst.is_combining(traits_inst.translate(*position, icase)))
628 while((position != last) && traits_inst.is_combining(traits_inst.translate(*position, icase)))
630 pstate = pstate->next.p;
// Handles a "soft" end-of-buffer assertion state: a copy of position is
// advanced over any separator characters, so position itself is not moved
// by the scan. match_not_eob is tested first.
634 template <class BidiIterator, class Allocator, class traits, class Allocator2>
635 bool perl_matcher<BidiIterator, Allocator, traits, Allocator2>::match_soft_buffer_end()
637 if(m_match_flags & match_not_eob)
639 BidiIterator p(position);
640 while((p != last) && traits_inst.is_separator(traits_inst.translate(*p, icase)))++p;
643 pstate = pstate->next.p;
// Handles the restart-continue state: pstate is advanced when position is
// still at search_base, the point where the current search began.
647 template <class BidiIterator, class Allocator, class traits, class Allocator2>
648 bool perl_matcher<BidiIterator, Allocator, traits, Allocator2>::match_restart_continue()
650 if(position == search_base)
652 pstate = pstate->next.p;
// Search routine used when a match may start anywhere: uses the
// expression's start map to skip characters that cannot begin a match.
658 template <class BidiIterator, class Allocator, class traits, class Allocator2>
659 bool perl_matcher<BidiIterator, Allocator, traits, Allocator2>::find_restart_any()
662 #pragma warning(push)
663 #pragma warning(disable:4127)
665 const unsigned char* _map = access::get_map(re);
668 // skip everything we can't match:
669 while((position != last) && !access::can_start(*position, _map, (unsigned char)mask_any) )
673 // run out of characters, try a null match if possible:
// can_be_null on the first state says whether an empty match is possible.
674 if(access::first(re)->can_be_null)
675 return match_prefix();
678 // now try and obtain a match:
// Search routine for expressions whose matches begin at a word start:
// candidate positions are found by scanning over word and then non-word
// characters, and filtered through the expression's start map.
691 template <class BidiIterator, class Allocator, class traits, class Allocator2>
692 bool perl_matcher<BidiIterator, Allocator, traits, Allocator2>::find_restart_word()
695 #pragma warning(push)
696 #pragma warning(disable:4127)
698 // do search optimised for word starts:
699 const unsigned char* _map = access::get_map(re);
// A previous character exists when the caller flagged match_prev_avail or
// position is past `base`; otherwise a match is tried right at the start.
700 if((m_match_flags & match_prev_avail) || (position != base))
702 else if(match_prefix())
// Scan over the word characters, then over the non-word characters that
// follow.
706 while((position != last) && traits_inst.is_class(*position, traits::char_class_word))
708 while((position != last) && !traits_inst.is_class(*position, traits::char_class_word))
// Only positions whose character can start a match are considered.
713 if(access::can_start(*position, _map, (unsigned char)mask_any) )
// Search routine for expressions whose matches begin at a line start:
// scans the input for '\n' characters and filters candidate positions
// through the expression's start map.
727 template <class BidiIterator, class Allocator, class traits, class Allocator2>
728 bool perl_matcher<BidiIterator, Allocator, traits, Allocator2>::find_restart_line()
730 // do search optimised for line starts:
731 const unsigned char* _map = access::get_map(re);
734 while(position != last)
// Scan up to the next '\n' (or the end of the input).
736 while((position != last) && (*position != '\n'))
// A match is tried here only when the expression can match the empty string.
743 if((access::first(re)->can_be_null) && match_prefix())
748 if( access::can_start(*position, _map, (unsigned char)mask_any) )
// Search routine for expressions that can only match at the start of the
// buffer: a single attempt is made, and only when position is `base` and
// match_not_bob is not set.
760 template <class BidiIterator, class Allocator, class traits, class Allocator2>
761 bool perl_matcher<BidiIterator, Allocator, traits, Allocator2>::find_restart_buf()
763 if((position == base) && ((m_match_flags & match_not_bob) == 0))
764 return match_prefix();
// Search routine for expressions with a literal prefix: scans the input
// for the string info->pstr, using the kmp_next fallback table from the
// expression's kmp_info, before handing over to the general matcher.
768 template <class BidiIterator, class Allocator, class traits, class Allocator2>
769 bool perl_matcher<BidiIterator, Allocator, traits, Allocator2>::find_restart_lit()
772 return false; // can't possibly match if we're at the end already
774 unsigned type = (m_match_flags & match_continuous) ?
775 static_cast<unsigned int>(regbase::restart_continue)
776 : static_cast<unsigned int>(access::restart_type(re));
778 const kmp_info<char_type>* info = access::get_kmp(re);
780 const char_type* x = info->pstr;
782 while (position != last)
// On a mismatch, fall back through kmp_next until x[j] matches the
// translated input character or j drops below zero.
784 while((j > -1) && (x[j] != traits_inst.translate(*position, icase)))
785 j = info->kmp_next[j];
// restart_fixed_lit: the bounds of the match are written straight into
// m_result (start = position moved back by j, end = restart moved forward
// by len).
790 if(type == regbase::restart_fixed_lit)
792 std::advance(position, -j);
794 std::advance(restart, len);
795 m_result.set_first(position);
796 m_result.set_second(restart);
803 std::advance(position, -j);
// Step restart backwards at most j times, stopping early if it reaches
// position; the loop body is intentionally empty.
808 for(int k = 0; (restart != position) && (k < j); ++k, --restart)
809 {} // dwa 10/20/2000 - warning suppression for MWCW
813 j = 0; //we could do better than this...
// Input exhausted with j characters of the literal matched: under
// match_partial, move back by j and let match_prefix() evaluate it.
818 if((m_match_flags & match_partial) && (position == last) && j)
820 // we need to check for a partial match:
822 std::advance(position, -j);
823 return match_prefix();
828 } // namespace re_detail
835 #ifdef BOOST_HAS_ABI_HEADERS
836 # include BOOST_ABI_SUFFIX