3 * Copyright (c) 1998-2000
6 * Permission to use, copy, modify, distribute and sell this software
7 * and its documentation for any purpose is hereby granted without fee,
8 * provided that the above copyright notice appear in all copies and
9 * that both that copyright notice and this permission notice appear
10 * in supporting documentation. Dr John Maddock makes no representations
11 * about the suitability of this software for any purpose.
12 * It is provided "as is" without express or implied warranty.
17 * LOCATION: see http://www.boost.org for most recent version.
18 * FILE regex_match.hpp
20 * DESCRIPTION: Regular expression matching algorithms.
21 * Note this is an internal header file included
22 * by regex.hpp, do not include on its own.
26 #ifndef BOOST_REGEX_MATCH_HPP
27 #define BOOST_REGEX_MATCH_HPP
34 #if __BORLANDC__ == 0x530
35 #pragma option push -a4 -b -Ve
36 #elif __BORLANDC__ > 0x530
37 #pragma option push -a8 -b -Ve
41 template <class iterator, class charT, class traits_type, class Allocator>
42 iterator BOOST_RE_CALL re_is_set_member(iterator next,
45 const reg_expression<charT, traits_type, Allocator>& e)
47 const charT* p = (const charT*)(set_+1);
50 bool icase = e.flags() & regbase::icase;
52 if(next == last) return next;
54 typedef typename traits_type::string_type traits_string_type;
55 const traits_type& traits_inst = e.get_traits();
57 // dwa 9/13/00 suppress incorrect MSVC warning - it claims this is never
61 // try and match a single character, could be a multi-character
62 // collating element...
63 for(i = 0; i < set_->csingles; ++i)
68 // treat null string as special case:
69 if(traits_inst.translate(*ptr, icase) != *p)
74 return set_->isnot ? next : (ptr == next) ? ++next : ptr;
78 while(*p && (ptr != last))
80 if(traits_inst.translate(*ptr, icase) != *p)
86 if(*p == 0) // if null we've matched
87 return set_->isnot ? next : (ptr == next) ? ++next : ptr;
94 charT col = traits_inst.translate(*next, icase);
97 if(set_->cranges || set_->cequivalents)
99 traits_string_type s2(1, col);
100 traits_string_type s1;
102 // try and match a range, NB only a single character can match
105 if(e.flags() & regbase::nocollate)
108 traits_inst.transform(s1, s2);
109 for(i = 0; i < set_->cranges; ++i)
116 return set_->isnot ? next : ++next;
124 // skip second string
130 // try and match an equivalence class, NB only a single character can match
131 if(set_->cequivalents)
133 traits_inst.transform_primary(s1, s2);
134 for(i = 0; i < set_->cequivalents; ++i)
137 return set_->isnot ? next : ++next;
144 if(traits_inst.is_class(col, set_->cclasses) == true)
145 return set_->isnot ? next : ++next;
146 return set_->isnot ? ++next : next;
149 template <class iterator, class Allocator>
150 class _priv_match_data
153 typedef BOOST_RE_MAYBE_TYPENAME REBIND_TYPE(int, Allocator) i_alloc;
154 typedef BOOST_RE_MAYBE_TYPENAME REBIND_TYPE(iterator, Allocator) it_alloc;
156 match_results_base<iterator, Allocator> temp_match;
158 jstack<match_results_base<iterator, Allocator>, Allocator> matches;
159 jstack<iterator, Allocator> prev_pos;
160 jstack<const re_syntax_base*, Allocator> prev_record;
161 jstack<int, Allocator> prev_acc;
163 unsigned int caccumulators;
164 iterator* loop_starts;
166 _priv_match_data(const match_results_base<iterator, Allocator>&);
173 void set_accumulator_size(unsigned int size);
174 int* get_accumulators()
178 iterator* get_loop_starts()
184 template <class iterator, class Allocator>
185 _priv_match_data<iterator, Allocator>::_priv_match_data(const match_results_base<iterator, Allocator>& m)
186 : temp_match(m), matches(64, m.allocator()), prev_pos(64, m.allocator()), prev_record(64, m.allocator())
193 template <class iterator, class Allocator>
194 void _priv_match_data<iterator, Allocator>::set_accumulator_size(unsigned int size)
196 if(size > caccumulators)
199 caccumulators = size;
200 accumulators = i_alloc(temp_match.allocator()).allocate(caccumulators);
201 loop_starts = it_alloc(temp_match.allocator()).allocate(caccumulators);
202 for(unsigned i = 0; i < caccumulators; ++i)
203 new (loop_starts + i) iterator();
207 template <class iterator, class Allocator>
208 void _priv_match_data<iterator, Allocator>::free()
212 //REBIND_INSTANCE(int, Allocator, temp_match.allocator()).deallocate(accumulators, caccumulators);
213 i_alloc temp1(temp_match.allocator());
214 temp1.deallocate(accumulators, caccumulators);
215 for(unsigned i = 0; i < caccumulators; ++i)
216 jm_destroy(loop_starts + i);
217 //REBIND_INSTANCE(iterator, Allocator, temp_match.allocator()).deallocate(loop_starts, caccumulators);
218 it_alloc temp2(temp_match.allocator());
219 temp2.deallocate(loop_starts, caccumulators);
223 template <class charT, class traits, class Allocator>
224 struct access_t : public reg_expression<charT, traits, Allocator>
226 typedef typename is_byte<charT>::width_type width_type;
227 typedef reg_expression<charT, traits, Allocator> base_type;
228 typedef charT char_type;
229 typedef traits traits_type;
230 typedef Allocator alloc_type;
232 static int repeat_count(const base_type& b)
233 { return base_type::repeat_count(b); }
234 static unsigned int restart_type(const base_type& b)
235 { return base_type::restart_type(b); }
236 static const re_syntax_base* first(const base_type& b)
237 { return base_type::first(b); }
238 static const unsigned char* get_map(const base_type& b)
239 { return base_type::get_map(b); }
240 static unsigned int leading_length(const base_type& b)
241 { return base_type::leading_length(b); }
242 static const kmp_info<charT>* get_kmp(const base_type& b)
243 { return base_type::get_kmp(b); }
244 static bool can_start(char_type c, const unsigned char* _map, unsigned char mask)
246 return reg_expression<char_type, traits_type, alloc_type>::can_start(c, _map, mask, width_type());
251 #if defined(BOOST_RE_NO_TEMPLATE_SWITCH_MERGE) && !defined(BOOST_RE_NO_NAMESPACES)
254 // template don't merge if they contain switch statements so declare these
255 // templates in unnamed namespace (ie with internal linkage), each translation
256 // unit then gets its own local copy, it works seemlessly but bloats the app.
260 template <class iterator, class Allocator, class charT, class traits, class Allocator2>
261 bool query_match_aux(iterator first,
263 match_results<iterator, Allocator>& m,
264 const reg_expression<charT, traits, Allocator2>& e,
266 _priv_match_data<iterator, Allocator>& pd,
269 typedef access_t<charT, traits, Allocator2> access;
271 if(e.flags() & regbase::failbit)
274 typedef typename traits::size_type traits_size_type;
275 typedef typename traits::uchar_type traits_uchar_type;
276 typedef typename is_byte<charT>::width_type width_type;
278 // declare some local aliases to reduce pointer loads
279 // good optimising compilers should make this unnecessary!!
280 jstack<match_results_base<iterator, Allocator>, Allocator>& matches = pd.matches;
281 jstack<iterator, Allocator>& prev_pos = pd.prev_pos;
282 jstack<const re_syntax_base*, Allocator>& prev_record = pd.prev_record;
283 jstack<int, Allocator>& prev_acc = pd.prev_acc;
284 match_results_base<iterator, Allocator>& temp_match = pd.temp_match;
285 temp_match.set_first(first);
287 const re_syntax_base* ptr = access::first(e);
288 bool match_found = false;
289 bool have_partial_match = false;
290 bool need_push_match = (e.mark_count() > 1);
291 int cur_acc = -1; // no active accumulator
292 pd.set_accumulator_size(access::repeat_count(e));
293 int* accumulators = pd.get_accumulators();
294 iterator* start_loop = pd.get_loop_starts();
296 bool icase = e.flags() & regbase::icase;
298 iterator base = first;
299 const traits& traits_inst = e.get_traits();
300 // dwa 9/13/00 suppress incorrect MSVC warning - it claims this is never
304 // prepare m for failure:
306 if((flags & match_init) == 0)
308 m.init_fail(first, last);
318 case syntax_element_match:
321 // match found, save then fallback in case we missed a
323 if((flags & match_not_null) && (first == temp_match[0].first))
325 temp_match.set_second(first);
326 m.maybe_assign(temp_match);
328 if(((flags & match_any) && ((first == last) || !(flags & match_all))) || ((first == last) && (need_push_match == false)))
330 // either we don't care what we match or we've matched
331 // the whole string and can't match anything longer.
332 while(matches.empty() == false)
334 while(prev_pos.empty() == false)
336 while(prev_record.empty() == false)
338 while(prev_acc.empty() == false)
344 case syntax_element_startmark:
345 if(((re_brace*)ptr)->index > 0)
346 temp_match.set_first(first, ((re_brace*)ptr)->index);
349 case syntax_element_endmark:
350 if(((re_brace*)ptr)->index > 0)
351 temp_match.set_second(first, ((re_brace*)ptr)->index);
354 case syntax_element_literal:
356 unsigned int len = ((re_literal*)ptr)->length;
357 charT* what = (charT*)(((re_literal*)ptr) + 1);
359 // compare string with what we stored in
361 for(unsigned int i = 0; i < len; ++i, ++first)
363 if((first == last) || (traits_inst.translate(*first, icase) != what[i]))
369 case syntax_element_start_line:
371 if(first == temp_match[0].first)
373 // we're at the start of the buffer
374 if(flags & match_prev_avail)
377 // check the previous value even though its before
378 // the start of our "buffer".
381 if(traits::is_separator(*t) && !((*t == '\r') && (*first == '\n')) )
388 if((flags & match_not_bol) == 0)
395 // we're in the middle of the string
396 goto inner_line_check;
397 case syntax_element_end_line:
398 // we're not yet at the end so *first is always valid:
399 if(traits::is_separator(*first))
401 if((first != base) || (flags & match_prev_avail))
403 // check that we're not in the middle of \r\n sequence
406 if((*t == '\r') && (*first == '\n'))
415 case syntax_element_wild:
416 // anything except possibly NULL or \n:
417 if(traits::is_separator(*first))
419 if(flags & match_not_dot_newline)
425 if(*first == charT(0))
427 if(flags & match_not_dot_null)
436 case syntax_element_word_boundary:
438 // prev and this character must be opposites:
439 bool b = traits_inst.is_class(*first, traits::char_class_word);
440 if((first == temp_match[0].first) && ((flags & match_prev_avail) == 0))
442 if(flags & match_not_bow)
450 b ^= traits_inst.is_class(*first, traits::char_class_word);
460 case syntax_element_within_word:
461 // both prev and this character must be traits::char_class_word:
462 if(traits_inst.is_class(*first, traits::char_class_word))
465 if((first == temp_match[0].first) && ((flags & match_prev_avail) == 0))
470 b = traits_inst.is_class(*first, traits::char_class_word);
480 case syntax_element_word_start:
481 if((first == temp_match[0].first) && ((flags & match_prev_avail) == 0))
484 if(flags & match_not_bow)
486 if(traits_inst.is_class(*first, traits::char_class_word))
493 // otherwise inside buffer:
494 if(traits_inst.is_class(*first, traits::char_class_word))
498 if(traits_inst.is_class(*t, traits::char_class_word) == false)
504 goto failure; // if we fall through to here then we've failed
505 case syntax_element_word_end:
506 if((first == temp_match[0].first) && ((flags & match_prev_avail) == 0))
507 goto failure; // start of buffer can't be end of word
509 // otherwise inside buffer:
510 if(traits_inst.is_class(*first, traits::char_class_word) == false)
514 if(traits_inst.is_class(*t, traits::char_class_word))
520 goto failure; // if we fall through to here then we've failed
521 case syntax_element_buffer_start:
522 if((first != temp_match[0].first) || (flags & match_not_bob))
527 case syntax_element_buffer_end:
528 if((first != last) || (flags & match_not_eob))
533 case syntax_element_backref:
535 // compare with what we previously matched:
536 iterator i = temp_match[((re_brace*)ptr)->index].first;
537 iterator j = temp_match[((re_brace*)ptr)->index].second;
540 if((first == last) || (traits_inst.translate(*first, icase) != traits_inst.translate(*i, icase)))
548 case syntax_element_long_set:
550 // let the traits class do the work:
551 iterator t = re_is_set_member(first, last, (re_set_long*)ptr, e);
560 case syntax_element_set:
561 // lookup character in table:
562 if(((re_set*)ptr)->_map[(traits_uchar_type)traits_inst.translate(*first, icase)])
569 case syntax_element_jump:
570 ptr = ((re_jump*)ptr)->alt.p;
572 case syntax_element_alt:
575 if(access::can_start(*first, ((re_jump*)ptr)->_map, (unsigned char)mask_take))
577 // we can take the first alternative,
578 // see if we need to push next alternative:
579 if(access::can_start(*first, ((re_jump*)ptr)->_map, mask_skip))
582 matches.push(temp_match);
583 for(k = 0; k <= cur_acc; ++k)
584 prev_pos.push(start_loop[k]);
585 prev_pos.push(first);
586 prev_record.push(ptr);
587 for(k = 0; k <= cur_acc; ++k)
588 prev_acc.push(accumulators[k]);
589 prev_acc.push(cur_acc);
594 if(access::can_start(*first, ((re_jump*)ptr)->_map, mask_skip))
596 ptr = ((re_jump*)ptr)->alt.p;
599 goto failure; // neither option is possible
601 case syntax_element_rep:
604 // if we're moving to a higher id (nested repeats etc)
605 // zero out our accumualtors:
606 if(cur_acc < ((re_repeat*)ptr)->id)
608 cur_acc = ((re_repeat*)ptr)->id;
609 accumulators[cur_acc] = 0;
610 start_loop[cur_acc] = first;
613 cur_acc = ((re_repeat*)ptr)->id;
615 if(((re_repeat*)ptr)->leading)
618 //charT c = traits_inst.translate(*first);
620 // first of all test for special case where this is last element,
621 // if that is the case then repeat as many times as possible,
622 // as long as the repeat is greedy:
624 if((((re_repeat*)ptr)->alt.p->type == syntax_element_match)
625 && (((re_repeat*)ptr)->greedy == true))
627 // see if we can take the repeat:
628 if(((unsigned int)accumulators[cur_acc] < ((re_repeat*)ptr)->max)
629 && access::can_start(*first, ((re_repeat*)ptr)->_map, mask_take))
631 // push terminating match as fallback:
632 if((unsigned int)accumulators[cur_acc] >= ((re_repeat*)ptr)->min)
634 if((prev_record.empty() == false) && (prev_record.peek() == ((re_repeat*)ptr)->alt.p))
636 // we already have the required fallback
637 // don't add any more, just update this one:
639 matches.peek() = temp_match;
640 prev_pos.peek() = first;
645 matches.push(temp_match);
646 prev_pos.push(first);
647 prev_record.push(((re_repeat*)ptr)->alt.p);
650 // move to next item in list:
651 if((first != start_loop[cur_acc]) || !accumulators[cur_acc])
653 ++accumulators[cur_acc];
655 start_loop[cur_acc] = first;
660 // see if we can skip the repeat:
661 if(((unsigned int)accumulators[cur_acc] >= ((re_repeat*)ptr)->min)
662 && access::can_start(*first, ((re_repeat*)ptr)->_map, mask_skip))
664 ptr = ((re_repeat*)ptr)->alt.p;
671 // OK if we get to here then the repeat is either non-terminal or non-greedy,
672 // see if we can skip the repeat:
673 if(((unsigned int)accumulators[cur_acc] >= ((re_repeat*)ptr)->min)
674 && access::can_start(*first, ((re_repeat*)ptr)->_map, mask_skip))
676 // see if we can push failure info:
677 if(((unsigned int)accumulators[cur_acc] < ((re_repeat*)ptr)->max)
678 && access::can_start(*first, ((re_repeat*)ptr)->_map, mask_take))
680 // check to see if the last loop matched a NULL string
681 // if so then we really don't want to loop again:
682 if(((unsigned int)accumulators[cur_acc] == ((re_repeat*)ptr)->min)
683 || (first != start_loop[cur_acc]))
686 matches.push(temp_match);
687 prev_pos.push(first);
688 prev_record.push(ptr);
689 for(k = 0; k <= cur_acc; ++k)
690 prev_acc.push(accumulators[k]);
691 // for non-greedy repeats save whether we have a match already:
692 if(((re_repeat*)ptr)->greedy == false)
694 prev_acc.push(match_found);
699 ptr = ((re_repeat*)ptr)->alt.p;
703 // otherwise see if we can take the repeat:
704 if(((unsigned int)accumulators[cur_acc] < ((re_repeat*)ptr)->max)
705 && access::can_start(*first, ((re_repeat*)ptr)->_map, mask_take) &&
706 ((first != start_loop[cur_acc]) || !accumulators[cur_acc]))
708 // move to next item in list:
709 ++accumulators[cur_acc];
711 start_loop[cur_acc] = first;
715 // if we get here then neither option is allowed so fail:
719 case syntax_element_combining:
720 if(traits_inst.is_combining(traits_inst.translate(*first, icase)))
723 while((first != last) && traits_inst.is_combining(traits_inst.translate(*first, icase)))++first;
726 case syntax_element_soft_buffer_end:
728 if(flags & match_not_eob)
731 while((p != last) && traits_inst.is_separator(traits_inst.translate(*first, icase)))++p;
737 case syntax_element_restart_continue:
738 if(first != temp_match[-1].first)
743 jm_assert(0); // should never get to here!!
749 // if we get to here then we've run out of characters to match against,
750 // we could however still have non-character regex items left
751 if((ptr->can_be_null == 0) && ((flags & match_partial) == 0))
758 case syntax_element_match:
760 case syntax_element_startmark:
761 temp_match.set_first(first, ((re_brace*)ptr)->index);
764 case syntax_element_endmark:
765 temp_match.set_second(first, ((re_brace*)ptr)->index);
768 case syntax_element_start_line:
769 goto outer_line_check;
770 case syntax_element_end_line:
771 // we're at the end so *first is never valid:
772 if((flags & match_not_eol) == 0)
778 case syntax_element_word_boundary:
779 case syntax_element_word_end:
780 if(((flags & match_not_eow) == 0) && (first != temp_match[0].first))
784 if(traits_inst.is_class(*t, traits::char_class_word))
791 case syntax_element_buffer_end:
792 case syntax_element_soft_buffer_end:
793 if(flags & match_not_eob)
798 case syntax_element_jump:
799 ptr = ((re_jump*)ptr)->alt.p;
801 case syntax_element_alt:
802 if(ptr->can_be_null & mask_take)
804 // we can test the first alternative,
805 // see if we need to push next alternative:
806 if(ptr->can_be_null & mask_skip)
809 matches.push(temp_match);
810 for(k = 0; k <= cur_acc; ++k)
811 prev_pos.push(start_loop[k]);
812 prev_pos.push(first);
813 prev_record.push(ptr);
814 for(k = 0; k <= cur_acc; ++k)
815 prev_acc.push(accumulators[k]);
816 prev_acc.push(cur_acc);
821 if(ptr->can_be_null & mask_skip)
823 ptr = ((re_jump*)ptr)->alt.p;
826 goto failure; // neither option is possible
827 case syntax_element_rep:
828 // if we're moving to a higher id (nested repeats etc)
829 // zero out our accumualtors:
830 if(cur_acc < ((re_repeat*)ptr)->id)
832 cur_acc = ((re_repeat*)ptr)->id;
833 accumulators[cur_acc] = 0;
834 start_loop[cur_acc] = first;
837 cur_acc = ((re_repeat*)ptr)->id;
839 // see if we can skip the repeat:
840 if(((unsigned int)accumulators[cur_acc] >= ((re_repeat*)ptr)->min)
841 && ((ptr->can_be_null & mask_skip) || (flags & match_partial)))
843 // don't push failure info, there's no point:
844 ptr = ((re_repeat*)ptr)->alt.p;
848 // otherwise see if we can take the repeat:
849 if(((unsigned int)accumulators[cur_acc] < ((re_repeat*)ptr)->max)
850 && (((ptr->can_be_null & (mask_take | mask_skip)) == (mask_take | mask_skip))) || (flags & match_partial))
852 // move to next item in list:
853 ++accumulators[cur_acc];
855 start_loop[cur_acc] = first;
859 // if we get here then neither option is allowed so fail:
861 case syntax_element_restart_continue:
862 if(first != temp_match[-1].first)
874 // check for possible partial match:
876 if((flags & match_partial)
877 && !match_found // no full match already
878 && (base != first) // some charcters have been consumed
879 && (first == last)) // end of input has been reached
881 have_partial_match = true;
882 m.maybe_assign(temp_match);
885 if(prev_record.empty() == false)
887 ptr = prev_record.peek();
890 case syntax_element_alt:
891 // get next alternative:
892 ptr = ((re_jump*)ptr)->alt.p;
894 matches.pop(temp_match);
895 prev_acc.pop(cur_acc);
896 for(k = cur_acc; k >= 0; --k)
897 prev_acc.pop(accumulators[k]);
899 for(k = cur_acc; k >= 0; --k)
900 prev_pos.pop(start_loop[k]);
903 case syntax_element_rep:
905 // we're doing least number of repeats first,
906 // increment count and repeat again:
907 bool saved_matched = match_found;
909 matches.pop(temp_match);
911 cur_acc = ((re_repeat*)ptr)->id;
912 if(((re_repeat*)ptr)->greedy == false)
914 saved_matched = prev_acc.peek();
917 for(k = cur_acc; k >= 0; --k)
918 prev_acc.pop(accumulators[k]);
920 if((unsigned int)++accumulators[cur_acc] > ((re_repeat*)ptr)->max)
921 goto failure; // repetions exhausted.
923 // if the repeat is non-greedy, and we found a match then fail again:
924 if((((re_repeat*)ptr)->greedy == false) && (match_found == true))
928 else if (match_found == false)
929 match_found = saved_matched;
931 start_loop[cur_acc] = first;
934 case syntax_element_match:
936 matches.pop(temp_match);
942 // mustn't get here!!
946 if(match_found || have_partial_match)
949 // if we get to here then everything has failed
950 // and no match was found:
953 #if defined(BOOST_RE_NO_TEMPLATE_SWITCH_MERGE) && !defined(BOOST_RE_NO_NAMESPACES)
958 template <class iterator>
959 void _skip_and_inc(unsigned int& clines, iterator& last_line, iterator& first, const iterator last)
973 template <class iterator>
974 void _skip_and_dec(unsigned int& clines, iterator& last_line, iterator& first, iterator base, unsigned int len)
976 bool need_line = false;
977 for(unsigned int i = 0; i < len; ++i)
991 if(last_line != base)
996 while((last_line != base) && (*last_line != '\n'))
998 if(*last_line == '\n')
1003 template <class iterator>
1004 inline void _inc_one(unsigned int& clines, iterator& last_line, iterator& first)
1008 last_line = ++first;
1015 template <class iterator, class Allocator>
1016 struct grep_search_predicate
1018 match_results<iterator, Allocator>* pm;
1019 grep_search_predicate(match_results<iterator, Allocator>* p) : pm(p) {}
1020 bool operator()(const match_results<iterator, Allocator>& m)
1022 *pm = static_cast<const match_results_base<iterator, Allocator>&>(m);
1027 #if !defined(BOOST_RE_NO_TEMPLATE_RETURNS) && !defined(BOOST_RE_NO_PARTIAL_FUNC_SPEC)
1029 template <class iterator, class Allocator>
1030 inline const match_results_base<iterator, Allocator>& grep_out_type(const grep_search_predicate<iterator, Allocator>& o, const Allocator&)
1037 template <class T, class Allocator>
1038 inline const Allocator& grep_out_type(const T&, const Allocator& a)
1043 #if defined(BOOST_RE_NO_TEMPLATE_SWITCH_MERGE) && !defined(BOOST_RE_NO_NAMESPACES)
1046 // template don't merge if they contain switch statements so declare these
1047 // templates in unnamed namespace (ie with internal linkage), each translation
1048 // unit then gets its own local copy, it works seemlessly but bloats the app.
1054 // find all non-overlapping matches within the sequence first last:
1056 template <class Predicate, class I, class charT, class traits, class A, class A2>
1057 unsigned int reg_grep2(Predicate foo, I first, I last, const reg_expression<charT, traits, A>& e, unsigned flags, A2 a)
1059 typedef access_t<charT, traits, A> access;
1061 if(e.flags() & regbase::failbit)
1064 typedef typename traits::size_type traits_size_type;
1065 typedef typename traits::uchar_type traits_uchar_type;
1066 typedef typename is_byte<charT>::width_type width_type;
1068 match_results<I, A2> m(grep_out_type(foo, a));
1070 m.set_size(e.mark_count(), first, last);
1071 m.set_line(1, first);
1074 unsigned int clines = 1;
1075 unsigned int cmatches = 0;
1076 I last_line = first;
1080 const traits& traits_inst = e.get_traits();
1081 // dwa 9/13/00 suppress incorrect MSVC warning - it claims this is never
1085 flags |= match_init;
1087 _priv_match_data<I, A2> pd(m);
1089 const unsigned char* _map = access::get_map(e);
1094 // special case, only test if can_be_null,
1095 // don't dereference any pointers!!
1096 if(access::first(e)->can_be_null)
1098 if(query_match_aux(first, last, m, e, flags, pd, &restart))
1107 // try one time whatever:
1108 if( access::can_start(*first, _map, (unsigned char)mask_any) )
1110 if(query_match_aux(first, last, m, e, flags, pd, &restart))
1115 // update to end of what matched
1116 // trying to match again with match_not_null set if this
1117 // is a null match...
1119 if(first == m[0].second)
1121 next_base = m[0].second;
1122 pd.temp_match.init_fail(next_base, last);
1123 m.init_fail(next_base, last);
1124 if(query_match_aux(first, last, m, e, flags | match_not_null, pd, &restart))
1133 for(unsigned int i = 0; (restart != first) && (i < access::leading_length(e)); ++i, --restart)
1134 {} // dwa 10/20/2000 - warning suppression for MWCW
1137 _skip_and_inc(clines, last_line, first, restart);
1142 _skip_and_inc(clines, last_line, first, m[0].second);
1143 next_base = m[0].second;
1144 pd.temp_match.init_fail(next_base, last);
1145 m.init_fail(next_base, last);
1150 for(unsigned int i = 0; (restart != first) && (i < access::leading_length(e)); ++i, --restart)
1151 {} // dwa 10/20/2000 - warning suppression for MWCW
1154 _skip_and_inc(clines, last_line, first, restart);
1158 _inc_one(clines, last_line, first);
1159 flags |= match_prev_avail | match_not_bob;
1162 // depending on what the first record is we may be able to
1163 // optimise the search:
1164 type = (flags & match_continuous) ? regbase::restart_continue : access::restart_type(e);
1166 if(type == regbase::restart_buf)
1171 case regbase::restart_lit:
1172 case regbase::restart_fixed_lit:
1174 const kmp_info<charT>* info = access::get_kmp(e);
1175 int len = info->len;
1176 const charT* x = info->pstr;
1178 bool icase = e.flags() & regbase::icase;
1179 while (first != last)
1181 while((j > -1) && (x[j] != traits_inst.translate(*first, icase)))
1182 j = info->kmp_next[j];
1183 _inc_one(clines, last_line, first);
1187 if(type == regbase::restart_fixed_lit)
1189 _skip_and_dec(clines, last_line, first, base, j);
1193 m.set_second(restart);
1194 m.set_line(clines, last_line);
1198 _skip_and_inc(clines, last_line, first, restart);
1199 next_base = m[0].second;
1200 pd.temp_match.init_fail(next_base, last);
1201 m.init_fail(next_base, last);
1207 _skip_and_dec(clines, last_line, first, base, j);
1208 if(query_match_aux(first, last, m, e, flags, pd, &restart))
1211 m.set_line(clines, last_line);
1215 // update to end of what matched
1216 _skip_and_inc(clines, last_line, first, m[0].second);
1217 next_base = m[0].second;
1218 pd.temp_match.init_fail(next_base, last);
1219 m.init_fail(next_base, last);
1224 for(int k = 0; (restart != first) && (k < j); ++k, --restart)
1225 {} // dwa 10/20/2000 - warning suppression for MWCW
1228 _skip_and_inc(clines, last_line, first, restart);
1229 j = 0; //we could do better than this...
1236 case regbase::restart_any:
1238 while(first != last)
1240 if( access::can_start(*first, _map, (unsigned char)mask_any) )
1242 if(query_match_aux(first, last, m, e, flags, pd, &restart))
1245 m.set_line(clines, last_line);
1249 // update to end of what matched
1250 // trying to match again with match_not_null set if this
1251 // is a null match...
1253 if(first == m[0].second)
1255 next_base = m[0].second;
1256 pd.temp_match.init_fail(next_base, last);
1257 m.init_fail(next_base, last);
1258 if(query_match_aux(first, last, m, e, flags | match_not_null, pd, &restart))
1260 m.set_line(clines, last_line);
1268 for(unsigned int i = 0; (restart != first) && (i < access::leading_length(e)); ++i, --restart)
1269 {} // dwa 10/20/2000 - warning suppression for MWCW
1272 _skip_and_inc(clines, last_line, first, restart);
1277 _skip_and_inc(clines, last_line, first, m[0].second);
1278 next_base = m[0].second;
1279 pd.temp_match.init_fail(next_base, last);
1280 m.init_fail(next_base, last);
1286 for(unsigned int i = 0; (restart != first) && (i < access::leading_length(e)); ++i, --restart)
1287 {} // dwa 10/20/2000 - warning suppression for MWCW
1290 _skip_and_inc(clines, last_line, first, restart);
1294 _inc_one(clines, last_line, first);
1298 case regbase::restart_word:
1300 // do search optimised for word starts:
1301 while(first != last)
1306 // skip the word characters:
1307 while((first != last) && traits_inst.is_class(*first, traits::char_class_word))
1309 // now skip the white space:
1310 while((first != last) && (traits_inst.is_class(*first, traits::char_class_word) == false))
1314 // hack to work around gcc optimisation bug
1315 // just expand the contents of _inc_one here:
1318 last_line = ++first;
1324 _inc_one(clines, last_line, first);
1330 if( access::can_start(*first, _map, (unsigned char)mask_any) )
1332 if(query_match_aux(first, last, m, e, flags, pd, &restart))
1334 m.set_line(clines, last_line);
1338 // update to end of what matched
1339 // trying to match again with match_not_null set if this
1340 // is a null match...
1342 if(first == m[0].second)
1344 next_base = m[0].second;
1345 pd.temp_match.init_fail(next_base, last);
1346 m.init_fail(next_base, last);
1347 if(query_match_aux(first, last, m, e, flags | match_not_null, pd, &restart))
1349 m.set_line(clines, last_line);
1357 for(unsigned int i = 0; (restart != first) && (i < access::leading_length(e)); ++i, --restart)
1358 {} // dwa 10/20/2000 - warning suppression for MWCW
1361 _skip_and_inc(clines, last_line, first, restart);
1366 _skip_and_inc(clines, last_line, first, m[0].second);
1367 next_base = m[0].second;
1368 pd.temp_match.init_fail(next_base, last);
1369 m.init_fail(next_base, last);
1374 for(unsigned int i = 0; (restart != first) && (i < access::leading_length(e)); ++i, --restart)
1375 {} // dwa 10/20/2000 - warning suppression for MWCW
1378 _skip_and_inc(clines, last_line, first, restart);
1382 _inc_one(clines, last_line, first);
1386 case regbase::restart_line:
1388 // do search optimised for line starts:
1389 while(first != last)
1391 // find first charcter after a line break:
1395 while((first != last) && (*first != '\n'))
1406 if( access::can_start(*first, _map, (unsigned char)mask_any) )
1408 if(query_match_aux(first, last, m, e, flags, pd, &restart))
1410 m.set_line(clines, last_line);
1414 // update to end of what matched
1415 // trying to match again with match_not_null set if this
1416 // is a null match...
1418 if(first == m[0].second)
1420 next_base = m[0].second;
1421 pd.temp_match.init_fail(next_base, last);
1422 m.init_fail(next_base, last);
1423 if(query_match_aux(first, last, m, e, flags | match_not_null, pd, &restart))
1425 m.set_line(clines, last_line);
1433 for(unsigned int i = 0; (restart != first) && (i < access::leading_length(e)); ++i, --restart)
1434 {} // dwa 10/20/2000 - warning suppression for MWCW
1437 _skip_and_inc(clines, last_line, first, restart);
1442 _skip_and_inc(clines, last_line, first, m[0].second);
1443 next_base = m[0].second;
1444 pd.temp_match.init_fail(next_base, last);
1445 m.init_fail(next_base, last);
1450 for(unsigned int i = 0; (restart != first) && (i < access::leading_length(e)); ++i, --restart)
1451 {} // dwa 10/20/2000 - warning suppression for MWCW
1454 _skip_and_inc(clines, last_line, first, restart);
1458 _inc_one(clines, last_line, first);
1462 case regbase::restart_continue:
1464 while(first != last)
1466 if( access::can_start(*first, _map, (unsigned char)mask_any) )
1468 if(query_match_aux(first, last, m, e, flags, pd, &restart))
1470 m.set_line(clines, last_line);
1474 // update to end of what matched
1475 // trying to match again with match_not_null set if this
1476 // is a null match...
1477 if(first == m[0].second)
1479 next_base = m[0].second;
1480 pd.temp_match.init_fail(next_base, last);
1481 m.init_fail(next_base, last);
1482 if(query_match_aux(first, last, m, e, flags | match_not_null, pd, &restart))
1484 m.set_line(clines, last_line);
1490 return cmatches; // can't continue from null match
1492 _skip_and_inc(clines, last_line, first, m[0].second);
1493 next_base = m[0].second;
1494 pd.temp_match.init_fail(next_base, last);
1495 m.init_fail(next_base, last);
1506 // finally check trailing null string:
1507 if(access::first(e)->can_be_null)
1509 if(query_match_aux(first, last, m, e, flags, pd, &restart))
1511 m.set_line(clines, last_line);
1520 #if defined(BOOST_RE_NO_TEMPLATE_SWITCH_MERGE) && !defined(BOOST_RE_NO_NAMESPACES)
1521 } // namespace {anon}
1524 } // namespace re_detail
1528 // returns true if the specified regular expression matches
1529 // the whole of the input. Fills in what matched in m.
1531 template <class iterator, class Allocator, class charT, class traits, class Allocator2>
1532 bool regex_match(iterator first, iterator last, match_results<iterator, Allocator>& m, const reg_expression<charT, traits, Allocator2>& e, unsigned flags = match_default)
1534 // prepare m for failure:
1535 if((flags & match_init) == 0)
1537 m.set_size(e.mark_count(), first, last);
1539 m.set_line(1, first);
1541 flags |= match_all; // must match all of input.
1542 re_detail::_priv_match_data<iterator, Allocator> pd(m);
1544 bool result = re_detail::query_match_aux(first, last, m, e, flags, pd, &restart);
1545 if(result && (last == m[0].second))
1549 template <class iterator, class charT, class traits, class Allocator2>
1550 bool regex_match(iterator first, iterator last, const reg_expression<charT, traits, Allocator2>& e, unsigned flags = match_default)
1552 match_results<iterator> m;
1553 return regex_match(first, last, m, e, flags);
1556 // query_match convenience interfaces:
1557 #ifndef BOOST_RE_NO_PARTIAL_FUNC_SPEC
1559 // this isn't really a partial specialisation, but template function
1560 // overloading - if the compiler doesn't support partial specialisation
1561 // then it really won't support this either:
1562 template <class charT, class Allocator, class traits, class Allocator2>
1563 inline bool regex_match(const charT* str,
1564 match_results<const charT*, Allocator>& m,
1565 const reg_expression<charT, traits, Allocator2>& e,
1566 unsigned flags = match_default)
1568 return regex_match(str, str + traits::length(str), m, e, flags);
1571 template <class ST, class SA, class Allocator, class charT, class traits, class Allocator2>
1572 inline bool regex_match(const std::basic_string<charT, ST, SA>& s,
1573 match_results<typename std::basic_string<charT, ST, SA>::const_iterator, Allocator>& m,
1574 const reg_expression<charT, traits, Allocator2>& e,
1575 unsigned flags = match_default)
1577 return regex_match(s.begin(), s.end(), m, e, flags);
1579 template <class charT, class traits, class Allocator2>
1580 inline bool regex_match(const charT* str,
1581 const reg_expression<charT, traits, Allocator2>& e,
1582 unsigned flags = match_default)
1584 match_results<const charT*> m;
1585 return regex_match(str, str + traits::length(str), m, e, flags);
1588 template <class ST, class SA, class charT, class traits, class Allocator2>
1589 inline bool regex_match(const std::basic_string<charT, ST, SA>& s,
1590 const reg_expression<charT, traits, Allocator2>& e,
1591 unsigned flags = match_default)
1593 typedef typename std::basic_string<charT, ST, SA>::const_iterator iterator;
1594 match_results<iterator> m;
1595 return regex_match(s.begin(), s.end(), m, e, flags);
1597 #else // partial ordering
1598 inline bool regex_match(const char* str,
1601 unsigned flags = match_default)
1603 return regex_match(str, str + regex::traits_type::length(str), m, e, flags);
1605 inline bool regex_match(const char* str,
1607 unsigned flags = match_default)
1609 match_results<const char*> m;
1610 return regex_match(str, str + regex::traits_type::length(str), m, e, flags);
1612 #ifndef BOOST_RE_NO_WCSTRING
1613 inline bool regex_match(const wchar_t* str,
1616 unsigned flags = match_default)
1618 return regex_match(str, str + wregex::traits_type::length(str), m, e, flags);
1620 inline bool regex_match(const wchar_t* str,
1622 unsigned flags = match_default)
1624 match_results<const wchar_t*> m;
1625 return regex_match(str, str + wregex::traits_type::length(str), m, e, flags);
1628 inline bool regex_match(const std::string& s,
1629 match_results<std::string::const_iterator, regex::allocator_type>& m,
1631 unsigned flags = match_default)
1633 return regex_match(s.begin(), s.end(), m, e, flags);
1635 inline bool regex_match(const std::string& s,
1637 unsigned flags = match_default)
1639 match_results<std::string::const_iterator, regex::allocator_type> m;
1640 return regex_match(s.begin(), s.end(), m, e, flags);
1642 #if !defined(BOOST_RE_NO_WCSTRING)
1643 inline bool regex_match(const std::basic_string<wchar_t>& s,
1644 match_results<std::basic_string<wchar_t>::const_iterator, wregex::allocator_type>& m,
1646 unsigned flags = match_default)
1648 return regex_match(s.begin(), s.end(), m, e, flags);
1650 inline bool regex_match(const std::basic_string<wchar_t>& s,
1652 unsigned flags = match_default)
1654 match_results<std::basic_string<wchar_t>::const_iterator, wregex::allocator_type> m;
1655 return regex_match(s.begin(), s.end(), m, e, flags);
1661 template <class iterator, class Allocator, class charT, class traits, class Allocator2>
1662 bool regex_search(iterator first, iterator last, match_results<iterator, Allocator>& m, const reg_expression<charT, traits, Allocator2>& e, unsigned flags = match_default)
1664 if(e.flags() & regbase::failbit)
1667 typedef typename traits::size_type traits_size_type;
1668 typedef typename traits::uchar_type traits_uchar_type;
1670 return re_detail::reg_grep2(re_detail::grep_search_predicate<iterator, Allocator>(&m), first, last, e, flags, m.allocator());
1674 // regex_search convenience interfaces:
1675 #ifndef BOOST_RE_NO_PARTIAL_FUNC_SPEC
1677 // this isn't really a partial specialisation, but template function
1678 // overloading - if the compiler doesn't support partial specialisation
1679 // then it really won't support this either:
1680 template <class charT, class Allocator, class traits, class Allocator2>
1681 inline bool regex_search(const charT* str,
1682 match_results<const charT*, Allocator>& m,
1683 const reg_expression<charT, traits, Allocator2>& e,
1684 unsigned flags = match_default)
1686 return regex_search(str, str + traits::length(str), m, e, flags);
1689 template <class ST, class SA, class Allocator, class charT, class traits, class Allocator2>
1690 inline bool regex_search(const std::basic_string<charT, ST, SA>& s,
1691 match_results<typename std::basic_string<charT, ST, SA>::const_iterator, Allocator>& m,
1692 const reg_expression<charT, traits, Allocator2>& e,
1693 unsigned flags = match_default)
1695 return regex_search(s.begin(), s.end(), m, e, flags);
1697 #else // partial specialisation
1698 inline bool regex_search(const char* str,
1701 unsigned flags = match_default)
1703 return regex_search(str, str + regex::traits_type::length(str), m, e, flags);
1705 #ifndef BOOST_RE_NO_WCSTRING
1706 inline bool regex_search(const wchar_t* str,
1709 unsigned flags = match_default)
1711 return regex_search(str, str + wregex::traits_type::length(str), m, e, flags);
1714 inline bool regex_search(const std::string& s,
1715 match_results<std::string::const_iterator, regex::allocator_type>& m,
1717 unsigned flags = match_default)
1719 return regex_search(s.begin(), s.end(), m, e, flags);
1721 #if !defined(BOOST_RE_NO_WCSTRING)
1722 inline bool regex_search(const std::basic_string<wchar_t>& s,
1723 match_results<std::basic_string<wchar_t>::const_iterator, wregex::allocator_type>& m,
1725 unsigned flags = match_default)
1727 return regex_search(s.begin(), s.end(), m, e, flags);
1736 // find all non-overlapping matches within the sequence first last:
1738 template <class Predicate, class iterator, class charT, class traits, class Allocator>
1739 inline unsigned int regex_grep(Predicate foo, iterator first, iterator last, const reg_expression<charT, traits, Allocator>& e, unsigned flags = match_default)
1741 return re_detail::reg_grep2(foo, first, last, e, flags, e.allocator());
1745 // regex_grep convenience interfaces:
1746 #ifndef BOOST_RE_NO_PARTIAL_FUNC_SPEC
1748 // this isn't really a partial specialisation, but template function
1749 // overloading - if the compiler doesn't support partial specialisation
1750 // then it really won't support this either:
1751 template <class Predicate, class charT, class Allocator, class traits>
1752 inline unsigned int regex_grep(Predicate foo, const charT* str,
1753 const reg_expression<charT, traits, Allocator>& e,
1754 unsigned flags = match_default)
1756 return regex_grep(foo, str, str + traits::length(str), e, flags);
1759 template <class Predicate, class ST, class SA, class Allocator, class charT, class traits>
1760 inline unsigned int regex_grep(Predicate foo, const std::basic_string<charT, ST, SA>& s,
1761 const reg_expression<charT, traits, Allocator>& e,
1762 unsigned flags = match_default)
1764 return regex_grep(foo, s.begin(), s.end(), e, flags);
1766 #else // partial specialisation
1767 inline unsigned int regex_grep(bool (*foo)(const cmatch&), const char* str,
1769 unsigned flags = match_default)
1771 return regex_grep(foo, str, str + regex::traits_type::length(str), e, flags);
1773 #ifndef BOOST_RE_NO_WCSTRING
1774 inline unsigned int regex_grep(bool (*foo)(const wcmatch&), const wchar_t* str,
1776 unsigned flags = match_default)
1778 return regex_grep(foo, str, str + wregex::traits_type::length(str), e, flags);
1781 inline unsigned int regex_grep(bool (*foo)(const match_results<std::string::const_iterator, regex::allocator_type>&), const std::string& s,
1783 unsigned flags = match_default)
1785 return regex_grep(foo, s.begin(), s.end(), e, flags);
1787 #if !defined(BOOST_RE_NO_WCSTRING)
1788 inline unsigned int regex_grep(bool (*foo)(const match_results<std::basic_string<wchar_t>::const_iterator, wregex::allocator_type>&),
1789 const std::basic_string<wchar_t>& s,
1791 unsigned flags = match_default)
1793 return regex_grep(foo, s.begin(), s.end(), e, flags);
1801 #if __BORLANDC__ > 0x520
1806 } // namespace boost
1808 #endif // BOOST_REGEX_MATCH_HPP