3 * Copyright (c) 1998-2000
6 * Permission to use, copy, modify, distribute and sell this software
7 * and its documentation for any purpose is hereby granted without fee,
8 * provided that the above copyright notice appear in all copies and
9 * that both that copyright notice and this permission notice appear
10 * in supporting documentation. Dr John Maddock makes no representations
11 * about the suitability of this software for any purpose.
12 * It is provided "as is" without express or implied warranty.
17 * LOCATION: see http://www.boost.org for most recent version.
18 * FILE regex_compile.hpp
20 * DESCRIPTION: Declares reg_expression<> member functions. This is
21 * an internal header file, do not include directly.
24 #ifndef BOOST_REGEX_COMPILE_HPP
25 #define BOOST_REGEX_COMPILE_HPP
31 #if __BORLANDC__ == 0x530
32 #pragma option push -a4 -b -Ve
33 #elif __BORLANDC__ > 0x530
34 #pragma option push -a8 -b -Ve
38 template <class traits>
41 typedef typename traits::char_type char_type;
44 kmp_translator(bool c, traits* p) : icase(c), pt(p) {}
45 char_type operator()(char_type c)
47 return pt->translate(c, icase);
52 template <class charT, class traits_type, class Allocator>
53 bool BOOST_RE_CALL re_maybe_set_member(charT c,
55 const reg_expression<charT, traits_type, Allocator>& e)
57 const charT* p = (const charT*)(set_+1);
58 bool icase = e.flags() & regbase::icase;
59 charT col = e.get_traits().translate(c, icase);
60 for(unsigned int i = 0; i < set_->csingles; ++i)
63 return set_->isnot ? false : true;
68 return set_->isnot ? true : false;
71 } // namespace re_detail
73 #if defined(BOOST_RE_NO_TEMPLATE_SWITCH_MERGE) && !defined(BOOST_RE_NO_NAMESPACES)
76 // templates don't merge if they contain switch statements so declare these
77 // templates in unnamed namespace (ie with internal linkage), each translation
78 // unit then gets its own local copy, it works seamlessly but bloats the app.
82 template <class charT, class traits, class Allocator>
83 inline bool BOOST_RE_CALL reg_expression<charT, traits, Allocator>::can_start(charT c, const unsigned char* _map, unsigned char mask, const re_detail::_wide_type&)
85 if((traits_size_type)(traits_uchar_type)c >= 256)
87 return BOOST_RE_MAKE_BOOL(_map[(traits_uchar_type)c] & mask);
90 template <class charT, class traits, class Allocator>
91 inline bool BOOST_RE_CALL reg_expression<charT, traits, Allocator>::can_start(charT c, const unsigned char* _map, unsigned char mask, const re_detail::_narrow_type&)
93 return BOOST_RE_MAKE_BOOL(_map[(traits_uchar_type)c] & mask);
96 template <class charT, class traits, class Allocator>
97 CONSTRUCTOR_INLINE reg_expression<charT, traits, Allocator>::reg_expression(const Allocator& a)
98 : regbase(), data(a), pkmp(0), error_code_(REG_EMPTY)
102 template <class charT, class traits, class Allocator>
103 CONSTRUCTOR_INLINE reg_expression<charT, traits, Allocator>::reg_expression(const charT* p, flag_type f, const Allocator& a)
104 : data(a), pkmp(0), error_code_(REG_EMPTY)
106 set_expression(p, f);
109 template <class charT, class traits, class Allocator>
110 CONSTRUCTOR_INLINE reg_expression<charT, traits, Allocator>::reg_expression(const charT* p1, const charT* p2, flag_type f, const Allocator& a)
111 : data(a), pkmp(0), error_code_(REG_EMPTY)
113 set_expression(p1, p2, f);
116 template <class charT, class traits, class Allocator>
117 CONSTRUCTOR_INLINE reg_expression<charT, traits, Allocator>::reg_expression(const charT* p, size_type len, flag_type f, const Allocator& a)
118 : data(a), pkmp(0), error_code_(REG_EMPTY)
120 set_expression(p, p + len, f);
123 template <class charT, class traits, class Allocator>
124 reg_expression<charT, traits, Allocator>::reg_expression(const reg_expression<charT, traits, Allocator>& e)
125 : regbase(e), data(e.allocator()), pkmp(0), error_code_(REG_EMPTY)
128 // we do a deep copy only if e is a valid expression, otherwise fail.
130 if(e.error_code() == 0)
132 const charT* pe = e.expression();
133 set_expression(pe, pe + e._expression_len, e.flags());
136 fail(e.error_code());
139 template <class charT, class traits, class Allocator>
140 reg_expression<charT, traits, Allocator>::~reg_expression()
143 re_detail::kmp_free(pkmp, data.allocator());
146 template <class charT, class traits, class Allocator>
147 reg_expression<charT, traits, Allocator>& BOOST_RE_CALL reg_expression<charT, traits, Allocator>::operator=(const reg_expression<charT, traits, Allocator>& e)
150 // we do a deep copy only if e is a valid expression, otherwise fail.
152 if(this == &e) return *this;
154 fail(e.error_code());
155 if(error_code() == 0)
156 set_expression(e._expression, e._expression + e._expression_len, e.flags());
160 template <class charT, class traits, class Allocator>
161 inline bool BOOST_RE_CALL reg_expression<charT, traits, Allocator>::operator==(const reg_expression<charT, traits, Allocator>& e)const
163 return (_flags == e.flags())
164 && (_expression_len == e._expression_len)
165 && (std::memcmp(_expression, e._expression, _expression_len * sizeof(charT)) == 0);
168 template <class charT, class traits, class Allocator>
169 bool BOOST_RE_CALL reg_expression<charT, traits, Allocator>::operator<(const reg_expression<charT, traits, Allocator>& e)const
172 // we can't offer a diffinitive ordering, but we can be consistant:
173 if(_flags != e.flags()) return _flags < e.flags();
174 if(_expression_len != e._expression_len) return _expression_len < e._expression_len;
175 return std::memcmp(expression(), e.expression(), _expression_len);
178 template <class charT, class traits, class Allocator>
179 Allocator BOOST_RE_CALL reg_expression<charT, traits, Allocator>::allocator()const
181 return data.allocator();
184 template <class charT, class traits, class Allocator>
185 Allocator BOOST_RE_CALL reg_expression<charT, traits, Allocator>::get_allocator()const
187 return data.allocator();
190 template <class charT, class traits, class Allocator>
191 unsigned int BOOST_RE_CALL reg_expression<charT, traits, Allocator>::parse_inner_set(const charT*& first, const charT* last)
194 // we have an inner [...] construct
196 jm_assert(traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*first) == traits_type::syntax_open_set);
197 const charT* base = first;
198 while( (first != last)
199 && (traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*first) != traits_type::syntax_close_set) )
206 if(*(base+1) != *(first-2))
208 unsigned int result = traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*(base+1));
209 if((result == traits_type::syntax_colon) && ((first-base) == 5))
211 return traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*(base+2));
213 return ((result == traits_type::syntax_colon) || (result == traits_type::syntax_dot) || (result == traits_type::syntax_equal)) ? result : 0;
217 template <class charT, class traits, class Allocator>
218 bool BOOST_RE_CALL reg_expression<charT, traits, Allocator>::skip_space(const charT*& first, const charT* last)
221 // returns true if we get to last:
223 while((first != last) && (traits_inst.is_class(*first, traits_type::char_class_space) == true))
227 return first == last;
230 template <class charT, class traits, class Allocator>
231 void BOOST_RE_CALL reg_expression<charT, traits, Allocator>::parse_range(const charT*& ptr, const charT* end, unsigned& min, unsigned& max)
234 // we have {x} or {x,} or {x,y} NB no spaces inside braces
235 // anything else is illegal
236 // On input ptr points to "{"
239 if(skip_space(ptr, end))
244 if(traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*ptr) != traits_type::syntax_digit)
249 min = traits_inst.toi(ptr, end, 10);
250 if(skip_space(ptr, end))
255 if(traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*ptr) == traits_type::syntax_comma)
257 //we have a second interval:
259 if(skip_space(ptr, end))
264 if(traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*ptr) == traits_type::syntax_digit)
265 max = traits_inst.toi(ptr, end, 10);
273 if(skip_space(ptr, end))
283 if(_flags & bk_braces)
285 if(traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*ptr) != traits_type::syntax_slash)
292 // back\ is OK now check the }
294 if((ptr == end) || (traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*ptr) != traits_type::syntax_close_brace))
301 else if(traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*ptr) != traits_type::syntax_close_brace)
308 template <class charT, class traits, class Allocator>
309 charT BOOST_RE_CALL reg_expression<charT, traits, Allocator>::parse_escape(const charT*& first, const charT* last)
312 traits_size_type c_unsigned = (traits_size_type)(traits_uchar_type)*first;
313 // this is only used for the switch(), but cannot be folded in
314 // due to a bug in Comeau 4.2.44beta3
315 traits_size_type syntax = traits_inst.syntax_type(c_unsigned);
318 case traits_type::syntax_a:
322 case traits_type::syntax_f:
326 case traits_type::syntax_n:
330 case traits_type::syntax_r:
334 case traits_type::syntax_t:
338 case traits_type::syntax_v:
342 case traits_type::syntax_x:
349 // maybe have \x{ddd}
350 if(traits_inst.syntax_type((traits_size_type)(traits_uchar_type)(*first)) == traits_type::syntax_open_brace)
358 if(traits_inst.is_class(*first, traits_type::char_class_xdigit) == false)
363 c = (charT)traits_inst.toi(first, last, -16);
364 if((first == last) || (traits_inst.syntax_type((traits_size_type)(traits_uchar_type)(*first)) != traits_type::syntax_close_brace))
373 if(traits_inst.is_class(*first, traits_type::char_class_xdigit) == false)
378 c = (charT)traits_inst.toi(first, last, -16);
381 case traits_type::syntax_c:
388 if(((traits_uchar_type)(*first) < (traits_uchar_type)'@')
389 || ((traits_uchar_type)(*first) > (traits_uchar_type)127) )
394 c = (charT)((traits_uchar_type)(*first) - (traits_uchar_type)'@');
397 case traits_type::syntax_e:
401 case traits_type::syntax_digit:
402 c = (charT)traits_inst.toi(first, last, -8);
411 template <class charT, class traits, class Allocator>
412 void BOOST_RE_CALL reg_expression<charT, traits, Allocator>::compile_maps()
414 re_detail::re_syntax_base* record = (re_detail::re_syntax_base*)data.data();
415 // always compile the first _map:
416 std::memset(startmap, 0, 256);
417 record->can_be_null = 0;
418 compile_map(record, startmap, NULL, re_detail::mask_all);
420 while(record->type != re_detail::syntax_element_match)
422 if((record->type == re_detail::syntax_element_alt) || (record->type == re_detail::syntax_element_rep))
424 std::memset(&(((re_detail::re_jump*)record)->_map), 0, 256);
425 record->can_be_null = 0;
426 compile_map(record->next.p, ((re_detail::re_jump*)record)->_map, &(record->can_be_null), re_detail::mask_take, ((re_detail::re_jump*)record)->alt.p);
427 compile_map(((re_detail::re_jump*)record)->alt.p, ((re_detail::re_jump*)record)->_map, &(record->can_be_null), re_detail::mask_skip);
431 record->can_be_null = 0;
432 compile_map(record, NULL, &(record->can_be_null), re_detail::mask_all);
434 record = record->next.p;
436 record->can_be_null = re_detail::mask_all;
439 template <class charT, class traits, class Allocator>
440 bool BOOST_RE_CALL reg_expression<charT, traits, Allocator>::probe_start(
441 re_detail::re_syntax_base* node, charT cc, re_detail::re_syntax_base* terminal) const
447 case re_detail::syntax_element_startmark:
448 case re_detail::syntax_element_endmark:
449 case re_detail::syntax_element_start_line:
450 case re_detail::syntax_element_word_boundary:
451 case re_detail::syntax_element_buffer_start:
452 case re_detail::syntax_element_restart_continue:
453 // doesn't tell us anything about the next character, so:
454 return probe_start(node->next.p, cc, terminal);
455 case re_detail::syntax_element_literal:
456 // only the first character of the literal can match:
457 // note these have already been translated:
458 if(*(charT*)(((re_detail::re_literal*)node)+1) == traits_inst.translate(cc, (_flags & regbase::icase)))
461 case re_detail::syntax_element_end_line:
462 // next character (if there is one!) must be a newline:
463 if(traits_inst.is_separator(traits_inst.translate(cc, (_flags & regbase::icase))))
466 case re_detail::syntax_element_wild:
468 case re_detail::syntax_element_match:
470 case re_detail::syntax_element_within_word:
471 case re_detail::syntax_element_word_start:
472 return traits_inst.is_class(traits_inst.translate(cc, (_flags & regbase::icase)), traits_type::char_class_word);
473 case re_detail::syntax_element_word_end:
474 // what follows must not be a word character,
475 return traits_inst.is_class(traits_inst.translate(cc, (_flags & regbase::icase)), traits_type::char_class_word) ? false : true;
476 case re_detail::syntax_element_buffer_end:
477 // we can be null, nothing must follow,
478 // NB we assume that this is followed by
479 // re_detail::syntax_element_match, if its not then we can
480 // never match anything anyway!!
482 case re_detail::syntax_element_soft_buffer_end:
483 // we can be null, only newlines must follow,
484 // NB we assume that this is followed by
485 // re_detail::syntax_element_match, if its not then we can
486 // never match anything anyway!!
487 return traits_inst.is_separator(traits_inst.translate(cc, (_flags & regbase::icase)));
488 case re_detail::syntax_element_backref:
489 // there's no easy way to determine this
490 // which is not to say it can't be done!
493 case re_detail::syntax_element_long_set:
494 // we can not be null,
495 // we need to add already translated values in the set
496 // to values in the _map
497 return re_detail::re_maybe_set_member(cc, (re_detail::re_set_long*)node, *this) || (re_detail::re_is_set_member((const charT*)&cc, (const charT*)(&cc+1), (re_detail::re_set_long*)node, *this) != &cc);
498 case re_detail::syntax_element_set:
499 // set all the elements that are set in corresponding set:
500 c = (traits_size_type)(traits_uchar_type)traits_inst.translate(cc, (_flags & regbase::icase));
501 return ((re_detail::re_set*)node)->_map[c] != 0;
502 case re_detail::syntax_element_jump:
503 if(((re_detail::re_jump*)node)->alt.p < node)
506 // caused only by end of repeat section, we'll treat this
507 // the same as a match, because the sub-expression has matched.
508 if(node->next.p == terminal)
509 return true; // null repeat - we can always take this
513 // take the jump, add in fix for the fact that if the
514 // repeat that we're jumping to has non-zero minimum count
515 // then we need to add in the possibility that we could still
517 re_detail::re_syntax_base* next = ((re_detail::re_jump*)node)->alt.p;
518 bool b = probe_start(next, cc, terminal);
519 if((next->type == re_detail::syntax_element_rep) && (((re_detail::re_repeat*)next)->min != 0))
521 b = b || probe_start(((re_detail::re_jump*)next)->alt.p, cc, terminal);
527 // take the jump and compile:
528 return probe_start(((re_detail::re_jump*)node)->alt.p, cc, terminal);
529 case re_detail::syntax_element_alt:
530 // we need to take the OR of the two alternatives:
531 return probe_start(((re_detail::re_jump*)node)->alt.p, cc, terminal) || probe_start(node->next.p, cc, terminal);
532 case re_detail::syntax_element_rep:
533 // we need to take the OR of the two alternatives
534 if(((re_detail::re_repeat*)node)->min == 0)
535 return probe_start(node->next.p, cc, ((re_detail::re_jump*)node)->alt.p) || probe_start(((re_detail::re_jump*)node)->alt.p, cc, terminal);
537 return probe_start(node->next.p, cc, ((re_detail::re_jump*)node)->alt.p);
538 case re_detail::syntax_element_combining:
539 return !traits_inst.is_combining(traits_inst.translate(cc, (_flags & regbase::icase)));
544 template <class charT, class traits, class Allocator>
545 bool BOOST_RE_CALL reg_expression<charT, traits, Allocator>::probe_start_null(re_detail::re_syntax_base* node, re_detail::re_syntax_base* terminal)const
549 case re_detail::syntax_element_startmark:
550 case re_detail::syntax_element_endmark:
551 case re_detail::syntax_element_start_line:
552 case re_detail::syntax_element_word_boundary:
553 case re_detail::syntax_element_buffer_start:
554 case re_detail::syntax_element_restart_continue:
555 case re_detail::syntax_element_end_line:
556 case re_detail::syntax_element_word_end:
557 // doesn't tell us anything about the next character, so:
558 return probe_start_null(node->next.p, terminal);
559 case re_detail::syntax_element_match:
560 case re_detail::syntax_element_buffer_end:
561 case re_detail::syntax_element_soft_buffer_end:
562 case re_detail::syntax_element_backref:
564 case re_detail::syntax_element_jump:
565 if(((re_detail::re_jump*)node)->alt.p < node)
568 // caused only by end of repeat section, we'll treat this
569 // the same as a match, because the sub-expression has matched.
570 // this is only caused by NULL repeats as in "(a*)*" or "(\<)*"
571 // these are really nonsense and make the matching code much
572 // harder, it would be nice to get rid of them altogether.
573 if(node->next.p == terminal)
576 return probe_start_null(((re_detail::re_jump*)node)->alt.p, terminal);
579 // take the jump and compile:
580 return probe_start_null(((re_detail::re_jump*)node)->alt.p, terminal);
581 case re_detail::syntax_element_alt:
582 // we need to take the OR of the two alternatives:
583 return probe_start_null(((re_detail::re_jump*)node)->alt.p, terminal) || probe_start_null(node->next.p, terminal);
584 case re_detail::syntax_element_rep:
585 // only need to consider skipping the repeat:
586 return probe_start_null(((re_detail::re_jump*)node)->alt.p, terminal);
593 template <class charT, class traits, class Allocator>
594 void BOOST_RE_CALL reg_expression<charT, traits, Allocator>::compile_map(
595 re_detail::re_syntax_base* node, unsigned char* _map,
596 unsigned int* pnull, unsigned char mask, re_detail::re_syntax_base* terminal)const
600 for(unsigned int i = 0; i < 256; ++i)
602 if(probe_start(node, (charT)i, terminal))
606 if(pnull && probe_start_null(node, terminal))
610 template <class charT, class traits, class Allocator>
611 void BOOST_RE_CALL reg_expression<charT, traits, Allocator>::move_offsets(re_detail::re_syntax_base* j, unsigned size)
613 // move all offsets starting with j->link forward by size
614 // called after an insert:
615 j = (re_detail::re_syntax_base*)((const char*)data.data() + j->next.i);
620 case re_detail::syntax_element_rep:
621 ((re_detail::re_jump*)j)->alt.i += size;
624 case re_detail::syntax_element_jump:
625 case re_detail::syntax_element_alt:
626 ((re_detail::re_jump*)j)->alt.i += size;
633 if(j->next.i == size)
635 j = (re_detail::re_syntax_base*)((const char*)data.data() + j->next.i);
639 template <class charT, class traits, class Allocator>
640 re_detail::re_syntax_base* BOOST_RE_CALL reg_expression<charT, traits, Allocator>::compile_set_simple(re_detail::re_syntax_base* dat, unsigned long cls, bool isnot)
642 re_detail::jstack<traits_string_type, Allocator> singles(64, data.allocator());
643 re_detail::jstack<traits_string_type, Allocator> ranges(64, data.allocator());
644 re_detail::jstack<jm_uintfast32_t, Allocator> classes(64, data.allocator());
645 re_detail::jstack<traits_string_type, Allocator> equivalents(64, data.allocator());
650 dat->next.i = data.size();
652 return compile_set_aux(singles, ranges, classes, equivalents, isnot, re_detail::is_byte<charT>::width_type());
655 template <class charT, class traits, class Allocator>
656 re_detail::re_syntax_base* BOOST_RE_CALL reg_expression<charT, traits, Allocator>::compile_set(const charT*& first, const charT* last)
658 re_detail::jstack<traits_string_type, Allocator> singles(64, data.allocator());
659 re_detail::jstack<traits_string_type, Allocator> ranges(64, data.allocator());
660 re_detail::jstack<jm_uintfast32_t, Allocator> classes(64, data.allocator());
661 re_detail::jstack<traits_string_type, Allocator> equivalents(64, data.allocator());
662 bool has_digraphs = false;
663 jm_assert(traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*first) == traits_type::syntax_open_set);
665 bool started = false;
676 unsigned l = last_none;
677 traits_string_type s;
679 while((first != last) && !done)
681 traits_size_type c = (traits_size_type)(traits_uchar_type)*first;
682 // this is only used for the switch(), but cannot be folded in
683 // due to a bug in Comeau 4.2.44beta3
684 traits_size_type syntax = traits_inst.syntax_type(c);
687 case traits_type::syntax_caret:
688 if(!started && !isnot)
695 goto char_set_literal;
698 case traits_type::syntax_open_set:
700 if((_flags & char_classes) == 0)
703 goto char_set_literal;
705 // check to see if we really have a class:
706 const charT* base = first;
707 // this is only used for the switch(), but cannot be folded in
708 // due to a bug in Comeau 4.2.44beta3
709 unsigned int inner_set = parse_inner_set(first, last);
712 case traits_type::syntax_colon:
719 jm_uintfast32_t id = traits_inst.lookup_classname(base+2, first-2);
720 if(_flags & regbase::icase)
722 if((id == traits_type::char_class_upper) || (id == traits_type::char_class_lower))
724 id = traits_type::char_class_alpha;
737 case traits_type::syntax_dot:
739 // we have a collating element [.collating-name.]
741 if(traits_inst.lookup_collatename(s, base+2, first-2))
746 if(s.size())goto char_set_literal;
750 case traits_type::syntax_equal:
752 // we have an equivalence class [=collating-name=]
754 if(traits_inst.lookup_collatename(s, base+2, first-2))
756 unsigned len = s.size();
762 s[i] = traits_inst.translate(s[i], (_flags & regbase::icase));
765 traits_string_type s2;
766 traits_inst.transform_primary(s2, s);
767 equivalents.push(s2);
775 case traits_type::syntax_left_word:
776 if((started == false) && (traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*first) == traits_type::syntax_close_set))
779 return add_simple(0, re_detail::syntax_element_word_start);
783 case traits_type::syntax_right_word:
784 if((started == false) && (traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*first) == traits_type::syntax_close_set))
787 return add_simple(0, re_detail::syntax_element_word_end);
794 unsigned int t = traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*(base+1));
795 if((t != traits_type::syntax_colon) && (t != traits_type::syntax_dot) && (t != traits_type::syntax_equal))
799 goto char_set_literal;
812 case traits_type::syntax_close_set:
816 goto char_set_literal;
820 case traits_type::syntax_dash:
824 goto char_set_literal;
827 if(traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*first) == traits_type::syntax_close_set)
831 goto char_set_literal;
833 if((singles.empty() == true) || (l != last_single))
838 ranges.push(singles.peek());
839 if(singles.peek().size() <= 1) // leave digraphs and ligatures in place
843 case traits_type::syntax_slash:
844 if(_flags & regbase::escape_in_lists)
849 traits_size_type c = (traits_size_type)(traits_uchar_type)*first;
850 // this is only used for the switch(), but cannot be folded in
851 // due to a bug in Comeau 4.2.44beta3
852 traits_size_type syntax = traits_inst.syntax_type(c);
855 case traits_type::syntax_w:
861 classes.push(traits_type::char_class_word);
866 case traits_type::syntax_d:
872 classes.push(traits_type::char_class_digit);
877 case traits_type::syntax_s:
883 classes.push(traits_type::char_class_space);
888 case traits_type::syntax_l:
894 classes.push(traits_type::char_class_lower);
899 case traits_type::syntax_u:
905 classes.push(traits_type::char_class_upper);
910 case traits_type::syntax_W:
911 case traits_type::syntax_D:
912 case traits_type::syntax_S:
913 case traits_type::syntax_U:
914 case traits_type::syntax_L:
918 c = parse_escape(first, last);
921 goto char_set_literal;
927 goto char_set_literal;
933 // get string length to stop us going past the end of string (DWA)
934 unsigned len = s.size();
937 s[i] = traits_inst.translate(s[i], (_flags & regbase::icase));
945 if(s.size() > 1) // add ligatures to singles list as well
959 re_detail::re_syntax_base* result;
961 result = compile_set_aux(singles, ranges, classes, equivalents, isnot, re_detail::_wide_type());
963 result = compile_set_aux(singles, ranges, classes, equivalents, isnot, re_detail::is_byte<charT>::width_type());
966 if((result == 0) && (_flags & regbase::use_except))
972 template <class charT, class traits, class Allocator>
973 re_detail::re_syntax_base* BOOST_RE_CALL reg_expression<charT, traits, Allocator>::compile_set_aux(re_detail::jstack<traits_string_type, Allocator>& singles, re_detail::jstack<traits_string_type, Allocator>& ranges, re_detail::jstack<jm_uintfast32_t, Allocator>& classes, re_detail::jstack<traits_string_type, Allocator>& equivalents, bool isnot, const re_detail::_wide_type&)
975 size_type base = data.size();
976 data.extend(sizeof(re_detail::re_set_long));
977 unsigned int csingles = 0;
978 unsigned int cranges = 0;
979 jm_uintfast32_t cclasses = 0;
980 unsigned int cequivalents = 0;
981 bool nocollate_state = flags() & regbase::nocollate;
983 while(singles.empty() == false)
986 const traits_string_type& s = singles.peek();
987 unsigned len = (s.size() + 1) * sizeof(charT);
988 std::memcpy((charT*)data.extend(len), s.c_str(), len);
991 while(ranges.empty() == false)
993 traits_string_type c1, c2;
997 traits_inst.transform(c1, ranges.peek());
1002 traits_inst.transform(c2, ranges.peek());
1006 // for some reason bc5 crashes when throwing exceptions
1007 // from here - probably an EH-compiler bug, but hard to
1009 // delay throw to later:
1011 jm_uintfast32_t f = _flags;
1012 _flags &= ~regbase::use_except;
1021 unsigned len = (re_detail::re_strlen(c1.c_str()) + 1) * sizeof(charT);
1022 std::memcpy(data.extend(len), c1.c_str(), len);
1023 len = (re_detail::re_strlen(c2.c_str()) + 1) * sizeof(charT);
1024 std::memcpy(data.extend(len), c2.c_str(), len);
1026 while(classes.empty() == false)
1028 cclasses |= classes.peek();
1031 while(equivalents.empty() == false)
1034 const traits_string_type& s = equivalents.peek();
1035 unsigned len = (re_detail::re_strlen(s.c_str()) + 1) * sizeof(charT);
1036 std::memcpy((charT*)data.extend(len), s.c_str(), len);
1040 re_detail::re_set_long* dat = (re_detail::re_set_long*)((unsigned char*)data.data() + base);
1041 dat->type = re_detail::syntax_element_long_set;
1042 dat->csingles = csingles;
1043 dat->cranges = cranges;
1044 dat->cclasses = cclasses;
1045 dat->cequivalents = cequivalents;
1051 template <class charT, class traits, class Allocator>
1052 re_detail::re_syntax_base* BOOST_RE_CALL reg_expression<charT, traits, Allocator>::compile_set_aux(re_detail::jstack<traits_string_type, Allocator>& singles, re_detail::jstack<traits_string_type, Allocator>& ranges, re_detail::jstack<jm_uintfast32_t, Allocator>& classes, re_detail::jstack<traits_string_type, Allocator>& equivalents, bool isnot, const re_detail::_narrow_type&)
1054 re_detail::re_set* dat = (re_detail::re_set*)data.extend(sizeof(re_detail::re_set));
1055 std::memset(dat, 0, sizeof(re_detail::re_set));
1057 while(singles.empty() == false)
1059 dat->_map[(traits_size_type)(traits_uchar_type)*(singles.peek().c_str())] = re_detail::mask_all;
1062 while(ranges.empty() == false)
1064 traits_string_type c1, c2, c3, c4;
1066 if(flags() & regbase::nocollate)
1069 traits_inst.transform(c1, ranges.peek());
1071 if(flags() & regbase::nocollate)
1074 traits_inst.transform(c2, ranges.peek());
1079 // for some reason bc5 crashes when throwing exceptions
1080 // from here - probably an EH-compiler bug, but hard to
1082 // delay throw to later:
1084 jm_uintfast32_t f = _flags;
1085 _flags &= ~regbase::use_except;
1093 for(unsigned int i = 0; i < 256; ++i)
1096 if(flags() & regbase::nocollate)
1099 traits_inst.transform(c3, c4);
1100 if((c3 <= c1) && (c3 >= c2))
1101 dat->_map[i] = re_detail::mask_all;
1104 while(equivalents.empty() == false)
1106 traits_string_type c1, c2;
1107 for(unsigned int i = 0; i < 256; ++i)
1110 traits_inst.transform_primary(c1, c2);
1111 if(c1 == equivalents.peek())
1112 dat->_map[i] = re_detail::mask_all;
1117 jm_uintfast32_t flags = 0;
1118 while(classes.empty() == false)
1120 flags |= classes.peek();
1125 for(unsigned int i = 0; i < 256; ++i)
1127 if(traits_inst.is_class(charT(i), flags))
1128 dat->_map[(traits_uchar_type)traits_inst.translate((charT)i, (_flags & regbase::icase))] = re_detail::mask_all;
1134 for(unsigned int i = 0; i < 256; ++i)
1136 dat->_map[i] = !dat->_map[i];
1140 dat->type = re_detail::syntax_element_set;
1146 template <class charT, class traits, class Allocator>
1147 void BOOST_RE_CALL reg_expression<charT, traits, Allocator>::fixup_apply(re_detail::re_syntax_base* b, unsigned cbraces)
1149 typedef BOOST_RE_MAYBE_TYPENAME REBIND_TYPE(bool, Allocator) b_alloc;
1151 register unsigned char* base = (unsigned char*)b;
1152 register re_detail::re_syntax_base* ptr = b;
1154 b_alloc a(data.allocator());
1157 pb = a.allocate(cbraces);
1158 for(unsigned i = 0; i < cbraces; ++i)
1167 case re_detail::syntax_element_rep:
1168 ((re_detail::re_jump*)ptr)->alt.p = (re_detail::re_syntax_base*)(base + ((re_detail::re_jump*)ptr)->alt.i);
1169 #ifdef BOOST_RE_DEBUG
1170 if((re_detail::padding_mask & (int)((re_detail::re_jump*)ptr)->alt.p) && (((re_detail::re_jump*)ptr)->alt.p != b))
1172 jm_trace("padding mis-aligment in repeat jump to object type: " << ((re_detail::re_jump*)ptr)->alt.p->type)
1173 //jm_assert(0 == (padding_mask & (int)((re_detail::re_jump*)ptr)->alt.p));
1176 ((re_detail::re_repeat*)ptr)->id = repeats;
1179 case re_detail::syntax_element_jump:
1180 case re_detail::syntax_element_alt:
1181 ((re_detail::re_jump*)ptr)->alt.p = (re_detail::re_syntax_base*)(base + ((re_detail::re_jump*)ptr)->alt.i);
1182 #ifdef BOOST_RE_DEBUG
1183 if((re_detail::padding_mask & (int)((re_detail::re_jump*)ptr)->alt.p) && (((re_detail::re_jump*)ptr)->alt.p != b))
1185 jm_trace("padding mis-aligment in alternation jump to object type: " << ((re_detail::re_jump*)ptr)->alt.p->type)
1186 //jm_assert(0 == (padding_mask & (int)((re_detail::re_jump*)ptr)->alt.p));
1190 case re_detail::syntax_element_backref:
1191 if((((re_detail::re_brace*)ptr)->index >= cbraces) || (pb[((re_detail::re_brace*)ptr)->index] == false) )
1194 a.deallocate(pb, cbraces);
1198 case re_detail::syntax_element_endmark:
1199 pb[((re_detail::re_brace*)ptr)->index] = true;
1203 ptr->next.p = (re_detail::re_syntax_base*)(base + ptr->next.i);
1204 #ifdef BOOST_RE_DEBUG
1205 if((re_detail::padding_mask & (int)(ptr->next.p)) && (((re_detail::re_jump*)ptr)->alt.p != b))
1207 jm_trace("padding mis-alignment in next record of type " << ptr->next.p->type)
1208 jm_assert(0 == (re_detail::padding_mask & (int)(ptr->next.p)));
1214 a.deallocate(pb, cbraces);
1220 a.deallocate(pb, cbraces);
1226 template <class charT, class traits, class Allocator>
1227 unsigned int BOOST_RE_CALL reg_expression<charT, traits, Allocator>::set_expression(const charT* p, const charT* end, flag_type f)
1229 if(p == expression())
1231 traits_string_type s(p, end);
1232 return set_expression(s.c_str(), s.c_str() + s.size(), f);
1234 typedef typename traits_type::sentry sentry_t;
1235 sentry_t sent(traits_inst);
1238 const charT* base = p;
1241 fail(REG_NOERROR); // clear any error
1246 return error_code();
1249 const charT* ptr = p;
1251 re_detail::jstack<unsigned int, Allocator> mark(64, data.allocator());
1252 re_detail::jstack<unsigned int, Allocator> markid(64, data.allocator());
1253 unsigned int last_mark_popped = 0;
1254 register traits_size_type c;
1255 register re_detail::re_syntax_base* dat;
1257 unsigned rep_min = 0;
1258 unsigned rep_max = 0;
1266 if(_flags & regbase::literal)
1270 dat = add_literal(dat, traits_inst.translate(*ptr, (_flags & regbase::icase)));
1277 c = (traits_size_type)(traits_uchar_type)*ptr;
1278 // this is only used for the switch(), but cannot be folded in
1279 // due to a bug in Comeau 4.2.44beta3
1280 traits_size_type syntax = traits_inst.syntax_type(c);
1283 case traits_type::syntax_open_bracket:
1284 if(_flags & bk_parens)
1286 dat = add_literal(dat, (charT)c);
1292 dat = add_simple(dat, re_detail::syntax_element_startmark, sizeof(re_detail::re_brace));
1294 ((re_detail::re_brace*)dat)->index = marks++;
1295 mark.push(data.index(dat));
1298 // check for perl like (?...) extention syntax
1299 c = (traits_size_type)(traits_uchar_type)*ptr;
1300 if(((_flags & bk_parens) == 0) && (traits_type::syntax_question == traits_inst.syntax_type(c)))
1303 c = (traits_size_type)(traits_uchar_type)*ptr;
1304 // this is only used for the switch(), but cannot be folded in
1305 // due to a bug in Comeau 4.2.44beta3
1306 traits_size_type syntax = traits_inst.syntax_type(c);
1309 case traits_type::syntax_colon:
1310 ((re_detail::re_brace*)dat)->index = 0;
1316 case traits_type::syntax_hash:
1317 // comment just skip it:
1318 ((re_detail::re_brace*)dat)->index = 0;
1324 c = (traits_size_type)(traits_uchar_type)*ptr;
1325 }while(traits_type::syntax_close_bracket != traits_inst.syntax_type(c));
1330 // error, return to standard parsing and let that handle the error:
1336 case traits_type::syntax_close_bracket:
1337 if(_flags & bk_parens)
1339 dat = add_literal(dat, (charT)c);
1348 dat->next.i = data.size();
1354 return error_code();
1356 // see if we have an empty alternative:
1357 if(mark.peek() == data.index(dat) )
1359 re_detail::re_syntax_base* para = (re_detail::re_syntax_base*)((char*)data.data() + mark.peek());
1360 if(para->type == re_detail::syntax_element_jump)
1363 return error_code();
1367 // pop any pushed alternatives and set the target end destination:
1368 dat = (re_detail::re_syntax_base*)((unsigned char*)data.data() + mark.peek());
1369 while(dat->type == re_detail::syntax_element_jump)
1371 ((re_detail::re_jump*)dat)->alt.i = data.size();
1373 dat = (re_detail::re_jump*)((unsigned char*)data.data() + mark.peek());
1377 return error_code();
1381 dat = add_simple(0, re_detail::syntax_element_endmark, sizeof(re_detail::re_brace));
1382 ((re_detail::re_brace*)dat)->index = markid.peek();
1384 last_mark_popped = mark.peek();
1388 case traits_type::syntax_char:
1389 dat = add_literal(dat, (charT)c);
1392 case traits_type::syntax_slash:
1397 return error_code();
1399 c = (traits_size_type)(traits_uchar_type)*ptr;
1400 // this is only used for the switch(), but cannot be folded in
1401 // due to a bug in Comeau 4.2.44beta3
1402 traits_size_type syntax = traits_inst.syntax_type(c);
1405 case traits_type::syntax_open_bracket:
1406 if(_flags & bk_parens)
1407 goto open_bracked_jump;
1409 case traits_type::syntax_close_bracket:
1410 if(_flags & bk_parens)
1411 goto close_bracked_jump;
1413 case traits_type::syntax_plus:
1414 if((_flags & bk_plus_qm) && ((_flags & limited_ops) == 0))
1417 rep_max = (unsigned)-1;
1421 case traits_type::syntax_question:
1422 if((_flags & bk_plus_qm) && ((_flags & limited_ops) == 0))
1429 case traits_type::syntax_or:
1430 if(((_flags & bk_vbar) == 0) || (_flags & limited_ops))
1432 goto alt_string_jump;
1433 case traits_type::syntax_open_brace:
1434 if( ((_flags & bk_braces) == 0) || ((_flags & intervals) == 0))
1437 // we have {x} or {x,} or {x,y}:
1438 parse_range(ptr, end, rep_min, rep_max);
1441 case traits_type::syntax_digit:
1442 if(_flags & bk_refs)
1445 int i = traits_inst.toi((charT)c);
1448 // we can have \025 which means take char whose
1449 // code is 25 (octal), so parse string:
1450 c = traits_inst.toi(ptr, end, -8);
1454 dat = add_simple(dat, re_detail::syntax_element_backref, sizeof(re_detail::re_brace));
1455 ((re_detail::re_brace*)dat)->index = i;
1460 case traits_type::syntax_b: // re_detail::syntax_element_word_boundary
1461 dat = add_simple(dat, re_detail::syntax_element_word_boundary);
1464 case traits_type::syntax_B:
1465 dat = add_simple(dat, re_detail::syntax_element_within_word);
1468 case traits_type::syntax_left_word:
1469 dat = add_simple(dat, re_detail::syntax_element_word_start);
1472 case traits_type::syntax_right_word:
1473 dat = add_simple(dat, re_detail::syntax_element_word_end);
1476 case traits_type::syntax_w: //re_detail::syntax_element_word_char
1477 dat = compile_set_simple(dat, traits_type::char_class_word);
1480 case traits_type::syntax_W:
1481 dat = compile_set_simple(dat, traits_type::char_class_word, true);
1484 case traits_type::syntax_d: //re_detail::syntax_element_word_char
1485 dat = compile_set_simple(dat, traits_type::char_class_digit);
1488 case traits_type::syntax_D:
1489 dat = compile_set_simple(dat, traits_type::char_class_digit, true);
1492 case traits_type::syntax_s: //re_detail::syntax_element_word_char
1493 dat = compile_set_simple(dat, traits_type::char_class_space);
1496 case traits_type::syntax_S:
1497 dat = compile_set_simple(dat, traits_type::char_class_space, true);
1500 case traits_type::syntax_l: //re_detail::syntax_element_word_char
1501 dat = compile_set_simple(dat, traits_type::char_class_lower);
1504 case traits_type::syntax_L:
1505 dat = compile_set_simple(dat, traits_type::char_class_lower, true);
1508 case traits_type::syntax_u: //re_detail::syntax_element_word_char
1509 dat = compile_set_simple(dat, traits_type::char_class_upper);
1512 case traits_type::syntax_U:
1513 dat = compile_set_simple(dat, traits_type::char_class_upper, true);
1516 case traits_type::syntax_Q:
1523 return error_code();
1525 if(traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*ptr) == traits_type::syntax_slash)
1528 if((ptr != end) && (traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*ptr) == traits_type::syntax_E))
1532 dat = add_literal(dat, *(ptr-1));
1536 dat = add_literal(dat, *ptr);
1541 case traits_type::syntax_C:
1542 dat = add_simple(dat, re_detail::syntax_element_wild);
1545 case traits_type::syntax_X:
1546 dat = add_simple(dat, re_detail::syntax_element_combining);
1549 case traits_type::syntax_Z:
1550 dat = add_simple(dat, re_detail::syntax_element_soft_buffer_end);
1553 case traits_type::syntax_G:
1554 dat = add_simple(dat, re_detail::syntax_element_restart_continue);
1557 case traits_type::syntax_start_buffer:
1558 dat = add_simple(dat, re_detail::syntax_element_buffer_start);
1561 case traits_type::syntax_end_buffer:
1562 dat = add_simple(dat, re_detail::syntax_element_buffer_end);
1566 c = (traits_size_type)(traits_uchar_type)parse_escape(ptr, end);
1567 dat = add_literal(dat, (charT)c);
1570 dat = add_literal(dat, (charT)c);
1574 case traits_type::syntax_dollar:
1575 dat = add_simple(dat, re_detail::syntax_element_end_line, sizeof(re_detail::re_syntax_base));
1578 case traits_type::syntax_caret:
1579 dat = add_simple(dat, re_detail::syntax_element_start_line, sizeof(re_detail::re_syntax_base));
1582 case traits_type::syntax_dot:
1583 dat = add_simple(dat, re_detail::syntax_element_wild, sizeof(re_detail::re_syntax_base));
1586 case traits_type::syntax_star:
1588 rep_max = (unsigned)-1;
1596 return error_code();
1600 case re_detail::syntax_element_endmark:
1601 offset = last_mark_popped;
1603 case re_detail::syntax_element_literal:
1604 if(((re_detail::re_literal*)dat)->length > 1)
1607 charT lit = *(charT*)((char*)dat + sizeof(re_detail::re_literal) + ((((re_detail::re_literal*)dat)->length-1)*sizeof(charT)));
1608 --((re_detail::re_literal*)dat)->length;
1609 dat = add_simple(dat, re_detail::syntax_element_literal, sizeof(re_detail::re_literal) + sizeof(charT));
1610 ((re_detail::re_literal*)dat)->length = 1;
1611 *((charT*)(((re_detail::re_literal*)dat)+1)) = lit;
1613 offset = (char*)dat - (char*)data.data();
1615 case re_detail::syntax_element_backref:
1616 case re_detail::syntax_element_long_set:
1617 case re_detail::syntax_element_set:
1618 case re_detail::syntax_element_wild:
1619 case re_detail::syntax_element_combining:
1620 // we're repeating a single item:
1621 offset = (char*)dat - (char*)data.data();
1625 return error_code();
1628 dat->next.i = data.size();
1629 //unsigned pos = (char*)dat - (char*)data.data();
1631 // add the trailing jump:
1632 dat = add_simple(dat, re_detail::syntax_element_jump, re_detail::re_jump_size);
1633 ((re_detail::re_jump*)dat)->alt.i = 0;
1635 // now insert the leading repeater:
1636 dat = (re_detail::re_syntax_base*)data.insert(offset, re_detail::re_repeater_size);
1637 dat->next.i = ((char*)dat - (char*)data.data()) + re_detail::re_repeater_size;
1638 dat->type = re_detail::syntax_element_rep;
1639 ((re_detail::re_repeat*)dat)->alt.i = data.size();
1640 ((re_detail::re_repeat*)dat)->min = rep_min;
1641 ((re_detail::re_repeat*)dat)->max = rep_max;
1642 ((re_detail::re_repeat*)dat)->leading = false;
1643 ((re_detail::re_repeat*)dat)->greedy = true;
1644 move_offsets(dat, re_detail::re_repeater_size);
1647 // now check to see if we have a non-greedy repeat:
1648 if((ptr != end) && (_flags & (limited_ops | bk_plus_qm | bk_braces)) == 0)
1650 c = (traits_size_type)(traits_uchar_type)*ptr;
1651 if(traits_type::syntax_question == traits_inst.syntax_type(c))
1653 // OK repeat is non-greedy:
1654 ((re_detail::re_repeat*)dat)->greedy = false;
1658 dat = (re_detail::re_syntax_base*)((char*)data.data() + data.size() - re_detail::re_jump_size);
1659 ((re_detail::re_repeat*)dat)->alt.i = offset;
1662 case traits_type::syntax_plus:
1663 if(_flags & (bk_plus_qm | limited_ops))
1665 dat = add_literal(dat, (charT)c);
1670 rep_max = (unsigned)-1;
1672 case traits_type::syntax_question:
1673 if(_flags & (bk_plus_qm | limited_ops))
1675 dat = add_literal(dat, (charT)c);
1682 case traits_type::syntax_open_set:
1687 dat->next.i = data.size();
1690 dat = compile_set(ptr, end);
1693 if((_flags & regbase::failbit) == 0)
1695 return error_code();
1698 case traits_type::syntax_or:
1700 if(_flags & (bk_vbar | limited_ops))
1702 dat = add_literal(dat, (charT)c);
1712 // start of pattern can't have empty "|"
1714 return error_code();
1716 // see if we have an empty alternative:
1717 if(mark.empty() == false)
1718 if(mark.peek() == data.index(dat))
1721 return error_code();
1724 /*dat = */add_simple(dat, re_detail::syntax_element_jump, re_detail::re_jump_size);
1727 // now work out where to insert:
1728 unsigned int offset = 0;
1729 if(mark.empty() == false)
1731 // we have a '(' or '|' to go back to:
1732 offset = mark.peek();
1733 re_detail::re_syntax_base* base = (re_detail::re_syntax_base*)((unsigned char*)data.data() + offset);
1734 offset = base->next.i;
1736 re_detail::re_jump* j = (re_detail::re_jump*)data.insert(offset, re_detail::re_jump_size);
1737 j->type = re_detail::syntax_element_alt;
1738 j->next.i = offset + re_detail::re_jump_size;
1739 j->alt.i = data.size();
1740 move_offsets(j, re_detail::re_jump_size);
1741 dat = (re_detail::re_syntax_base*)((unsigned char*)data.data() + data.size() - re_detail::re_jump_size);
1742 mark.push(data.size() - re_detail::re_jump_size);
1746 case traits_type::syntax_open_brace:
1747 if((_flags & bk_braces) || ((_flags & intervals) == 0))
1749 dat = add_literal(dat, (charT)c);
1753 // we have {x} or {x,} or {x,y}:
1754 parse_range(ptr, end, rep_min, rep_max);
1756 case traits_type::syntax_newline:
1757 if(_flags & newline_alt)
1758 goto alt_string_jump;
1759 dat = add_literal(dat, (charT)c);
1762 case traits_type::syntax_close_brace:
1763 if(_flags & bk_braces)
1765 dat = add_literal(dat, (charT)c);
1770 return error_code();
1772 dat = add_literal(dat, (charT)c);
1783 dat->next.i = data.size();
1786 // see if we have an empty alternative:
1787 if(mark.empty() == false)
1788 if(mark.peek() == data.index(dat) )
1790 re_detail::re_syntax_base* para = (re_detail::re_syntax_base*)((char*)data.data() + mark.peek());
1791 if(para->type == re_detail::syntax_element_jump)
1794 return error_code();
1800 if(mark.empty() == false)
1802 // pop any pushed alternatives and set the target end destination:
1803 dat = (re_detail::re_syntax_base*)((unsigned char*)data.data() + mark.peek());
1804 while(dat->type == re_detail::syntax_element_jump)
1806 ((re_detail::re_jump*)dat)->alt.i = data.size();
1808 if(mark.empty() == true)
1810 dat = (re_detail::re_jump*)((unsigned char*)data.data() + mark.peek());
1814 dat = (re_detail::re_brace*)data.extend(sizeof(re_detail::re_syntax_base));
1815 dat->type = re_detail::syntax_element_match;
1818 if(mark.empty() == false)
1821 return error_code();
1825 // allocate space for start _map:
1826 startmap = (unsigned char*)data.extend(256 + ((end - base + 1) * sizeof(charT)));
1828 // and copy the expression we just compiled:
1829 _expression = (charT*)((const char*)startmap + 256);
1830 _expression_len = end - base;
1831 std::memcpy(_expression, base, _expression_len * sizeof(charT));
1832 *(_expression + _expression_len) = charT(0);
1835 // now we need to apply fixups to the array
1836 // so that we can use pointers and not indexes
1837 fixup_apply((re_detail::re_syntax_base*)data.data(), marks);
1839 // check for error during fixup:
1840 if(_flags & regbase::failbit)
1841 return error_code();
1844 // finally compile the maps so that we can make intelligent choices
1845 // whenever we encounter an alternative:
1849 re_detail::kmp_free(pkmp, data.allocator());
1852 re_detail::re_syntax_base* sbase = (re_detail::re_syntax_base*)data.data();
1853 _restart_type = probe_restart(sbase);
1854 _leading_len = fixup_leading_rep(sbase, 0);
1855 if((sbase->type == re_detail::syntax_element_literal) && (sbase->next.p->type == re_detail::syntax_element_match))
1857 _restart_type = restart_fixed_lit;
1860 charT* p1 = (charT*)((char*)sbase + sizeof(re_detail::re_literal));
1861 charT* p2 = p1 + ((re_detail::re_literal*)sbase)->length;
1862 pkmp = re_detail::kmp_compile(p1, p2, charT(), re_detail::kmp_translator<traits>(_flags®base::icase, &traits_inst), data.allocator());
1865 return error_code();
// add_simple: appends storage for one state record to the buffer `data`.
//   dat  - the previously emitted record (callers in this file also pass 0);
//          its next.i is set to the current buffer size, presumably the
//          offset at which the new record will start.
//   type - the kind of record to create.
//   size - requested record size in bytes; raised to sizeof(re_syntax_base)
//          when smaller.
// NOTE(review): the statements that store `type` and return the new record
// are not among the lines present here - confirm against the full source.
1871 template <class charT, class traits, class Allocator>
1872 re_detail::re_syntax_base* BOOST_RE_CALL reg_expression<charT, traits, Allocator>::add_simple(re_detail::re_syntax_base* dat, re_detail::syntax_element_type type, unsigned int size)
// link the previous record to the position at the current end of the buffer:
1877 dat->next.i = data.size();
// every record is at least as large as the common header:
1879 if(size < sizeof(re_detail::re_syntax_base))
1880 size = sizeof(re_detail::re_syntax_base);
// grow the buffer; `dat` now refers to the newly added storage:
1881 dat = (re_detail::re_syntax_base*)data.extend(size);
// add_literal: adds the character `c` to the compiled program as literal text.
// The character is stored after traits_inst.translate(c, icase-flag).
//   dat - the most recently emitted record, or 0.
//   c   - the character to add.
// If `dat` is already a literal record the character is appended to it and its
// length is incremented; the remaining statements create a new one-character
// literal record through add_simple.
// NOTE(review): the return statement is not among the lines present here.
1887 template <class charT, class traits_type, class Allocator>
1888 re_detail::re_syntax_base* BOOST_RE_CALL reg_expression<charT, traits, Allocator>::add_literal(re_detail::re_syntax_base* dat, charT c)
1890 if(dat && (dat->type == re_detail::syntax_element_literal))
1892 // add another charT to the list:
// remember `dat` as an offset, then re-derive the pointer after extending
// (presumably because extend() may move the buffer - confirm):
1893 std::ptrdiff_t pos = (unsigned char*)dat - (unsigned char*)data.data();
1894 *(charT*)data.extend(sizeof(charT)) = traits_inst.translate(c, (_flags & regbase::icase));
1895 dat = (re_detail::re_syntax_base*)((unsigned char*)data.data() + pos);
1896 ++(((re_detail::re_literal*)dat)->length);
// new literal record: header followed directly by its single character
1901 dat = add_simple(dat, re_detail::syntax_element_literal, sizeof(re_detail::re_literal) + sizeof(charT));
1902 ((re_detail::re_literal*)dat)->length = 1;
1903 *((charT*)(((re_detail::re_literal*)dat)+1)) = traits_inst.translate(c, (_flags & regbase::icase));
// probe_restart: maps the record `dat` to one of the regbase::restart_*
// values. Start/end marks are skipped by recursing on the following record;
// start-of-line, word-start, buffer-start and restart-continue records map to
// their corresponding restart kinds; the last return yields restart_any.
// NOTE(review): the switch header and default label are not among the lines
// present here - presumably the dispatch is on dat->type; confirm.
1908 template <class charT, class traits, class Allocator>
1909 unsigned int BOOST_RE_CALL reg_expression<charT, traits, Allocator>::probe_restart(re_detail::re_syntax_base* dat)
1913 case re_detail::syntax_element_startmark:
1914 case re_detail::syntax_element_endmark:
// look through the mark at the record that follows it:
1915 return probe_restart(dat->next.p);
1916 case re_detail::syntax_element_start_line:
1917 return regbase::restart_line;
1918 case re_detail::syntax_element_word_start:
1919 return regbase::restart_word;
1920 case re_detail::syntax_element_buffer_start:
1921 return regbase::restart_buf;
1922 case re_detail::syntax_element_restart_continue:
1923 return regbase::restart_continue;
1925 return regbase::restart_any;
// fixup_leading_rep: examines the records starting at `dat`.
//   dat - first record to examine.
//   end - when null, a leading literal may be recorded as the search string
//         (leading_lit starts out true only in that case); set_expression
//         passes 0, while the recursive call below passes a repeat's alt.p.
// Visible effects: `len` accumulates the lengths of literal records; a leading
// literal longer than two characters is stored in _leading_string /
// _leading_string_len, switches _restart_type to restart_lit and gets a KMP
// table in pkmp; a repeat met while len == 0 whose body measures exactly 1 is
// flagged as `leading`.
// NOTE(review): the loop structure and the return statements are not among
// the lines present here - presumably `len` is returned; confirm.
1929 template <class charT, class traits, class Allocator>
1930 unsigned int BOOST_RE_CALL reg_expression<charT, traits, Allocator>::fixup_leading_rep(re_detail::re_syntax_base* dat, re_detail::re_syntax_base* end)
1932 unsigned int len = 0;
1933 bool leading_lit = end ? false : true;
1938 case re_detail::syntax_element_literal:
1939 len += ((re_detail::re_literal*)dat)->length;
1940 if((leading_lit) && (((re_detail::re_literal*)dat)->length > 2))
1942 // we can do a literal search for the leading literal string
1943 // using Knuth-Morris-Pratt (or whatever), and only then check for
1944 // matches. We need a decent length string though to make it
// the literal's characters are stored directly after the re_literal header:
1946 _leading_string = (charT*)((char*)dat + sizeof(re_detail::re_literal));
1947 _leading_string_len = ((re_detail::re_literal*)dat)->length;
1948 _restart_type = restart_lit;
1949 leading_lit = false;
1950 const charT* p1 = _leading_string;
1951 const charT* p2 = _leading_string + _leading_string_len;
1952 pkmp = re_detail::kmp_compile(p1, p2, charT(), re_detail::kmp_translator<traits>(_flags&regbase::icase, &traits_inst), data.allocator());
1954 leading_lit = false;
1956 case re_detail::syntax_element_wild:
1958 leading_lit = false;
1960 case re_detail::syntax_element_match:
1962 case re_detail::syntax_element_backref:
1963 //case re_detail::syntax_element_jump:
1964 case re_detail::syntax_element_alt:
1965 case re_detail::syntax_element_combining:
1967 case re_detail::syntax_element_long_set:
1969 // we need to verify that there are no multi-character
1970 // collating elements inside the repeat:
// the set's single-element strings follow the re_set_long header:
1971 const charT* p = (const charT*)((const char*)dat + sizeof(re_detail::re_set_long));
1972 unsigned int csingles = ((re_detail::re_set_long*)dat)->csingles;
1973 for(unsigned int i = 0; i < csingles; ++i)
1975 if(re_detail::re_strlen(p) > 1)
1981 leading_lit = false;
1984 case re_detail::syntax_element_set:
1986 leading_lit = false;
1988 case re_detail::syntax_element_rep:
// only a repeat that comes first (len == 0) and whose body, measured from
// next.p up to alt.p, has length 1 is marked as a leading repeat:
1989 if((len == 0) && (1 == fixup_leading_rep(dat->next.p, ((re_detail::re_repeat*)dat)->alt.p) ))
1991 ((re_detail::re_repeat*)dat)->leading = true;
// fail: error-reporting hook used by the compiler.
//   err - the error code; callers also pass REG_NOERROR to clear any error.
// The visible statement throws bad_expression carrying the traits' message
// for `err`.
// NOTE(review): the conditions guarding the throw, and any state that is
// updated first, are not among the lines present here - confirm.
2003 template <class charT, class traits, class Allocator>
2004 void BOOST_RE_CALL reg_expression<charT, traits, Allocator>::fail(unsigned int err)
2008 throw bad_expression(traits_inst.error_string(err));
2012 #if defined(BOOST_RE_NO_TEMPLATE_SWITCH_MERGE) && !defined(BOOST_RE_NO_NAMESPACES)
2017 #if __BORLANDC__ > 0x520
2022 } // namespace boost
2025 #endif // BOOST_REGEX_COMPILE_HPP