3 * Copyright (c) 1998-2002
6 * Permission to use, copy, modify, distribute and sell this software
7 * and its documentation for any purpose is hereby granted without fee,
8 * provided that the above copyright notice appear in all copies and
9 * that both that copyright notice and this permission notice appear
10 * in supporting documentation. Dr John Maddock makes no representations
11 * about the suitability of this software for any purpose.
12 * It is provided "as is" without express or implied warranty.
17 * LOCATION: see http://www.boost.org for most recent version.
18 * FILE regex_compile.hpp
19 * VERSION see <boost/version.hpp>
20 * DESCRIPTION: Declares reg_expression<> member functions. This is
21 * an internal header file, do not include directly.
24 #ifndef BOOST_REGEX_COMPILE_HPP
25 #define BOOST_REGEX_COMPILE_HPP
29 #pragma option push -a8 -b -Vx -Ve -pc -w-8004
// Function-object template that maps a character through traits::translate.
// NOTE(review): the line naming the enclosing type and the declarations of
// its data members are not present in this listing; the constructor below
// shows that it stores `icase` and `pt`.
34 template <class traits>
37 typedef typename traits::char_type char_type;
// Stores the flag c in icase and the traits pointer p in pt.
40 kmp_translator(bool c, traits* p) : icase(c), pt(p) {}
// Returns whatever pt->translate(c, icase) produces for the character c.
41 char_type operator()(char_type c)
43 return pt->translate(c, icase);
// Tests the character c against the single-character entries of a long set.
//   c    - character to test; it is translated with e's traits before use.
//   set_ - set header; the charT data scanned here starts directly after it.
//   e    - owning expression, source of the flags and of the traits object.
// NOTE(review): the comparison performed inside the loop is not present in
// this listing. The visible returns yield !isnot from inside the loop and
// isnot once the loop has run to completion.
48 template <class charT, class traits_type, class Allocator>
49 bool BOOST_REGEX_CALL re_maybe_set_member(charT c,
50 const re_set_long* set_,
51 const reg_expression<charT, traits_type, Allocator>& e)
// The character data is laid out immediately behind the re_set_long header.
53 const charT* p = reinterpret_cast<const charT*>(set_+1);
54 bool icase = e.flags() & regbase::icase;
55 charT col = e.get_traits().translate(c, icase);
// One iteration per single-character entry (csingles of them).
56 for(unsigned int i = 0; i < set_->csingles; ++i)
59 return set_->isnot ? false : true;
64 return set_->isnot ? true : false;
67 } // namespace re_detail
// can_start overload selected by the re_detail::_wide_type tag argument.
// Looks c up in _map and reports whether the entry has any bit of mask set.
// NOTE(review): the statement controlled by the ">= 256" test is not present
// in this listing, so the result for such characters cannot be stated here.
70 template <class charT, class traits, class Allocator>
71 inline bool BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::can_start(charT c, const unsigned char* _map, unsigned char mask, const re_detail::_wide_type&)
73 if((traits_size_type)(traits_uchar_type)c >= 256)
// Index the map with the unsigned value of c and test it against mask.
75 return BOOST_REGEX_MAKE_BOOL(_map[(traits_uchar_type)c] & mask);
// can_start overload selected by the re_detail::_narrow_type tag argument.
// Indexes _map with the unsigned value of c and reports whether the entry
// has any bit of mask set; no range test is made in the visible code.
78 template <class charT, class traits, class Allocator>
79 inline bool BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::can_start(charT c, const unsigned char* _map, unsigned char mask, const re_detail::_narrow_type&)
81 return BOOST_REGEX_MAKE_BOOL(_map[(traits_uchar_type)c] & mask);
// Constructor taking only an allocator.
// Initialises data with a, pkmp and _expression with 0 and error_code_ with
// REG_EMPTY; no call to set_expression appears in the lines shown.
84 template <class charT, class traits, class Allocator>
85 reg_expression<charT, traits, Allocator>::reg_expression(const Allocator& a)
86 : regbase(), data(a), pkmp(0), error_code_(REG_EMPTY), _expression(0)
// Constructs from the expression text p with flags f and allocator a.
// Members start in the same state as in the allocator-only constructor, then
// the text is handed to set_expression.
// NOTE(review): p is presumably null-terminated; the two-argument
// set_expression overload is not shown here - confirm.
90 template <class charT, class traits, class Allocator>
91 reg_expression<charT, traits, Allocator>::reg_expression(const charT* p, flag_type f, const Allocator& a)
92 : data(a), pkmp(0), error_code_(REG_EMPTY), _expression(0)
// use_except is always OR-ed into the caller's flags for construction.
94 set_expression(p, f | regbase::use_except);
// Constructs from the character range [p1, p2) with flags f and allocator a.
// Members start in the same state as in the allocator-only constructor, then
// the range is handed to set_expression.
97 template <class charT, class traits, class Allocator>
98 reg_expression<charT, traits, Allocator>::reg_expression(const charT* p1, const charT* p2, flag_type f, const Allocator& a)
99 : data(a), pkmp(0), error_code_(REG_EMPTY), _expression(0)
// use_except is always OR-ed into the caller's flags for construction.
101 set_expression(p1, p2, f | regbase::use_except);
// Constructs from len characters starting at p, with flags f and allocator a.
// Equivalent to passing the range [p, p + len) to set_expression.
104 template <class charT, class traits, class Allocator>
105 reg_expression<charT, traits, Allocator>::reg_expression(const charT* p, size_type len, flag_type f, const Allocator& a)
106 : data(a), pkmp(0), error_code_(REG_EMPTY), _expression(0)
// use_except is always OR-ed into the caller's flags for construction.
108 set_expression(p, p + len, f | regbase::use_except);
// Copy constructor.
// Copies the regbase part and takes e's allocator, then either recompiles
// e's expression text or records e's error code.
// NOTE(review): the lines separating the two paths are not present in this
// listing; the last two statements presumably form the "else" branch.
111 template <class charT, class traits, class Allocator>
112 reg_expression<charT, traits, Allocator>::reg_expression(const reg_expression<charT, traits, Allocator>& e)
113 : regbase(e), data(e.allocator()), pkmp(0), error_code_(REG_EMPTY), _expression(0)
116 // we do a deep copy only if e is a valid expression, otherwise fail.
118 if(e.error_code() == 0)
// Recompile from e's stored text rather than copying its compiled form.
120 const charT* pe = e.expression();
121 set_expression(pe, pe + e._expression_len, e.flags() | regbase::use_except);
// Take over e's flags with use_except cleared, then record e's error code.
125 _flags = e.flags() & ~(regbase::use_except);
126 fail(e.error_code());
// Destructor: releases pkmp through re_detail::kmp_free, using the allocator
// held by data.
// NOTE(review): a line ahead of the call is not present in this listing
// (possibly a guard on pkmp) - confirm against the full source.
130 template <class charT, class traits, class Allocator>
131 reg_expression<charT, traits, Allocator>::~reg_expression()
134 re_detail::kmp_free(pkmp, data.allocator());
// Copy assignment.
// Self-assignment returns immediately. Otherwise e's error code is recorded
// via fail(), and if the resulting error code is 0 the expression text of e
// is recompiled into this object with e's flags plus use_except.
// NOTE(review): a statement between the self-check and fail(), and the final
// return, are not present in this listing.
137 template <class charT, class traits, class Allocator>
138 reg_expression<charT, traits, Allocator>& BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::operator=(const reg_expression<charT, traits, Allocator>& e)
141 // we do a deep copy only if e is a valid expression, otherwise fail.
143 if(this == &e) return *this;
145 fail(e.error_code());
146 if(error_code() == 0)
147 set_expression(e._expression, e._expression + e._expression_len, e.flags() | regbase::use_except);
// Equality: two expressions are equal when their flags match, their
// expression lengths match, and the stored expression text is byte-for-byte
// identical (the length is scaled by sizeof(charT) for the memcmp call).
151 template <class charT, class traits, class Allocator>
152 inline bool BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::operator==(const reg_expression<charT, traits, Allocator>& e)const
154 return (_flags == e.flags())
155 && (_expression_len == e._expression_len)
156 && (std::memcmp(_expression, e._expression, _expression_len * sizeof(charT)) == 0);
// Ordering for reg_expression objects.
// Compares, in this order: the flags, the expression length, and finally the
// raw bytes of the expression text. Returns true only when *this orders
// strictly before e.
159 template <class charT, class traits, class Allocator>
160 bool BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::operator<(const reg_expression<charT, traits, Allocator>& e)const
163 // we can't offer a definitive ordering, but we can be consistent:
164 if(_flags != e.flags()) return _flags < e.flags();
165 if(_expression_len != e._expression_len) return _expression_len < e._expression_len;
// std::memcmp yields a negative, zero or positive int; converting that
// straight to bool would report "less" for any difference, in either
// direction. Only a negative result means "less". The length is scaled to
// bytes, as operator== does, so the whole text is compared for wide charT.
166 return std::memcmp(expression(), e.expression(), _expression_len * sizeof(charT)) < 0;
// Returns, by value, the allocator reported by data.allocator().
169 template <class charT, class traits, class Allocator>
170 Allocator BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::allocator()const
172 return data.allocator();
// Returns, by value, the allocator reported by data.allocator(); the visible
// body is the same as that of allocator().
175 template <class charT, class traits, class Allocator>
176 Allocator BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::get_allocator()const
178 return data.allocator();
// Classifies an inner "[...]" construct found while parsing a set.
//   first - on entry must point at an open-set character (asserted); it is
//           taken by reference and used as the scan position.
//   last  - end of the expression text.
// Returns a traits syntax code: colon, dot or equal when the character after
// the opening bracket has that syntax type, the syntax type of the third
// character for a five-character colon form, and 0 otherwise.
// NOTE(review): the loop body and the statement controlled by the
// *(base+1) != *(first-2) test are not present in this listing.
181 template <class charT, class traits, class Allocator>
182 unsigned int BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::parse_inner_set(const charT*& first, const charT* last)
185 // we have an inner [...] construct
187 jm_assert(traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*first) == traits_type::syntax_open_set);
// Remember where the construct starts; later tests are relative to base.
188 const charT* base = first;
// Scan until the end of input or a close-set character.
189 while( (first != last)
190 && (traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*first) != traits_type::syntax_close_set) )
// The character after the opener must be mirrored before the closer.
197 if(*(base+1) != *(first-2))
199 unsigned int result = traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*(base+1));
// Five-character colon form: report the syntax type of the middle character.
200 if((result == traits_type::syntax_colon) && ((first-base) == 5))
202 return traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*(base+2));
204 return ((result == traits_type::syntax_colon) || (result == traits_type::syntax_dot) || (result == traits_type::syntax_equal)) ? result : 0;
// Steps over characters that the traits classify as space.
//   first - scan position, taken by reference.
//   last  - end of the text.
// Returns true when first has reached last.
// NOTE(review): the loop body is not present in this listing; presumably it
// advances first.
208 template <class charT, class traits, class Allocator>
209 bool BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::skip_space(const charT*& first, const charT* last)
212 // returns true if we get to last:
214 while((first != last) && (traits_inst.is_class(*first, traits_type::char_class_space) == true))
218 return first == last;
// Parses a bounded-repeat specification and stores its limits.
//   ptr - scan position, taken by reference; points at "{" on entry.
//   end - end of the expression text.
//   min - receives the first number.
//   max - receives the second number when one follows the comma.
// NOTE(review): the statements executed on each failed test (and the value
// given to max in the other cases) are not present in this listing. Although
// the original comment says "no spaces inside braces", the visible code calls
// skip_space before each token.
221 template <class charT, class traits, class Allocator>
222 void BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::parse_range(const charT*& ptr, const charT* end, unsigned& min, unsigned& max)
225 // we have {x} or {x,} or {x,y} NB no spaces inside braces
226 // anything else is illegal
227 // On input ptr points to "{"
230 if(skip_space(ptr, end))
// The first token has to be a digit.
235 if(traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*ptr) != traits_type::syntax_digit)
// Read the lower bound in base 10.
240 min = traits_inst.toi(ptr, end, 10);
241 if(skip_space(ptr, end))
246 if(traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*ptr) == traits_type::syntax_comma)
248 //we have a second interval:
250 if(skip_space(ptr, end))
// An upper bound is read only when a digit follows the comma.
255 if(traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*ptr) == traits_type::syntax_digit)
256 max = traits_inst.toi(ptr, end, 10);
264 if(skip_space(ptr, end))
// With bk_braces set, a slash-class character is expected before the closer.
274 if(_flags & bk_braces)
276 if(traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*ptr) != traits_type::syntax_slash)
283 // back\ is OK now check the }
285 if((ptr == end) || (traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*ptr) != traits_type::syntax_close_brace))
292 else if(traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*ptr) != traits_type::syntax_close_brace)
// Decodes the escape sequence at first, dispatching on the syntax type of the
// current character.
//   first - scan position, taken by reference.
//   last  - end of the expression text.
// NOTE(review): the switch statement itself, the bodies of most cases, the
// declaration of c and the final return are not present in this listing;
// presumably the decoded character c is returned.
299 template <class charT, class traits, class Allocator>
300 charT BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::parse_escape(const charT*& first, const charT* last)
303 traits_size_type c_unsigned = (traits_size_type)(traits_uchar_type)*first;
304 // this is only used for the switch(), but cannot be folded in
305 // due to a bug in Comeau 4.2.44beta3
306 traits_size_type syntax = traits_inst.syntax_type(c_unsigned);
309 case traits_type::syntax_a:
313 case traits_type::syntax_f:
317 case traits_type::syntax_n:
321 case traits_type::syntax_r:
325 case traits_type::syntax_t:
329 case traits_type::syntax_v:
330 case traits_type::syntax_x:
340 // maybe have \x{ddd}
341 if(traits_inst.syntax_type((traits_size_type)(traits_uchar_type)(*first)) == traits_type::syntax_open_brace)
// Braced form: a hex digit must follow, and a close brace must end the number.
349 if(traits_inst.is_class(*first, traits_type::char_class_xdigit) == false)
// NOTE(review): the meaning of the negative radix passed to toi is defined by
// the traits class, which is not shown here.
354 c = (charT)traits_inst.toi(first, last, -16);
355 if((first == last) || (traits_inst.syntax_type((traits_size_type)(traits_uchar_type)(*first)) != traits_type::syntax_close_brace))
// Unbraced form: a hex digit must follow directly.
364 if(traits_inst.is_class(*first, traits_type::char_class_xdigit) == false)
369 c = (charT)traits_inst.toi(first, last, -16);
372 case traits_type::syntax_c:
// Only characters from '@' up to 127 are accepted; the result is the
// character's value minus '@'.
379 if(((traits_uchar_type)(*first) < (traits_uchar_type)'@')
380 || ((traits_uchar_type)(*first) > (traits_uchar_type)127) )
385 c = (charT)((traits_uchar_type)(*first) - (traits_uchar_type)'@');
388 case traits_type::syntax_e:
392 case traits_type::syntax_digit:
393 c = (charT)traits_inst.toi(first, last, -8);
// Walks the compiled records starting at data.data() and fills in the start
// maps and can_be_null fields.
// startmap is built for the first record; every alt or rep record gets its
// own _map built from both of its branches; the terminating match record is
// given can_be_null = mask_all.
// NOTE(review): the branch structure between the statements below is not
// present in this listing; the second pair of statements inside the loop is
// presumably the "else" path for records that are neither alt nor rep.
402 template <class charT, class traits, class Allocator>
403 void BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::compile_maps()
405 re_detail::re_syntax_base* record = static_cast<re_detail::re_syntax_base*>(data.data());
406 // always compile the first _map:
407 std::memset(startmap, 0, 256);
408 record->can_be_null = 0;
409 compile_map(record, startmap, 0, re_detail::mask_all);
// Visit every record up to, but not including, the match record.
411 while(record->type != re_detail::syntax_element_match)
413 if((record->type == re_detail::syntax_element_alt) || (record->type == re_detail::syntax_element_rep))
415 std::memset(&(static_cast<re_detail::re_jump*>(record)->_map), 0, 256);
416 record->can_be_null = 0;
// mask_take marks what can start the "next" branch (bounded by alt.p);
// mask_skip marks what can start the alternative branch.
417 compile_map(record->next.p, static_cast<re_detail::re_jump*>(record)->_map, &(record->can_be_null), re_detail::mask_take, static_cast<re_detail::re_jump*>(record)->alt.p);
418 compile_map(static_cast<re_detail::re_jump*>(record)->alt.p, static_cast<re_detail::re_jump*>(record)->_map, &(record->can_be_null), re_detail::mask_skip);
// No map is passed here (null pointer), only the can_be_null output.
422 record->can_be_null = 0;
423 compile_map(record, 0, &(record->can_be_null), re_detail::mask_all);
425 record = record->next.p;
427 record->can_be_null = re_detail::mask_all;
// Reports whether the character cc could begin a match of the compiled
// sub-expression starting at node.
//   node     - record to examine; many cases recurse into next.p or alt.p.
//   cc       - candidate character; translated with the icase flag before
//              most comparisons.
//   terminal - record that bounds the search (compared against next.p in the
//              jump case).
// NOTE(review): the switch statement, the declaration of c and several
// return statements are not present in this listing.
430 template <class charT, class traits, class Allocator>
431 bool BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::probe_start(
432 re_detail::re_syntax_base* node, charT cc, re_detail::re_syntax_base* terminal) const
438 case re_detail::syntax_element_startmark:
// Index -1 start marks get special handling: both the record two steps on
// and the alt target of the following record must accept cc.
439 if(static_cast<const re_detail::re_brace*>(node)->index == -1)
441 return probe_start(node->next.p->next.p, cc, terminal)
442 && probe_start(static_cast<const re_detail::re_jump*>(node->next.p)->alt.p, cc, terminal);
445 case re_detail::syntax_element_endmark:
446 case re_detail::syntax_element_start_line:
447 case re_detail::syntax_element_word_boundary:
448 case re_detail::syntax_element_buffer_start:
449 case re_detail::syntax_element_restart_continue:
450 // doesn't tell us anything about the next character, so:
451 return probe_start(node->next.p, cc, terminal);
452 case re_detail::syntax_element_literal:
453 // only the first character of the literal can match:
454 // note these have already been translated:
455 if(*reinterpret_cast<charT*>(static_cast<re_detail::re_literal*>(node)+1) == traits_inst.translate(cc, (_flags & regbase::icase)))
458 case re_detail::syntax_element_end_line:
459 // next character (if there is one!) must be a newline:
460 if(traits_inst.is_separator(traits_inst.translate(cc, (_flags & regbase::icase))))
463 case re_detail::syntax_element_wild:
465 case re_detail::syntax_element_match:
467 case re_detail::syntax_element_within_word:
468 case re_detail::syntax_element_word_start:
469 return traits_inst.is_class(traits_inst.translate(cc, (_flags & regbase::icase)), traits_type::char_class_word);
470 case re_detail::syntax_element_word_end:
471 // what follows must not be a word character,
472 return traits_inst.is_class(traits_inst.translate(cc, (_flags & regbase::icase)), traits_type::char_class_word) ? false : true;
473 case re_detail::syntax_element_buffer_end:
474 // we can be null, nothing must follow,
475 // NB we assume that this is followed by
476 // re_detail::syntax_element_match, if its not then we can
477 // never match anything anyway!!
479 case re_detail::syntax_element_soft_buffer_end:
480 // we can be null, only newlines must follow,
481 // NB we assume that this is followed by
482 // re_detail::syntax_element_match, if its not then we can
483 // never match anything anyway!!
484 return traits_inst.is_separator(traits_inst.translate(cc, (_flags & regbase::icase)));
485 case re_detail::syntax_element_backref:
486 // there's no easy way to determine this
487 // which is not to say it can't be done!
490 case re_detail::syntax_element_long_set:
491 // we can not be null,
492 // we need to add already translated values in the set
493 // to values in the _map
494 return re_detail::re_maybe_set_member(cc, static_cast<const re_detail::re_set_long*>(node), *this) || (re_detail::re_is_set_member(static_cast<const charT*>(&cc), static_cast<const charT*>(&cc+1), static_cast<re_detail::re_set_long*>(node), *this) != &cc);
495 case re_detail::syntax_element_set:
496 // set all the elements that are set in corresponding set:
497 c = (traits_size_type)(traits_uchar_type)traits_inst.translate(cc, (_flags & regbase::icase));
498 return static_cast<re_detail::re_set*>(node)->_map[c] != 0;
499 case re_detail::syntax_element_jump:
// A jump whose target lies before the node itself is a backward jump.
500 if(static_cast<re_detail::re_jump*>(node)->alt.p < node)
503 // caused only by end of repeat section, we'll treat this
504 // the same as a match, because the sub-expression has matched.
505 if(node->next.p == terminal)
506 return true; // null repeat - we can always take this
510 // take the jump, add in fix for the fact that if the
511 // repeat that we're jumping to has non-zero minimum count
512 // then we need to add in the possibility that we could still
514 re_detail::re_syntax_base* next = static_cast<re_detail::re_jump*>(node)->alt.p;
515 bool b = probe_start(next, cc, terminal);
516 if((next->type == re_detail::syntax_element_rep) && (static_cast<re_detail::re_repeat*>(next)->min != 0))
518 b = b || probe_start(static_cast<re_detail::re_jump*>(next)->alt.p, cc, terminal);
524 // take the jump and compile:
525 return probe_start(static_cast<re_detail::re_jump*>(node)->alt.p, cc, terminal);
526 case re_detail::syntax_element_alt:
527 // we need to take the OR of the two alternatives:
528 return probe_start(static_cast<re_detail::re_jump*>(node)->alt.p, cc, terminal) || probe_start(node->next.p, cc, terminal);
529 case re_detail::syntax_element_rep:
530 // we need to take the OR of the two alternatives
// A zero minimum means the repeat may be skipped, so the record after it
// (alt.p) is probed as well as the repeated body.
531 if(static_cast<re_detail::re_repeat*>(node)->min == 0)
532 return probe_start(node->next.p, cc, static_cast<re_detail::re_jump*>(node)->alt.p) || probe_start(static_cast<re_detail::re_jump*>(node)->alt.p, cc, terminal);
534 return probe_start(node->next.p, cc, static_cast<re_detail::re_jump*>(node)->alt.p);
535 case re_detail::syntax_element_combining:
536 return !traits_inst.is_combining(traits_inst.translate(cc, (_flags & regbase::icase)));
// Companion of probe_start that takes no character: it walks the records
// from node, following next.p / alt.p links, to decide whether the
// sub-expression can match without consuming input.
//   node     - record to examine.
//   terminal - record that bounds the search (compared against next.p in the
//              jump case).
// NOTE(review): the switch statement and several return statements
// (including the results for the match / buffer_end / backref group) are not
// present in this listing.
541 template <class charT, class traits, class Allocator>
542 bool BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::probe_start_null(re_detail::re_syntax_base* node, re_detail::re_syntax_base* terminal)const
546 case re_detail::syntax_element_startmark:
547 case re_detail::syntax_element_endmark:
548 case re_detail::syntax_element_start_line:
549 case re_detail::syntax_element_word_boundary:
550 case re_detail::syntax_element_buffer_start:
551 case re_detail::syntax_element_restart_continue:
552 case re_detail::syntax_element_end_line:
553 case re_detail::syntax_element_word_end:
554 // doesn't tell us anything about the next character, so:
555 return probe_start_null(node->next.p, terminal);
556 case re_detail::syntax_element_match:
557 case re_detail::syntax_element_buffer_end:
558 case re_detail::syntax_element_soft_buffer_end:
559 case re_detail::syntax_element_backref:
561 case re_detail::syntax_element_jump:
// A jump whose target lies before the node itself is a backward jump.
562 if(static_cast<re_detail::re_jump*>(node)->alt.p < node)
565 // caused only by end of repeat section, we'll treat this
566 // the same as a match, because the sub-expression has matched.
567 // this is only caused by NULL repeats as in "(a*)*" or "(\<)*"
568 // these are really nonsense and make the matching code much
569 // harder, it would be nice to get rid of them altogether.
570 if(node->next.p == terminal)
573 return probe_start_null(static_cast<re_detail::re_jump*>(node)->alt.p, terminal);
576 // take the jump and compile:
577 return probe_start_null(static_cast<re_detail::re_jump*>(node)->alt.p, terminal);
578 case re_detail::syntax_element_alt:
579 // we need to take the OR of the two alternatives:
580 return probe_start_null(static_cast<re_detail::re_jump*>(node)->alt.p, terminal) || probe_start_null(node->next.p, terminal);
581 case re_detail::syntax_element_rep:
582 // only need to consider skipping the repeat:
583 return probe_start_null(static_cast<re_detail::re_jump*>(node)->alt.p, terminal);
// Builds start information for the sub-expression beginning at node.
//   node     - first record of the sub-expression.
//   _map     - per-character table; callers in this file also pass 0 here.
//   pnull    - optional output consulted together with probe_start_null.
//   mask     - bit pattern associated with this sub-expression.
//   terminal - bounding record, forwarded to the probe functions.
// Every character value 0..255 is tried with probe_start.
// NOTE(review): the statements executed when either probe succeeds are not
// present in this listing; presumably they OR mask into _map[i] and *pnull.
590 template <class charT, class traits, class Allocator>
591 void BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::compile_map(
592 re_detail::re_syntax_base* node, unsigned char* _map,
593 unsigned int* pnull, unsigned char mask, re_detail::re_syntax_base* terminal)const
597 for(unsigned int i = 0; i < 256; ++i)
599 if(probe_start(node, (charT)i, terminal))
// The null probe is only made when the caller supplied pnull.
603 if(pnull && probe_start_null(node, terminal))
// Adjusts stored offsets after an insertion into the data buffer.
//   j    - record to start from; the walk begins at the record its next.i
//          offset refers to.
//   size - amount added to the offsets.
// Records are located by adding next.i to the start of data.data(); for rep,
// jump and alt records the alt.i offset is increased by size.
// NOTE(review): the loop construct, the update of next.i and the matching
// preprocessor guards around the pragmas are not present in this listing.
607 template <class charT, class traits, class Allocator>
608 void BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::move_offsets(re_detail::re_syntax_base* j, unsigned size)
611 # pragma warning(push)
612 # pragma warning(disable: 4127)
614 // move all offsets starting with j->link forward by size
615 // called after an insert:
616 j = reinterpret_cast<re_detail::re_syntax_base*>(reinterpret_cast<char*>(data.data()) + j->next.i);
621 case re_detail::syntax_element_rep:
622 static_cast<re_detail::re_jump*>(j)->alt.i += size;
625 case re_detail::syntax_element_jump:
626 case re_detail::syntax_element_alt:
627 static_cast<re_detail::re_jump*>(j)->alt.i += size;
634 if(j->next.i == size)
// Step to the next record via its (offset-encoded) next link.
636 j = reinterpret_cast<re_detail::re_syntax_base*>(reinterpret_cast<char*>(data.data()) + j->next.i);
639 # pragma warning(pop)
// Builds a set record from a single character-class mask.
//   dat   - preceding record; its next.i is pointed at the current end of
//           the data buffer.
//   cls   - class mask for the set.
//   isnot - forwarded to compile_set_aux.
// Returns the record produced by compile_set_aux, choosing the wide or
// narrow overload through re_detail::is_byte<charT>::width_type.
// NOTE(review): the lines that use cls are not present in this listing;
// presumably it is pushed onto classes.
643 template <class charT, class traits, class Allocator>
644 re_detail::re_syntax_base* BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::compile_set_simple(re_detail::re_syntax_base* dat, unsigned long cls, bool isnot)
646 typedef typename re_detail::is_byte<charT>::width_type width_type;
// Four work stacks, each created with 64 and the data allocator.
647 re_detail::jstack<traits_string_type, Allocator> singles(64, data.allocator());
648 re_detail::jstack<traits_string_type, Allocator> ranges(64, data.allocator());
649 re_detail::jstack<boost::uint_fast32_t, Allocator> classes(64, data.allocator());
650 re_detail::jstack<traits_string_type, Allocator> equivalents(64, data.allocator());
655 dat->next.i = data.size();
657 return compile_set_aux(singles, ranges, classes, equivalents, isnot, width_type());
// Parses a bracketed set starting at first and compiles it into a set record.
//   first - scan position, taken by reference; must point at an open-set
//           character on entry (asserted).
//   last  - end of the expression text.
// Items are collected on four stacks (singles, ranges, classes, equivalents)
// and handed to compile_set_aux once the loop has finished.
// NOTE(review): many lines of this function (the switch statements, error
// handling, several declarations such as isnot, done and last_single, and
// the char_set_literal label) are not present in this listing, so the
// comments below describe only the statements that are visible.
660 template <class charT, class traits, class Allocator>
661 re_detail::re_syntax_base* BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::compile_set(const charT*& first, const charT* last)
663 re_detail::jstack<traits_string_type, Allocator> singles(64, data.allocator());
664 re_detail::jstack<traits_string_type, Allocator> ranges(64, data.allocator());
665 re_detail::jstack<boost::uint_fast32_t, Allocator> classes(64, data.allocator());
666 re_detail::jstack<traits_string_type, Allocator> equivalents(64, data.allocator());
667 bool has_digraphs = false;
668 jm_assert(traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*first) == traits_type::syntax_open_set);
670 bool started = false;
681 unsigned l = last_none;
682 traits_string_type s;
// Main scan: one iteration per item of the set.
684 while((first != last) && !done)
686 traits_size_type c = (traits_size_type)(traits_uchar_type)*first;
687 // this is only used for the switch(), but cannot be folded in
688 // due to a bug in Comeau 4.2.44beta3
689 traits_size_type syntax = traits_inst.syntax_type(c);
692 case traits_type::syntax_caret:
693 if(!started && !isnot)
700 goto char_set_literal;
703 case traits_type::syntax_open_set:
// Without the char_classes flag an inner '[' is treated as a literal.
705 if((_flags & char_classes) == 0)
708 goto char_set_literal;
710 // check to see if we really have a class:
711 const charT* base = first;
712 // this is only used for the switch(), but cannot be folded in
713 // due to a bug in Comeau 4.2.44beta3
714 unsigned int inner_set = parse_inner_set(first, last);
717 case traits_type::syntax_colon:
// The name lies between the two-character opener and closer.
724 boost::uint_fast32_t id = traits_inst.lookup_classname(base+2, first-2);
// Under icase, upper and lower are both widened to alpha.
725 if(_flags & regbase::icase)
727 if((id == traits_type::char_class_upper) || (id == traits_type::char_class_lower))
729 id = traits_type::char_class_alpha;
742 case traits_type::syntax_dot:
744 // we have a collating element [.collating-name.]
746 if(traits_inst.lookup_collatename(s, base+2, first-2))
751 if(s.size())goto char_set_literal;
755 case traits_type::syntax_equal:
757 // we have an equivalence class [=collating-name=]
759 if(traits_inst.lookup_collatename(s, base+2, first-2))
761 std::size_t len = s.size();
767 s[i] = traits_inst.translate(s[i], (_flags & regbase::icase));
// The primary sort key of the translated name is what gets stored.
770 traits_string_type s2;
771 traits_inst.transform_primary(s2, s);
772 equivalents.push(s2);
780 case traits_type::syntax_left_word:
781 if((started == false) && (traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*first) == traits_type::syntax_close_set))
784 return add_simple(0, re_detail::syntax_element_word_start);
788 case traits_type::syntax_right_word:
789 if((started == false) && (traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*first) == traits_type::syntax_close_set))
792 return add_simple(0, re_detail::syntax_element_word_end);
799 unsigned int t = traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*(base+1));
800 if((t != traits_type::syntax_colon) && (t != traits_type::syntax_dot) && (t != traits_type::syntax_equal))
804 goto char_set_literal;
817 case traits_type::syntax_close_set:
821 goto char_set_literal;
825 case traits_type::syntax_dash:
829 goto char_set_literal;
830 if(traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*first) == traits_type::syntax_close_set)
836 goto char_set_literal;
// A range needs a single item immediately before the dash.
838 if((singles.empty() == true) || (l != last_single))
843 ranges.push(singles.peek());
844 if(singles.peek().size() <= 1) // leave digraphs and ligatures in place
848 case traits_type::syntax_slash:
// Escapes inside a set are only interpreted when escape_in_lists is set.
849 if(_flags & regbase::escape_in_lists)
854 traits_size_type c = (traits_size_type)(traits_uchar_type)*first;
855 // this is only used for the switch(), but cannot be folded in
856 // due to a bug in Comeau 4.2.44beta3
857 traits_size_type syntax = traits_inst.syntax_type(c);
860 case traits_type::syntax_w:
866 classes.push(traits_type::char_class_word);
871 case traits_type::syntax_d:
877 classes.push(traits_type::char_class_digit);
882 case traits_type::syntax_s:
888 classes.push(traits_type::char_class_space);
893 case traits_type::syntax_l:
899 classes.push(traits_type::char_class_lower);
904 case traits_type::syntax_u:
910 classes.push(traits_type::char_class_upper);
915 case traits_type::syntax_W:
916 case traits_type::syntax_D:
917 case traits_type::syntax_S:
918 case traits_type::syntax_U:
919 case traits_type::syntax_L:
923 c = parse_escape(first, last);
926 goto char_set_literal;
932 goto char_set_literal;
938 // get string length to stop us going past the end of string (DWA)
939 std::size_t len = s.size();
942 s[i] = traits_inst.translate(s[i], (_flags & regbase::icase));
950 if(s.size() > 1) // add ligatures to singles list as well
964 typedef typename re_detail::is_byte<charT>::width_type width_type;
966 re_detail::re_syntax_base* result;
// Two dispatch paths: one forces the wide overload, the other uses the
// width tag derived from charT (the selecting condition is not shown).
968 result = compile_set_aux(singles, ranges, classes, equivalents, isnot, re_detail::_wide_type());
970 result = compile_set_aux(singles, ranges, classes, equivalents, isnot, width_type());
973 if((result == 0) && (_flags & regbase::use_except))
// compile_set_aux overload selected by the re_detail::_wide_type tag.
// Emits a re_set_long header into the data buffer followed by the string
// data of the collected items, then fills in the header's counters.
//   singles, ranges, classes, equivalents - work stacks filled by the caller;
//                                           each is drained by its own loop.
//   isnot - negation flag (its use is not visible in this listing).
// NOTE(review): the pop / counter-increment statements, the nocollate
// branches, the range-order check and the final return are not present in
// this listing.
979 template <class charT, class traits, class Allocator>
980 re_detail::re_syntax_base* BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::compile_set_aux(re_detail::jstack<traits_string_type, Allocator>& singles, re_detail::jstack<traits_string_type, Allocator>& ranges, re_detail::jstack<boost::uint_fast32_t, Allocator>& classes, re_detail::jstack<traits_string_type, Allocator>& equivalents, bool isnot, const re_detail::_wide_type&)
// Remember the header's offset rather than its address; the header is
// located again from base after all the extend() calls below.
982 size_type base = data.size();
983 data.extend(sizeof(re_detail::re_set_long));
984 unsigned int csingles = 0;
985 unsigned int cranges = 0;
986 boost::uint_fast32_t cclasses = 0;
987 unsigned int cequivalents = 0;
988 bool nocollate_state = flags() & regbase::nocollate;
990 while(singles.empty() == false)
// Each single is appended including its terminating null character.
993 const traits_string_type& s = singles.peek();
994 std::size_t len = (s.size() + 1) * sizeof(charT);
995 std::memcpy(reinterpret_cast<charT*>(data.extend(len)), s.c_str(), len);
998 while(ranges.empty() == false)
1000 traits_string_type c1, c2;
1004 traits_inst.transform(c1, ranges.peek());
1009 traits_inst.transform(c2, ranges.peek());
1013 // for some reason bc5 crashes when throwing exceptions
1014 // from here - probably an EH-compiler bug, but hard to
1016 // delay throw to later:
// Save the flags and clear use_except for the duration of the error call.
1018 boost::uint_fast32_t f = _flags;
1019 _flags &= ~regbase::use_except;
// Both range end-points are appended as null-terminated strings.
1028 std::size_t len = (re_detail::re_strlen(c1.c_str()) + 1) * sizeof(charT);
1029 std::memcpy(data.extend(len), c1.c_str(), len);
1030 len = (re_detail::re_strlen(c2.c_str()) + 1) * sizeof(charT);
1031 std::memcpy(data.extend(len), c2.c_str(), len);
1033 while(classes.empty() == false)
// Class masks are OR-ed together into a single value.
1035 cclasses |= classes.peek();
1038 while(equivalents.empty() == false)
1041 const traits_string_type& s = equivalents.peek();
1042 std::size_t len = (re_detail::re_strlen(s.c_str()) + 1) * sizeof(charT);
1043 std::memcpy(reinterpret_cast<charT*>(data.extend(len)), s.c_str(), len);
// Re-derive the header address from the saved offset and fill it in.
1047 re_detail::re_set_long* dat = reinterpret_cast<re_detail::re_set_long*>(reinterpret_cast<unsigned char*>(data.data()) + base);
1048 dat->type = re_detail::syntax_element_long_set;
1049 dat->csingles = csingles;
1050 dat->cranges = cranges;
1051 dat->cclasses = cclasses;
1052 dat->cequivalents = cequivalents;
// compile_set_aux overload selected by the re_detail::_narrow_type tag.
// Emits a re_set record whose _map has one entry per character value 0..255;
// entries are set to mask_all for every character covered by the collected
// singles, ranges, equivalence classes and character classes.
//   singles, ranges, classes, equivalents - work stacks filled by the caller;
//                                           each is drained by its own loop.
//   isnot - negation flag; the inversion loop near the end is presumably
//           guarded by it (the guard is not shown).
// NOTE(review): the pop statements, the nocollate branches, the range-order
// check and the final return are not present in this listing.
1058 template <class charT, class traits, class Allocator>
1059 re_detail::re_syntax_base* BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::compile_set_aux(re_detail::jstack<traits_string_type, Allocator>& singles, re_detail::jstack<traits_string_type, Allocator>& ranges, re_detail::jstack<boost::uint_fast32_t, Allocator>& classes, re_detail::jstack<traits_string_type, Allocator>& equivalents, bool isnot, const re_detail::_narrow_type&)
// Append a zero-filled re_set record to the data buffer.
1061 re_detail::re_set* dat = reinterpret_cast<re_detail::re_set*>(data.extend(sizeof(re_detail::re_set)));
1062 std::memset(dat, 0, sizeof(re_detail::re_set));
1064 while(singles.empty() == false)
// Only the first character of each single is entered into the map.
1066 dat->_map[(traits_size_type)(traits_uchar_type)*(singles.peek().c_str())] = re_detail::mask_all;
1069 while(ranges.empty() == false)
1071 traits_string_type c1, c2, c3, c4;
1073 if(flags() & regbase::nocollate)
1076 traits_inst.transform(c1, ranges.peek());
1078 if(flags() & regbase::nocollate)
1081 traits_inst.transform(c2, ranges.peek());
1086 // for some reason bc5 crashes when throwing exceptions
1087 // from here - probably an EH-compiler bug, but hard to
1089 // delay throw to later:
// Save the flags and clear use_except for the duration of the error call.
1091 boost::uint_fast32_t f = _flags;
1092 _flags &= ~regbase::use_except;
// Test every character value against the range end-points c2..c1.
1100 for(unsigned int i = 0; i < 256; ++i)
1103 if(flags() & regbase::nocollate)
1106 traits_inst.transform(c3, c4);
1107 if((c3 <= c1) && (c3 >= c2))
1108 dat->_map[i] = re_detail::mask_all;
1111 while(equivalents.empty() == false)
1113 traits_string_type c1, c2;
// A character belongs to the class when its primary key equals the stored one.
1114 for(unsigned int i = 0; i < 256; ++i)
1117 traits_inst.transform_primary(c1, c2);
1118 if(c1 == equivalents.peek())
1119 dat->_map[i] = re_detail::mask_all;
// This local hides the flags() member function from here on; the code that
// follows reads _flags directly.
1124 boost::uint_fast32_t flags = 0;
1125 while(classes.empty() == false)
1127 flags |= classes.peek();
1132 for(unsigned int i = 0; i < 256; ++i)
// The map slot that gets set is that of the translated character.
1134 if(traits_inst.is_class(charT(i), flags))
1135 dat->_map[(traits_uchar_type)traits_inst.translate((charT)i, (_flags & regbase::icase))] = re_detail::mask_all;
// Invert every map entry.
1141 for(unsigned int i = 0; i < 256; ++i)
1143 dat->_map[i] = !dat->_map[i];
1147 dat->type = re_detail::syntax_element_set;
1152 #ifndef __CODEGUARD__
1153 // this must not be inline when Borland's codeguard support is turned
1154 // on, otherwise we _will_ get spurious codeguard errors...
// Returns the record located off bytes past base.
//   base - start address, treated as a char* for the arithmetic.
//   off  - byte offset to add.
1157 re_detail::re_syntax_base* add_offset(void* base, std::ptrdiff_t off)
1159 return reinterpret_cast<re_detail::re_syntax_base*>(reinterpret_cast<char*>(base) + off);
// Converts the offset-encoded links of the compiled records into pointers.
//   b       - first record; also used as the base address for every offset.
//   cbraces - number of entries allocated for pb, the per-index array of
//             bool flags that end marks set and back-references consult.
// For each record the alt link (rep / jump / alt records) and the next link
// are replaced by add_offset(base, <offset>). A back-reference whose index
// is out of range, or whose pb flag is still false, takes the error path
// that deallocates pb.
// NOTE(review): the loop and switch constructs, the declarations of pb and
// repeats, the try/catch framing and the error calls are not present in
// this listing.
1163 template <class charT, class traits, class Allocator>
1164 void BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::fixup_apply(re_detail::re_syntax_base* b, unsigned cbraces)
1166 typedef typename boost::detail::rebind_allocator<bool, Allocator>::type b_alloc;
// NOTE(review): the register storage class was deprecated in C++11 and
// removed in C++17.
1168 register unsigned char* base = reinterpret_cast<unsigned char*>(b);
1169 register re_detail::re_syntax_base* ptr = b;
1171 b_alloc a(data.allocator());
1172 #ifndef BOOST_NO_EXCEPTIONS
1176 pb = a.allocate(cbraces);
1177 BOOST_REGEX_NOEH_ASSERT(pb)
1178 for(unsigned i = 0; i < cbraces; ++i)
1187 case re_detail::syntax_element_rep:
// The stored offset must lie inside the data buffer before it is converted.
1188 jm_assert(data.size() > static_cast<re_detail::re_jump*>(ptr)->alt.i);
1189 static_cast<re_detail::re_jump*>(ptr)->alt.p = add_offset(base, static_cast<re_detail::re_jump*>(ptr)->alt.i);
1190 #ifdef BOOST_REGEX_DEBUG
// NOTE(review): reinterpret_cast of a pointer to int is only valid where
// int is at least as wide as a pointer - confirm for the targets in use.
1191 if((re_detail::padding_mask & reinterpret_cast<int>(static_cast<re_detail::re_jump*>(ptr)->alt.p)) && (static_cast<re_detail::re_jump*>(ptr)->alt.p != b))
1193 jm_trace("padding mis-aligment in repeat jump to object type: " << static_cast<re_detail::re_jump*>(ptr)->alt.p->type)
1194 //jm_assert(0 == (padding_mask & (int)((re_detail::re_jump*)ptr)->alt.p));
1197 static_cast<re_detail::re_repeat*>(ptr)->id = repeats;
1200 case re_detail::syntax_element_jump:
1201 case re_detail::syntax_element_alt:
1202 jm_assert(data.size() > static_cast<re_detail::re_jump*>(ptr)->alt.i);
1203 static_cast<re_detail::re_jump*>(ptr)->alt.p = add_offset(base, static_cast<re_detail::re_jump*>(ptr)->alt.i);
1204 #ifdef BOOST_REGEX_DEBUG
// Parenthesised differently from the repeat case above, but evaluates the
// same way because & binds tighter than &&.
1205 if((re_detail::padding_mask & reinterpret_cast<int>(static_cast<re_detail::re_jump*>(ptr)->alt.p) && (static_cast<re_detail::re_jump*>(ptr)->alt.p != b)))
1207 jm_trace("padding mis-aligment in alternation jump to object type: " << static_cast<re_detail::re_jump*>(ptr)->alt.p->type)
1208 //jm_assert(0 == (padding_mask & (int)((re_detail::re_jump*)ptr)->alt.p));
1212 case re_detail::syntax_element_backref:
// Reject a back-reference whose index is out of range or not yet marked.
1213 if((static_cast<re_detail::re_brace*>(ptr)->index >= (int)cbraces) || (pb[static_cast<re_detail::re_brace*>(ptr)->index] == false) )
1216 a.deallocate(pb, cbraces);
1220 case re_detail::syntax_element_endmark:
// Only end marks with a positive index are recorded in pb.
1221 if(static_cast<re_detail::re_brace*>(ptr)->index > 0)
1222 pb[static_cast<re_detail::re_brace*>(ptr)->index] = true;
1226 jm_assert(data.size() > ptr->next.i);
1227 ptr->next.p = add_offset(base, ptr->next.i);
1228 #ifdef BOOST_REGEX_DEBUG
// NOTE(review): this check reads alt.p through a re_jump cast even though
// the statement above handles the next link - confirm this is intended.
1229 if((re_detail::padding_mask & (int)(ptr->next.p)) && (static_cast<re_detail::re_jump*>(ptr)->alt.p != b))
1231 jm_trace("padding mis-alignment in next record of type " << ptr->next.p->type)
1232 jm_assert(0 == (re_detail::padding_mask & (int)(ptr->next.p)));
1238 a.deallocate(pb, cbraces);
1240 #ifndef BOOST_NO_EXCEPTIONS
1245 a.deallocate(pb, cbraces);
1252 template <class charT, class traits, class Allocator>
1253 unsigned int BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::set_expression(const charT* p, const charT* end, flag_type f)
1256 # pragma warning(push)
1257 # pragma warning(disable: 4127)
1260 if(p == expression())
1262 traits_string_type s(p, end);
1263 return set_expression(s.c_str(), s.c_str() + s.size(), f);
1265 typedef typename traits_type::sentry sentry_t;
1266 sentry_t sent(traits_inst);
1269 const charT* base = p;
1272 fail(REG_NOERROR); // clear any error
1277 return error_code();
1280 const charT* ptr = p;
1282 re_detail::jstack<std::size_t, Allocator> mark(64, data.allocator());
1283 re_detail::jstack<int, Allocator> markid(64, data.allocator());
1284 std::size_t last_mark_popped = 0;
1285 register traits_size_type c;
1286 register re_detail::re_syntax_base* dat;
1288 unsigned rep_min = 0;
1289 unsigned rep_max = 0;
1297 if(_flags & regbase::literal)
1301 dat = add_literal(dat, traits_inst.translate(*ptr, (_flags & regbase::icase)));
1308 c = (traits_size_type)(traits_uchar_type)*ptr;
1309 // this is only used for the switch(), but cannot be folded in
1310 // due to a bug in Comeau 4.2.44beta3
1311 traits_size_type syntax = traits_inst.syntax_type(c);
1314 case traits_type::syntax_open_bracket:
1315 if(_flags & bk_parens)
1317 dat = add_literal(dat, (charT)c);
1323 dat = add_simple(dat, re_detail::syntax_element_startmark, sizeof(re_detail::re_brace));
1325 static_cast<re_detail::re_brace*>(dat)->index = marks++;
1326 mark.push(data.index(dat));
1329 // check for perl like (?...) extention syntax
1330 c = (traits_size_type)(traits_uchar_type)*ptr;
1331 if(((_flags & bk_parens) == 0) && (traits_type::syntax_question == traits_inst.syntax_type(c)))
1334 c = (traits_size_type)(traits_uchar_type)*ptr;
1335 // this is only used for the switch(), but cannot be folded in
1336 // due to a bug in Comeau 4.2.44beta3
1337 traits_size_type syntax = traits_inst.syntax_type(c);
1340 case traits_type::syntax_colon:
1341 static_cast<re_detail::re_brace*>(dat)->index = 0;
1347 case traits_type::syntax_equal:
1348 static_cast<re_detail::re_brace*>(dat)->index = -1;
1351 common_forward_assert:
1355 dat = add_simple(dat, re_detail::syntax_element_jump, re_detail::re_jump_size);
1358 // we don't know what value to put here yet,
1359 // use an arbitrarily large value for now
1360 // and check it later:
1361 static_cast<re_detail::re_jump*>(dat)->alt.i = INT_MAX/2;
1362 mark.push(data.size() - re_detail::re_jump_size);
1364 case traits_type::syntax_not:
1365 static_cast<re_detail::re_brace*>(dat)->index = -2;
1368 goto common_forward_assert;
1369 case traits_type::syntax_hash:
1370 // comment just skip it:
1371 static_cast<re_detail::re_brace*>(dat)->index = 0;
1377 c = (traits_size_type)(traits_uchar_type)*ptr;
1378 }while(traits_type::syntax_close_bracket != traits_inst.syntax_type(c));
1383 // error, return to standard parsing and let that handle the error:
1389 case traits_type::syntax_close_bracket:
1390 if(_flags & bk_parens)
1392 dat = add_literal(dat, (charT)c);
1401 dat->next.i = data.size();
1407 return error_code();
1409 // see if we have an empty alternative:
1410 if(mark.peek() == data.index(dat) )
1412 re_detail::re_syntax_base* para = reinterpret_cast<re_detail::re_syntax_base*>(reinterpret_cast<char*>(data.data()) + mark.peek());
1413 if(para->type == re_detail::syntax_element_jump)
1416 return error_code();
1420 // pop any pushed alternatives and set the target end destination:
1421 dat = reinterpret_cast<re_detail::re_syntax_base*>(reinterpret_cast<unsigned char*>(data.data()) + mark.peek());
1422 while(dat->type == re_detail::syntax_element_jump)
1424 static_cast<re_detail::re_jump*>(dat)->alt.i = data.size();
1429 return error_code();
1431 dat = reinterpret_cast<re_detail::re_jump*>(reinterpret_cast<unsigned char*>(data.data()) + mark.peek());
1434 dat = add_simple(0, re_detail::syntax_element_endmark, sizeof(re_detail::re_brace));
1435 static_cast<re_detail::re_brace*>(dat)->index = markid.peek();
1437 last_mark_popped = mark.peek();
1441 case traits_type::syntax_char:
1442 dat = add_literal(dat, (charT)c);
1445 case traits_type::syntax_slash:
1450 return error_code();
1452 c = (traits_size_type)(traits_uchar_type)*ptr;
1453 // this is only used for the switch(), but cannot be folded in
1454 // due to a bug in Comeau 4.2.44beta3
1455 traits_size_type syntax = traits_inst.syntax_type(c);
1458 case traits_type::syntax_open_bracket:
1459 if(_flags & bk_parens)
1460 goto open_bracked_jump;
1462 case traits_type::syntax_close_bracket:
1463 if(_flags & bk_parens)
1464 goto close_bracked_jump;
1466 case traits_type::syntax_plus:
1467 if((_flags & bk_plus_qm) && ((_flags & limited_ops) == 0))
1470 rep_max = (unsigned)-1;
1474 case traits_type::syntax_question:
1475 if((_flags & bk_plus_qm) && ((_flags & limited_ops) == 0))
1482 case traits_type::syntax_or:
1483 if(((_flags & bk_vbar) == 0) || (_flags & limited_ops))
1485 goto alt_string_jump;
1486 case traits_type::syntax_open_brace:
1487 if( ((_flags & bk_braces) == 0) || ((_flags & intervals) == 0))
1490 // we have {x} or {x,} or {x,y}:
1491 parse_range(ptr, end, rep_min, rep_max);
1494 case traits_type::syntax_digit:
1495 if(_flags & bk_refs)
1498 int i = traits_inst.toi((charT)c);
1501 // we can have \025 which means take char whose
1502 // code is 25 (octal), so parse string:
1503 c = traits_inst.toi(ptr, end, -8);
1507 dat = add_simple(dat, re_detail::syntax_element_backref, sizeof(re_detail::re_brace));
1508 static_cast<re_detail::re_brace*>(dat)->index = i;
1513 case traits_type::syntax_b: // re_detail::syntax_element_word_boundary
1514 dat = add_simple(dat, re_detail::syntax_element_word_boundary);
1517 case traits_type::syntax_B:
1518 dat = add_simple(dat, re_detail::syntax_element_within_word);
1521 case traits_type::syntax_left_word:
1522 dat = add_simple(dat, re_detail::syntax_element_word_start);
1525 case traits_type::syntax_right_word:
1526 dat = add_simple(dat, re_detail::syntax_element_word_end);
1529 case traits_type::syntax_w: //re_detail::syntax_element_word_char
1530 dat = compile_set_simple(dat, traits_type::char_class_word);
1533 case traits_type::syntax_W:
1534 dat = compile_set_simple(dat, traits_type::char_class_word, true);
1537 case traits_type::syntax_d: //re_detail::syntax_element_word_char
1538 dat = compile_set_simple(dat, traits_type::char_class_digit);
1541 case traits_type::syntax_D:
1542 dat = compile_set_simple(dat, traits_type::char_class_digit, true);
1545 case traits_type::syntax_s: //re_detail::syntax_element_word_char
1546 dat = compile_set_simple(dat, traits_type::char_class_space);
1549 case traits_type::syntax_S:
1550 dat = compile_set_simple(dat, traits_type::char_class_space, true);
1553 case traits_type::syntax_l: //re_detail::syntax_element_word_char
1554 dat = compile_set_simple(dat, traits_type::char_class_lower);
1557 case traits_type::syntax_L:
1558 dat = compile_set_simple(dat, traits_type::char_class_lower, true);
1561 case traits_type::syntax_u: //re_detail::syntax_element_word_char
1562 dat = compile_set_simple(dat, traits_type::char_class_upper);
1565 case traits_type::syntax_U:
1566 dat = compile_set_simple(dat, traits_type::char_class_upper, true);
1569 case traits_type::syntax_Q:
1576 return error_code();
1578 if(traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*ptr) == traits_type::syntax_slash)
1581 if((ptr != end) && (traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*ptr) == traits_type::syntax_E))
1585 dat = add_literal(dat, *(ptr-1));
1589 dat = add_literal(dat, *ptr);
1594 case traits_type::syntax_C:
1595 dat = add_simple(dat, re_detail::syntax_element_wild);
1598 case traits_type::syntax_X:
1599 dat = add_simple(dat, re_detail::syntax_element_combining);
1602 case traits_type::syntax_Z:
1603 dat = add_simple(dat, re_detail::syntax_element_soft_buffer_end);
1606 case traits_type::syntax_G:
1607 dat = add_simple(dat, re_detail::syntax_element_restart_continue);
1610 case traits_type::syntax_start_buffer:
1611 dat = add_simple(dat, re_detail::syntax_element_buffer_start);
1614 case traits_type::syntax_end_buffer:
1615 dat = add_simple(dat, re_detail::syntax_element_buffer_end);
1619 c = (traits_size_type)(traits_uchar_type)parse_escape(ptr, end);
1620 dat = add_literal(dat, (charT)c);
1623 dat = add_literal(dat, (charT)c);
1627 case traits_type::syntax_dollar:
1628 dat = add_simple(dat, re_detail::syntax_element_end_line, sizeof(re_detail::re_syntax_base));
1631 case traits_type::syntax_caret:
1632 dat = add_simple(dat, re_detail::syntax_element_start_line, sizeof(re_detail::re_syntax_base));
1635 case traits_type::syntax_dot:
1636 dat = add_simple(dat, re_detail::syntax_element_wild, sizeof(re_detail::re_syntax_base));
1639 case traits_type::syntax_star:
1641 rep_max = (unsigned)-1;
1645 std::ptrdiff_t offset;
1649 return error_code();
1653 case re_detail::syntax_element_endmark:
1654 offset = last_mark_popped;
1656 case re_detail::syntax_element_literal:
1657 if(static_cast<re_detail::re_literal*>(dat)->length > 1)
1660 charT lit = *reinterpret_cast<charT*>(reinterpret_cast<char*>(dat) + sizeof(re_detail::re_literal) + ((static_cast<re_detail::re_literal*>(dat)->length-1)*sizeof(charT)));
1661 --static_cast<re_detail::re_literal*>(dat)->length;
1662 dat = add_simple(dat, re_detail::syntax_element_literal, sizeof(re_detail::re_literal) + sizeof(charT));
1663 static_cast<re_detail::re_literal*>(dat)->length = 1;
1664 *reinterpret_cast<charT*>(static_cast<re_detail::re_literal*>(dat)+1) = lit;
1666 offset = reinterpret_cast<char*>(dat) - reinterpret_cast<char*>(data.data());
1668 case re_detail::syntax_element_backref:
1669 case re_detail::syntax_element_long_set:
1670 case re_detail::syntax_element_set:
1671 case re_detail::syntax_element_wild:
1672 case re_detail::syntax_element_combining:
1673 // we're repeating a single item:
1674 offset = reinterpret_cast<char*>(dat) - reinterpret_cast<char*>(data.data());
1678 return error_code();
1681 dat->next.i = data.size();
1682 //unsigned pos = (char*)dat - (char*)data.data();
1684 // add the trailing jump:
1685 dat = add_simple(dat, re_detail::syntax_element_jump, re_detail::re_jump_size);
1686 static_cast<re_detail::re_jump*>(dat)->alt.i = 0;
1688 // now insert the leading repeater:
1689 dat = static_cast<re_detail::re_syntax_base*>(data.insert(offset, re_detail::re_repeater_size));
1690 dat->next.i = (reinterpret_cast<char*>(dat) - reinterpret_cast<char*>(data.data())) + re_detail::re_repeater_size;
1691 dat->type = re_detail::syntax_element_rep;
1692 static_cast<re_detail::re_repeat*>(dat)->alt.i = data.size();
1693 static_cast<re_detail::re_repeat*>(dat)->min = rep_min;
1694 static_cast<re_detail::re_repeat*>(dat)->max = rep_max;
1695 static_cast<re_detail::re_repeat*>(dat)->leading = false;
1696 static_cast<re_detail::re_repeat*>(dat)->greedy = true;
1697 move_offsets(dat, re_detail::re_repeater_size);
1700 // now check to see if we have a non-greedy repeat:
1701 if((ptr != end) && (_flags & (limited_ops | bk_plus_qm | bk_braces)) == 0)
1703 c = (traits_size_type)(traits_uchar_type)*ptr;
1704 if(traits_type::syntax_question == traits_inst.syntax_type(c))
1706 // OK repeat is non-greedy:
1707 static_cast<re_detail::re_repeat*>(dat)->greedy = false;
1711 dat = reinterpret_cast<re_detail::re_syntax_base*>(reinterpret_cast<char*>(data.data()) + data.size() - re_detail::re_jump_size);
1712 static_cast<re_detail::re_repeat*>(dat)->alt.i = offset;
1715 case traits_type::syntax_plus:
1716 if(_flags & (bk_plus_qm | limited_ops))
1718 dat = add_literal(dat, (charT)c);
1723 rep_max = (unsigned)-1;
1725 case traits_type::syntax_question:
1726 if(_flags & (bk_plus_qm | limited_ops))
1728 dat = add_literal(dat, (charT)c);
1735 case traits_type::syntax_open_set:
1740 dat->next.i = data.size();
1743 dat = compile_set(ptr, end);
1746 if((_flags & regbase::failbit) == 0)
1748 return error_code();
1751 case traits_type::syntax_or:
1753 if(_flags & (bk_vbar | limited_ops))
1755 dat = add_literal(dat, (charT)c);
1765 // start of pattern can't have empty "|"
1767 return error_code();
1769 // see if we have an empty alternative:
1770 if(mark.empty() == false)
1771 if(mark.peek() == data.index(dat))
1774 return error_code();
1777 dat = add_simple(dat, re_detail::syntax_element_jump, re_detail::re_jump_size);
1780 // we don't know what value to put here yet,
1781 // use an arbitrarily large value for now
1782 // and check it later (TODO!)
1783 static_cast<re_detail::re_jump*>(dat)->alt.i = INT_MAX/2;
1785 // now work out where to insert:
1786 std::size_t offset = 0;
1787 if(mark.empty() == false)
1789 // we have a '(' or '|' to go back to:
1790 offset = mark.peek();
1791 re_detail::re_syntax_base* base = reinterpret_cast<re_detail::re_syntax_base*>(reinterpret_cast<unsigned char*>(data.data()) + offset);
1792 offset = base->next.i;
1794 re_detail::re_jump* j = static_cast<re_detail::re_jump*>(data.insert(offset, re_detail::re_jump_size));
1795 j->type = re_detail::syntax_element_alt;
1796 j->next.i = offset + re_detail::re_jump_size;
1797 j->alt.i = data.size();
1798 move_offsets(j, re_detail::re_jump_size);
1799 dat = reinterpret_cast<re_detail::re_syntax_base*>(reinterpret_cast<unsigned char*>(data.data()) + data.size() - re_detail::re_jump_size);
1800 mark.push(data.size() - re_detail::re_jump_size);
1804 case traits_type::syntax_open_brace:
1805 if((_flags & bk_braces) || ((_flags & intervals) == 0))
1807 dat = add_literal(dat, (charT)c);
1811 // we have {x} or {x,} or {x,y}:
1812 parse_range(ptr, end, rep_min, rep_max);
1814 case traits_type::syntax_newline:
1815 if(_flags & newline_alt)
1816 goto alt_string_jump;
1817 dat = add_literal(dat, (charT)c);
1820 case traits_type::syntax_close_brace:
1821 if(_flags & bk_braces)
1823 dat = add_literal(dat, (charT)c);
1828 return error_code();
1830 dat = add_literal(dat, (charT)c);
1841 dat->next.i = data.size();
1844 // see if we have an empty alternative:
1845 if(mark.empty() == false)
1846 if(mark.peek() == data.index(dat) )
1848 re_detail::re_syntax_base* para = reinterpret_cast<re_detail::re_syntax_base*>(reinterpret_cast<char*>(data.data()) + mark.peek());
1849 if(para->type == re_detail::syntax_element_jump)
1852 return error_code();
1858 if(mark.empty() == false)
1860 // pop any pushed alternatives and set the target end destination:
1861 dat = reinterpret_cast<re_detail::re_syntax_base*>(reinterpret_cast<unsigned char*>(data.data()) + mark.peek());
1862 while(dat->type == re_detail::syntax_element_jump)
1864 static_cast<re_detail::re_jump*>(dat)->alt.i = data.size();
1866 if(mark.empty() == true)
1868 dat = reinterpret_cast<re_detail::re_jump*>(reinterpret_cast<unsigned char*>(data.data()) + mark.peek());
1872 dat = static_cast<re_detail::re_brace*>(data.extend(sizeof(re_detail::re_syntax_base)));
1873 dat->type = re_detail::syntax_element_match;
1876 if(mark.empty() == false)
1879 return error_code();
1883 // allocate space for start _map:
1884 startmap = reinterpret_cast<unsigned char*>(data.extend(256 + ((end - base + 1) * sizeof(charT))));
1886 // and copy the expression we just compiled:
1887 _expression = reinterpret_cast<charT*>(reinterpret_cast<char*>(startmap) + 256);
1888 _expression_len = end - base;
1889 std::memcpy(_expression, base, _expression_len * sizeof(charT));
1890 *(_expression + _expression_len) = charT(0);
1893 // now we need to apply fixups to the array
1894 // so that we can use pointers and not indexes
1895 fixup_apply(static_cast<re_detail::re_syntax_base*>(data.data()), marks);
1897 // check for error during fixup:
1898 if(_flags & regbase::failbit)
1899 return error_code();
1902 // finally compile the maps so that we can make intelligent choices
1903 // whenever we encounter an alternative:
1907 re_detail::kmp_free(pkmp, data.allocator());
1910 re_detail::re_syntax_base* sbase = static_cast<re_detail::re_syntax_base*>(data.data());
1911 _restart_type = probe_restart(sbase);
1912 _leading_len = fixup_leading_rep(sbase, 0);
1913 if((sbase->type == re_detail::syntax_element_literal) && (sbase->next.p->type == re_detail::syntax_element_match))
1915 _restart_type = restart_fixed_lit;
1918 charT* p1 = reinterpret_cast<charT*>(reinterpret_cast<char*>(sbase) + sizeof(re_detail::re_literal));
1919 charT* p2 = p1 + static_cast<re_detail::re_literal*>(sbase)->length;
1920 pkmp = re_detail::kmp_compile(p1, p2, charT(), re_detail::kmp_translator<traits>(_flags®base::icase, &traits_inst), data.allocator());
1923 return error_code();
1929 # pragma warning(pop)
// add_simple: appends a new state record of at least
// sizeof(re_detail::re_syntax_base) bytes to `data` and makes `dat` refer to it.
1934 template <class charT, class traits, class Allocator>
1935 re_detail::re_syntax_base* BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::add_simple(re_detail::re_syntax_base* dat, re_detail::syntax_element_type type, unsigned int size)
// Link the previous record to the position where the new one will start.
// NOTE(review): callers also pass dat == 0, so this is presumably guarded by a
// null check -- confirm.
1940 dat->next.i = data.size();
// Never allocate less than a base record.
1942 if(size < sizeof(re_detail::re_syntax_base))
1943 size = sizeof(re_detail::re_syntax_base);
1944 dat = static_cast<re_detail::re_syntax_base*>(data.extend(size));
// add_literal: adds character c (after traits translation, honouring
// regbase::icase) to the program. If `dat` is already a literal record the
// character is appended to it; otherwise a new one-character literal record
// is created through add_simple.
1950 template <class charT, class traits, class Allocator>
1951 re_detail::re_syntax_base* BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::add_literal(re_detail::re_syntax_base* dat, charT c)
1953 if(dat && (dat->type == re_detail::syntax_element_literal))
1955 // add another charT to the list:
// Remember dat as a byte offset and re-derive the pointer after extend().
// NOTE(review): presumably because extend() may relocate the buffer -- confirm.
1956 std::ptrdiff_t pos = reinterpret_cast<unsigned char*>(dat) - reinterpret_cast<unsigned char*>(data.data());
1957 *reinterpret_cast<charT*>(data.extend(sizeof(charT))) = traits_inst.translate(c, (_flags & regbase::icase));
1958 dat = reinterpret_cast<re_detail::re_syntax_base*>(reinterpret_cast<unsigned char*>(data.data()) + pos);
1959 ++(static_cast<re_detail::re_literal*>(dat)->length);
// Start a fresh literal record; its single character is stored immediately
// after the re_literal header.
1964 dat = add_simple(dat, re_detail::syntax_element_literal, sizeof(re_detail::re_literal) + sizeof(charT));
1965 static_cast<re_detail::re_literal*>(dat)->length = 1;
1966 *reinterpret_cast<charT*>(reinterpret_cast<re_detail::re_literal*>(dat)+1) = traits_inst.translate(c, (_flags & regbase::icase));
// probe_restart: returns the regbase::restart_* value selected by the record
// at `dat`.
// NOTE(review): the case labels are syntax element types, so the switch is
// presumably on dat->type -- confirm.
1971 template <class charT, class traits, class Allocator>
1972 unsigned int BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::probe_restart(re_detail::re_syntax_base* dat)
// Group markers are skipped: the answer comes from the record that follows.
1976 case re_detail::syntax_element_startmark:
1977 case re_detail::syntax_element_endmark:
1978 return probe_restart(dat->next.p);
1979 case re_detail::syntax_element_start_line:
1980 return regbase::restart_line;
1981 case re_detail::syntax_element_word_start:
1982 return regbase::restart_word;
1983 case re_detail::syntax_element_buffer_start:
1984 return regbase::restart_buf;
1985 case re_detail::syntax_element_restart_continue:
1986 return regbase::restart_continue;
// Anything else: restart_any.
1988 return regbase::restart_any;
// fixup_leading_rep: examines the records starting at `dat` (with `end` as
// the second bound; set_expression passes 0 for the top-level call) while
// accumulating a length in `len`. Two side effects are visible here:
//  - a leading literal longer than two characters is recorded in
//    _leading_string / _leading_string_len, _restart_type becomes
//    restart_lit, and a KMP table for it is compiled into pkmp;
//  - a repeat record is flagged `leading` when nothing has been counted
//    before it (len == 0) and the recursive call over its body returns 1.
1992 template <class charT, class traits, class Allocator>
1993 unsigned int BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::fixup_leading_rep(re_detail::re_syntax_base* dat, re_detail::re_syntax_base* end)
1995 unsigned int len = 0;
// The leading-literal search is only considered when no `end` was supplied.
1996 bool leading_lit = end ? false : true;
2001 case re_detail::syntax_element_literal:
2002 len += static_cast<re_detail::re_literal*>(dat)->length;
2003 if((leading_lit) && (static_cast<re_detail::re_literal*>(dat)->length > 2))
2005 // we can do a literal search for the leading literal string
2006 // using Knuth-Morris-Pratt (or whatever), and only then check for
2007 // matches. We need a decent length string though to make it
// The literal's characters are stored directly after the re_literal header.
2009 _leading_string = reinterpret_cast<charT*>(reinterpret_cast<char*>(dat) + sizeof(re_detail::re_literal));
2010 _leading_string_len = static_cast<re_detail::re_literal*>(dat)->length;
2011 _restart_type = restart_lit;
2012 leading_lit = false;
2013 const charT* p1 = _leading_string;
2014 const charT* p2 = _leading_string + _leading_string_len;
// The translator's first argument is the icase flag extracted from _flags.
2015 pkmp = re_detail::kmp_compile(p1, p2, charT(), re_detail::kmp_translator<traits>(_flags&regbase::icase, &traits_inst), data.allocator());
2017 leading_lit = false;
2019 case re_detail::syntax_element_wild:
2021 leading_lit = false;
2023 case re_detail::syntax_element_match:
2025 case re_detail::syntax_element_backref:
2026 //case re_detail::syntax_element_jump:
2027 case re_detail::syntax_element_alt:
2028 case re_detail::syntax_element_combining:
2030 case re_detail::syntax_element_long_set:
2032 // we need to verify that there are no multi-character
2033 // collating elements inside the repeat:
// The set's single-character entries start right after the re_set_long header.
2034 const charT* p = reinterpret_cast<const charT*>(reinterpret_cast<const char*>(dat) + sizeof(re_detail::re_set_long));
2035 unsigned int csingles = static_cast<re_detail::re_set_long*>(dat)->csingles;
2036 for(unsigned int i = 0; i < csingles; ++i)
2038 if(re_detail::re_strlen(p) > 1)
2044 leading_lit = false;
2047 case re_detail::syntax_element_set:
2049 leading_lit = false;
2051 case re_detail::syntax_element_rep:
2052 if((len == 0) && (1 == fixup_leading_rep(dat->next.p, static_cast<re_detail::re_repeat*>(dat)->alt.p) ))
2054 static_cast<re_detail::re_repeat*>(dat)->leading = true;
// fail: updates regbase::failbit in _flags for error code `err` and, when
// exceptions are compiled in and regbase::use_except is set, throws
// bad_expression carrying the traits' error string for `err`.
// NOTE(review): set_expression calls fail(REG_NOERROR) "to clear any error",
// so the set branch is presumably taken for a non-zero err and the clear
// branch otherwise -- confirm.
2066 template <class charT, class traits, class Allocator>
2067 void BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::fail(unsigned int err)
2072 _flags |= regbase::failbit;
2073 #ifndef BOOST_NO_EXCEPTIONS
2074 if(_flags & regbase::use_except)
2076 throw bad_expression(traits_inst.error_string(err));
// Clears the failure flag.
2081 _flags &= ~regbase::failbit;
2089 } // namespace boost
2092 #endif // BOOST_REGEX_COMPILE_HPP