3 * Copyright (c) 1998-2002
6 * Use, modification and distribution are subject to the
7 * Boost Software License, Version 1.0. (See accompanying file
8 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
13 * LOCATION: see http://www.boost.org for most recent version.
14 * FILE regex_compile.hpp
15 * VERSION see <boost/version.hpp>
16 * DESCRIPTION: Declares reg_expression<> member functions. This is
17 * an internal header file, do not include directly.
20 #ifndef BOOST_REGEX_COMPILE_HPP
21 #define BOOST_REGEX_COMPILE_HPP
24 #ifdef BOOST_HAS_ABI_HEADERS
25 # include BOOST_ABI_PREFIX
28 #pragma option push -w-8004
// Functor fragment: constructed from a case flag and a traits pointer, and
// maps a character through traits::translate(c, icase).
// NOTE(review): the struct header and the declarations of `icase` and `pt`
// are not visible in this listing; presumably they are data members - confirm.
34 template <class traits>
37 typedef typename traits::char_type char_type;
40 kmp_translator(bool c, traits* p) : icase(c), pt(p) {}
41 char_type operator()(char_type c)
// Forward to the traits object, passing along the stored case flag.
43 return pt->translate(c, icase);
// Tests a character against the single-character entries of a long set.
//   c    - character to test
//   set_ - long-set record; the entry data is read starting at set_+1
//   e    - owning expression, supplies flags() and get_traits()
// Both visible return statements are adjusted by set_->isnot, so a negated
// set reports the opposite answer.
// NOTE(review): the comparison performed inside the loop is not visible in
// this listing.
48 template <class charT, class traits_type, class Allocator>
49 bool BOOST_REGEX_CALL re_maybe_set_member(charT c,
50 const re_set_long* set_,
51 const reg_expression<charT, traits_type, Allocator>& e)
// The entry data is laid out immediately after the re_set_long header.
53 const charT* p = reinterpret_cast<const charT*>(set_+1);
54 bool icase = e.flags() & regex_constants::icase;
// Translate the candidate once, up front, using the expression's case flag.
55 charT col = e.get_traits().translate(c, icase);
56 for(unsigned int i = 0; i < set_->csingles; ++i)
59 return set_->isnot ? false : true;
64 return set_->isnot ? true : false;
67 } // namespace re_detail
// Helper providing a family of static test() overloads (used by can_start as
// is_big_char<traits>::test). The generic overload reports whether a
// character, converted through traits_uchar_type to traits_size_type, is
// >= 256.
// NOTE(review): the struct header and the bodies of the char / unsigned char /
// signed char overloads are not visible here; presumably they return false -
// confirm.
69 template <class traits>
72 typedef typename traits::uchar_type traits_uchar_type;
73 typedef typename traits::size_type traits_size_type;
// The overloads that follow are conditionally compiled; the matching #endif
// is not visible in this listing.
74 #if !BOOST_WORKAROUND(BOOST_MSVC, <= 1200)
75 static bool test(char)
77 static bool test(unsigned char)
79 static bool test(signed char)
82 template <class charT> static bool test(charT c)
83 { return (traits_size_type)(traits_uchar_type)c >= 256; }
// can_start overload selected by the _wide_type tag.
// Characters for which is_big_char<traits>::test(c) holds take a separate
// branch (its body is not visible in this listing); every other character is
// looked up in _map and tested against mask.
86 template <class charT, class traits, class Allocator>
87 inline bool BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::can_start(charT c, const unsigned char* _map, unsigned char mask, const re_detail::_wide_type&)
89 if(is_big_char<traits>::test(c))
91 return BOOST_REGEX_MAKE_BOOL(_map[(traits_uchar_type)c] & mask);
// can_start overload selected by the _narrow_type tag: the character is
// converted to traits_uchar_type, used as an index into _map, and the entry
// is tested against mask.
94 template <class charT, class traits, class Allocator>
95 inline bool BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::can_start(charT c, const unsigned char* _map, unsigned char mask, const re_detail::_narrow_type&)
97 return BOOST_REGEX_MAKE_BOOL(_map[(traits_uchar_type)c] & mask);
// Constructs an expression object from an allocator only: the raw storage
// uses `a`, pkmp and _expression are null, and error_code_ starts as REG_EMPTY.
100 template <class charT, class traits, class Allocator>
101 reg_expression<charT, traits, Allocator>::reg_expression(const Allocator& a)
102 : regbase(), data(a), pkmp(0), error_code_(REG_EMPTY), _expression(0)
// Constructs from a character pointer and flags. Members start in the empty
// state; the work is delegated to set_expression with use_except OR-ed into
// the flags.
106 template <class charT, class traits, class Allocator>
107 reg_expression<charT, traits, Allocator>::reg_expression(const charT* p, flag_type f, const Allocator& a)
108 : data(a), pkmp(0), error_code_(REG_EMPTY), _expression(0)
110 set_expression(p, f | regex_constants::use_except);
// Constructs from the pointer pair (p1, p2) and flags. Members start in the
// empty state; the work is delegated to set_expression with use_except OR-ed
// into the flags.
113 template <class charT, class traits, class Allocator>
114 reg_expression<charT, traits, Allocator>::reg_expression(const charT* p1, const charT* p2, flag_type f, const Allocator& a)
115 : data(a), pkmp(0), error_code_(REG_EMPTY), _expression(0)
117 set_expression(p1, p2, f | regex_constants::use_except);
// Constructs from a pointer and a length: the pair (p, p + len) is handed to
// set_expression with use_except OR-ed into the flags.
120 template <class charT, class traits, class Allocator>
121 reg_expression<charT, traits, Allocator>::reg_expression(const charT* p, size_type len, flag_type f, const Allocator& a)
122 : data(a), pkmp(0), error_code_(REG_EMPTY), _expression(0)
124 set_expression(p, p + len, f | regex_constants::use_except);
// Copy constructor. Starts from an empty state that shares e's allocator and
// base-class (regbase) state, then either rebuilds from e's expression text
// or records e's error.
127 template <class charT, class traits, class Allocator>
128 reg_expression<charT, traits, Allocator>::reg_expression(const reg_expression<charT, traits, Allocator>& e)
129 : regbase(e), data(e.allocator()), pkmp(0), error_code_(REG_EMPTY), _expression(0)
132 // we do a deep copy only if e is a valid expression, otherwise fail.
134 if(e.error_code() == 0)
// Valid source: pass e's stored pattern text back through set_expression.
136 const charT* pe = e.expression();
137 set_expression(pe, pe + e._expression_len, e.flags() | regex_constants::use_except);
// NOTE(review): the `else` separating the two branches is not visible in this
// listing; presumably the lines below run only when e holds an error. They
// copy e's flags with use_except cleared and record e's error code via fail().
141 _flags = e.flags() & ~(regex_constants::use_except);
142 fail(e.error_code());
// Destructor: hands pkmp to re_detail::kmp_free together with the allocator
// obtained from `data`.
// NOTE(review): any guard on pkmp preceding the call is not visible here.
146 template <class charT, class traits, class Allocator>
147 reg_expression<charT, traits, Allocator>::~reg_expression()
150 re_detail::kmp_free(pkmp, data.allocator());
// Copy assignment. Self-assignment returns immediately; otherwise e's error
// code is recorded and, if that leaves this object error-free, e's pattern
// text is passed to set_expression.
153 template <class charT, class traits, class Allocator>
154 reg_expression<charT, traits, Allocator>& BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::operator=(const reg_expression<charT, traits, Allocator>& e)
157 // we do a deep copy only if e is a valid expression, otherwise fail.
159 if(this == &e) return *this;
// Adopt e's error state first; the pattern is only passed on when it is clear.
161 fail(e.error_code());
162 if(error_code() == 0)
163 set_expression(e._expression, e._expression + e._expression_len, e.flags() | regex_constants::use_except);
// Compares two expressions. Differing flags decide the result (the returned
// value is the difference of the two flag values); otherwise the result is
// that of comparing the two str() values.
167 template <class charT, class traits, class Allocator>
168 int BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::compare(const reg_expression<charT, traits, Allocator>& e)const
170 if(_flags != e.flags())
171 return _flags - e.flags();
172 return str().compare(e.str());
// Exchanges the state of this expression with `that`. Declared throw().
175 template <class charT, class traits, class Allocator>
176 void BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::swap(reg_expression& that)throw()
// Sub-objects with their own swap members go first: traits, raw storage, and
// the regbase base class.
178 traits_inst.swap(that.traits_inst);
179 data.swap(that.data);
180 static_cast<regbase&>(*this).swap(that);
// The remaining members are exchanged one by one with std::swap.
182 std::swap(_restart_type, that._restart_type);
183 std::swap(marks, that.marks);
184 std::swap(repeats, that.repeats);
185 std::swap(startmap, that.startmap);
186 std::swap(_expression_len, that._expression_len);
187 std::swap(_leading_len, that._leading_len);
188 std::swap(_leading_string, that._leading_string);
189 std::swap(_leading_string_len, that._leading_string_len);
190 std::swap(pkmp, that.pkmp);
191 std::swap(error_code_, that.error_code_);
192 std::swap(_expression, that._expression);
// Returns, by value, the allocator held by the `data` member.
195 template <class charT, class traits, class Allocator>
196 Allocator BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::allocator()const
198 return data.allocator();
// Same result as allocator(): returns, by value, the allocator held by the
// `data` member.
201 template <class charT, class traits, class Allocator>
202 Allocator BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::get_allocator()const
204 return data.allocator();
// Scans an inner [...] construct that begins at arg_first.
//   arg_first - in/out cursor; asserted to be on an open-set character
//   arg_last  - end of input
// The final return yields the syntax type of the character following the
// opening bracket when it is a colon, dot or equal sign, and 0 otherwise.
// NOTE(review): the loop body and the early exits attached to the checks
// below are not visible in this listing.
207 template <class charT, class traits, class Allocator>
208 unsigned int BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::parse_inner_set(const charT*& arg_first, const charT* arg_last)
211 // we have an inner [...] construct
213 jm_assert(traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*arg_first) == traits_type::syntax_open_set);
// Remember where the construct started so its length and delimiters can be
// inspected after the scan.
214 const charT* base = arg_first;
215 while( (arg_first != arg_last)
216 && (traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*arg_first) != traits_type::syntax_close_set) )
218 if(arg_first == arg_last)
// Fewer than five characters were consumed since `base`.
221 if((arg_first-base) < 5)
// The character after the opening bracket must equal the one at arg_first-2.
223 if(*(base+1) != *(arg_first-2))
225 unsigned int result = traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*(base+1));
// Special case: a colon-delimited construct exactly five characters long
// whose inner character is a left-word or right-word syntax character.
226 if((result == traits_type::syntax_colon) && ((arg_first-base) == 5))
228 unsigned type = traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*(base+2));
229 if((type == traits_type::syntax_left_word) || (type == traits_type::syntax_right_word))
232 return ((result == traits_type::syntax_colon) || (result == traits_type::syntax_dot) || (result == traits_type::syntax_equal)) ? result : 0;
// Loops while arg_first has not reached arg_last and the current character is
// classified as char_class_space; reports whether arg_last was reached.
// NOTE(review): the loop body (presumably advancing arg_first) is not visible
// in this listing.
236 template <class charT, class traits, class Allocator>
237 bool BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::skip_space(const charT*& arg_first, const charT* arg_last)
240 // returns true if we get to arg_last:
242 while((arg_first != arg_last) && (traits_inst.is_class(*arg_first, traits_type::char_class_space) == true))
246 return arg_first == arg_last;
// Parses a bounded-repeat specification.
//   ptr     - in/out cursor into the pattern
//   arg_end - end of the pattern
//   min/max - outputs; both are read with traits_inst.toi() in base 10
// NOTE(review): the error handling attached to each check, and the value
// given to max when no second number is present, are not visible in this
// listing.
249 template <class charT, class traits, class Allocator>
250 void BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::parse_range(const charT*& ptr, const charT* arg_end, unsigned& min, unsigned& max)
253 // we have {x} or {x,} or {x,y} NB no spaces inside braces
254 // anything else is illegal
255 // On input ptr points to "{"
258 if(skip_space(ptr, arg_end))
// The lower bound must start with a digit.
263 if(traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*ptr) != traits_type::syntax_digit)
268 min = traits_inst.toi(ptr, arg_end, 10);
269 if(skip_space(ptr, arg_end))
274 if(traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*ptr) == traits_type::syntax_comma)
276 //we have a second interval:
278 if(skip_space(ptr, arg_end))
// An explicit upper bound is read only when a digit follows the comma.
283 if(traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*ptr) == traits_type::syntax_digit)
284 max = traits_inst.toi(ptr, arg_end, 10);
292 if(skip_space(ptr, arg_end))
// With bk_braces set, a slash character is checked for before the closing
// brace.
302 if(_flags & bk_braces)
304 if(traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*ptr) != traits_type::syntax_slash)
311 // back\ is OK now check the }
313 if((ptr == arg_end) || (traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*ptr) != traits_type::syntax_close_brace))
320 else if(traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*ptr) != traits_type::syntax_close_brace)
// Decodes an escape sequence; the character at *arg_first selects the case.
//   arg_first - in/out cursor into the pattern
//   arg_last  - end of the pattern
// Returns a charT.
// NOTE(review): the switch statement itself, the bodies of the single-letter
// cases (a, f, n, r, t, v, e), the error handling and the final return are
// not visible in this listing.
327 template <class charT, class traits, class Allocator>
328 charT BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::parse_escape(const charT*& arg_first, const charT* arg_last)
331 traits_size_type c_unsigned = (traits_size_type)(traits_uchar_type)*arg_first;
332 // this is only used for the switch(), but cannot be folded in
333 // due to a bug in Comeau 4.2.44beta3
334 traits_size_type syntax = traits_inst.syntax_type(c_unsigned);
337 case traits_type::syntax_a:
341 case traits_type::syntax_f:
345 case traits_type::syntax_n:
349 case traits_type::syntax_r:
353 case traits_type::syntax_t:
357 case traits_type::syntax_v:
// Hexadecimal escape: either braced, \x{ddd}, or unbraced.
361 case traits_type::syntax_x:
363 if(arg_first == arg_last)
368 // maybe have \x{ddd}
369 if(traits_inst.syntax_type((traits_size_type)(traits_uchar_type)(*arg_first)) == traits_type::syntax_open_brace)
372 if(arg_first == arg_last)
// At least one hex digit is required inside the braces.
377 if(traits_inst.is_class(*arg_first, traits_type::char_class_xdigit) == false)
382 c = (charT)traits_inst.toi(arg_first, arg_last, -16);
// The braced form must be terminated by a close-brace character.
383 if((arg_first == arg_last) || (traits_inst.syntax_type((traits_size_type)(traits_uchar_type)(*arg_first)) != traits_type::syntax_close_brace))
392 if(traits_inst.is_class(*arg_first, traits_type::char_class_xdigit) == false)
397 c = (charT)traits_inst.toi(arg_first, arg_last, -16);
// Control-character escape: the following character is range-checked against
// '@' and 127, and the result is that character's value minus '@'.
400 case traits_type::syntax_c:
402 if(arg_first == arg_last)
407 if(((traits_uchar_type)(*arg_first) < (traits_uchar_type)'@')
408 || ((traits_uchar_type)(*arg_first) > (traits_uchar_type)127) )
413 c = (charT)((traits_uchar_type)(*arg_first) - (traits_uchar_type)'@');
416 case traits_type::syntax_e:
// Digit escape: the value is read with toi() and a radix argument of -8.
420 case traits_type::syntax_digit:
421 c = (charT)traits_inst.toi(arg_first, arg_last, -8);
// Builds the start maps for the compiled expression: first the
// expression-wide `startmap`, then one map per alt/rep node while walking the
// record list up to the match node. Repeat nodes matching the shape test
// below are re-tagged with a type specific to the repeated item.
430 template <class charT, class traits, class Allocator>
431 void BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::compile_maps()
433 re_detail::re_syntax_base* record = static_cast<re_detail::re_syntax_base*>(data.data());
434 // always compile the first _map:
435 std::memset(startmap, 0, 256);
436 record->can_be_null = 0;
437 compile_map(record, startmap, 0, re_detail::mask_all);
// Walk every record until the terminating match node.
439 while(record->type != re_detail::syntax_element_match)
441 if((record->type == re_detail::syntax_element_alt) || (record->type == re_detail::syntax_element_rep))
// Alt/rep nodes carry their own 256-entry map: mask_take is filled in from
// the `next` branch (bounded by the alt target) and mask_skip from the `alt`
// branch.
443 std::memset(&(static_cast<re_detail::re_jump*>(record)->_map), 0, 256);
444 record->can_be_null = 0;
445 compile_map(record->next.p, static_cast<re_detail::re_jump*>(record)->_map, &(record->can_be_null), re_detail::mask_take, static_cast<re_detail::re_jump*>(record)->alt.p);
446 compile_map(static_cast<re_detail::re_jump*>(record)->alt.p, static_cast<re_detail::re_jump*>(record)->_map, &(record->can_be_null), re_detail::mask_skip);
447 if(record->type == re_detail::syntax_element_rep)
449 re_detail::re_repeat* rep = static_cast<re_detail::re_repeat*>(record);
450 // set whether this is a singleton repeat or not:
// True when the alt target is reached three `next` hops after the repeat,
// i.e. exactly two nodes lie in between (presumably the repeated item and
// the closing jump - confirm).
451 if(rep->next.p->next.p->next.p == rep->alt.p)
453 switch(rep->next.p->type)
455 case re_detail::syntax_element_wild:
456 rep->type = re_detail::syntax_element_dot_rep;
458 case re_detail::syntax_element_literal:
459 rep->type = re_detail::syntax_element_char_rep;
461 case re_detail::syntax_element_set:
462 rep->type = re_detail::syntax_element_short_set_rep;
464 case re_detail::syntax_element_long_set:
// Long sets are only re-tagged when flagged as singleton.
465 if(static_cast<re_detail::re_set_long*>(rep->next.p)->singleton)
466 rep->type = re_detail::syntax_element_long_set_rep;
// No map pointer is passed here, so only can_be_null is computed.
// NOTE(review): presumably this is the branch for records that are neither
// alt nor rep; the `else` is not visible in this listing.
476 record->can_be_null = 0;
477 compile_map(record, 0, &(record->can_be_null), re_detail::mask_all);
479 record = record->next.p;
// After the loop `record` is the match node.
481 record->can_be_null = re_detail::mask_all;
// Recursive probe over the compiled state machine: judging by the case
// comments below, the result says whether character `cc` can begin a match
// starting at `node`.
//   node     - state to examine
//   cc       - candidate first character
//   terminal - node compared against in the jump case; repeat cases pass
//              their alt target here when probing the repeat body
// NOTE(review): the switch statement and the short return statements of
// several cases are not visible in this listing.
484 template <class charT, class traits, class Allocator>
485 bool BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::probe_start(
486 re_detail::re_syntax_base* node, charT cc, re_detail::re_syntax_base* terminal) const
492 case re_detail::syntax_element_startmark:
// Index -1: the node after next and the alt target of the next node must
// both accept cc.
493 if(static_cast<const re_detail::re_brace*>(node)->index == -1)
495 return probe_start(node->next.p->next.p, cc, terminal)
496 && probe_start(static_cast<const re_detail::re_jump*>(node->next.p)->alt.p, cc, terminal);
// Index -3: the next node is skipped and probing continues after it.
498 else if(static_cast<const re_detail::re_brace*>(node)->index == -3)
500 return probe_start(node->next.p->next.p, cc, terminal);
502 // doesn't tell us anything about the next character, so:
503 return probe_start(node->next.p, cc, terminal);
504 case re_detail::syntax_element_endmark:
505 if(static_cast<const re_detail::re_brace*>(node)->index == -3)
510 case re_detail::syntax_element_start_line:
511 case re_detail::syntax_element_word_boundary:
512 case re_detail::syntax_element_buffer_start:
513 case re_detail::syntax_element_restart_continue:
514 // doesn't tell us anything about the next character, so:
515 return probe_start(node->next.p, cc, terminal);
516 case re_detail::syntax_element_literal:
517 // only the first character of the literal can match:
518 // note these have already been translated:
519 if(*reinterpret_cast<charT*>(static_cast<re_detail::re_literal*>(node)+1) == traits_inst.translate(cc, (_flags & regex_constants::icase)))
522 case re_detail::syntax_element_end_line:
523 // next character (if there is one!) must be a newline:
524 if(traits_inst.is_separator(traits_inst.translate(cc, (_flags & regex_constants::icase))))
527 case re_detail::syntax_element_wild:
529 case re_detail::syntax_element_match:
531 case re_detail::syntax_element_within_word:
532 case re_detail::syntax_element_word_start:
533 return traits_inst.is_class(traits_inst.translate(cc, (_flags & regex_constants::icase)), traits_type::char_class_word);
534 case re_detail::syntax_element_word_end:
535 // what follows must not be a word character,
536 return traits_inst.is_class(traits_inst.translate(cc, (_flags & regex_constants::icase)), traits_type::char_class_word) ? false : true;
537 case re_detail::syntax_element_buffer_end:
538 // we can be null, nothing must follow,
539 // NB we assume that this is followed by
540 // re_detail::syntax_element_match, if it's not then we can
541 // never match anything anyway!!
543 case re_detail::syntax_element_soft_buffer_end:
544 // we can be null, only newlines must follow,
545 // NB we assume that this is followed by
546 // re_detail::syntax_element_match, if it's not then we can
547 // never match anything anyway!!
548 return traits_inst.is_separator(traits_inst.translate(cc, (_flags & regex_constants::icase)));
549 case re_detail::syntax_element_backref:
550 // there's no easy way to determine this
551 // which is not to say it can't be done!
554 case re_detail::syntax_element_long_set:
555 // we can not be null,
556 // we need to add already translated values in the set
557 // to values in the _map
558 return re_detail::re_maybe_set_member(cc, static_cast<const re_detail::re_set_long*>(node), *this) || (re_detail::re_is_set_member(static_cast<const charT*>(&cc), static_cast<const charT*>(&cc+1), static_cast<re_detail::re_set_long*>(node), *this) != &cc);
559 case re_detail::syntax_element_set:
560 // set all the elements that are set in corresponding set:
561 c = (traits_size_type)(traits_uchar_type)traits_inst.translate(cc, (_flags & regex_constants::icase));
562 return static_cast<re_detail::re_set*>(node)->_map[c] != 0;
563 case re_detail::syntax_element_jump:
// A jump whose target lies before this node in memory is a backward jump.
564 if(static_cast<re_detail::re_jump*>(node)->alt.p < node)
567 // caused only by end of repeat section, we'll treat this
568 // the same as a match, because the sub-expression has matched.
569 if(node->next.p == terminal)
570 return true; // null repeat - we can always take this
574 // take the jump, add in fix for the fact that if the
575 // repeat that we're jumping to has non-zero minimum count
576 // then we need to add in the possibility that we could still
578 re_detail::re_syntax_base* next = static_cast<re_detail::re_jump*>(node)->alt.p;
579 bool b = probe_start(next, cc, terminal);
580 if((next->type == re_detail::syntax_element_rep) && (static_cast<re_detail::re_repeat*>(next)->min != 0))
582 b = b || probe_start(static_cast<re_detail::re_jump*>(next)->alt.p, cc, terminal);
588 // take the jump and compile:
589 return probe_start(static_cast<re_detail::re_jump*>(node)->alt.p, cc, terminal);
590 case re_detail::syntax_element_alt:
591 // we need to take the OR of the two alternatives:
592 return probe_start(static_cast<re_detail::re_jump*>(node)->alt.p, cc, terminal) || probe_start(node->next.p, cc, terminal);
593 case re_detail::syntax_element_rep:
594 case re_detail::syntax_element_char_rep:
595 case re_detail::syntax_element_dot_rep:
596 case re_detail::syntax_element_long_set_rep:
597 case re_detail::syntax_element_short_set_rep:
598 // we need to take the OR of the two alternatives
// A zero minimum makes the repeat skippable, so the alt target is probed as
// well; otherwise only the repeat body counts.
599 if(static_cast<re_detail::re_repeat*>(node)->min == 0)
600 return probe_start(node->next.p, cc, static_cast<re_detail::re_jump*>(node)->alt.p) || probe_start(static_cast<re_detail::re_jump*>(node)->alt.p, cc, terminal);
602 return probe_start(node->next.p, cc, static_cast<re_detail::re_jump*>(node)->alt.p);
603 case re_detail::syntax_element_combining:
604 return !traits_inst.is_combining(traits_inst.translate(cc, (_flags & regex_constants::icase)));
// Companion to probe_start that takes no candidate character; compile_map
// uses its result to decide whether to update the `pnull` output.
//   node     - state to examine
//   terminal - node compared against in the jump case
// NOTE(review): the switch statement and the short return statements of
// several cases are not visible in this listing.
609 template <class charT, class traits, class Allocator>
610 bool BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::probe_start_null(re_detail::re_syntax_base* node, re_detail::re_syntax_base* terminal)const
614 case re_detail::syntax_element_endmark:
615 if(static_cast<const re_detail::re_brace*>(node)->index == -3)
620 case re_detail::syntax_element_startmark:
621 case re_detail::syntax_element_start_line:
622 case re_detail::syntax_element_word_boundary:
623 case re_detail::syntax_element_buffer_start:
624 case re_detail::syntax_element_restart_continue:
625 case re_detail::syntax_element_end_line:
626 case re_detail::syntax_element_word_end:
627 // doesn't tell us anything about the next character, so:
628 return probe_start_null(node->next.p, terminal);
629 case re_detail::syntax_element_match:
630 case re_detail::syntax_element_buffer_end:
631 case re_detail::syntax_element_soft_buffer_end:
632 case re_detail::syntax_element_backref:
634 case re_detail::syntax_element_jump:
// A jump whose target lies before this node in memory is a backward jump.
635 if(static_cast<re_detail::re_jump*>(node)->alt.p < node)
638 // caused only by end of repeat section, we'll treat this
639 // the same as a match, because the sub-expression has matched.
640 // this is only caused by NULL repeats as in "(a*)*" or "(\<)*"
641 // these are really nonsense and make the matching code much
642 // harder, it would be nice to get rid of them altogether.
643 if(node->next.p == terminal)
646 return probe_start_null(static_cast<re_detail::re_jump*>(node)->alt.p, terminal);
649 // take the jump and compile:
650 return probe_start_null(static_cast<re_detail::re_jump*>(node)->alt.p, terminal);
651 case re_detail::syntax_element_alt:
652 // we need to take the OR of the two alternatives:
653 return probe_start_null(static_cast<re_detail::re_jump*>(node)->alt.p, terminal) || probe_start_null(node->next.p, terminal);
654 case re_detail::syntax_element_rep:
655 // only need to consider skipping the repeat:
656 return probe_start_null(static_cast<re_detail::re_jump*>(node)->alt.p, terminal);
// Probes the state machine at `node` once for each of the 256 values 0..255
// and, when `pnull` is non-null, once more for the null case.
//   node     - start of the sub-expression to probe
//   _map     - map associated with the probe results (may be 0, see
//              compile_maps)
//   pnull    - optional output for the null probe
//   mask     - bit pattern associated with this call
//   terminal - forwarded unchanged to probe_start / probe_start_null
// NOTE(review): the statements that write to _map and *pnull are not visible
// in this listing.
663 template <class charT, class traits, class Allocator>
664 void BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::compile_map(
665 re_detail::re_syntax_base* node, unsigned char* _map,
666 unsigned int* pnull, unsigned char mask, re_detail::re_syntax_base* terminal)const
670 for(unsigned int i = 0; i < 256; ++i)
672 if(probe_start(node, (charT)i, terminal))
676 if(pnull && probe_start_null(node, terminal))
// Adds arg_size to the stored offsets of the records that follow `j`.
//   j        - record before the first one to adjust; the walk starts at the
//              record found at offset j->next.i from data.data()
//   arg_size - number of bytes by which the offsets are advanced
// NOTE(review): the loop construct, the switch and the loop exit are not
// visible in this listing.
680 template <class charT, class traits, class Allocator>
681 void BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::move_offsets(re_detail::re_syntax_base* j, unsigned arg_size)
684 # pragma warning(push)
685 # pragma warning(disable: 4127)
687 // move all offsets starting with j->link forward by arg_size
688 // called after an insert:
// At this stage links are stored as byte offsets from data.data().
689 j = reinterpret_cast<re_detail::re_syntax_base*>(reinterpret_cast<char*>(data.data()) + j->next.i);
// Rep, jump and alt records carry two offsets (alt and next); both move.
694 case re_detail::syntax_element_rep:
695 static_cast<re_detail::re_jump*>(j)->alt.i += arg_size;
696 j->next.i += arg_size;
698 case re_detail::syntax_element_jump:
699 case re_detail::syntax_element_alt:
700 static_cast<re_detail::re_jump*>(j)->alt.i += arg_size;
701 j->next.i += arg_size;
704 j->next.i += arg_size;
// next.i equal to arg_size means the offset was zero before the adjustment.
707 if(j->next.i == arg_size)
709 j = reinterpret_cast<re_detail::re_syntax_base*>(reinterpret_cast<char*>(data.data()) + j->next.i);
712 # pragma warning(pop)
// Appends a set record and returns the result of compile_set_aux.
//   dat   - preceding record; its next offset is set to the current end of
//           the raw storage
//   cls   - character-class mask
//   isnot - forwarded unchanged to compile_set_aux
// NOTE(review): the statement that puts `cls` onto the `classes` stack is not
// visible in this listing.
716 template <class charT, class traits, class Allocator>
717 re_detail::re_syntax_base* BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::compile_set_simple(re_detail::re_syntax_base* dat, unsigned long cls, bool isnot)
// Tag type used to pick the compile_set_aux overload for charT.
719 typedef typename re_detail::is_byte<charT>::width_type width_type;
720 re_detail::jstack<traits_string_type, Allocator> singles(64, data.allocator());
721 re_detail::jstack<traits_string_type, Allocator> ranges(64, data.allocator());
722 re_detail::jstack<boost::uint_fast32_t, Allocator> classes(64, data.allocator());
723 re_detail::jstack<traits_string_type, Allocator> equivalents(64, data.allocator());
// With icase set, the upper and lower classes are both widened to alpha.
724 if(_flags & regbase::icase)
726 if((cls == traits_type::char_class_upper) || (cls == traits_type::char_class_lower))
728 cls = traits_type::char_class_alpha;
// Link the previous record to the one about to be appended.
735 dat->next.i = data.size();
737 return compile_set_aux(singles, ranges, classes, equivalents, isnot, width_type());
// Parses a set that starts at *arg_first (asserted to be an open-set
// character), collecting entries on four stacks (singles, ranges, classes,
// equivalents), then hands them to compile_set_aux.
//   arg_first - in/out cursor into the pattern
//   arg_last  - end of the pattern
// NOTE(review): large parts of this function (switch statements, labels,
// error paths, the declarations of `isnot`, `done` and `last_*`, and the
// final return) are not visible in this listing; the comments below describe
// only what the visible lines show.
740 template <class charT, class traits, class Allocator>
741 re_detail::re_syntax_base* BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::compile_set(const charT*& arg_first, const charT* arg_last)
743 re_detail::jstack<traits_string_type, Allocator> singles(64, data.allocator());
744 re_detail::jstack<traits_string_type, Allocator> ranges(64, data.allocator());
745 re_detail::jstack<boost::uint_fast32_t, Allocator> classes(64, data.allocator());
746 re_detail::jstack<traits_string_type, Allocator> equivalents(64, data.allocator());
747 bool has_digraphs = false;
748 jm_assert(traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*arg_first) == traits_type::syntax_open_set);
750 bool started = false;
// `l` tracks the kind of the previously parsed item (starts as last_none).
761 unsigned l = last_none;
762 traits_string_type s;
764 while((arg_first != arg_last) && !done)
766 traits_size_type c = (traits_size_type)(traits_uchar_type)*arg_first;
767 // this is only used for the switch(), but cannot be folded in
768 // due to a bug in Comeau 4.2.44beta3
769 traits_size_type syntax = traits_inst.syntax_type(c);
772 case traits_type::syntax_caret:
// The caret gets special handling only before anything else has been parsed
// and when no negation has been seen yet; otherwise it is a literal.
773 if(!started && !isnot)
780 goto char_set_literal;
783 case traits_type::syntax_open_set:
// Without the char_classes flag an inner '[' is an ordinary literal.
785 if((_flags & char_classes) == 0)
788 goto char_set_literal;
790 // check to see if we really have a class:
791 const charT* base = arg_first;
792 // this is only used for the switch(), but cannot be folded in
793 // due to a bug in Comeau 4.2.44beta3
794 unsigned int inner_set = parse_inner_set(arg_first, arg_last);
797 case traits_type::syntax_colon:
// The class name is the text between the two delimiter pairs.
804 boost::uint_fast32_t id = traits_inst.lookup_classname(base+2, arg_first-2);
// With icase set, the upper and lower classes are both widened to alpha.
805 if(_flags & regex_constants::icase)
807 if((id == traits_type::char_class_upper) || (id == traits_type::char_class_lower))
809 id = traits_type::char_class_alpha;
822 case traits_type::syntax_dot:
824 // we have a collating element [.collating-name.]
826 if(traits_inst.lookup_collatename(s, base+2, arg_first-2))
831 if(s.size())goto char_set_literal;
835 case traits_type::syntax_equal:
837 // we have an equivalence class [=collating-name=]
839 if(traits_inst.lookup_collatename(s, base+2, arg_first-2))
841 std::size_t len = s.size();
847 s[i] = traits_inst.translate(s[i], (_flags & regex_constants::icase));
// Equivalence classes are stored in primary-transformed form.
850 traits_string_type s2;
851 traits_inst.transform_primary(s2, s);
852 equivalents.push(s2);
// A left-word / right-word construct that is the only content of the set
// becomes a word-start / word-end node instead of a set.
860 case traits_type::syntax_left_word:
861 if((started == false) && (traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*arg_first) == traits_type::syntax_close_set))
864 return add_simple(0, re_detail::syntax_element_word_start);
868 case traits_type::syntax_right_word:
869 if((started == false) && (traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*arg_first) == traits_type::syntax_close_set))
872 return add_simple(0, re_detail::syntax_element_word_end);
879 unsigned int t = traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*(base+1));
880 if((t != traits_type::syntax_colon) && (t != traits_type::syntax_dot) && (t != traits_type::syntax_equal))
884 goto char_set_literal;
890 if(arg_first == arg_last)
897 case traits_type::syntax_close_set:
901 goto char_set_literal;
905 case traits_type::syntax_dash:
909 goto char_set_literal;
912 if(traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*arg_first) == traits_type::syntax_close_set)
916 goto char_set_literal;
// This check involves the singles stack being non-empty and the previous
// item having been a single.
918 if((singles.empty() == true) || (l != last_single))
923 ranges.push(singles.peek());
924 if(singles.peek().size() <= 1) // leave digraphs and ligatures in place
928 case traits_type::syntax_slash:
// Escapes inside a set are interpreted only with escape_in_lists set.
929 if(_flags & regex_constants::escape_in_lists)
932 if(arg_first == arg_last)
934 /*traits_size_type*/ c = (traits_size_type)(traits_uchar_type)*arg_first;
935 // this is only used for the switch(), but cannot be folded in
936 // due to a bug in Comeau 4.2.44beta3
937 traits_size_type syntax4 = traits_inst.syntax_type(c);
// Class escapes: each of these cases pushes the matching class mask.
940 case traits_type::syntax_w:
946 classes.push(traits_type::char_class_word);
951 case traits_type::syntax_d:
957 classes.push(traits_type::char_class_digit);
962 case traits_type::syntax_s:
968 classes.push(traits_type::char_class_space);
973 case traits_type::syntax_l:
979 classes.push(traits_type::char_class_lower);
984 case traits_type::syntax_u:
990 classes.push(traits_type::char_class_upper);
// NOTE(review): the handling of the upper-case class escapes is not visible.
995 case traits_type::syntax_W:
996 case traits_type::syntax_D:
997 case traits_type::syntax_S:
998 case traits_type::syntax_U:
999 case traits_type::syntax_L:
1003 c = parse_escape(arg_first, arg_last);
1006 goto char_set_literal;
1012 goto char_set_literal;
1018 // get string length to stop us going past the end of string (DWA)
1019 std::size_t len = s.size();
1022 s[i] = traits_inst.translate(s[i], (_flags & regex_constants::icase));
1030 if(s.size() > 1) // add ligatures to singles list as well
// Tag type used to pick the compile_set_aux overload for charT.
1044 typedef typename re_detail::is_byte<charT>::width_type width_type;
1046 re_detail::re_syntax_base* result;
// The first call always uses the _wide_type overload, the second the one
// chosen by width_type.
// NOTE(review): the condition choosing between the two calls is not visible;
// presumably it depends on has_digraphs - confirm.
1048 result = compile_set_aux(singles, ranges, classes, equivalents, isnot, re_detail::_wide_type());
1050 result = compile_set_aux(singles, ranges, classes, equivalents, isnot, width_type());
1053 if((result == 0) && (_flags & regex_constants::use_except))
// compile_set_aux overload selected by the _wide_type tag. Appends a
// re_set_long header to the raw storage, followed by the string data drained
// from the stacks (singles, then range bounds, then equivalents); classes are
// OR-ed into a single mask.
//   singles/ranges/classes/equivalents - stacks filled by the caller
//   isnot - forces the record's singleton field to true
// NOTE(review): the pop() calls, the counter increments, the range-order
// check and the final return are not visible in this listing.
1059 template <class charT, class traits, class Allocator>
1060 re_detail::re_syntax_base* BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::compile_set_aux(re_detail::jstack<traits_string_type, Allocator>& singles, re_detail::jstack<traits_string_type, Allocator>& ranges, re_detail::jstack<boost::uint_fast32_t, Allocator>& classes, re_detail::jstack<traits_string_type, Allocator>& equivalents, bool isnot, const re_detail::_wide_type&)
// Remember the header's byte offset rather than a pointer: `dat` is computed
// from data.data() + base only after all the extend() calls below.
1062 size_type base = data.size();
1063 data.extend(sizeof(re_detail::re_set_long));
1064 unsigned int csingles = 0;
1065 unsigned int cranges = 0;
1066 boost::uint_fast32_t cclasses = 0;
1067 unsigned int cequivalents = 0;
1068 bool nocollate_state = !(flags() & regex_constants::collate);
1069 bool singleton = true;
1071 while(singles.empty() == false)
1074 const traits_string_type& s = singles.peek();
// len counts the terminator too, so len > 2 * sizeof(charT) means the entry
// holds more than one character.
1075 std::size_t len = (s.size() + 1) * sizeof(charT);
1076 if(len > sizeof(charT) * 2)
1078 std::memcpy(reinterpret_cast<charT*>(data.extend(len)), s.c_str(), len);
1081 while(ranges.empty() == false)
1083 traits_string_type c1, c2;
// Range bounds are passed through traits transform().
1087 traits_inst.transform(c1, ranges.peek());
1092 traits_inst.transform(c2, ranges.peek());
1096 // for some reason bc5 crashes when throwing exceptions
1097 // from here - probably an EH-compiler bug, but hard to
1099 // delay throw to later:
// The current flags are saved in `f` and use_except is cleared.
1101 boost::uint_fast32_t f = _flags;
1102 _flags &= ~regex_constants::use_except;
// Both bounds are appended including their terminators.
1111 std::size_t len = (re_detail::re_strlen(c1.c_str()) + 1) * sizeof(charT);
1112 std::memcpy(data.extend(len), c1.c_str(), len);
1113 len = (re_detail::re_strlen(c2.c_str()) + 1) * sizeof(charT);
1114 std::memcpy(data.extend(len), c2.c_str(), len);
1116 while(classes.empty() == false)
1118 cclasses |= classes.peek();
1121 while(equivalents.empty() == false)
1124 const traits_string_type& s = equivalents.peek();
1125 std::size_t len = (re_detail::re_strlen(s.c_str()) + 1) * sizeof(charT);
1126 std::memcpy(reinterpret_cast<charT*>(data.extend(len)), s.c_str(), len);
// Locate the header from the saved offset and fill it in.
1130 re_detail::re_set_long* dat = reinterpret_cast<re_detail::re_set_long*>(reinterpret_cast<unsigned char*>(data.data()) + base);
1131 dat->type = re_detail::syntax_element_long_set;
1132 dat->csingles = csingles;
1133 dat->cranges = cranges;
1134 dat->cclasses = cclasses;
1135 dat->cequivalents = cequivalents;
// Negated sets are always marked singleton.
1138 dat->singleton = isnot ? true : singleton;
// compile_set_aux overload selected by the _narrow_type tag. Appends a re_set
// record whose 256-entry _map marks the members with mask_all.
//   singles/ranges/classes/equivalents - stacks filled by the caller
//   isnot - see the note on the inversion loop below
// NOTE(review): the pop() calls, the non-collating branches, the range-order
// check and the final return are not visible in this listing.
1142 template <class charT, class traits, class Allocator>
1143 re_detail::re_syntax_base* BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::compile_set_aux(re_detail::jstack<traits_string_type, Allocator>& singles, re_detail::jstack<traits_string_type, Allocator>& ranges, re_detail::jstack<boost::uint_fast32_t, Allocator>& classes, re_detail::jstack<traits_string_type, Allocator>& equivalents, bool isnot, const re_detail::_narrow_type&)
// The new record starts out zeroed.
1145 re_detail::re_set* dat = reinterpret_cast<re_detail::re_set*>(data.extend(sizeof(re_detail::re_set)));
1146 std::memset(dat, 0, sizeof(re_detail::re_set));
1148 while(singles.empty() == false)
// Only the first character of each single indexes the map.
1150 dat->_map[(traits_size_type)(traits_uchar_type)*(singles.peek().c_str())] = re_detail::mask_all;
1153 while(ranges.empty() == false)
// c1 and c2 hold the range bounds; c3 and c4 are used in the 256-value loop
// below.
1155 traits_string_type c1, c2, c3, c4;
1157 if((flags() & regex_constants::collate) == 0)
1160 traits_inst.transform(c1, ranges.peek());
1162 if((flags() & regex_constants::collate) == 0)
1165 traits_inst.transform(c2, ranges.peek());
1170 // for some reason bc5 crashes when throwing exceptions
1171 // from here - probably an EH-compiler bug, but hard to
1173 // delay throw to later:
// The current flags are saved in `f` and use_except is cleared.
1175 boost::uint_fast32_t f = _flags;
1176 _flags &= ~regex_constants::use_except;
// Test each of the 256 values against the range: c1 is the upper bound and
// c2 the lower bound in the comparison below.
1184 for(unsigned int i = 0; i < 256; ++i)
1187 if((flags() & regex_constants::collate) == 0)
1190 traits_inst.transform(c3, c4);
1191 if((c3 <= c1) && (c3 >= c2))
1192 dat->_map[i] = re_detail::mask_all;
1195 while(equivalents.empty() == false)
1197 traits_string_type c1, c2;
// Mark each value whose primary transform equals the stored equivalent.
1198 for(unsigned int i = 0; i < 256; ++i)
1201 traits_inst.transform_primary(c1, c2);
1202 if(c1 == equivalents.peek())
1203 dat->_map[i] = re_detail::mask_all;
// Fold all requested classes into one mask before testing each value.
1208 boost::uint_fast32_t l_flags = 0;
1209 while(classes.empty() == false)
1211 l_flags |= classes.peek();
1216 for(unsigned int i = 0; i < 256; ++i)
// The map entry is indexed by the translated character.
1218 if(traits_inst.is_class(charT(i), l_flags))
1219 dat->_map[(traits_uchar_type)traits_inst.translate((charT)i, (_flags & regex_constants::icase))] = re_detail::mask_all;
// Inverts every map entry.
// NOTE(review): presumably guarded by `isnot`; the guard is not visible.
1225 for(unsigned int i = 0; i < 256; ++i)
1227 dat->_map[i] = !dat->_map[i];
1231 dat->type = re_detail::syntax_element_set;
1236 #ifndef __CODEGUARD__
1237 // this must not be inline when Borland's codeguard support is turned
1238 // on, otherwise we _will_ get spurious codeguard errors...
// add_offset: returns the address lying `off` bytes past `base`, typed as a
// re_syntax_base*.
//   base - start address the offset is measured from
//   off  - distance in bytes (may be any std::ptrdiff_t value)
1241 re_detail::re_syntax_base* add_offset(void* base, std::ptrdiff_t off)
// the addition is done on a char* so that `off` counts bytes, not records
1243 return reinterpret_cast<re_detail::re_syntax_base*>(reinterpret_cast<char*>(base) + off);
// fixup_apply: rewrites the offsets stored in the compiled records (the `.i`
// members) into pointers (the `.p` members), resolving each offset against
// the address of `b` through add_offset.
//   b       - first record of the compiled program; also the base address
//   cbraces - number of bool flags allocated in `pb`; back-reference indexes
//             are checked against it
// The visible cases also give each repeat its `id`, record which end marks
// have been seen, and reject back-references whose index is >= cbraces or
// whose flag in `pb` is still false.
1246 template <class charT, class traits, class Allocator>
1247 void BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::fixup_apply(re_detail::re_syntax_base* b, unsigned cbraces)
1249 typedef typename boost::detail::rebind_allocator<bool, Allocator>::type b_alloc;
1251 register unsigned char* base = reinterpret_cast<unsigned char*>(b);
1252 register re_detail::re_syntax_base* ptr = b;
// allocator rebound to bool, used for the `pb` flag array below
1254 b_alloc a(data.allocator());
1255 #ifndef BOOST_NO_EXCEPTIONS
1259 pb = a.allocate(cbraces);
1260 BOOST_REGEX_NOEH_ASSERT(pb)
// NOTE(review): the loop body is not visible here - presumably it initialises
// every flag in pb; confirm against the full file.
1261 for(unsigned i = 0; i < cbraces; ++i)
// repeat: resolve the alt target, then assign the repeat its id
1270 case re_detail::syntax_element_rep:
1271 jm_assert(data.size() > static_cast<re_detail::re_jump*>(ptr)->alt.i);
1272 static_cast<re_detail::re_jump*>(ptr)->alt.p = add_offset(base, static_cast<re_detail::re_jump*>(ptr)->alt.i);
1273 #ifdef BOOST_REGEX_DEBUG
1274 if((re_detail::padding_mask & reinterpret_cast<int>(static_cast<re_detail::re_jump*>(ptr)->alt.p)) && (static_cast<re_detail::re_jump*>(ptr)->alt.p != b))
1276 jm_trace("padding mis-aligment in repeat jump to object type: " << static_cast<re_detail::re_jump*>(ptr)->alt.p->type)
1277 //jm_assert(0 == (padding_mask & (int)((re_detail::re_jump*)ptr)->alt.p));
1280 static_cast<re_detail::re_repeat*>(ptr)->id = repeats;
// jump / alternative: resolve the alt target
1283 case re_detail::syntax_element_jump:
1284 case re_detail::syntax_element_alt:
1285 jm_assert(data.size() > static_cast<re_detail::re_jump*>(ptr)->alt.i);
1286 static_cast<re_detail::re_jump*>(ptr)->alt.p = add_offset(base, static_cast<re_detail::re_jump*>(ptr)->alt.i);
1287 #ifdef BOOST_REGEX_DEBUG
1288 if((re_detail::padding_mask & reinterpret_cast<int>(static_cast<re_detail::re_jump*>(ptr)->alt.p) && (static_cast<re_detail::re_jump*>(ptr)->alt.p != b)))
1290 jm_trace("padding mis-aligment in alternation jump to object type: " << static_cast<re_detail::re_jump*>(ptr)->alt.p->type)
1291 //jm_assert(0 == (padding_mask & (int)((re_detail::re_jump*)ptr)->alt.p));
// back-reference: the index must be below cbraces and its end mark must
// already have been recorded in pb; on failure pb is released
1295 case re_detail::syntax_element_backref:
1296 if((static_cast<re_detail::re_brace*>(ptr)->index >= (int)cbraces) || (pb[static_cast<re_detail::re_brace*>(ptr)->index] == false) )
1299 a.deallocate(pb, cbraces);
// end mark: remember that the group with this (positive) index has closed
1303 case re_detail::syntax_element_endmark:
1304 if(static_cast<re_detail::re_brace*>(ptr)->index > 0)
1305 pb[static_cast<re_detail::re_brace*>(ptr)->index] = true;
// resolve the link to the following record
1309 jm_assert(data.size() > ptr->next.i);
1310 ptr->next.p = add_offset(base, ptr->next.i);
1311 #ifdef BOOST_REGEX_DEBUG
// NOTE(review): the second operand tests alt.p although the pointer being
// checked is next.p - looks copied from the jump cases above; confirm.
1312 if((re_detail::padding_mask & (int)(ptr->next.p)) && (static_cast<re_detail::re_jump*>(ptr)->alt.p != b))
1314 jm_trace("padding mis-alignment in next record of type " << ptr->next.p->type)
1315 jm_assert(0 == (re_detail::padding_mask & (int)(ptr->next.p)));
// release the flag array (two further release sites follow; the control
// flow that separates them is not visible in this excerpt)
1321 a.deallocate(pb, cbraces);
1323 #ifndef BOOST_NO_EXCEPTIONS
1328 a.deallocate(pb, cbraces);
// set_expression: compiles the pattern [arg_first, arg_last) under the
// option flags f into the record buffer `data`.
//   arg_first / arg_last - half-open character range holding the pattern
//   f                    - option flags
// Returns error_code() on every visible exit path, except when the input
// aliases the stored expression, where the result of the recursive call is
// returned.
// Visible phases:
//   1. If arg_first is the stored expression, compile from a copy instead.
//   2. Clear the error state; return early when the range is empty.
//   3. Emit records: verbatim literals when the literal flag is set,
//      otherwise by switching on the syntax type of each character.
//   4. Append the final match record; return early if `mark` is not empty.
//   5. Store a zero-terminated copy of the pattern after a 256-byte start map.
//   6. Convert offsets to pointers (fixup_apply), choose the restart type and
//      build the kmp_compile table for a pattern that is one literal.
// NOTE(review): this listing omits short lines (braces, break statements,
// some conditions); comments below describe only what is visible.
1335 template <class charT, class traits, class Allocator>
1336 unsigned int BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::set_expression(const charT* arg_first, const charT* arg_last, flag_type f)
1339 # pragma warning(push)
1340 # pragma warning(disable: 4127)
1343 // strxfrm not working on OpenBSD??
1344 f &= ~regex_constants::collate;
// the pattern handed in is our own stored expression: compile from a private
// copy (presumably because the stored text is replaced during compilation)
1347 if(arg_first == expression())
1349 traits_string_type s(arg_first, arg_last);
1350 return set_expression(s.c_str(), s.c_str() + s.size(), f);
1352 typedef typename traits_type::sentry sentry_t;
1353 sentry_t sent(traits_inst);
// start of the pattern, kept for the copy made after parsing
1356 const charT* base = arg_first;
1359 fail(REG_NOERROR); // clear any error
1360 _leading_len = 0; // set this to non-zero if there are any backrefs, we'll refer to it later...
// empty range: leave early
1362 if(arg_first >= arg_last)
1365 return error_code();
1368 const charT* ptr = arg_first;
// mark   - offsets into `data` of open groups and of pending alternative jumps
// markid - supplies the index written into an end mark when a group closes
1370 re_detail::jstack<std::size_t, Allocator> mark(64, data.allocator());
1371 re_detail::jstack<int, Allocator> markid(64, data.allocator());
// offset of the most recently closed group; a repeat following an end mark
// is inserted there
1372 std::size_t last_mark_popped = 0;
1373 register traits_size_type c;
1374 register re_detail::re_syntax_base* dat;
// bounds of the repeat currently being built
1376 unsigned rep_min = 0;
1377 unsigned rep_max = 0;
// literal mode: no syntax interpretation, every character is added as a
// (translated) literal
1385 if(_flags & regex_constants::literal)
1387 while(ptr != arg_last)
1389 dat = add_literal(dat, traits_inst.translate(*ptr, (_flags & regex_constants::icase)));
// normal mode: dispatch on the syntax type of the current character
1394 while (ptr < arg_last)
1396 c = (traits_size_type)(traits_uchar_type)*ptr;
1397 // this is only used for the switch(), but cannot be folded in
1398 // due to a bug in Comeau 4.2.44beta3
1399 traits_size_type syntax = traits_inst.syntax_type(c);
// open bracket: a plain literal when bk_parens is set, otherwise a start mark
1402 case traits_type::syntax_open_bracket:
1403 if(_flags & bk_parens)
1405 dat = add_literal(dat, (charT)c);
1411 dat = add_simple(dat, re_detail::syntax_element_startmark, sizeof(re_detail::re_brace));
// index is either 0 or the next mark number (the selecting condition is not
// visible in this excerpt)
1415 static_cast<re_detail::re_brace*>(dat)->index = 0;
1420 static_cast<re_detail::re_brace*>(dat)->index = marks++;
// remember where this group starts
1422 mark.push(data.index(dat));
1425 // check for perl like (?...) extension syntax
1426 c = (traits_size_type)(traits_uchar_type)*ptr;
1427 if(((_flags & (bk_parens|perlex)) == perlex) && (traits_type::syntax_question == traits_inst.syntax_type(c)))
1430 c = (traits_size_type)(traits_uchar_type)*ptr;
1431 // this is only used for the switch(), but cannot be folded in
1432 // due to a bug in Comeau 4.2.44beta3
1433 traits_size_type syntax2 = traits_inst.syntax_type(c);
// the brace index encodes the kind of extension: 0 for syntax_colon and
// syntax_hash, -1 for syntax_equal, -3 for syntax_right_word, -2 for syntax_not
1436 case traits_type::syntax_colon:
1437 static_cast<re_detail::re_brace*>(dat)->index = 0;
1438 if((_flags & nosubs) == 0)
1444 case traits_type::syntax_equal:
1445 static_cast<re_detail::re_brace*>(dat)->index = -1;
// shared tail for the assertion kinds: a jump record is appended whose target
// is filled in when the group closes
1448 common_forward_assert:
1449 if((_flags & nosubs) == 0)
1453 dat = add_simple(dat, re_detail::syntax_element_jump, re_detail::re_jump_size);
1456 // we don't know what value to put here yet,
1457 // use an arbitrarily large value for now
1458 // and check it later:
1459 static_cast<re_detail::re_jump*>(dat)->alt.i = INT_MAX/2;
1460 mark.push(data.size() - re_detail::re_jump_size);
1462 case traits_type::syntax_right_word:
1463 static_cast<re_detail::re_brace*>(dat)->index = -3;
1466 goto common_forward_assert;
1467 case traits_type::syntax_not:
1468 static_cast<re_detail::re_brace*>(dat)->index = -2;
1471 goto common_forward_assert;
1472 case traits_type::syntax_hash:
1473 // comment just skip it:
1474 static_cast<re_detail::re_brace*>(dat)->index = 0;
1475 if((_flags & nosubs) == 0)
// advance until the closing bracket of the comment
1481 c = (traits_size_type)(traits_uchar_type)*ptr;
1482 }while(traits_type::syntax_close_bracket != traits_inst.syntax_type(c));
1487 // error, return to standard parsing and let that handle the error:
// close bracket: a plain literal when bk_parens is set, otherwise closes the
// innermost open group
1493 case traits_type::syntax_close_bracket:
1494 if(_flags & bk_parens)
1496 dat = add_literal(dat, (charT)c);
1505 dat->next.i = data.size();
1511 return error_code();
1513 // see if we have an empty alternative:
1514 if(mark.peek() == data.index(dat) )
1516 re_detail::re_syntax_base* para = reinterpret_cast<re_detail::re_syntax_base*>(reinterpret_cast<char*>(data.data()) + mark.peek());
1517 if(para->type == re_detail::syntax_element_jump)
1520 return error_code();
1524 // pop any pushed alternatives and set the target arg_last destination:
1525 dat = reinterpret_cast<re_detail::re_syntax_base*>(reinterpret_cast<unsigned char*>(data.data()) + mark.peek());
1526 while(dat->type == re_detail::syntax_element_jump)
1528 static_cast<re_detail::re_jump*>(dat)->alt.i = data.size();
1533 return error_code();
1535 dat = reinterpret_cast<re_detail::re_jump*>(reinterpret_cast<unsigned char*>(data.data()) + mark.peek());
// emit the end mark carrying the index taken from markid
1538 dat = add_simple(0, re_detail::syntax_element_endmark, sizeof(re_detail::re_brace));
1539 static_cast<re_detail::re_brace*>(dat)->index = markid.peek();
1541 last_mark_popped = mark.peek();
1545 case traits_type::syntax_char:
1546 dat = add_literal(dat, (charT)c);
// escape character: the meaning depends on the character that follows; an
// escape at the very end of the pattern leaves early
1549 case traits_type::syntax_slash:
1551 if(++ptr == arg_last)
1554 return error_code();
1556 c = (traits_size_type)(traits_uchar_type)*ptr;
1557 // this is only used for the switch(), but cannot be folded in
1558 // due to a bug in Comeau 4.2.44beta3
1559 traits_size_type syntax3 = traits_inst.syntax_type(c);
// with bk_parens the escaped brackets are the grouping operators
1562 case traits_type::syntax_open_bracket:
1563 if(_flags & bk_parens)
1564 goto open_bracked_jump;
1566 case traits_type::syntax_close_bracket:
1567 if(_flags & bk_parens)
1568 goto close_bracked_jump;
// with bk_plus_qm (and limited_ops clear) the escaped plus / question mark
// are tested as repeat operators
1570 case traits_type::syntax_plus:
1571 if((_flags & bk_plus_qm) && ((_flags & limited_ops) == 0))
1574 rep_max = (unsigned)-1;
1578 case traits_type::syntax_question:
1579 if((_flags & bk_plus_qm) && ((_flags & limited_ops) == 0))
1586 case traits_type::syntax_or:
1587 if(((_flags & bk_vbar) == 0) || (_flags & limited_ops))
1589 goto alt_string_jump;
1590 case traits_type::syntax_open_brace:
1591 if( ((_flags & bk_braces) == 0) || ((_flags & intervals) == 0))
1594 // we have {x} or {x,} or {x,y}:
1595 parse_range(ptr, arg_last, rep_min, rep_max);
// escaped digit: tested against bk_refs; emits a back-reference record, or
// is parsed as an octal character code
1598 case traits_type::syntax_digit:
1599 if(_flags & bk_refs)
1602 int i = traits_inst.toi((charT)c);
1605 // we can have \025 which means take char whose
1606 // code is 25 (octal), so parse string:
1607 c = traits_inst.toi(ptr, arg_last, -8);
1611 dat = add_simple(dat, re_detail::syntax_element_backref, sizeof(re_detail::re_brace));
1612 static_cast<re_detail::re_brace*>(dat)->index = i;
// escapes that map directly onto one assertion record
1618 case traits_type::syntax_b: // re_detail::syntax_element_word_boundary
1619 dat = add_simple(dat, re_detail::syntax_element_word_boundary);
1622 case traits_type::syntax_B:
1623 dat = add_simple(dat, re_detail::syntax_element_within_word);
1626 case traits_type::syntax_left_word:
1627 dat = add_simple(dat, re_detail::syntax_element_word_start);
1630 case traits_type::syntax_right_word:
1631 dat = add_simple(dat, re_detail::syntax_element_word_end);
// character-class escapes: the lower-case case compiles the class, the
// upper-case case passes true as third argument to compile_set_simple
1634 case traits_type::syntax_w: //re_detail::syntax_element_word_char
1635 dat = compile_set_simple(dat, traits_type::char_class_word);
1638 case traits_type::syntax_W:
1639 dat = compile_set_simple(dat, traits_type::char_class_word, true);
1642 case traits_type::syntax_d: //re_detail::syntax_element_word_char
1643 dat = compile_set_simple(dat, traits_type::char_class_digit);
1646 case traits_type::syntax_D:
1647 dat = compile_set_simple(dat, traits_type::char_class_digit, true);
1650 case traits_type::syntax_s: //re_detail::syntax_element_word_char
1651 dat = compile_set_simple(dat, traits_type::char_class_space);
1654 case traits_type::syntax_S:
1655 dat = compile_set_simple(dat, traits_type::char_class_space, true);
1658 case traits_type::syntax_l: //re_detail::syntax_element_word_char
1659 dat = compile_set_simple(dat, traits_type::char_class_lower);
1662 case traits_type::syntax_L:
1663 dat = compile_set_simple(dat, traits_type::char_class_lower, true);
1666 case traits_type::syntax_u: //re_detail::syntax_element_word_char
1667 dat = compile_set_simple(dat, traits_type::char_class_upper);
1670 case traits_type::syntax_U:
1671 dat = compile_set_simple(dat, traits_type::char_class_upper, true);
// quoted run: characters are added as literals; an escape followed by
// syntax_E is tested for (presumably the terminator - the surrounding loop
// is not visible here)
1674 case traits_type::syntax_Q:
1681 return error_code();
1683 if(traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*ptr) == traits_type::syntax_slash)
1686 if((ptr != arg_last) && (traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*ptr) == traits_type::syntax_E))
1690 dat = add_literal(dat, *(ptr-1));
1694 dat = add_literal(dat, *ptr);
1699 case traits_type::syntax_C:
1700 dat = add_simple(dat, re_detail::syntax_element_wild);
1703 case traits_type::syntax_X:
1704 dat = add_simple(dat, re_detail::syntax_element_combining);
1707 case traits_type::syntax_Z:
1708 dat = add_simple(dat, re_detail::syntax_element_soft_buffer_end);
1711 case traits_type::syntax_G:
1712 dat = add_simple(dat, re_detail::syntax_element_restart_continue);
1715 case traits_type::syntax_start_buffer:
1716 dat = add_simple(dat, re_detail::syntax_element_buffer_start);
1719 case traits_type::syntax_end_buffer:
1720 dat = add_simple(dat, re_detail::syntax_element_buffer_end);
// any other escape: parse_escape yields the character, added as a literal
1724 c = (traits_size_type)(traits_uchar_type)parse_escape(ptr, arg_last);
1725 dat = add_literal(dat, (charT)c);
1728 dat = add_literal(dat, (charT)c);
1732 case traits_type::syntax_dollar:
1733 dat = add_simple(dat, re_detail::syntax_element_end_line, sizeof(re_detail::re_syntax_base));
1736 case traits_type::syntax_caret:
1737 dat = add_simple(dat, re_detail::syntax_element_start_line, sizeof(re_detail::re_syntax_base));
1740 case traits_type::syntax_dot:
1741 dat = add_simple(dat, re_detail::syntax_element_wild, sizeof(re_detail::re_syntax_base));
// star: unbounded repeat. The code from here to the end of the repeat
// construction also serves plus, question mark and {m,n} (the label that
// they enter through is not visible in this excerpt).
1744 case traits_type::syntax_star:
1746 rep_max = (unsigned)-1;
// offset: position in `data` of the item being repeated
1750 std::ptrdiff_t offset;
1754 return error_code();
// a closed group is repeated as a whole, starting at its recorded offset
1758 case re_detail::syntax_element_endmark:
1759 offset = last_mark_popped;
// only the last character of a multi-character literal is repeated: it is
// split off into a literal record of length one
1761 case re_detail::syntax_element_literal:
1762 if(static_cast<re_detail::re_literal*>(dat)->length > 1)
1765 charT lit = *reinterpret_cast<charT*>(reinterpret_cast<char*>(dat) + sizeof(re_detail::re_literal) + ((static_cast<re_detail::re_literal*>(dat)->length-1)*sizeof(charT)));
1766 --static_cast<re_detail::re_literal*>(dat)->length;
1767 dat = add_simple(dat, re_detail::syntax_element_literal, sizeof(re_detail::re_literal) + sizeof(charT));
1768 static_cast<re_detail::re_literal*>(dat)->length = 1;
1769 *reinterpret_cast<charT*>(static_cast<re_detail::re_literal*>(dat)+1) = lit;
1771 offset = reinterpret_cast<char*>(dat) - reinterpret_cast<char*>(data.data());
1773 case re_detail::syntax_element_backref:
1774 case re_detail::syntax_element_long_set:
1775 case re_detail::syntax_element_set:
1776 case re_detail::syntax_element_wild:
1777 case re_detail::syntax_element_combining:
1778 // we're repeating a single item:
1779 offset = reinterpret_cast<char*>(dat) - reinterpret_cast<char*>(data.data());
1783 return error_code();
1786 dat->next.i = data.size();
1787 //unsigned pos = (char*)dat - (char*)data.data();
1789 // add the trailing jump:
1790 dat = add_simple(dat, re_detail::syntax_element_jump, re_detail::re_jump_size);
1791 static_cast<re_detail::re_jump*>(dat)->alt.i = 0;
1793 // now insert the leading repeater:
1794 dat = static_cast<re_detail::re_syntax_base*>(data.insert(offset, re_detail::re_repeater_size));
1795 dat->next.i = (reinterpret_cast<char*>(dat) - reinterpret_cast<char*>(data.data())) + re_detail::re_repeater_size;
1796 dat->type = re_detail::syntax_element_rep;
1797 static_cast<re_detail::re_repeat*>(dat)->alt.i = data.size();
1798 static_cast<re_detail::re_repeat*>(dat)->min = rep_min;
1799 static_cast<re_detail::re_repeat*>(dat)->max = rep_max;
1800 static_cast<re_detail::re_repeat*>(dat)->leading = false;
1801 static_cast<re_detail::re_repeat*>(dat)->greedy = true;
// records after the insertion point moved by re_repeater_size bytes
1802 move_offsets(dat, re_detail::re_repeater_size);
1805 // now check to see if we have a non-greedy repeat:
1806 if((ptr != arg_last) && (_flags & (perlex | limited_ops | bk_plus_qm | bk_braces)) == perlex)
1808 c = (traits_size_type)(traits_uchar_type)*ptr;
1809 if(traits_type::syntax_question == traits_inst.syntax_type(c))
1811 // OK repeat is non-greedy:
1812 static_cast<re_detail::re_repeat*>(dat)->greedy = false;
// the trailing jump occupies the last re_jump_size bytes of `data`; point it
// back at the repeat record at `offset`
1816 dat = reinterpret_cast<re_detail::re_syntax_base*>(reinterpret_cast<char*>(data.data()) + data.size() - re_detail::re_jump_size);
1817 static_cast<re_detail::re_repeat*>(dat)->alt.i = offset;
// unescaped plus / question mark: plain literals when bk_plus_qm or
// limited_ops is set
1820 case traits_type::syntax_plus:
1821 if(_flags & (bk_plus_qm | limited_ops))
1823 dat = add_literal(dat, (charT)c);
1828 rep_max = (unsigned)-1;
1830 case traits_type::syntax_question:
1831 if(_flags & (bk_plus_qm | limited_ops))
1833 dat = add_literal(dat, (charT)c);
// bracket expression: delegated to compile_set
1840 case traits_type::syntax_open_set:
1845 dat->next.i = data.size();
1848 dat = compile_set(ptr, arg_last);
1851 if((_flags & regex_constants::failbit) == 0)
1853 return error_code();
// alternation: a plain literal when bk_vbar or limited_ops is set
1856 case traits_type::syntax_or:
1858 if(_flags & (bk_vbar | limited_ops))
1860 dat = add_literal(dat, (charT)c);
1870 // start of pattern can't have empty "|"
1872 return error_code();
1874 // see if we have an empty alternative:
1875 if(mark.empty() == false)
1876 if(mark.peek() == data.index(dat))
1879 return error_code();
// jump that ends the left-hand alternative; its target is filled in later
1882 dat = add_simple(dat, re_detail::syntax_element_jump, re_detail::re_jump_size);
1885 // we don't know what value to put here yet,
1886 // use an arbitrarily large value for now
1887 // and check it later (TODO!)
1888 static_cast<re_detail::re_jump*>(dat)->alt.i = INT_MAX/2;
1890 // now work out where to insert:
1891 std::size_t offset = 0;
1892 if(mark.empty() == false)
1894 // we have a '(' or '|' to go back to:
1895 offset = mark.peek();
1896 re_detail::re_syntax_base* base2 = reinterpret_cast<re_detail::re_syntax_base*>(reinterpret_cast<unsigned char*>(data.data()) + offset);
1897 offset = base2->next.i;
// insert the alt record in front of the left-hand alternative; its alt
// target is the current end of `data`, where the right-hand side will start
1899 re_detail::re_jump* j = static_cast<re_detail::re_jump*>(data.insert(offset, re_detail::re_jump_size));
1900 j->type = re_detail::syntax_element_alt;
1901 j->next.i = offset + re_detail::re_jump_size;
1902 j->alt.i = data.size();
1903 move_offsets(j, re_detail::re_jump_size);
1904 dat = reinterpret_cast<re_detail::re_syntax_base*>(reinterpret_cast<unsigned char*>(data.data()) + data.size() - re_detail::re_jump_size);
1905 mark.push(data.size() - re_detail::re_jump_size);
// open brace: a plain literal when bk_braces is set or intervals is clear
1909 case traits_type::syntax_open_brace:
1910 if((_flags & bk_braces) || ((_flags & intervals) == 0))
1912 dat = add_literal(dat, (charT)c);
1916 // we have {x} or {x,} or {x,y}:
1917 parse_range(ptr, arg_last, rep_min, rep_max);
// newline: acts as an alternation operator when newline_alt is set
1919 case traits_type::syntax_newline:
1920 if(_flags & newline_alt)
1921 goto alt_string_jump;
1922 dat = add_literal(dat, (charT)c);
1925 case traits_type::syntax_close_brace:
1926 if(_flags & bk_braces)
1928 dat = add_literal(dat, (charT)c);
1933 return error_code();
1935 dat = add_literal(dat, (charT)c);
// link the last emitted record to the current end of `data`
1946 dat->next.i = data.size();
1949 // see if we have an empty alternative:
1950 if(mark.empty() == false)
1951 if(mark.peek() == data.index(dat) )
1953 re_detail::re_syntax_base* para = reinterpret_cast<re_detail::re_syntax_base*>(reinterpret_cast<char*>(data.data()) + mark.peek());
1954 if(para->type == re_detail::syntax_element_jump)
1957 return error_code();
1963 if(mark.empty() == false)
1965 // pop any pushed alternatives and set the target arg_last destination:
1966 dat = reinterpret_cast<re_detail::re_syntax_base*>(reinterpret_cast<unsigned char*>(data.data()) + mark.peek());
1967 while(dat->type == re_detail::syntax_element_jump)
1969 static_cast<re_detail::re_jump*>(dat)->alt.i = data.size();
1971 if(mark.empty() == true)
1973 dat = reinterpret_cast<re_detail::re_jump*>(reinterpret_cast<unsigned char*>(data.data()) + mark.peek());
// final record of the program: the match state
1977 dat = static_cast<re_detail::re_brace*>(data.extend(sizeof(re_detail::re_syntax_base)));
1978 dat->type = re_detail::syntax_element_match;
// anything still on `mark` at this point leads to an early return
1981 if(mark.empty() == false)
1984 return error_code();
1988 // allocate space for start _map:
1989 startmap = reinterpret_cast<unsigned char*>(data.extend(256 + ((arg_last - base + 1) * sizeof(charT))));
1991 // and copy the expression we just compiled:
1992 _expression = reinterpret_cast<charT*>(reinterpret_cast<char*>(startmap) + 256);
1993 _expression_len = arg_last - base;
1994 std::memcpy(_expression, base, _expression_len * sizeof(charT));
1995 *(_expression + _expression_len) = charT(0);
1998 // now we need to apply fixups to the array
1999 // so that we can use pointers and not indexes
2000 fixup_apply(static_cast<re_detail::re_syntax_base*>(data.data()), marks);
2002 // check for error during fixup:
2003 if(_flags & regex_constants::failbit)
2004 return error_code();
2007 // finally compile the maps so that we can make intelligent choices
2008 // whenever we encounter an alternative:
// release the table held in pkmp (the guarding condition is not visible here)
2012 re_detail::kmp_free(pkmp, data.allocator());
2015 re_detail::re_syntax_base* sbase = static_cast<re_detail::re_syntax_base*>(data.data());
2016 _restart_type = probe_restart(sbase);
2017 _leading_len = fixup_leading_rep(sbase, 0);
// the whole program is one literal followed directly by the match record:
// use restart_fixed_lit and build a search table over the literal's characters
2018 if((sbase->type == re_detail::syntax_element_literal) && (sbase->next.p->type == re_detail::syntax_element_match))
2020 _restart_type = restart_fixed_lit;
2023 charT* p1 = reinterpret_cast<charT*>(reinterpret_cast<char*>(sbase) + sizeof(re_detail::re_literal));
2024 charT* p2 = p1 + static_cast<re_detail::re_literal*>(sbase)->length;
// the icase bit selects case-insensitive translation in kmp_translator; the
// operator is written with surrounding spaces so that the text "&reg" never
// appears (it had been mis-decoded into a registered-trademark sign here)
2025 pkmp = re_detail::kmp_compile(p1, p2, charT(), re_detail::kmp_translator<traits>(_flags & regex_constants::icase, &traits_inst), data.allocator());
2028 return error_code();
2034 # pragma warning(pop)
// add_simple: appends a new record to `data` and returns a pointer to it.
//   dat      - the previously emitted record (callers in this file also pass 0)
//   type     - element type for the new record (the line storing it is not
//              visible in this excerpt)
//   arg_size - requested size in bytes; raised to sizeof(re_syntax_base) if
//              smaller
2039 template <class charT, class traits, class Allocator>
2040 re_detail::re_syntax_base* BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::add_simple(re_detail::re_syntax_base* dat, re_detail::syntax_element_type type, unsigned int arg_size)
// link the previous record to the position the new one is about to occupy
// NOTE(review): presumably guarded by a null check on dat that is not visible
// here, since callers pass 0 - confirm against the full file.
2045 dat->next.i = data.size();
// never allocate less than a bare re_syntax_base
2047 if(arg_size < sizeof(re_detail::re_syntax_base))
2048 arg_size = sizeof(re_detail::re_syntax_base);
2049 dat = static_cast<re_detail::re_syntax_base*>(data.extend(arg_size));
// add_literal: adds character c to the program, after passing it through
// traits_inst.translate with the current icase flag.
//   dat - the previously emitted record, or null
//   c   - the character to add
// When dat is a literal record the character is appended to it and its
// length incremented; the second group of statements creates a new literal
// record of length one through add_simple.
2055 template <class charT, class traits, class Allocator>
2056 re_detail::re_syntax_base* BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::add_literal(re_detail::re_syntax_base* dat, charT c)
2058 if(dat && (dat->type == re_detail::syntax_element_literal))
2060 // add another charT to the list:
// dat is remembered as an offset and re-derived from data.data() after the
// extend (presumably because extend may move the buffer - confirm)
2061 std::ptrdiff_t pos = reinterpret_cast<unsigned char*>(dat) - reinterpret_cast<unsigned char*>(data.data());
2062 *reinterpret_cast<charT*>(data.extend(sizeof(charT))) = traits_inst.translate(c, (_flags & regex_constants::icase));
2063 dat = reinterpret_cast<re_detail::re_syntax_base*>(reinterpret_cast<unsigned char*>(data.data()) + pos);
2064 ++(static_cast<re_detail::re_literal*>(dat)->length);
// new literal record: header followed directly by its single character
2069 dat = add_simple(dat, re_detail::syntax_element_literal, sizeof(re_detail::re_literal) + sizeof(charT));
2070 static_cast<re_detail::re_literal*>(dat)->length = 1;
2071 *reinterpret_cast<charT*>(reinterpret_cast<re_detail::re_literal*>(dat)+1) = traits_inst.translate(c, (_flags & regex_constants::icase));
// probe_restart: picks the regbase::restart_* value from the type of the
// record `dat`.
//   start mark / end mark - restart_any when the brace index is -2, otherwise
//                           the decision is delegated to the next record
//   start_line            - restart_line
//   word_start            - restart_word
//   buffer_start          - restart_buf
//   restart_continue      - restart_continue
// The last return yields restart_any (its label is not visible in this
// excerpt).
2076 template <class charT, class traits, class Allocator>
2077 unsigned int BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::probe_restart(re_detail::re_syntax_base* dat)
2081 case re_detail::syntax_element_startmark:
2082 case re_detail::syntax_element_endmark:
2083 if(static_cast<const re_detail::re_brace*>(dat)->index == -2)
2084 return regbase::restart_any;
// marks do not decide anything themselves: look at the record that follows
2085 return probe_restart(dat->next.p);
2086 case re_detail::syntax_element_start_line:
2087 return regbase::restart_line;
2088 case re_detail::syntax_element_word_start:
2089 return regbase::restart_word;
2090 case re_detail::syntax_element_buffer_start:
2091 return regbase::restart_buf;
2092 case re_detail::syntax_element_restart_continue:
2093 return regbase::restart_continue;
2095 return regbase::restart_any;
// fixup_leading_rep: walks the records from `dat` up to (not including)
// `arg_end`, summing literal lengths into `len`, and sets up two
// optimisations on the way:
//   - a leading literal longer than two characters becomes _leading_string,
//     switches _restart_type to restart_lit and gets a kmp_compile table;
//   - a repeat met while len and _leading_len are both 0, whose body yields 1
//     from a recursive call, has its `leading` member set to leading_lit.
//   dat     - first record to examine
//   arg_end - record to stop at; leading_lit starts out true only when this
//             is null
// NOTE(review): this listing omits short lines, including the return
// statements and the statements that follow several case labels; comments
// below describe only what is visible.
2099 template <class charT, class traits, class Allocator>
2100 unsigned int BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::fixup_leading_rep(re_detail::re_syntax_base* dat, re_detail::re_syntax_base* arg_end)
2102 unsigned int len = 0;
// NOTE(review): `x >= restart_word || x <= restart_continue` holds for every
// x whenever restart_word <= restart_continue - confirm the enumerator order
// and whether `&&` was intended. The guarded statement is not visible here.
2103 if((_restart_type >= restart_word) || (_restart_type <= restart_continue))
2105 bool leading_lit = arg_end ? false : true;
2106 while(dat != arg_end)
2110 case re_detail::syntax_element_literal:
2111 len += static_cast<re_detail::re_literal*>(dat)->length;
2112 if((leading_lit) && (static_cast<re_detail::re_literal*>(dat)->length > 2))
2114 // we can do a literal search for the leading literal string
2115 // using Knuth-Morris-Pratt (or whatever), and only then check for
2116 // matches. We need a decent length string though to make it
// the literal's characters are stored directly after its re_literal header
2118 _leading_string = reinterpret_cast<charT*>(reinterpret_cast<char*>(dat) + sizeof(re_detail::re_literal));
2119 _leading_string_len = static_cast<re_detail::re_literal*>(dat)->length;
2120 _restart_type = restart_lit;
2121 leading_lit = false;
2122 const charT* p1 = _leading_string;
2123 const charT* p2 = _leading_string + _leading_string_len;
// the icase bit selects case-insensitive translation in kmp_translator; the
// operator is written with surrounding spaces so that the text "&reg" never
// appears (it had been mis-decoded into a registered-trademark sign here)
2124 pkmp = re_detail::kmp_compile(p1, p2, charT(), re_detail::kmp_translator<traits>(_flags & regex_constants::icase, &traits_inst), data.allocator());
// any literal, long or short, ends the search for a leading literal
2126 leading_lit = false;
2128 case re_detail::syntax_element_wild:
2130 leading_lit = false;
2132 case re_detail::syntax_element_match:
2134 case re_detail::syntax_element_backref:
2135 //case re_detail::syntax_element_jump:
2136 case re_detail::syntax_element_alt:
2137 case re_detail::syntax_element_combining:
2139 case re_detail::syntax_element_long_set:
2141 // we need to verify that there are no multi-character
2142 // collating elements inside the repeat:
2143 if(!static_cast<re_detail::re_set_long*>(dat)->singleton)
2146 leading_lit = false;
2149 case re_detail::syntax_element_set:
2151 leading_lit = false;
2153 case re_detail::syntax_element_rep:
2154 case re_detail::syntax_element_dot_rep:
2155 case re_detail::syntax_element_char_rep:
2156 case re_detail::syntax_element_short_set_rep:
2157 case re_detail::syntax_element_long_set_rep:
// the repeat body spans next.p up to alt.p; it is measured recursively
2158 if((len == 0) && (_leading_len == 0) && (1 == fixup_leading_rep(dat->next.p, static_cast<re_detail::re_repeat*>(dat)->alt.p) ))
2160 static_cast<re_detail::re_repeat*>(dat)->leading = leading_lit;
// a start mark with index -2 is singled out (the statement it guards is not
// visible in this excerpt)
2164 case re_detail::syntax_element_startmark:
2165 if(static_cast<const re_detail::re_brace*>(dat)->index == -2)
// fail: records the outcome `err` of a compilation step.
//   err - error code to record
// One visible path sets failbit in _flags and, when exceptions are enabled
// and use_except is set, reports the error through re_detail::raise_error;
// the other visible path clears failbit. The condition choosing between them
// is not visible in this excerpt (presumably whether err is non-zero).
2176 template <class charT, class traits, class Allocator>
2177 void BOOST_REGEX_CALL reg_expression<charT, traits, Allocator>::fail(unsigned int err)
2182 _flags |= regex_constants::failbit;
2183 #ifndef BOOST_NO_EXCEPTIONS
2184 if(_flags & regex_constants::use_except)
2186 re_detail::raise_error(traits_inst, err);
// success path: drop any failure flag left over from an earlier compilation
2191 _flags &= ~regex_constants::failbit;
2197 #ifdef BOOST_HAS_ABI_HEADERS
2198 # include BOOST_ABI_SUFFIX
2201 } // namespace boost
2204 #endif // BOOST_REGEX_COMPILE_HPP