#endif
namespace boost{
-namespace re_detail{
+namespace BOOST_REGEX_DETAIL_NS{
#ifdef BOOST_MSVC
#pragma warning(push)
#pragma warning(disable:4244 4800)
#endif
+inline boost::intmax_t umax(mpl::false_ const&)
+{
+ // Get out clause here, just in case numeric_limits is unspecialized:
+ return std::numeric_limits<boost::intmax_t>::is_specialized ? (std::numeric_limits<boost::intmax_t>::max)() : INT_MAX;
+}
+inline boost::intmax_t umax(mpl::true_ const&)
+{
+ return (std::numeric_limits<std::size_t>::max)();
+}
+
+inline boost::intmax_t umax()
+{
+ return umax(mpl::bool_<std::numeric_limits<boost::intmax_t>::digits >= std::numeric_limits<std::size_t>::digits>());
+}
+
template <class charT, class traits>
class basic_regex_parser : public basic_regex_creator<charT, traits>
{
bool parse_inner_set(basic_char_set<charT, traits>& char_set);
bool parse_QE();
bool parse_perl_extension();
+ bool parse_perl_verb();
+ bool match_verb(const char*);
bool add_emacs_code(bool negate);
bool unwind_alts(std::ptrdiff_t last_paren_start);
digraph<charT> get_next_set_literal(basic_char_set<charT, traits>& char_set);
std::ptrdiff_t m_paren_start; // where the last seen ')' began (where repeats are inserted).
std::ptrdiff_t m_alt_insert_point; // where to insert the next alternative
bool m_has_case_change; // true if somewhere in the current block the case has changed
+ unsigned m_recursion_count; // How many times we've called parse_all.
#if defined(BOOST_MSVC) && defined(_M_IX86)
// This is an ugly warning suppression workaround (for warnings *inside* std::vector
// that can not otherwise be suppressed)...
template <class charT, class traits>
basic_regex_parser<charT, traits>::basic_regex_parser(regex_data<charT, traits>* data)
- : basic_regex_creator<charT, traits>(data), m_mark_count(0), m_mark_reset(-1), m_max_mark(0), m_paren_start(0), m_alt_insert_point(0), m_has_case_change(false)
+ : basic_regex_creator<charT, traits>(data), m_mark_count(0), m_mark_reset(-1), m_max_mark(0), m_paren_start(0), m_alt_insert_point(0), m_has_case_change(false), m_recursion_count(0)
{
}
// have had an unexpected ')' :
if(!result)
{
- fail(regex_constants::error_paren, ::boost::re_detail::distance(m_base, m_position), "Found a closing ) with no corresponding openening parenthesis.");
+ fail(regex_constants::error_paren, ::boost::BOOST_REGEX_DETAIL_NS::distance(m_base, m_position), "Found a closing ) with no corresponding openening parenthesis.");
return;
}
// if an error has been set then give up now:
template <class charT, class traits>
bool basic_regex_parser<charT, traits>::parse_all()
{
+ if (++m_recursion_count > 400)
+ {
+ // exceeded internal limits
+ fail(boost::regex_constants::error_complexity, m_position - m_base, "Exceeded nested brace limit.");
+ }
bool result = true;
while(result && (m_position != m_end))
{
result = (this->*m_parser_proc)();
}
+ --m_recursion_count;
return result;
}
{
if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question)
return parse_perl_extension();
+ if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_star)
+ return parse_perl_verb();
}
//
// update our mark count, and append the required state:
//
if(m_position == m_end)
{
- this->fail(regex_constants::error_paren, ::boost::re_detail::distance(m_base, m_end));
+ this->fail(regex_constants::error_paren, ::boost::BOOST_REGEX_DETAIL_NS::distance(m_base, m_end));
return false;
}
- BOOST_ASSERT(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark);
+ if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
+ return false;
#ifndef BOOST_NO_STD_DISTANCE
if(markid && (this->flags() & regbase::save_subexpression_location))
this->m_pdata->m_subs.at(markid - 1).second = std::distance(m_base, m_position);
template <class charT, class traits>
bool basic_regex_parser<charT, traits>::parse_basic_escape()
{
- ++m_position;
+ if(++m_position == m_end)
+ {
+ fail(regex_constants::error_paren, m_position - m_base);
+ return false;
+ }
bool result = true;
switch(this->m_traits.escape_syntax_type(*m_position))
{
return false;
}
const charT* pc = m_position;
- int i = this->m_traits.toi(pc, m_end, 10);
+ boost::intmax_t i = this->m_traits.toi(pc, m_end, 10);
if((i < 0) && syn_end)
{
// Check for a named capture, get the leftmost one if there is more than one:
}
if(negative)
i = 1 + m_mark_count - i;
- if(((i > 0) && (this->m_backrefs & (1u << (i-1)))) || ((i > 10000) && (this->m_pdata->get_id(i) > 0) && (this->m_backrefs & (1u << (this->m_pdata->get_id(i)-1)))))
+ if(((i > 0) && (i < std::numeric_limits<unsigned>::digits) && (i - 1 < static_cast<boost::intmax_t>(sizeof(unsigned) * CHAR_BIT)) && (this->m_backrefs & (1u << (i-1)))) || ((i > 10000) && (this->m_pdata->get_id(i) > 0) && (this->m_pdata->get_id(i)-1 < static_cast<boost::intmax_t>(sizeof(unsigned) * CHAR_BIT)) && (this->m_backrefs & (1u << (this->m_pdata->get_id(i)-1)))))
{
m_position = pc;
re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
static_cast<re_dot*>(
this->append_state(syntax_element_wild, sizeof(re_dot))
)->mask = static_cast<unsigned char>(this->flags() & regbase::no_mod_s
- ? re_detail::force_not_newline
+ ? BOOST_REGEX_DETAIL_NS::force_not_newline
: this->flags() & regbase::mod_s ?
- re_detail::force_newline : re_detail::dont_care);
+ BOOST_REGEX_DETAIL_NS::force_newline : BOOST_REGEX_DETAIL_NS::dont_care);
return true;
}
)
{
// OK we have a perl or emacs regex, check for a '?':
- if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question)
+ if ((this->flags() & (regbase::main_option_type | regbase::mod_x | regbase::no_perl_ex)) == regbase::mod_x)
+ {
+ // whitespace skip:
+ while ((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
+ ++m_position;
+ }
+ if((m_position != m_end) && (this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question))
{
greedy = false;
++m_position;
}
if(0 == this->m_last_state)
{
- fail(regex_constants::error_badrepeat, ::boost::re_detail::distance(m_base, m_position), "Nothing to repeat.");
+ fail(regex_constants::error_badrepeat, ::boost::BOOST_REGEX_DETAIL_NS::distance(m_base, m_position), "Nothing to repeat.");
return false;
}
if(this->m_last_state->type == syntax_element_endmark)
// Check for illegal following quantifier, we have to do this here, because
// the extra states we insert below circumvents our usual error checking :-(
//
- switch(this->m_traits.syntax_type(*m_position))
+ bool contin = false;
+ do
{
- case regex_constants::syntax_star:
- case regex_constants::syntax_plus:
- case regex_constants::syntax_question:
- case regex_constants::syntax_open_brace:
- fail(regex_constants::error_badrepeat, m_position - m_base);
- return false;
- }
+ if ((this->flags() & (regbase::main_option_type | regbase::mod_x | regbase::no_perl_ex)) == regbase::mod_x)
+ {
+ // whitespace skip:
+ while ((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
+ ++m_position;
+ }
+ if (m_position != m_end)
+ {
+ switch (this->m_traits.syntax_type(*m_position))
+ {
+ case regex_constants::syntax_star:
+ case regex_constants::syntax_plus:
+ case regex_constants::syntax_question:
+ case regex_constants::syntax_open_brace:
+ fail(regex_constants::error_badrepeat, m_position - m_base);
+ return false;
+ case regex_constants::syntax_open_mark:
+ // Do we have a comment? If so we need to skip it here...
+ if ((m_position + 2 < m_end) && this->m_traits.syntax_type(*(m_position + 1)) == regex_constants::syntax_question
+ && this->m_traits.syntax_type(*(m_position + 2)) == regex_constants::syntax_hash)
+ {
+ while ((m_position != m_end)
+ && (this->m_traits.syntax_type(*m_position++) != regex_constants::syntax_close_mark)) {
+ }
+ contin = true;
+ }
+ else
+ contin = false;
+ }
+ }
+ else
+ contin = false;
+ } while (contin);
}
re_brace* pb = static_cast<re_brace*>(this->insert_state(insert_point, syntax_element_startmark, sizeof(re_brace)));
pb->index = -3;
// parse a repeat-range:
//
std::size_t min, max;
- int v;
+ boost::intmax_t v;
// skip whitespace:
while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
++m_position;
// get min:
v = this->m_traits.toi(m_position, m_end, 10);
// skip whitespace:
- if(v < 0)
+ if((v < 0) || (v > umax()))
{
if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
{
while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
return parse_literal();
}
- min = v;
+ min = static_cast<std::size_t>(v);
// see if we have a comma:
if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_comma)
{
}
// get the value if any:
v = this->m_traits.toi(m_position, m_end, 10);
- max = (v >= 0) ? (std::size_t)v : (std::numeric_limits<std::size_t>::max)();
+ max = ((v >= 0) && (v < umax())) ? (std::size_t)v : (std::numeric_limits<std::size_t>::max)();
}
else
{
//
// we need to append a trailing jump:
//
- re_syntax_base* pj = this->append_state(re_detail::syntax_element_jump, sizeof(re_jump));
+ re_syntax_base* pj = this->append_state(BOOST_REGEX_DETAIL_NS::syntax_element_jump, sizeof(re_jump));
std::ptrdiff_t jump_offset = this->getoffset(pj);
//
// now insert the alternative:
// does a value fit in the specified charT type?
//
template <class charT>
-bool valid_value(charT, int v, const mpl::true_&)
+bool valid_value(charT, boost::intmax_t v, const mpl::true_&)
{
return (v >> (sizeof(charT) * CHAR_BIT)) == 0;
}
template <class charT>
-bool valid_value(charT, int, const mpl::false_&)
+bool valid_value(charT, boost::intmax_t, const mpl::false_&)
{
return true; // v will alsways fit in a charT
}
template <class charT>
-bool valid_value(charT c, int v)
+bool valid_value(charT c, boost::intmax_t v)
{
- return valid_value(c, v, mpl::bool_<(sizeof(charT) < sizeof(int))>());
+ return valid_value(c, v, mpl::bool_<(sizeof(charT) < sizeof(boost::intmax_t))>());
}
template <class charT, class traits>
fail(regex_constants::error_escape, m_position - m_base, "Missing } in hexadecimal escape sequence.");
return result;
}
- int i = this->m_traits.toi(m_position, m_end, 16);
+ boost::intmax_t i = this->m_traits.toi(m_position, m_end, 16);
if((m_position == m_end)
|| (i < 0)
- || ((std::numeric_limits<charT>::is_specialized) && (i > (int)(std::numeric_limits<charT>::max)()))
+ || ((std::numeric_limits<charT>::is_specialized) && (i > (boost::intmax_t)(std::numeric_limits<charT>::max)()))
|| (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
{
// Rewind to start of escape:
else
{
std::ptrdiff_t len = (std::min)(static_cast<std::ptrdiff_t>(2), static_cast<std::ptrdiff_t>(m_end - m_position));
- int i = this->m_traits.toi(m_position, m_position + len, 16);
+ boost::intmax_t i = this->m_traits.toi(m_position, m_position + len, 16);
if((i < 0)
|| !valid_value(charT(0), i))
{
{
// an octal escape sequence, the first character must be a zero
// followed by up to 3 octal digits:
- std::ptrdiff_t len = (std::min)(::boost::re_detail::distance(m_position, m_end), static_cast<std::ptrdiff_t>(4));
+ std::ptrdiff_t len = (std::min)(::boost::BOOST_REGEX_DETAIL_NS::distance(m_position, m_end), static_cast<std::ptrdiff_t>(4));
const charT* bp = m_position;
- int val = this->m_traits.toi(bp, bp + 1, 8);
+ boost::intmax_t val = this->m_traits.toi(bp, bp + 1, 8);
if(val != 0)
{
// Rewind to start of escape:
return result;
}
val = this->m_traits.toi(m_position, m_position + len, 8);
- if(val < 0)
+ if((val < 0) || (val > (boost::intmax_t)(std::numeric_limits<charT>::max)()))
{
// Rewind to start of escape:
--m_position;
{
BOOST_ASSERT(m_position != m_end);
const charT* pc = m_position;
- int i = this->m_traits.toi(pc, pc + 1, 10);
+ boost::intmax_t i = this->m_traits.toi(pc, pc + 1, 10);
if((i == 0) || (((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group) && (this->flags() & regbase::no_bk_refs)))
{
// not a backref at all but an octal escape sequence:
{
while((m_position != m_end)
&& (this->m_traits.syntax_type(*m_position++) != regex_constants::syntax_close_mark))
- {}
+ {}
return true;
}
//
int max_mark = m_max_mark;
m_mark_reset = -1;
m_max_mark = m_mark_count;
- int v;
+ boost::intmax_t v;
//
// select the actual extension used:
//
fail(regex_constants::error_perl_extension, m_position - m_base, "An invalid or unterminated recursive sub-expression.");
return false;
}
+ if ((std::numeric_limits<boost::intmax_t>::max)() - m_mark_count < v)
+ {
+ fail(regex_constants::error_perl_extension, m_position - m_base, "An invalid or unterminated recursive sub-expression.");
+ return false;
+ }
v += m_mark_count;
goto insert_recursion;
case regex_constants::syntax_dash:
// Rewind to start of (? sequence:
--m_position;
while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
- this->fail(regex_constants::error_paren, ::boost::re_detail::distance(m_base, m_end));
+ this->fail(regex_constants::error_paren, ::boost::BOOST_REGEX_DETAIL_NS::distance(m_base, m_end));
return false;
}
BOOST_ASSERT(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark);
re_alt* alt = static_cast<re_alt*>(this->insert_state(expected_alt_point, syntax_element_alt, sizeof(re_alt)));
alt->alt.i = this->m_pdata->m_data.size() - this->getoffset(alt);
}
- else if(this->getaddress(static_cast<re_alt*>(b)->alt.i, b)->type == syntax_element_alt)
+ else if(((std::ptrdiff_t)this->m_pdata->m_data.size() > (static_cast<re_alt*>(b)->alt.i + this->getoffset(b))) && (static_cast<re_alt*>(b)->alt.i > 0) && this->getaddress(static_cast<re_alt*>(b)->alt.i, b)->type == syntax_element_alt)
{
// Can't have seen more than one alternative:
// Rewind to start of (? sequence:
//
// allow backrefs to this mark:
//
- if((markid > 0) && (markid < (int)(sizeof(unsigned) * CHAR_BIT)))
+ if(markid < (int)(sizeof(unsigned) * CHAR_BIT))
this->m_backrefs |= 1u << (markid - 1);
}
return true;
}
+template <class charT, class traits>
+bool basic_regex_parser<charT, traits>::match_verb(const char* verb)
+{
+ while(*verb)
+ {
+ if(static_cast<charT>(*verb) != *m_position)
+ {
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
+ fail(regex_constants::error_perl_extension, m_position - m_base);
+ return false;
+ }
+ if(++m_position == m_end)
+ {
+ --m_position;
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
+ fail(regex_constants::error_perl_extension, m_position - m_base);
+ return false;
+ }
+ ++verb;
+ }
+ return true;
+}
+
+template <class charT, class traits>
+bool basic_regex_parser<charT, traits>::parse_perl_verb()
+{
+ if(++m_position == m_end)
+ {
+ // Rewind to start of (* sequence:
+ --m_position;
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
+ fail(regex_constants::error_perl_extension, m_position - m_base);
+ return false;
+ }
+ switch(*m_position)
+ {
+ case 'F':
+ if(++m_position == m_end)
+ {
+ // Rewind to start of (* sequence:
+ --m_position;
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
+ fail(regex_constants::error_perl_extension, m_position - m_base);
+ return false;
+ }
+ if((this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark) || match_verb("AIL"))
+ {
+ if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
+ {
+ // Rewind to start of (* sequence:
+ --m_position;
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
+ fail(regex_constants::error_perl_extension, m_position - m_base);
+ return false;
+ }
+ ++m_position;
+ this->append_state(syntax_element_fail);
+ return true;
+ }
+ break;
+ case 'A':
+ if(++m_position == m_end)
+ {
+ // Rewind to start of (* sequence:
+ --m_position;
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
+ fail(regex_constants::error_perl_extension, m_position - m_base);
+ return false;
+ }
+ if(match_verb("CCEPT"))
+ {
+ if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
+ {
+ // Rewind to start of (* sequence:
+ --m_position;
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
+ fail(regex_constants::error_perl_extension, m_position - m_base);
+ return false;
+ }
+ ++m_position;
+ this->append_state(syntax_element_accept);
+ return true;
+ }
+ break;
+ case 'C':
+ if(++m_position == m_end)
+ {
+ // Rewind to start of (* sequence:
+ --m_position;
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
+ fail(regex_constants::error_perl_extension, m_position - m_base);
+ return false;
+ }
+ if(match_verb("OMMIT"))
+ {
+ if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
+ {
+ // Rewind to start of (* sequence:
+ --m_position;
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
+ fail(regex_constants::error_perl_extension, m_position - m_base);
+ return false;
+ }
+ ++m_position;
+ static_cast<re_commit*>(this->append_state(syntax_element_commit, sizeof(re_commit)))->action = commit_commit;
+ this->m_pdata->m_disable_match_any = true;
+ return true;
+ }
+ break;
+ case 'P':
+ if(++m_position == m_end)
+ {
+ // Rewind to start of (* sequence:
+ --m_position;
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
+ fail(regex_constants::error_perl_extension, m_position - m_base);
+ return false;
+ }
+ if(match_verb("RUNE"))
+ {
+ if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
+ {
+ // Rewind to start of (* sequence:
+ --m_position;
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
+ fail(regex_constants::error_perl_extension, m_position - m_base);
+ return false;
+ }
+ ++m_position;
+ static_cast<re_commit*>(this->append_state(syntax_element_commit, sizeof(re_commit)))->action = commit_prune;
+ this->m_pdata->m_disable_match_any = true;
+ return true;
+ }
+ break;
+ case 'S':
+ if(++m_position == m_end)
+ {
+ // Rewind to start of (* sequence:
+ --m_position;
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
+ fail(regex_constants::error_perl_extension, m_position - m_base);
+ return false;
+ }
+ if(match_verb("KIP"))
+ {
+ if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
+ {
+ // Rewind to start of (* sequence:
+ --m_position;
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
+ fail(regex_constants::error_perl_extension, m_position - m_base);
+ return false;
+ }
+ ++m_position;
+ static_cast<re_commit*>(this->append_state(syntax_element_commit, sizeof(re_commit)))->action = commit_skip;
+ this->m_pdata->m_disable_match_any = true;
+ return true;
+ }
+ break;
+ case 'T':
+ if(++m_position == m_end)
+ {
+ // Rewind to start of (* sequence:
+ --m_position;
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
+ fail(regex_constants::error_perl_extension, m_position - m_base);
+ return false;
+ }
+ if(match_verb("HEN"))
+ {
+ if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
+ {
+ // Rewind to start of (* sequence:
+ --m_position;
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
+ fail(regex_constants::error_perl_extension, m_position - m_base);
+ return false;
+ }
+ ++m_position;
+ this->append_state(syntax_element_then);
+ this->m_pdata->m_disable_match_any = true;
+ return true;
+ }
+ break;
+ }
+ // Rewind to start of (* sequence:
+ --m_position;
+ while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
+ fail(regex_constants::error_perl_extension, m_position - m_base);
+ return false;
+}
+
template <class charT, class traits>
bool basic_regex_parser<charT, traits>::add_emacs_code(bool negate)
{
#pragma warning(pop)
#endif
-} // namespace re_detail
+} // namespace BOOST_REGEX_DETAIL_NS
} // namespace boost
#ifdef BOOST_MSVC