X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=boost%2Fboost%2Ftoken_functions.hpp;h=be245a047ff2e50aaec487390ecf9f5de6fec393;hb=44cdffa39e9160bde46d824f1915f9ef3084b53e;hp=858d2b587e89e2c3b7c6c0c269504fa540cb6550;hpb=69862d12f457dd5dbf509c4d46a0399d4b16cfc7;p=lyx.git diff --git a/boost/boost/token_functions.hpp b/boost/boost/token_functions.hpp index 858d2b587e..be245a047f 100644 --- a/boost/boost/token_functions.hpp +++ b/boost/boost/token_functions.hpp @@ -1,17 +1,23 @@ // Boost token_functions.hpp ------------------------------------------------// -// Copyright John R. Bandela 2001. +// Copyright John R. Bandela 2001. -// Permission to copy, use, modify, sell and distribute this software -// is granted provided this copyright notice appears in all -// copies. This software is provided "as is" without express or -// implied warranty, and with no claim as to its suitability for any -// purpose. +// Distributed under the Boost Software License, Version 1.0. (See +// accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) -// See http://www.boost.org for updates, documentation, and revision history. +// See http://www.boost.org/libs/tokenizer/ for documentation. // Revision History: - +// 01 Oct 2004 Joaquin M Lopez Munoz +// Workaround for a problem with string::assign in msvc-stlport +// 06 Apr 2004 John Bandela +// Fixed a bug involving using char_delimiter with a true input iterator +// 28 Nov 2003 Robert Zeh and John Bandela +// Converted into "fast" functions that avoid using += when +// the supplied iterator isn't an input_iterator; based on +// some work done at Archelon and a version that was checked into +// the boost CVS for a short period of time. // 20 Feb 2002 John Maddock // Removed using namespace std declarations and added // workaround for BOOST_NO_STDC_NAMESPACE (the library @@ -22,14 +28,22 @@ // Removed tabs and a little cleanup. -#ifndef BOOST_TOKEN_FUNCTIONS_JRB051801_HPP_ -#define BOOST_TOKEN_FUNCTIONS_JRB051801_HPP_ +#ifndef BOOST_TOKEN_FUNCTIONS_JRB120303_HPP_ +#define BOOST_TOKEN_FUNCTIONS_JRB120303_HPP_ #include #include -#include #include #include +#include // for find_if +#include +#include +#include +#include +#include +#if !defined(BOOST_NO_CWCTYPE) +#include +#endif // // the following must not be macros if we are to prefix them @@ -38,9 +52,15 @@ #ifdef ispunct # undef ispunct #endif +#ifdef iswpunct +# undef iswpunct +#endif #ifdef isspace # undef isspace #endif +#ifdef iswspace +# undef iswspace +#endif // // fix namespace problems: // @@ -48,35 +68,33 @@ namespace std{ using ::ispunct; using ::isspace; +#if !defined(BOOST_NO_CWCTYPE) + using ::iswpunct; + using ::iswspace; +#endif } #endif namespace boost{ - //=========================================================================== // The escaped_list_separator class. Which is a model of TokenizerFunction - // An escaped list is a super-set of what is commonly known as a comma - // separated value (csv) list.It is separated into fields by a comma or + // An escaped list is a super-set of what is commonly known as a comma + // separated value (csv) list.It is separated into fields by a comma or // other character. If the delimiting character is inside quotes, then it is // counted as a regular character.To allow for embedded quotes in a field, - // there can be escape sequences using the \ much like C. - // The role of the comma, the quotation mark, and the escape + // there can be escape sequences using the \ much like C. + // The role of the comma, the quotation mark, and the escape // character (backslash \), can be assigned to other characters. struct escaped_list_error : public std::runtime_error{ - escaped_list_error(const std::string& what):std::runtime_error(what) { } + escaped_list_error(const std::string& what_arg):std::runtime_error(what_arg) { } }; - + // The out of the box GCC 2.95 on cygwin does not have a char_traits class. // MSVC does not like the following typename -#if !defined(BOOST_MSVC) || BOOST_MSVC > 1300 - template ::traits_type > -#else - template ::traits_type > -#endif + template ::traits_type > class escaped_list_separator { private: @@ -90,7 +108,7 @@ namespace boost{ }; string_type escape_; string_type c_; - string_type quote_; + string_type quote_; bool last_; bool is_escape(Char e) { @@ -108,7 +126,7 @@ namespace boost{ template void do_escape(iterator& next,iterator end,Token& tok) { if (++next == end) - throw escaped_list_error(std::string("cannot end with escape")); + BOOST_THROW_EXCEPTION(escaped_list_error(std::string("cannot end with escape"))); if (Traits::eq(*next,'n')) { tok+='\n'; return; @@ -126,25 +144,25 @@ namespace boost{ return; } else - throw escaped_list_error(std::string("unknown escape sequence")); + BOOST_THROW_EXCEPTION(escaped_list_error(std::string("unknown escape sequence"))); } public: - + explicit escaped_list_separator(Char e = '\\', Char c = ',',Char q = '\"') : escape_(1,e), c_(1,c), quote_(1,q), last_(false) { } - + escaped_list_separator(string_type e, string_type c, string_type q) : escape_(e), c_(c), quote_(q), last_(false) { } - + void reset() {last_=false;} template bool operator()(InputIterator& next,InputIterator end,Token& tok) { bool bInQuote = false; tok = Token(); - + if (next == end) { if (last_) { last_ = false; @@ -164,7 +182,7 @@ namespace boost{ ++next; // The last character was a c, that means there is // 1 more blank field - last_ = true; + last_ = true; return true; } else tok+=*next; @@ -180,7 +198,143 @@ namespace boost{ } }; - + //=========================================================================== + // The classes here are used by offset_separator and char_separator to implement + // faster assigning of tokens using assign instead of += + + namespace tokenizer_detail { + //=========================================================================== + // Tokenizer was broken for wide character separators, at least on Windows, since + // CRT functions isspace etc only expect values in [0, 0xFF]. Debug build asserts + // if higher values are passed in. The traits extension class should take care of this. + // Assuming that the conditional will always get optimized out in the function + // implementations, argument types are not a problem since both forms of character classifiers + // expect an int. + +#if !defined(BOOST_NO_CWCTYPE) + template + struct traits_extension_details : public traits { + typedef typename traits::char_type char_type; + static bool isspace(char_type c) + { + return std::iswspace(c) != 0; + } + static bool ispunct(char_type c) + { + return std::iswpunct(c) != 0; + } + }; + + template + struct traits_extension_details : public traits { + typedef typename traits::char_type char_type; + static bool isspace(char_type c) + { + return std::isspace(c) != 0; + } + static bool ispunct(char_type c) + { + return std::ispunct(c) != 0; + } + }; +#endif + + + // In case there is no cwctype header, we implement the checks manually. + // We make use of the fact that the tested categories should fit in ASCII. + template + struct traits_extension : public traits { + typedef typename traits::char_type char_type; + static bool isspace(char_type c) + { +#if !defined(BOOST_NO_CWCTYPE) + return traits_extension_details::isspace(c); +#else + return static_cast< unsigned >(c) <= 255 && std::isspace(c) != 0; +#endif + } + + static bool ispunct(char_type c) + { +#if !defined(BOOST_NO_CWCTYPE) + return traits_extension_details::ispunct(c); +#else + return static_cast< unsigned >(c) <= 255 && std::ispunct(c) != 0; +#endif + } + }; + + // The assign_or_plus_equal struct contains functions that implement + // assign, +=, and clearing based on the iterator type. The + // generic case does nothing for plus_equal and clearing, while + // passing through the call for assign. + // + // When an input iterator is being used, the situation is reversed. + // The assign method does nothing, plus_equal invokes operator +=, + // and the clearing method sets the supplied token to the default + // token constructor's result. + // + + template + struct assign_or_plus_equal { + template + static void assign(Iterator b, Iterator e, Token &t) { + t.assign(b, e); + } + + template + static void plus_equal(Token &, const Value &) { } + + // If we are doing an assign, there is no need for the + // the clear. + // + template + static void clear(Token &) { } + }; + + template <> + struct assign_or_plus_equal { + template + static void assign(Iterator , Iterator , Token &) { } + template + static void plus_equal(Token &t, const Value &v) { + t += v; + } + template + static void clear(Token &t) { + t = Token(); + } + }; + + + template + struct pointer_iterator_category{ + typedef std::random_access_iterator_tag type; + }; + + + template + struct class_iterator_category{ + typedef typename Iterator::iterator_category type; + }; + + + + // This portably gets the iterator_tag without partial template specialization + template + struct get_iterator_category{ + typedef typename mpl::if_, + pointer_iterator_category, + class_iterator_category + >::type cat; + + typedef typename cat::type iterator_category; + }; + + + } // namespace tokenizer_detail + + //=========================================================================== // The offset_separator class, which is a model of TokenizerFunction. // Offset breaks a string into tokens based on a range of offsets @@ -192,7 +346,7 @@ namespace boost{ unsigned int current_offset_; bool wrap_offsets_; bool return_partial_last_; - + public: template offset_separator(Iter begin, Iter end, bool wrap_offsets = true, @@ -200,7 +354,7 @@ namespace boost{ : offsets_(begin,end), current_offset_(0), wrap_offsets_(wrap_offsets), return_partial_last_(return_partial_last) { } - + offset_separator() : offsets_(1,1), current_offset_(), wrap_offsets_(true), return_partial_last_(true) { } @@ -212,30 +366,40 @@ namespace boost{ template bool operator()(InputIterator& next, InputIterator end, Token& tok) { - assert(!offsets_.empty()); - - tok = Token(); - + typedef tokenizer_detail::assign_or_plus_equal< + BOOST_DEDUCED_TYPENAME tokenizer_detail::get_iterator_category< + InputIterator + >::iterator_category + > assigner; + + BOOST_ASSERT(!offsets_.empty()); + + assigner::clear(tok); + InputIterator start(next); + if (next == end) return false; if (current_offset_ == offsets_.size()) + { if (wrap_offsets_) current_offset_=0; else return false; - + } + int c = offsets_[current_offset_]; int i = 0; for (; i < c; ++i) { if (next == end)break; - tok+=*next++; + assigner::plus_equal(tok,*next++); } - + assigner::assign(start,next,tok); + if (!return_partial_last_) if (i < (c-1) ) return false; - + ++current_offset_; return true; } @@ -259,18 +423,14 @@ namespace boost{ enum empty_token_policy { drop_empty_tokens, keep_empty_tokens }; // The out of the box GCC 2.95 on cygwin does not have a char_traits class. -#if !defined(BOOST_MSVC) || BOOST_MSVC > 1300 - template ::traits_type > -#else - template ::traits_type > -#endif + template ::traits_type > class char_separator { - typedef std::basic_string string_type; + typedef tokenizer_detail::traits_extension Traits; + typedef std::basic_string string_type; public: - explicit + explicit char_separator(const Char* dropped_delims, const Char* kept_delims = 0, empty_token_policy empty_tokens = drop_empty_tokens) @@ -282,14 +442,14 @@ namespace boost{ { // Borland workaround if (kept_delims) - m_kept_delims = kept_delims; + m_kept_delims = kept_delims; } - // use ispunct() for kept delimiters and isspace for dropped. + // use ispunct() for kept delimiters and isspace for dropped. explicit char_separator() - : m_use_ispunct(true), - m_use_isspace(true), + : m_use_ispunct(true), + m_use_isspace(true), m_empty_tokens(drop_empty_tokens) { } void reset() { } @@ -297,57 +457,72 @@ namespace boost{ template bool operator()(InputIterator& next, InputIterator end, Token& tok) { - tok = Token(); + typedef tokenizer_detail::assign_or_plus_equal< + BOOST_DEDUCED_TYPENAME tokenizer_detail::get_iterator_category< + InputIterator + >::iterator_category + > assigner; + + assigner::clear(tok); // skip past all dropped_delims if (m_empty_tokens == drop_empty_tokens) for (; next != end && is_dropped(*next); ++next) { } - + + InputIterator start(next); + if (m_empty_tokens == drop_empty_tokens) { if (next == end) return false; + // if we are on a kept_delims move past it and stop if (is_kept(*next)) { - tok += *next; + assigner::plus_equal(tok,*next); ++next; } else // append all the non delim characters for (; next != end && !is_dropped(*next) && !is_kept(*next); ++next) - tok += *next; - } + assigner::plus_equal(tok,*next); + } else { // m_empty_tokens == keep_empty_tokens - + // Handle empty token at the end if (next == end) - if (m_output_done == false) { + { + if (m_output_done == false) + { m_output_done = true; + assigner::assign(start,next,tok); return true; - } else + } + else return false; + } if (is_kept(*next)) { if (m_output_done == false) m_output_done = true; else { - tok += *next; + assigner::plus_equal(tok,*next); ++next; m_output_done = false; } - } + } else if (m_output_done == false && is_dropped(*next)) { m_output_done = true; - } + } else { if (is_dropped(*next)) - ++next; + start=++next; for (; next != end && !is_dropped(*next) && !is_kept(*next); ++next) - tok += *next; + assigner::plus_equal(tok,*next); m_output_done = true; } } + assigner::assign(start,next,tok); return true; } @@ -358,13 +533,13 @@ namespace boost{ bool m_use_isspace; empty_token_policy m_empty_tokens; bool m_output_done; - + bool is_kept(Char E) const - { + { if (m_kept_delims.length()) return m_kept_delims.find(E) != string_type::npos; else if (m_use_ispunct) { - return std::ispunct(E) != 0; + return Traits::ispunct(E) != 0; } else return false; } @@ -373,7 +548,7 @@ namespace boost{ if (m_dropped_delims.length()) return m_dropped_delims.find(E) != string_type::npos; else if (m_use_isspace) { - return std::isspace(E) != 0; + return Traits::isspace(E) != 0; } else return false; } @@ -390,31 +565,27 @@ namespace boost{ // cannot be returned as tokens. These are often whitespace // The out of the box GCC 2.95 on cygwin does not have a char_traits class. -#if !defined(BOOST_MSVC) || BOOST_MSVC > 1300 - template ::traits_type > -#else - template ::traits_type > -#endif + template ::traits_type > class char_delimiters_separator { - private: + private: - typedef std::basic_string string_type; + typedef tokenizer_detail::traits_extension Traits; + typedef std::basic_string string_type; string_type returnable_; string_type nonreturnable_; bool return_delims_; bool no_ispunct_; bool no_isspace_; - + bool is_ret(Char E)const - { + { if (returnable_.length()) return returnable_.find(E) != string_type::npos; else{ if (no_ispunct_) {return false;} else{ - int r = std::ispunct(E); + int r = Traits::ispunct(E); return r != 0; } } @@ -426,21 +597,21 @@ namespace boost{ else{ if (no_isspace_) {return false;} else{ - int r = std::isspace(E); + int r = Traits::isspace(E); return r != 0; } } } - + public: - explicit char_delimiters_separator(bool return_delims = false, + explicit char_delimiters_separator(bool return_delims = false, const Char* returnable = 0, const Char* nonreturnable = 0) : returnable_(returnable ? returnable : string_type().c_str()), nonreturnable_(nonreturnable ? nonreturnable:string_type().c_str()), return_delims_(return_delims), no_ispunct_(returnable!=0), no_isspace_(nonreturnable!=0) { } - + void reset() { } public: @@ -448,16 +619,16 @@ namespace boost{ template bool operator()(InputIterator& next, InputIterator end,Token& tok) { tok = Token(); - + // skip past all nonreturnable delims // skip past the returnable only if we are not returning delims - for (;next!=end && ( is_nonret(*next) || (is_ret(*next) + for (;next!=end && ( is_nonret(*next) || (is_ret(*next) && !return_delims_ ) );++next) { } - + if (next == end) { return false; } - + // if we are to return delims and we are one a returnable one // move past it and stop if (is_ret(*next) && return_delims_) { @@ -468,8 +639,8 @@ namespace boost{ // append all the non delim characters for (;next!=end && !is_nonret(*next) && !is_ret(*next);++next) tok+=*next; - - + + return true; } }; @@ -477,10 +648,4 @@ namespace boost{ } //namespace boost - -#endif - - - - - +#endif