ParbreakIsNewline 1
FreeSpacing 1
PassThru 1
+ KeepEmpty 1
NewLine 0
ParSkip 0.4
TopSep 0.7
namespace {
-CatCode theCatcode[256];
-
-void catInit()
-{
- static bool init_done = false;
- if (init_done)
- return;
- init_done = true;
-
- fill(theCatcode, theCatcode + 256, catOther);
- fill(theCatcode + 'a', theCatcode + 'z' + 1, catLetter);
- fill(theCatcode + 'A', theCatcode + 'Z' + 1, catLetter);
-
- theCatcode[int('\\')] = catEscape;
- theCatcode[int('{')] = catBegin;
- theCatcode[int('}')] = catEnd;
- theCatcode[int('$')] = catMath;
- theCatcode[int('&')] = catAlign;
- theCatcode[int('\n')] = catNewline;
- theCatcode[int('#')] = catParameter;
- theCatcode[int('^')] = catSuper;
- theCatcode[int('_')] = catSub;
- theCatcode[0x7f] = catIgnore;
- theCatcode[int(' ')] = catSpace;
- theCatcode[int('\t')] = catSpace;
- theCatcode[int('\r')] = catNewline;
- theCatcode[int('~')] = catActive;
- theCatcode[int('%')] = catComment;
-
- // This is wrong!
- theCatcode[int('@')] = catLetter;
-}
-
/*!
* Translate a line ending to '\n'.
* \p c must have catcode catNewline, and it must be the last character read
return c;
}
-CatCode catcode(char_type c)
-{
- if (c < 256)
- return theCatcode[(unsigned char)c];
- return catOther;
}
-}
-
-
//
// Token
//
Parser::Parser(idocstream & is)
- : lineno_(0), pos_(0), iss_(0), is_(is), encoding_iconv_("UTF-8")
+ : lineno_(0), pos_(0), iss_(0), is_(is), encoding_iconv_("UTF-8"),
+ theCatcodesType_(NORMAL_CATCODES), curr_cat_(UNDECIDED_CATCODES)
{
}
Parser::Parser(string const & s)
: lineno_(0), pos_(0),
iss_(new idocstringstream(from_utf8(s))), is_(*iss_),
- encoding_iconv_("UTF-8")
+ encoding_iconv_("UTF-8"),
+ theCatcodesType_(NORMAL_CATCODES), curr_cat_(UNDECIDED_CATCODES)
{
}
}
+/*!
+ * (Re)build theCatcode_ for the catcode type selected by setCatcodes().
+ * Nothing is done when the table already matches theCatcodesType_.
+ * Otherwise all 256 entries are reset, which also overwrites any value
+ * stored with setCatcode() since the previous rebuild.
+ */
+void Parser::catInit()
+{
+ if (curr_cat_ == theCatcodesType_)
+ return;
+ curr_cat_ = theCatcodesType_;
+
+ // Common to every catcode type: ASCII letters and '@' are catLetter,
+ // everything else starts out as catOther.
+ fill(theCatcode_, theCatcode_ + 256, catOther);
+ fill(theCatcode_ + 'a', theCatcode_ + 'z' + 1, catLetter);
+ fill(theCatcode_ + 'A', theCatcode_ + 'Z' + 1, catLetter);
+ // This is wrong!
+ // NOTE(review): presumably '@' should only be a letter between
+ // \makeatletter and \makeatother -- confirm.
+ theCatcode_[int('@')] = catLetter;
+
+ // Only the normal type gives the TeX special characters a meaning;
+ // for any other type the table stays letters/other only.
+ if (theCatcodesType_ == NORMAL_CATCODES) {
+ theCatcode_[int('\\')] = catEscape;
+ theCatcode_[int('{')] = catBegin;
+ theCatcode_[int('}')] = catEnd;
+ theCatcode_[int('$')] = catMath;
+ theCatcode_[int('&')] = catAlign;
+ theCatcode_[int('\n')] = catNewline;
+ theCatcode_[int('#')] = catParameter;
+ theCatcode_[int('^')] = catSuper;
+ theCatcode_[int('_')] = catSub;
+ theCatcode_[0x7f] = catIgnore;
+ theCatcode_[int(' ')] = catSpace;
+ theCatcode_[int('\t')] = catSpace;
+ theCatcode_[int('\r')] = catNewline;
+ theCatcode_[int('~')] = catActive;
+ theCatcode_[int('%')] = catComment;
+ }
+}
+
+/// \returns the entry of theCatcode_ for \p c. Characters for which
+/// c < 256 does not hold are not in the table and are reported as catOther.
+CatCode Parser::catcode(char_type c) const
+{
+ if (c < 256)
+ return theCatcode_[(unsigned char)c];
+ return catOther;
+}
+
+
+/// Set the catcode of the single character \p c to \p cat.
+/// The value is written directly into theCatcode_, so it only lasts
+/// until catInit() next rebuilds the table.
+void Parser::setCatcode(char c, CatCode cat)
+{
+ theCatcode_[(unsigned char)c] = cat;
+}
+
+
+/// Select the catcode type \p t. Only the requested type is recorded
+/// here; the table itself is rebuilt by the next call of catInit().
+void Parser::setCatcodes(cat_type t)
+{
+ theCatcodesType_ = t;
+}
+
+
void Parser::setEncoding(std::string const & e)
{
//cerr << "setting encoding to " << e << std::endl;
}
-string const Parser::verbatimEnvironment(string const & name)
+string const Parser::ertEnvironment(string const & name)
{
if (!good())
return string();
} else if (t.asInput() == "\\begin") {
string const env = getArg('{', '}');
os << "\\begin{" << env << '}'
- << verbatimEnvironment(env)
+ << ertEnvironment(env)
<< "\\end{" << env << '}';
} else if (t.asInput() == "\\end") {
string const end = getArg('{', '}');
}
+/*!
+ * Read the input with verbatim catcodes until \p end_string is found.
+ * \returns everything read before \p end_string; the terminator itself
+ * is read but not returned. If the input ends before the terminator is
+ * seen, an error is printed and everything read so far is returned.
+ * An empty \p end_string never matches.
+ * The catcodes are switched back to NORMAL_CATCODES before returning.
+ */
+string const Parser::verbatimStuff(string const & end_string)
+{
+	if (!good())
+		return string();
+
+	// Everything read so far. The terminator is looked for at the end
+	// of this buffer after every token. That way a terminator which
+	// starts right after a failed partial match (as in
+	// "\\end{verbatim}") is still recognized, and a partial match is
+	// not lost when the input ends prematurely.
+	string contents;
+	bool found = false;
+	size_t const end_len = end_string.length();
+	setCatcodes(VERBATIM_CATCODES);
+	for (Token t = get_token(); good(); t = get_token()) {
+		// FIXME t.asInput() might be longer than we need ?
+		contents += t.asInput();
+		if (end_len > 0 && contents.length() >= end_len
+		    && contents.compare(contents.length() - end_len,
+		                        end_len, end_string) == 0) {
+			// strip the terminator from the result
+			contents.erase(contents.length() - end_len);
+			found = true;
+			break;
+		}
+	}
+	setCatcodes(NORMAL_CATCODES);
+	// Report only a missing terminator, not a terminator that happens
+	// to be the last thing in the input.
+	if (!found)
+		cerr << "unexpected end of input" << endl;
+	return contents;
+}
+
+
void Parser::tokenize_one()
{
catInit();
}
-void Parser::setCatCode(char c, CatCode cat)
-{
- theCatcode[(unsigned char)c] = cat;
-}
-
-
-CatCode Parser::getCatCode(char c) const
-{
- return theCatcode[(unsigned char)c];
-}
-
-
} // namespace lyx
catInvalid // 15 <delete>
};
+/// Selects which set of catcodes Parser::catInit() installs.
+enum cat_type {
+ NORMAL_CATCODES, ///< the TeX special characters get their catcodes
+ VERBATIM_CATCODES, ///< only catLetter and catOther are used
+ UNDECIDED_CATCODES ///< initial state: no table has been built yet
+};
+
enum {
FLAG_BRACE_LAST = 1 << 1, // last closing brace ends the parsing
///
~Parser();
+ /// \returns the catcode of \p c according to the current table
+ CatCode catcode(char_type c) const;
+ /// set the catcode of the single character \p c to \p cat
+ void setCatcode(char c, CatCode cat);
+ /// set parser to normal or verbatim mode
+ void setCatcodes(cat_type t);
+
/// change the iconv encoding of the input stream
/// according to the latex encoding and package
void setEncoding(std::string const & encoding, int const & package);
/*!
* \returns the contents of the environment \p name.
* <tt>\begin{name}</tt> must be parsed already, <tt>\end{name}</tt>
- * is parsed but not returned.
+ * is parsed but not returned. This parses nested environments properly.
*/
- std::string const verbatimEnvironment(std::string const & name);
+ std::string const ertEnvironment(std::string const & name);
/*
- * The same as verbatimEnvironment(std::string const & name) but
+ * The same as ertEnvironment(std::string const & name) but
* \begin and \end commands inside the name environment are not parsed.
* This function is designed to parse verbatim environments.
*/
* This function is designed to parse verbatim commands.
*/
std::string const plainCommand(char left, char right, std::string const & name);
+ /*!
+ * Basically the same as plainEnvironment() but the parsing is
+ * stopped at string \p end_string, which is read but not returned.
+ * Contrary to the other methods, this uses proper catcode setting.
+ * This function is designed to parse verbatim environments and
+ * commands. The intention is to eventually replace all of its
+ * siblings.
+ */
+ std::string const verbatimStuff(std::string const & end_string);
/*!
* Returns the character of the current token and increments
* the token position.
char getChar();
///
void error(std::string const & msg);
- /// Parses one token from \p is
+ /// Parses one token from \p is
void tokenize_one();
///
void push_back(Token const & t);
std::string verbatimOption();
/// resets the parser to initial state
void reset();
- ///
- void setCatCode(char c, CatCode cat);
- ///
- CatCode getCatCode(char c) const;
private:
+ /// Setup catcode table
+ void catInit();
///
int lineno_;
///
idocstream & is_;
/// iconv name of the current encoding
std::string encoding_iconv_;
+ /// catcode table; not set by the constructors, filled by catInit()
+ CatCode theCatcode_[256];
+ /// catcode type requested with setCatcodes()
+ cat_type theCatcodesType_;
+ /// catcode type that theCatcode_ was last built for
+ cat_type curr_cat_;
};
else if (t.cs() == "makeatletter") {
// LyX takes care of this
- p.setCatCode('@', catLetter);
+ p.setCatcode('@', catLetter);
}
else if (t.cs() == "makeatother") {
// LyX takes care of this
- p.setCatCode('@', catOther);
+ p.setCatcode('@', catOther);
}
else if (t.cs() == "newcommand" || t.cs() == "newcommandx"
// treat the nested environment as a block, don't
// parse &, \\ etc, because they don't belong to our
// table if they appear.
- os << p.verbatimEnvironment(name);
+ os << p.ertEnvironment(name);
os << "\\end{" << name << '}';
active_environments.pop_back();
}
angle = p.getArg('{', '}');
}
active_environments.push_back(env);
- p.verbatimEnvironment(env);
+ p.ertEnvironment(env);
active_environments.pop_back();
p.skip_spaces();
if (!p.good() && support::isStrInt(angle))
// If yes, we need to output ERT.
p.pushPosition();
if (inner_flags & FLAG_END)
- p.verbatimEnvironment(inner_type);
+ p.ertEnvironment(inner_type);
else
p.verbatim_item();
p.skip_spaces(true);
}
else if (name == "verbatim") {
- os << "\n\\end_layout\n\n\\begin_layout Verbatim\n";
- string const s = p.plainEnvironment("verbatim");
+ // FIXME: this should go in the generic code that
+ // handles environments defined in layout file that
+ // have "PassThru 1". However, the code there is
+ // already too complicated for my taste.
+ parent_context.new_paragraph(os);
+ Context context(true, parent_context.textclass,
+ &parent_context.textclass[from_ascii("Verbatim")]);
+ context.check_layout(os);
+ string s = p.verbatimStuff("\\end{verbatim}");
+ // ignore one newline at beginning or end of string
+ if (prefixIs(s, "\n"))
+ s.erase(0,1);
+ // Erase the last character (the trailing newline). Erasing at
+ // position s.length() would remove nothing.
+ if (suffixIs(s, "\n"))
+ s.erase(s.length() - 1, 1);
+
string::const_iterator it2 = s.begin();
for (string::const_iterator it = s.begin(), et = s.end(); it != et; ++it) {
- if (*it == '\\')
- os << "\\backslash ";
- else if (*it == '\n') {
- it2 = it + 1;
- // avoid adding an empty paragraph at the end
- // FIXME: if there are 2 consecutive spaces at the end ignore it
- // because LyX will re-add a \n
- // This hack must be removed once bug 8049 is fixed!
- if ((it + 1 != et) && (it + 2 != et || *it2 != '\n'))
- os << "\n\\end_layout\n\\begin_layout Verbatim\n";
- } else
+ context.check_layout(os);
+ if (*it == '\\') {
+ os << "\n\\backslash\n";
+ context.need_end_layout = true;
+ } else if (*it == '\n') {
+ context.new_paragraph(os);
+ } else {
os << *it;
+ context.need_end_layout = true;
+ }
}
- os << "\n\\end_layout\n\n";
+ context.new_paragraph(os);
p.skip_spaces();
- // reset to Standard layout
- os << "\n\\begin_layout Standard\n";
}
else if (name == "CJK") {
parse_arguments("\\begin{" + name + "}", arguments, p, os,
outer, parent_context);
if (contents == verbatim)
- handle_ert(os, p.verbatimEnvironment(name),
+ handle_ert(os, p.ertEnvironment(name),
parent_context);
else
parse_text_snippet(p, os, FLAG_END, outer,
else if (t.cs() == "verb") {
context.check_layout(os);
- char const delimiter = p.next_token().character();
- // \verb is special: The usual escaping rules do not
- // apply, e.g. "\verb+\+" is valid and denotes a single
- // backslash (bug #4468). Therefore we do not allow
- // escaping in getArg().
- string const arg = p.getArg(delimiter, delimiter, false);
- ostringstream oss;
- oss << "\\verb" << delimiter << arg << delimiter;
- handle_ert(os, oss.str(), context);
+ // set catcodes to verbatim early, just in case.
+ p.setCatcodes(VERBATIM_CATCODES);
+ string delim = p.get_token().asInput();
+ string const arg = p.verbatimStuff(delim);
+ handle_ert(os, "\\verb" + delim + arg + delim, context);
}
// Problem: \= creates a tabstop inside the tabbing environment
p.setEncoding(encoding, Encoding::CJK);
else
p.setEncoding("UTF-8");
- string const text = p.verbatimEnvironment("CJK");
+ string const text = p.ertEnvironment("CJK");
p.setEncoding(encoding_old);
p.skip_spaces();
if (!where) {