src/tex2lyx/Parser.cpp

   1 /**
   2  * \file Parser.cpp
   3  * This file is part of LyX, the document processor.
   4  * Licence details can be found in the file COPYING.
   5  *
   6  * \author André Pönitz
   7  *
   8  * Full author contact details are available in file CREDITS.
   9  */
  10
  11 #include <config.h>
  12
  13 #include "Parser.h"
  14
  15 #include "tex2lyx.h"
  16
  17 #include "Encoding.h"
  18 #include "support/convert.h"
  19 #include "support/lstrings.h"
  20 #include "support/textutils.h"
  21
  22 #include <cstdint>
  23 #include <iostream>
  24
  25 using namespace std;
  26 using namespace lyx::support;
  27
  28 namespace lyx {
  29
  30 namespace {
  31
  32 /*!
  33  * Translate a line ending to '\n'.
  34  * \p c must have catcode catNewline, and it must be the last character read
  35  * from \p is.
  36  */
  37 char_type getNewline(iparserdocstream & is, char_type c)
  38 {
  39         // we have to handle 3 different line endings:
  40         // - UNIX (\n)
  41         // - MAC  (\r)
  42         // - DOS  (\r\n)
  43         if (c == '\r') {
  44                 // MAC or DOS
  45                 char_type wc;
  46                 if (is.get(wc) && wc != '\n') {
  47                         // MAC
  48                         is.putback(wc);
  49                 }
  50                 return '\n';
  51         }
  52         // UNIX
  53         return c;
  54 }
  55
  56 } // namespace
  57
  58 //
  59 // Token
  60 //
  61
  62 ostream & operator<<(ostream & os, Token const & t)
  63 {
  64         if (t.cat() == catComment)
  65                 os << '%' << t.cs() << '\n';
  66         else if (t.cat() == catSpace)
  67                 os << t.cs();
  68         else if (t.cat() == catEscape)
  69                 os << '\\' << t.cs() << ' ';
  70         else if (t.cat() == catLetter)
  71                 os << t.cs();
  72         else if (t.cat() == catNewline)
  73                 os << "[" << t.cs().size() << "\\n," << t.cat() << "]\n";
  74         else
  75                 os << '[' << t.cs() << ',' << t.cat() << ']';
  76         return os;
  77 }
  78
  79
  80 string Token::asInput() const
  81 {
  82         if (cat_ == catComment)
  83                 return '%' + cs_ + '\n';
  84         if (cat_ == catEscape)
  85                 return '\\' + cs_;
  86         return cs_;
  87 }
  88
  89
  90 bool Token::isAlnumASCII() const
  91 {
  92         return cat_ == catLetter ||
  93                (cat_ == catOther && cs_.length() == 1 && isDigitASCII(cs_[0]));
  94 }
  95
  96
  97 #ifdef FILEDEBUG
  98 void debugToken(std::ostream & os, Token const & t, unsigned int flags)
  99 {
 100         char sep = ' ';
 101         os << "t: " << t << " flags: " << flags;
 102         if (flags & FLAG_BRACE_LAST) { os << sep << "BRACE_LAST"; sep = '|'; }
 103         if (flags & FLAG_RIGHT     ) { os << sep << "RIGHT"     ; sep = '|'; }
 104         if (flags & FLAG_END       ) { os << sep << "END"       ; sep = '|'; }
 105         if (flags & FLAG_BRACK_LAST) { os << sep << "BRACK_LAST"; sep = '|'; }
 106         if (flags & FLAG_TEXTMODE  ) { os << sep << "TEXTMODE"  ; sep = '|'; }
 107         if (flags & FLAG_ITEM      ) { os << sep << "ITEM"      ; sep = '|'; }
 108         if (flags & FLAG_LEAVE     ) { os << sep << "LEAVE"     ; sep = '|'; }
 109         if (flags & FLAG_SIMPLE    ) { os << sep << "SIMPLE"    ; sep = '|'; }
 110         if (flags & FLAG_EQUATION  ) { os << sep << "EQUATION"  ; sep = '|'; }
 111         if (flags & FLAG_SIMPLE2   ) { os << sep << "SIMPLE2"   ; sep = '|'; }
 112         if (flags & FLAG_OPTION    ) { os << sep << "OPTION"    ; sep = '|'; }
 113         if (flags & FLAG_BRACED    ) { os << sep << "BRACED"    ; sep = '|'; }
 114         if (flags & FLAG_CELL      ) { os << sep << "CELL"      ; sep = '|'; }
 115         if (flags & FLAG_TABBING   ) { os << sep << "TABBING"   ; sep = '|'; }
 116         os << "\n";
 117 }
 118 #endif
 119
 120
 121 //
 122 // Wrapper
 123 //
 124
 125 void iparserdocstream::setEncoding(std::string const & e)
 126 {
 127         is_ << lyx::setEncoding(e);
 128 }
 129
 130
 131 void iparserdocstream::putback(char_type c)
 132 {
 133         s_ = c + s_;
 134 }
 135
 136
 137 void iparserdocstream::putback(docstring const & s)
 138 {
 139         s_ = s + s_;
 140 }
 141
 142
 143 iparserdocstream & iparserdocstream::get(char_type &c)
 144 {
 145         if (s_.empty())
 146                 is_.get(c);
 147         else {
 148                 //warning_message("unparsed: " + to_utf8(s_));
 149                 c = s_[0];
 150                 s_.erase(0,1);
 151         }
 152         return *this;
 153 }
 154
 155
 156 //
 157 // Parser
 158 //
 159
 160
 161 Parser::Parser(idocstream & is, std::string const & fixedenc)
 162         : lineno_(0), pos_(0), iss_(nullptr), is_(is),
 163           encoding_iconv_(fixedenc.empty() ? "UTF-8" : fixedenc),
 164           theCatcodesType_(NORMAL_CATCODES), curr_cat_(UNDECIDED_CATCODES),
 165           fixed_enc_(!fixedenc.empty())
 166 {
 167         if (fixed_enc_)
 168                 is_.setEncoding(fixedenc);
 169         catInit();
 170 }
 171
 172
 173 Parser::Parser(string const & s)
 174         : lineno_(0), pos_(0),
 175           iss_(new idocstringstream(from_utf8(s))), is_(*iss_),
 176           encoding_iconv_("UTF-8"),
 177           theCatcodesType_(NORMAL_CATCODES), curr_cat_(UNDECIDED_CATCODES),
 178           // An idocstringstream can not change the encoding
 179           fixed_enc_(true)
 180 {
 181         catInit();
 182 }
 183
 184
 185 Parser::~Parser()
 186 {
 187         delete iss_;
 188 }
 189
 190
 191 void Parser::deparse()
 192 {
 193         string s;
 194         for(size_type i = pos_ ; i < tokens_.size() ; ++i) {
 195                 s += tokens_[i].asInput();
 196         }
 197         is_.putback(from_utf8(s));
 198         tokens_.erase(tokens_.begin() + pos_, tokens_.end());
 199         // make sure that next token is read
 200         tokenize_one();
 201 }
 202
 203
 204 bool Parser::setEncoding(std::string const & e, int p)
 205 {
 206         // We may (and need to) use unsafe encodings here: Since the text is
 207         // converted to unicode while reading from is_, we never see text in
 208         // the original encoding of the parser, but operate on utf8 strings
 209         // instead. Therefore, we cannot misparse high bytes as {, } or \\.
 210         Encoding const * const enc = encodings.fromLaTeXName(e, p, true);
 211         if (!enc) {
 212                 warning_message("Unknown encoding " + e + ". Ignoring.");
 213                 return false;
 214         }
 215         return setEncoding(enc->iconvName());
 216 }
 217
 218
 219 void Parser::catInit()
 220 {
 221         if (curr_cat_ == theCatcodesType_)
 222                 return;
 223         curr_cat_ = theCatcodesType_;
 224
 225         fill(theCatcode_, theCatcode_ + 256, catOther);
 226         fill(theCatcode_ + 'a', theCatcode_ + 'z' + 1, catLetter);
 227         fill(theCatcode_ + 'A', theCatcode_ + 'Z' + 1, catLetter);
 228         // This is wrong!
 229         theCatcode_[int('@')]  = catLetter;
 230
 231         if (theCatcodesType_ == NORMAL_CATCODES) {
 232                 theCatcode_[int('\\')] = catEscape;
 233                 theCatcode_[int('{')]  = catBegin;
 234                 theCatcode_[int('}')]  = catEnd;
 235                 theCatcode_[int('$')]  = catMath;
 236                 theCatcode_[int('&')]  = catAlign;
 237                 theCatcode_[int('\n')] = catNewline;
 238                 theCatcode_[int('#')]  = catParameter;
 239                 theCatcode_[int('^')]  = catSuper;
 240                 theCatcode_[int('_')]  = catSub;
 241                 theCatcode_[0x7f]      = catIgnore;
 242                 theCatcode_[int(' ')]  = catSpace;
 243                 theCatcode_[int('\t')] = catSpace;
 244                 theCatcode_[int('\r')] = catNewline;
 245                 theCatcode_[int('~')]  = catActive;
 246                 theCatcode_[int('%')]  = catComment;
 247         }
 248 }
 249
 250 CatCode Parser::catcode(char_type c) const
 251 {
 252         if (c < 256)
 253                 return theCatcode_[(unsigned char)c];
 254         return catOther;
 255 }
 256
 257
 258 void Parser::setCatcode(char c, CatCode cat)
 259 {
 260         theCatcode_[(unsigned char)c] = cat;
 261         deparse();
 262 }
 263
 264
 265 void Parser::setCatcodes(cat_type t)
 266 {
 267         theCatcodesType_ = t;
 268         deparse();
 269 }
 270
 271
 272 bool Parser::setEncoding(std::string const & e)
 273 {
 274         //warning_message("setting encoding to " + e);
 275         encoding_iconv_ = e;
 276         // If the encoding is fixed, we must not change the stream encoding
 277         // (because the whole input uses that encoding, e.g. if it comes from
 278         // the clipboard). We still need to track the original encoding in
 279         // encoding_iconv_, so that the generated output is correct.
 280         if (!fixed_enc_)
 281                 is_.setEncoding(e);
 282         return true;
 283 }
 284
 285
 286 void Parser::push_back(Token const & t)
 287 {
 288         tokens_.push_back(t);
 289 }
 290
 291
 292 // We return a copy here because the tokens_ vector may get reallocated
 293 Token const Parser::prev_token() const
 294 {
 295         static const Token dummy;
 296         return pos_ > 1 ? tokens_[pos_ - 2] : dummy;
 297 }
 298
 299
 300 // We return a copy here because the tokens_ vector may get reallocated
 301 Token const Parser::curr_token() const
 302 {
 303         static const Token dummy;
 304         return pos_ > 0 ? tokens_[pos_ - 1] : dummy;
 305 }
 306
 307
 308 // We return a copy here because the tokens_ vector may get reallocated
 309 Token const Parser::next_token()
 310 {
 311         static const Token dummy;
 312         if (!good())
 313                 return dummy;
 314         if (pos_ >= tokens_.size())
 315                 tokenize_one();
 316         return pos_ < tokens_.size() ? tokens_[pos_] : dummy;
 317 }
 318
 319
 320 // We return a copy here because the tokens_ vector may get reallocated
 321 Token const Parser::next_next_token()
 322 {
 323         static const Token dummy;
 324         if (!good())
 325                 return dummy;
 326         // If tokenize_one() has not been called after the last get_token() we
 327         // need to tokenize two more tokens.
 328         if (pos_ >= tokens_.size())
 329                 tokenize_one();
 330         if (pos_ + 1 >= tokens_.size())
 331                 tokenize_one();
 332         return pos_ + 1 < tokens_.size() ? tokens_[pos_ + 1] : dummy;
 333 }
 334
 335
 336 // We return a copy here because the tokens_ vector may get reallocated
 337 Token const Parser::get_token()
 338 {
 339         static const Token dummy;
 340         if (!good())
 341                 return dummy;
 342         if (pos_ >= tokens_.size()) {
 343                 tokenize_one();
 344                 if (pos_ >= tokens_.size())
 345                         return dummy;
 346         }
 347         // warning_message("looking at token " + tokens_[pos_]
 348         //      + " pos: " + pos_ <<);
 349         return tokens_[pos_++];
 350 }
 351
 352
 353 bool Parser::isParagraph()
 354 {
 355         // A new paragraph in TeX is started
 356         // - either by a newline, following any amount of whitespace
 357         //   characters (including zero), and another newline
 358         // - or the token \par
 359         if (curr_token().cat() == catNewline &&
 360             (curr_token().cs().size() > 1 ||
 361              (next_token().cat() == catSpace &&
 362               next_next_token().cat() == catNewline)))
 363                 return true;
 364         if (curr_token().cat() == catEscape && curr_token().cs() == "par")
 365                 return true;
 366         return false;
 367 }
 368
 369
 370 bool Parser::skip_spaces(bool skip_comments)
 371 {
 372         // We just silently return if we have no more tokens.
 373         // skip_spaces() should be callable at any time,
 374         // the caller must check p::good() anyway.
 375         bool skipped = false;
 376         while (good()) {
 377                 get_token();
 378                 if (isParagraph()) {
 379                         putback();
 380                         break;
 381                 }
 382                 if (curr_token().cat() == catSpace ||
 383                     curr_token().cat() == catNewline) {
 384                         skipped = true;
 385                         continue;
 386                 }
 387                 if ((curr_token().cat() == catComment && curr_token().cs().empty()))
 388                         continue;
 389                 if (skip_comments && curr_token().cat() == catComment) {
 390                         // If positions_ is not empty we are doing some kind
 391                         // of look ahead
 392                         if (!positions_.empty())
 393                                 warning_message("Ignoring comment: " + curr_token().asInput());
 394                 } else {
 395                         putback();
 396                         break;
 397                 }
 398         }
 399         return skipped;
 400 }
 401
 402
 403 void Parser::unskip_spaces(bool skip_comments)
 404 {
 405         while (pos_ > 0) {
 406                 if ( curr_token().cat() == catSpace ||
 407                     (curr_token().cat() == catNewline && curr_token().cs().size() == 1))
 408                         putback();
 409                 else if (skip_comments && curr_token().cat() == catComment) {
 410                         // TODO: Get rid of this
 411                         // If positions_ is not empty we are doing some kind
 412                         // of look ahead
 413                         if (!positions_.empty())
 414                                 warning_message("Unignoring comment: " + curr_token().asInput());
 415                         putback();
 416                 }
 417                 else
 418                         break;
 419         }
 420 }
 421
 422
 423 void Parser::putback()
 424 {
 425         --pos_;
 426 }
 427
 428
 429 void Parser::pushPosition()
 430 {
 431         positions_.push_back(pos_);
 432 }
 433
 434
 435 void Parser::popPosition()
 436 {
 437         pos_ = positions_.back();
 438         positions_.pop_back();
 439         deparse();
 440 }
 441
 442
 443 void Parser::dropPosition()
 444 {
 445         positions_.pop_back();
 446 }
 447
 448
 449 bool Parser::good() const
 450 {
 451         if (pos_ < tokens_.size())
 452                 return true;
 453         if (!is_.good())
 454                 return false;
 455         return is_.peek() != idocstream::traits_type::eof();
 456 }
 457
 458
 459 bool Parser::hasOpt(string const & l)
 460 {
 461         // An optional argument can occur in any of the following forms:
 462         // - \foo[bar]
 463         // - \foo [bar]
 464         // - \foo
 465         //   [bar]
 466         // - \foo %comment
 467         //   [bar]
 468
 469         // remember current position
 470         unsigned int oldpos = pos_;
 471         // skip spaces and comments
 472         while (good()) {
 473                 get_token();
 474                 if (isParagraph()) {
 475                         putback();
 476                         break;
 477                 }
 478                 if (curr_token().cat() == catSpace ||
 479                     curr_token().cat() == catNewline ||
 480                     curr_token().cat() == catComment)
 481                         continue;
 482                 putback();
 483                 break;
 484         }
 485         bool const retval = (next_token().asInput() == l);
 486         pos_ = oldpos;
 487         return retval;
 488 }
 489
 490
 491 bool Parser::hasIdxMacros(string const & c, string const & e)
 492 {
 493         // Check for index entry separator (! or @),
 494         // consider escaping via "
 495         // \p e marks a terminating delimiter¸
 496
 497         // remember current position
 498         unsigned int oldpos = pos_;
 499         // skip spaces and comments
 500         bool retval = false;
 501         while (good()) {
 502                 get_token();
 503                 if (isParagraph()) {
 504                         putback();
 505                         break;
 506                 }
 507                 if (curr_token().cat() == catEnd)
 508                         break;
 509                 if (!e.empty() && curr_token().asInput() == e
 510                     && prev_token().asInput() != "\"")
 511                         break;
 512                 if (curr_token().asInput() == c
 513                     && prev_token().asInput() != "\"") {
 514                         retval = true;
 515                         break;
 516                 }
 517                 continue;
 518         }
 519         pos_ = oldpos;
 520         return retval;
 521 }
 522
 523
 524 Parser::Arg Parser::getFullArg(char left, char right, bool allow_escaping, char e)
 525 {
 526         skip_spaces(true);
 527
 528         // This is needed if a partial file ends with a command without arguments,
 529         // e. g. \medskip
 530         if (! good())
 531                 return make_pair(false, string());
 532
 533         int group_level = (left == '{') ? 1 : 0;
 534         string result;
 535         Token t = get_token();
 536
 537         if (left != char()
 538             && (t.cat() == catComment || t.cat() == catEscape
 539                 || t.character() != left)) {
 540                 putback();
 541                 return make_pair(false, string());
 542         } else {
 543                 while (good()) {
 544                         t = get_token();
 545                         // honor grouping
 546                         if (t.cat() == catBegin) {
 547                                 ++group_level;
 548                                 if (left != '{')
 549                                         continue;
 550                         }
 551                         if (group_level > 0 && t.cat() == catEnd) {
 552                                 --group_level;
 553                                 if (left != '{')
 554                                         continue;
 555                         }
 556                         // Ignore comments
 557                         if (t.cat() == catComment) {
 558                                 if (!t.cs().empty())
 559                                         warning_message("Ignoring comment: " + t.asInput());
 560                                 continue;
 561                         }
 562                         if (allow_escaping) {
 563                                 if (t.cat() != catEscape && t.character() == right
 564                                     && group_level == 0)
 565                                         break;
 566                         } else if (e != char()) {
 567                                 if (prev_token().character() != e && t.character() == right
 568                                     && group_level == 0)
 569                                         break;
 570                         } else {
 571                                 if (t.character() == right) {
 572                                         if (t.cat() == catEscape)
 573                                                 result += '\\';
 574                                         if (group_level == 0)
 575                                                 break;
 576                                 }
 577                         }
 578                         result += t.asInput();
 579                 }
 580         }
 581         return make_pair(true, result);
 582 }
 583
 584
 585 string Parser::getArg(char left, char right, bool allow_escaping, char e)
 586 {
 587         return getFullArg(left, right, allow_escaping, e).second;
 588 }
 589
 590
 591 string Parser::getFullOpt(bool keepws, char left, char right)
 592 {
 593         Arg arg = getFullArg(left, right);
 594         if (arg.first)
 595                 return left + arg.second + right;
 596         if (keepws)
 597                 unskip_spaces(true);
 598         return string();
 599 }
 600
 601
 602 string Parser::getOpt(bool keepws)
 603 {
 604         string const res = getArg('[', ']');
 605         if (res.empty()) {
 606                 if (keepws)
 607                         unskip_spaces(true);
 608                 return string();
 609         }
 610         return '[' + res + ']';
 611 }
 612
 613
 614 string Parser::getFullParentheseArg()
 615 {
 616         Arg arg = getFullArg('(', ')');
 617         if (arg.first)
 618                 return '(' + arg.second + ')';
 619         return string();
 620 }
 621
 622
 623 bool Parser::hasListPreamble(string const & itemcmd)
 624 {
 625         // remember current position
 626         unsigned int oldpos = pos_;
 627         // jump over arguments
 628         if (hasOpt())
 629                 getOpt();
 630         if (hasOpt("{"))
 631                 getArg('{', '}');
 632         // and swallow spaces and comments
 633         skip_spaces(true);
 634         // we have a list preamble if the next thing
 635         // that follows is not the \item command
 636         bool res =  next_token().cs() != itemcmd;
 637         // back to orig position
 638         pos_ = oldpos;
 639         return res;
 640 }
 641
 642
 643 string const Parser::ertEnvironment(string const & name)
 644 {
 645         if (!good())
 646                 return string();
 647
 648         ostringstream os;
 649         for (Token t = get_token(); good(); t = get_token()) {
 650                 if (t.cat() == catBegin) {
 651                         putback();
 652                         os << '{' << verbatim_item() << '}';
 653                 } else if (t.asInput() == "\\begin") {
 654                         string const env = getArg('{', '}');
 655                         os << "\\begin{" << env << '}'
 656                            << ertEnvironment(env)
 657                            << "\\end{" << env << '}';
 658                 } else if (t.asInput() == "\\end") {
 659                         string const end = getArg('{', '}');
 660                         if (end != name)
 661                                 warning_message("\\end{" + end
 662                                                 + "} does not match \\begin{"
 663                                                 + name + "}.");
 664                         return os.str();
 665                 } else
 666                         os << t.asInput();
 667         }
 668         warning_message("unexpected end of input");
 669         return os.str();
 670 }
 671
 672
 673 string const Parser::plainEnvironment(string const & name)
 674 {
 675         if (!good())
 676                 return string();
 677
 678         ostringstream os;
 679         for (Token t = get_token(); good(); t = get_token()) {
 680                 if (t.asInput() == "\\end") {
 681                         string const end = getArg('{', '}');
 682                         if (end == name)
 683                                 return os.str();
 684                         else
 685                                 os << "\\end{" << end << '}';
 686                 } else
 687                         os << t.asInput();
 688         }
 689         warning_message("unexpected end of input");
 690         return os.str();
 691 }
 692
 693
 694 string const Parser::plainCommand(char left, char right, string const & name)
 695 {
 696         if (!good())
 697                 return string();
 698         // check if first token is really the start character
 699         Token tok = get_token();
 700         if (tok.character() != left) {
 701                 warning_message("first character does not match start character of command \\" + name);
 702                 return string();
 703         }
 704         ostringstream os;
 705         for (Token t = get_token(); good(); t = get_token()) {
 706                 if (t.character() == right) {
 707                         return os.str();
 708                 } else
 709                         os << t.asInput();
 710         }
 711         warning_message("unexpected end of input");
 712         return os.str();
 713 }
 714
 715
 716 string const Parser::getCommandLatexParam()
 717 {
 718         if (!good())
 719                 return string();
 720         string res;
 721         size_t offset = 0;
 722         while (true) {
 723                 if (pos_ + offset >= tokens_.size())
 724                         tokenize_one();
 725                 if (pos_ + offset >= tokens_.size())
 726                         break;
 727                 Token t = tokens_[pos_ + offset];
 728                 if (t.cat() == catBegin)
 729                         break;
 730                 res += t.asInput();
 731                 ++offset;
 732         }
 733         return res;
 734 }
 735
 736
 737 Parser::Arg Parser::verbatimStuff(string const & end_string, bool const allow_linebreak)
 738 {
 739         if (!good())
 740                 return Arg(false, string());
 741
 742         pushPosition();
 743         ostringstream oss;
 744         size_t match_index = 0;
 745         setCatcodes(VERBATIM_CATCODES);
 746         for (Token t = get_token(); good(); t = get_token()) {
 747                 // FIXME t.asInput() might be longer than we need ?
 748                 if (t.asInput() == end_string.substr(match_index,
 749                                                      t.asInput().length())) {
 750                         match_index += t.asInput().length();
 751                         if (match_index >= end_string.length())
 752                                 break;
 753                 } else {
 754                         if (!allow_linebreak && t.asInput() == "\n") {
 755                                 warning_message("unexpected end of input");
 756                                 popPosition();
 757                                 setCatcodes(NORMAL_CATCODES);
 758                                 return Arg(false, string());
 759                         }
 760                         if (match_index) {
 761                                 oss << end_string.substr(0, match_index)
 762                                     << t.asInput();
 763                                 match_index = 0;
 764                         } else
 765                                 oss << t.asInput();
 766                 }
 767         }
 768
 769         if (!good()) {
 770                 warning_message("unexpected end of input");
 771                 popPosition();
 772                 setCatcodes(NORMAL_CATCODES);
 773                 return Arg(false, string());
 774         }
 775         setCatcodes(NORMAL_CATCODES);
 776         dropPosition();
 777         return Arg(true, oss.str());
 778 }
 779
 780
 781 string const Parser::verbatimEnvironment(string const & name)
 782 {
 783         //FIXME: do something if endstring is not found
 784         string s = verbatimStuff("\\end{" + name + "}").second;
 785         // ignore one newline at beginning or end of string
 786         if (prefixIs(s, "\n"))
 787                 s.erase(0,1);
 788         if (suffixIs(s, "\n"))
 789                 s.erase(s.length() - 1,1);
 790         return s;
 791 }
 792
 793
 794 string Parser::verbatimOption()
 795 {
 796         string res;
 797         if (next_token().character() == '[') {
 798                 Token t = get_token();
 799                 for (t = get_token(); t.character() != ']' && good(); t = get_token()) {
 800                         if (t.cat() == catBegin) {
 801                                 putback();
 802                                 res += '{' + verbatim_item() + '}';
 803                         } else
 804                                 res += t.asInput();
 805                 }
 806         }
 807         return res;
 808 }
 809
 810
 811 string Parser::verbatim_item()
 812 {
 813         if (!good())
 814                 error("stream bad");
 815         skip_spaces();
 816         if (next_token().cat() == catBegin) {
 817                 Token t = get_token(); // skip brace
 818                 string res;
 819                 for (t = get_token(); t.cat() != catEnd && good(); t = get_token()) {
 820                         if (t.cat() == catBegin) {
 821                                 putback();
 822                                 res += '{' + verbatim_item() + '}';
 823                         }
 824                         else
 825                                 res += t.asInput();
 826                 }
 827                 return res;
 828         }
 829         return get_token().asInput();
 830 }
 831
 832
 833 void Parser::tokenize_one()
 834 {
 835         catInit();
 836         char_type c;
 837         if (!is_.get(c))
 838                 return;
 839
 840         switch (catcode(c)) {
 841         case catSpace: {
 842                 docstring s(1, c);
 843                 while (is_.get(c) && catcode(c) == catSpace)
 844                         s += c;
 845                 if (catcode(c) != catSpace)
 846                         is_.putback(c);
 847                 push_back(Token(s, catSpace));
 848                 break;
 849         }
 850
 851         case catNewline: {
 852                 ++lineno_;
 853                 docstring s(1, getNewline(is_, c));
 854                 while (is_.get(c) && catcode(c) == catNewline) {
 855                         ++lineno_;
 856                         s += getNewline(is_, c);
 857                 }
 858                 if (catcode(c) != catNewline)
 859                         is_.putback(c);
 860                 push_back(Token(s, catNewline));
 861                 break;
 862         }
 863
 864         case catComment: {
 865                 // We don't treat "%\n" combinations here specially because
 866                 // we want to preserve them in the preamble
 867                 docstring s;
 868                 while (is_.get(c) && catcode(c) != catNewline)
 869                         s += c;
 870                 // handle possible DOS line ending
 871                 if (catcode(c) == catNewline)
 872                         c = getNewline(is_, c);
 873                 // Note: The '%' at the beginning and the '\n' at the end
 874                 // of the comment are not stored.
 875                 ++lineno_;
 876                 push_back(Token(s, catComment));
 877                 break;
 878         }
 879
 880         case catEscape: {
 881                 is_.get(c);
 882                 if (!is_) {
 883                         error("unexpected end of input");
 884                 } else {
 885                         docstring s(1, c);
 886                         if (catcode(c) == catLetter) {
 887                                 // collect letters
 888                                 while (is_.get(c) && catcode(c) == catLetter)
 889                                         s += c;
 890                                 if (catcode(c) != catLetter)
 891                                         is_.putback(c);
 892                         }
 893                         push_back(Token(s, catEscape));
 894                 }
 895                 break;
 896         }
 897
 898         case catIgnore: {
 899                 warning_message("ignoring a char: " + std::to_string(static_cast<uint32_t>(c)));
 900                 break;
 901         }
 902
 903         default:
 904                 push_back(Token(docstring(1, c), catcode(c)));
 905         }
 906         //warning_message(tokens_.back());
 907 }
 908
 909
 910 void Parser::dump() const
 911 {
 912         cerr << "\nTokens: ";
 913         for (unsigned i = 0; i < tokens_.size(); ++i) {
 914                 if (i == pos_)
 915                         cerr << " <#> ";
 916                 cerr << tokens_[i];
 917         }
 918         cerr << " pos: " << pos_ << "\n";
 919 }
 920
 921
 922 void Parser::error(string const & msg) const
 923 {
 924         error_message("Line ~" + convert<string>(lineno_) + ":  parse error: " + msg);
 925         dump();
 926         //exit(1);
 927 }
 928
 929
 930 void Parser::reset()
 931 {
 932         pos_ = 0;
 933 }
 934
 935
 936 } // namespace lyx