]> git.lyx.org Git - lyx.git/blob - src/tex2lyx/Parser.cpp
Fix functions that used functions but did not define them
[lyx.git] / src / tex2lyx / Parser.cpp
1 /**
2  * \file Parser.cpp
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author André Pönitz
7  *
8  * Full author contact details are available in file CREDITS.
9  */
10
11 #include <config.h>
12
13 #include "Parser.h"
14
15 #include "tex2lyx.h"
16
17 #include "Encoding.h"
18 #include "support/convert.h"
19 #include "support/lstrings.h"
20 #include "support/textutils.h"
21
22 #include <cstdint>
23 #include <iostream>
24
25 using namespace std;
26 using namespace lyx::support;
27
28 namespace lyx {
29
30 namespace {
31
32 /*!
33  * Translate a line ending to '\n'.
34  * \p c must have catcode catNewline, and it must be the last character read
35  * from \p is.
36  */
37 char_type getNewline(iparserdocstream & is, char_type c)
38 {
39         // we have to handle 3 different line endings:
40         // - UNIX (\n)
41         // - MAC  (\r)
42         // - DOS  (\r\n)
43         if (c == '\r') {
44                 // MAC or DOS
45                 char_type wc;
46                 if (is.get(wc) && wc != '\n') {
47                         // MAC
48                         is.putback(wc);
49                 }
50                 return '\n';
51         }
52         // UNIX
53         return c;
54 }
55
56 } // namespace
57
58 //
59 // Token
60 //
61
62 ostream & operator<<(ostream & os, Token const & t)
63 {
64         if (t.cat() == catComment)
65                 os << '%' << t.cs() << '\n';
66         else if (t.cat() == catSpace)
67                 os << t.cs();
68         else if (t.cat() == catEscape)
69                 os << '\\' << t.cs() << ' ';
70         else if (t.cat() == catLetter)
71                 os << t.cs();
72         else if (t.cat() == catNewline)
73                 os << "[" << t.cs().size() << "\\n," << t.cat() << "]\n";
74         else
75                 os << '[' << t.cs() << ',' << t.cat() << ']';
76         return os;
77 }
78
79
80 string Token::asInput() const
81 {
82         if (cat_ == catComment)
83                 return '%' + cs_ + '\n';
84         if (cat_ == catEscape)
85                 return '\\' + cs_;
86         return cs_;
87 }
88
89
90 bool Token::isAlnumASCII() const
91 {
92         return cat_ == catLetter ||
93                (cat_ == catOther && cs_.length() == 1 && isDigitASCII(cs_[0]));
94 }
95
96
#ifdef FILEDEBUG
/// Write \p t, the numeric value of \p flags and the names of all flag
/// bits that are set in \p flags to \p os (debugging aid).
void debugToken(std::ostream & os, Token const & t, unsigned int flags)
{
	struct FlagName {
		unsigned int bit;
		char const * name;
	};
	// Listed in the order in which the names are printed
	static FlagName const known_flags[] = {
		{ FLAG_BRACE_LAST, "BRACE_LAST" },
		{ FLAG_RIGHT,      "RIGHT" },
		{ FLAG_END,        "END" },
		{ FLAG_BRACK_LAST, "BRACK_LAST" },
		{ FLAG_TEXTMODE,   "TEXTMODE" },
		{ FLAG_ITEM,       "ITEM" },
		{ FLAG_LEAVE,      "LEAVE" },
		{ FLAG_SIMPLE,     "SIMPLE" },
		{ FLAG_EQUATION,   "EQUATION" },
		{ FLAG_SIMPLE2,    "SIMPLE2" },
		{ FLAG_OPTION,     "OPTION" },
		{ FLAG_BRACED,     "BRACED" },
		{ FLAG_CELL,       "CELL" },
		{ FLAG_TABBING,    "TABBING" }
	};

	os << "t: " << t << " flags: " << flags;
	// the first name is separated by a blank, all following ones by '|'
	char sep = ' ';
	for (FlagName const & entry : known_flags) {
		if (flags & entry.bit) {
			os << sep << entry.name;
			sep = '|';
		}
	}
	os << "\n";
}
#endif
119
120
121 //
122 // Wrapper
123 //
124
125 void iparserdocstream::setEncoding(std::string const & e)
126 {
127         is_ << lyx::setEncoding(e);
128 }
129
130
131 void iparserdocstream::putback(char_type c)
132 {
133         s_ = c + s_;
134 }
135
136
137 void iparserdocstream::putback(docstring const & s)
138 {
139         s_ = s + s_;
140 }
141
142
143 iparserdocstream & iparserdocstream::get(char_type &c)
144 {
145         if (s_.empty())
146                 is_.get(c);
147         else {
148                 //warning_message("unparsed: " + to_utf8(s_));
149                 c = s_[0];
150                 s_.erase(0,1);
151         }
152         return *this;
153 }
154
155
156 //
157 // Parser
158 //
159
160
161 Parser::Parser(idocstream & is, std::string const & fixedenc)
162         : lineno_(0), pos_(0), iss_(nullptr), is_(is),
163           encoding_iconv_(fixedenc.empty() ? "UTF-8" : fixedenc),
164           theCatcodesType_(NORMAL_CATCODES), curr_cat_(UNDECIDED_CATCODES),
165           fixed_enc_(!fixedenc.empty())
166 {
167         if (fixed_enc_)
168                 is_.setEncoding(fixedenc);
169         catInit();
170 }
171
172
173 Parser::Parser(string const & s)
174         : lineno_(0), pos_(0),
175           iss_(new idocstringstream(from_utf8(s))), is_(*iss_),
176           encoding_iconv_("UTF-8"),
177           theCatcodesType_(NORMAL_CATCODES), curr_cat_(UNDECIDED_CATCODES),
178           // An idocstringstream can not change the encoding
179           fixed_enc_(true)
180 {
181         catInit();
182 }
183
184
185 Parser::~Parser()
186 {
187         delete iss_;
188 }
189
190
191 void Parser::deparse()
192 {
193         string s;
194         for(size_type i = pos_ ; i < tokens_.size() ; ++i) {
195                 s += tokens_[i].asInput();
196         }
197         is_.putback(from_utf8(s));
198         tokens_.erase(tokens_.begin() + pos_, tokens_.end());
199         // make sure that next token is read
200         tokenize_one();
201 }
202
203
204 bool Parser::setEncoding(std::string const & e, int p)
205 {
206         // We may (and need to) use unsafe encodings here: Since the text is
207         // converted to unicode while reading from is_, we never see text in
208         // the original encoding of the parser, but operate on utf8 strings
209         // instead. Therefore, we cannot misparse high bytes as {, } or \\.
210         Encoding const * const enc = encodings.fromLaTeXName(e, p, true);
211         if (!enc) {
212                 warning_message("Unknown encoding " + e + ". Ignoring.");
213                 return false;
214         }
215         return setEncoding(enc->iconvName());
216 }
217
218
219 void Parser::catInit()
220 {
221         if (curr_cat_ == theCatcodesType_)
222                 return;
223         curr_cat_ = theCatcodesType_;
224
225         fill(theCatcode_, theCatcode_ + 256, catOther);
226         fill(theCatcode_ + 'a', theCatcode_ + 'z' + 1, catLetter);
227         fill(theCatcode_ + 'A', theCatcode_ + 'Z' + 1, catLetter);
228         // This is wrong!
229         theCatcode_[int('@')]  = catLetter;
230
231         if (theCatcodesType_ == NORMAL_CATCODES) {
232                 theCatcode_[int('\\')] = catEscape;
233                 theCatcode_[int('{')]  = catBegin;
234                 theCatcode_[int('}')]  = catEnd;
235                 theCatcode_[int('$')]  = catMath;
236                 theCatcode_[int('&')]  = catAlign;
237                 theCatcode_[int('\n')] = catNewline;
238                 theCatcode_[int('#')]  = catParameter;
239                 theCatcode_[int('^')]  = catSuper;
240                 theCatcode_[int('_')]  = catSub;
241                 theCatcode_[0x7f]      = catIgnore;
242                 theCatcode_[int(' ')]  = catSpace;
243                 theCatcode_[int('\t')] = catSpace;
244                 theCatcode_[int('\r')] = catNewline;
245                 theCatcode_[int('~')]  = catActive;
246                 theCatcode_[int('%')]  = catComment;
247         }
248 }
249
250 CatCode Parser::catcode(char_type c) const
251 {
252         if (c < 256)
253                 return theCatcode_[(unsigned char)c];
254         return catOther;
255 }
256
257
258 void Parser::setCatcode(char c, CatCode cat)
259 {
260         theCatcode_[(unsigned char)c] = cat;
261         deparse();
262 }
263
264
265 void Parser::setCatcodes(cat_type t)
266 {
267         theCatcodesType_ = t;
268         deparse();
269 }
270
271
272 bool Parser::setEncoding(std::string const & e)
273 {
274         //warning_message("setting encoding to " + e);
275         encoding_iconv_ = e;
276         // If the encoding is fixed, we must not change the stream encoding
277         // (because the whole input uses that encoding, e.g. if it comes from
278         // the clipboard). We still need to track the original encoding in
279         // encoding_iconv_, so that the generated output is correct.
280         if (!fixed_enc_)
281                 is_.setEncoding(e);
282         return true;
283 }
284
285
286 void Parser::push_back(Token const & t)
287 {
288         tokens_.push_back(t);
289 }
290
291
292 // We return a copy here because the tokens_ vector may get reallocated
293 Token const Parser::prev_token() const
294 {
295         static const Token dummy;
296         return pos_ > 1 ? tokens_[pos_ - 2] : dummy;
297 }
298
299
300 // We return a copy here because the tokens_ vector may get reallocated
301 Token const Parser::curr_token() const
302 {
303         static const Token dummy;
304         return pos_ > 0 ? tokens_[pos_ - 1] : dummy;
305 }
306
307
308 // We return a copy here because the tokens_ vector may get reallocated
309 Token const Parser::next_token()
310 {
311         static const Token dummy;
312         if (!good())
313                 return dummy;
314         if (pos_ >= tokens_.size())
315                 tokenize_one();
316         return pos_ < tokens_.size() ? tokens_[pos_] : dummy;
317 }
318
319
320 // We return a copy here because the tokens_ vector may get reallocated
321 Token const Parser::next_next_token()
322 {
323         static const Token dummy;
324         if (!good())
325                 return dummy;
326         // If tokenize_one() has not been called after the last get_token() we
327         // need to tokenize two more tokens.
328         if (pos_ >= tokens_.size())
329                 tokenize_one();
330         if (pos_ + 1 >= tokens_.size())
331                 tokenize_one();
332         return pos_ + 1 < tokens_.size() ? tokens_[pos_ + 1] : dummy;
333 }
334
335
336 // We return a copy here because the tokens_ vector may get reallocated
337 Token const Parser::get_token()
338 {
339         static const Token dummy;
340         if (!good())
341                 return dummy;
342         if (pos_ >= tokens_.size()) {
343                 tokenize_one();
344                 if (pos_ >= tokens_.size())
345                         return dummy;
346         }
347         // warning_message("looking at token " + tokens_[pos_]
348         //      + " pos: " + pos_ <<);
349         return tokens_[pos_++];
350 }
351
352
353 bool Parser::isParagraph()
354 {
355         // A new paragraph in TeX is started
356         // - either by a newline, following any amount of whitespace
357         //   characters (including zero), and another newline
358         // - or the token \par
359         if (curr_token().cat() == catNewline &&
360             (curr_token().cs().size() > 1 ||
361              (next_token().cat() == catSpace &&
362               next_next_token().cat() == catNewline)))
363                 return true;
364         if (curr_token().cat() == catEscape && curr_token().cs() == "par")
365                 return true;
366         return false;
367 }
368
369
370 bool Parser::skip_spaces(bool skip_comments)
371 {
372         // We just silently return if we have no more tokens.
373         // skip_spaces() should be callable at any time,
374         // the caller must check p::good() anyway.
375         bool skipped = false;
376         while (good()) {
377                 get_token();
378                 if (isParagraph()) {
379                         putback();
380                         break;
381                 }
382                 if (curr_token().cat() == catSpace ||
383                     curr_token().cat() == catNewline) {
384                         skipped = true;
385                         continue;
386                 }
387                 if ((curr_token().cat() == catComment && curr_token().cs().empty()))
388                         continue;
389                 if (skip_comments && curr_token().cat() == catComment) {
390                         // If positions_ is not empty we are doing some kind
391                         // of look ahead
392                         if (!positions_.empty())
393                                 warning_message("Ignoring comment: " + curr_token().asInput());
394                 } else {
395                         putback();
396                         break;
397                 }
398         }
399         return skipped;
400 }
401
402
403 void Parser::unskip_spaces(bool skip_comments)
404 {
405         while (pos_ > 0) {
406                 if ( curr_token().cat() == catSpace ||
407                     (curr_token().cat() == catNewline && curr_token().cs().size() == 1))
408                         putback();
409                 else if (skip_comments && curr_token().cat() == catComment) {
410                         // TODO: Get rid of this
411                         // If positions_ is not empty we are doing some kind
412                         // of look ahead
413                         if (!positions_.empty())
414                                 warning_message("Unignoring comment: " + curr_token().asInput());
415                         putback();
416                 }
417                 else
418                         break;
419         }
420 }
421
422
423 void Parser::putback()
424 {
425         --pos_;
426 }
427
428
429 void Parser::pushPosition()
430 {
431         positions_.push_back(pos_);
432 }
433
434
435 void Parser::popPosition()
436 {
437         pos_ = positions_.back();
438         positions_.pop_back();
439         deparse();
440 }
441
442
443 void Parser::dropPosition()
444 {
445         positions_.pop_back();
446 }
447
448
449 bool Parser::good() const
450 {
451         if (pos_ < tokens_.size())
452                 return true;
453         if (!is_.good())
454                 return false;
455         return is_.peek() != idocstream::traits_type::eof();
456 }
457
458
459 bool Parser::hasOpt(string const & l)
460 {
461         // An optional argument can occur in any of the following forms:
462         // - \foo[bar]
463         // - \foo [bar]
464         // - \foo
465         //   [bar]
466         // - \foo %comment
467         //   [bar]
468
469         // remember current position
470         unsigned int oldpos = pos_;
471         // skip spaces and comments
472         while (good()) {
473                 get_token();
474                 if (isParagraph()) {
475                         putback();
476                         break;
477                 }
478                 if (curr_token().cat() == catSpace ||
479                     curr_token().cat() == catNewline ||
480                     curr_token().cat() == catComment)
481                         continue;
482                 putback();
483                 break;
484         }
485         bool const retval = (next_token().asInput() == l);
486         pos_ = oldpos;
487         return retval;
488 }
489
490
491 bool Parser::hasIdxMacros(string const & c, string const & e)
492 {
493         // Check for index entry separator (! or @),
494         // consider escaping via "
495         // \p e marks a terminating delimiter¸
496
497         // remember current position
498         unsigned int oldpos = pos_;
499         // skip spaces and comments
500         bool retval = false;
501         while (good()) {
502                 get_token();
503                 if (isParagraph()) {
504                         putback();
505                         break;
506                 }
507                 if (curr_token().cat() == catEnd)
508                         break;
509                 if (!e.empty() && curr_token().asInput() == e
510                     && prev_token().asInput() != "\"")
511                         break;
512                 if (curr_token().asInput() == c
513                     && prev_token().asInput() != "\"") {
514                         retval = true;
515                         break;
516                 }
517                 continue;
518         }
519         pos_ = oldpos;
520         return retval;
521 }
522
523
524 Parser::Arg Parser::getFullArg(char left, char right, bool allow_escaping, char e)
525 {
526         skip_spaces(true);
527
528         // This is needed if a partial file ends with a command without arguments,
529         // e. g. \medskip
530         if (! good())
531                 return make_pair(false, string());
532
533         int group_level = (left == '{') ? 1 : 0;
534         string result;
535         Token t = get_token();
536
537         if (left != char()
538             && (t.cat() == catComment || t.cat() == catEscape
539                 || t.character() != left)) {
540                 putback();
541                 return make_pair(false, string());
542         } else {
543                 while (good()) {
544                         t = get_token();
545                         // honor grouping
546                         if (t.cat() == catBegin) {
547                                 ++group_level;
548                                 if (left != '{')
549                                         continue;
550                         }
551                         if (group_level > 0 && t.cat() == catEnd) {
552                                 --group_level;
553                                 if (left != '{')
554                                         continue;
555                         }
556                         // Ignore comments
557                         if (t.cat() == catComment) {
558                                 if (!t.cs().empty())
559                                         warning_message("Ignoring comment: " + t.asInput());
560                                 continue;
561                         }
562                         if (allow_escaping) {
563                                 if (t.cat() != catEscape && t.character() == right
564                                     && group_level == 0)
565                                         break;
566                         } else if (e != char()) {
567                                 if (prev_token().character() != e && t.character() == right
568                                     && group_level == 0)
569                                         break;
570                         } else {
571                                 if (t.character() == right) {
572                                         if (t.cat() == catEscape)
573                                                 result += '\\';
574                                         if (group_level == 0)
575                                                 break;
576                                 }
577                         }
578                         result += t.asInput();
579                 }
580         }
581         return make_pair(true, result);
582 }
583
584
585 string Parser::getArg(char left, char right, bool allow_escaping, char e)
586 {
587         return getFullArg(left, right, allow_escaping, e).second;
588 }
589
590
591 string Parser::getFullOpt(bool keepws, char left, char right)
592 {
593         Arg arg = getFullArg(left, right);
594         if (arg.first)
595                 return left + arg.second + right;
596         if (keepws)
597                 unskip_spaces(true);
598         return string();
599 }
600
601
602 string Parser::getOpt(bool keepws)
603 {
604         string const res = getArg('[', ']');
605         if (res.empty()) {
606                 if (keepws)
607                         unskip_spaces(true);
608                 return string();
609         }
610         return '[' + res + ']';
611 }
612
613
614 string Parser::getFullParentheseArg()
615 {
616         Arg arg = getFullArg('(', ')');
617         if (arg.first)
618                 return '(' + arg.second + ')';
619         return string();
620 }
621
622
623 bool Parser::hasListPreamble(string const & itemcmd)
624 {
625         // remember current position
626         unsigned int oldpos = pos_;
627         // jump over arguments
628         if (hasOpt())
629                 getOpt();
630         if (hasOpt("{"))
631                 getArg('{', '}');
632         // and swallow spaces and comments
633         skip_spaces(true);
634         // we have a list preamble if the next thing
635         // that follows is not the \item command
636         bool res =  next_token().cs() != itemcmd;
637         // back to orig position
638         pos_ = oldpos;
639         return res;
640 }
641
642
643 string const Parser::ertEnvironment(string const & name)
644 {
645         if (!good())
646                 return string();
647
648         ostringstream os;
649         for (Token t = get_token(); good(); t = get_token()) {
650                 if (t.cat() == catBegin) {
651                         putback();
652                         os << '{' << verbatim_item() << '}';
653                 } else if (t.asInput() == "\\begin") {
654                         string const env = getArg('{', '}');
655                         os << "\\begin{" << env << '}'
656                            << ertEnvironment(env)
657                            << "\\end{" << env << '}';
658                 } else if (t.asInput() == "\\end") {
659                         string const end = getArg('{', '}');
660                         if (end != name)
661                                 warning_message("\\end{" + end
662                                                 + "} does not match \\begin{"
663                                                 + name + "}.");
664                         return os.str();
665                 } else
666                         os << t.asInput();
667         }
668         warning_message("unexpected end of input");
669         return os.str();
670 }
671
672
673 string const Parser::plainEnvironment(string const & name)
674 {
675         if (!good())
676                 return string();
677
678         ostringstream os;
679         for (Token t = get_token(); good(); t = get_token()) {
680                 if (t.asInput() == "\\end") {
681                         string const end = getArg('{', '}');
682                         if (end == name)
683                                 return os.str();
684                         else
685                                 os << "\\end{" << end << '}';
686                 } else
687                         os << t.asInput();
688         }
689         warning_message("unexpected end of input");
690         return os.str();
691 }
692
693
694 string const Parser::plainCommand(char left, char right, string const & name)
695 {
696         if (!good())
697                 return string();
698         // check if first token is really the start character
699         Token tok = get_token();
700         if (tok.character() != left) {
701                 warning_message("first character does not match start character of command \\" + name);
702                 return string();
703         }
704         ostringstream os;
705         for (Token t = get_token(); good(); t = get_token()) {
706                 if (t.character() == right) {
707                         return os.str();
708                 } else
709                         os << t.asInput();
710         }
711         warning_message("unexpected end of input");
712         return os.str();
713 }
714
715
716 string const Parser::getCommandLatexParam()
717 {
718         if (!good())
719                 return string();
720         string res;
721         size_t offset = 0;
722         while (true) {
723                 if (pos_ + offset >= tokens_.size())
724                         tokenize_one();
725                 if (pos_ + offset >= tokens_.size())
726                         break;
727                 Token t = tokens_[pos_ + offset];
728                 if (t.cat() == catBegin)
729                         break;
730                 res += t.asInput();
731                 ++offset;
732         }
733         return res;
734 }
735
736
737 Parser::Arg Parser::verbatimStuff(string const & end_string, bool const allow_linebreak)
738 {
739         if (!good())
740                 return Arg(false, string());
741
742         pushPosition();
743         ostringstream oss;
744         size_t match_index = 0;
745         setCatcodes(VERBATIM_CATCODES);
746         for (Token t = get_token(); good(); t = get_token()) {
747                 // FIXME t.asInput() might be longer than we need ?
748                 if (t.asInput() == end_string.substr(match_index,
749                                                      t.asInput().length())) {
750                         match_index += t.asInput().length();
751                         if (match_index >= end_string.length())
752                                 break;
753                 } else {
754                         if (!allow_linebreak && t.asInput() == "\n") {
755                                 warning_message("unexpected end of input");
756                                 popPosition();
757                                 setCatcodes(NORMAL_CATCODES);
758                                 return Arg(false, string());
759                         }
760                         if (match_index) {
761                                 oss << end_string.substr(0, match_index)
762                                     << t.asInput();
763                                 match_index = 0;
764                         } else
765                                 oss << t.asInput();
766                 }
767         }
768
769         if (!good()) {
770                 warning_message("unexpected end of input");
771                 popPosition();
772                 setCatcodes(NORMAL_CATCODES);
773                 return Arg(false, string());
774         }
775         setCatcodes(NORMAL_CATCODES);
776         dropPosition();
777         return Arg(true, oss.str());
778 }
779
780
781 string const Parser::verbatimEnvironment(string const & name)
782 {
783         //FIXME: do something if endstring is not found
784         string s = verbatimStuff("\\end{" + name + "}").second;
785         // ignore one newline at beginning or end of string
786         if (prefixIs(s, "\n"))
787                 s.erase(0,1);
788         if (suffixIs(s, "\n"))
789                 s.erase(s.length() - 1,1);
790         return s;
791 }
792
793
794 string Parser::verbatimOption()
795 {
796         string res;
797         if (next_token().character() == '[') {
798                 Token t = get_token();
799                 for (t = get_token(); t.character() != ']' && good(); t = get_token()) {
800                         if (t.cat() == catBegin) {
801                                 putback();
802                                 res += '{' + verbatim_item() + '}';
803                         } else
804                                 res += t.asInput();
805                 }
806         }
807         return res;
808 }
809
810
811 string Parser::verbatim_item()
812 {
813         if (!good())
814                 error("stream bad");
815         skip_spaces();
816         if (next_token().cat() == catBegin) {
817                 Token t = get_token(); // skip brace
818                 string res;
819                 for (t = get_token(); t.cat() != catEnd && good(); t = get_token()) {
820                         if (t.cat() == catBegin) {
821                                 putback();
822                                 res += '{' + verbatim_item() + '}';
823                         }
824                         else
825                                 res += t.asInput();
826                 }
827                 return res;
828         }
829         return get_token().asInput();
830 }
831
832
833 void Parser::tokenize_one()
834 {
835         catInit();
836         char_type c;
837         if (!is_.get(c))
838                 return;
839
840         switch (catcode(c)) {
841         case catSpace: {
842                 docstring s(1, c);
843                 while (is_.get(c) && catcode(c) == catSpace)
844                         s += c;
845                 if (catcode(c) != catSpace)
846                         is_.putback(c);
847                 push_back(Token(s, catSpace));
848                 break;
849         }
850
851         case catNewline: {
852                 ++lineno_;
853                 docstring s(1, getNewline(is_, c));
854                 while (is_.get(c) && catcode(c) == catNewline) {
855                         ++lineno_;
856                         s += getNewline(is_, c);
857                 }
858                 if (catcode(c) != catNewline)
859                         is_.putback(c);
860                 push_back(Token(s, catNewline));
861                 break;
862         }
863
864         case catComment: {
865                 // We don't treat "%\n" combinations here specially because
866                 // we want to preserve them in the preamble
867                 docstring s;
868                 while (is_.get(c) && catcode(c) != catNewline)
869                         s += c;
870                 // handle possible DOS line ending
871                 if (catcode(c) == catNewline)
872                         c = getNewline(is_, c);
873                 // Note: The '%' at the beginning and the '\n' at the end
874                 // of the comment are not stored.
875                 ++lineno_;
876                 push_back(Token(s, catComment));
877                 break;
878         }
879
880         case catEscape: {
881                 is_.get(c);
882                 if (!is_) {
883                         error("unexpected end of input");
884                 } else {
885                         docstring s(1, c);
886                         if (catcode(c) == catLetter) {
887                                 // collect letters
888                                 while (is_.get(c) && catcode(c) == catLetter)
889                                         s += c;
890                                 if (catcode(c) != catLetter)
891                                         is_.putback(c);
892                         }
893                         push_back(Token(s, catEscape));
894                 }
895                 break;
896         }
897
898         case catIgnore: {
899                 warning_message("ignoring a char: " + std::to_string(static_cast<uint32_t>(c)));
900                 break;
901         }
902
903         default:
904                 push_back(Token(docstring(1, c), catcode(c)));
905         }
906         //warning_message(tokens_.back());
907 }
908
909
910 void Parser::dump() const
911 {
912         cerr << "\nTokens: ";
913         for (unsigned i = 0; i < tokens_.size(); ++i) {
914                 if (i == pos_)
915                         cerr << " <#> ";
916                 cerr << tokens_[i];
917         }
918         cerr << " pos: " << pos_ << "\n";
919 }
920
921
922 void Parser::error(string const & msg) const
923 {
924         error_message("Line ~" + convert<string>(lineno_) + ":  parse error: " + msg);
925         dump();
926         //exit(1);
927 }
928
929
930 void Parser::reset()
931 {
932         pos_ = 0;
933 }
934
935
936 } // namespace lyx