]> git.lyx.org Git - lyx.git/blob - src/tex2lyx/texparser.C
Add a Buffer::fully_loaded member function, returning true only when
[lyx.git] / src / tex2lyx / texparser.C
1 /**
2  * \file texparser.C
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author André Pönitz
7  *
8  * Full author contact details are available in file CREDITS.
9  */
10
11 #include <config.h>
12
13 #include "texparser.h"
14
15 #include <iostream>
16 #include <sstream>
17
18 using std::cerr;
19 using std::endl;
20 using std::fill;
21 using std::istream;
22 using std::istringstream;
23 using std::ostream;
24 using std::string;
25
26
27 namespace {
28
29 CatCode theCatcode[256];
30
31 void skipSpaceTokens(istream & is, char c)
32 {
33         // skip trailing spaces
34         while (catcode(c) == catSpace || catcode(c) == catNewline)
35                 if (!is.get(c))
36                         break;
37         //cerr << "putting back: " << c << "\n";
38         is.putback(c);
39 }
40
41
42 void catInit()
43 {
44         fill(theCatcode, theCatcode + 256, catOther);
45         fill(theCatcode + 'a', theCatcode + 'z' + 1, catLetter);
46         fill(theCatcode + 'A', theCatcode + 'Z' + 1, catLetter);
47
48         theCatcode[int('\\')] = catEscape;
49         theCatcode[int('{')]  = catBegin;
50         theCatcode[int('}')]  = catEnd;
51         theCatcode[int('$')]  = catMath;
52         theCatcode[int('&')]  = catAlign;
53         theCatcode[10]   = catNewline;
54         theCatcode[int('#')]  = catParameter;
55         theCatcode[int('^')]  = catSuper;
56         theCatcode[int('_')]  = catSub;
57         theCatcode[0x7f] = catIgnore;
58         theCatcode[int(' ')]  = catSpace;
59         theCatcode[int('\t')] = catSpace;
60         theCatcode[13]   = catIgnore;
61         theCatcode[int('~')]  = catActive;
62         theCatcode[int('%')]  = catComment;
63
64         // This is wrong!
65         theCatcode[int('@')]  = catLetter;
66 }
67
68 }
69
70
71 //
72 // catcodes
73 //
74
75 mode_type asMode(mode_type oldmode, string const & str)
76 {
77         if (str == "mathmode")
78                 return MATH_MODE;
79         if (str == "textmode" || str == "forcetext")
80                 return TEXT_MODE;
81         return oldmode;
82 }
83
84
85 CatCode catcode(unsigned char c)
86 {
87         return theCatcode[c];
88 }
89
90
91
92 //
93 // Token
94 //
95
96 ostream & operator<<(ostream & os, Token const & t)
97 {
98         if (t.cs().size())
99                 os << '\\' << t.cs() << ' ';
100         else if (t.cat() == catLetter)
101                 os << t.character();
102         else if (t.cat() == catNewline)
103                 os << "[\\n," << t.cat() << "]\n";
104         else
105                 os << '[' << t.character() << ',' << t.cat() << ']';
106         return os;
107 }
108
109
110 string Token::asString() const
111 {
112         return cs_.size() ? cs_ : string(1, char_);
113 }
114
115
116 string Token::asInput() const
117 {
118         return char_ ? string(1, char_) : '\\' + cs_ + ' ';
119 }
120
121
122 //
123 // Parser
124 //
125
126
127 Parser::Parser(istream & is)
128         : lineno_(0), pos_(0)
129 {
130         tokenize(is);
131 }
132
133
134 Parser::Parser(string const & s)
135         : lineno_(0), pos_(0)
136 {
137         istringstream is(s);
138         tokenize(is);
139 }
140
141
142 void Parser::push_back(Token const & t)
143 {
144         tokens_.push_back(t);
145 }
146
147
148 void Parser::pop_back()
149 {
150         tokens_.pop_back();
151 }
152
153
154 Token const & Parser::prev_token() const
155 {
156         static const Token dummy;
157         return pos_ > 0 ? tokens_[pos_ - 1] : dummy;
158 }
159
160
161 Token const & Parser::next_token() const
162 {
163         static const Token dummy;
164         return good() ? tokens_[pos_] : dummy;
165 }
166
167
168 Token const & Parser::get_token()
169 {
170         static const Token dummy;
171         //cerr << "looking at token " << tokens_[pos_] << " pos: " << pos_ << '\n';
172         return good() ? tokens_[pos_++] : dummy;
173 }
174
175
176 void Parser::skip_spaces()
177 {
178         while (1) {
179                 if (next_token().cat() == catSpace || next_token().cat() == catNewline)
180                         get_token();
181                 else if (next_token().cat() == catComment)
182                         while (next_token().cat() != catNewline)
183                                 get_token();
184                 else
185                         break;
186         }
187 }
188
189
190 void Parser::putback()
191 {
192         --pos_;
193 }
194
195
196 bool Parser::good() const
197 {
198         return pos_ < tokens_.size();
199 }
200
201
202 char Parser::getChar()
203 {
204         if (!good())
205                 error("The input stream is not well...");
206         return tokens_[pos_++].character();
207 }
208
209
210 string Parser::getArg(char left, char right)
211 {
212         skip_spaces();
213
214         string result;
215         char c = getChar();
216
217         if (c != left)
218                 putback();
219         else
220                 while ((c = getChar()) != right && good())
221                         result += c;
222
223         return result;
224 }
225
226
227 string Parser::getOpt()
228 {
229         string const res = getArg('[', ']');
230         return res.size() ? '[' + res + ']' : string();
231 }
232
233
234 void Parser::tokenize(istream & is)
235 {
236         static bool init_done = false;
237
238         if (!init_done) {
239                 catInit();
240                 init_done = true;
241         }
242
243         char c;
244         while (is.get(c)) {
245                 //cerr << "reading c: " << c << "\n";
246
247                 switch (catcode(c)) {
248                         case catNewline: {
249                                 ++lineno_;
250                                 is.get(c);
251                                 if (catcode(c) == catNewline) {
252                                         //do {
253                                                 is.get(c);
254                                         //} while (catcode(c) == catNewline);
255                                         push_back(Token("par"));
256                                 } else {
257                                         push_back(Token('\n', catNewline));
258                                 }
259                                 is.putback(c);
260                                 break;
261                         }
262
263                         case catComment: {
264                                 push_back(Token(c, catComment));
265                                 while (is.get(c) && catcode(c) != catNewline)
266                                         push_back(Token(c, catLetter));
267                                 push_back(Token(c, catNewline));
268                                 ++lineno_;
269                                 is.get(c);
270                                 if (catcode(c) == catNewline) {
271                                         push_back(Token("par"));
272                                         ++lineno_;
273                                 } else {
274                                         is.putback(c);
275                                 }
276                                 break;
277                         }
278
279                         case catEscape: {
280                                 is.get(c);
281                                 if (!is) {
282                                         error("unexpected end of input");
283                                 } else {
284                                         string s(1, c);
285                                         if (catcode(c) == catLetter) {
286                                                 // collect letters
287                                                 while (is.get(c) && catcode(c) == catLetter)
288                                                         s += c;
289                                                 skipSpaceTokens(is, c);
290                                         }
291                                         push_back(Token(s));
292                                 }
293                                 break;
294                         }
295
296                         case catSuper:
297                         case catSub: {
298                                 push_back(Token(c, catcode(c)));
299                                 is.get(c);
300                                 skipSpaceTokens(is, c);
301                                 break;
302                         }
303
304                         case catIgnore: {
305                                 if (c != 13)
306                                         cerr << "ignoring a char: " << int(c) << "\n";
307                                 break;
308                         }
309
310                         default:
311                                 push_back(Token(c, catcode(c)));
312                 }
313         }
314 }
315
316
317 void Parser::dump() const
318 {
319         cerr << "\nTokens: ";
320         for (unsigned i = 0; i < tokens_.size(); ++i) {
321                 if (i == pos_)
322                         cerr << " <#> ";
323                 cerr << tokens_[i];
324         }
325         cerr << " pos: " << pos_ << "\n";
326 }
327
328
329 void Parser::error(string const & msg)
330 {
331         cerr << "Line ~" << lineno_ << ":  parse error: " << msg << endl;
332         dump();
333         //exit(1);
334 }
335
336
337 string Parser::verbatimOption()
338 {
339         string res;
340         if (next_token().character() == '[') {
341                 Token t = get_token();
342                 for (Token t = get_token(); t.character() != ']' && good(); t = get_token()) {
343                         if (t.cat() == catBegin) {
344                                 putback();
345                                 res += '{' + verbatim_item() + '}';
346                         } else
347                                 res += t.asString();
348                 }
349         }
350         return res;
351 }
352
353
354 string Parser::verbatim_item()
355 {
356         if (!good())
357                 error("stream bad");
358         skip_spaces();
359         if (next_token().cat() == catBegin) {
360                 Token t = get_token(); // skip brace
361                 string res;
362                 for (Token t = get_token(); t.cat() != catEnd && good(); t = get_token()) {
363                         if (t.cat() == catBegin) {
364                                 putback();
365                                 res += '{' + verbatim_item() + '}';
366                         }
367                         else
368                                 res += t.asInput();
369                 }
370                 return res;
371         }
372         return get_token().asInput();
373 }
374
375
376 void Parser::setCatCode(char c, CatCode cat)
377 {
378         theCatcode[(unsigned char)c] = cat;
379 }
380
381
382 CatCode Parser::getCatCode(char c) const
383 {
384         return theCatcode[(unsigned char)c];
385 }