]> git.lyx.org Git - lyx.git/blob - src/tex2lyx/texparser.C
More 'standard conformant blurb' nonsense.
[lyx.git] / src / tex2lyx / texparser.C
1 /**
2  * \file texparser.C
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author André Pönitz
7  *
8  * Full author contact details are available in file CREDITS
9  */
10
11 #include <config.h>
12
13 #include "texparser.h"
14
15 #include <iostream>
16 #include <sstream>
17
18 using std::cerr;
19 using std::endl;
20 using std::fill;
21 using std::ios;
22 using std::istream;
23 using std::istringstream;
24 using std::ostream;
25 using std::string;
26 using std::vector;
27
28
29 namespace {
30
31 CatCode theCatcode[256];
32
33 void skipSpaceTokens(istream & is, char c)
34 {
35         // skip trailing spaces
36         while (catcode(c) == catSpace || catcode(c) == catNewline)
37                 if (!is.get(c))
38                         break;
39         //cerr << "putting back: " << c << "\n";
40         is.putback(c);
41 }
42
43
44 void catInit()
45 {
46         fill(theCatcode, theCatcode + 256, catOther);
47         fill(theCatcode + 'a', theCatcode + 'z' + 1, catLetter);
48         fill(theCatcode + 'A', theCatcode + 'Z' + 1, catLetter);
49
50         theCatcode[int('\\')] = catEscape;
51         theCatcode[int('{')]  = catBegin;
52         theCatcode[int('}')]  = catEnd;
53         theCatcode[int('$')]  = catMath;
54         theCatcode[int('&')]  = catAlign;
55         theCatcode[10]   = catNewline;
56         theCatcode[int('#')]  = catParameter;
57         theCatcode[int('^')]  = catSuper;
58         theCatcode[int('_')]  = catSub;
59         theCatcode[0x7f] = catIgnore;
60         theCatcode[int(' ')]  = catSpace;
61         theCatcode[int('\t')] = catSpace;
62         theCatcode[13]   = catIgnore;
63         theCatcode[int('~')]  = catActive;
64         theCatcode[int('%')]  = catComment;
65
66         // This is wrong!
67         theCatcode[int('@')]  = catLetter;
68 }
69
70 }
71
72
73 //
74 // catcodes
75 //
76
77 mode_type asMode(mode_type oldmode, string const & str)
78 {
79         if (str == "mathmode")
80                 return MATH_MODE;
81         if (str == "textmode" || str == "forcetext")
82                 return TEXT_MODE;
83         return oldmode;
84 }
85
86
87 CatCode catcode(unsigned char c)
88 {
89         return theCatcode[c];
90 }
91
92
93
94 //
95 // Token
96 //
97
98 ostream & operator<<(ostream & os, Token const & t)
99 {
100         if (t.cs().size())
101                 os << '\\' << t.cs() << ' ';
102         else if (t.cat() == catLetter)
103                 os << t.character();
104         else if (t.cat() == catNewline)
105                 os << "[\\n," << t.cat() << "]\n";
106         else
107                 os << '[' << t.character() << ',' << t.cat() << ']';
108         return os;
109 }
110
111
112 string Token::asString() const
113 {
114         return cs_.size() ? cs_ : string(1, char_);
115 }
116
117
118 string Token::asInput() const
119 {
120         return char_ ? string(1, char_) : '\\' + cs_ + ' ';
121 }
122
123
124 //
125 // Parser
126 //
127
128
129 Parser::Parser(istream & is)
130         : lineno_(0), pos_(0)
131 {
132         tokenize(is);
133 }
134
135
136 Parser::Parser(string const & s)
137         : lineno_(0), pos_(0)
138 {
139         istringstream is(s);
140         tokenize(is);
141 }
142
143
144 void Parser::push_back(Token const & t)
145 {
146         tokens_.push_back(t);
147 }
148
149
150 void Parser::pop_back()
151 {
152         tokens_.pop_back();
153 }
154
155
156 Token const & Parser::prev_token() const
157 {
158         static const Token dummy;
159         return pos_ > 0 ? tokens_[pos_ - 1] : dummy;
160 }
161
162
163 Token const & Parser::next_token() const
164 {
165         static const Token dummy;
166         return good() ? tokens_[pos_] : dummy;
167 }
168
169
170 Token const & Parser::get_token()
171 {
172         static const Token dummy;
173         //cerr << "looking at token " << tokens_[pos_] << " pos: " << pos_ << '\n';
174         return good() ? tokens_[pos_++] : dummy;
175 }
176
177
178 void Parser::skip_spaces()
179 {
180         while (1) {
181                 if (next_token().cat() == catSpace || next_token().cat() == catNewline)
182                         get_token();
183                 else if (next_token().cat() == catComment)
184                         while (next_token().cat() != catNewline)
185                                 get_token();
186                 else
187                         break;
188         }
189 }
190
191
192 void Parser::putback()
193 {
194         --pos_;
195 }
196
197
198 bool Parser::good() const
199 {
200         return pos_ < tokens_.size();
201 }
202
203
204 char Parser::getChar()
205 {
206         if (!good())
207                 error("The input stream is not well...");
208         return tokens_[pos_++].character();
209 }
210
211
212 string Parser::getArg(char left, char right)
213 {
214         skip_spaces();
215
216         string result;
217         char c = getChar();
218
219         if (c != left)
220                 putback();
221         else
222                 while ((c = getChar()) != right && good())
223                         result += c;
224
225         return result;
226 }
227
228
229 string Parser::getOpt()
230 {
231         string const res = getArg('[', ']');
232         return res.size() ? '[' + res + ']' : string();
233 }
234
235
236 void Parser::tokenize(istream & is)
237 {
238         static bool init_done = false;
239
240         if (!init_done) {
241                 catInit();
242                 init_done = true;
243         }
244
245         char c;
246         while (is.get(c)) {
247                 //cerr << "reading c: " << c << "\n";
248
249                 switch (catcode(c)) {
250                         case catNewline: {
251                                 ++lineno_;
252                                 is.get(c);
253                                 if (catcode(c) == catNewline) {
254                                         //do {
255                                                 is.get(c);
256                                         //} while (catcode(c) == catNewline);
257                                         push_back(Token("par"));
258                                 } else {
259                                         push_back(Token('\n', catNewline));
260                                 }
261                                 is.putback(c);
262                                 break;
263                         }
264
265                         case catComment: {
266                                 push_back(Token(c, catComment));
267                                 while (is.get(c) && catcode(c) != catNewline)
268                                         push_back(Token(c, catLetter));
269                                 push_back(Token(c, catNewline));
270                                 ++lineno_;
271                                 is.get(c);
272                                 if (catcode(c) == catNewline) {
273                                         push_back(Token("par"));
274                                         ++lineno_;
275                                 } else {
276                                         is.putback(c);
277                                 }
278                                 break;
279                         }
280
281                         case catEscape: {
282                                 is.get(c);
283                                 if (!is) {
284                                         error("unexpected end of input");
285                                 } else {
286                                         string s(1, c);
287                                         if (catcode(c) == catLetter) {
288                                                 // collect letters
289                                                 while (is.get(c) && catcode(c) == catLetter)
290                                                         s += c;
291                                                 skipSpaceTokens(is, c);
292                                         }
293                                         push_back(Token(s));
294                                 }
295                                 break;
296                         }
297
298                         case catSuper:
299                         case catSub: {
300                                 push_back(Token(c, catcode(c)));
301                                 is.get(c);
302                                 skipSpaceTokens(is, c);
303                                 break;
304                         }
305
306                         case catIgnore: {
307                                 if (c != 13)
308                                         cerr << "ignoring a char: " << int(c) << "\n";
309                                 break;
310                         }
311
312                         default:
313                                 push_back(Token(c, catcode(c)));
314                 }
315         }
316 }
317
318
319 void Parser::dump() const
320 {
321         cerr << "\nTokens: ";
322         for (unsigned i = 0; i < tokens_.size(); ++i) {
323                 if (i == pos_)
324                         cerr << " <#> ";
325                 cerr << tokens_[i];
326         }
327         cerr << " pos: " << pos_ << "\n";
328 }
329
330
331 void Parser::error(string const & msg)
332 {
333         cerr << "Line ~" << lineno_ << ":  parse error: " << msg << endl;
334         dump();
335         //exit(1);
336 }
337
338
339 string Parser::verbatimOption()
340 {
341         string res;
342         if (next_token().character() == '[') {
343                 Token t = get_token();
344                 for (Token t = get_token(); t.character() != ']' && good(); t = get_token()) {
345                         if (t.cat() == catBegin) {
346                                 putback();
347                                 res += '{' + verbatim_item() + '}';
348                         } else
349                                 res += t.asString();
350                 }
351         }
352         return res;
353 }
354
355
356 string Parser::verbatim_item()
357 {
358         if (!good())
359                 error("stream bad");
360         skip_spaces();
361         if (next_token().cat() == catBegin) {
362                 Token t = get_token(); // skip brace
363                 string res;
364                 for (Token t = get_token(); t.cat() != catEnd && good(); t = get_token()) {
365                         if (t.cat() == catBegin) {
366                                 putback();
367                                 res += '{' + verbatim_item() + '}';
368                         }
369                         else
370                                 res += t.asInput();
371                 }
372                 return res;
373         }
374         return get_token().asInput();
375 }
376
377
378 void Parser::setCatCode(char c, CatCode cat)
379 {
380         theCatcode[(unsigned char)c] = cat;
381 }
382
383
384 CatCode Parser::getCatCode(char c) const
385 {
386         return theCatcode[(unsigned char)c];
387 }