]> git.lyx.org Git - lyx.git/blob - src/tex2lyx/texparser.C
make tex2lyx lyxstring agnostic
[lyx.git] / src / tex2lyx / texparser.C
1
2 #include <config.h>
3
4 #include "texparser.h"
5
6 #include <iostream>
7 #include <sstream>
8
9 using std::cerr;
10 using std::endl;
11 using std::fill;
12 using std::ios;
13 using std::istream;
14 using std::istringstream;
15 using std::ostream;
16 using std::string;
17 using std::vector;
18
19
20 namespace {
21
22 CatCode theCatcode[256];
23
24 void skipSpaceTokens(istream & is, char c)
25 {
26         // skip trailing spaces
27         while (catcode(c) == catSpace || catcode(c) == catNewline)
28                 if (!is.get(c))
29                         break;
30         //cerr << "putting back: " << c << "\n";
31         is.putback(c);
32 }
33
34
35 void catInit()
36 {
37         fill(theCatcode, theCatcode + 256, catOther);
38         fill(theCatcode + 'a', theCatcode + 'z' + 1, catLetter);
39         fill(theCatcode + 'A', theCatcode + 'Z' + 1, catLetter);
40
41         theCatcode['\\'] = catEscape;
42         theCatcode['{']  = catBegin;
43         theCatcode['}']  = catEnd;
44         theCatcode['$']  = catMath;
45         theCatcode['&']  = catAlign;
46         theCatcode[10]   = catNewline;
47         theCatcode['#']  = catParameter;
48         theCatcode['^']  = catSuper;
49         theCatcode['_']  = catSub;
50         theCatcode['\7f'] = catIgnore;
51         theCatcode[' ']  = catSpace;
52         theCatcode['\t'] = catSpace;
53         theCatcode[13]   = catIgnore;
54         theCatcode['~']  = catActive;
55         theCatcode['%']  = catComment;
56
57         // This is wrong!
58         theCatcode['@']  = catLetter;
59 }
60
61 }
62
63
64 // 
65 // catcodes
66 //
67
68 mode_type asMode(mode_type oldmode, string const & str)
69 {
70         if (str == "mathmode")
71                 return MATH_MODE;
72         if (str == "textmode" || str == "forcetext")
73                 return TEXT_MODE;
74         return oldmode;
75 }
76
77
78 CatCode catcode(unsigned char c)
79 {
80         return theCatcode[c];
81 }
82
83
84
85 //
86 // Token
87 //
88
89 ostream & operator<<(ostream & os, Token const & t)
90 {
91         if (t.cs().size())
92                 os << '\\' << t.cs() << ' ';
93         else if (t.cat() == catLetter)
94                 os << t.character();
95         else if (t.cat() == catNewline)
96                 os << "[\\n," << t.cat() << "]\n";
97         else
98                 os << '[' << t.character() << ',' << t.cat() << ']';
99         return os;
100 }
101
102
103 string Token::asString() const
104 {
105         return cs_.size() ? cs_ : string(1, char_);
106 }
107
108
109 string Token::asInput() const
110 {
111         return char_ ? string(1, char_) : '\\' + cs_ + ' ';
112 }
113
114
115 //
116 // Parser
117 //
118
119
120 Parser::Parser(istream & is)
121         : lineno_(0), pos_(0)
122 {
123         tokenize(is);
124 }
125
126
127 Parser::Parser(string const & s)
128         : lineno_(0), pos_(0)
129 {
130         istringstream is(s);
131         tokenize(is);
132 }
133
134
135 void Parser::push_back(Token const & t)
136 {
137         tokens_.push_back(t);
138 }
139
140
141 void Parser::pop_back()
142 {
143         tokens_.pop_back();
144 }
145
146
147 Token const & Parser::prev_token() const
148 {
149         static const Token dummy;
150         return pos_ > 0 ? tokens_[pos_ - 1] : dummy;
151 }
152
153
154 Token const & Parser::next_token() const
155 {
156         static const Token dummy;
157         return good() ? tokens_[pos_] : dummy;
158 }
159
160
161 Token const & Parser::get_token()
162 {
163         static const Token dummy;
164         //cerr << "looking at token " << tokens_[pos_] << " pos: " << pos_ << '\n';
165         return good() ? tokens_[pos_++] : dummy;
166 }
167
168
169 void Parser::skip_spaces()
170 {
171         while (1) {
172                 if (next_token().cat() == catSpace || next_token().cat() == catNewline)
173                         get_token();
174                 else if (next_token().cat() == catComment) 
175                         while (next_token().cat() != catNewline)
176                                 get_token();
177                 else
178                         break;
179         }
180 }
181
182
183 void Parser::putback()
184 {
185         --pos_;
186 }
187
188
189 bool Parser::good() const
190 {
191         return pos_ < tokens_.size();
192 }
193
194
195 char Parser::getChar()
196 {
197         if (!good())
198                 error("The input stream is not well...");
199         return tokens_[pos_++].character();
200 }
201
202
203 string Parser::getArg(char left, char right)
204 {
205         skip_spaces();
206
207         string result;
208         char c = getChar();
209
210         if (c != left)
211                 putback();
212         else
213                 while ((c = getChar()) != right && good())
214                         result += c;
215
216         return result;
217 }
218
219
220 string Parser::getOpt()
221 {
222         string const res = getArg('[', ']');
223         return res.size() ? '[' + res + ']' : string();
224 }
225
226
227 void Parser::tokenize(istream & is)
228 {
229         static bool init_done = false;
230
231         if (!init_done) {
232                 catInit();
233                 init_done = true;
234         }
235
236         char c;
237         while (is.get(c)) {
238                 //cerr << "reading c: " << c << "\n";
239
240                 switch (catcode(c)) {
241                         case catNewline: {
242                                 ++lineno_;
243                                 is.get(c);
244                                 if (catcode(c) == catNewline) {
245                                         //do {
246                                                 is.get(c);
247                                         //} while (catcode(c) == catNewline);
248                                         push_back(Token("par"));
249                                 } else {
250                                         push_back(Token('\n', catNewline));
251                                 }
252                                 is.putback(c);
253                                 break;
254                         }
255
256                         case catComment: {
257                                 push_back(Token(c, catComment));
258                                 while (is.get(c) && catcode(c) != catNewline)
259                                         push_back(Token(c, catLetter));
260                                 push_back(Token(c, catNewline));
261                                 ++lineno_;
262                                 is.get(c);
263                                 if (catcode(c) == catNewline) {
264                                         push_back(Token("par"));
265                                         ++lineno_;
266                                 } else {
267                                         is.putback(c);
268                                 }
269                                 break;
270                         }
271
272                         case catEscape: {
273                                 is.get(c);
274                                 if (!is) {
275                                         error("unexpected end of input");
276                                 } else {
277                                         string s(1, c);
278                                         if (catcode(c) == catLetter) {
279                                                 // collect letters
280                                                 while (is.get(c) && catcode(c) == catLetter)
281                                                         s += c;
282                                                 skipSpaceTokens(is, c);
283                                         }
284                                         push_back(Token(s));
285                                 }
286                                 break;
287                         }
288
289                         case catSuper:
290                         case catSub: {
291                                 push_back(Token(c, catcode(c)));
292                                 is.get(c);
293                                 skipSpaceTokens(is, c);
294                                 break;
295                         }
296
297                         case catIgnore: {
298                                 if (c != 13)
299                                         cerr << "ignoring a char: " << int(c) << "\n";
300                                 break;
301                         }
302
303                         default:
304                                 push_back(Token(c, catcode(c)));
305                 }
306         }
307 }
308
309
310 void Parser::dump() const
311 {
312         cerr << "\nTokens: ";
313         for (unsigned i = 0; i < tokens_.size(); ++i) {
314                 if (i == pos_)
315                         cerr << " <#> ";
316                 cerr << tokens_[i];
317         }
318         cerr << " pos: " << pos_ << "\n";
319 }
320
321
322 void Parser::error(string const & msg)
323 {
324         cerr << "Line ~" << lineno_ << ":  parse error: " << msg << endl;
325         dump();
326         //exit(1);
327 }
328
329
330 string Parser::verbatimOption()
331 {
332         string res;
333         if (next_token().character() == '[') {
334                 Token t = get_token();
335                 for (Token t = get_token(); t.character() != ']' && good(); t = get_token()) {
336                         if (t.cat() == catBegin) {
337                                 putback();
338                                 res += '{' + verbatim_item() + '}';
339                         } else
340                                 res += t.asString();
341                 }
342         }
343         return res;
344 }
345
346
347 string Parser::verbatim_item()
348 {
349         if (!good())
350                 error("stream bad");
351         skip_spaces();
352         if (next_token().cat() == catBegin) {
353                 Token t = get_token(); // skip brace
354                 string res;
355                 for (Token t = get_token(); t.cat() != catEnd && good(); t = get_token()) {
356                         if (t.cat() == catBegin) {
357                                 putback();
358                                 res += '{' + verbatim_item() + '}';
359                         }
360                         else
361                                 res += t.asInput();
362                 }
363                 return res;
364         }
365         return get_token().asInput();
366 }
367
368
369 void Parser::setCatCode(char c, CatCode cat)
370 {
371         theCatcode[c] = cat;    
372 }
373
374
375 CatCode Parser::getCatCode(char c) const
376 {
377         return theCatcode[c];
378 }