]> git.lyx.org Git - lyx.git/blob - src/lyxlex.C
bb55e8b971382b7b75660fbd7e6e1e261512d496
[lyx.git] / src / lyxlex.C
//  Generalized simple lexical analyzer.
//  It can be used for simple syntax parsers, like lyxrc,
//  texclass and others to come.   [asierra30/03/96]
//
//   (C) 1996 Lyx Team.
6
7 #include <config.h>
8
9 #include <cstdlib>
10
11 #ifdef __GNUG__
12 #pragma implementation "lyxlex.h"
13 #endif
14
15 #include "lyxlex.h"
16 #include "debug.h"
17 #include "support/filetools.h"
18
19 LyXLex::LyXLex(keyword_item * tab, int num)
20         : is(&fb__), table(tab), no_items(num)
21 {
22         
23         status = 0;
24         pushed = 0;
25 }
26
27
28 void LyXLex::pushTable(keyword_item * tab, int num)
29 {
30         pushed_table * tmppu = new pushed_table;
31         tmppu->next = pushed;
32         tmppu->table_elem = table;
33         tmppu->table_siz = no_items;
34         pushed = tmppu;
35         table = tab;
36         no_items = num;
37 }
38
39
40 void LyXLex::popTable()
41 {
42         if (pushed == 0)
43                 lyxerr << "LyXLex error: nothing to pop!" << endl;
44
45         pushed_table * tmp;
46         tmp = pushed;
47         table = tmp->table_elem;
48         no_items = tmp->table_siz;
49         tmp->table_elem = 0;
50         pushed = tmp->next;
51         delete tmp;
52 }
53
54
55 void LyXLex::printTable()
56 {
57         lyxerr << "\nNumber of tags: " << no_items << endl;
58         for(int i= 0; i < no_items; ++i)
59                 lyxerr << "table[" << i
60                        << "]:  tag: `" << table[i].tag
61                        << "'  code:" << table[i].code << endl;
62         lyxerr << endl;
63 }
64
65
66 void LyXLex::printError(string const & message) const
67 {
68         string tmpmsg = subst(message, "$$Token", GetString());
69         lyxerr << "LyX: " << tmpmsg << " [around line " << lineno
70                << " of file " << MakeDisplayPath(name) << ']' << endl;
71 }
72
73
74 bool LyXLex::setFile(string const & filename)
75 {
76         if (fb__.is_open())
77                 lyxerr << "Error in LyXLex::setFile: "
78                         "file or stream already set." << endl;
79         fb__.open(filename.c_str(), ios::in);
80         is.rdbuf(&fb__);
81         name = filename;
82         lineno = 0;
83         return fb__.is_open() && is.good();
84 }
85
86
87 void LyXLex::setStream(istream & i)
88 {
89         if (fb__.is_open() || is.rdbuf()->in_avail())
90                 lyxerr << "Error in LyXLex::setStream: "
91                         "file or stream already set." << endl;
92         is.rdbuf(i.rdbuf());
93         lineno = 0;
94 }
95
96
97 int LyXLex::lex()
98 {
99         //NOTE: possible bug.
100         if (next() && status == LEX_TOKEN)
101                 return search_kw(buff);
102         else
103                 return status;
104 }
105
106
107 int LyXLex::GetInteger() const
108 {
109         if (buff[0] > ' ')   
110                 return atoi(buff);
111         else {
112                 printError("Bad integer `$$Token'");
113                 return -1;
114         }
115 }
116
117
118 float LyXLex::GetFloat() const
119 {
120    if (buff[0] > ' ')   
121        return atof(buff);
122    else {
123         printError("Bad float `$$Token'");
124         return -1;
125    }
126 }
127
128
129 string LyXLex::GetString() const
130 {
131         return string(buff);
132 }
133
134
135 // I would prefer to give a tag number instead of an explicit token
136 // here, but it is not possible because Buffer::readLyXformat2 uses
137 // explicit tokens (JMarc) 
138 string LyXLex::getLongString(string const & endtoken)
139 {
140         string str, prefix;
141         bool firstline = true;
142
143         while (IsOK()) {
144                 if (!EatLine())
145                         // blank line in the file being read
146                         continue;
147                 
148                 string const token = frontStrip(strip(GetString()), " \t");
149                 
150                 lyxerr[Debug::PARSER] << "LongString: `"
151                                       << GetString() << '\'' << endl;
152
153                 // We do a case independent comparison, like search_kw
154                 // does.
155                 if (compare_no_case(token, endtoken) != 0) {
156                         string tmpstr = GetString();
157                         if (firstline) {
158                                 unsigned int i = 0;
159                                 while(i < tmpstr.length()
160                                       && tmpstr[i] == ' ') {
161                                         ++i;
162                                         prefix += ' ';
163                                 }
164                                 firstline = false;
165                                 lyxerr[Debug::PARSER] << "Prefix = `" << prefix
166                                                       << '\'' << endl;
167                         } 
168
169                         if (!prefix.empty() 
170                             && prefixIs(tmpstr, prefix.c_str())) {
171                                 tmpstr.erase(0, prefix.length() - 1);
172                         }
173                         str += tmpstr + '\n';
174                 }
175                 else // token == endtoken
176                         break;
177         }
178         if (!IsOK())
179                 printError("Long string not ended by `" + endtoken + '\'');
180
181         return str;
182 }
183
184
185 bool LyXLex::GetBool() const
186 {
187         if (compare(buff, "true") == 0)
188                 return true;
189         else if (compare(buff, "false") != 0)
190                 printError("Bad boolean `$$Token'. Use \"false\" or \"true\"");
191         return false;
192 }
193
194
195 bool LyXLex::EatLine()
196 {
197         int i = 0;
198         unsigned char c = '\0';
199
200         while(is && c != '\n' && i != (LEX_MAX_BUFF - 1)) {
201                 is.get(c);
202                 if (c != '\r')
203                         buff[i++] = c;
204         }
205         if (i == (LEX_MAX_BUFF - 1) && c != '\n') {
206                 printError("Line too long");
207                 c = '\n'; // Pretend we had an end of line
208                 --lineno; // but don't increase line counter (netto effect)
209                 ++i; // and preserve last character read.
210         }
211         if (c == '\n') {
212                 ++lineno;
213                 buff[--i] = '\0'; // i can never be 0 here, so no danger
214                 status = LEX_DATA;
215                 return true;
216         } else {
217                 buff[i] = '\0';
218                 return false;
219         }
220 }
221
222
223 int LyXLex::search_kw(char const * const tag) const
224 {
225         int m, k = 0 , l = 0, r = no_items;
226
227         while (l < r) {
228                 m = (l + r) / 2;
229
230                 if (lyxerr.debugging(Debug::PARSER)) {
231                         lyxerr << "LyXLex::search_kw: elem " << m
232                                << " tag " << table[m].tag
233                                << " search tag " << tag
234                                << endl;
235                 }
236
237                 if (table[m].tag)
238                         k = compare_no_case(table[m].tag, tag);
239                 if (k == 0)
240                         return table[m].code;
241                 else
242                         if (k < 0) l = m + 1; else r = m;
243         }
244         return LEX_UNDEF;
245 }
246
247
248 bool LyXLex::next(bool esc)
249 {
250         if (!esc) {
251                 unsigned char c; // getc() returns an int
252                 
253                 status = 0;
254                 while (is && !status) { 
255                         is.get(c);
256                         if (c == '#') {
257                                 // Read rest of line (fast :-)
258                                 is.get(buff, sizeof(buff));
259                                 lyxerr[Debug::LYXLEX] << "Comment read: " << c << buff << endl;
260                                 ++lineno;
261                                 continue;
262                         }
263                         
264                         if (c == '\"') {
265                                 int i = -1;
266                                 do {
267                                         is.get(c);
268                                         if (c != '\r')
269                                                 buff[++i] = c;
270                                 } while (c != '\"' && c != '\n' && is &&
271                                          i != (LEX_MAX_BUFF - 2));
272                                 
273                                 if (i == (LEX_MAX_BUFF - 2)) {
274                                         printError("Line too long");
275                                         c = '\"'; // Pretend we got a "
276                                         ++i;
277                                 }
278                                 
279                                 if (c != '\"') {
280                                         printError("Missing quote");
281                                         if (c == '\n')
282                                                 ++lineno;
283                                 }
284                                 
285                                 buff[i] = '\0';
286                                 status = LEX_DATA;
287                                 break; 
288                         }
289                         
290                         if (c == ',')
291                                 continue;              /* Skip ','s */
292
293                         // using relational operators with chars other
294                         // than == and != is not safe. And if it is done
295                         // the type _have_ to be unsigned. It usually a
296                         // lot better to use the functions from cctype
297                         if (c > ' ' && is)  {
298 #warning Verify this! (Lgb)
299                                 //if (isalnum(static_cast<unsigned char>(c)) && is) {
300                                 int i = 0;
301                                 do {
302                                         buff[i++] = c;
303                                         is.get(c);
304                                 } while (c > ' ' && c != ',' && is
305                                 //} while (isalnum(static_cast<unsigned char>(c))
306                                          //&& c != ',' && is
307                                          && (i != LEX_MAX_BUFF - 1) );
308                                 if (i == LEX_MAX_BUFF - 1) {
309                                         printError("Line too long");
310                                 }
311                                 buff[i] = '\0';
312                                 status = LEX_TOKEN;
313                         }
314                         
315                         if (c == '\r' && is) {
316                                 // The Windows support has lead to the
317                                 // possibility of "\r\n" at the end of
318                                 // a line.  This will stop LyX choking
319                                 // when it expected to find a '\n'
320                                 is.get(c);
321                         }
322
323                         if (c == '\n')
324                                 ++lineno;
325                         
326                 }
327                 if (status) return true;
328                 
329                 status = is.eof() ? LEX_FEOF: LEX_UNDEF;
330                 buff[0] = '\0';
331                 return false;
332         } else {
333                 unsigned char c; // getc() returns an int
334                 
335                 status = 0;
336                 while (is && !status) {
337                         is.get(c);
338
339                         // skip ','s
340                         if (c == ',') continue;
341                         
342                         if (c == '\\') {
343                                 // escape
344                                 int i = 0;
345                                 do {
346                                         if (c == '\\') {
347                                                 // escape the next char
348                                                 is.get(c);
349                                         }
350                                         buff[i++] = c;
351                                         is.get(c);
352                                 } while (c > ' ' && c != ',' && is
353                                 //} while (isalnum(static_cast<unsigned char>(c))
354                                          //&& c != ',' && is
355                                          && (i != LEX_MAX_BUFF - 1) );
356                                 if (i == LEX_MAX_BUFF - 1) {
357                                         printError("Line too long");
358                                 }
359                                 buff[i] = '\0';
360                                 status = LEX_TOKEN;
361                                 continue;
362                         }
363                         
364                         if (c == '#') {
365                                 // Read rest of line (fast :-)
366                                 is.get(buff, sizeof(buff));
367                                 lyxerr[Debug::LYXLEX] << "Comment read: " << c << buff << endl;
368                                 ++lineno;
369                                 continue;
370                         }
371
372                         // string
373                         if (c == '\"') {
374                                 int i = -1;
375                                 bool escaped = false;
376                                 do {
377                                         escaped = false;
378                                         is.get(c);
379                                         if (c == '\r') continue;
380                                         if (c == '\\') {
381                                                 // escape the next char
382                                                 is.get(c);
383                                                 escaped = true;
384                                         }
385                                         buff[++i] = c;
386                                 
387                                         if (!escaped && c == '\"') break;
388                                 } while (c != '\n' && is &&
389                                          i != (LEX_MAX_BUFF - 2));
390                                 
391                                 if (i == (LEX_MAX_BUFF - 2)) {
392                                         printError("Line too long");
393                                         c = '\"'; // Pretend we got a "
394                                         ++i;
395                                 }
396                                 
397                                 if (c != '\"') {
398                                         printError("Missing quote");
399                                         if (c == '\n')
400                                                 ++lineno;
401                                 }
402                                 
403                                 buff[i] = '\0';
404                                 status = LEX_DATA;
405                                 break; 
406                         }
407                         
408                         if (c > ' ' && is) {
409                                 //if (isalnum(static_cast<unsigned char>(c)) && is) {
410                                 int i = 0;
411                                 do {
412                                         if (c == '\\') {
413                                                 // escape the next char
414                                                 is.get(c);
415                                                 //escaped = true;
416                                         }
417                                         buff[i++] = c;
418                                         is.get(c);
419                                 } while (c > ' ' && c != ',' && is
420                                 //} while (isalnum(static_cast<unsigned char>(c))
421                                          //!= ',' && is
422                                          && (i != LEX_MAX_BUFF-1) );
423                                 if (i == LEX_MAX_BUFF-1) {
424                                         printError("Line too long");
425                                 }
426                                 buff[i] = '\0';
427                                 status = LEX_TOKEN;
428                         }
429                         // new line
430                         if (c == '\n')
431                                 ++lineno;
432                 }
433                 
434                 if (status) return true;
435                 
436                 status = is.eof() ? LEX_FEOF: LEX_UNDEF;
437                 buff[0] = '\0';
438                 return false;   
439         }
440 }
441
442
443 bool LyXLex::nextToken()
444 {
445         status = 0;
446         while (is && !status) { 
447                 unsigned char c;
448                 is.get(c);
449            
450                 if (c >= ' ' && is) {
451                         //if (isprint(static_cast<unsigned char>(c)) && is) {
452                         int i = 0;
453                         if (c == '\\') { // first char == '\\'
454                                 do {
455                                         buff[i++] = c;
456                                         is.get(c);
457                                 } while (c > ' ' && c != '\\' && is
458                                 //} while (isalnum(static_cast<unsigned char>(c))
459                                 //       && c != '\\' && is
460                                          && i != (LEX_MAX_BUFF-1));
461                         } else {
462                                 do {
463                                         buff[i++] = c;
464                                         is.get(c);
465                                 } while (c >= ' ' && c != '\\' && is
466                                 //} while (isprint(static_cast<unsigned char>(c))
467                                          // && c != '\\' && is
468                                          && i != (LEX_MAX_BUFF-1));
469                         }
470
471                         if (i == (LEX_MAX_BUFF - 1)) {
472                                 printError("Line too long");
473                         }
474
475                         if (c == '\\') is.putback(c); // put it back
476                         buff[i] = '\0';
477                         status = LEX_TOKEN;
478                 }
479                   
480                 if (c == '\n')
481                         ++lineno;
482         
483         }
484         if (status)  return true;
485         
486         status = is.eof() ? LEX_FEOF: LEX_UNDEF;
487         buff[0] = '\0';
488         return false;
489 }
490
491
492 int LyXLex::FindToken(char const * str[])
493 {  
494    int i = -1;
495    
496    if (next()) {
497       if (compare(buff, "default")) {
498          for (i = 0; str[i][0] && compare(str[i], buff); ++i);
499          if (!str[i][0]) {
500             printError("Unknown argument `$$Token'");
501             i = -1;
502          }
503       }  
504    } else
505      printError("file ended while scanning string token");
506    return i;
507 }
508
509
510 int LyXLex::CheckToken(char const * str[], int print_error)
511 {  
512    int i = -1;
513    
514    if (compare(buff, "default")) {
515        for (i = 0; str[i][0] && compare(str[i], buff); ++i);
516        if (!str[i][0]) {
517            if (print_error)
518                printError("Unknown argument `$$Token'");
519            i = -1;
520        }
521    }
522    return i;
523 }