]> git.lyx.org Git - lyx.git/blob - src/lyxlex.C
88d7466476d59f682f6867019e0ffd9be800a7a1
[lyx.git] / src / lyxlex.C
1 //  Generalized simple lexical analyzer.
2 //  It can be used for simple syntax parsers, like lyxrc,
3 //  texclass and others to come.   [asierra30/03/96]
4 //
5 //   (C) 1996 Lyx Team.
6
7 #include <config.h>
8
9 #include <cstdlib>
10
11 #ifdef __GNUG__
12 #pragma implementation "lyxlex.h"
13 #endif
14
15 #include "lyxlex.h"
16 #include "debug.h"
17 #include "support/filetools.h"
18
19 using std::ios;
20
21 LyXLex::LyXLex(keyword_item * tab, int num)
22         : is(&fb__), table(tab), no_items(num)
23 {
24         
25         status = 0;
26         pushed = 0;
27 }
28
29
30 void LyXLex::pushTable(keyword_item * tab, int num)
31 {
32         pushed_table * tmppu = new pushed_table;
33         tmppu->next = pushed;
34         tmppu->table_elem = table;
35         tmppu->table_siz = no_items;
36         pushed = tmppu;
37         table = tab;
38         no_items = num;
39 }
40
41
42 void LyXLex::popTable()
43 {
44         if (pushed == 0)
45                 lyxerr << "LyXLex error: nothing to pop!" << endl;
46
47         pushed_table * tmp;
48         tmp = pushed;
49         table = tmp->table_elem;
50         no_items = tmp->table_siz;
51         tmp->table_elem = 0;
52         pushed = tmp->next;
53         delete tmp;
54 }
55
56
57 void LyXLex::printTable()
58 {
59         lyxerr << "\nNumber of tags: " << no_items << endl;
60         for(int i= 0; i < no_items; ++i)
61                 lyxerr << "table[" << i
62                        << "]:  tag: `" << table[i].tag
63                        << "'  code:" << table[i].code << endl;
64         lyxerr << endl;
65 }
66
67
68 void LyXLex::printError(string const & message) const
69 {
70         string tmpmsg = subst(message, "$$Token", GetString());
71         lyxerr << "LyX: " << tmpmsg << " [around line " << lineno
72                << " of file " << MakeDisplayPath(name) << ']' << endl;
73 }
74
75
76 bool LyXLex::setFile(string const & filename)
77 {
78         if (fb__.is_open())
79                 lyxerr << "Error in LyXLex::setFile: "
80                         "file or stream already set." << endl;
81         fb__.open(filename.c_str(), ios::in);
82         is.rdbuf(&fb__);
83         name = filename;
84         lineno = 0;
85         return fb__.is_open() && is.good();
86 }
87
88
89 void LyXLex::setStream(istream & i)
90 {
91         if (fb__.is_open() || is.rdbuf()->in_avail())
92                 lyxerr << "Error in LyXLex::setStream: "
93                         "file or stream already set." << endl;
94         is.rdbuf(i.rdbuf());
95         lineno = 0;
96 }
97
98
99 int LyXLex::lex()
100 {
101         //NOTE: possible bug.
102         if (next() && status == LEX_TOKEN)
103                 return search_kw(buff);
104         else
105                 return status;
106 }
107
108
109 int LyXLex::GetInteger() const
110 {
111         if (buff[0] > ' ')   
112                 return atoi(buff);
113         else {
114                 printError("Bad integer `$$Token'");
115                 return -1;
116         }
117 }
118
119
120 float LyXLex::GetFloat() const
121 {
122    if (buff[0] > ' ')   
123        return atof(buff);
124    else {
125         printError("Bad float `$$Token'");
126         return -1;
127    }
128 }
129
130
131 string LyXLex::GetString() const
132 {
133         return string(buff);
134 }
135
136
137 // I would prefer to give a tag number instead of an explicit token
138 // here, but it is not possible because Buffer::readLyXformat2 uses
139 // explicit tokens (JMarc) 
140 string LyXLex::getLongString(string const & endtoken)
141 {
142         string str, prefix;
143         bool firstline = true;
144
145         while (IsOK()) {
146                 if (!EatLine())
147                         // blank line in the file being read
148                         continue;
149                 
150                 string const token = frontStrip(strip(GetString()), " \t");
151                 
152                 lyxerr[Debug::PARSER] << "LongString: `"
153                                       << GetString() << '\'' << endl;
154
155                 // We do a case independent comparison, like search_kw
156                 // does.
157                 if (compare_no_case(token, endtoken) != 0) {
158                         string tmpstr = GetString();
159                         if (firstline) {
160                                 unsigned int i = 0;
161                                 while(i < tmpstr.length()
162                                       && tmpstr[i] == ' ') {
163                                         ++i;
164                                         prefix += ' ';
165                                 }
166                                 firstline = false;
167                                 lyxerr[Debug::PARSER] << "Prefix = `" << prefix
168                                                       << '\'' << endl;
169                         } 
170
171                         if (!prefix.empty() 
172                             && prefixIs(tmpstr, prefix.c_str())) {
173                                 tmpstr.erase(0, prefix.length() - 1);
174                         }
175                         str += tmpstr + '\n';
176                 }
177                 else // token == endtoken
178                         break;
179         }
180         if (!IsOK())
181                 printError("Long string not ended by `" + endtoken + '\'');
182
183         return str;
184 }
185
186
187 bool LyXLex::GetBool() const
188 {
189         if (compare(buff, "true") == 0)
190                 return true;
191         else if (compare(buff, "false") != 0)
192                 printError("Bad boolean `$$Token'. Use \"false\" or \"true\"");
193         return false;
194 }
195
196
197 bool LyXLex::EatLine()
198 {
199         int i = 0;
200         unsigned char c = '\0';
201         char cc = 0;
202         while(is && c != '\n' && i != (LEX_MAX_BUFF - 1)) {
203                 is.get(cc);
204                 c = cc;
205                 lyxerr[Debug::LYXLEX] << "LyXLex::EatLine read char: `"
206                                       << c << "'" << endl;
207                 if (c != '\r')
208                         buff[i++] = c;
209         }
210         if (i == (LEX_MAX_BUFF - 1) && c != '\n') {
211                 printError("Line too long");
212                 c = '\n'; // Pretend we had an end of line
213                 --lineno; // but don't increase line counter (netto effect)
214                 ++i; // and preserve last character read.
215         }
216         if (c == '\n') {
217                 ++lineno;
218                 buff[--i] = '\0'; // i can never be 0 here, so no danger
219                 status = LEX_DATA;
220                 return true;
221         } else {
222                 buff[i] = '\0';
223                 return false;
224         }
225 }
226
227
228 int LyXLex::search_kw(char const * const tag) const
229 {
230         int m, k = 0 , l = 0, r = no_items;
231
232         while (l < r) {
233                 m = (l + r) / 2;
234
235                 if (lyxerr.debugging(Debug::PARSER)) {
236                         lyxerr << "LyXLex::search_kw: elem " << m
237                                << " tag " << table[m].tag
238                                << " search tag " << tag
239                                << endl;
240                 }
241
242                 if (table[m].tag)
243                         k = compare_no_case(table[m].tag, tag);
244                 if (k == 0)
245                         return table[m].code;
246                 else
247                         if (k < 0) l = m + 1; else r = m;
248         }
249         return LEX_UNDEF;
250 }
251
252
253 bool LyXLex::next(bool esc)
254 {
255         if (!esc) {
256                 unsigned char c = 0; // getc() returns an int
257                 char cc = 0;
258                 status = 0;
259                 while (is && !status) {
260                         is.get(cc);
261                         c = cc;
262                         if (c == '#') {
263                                 // Read rest of line (fast :-)
264                                 is.getline(buff, sizeof(buff));
265                                 lyxerr[Debug::LYXLEX] << "Comment read: `" << c
266                                                       << buff << "'" << endl;
267                                 ++lineno;
268                                 continue;
269                         }
270                         
271                         if (c == '\"') {
272                                 int i = -1;
273                                 do {
274                                         is.get(cc);
275                                         c = cc;
276                                         if (c != '\r')
277                                                 buff[++i] = c;
278                                 } while (c != '\"' && c != '\n' && is &&
279                                          i != (LEX_MAX_BUFF - 2));
280                                 
281                                 if (i == (LEX_MAX_BUFF - 2)) {
282                                         printError("Line too long");
283                                         c = '\"'; // Pretend we got a "
284                                         ++i;
285                                 }
286                                 
287                                 if (c != '\"') {
288                                         printError("Missing quote");
289                                         if (c == '\n')
290                                                 ++lineno;
291                                 }
292                                 
293                                 buff[i] = '\0';
294                                 status = LEX_DATA;
295                                 break; 
296                         }
297                         
298                         if (c == ',')
299                                 continue;              /* Skip ','s */
300
301                         // using relational operators with chars other
302                         // than == and != is not safe. And if it is done
303                         // the type _have_ to be unsigned. It usually a
304                         // lot better to use the functions from cctype
305                         if (c > ' ' && is)  {
306                                 int i = 0;
307                                 do {
308                                         buff[i++] = c;
309                                         is.get(cc);
310                                         c = cc;
311                                 } while (c > ' ' && c != ',' && is
312                                          && (i != LEX_MAX_BUFF - 1) );
313                                 if (i == LEX_MAX_BUFF - 1) {
314                                         printError("Line too long");
315                                 }
316                                 buff[i] = '\0';
317                                 status = LEX_TOKEN;
318                         }
319                         
320                         if (c == '\r' && is) {
321                                 // The Windows support has lead to the
322                                 // possibility of "\r\n" at the end of
323                                 // a line.  This will stop LyX choking
324                                 // when it expected to find a '\n'
325                                 is.get(cc);
326                                 c = cc;
327                         }
328
329                         if (c == '\n')
330                                 ++lineno;
331                         
332                 }
333                 if (status) return true;
334                 
335                 status = is.eof() ? LEX_FEOF: LEX_UNDEF;
336                 buff[0] = '\0';
337                 return false;
338         } else {
339                 unsigned char c = 0; // getc() returns an int
340                 char cc = 0;
341                 
342                 status = 0;
343                 while (is && !status) {
344                         is.get(cc);
345                         c = cc;
346
347                         // skip ','s
348                         if (c == ',') continue;
349                         
350                         if (c == '\\') {
351                                 // escape
352                                 int i = 0;
353                                 do {
354                                         if (c == '\\') {
355                                                 // escape the next char
356                                                 is.get(cc);
357                                                 c = cc;
358                                         }
359                                         buff[i++] = c;
360                                         is.get(cc);
361                                         c = cc;
362                                 } while (c > ' ' && c != ',' && is
363                                          && (i != LEX_MAX_BUFF - 1) );
364                                 if (i == LEX_MAX_BUFF - 1) {
365                                         printError("Line too long");
366                                 }
367                                 buff[i] = '\0';
368                                 status = LEX_TOKEN;
369                                 continue;
370                         }
371                         
372                         if (c == '#') {
373                                 // Read rest of line (fast :-)
374                                 is.getline(buff, sizeof(buff));
375                                 lyxerr[Debug::LYXLEX] << "Comment read: `" << c
376                                                       << buff << "'" << endl;
377                                 ++lineno;
378                                 continue;
379                         }
380
381                         // string
382                         if (c == '\"') {
383                                 int i = -1;
384                                 bool escaped = false;
385                                 do {
386                                         escaped = false;
387                                         is.get(cc);
388                                         c = cc;
389                                         if (c == '\r') continue;
390                                         if (c == '\\') {
391                                                 // escape the next char
392                                                 is.get(cc);
393                                                 c = cc;
394                                                 escaped = true;
395                                         }
396                                         buff[++i] = c;
397                                 
398                                         if (!escaped && c == '\"') break;
399                                 } while (c != '\n' && is &&
400                                          i != (LEX_MAX_BUFF - 2));
401                                 
402                                 if (i == (LEX_MAX_BUFF - 2)) {
403                                         printError("Line too long");
404                                         c = '\"'; // Pretend we got a "
405                                         ++i;
406                                 }
407                                 
408                                 if (c != '\"') {
409                                         printError("Missing quote");
410                                         if (c == '\n')
411                                                 ++lineno;
412                                 }
413                                 
414                                 buff[i] = '\0';
415                                 status = LEX_DATA;
416                                 break; 
417                         }
418                         
419                         if (c > ' ' && is) {
420                                 int i = 0;
421                                 do {
422                                         if (c == '\\') {
423                                                 // escape the next char
424                                                 is.get(cc);
425                                                 c = cc;
426                                                 //escaped = true;
427                                         }
428                                         buff[i++] = c;
429                                         is.get(cc);
430                                         c = cc;
431                                 } while (c > ' ' && c != ',' && is
432                                          && (i != LEX_MAX_BUFF-1) );
433                                 if (i == LEX_MAX_BUFF-1) {
434                                         printError("Line too long");
435                                 }
436                                 buff[i] = '\0';
437                                 status = LEX_TOKEN;
438                         }
439                         // new line
440                         if (c == '\n')
441                                 ++lineno;
442                 }
443                 
444                 if (status) return true;
445                 
446                 status = is.eof() ? LEX_FEOF : LEX_UNDEF;
447                 buff[0] = '\0';
448                 return false;
449         }
450 }
451
452
453 bool LyXLex::nextToken()
454 {
455         status = 0;
456         while (is && !status) {
457                 unsigned char c = 0;
458                 char cc = 0;
459                 is.get(cc);
460                 c = cc;
461                 if (c >= ' ' && is) {
462                         int i = 0;
463                         if (c == '\\') { // first char == '\\'
464                                 do {
465                                         buff[i++] = c;
466                                         is.get(cc);
467                                         c = cc;
468                                 } while (c > ' ' && c != '\\' && is
469                                          && i != (LEX_MAX_BUFF-1));
470                         } else {
471                                 do {
472                                         buff[i++] = c;
473                                         is.get(cc);
474                                         c = cc;
475                                 } while (c >= ' ' && c != '\\' && is
476                                          && i != (LEX_MAX_BUFF-1));
477                         }
478
479                         if (i == (LEX_MAX_BUFF - 1)) {
480                                 printError("Line too long");
481                         }
482
483                         if (c == '\\') is.putback(c); // put it back
484                         buff[i] = '\0';
485                         status = LEX_TOKEN;
486                 }
487                   
488                 if (c == '\n')
489                         ++lineno;
490         
491         }
492         if (status)  return true;
493         
494         status = is.eof() ? LEX_FEOF: LEX_UNDEF;
495         buff[0] = '\0';
496         return false;
497 }
498
499
500 int LyXLex::FindToken(char const * str[])
501 {  
502    int i = -1;
503    
504    if (next()) {
505       if (compare(buff, "default")) {
506          for (i = 0; str[i][0] && compare(str[i], buff); ++i);
507          if (!str[i][0]) {
508             printError("Unknown argument `$$Token'");
509             i = -1;
510          }
511       }  
512    } else
513      printError("file ended while scanning string token");
514    return i;
515 }
516
517
518 int LyXLex::CheckToken(char const * str[], int print_error)
519 {  
520    int i = -1;
521    
522    if (compare(buff, "default")) {
523        for (i = 0; str[i][0] && compare(str[i], buff); ++i);
524        if (!str[i][0]) {
525            if (print_error)
526                printError("Unknown argument `$$Token'");
527            i = -1;
528        }
529    }
530    return i;
531 }