]> git.lyx.org Git - lyx.git/blob - src/lyxlex.C
145b2cc9f5926162d154efbc76c11d71a1de1588
[lyx.git] / src / lyxlex.C
//  Generalized simple lexical analyzer.
//  It can be used for simple syntax parsers, like lyxrc,
//  texclass and others to come.   [asierra30/03/96]
//
//   Copyright 1996 Lyx Team.
6
7 #include <config.h>
8
9 #include <algorithm>
10 #include <cstdlib>
11
12 #ifdef __GNUG__
13 #pragma implementation "lyxlex.h"
14 #endif
15
16 #include "lyxlex.h"
17 #include "debug.h"
18 #include "support/filetools.h"
19 #include "support/lyxalgo.h"
20
21 using std::ios;
22 using std::ostream;
23 using std::istream;
24 using std::lower_bound;
25 using std::sort;
26 using std::endl;
27
28
29 // namespace {
30 struct compare_tags {
31         // used by lower_bound
32         inline
33         int operator()(keyword_item const & a, char const * const tag) const {
34                 return compare_no_case(a.tag, tag) < 0;
35         }
36         // used by sorted and sort
37         inline
38         int operator()(keyword_item const & a, keyword_item const & b) const {
39                 return compare_no_case(a.tag, b.tag) < 0;
40         }
41 };
42 // } // end of anon namespace
43
44
45 LyXLex::LyXLex(keyword_item * tab, int num)
46         : is(&fb__), table(tab), no_items(num)
47 {
48         status = 0; 
49         pushed = 0;
50         // Check if the table is sorted and if not, sort it.
51         if (table && !sorted(table, table + no_items, compare_tags())) {
52                 lyxerr << "The table passed to LyXLex is not sorted!!\n"
53                        << "Tell the developers to fix it!" << endl;
54                 // We sort it anyway to avoid problems.
55                 lyxerr << "\nUnsorted:\n";
56                 printTable(lyxerr);
57                 
58                 sort(table, table + no_items,
59                      compare_tags());
60                 lyxerr << "\nSorted:\n";
61                 printTable(lyxerr);
62         }
63 }
64
65
66 void LyXLex::pushTable(keyword_item * tab, int num)
67 {
68         pushed_table * tmppu = new pushed_table;
69         tmppu->next = pushed;
70         tmppu->table_elem = table;
71         tmppu->table_siz = no_items;
72         pushed = tmppu;
73         table = tab;
74         no_items = num;
75         // Check if the table is sorted and if not, sort it.
76         if (table && !sorted(table, table + no_items, compare_tags())) {
77                 lyxerr << "The table passed to LyXLex is not sorted!!\n"
78                        << "Tell the developers to fix it!" << endl;
79                 // We sort it anyway to avoid problems.
80                 lyxerr << "\nUnsorted:\n";
81                 printTable(lyxerr);
82                 
83                 sort(table, table + no_items, compare_tags());
84                 lyxerr << "\nSorted:\n";
85                 printTable(lyxerr);
86         }
87 }
88
89
90 void LyXLex::popTable()
91 {
92         if (pushed == 0)
93                 lyxerr << "LyXLex error: nothing to pop!" << endl;
94
95         pushed_table * tmp;
96         tmp = pushed;
97         table = tmp->table_elem;
98         no_items = tmp->table_siz;
99         tmp->table_elem = 0;
100         pushed = tmp->next;
101         delete tmp;
102 }
103
104
105 void LyXLex::printTable(ostream & os)
106 {
107         os << "\nNumber of tags: " << no_items << '\n';
108         for(int i= 0; i < no_items; ++i)
109                 os << "table[" << i
110                    << "]:  tag: `" << table[i].tag
111                    << "'  code:" << table[i].code << '\n';
112         os.flush();
113 }
114
115
116 void LyXLex::printError(string const & message) const
117 {
118         string tmpmsg = subst(message, "$$Token", GetString());
119         lyxerr << "LyX: " << tmpmsg << " [around line " << lineno
120                << " of file " << MakeDisplayPath(name) << ']' << endl;
121 }
122
123
124 bool LyXLex::setFile(string const & filename)
125 {
126         if (fb__.is_open())
127                 lyxerr << "Error in LyXLex::setFile: "
128                         "file or stream already set." << endl;
129         fb__.open(filename.c_str(), ios::in);
130         is.rdbuf(&fb__);
131         name = filename;
132         lineno = 0;
133         return fb__.is_open() && is.good();
134 }
135
136
137 void LyXLex::setStream(istream & i)
138 {
139         if (fb__.is_open() || is.rdbuf()->in_avail())
140                 lyxerr << "Error in LyXLex::setStream: "
141                         "file or stream already set." << endl;
142         is.rdbuf(i.rdbuf());
143         lineno = 0;
144 }
145
146
147 int LyXLex::lex()
148 {
149         //NOTE: possible bug.
150         if (next() && status == LEX_TOKEN)
151                 return search_kw(buff);
152         else
153                 return status;
154 }
155
156
157 int LyXLex::GetInteger() const
158 {
159         if (buff[0] > ' ')   
160                 return atoi(buff);
161         else {
162                 printError("Bad integer `$$Token'");
163                 return -1;
164         }
165 }
166
167
168 float LyXLex::GetFloat() const
169 {
170    if (buff[0] > ' ')   
171        return atof(buff);
172    else {
173         printError("Bad float `$$Token'");
174         return -1;
175    }
176 }
177
178
179 string LyXLex::GetString() const
180 {
181         return string(buff);
182 }
183
184
185 // I would prefer to give a tag number instead of an explicit token
186 // here, but it is not possible because Buffer::readLyXformat2 uses
187 // explicit tokens (JMarc) 
188 string LyXLex::getLongString(string const & endtoken)
189 {
190         string str, prefix;
191         bool firstline = true;
192
193         while (IsOK()) {
194                 if (!EatLine())
195                         // blank line in the file being read
196                         continue;
197                 
198                 string const token = frontStrip(strip(GetString()), " \t");
199                 
200                 lyxerr[Debug::PARSER] << "LongString: `"
201                                       << GetString() << '\'' << endl;
202
203                 // We do a case independent comparison, like search_kw
204                 // does.
205                 if (compare_no_case(token, endtoken) != 0) {
206                         string tmpstr = GetString();
207                         if (firstline) {
208                                 unsigned int i = 0;
209                                 while(i < tmpstr.length()
210                                       && tmpstr[i] == ' ') {
211                                         ++i;
212                                         prefix += ' ';
213                                 }
214                                 firstline = false;
215                                 lyxerr[Debug::PARSER] << "Prefix = `" << prefix
216                                                       << '\'' << endl;
217                         } 
218
219                         if (!prefix.empty() 
220                             && prefixIs(tmpstr, prefix.c_str())) {
221                                 tmpstr.erase(0, prefix.length() - 1);
222                         }
223                         str += tmpstr + '\n';
224                 }
225                 else // token == endtoken
226                         break;
227         }
228         if (!IsOK())
229                 printError("Long string not ended by `" + endtoken + '\'');
230
231         return str;
232 }
233
234
235 bool LyXLex::GetBool() const
236 {
237         if (compare(buff, "true") == 0)
238                 return true;
239         else if (compare(buff, "false") != 0)
240                 printError("Bad boolean `$$Token'. Use \"false\" or \"true\"");
241         return false;
242 }
243
244
245 bool LyXLex::EatLine()
246 {
247         int i = 0;
248         unsigned char c = '\0';
249         char cc = 0;
250         while(is && c != '\n' && i != (LEX_MAX_BUFF - 1)) {
251                 is.get(cc);
252                 c = cc;
253                 lyxerr[Debug::LYXLEX] << "LyXLex::EatLine read char: `"
254                                       << c << "'" << endl;
255                 if (c != '\r')
256                         buff[i++] = c;
257         }
258         if (i == (LEX_MAX_BUFF - 1) && c != '\n') {
259                 printError("Line too long");
260                 c = '\n'; // Pretend we had an end of line
261                 --lineno; // but don't increase line counter (netto effect)
262                 ++i; // and preserve last character read.
263         }
264         if (c == '\n') {
265                 ++lineno;
266                 buff[--i] = '\0'; // i can never be 0 here, so no danger
267                 status = LEX_DATA;
268                 return true;
269         } else {
270                 buff[i] = '\0';
271                 return false;
272         }
273 }
274
275
276 int LyXLex::search_kw(char const * const tag) const
277 {
278         keyword_item * res =
279                 lower_bound(table, table + no_items, tag, compare_tags());
280         if (res != table + no_items && !compare_no_case(res->tag, tag))
281                 return res->code;
282         return LEX_UNDEF;
283 }
284
285
286 bool LyXLex::next(bool esc)
287 {
288         if (!esc) {
289                 unsigned char c = 0; // getc() returns an int
290                 char cc = 0;
291                 status = 0;
292                 while (is && !status) {
293                         is.get(cc);
294                         c = cc;
295                         if (c == '#') {
296                                 // Read rest of line (fast :-)
297                                 is.getline(buff, sizeof(buff));
298                                 lyxerr[Debug::LYXLEX] << "Comment read: `" << c
299                                                       << buff << "'" << endl;
300                                 ++lineno;
301                                 continue;
302                         }
303                         
304                         if (c == '\"') {
305                                 int i = -1;
306                                 do {
307                                         is.get(cc);
308                                         c = cc;
309                                         if (c != '\r')
310                                                 buff[++i] = c;
311                                 } while (c != '\"' && c != '\n' && is &&
312                                          i != (LEX_MAX_BUFF - 2));
313                                 
314                                 if (i == (LEX_MAX_BUFF - 2)) {
315                                         printError("Line too long");
316                                         c = '\"'; // Pretend we got a "
317                                         ++i;
318                                 }
319                                 
320                                 if (c != '\"') {
321                                         printError("Missing quote");
322                                         if (c == '\n')
323                                                 ++lineno;
324                                 }
325                                 
326                                 buff[i] = '\0';
327                                 status = LEX_DATA;
328                                 break; 
329                         }
330                         
331                         if (c == ',')
332                                 continue;              /* Skip ','s */
333
334                         // using relational operators with chars other
335                         // than == and != is not safe. And if it is done
336                         // the type _have_ to be unsigned. It usually a
337                         // lot better to use the functions from cctype
338                         if (c > ' ' && is)  {
339                                 int i = 0;
340                                 do {
341                                         buff[i++] = c;
342                                         is.get(cc);
343                                         c = cc;
344                                 } while (c > ' ' && c != ',' && is
345                                          && (i != LEX_MAX_BUFF - 1) );
346                                 if (i == LEX_MAX_BUFF - 1) {
347                                         printError("Line too long");
348                                 }
349                                 buff[i] = '\0';
350                                 status = LEX_TOKEN;
351                         }
352                         
353                         if (c == '\r' && is) {
354                                 // The Windows support has lead to the
355                                 // possibility of "\r\n" at the end of
356                                 // a line.  This will stop LyX choking
357                                 // when it expected to find a '\n'
358                                 is.get(cc);
359                                 c = cc;
360                         }
361
362                         if (c == '\n')
363                                 ++lineno;
364                         
365                 }
366                 if (status) return true;
367                 
368                 status = is.eof() ? LEX_FEOF: LEX_UNDEF;
369                 buff[0] = '\0';
370                 return false;
371         } else {
372                 unsigned char c = 0; // getc() returns an int
373                 char cc = 0;
374                 
375                 status = 0;
376                 while (is && !status) {
377                         is.get(cc);
378                         c = cc;
379
380                         // skip ','s
381                         if (c == ',') continue;
382                         
383                         if (c == '\\') {
384                                 // escape
385                                 int i = 0;
386                                 do {
387                                         if (c == '\\') {
388                                                 // escape the next char
389                                                 is.get(cc);
390                                                 c = cc;
391                                         }
392                                         buff[i++] = c;
393                                         is.get(cc);
394                                         c = cc;
395                                 } while (c > ' ' && c != ',' && is
396                                          && (i != LEX_MAX_BUFF - 1) );
397                                 if (i == LEX_MAX_BUFF - 1) {
398                                         printError("Line too long");
399                                 }
400                                 buff[i] = '\0';
401                                 status = LEX_TOKEN;
402                                 continue;
403                         }
404                         
405                         if (c == '#') {
406                                 // Read rest of line (fast :-)
407                                 is.getline(buff, sizeof(buff));
408                                 lyxerr[Debug::LYXLEX] << "Comment read: `" << c
409                                                       << buff << "'" << endl;
410                                 ++lineno;
411                                 continue;
412                         }
413
414                         // string
415                         if (c == '\"') {
416                                 int i = -1;
417                                 bool escaped = false;
418                                 do {
419                                         escaped = false;
420                                         is.get(cc);
421                                         c = cc;
422                                         if (c == '\r') continue;
423                                         if (c == '\\') {
424                                                 // escape the next char
425                                                 is.get(cc);
426                                                 c = cc;
427                                                 escaped = true;
428                                         }
429                                         buff[++i] = c;
430                                 
431                                         if (!escaped && c == '\"') break;
432                                 } while (c != '\n' && is &&
433                                          i != (LEX_MAX_BUFF - 2));
434                                 
435                                 if (i == (LEX_MAX_BUFF - 2)) {
436                                         printError("Line too long");
437                                         c = '\"'; // Pretend we got a "
438                                         ++i;
439                                 }
440                                 
441                                 if (c != '\"') {
442                                         printError("Missing quote");
443                                         if (c == '\n')
444                                                 ++lineno;
445                                 }
446                                 
447                                 buff[i] = '\0';
448                                 status = LEX_DATA;
449                                 break; 
450                         }
451                         
452                         if (c > ' ' && is) {
453                                 int i = 0;
454                                 do {
455                                         if (c == '\\') {
456                                                 // escape the next char
457                                                 is.get(cc);
458                                                 c = cc;
459                                                 //escaped = true;
460                                         }
461                                         buff[i++] = c;
462                                         is.get(cc);
463                                         c = cc;
464                                 } while (c > ' ' && c != ',' && is
465                                          && (i != LEX_MAX_BUFF-1) );
466                                 if (i == LEX_MAX_BUFF-1) {
467                                         printError("Line too long");
468                                 }
469                                 buff[i] = '\0';
470                                 status = LEX_TOKEN;
471                         }
472                         // new line
473                         if (c == '\n')
474                                 ++lineno;
475                 }
476                 
477                 if (status) return true;
478                 
479                 status = is.eof() ? LEX_FEOF : LEX_UNDEF;
480                 buff[0] = '\0';
481                 return false;
482         }
483 }
484
485
486 bool LyXLex::nextToken()
487 {
488         status = 0;
489         while (is && !status) {
490                 unsigned char c = 0;
491                 char cc = 0;
492                 is.get(cc);
493                 c = cc;
494                 if (c >= ' ' && is) {
495                         int i = 0;
496                         if (c == '\\') { // first char == '\\'
497                                 do {
498                                         buff[i++] = c;
499                                         is.get(cc);
500                                         c = cc;
501                                 } while (c > ' ' && c != '\\' && is
502                                          && i != (LEX_MAX_BUFF-1));
503                         } else {
504                                 do {
505                                         buff[i++] = c;
506                                         is.get(cc);
507                                         c = cc;
508                                 } while (c >= ' ' && c != '\\' && is
509                                          && i != (LEX_MAX_BUFF-1));
510                         }
511
512                         if (i == (LEX_MAX_BUFF - 1)) {
513                                 printError("Line too long");
514                         }
515
516                         if (c == '\\') is.putback(c); // put it back
517                         buff[i] = '\0';
518                         status = LEX_TOKEN;
519                 }
520                   
521                 if (c == '\n')
522                         ++lineno;
523         
524         }
525         if (status)  return true;
526         
527         status = is.eof() ? LEX_FEOF: LEX_UNDEF;
528         buff[0] = '\0';
529         return false;
530 }
531
532
533 int LyXLex::FindToken(char const * str[])
534 {  
535    int i = -1;
536    
537    if (next()) {
538       if (compare(buff, "default")) {
539          for (i = 0; str[i][0] && compare(str[i], buff); ++i);
540          if (!str[i][0]) {
541             printError("Unknown argument `$$Token'");
542             i = -1;
543          }
544       }  
545    } else
546      printError("file ended while scanning string token");
547    return i;
548 }
549
550
551 int LyXLex::CheckToken(char const * str[], int print_error)
552 {  
553    int i = -1;
554    
555    if (compare(buff, "default")) {
556        for (i = 0; str[i][0] && compare(str[i], buff); ++i);
557        if (!str[i][0]) {
558            if (print_error)
559                printError("Unknown argument `$$Token'");
560            i = -1;
561        }
562    }
563    return i;
564 }