]> git.lyx.org Git - lyx.git/blob - src/lyxlex.C
a61479728fde9340874e8c61a545e24bba1a527e
[lyx.git] / src / lyxlex.C
//  Generalized simple lexical analyzer.
//  It can be used for simple syntax parsers, like lyxrc,
//  texclass and others to come.   [asierra30/03/96]
//
//   Copyright 1996 Lyx Team.
6
7 #include <config.h>
8
9 #include <algorithm>
10 #include <cstdlib>
11
12 #ifdef __GNUG__
13 #pragma implementation "lyxlex.h"
14 #endif
15
16 #include "lyxlex.h"
17 #include "debug.h"
18 #include "support/filetools.h"
19 #include "support/lyxalgo.h"
20
21 using std::ios;
22 using std::lower_bound;
23 using std::sort;
24 using std::endl;
25
26
27 // namespace {
28 struct compare_tags {
29         // used by lower_bound
30         inline
31         int operator()(keyword_item const & a, char const * const tag) const {
32                 return compare_no_case(a.tag, tag) < 0;
33         }
34         // used by sorted and sort
35         inline
36         int operator()(keyword_item const & a, keyword_item const & b) const {
37                 return compare_no_case(a.tag, b.tag) < 0;
38         }
39 };
40 // } // end of anon namespace
41
42
43 LyXLex::LyXLex(keyword_item * tab, int num)
44         : is(&fb__), table(tab), no_items(num)
45 {
46         status = 0; 
47         pushed = 0;
48         // Check if the table is sorted and if not, sort it.
49         if (table && !sorted(table, table + no_items, compare_tags())) {
50                 lyxerr << "The table passed to LyXLex is not sorted!!\n"
51                        << "Tell the developers to fix it!" << endl;
52                 // We sort it anyway to avoid problems.
53                 lyxerr << "\nUnsorted:\n";
54                 printTable(lyxerr);
55                 
56                 sort(table, table + no_items,
57                      compare_tags());
58                 lyxerr << "\nSorted:\n";
59                 printTable(lyxerr);
60         }
61 }
62
63
64 void LyXLex::pushTable(keyword_item * tab, int num)
65 {
66         pushed_table * tmppu = new pushed_table;
67         tmppu->next = pushed;
68         tmppu->table_elem = table;
69         tmppu->table_siz = no_items;
70         pushed = tmppu;
71         table = tab;
72         no_items = num;
73         // Check if the table is sorted and if not, sort it.
74         if (table && !sorted(table, table + no_items, compare_tags())) {
75                 lyxerr << "The table passed to LyXLex is not sorted!!\n"
76                        << "Tell the developers to fix it!" << endl;
77                 // We sort it anyway to avoid problems.
78                 lyxerr << "\nUnsorted:\n";
79                 printTable(lyxerr);
80                 
81                 sort(table, table + no_items, compare_tags());
82                 lyxerr << "\nSorted:\n";
83                 printTable(lyxerr);
84         }
85 }
86
87
88 void LyXLex::popTable()
89 {
90         if (pushed == 0)
91                 lyxerr << "LyXLex error: nothing to pop!" << endl;
92
93         pushed_table * tmp;
94         tmp = pushed;
95         table = tmp->table_elem;
96         no_items = tmp->table_siz;
97         tmp->table_elem = 0;
98         pushed = tmp->next;
99         delete tmp;
100 }
101
102
103 void LyXLex::printTable(ostream & os)
104 {
105         os << "\nNumber of tags: " << no_items << '\n';
106         for(int i= 0; i < no_items; ++i)
107                 os << "table[" << i
108                    << "]:  tag: `" << table[i].tag
109                    << "'  code:" << table[i].code << '\n';
110         os.flush();
111 }
112
113
114 void LyXLex::printError(string const & message) const
115 {
116         string tmpmsg = subst(message, "$$Token", GetString());
117         lyxerr << "LyX: " << tmpmsg << " [around line " << lineno
118                << " of file " << MakeDisplayPath(name) << ']' << endl;
119 }
120
121
122 bool LyXLex::setFile(string const & filename)
123 {
124         if (fb__.is_open())
125                 lyxerr << "Error in LyXLex::setFile: "
126                         "file or stream already set." << endl;
127         fb__.open(filename.c_str(), ios::in);
128         is.rdbuf(&fb__);
129         name = filename;
130         lineno = 0;
131         return fb__.is_open() && is.good();
132 }
133
134
135 void LyXLex::setStream(istream & i)
136 {
137         if (fb__.is_open() || is.rdbuf()->in_avail())
138                 lyxerr << "Error in LyXLex::setStream: "
139                         "file or stream already set." << endl;
140         is.rdbuf(i.rdbuf());
141         lineno = 0;
142 }
143
144
145 int LyXLex::lex()
146 {
147         //NOTE: possible bug.
148         if (next() && status == LEX_TOKEN)
149                 return search_kw(buff);
150         else
151                 return status;
152 }
153
154
155 int LyXLex::GetInteger() const
156 {
157         if (buff[0] > ' ')   
158                 return atoi(buff);
159         else {
160                 printError("Bad integer `$$Token'");
161                 return -1;
162         }
163 }
164
165
166 float LyXLex::GetFloat() const
167 {
168    if (buff[0] > ' ')   
169        return atof(buff);
170    else {
171         printError("Bad float `$$Token'");
172         return -1;
173    }
174 }
175
176
177 string LyXLex::GetString() const
178 {
179         return string(buff);
180 }
181
182
183 // I would prefer to give a tag number instead of an explicit token
184 // here, but it is not possible because Buffer::readLyXformat2 uses
185 // explicit tokens (JMarc) 
186 string LyXLex::getLongString(string const & endtoken)
187 {
188         string str, prefix;
189         bool firstline = true;
190
191         while (IsOK()) {
192                 if (!EatLine())
193                         // blank line in the file being read
194                         continue;
195                 
196                 string const token = frontStrip(strip(GetString()), " \t");
197                 
198                 lyxerr[Debug::PARSER] << "LongString: `"
199                                       << GetString() << '\'' << endl;
200
201                 // We do a case independent comparison, like search_kw
202                 // does.
203                 if (compare_no_case(token, endtoken) != 0) {
204                         string tmpstr = GetString();
205                         if (firstline) {
206                                 unsigned int i = 0;
207                                 while(i < tmpstr.length()
208                                       && tmpstr[i] == ' ') {
209                                         ++i;
210                                         prefix += ' ';
211                                 }
212                                 firstline = false;
213                                 lyxerr[Debug::PARSER] << "Prefix = `" << prefix
214                                                       << '\'' << endl;
215                         } 
216
217                         if (!prefix.empty() 
218                             && prefixIs(tmpstr, prefix.c_str())) {
219                                 tmpstr.erase(0, prefix.length() - 1);
220                         }
221                         str += tmpstr + '\n';
222                 }
223                 else // token == endtoken
224                         break;
225         }
226         if (!IsOK())
227                 printError("Long string not ended by `" + endtoken + '\'');
228
229         return str;
230 }
231
232
233 bool LyXLex::GetBool() const
234 {
235         if (compare(buff, "true") == 0)
236                 return true;
237         else if (compare(buff, "false") != 0)
238                 printError("Bad boolean `$$Token'. Use \"false\" or \"true\"");
239         return false;
240 }
241
242
243 bool LyXLex::EatLine()
244 {
245         int i = 0;
246         unsigned char c = '\0';
247         char cc = 0;
248         while(is && c != '\n' && i != (LEX_MAX_BUFF - 1)) {
249                 is.get(cc);
250                 c = cc;
251                 lyxerr[Debug::LYXLEX] << "LyXLex::EatLine read char: `"
252                                       << c << "'" << endl;
253                 if (c != '\r')
254                         buff[i++] = c;
255         }
256         if (i == (LEX_MAX_BUFF - 1) && c != '\n') {
257                 printError("Line too long");
258                 c = '\n'; // Pretend we had an end of line
259                 --lineno; // but don't increase line counter (netto effect)
260                 ++i; // and preserve last character read.
261         }
262         if (c == '\n') {
263                 ++lineno;
264                 buff[--i] = '\0'; // i can never be 0 here, so no danger
265                 status = LEX_DATA;
266                 return true;
267         } else {
268                 buff[i] = '\0';
269                 return false;
270         }
271 }
272
273
274 int LyXLex::search_kw(char const * const tag) const
275 {
276         keyword_item * res =
277                 lower_bound(table, table + no_items, tag, compare_tags());
278         if (res != table + no_items && !compare_no_case(res->tag, tag))
279                 return res->code;
280         return LEX_UNDEF;
281 }
282
283
284 bool LyXLex::next(bool esc)
285 {
286         if (!esc) {
287                 unsigned char c = 0; // getc() returns an int
288                 char cc = 0;
289                 status = 0;
290                 while (is && !status) {
291                         is.get(cc);
292                         c = cc;
293                         if (c == '#') {
294                                 // Read rest of line (fast :-)
295                                 is.getline(buff, sizeof(buff));
296                                 lyxerr[Debug::LYXLEX] << "Comment read: `" << c
297                                                       << buff << "'" << endl;
298                                 ++lineno;
299                                 continue;
300                         }
301                         
302                         if (c == '\"') {
303                                 int i = -1;
304                                 do {
305                                         is.get(cc);
306                                         c = cc;
307                                         if (c != '\r')
308                                                 buff[++i] = c;
309                                 } while (c != '\"' && c != '\n' && is &&
310                                          i != (LEX_MAX_BUFF - 2));
311                                 
312                                 if (i == (LEX_MAX_BUFF - 2)) {
313                                         printError("Line too long");
314                                         c = '\"'; // Pretend we got a "
315                                         ++i;
316                                 }
317                                 
318                                 if (c != '\"') {
319                                         printError("Missing quote");
320                                         if (c == '\n')
321                                                 ++lineno;
322                                 }
323                                 
324                                 buff[i] = '\0';
325                                 status = LEX_DATA;
326                                 break; 
327                         }
328                         
329                         if (c == ',')
330                                 continue;              /* Skip ','s */
331
332                         // using relational operators with chars other
333                         // than == and != is not safe. And if it is done
334                         // the type _have_ to be unsigned. It usually a
335                         // lot better to use the functions from cctype
336                         if (c > ' ' && is)  {
337                                 int i = 0;
338                                 do {
339                                         buff[i++] = c;
340                                         is.get(cc);
341                                         c = cc;
342                                 } while (c > ' ' && c != ',' && is
343                                          && (i != LEX_MAX_BUFF - 1) );
344                                 if (i == LEX_MAX_BUFF - 1) {
345                                         printError("Line too long");
346                                 }
347                                 buff[i] = '\0';
348                                 status = LEX_TOKEN;
349                         }
350                         
351                         if (c == '\r' && is) {
352                                 // The Windows support has lead to the
353                                 // possibility of "\r\n" at the end of
354                                 // a line.  This will stop LyX choking
355                                 // when it expected to find a '\n'
356                                 is.get(cc);
357                                 c = cc;
358                         }
359
360                         if (c == '\n')
361                                 ++lineno;
362                         
363                 }
364                 if (status) return true;
365                 
366                 status = is.eof() ? LEX_FEOF: LEX_UNDEF;
367                 buff[0] = '\0';
368                 return false;
369         } else {
370                 unsigned char c = 0; // getc() returns an int
371                 char cc = 0;
372                 
373                 status = 0;
374                 while (is && !status) {
375                         is.get(cc);
376                         c = cc;
377
378                         // skip ','s
379                         if (c == ',') continue;
380                         
381                         if (c == '\\') {
382                                 // escape
383                                 int i = 0;
384                                 do {
385                                         if (c == '\\') {
386                                                 // escape the next char
387                                                 is.get(cc);
388                                                 c = cc;
389                                         }
390                                         buff[i++] = c;
391                                         is.get(cc);
392                                         c = cc;
393                                 } while (c > ' ' && c != ',' && is
394                                          && (i != LEX_MAX_BUFF - 1) );
395                                 if (i == LEX_MAX_BUFF - 1) {
396                                         printError("Line too long");
397                                 }
398                                 buff[i] = '\0';
399                                 status = LEX_TOKEN;
400                                 continue;
401                         }
402                         
403                         if (c == '#') {
404                                 // Read rest of line (fast :-)
405                                 is.getline(buff, sizeof(buff));
406                                 lyxerr[Debug::LYXLEX] << "Comment read: `" << c
407                                                       << buff << "'" << endl;
408                                 ++lineno;
409                                 continue;
410                         }
411
412                         // string
413                         if (c == '\"') {
414                                 int i = -1;
415                                 bool escaped = false;
416                                 do {
417                                         escaped = false;
418                                         is.get(cc);
419                                         c = cc;
420                                         if (c == '\r') continue;
421                                         if (c == '\\') {
422                                                 // escape the next char
423                                                 is.get(cc);
424                                                 c = cc;
425                                                 escaped = true;
426                                         }
427                                         buff[++i] = c;
428                                 
429                                         if (!escaped && c == '\"') break;
430                                 } while (c != '\n' && is &&
431                                          i != (LEX_MAX_BUFF - 2));
432                                 
433                                 if (i == (LEX_MAX_BUFF - 2)) {
434                                         printError("Line too long");
435                                         c = '\"'; // Pretend we got a "
436                                         ++i;
437                                 }
438                                 
439                                 if (c != '\"') {
440                                         printError("Missing quote");
441                                         if (c == '\n')
442                                                 ++lineno;
443                                 }
444                                 
445                                 buff[i] = '\0';
446                                 status = LEX_DATA;
447                                 break; 
448                         }
449                         
450                         if (c > ' ' && is) {
451                                 int i = 0;
452                                 do {
453                                         if (c == '\\') {
454                                                 // escape the next char
455                                                 is.get(cc);
456                                                 c = cc;
457                                                 //escaped = true;
458                                         }
459                                         buff[i++] = c;
460                                         is.get(cc);
461                                         c = cc;
462                                 } while (c > ' ' && c != ',' && is
463                                          && (i != LEX_MAX_BUFF-1) );
464                                 if (i == LEX_MAX_BUFF-1) {
465                                         printError("Line too long");
466                                 }
467                                 buff[i] = '\0';
468                                 status = LEX_TOKEN;
469                         }
470                         // new line
471                         if (c == '\n')
472                                 ++lineno;
473                 }
474                 
475                 if (status) return true;
476                 
477                 status = is.eof() ? LEX_FEOF : LEX_UNDEF;
478                 buff[0] = '\0';
479                 return false;
480         }
481 }
482
483
484 bool LyXLex::nextToken()
485 {
486         status = 0;
487         while (is && !status) {
488                 unsigned char c = 0;
489                 char cc = 0;
490                 is.get(cc);
491                 c = cc;
492                 if (c >= ' ' && is) {
493                         int i = 0;
494                         if (c == '\\') { // first char == '\\'
495                                 do {
496                                         buff[i++] = c;
497                                         is.get(cc);
498                                         c = cc;
499                                 } while (c > ' ' && c != '\\' && is
500                                          && i != (LEX_MAX_BUFF-1));
501                         } else {
502                                 do {
503                                         buff[i++] = c;
504                                         is.get(cc);
505                                         c = cc;
506                                 } while (c >= ' ' && c != '\\' && is
507                                          && i != (LEX_MAX_BUFF-1));
508                         }
509
510                         if (i == (LEX_MAX_BUFF - 1)) {
511                                 printError("Line too long");
512                         }
513
514                         if (c == '\\') is.putback(c); // put it back
515                         buff[i] = '\0';
516                         status = LEX_TOKEN;
517                 }
518                   
519                 if (c == '\n')
520                         ++lineno;
521         
522         }
523         if (status)  return true;
524         
525         status = is.eof() ? LEX_FEOF: LEX_UNDEF;
526         buff[0] = '\0';
527         return false;
528 }
529
530
531 int LyXLex::FindToken(char const * str[])
532 {  
533    int i = -1;
534    
535    if (next()) {
536       if (compare(buff, "default")) {
537          for (i = 0; str[i][0] && compare(str[i], buff); ++i);
538          if (!str[i][0]) {
539             printError("Unknown argument `$$Token'");
540             i = -1;
541          }
542       }  
543    } else
544      printError("file ended while scanning string token");
545    return i;
546 }
547
548
549 int LyXLex::CheckToken(char const * str[], int print_error)
550 {  
551    int i = -1;
552    
553    if (compare(buff, "default")) {
554        for (i = 0; str[i][0] && compare(str[i], buff); ++i);
555        if (!str[i][0]) {
556            if (print_error)
557                printError("Unknown argument `$$Token'");
558            i = -1;
559        }
560    }
561    return i;
562 }