]> git.lyx.org Git - lyx.git/blob - src/lyxlex.C
4e3618d8a9b2fce7ebab5984318f814f02c4f206
[lyx.git] / src / lyxlex.C
1 //  Generalized simple lexical analyzer.
2 //  It can be used for simple syntax parsers, like lyxrc,
3 //  texclass and others to come.   [asierra30/03/96]
4 //
5 //   (C) 1996 Lyx Team.
6
7 #include <config.h>
8
9 #include <cstdlib>
10
11 #ifdef __GNUG__
12 #pragma implementation "lyxlex.h"
13 #endif
14
15 #include "lyxlex.h"
16 #include "debug.h"
17 #include "support/filetools.h"
18
19 using std::ios;
20
21 LyXLex::LyXLex(keyword_item * tab, int num)
22         : is(&fb__), table(tab), no_items(num)
23 {
24         
25         status = 0;
26         pushed = 0;
27 }
28
29
30 void LyXLex::pushTable(keyword_item * tab, int num)
31 {
32         pushed_table * tmppu = new pushed_table;
33         tmppu->next = pushed;
34         tmppu->table_elem = table;
35         tmppu->table_siz = no_items;
36         pushed = tmppu;
37         table = tab;
38         no_items = num;
39 }
40
41
42 void LyXLex::popTable()
43 {
44         if (pushed == 0)
45                 lyxerr << "LyXLex error: nothing to pop!" << endl;
46
47         pushed_table * tmp;
48         tmp = pushed;
49         table = tmp->table_elem;
50         no_items = tmp->table_siz;
51         tmp->table_elem = 0;
52         pushed = tmp->next;
53         delete tmp;
54 }
55
56
57 void LyXLex::printTable()
58 {
59         lyxerr << "\nNumber of tags: " << no_items << endl;
60         for(int i= 0; i < no_items; ++i)
61                 lyxerr << "table[" << i
62                        << "]:  tag: `" << table[i].tag
63                        << "'  code:" << table[i].code << endl;
64         lyxerr << endl;
65 }
66
67
68 void LyXLex::printError(string const & message) const
69 {
70         string tmpmsg = subst(message, "$$Token", GetString());
71         lyxerr << "LyX: " << tmpmsg << " [around line " << lineno
72                << " of file " << MakeDisplayPath(name) << ']' << endl;
73 }
74
75
76 bool LyXLex::setFile(string const & filename)
77 {
78         if (fb__.is_open())
79                 lyxerr << "Error in LyXLex::setFile: "
80                         "file or stream already set." << endl;
81         fb__.open(filename.c_str(), ios::in);
82         is.rdbuf(&fb__);
83         name = filename;
84         lineno = 0;
85         return fb__.is_open() && is.good();
86 }
87
88
89 void LyXLex::setStream(istream & i)
90 {
91         if (fb__.is_open() || is.rdbuf()->in_avail())
92                 lyxerr << "Error in LyXLex::setStream: "
93                         "file or stream already set." << endl;
94         is.rdbuf(i.rdbuf());
95         lineno = 0;
96 }
97
98
99 int LyXLex::lex()
100 {
101         //NOTE: possible bug.
102         if (next() && status == LEX_TOKEN)
103                 return search_kw(buff);
104         else
105                 return status;
106 }
107
108
109 int LyXLex::GetInteger() const
110 {
111         if (buff[0] > ' ')   
112                 return atoi(buff);
113         else {
114                 printError("Bad integer `$$Token'");
115                 return -1;
116         }
117 }
118
119
120 float LyXLex::GetFloat() const
121 {
122    if (buff[0] > ' ')   
123        return atof(buff);
124    else {
125         printError("Bad float `$$Token'");
126         return -1;
127    }
128 }
129
130
131 string LyXLex::GetString() const
132 {
133         return string(buff);
134 }
135
136
137 // I would prefer to give a tag number instead of an explicit token
138 // here, but it is not possible because Buffer::readLyXformat2 uses
139 // explicit tokens (JMarc) 
140 string LyXLex::getLongString(string const & endtoken)
141 {
142         string str, prefix;
143         bool firstline = true;
144
145         while (IsOK()) {
146                 if (!EatLine())
147                         // blank line in the file being read
148                         continue;
149                 
150                 string const token = frontStrip(strip(GetString()), " \t");
151                 
152                 lyxerr[Debug::PARSER] << "LongString: `"
153                                       << GetString() << '\'' << endl;
154
155                 // We do a case independent comparison, like search_kw
156                 // does.
157                 if (compare_no_case(token, endtoken) != 0) {
158                         string tmpstr = GetString();
159                         if (firstline) {
160                                 unsigned int i = 0;
161                                 while(i < tmpstr.length()
162                                       && tmpstr[i] == ' ') {
163                                         ++i;
164                                         prefix += ' ';
165                                 }
166                                 firstline = false;
167                                 lyxerr[Debug::PARSER] << "Prefix = `" << prefix
168                                                       << '\'' << endl;
169                         } 
170
171                         if (!prefix.empty() 
172                             && prefixIs(tmpstr, prefix.c_str())) {
173                                 tmpstr.erase(0, prefix.length() - 1);
174                         }
175                         str += tmpstr + '\n';
176                 }
177                 else // token == endtoken
178                         break;
179         }
180         if (!IsOK())
181                 printError("Long string not ended by `" + endtoken + '\'');
182
183         return str;
184 }
185
186
187 bool LyXLex::GetBool() const
188 {
189         if (compare(buff, "true") == 0)
190                 return true;
191         else if (compare(buff, "false") != 0)
192                 printError("Bad boolean `$$Token'. Use \"false\" or \"true\"");
193         return false;
194 }
195
196
197 bool LyXLex::EatLine()
198 {
199         int i = 0;
200         unsigned char c = '\0';
201
202         while(is && c != '\n' && i != (LEX_MAX_BUFF - 1)) {
203                 is.get(c);
204                 if (c != '\r')
205                         buff[i++] = c;
206         }
207         if (i == (LEX_MAX_BUFF - 1) && c != '\n') {
208                 printError("Line too long");
209                 c = '\n'; // Pretend we had an end of line
210                 --lineno; // but don't increase line counter (netto effect)
211                 ++i; // and preserve last character read.
212         }
213         if (c == '\n') {
214                 ++lineno;
215                 buff[--i] = '\0'; // i can never be 0 here, so no danger
216                 status = LEX_DATA;
217                 return true;
218         } else {
219                 buff[i] = '\0';
220                 return false;
221         }
222 }
223
224
225 int LyXLex::search_kw(char const * const tag) const
226 {
227         int m, k = 0 , l = 0, r = no_items;
228
229         while (l < r) {
230                 m = (l + r) / 2;
231
232                 if (lyxerr.debugging(Debug::PARSER)) {
233                         lyxerr << "LyXLex::search_kw: elem " << m
234                                << " tag " << table[m].tag
235                                << " search tag " << tag
236                                << endl;
237                 }
238
239                 if (table[m].tag)
240                         k = compare_no_case(table[m].tag, tag);
241                 if (k == 0)
242                         return table[m].code;
243                 else
244                         if (k < 0) l = m + 1; else r = m;
245         }
246         return LEX_UNDEF;
247 }
248
249
250 bool LyXLex::next(bool esc)
251 {
252         if (!esc) {
253                 unsigned char c; // getc() returns an int
254                 
255                 status = 0;
256                 while (is && !status) { 
257                         is.get(c);
258                         if (c == '#') {
259                                 // Read rest of line (fast :-)
260                                 is.get(buff, sizeof(buff));
261                                 lyxerr[Debug::LYXLEX] << "Comment read: " << c << buff << endl;
262                                 ++lineno;
263                                 continue;
264                         }
265                         
266                         if (c == '\"') {
267                                 int i = -1;
268                                 do {
269                                         is.get(c);
270                                         if (c != '\r')
271                                                 buff[++i] = c;
272                                 } while (c != '\"' && c != '\n' && is &&
273                                          i != (LEX_MAX_BUFF - 2));
274                                 
275                                 if (i == (LEX_MAX_BUFF - 2)) {
276                                         printError("Line too long");
277                                         c = '\"'; // Pretend we got a "
278                                         ++i;
279                                 }
280                                 
281                                 if (c != '\"') {
282                                         printError("Missing quote");
283                                         if (c == '\n')
284                                                 ++lineno;
285                                 }
286                                 
287                                 buff[i] = '\0';
288                                 status = LEX_DATA;
289                                 break; 
290                         }
291                         
292                         if (c == ',')
293                                 continue;              /* Skip ','s */
294
295                         // using relational operators with chars other
296                         // than == and != is not safe. And if it is done
297                         // the type _have_ to be unsigned. It usually a
298                         // lot better to use the functions from cctype
299                         if (c > ' ' && is)  {
300 #warning Verify this! (Lgb)
301                                 //if (isalnum(static_cast<unsigned char>(c)) && is) {
302                                 int i = 0;
303                                 do {
304                                         buff[i++] = c;
305                                         is.get(c);
306                                 } while (c > ' ' && c != ',' && is
307                                 //} while (isalnum(static_cast<unsigned char>(c))
308                                          //&& c != ',' && is
309                                          && (i != LEX_MAX_BUFF - 1) );
310                                 if (i == LEX_MAX_BUFF - 1) {
311                                         printError("Line too long");
312                                 }
313                                 buff[i] = '\0';
314                                 status = LEX_TOKEN;
315                         }
316                         
317                         if (c == '\r' && is) {
318                                 // The Windows support has lead to the
319                                 // possibility of "\r\n" at the end of
320                                 // a line.  This will stop LyX choking
321                                 // when it expected to find a '\n'
322                                 is.get(c);
323                         }
324
325                         if (c == '\n')
326                                 ++lineno;
327                         
328                 }
329                 if (status) return true;
330                 
331                 status = is.eof() ? LEX_FEOF: LEX_UNDEF;
332                 buff[0] = '\0';
333                 return false;
334         } else {
335                 unsigned char c; // getc() returns an int
336                 
337                 status = 0;
338                 while (is && !status) {
339                         is.get(c);
340
341                         // skip ','s
342                         if (c == ',') continue;
343                         
344                         if (c == '\\') {
345                                 // escape
346                                 int i = 0;
347                                 do {
348                                         if (c == '\\') {
349                                                 // escape the next char
350                                                 is.get(c);
351                                         }
352                                         buff[i++] = c;
353                                         is.get(c);
354                                 } while (c > ' ' && c != ',' && is
355                                 //} while (isalnum(static_cast<unsigned char>(c))
356                                          //&& c != ',' && is
357                                          && (i != LEX_MAX_BUFF - 1) );
358                                 if (i == LEX_MAX_BUFF - 1) {
359                                         printError("Line too long");
360                                 }
361                                 buff[i] = '\0';
362                                 status = LEX_TOKEN;
363                                 continue;
364                         }
365                         
366                         if (c == '#') {
367                                 // Read rest of line (fast :-)
368                                 is.get(buff, sizeof(buff));
369                                 lyxerr[Debug::LYXLEX] << "Comment read: " << c << buff << endl;
370                                 ++lineno;
371                                 continue;
372                         }
373
374                         // string
375                         if (c == '\"') {
376                                 int i = -1;
377                                 bool escaped = false;
378                                 do {
379                                         escaped = false;
380                                         is.get(c);
381                                         if (c == '\r') continue;
382                                         if (c == '\\') {
383                                                 // escape the next char
384                                                 is.get(c);
385                                                 escaped = true;
386                                         }
387                                         buff[++i] = c;
388                                 
389                                         if (!escaped && c == '\"') break;
390                                 } while (c != '\n' && is &&
391                                          i != (LEX_MAX_BUFF - 2));
392                                 
393                                 if (i == (LEX_MAX_BUFF - 2)) {
394                                         printError("Line too long");
395                                         c = '\"'; // Pretend we got a "
396                                         ++i;
397                                 }
398                                 
399                                 if (c != '\"') {
400                                         printError("Missing quote");
401                                         if (c == '\n')
402                                                 ++lineno;
403                                 }
404                                 
405                                 buff[i] = '\0';
406                                 status = LEX_DATA;
407                                 break; 
408                         }
409                         
410                         if (c > ' ' && is) {
411                                 //if (isalnum(static_cast<unsigned char>(c)) && is) {
412                                 int i = 0;
413                                 do {
414                                         if (c == '\\') {
415                                                 // escape the next char
416                                                 is.get(c);
417                                                 //escaped = true;
418                                         }
419                                         buff[i++] = c;
420                                         is.get(c);
421                                 } while (c > ' ' && c != ',' && is
422                                 //} while (isalnum(static_cast<unsigned char>(c))
423                                          //!= ',' && is
424                                          && (i != LEX_MAX_BUFF-1) );
425                                 if (i == LEX_MAX_BUFF-1) {
426                                         printError("Line too long");
427                                 }
428                                 buff[i] = '\0';
429                                 status = LEX_TOKEN;
430                         }
431                         // new line
432                         if (c == '\n')
433                                 ++lineno;
434                 }
435                 
436                 if (status) return true;
437                 
438                 status = is.eof() ? LEX_FEOF: LEX_UNDEF;
439                 buff[0] = '\0';
440                 return false;   
441         }
442 }
443
444
445 bool LyXLex::nextToken()
446 {
447         status = 0;
448         while (is && !status) { 
449                 unsigned char c;
450                 is.get(c);
451            
452                 if (c >= ' ' && is) {
453                         //if (isprint(static_cast<unsigned char>(c)) && is) {
454                         int i = 0;
455                         if (c == '\\') { // first char == '\\'
456                                 do {
457                                         buff[i++] = c;
458                                         is.get(c);
459                                 } while (c > ' ' && c != '\\' && is
460                                 //} while (isalnum(static_cast<unsigned char>(c))
461                                 //       && c != '\\' && is
462                                          && i != (LEX_MAX_BUFF-1));
463                         } else {
464                                 do {
465                                         buff[i++] = c;
466                                         is.get(c);
467                                 } while (c >= ' ' && c != '\\' && is
468                                 //} while (isprint(static_cast<unsigned char>(c))
469                                          // && c != '\\' && is
470                                          && i != (LEX_MAX_BUFF-1));
471                         }
472
473                         if (i == (LEX_MAX_BUFF - 1)) {
474                                 printError("Line too long");
475                         }
476
477                         if (c == '\\') is.putback(c); // put it back
478                         buff[i] = '\0';
479                         status = LEX_TOKEN;
480                 }
481                   
482                 if (c == '\n')
483                         ++lineno;
484         
485         }
486         if (status)  return true;
487         
488         status = is.eof() ? LEX_FEOF: LEX_UNDEF;
489         buff[0] = '\0';
490         return false;
491 }
492
493
494 int LyXLex::FindToken(char const * str[])
495 {  
496    int i = -1;
497    
498    if (next()) {
499       if (compare(buff, "default")) {
500          for (i = 0; str[i][0] && compare(str[i], buff); ++i);
501          if (!str[i][0]) {
502             printError("Unknown argument `$$Token'");
503             i = -1;
504          }
505       }  
506    } else
507      printError("file ended while scanning string token");
508    return i;
509 }
510
511
512 int LyXLex::CheckToken(char const * str[], int print_error)
513 {  
514    int i = -1;
515    
516    if (compare(buff, "default")) {
517        for (i = 0; str[i][0] && compare(str[i], buff); ++i);
518        if (!str[i][0]) {
519            if (print_error)
520                printError("Unknown argument `$$Token'");
521            i = -1;
522        }
523    }
524    return i;
525 }