]> git.lyx.org Git - lyx.git/blob - src/lyxlex_pimpl.C
work around for bug reported by Mario Morandini
[lyx.git] / src / lyxlex_pimpl.C
1 #include <config.h>
2
3 #ifdef __GNUG__
4 #pragma implementation
5 #endif
6 #include <algorithm>
7
8 #include "lyxlex_pimpl.h"
9 #include "support/lyxalgo.h"
10 #include "support/filetools.h"
11 #include "support/lstrings.h"
12 #include "debug.h"
13
14 using std::sort;
15 using std::ostream;
16 using std::ios;
17 using std::istream;
18 using std::endl;
19 using std::lower_bound;
20
21 // namespace {
22 struct compare_tags {
23         // used by lower_bound, sort and sorted
24         inline
25         int operator()(keyword_item const & a, keyword_item const & b) const {
26                 // we use the ascii version, because in turkish, 'i'
27                 // is not the lowercase version of 'I', and thus
28                 // turkish locale breaks parsing of tags.
29                 return compare_ascii_no_case(a.tag, b.tag) < 0;
30         }
31 };
32 // } // end of anon namespace
33
34
35 LyXLex::Pimpl::Pimpl(keyword_item * tab, int num)
36         : is(&fb__), table(tab), no_items(num),
37           status(0), lineno(0), commentChar('#')
38 {
39         verifyTable();
40 }
41
42
43 string const LyXLex::Pimpl::getString() const
44 {
45         return string(buff);
46 }
47
48
49 void LyXLex::Pimpl::printError(string const & message) const
50 {
51         string const tmpmsg = subst(message, "$$Token", getString());
52         lyxerr << "LyX: " << tmpmsg << " [around line " << lineno
53                << " of file " << MakeDisplayPath(name) << ']' << endl;
54 }
55
56
57 void LyXLex::Pimpl::printTable(ostream & os)
58 {
59         os << "\nNumber of tags: " << no_items << '\n';
60         for (int i= 0; i < no_items; ++i)
61                 os << "table[" << i
62                    << "]:  tag: `" << table[i].tag
63                    << "'  code:" << table[i].code << '\n';
64         os.flush();
65 }
66
67
68 void LyXLex::Pimpl::verifyTable()
69 {
70         // Check if the table is sorted and if not, sort it.
71         if (table
72             && !lyx::sorted(table, table + no_items, compare_tags())) {
73                 lyxerr << "The table passed to LyXLex is not sorted!\n"
74                        << "Tell the developers to fix it!" << endl;
75                 // We sort it anyway to avoid problems.
76                 lyxerr << "\nUnsorted:\n";
77                 printTable(lyxerr);
78
79                 sort(table, table + no_items, compare_tags());
80                 lyxerr << "\nSorted:\n";
81                 printTable(lyxerr);
82         }
83 }
84
85
86 void LyXLex::Pimpl::pushTable(keyword_item * tab, int num)
87 {
88         pushed_table tmppu(table, no_items);
89         pushed.push(tmppu);
90
91         table = tab;
92         no_items = num;
93
94         verifyTable();
95 }
96
97
98 void LyXLex::Pimpl::popTable()
99 {
100         if (pushed.empty()) {
101                 lyxerr << "LyXLex error: nothing to pop!" << endl;
102                 return;
103         }
104
105         pushed_table tmp = pushed.top();
106         pushed.pop();
107         table = tmp.table_elem;
108         no_items = tmp.table_siz;
109 }
110
111
112 bool LyXLex::Pimpl::setFile(string const & filename)
113 {
114         // The check only outputs a debug message, because it triggers
115         // a bug in compaq cxx 6.2, where is_open() returns 'true' for a
116         // fresh new filebuf.  (JMarc)
117         if (fb__.is_open() || is.tellg() > 0)
118                 lyxerr[Debug::LYXLEX] << "Error in LyXLex::setFile: "
119                         "file or stream already set." << endl;
120         fb__.open(filename.c_str(), ios::in);
121         is.rdbuf(&fb__);
122         name = filename;
123         lineno = 0;
124         return fb__.is_open() && is.good();
125 }
126
127
128 void LyXLex::Pimpl::setStream(istream & i)
129 {
130         if (fb__.is_open() || is.tellg() > 0)
131                 lyxerr[Debug::LYXLEX]  << "Error in LyXLex::setStream: "
132                         "file or stream already set." << endl;
133         is.rdbuf(i.rdbuf());
134         lineno = 0;
135 }
136
137 void LyXLex::Pimpl::setCommentChar(char c)
138 {
139         commentChar = c;
140 }
141
142
143 bool LyXLex::Pimpl::next(bool esc /* = false */)
144 {
145         if (!pushTok.empty()) {
146                 // There can have been a whole line pushed so
147                 // we extract the first word and leaves the rest
148                 // in pushTok. (Lgb)
149                 if (pushTok.find(' ') != string::npos) {
150                         string tmp;
151                         pushTok = split(pushTok, tmp, ' ');
152                         tmp.copy(buff, string::npos);
153                         buff[tmp.length()] = '\0';
154                         return true;
155                 } else {
156                         pushTok.copy(buff, string::npos);
157                         buff[pushTok.length()] = '\0';
158                         pushTok.erase();
159                         return true;
160                 }
161         }
162         if (!esc) {
163                 unsigned char c = 0; // getc() returns an int
164                 char cc = 0;
165                 status = 0;
166                 while (is && !status) {
167                         is.get(cc);
168                         c = cc;
169                         if (c == commentChar) {
170                                 // Read rest of line (fast :-)
171                                 // That is not fast... (Lgb)
172 #if 1
173                                 is.getline(buff, sizeof(buff));
174                                 lyxerr[Debug::LYXLEX] << "Comment read: `" << c
175                                                       << buff << "'" << endl;
176 #else
177                                 // unfortunately ignore is buggy (Lgb)
178                                 is.ignore(100, '\n');
179 #endif
180                                 ++lineno;
181                                 continue;
182                         }
183
184                         if (c == '\"') {
185                                 int i = -1;
186                                 do {
187                                         is.get(cc);
188                                         c = cc;
189                                         if (c != '\r')
190                                                 buff[++i] = c;
191                                 } while (c != '\"' && c != '\n' && is &&
192                                          i != (LEX_MAX_BUFF - 2));
193
194                                 if (i == (LEX_MAX_BUFF - 2)) {
195                                         printError("Line too long");
196                                         c = '\"'; // Pretend we got a "
197                                         ++i;
198                                 }
199
200                                 if (c != '\"') {
201                                         printError("Missing quote");
202                                         if (c == '\n')
203                                                 ++lineno;
204                                 }
205
206                                 buff[i] = '\0';
207                                 status = LEX_DATA;
208                                 break;
209                         }
210
211                         if (c == ',')
212                                 continue;              /* Skip ','s */
213
214                                 // using relational operators with chars other
215                                 // than == and != is not safe. And if it is done
216                                 // the type _have_ to be unsigned. It usually a
217                                 // lot better to use the functions from cctype
218                         if (c > ' ' && is)  {
219                                 int i = 0;
220                                 do {
221                                         buff[i++] = c;
222                                         is.get(cc);
223                                         c = cc;
224                                 } while (c > ' ' && c != ',' && is
225                                          && (i != LEX_MAX_BUFF - 1));
226                                 if (i == LEX_MAX_BUFF - 1) {
227                                         printError("Line too long");
228                                 }
229                                 buff[i] = '\0';
230                                 status = LEX_TOKEN;
231                         }
232
233                         if (c == '\r' && is) {
234                                 // The Windows support has lead to the
235                                 // possibility of "\r\n" at the end of
236                                 // a line.  This will stop LyX choking
237                                 // when it expected to find a '\n'
238                                 is.get(cc);
239                                 c = cc;
240                         }
241
242                         if (c == '\n')
243                                 ++lineno;
244
245                 }
246                 if (status) return true;
247
248                 status = is.eof() ? LEX_FEOF: LEX_UNDEF;
249                 buff[0] = '\0';
250                 return false;
251         } else {
252                 unsigned char c = 0; // getc() returns an int
253                 char cc = 0;
254
255                 status = 0;
256                 while (is && !status) {
257                         is.get(cc);
258                         c = cc;
259
260                         // skip ','s
261                         if (c == ',') continue;
262
263                         if (c == '\\') {
264                                 // escape
265                                 int i = 0;
266                                 do {
267                                         if (c == '\\') {
268                                                 // escape the next char
269                                                 is.get(cc);
270                                                 c = cc;
271                                         }
272                                         buff[i++] = c;
273                                         is.get(cc);
274                                         c = cc;
275                                 } while (c > ' ' && c != ',' && is
276                                          && (i != LEX_MAX_BUFF - 1));
277                                 if (i == LEX_MAX_BUFF - 1) {
278                                         printError("Line too long");
279                                 }
280                                 buff[i] = '\0';
281                                 status = LEX_TOKEN;
282                                 continue;
283                         }
284
285                         if (c == commentChar) {
286                                 // Read rest of line (fast :-)
287                                 // That is still not fast... (Lgb)
288 #if 1
289                                 is.getline(buff, sizeof(buff));
290                                 lyxerr[Debug::LYXLEX] << "Comment read: `" << c
291                                                       << buff << "'" << endl;
292 #else
293                                 // but ignore is also still buggy (Lgb)
294                                 // This is fast (Lgb)
295                                 is.ignore(100, '\n');
296 #endif
297                                 ++lineno;
298                                 continue;
299                         }
300
301                         // string
302                         if (c == '\"') {
303                                 int i = -1;
304                                 bool escaped = false;
305                                 do {
306                                         escaped = false;
307                                         is.get(cc);
308                                         c = cc;
309                                         if (c == '\r') continue;
310                                         if (c == '\\') {
311                                                 // escape the next char
312                                                 is.get(cc);
313                                                 c = cc;
314                                                 if (c == '\"' || c == '\\')
315                                                         escaped = true;
316                                                 else
317                                                         buff[++i] = '\\';
318                                         }
319                                         buff[++i] = c;
320
321                                         if (!escaped && c == '\"') break;
322                                 } while (c != '\n' && is &&
323                                          i != (LEX_MAX_BUFF - 2));
324
325                                 if (i == (LEX_MAX_BUFF - 2)) {
326                                         printError("Line too long");
327                                         c = '\"'; // Pretend we got a "
328                                         ++i;
329                                 }
330
331                                 if (c != '\"') {
332                                         printError("Missing quote");
333                                         if (c == '\n')
334                                                 ++lineno;
335                                 }
336
337                                 buff[i] = '\0';
338                                 status = LEX_DATA;
339                                 break;
340                         }
341
342                         if (c > ' ' && is) {
343                                 int i = 0;
344                                 do {
345                                         if (c == '\\') {
346                                                 // escape the next char
347                                                 is.get(cc);
348                                                 c = cc;
349                                                 //escaped = true;
350                                         }
351                                         buff[i++] = c;
352                                         is.get(cc);
353                                         c = cc;
354                                 } while (c > ' ' && c != ',' && is
355                                          && (i != LEX_MAX_BUFF-1));
356                                 if (i == LEX_MAX_BUFF-1) {
357                                         printError("Line too long");
358                                 }
359                                 buff[i] = '\0';
360                                 status = LEX_TOKEN;
361                         }
362                         // new line
363                         if (c == '\n')
364                                 ++lineno;
365                 }
366
367                 if (status) return true;
368
369                 status = is.eof() ? LEX_FEOF : LEX_UNDEF;
370                 buff[0] = '\0';
371                 return false;
372         }
373 }
374
375
376 int LyXLex::Pimpl::search_kw(char const * const tag) const
377 {
378         keyword_item search_tag = { tag, 0 };
379         keyword_item * res =
380                 lower_bound(table, table + no_items,
381                             search_tag, compare_tags());
382         if (res != table + no_items
383             && !compare_no_case(res->tag, tag))
384                 return res->code;
385         return LEX_UNDEF;
386 }
387
388
389 int LyXLex::Pimpl::lex()
390 {
391         //NOTE: possible bug.
392         if (next() && status == LEX_TOKEN)
393                 return search_kw(buff);
394         else
395                 return status;
396 }
397
398
399 bool LyXLex::Pimpl::eatLine()
400 {
401         int i = 0;
402         unsigned char c = '\0';
403         char cc = 0;
404         while (is && c != '\n' && i != (LEX_MAX_BUFF - 1)) {
405                 is.get(cc);
406                 c = cc;
407                 //lyxerr[Debug::LYXLEX] << "LyXLex::EatLine read char: `"
408                 //                    << c << "'" << endl;
409                 if (c != '\r')
410                         buff[i++] = c;
411         }
412         if (i == (LEX_MAX_BUFF - 1) && c != '\n') {
413                 printError("Line too long");
414                 c = '\n'; // Pretend we had an end of line
415                 --lineno; // but don't increase line counter (netto effect)
416                 ++i; // and preserve last character read.
417         }
418         if (c == '\n') {
419                 ++lineno;
420                 buff[--i] = '\0'; // i can never be 0 here, so no danger
421                 status = LEX_DATA;
422                 return true;
423         } else {
424                 buff[i] = '\0';
425                 return false;
426         }
427 }
428
429
430 bool LyXLex::Pimpl::nextToken()
431 {
432         if (!pushTok.empty()) {
433                 // There can have been a whole line pushed so
434                 // we extract the first word and leaves the rest
435                 // in pushTok. (Lgb)
436                 if (pushTok.find(' ') != string::npos) {
437                         string tmp;
438                         pushTok = split(pushTok, tmp, ' ');
439                         tmp.copy(buff, string::npos);
440                         buff[tmp.length()] = '\0';
441                         return true;
442                 } else {
443                         pushTok.copy(buff, string::npos);
444                         buff[pushTok.length()] = '\0';
445                         pushTok.erase();
446                         return true;
447                 }
448         }
449
450         status = 0;
451         while (is && !status) {
452                 unsigned char c = 0;
453                 char cc = 0;
454                 is.get(cc);
455                 c = cc;
456                 if (c >= ' ' && is) {
457                         int i = 0;
458                         if (c == '\\') { // first char == '\\'
459                                 do {
460                                         buff[i++] = c;
461                                         is.get(cc);
462                                         c = cc;
463                                 } while (c > ' ' && c != '\\' && is
464                                          && i != (LEX_MAX_BUFF-1));
465                         } else {
466                                 do {
467                                         buff[i++] = c;
468                                         is.get(cc);
469                                         c = cc;
470                                 } while (c >= ' ' && c != '\\' && is
471                                          && i != (LEX_MAX_BUFF-1));
472                         }
473
474                         if (i == (LEX_MAX_BUFF - 1)) {
475                                 printError("Line too long");
476                         }
477
478                         if (c == '\\') is.putback(c); // put it back
479                         buff[i] = '\0';
480                         status = LEX_TOKEN;
481                 }
482
483                 if (c == '\n')
484                         ++lineno;
485
486         }
487         if (status)  return true;
488
489         status = is.eof() ? LEX_FEOF: LEX_UNDEF;
490         buff[0] = '\0';
491         return false;
492 }
493
494
495 void LyXLex::Pimpl::pushToken(string const & pt)
496 {
497         pushTok = pt;
498 }