]> git.lyx.org Git - lyx.git/blob - src/lyxlex_pimpl.C
ws changes only
[lyx.git] / src / lyxlex_pimpl.C
1 /**
2  * \file lyxlex_pimpl.C
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author Lars Gullik Bjønnes
7  * \author Jean-Marc Lasgouttes
8  * \author Jürgen Vigna
9  *
10  * Full author contact details are available in file CREDITS.
11  */
12
13 #include <config.h>
14
15 #include "lyxlex_pimpl.h"
16
17 #include "debug.h"
18
19 #include "support/filetools.h"
20 #include "support/lyxalgo.h"
21 #include "support/lstrings.h"
22
23 using lyx::support::compare_ascii_no_case;
24 using lyx::support::getExtFromContents;
25 using lyx::support::MakeDisplayPath;
26 using lyx::support::split;
27 using lyx::support::subst;
28
29 using std::endl;
30 using std::getline;
31 using std::lower_bound;
32 using std::sort;
33 using std::string;
34 using std::ios;
35 using std::istream;
36 using std::ostream;
37
38 // namespace {
39 struct compare_tags {
40         // used by lower_bound, sort and sorted
41         inline
42         int operator()(keyword_item const & a, keyword_item const & b) const {
43                 // we use the ascii version, because in turkish, 'i'
44                 // is not the lowercase version of 'I', and thus
45                 // turkish locale breaks parsing of tags.
46                 return compare_ascii_no_case(a.tag, b.tag) < 0;
47         }
48 };
49 // } // end of anon namespace
50
51
52 LyXLex::Pimpl::Pimpl(keyword_item * tab, int num)
53         : is(&fb__), table(tab), no_items(num),
54           status(0), lineno(0), commentChar('#')
55 {
56         verifyTable();
57 }
58
59
60 string const LyXLex::Pimpl::getString() const
61 {
62         return string(buff.begin(), buff.end());
63 }
64
65
66 void LyXLex::Pimpl::printError(string const & message) const
67 {
68         string const tmpmsg = subst(message, "$$Token", getString());
69         lyxerr << "LyX: " << tmpmsg << " [around line " << lineno
70                << " of file " << MakeDisplayPath(name) << ']' << endl;
71 }
72
73
74 void LyXLex::Pimpl::printTable(ostream & os)
75 {
76         os << "\nNumber of tags: " << no_items << endl;
77         for (int i= 0; i < no_items; ++i)
78                 os << "table[" << i
79                    << "]:  tag: `" << table[i].tag
80                    << "'  code:" << table[i].code << '\n';
81         os.flush();
82 }
83
84
85 void LyXLex::Pimpl::verifyTable()
86 {
87         // Check if the table is sorted and if not, sort it.
88         if (table
89             && !lyx::sorted(table, table + no_items, compare_tags())) {
90                 lyxerr << "The table passed to LyXLex is not sorted!\n"
91                        << "Tell the developers to fix it!" << endl;
92                 // We sort it anyway to avoid problems.
93                 lyxerr << "\nUnsorted:" << endl;
94                 printTable(lyxerr);
95
96                 sort(table, table + no_items, compare_tags());
97                 lyxerr << "\nSorted:" << endl;
98                 printTable(lyxerr);
99         }
100 }
101
102
103 void LyXLex::Pimpl::pushTable(keyword_item * tab, int num)
104 {
105         pushed_table tmppu(table, no_items);
106         pushed.push(tmppu);
107
108         table = tab;
109         no_items = num;
110
111         verifyTable();
112 }
113
114
115 void LyXLex::Pimpl::popTable()
116 {
117         if (pushed.empty()) {
118                 lyxerr << "LyXLex error: nothing to pop!" << endl;
119                 return;
120         }
121
122         pushed_table tmp = pushed.top();
123         pushed.pop();
124         table = tmp.table_elem;
125         no_items = tmp.table_siz;
126 }
127
128
129 bool LyXLex::Pimpl::setFile(string const & filename)
130 {
131
132         // Check the format of the file.
133         string const format = getExtFromContents(filename);
134
135         if (format == "gzip" || format == "zip" || format == "compress") {
136                 lyxerr[Debug::LYXLEX] << "lyxlex: compressed" << endl;
137
138                 // The check only outputs a debug message, because it triggers
139                 // a bug in compaq cxx 6.2, where is_open() returns 'true' for
140                 // a fresh new filebuf.  (JMarc)
141                 if (gz__.is_open() || is.tellg() > 0)
142                         lyxerr[Debug::LYXLEX] << "Error in LyXLex::setFile: "
143                                 "file or stream already set." << endl;
144                 gz__.open(filename.c_str(), ios::in);
145                 is.rdbuf(&gz__);
146                 name = filename;
147                 lineno = 0;
148                 return gz__.is_open() && is.good();
149         } else {
150                 lyxerr[Debug::LYXLEX] << "lyxlex: UNcompressed" << endl;
151
152                 // The check only outputs a debug message, because it triggers
153                 // a bug in compaq cxx 6.2, where is_open() returns 'true' for
154                 // a fresh new filebuf.  (JMarc)
155                 if (fb__.is_open() || is.tellg() > 0)
156                         lyxerr[Debug::LYXLEX] << "Error in LyXLex::setFile: "
157                                 "file or stream already set." << endl;
158                 fb__.open(filename.c_str(), ios::in);
159                 is.rdbuf(&fb__);
160                 name = filename;
161                 lineno = 0;
162                 return fb__.is_open() && is.good();
163         }
164 }
165
166
167 void LyXLex::Pimpl::setStream(istream & i)
168 {
169         if (fb__.is_open() || is.tellg() > 0)
170                 lyxerr[Debug::LYXLEX]  << "Error in LyXLex::setStream: "
171                         "file or stream already set." << endl;
172         is.rdbuf(i.rdbuf());
173         lineno = 0;
174 }
175
176
177 void LyXLex::Pimpl::setCommentChar(char c)
178 {
179         commentChar = c;
180 }
181
182
183 bool LyXLex::Pimpl::next(bool esc /* = false */)
184 {
185         if (!pushTok.empty()) {
186                 // There can have been a whole line pushed so
187                 // we extract the first word and leaves the rest
188                 // in pushTok. (Lgb)
189                 if (pushTok.find(' ') != string::npos && pushTok[0] == '\\') {
190                         string tmp;
191                         pushTok = split(pushTok, tmp, ' ');
192                         buff.assign(tmp.begin(), tmp.end());
193                         return true;
194                 } else {
195                         buff.assign(pushTok.begin(), pushTok.end());
196                         pushTok.erase();
197                         return true;
198                 }
199         }
200         if (!esc) {
201                 unsigned char c = 0; // getc() returns an int
202                 char cc = 0;
203                 status = 0;
204                 while (is && !status) {
205                         is.get(cc);
206                         c = cc;
207                         if (c == commentChar) {
208                                 // Read rest of line (fast :-)
209 #if 1
210                                 // That is not fast... (Lgb)
211                                 string dummy;
212                                 getline(is, dummy);
213
214                                 lyxerr[Debug::LYXLEX] << "Comment read: `" << c
215                                                       << dummy << '\'' << endl;
216 #else
217                                 // unfortunately ignore is buggy (Lgb)
218                                 is.ignore(100, '\n');
219 #endif
220                                 ++lineno;
221                                 continue;
222                         }
223
224                         if (c == '\"') {
225                                 buff.clear();
226
227                                 do {
228                                         is.get(cc);
229                                         c = cc;
230                                         if (c != '\r')
231                                                 buff.push_back(c);
232                                 } while (c != '\"' && c != '\n' && is);
233
234                                 if (c != '\"') {
235                                         printError("Missing quote");
236                                         if (c == '\n')
237                                                 ++lineno;
238                                 }
239
240                                 buff.pop_back();
241                                 status = LEX_DATA;
242                                 break;
243                         }
244
245                         if (c == ',')
246                                 continue;              /* Skip ','s */
247
248                                 // using relational operators with chars other
249                                 // than == and != is not safe. And if it is done
250                                 // the type _have_ to be unsigned. It usually a
251                                 // lot better to use the functions from cctype
252                         if (c > ' ' && is)  {
253                                 buff.clear();
254
255                                 do {
256                                         buff.push_back(c);
257                                         is.get(cc);
258                                         c = cc;
259                                 } while (c > ' ' && c != ',' && is);
260
261                                 status = LEX_TOKEN;
262                         }
263
264                         if (c == '\r' && is) {
265                                 // The Windows support has lead to the
266                                 // possibility of "\r\n" at the end of
267                                 // a line.  This will stop LyX choking
268                                 // when it expected to find a '\n'
269                                 is.get(cc);
270                                 c = cc;
271                         }
272
273                         if (c == '\n')
274                                 ++lineno;
275
276                 }
277                 if (status) return true;
278
279                 status = is.eof() ? LEX_FEOF: LEX_UNDEF;
280                 buff.clear();
281                 return false;
282         } else {
283                 unsigned char c = 0; // getc() returns an int
284                 char cc = 0;
285
286                 status = 0;
287                 while (is && !status) {
288                         is.get(cc);
289                         c = cc;
290
291                         // skip ','s
292                         if (c == ',') continue;
293
294                         if (c == '\\') {
295                                 // escape
296                                 buff.clear();
297
298                                 do {
299                                         if (c == '\\') {
300                                                 // escape the next char
301                                                 is.get(cc);
302                                                 c = cc;
303                                         }
304                                         buff.push_back(c);
305                                         is.get(cc);
306                                         c = cc;
307                                 } while (c > ' ' && c != ',' && is);
308
309                                 status = LEX_TOKEN;
310                                 continue;
311                         }
312
313                         if (c == commentChar) {
314                                 // Read rest of line (fast :-)
315 #if 1
316                                 // That is still not fast... (Lgb)
317                                 string dummy;
318                                 getline(is, dummy);
319
320                                 lyxerr[Debug::LYXLEX] << "Comment read: `" << c
321                                                       << dummy << '\'' << endl;
322 #else
323                                 // but ignore is also still buggy (Lgb)
324                                 // This is fast (Lgb)
325                                 is.ignore(100, '\n');
326 #endif
327                                 ++lineno;
328                                 continue;
329                         }
330
331                         // string
332                         if (c == '\"') {
333                                 buff.clear();
334
335                                 bool escaped = false;
336                                 do {
337                                         escaped = false;
338                                         is.get(cc);
339                                         c = cc;
340                                         if (c == '\r') continue;
341                                         if (c == '\\') {
342                                                 // escape the next char
343                                                 is.get(cc);
344                                                 c = cc;
345                                                 if (c == '\"' || c == '\\')
346                                                         escaped = true;
347                                                 else
348                                                         buff.push_back('\\');
349                                         }
350                                         buff.push_back(c);
351
352                                         if (!escaped && c == '\"') break;
353                                 } while (c != '\n' && is);
354
355                                 if (c != '\"') {
356                                         printError("Missing quote");
357                                         if (c == '\n')
358                                                 ++lineno;
359                                 }
360
361                                 buff.pop_back();
362                                 status = LEX_DATA;
363                                 break;
364                         }
365
366                         if (c > ' ' && is) {
367                                 buff.clear();
368
369                                 do {
370                                         if (c == '\\') {
371                                                 // escape the next char
372                                                 is.get(cc);
373                                                 c = cc;
374                                                 //escaped = true;
375                                         }
376                                         buff.push_back(c);
377                                         is.get(cc);
378                                         c = cc;
379                                 } while (c > ' ' && c != ',' && is);
380
381                                 status = LEX_TOKEN;
382                         }
383                         // new line
384                         if (c == '\n')
385                                 ++lineno;
386                 }
387
388                 if (status) return true;
389
390                 status = is.eof() ? LEX_FEOF : LEX_UNDEF;
391                 buff.clear();
392                 return false;
393         }
394 }
395
396
397 int LyXLex::Pimpl::search_kw(char const * const tag) const
398 {
399         keyword_item search_tag = { tag, 0 };
400         keyword_item * res =
401                 lower_bound(table, table + no_items,
402                             search_tag, compare_tags());
403         // use the compare_ascii_no_case instead of compare_no_case,
404         // because in turkish, 'i' is not the lowercase version of 'I',
405         // and thus turkish locale breaks parsing of tags.
406         if (res != table + no_items
407             && !compare_ascii_no_case(res->tag, tag))
408                 return res->code;
409         return LEX_UNDEF;
410 }
411
412
413 int LyXLex::Pimpl::lex()
414 {
415         //NOTE: possible bug.
416         if (next() && status == LEX_TOKEN) {
417                 return search_kw(getString().c_str());
418         } else
419                 return status;
420 }
421
422
423 bool LyXLex::Pimpl::eatLine()
424 {
425         buff.clear();
426
427         unsigned char c = '\0';
428         char cc = 0;
429         while (is && c != '\n') {
430                 is.get(cc);
431                 c = cc;
432                 //lyxerr[Debug::LYXLEX] << "LyXLex::EatLine read char: `"
433                 //                    << c << '\'' << endl;
434                 if (c != '\r')
435                         buff.push_back(c);
436         }
437
438         if (c == '\n') {
439                 ++lineno;
440                 buff.pop_back();
441                 status = LEX_DATA;
442                 return true;
443         } else {
444                 return false;
445         }
446 }
447
448
449 bool LyXLex::Pimpl::nextToken()
450 {
451         if (!pushTok.empty()) {
452                 // There can have been a whole line pushed so
453                 // we extract the first word and leaves the rest
454                 // in pushTok. (Lgb)
455                 if (pushTok.find(' ') != string::npos && pushTok[0] == '\\') {
456                         string tmp;
457                         pushTok = split(pushTok, tmp, ' ');
458                         buff.assign(tmp.begin(), tmp.end());
459                         return true;
460                 } else {
461                         buff.assign(pushTok.begin(), pushTok.end());
462                         pushTok.erase();
463                         return true;
464                 }
465         }
466
467         status = 0;
468         while (is && !status) {
469                 unsigned char c = 0;
470                 char cc = 0;
471                 is.get(cc);
472                 c = cc;
473                 if (c >= ' ' && is) {
474                         buff.clear();
475
476                         if (c == '\\') { // first char == '\\'
477                                 do {
478                                         buff.push_back(c);
479                                         is.get(cc);
480                                         c = cc;
481                                 } while (c > ' ' && c != '\\' && is);
482                         } else {
483                                 do {
484                                         buff.push_back(c);
485                                         is.get(cc);
486                                         c = cc;
487                                 } while (c >= ' ' && c != '\\' && is);
488                         }
489
490                         if (c == '\\') is.putback(c); // put it back
491                         status = LEX_TOKEN;
492                 }
493
494                 if (c == '\n')
495                         ++lineno;
496
497         }
498         if (status)  return true;
499
500         status = is.eof() ? LEX_FEOF: LEX_UNDEF;
501         buff.clear();
502         return false;
503 }
504
505
506 void LyXLex::Pimpl::pushToken(string const & pt)
507 {
508         pushTok = pt;
509 }