]> git.lyx.org Git - lyx.git/blob - src/lyxlex_pimpl.C
Move #includes out of header files.
[lyx.git] / src / lyxlex_pimpl.C
1 /**
2  * \file lyxlex_pimpl.C
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author Lars Gullik Bjønnes
7  * \author Jean-Marc Lasgouttes
8  * \author Jürgen Vigna
9  *
10  * Full author contact details are available in file CREDITS.
11  */
12
13 #include <config.h>
14
15 #include "lyxlex_pimpl.h"
16 #include "debug.h"
17
18 #include "support/lyxalgo.h"
19 #include "support/filetools.h"
20 #include "support/lstrings.h"
21
22 #include <algorithm>
23
24 using namespace lyx::support;
25
26 using std::sort;
27 using std::ostream;
28 using std::ios;
29 using std::istream;
30 using std::endl;
31 using std::lower_bound;
32 using std::vector;
33 using std::getline;
34
35 // namespace {
36 struct compare_tags {
37         // used by lower_bound, sort and sorted
38         inline
39         int operator()(keyword_item const & a, keyword_item const & b) const {
40                 // we use the ascii version, because in turkish, 'i'
41                 // is not the lowercase version of 'I', and thus
42                 // turkish locale breaks parsing of tags.
43                 return compare_ascii_no_case(a.tag, b.tag) < 0;
44         }
45 };
46 // } // end of anon namespace
47
48
49 LyXLex::Pimpl::Pimpl(keyword_item * tab, int num)
50         : is(&fb__), table(tab), no_items(num),
51           status(0), lineno(0), commentChar('#')
52 {
53         verifyTable();
54 }
55
56
57 string const LyXLex::Pimpl::getString() const
58 {
59         return string(buff.begin(), buff.end());
60 }
61
62
63 void LyXLex::Pimpl::printError(string const & message) const
64 {
65         string const tmpmsg = subst(message, "$$Token", getString());
66         lyxerr << "LyX: " << tmpmsg << " [around line " << lineno
67                << " of file " << MakeDisplayPath(name) << ']' << endl;
68 }
69
70
71 void LyXLex::Pimpl::printTable(ostream & os)
72 {
73         os << "\nNumber of tags: " << no_items << endl;
74         for (int i= 0; i < no_items; ++i)
75                 os << "table[" << i
76                    << "]:  tag: `" << table[i].tag
77                    << "'  code:" << table[i].code << '\n';
78         os.flush();
79 }
80
81
82 void LyXLex::Pimpl::verifyTable()
83 {
84         // Check if the table is sorted and if not, sort it.
85         if (table
86             && !lyx::sorted(table, table + no_items, compare_tags())) {
87                 lyxerr << "The table passed to LyXLex is not sorted!\n"
88                        << "Tell the developers to fix it!" << endl;
89                 // We sort it anyway to avoid problems.
90                 lyxerr << "\nUnsorted:" << endl;
91                 printTable(lyxerr);
92
93                 sort(table, table + no_items, compare_tags());
94                 lyxerr << "\nSorted:" << endl;
95                 printTable(lyxerr);
96         }
97 }
98
99
100 void LyXLex::Pimpl::pushTable(keyword_item * tab, int num)
101 {
102         pushed_table tmppu(table, no_items);
103         pushed.push(tmppu);
104
105         table = tab;
106         no_items = num;
107
108         verifyTable();
109 }
110
111
112 void LyXLex::Pimpl::popTable()
113 {
114         if (pushed.empty()) {
115                 lyxerr << "LyXLex error: nothing to pop!" << endl;
116                 return;
117         }
118
119         pushed_table tmp = pushed.top();
120         pushed.pop();
121         table = tmp.table_elem;
122         no_items = tmp.table_siz;
123 }
124
125
126 bool LyXLex::Pimpl::setFile(string const & filename)
127 {
128
129         // Check the format of the file.
130         string const format = getExtFromContents(filename);
131
132         if (format == "gzip" || format == "zip" || format == "compress") {
133                 lyxerr << "lyxlex: compressed" << endl;
134
135                 // The check only outputs a debug message, because it triggers
136                 // a bug in compaq cxx 6.2, where is_open() returns 'true' for
137                 // a fresh new filebuf.  (JMarc)
138                 if (gz__.is_open() || is.tellg() > 0)
139                         lyxerr[Debug::LYXLEX] << "Error in LyXLex::setFile: "
140                                 "file or stream already set." << endl;
141                 gz__.open(filename.c_str(), ios::in);
142                 is.rdbuf(&gz__);
143                 name = filename;
144                 lineno = 0;
145                 return gz__.is_open() && is.good();
146         } else {
147                 lyxerr << "lyxlex: UNcompressed" << endl;
148
149                 // The check only outputs a debug message, because it triggers
150                 // a bug in compaq cxx 6.2, where is_open() returns 'true' for
151                 // a fresh new filebuf.  (JMarc)
152                 if (fb__.is_open() || is.tellg() > 0)
153                         lyxerr[Debug::LYXLEX] << "Error in LyXLex::setFile: "
154                                 "file or stream already set." << endl;
155                 fb__.open(filename.c_str(), ios::in);
156                 is.rdbuf(&fb__);
157                 name = filename;
158                 lineno = 0;
159                 return fb__.is_open() && is.good();
160         }
161 }
162
163
164 void LyXLex::Pimpl::setStream(istream & i)
165 {
166         if (fb__.is_open() || is.tellg() > 0)
167                 lyxerr[Debug::LYXLEX]  << "Error in LyXLex::setStream: "
168                         "file or stream already set." << endl;
169         is.rdbuf(i.rdbuf());
170         lineno = 0;
171 }
172
173
174 void LyXLex::Pimpl::setCommentChar(char c)
175 {
176         commentChar = c;
177 }
178
179
/// Reads the next lexical item into buff.
///
/// A pending pushed-back token (pushTok) is delivered first. Otherwise
/// characters are read from the input stream until an item has been
/// recognised:
///  - text starting at commentChar is skipped up to the end of the line,
///  - text inside double quotes becomes a LEX_DATA item (quotes removed,
///    '\r' dropped),
///  - commas are skipped,
///  - a run of characters greater than ' ' (ending at a comma or at a
///    character not greater than ' ') becomes a LEX_TOKEN item.
/// Every '\n' seen on these paths increments lineno.
///
/// \param esc when true, a backslash escapes the following character,
///        both inside quoted strings and inside plain tokens.
/// \return true when buff holds an item; false otherwise, in which case
///         buff is cleared and status is LEX_FEOF (stream at eof) or
///         LEX_UNDEF.
///
/// NOTE(review): the pushed-token path returns true without touching
/// status, so status keeps whatever the previous read left there.
bool LyXLex::Pimpl::next(bool esc /* = false */)
{
        if (!pushTok.empty()) {
                // There can have been a whole line pushed so
                // we extract the first word and leaves the rest
                // in pushTok. (Lgb)
                // Only done when the pushed text starts with a backslash
                // and contains a space; otherwise the whole text is
                // delivered at once.
                if (pushTok.find(' ') != string::npos && pushTok[0] == '\\') {
                        string tmp;
                        pushTok = split(pushTok, tmp, ' ');
                        buff.assign(tmp.begin(), tmp.end());
                        return true;
                } else {
                        buff.assign(pushTok.begin(), pushTok.end());
                        pushTok.erase();
                        return true;
                }
        }
        if (!esc) {
                // c is the unsigned view of the last character read, so
                // that the relational comparisons below are well defined.
                unsigned char c = 0; // getc() returns an int
                char cc = 0;
                status = 0;
                // Loop until an item has been recognised or the stream fails.
                // NOTE(review): the result of is.get() is not checked
                // before c is used; confirm the behaviour at end of input.
                while (is && !status) {
                        is.get(cc);
                        c = cc;
                        if (c == commentChar) {
                                // Discard the rest of the comment line.
#if 1
                                // That is not fast... (Lgb)
                                string dummy;
                                getline(is, dummy);

                                lyxerr[Debug::LYXLEX] << "Comment read: `" << c
                                                      << dummy << '\'' << endl;
#else
                                // unfortunately ignore is buggy (Lgb)
                                is.ignore(100, '\n');
#endif
                                ++lineno;
                                continue;
                        }

                        // Quoted string: collect everything up to the closing
                        // quote or the end of the line, dropping '\r'.
                        if (c == '\"') {
                                buff.clear();

                                do {
                                        is.get(cc);
                                        c = cc;
                                        if (c != '\r')
                                                buff.push_back(c);
                                } while (c != '\"' && c != '\n' && is);

                                if (c != '\"') {
                                        printError("Missing quote");
                                        if (c == '\n')
                                                ++lineno;
                                }

                                // Remove the terminating character (closing
                                // quote or newline) that the loop stored.
                                // NOTE(review): assumes the loop stored at
                                // least one character - confirm for input
                                // that ends right after the opening quote.
                                buff.pop_back();
                                status = LEX_DATA;
                                break;
                        }

                        if (c == ',')
                                continue;              /* Skip ','s */

                        // using relational operators with chars other
                        // than == and != is not safe. And if it is done
                        // the type _have_ to be unsigned. It usually a
                        // lot better to use the functions from cctype
                        if (c > ' ' && is)  {
                                buff.clear();

                                // Plain token: runs until a comma or a
                                // character not greater than ' ' is read.
                                do {
                                        buff.push_back(c);
                                        is.get(cc);
                                        c = cc;
                                } while (c > ' ' && c != ',' && is);

                                status = LEX_TOKEN;
                        }

                        if (c == '\r' && is) {
                                // The Windows support has lead to the
                                // possibility of "\r\n" at the end of
                                // a line.  This will stop LyX choking
                                // when it expected to find a '\n'
                                is.get(cc);
                                c = cc;
                        }

                        if (c == '\n')
                                ++lineno;

                }
                if (status) return true;

                // Nothing recognised: report end of file or an undefined state.
                status = is.eof() ? LEX_FEOF: LEX_UNDEF;
                buff.clear();
                return false;
        } else {
                // Same scanner as above, with backslash escapes honoured.
                unsigned char c = 0; // getc() returns an int
                char cc = 0;

                status = 0;
                while (is && !status) {
                        is.get(cc);
                        c = cc;

                        // skip ','s
                        if (c == ',') continue;

                        if (c == '\\') {
                                // Token that starts with an escape: every
                                // backslash is dropped and the character
                                // following it is stored verbatim.
                                buff.clear();

                                do {
                                        if (c == '\\') {
                                                // escape the next char
                                                is.get(cc);
                                                c = cc;
                                        }
                                        buff.push_back(c);
                                        is.get(cc);
                                        c = cc;
                                } while (c > ' ' && c != ',' && is);

                                status = LEX_TOKEN;
                                continue;
                        }

                        if (c == commentChar) {
                                // Discard the rest of the comment line.
#if 1
                                // That is still not fast... (Lgb)
                                string dummy;
                                getline(is, dummy);

                                lyxerr[Debug::LYXLEX] << "Comment read: `" << c
                                                      << dummy << '\'' << endl;
#else
                                // but ignore is also still buggy (Lgb)
                                // This is fast (Lgb)
                                is.ignore(100, '\n');
#endif
                                ++lineno;
                                continue;
                        }

                        // string
                        if (c == '\"') {
                                buff.clear();

                                // escaped is true only while the character
                                // just stored was an escaped quote or
                                // backslash, so that an escaped quote does
                                // not end the string.
                                bool escaped = false;
                                do {
                                        escaped = false;
                                        is.get(cc);
                                        c = cc;
                                        if (c == '\r') continue;
                                        if (c == '\\') {
                                                // escape the next char
                                                is.get(cc);
                                                c = cc;
                                                // Only \" and \\ are real
                                                // escapes; for any other
                                                // character the backslash
                                                // is kept in the output.
                                                if (c == '\"' || c == '\\')
                                                        escaped = true;
                                                else
                                                        buff.push_back('\\');
                                        }
                                        buff.push_back(c);

                                        if (!escaped && c == '\"') break;
                                } while (c != '\n' && is);

                                if (c != '\"') {
                                        printError("Missing quote");
                                        if (c == '\n')
                                                ++lineno;
                                }

                                // Remove the terminating character stored
                                // by the loop.
                                // NOTE(review): assumes the loop stored at
                                // least one character - confirm.
                                buff.pop_back();
                                status = LEX_DATA;
                                break;
                        }

                        if (c > ' ' && is) {
                                buff.clear();

                                // Plain token with escapes: a backslash is
                                // dropped and the next character is stored.
                                do {
                                        if (c == '\\') {
                                                // escape the next char
                                                is.get(cc);
                                                c = cc;
                                                //escaped = true;
                                        }
                                        buff.push_back(c);
                                        is.get(cc);
                                        c = cc;
                                } while (c > ' ' && c != ',' && is);

                                status = LEX_TOKEN;
                        }
                        // new line
                        if (c == '\n')
                                ++lineno;
                }

                if (status) return true;

                // Nothing recognised: report end of file or an undefined state.
                status = is.eof() ? LEX_FEOF : LEX_UNDEF;
                buff.clear();
                return false;
        }
}
392
393
394 int LyXLex::Pimpl::search_kw(char const * const tag) const
395 {
396         keyword_item search_tag = { tag, 0 };
397         keyword_item * res =
398                 lower_bound(table, table + no_items,
399                             search_tag, compare_tags());
400         // use the compare_ascii_no_case instead of compare_no_case,
401         // because in turkish, 'i' is not the lowercase version of 'I',
402         // and thus turkish locale breaks parsing of tags.
403         if (res != table + no_items
404             && !compare_ascii_no_case(res->tag, tag))
405                 return res->code;
406         return LEX_UNDEF;
407 }
408
409
410 int LyXLex::Pimpl::lex()
411 {
412         //NOTE: possible bug.
413         if (next() && status == LEX_TOKEN) {
414                 return search_kw(getString().c_str());
415         } else
416                 return status;
417 }
418
419
420 bool LyXLex::Pimpl::eatLine()
421 {
422         buff.clear();
423
424         unsigned char c = '\0';
425         char cc = 0;
426         while (is && c != '\n') {
427                 is.get(cc);
428                 c = cc;
429                 //lyxerr[Debug::LYXLEX] << "LyXLex::EatLine read char: `"
430                 //                    << c << '\'' << endl;
431                 if (c != '\r')
432                         buff.push_back(c);
433         }
434
435         if (c == '\n') {
436                 ++lineno;
437                 buff.pop_back();
438                 status = LEX_DATA;
439                 return true;
440         } else {
441                 return false;
442         }
443 }
444
445
446 bool LyXLex::Pimpl::nextToken()
447 {
448         if (!pushTok.empty()) {
449                 // There can have been a whole line pushed so
450                 // we extract the first word and leaves the rest
451                 // in pushTok. (Lgb)
452                 if (pushTok.find(' ') != string::npos && pushTok[0] == '\\') {
453                         string tmp;
454                         pushTok = split(pushTok, tmp, ' ');
455                         buff.assign(tmp.begin(), tmp.end());
456                         return true;
457                 } else {
458                         buff.assign(pushTok.begin(), pushTok.end());
459                         pushTok.erase();
460                         return true;
461                 }
462         }
463
464         status = 0;
465         while (is && !status) {
466                 unsigned char c = 0;
467                 char cc = 0;
468                 is.get(cc);
469                 c = cc;
470                 if (c >= ' ' && is) {
471                         buff.clear();
472
473                         if (c == '\\') { // first char == '\\'
474                                 do {
475                                         buff.push_back(c);
476                                         is.get(cc);
477                                         c = cc;
478                                 } while (c > ' ' && c != '\\' && is);
479                         } else {
480                                 do {
481                                         buff.push_back(c);
482                                         is.get(cc);
483                                         c = cc;
484                                 } while (c >= ' ' && c != '\\' && is);
485                         }
486
487                         if (c == '\\') is.putback(c); // put it back
488                         status = LEX_TOKEN;
489                 }
490
491                 if (c == '\n')
492                         ++lineno;
493
494         }
495         if (status)  return true;
496
497         status = is.eof() ? LEX_FEOF: LEX_UNDEF;
498         buff.clear();
499         return false;
500 }
501
502
503 void LyXLex::Pimpl::pushToken(string const & pt)
504 {
505         pushTok = pt;
506 }