]> git.lyx.org Git - lyx.git/blob - src/lyxlex_pimpl.C
more funcs to lowerchar, adjust
[lyx.git] / src / lyxlex_pimpl.C
1 #include <config.h>
2
3 #ifdef __GNUG__
4 #pragma implementation
5 #endif
6 #include <algorithm>
7
8 #include "lyxlex_pimpl.h"
9 #include "support/lyxalgo.h"
10 #include "support/filetools.h"
11 #include "support/lstrings.h"
12 #include "debug.h"
13
14 using std::sort;
15 using std::ostream;
16 using std::ios;
17 using std::istream;
18 using std::endl;
19 using std::lower_bound;
20
21 // namespace {
22 struct compare_tags {
23         // used by lower_bound, sort and sorted
24         inline
25         int operator()(keyword_item const & a, keyword_item const & b) const {
26                 // we use the ascii version, because in turkish, 'i'
27                 // is not the lowercase version of 'I', and thus
28                 // turkish locale breaks parsing of tags.
29                 return compare_ascii_no_case(a.tag, b.tag) < 0;
30         }
31 };
32 // } // end of anon namespace
33
34
35 LyXLex::Pimpl::Pimpl(keyword_item * tab, int num) 
36         : is(&fb__), table(tab), no_items(num),
37           status(0), lineno(0), commentChar('#')
38 {
39         verifyTable();
40 }
41
42
43 string const LyXLex::Pimpl::getString() const
44 {
45         return string(buff);
46 }
47
48
49 void LyXLex::Pimpl::printError(string const & message) const
50 {
51         string const tmpmsg = subst(message, "$$Token", getString());
52         lyxerr << "LyX: " << tmpmsg << " [around line " << lineno
53                << " of file " << MakeDisplayPath(name) << ']' << endl;
54 }
55
56         
57 void LyXLex::Pimpl::printTable(ostream & os)
58 {
59         os << "\nNumber of tags: " << no_items << '\n';
60         for (int i= 0; i < no_items; ++i)
61                 os << "table[" << i
62                    << "]:  tag: `" << table[i].tag
63                    << "'  code:" << table[i].code << '\n';
64         os.flush();
65 }
66
67
68 void LyXLex::Pimpl::verifyTable()
69 {
70         // Check if the table is sorted and if not, sort it.
71         if (table
72             && !lyx::sorted(table, table + no_items, compare_tags())) {
73                 lyxerr << "The table passed to LyXLex is not sorted!\n"
74                        << "Tell the developers to fix it!" << endl;
75                 // We sort it anyway to avoid problems.
76                 lyxerr << "\nUnsorted:\n";
77                 printTable(lyxerr);
78
79                 sort(table, table + no_items, compare_tags());
80                 lyxerr << "\nSorted:\n";
81                 printTable(lyxerr);
82         }
83 }
84
85
86 void LyXLex::Pimpl::pushTable(keyword_item * tab, int num)
87 {
88         pushed_table tmppu(table, no_items);
89         pushed.push(tmppu);
90
91         table = tab;
92         no_items = num;
93
94         verifyTable();
95 }
96
97         
98 void LyXLex::Pimpl::popTable()
99 {
100         if (pushed.empty()) {
101                 lyxerr << "LyXLex error: nothing to pop!" << endl;
102                 return;
103         }
104         
105         pushed_table tmp = pushed.top();
106         pushed.pop();
107         table = tmp.table_elem;
108         no_items = tmp.table_siz;
109 }
110
111
112 bool LyXLex::Pimpl::setFile(string const & filename)
113 {
114         // The check only outputs a debug message, because it triggers
115         // a bug in compaq cxx 6.2, where is_open() returns 'true' for a
116         // fresh new filebuf.  (JMarc)
117         if (fb__.is_open() || is.tellg() > 0)
118                 lyxerr[Debug::LYXLEX] << "Error in LyXLex::setFile: "
119                         "file or stream already set." << endl;
120         fb__.open(filename.c_str(), ios::in);
121         is.rdbuf(&fb__);
122         name = filename;
123         lineno = 0;
124         return fb__.is_open() && is.good();
125 }
126
127         
128 void LyXLex::Pimpl::setStream(istream & i)
129 {
130         if (fb__.is_open() || is.tellg() > 0)
131                 lyxerr[Debug::LYXLEX]  << "Error in LyXLex::setStream: "
132                         "file or stream already set." << endl;
133         is.rdbuf(i.rdbuf());
134         lineno = 0;
135 }
136
137 void LyXLex::Pimpl::setCommentChar(char c)
138 {
139         commentChar = c;
140 }
141
142
143 bool LyXLex::Pimpl::next(bool esc /* = false */)
144 {
145         if (!pushTok.empty()) {
146                 // There can have been a whole line pushed so
147                 // we extract the first word and leaves the rest
148                 // in pushTok. (Lgb)
149                 if (pushTok.find(' ') != string::npos) {
150                         string tmp;
151                         pushTok = split(pushTok, tmp, ' ');
152                         tmp.copy(buff, string::npos);
153                         buff[tmp.length()] = '\0';
154                         return true;
155                 } else {
156                         pushTok.copy(buff, string::npos);
157                         buff[pushTok.length()] = '\0';
158                         pushTok.erase();
159                         return true;
160                 }     
161         }
162         if (!esc) {
163                 unsigned char c = 0; // getc() returns an int
164                 char cc = 0;
165                 status = 0;
166                 while (is && !status) {
167                         is.get(cc);
168                         c = cc;
169                         if (c == commentChar) {
170                                 // Read rest of line (fast :-)
171                                 // That is not fast... (Lgb)
172 #if 1
173                                 is.getline(buff, sizeof(buff));
174                                 lyxerr[Debug::LYXLEX] << "Comment read: `" << c
175                                                       << buff << "'" << endl;
176 #else
177                                 // unfortunately ignore is buggy (Lgb)
178                                 is.ignore(100, '\n');
179 #endif
180                                 ++lineno;
181                                 continue;
182                         }
183                         
184                         if (c == '\"') {
185                                 int i = -1;
186                                 do {
187                                         is.get(cc);
188                                         c = cc;
189                                         if (c != '\r')
190                                                 buff[++i] = c;
191                                 } while (c != '\"' && c != '\n' && is &&
192                                          i != (LEX_MAX_BUFF - 2));
193                                 
194                                 if (i == (LEX_MAX_BUFF - 2)) {
195                                         printError("Line too long");
196                                         c = '\"'; // Pretend we got a "
197                                         ++i;
198                                 }
199                                 
200                                 if (c != '\"') {
201                                         printError("Missing quote");
202                                         if (c == '\n')
203                                                 ++lineno;
204                                 }
205                                 
206                                 buff[i] = '\0';
207                                 status = LEX_DATA;
208                                 break; 
209                         }
210                         
211                         if (c == ',')
212                                 continue;              /* Skip ','s */
213                         
214                                 // using relational operators with chars other
215                                 // than == and != is not safe. And if it is done
216                                 // the type _have_ to be unsigned. It usually a
217                                 // lot better to use the functions from cctype
218                         if (c > ' ' && is)  {
219                                 int i = 0;
220                                 do {
221                                         buff[i++] = c;
222                                         is.get(cc);
223                                         c = cc;
224                                 } while (c > ' ' && c != ',' && is
225                                          && (i != LEX_MAX_BUFF - 1) );
226                                 if (i == LEX_MAX_BUFF - 1) {
227                                         printError("Line too long");
228                                 }
229                                 buff[i] = '\0';
230                                 status = LEX_TOKEN;
231                         }
232                         
233                         if (c == '\r' && is) {
234                                 // The Windows support has lead to the
235                                 // possibility of "\r\n" at the end of
236                                 // a line.  This will stop LyX choking
237                                 // when it expected to find a '\n'
238                                 is.get(cc);
239                                 c = cc;
240                         }
241                         
242                         if (c == '\n')
243                                 ++lineno;
244                         
245                 }
246                 if (status) return true;
247                 
248                 status = is.eof() ? LEX_FEOF: LEX_UNDEF;
249                 buff[0] = '\0';
250                 return false;
251         } else {
252                 unsigned char c = 0; // getc() returns an int
253                 char cc = 0;
254                 
255                 status = 0;
256                 while (is && !status) {
257                         is.get(cc);
258                         c = cc;
259                         
260                         // skip ','s
261                         if (c == ',') continue;
262                         
263                         if (c == '\\') {
264                                 // escape
265                                 int i = 0;
266                                 do {
267                                         if (c == '\\') {
268                                                 // escape the next char
269                                                 is.get(cc);
270                                                 c = cc;
271                                         }
272                                         buff[i++] = c;
273                                         is.get(cc);
274                                         c = cc;
275                                 } while (c > ' ' && c != ',' && is
276                                          && (i != LEX_MAX_BUFF - 1) );
277                                 if (i == LEX_MAX_BUFF - 1) {
278                                         printError("Line too long");
279                                 }
280                                 buff[i] = '\0';
281                                 status = LEX_TOKEN;
282                                 continue;
283                         }
284                         
285                         if (c == commentChar) {
286                                 // Read rest of line (fast :-)
287                                 // That is still not fast... (Lgb)
288 #if 1
289                                 is.getline(buff, sizeof(buff));
290                                 lyxerr[Debug::LYXLEX] << "Comment read: `" << c
291                                                       << buff << "'" << endl;
292 #else
293                                 // but ignore is also still buggy (Lgb)
294                                 // This is fast (Lgb)
295                                 is.ignore(100, '\n');
296 #endif
297                                 ++lineno;
298                                 continue;
299                         }
300                         
301                         // string
302                         if (c == '\"') {
303                                 int i = -1;
304                                 bool escaped = false;
305                                 do {
306                                         escaped = false;
307                                         is.get(cc);
308                                         c = cc;
309                                         if (c == '\r') continue;
310                                         if (c == '\\') {
311                                                 // escape the next char
312                                                 is.get(cc);
313                                                 c = cc;
314                                                 escaped = true;
315                                         }
316                                         buff[++i] = c;
317                                         
318                                         if (!escaped && c == '\"') break;
319                                 } while (c != '\n' && is &&
320                                          i != (LEX_MAX_BUFF - 2));
321                                 
322                                 if (i == (LEX_MAX_BUFF - 2)) {
323                                         printError("Line too long");
324                                         c = '\"'; // Pretend we got a "
325                                         ++i;
326                                 }
327                                 
328                                 if (c != '\"') {
329                                         printError("Missing quote");
330                                         if (c == '\n')
331                                                 ++lineno;
332                                 }
333                                 
334                                 buff[i] = '\0';
335                                 status = LEX_DATA;
336                                 break; 
337                         }
338                         
339                         if (c > ' ' && is) {
340                                 int i = 0;
341                                 do {
342                                         if (c == '\\') {
343                                                 // escape the next char
344                                                 is.get(cc);
345                                                 c = cc;
346                                                 //escaped = true;
347                                         }
348                                         buff[i++] = c;
349                                         is.get(cc);
350                                         c = cc;
351                                 } while (c > ' ' && c != ',' && is
352                                          && (i != LEX_MAX_BUFF-1) );
353                                 if (i == LEX_MAX_BUFF-1) {
354                                         printError("Line too long");
355                                 }
356                                 buff[i] = '\0';
357                                 status = LEX_TOKEN;
358                         }
359                         // new line
360                         if (c == '\n')
361                                 ++lineno;
362                 }
363                 
364                 if (status) return true;
365                 
366                 status = is.eof() ? LEX_FEOF : LEX_UNDEF;
367                 buff[0] = '\0';
368                 return false;
369         }
370 }
371
372
373 int LyXLex::Pimpl::search_kw(char const * const tag) const
374 {
375         keyword_item search_tag = { tag, 0 };
376         keyword_item * res =
377                 lower_bound(table, table + no_items,
378                             search_tag, compare_tags());
379         if (res != table + no_items
380             && !compare_no_case(res->tag, tag))
381                 return res->code;
382         return LEX_UNDEF;
383 }
384
385
386 int LyXLex::Pimpl::lex()
387 {
388         //NOTE: possible bug.
389         if (next() && status == LEX_TOKEN)
390                 return search_kw(buff);
391         else
392                 return status;
393 }
394
395         
396 bool LyXLex::Pimpl::eatLine()
397 {
398         int i = 0;
399         unsigned char c = '\0';
400         char cc = 0;
401         while(is && c != '\n' && i != (LEX_MAX_BUFF - 1)) {
402                 is.get(cc);
403                 c = cc;
404                 //lyxerr[Debug::LYXLEX] << "LyXLex::EatLine read char: `"
405                 //                    << c << "'" << endl;
406                 if (c != '\r')
407                         buff[i++] = c;
408         }
409         if (i == (LEX_MAX_BUFF - 1) && c != '\n') {
410                 printError("Line too long");
411                 c = '\n'; // Pretend we had an end of line
412                 --lineno; // but don't increase line counter (netto effect)
413                 ++i; // and preserve last character read.
414         }
415         if (c == '\n') {
416                 ++lineno;
417                 buff[--i] = '\0'; // i can never be 0 here, so no danger
418                 status = LEX_DATA;
419                 return true;
420         } else {
421                 buff[i] = '\0';
422                 return false;
423         }
424 }
425
426
427 bool LyXLex::Pimpl::nextToken()
428 {
429         if (!pushTok.empty()) {
430                 // There can have been a whole line pushed so
431                 // we extract the first word and leaves the rest
432                 // in pushTok. (Lgb)
433                 if (pushTok.find(' ') != string::npos) {
434                         string tmp;
435                         pushTok = split(pushTok, tmp, ' ');
436                         tmp.copy(buff, string::npos);
437                         buff[tmp.length()] = '\0';
438                         return true;
439                 } else {
440                         pushTok.copy(buff, string::npos);
441                         buff[pushTok.length()] = '\0';
442                         pushTok.erase();
443                         return true;
444                 }
445         }
446
447         status = 0;
448         while (is && !status) {
449                 unsigned char c = 0;
450                 char cc = 0;
451                 is.get(cc);
452                 c = cc;
453                 if (c >= ' ' && is) {
454                         int i = 0;
455                         if (c == '\\') { // first char == '\\'
456                                 do {
457                                         buff[i++] = c;
458                                         is.get(cc);
459                                         c = cc;
460                                 } while (c > ' ' && c != '\\' && is
461                                          && i != (LEX_MAX_BUFF-1));
462                         } else {
463                                 do {
464                                         buff[i++] = c;
465                                         is.get(cc);
466                                         c = cc;
467                                 } while (c >= ' ' && c != '\\' && is
468                                          && i != (LEX_MAX_BUFF-1));
469                         }
470                         
471                         if (i == (LEX_MAX_BUFF - 1)) {
472                                 printError("Line too long");
473                         }
474                         
475                         if (c == '\\') is.putback(c); // put it back
476                         buff[i] = '\0';
477                         status = LEX_TOKEN;
478                 }
479                 
480                 if (c == '\n')
481                         ++lineno;
482                 
483         }
484         if (status)  return true;
485         
486         status = is.eof() ? LEX_FEOF: LEX_UNDEF;
487         buff[0] = '\0';
488         return false;
489 }
490
491
492 void LyXLex::Pimpl::pushToken(string const & pt)
493 {
494         pushTok = pt;
495 }