]> git.lyx.org Git - lyx.git/blob - src/lyxlex_pimpl.C
"Inter-word Space"
[lyx.git] / src / lyxlex_pimpl.C
1 #include <config.h>
2
3 #include "lyxlex_pimpl.h"
4 #include "debug.h"
5
6 #include "support/lyxalgo.h"
7 #include "support/filetools.h"
8 #include "support/lstrings.h"
9
10 #include <algorithm>
11
12 using std::sort;
13 using std::ostream;
14 using std::ios;
15 using std::istream;
16 using std::endl;
17 using std::lower_bound;
18 using std::vector;
19 using std::getline;
20
21 // namespace {
22 struct compare_tags {
23         // used by lower_bound, sort and sorted
24         inline
25         int operator()(keyword_item const & a, keyword_item const & b) const {
26                 // we use the ascii version, because in turkish, 'i'
27                 // is not the lowercase version of 'I', and thus
28                 // turkish locale breaks parsing of tags.
29                 return compare_ascii_no_case(a.tag, b.tag) < 0;
30         }
31 };
32 // } // end of anon namespace
33
34
35 LyXLex::Pimpl::Pimpl(keyword_item * tab, int num)
36         : is(&fb__), table(tab), no_items(num),
37           status(0), lineno(0), commentChar('#')
38 {
39         verifyTable();
40 }
41
42
43 string const LyXLex::Pimpl::getString() const
44 {
45         return string(buff.begin(), buff.end());
46 }
47
48
49 void LyXLex::Pimpl::printError(string const & message) const
50 {
51         string const tmpmsg = subst(message, "$$Token", getString());
52         lyxerr << "LyX: " << tmpmsg << " [around line " << lineno
53                << " of file " << MakeDisplayPath(name) << ']' << endl;
54 }
55
56
57 void LyXLex::Pimpl::printTable(ostream & os)
58 {
59         os << "\nNumber of tags: " << no_items << '\n';
60         for (int i= 0; i < no_items; ++i)
61                 os << "table[" << i
62                    << "]:  tag: `" << table[i].tag
63                    << "'  code:" << table[i].code << '\n';
64         os.flush();
65 }
66
67
68 void LyXLex::Pimpl::verifyTable()
69 {
70         // Check if the table is sorted and if not, sort it.
71         if (table
72             && !lyx::sorted(table, table + no_items, compare_tags())) {
73                 lyxerr << "The table passed to LyXLex is not sorted!\n"
74                        << "Tell the developers to fix it!" << endl;
75                 // We sort it anyway to avoid problems.
76                 lyxerr << "\nUnsorted:\n";
77                 printTable(lyxerr);
78
79                 sort(table, table + no_items, compare_tags());
80                 lyxerr << "\nSorted:\n";
81                 printTable(lyxerr);
82         }
83 }
84
85
86 void LyXLex::Pimpl::pushTable(keyword_item * tab, int num)
87 {
88         pushed_table tmppu(table, no_items);
89         pushed.push(tmppu);
90
91         table = tab;
92         no_items = num;
93
94         verifyTable();
95 }
96
97
98 void LyXLex::Pimpl::popTable()
99 {
100         if (pushed.empty()) {
101                 lyxerr << "LyXLex error: nothing to pop!" << endl;
102                 return;
103         }
104
105         pushed_table tmp = pushed.top();
106         pushed.pop();
107         table = tmp.table_elem;
108         no_items = tmp.table_siz;
109 }
110
111
112 bool LyXLex::Pimpl::setFile(string const & filename)
113 {
114         // The check only outputs a debug message, because it triggers
115         // a bug in compaq cxx 6.2, where is_open() returns 'true' for a
116         // fresh new filebuf.  (JMarc)
117         if (fb__.is_open() || is.tellg() > 0)
118                 lyxerr[Debug::LYXLEX] << "Error in LyXLex::setFile: "
119                         "file or stream already set." << endl;
120         fb__.open(filename.c_str(), ios::in);
121         is.rdbuf(&fb__);
122         name = filename;
123         lineno = 0;
124         return fb__.is_open() && is.good();
125 }
126
127
128 void LyXLex::Pimpl::setStream(istream & i)
129 {
130         if (fb__.is_open() || is.tellg() > 0)
131                 lyxerr[Debug::LYXLEX]  << "Error in LyXLex::setStream: "
132                         "file or stream already set." << endl;
133         is.rdbuf(i.rdbuf());
134         lineno = 0;
135 }
136
137
138 void LyXLex::Pimpl::setCommentChar(char c)
139 {
140         commentChar = c;
141 }
142
143
144 bool LyXLex::Pimpl::next(bool esc /* = false */)
145 {
146         if (!pushTok.empty()) {
147                 // There can have been a whole line pushed so
148                 // we extract the first word and leaves the rest
149                 // in pushTok. (Lgb)
150                 if (pushTok.find(' ') != string::npos && pushTok[0] == '\\') {
151                         string tmp;
152                         pushTok = split(pushTok, tmp, ' ');
153                         buff.assign(tmp.begin(), tmp.end());
154                         return true;
155                 } else {
156                         buff.assign(pushTok.begin(), pushTok.end());
157                         pushTok.erase();
158                         return true;
159                 }
160         }
161         if (!esc) {
162                 unsigned char c = 0; // getc() returns an int
163                 char cc = 0;
164                 status = 0;
165                 while (is && !status) {
166                         is.get(cc);
167                         c = cc;
168                         if (c == commentChar) {
169                                 // Read rest of line (fast :-)
170 #if 1
171                                 // That is not fast... (Lgb)
172                                 string dummy;
173                                 getline(is, dummy);
174
175                                 lyxerr[Debug::LYXLEX] << "Comment read: `" << c
176                                                       << dummy << '\'' << endl;
177 #else
178                                 // unfortunately ignore is buggy (Lgb)
179                                 is.ignore(100, '\n');
180 #endif
181                                 ++lineno;
182                                 continue;
183                         }
184
185                         if (c == '\"') {
186                                 buff.clear();
187
188                                 do {
189                                         is.get(cc);
190                                         c = cc;
191                                         if (c != '\r')
192                                                 buff.push_back(c);
193                                 } while (c != '\"' && c != '\n' && is);
194
195                                 if (c != '\"') {
196                                         printError("Missing quote");
197                                         if (c == '\n')
198                                                 ++lineno;
199                                 }
200
201                                 buff.pop_back();
202                                 status = LEX_DATA;
203                                 break;
204                         }
205
206                         if (c == ',')
207                                 continue;              /* Skip ','s */
208
209                                 // using relational operators with chars other
210                                 // than == and != is not safe. And if it is done
211                                 // the type _have_ to be unsigned. It usually a
212                                 // lot better to use the functions from cctype
213                         if (c > ' ' && is)  {
214                                 buff.clear();
215
216                                 do {
217                                         buff.push_back(c);
218                                         is.get(cc);
219                                         c = cc;
220                                 } while (c > ' ' && c != ',' && is);
221
222                                 status = LEX_TOKEN;
223                         }
224
225                         if (c == '\r' && is) {
226                                 // The Windows support has lead to the
227                                 // possibility of "\r\n" at the end of
228                                 // a line.  This will stop LyX choking
229                                 // when it expected to find a '\n'
230                                 is.get(cc);
231                                 c = cc;
232                         }
233
234                         if (c == '\n')
235                                 ++lineno;
236
237                 }
238                 if (status) return true;
239
240                 status = is.eof() ? LEX_FEOF: LEX_UNDEF;
241                 buff.clear();
242                 return false;
243         } else {
244                 unsigned char c = 0; // getc() returns an int
245                 char cc = 0;
246
247                 status = 0;
248                 while (is && !status) {
249                         is.get(cc);
250                         c = cc;
251
252                         // skip ','s
253                         if (c == ',') continue;
254
255                         if (c == '\\') {
256                                 // escape
257                                 buff.clear();
258
259                                 do {
260                                         if (c == '\\') {
261                                                 // escape the next char
262                                                 is.get(cc);
263                                                 c = cc;
264                                         }
265                                         buff.push_back(c);
266                                         is.get(cc);
267                                         c = cc;
268                                 } while (c > ' ' && c != ',' && is);
269
270                                 status = LEX_TOKEN;
271                                 continue;
272                         }
273
274                         if (c == commentChar) {
275                                 // Read rest of line (fast :-)
276 #if 1
277                                 // That is still not fast... (Lgb)
278                                 string dummy;
279                                 getline(is, dummy);
280
281                                 lyxerr[Debug::LYXLEX] << "Comment read: `" << c
282                                                       << dummy << '\'' << endl;
283 #else
284                                 // but ignore is also still buggy (Lgb)
285                                 // This is fast (Lgb)
286                                 is.ignore(100, '\n');
287 #endif
288                                 ++lineno;
289                                 continue;
290                         }
291
292                         // string
293                         if (c == '\"') {
294                                 buff.clear();
295
296                                 bool escaped = false;
297                                 do {
298                                         escaped = false;
299                                         is.get(cc);
300                                         c = cc;
301                                         if (c == '\r') continue;
302                                         if (c == '\\') {
303                                                 // escape the next char
304                                                 is.get(cc);
305                                                 c = cc;
306                                                 if (c == '\"' || c == '\\')
307                                                         escaped = true;
308                                                 else
309                                                         buff.push_back('\\');
310                                         }
311                                         buff.push_back(c);
312
313                                         if (!escaped && c == '\"') break;
314                                 } while (c != '\n' && is);
315
316                                 if (c != '\"') {
317                                         printError("Missing quote");
318                                         if (c == '\n')
319                                                 ++lineno;
320                                 }
321
322                                 buff.pop_back();
323                                 status = LEX_DATA;
324                                 break;
325                         }
326
327                         if (c > ' ' && is) {
328                                 buff.clear();
329
330                                 do {
331                                         if (c == '\\') {
332                                                 // escape the next char
333                                                 is.get(cc);
334                                                 c = cc;
335                                                 //escaped = true;
336                                         }
337                                         buff.push_back(c);
338                                         is.get(cc);
339                                         c = cc;
340                                 } while (c > ' ' && c != ',' && is);
341
342                                 status = LEX_TOKEN;
343                         }
344                         // new line
345                         if (c == '\n')
346                                 ++lineno;
347                 }
348
349                 if (status) return true;
350
351                 status = is.eof() ? LEX_FEOF : LEX_UNDEF;
352                 buff.clear();
353                 return false;
354         }
355 }
356
357
358 int LyXLex::Pimpl::search_kw(char const * const tag) const
359 {
360         keyword_item search_tag = { tag, 0 };
361         keyword_item * res =
362                 lower_bound(table, table + no_items,
363                             search_tag, compare_tags());
364         // use the compare_ascii_no_case instead of compare_no_case,
365         // because in turkish, 'i' is not the lowercase version of 'I',
366         // and thus turkish locale breaks parsing of tags.
367         if (res != table + no_items
368             && !compare_ascii_no_case(res->tag, tag))
369                 return res->code;
370         return LEX_UNDEF;
371 }
372
373
374 int LyXLex::Pimpl::lex()
375 {
376         //NOTE: possible bug.
377         if (next() && status == LEX_TOKEN) {
378                 return search_kw(getString().c_str());
379         } else
380                 return status;
381 }
382
383
384 bool LyXLex::Pimpl::eatLine()
385 {
386         buff.clear();
387
388         unsigned char c = '\0';
389         char cc = 0;
390         while (is && c != '\n') {
391                 is.get(cc);
392                 c = cc;
393                 //lyxerr[Debug::LYXLEX] << "LyXLex::EatLine read char: `"
394                 //                    << c << '\'' << endl;
395                 if (c != '\r')
396                         buff.push_back(c);
397         }
398
399         if (c == '\n') {
400                 ++lineno;
401                 buff.pop_back();
402                 status = LEX_DATA;
403                 return true;
404         } else {
405                 return false;
406         }
407 }
408
409
410 bool LyXLex::Pimpl::nextToken()
411 {
412         if (!pushTok.empty()) {
413                 // There can have been a whole line pushed so
414                 // we extract the first word and leaves the rest
415                 // in pushTok. (Lgb)
416                 if (pushTok.find(' ') != string::npos && pushTok[0] == '\\') {
417                         string tmp;
418                         pushTok = split(pushTok, tmp, ' ');
419                         buff.assign(tmp.begin(), tmp.end());
420                         return true;
421                 } else {
422                         buff.assign(pushTok.begin(), pushTok.end());
423                         pushTok.erase();
424                         return true;
425                 }
426         }
427
428         status = 0;
429         while (is && !status) {
430                 unsigned char c = 0;
431                 char cc = 0;
432                 is.get(cc);
433                 c = cc;
434                 if (c >= ' ' && is) {
435                         buff.clear();
436
437                         if (c == '\\') { // first char == '\\'
438                                 do {
439                                         buff.push_back(c);
440                                         is.get(cc);
441                                         c = cc;
442                                 } while (c > ' ' && c != '\\' && is);
443                         } else {
444                                 do {
445                                         buff.push_back(c);
446                                         is.get(cc);
447                                         c = cc;
448                                 } while (c >= ' ' && c != '\\' && is);
449                         }
450
451                         if (c == '\\') is.putback(c); // put it back
452                         status = LEX_TOKEN;
453                 }
454
455                 if (c == '\n')
456                         ++lineno;
457
458         }
459         if (status)  return true;
460
461         status = is.eof() ? LEX_FEOF: LEX_UNDEF;
462         buff.clear();
463         return false;
464 }
465
466
467 void LyXLex::Pimpl::pushToken(string const & pt)
468 {
469         pushTok = pt;
470 }