]> git.lyx.org Git - lyx.git/blob - src/Trans.cpp
* Hartmut's csv2lyx script
[lyx.git] / src / Trans.cpp
1 /**
2  * \file Trans.cpp
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author Lars Gullik Bjønnes
7  * \author Matthias Ettrich
8  *
9  * Full author contact details are available in file CREDITS.
10  */
11
12 #include <config.h>
13
14 #include "Trans.h"
15
16 #include "Buffer.h"
17 #include "BufferView.h"
18 #include "Cursor.h"
19 #include "CutAndPaste.h"
20 #include "Lexer.h"
21 #include "LyXRC.h"
22 #include "Text.h"
23
24 #include "support/convert.h"
25 #include "support/debug.h"
26 #include "support/docstream.h"
27 #include "support/FileName.h"
28 #include "support/filetools.h"
29 #include "support/lstrings.h"
30
31 using namespace std;
32 using namespace lyx::support;
33
34 namespace lyx {
35
36 /////////////////////////////////////////////////////////////////////
37 //
38 // TeXAccents
39 //
40 /////////////////////////////////////////////////////////////////////
41
42 /* the names used by TeX and XWindows for deadkeys/accents are not the same
43    so here follows a table to clarify the differences. Please correct this
44    if I got it wrong
45
46    |------------------|------------------|------------------|--------------|
47    |      TeX         |     XWindows     |   \bind/LFUN     | used by intl |
48    |------------------|------------------|------------------|--------------|
49    |    grave         |    grave         |LFUN_ACCENT_GRAVE        | grave
50    |    acute         |    acute         |LFUN_ACCENT_ACUTE        | acute
51    |    circumflex    |    circumflex    |LFUN_ACCENT_CIRCUMFLEX   | circumflex
52    | umlaut/dieresis  |    diaeresis     |LFUN_ACCENT_UMLAUT       | umlaut
53    |    tilde         |    tilde         |LFUN_ACCENT_TILDE        | tilde
54    |    macron        |    macron        |LFUN_ACCENT_MACRON       | macron
55    |    dot           |    abovedot      |LFUN_ACCENT_DOT          | dot
56    |    cedilla       |    cedilla       |LFUN_ACCENT_CEDILLA      | cedilla
57    |    underdot      |                  |LFUN_ACCENT_UNDERDOT     | underdot
58    |    underbar      |                  |LFUN_ACCENT_UNDERBAR     | underbar
59    |    hácek         |    caron         |LFUN_ACCENT_CARON        | caron
60    |    breve         |    breve         |LFUN_ACCENT_BREVE        | breve
61    |    tie           |                  |LFUN_ACCENT_TIE          | tie
62    | Hungarian umlaut |    doubleacute   |LFUN_ACCENT_HUNGARIAN_UMLAUT  | hungarian umlaut
63    |    circle        |    abovering     |LFUN_ACCENT_CIRCLE       | circle
64    |                  |    ogonek        |                  |
65    |                  |    iota          |                  |
66    |                  |    voiced_sound  |                  |
67    |                  | semivoiced_sound |                  |
68    |                  |                  |LFUN_ACCENT_SPECIAL_CARON| special caron
69    */
70 static TeXAccent lyx_accent_table[] = {
71         {TEX_NOACCENT,   0,      "",                LFUN_NOACTION},
72         {TEX_ACUTE,      0x0301, "acute",           LFUN_ACCENT_ACUTE},
73         {TEX_GRAVE,      0x0300, "grave",           LFUN_ACCENT_GRAVE},
74         {TEX_MACRON,     0x0304, "macron",          LFUN_ACCENT_MACRON},
75         {TEX_TILDE,      0x0303, "tilde",           LFUN_ACCENT_TILDE},
76         {TEX_UNDERBAR,   0x0320, "underbar",        LFUN_ACCENT_UNDERBAR},
77         {TEX_CEDILLA,    0x0327, "cedilla",         LFUN_ACCENT_CEDILLA},
78         {TEX_UNDERDOT,   0x0323, "underdot",        LFUN_ACCENT_UNDERDOT},
79         {TEX_CIRCUMFLEX, 0x0302, "circumflex",      LFUN_ACCENT_CIRCUMFLEX},
80         {TEX_CIRCLE,     0x030a, "circle",          LFUN_ACCENT_CIRCLE},
81         {TEX_TIE,        0x0361, "tie",             LFUN_ACCENT_TIE},
82         {TEX_BREVE,      0x0306, "breve",           LFUN_ACCENT_BREVE},
83         {TEX_CARON,      0x030c, "caron",           LFUN_ACCENT_CARON},
84 //      {TEX_SPECIAL_CARON, 0x030c, "ooo",          LFUN_ACCENT_SPECIAL_CARON},
85         // Don't fix this typo for compatibility reasons!
86         {TEX_HUNGUML,    0x030b, "hugarian_umlaut", LFUN_ACCENT_HUNGARIAN_UMLAUT},
87         {TEX_UMLAUT,     0x0308, "umlaut",          LFUN_ACCENT_UMLAUT},
88         {TEX_DOT,        0x0307, "dot",             LFUN_ACCENT_DOT},
89         {TEX_OGONEK,     0x0328, "ogonek",          LFUN_ACCENT_OGONEK}
90 };
91
92
93 TeXAccent get_accent(FuncCode action)
94 {
95         int i = 0;
96         while (i <= TEX_MAX_ACCENT) {
97                 if (lyx_accent_table[i].action == action)
98                         return lyx_accent_table[i];
99                 ++i;
100         }
101         struct TeXAccent temp = { static_cast<tex_accent>(0), 0,
102                                           0, static_cast<FuncCode>(0)};
103         return temp;
104 }
105
106
107 static docstring const doAccent(docstring const & s, tex_accent accent)
108 {
109         if (s.empty())
110                 return docstring(1, lyx_accent_table[accent].ucs4);
111
112         odocstringstream os;
113         os.put(s[0]);
114         os.put(lyx_accent_table[accent].ucs4);
115         if (s.length() > 1) {
116                 if (accent != TEX_TIE || s.length() > 2)
117                         lyxerr << "Warning: Too many characters given for accent "
118                                << lyx_accent_table[accent].name << '.' << endl;
119                 os << s.substr(1);
120         }
121         return normalize_c(os.str());
122 }
123
124
125 static docstring const doAccent(char_type c, tex_accent accent)
126 {
127         return doAccent(docstring(1, c), accent);
128 }
129
130
131
132 /////////////////////////////////////////////////////////////////////
133 //
134 // Trans
135 //
136 /////////////////////////////////////////////////////////////////////
137
138
139 void Trans::insertException(KmodException & exclist, char_type c,
140         docstring const & data, bool flag, tex_accent accent)
141 {
142         Keyexc p;
143         p.c = c;
144         p.data = data;
145         p.combined = flag;
146         p.accent = accent;
147         exclist.insert(exclist.begin(), p);
148         // or just
149         // exclist.push_back(p);
150 }
151
152
153 void Trans::freeException(KmodException & exclist)
154 {
155         exclist.clear();
156 }
157
158
159 void Trans::freeKeymap()
160 {
161         kmod_list_.clear();
162         keymap_.clear();
163 }
164
165
166 bool Trans::isDefined() const
167 {
168         return !name_.empty();
169 }
170
171
// Token codes for the keymap file tags; Trans::load() associates them
// with "\kcomb", "\kmod", "\kmap" and "\kxmod" and switches on them.
enum {
        KCOMB = 1,
        KMOD  = 2,
        KMAP  = 3,
        KXMOD = 4
};
178
179
180 tex_accent getkeymod(string const &);
181
182
183 void Trans::addDeadkey(tex_accent accent, docstring const & keys)
184 {
185         KmodInfo tmp;
186         tmp.data = keys;
187         tmp.accent = accent;
188         kmod_list_[accent] = tmp;
189
190         for (docstring::size_type i = 0; i < keys.length(); ++i) {
191                 // FIXME This is a hack.
192                 // tmp is no valid UCS4 string, but misused to store the
193                 // accent.
194                 docstring tmp;
195                 tmp += char_type(0);
196                 tmp += char_type(accent);
197                 keymap_[keys[i]] = tmp;
198         }
199 }
200
201
202 int Trans::load(Lexer & lex)
203 {
204         bool error = false;
205
206         while (lex.isOK() && !error) {
207                 switch (lex.lex()) {
208                 case KMOD:
209                 {
210                         LYXERR(Debug::KBMAP, "KMOD:\t" << lex.getString());
211                         if (!lex.next(true))
212                                 return -1;
213
214                         LYXERR(Debug::KBMAP, "key\t`" << lex.getString() << '\'');
215
216                         docstring const keys = lex.getDocString();
217
218                         if (!lex.next(true))
219                                 return -1;
220
221                         LYXERR(Debug::KBMAP, "accent\t`" << lex.getString() << '\'');
222
223                         tex_accent accent = getkeymod(lex.getString());
224
225                         if (accent == TEX_NOACCENT)
226                                 return -1;
227
228 #if 1
229                         // FIXME: This code should be removed...
230                         // But we need to fix up all the kmap files first
231                         // so that this field is not present anymore.
232                         if (!lex.next(true))
233                                 return -1;
234
235                         LYXERR(Debug::KBMAP, "allowed\t`" << lex.getString() << '\'');
236
237                         /* string const allowed = lex.getString(); */
238                         addDeadkey(accent, keys /*, allowed*/);
239 #else
240                         addDeadkey(accent, keys);
241 #endif
242                         break;
243                 }
244                 case KCOMB: {
245                         string str;
246
247                         LYXERR(Debug::KBMAP, "KCOMB:");
248                         if (!lex.next(true))
249                                 return -1;
250
251                         str = lex.getString();
252                         LYXERR(Debug::KBMAP, str);
253
254                         tex_accent accent_1 = getkeymod(str);
255                         if (accent_1 == TEX_NOACCENT)
256                                 return -1;
257
258                         if (!lex.next(true))
259                                 return -1;
260
261                         str = lex.getString();
262                         LYXERR(Debug::KBMAP, str);
263
264                         tex_accent accent_2 = getkeymod(str);
265                         if (accent_2 == TEX_NOACCENT) return -1;
266
267                         map<tex_accent, KmodInfo>::iterator it1 =
268                                 kmod_list_.find(accent_1);
269                         map<tex_accent, KmodInfo>::iterator it2 =
270                                 kmod_list_.find(accent_2);
271                         if (it1 == kmod_list_.end() || it2 == kmod_list_.end())
272                                 return -1;
273
274                         // Find what key accent_2 is on - should
275                         // check about accent_1 also
276                         map<char_type, docstring>::iterator it = keymap_.begin();
277                         map<char_type, docstring>::iterator end = keymap_.end();
278                         for (; it != end; ++it) {
279                                 if (!it->second.empty()
280                                     && it->second[0] == 0
281                                     && it->second[1] == accent_2)
282                                         break;
283                         }
284                         docstring allowed;
285                         if (!lex.next())
286                                 return -1;
287
288                         allowed = lex.getDocString();
289                         LYXERR(Debug::KBMAP, "allowed: " << to_utf8(allowed));
290
291                         insertException(kmod_list_[accent_1].exception_list,
292                                         it->first, allowed, true, accent_2);
293                 }
294                 break;
295                 case KMAP: {
296                         unsigned char key_from;
297
298                         LYXERR(Debug::KBMAP, "KMAP:\t" << lex.getString());
299
300                         if (!lex.next(true))
301                                 return -1;
302
303                         key_from = lex.getString()[0];
304                         LYXERR(Debug::KBMAP, "\t`" << lex.getString() << '\'');
305
306                         if (!lex.next(true))
307                                 return -1;
308
309                         docstring const string_to = lex.getDocString();
310                         keymap_[key_from] = string_to;
311                         LYXERR(Debug::KBMAP, "\t`" << to_utf8(string_to) << '\'');
312                         break;
313                 }
314                 case KXMOD: {
315                         tex_accent accent;
316                         char_type key;
317                         docstring str;
318
319                         LYXERR(Debug::KBMAP, "KXMOD:\t" << lex.getString());
320
321                         if (!lex.next(true))
322                                 return -1;
323
324                         LYXERR(Debug::KBMAP, "\t`" << lex.getString() << '\'');
325                         accent = getkeymod(lex.getString());
326
327                         if (!lex.next(true))
328                                 return -1;
329
330                         LYXERR(Debug::KBMAP, "\t`" << lex.getString() << '\'');
331                         key = lex.getDocString()[0];
332
333                         if (!lex.next(true))
334                                 return -1;
335
336                         LYXERR(Debug::KBMAP, "\t`" << lex.getString() << '\'');
337                         str = lex.getDocString();
338
339                         insertException(kmod_list_[accent].exception_list,
340                                         key, str);
341                         break;
342                 }
343                 case Lexer::LEX_FEOF:
344                         LYXERR(Debug::PARSER, "End of parsing");
345                         break;
346                 default:
347                         lex.printError("ParseKeymapFile: Unknown tag: `$$Token'");
348                         return -1;
349                 }
350         }
351         return 0;
352 }
353
354
355 bool Trans::isAccentDefined(tex_accent accent, KmodInfo & i) const
356 {
357         map<tex_accent, KmodInfo>::const_iterator cit = kmod_list_.find(accent);
358         if (cit == kmod_list_.end())
359                 return false;
360         i = cit->second;
361         return true;
362 }
363
364
365 docstring const Trans::process(char_type c, TransManager & k)
366 {
367         docstring const t = match(c);
368
369         if (t.empty() && c != 0)
370                 return k.normalkey(c);
371
372         if (!t.empty() && t[0] != 0)
373                 return t; //return k.normalkey(c);
374
375         return k.deadkey(c, kmod_list_[static_cast<tex_accent>(t[1])]);
376 }
377
378
379 int Trans::load(string const & language)
380 {
381         LexerKeyword kmapTags[] = {
382                 {"\\kcomb", KCOMB },
383                 { "\\kmap", KMAP },
384                 { "\\kmod", KMOD },
385                 { "\\kxmod", KXMOD }
386         };
387
388         FileName const filename = libFileSearch("kbd", language, "kmap");
389         if (filename.empty())
390                 return -1;
391
392         freeKeymap();
393         Lexer lex(kmapTags);
394         lex.setFile(filename);
395
396         int const res = load(lex);
397
398         if (res == 0)
399                 name_ = language;
400         else
401                 name_.erase();
402
403         return res;
404 }
405
406
407 tex_accent getkeymod(string const & p)
408         /* return modifier - decoded from p and update p */
409 {
410         for (int i = 1; i <= TEX_MAX_ACCENT; ++i) {
411                 LYXERR(Debug::KBMAP, "p = " << p
412                        << ", lyx_accent_table[" << i
413                        << "].name = `" << lyx_accent_table[i].name << '\'');
414
415                 if (lyx_accent_table[i].name
416                      && contains(p, lyx_accent_table[i].name)) {
417                         LYXERR(Debug::KBMAP, "Found it!");
418                         return static_cast<tex_accent>(i);
419                 }
420         }
421         return TEX_NOACCENT;
422 }
423
424
425 /////////////////////////////////////////////////////////////////////
426 //
427 // TransState
428 //
429 /////////////////////////////////////////////////////////////////////
430
431
432 // TransFSMData
433 TransFSMData::TransFSMData()
434 {
435         deadkey_ = deadkey2_ = 0;
436         deadkey_info_.accent = deadkey2_info_.accent = TEX_NOACCENT;
437 }
438
439
440 // TransState
441 char_type const TransState::TOKEN_SEP = 4;
442
443
444 // TransInitState
445 TransInitState::TransInitState()
446 {
447         init_state_ = this;
448 }
449
450
451 docstring const TransInitState::normalkey(char_type c)
452 {
453         docstring res;
454         res = c;
455         return res;
456 }
457
458
459 docstring const TransInitState::deadkey(char_type c, KmodInfo d)
460 {
461         deadkey_ = c;
462         deadkey_info_ = d;
463         currentState = deadkey_state_;
464         return docstring();
465 }
466
467
468 // TransDeadkeyState
469 TransDeadkeyState::TransDeadkeyState()
470 {
471         deadkey_state_ = this;
472 }
473
474
475 docstring const TransDeadkeyState::normalkey(char_type c)
476 {
477         docstring res;
478
479         KmodException::iterator it = deadkey_info_.exception_list.begin();
480         KmodException::iterator end = deadkey_info_.exception_list.end();
481
482         for (; it != end; ++it) {
483                 if (it->c == c) {
484                         res = it->data;
485                         break;
486                 }
487         }
488         if (it == end) {
489                 res = doAccent(c, deadkey_info_.accent);
490         }
491         currentState = init_state_;
492         return res;
493 }
494
495
496 docstring const TransDeadkeyState::deadkey(char_type c, KmodInfo d)
497 {
498         docstring res;
499
500         // Check if the same deadkey was typed twice
501         if (deadkey_ == c) {
502                 res = deadkey_;
503                 deadkey_ = 0;
504                 deadkey_info_.accent = TEX_NOACCENT;
505                 currentState = init_state_;
506                 return res;
507         }
508
509         // Check if it is a combination or an exception
510         KmodException::const_iterator cit = deadkey_info_.exception_list.begin();
511         KmodException::const_iterator end = deadkey_info_.exception_list.end();
512         for (; cit != end; ++cit) {
513                 if (cit->combined == true && cit->accent == d.accent) {
514                         deadkey2_ = c;
515                         deadkey2_info_ = d;
516                         comb_info_ = (*cit);
517                         currentState = combined_state_;
518                         return docstring();
519                 }
520                 if (cit->c == c) {
521                         res = cit->data;
522                         deadkey_ = 0;
523                         deadkey_info_.accent = TEX_NOACCENT;
524                         currentState = init_state_;
525                         return res;
526                 }
527         }
528
529         // Not a combination or an exception.
530         // Output deadkey1 and keep deadkey2
531
532         if (deadkey_!= 0)
533                 res = deadkey_;
534         deadkey_ = c;
535         deadkey_info_ = d;
536         currentState = deadkey_state_;
537         return res;
538 }
539
540
541 TransCombinedState::TransCombinedState()
542 {
543         combined_state_ = this;
544 }
545
546
547 docstring const TransCombinedState::normalkey(char_type c)
548 {
549         docstring const temp = doAccent(c, deadkey2_info_.accent);
550         docstring const res = doAccent(temp, deadkey_info_.accent);
551         currentState = init_state_;
552         return res;
553 }
554
555
556 docstring const TransCombinedState::deadkey(char_type c, KmodInfo d)
557 {
558         // Third key in a row. Output the first one and
559         // reenter with shifted deadkeys
560         docstring res;
561         if (deadkey_ != 0)
562                 res = deadkey_;
563         res += TOKEN_SEP;
564         deadkey_ = deadkey2_;
565         deadkey_info_ = deadkey2_info_;
566         res += deadkey_state_->deadkey(c, d);
567         return res;
568 }
569
570
571 // TransFSM
572 TransFSM::TransFSM()
573         : TransFSMData(), TransInitState(), TransDeadkeyState(), TransCombinedState()
574 {
575         currentState = init_state_;
576 }
577
578
579 // TransManager
580
581 // Initialize static member.
582 Trans TransManager::default_;
583
584
585 TransManager::TransManager()
586         : active_(0)
587 {}
588
589
590 int TransManager::setPrimary(string const & language)
591 {
592         if (t1_.getName() == language)
593                 return 0;
594
595         return t1_.load(language);
596 }
597
598
599 int TransManager::setSecondary(string const & language)
600 {
601         if (t2_.getName() == language)
602                 return 0;
603
604         return t2_.load(language);
605 }
606
607
608 void TransManager::enablePrimary()
609 {
610         if (t1_.isDefined())
611                 active_ = &t1_;
612
613         LYXERR(Debug::KBMAP, "Enabling primary keymap");
614 }
615
616
617 void TransManager::enableSecondary()
618 {
619         if (t2_.isDefined())
620                 active_ = &t2_;
621         LYXERR(Debug::KBMAP, "Enabling secondary keymap");
622 }
623
624
625 void TransManager::disableKeymap()
626 {
627         active_ = &default_;
628         LYXERR(Debug::KBMAP, "Disabling keymap");
629 }
630
631
632 void  TransManager::translateAndInsert(char_type c, Text * text, Cursor & cur)
633 {
634         docstring res = active_->process(c, *this);
635
636         // Process with tokens
637         docstring temp;
638
639         while (res.length() > 0) {
640                 res = split(res, temp, TransState::TOKEN_SEP);
641                 insert(temp, text, cur);
642         }
643 }
644
645
646 void TransManager::insert(docstring const & str, Text * text, Cursor & cur)
647 {
648         for (size_t i = 0, n = str.size(); i != n; ++i)
649                 text->insertChar(cur, str[i]);
650 }
651
652
653 void TransManager::deadkey(char_type c, tex_accent accent, Text * t, Cursor & cur)
654 {
655         if (c == 0 && active_ != &default_) {
656                 // A deadkey was pressed that cannot be printed
657                 // or a accent command was typed in the minibuffer
658                 KmodInfo i;
659                 if (active_->isAccentDefined(accent, i) == true) {
660                         docstring const res = trans_fsm_
661                                 .currentState->deadkey(c, i);
662                         insert(res, t, cur);
663                         return;
664                 }
665         }
666
667         if (active_ == &default_ || c == 0) {
668                 KmodInfo i;
669                 i.accent = accent;
670                 i.data.erase();
671                 docstring res = trans_fsm_.currentState->deadkey(c, i);
672                 insert(res, t, cur);
673         } else {
674                 // Go through the translation
675                 translateAndInsert(c, t, cur);
676         }
677 }
678
679
680 } // namespace lyx