]> git.lyx.org Git - lyx.git/blob - src/Trans.cpp
Move Lexer to support/ directory (and lyx::support namespace)
[lyx.git] / src / Trans.cpp
1 /**
2  * \file Trans.cpp
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author Lars Gullik Bjønnes
7  * \author Matthias Ettrich
8  *
9  * Full author contact details are available in file CREDITS.
10  */
11
12 #include <config.h>
13
14 #include "Trans.h"
15
16 #include "Text.h"
17
18 #include "support/debug.h"
19 #include "support/docstream.h"
20 #include "support/FileName.h"
21 #include "support/filetools.h"
22 #include "support/Lexer.h"
23 #include "support/lstrings.h"
24
25 using namespace std;
26 using namespace lyx::support;
27
28 namespace lyx {
29
30 /////////////////////////////////////////////////////////////////////
31 //
32 // TeXAccents
33 //
34 /////////////////////////////////////////////////////////////////////
35
/* The names used by TeX and XWindows for deadkeys/accents are not the same,
   so here follows a table to clarify the differences. Please correct this
   if I got it wrong.

   |------------------|------------------|------------------|--------------|
   |      TeX         |     XWindows     |   \bind/LFUN     | used by intl |
   |------------------|------------------|------------------|--------------|
   |    grave         |    grave         |LFUN_ACCENT_GRAVE        | grave
   |    acute         |    acute         |LFUN_ACCENT_ACUTE        | acute
   |    circumflex    |    circumflex    |LFUN_ACCENT_CIRCUMFLEX   | circumflex
   | umlaut/dieresis  |    diaeresis     |LFUN_ACCENT_UMLAUT       | umlaut
   |    tilde         |    tilde         |LFUN_ACCENT_TILDE        | tilde
   |    macron        |    macron        |LFUN_ACCENT_MACRON       | macron
   |    dot           |    abovedot      |LFUN_ACCENT_DOT          | dot
   |    cedilla       |    cedilla       |LFUN_ACCENT_CEDILLA      | cedilla
   |    underdot      |                  |LFUN_ACCENT_UNDERDOT     | underdot
   |    underbar      |                  |LFUN_ACCENT_UNDERBAR     | underbar
   |    háček         |    caron         |LFUN_ACCENT_CARON        | caron
   |    breve         |    breve         |LFUN_ACCENT_BREVE        | breve
   |    tie           |                  |LFUN_ACCENT_TIE          | tie
   | Hungarian umlaut |    doubleacute   |LFUN_ACCENT_HUNGARIAN_UMLAUT  | hungarian umlaut
   |    circle        |    abovering     |LFUN_ACCENT_CIRCLE       | circle
   |                  |    ogonek        |                  |
   |                  |    iota          |                  |
   |                  |    voiced_sound  |                  |
   |                  | semivoiced_sound |                  |
   */
63 static TeXAccent lyx_accent_table[] = {
64         {TEX_NOACCENT,   0,      "",                LFUN_NOACTION},
65         {TEX_ACUTE,      0x0301, "acute",           LFUN_ACCENT_ACUTE},
66         {TEX_GRAVE,      0x0300, "grave",           LFUN_ACCENT_GRAVE},
67         {TEX_MACRON,     0x0304, "macron",          LFUN_ACCENT_MACRON},
68         {TEX_TILDE,      0x0303, "tilde",           LFUN_ACCENT_TILDE},
69         {TEX_PERISPOMENI, 0x0342, "perispomeni",    LFUN_ACCENT_PERISPOMENI},
70         {TEX_UNDERBAR,   0x0320, "underbar",        LFUN_ACCENT_UNDERBAR}, // COMBINING MINUS SIGN BELOW or 0x0331 COMBINING MACRON BELOW ?
71
72         {TEX_CEDILLA,    0x0327, "cedilla",         LFUN_ACCENT_CEDILLA},
73         {TEX_UNDERDOT,   0x0323, "underdot",        LFUN_ACCENT_UNDERDOT},
74         {TEX_CIRCUMFLEX, 0x0302, "circumflex",      LFUN_ACCENT_CIRCUMFLEX},
75         {TEX_CIRCLE,     0x030a, "circle",          LFUN_ACCENT_CIRCLE},
76         {TEX_TIE,        0x0361, "tie",             LFUN_ACCENT_TIE},
77         {TEX_BREVE,      0x0306, "breve",           LFUN_ACCENT_BREVE},
78         {TEX_CARON,      0x030c, "caron",           LFUN_ACCENT_CARON},
79         // Don't fix this typo for compatibility reasons!
80         {TEX_HUNGUML,    0x030b, "hugarian_umlaut", LFUN_ACCENT_HUNGARIAN_UMLAUT},
81         {TEX_UMLAUT,     0x0308, "umlaut",          LFUN_ACCENT_UMLAUT},
82         {TEX_DOT,        0x0307, "dot",             LFUN_ACCENT_DOT},
83         {TEX_OGONEK,     0x0328, "ogonek",          LFUN_ACCENT_OGONEK}
84 };
85
86
87 TeXAccent get_accent(FuncCode action)
88 {
89         int i = 0;
90         while (i <= TEX_MAX_ACCENT) {
91                 if (lyx_accent_table[i].action == action)
92                         return lyx_accent_table[i];
93                 ++i;
94         }
95         struct TeXAccent temp = { static_cast<tex_accent>(0), 0,
96                                           nullptr, static_cast<FuncCode>(0)};
97         return temp;
98 }
99
100
101 static docstring const doAccent(docstring const & s, tex_accent accent)
102 {
103         if (s.empty())
104                 return docstring(1, lyx_accent_table[accent].ucs4);
105
106         odocstringstream os;
107         os.put(s[0]);
108         os.put(lyx_accent_table[accent].ucs4);
109         if (s.length() > 1) {
110                 if (accent != TEX_TIE || s.length() > 2)
111                         lyxerr << "Warning: Too many characters given for accent "
112                                << lyx_accent_table[accent].name << '.' << endl;
113                 os << s.substr(1);
114         }
115         return normalize_c(os.str());
116 }
117
118
119 static docstring const doAccent(char_type c, tex_accent accent)
120 {
121         return doAccent(docstring(1, c), accent);
122 }
123
124
125
126 /////////////////////////////////////////////////////////////////////
127 //
128 // Trans
129 //
130 /////////////////////////////////////////////////////////////////////
131
132
133 void Trans::insertException(KmodException & exclist, char_type c,
134         docstring const & data, bool flag, tex_accent accent)
135 {
136         Keyexc p;
137         p.c = c;
138         p.data = data;
139         p.combined = flag;
140         p.accent = accent;
141         exclist.insert(exclist.begin(), p);
142         // or just
143         // exclist.push_back(p);
144 }
145
146
147 void Trans::freeException(KmodException & exclist)
148 {
149         exclist.clear();
150 }
151
152
153 void Trans::freeKeymap()
154 {
155         kmod_list_.clear();
156         keymap_.clear();
157 }
158
159
160 bool Trans::isDefined() const
161 {
162         return !name_.empty();
163 }
164
165
/// Lexer tag codes for the keymap file keywords (see the keyword table
/// in Trans::load(string const &)).
enum {
	KCOMB = 1,
	KMOD  = 2,
	KMAP  = 3,
	KXMOD = 4
};
172
173
174 tex_accent getkeymod(string const &);
175
176
177 void Trans::addDeadkey(tex_accent accent, docstring const & keys)
178 {
179         KmodInfo tmp;
180         tmp.data = keys;
181         tmp.accent = accent;
182         kmod_list_[accent] = tmp;
183
184         for (char_type key : keys) {
185                 // FIXME This is a hack.
186                 // tmp is no valid UCS4 string, but misused to store the
187                 // accent.
188                 docstring tmpd;
189                 tmpd += char_type(0);
190                 tmpd += char_type(accent);
191                 keymap_[key] = tmpd;
192         }
193 }
194
195
196 int Trans::load(Lexer & lex)
197 {
198         while (lex.isOK()) {
199                 switch (lex.lex()) {
200                 case KMOD:
201                 {
202                         LYXERR(Debug::KBMAP, "KMOD:\t" << lex.getString());
203                         if (!lex.next(true))
204                                 return -1;
205
206                         LYXERR(Debug::KBMAP, "key\t`" << lex.getString() << '\'');
207
208                         docstring const keys = lex.getDocString();
209
210                         if (!lex.next(true))
211                                 return -1;
212
213                         LYXERR(Debug::KBMAP, "accent\t`" << lex.getString() << '\'');
214
215                         tex_accent accent = getkeymod(lex.getString());
216
217                         if (accent == TEX_NOACCENT)
218                                 return -1;
219
220 #if 1
221                         // FIXME: This code should be removed...
222                         // But we need to fix up all the kmap files first
223                         // so that this field is not present anymore.
224                         if (!lex.next(true))
225                                 return -1;
226
227                         LYXERR(Debug::KBMAP, "allowed\t`" << lex.getString() << '\'');
228
229                         /* string const allowed = lex.getString(); */
230                         addDeadkey(accent, keys /*, allowed*/);
231 #else
232                         addDeadkey(accent, keys);
233 #endif
234                         break;
235                 }
236                 case KCOMB: {
237                         string str;
238
239                         LYXERR(Debug::KBMAP, "KCOMB:");
240                         if (!lex.next(true))
241                                 return -1;
242
243                         str = lex.getString();
244                         LYXERR(Debug::KBMAP, str);
245
246                         tex_accent accent_1 = getkeymod(str);
247                         if (accent_1 == TEX_NOACCENT)
248                                 return -1;
249
250                         if (!lex.next(true))
251                                 return -1;
252
253                         str = lex.getString();
254                         LYXERR(Debug::KBMAP, str);
255
256                         tex_accent accent_2 = getkeymod(str);
257                         if (accent_2 == TEX_NOACCENT) return -1;
258
259                         map<tex_accent, KmodInfo>::iterator it1 =
260                                 kmod_list_.find(accent_1);
261                         map<tex_accent, KmodInfo>::iterator it2 =
262                                 kmod_list_.find(accent_2);
263                         if (it1 == kmod_list_.end() || it2 == kmod_list_.end())
264                                 return -1;
265
266                         // Find what key accent_2 is on - should
267                         // check about accent_1 also
268                         map<char_type, docstring>::iterator it = keymap_.begin();
269                         map<char_type, docstring>::iterator end = keymap_.end();
270                         for (; it != end; ++it) {
271                                 if (!it->second.empty()
272                                     && it->second[0] == 0
273                                     && it->second[1] == accent_2)
274                                         break;
275                         }
276
277                         // could not find accent2 on a key -- this should not happen.
278                         if (it == end)
279                                 return -1;
280
281                         docstring allowed;
282                         if (!lex.next())
283                                 return -1;
284                         allowed = lex.getDocString();
285                         LYXERR(Debug::KBMAP, "allowed: " << to_utf8(allowed));
286
287                         insertException(kmod_list_[accent_1].exception_list,
288                                         it->first, allowed, true, accent_2);
289                 }
290                 break;
291                 case KMAP: {
292                         unsigned char key_from;
293
294                         LYXERR(Debug::KBMAP, "KMAP:\t" << lex.getString());
295
296                         if (!lex.next(true))
297                                 return -1;
298
299                         key_from = static_cast<unsigned char>(lex.getString()[0]);
300                         LYXERR(Debug::KBMAP, "\t`" << lex.getString() << '\'');
301
302                         if (!lex.next(true))
303                                 return -1;
304
305                         docstring const string_to = lex.getDocString();
306                         keymap_[key_from] = string_to;
307                         LYXERR(Debug::KBMAP, "\t`" << to_utf8(string_to) << '\'');
308                         break;
309                 }
310                 case KXMOD: {
311                         tex_accent accent;
312                         char_type key;
313                         docstring str;
314
315                         LYXERR(Debug::KBMAP, "KXMOD:\t" << lex.getString());
316
317                         if (!lex.next(true))
318                                 return -1;
319
320                         LYXERR(Debug::KBMAP, "\t`" << lex.getString() << '\'');
321                         accent = getkeymod(lex.getString());
322
323                         if (!lex.next(true))
324                                 return -1;
325
326                         LYXERR(Debug::KBMAP, "\t`" << lex.getString() << '\'');
327                         key = lex.getDocString()[0];
328
329                         if (!lex.next(true))
330                                 return -1;
331
332                         LYXERR(Debug::KBMAP, "\t`" << lex.getString() << '\'');
333                         str = lex.getDocString();
334
335                         insertException(kmod_list_[accent].exception_list,
336                                         key, str);
337                         break;
338                 }
339                 case Lexer::LEX_FEOF:
340                         LYXERR(Debug::PARSER, "End of parsing");
341                         break;
342                 default:
343                         lex.printError("ParseKeymapFile: Unknown tag: `$$Token'");
344                         return -1;
345                 }
346         }
347         return 0;
348 }
349
350
351 bool Trans::isAccentDefined(tex_accent accent, KmodInfo & i) const
352 {
353         map<tex_accent, KmodInfo>::const_iterator cit = kmod_list_.find(accent);
354         if (cit == kmod_list_.end())
355                 return false;
356         i = cit->second;
357         return true;
358 }
359
360
361 docstring const Trans::process(char_type c, TransManager & k)
362 {
363         docstring const t = match(c);
364
365         if (t.empty() && c != 0)
366                 return k.normalkey(c);
367
368         if (!t.empty() && t[0] != 0)
369                 return t; //return k.normalkey(c);
370
371         return k.deadkey(c, kmod_list_[static_cast<tex_accent>(t[1])]);
372 }
373
374
375 int Trans::load(string const & language)
376 {
377         LexerKeyword kmapTags[] = {
378                 {"\\kcomb", KCOMB },
379                 { "\\kmap", KMAP },
380                 { "\\kmod", KMOD },
381                 { "\\kxmod", KXMOD }
382         };
383
384         FileName const filename = libFileSearch("kbd", language, "kmap");
385         if (filename.empty())
386                 return -1;
387
388         freeKeymap();
389         Lexer lex(kmapTags);
390         lex.setFile(filename);
391
392         int const res = load(lex);
393
394         if (res == 0)
395                 name_ = language;
396         else
397                 name_.erase();
398
399         return res;
400 }
401
402
403 tex_accent getkeymod(string const & p)
404         /* return modifier - decoded from p and update p */
405 {
406         for (int i = 1; i <= TEX_MAX_ACCENT; ++i) {
407                 LYXERR(Debug::KBMAP, "p = " << p
408                        << ", lyx_accent_table[" << i
409                        << "].name = `" << lyx_accent_table[i].name << '\'');
410
411                 if (lyx_accent_table[i].name
412                      && contains(p, lyx_accent_table[i].name)) {
413                         LYXERR(Debug::KBMAP, "Found it!");
414                         return static_cast<tex_accent>(i);
415                 }
416         }
417         return TEX_NOACCENT;
418 }
419
420
421 /////////////////////////////////////////////////////////////////////
422 //
423 // TransState
424 //
425 /////////////////////////////////////////////////////////////////////
426
427
428 // TransFSMData
429 TransFSMData::TransFSMData() : deadkey_(0), deadkey2_(0), init_state_(nullptr),
430         deadkey_state_(nullptr), combined_state_(nullptr), currentState(nullptr)
431 {
432 }
433
434
435 // TransState
436 char_type const TransState::TOKEN_SEP = 4;
437
438
439 // TransInitState
440 TransInitState::TransInitState()
441 {
442         init_state_ = this;
443 }
444
445
446 docstring const TransInitState::normalkey(char_type c)
447 {
448         docstring res;
449         res = c;
450         return res;
451 }
452
453
454 docstring const TransInitState::deadkey(char_type c, KmodInfo d)
455 {
456         deadkey_ = c;
457         deadkey_info_ = d;
458         currentState = deadkey_state_;
459         return docstring();
460 }
461
462
463 // TransDeadkeyState
464 TransDeadkeyState::TransDeadkeyState()
465 {
466         deadkey_state_ = this;
467 }
468
469
470 docstring const TransDeadkeyState::normalkey(char_type c)
471 {
472         docstring res;
473
474         KmodException::iterator it = deadkey_info_.exception_list.begin();
475         KmodException::iterator end = deadkey_info_.exception_list.end();
476
477         for (; it != end; ++it) {
478                 if (it->c == c) {
479                         res = it->data;
480                         break;
481                 }
482         }
483         if (it == end) {
484                 res = doAccent(c, deadkey_info_.accent);
485         }
486         currentState = init_state_;
487         return res;
488 }
489
490
491 docstring const TransDeadkeyState::deadkey(char_type c, KmodInfo d)
492 {
493         docstring res;
494
495         // Check if the same deadkey was typed twice
496         if (deadkey_ == c) {
497                 res = deadkey_;
498                 deadkey_ = 0;
499                 deadkey_info_.accent = TEX_NOACCENT;
500                 currentState = init_state_;
501                 return res;
502         }
503
504         // Check if it is a combination or an exception
505         KmodException::const_iterator cit = deadkey_info_.exception_list.begin();
506         KmodException::const_iterator end = deadkey_info_.exception_list.end();
507         for (; cit != end; ++cit) {
508                 if (cit->combined && cit->accent == d.accent) {
509                         deadkey2_ = c;
510                         deadkey2_info_ = d;
511                         comb_info_ = (*cit);
512                         currentState = combined_state_;
513                         return docstring();
514                 }
515                 if (cit->c == c) {
516                         res = cit->data;
517                         deadkey_ = 0;
518                         deadkey_info_.accent = TEX_NOACCENT;
519                         currentState = init_state_;
520                         return res;
521                 }
522         }
523
524         // Not a combination or an exception.
525         // Output deadkey1 and keep deadkey2
526
527         if (deadkey_!= 0)
528                 res = deadkey_;
529         deadkey_ = c;
530         deadkey_info_ = d;
531         currentState = deadkey_state_;
532         return res;
533 }
534
535
536 TransCombinedState::TransCombinedState()
537 {
538         combined_state_ = this;
539 }
540
541
542 docstring const TransCombinedState::normalkey(char_type c)
543 {
544         docstring const temp = doAccent(c, deadkey2_info_.accent);
545         docstring const res = doAccent(temp, deadkey_info_.accent);
546         currentState = init_state_;
547         return res;
548 }
549
550
551 docstring const TransCombinedState::deadkey(char_type c, KmodInfo d)
552 {
553         // Third key in a row. Output the first one and
554         // reenter with shifted deadkeys
555         docstring res;
556         if (deadkey_ != 0)
557                 res = deadkey_;
558         res += TOKEN_SEP;
559         deadkey_ = deadkey2_;
560         deadkey_info_ = deadkey2_info_;
561         res += deadkey_state_->deadkey(c, d);
562         return res;
563 }
564
565
566 // TransFSM
567 TransFSM::TransFSM()
568         : TransFSMData(), TransInitState(), TransDeadkeyState(), TransCombinedState()
569 {
570         currentState = init_state_;
571 }
572
573
574 // TransManager
575
576 // Initialize static member.
577 Trans TransManager::default_;
578
579
580 TransManager::TransManager()
581         : active_(&default_)
582 {}
583
584
585 int TransManager::setPrimary(string const & language)
586 {
587         if (t1_.getName() == language)
588                 return 0;
589
590         return t1_.load(language);
591 }
592
593
594 int TransManager::setSecondary(string const & language)
595 {
596         if (t2_.getName() == language)
597                 return 0;
598
599         return t2_.load(language);
600 }
601
602
603 void TransManager::enablePrimary()
604 {
605         if (t1_.isDefined())
606                 active_ = &t1_;
607
608         LYXERR(Debug::KBMAP, "Enabling primary keymap");
609 }
610
611
612 void TransManager::enableSecondary()
613 {
614         if (t2_.isDefined())
615                 active_ = &t2_;
616         LYXERR(Debug::KBMAP, "Enabling secondary keymap");
617 }
618
619
620 void TransManager::disableKeymap()
621 {
622         active_ = &default_;
623         LYXERR(Debug::KBMAP, "Disabling keymap");
624 }
625
626
627 void  TransManager::translateAndInsert(char_type c, Text * text, Cursor & cur)
628 {
629         docstring res = active_->process(c, *this);
630
631         // Process with tokens
632         docstring temp;
633
634         while (res.length() > 0) {
635                 res = split(res, temp, TransState::TOKEN_SEP);
636                 insert(temp, text, cur);
637         }
638 }
639
640
641 void TransManager::insert(docstring const & str, Text * text, Cursor & cur)
642 {
643         for (size_t i = 0, n = str.size(); i != n; ++i)
644                 text->insertChar(cur, str[i]);
645 }
646
647
648 void TransManager::deadkey(char_type c, tex_accent accent, Text * t, Cursor & cur)
649 {
650         if (c == 0 && active_ != &default_) {
651                 // A deadkey was pressed that cannot be printed
652                 // or a accent command was typed in the minibuffer
653                 KmodInfo i;
654                 if (active_->isAccentDefined(accent, i)) {
655                         docstring const res = trans_fsm_
656                                 .currentState->deadkey(c, i);
657                         insert(res, t, cur);
658                         return;
659                 }
660         }
661
662         if (active_ == &default_ || c == 0) {
663                 KmodInfo i;
664                 i.accent = accent;
665                 i.data.erase();
666                 docstring res = trans_fsm_.currentState->deadkey(c, i);
667                 insert(res, t, cur);
668         } else {
669                 // Go through the translation
670                 translateAndInsert(c, t, cur);
671         }
672 }
673
674
675 } // namespace lyx