Merge branch 'master' of git.lyx.org:lyx

[lyx.git] / src / Trans.cpp
diff --git a/src/Trans.cpp b/src/Trans.cpp

index f826c0f9ddc1d4e90b310174dfb3c38c9af617ba..ed73c7719b397699db193c51efc127059df18577 100644 (file)
--- a/src/Trans.cpp
+++ b/src/Trans.cpp
@@ -3,7 +3,7 @@
   * This file is part of LyX, the document processor.
   * Licence details can be found in the file COPYING.
   *
- * \author Lars Gullik Bjønnes
+ * \author Lars Gullik Bjønnes
   * \author Matthias Ettrich
   *
   * Full author contact details are available in file CREDITS.
@@ -12,44 +12,131 @@
  #include <config.h>
  
  #include "Trans.h"
+
+#include "Buffer.h"
+#include "BufferView.h"
+#include "Cursor.h"
+#include "CutAndPaste.h"
+#include "Lexer.h"
+#include "LyXRC.h"
+#include "Text.h"
+
+#include "support/debug.h"
+#include "support/docstream.h"
+#include "support/FileName.h"
  #include "support/filetools.h"
  #include "support/lstrings.h"
-#include "Lexer.h"
-#include "debug.h"
-#include "TransState.h"
  
+using namespace std;
+using namespace lyx::support;
  
  namespace lyx {
  
-using support::contains;
-using support::libFileSearch;
-
-using std::endl;
-using std::string;
-using std::map;
+/////////////////////////////////////////////////////////////////////
+//
+// TeXAccents
+//
+/////////////////////////////////////////////////////////////////////
+
+/* The names used by TeX and XWindows for deadkeys/accents are not the same,
+   so here follows a table to clarify the differences. Please correct this
+   if I got it wrong.
+
+   |------------------|------------------|------------------|--------------|
+   |      TeX         |     XWindows     |   \bind/LFUN     | used by intl |
+   |------------------|------------------|------------------|--------------|
+   |    grave         |    grave         |LFUN_ACCENT_GRAVE        | grave
+   |    acute         |    acute         |LFUN_ACCENT_ACUTE        | acute
+   |    circumflex    |    circumflex    |LFUN_ACCENT_CIRCUMFLEX   | circumflex
+   | umlaut/dieresis  |    diaeresis     |LFUN_ACCENT_UMLAUT       | umlaut
+   |    tilde         |    tilde         |LFUN_ACCENT_TILDE        | tilde
+   |    macron        |    macron        |LFUN_ACCENT_MACRON       | macron
+   |    dot           |    abovedot      |LFUN_ACCENT_DOT          | dot
+   |    cedilla       |    cedilla       |LFUN_ACCENT_CEDILLA      | cedilla
+   |    underdot      |                  |LFUN_ACCENT_UNDERDOT     | underdot
+   |    underbar      |                  |LFUN_ACCENT_UNDERBAR     | underbar
+   |    háček         |    caron         |LFUN_ACCENT_CARON        | caron
+   |    breve         |    breve         |LFUN_ACCENT_BREVE        | breve
+   |    tie           |                  |LFUN_ACCENT_TIE          | tie
+   | Hungarian umlaut |    doubleacute   |LFUN_ACCENT_HUNGARIAN_UMLAUT  | hungarian umlaut
+   |    circle        |    abovering     |LFUN_ACCENT_CIRCLE       | circle
+   |                  |    ogonek        |                  |
+   |                  |    iota          |                  |
+   |                  |    voiced_sound  |                  |
+   |                  | semivoiced_sound |                  |
+   */
+static TeXAccent lyx_accent_table[] = { // each row: {tex_accent id, combining code point (.ucs4), keymap name (.name), LFUN (.action)}; doAccent() and getkeymod() index this table by tex_accent value, so the row order presumably mirrors the enum -- TODO confirm against the tex_accent declaration
+       {TEX_NOACCENT,   0,      "",                LFUN_NOACTION},
+       {TEX_ACUTE,      0x0301, "acute",           LFUN_ACCENT_ACUTE},
+       {TEX_GRAVE,      0x0300, "grave",           LFUN_ACCENT_GRAVE},
+       {TEX_MACRON,     0x0304, "macron",          LFUN_ACCENT_MACRON},
+       {TEX_TILDE,      0x0303, "tilde",           LFUN_ACCENT_TILDE},
+       {TEX_PERISPOMENI, 0x0342, "perispomeni",    LFUN_ACCENT_PERISPOMENI},
+       {TEX_UNDERBAR,   0x0320, "underbar",        LFUN_ACCENT_UNDERBAR}, // COMBINING MINUS SIGN BELOW or 0x0331 COMBINING MACRON BELOW ?
+
+       {TEX_CEDILLA,    0x0327, "cedilla",         LFUN_ACCENT_CEDILLA},
+       {TEX_UNDERDOT,   0x0323, "underdot",        LFUN_ACCENT_UNDERDOT},
+       {TEX_CIRCUMFLEX, 0x0302, "circumflex",      LFUN_ACCENT_CIRCUMFLEX},
+       {TEX_CIRCLE,     0x030a, "circle",          LFUN_ACCENT_CIRCLE},
+       {TEX_TIE,        0x0361, "tie",             LFUN_ACCENT_TIE},
+       {TEX_BREVE,      0x0306, "breve",           LFUN_ACCENT_BREVE},
+       {TEX_CARON,      0x030c, "caron",           LFUN_ACCENT_CARON},
+       // Don't fix this typo for compatibility reasons!
+       {TEX_HUNGUML,    0x030b, "hugarian_umlaut", LFUN_ACCENT_HUNGARIAN_UMLAUT},
+       {TEX_UMLAUT,     0x0308, "umlaut",          LFUN_ACCENT_UMLAUT},
+       {TEX_DOT,        0x0307, "dot",             LFUN_ACCENT_DOT},
+       {TEX_OGONEK,     0x0328, "ogonek",          LFUN_ACCENT_OGONEK}
+};
  
  
-// KmodInfo
-KmodInfo::KmodInfo()
+TeXAccent get_accent(FuncCode action) // Return the lyx_accent_table row whose .action equals action.
  {
+       int i = 0;
+       while (i <= TEX_MAX_ACCENT) { // inclusive bound: indices 0..TEX_MAX_ACCENT are scanned
+               if (lyx_accent_table[i].action == action)
+                       return lyx_accent_table[i];
+               ++i;
+       }
+       struct TeXAccent temp = { static_cast<tex_accent>(0), 0, // no row matched: hand back an entry with every field zero
+                                         0, static_cast<FuncCode>(0)};
+       return temp;
  }
  
  
-// Trans class
-
-Trans::Trans()
+static docstring const doAccent(docstring const & s, tex_accent accent) // Put the table's combining character for accent after the first character of s and return normalize_c() of the result.
  {
+       if (s.empty())
+               return docstring(1, lyx_accent_table[accent].ucs4); // nothing to accent: return the bare combining character
+       // The combining mark goes directly after the first character only.
+       odocstringstream os;
+       os.put(s[0]);
+       os.put(lyx_accent_table[accent].ucs4);
+       if (s.length() > 1) {
+               if (accent != TEX_TIE || s.length() > 2) // only TEX_TIE may silently take two characters
+                       lyxerr << "Warning: Too many characters given for accent "
+                              << lyx_accent_table[accent].name << '.' << endl;
+               os << s.substr(1); // the remaining characters follow the mark unchanged
+       }
+       return normalize_c(os.str()); // NOTE(review): normalize_c presumably composes base + mark (NFC) -- confirm in support/docstring
  }
  
  
-Trans::~Trans()
+static docstring const doAccent(char_type c, tex_accent accent) // Single-character overload of doAccent().
  {
-       freeKeymap();
+       return doAccent(docstring(1, c), accent); // wrap c in a one-character docstring and delegate
  }
  
  
+
+/////////////////////////////////////////////////////////////////////
+//
+// Trans
+//
+/////////////////////////////////////////////////////////////////////
+
+
  void Trans::insertException(KmodException & exclist, char_type c,
-                            docstring const & data, bool flag, tex_accent accent)
+       docstring const & data, bool flag, tex_accent accent)
  {
         Keyexc p;
         p.c = c;
@@ -81,26 +168,11 @@ bool Trans::isDefined() const
  }
  
  
-string const & Trans::getName() const
-{
-       return name_;
-}
-
-
-enum kmaptags_ {
+enum { // lexer tag ids for the keymap keywords \kcomb, \kmod, \kmap and \kxmod; Trans::load(string const &) binds the keywords to these values
         KCOMB = 1,
         KMOD,
         KMAP,
-       KXMOD,
-       K_LAST
-};
-
-
-struct keyword_item kmapTags[K_LAST - 1] = {
-       {"\\kcomb", KCOMB },
-       { "\\kmap", KMAP },
-       { "\\kmod", KMOD },
-       { "\\kxmod", KXMOD }
+       KXMOD
  };
  
  
@@ -134,37 +206,33 @@ int Trans::load(Lexer & lex)
                 switch (lex.lex()) {
                 case KMOD:
                 {
-                       LYXERR(Debug::KBMAP) << "KMOD:\t" << lex.getString() << endl;
-
-                       if (lex.next(true)) {
-                               LYXERR(Debug::KBMAP) << "key\t`" << lex.getString()
-                                      << '\'' << endl;
-                       } else
+                       LYXERR(Debug::KBMAP, "KMOD:\t" << lex.getString());
+                       if (!lex.next(true))
                                 return -1;
  
+                       LYXERR(Debug::KBMAP, "key\t`" << lex.getString() << '\'');
+
                         docstring const keys = lex.getDocString();
  
-                       if (lex.next(true)) {
-                               LYXERR(Debug::KBMAP) << "accent\t`" << lex.getString()
-                                              << '\'' << endl;
-                       } else
+                       if (!lex.next(true))
                                 return -1;
  
+                       LYXERR(Debug::KBMAP, "accent\t`" << lex.getString() << '\'');
+
                         tex_accent accent = getkeymod(lex.getString());
  
                         if (accent == TEX_NOACCENT)
                                 return -1;
  
  #if 1
-//#warning This code should be removed...
+                       // FIXME: This code should be removed...
                         // But we need to fix up all the kmap files first
                         // so that this field is not present anymore.
-                       if (lex.next(true)) {
-                               LYXERR(Debug::KBMAP) << "allowed\t`" << lex.getString()
-                                              << '\'' << endl;
-                       } else
+                       if (!lex.next(true))
                                 return -1;
  
+                       LYXERR(Debug::KBMAP, "allowed\t`" << lex.getString() << '\'');
+
                         /* string const allowed = lex.getString(); */
                         addDeadkey(accent, keys /*, allowed*/);
  #else
@@ -175,33 +243,32 @@ int Trans::load(Lexer & lex)
                 case KCOMB: {
                         string str;
  
-                       LYXERR(Debug::KBMAP) << "KCOMB:" << endl;
-                       if (lex.next(true)) {
-                               str = lex.getString();
-                               LYXERR(Debug::KBMAP) << str << endl;
-                       } else
+                       LYXERR(Debug::KBMAP, "KCOMB:");
+                       if (!lex.next(true))
                                 return -1;
  
+                       str = lex.getString();
+                       LYXERR(Debug::KBMAP, str);
+
                         tex_accent accent_1 = getkeymod(str);
-                       if (accent_1 == TEX_NOACCENT) return -1;
+                       if (accent_1 == TEX_NOACCENT)
+                               return -1;
  
-                       if (lex.next(true)) {
-                               str = lex.getString();
-                               LYXERR(Debug::KBMAP) << str << endl;
-                       } else
+                       if (!lex.next(true))
                                 return -1;
  
-                       tex_accent accent_2= getkeymod(str);
+                       str = lex.getString();
+                       LYXERR(Debug::KBMAP, str);
+
+                       tex_accent accent_2 = getkeymod(str);
                         if (accent_2 == TEX_NOACCENT) return -1;
  
                         map<tex_accent, KmodInfo>::iterator it1 =
                                 kmod_list_.find(accent_1);
                         map<tex_accent, KmodInfo>::iterator it2 =
                                 kmod_list_.find(accent_2);
-                       if (it1 == kmod_list_.end()
-                           || it2 == kmod_list_.end()) {
+                       if (it1 == kmod_list_.end() || it2 == kmod_list_.end())
                                 return -1;
-                       }
  
                         // Find what key accent_2 is on - should
                         // check about accent_1 also
@@ -214,39 +281,37 @@ int Trans::load(Lexer & lex)
                                         break;
                         }
                         docstring allowed;
-                       if (lex.next()) {
-                               allowed = lex.getDocString();
-                               LYXERR(Debug::KBMAP) << "allowed: "
-                                                    << to_utf8(allowed) << endl;
-                       } else {
+                       if (!lex.next())
                                 return -1;
-                       }
  
+                       allowed = lex.getDocString();
+                       LYXERR(Debug::KBMAP, "allowed: " << to_utf8(allowed));
+
+                       // FIXME Coverity
+                       // This is being flagged because we could in principle fail
+                       // ever to hit the break above, in which case we exit the loop
+                       // when it == end. Then this crashes.
                         insertException(kmod_list_[accent_1].exception_list,
-                                       it->first, allowed,
-                                       true, accent_2);
+                                       it->first, allowed, true, accent_2);
                 }
                 break;
                 case KMAP: {
                         unsigned char key_from;
  
-                       LYXERR(Debug::KBMAP) << "KMAP:\t" << lex.getString() << endl;
+                       LYXERR(Debug::KBMAP, "KMAP:\t" << lex.getString());
  
-                       if (lex.next(true)) {
-                               key_from = lex.getString()[0];
-                               LYXERR(Debug::KBMAP) << "\t`" << lex.getString() << '\''
-                                       << endl;
-                       } else
+                       if (!lex.next(true))
                                 return -1;
  
-                       if (lex.next(true)) {
-                               docstring const string_to = lex.getDocString();
-                               keymap_[key_from] = string_to;
-                               LYXERR(Debug::KBMAP) << "\t`" << to_utf8(string_to) << '\''
-                                       << endl;
-                       } else
+                       key_from = lex.getString()[0];
+                       LYXERR(Debug::KBMAP, "\t`" << lex.getString() << '\'');
+
+                       if (!lex.next(true))
                                 return -1;
  
+                       docstring const string_to = lex.getDocString();
+                       keymap_[key_from] = string_to;
+                       LYXERR(Debug::KBMAP, "\t`" << to_utf8(string_to) << '\'');
                         break;
                 }
                 case KXMOD: {
@@ -254,39 +319,35 @@ int Trans::load(Lexer & lex)
                         char_type key;
                         docstring str;
  
-                       LYXERR(Debug::KBMAP) << "KXMOD:\t" << lex.getString() << endl;
+                       LYXERR(Debug::KBMAP, "KXMOD:\t" << lex.getString());
  
-                       if (lex.next(true)) {
-                               LYXERR(Debug::KBMAP) << "\t`" << lex.getString() << '\''
-                                       << endl;
-                               accent = getkeymod(lex.getString());
-                       } else
+                       if (!lex.next(true))
                                 return -1;
  
-                       if (lex.next(true)) {
-                               LYXERR(Debug::KBMAP) << "\t`" << lex.getString() << '\''
-                                       << endl;
-                               key = lex.getDocString()[0];
-                       } else
+                       LYXERR(Debug::KBMAP, "\t`" << lex.getString() << '\'');
+                       accent = getkeymod(lex.getString());
+
+                       if (!lex.next(true))
                                 return -1;
  
-                       if (lex.next(true)) {
-                               LYXERR(Debug::KBMAP) << "\t`" << lex.getString() << '\''
-                                       << endl;
-                               str = lex.getDocString();
-                       } else
+                       LYXERR(Debug::KBMAP, "\t`" << lex.getString() << '\'');
+                       key = lex.getDocString()[0];
+
+                       if (!lex.next(true))
                                 return -1;
  
+                       LYXERR(Debug::KBMAP, "\t`" << lex.getString() << '\'');
+                       str = lex.getDocString();
+
                         insertException(kmod_list_[accent].exception_list,
                                         key, str);
                         break;
                 }
                 case Lexer::LEX_FEOF:
-                       LYXERR(Debug::PARSER) << "End of parsing" << endl;
+                       LYXERR(Debug::PARSER, "End of parsing");
                         break;
                 default:
-                       lex.printError("ParseKeymapFile: "
-                                      "Unknown tag: `$$Token'");
+                       lex.printError("ParseKeymapFile: Unknown tag: `$$Token'");
                         return -1;
                 }
         }
@@ -297,11 +358,10 @@ int Trans::load(Lexer & lex)
  bool Trans::isAccentDefined(tex_accent accent, KmodInfo & i) const
  {
         map<tex_accent, KmodInfo>::const_iterator cit = kmod_list_.find(accent);
-       if (cit != kmod_list_.end()) {
-               i = cit->second;
-               return true;
-       }
-       return false;
+       if (cit == kmod_list_.end()) // accent has no entry in kmod_list_: i is left unmodified
+               return false;
+       i = cit->second; // copy the stored KmodInfo out to the caller
+       return true;
  }
  
  
@@ -309,33 +369,38 @@ docstring const Trans::process(char_type c, TransManager & k)
  {
         docstring const t = match(c);
  
-       if (t.empty() && c != 0) {
+       if (t.empty() && c != 0)
                 return k.normalkey(c);
-       } else if (!t.empty() && t[0] != 0) {
-               //return k.normalkey(c);
-               return t;
-       } else {
-               return k.deadkey(c,
-                                kmod_list_[static_cast<tex_accent>(t[1])]);
-       }
+
+       if (!t.empty() && t[0] != 0)
+               return t; //return k.normalkey(c);
+
+       return k.deadkey(c, kmod_list_[static_cast<tex_accent>(t[1])]);
  }
  
  
  int Trans::load(string const & language)
  {
-       support::FileName const filename = libFileSearch("kbd", language, "kmap");
+       LexerKeyword kmapTags[] = {
+               {"\\kcomb", KCOMB },
+               { "\\kmap", KMAP },
+               { "\\kmod", KMOD },
+               { "\\kxmod", KXMOD }
+       };
+
+       FileName const filename = libFileSearch("kbd", language, "kmap");
         if (filename.empty())
                 return -1;
  
         freeKeymap();
-       Lexer lex(kmapTags, K_LAST - 1);
+       Lexer lex(kmapTags);
         lex.setFile(filename);
  
         int const res = load(lex);
  
-       if (res == 0) {
+       if (res == 0)
                 name_ = language;
-       } else
+       else
                 name_.erase();
  
         return res;
@@ -346,14 +411,13 @@ tex_accent getkeymod(string const & p)
         /* return modifier - decoded from p and update p */
  {
         for (int i = 1; i <= TEX_MAX_ACCENT; ++i) {
-               LYXERR(Debug::KBMAP) << "p = " << p
+               LYXERR(Debug::KBMAP, "p = " << p
                        << ", lyx_accent_table[" << i
-                      << "].name = `" << lyx_accent_table[i].name
-                      << '\'' << endl;
+                      << "].name = `" << lyx_accent_table[i].name << '\'');
  
                 if (lyx_accent_table[i].name
                      && contains(p, lyx_accent_table[i].name)) {
-                       LYXERR(Debug::KBMAP) << "Found it!" << endl;
+                       LYXERR(Debug::KBMAP, "Found it!");
                         return static_cast<tex_accent>(i);
                 }
         }
@@ -361,4 +425,258 @@ tex_accent getkeymod(string const & p)
  }
  
  
+/////////////////////////////////////////////////////////////////////
+//
+// TransState
+//
+/////////////////////////////////////////////////////////////////////
+
+
+// TransFSMData: starts with no pending dead keys and every state pointer (including currentState) set to 0.
+TransFSMData::TransFSMData() : deadkey_(0), deadkey2_(0), init_state_(0),
+       deadkey_state_(0), combined_state_(0), currentState(0)
+{
+}
+
+
+// TransState: TOKEN_SEP separates the pieces of a result string; TransManager::translateAndInsert() splits on it.
+char_type const TransState::TOKEN_SEP = 4;
+
+
+// TransInitState: the constructor records this object in init_state_.
+TransInitState::TransInitState()
+{
+       init_state_ = this;
+}
+
+// Initial state, ordinary key: return c unchanged as a one-character string.
+docstring const TransInitState::normalkey(char_type c)
+{
+       docstring res;
+       res = c;
+       return res;
+}
+
+// Initial state, dead key: remember the key and its KmodInfo, switch to the dead-key state, emit nothing yet.
+docstring const TransInitState::deadkey(char_type c, KmodInfo d)
+{
+       deadkey_ = c;
+       deadkey_info_ = d;
+       currentState = deadkey_state_;
+       return docstring();
+}
+
+
+// TransDeadkeyState: the constructor records this object in deadkey_state_.
+TransDeadkeyState::TransDeadkeyState()
+{
+       deadkey_state_ = this;
+}
+
+// Dead-key state, ordinary key: emit the exception string listed for c if there is one, otherwise c with the pending accent applied.
+docstring const TransDeadkeyState::normalkey(char_type c)
+{
+       docstring res;
+       // An exception entry for this key takes precedence over the generic accent.
+       KmodException::iterator it = deadkey_info_.exception_list.begin();
+       KmodException::iterator end = deadkey_info_.exception_list.end();
+
+       for (; it != end; ++it) {
+               if (it->c == c) {
+                       res = it->data;
+                       break;
+               }
+       }
+       if (it == end) { // loop ran to completion: no exception matched
+               res = doAccent(c, deadkey_info_.accent);
+       }
+       currentState = init_state_; // the pending dead key is consumed in both cases
+       return res;
+}
+
+// Dead-key state, another dead key: handles a repeated key, a registered combination, an exception, or a plain replacement of the pending key.
+docstring const TransDeadkeyState::deadkey(char_type c, KmodInfo d)
+{
+       docstring res;
+
+       // Same dead key typed twice: emit the key itself and go back to the initial state
+       if (deadkey_ == c) {
+               res = deadkey_;
+               deadkey_ = 0;
+               deadkey_info_.accent = TEX_NOACCENT;
+               currentState = init_state_;
+               return res;
+       }
+
+       // Check if it is a combination or an exception
+       KmodException::const_iterator cit = deadkey_info_.exception_list.begin();
+       KmodException::const_iterator end = deadkey_info_.exception_list.end();
+       for (; cit != end; ++cit) {
+               if (cit->combined == true && cit->accent == d.accent) { // the pending accent has a combination registered with the new accent
+                       deadkey2_ = c;
+                       deadkey2_info_ = d;
+                       comb_info_ = (*cit);
+                       currentState = combined_state_;
+                       return docstring();
+               }
+               if (cit->c == c) { // exception keyed on the new dead key's character: emit its data and reset
+                       res = cit->data;
+                       deadkey_ = 0;
+                       deadkey_info_.accent = TEX_NOACCENT;
+                       currentState = init_state_;
+                       return res;
+               }
+       }
+
+       // Not a combination or an exception.
+       // Output the pending dead key (if any) and make the new one pending
+
+       if (deadkey_!= 0)
+               res = deadkey_;
+       deadkey_ = c;
+       deadkey_info_ = d;
+       currentState = deadkey_state_; // set explicitly: TransCombinedState::deadkey() also calls in here
+       return res;
+}
+
+// TransCombinedState: the constructor records this object in combined_state_.
+TransCombinedState::TransCombinedState()
+{
+       combined_state_ = this;
+}
+
+// Combined state, ordinary key: apply the second dead key's accent to c, then the first one's to that result, and return to the initial state.
+docstring const TransCombinedState::normalkey(char_type c)
+{
+       docstring const temp = doAccent(c, deadkey2_info_.accent);
+       docstring const res = doAccent(temp, deadkey_info_.accent);
+       currentState = init_state_;
+       return res;
+}
+
+// Combined state, third dead key: flush the oldest dead key and hand the remaining pair to the dead-key state.
+docstring const TransCombinedState::deadkey(char_type c, KmodInfo d)
+{
+       // Third key in a row. Output the first one and
+       // reenter with shifted deadkeys
+       docstring res;
+       if (deadkey_ != 0)
+               res = deadkey_;
+       res += TOKEN_SEP; // separates the flushed key from whatever the re-entry below returns
+       deadkey_ = deadkey2_; // the second dead key becomes the pending one
+       deadkey_info_ = deadkey2_info_;
+       res += deadkey_state_->deadkey(c, d);
+       return res;
+}
+
+
+// TransFSM: constructs the data base and the three state sub-objects, then starts in the initial state.
+TransFSM::TransFSM()
+       : TransFSMData(), TransInitState(), TransDeadkeyState(), TransCombinedState()
+{
+       currentState = init_state_; // NOTE(review): relies on TransInitState() having set init_state_ in a shared TransFSMData base -- confirm in Trans.h
+}
+
+
+// TransManager
+
+// Definition of the static fallback Trans; active_ points at it initially and after disableKeymap().
+Trans TransManager::default_;
+
+// A new manager starts with the default translation active.
+TransManager::TransManager()
+       : active_(&default_)
+{}
+
+// Load language into the primary keymap t1_; returns 0 without reloading if t1_ already has that name, else the result of Trans::load().
+int TransManager::setPrimary(string const & language)
+{
+       if (t1_.getName() == language)
+               return 0;
+
+       return t1_.load(language);
+}
+
+// Load language into the secondary keymap t2_; returns 0 without reloading if t2_ already has that name, else the result of Trans::load().
+int TransManager::setSecondary(string const & language)
+{
+       if (t2_.getName() == language)
+               return 0;
+
+       return t2_.load(language);
+}
+
+// Make the primary keymap active, but only if it is defined.
+void TransManager::enablePrimary()
+{
+       if (t1_.isDefined())
+               active_ = &t1_;
+       // The message is logged whether or not the keymap was actually switched.
+       LYXERR(Debug::KBMAP, "Enabling primary keymap");
+}
+
+// Make the secondary keymap active, but only if it is defined.
+void TransManager::enableSecondary()
+{
+       if (t2_.isDefined())
+               active_ = &t2_;
+       LYXERR(Debug::KBMAP, "Enabling secondary keymap"); // logged whether or not the switch happened
+}
+
+// Switch back to the default translation.
+void TransManager::disableKeymap()
+{
+       active_ = &default_;
+       LYXERR(Debug::KBMAP, "Disabling keymap");
+}
+
+// Run c through the active keymap and insert each TOKEN_SEP-separated piece of the result at the cursor.
+void  TransManager::translateAndInsert(char_type c, Text * text, Cursor & cur)
+{
+       docstring res = active_->process(c, *this);
+
+       // Process the result token by token
+       docstring temp;
+
+       while (res.length() > 0) {
+               res = split(res, temp, TransState::TOKEN_SEP); // presumably temp gets the part before the separator and the rest is returned -- confirm in support/lstrings
+               insert(temp, text, cur);
+       }
+}
+
+// Insert str at the cursor one character at a time via Text::insertChar().
+void TransManager::insert(docstring const & str, Text * text, Cursor & cur)
+{
+       for (size_t i = 0, n = str.size(); i != n; ++i)
+               text->insertChar(cur, str[i]);
+}
+
+// Handle an accent request: feed it to the state machine directly, or route a printable key through the active keymap.
+void TransManager::deadkey(char_type c, tex_accent accent, Text * t, Cursor & cur)
+{
+       if (c == 0 && active_ != &default_) {
+               // A deadkey was pressed that cannot be printed
+               // or an accent command was typed in the minibuffer
+               KmodInfo i;
+               if (active_->isAccentDefined(accent, i) == true) {
+                       docstring const res = trans_fsm_
+                               .currentState->deadkey(c, i);
+                       insert(res, t, cur);
+                       return;
+               }
+       }
+
+       if (active_ == &default_ || c == 0) { // no keymap active, or the active keymap does not define this accent
+               KmodInfo i;
+               i.accent = accent; // bare KmodInfo: only the accent is set, data is cleared
+               i.data.erase();
+               docstring res = trans_fsm_.currentState->deadkey(c, i);
+               insert(res, t, cur);
+       } else {
+               // Go through the translation
+               translateAndInsert(c, t, cur);
+       }
+}
+
+
  } // namespace lyx