git.lyx.org Git - lyx.git/blobdiff - src/Thesaurus.cpp
Account for old versions of Pygments
[lyx.git] / src / Thesaurus.cpp
index 086ab6239df3ea5b76547baf1174550ffb8cbd0a..cf8567eb53f5c9a5403296c3e1615ccd3ce7530d 100644 (file)
 
 #include "Thesaurus.h"
 
-#include "support/debug.h"
-#include "support/gettext.h"
 #include "LyXRC.h"
 
+#include "SpellChecker.h"
+#include "WordLangTuple.h"
+
 #include "support/FileNameList.h"
+#include "support/Package.h"
+#include "support/debug.h"
+#include "support/docstring_list.h"
 #include "support/filetools.h"
+#include "support/gettext.h"
 #include "support/lstrings.h"
 #include "support/os.h"
-#include "support/unicode.h"
+
+#include <cstdio>
+#include MYTHES_H_LOCATION
 
 #include "frontends/alert.h"
 
 #include <algorithm>
 #include <cstring>
+#include <fstream>
 
 using namespace std;
 using namespace lyx::support;
@@ -34,158 +42,151 @@ using namespace lyx::support::os;
 
 namespace lyx {
 
-#ifndef HAVE_LIBMYTHES
-#ifdef HAVE_LIBAIKSAURUS
-
-
-Thesaurus::Thesaurus()
-       : thes_(new Aiksaurus)
-{}
+namespace {
 
+typedef std::map<docstring, MyThes *> Thesauri; // language code -> MyThes instance; the pointers are deleted by Thesaurus::Private
 
-Thesaurus::~Thesaurus()
-{
-       delete thes_;
-}
-
+} // namespace anon
 
-Thesaurus::Meanings Thesaurus::lookup(docstring const & t, docstring const &)
+struct Thesaurus::Private
 {
-       Meanings meanings;
-
-       // aiksaurus is for english text only, therefore it does not work
-       // with non-ascii strings.
-       // The interface of the Thesaurus class uses docstring because a
-       // non-english thesaurus is possible in theory.
-       if (!support::isAscii(t))
-               // to_ascii() would assert
-               return meanings;
-
-       string const text = to_ascii(t);
-
-       docstring error = from_ascii(thes_->error());
-       if (!error.empty()) {
-               static bool sent_error = false;
-               if (!sent_error) {
-                       frontend::Alert::error(_("Thesaurus failure"),
-                                    bformat(_("Aiksaurus returned the following error:\n\n%1$s."),
-                                            error));
-                       sent_error = true;
+       ~Private() // releases every MyThes object stored in thes_
+       {
+               for (Thesauri::iterator it = thes_.begin();
+                    it != thes_.end(); ++it) {
+                       delete it->second; // the map holds raw owning pointers
                }
-               return meanings;
        }
-       if (!thes_->find(text.c_str()))
-               return meanings;
-
-       // weird api, but ...
-
-       int prev_meaning = -1;
-       int cur_meaning;
-       docstring meaning;
-
-       // correct, returns "" at the end
-       string ret = thes_->next(cur_meaning);
-
-       while (!ret.empty()) {
-               if (cur_meaning != prev_meaning) {
-                       meaning = from_ascii(ret);
-                       ret = thes_->next(cur_meaning);
-                       prev_meaning = cur_meaning;
-               } else {
-                       if (ret != text)
-                               meanings[meaning].push_back(from_ascii(ret));
+       /// \return true if thes_ has an entry for \p lang whose MyThes pointer is non-null
+       bool thesaurusAvailable(docstring const & lang) const
+       {
+               for (Thesauri::const_iterator it = thes_.begin();
+                       it != thes_.end(); ++it) {
+                               if (it->first == lang)
+                                       if (it->second)
+                                               return true;
                }
-
-               ret = thes_->next(cur_meaning);
+               return false;
        }
 
-       for (Meanings::iterator it = meanings.begin();
-            it != meanings.end(); ++it)
-               sort(it->second.begin(), it->second.end());
-
-       return meanings;
-}
-
-
-bool Thesaurus::thesaurusAvailable(docstring const & lang) const
-{
-       // we support English only
-       return prefixIs(lang, from_ascii("en_"));
-}
+       /// (idx file name, data file name) as returned by getThesaurus(); empty strings mean "not found"
+       typedef std::pair<std::string, std::string> ThesFiles;
+       /// locate the idx/dat files for \p lang: in directory \p path, or (second overload) in the lyxrc, system and user directories
+       ThesFiles getThesaurus(string const & path, docstring const & lang);
+       ThesFiles getThesaurus(docstring const & lang);
+       /// add a thesaurus to the list; returns true if one for \p lang is already loaded
+       bool addThesaurus(docstring const & lang);
 
-#endif // HAVE_LIBAIKSAURUS
-#endif // !HAVE_LIBMYTHES
+       /// the thesauri, keyed by language code
+       Thesauri thes_;
 
+       /// name of the sub-directory below the system/user support directory
+       /// where the lookup of the data+idx files will happen
+       const string dataDirectory(void) { return "thes"; }
 
-#ifdef HAVE_LIBMYTHES
+};
 
-namespace {
 
-string const to_iconv_encoding(docstring const & s, string const & encoding)
+pair<string,string> Thesaurus::Private::getThesaurus(string const & path, docstring const & lang) // returns (idx file, dat file) found in path for lang; both empty if no idx file matches
 {
-       std::vector<char> const encoded =
-               ucs4_to_eightbit(s.data(), s.length(), encoding);
-       return string(encoded.begin(), encoded.end());
+       FileName base(path);
+       if (!base.isDirectory()) {
+               return make_pair(string(), string());
+       }
+       FileNameList const idx_files = base.dirList("idx");
+       FileNameList const data_files = base.dirList("dat");
+       string idx;
+       string data;
+       string basename;
+
+       LYXERR(Debug::FILES, "thesaurus path: " << path);
+       for (FileNameList::const_iterator it = idx_files.begin(); it != idx_files.end(); ++it) {
+               basename = it->onlyFileNameWithoutExt();
+               if (contains(basename, to_ascii(lang))) {
+                       // do not use more specific dicts: skip names containing '_' or '-' when lang has none
+                       if (contains(basename, '_') && !contains(lang, '_'))
+                               continue;
+                       if (contains(basename, '-') && !contains(lang, '-'))
+                               continue;
+                       ifstream ifs(it->absFileName().c_str());
+                       if (ifs) {
+                               // check for appropriate version of index file (header: encoding line, then item count)
+                               string encoding; // first line is encoding
+                               int items = 0;   // second line is no. of items
+                               getline(ifs,encoding);
+                               ifs >> items;
+                               if (ifs.fail()) {
+                                       LYXERR(Debug::FILES, "ignore irregular thesaurus idx file: " << it->absFileName());
+                                       continue;
+                               }
+                               if (encoding.length() == 0 || encoding.find_first_of(',') != string::npos) {
+                                       LYXERR(Debug::FILES, "ignore version1 thesaurus idx file: " << it->absFileName());
+                                       continue;
+                               }
+                       }
+                       idx = it->absFileName(); // NOTE(review): also reached when the file could not be opened (ifs false) - confirm this is intended
+                       LYXERR(Debug::FILES, "selected thesaurus idx file: " << idx);
+                       break;
+               }
+       }
+       if (idx.empty()) {
+               // try with a more general dictionary: retry with shortcode, presumably the part of lang before '_' / '-' (verify split())
+               docstring shortcode;
+               if (contains(lang, '_')) {
+                       split(lang, shortcode, '_');
+                       LYXERR(Debug::FILES, "Did not find thesaurus for LANG code "
+                              << lang << ". Trying with " << shortcode);
+                       return getThesaurus(path, shortcode);
+               }
+               else if (contains(lang, '-')) {
+                       split(lang, shortcode, '-');
+                       LYXERR(Debug::FILES, "Did not find thesaurus for LANG code "
+                              << lang << ". Trying with " << shortcode);
+                       return getThesaurus(path, shortcode);
+               }
+               return make_pair(string(), string());
+       }
+       for (support::FileNameList::const_iterator it = data_files.begin(); it != data_files.end(); ++it) {
+               if (contains(it->onlyFileName(), basename)) { // basename is the extension-less name of the selected idx file
+                       data = it->absFileName();
+                       LYXERR(Debug::FILES, "selected thesaurus data file: " << data);
+                       break;
+               }
+       }
+       return make_pair(idx, data);
 }
 
 
-docstring const from_iconv_encoding(string const & s, string const & encoding)
+pair<string,string> Thesaurus::Private::getThesaurus(docstring const & lang) // searches the lyxrc directory, then the system directory, then the user directory
 {
-       std::vector<char_type> const ucs4 =
-               eightbit_to_ucs4(s.data(), s.length(), encoding);
-       return docstring(ucs4.begin(), ucs4.end());
-}
-
-} // namespace anon
-
-
-Thesaurus::Thesaurus()
-{}
+       string const thes_path = external_path(lyxrc.thesaurusdir_path);
+       pair<string,string> result ;
 
+       if (thesaurusAvailable(lang))
+               return make_pair(string(), string()); // already loaded: no file names are reported
 
-Thesaurus::~Thesaurus()
-{
-       for (Thesauri::iterator it = thes_.begin();
-            it != thes_.end(); ++it) {
-               delete it->second;
+       if (!thes_path.empty())
+               result = getThesaurus(thes_path, lang); // 1. directory from lyxrc.thesaurusdir_path
+       if (result.first.empty() || result.second.empty()) {
+               string const sys_path = external_path(addName(lyx::support::package().system_support().absFileName(),dataDirectory())) ;
+               result = getThesaurus(sys_path, lang); // 2. dataDirectory() below the system support directory
+       }
+       if (result.first.empty() || result.second.empty()) {
+               string const user_path = external_path(addName(lyx::support::package().user_support().absFileName(),dataDirectory())) ;
+               result = getThesaurus(user_path, lang); // 3. dataDirectory() below the user support directory
        }
+       return result;
 }
 
 
-bool Thesaurus::addThesaurus(docstring const & lang)
+bool Thesaurus::Private::addThesaurus(docstring const & lang)
 {
-       string const thes_path = external_path(lyxrc.thesaurusdir_path);
-       LYXERR(Debug::FILES, "thesaurus path: " << thes_path);
-       if (thes_path.empty())
-               return false;
-
        if (thesaurusAvailable(lang))
                return true;
 
-       FileNameList const idx_files = FileName(thes_path).dirList("idx");
-       FileNameList const data_files = FileName(thes_path).dirList("dat");
-       string idx;
-       string data;
-
-       for (FileNameList::const_iterator it = idx_files.begin();
-            it != idx_files.end(); ++it) {
-               LYXERR(Debug::FILES, "found thesaurus idx file: " << it->onlyFileName());
-               if (contains(it->onlyFileName(), to_ascii(lang))) {
-                       idx = it->absFilename();
-                       LYXERR(Debug::FILES, "selected thesaurus idx file: " << idx);
-                       break;
-                       }
-               }
-
-       for (support::FileNameList::const_iterator it = data_files.begin();
-            it != data_files.end(); ++it) {
-               LYXERR(Debug::FILES, "found thesaurus data file: " << it->onlyFileName());
-               if (contains(it->onlyFileName(), to_ascii(lang))) {
-                       data = it->absFilename();
-                       LYXERR(Debug::FILES, "selected thesaurus data file: " << data);
-                       break;
-                       }
-               }
+       ThesFiles files = getThesaurus(lang);
+       string const idx = files.first;
+       string const data = files.second;
 
        if (idx.empty() || data.empty())
                return false;
@@ -199,28 +200,33 @@ bool Thesaurus::addThesaurus(docstring const & lang)
 
 bool Thesaurus::thesaurusAvailable(docstring const & lang) const
 {
-       for (Thesauri::const_iterator it = thes_.begin();
-            it != thes_.end(); ++it) {
-               if (it->first == lang)
-                       if (it->second)
-                               return true;
-       }
+       return d->thesaurusAvailable(lang); // delegate to the private implementation object
+}
+
 
-       return false;
+bool Thesaurus::thesaurusInstalled(docstring const & lang) const
+{
+       if (thesaurusAvailable(lang)) // an already loaded thesaurus counts as installed
+               return true;
+       pair<string, string> files = d->getThesaurus(lang); // otherwise look for the idx/dat files on disk
+       return (!files.first.empty() && !files.second.empty()); // both files are required
 }
 
 
-Thesaurus::Meanings Thesaurus::lookup(docstring const & t, docstring const & lang)
+Thesaurus::Meanings Thesaurus::lookup(WordLangTuple const & wl)
 {
        Meanings meanings;
        MyThes * mythes = 0;
 
-       if (!addThesaurus(lang))
+       docstring const lang_code = from_ascii(wl.lang()->code());
+       docstring const t = wl.word();
+
+       if (!d->addThesaurus(lang_code))
                return meanings;
 
-       for (Thesauri::const_iterator it = thes_.begin();
-            it != thes_.end(); ++it) {
-               if (it->first == lang) {
+       for (Thesauri::const_iterator it = d->thes_.begin();
+            it != d->thes_.end(); ++it) {
+               if (it->first == lang_code) {
                        mythes = it->second;
                        break;
                }
@@ -235,24 +241,38 @@ Thesaurus::Meanings Thesaurus::lookup(docstring const & t, docstring const & lan
        string const text = to_iconv_encoding(support::lowercase(t), encoding);
        int len = strlen(text.c_str());
        int count = mythes->Lookup(text.c_str(), len, &pmean);
-       if (!count)
-               return meanings;
+       if (!count) {
+               SpellChecker * speller = theSpellChecker();
+               if (!speller)
+                       return meanings;
+               docstring_list suggestions;
+               speller->stem(wl, suggestions);
+               for (size_t i = 0; i != suggestions.size(); ++i) {
+                       string const wordform = to_iconv_encoding(support::lowercase(suggestions[i]), encoding);
+                       len = strlen(wordform.c_str());
+                       count = mythes->Lookup(wordform.c_str(), len, &pmean);
+                       if (count)
+                               break;
+               }
+               if (!count)
+                       return meanings;
+       }
 
        // don't change value of pmean or count
        // they are needed for the CleanUpAfterLookup routine
        mentry * pm = pmean;
        docstring meaning;
-       docstring ret;
        for (int i = 0; i < count; i++) {
+               vector<docstring> ret;
                meaning = from_iconv_encoding(string(pm->defn), encoding);
                // remove silly item
                if (support::prefixIs(meaning, '-'))
                        meaning = support::ltrim(meaning, "- ");
                for (int j = 0; j < pm->count; j++) {
-                       ret = from_iconv_encoding(string(pm->psyns[j]), encoding);
+                       ret.push_back(from_iconv_encoding(string(pm->psyns[j]), encoding));
                }
-       meanings[meaning].push_back(ret);
-       pm++;
+               meanings[meaning] = ret;
+               ++pm;
        }
        // now clean up all allocated memory
        mythes->CleanUpAfterLookup(&pmean, count);
@@ -264,26 +284,18 @@ Thesaurus::Meanings Thesaurus::lookup(docstring const & t, docstring const & lan
        return meanings;
 }
 
-#else
-#ifndef HAVE_LIBAIKSAURUS
+
 Thesaurus::Thesaurus()
+       : d(new Thesaurus::Private) // private implementation object, released in ~Thesaurus()
 {
 }
 
 
 Thesaurus::~Thesaurus()
 {
+       delete d; // d was allocated with new in the constructor
 }
 
-
-Thesaurus::Meanings Thesaurus::lookup(docstring const &, docstring const &)
-{
-       return Meanings();
-}
-
-#endif
-#endif // HAVE_LIBMYTHES
-
 // Global instance
 Thesaurus thesaurus;