X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=src%2FThesaurus.cpp;h=cf8567eb53f5c9a5403296c3e1615ccd3ce7530d;hb=28be7d552f62cc02fa86d7f79201d089bfb2d7b5;hp=a7dbca0d79b36bf0d4fbdfe2aba8e053b0e58f31;hpb=561802d41e90a82a8110c790a8270f35fcba41d9;p=lyx.git diff --git a/src/Thesaurus.cpp b/src/Thesaurus.cpp index a7dbca0d79..cf8567eb53 100644 --- a/src/Thesaurus.cpp +++ b/src/Thesaurus.cpp @@ -13,20 +13,28 @@ #include "Thesaurus.h" -#include "support/debug.h" -#include "support/gettext.h" #include "LyXRC.h" +#include "SpellChecker.h" +#include "WordLangTuple.h" + #include "support/FileNameList.h" +#include "support/Package.h" +#include "support/debug.h" +#include "support/docstring_list.h" #include "support/filetools.h" +#include "support/gettext.h" #include "support/lstrings.h" #include "support/os.h" -#include "support/unicode.h" + +#include +#include MYTHES_H_LOCATION #include "frontends/alert.h" #include #include +#include using namespace std; using namespace lyx::support; @@ -34,158 +42,151 @@ using namespace lyx::support::os; namespace lyx { -#ifndef HAVE_LIBMYTHES -#ifdef HAVE_LIBAIKSAURUS - - -Thesaurus::Thesaurus() - : thes_(new Aiksaurus) -{} +namespace { +typedef std::map Thesauri; -Thesaurus::~Thesaurus() -{ - delete thes_; -} - +} // namespace anon -Thesaurus::Meanings Thesaurus::lookup(docstring const & t, docstring const &) +struct Thesaurus::Private { - Meanings meanings; - - // aiksaurus is for english text only, therefore it does not work - // with non-ascii strings. - // The interface of the Thesaurus class uses docstring because a - // non-english thesaurus is possible in theory. - if (!support::isAscii(t)) - // to_ascii() would assert - return meanings; - - string const text = to_ascii(t); - - docstring error = from_ascii(thes_->error()); - if (!error.empty()) { - static bool sent_error = false; - if (!sent_error) { - frontend::Alert::error(_("Thesaurus failure"), - bformat(_("Aiksaurus returned the following error:\n\n%1$s."), - error)); - sent_error = true; + ~Private() + { + for (Thesauri::iterator it = thes_.begin(); + it != thes_.end(); ++it) { + delete it->second; } - return meanings; } - if (!thes_->find(text.c_str())) - return meanings; - - // weird api, but ... - - int prev_meaning = -1; - int cur_meaning; - docstring meaning; - - // correct, returns "" at the end - string ret = thes_->next(cur_meaning); - - while (!ret.empty()) { - if (cur_meaning != prev_meaning) { - meaning = from_ascii(ret); - ret = thes_->next(cur_meaning); - prev_meaning = cur_meaning; - } else { - if (ret != text) - meanings[meaning].push_back(from_ascii(ret)); + /// + bool thesaurusAvailable(docstring const & lang) const + { + for (Thesauri::const_iterator it = thes_.begin(); + it != thes_.end(); ++it) { + if (it->first == lang) + if (it->second) + return true; } - - ret = thes_->next(cur_meaning); + return false; } - for (Meanings::iterator it = meanings.begin(); - it != meanings.end(); ++it) - sort(it->second.begin(), it->second.end()); - - return meanings; -} - - -bool Thesaurus::thesaurusAvailable(docstring const & lang) const -{ - // we support English only - return prefixIs(lang, from_ascii("en_")); -} + /// + typedef std::pair ThesFiles; + /// + ThesFiles getThesaurus(string const & path, docstring const & lang); + ThesFiles getThesaurus(docstring const & lang); + /// add a thesaurus to the list + bool addThesaurus(docstring const & lang); -#endif // HAVE_LIBAIKSAURUS -#endif // !HAVE_LIBMYTHES + /// the thesauri + Thesauri thes_; + /// the location below system/user directory + /// there the data+idx files lookup will happen + const string dataDirectory(void) { return "thes"; } -#ifdef HAVE_LIBMYTHES +}; -namespace { -string const to_iconv_encoding(docstring const & s, string const & encoding) +pair Thesaurus::Private::getThesaurus(string const & path, docstring const & lang) { - std::vector const encoded = - ucs4_to_eightbit(s.data(), s.length(), encoding); - return string(encoded.begin(), encoded.end()); + FileName base(path); + if (!base.isDirectory()) { + return make_pair(string(), string()); + } + FileNameList const idx_files = base.dirList("idx"); + FileNameList const data_files = base.dirList("dat"); + string idx; + string data; + string basename; + + LYXERR(Debug::FILES, "thesaurus path: " << path); + for (FileNameList::const_iterator it = idx_files.begin(); it != idx_files.end(); ++it) { + basename = it->onlyFileNameWithoutExt(); + if (contains(basename, to_ascii(lang))) { + // do not use more specific dicts. + if (contains(basename, '_') && !contains(lang, '_')) + continue; + if (contains(basename, '-') && !contains(lang, '-')) + continue; + ifstream ifs(it->absFileName().c_str()); + if (ifs) { + // check for appropriate version of index file + string encoding; // first line is encoding + int items = 0; // second line is no. of items + getline(ifs,encoding); + ifs >> items; + if (ifs.fail()) { + LYXERR(Debug::FILES, "ignore irregular thesaurus idx file: " << it->absFileName()); + continue; + } + if (encoding.length() == 0 || encoding.find_first_of(',') != string::npos) { + LYXERR(Debug::FILES, "ignore version1 thesaurus idx file: " << it->absFileName()); + continue; + } + } + idx = it->absFileName(); + LYXERR(Debug::FILES, "selected thesaurus idx file: " << idx); + break; + } + } + if (idx.empty()) { + // try with a more general dictionary + docstring shortcode; + if (contains(lang, '_')) { + split(lang, shortcode, '_'); + LYXERR(Debug::FILES, "Did not find thesaurus for LANG code " + << lang << ". Trying with " << shortcode); + return getThesaurus(path, shortcode); + } + else if (contains(lang, '-')) { + split(lang, shortcode, '-'); + LYXERR(Debug::FILES, "Did not find thesaurus for LANG code " + << lang << ". Trying with " << shortcode); + return getThesaurus(path, shortcode); + } + return make_pair(string(), string()); + } + for (support::FileNameList::const_iterator it = data_files.begin(); it != data_files.end(); ++it) { + if (contains(it->onlyFileName(), basename)) { + data = it->absFileName(); + LYXERR(Debug::FILES, "selected thesaurus data file: " << data); + break; + } + } + return make_pair(idx, data); } -docstring const from_iconv_encoding(string const & s, string const & encoding) +pair Thesaurus::Private::getThesaurus(docstring const & lang) { - std::vector const ucs4 = - eightbit_to_ucs4(s.data(), s.length(), encoding); - return docstring(ucs4.begin(), ucs4.end()); -} - -} // namespace anon - - -Thesaurus::Thesaurus() -{} + string const thes_path = external_path(lyxrc.thesaurusdir_path); + pair result ; + if (thesaurusAvailable(lang)) + return make_pair(string(), string()); -Thesaurus::~Thesaurus() -{ - for (Thesauri::iterator it = thes_.begin(); - it != thes_.end(); ++it) { - delete it->second; + if (!thes_path.empty()) + result = getThesaurus(thes_path, lang); + if (result.first.empty() || result.second.empty()) { + string const sys_path = external_path(addName(lyx::support::package().system_support().absFileName(),dataDirectory())) ; + result = getThesaurus(sys_path, lang); + } + if (result.first.empty() || result.second.empty()) { + string const user_path = external_path(addName(lyx::support::package().user_support().absFileName(),dataDirectory())) ; + result = getThesaurus(user_path, lang); } + return result; } -bool Thesaurus::addThesaurus(docstring const & lang) +bool Thesaurus::Private::addThesaurus(docstring const & lang) { - string const thes_path = external_path(lyxrc.thesaurusdir_path); - LYXERR(Debug::FILES, "thesaurus path: " << thes_path); - if (thes_path.empty()) - return false; - if (thesaurusAvailable(lang)) return true; - FileNameList const idx_files = FileName(thes_path).dirList("idx"); - FileNameList const data_files = FileName(thes_path).dirList("dat"); - string idx; - string data; - - for (FileNameList::const_iterator it = idx_files.begin(); - it != idx_files.end(); ++it) { - LYXERR(Debug::FILES, "found thesaurus idx file: " << it->onlyFileName()); - if (contains(it->onlyFileName(), to_ascii(lang))) { - idx = it->absFilename(); - LYXERR(Debug::FILES, "selected thesaurus idx file: " << idx); - break; - } - } - - for (support::FileNameList::const_iterator it = data_files.begin(); - it != data_files.end(); ++it) { - LYXERR(Debug::FILES, "found thesaurus data file: " << it->onlyFileName()); - if (contains(it->onlyFileName(), to_ascii(lang))) { - data = it->absFilename(); - LYXERR(Debug::FILES, "selected thesaurus data file: " << data); - break; - } - } + ThesFiles files = getThesaurus(lang); + string const idx = files.first; + string const data = files.second; if (idx.empty() || data.empty()) return false; @@ -199,28 +200,33 @@ bool Thesaurus::addThesaurus(docstring const & lang) bool Thesaurus::thesaurusAvailable(docstring const & lang) const { - for (Thesauri::const_iterator it = thes_.begin(); - it != thes_.end(); ++it) { - if (it->first == lang) - if (it->second) - return true; - } + return d->thesaurusAvailable(lang); +} + - return false; +bool Thesaurus::thesaurusInstalled(docstring const & lang) const +{ + if (thesaurusAvailable(lang)) + return true; + pair files = d->getThesaurus(lang); + return (!files.first.empty() && !files.second.empty()); } -Thesaurus::Meanings Thesaurus::lookup(docstring const & t, docstring const & lang) +Thesaurus::Meanings Thesaurus::lookup(WordLangTuple const & wl) { Meanings meanings; MyThes * mythes = 0; - if (!addThesaurus(lang)) + docstring const lang_code = from_ascii(wl.lang()->code()); + docstring const t = wl.word(); + + if (!d->addThesaurus(lang_code)) return meanings; - for (Thesauri::const_iterator it = thes_.begin(); - it != thes_.end(); ++it) { - if (it->first == lang) { + for (Thesauri::const_iterator it = d->thes_.begin(); + it != d->thes_.end(); ++it) { + if (it->first == lang_code) { mythes = it->second; break; } @@ -235,24 +241,38 @@ Thesaurus::Meanings Thesaurus::lookup(docstring const & t, docstring const & lan string const text = to_iconv_encoding(support::lowercase(t), encoding); int len = strlen(text.c_str()); int count = mythes->Lookup(text.c_str(), len, &pmean); - if (!count) - return meanings; + if (!count) { + SpellChecker * speller = theSpellChecker(); + if (!speller) + return meanings; + docstring_list suggestions; + speller->stem(wl, suggestions); + for (size_t i = 0; i != suggestions.size(); ++i) { + string const wordform = to_iconv_encoding(support::lowercase(suggestions[i]), encoding); + len = strlen(wordform.c_str()); + count = mythes->Lookup(wordform.c_str(), len, &pmean); + if (count) + break; + } + if (!count) + return meanings; + } // don't change value of pmean or count // they are needed for the CleanUpAfterLookup routine mentry * pm = pmean; docstring meaning; - docstring ret; for (int i = 0; i < count; i++) { + vector ret; meaning = from_iconv_encoding(string(pm->defn), encoding); // remove silly item if (support::prefixIs(meaning, '-')) meaning = support::ltrim(meaning, "- "); for (int j = 0; j < pm->count; j++) { - ret = from_iconv_encoding(string(pm->psyns[j]), encoding); + ret.push_back(from_iconv_encoding(string(pm->psyns[j]), encoding)); } - meanings[meaning].push_back(ret); - pm++; + meanings[meaning] = ret; + ++pm; } // now clean up all allocated memory mythes->CleanUpAfterLookup(&pmean, count); @@ -264,26 +284,18 @@ Thesaurus::Meanings Thesaurus::lookup(docstring const & t, docstring const & lan return meanings; } -#else -#ifndef HAVE_AIKSAURUS + Thesaurus::Thesaurus() + : d(new Thesaurus::Private) { } Thesaurus::~Thesaurus() { + delete d; } - -Thesaurus::Meanings Thesaurus::lookup(docstring const &, docstring const &) -{ - return Meanings(); -} - -#endif -#endif // HAVE_LIBMYTHES - // Global instance Thesaurus thesaurus;