* Licence details can be found in the file COPYING.
*
* \author John Levon
+ * \author Jürgen Spitzmüller
*
* Full author contact details are available in file CREDITS.
*/
#include "Thesaurus.h"
+#include "LyXRC.h"
+
+#include "SpellChecker.h"
+#include "WordLangTuple.h"
+
+#include "support/FileNameList.h"
+#include "support/Package.h"
+#include "support/debug.h"
+#include "support/docstring_list.h"
+#include "support/filetools.h"
+#include "support/gettext.h"
#include "support/lstrings.h"
+#include "support/os.h"
+
+#ifdef USE_EXTERNAL_MYTHES
+#include MYTHES_H_LOCATION
+#else
+#include <cstdio>
+#include "support/mythes/mythes.hxx"
+#endif
+
+#include "frontends/alert.h"
#include <algorithm>
+#include <cstring>
+#include <fstream>
+using namespace std;
+using namespace lyx::support;
+using namespace lyx::support::os;
namespace lyx {
-#ifdef HAVE_LIBAIKSAURUS
+namespace {
-using std::sort;
-using std::string;
+/// Maps a language code to the MyThes instance created for it.
+/// The pointers are deleted by the destructor of Thesaurus::Private.
+typedef std::map<docstring, MyThes *> Thesauri;
+} // namespace anon
-Thesaurus::Thesaurus()
- : aik_(new Aiksaurus)
-{}
+/// Private implementation of Thesaurus: holds the MyThes instances created
+/// so far and locates the index/data files belonging to a language.
+struct Thesaurus::Private
+{
+	/// Deletes every MyThes instance stored in thes_.
+	~Private()
+	{
+		for (Thesauri::iterator it = thes_.begin();
+		     it != thes_.end(); ++it) {
+			delete it->second;
+		}
+	}
+	/// \return true if thes_ holds a non-null MyThes instance for \p lang.
+	bool thesaurusAvailable(docstring const & lang) const
+	{
+		// thes_ is keyed by language, so look the entry up directly
+		// instead of comparing against every key in turn.
+		Thesauri::const_iterator const it = thes_.find(lang);
+		return it != thes_.end() && it->second != 0;
+	}
+	/// (index file, data file) pair; a member is empty if no file was found
+	typedef std::pair<std::string, std::string> ThesFiles;
+	/// search directory \p path for the index and data file of \p lang
+	ThesFiles getThesaurus(string const & path, docstring const & lang);
+	/// search the configured, the system and the user directory for the
+	/// index and data file of \p lang
+	ThesFiles getThesaurus(docstring const & lang);
+	/// add a thesaurus to the list
+	bool addThesaurus(docstring const & lang);
-Thesaurus::~Thesaurus()
+	/// the thesauri
+	Thesauri thes_;
+
+	/// the location below system/user directory
+	/// where the data+idx files lookup will happen
+	const string dataDirectory() const { return "thes"; }
+
+};
+
+
+/// Searches directory \p path for an index (idx) file whose base name contains
+/// the language code \p lang, and for a data (dat) file whose name contains
+/// that base name. Index files whose header cannot be parsed, or whose first
+/// line is empty or contains a comma, are skipped.
+/// \return (index file, data file) as absolute file names; both are empty if
+/// \p path is not a directory or no index file was accepted, and the data
+/// member is empty if no data file matched.
+pair<string,string> Thesaurus::Private::getThesaurus(string const & path, docstring const & lang)
+{
+	FileName dir(path);
+	if (!dir.isDirectory())
+		return make_pair(string(), string());
+
+	FileNameList const idx_files = dir.dirList("idx");
+	FileNameList const data_files = dir.dirList("dat");
+	string idx;
+	string data;
+	string basename;
+
+	LYXERR(Debug::FILES, "thesaurus path: " << path);
+	for (FileNameList::const_iterator f = idx_files.begin(); f != idx_files.end(); ++f) {
+		basename = f->onlyFileNameWithoutExt();
+		if (!contains(basename, to_ascii(lang)))
+			continue;
+
+		ifstream ifs(f->absFileName().c_str());
+		if (ifs) {
+			// header of the index file: encoding on the first line,
+			// number of items on the second
+			string encoding;
+			int items = 0;
+			getline(ifs, encoding);
+			ifs >> items;
+			if (ifs.fail()) {
+				LYXERR(Debug::FILES, "ignore irregular thesaurus idx file: " << f->absFileName());
+				continue;
+			}
+			bool const bad_encoding_line =
+				encoding.empty() || encoding.find_first_of(',') != string::npos;
+			if (bad_encoding_line) {
+				LYXERR(Debug::FILES, "ignore version1 thesaurus idx file: " << f->absFileName());
+				continue;
+			}
+		}
+		// an index file that could not be opened is still selected
+		idx = f->absFileName();
+		LYXERR(Debug::FILES, "selected thesaurus idx file: " << idx);
+		break;
+	}
+
+	if (idx.empty())
+		return make_pair(string(), string());
+
+	// basename still holds the base name of the selected index file
+	for (FileNameList::const_iterator f = data_files.begin(); f != data_files.end(); ++f) {
+		if (!contains(f->onlyFileName(), basename))
+			continue;
+		data = f->absFileName();
+		LYXERR(Debug::FILES, "selected thesaurus data file: " << data);
+		break;
+	}
+	return make_pair(idx, data);
+}
+
+
+/// Locates the index and data file for \p lang. The directory configured in
+/// lyxrc.thesaurusdir_path is tried first (if set), then the dataDirectory()
+/// below the system support directory, then the one below the user support
+/// directory; a later directory is only tried while the result is incomplete.
+/// \return (index file, data file); an empty pair if a thesaurus for \p lang
+/// is already available.
+pair<string,string> Thesaurus::Private::getThesaurus(docstring const & lang)
+{
+	string const custom_dir = external_path(lyxrc.thesaurusdir_path);
+	ThesFiles found;
+
+	if (thesaurusAvailable(lang))
+		return ThesFiles();
+
+	if (!custom_dir.empty())
+		found = getThesaurus(custom_dir, lang);
+
+	if (found.first.empty() || found.second.empty()) {
+		string const sys_dir = external_path(
+			addName(lyx::support::package().system_support().absFileName(), dataDirectory()));
+		found = getThesaurus(sys_dir, lang);
+	}
+
+	if (found.first.empty() || found.second.empty()) {
+		string const user_dir = external_path(
+			addName(lyx::support::package().user_support().absFileName(), dataDirectory()));
+		found = getThesaurus(user_dir, lang);
+	}
+
+	return found;
+}
+
+
+/// Makes sure a MyThes instance for \p lang is stored in thes_, creating it
+/// from the files found by getThesaurus() if necessary.
+/// \return true if a thesaurus for \p lang is available afterwards, false if
+/// the index or the data file could not be found.
+bool Thesaurus::Private::addThesaurus(docstring const & lang)
+{
+	if (thesaurusAvailable(lang))
+		return true;
+
+	ThesFiles const files = getThesaurus(lang);
+	if (files.first.empty() || files.second.empty())
+		return false;
+
+	// MyThes takes the index file name first, then the data file name
+	thes_[lang] = new MyThes(files.first.c_str(), files.second.c_str());
+	return true;
+}
+
+
+/// \return true if a MyThes instance for \p lang is already stored in the
+/// private implementation (forwards to Private::thesaurusAvailable()).
+bool Thesaurus::thesaurusAvailable(docstring const & lang) const
+{
+ return d->thesaurusAvailable(lang);
+}
+
+
+/// \return true if a thesaurus for \p lang is already available, or if both
+/// its index and its data file can be found on disk.
+bool Thesaurus::thesaurusInstalled(docstring const & lang) const
{
- delete aik_;
+	if (thesaurusAvailable(lang))
+		return true;
+	pair<string, string> const found = d->getThesaurus(lang);
+	bool const incomplete = found.first.empty() || found.second.empty();
+	return !incomplete;
}
-Thesaurus::Meanings Thesaurus::lookup(docstring const & t)
+/// Looks up the word of \p wl in the thesaurus of its language.
+/// The thesaurus is loaded on first use. If the lowercased word itself is not
+/// found, the stems suggested by the spell checker are tried in turn.
+/// \return a map from each meaning to its sorted list of synonyms; empty if
+/// no thesaurus could be loaded or nothing was found.
+Thesaurus::Meanings Thesaurus::lookup(WordLangTuple const & wl)
{
Meanings meanings;
+ MyThes * mythes = 0;
- // aiksaurus is for english text only, therefore it does not work
- // with non-ascii strings.
- // The interface of the Thesaurus class uses docstring because a
- // non-english thesaurus is possible in theory.
- if (!support::isAscii(t))
- // to_ascii() would assert
- return meanings;
+ docstring const lang_code = from_ascii(wl.lang()->code());
+ docstring const t = wl.word();
- string const text = to_ascii(t);
- if (!aik_->find(text.c_str()))
+ // load the thesaurus for this language if that has not happened yet
+ if (!d->addThesaurus(lang_code))
return meanings;
- // weird api, but ...
+ for (Thesauri::const_iterator it = d->thes_.begin();
+ it != d->thes_.end(); ++it) {
+ if (it->first == lang_code) {
+ mythes = it->second;
+ break;
+ }
+ }
- int prev_meaning = -1;
- int cur_meaning;
- docstring meaning;
+ if (!mythes)
+ return meanings;
- // correct, returns "" at the end
- string ret = aik_->next(cur_meaning);
-
- while (!ret.empty()) {
- if (cur_meaning != prev_meaning) {
- meaning = from_ascii(ret);
- ret = aik_->next(cur_meaning);
- prev_meaning = cur_meaning;
- } else {
- if (ret != text)
- meanings[meaning].push_back(from_ascii(ret));
+ // the word is converted to the encoding reported by the thesaurus
+ string const encoding = mythes->get_th_encoding();
+
+ mentry * pmean;
+ string const text = to_iconv_encoding(support::lowercase(t), encoding);
+ int len = strlen(text.c_str());
+ int count = mythes->Lookup(text.c_str(), len, &pmean);
+ if (!count) {
+ // nothing found for the word as typed: retry with the word
+ // stems provided by the spell checker, stopping at the first hit
+ SpellChecker * speller = theSpellChecker();
+ if (!speller)
+ return meanings;
+ docstring_list suggestions;
+ speller->stem(wl, suggestions);
+ for (size_t i = 0; i != suggestions.size(); ++i) {
+ string const wordform = to_iconv_encoding(support::lowercase(suggestions[i]), encoding);
+ len = strlen(wordform.c_str());
+ count = mythes->Lookup(wordform.c_str(), len, &pmean);
+ if (count)
+ break;
}
+ if (!count)
+ return meanings;
+ }
- ret = aik_->next(cur_meaning);
+ // don't change value of pmean or count
+ // they are needed for the CleanUpAfterLookup routine
+ mentry * pm = pmean;
+ docstring meaning;
+ for (int i = 0; i < count; i++) {
+ vector<docstring> ret;
+ meaning = from_iconv_encoding(string(pm->defn), encoding);
+ // remove silly item
+ if (support::prefixIs(meaning, '-'))
+ meaning = support::ltrim(meaning, "- ");
+ for (int j = 0; j < pm->count; j++) {
+ ret.push_back(from_iconv_encoding(string(pm->psyns[j]), encoding));
+ }
+ meanings[meaning] = ret;
+ ++pm;
}
+ // now clean up all allocated memory
+ mythes->CleanUpAfterLookup(&pmean, count);
+ // sort the synonym list of every meaning; the loop needs its own body so
+ // that the return below is reached even when meanings is empty
for (Meanings::iterator it = meanings.begin();
it != meanings.end(); ++it)
sort(it->second.begin(), it->second.end());
return meanings;
}
-#else
+/// Creates the private implementation object; no thesaurus is loaded here.
Thesaurus::Thesaurus()
+ : d(new Thesaurus::Private)
{
}
+/// Deletes the private implementation object, whose destructor in turn
+/// deletes the MyThes instances it holds.
Thesaurus::~Thesaurus()
{
+ delete d;
}
-
-Thesaurus::Meanings Thesaurus::lookup(docstring const &)
-{
- return Meanings();
-}
-
-#endif // HAVE_LIBAIKSAURUS
-
// Global instance
+// No thesaurus is loaded at construction; lookup() loads one on demand.
Thesaurus thesaurus;