3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
7 * \author Jürgen Spitzmüller
9 * Full author contact details are available in file CREDITS.
14 #include "Thesaurus.h"
18 #include "SpellChecker.h"
19 #include "WordLangTuple.h"
21 #include "support/FileNameList.h"
22 #include "support/Package.h"
23 #include "support/debug.h"
24 #include "support/docstring_list.h"
25 #include "support/filetools.h"
26 #include "support/gettext.h"
27 #include "support/lstrings.h"
28 #include "support/os.h"
30 #ifdef USE_EXTERNAL_MYTHES
31 #include MYTHES_H_LOCATION
34 #include "support/mythes/mythes.hxx"
37 #include "frontends/alert.h"
44 using namespace lyx::support;
45 using namespace lyx::support::os;
/// Container of loaded thesauri: maps a docstring key (the language code
/// passed to addThesaurus()) to a raw pointer to the MyThes object for it.
51 typedef std::map<docstring, MyThes *> Thesauri;
/// Private implementation part of Thesaurus: works on the thes_ container
/// of loaded thesauri and declares the helpers that locate thesaurus files
/// on disk and load them.
55 struct Thesaurus::Private
// Walk over every entry of thes_.
// NOTE(review): presumably this loop releases the MyThes objects that
// addThesaurus() allocates with new -- confirm against the loop body.
59 for (Thesauri::iterator it = thes_.begin();
60 it != thes_.end(); ++it) {
/// Scan thes_ linearly for an entry whose key equals \p lang.
65 bool thesaurusAvailable(docstring const & lang) const
67 for (Thesauri::const_iterator it = thes_.begin();
68 it != thes_.end(); ++it) {
69 if (it->first == lang)
/// Pair of file names; getThesaurus() fills it as (index file, data file).
77 typedef std::pair<std::string, std::string> ThesFiles;
/// Look for the thesaurus files of \p lang inside directory \p path.
79 ThesFiles getThesaurus(string const & path, docstring const & lang);
/// Look for the thesaurus files of \p lang in the configured thesaurus
/// directory, then below the system and the user support directories.
80 ThesFiles getThesaurus(docstring const & lang);
81 /// add a thesaurus to the list
82 bool addThesaurus(docstring const & lang);
87 /// the location below system/user directory
88 /// where the data+idx files lookup will happen
89 const string dataDirectory(void) { return "thes"; }
/// Search directory \p path for the thesaurus files belonging to \p lang.
/// An index file is picked from the "idx" listing when its base name contains
/// the language code; the data file is then picked from the "dat" listing
/// when its file name contains that base name.
/// \return the pair (index file, data file) as absolute file names; a pair of
/// empty strings when \p path is not a directory or no match is found.
94 pair<string,string> Thesaurus::Private::getThesaurus(string const & path, docstring const & lang)
// NOTE(review): base is presumably a FileName built from path -- confirm.
97 if (!base.isDirectory()) {
98 return make_pair(string(), string());
// Candidate index and data files found in the directory.
100 FileNameList const idx_files = base.dirList("idx");
101 FileNameList const data_files = base.dirList("dat");
106 LYXERR(Debug::FILES, "thesaurus path: " << path);
// Examine each index file in turn.
107 for (FileNameList::const_iterator it = idx_files.begin(); it != idx_files.end(); ++it) {
108 basename = it->onlyFileNameWithoutExt();
// Only consider files whose base name contains the language code.
109 if (contains(basename, to_ascii(lang))) {
110 // do not use more specific dicts.
// i.e. a base name with a '_' or '-' variant suffix is not wanted when
// the requested code has none (presumably the file is skipped -- confirm).
111 if (contains(basename, '_') && !contains(lang, '_'))
113 if (contains(basename, '-') && !contains(lang, '-'))
115 ifstream ifs(it->absFileName().c_str());
117 // check for appropriate version of index file
118 string encoding; // first line is encoding
119 int items = 0; // second line is no. of items
120 getline(ifs,encoding);
123 LYXERR(Debug::FILES, "ignore irregular thesaurus idx file: " << it->absFileName());
// An empty first line, or one containing a comma, is reported as a
// version1 index file.
126 if (encoding.length() == 0 || encoding.find_first_of(',') != string::npos) {
127 LYXERR(Debug::FILES, "ignore version1 thesaurus idx file: " << it->absFileName());
// This index file is accepted.
131 idx = it->absFileName();
132 LYXERR(Debug::FILES, "selected thesaurus idx file: " << idx);
137 // try with a more general dictionary
// NOTE(review): presumably only reached when no index file was selected
// above -- confirm. The part of lang before the first '_' (or '-') is
// used as the shorter code for a recursive search in the same directory.
139 if (contains(lang, '_')) {
140 split(lang, shortcode, '_');
141 LYXERR(Debug::FILES, "Did not find thesaurus for LANG code "
142 << lang << ". Trying with " << shortcode);
143 return getThesaurus(path, shortcode);
145 else if (contains(lang, '-')) {
146 split(lang, shortcode, '-');
147 LYXERR(Debug::FILES, "Did not find thesaurus for LANG code "
148 << lang << ". Trying with " << shortcode);
149 return getThesaurus(path, shortcode);
// Nothing left to shorten: report failure with an empty pair.
151 return make_pair(string(), string());
// Find the data file that goes with the selected index file: its file name
// must contain the base name recorded in the loop above.
153 for (support::FileNameList::const_iterator it = data_files.begin(); it != data_files.end(); ++it) {
154 if (contains(it->onlyFileName(), basename)) {
155 data = it->absFileName();
156 LYXERR(Debug::FILES, "selected thesaurus data file: " << data);
160 return make_pair(idx, data);
/// Locate the thesaurus files for \p lang by trying, in order: the directory
/// configured in lyxrc.thesaurusdir_path, the dataDirectory() sub-directory of
/// the system support directory, and the same sub-directory of the user
/// support directory. A later location is only tried while either file name
/// of the current result is still empty.
/// Note that an empty pair is returned right away when a thesaurus for
/// \p lang is already available.
164 pair<string,string> Thesaurus::Private::getThesaurus(docstring const & lang)
166 string const thes_path = external_path(lyxrc.thesaurusdir_path);
167 pair<string,string> result ;
// Already loaded: no file lookup is done.
169 if (thesaurusAvailable(lang))
170 return make_pair(string(), string());
// 1. user-configured thesaurus directory, if set
172 if (!thes_path.empty())
173 result = getThesaurus(thes_path, lang);
// 2. system support directory
174 if (result.first.empty() || result.second.empty()) {
175 string const sys_path = external_path(addName(lyx::support::package().system_support().absFileName(),dataDirectory())) ;
176 result = getThesaurus(sys_path, lang);
// 3. user support directory
178 if (result.first.empty() || result.second.empty()) {
179 string const user_path = external_path(addName(lyx::support::package().user_support().absFileName(),dataDirectory())) ;
180 result = getThesaurus(user_path, lang);
/// Make sure a thesaurus for \p lang is present in thes_: unless one is
/// already available, locate its index and data files and create a MyThes
/// object from them, stored under the key \p lang.
/// NOTE(review): presumably returns true on success or when already loaded,
/// and false when the files cannot be found -- confirm against the returns.
186 bool Thesaurus::Private::addThesaurus(docstring const & lang)
// Nothing to do if this language has been loaded before.
188 if (thesaurusAvailable(lang))
191 ThesFiles files = getThesaurus(lang);
192 string const idx = files.first;
193 string const data = files.second;
// Both the index and the data file are required.
195 if (idx.empty() || data.empty())
// MyThes is constructed from plain C strings.
198 char const * af = idx.c_str();
199 char const * df = data.c_str();
// The object is allocated with new and kept in thes_ as a raw pointer.
200 thes_[lang] = new MyThes(af, df);
/// Public wrapper that forwards to Private::thesaurusAvailable() for \p lang
/// and returns its result.
205 bool Thesaurus::thesaurusAvailable(docstring const & lang) const
207 return d->thesaurusAvailable(lang);
/// Tell whether thesaurus files for \p lang can be found on disk: the file
/// lookup succeeds when both the index and the data file name are non-empty.
211 bool Thesaurus::thesaurusInstalled(docstring const & lang) const
// An already available thesaurus is checked first, without a file lookup.
// NOTE(review): presumably this branch returns true -- confirm.
213 if (thesaurusAvailable(lang))
215 pair<string, string> files = d->getThesaurus(lang);
216 return (!files.first.empty() && !files.second.empty());
/// Look up the word of \p wl in the thesaurus of its language.
/// The result maps each meaning (definition text) to the sorted list of its
/// synonyms. The word is lower-cased and converted to the encoding reported
/// by the thesaurus before the query; results are converted back from it.
220 Thesaurus::Meanings Thesaurus::lookup(WordLangTuple const & wl)
225 docstring const lang_code = from_ascii(wl.lang()->code());
226 docstring const t = wl.word();
// Load the thesaurus for this language on demand.
// NOTE(review): presumably returns early when loading fails -- confirm.
228 if (!d->addThesaurus(lang_code))
// Find the entry of thes_ that belongs to this language.
231 for (Thesauri::const_iterator it = d->thes_.begin();
232 it != d->thes_.end(); ++it) {
233 if (it->first == lang_code) {
242 string const encoding = mythes->get_th_encoding();
// Query with the lower-cased word in the thesaurus' own encoding.
245 string const text = to_iconv_encoding(support::lowercase(t), encoding);
246 int len = strlen(text.c_str());
247 int count = mythes->Lookup(text.c_str(), len, &pmean);
// Retry the lookup with the word forms the spell checker's stem() returns.
// NOTE(review): presumably only done when the first lookup found nothing
// and a spell checker is present -- confirm against the conditions.
249 SpellChecker * speller = theSpellChecker();
252 docstring_list suggestions;
253 speller->stem(wl, suggestions);
254 for (size_t i = 0; i != suggestions.size(); ++i) {
255 string const wordform = to_iconv_encoding(support::lowercase(suggestions[i]), encoding);
256 len = strlen(wordform.c_str());
257 count = mythes->Lookup(wordform.c_str(), len, &pmean);
265 // don't change value of pmean or count
266 // they are needed for the CleanUpAfterLookup routine
// Convert each of the count returned meanings and its synonyms to docstring.
269 for (int i = 0; i < count; i++) {
270 vector<docstring> ret;
271 meaning = from_iconv_encoding(string(pm->defn), encoding);
// Strip a leading "- " marker from the definition text.
273 if (support::prefixIs(meaning, '-'))
274 meaning = support::ltrim(meaning, "- ");
275 for (int j = 0; j < pm->count; j++) {
276 ret.push_back(from_iconv_encoding(string(pm->psyns[j]), encoding));
// Meanings with identical definition text overwrite each other here.
278 meanings[meaning] = ret;
281 // now clean up all allocated memory
282 mythes->CleanUpAfterLookup(&pmean, count);
// Sort the synonyms of every meaning.
284 for (Meanings::iterator it = meanings.begin();
285 it != meanings.end(); ++it)
286 sort(it->second.begin(), it->second.end());
/// Construct the thesaurus front end; the private implementation object is
/// allocated with new and stored in d.
292 Thesaurus::Thesaurus()
293 : d(new Thesaurus::Private)
298 Thesaurus::~Thesaurus()