]> git.lyx.org Git - lyx.git/blob - src/Thesaurus.cpp
Avoid full metrics computation with Update:FitCursor
[lyx.git] / src / Thesaurus.cpp
1 /**
2  * \file Thesaurus.cpp
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author John Levon
7  * \author Jürgen Spitzmüller
8  *
9  * Full author contact details are available in file CREDITS.
10  */
11
12 #include <config.h>
13
14 #include "Thesaurus.h"
15
16 #include "LyXRC.h"
17
18 #include "SpellChecker.h"
19 #include "WordLangTuple.h"
20
21 #include "support/FileNameList.h"
22 #include "support/Package.h"
23 #include "support/debug.h"
24 #include "support/docstring.h"
25 #include "support/docstring_list.h"
26 #include "support/filetools.h"
27 #include "support/lstrings.h"
28 #include "support/os.h"
29
30 #include <cstdio>
31 #include MYTHES_H_LOCATION
32
33 #include <algorithm>
34 #include <cstring>
35 #include <fstream>
36
37 using namespace std;
38 using namespace lyx::support;
39 using namespace lyx::support::os;
40
41 namespace lyx {
42
43 namespace {
44
45 typedef std::map<docstring, MyThes *> Thesauri;
46
47 } // namespace
48
49 struct Thesaurus::Private
50 {
51         ~Private()
52         {
53                 for (Thesauri::iterator it = thes_.begin();
54                      it != thes_.end(); ++it) {
55                         delete it->second;
56                 }
57         }
58         ///
59         bool thesaurusAvailable(docstring const & lang) const
60         {
61                 for (Thesauri::const_iterator it = thes_.begin();
62                         it != thes_.end(); ++it) {
63                                 if (it->first == lang)
64                                         if (it->second)
65                                                 return true;
66                 }
67                 return false;
68         }
69
70         ///
71         typedef std::pair<std::string, std::string> ThesFiles;
72         ///
73         ThesFiles getThesaurus(string const & path, docstring const & lang);
74         ThesFiles getThesaurus(docstring const & lang);
75         /// add a thesaurus to the list
76         bool addThesaurus(docstring const & lang);
77
78         /// the thesauri
79         Thesauri thes_;
80
81         /// the location below system/user directory
82         /// there the data+idx files lookup will happen
83         const string dataDirectory(void) { return "thes"; }
84
85 };
86
87
88 pair<string,string> Thesaurus::Private::getThesaurus(string const & path, docstring const & lang)
89 {
90         FileName base(path);
91         if (!base.isDirectory()) {
92                 return make_pair(string(), string());
93         }
94         FileNameList const idx_files = base.dirList("idx");
95         FileNameList const data_files = base.dirList("dat");
96         string idx;
97         string data;
98         string basename;
99
100         LYXERR(Debug::FILES, "thesaurus path: " << path);
101         for (FileNameList::const_iterator it = idx_files.begin(); it != idx_files.end(); ++it) {
102                 basename = it->onlyFileNameWithoutExt();
103                 if (contains(basename, to_ascii(lang))) {
104                         // do not use more specific dicts.
105                         if (contains(basename, '_') && !contains(lang, '_'))
106                                 continue;
107                         if (contains(basename, '-') && !contains(lang, '-'))
108                                 continue;
109                         ifstream ifs(it->absFileName().c_str());
110                         if (ifs) {
111                                 // check for appropriate version of index file
112                                 string encoding; // first line is encoding
113                                 int items = 0;   // second line is no. of items
114                                 getline(ifs,encoding);
115                                 ifs >> items;
116                                 if (ifs.fail()) {
117                                         LYXERR(Debug::FILES, "ignore irregular thesaurus idx file: " << it->absFileName());
118                                         continue;
119                                 }
120                                 if (encoding.length() == 0 || encoding.find_first_of(',') != string::npos) {
121                                         LYXERR(Debug::FILES, "ignore version1 thesaurus idx file: " << it->absFileName());
122                                         continue;
123                                 }
124                         }
125                         idx = it->absFileName();
126                         LYXERR(Debug::FILES, "selected thesaurus idx file: " << idx);
127                         break;
128                 }
129         }
130         if (idx.empty()) {
131                 // try with a more general dictionary
132                 docstring shortcode;
133                 if (contains(lang, '_')) {
134                         split(lang, shortcode, '_');
135                         LYXERR(Debug::FILES, "Did not find thesaurus for LANG code "
136                                << lang << ". Trying with " << shortcode);
137                         return getThesaurus(path, shortcode);
138                 }
139                 else if (contains(lang, '-')) {
140                         split(lang, shortcode, '-');
141                         LYXERR(Debug::FILES, "Did not find thesaurus for LANG code "
142                                << lang << ". Trying with " << shortcode);
143                         return getThesaurus(path, shortcode);
144                 }
145                 return make_pair(string(), string());
146         }
147         for (support::FileNameList::const_iterator it = data_files.begin(); it != data_files.end(); ++it) {
148                 if (contains(it->onlyFileName(), basename)) {
149                         data = it->absFileName();
150                         LYXERR(Debug::FILES, "selected thesaurus data file: " << data);
151                         break;
152                 }
153         }
154         return make_pair(idx, data);
155 }
156
157
158 pair<string,string> Thesaurus::Private::getThesaurus(docstring const & lang)
159 {
160         string const thes_path = external_path(lyxrc.thesaurusdir_path);
161         pair<string,string> result ;
162
163         if (thesaurusAvailable(lang))
164                 return make_pair(string(), string());
165
166         if (!thes_path.empty())
167                 result = getThesaurus(thes_path, lang);
168         if (result.first.empty() || result.second.empty()) {
169                 string const sys_path = external_path(addName(lyx::support::package().system_support().absFileName(),dataDirectory())) ;
170                 result = getThesaurus(sys_path, lang);
171         }
172         if (result.first.empty() || result.second.empty()) {
173                 string const user_path = external_path(addName(lyx::support::package().user_support().absFileName(),dataDirectory())) ;
174                 result = getThesaurus(user_path, lang);
175         }
176         return result;
177 }
178
179
180 bool Thesaurus::Private::addThesaurus(docstring const & lang)
181 {
182         if (thesaurusAvailable(lang))
183                 return true;
184
185         ThesFiles files = getThesaurus(lang);
186         string const idx = files.first;
187         string const data = files.second;
188
189         if (idx.empty() || data.empty())
190                 return false;
191
192         char const * af = idx.c_str();
193         char const * df = data.c_str();
194         thes_[lang] = new MyThes(af, df);
195         return true;
196 }
197
198
199 bool Thesaurus::thesaurusAvailable(docstring const & lang) const
200 {
201         return d->thesaurusAvailable(lang);
202 }
203
204
205 bool Thesaurus::thesaurusInstalled(docstring const & lang) const
206 {
207         if (thesaurusAvailable(lang))
208                 return true;
209         pair<string, string> files = d->getThesaurus(lang);
210         return (!files.first.empty() && !files.second.empty());
211 }
212
213
214 Thesaurus::Meanings Thesaurus::lookup(WordLangTuple const & wl)
215 {
216         Meanings meanings;
217         MyThes * mythes = nullptr;
218
219         docstring const lang_code = from_ascii(wl.lang()->code());
220         docstring const & t = wl.word();
221
222         if (!d->addThesaurus(lang_code))
223                 return meanings;
224
225         for (Thesauri::const_iterator it = d->thes_.begin();
226              it != d->thes_.end(); ++it) {
227                 if (it->first == lang_code) {
228                         mythes = it->second;
229                         break;
230                 }
231         }
232
233         if (!mythes)
234                 return meanings;
235
236         string const encoding = mythes->get_th_encoding();
237
238         mentry * pmean;
239         string const text = to_iconv_encoding(support::lowercase(t), encoding);
240         int len = strlen(text.c_str());
241         int count = mythes->Lookup(text.c_str(), len, &pmean);
242         if (!count) {
243                 SpellChecker * speller = theSpellChecker();
244                 if (!speller)
245                         return meanings;
246                 docstring_list suggestions;
247                 speller->stem(wl, suggestions);
248                 for (size_t i = 0; i != suggestions.size(); ++i) {
249                         string const wordform = to_iconv_encoding(support::lowercase(suggestions[i]), encoding);
250                         len = strlen(wordform.c_str());
251                         count = mythes->Lookup(wordform.c_str(), len, &pmean);
252                         if (count)
253                                 break;
254                 }
255                 if (!count)
256                         return meanings;
257         }
258
259         // don't change value of pmean or count
260         // they are needed for the CleanUpAfterLookup routine
261         mentry * pm = pmean;
262         docstring meaning;
263         for (int i = 0; i < count; i++) {
264                 vector<docstring> ret;
265                 meaning = from_iconv_encoding(string(pm->defn), encoding);
266                 // remove silly item
267                 if (support::prefixIs(meaning, '-'))
268                         meaning = support::ltrim(meaning, "- ");
269                 ret.reserve(pm->count);
270                 for (int j = 0; j < pm->count; j++) {
271                         ret.push_back(from_iconv_encoding(string(pm->psyns[j]), encoding));
272                 }
273                 meanings[meaning] = ret;
274                 ++pm;
275         }
276         // now clean up all allocated memory
277         mythes->CleanUpAfterLookup(&pmean, count);
278
279         for (Meanings::iterator it = meanings.begin();
280              it != meanings.end(); ++it)
281                 sort(it->second.begin(), it->second.end());
282
283         return meanings;
284 }
285
286
287 Thesaurus::Thesaurus()
288         : d(new Thesaurus::Private)
289 {
290 }
291
292
293 Thesaurus::~Thesaurus()
294 {
295         delete d;
296 }
297
298 // Global instance
299 Thesaurus thesaurus;
300
301
302 } // namespace lyx