]> git.lyx.org Git - lyx.git/blob - src/Thesaurus.cpp
4f4ad64d14aba691df74e59eb45445e20e76f261
[lyx.git] / src / Thesaurus.cpp
1 /**
2  * \file Thesaurus.cpp
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author John Levon
7  * \author Jürgen Spitzmüller
8  *
9  * Full author contact details are available in file CREDITS.
10  */
11
12 #include <config.h>
13
14 #include "Thesaurus.h"
15
16 #include "LyXRC.h"
17
18 #include "SpellChecker.h"
19 #include "WordLangTuple.h"
20
21 #include "support/FileNameList.h"
22 #include "support/Package.h"
23 #include "support/debug.h"
24 #include "support/docstring_list.h"
25 #include "support/filetools.h"
26 #include "support/gettext.h"
27 #include "support/lstrings.h"
28 #include "support/os.h"
29
30 #ifdef USE_EXTERNAL_MYTHES
31 #include MYTHES_H_LOCATION
32 #else
33 #include <cstdio>
34 #include "support/mythes/mythes.hxx"
35 #endif
36
37 #include "frontends/alert.h"
38
39 #include <algorithm>
40 #include <cstring>
41 #include <fstream>
42
43 using namespace std;
44 using namespace lyx::support;
45 using namespace lyx::support::os;
46
47 namespace lyx {
48
namespace {

/// Maps a language code to the MyThes instance that was loaded for it.
/// The mapped pointers are owned by Thesaurus::Private, which deletes
/// them in its destructor.
typedef std::map<docstring, MyThes *> Thesauri;

} // namespace anon
54
55 struct Thesaurus::Private
56 {
57         ~Private()
58         {
59                 for (Thesauri::iterator it = thes_.begin();
60                      it != thes_.end(); ++it) {
61                         delete it->second;
62                 }
63         }
64         ///
65         bool thesaurusAvailable(docstring const & lang) const
66         {
67                 for (Thesauri::const_iterator it = thes_.begin();
68                         it != thes_.end(); ++it) {
69                                 if (it->first == lang)
70                                         if (it->second)
71                                                 return true;
72                 }
73                 return false;
74         }
75
76         ///
77         typedef std::pair<std::string, std::string> ThesFiles;
78         ///
79         ThesFiles getThesaurus(string const & path, docstring const & lang);
80         ThesFiles getThesaurus(docstring const & lang);
81         /// add a thesaurus to the list
82         bool addThesaurus(docstring const & lang);
83
84         /// the thesauri
85         Thesauri thes_;
86
87         /// the location below system/user directory
88         /// there the data+idx files lookup will happen
89         const string dataDirectory(void) { return "thes"; }
90
91 };
92
93
94 pair<string,string> Thesaurus::Private::getThesaurus(string const & path, docstring const & lang)
95 {
96         FileName base(path);
97         if (!base.isDirectory()) {
98                 return make_pair(string(), string());
99         }
100         FileNameList const idx_files = base.dirList("idx");
101         FileNameList const data_files = base.dirList("dat");
102         string idx;
103         string data;
104         string basename;
105
106         LYXERR(Debug::FILES, "thesaurus path: " << path);
107         for (FileNameList::const_iterator it = idx_files.begin(); it != idx_files.end(); ++it) {
108                 basename = it->onlyFileNameWithoutExt();
109                 if (contains(basename, to_ascii(lang))) {
110                         // do not use more specific dicts.
111                         if (contains(basename, '_') && !contains(lang, '_'))
112                                 continue;
113                         if (contains(basename, '-') && !contains(lang, '-'))
114                                 continue;
115                         ifstream ifs(it->absFileName().c_str());
116                         if (ifs) {
117                                 // check for appropriate version of index file
118                                 string encoding; // first line is encoding
119                                 int items = 0;   // second line is no. of items
120                                 getline(ifs,encoding);
121                                 ifs >> items;
122                                 if (ifs.fail()) {
123                                         LYXERR(Debug::FILES, "ignore irregular thesaurus idx file: " << it->absFileName());
124                                         continue;
125                                 }
126                                 if (encoding.length() == 0 || encoding.find_first_of(',') != string::npos) {
127                                         LYXERR(Debug::FILES, "ignore version1 thesaurus idx file: " << it->absFileName());
128                                         continue;
129                                 }
130                         }
131                         idx = it->absFileName();
132                         LYXERR(Debug::FILES, "selected thesaurus idx file: " << idx);
133                         break;
134                 }
135         }
136         if (idx.empty()) {
137                 // try with a more general dictionary
138                 docstring shortcode;
139                 if (contains(lang, '_')) {
140                         split(lang, shortcode, '_');
141                         LYXERR(Debug::FILES, "Did not find thesaurus for LANG code "
142                                << lang << ". Trying with " << shortcode);
143                         return getThesaurus(path, shortcode);
144                 }
145                 else if (contains(lang, '-')) {
146                         split(lang, shortcode, '-');
147                         LYXERR(Debug::FILES, "Did not find thesaurus for LANG code "
148                                << lang << ". Trying with " << shortcode);
149                         return getThesaurus(path, shortcode);
150                 }
151                 return make_pair(string(), string());
152         }
153         for (support::FileNameList::const_iterator it = data_files.begin(); it != data_files.end(); ++it) {
154                 if (contains(it->onlyFileName(), basename)) {
155                         data = it->absFileName();
156                         LYXERR(Debug::FILES, "selected thesaurus data file: " << data);
157                         break;
158                 }
159         }
160         return make_pair(idx, data);
161 }
162
163
164 pair<string,string> Thesaurus::Private::getThesaurus(docstring const & lang)
165 {
166         string const thes_path = external_path(lyxrc.thesaurusdir_path);
167         pair<string,string> result ;
168
169         if (thesaurusAvailable(lang))
170                 return make_pair(string(), string());
171
172         if (!thes_path.empty())
173                 result = getThesaurus(thes_path, lang);
174         if (result.first.empty() || result.second.empty()) {
175                 string const sys_path = external_path(addName(lyx::support::package().system_support().absFileName(),dataDirectory())) ;
176                 result = getThesaurus(sys_path, lang);
177         }
178         if (result.first.empty() || result.second.empty()) {
179                 string const user_path = external_path(addName(lyx::support::package().user_support().absFileName(),dataDirectory())) ;
180                 result = getThesaurus(user_path, lang);
181         }
182         return result;
183 }
184
185
186 bool Thesaurus::Private::addThesaurus(docstring const & lang)
187 {
188         if (thesaurusAvailable(lang))
189                 return true;
190
191         ThesFiles files = getThesaurus(lang);
192         string const idx = files.first;
193         string const data = files.second;
194
195         if (idx.empty() || data.empty())
196                 return false;
197
198         char const * af = idx.c_str();
199         char const * df = data.c_str();
200         thes_[lang] = new MyThes(af, df);
201         return true;
202 }
203
204
205 bool Thesaurus::thesaurusAvailable(docstring const & lang) const
206 {
207         return d->thesaurusAvailable(lang);
208 }
209
210
211 bool Thesaurus::thesaurusInstalled(docstring const & lang) const
212 {
213         if (thesaurusAvailable(lang))
214                 return true;
215         pair<string, string> files = d->getThesaurus(lang);
216         return (!files.first.empty() && !files.second.empty());
217 }
218
219
220 Thesaurus::Meanings Thesaurus::lookup(WordLangTuple const & wl)
221 {
222         Meanings meanings;
223         MyThes * mythes = 0;
224
225         docstring const lang_code = from_ascii(wl.lang()->code());
226         docstring const t = wl.word();
227
228         if (!d->addThesaurus(lang_code))
229                 return meanings;
230
231         for (Thesauri::const_iterator it = d->thes_.begin();
232              it != d->thes_.end(); ++it) {
233                 if (it->first == lang_code) {
234                         mythes = it->second;
235                         break;
236                 }
237         }
238
239         if (!mythes)
240                 return meanings;
241
242         string const encoding = mythes->get_th_encoding();
243         
244         mentry * pmean;
245         string const text = to_iconv_encoding(support::lowercase(t), encoding);
246         int len = strlen(text.c_str());
247         int count = mythes->Lookup(text.c_str(), len, &pmean);
248         if (!count) {
249                 SpellChecker * speller = theSpellChecker();
250                 if (!speller)
251                         return meanings;
252                 docstring_list suggestions;
253                 speller->stem(wl, suggestions);
254                 for (size_t i = 0; i != suggestions.size(); ++i) {
255                         string const wordform = to_iconv_encoding(support::lowercase(suggestions[i]), encoding);
256                         len = strlen(wordform.c_str());
257                         count = mythes->Lookup(wordform.c_str(), len, &pmean);
258                         if (count)
259                                 break;
260                 }
261                 if (!count)
262                         return meanings;
263         }
264
265         // don't change value of pmean or count
266         // they are needed for the CleanUpAfterLookup routine
267         mentry * pm = pmean;
268         docstring meaning;
269         for (int i = 0; i < count; i++) {
270                 vector<docstring> ret;
271                 meaning = from_iconv_encoding(string(pm->defn), encoding);
272                 // remove silly item
273                 if (support::prefixIs(meaning, '-'))
274                         meaning = support::ltrim(meaning, "- ");
275                 for (int j = 0; j < pm->count; j++) {
276                         ret.push_back(from_iconv_encoding(string(pm->psyns[j]), encoding));
277                 }
278                 meanings[meaning] = ret;
279                 ++pm;
280         }
281         // now clean up all allocated memory
282         mythes->CleanUpAfterLookup(&pmean, count);
283
284         for (Meanings::iterator it = meanings.begin();
285              it != meanings.end(); ++it)
286                 sort(it->second.begin(), it->second.end());
287
288         return meanings;
289 }
290
291
292 Thesaurus::Thesaurus()
293         : d(new Thesaurus::Private)
294 {
295 }
296
297
/// Destroy the private implementation, which in turn deletes all
/// MyThes instances it holds.
Thesaurus::~Thesaurus()
{
	delete d;
}
302
// Global instance.
// It is a namespace-scope object, so it is constructed during static
// initialization and destroyed at program exit.
Thesaurus thesaurus;
305
306
307 } // namespace lyx