2 * \file HunspellChecker.cpp
3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
6 * \author Abdelrazak Younes
8 * Full author contact details are available in file CREDITS.
13 #include "HunspellChecker.h"
14 #include "PersonalWordList.h"
17 #include "WordLangTuple.h"
19 #include "support/debug.h"
20 #include "support/docstring_list.h"
21 #include "support/filetools.h"
22 #include "support/Package.h"
23 #include "support/FileName.h"
24 #include "support/lassert.h"
25 #include "support/lstrings.h"
27 #include <hunspell/hunspell.hxx>
34 using namespace lyx::support;
35 using namespace lyx::support::os;
/// Hunspell instances keyed by a string (lang->lang() at the use sites in this file).
41 typedef map<std::string, Hunspell *> Spellers;
/// Personal word lists, keyed the same way (lang->lang() at the use sites).
42 typedef map<std::string, PersonalWordList *> LangPersonalWordList;
/// Plain sequence of word/language tuples; scanned linearly where it is used.
44 typedef vector<WordLangTuple> IgnoreList;
/// Normalizes a result string coming back from Hunspell.
/// \param s the string to normalize.
/// \return the result of subst(s, 0x2019, 0x0027), i.e. \p s with
///         U+2019 substituted by U+0027.
46 docstring remap_result(docstring const & s)
48 // substitute RIGHT SINGLE QUOTATION MARK
// (U+2019); the replacement code point 0x0027 is the plain APOSTROPHE
50 return subst(s, 0x2019, 0x0027);
/// Private implementation part of HunspellChecker: dictionary lookup,
/// speller bookkeeping and the personal word list interface.
56 struct HunspellChecker::Private
/// Takes the configured user dictionary path (see the definition for
/// the condition under which it acts).
62 void setUserPath(std::string const & path);
/// Returns the dictionary directory belonging to lookup step \p selector.
63 const string dictPath(int selector);
/// True if both \p hpath + ".aff" and \p hpath + ".dic" are readable files.
64 bool haveLanguageFiles(string const & hpath);
/// Checks whether directory \p hpath holds dictionary files for \p lang.
65 bool haveDictionary(Language const * lang, string & hpath);
/// Same check, iterating over the lookup directories from dictPath().
66 bool haveDictionary(Language const * lang);
/// Number of non-null entries in the speller map.
67 int numDictionaries() const;
/// Creates a speller for \p lang from \p hpath and registers it.
68 Hunspell * addSpeller(Language const * lang, string & hpath);
/// Tries the lookup directories in order to create a speller for \p lang.
69 Hunspell * addSpeller(Language const * lang);
/// Returns the speller for \p lang, using lookup() and addSpeller().
70 Hunspell * speller(Language const * lang);
/// Returns the registered speller for \p lang, or nullptr if none.
71 Hunspell * lookup(Language const * lang);
/// True if \p wl matches an ignored entry (same language code and word).
73 bool isIgnored(WordLangTuple const & wl) const;
74 /// personal word list interface
75 void remove(WordLangTuple const & wl);
76 void insert(WordLangTuple const & wl);
77 bool learned(WordLangTuple const & wl);
/// Personal word lists, keyed by lang->lang().
83 LangPersonalWordList personal_;
/// User dictionary path; setUserPath() compares it with lyxrc.hunspelldir_path.
85 std::string user_path_;
87 /// the location below system/user directory
88 /// where the aff+dic files lookup will happen
89 const string dictDirectory(void) const { return "dicts"; }
/// Exclusive upper bound of the selector values passed to dictPath().
90 int maxLookupSelector(void) const { return 5; }
/// Dictionary base name for \p lang: code() + "-" + variety() when the
/// language has a non-empty variety.
91 const string HunspellDictionaryName(Language const * lang) {
92 return lang->variety().empty()
94 : lang->code() + "-" + lang->variety();
/// Fixed MySpell dictionary directory ("/usr/share/myspell").
96 const string myspellPackageDictDirectory(void) {
97 return "/usr/share/myspell";
/// Fixed Hunspell dictionary directory ("/usr/share/hunspell").
99 const string hunspellPackageDictDirectory(void) {
100 return "/usr/share/hunspell";
/// Constructor: hands the configured Hunspell directory
/// (lyxrc.hunspelldir_path) to setUserPath().
105 HunspellChecker::Private::Private()
107 setUserPath(lyxrc.hunspelldir_path);
/// Destructor of the private implementation.
/// NOTE(review): presumably releases the cached spellers and personal
/// word lists (cf. cleanCache()) — confirm.
111 HunspellChecker::Private::~Private()
/// Called with the configured Hunspell user path; acts only when the stored
/// user_path_ differs from lyxrc.hunspelldir_path.
/// \param path the user dictionary path to switch to.
117 void HunspellChecker::Private::setUserPath(std::string const & path)
// NOTE(review): the comparison uses lyxrc.hunspelldir_path, not \p path.
// Every caller in this file passes lyxrc.hunspelldir_path, so the two
// coincide today — confirm this is intended.
119 if (user_path_ != lyxrc.hunspelldir_path) {
/// Walks the speller map and the personal word list map.
/// Every speller slot is reset to nullptr; personal word list entries that
/// are null are special-cased, the others are picked up as \c pd.
/// NOTE(review): presumably the pointed-to objects are released (and the
/// word lists saved) as part of this walk — confirm.
126 void HunspellChecker::Private::cleanCache()
128 Spellers::iterator it = spellers_.begin();
129 Spellers::iterator end = spellers_.end();
// first pass: speller instances
131 for (; it != end; ++it) {
// the map entry itself is kept, only its pointer is cleared
133 it->second = nullptr;
136 LangPersonalWordList::const_iterator pdit = personal_.begin();
137 LangPersonalWordList::const_iterator pdet = personal_.end();
// second pass: personal word lists
139 for (; pdit != pdet; ++pdit) {
// entries without a word list get separate treatment
140 if (pdit->second == nullptr)
142 PersonalWordList * pd = pdit->second;
/// Tests for the pair of files that make up a Hunspell dictionary.
/// \param hpath dictionary path without extension.
/// \return true only if both \p hpath + ".aff" and \p hpath + ".dic"
///         are readable files.
149 bool HunspellChecker::Private::haveLanguageFiles(string const & hpath)
// affix file
151 FileName const affix(hpath + ".aff");
// word list file
152 FileName const dict(hpath + ".dic");
153 return affix.isReadableFile() && dict.isReadableFile();
/// Maps a lookup selector to a dictionary directory.
/// The results returned below are: the Hunspell package directory, the
/// MySpell package directory, <system support dir>/dicts and
/// <user support dir>/dicts.
/// NOTE(review): which selector value picks which directory is not
/// documented here — verify against the dispatch on \p selector.
/// \param selector lookup step; callers iterate 0 .. maxLookupSelector()-1.
157 const string HunspellChecker::Private::dictPath(int selector)
161 return hunspellPackageDictDirectory();
163 return myspellPackageDictDirectory();
// dictDirectory() below the system support directory
165 return addName(package().system_support().absFileName(),dictDirectory());
// dictDirectory() below the user support directory
167 return addName(package().user_support().absFileName(),dictDirectory());
/// Checks whether the directory \p hpath contains Hunspell files for \p lang.
/// Two base names are probed with haveLanguageFiles(): first
/// HunspellDictionaryName(lang), then lang->code() with '_' replaced by '-'.
/// \param lang  language to look for; a null pointer takes the early-exit branch.
/// \param hpath directory to search; an empty string takes the early-exit branch.
///              NOTE(review): passed by non-const reference — presumably
///              updated to the matching base path on success; confirm.
174 bool HunspellChecker::Private::haveDictionary(Language const * lang, string & hpath)
// reject unusable arguments up front
176 if (hpath.empty() || !lang)
// a speller already registered for this language counts as "have"
179 if (lookup(lang)) return true;
181 string d_name = HunspellDictionaryName(lang);
183 LYXERR(Debug::FILES, "check hunspell path: " << hpath
184 << " for language " << lang->lang() << " with name " << d_name);
186 string h_path = addName(hpath, d_name);
187 // first we try lang code+variety
188 if (haveLanguageFiles(h_path)) {
189 LYXERR(Debug::FILES, " found " << h_path);
193 // another try with code, '_' replaced by '-'
194 h_path = addName(hpath, subst(lang->code(), '_', '-'));
195 if (!haveLanguageFiles(h_path))
197 LYXERR(Debug::FILES, " found " << h_path);
/// Checks all lookup directories for a dictionary for \p lang.
/// Refreshes the user path first, then asks haveDictionary(lang, path) for
/// dictPath(0) .. dictPath(maxLookupSelector()-1), stopping at the first hit.
203 bool HunspellChecker::Private::haveDictionary(Language const * lang)
207 setUserPath(lyxrc.hunspelldir_path);
// the !result condition ends the search as soon as one directory matches
208 for (int p = 0; !result && p < maxLookupSelector(); ++p) {
209 string lpath = dictPath(p);
210 result = haveDictionary(lang, lpath);
/// Returns the speller to use for \p lang.
/// The registered speller is looked up first; the fallback path refreshes
/// the user path and returns whatever addSpeller(lang) yields (which may be
/// nullptr — callers in this file test the result).
216 Hunspell * HunspellChecker::Private::speller(Language const * lang)
218 Hunspell * h = lookup(lang);
221 setUserPath(lyxrc.hunspelldir_path);
222 return addSpeller(lang);
/// Finds the speller registered under lang->lang().
/// \param lang language to look up; dereferenced unconditionally, so it
///        must not be null.
/// \return the stored pointer (which may itself be nullptr), or nullptr
///         when the language has no entry in spellers_.
226 Hunspell * HunspellChecker::Private::lookup(Language const * lang)
228 Spellers::iterator it = spellers_.find(lang->lang());
229 return it != spellers_.end() ? it->second : nullptr;
/// Creates a Hunspell instance for \p lang from the given location and
/// registers it in spellers_ under lang->lang().
/// \param lang language the speller is for.
/// \param path passed to haveDictionary(lang, path) and afterwards used as
///        the base name for the ".aff" / ".dic" files.
///        NOTE(review): this relies on haveDictionary() adjusting \p path
///        through the reference — confirm.
233 Hunspell * HunspellChecker::Private::addSpeller(Language const * lang, string & path)
235 if (!haveDictionary(lang, path)) {
// remember the miss: a null entry is stored for this language
236 spellers_[lang->lang()] = nullptr;
240 FileName const affix(path + ".aff");
241 FileName const dict(path + ".dic");
// the instance is owned through the raw pointer kept in spellers_
242 Hunspell * h = new Hunspell(affix.absFileName().c_str(), dict.absFileName().c_str());
// NOTE(review): this streams the Language pointer itself, not lang->lang()
243 LYXERR(Debug::FILES, "Hunspell speller for langage " << lang << " at " << dict << " added.");
244 spellers_[lang->lang()] = h;
/// Creates a speller for \p lang from the first lookup directory that
/// provides one, then sets up the personal word list for that language and
/// feeds its words to the new speller.
249 Hunspell * HunspellChecker::Private::addSpeller(Language const * lang)
251 Hunspell * h = nullptr;
// stop at the first directory that produced a speller
252 for (int p = 0; p < maxLookupSelector() && nullptr == h; ++p) {
253 string lpath = dictPath(p);
254 h = addSpeller(lang, lpath);
// words must be handed to Hunspell in the dictionary's own encoding
257 string const encoding = h->get_dic_encoding();
258 PersonalWordList * pd = new PersonalWordList(lang->lang());
// registered under the same key as the speller
260 personal_[lang->lang()] = pd;
261 docstring_list::const_iterator it = pd->begin();
262 docstring_list::const_iterator et = pd->end();
// add every personal word to the runtime dictionary of the speller
263 for (; it != et; ++it) {
264 string const word_to_add = to_iconv_encoding(*it, encoding);
265 h->add(word_to_add.c_str());
/// Counts the entries of spellers_ whose pointer is non-null.
272 int HunspellChecker::Private::numDictionaries() const
275 Spellers::const_iterator it = spellers_.begin();
276 Spellers::const_iterator et = spellers_.end();
278 for (; it != et; ++it)
// the bool converts to 0 or 1, so null entries do not count
279 result += it->second != nullptr;
/// Linear search of ignored_ for \p wl.
/// An entry matches when both its language code and its word equal those
/// of \p wl.
284 bool HunspellChecker::Private::isIgnored(WordLangTuple const & wl) const
286 IgnoreList::const_iterator it = ignored_.begin();
287 for (; it != ignored_.end(); ++it) {
// entries of another language are not candidates
288 if (it->lang()->code() != wl.lang()->code())
290 if (it->word() == wl.word())
296 /// personal word list interface
/// Removes the word of \p wl from the speller of its language and from the
/// personal word list of that language.
297 void HunspellChecker::Private::remove(WordLangTuple const & wl)
299 Hunspell * h = speller(wl.lang());
// Hunspell expects the word in the dictionary encoding
302 string const encoding = h->get_dic_encoding();
303 string const word_to_check = to_iconv_encoding(wl.word(), encoding);
304 h->remove(word_to_check.c_str());
// NOTE(review): map::operator[] inserts a null entry when the language
// has no personal word list yet
305 PersonalWordList * pd = personal_[wl.lang()->lang()];
308 pd->remove(wl.word());
/// Adds the word of \p wl to the speller of its language and to the
/// personal word list of that language.
312 void HunspellChecker::Private::insert(WordLangTuple const & wl)
314 Hunspell * h = speller(wl.lang());
// Hunspell expects the word in the dictionary encoding
317 string const encoding = h->get_dic_encoding();
318 string const word_to_check = to_iconv_encoding(wl.word(), encoding);
319 h->add(word_to_check.c_str());
// NOTE(review): map::operator[] inserts a null entry when the language
// has no personal word list yet
320 PersonalWordList * pd = personal_[wl.lang()->lang()];
323 pd->insert(wl.word());
/// Tells whether the word of \p wl is in the personal word list of its
/// language, as reported by PersonalWordList::exists().
327 bool HunspellChecker::Private::learned(WordLangTuple const & wl)
// NOTE(review): map::operator[] inserts a null entry for unknown languages
329 PersonalWordList * pd = personal_[wl.lang()->lang()];
332 return pd->exists(wl.word());
/// Default constructor of the Hunspell based spell checker.
336 HunspellChecker::HunspellChecker()
/// Destructor of the Hunspell based spell checker.
341 HunspellChecker::~HunspellChecker()
/// Spell-checks the word in \p wl.
/// Order of checks: the ignore list (isIgnored), the document dictionary
/// \p docdict, then the Hunspell speller of wl.lang().
/// \param wl      word and language to check.
/// \param docdict words learned for the document; an entry matches when
///                language code and word both equal those of \p wl.
/// \return DOCUMENT_LEARNED_WORD for a \p docdict match, NO_DICTIONARY on
///         the no-speller path, LEARNED_WORD or WORD_OK when Hunspell
///         accepts the word (depending on learned()); the remaining
///         results come from the paths that inspect the SPELL_COMPOUND and
///         SPELL_FORBIDDEN bits of \c info.
347 SpellChecker::Result HunspellChecker::check(WordLangTuple const & wl,
348 vector<WordLangTuple> const & docdict)
350 if (d->isIgnored(wl))
353 IgnoreList::const_iterator it = docdict.begin();
354 for (; it != docdict.end(); ++it) {
// entries of another language are not candidates
355 if (it->lang()->code() != wl.lang()->code())
357 if (it->word() == wl.word())
358 return DOCUMENT_LEARNED_WORD;
361 Hunspell * h = d->speller(wl.lang());
363 return NO_DICTIONARY;
// Hunspell expects the word in the dictionary encoding
366 string const encoding = h->get_dic_encoding();
367 string const word_to_check = to_iconv_encoding(wl.word(), encoding);
369 LYXERR(Debug::GUI, "spellCheck: \"" <<
370 wl.word() << "\", lang = " << wl.lang()->lang()) ;
// two Hunspell API flavours: std::string based vs. C string based
371 #ifdef HAVE_HUNSPELL_CXXABI
372 if (h->spell(word_to_check, &info))
374 if (h->spell(word_to_check.c_str(), &info))
// accepted by Hunspell: distinguish personally learned words
376 return d->learned(wl) ? LEARNED_WORD : WORD_OK;
378 if (info & SPELL_COMPOUND) {
379 // FIXME: What to do with that?
380 LYXERR(Debug::GUI, "Hunspell compound word found " << word_to_check);
382 if (info & SPELL_FORBIDDEN) {
383 // This was removed from personal dictionary
384 LYXERR(Debug::GUI, "Hunspell explicit forbidden word found " << word_to_check);
/// Called by insert(), remove() and accept() after they changed the set of
/// accepted words.
/// NOTE(review): presumably bumps a change counter that clients use to
/// detect stale spell-check results — confirm.
391 void HunspellChecker::advanceChangeNumber()
/// Learns the word in \p wl: logs the request and signals the change via
/// advanceChangeNumber().
397 void HunspellChecker::insert(WordLangTuple const & wl)
400 LYXERR(Debug::GUI, "learn word: \"" << wl.word() << "\"") ;
401 advanceChangeNumber();
/// Unlearns the word in \p wl: logs the request and signals the change via
/// advanceChangeNumber().
405 void HunspellChecker::remove(WordLangTuple const & wl)
408 LYXERR(Debug::GUI, "unlearn word: \"" << wl.word() << "\"") ;
409 advanceChangeNumber();
/// Ignores the word in \p wl from now on: the tuple is appended to the
/// ignore list consulted by isIgnored(), then the change is signalled.
/// No duplicate check is made before appending.
413 void HunspellChecker::accept(WordLangTuple const & wl)
415 d->ignored_.push_back(wl);
416 LYXERR(Debug::GUI, "ignore word: \"" << wl.word() << "\"") ;
417 advanceChangeNumber();
/// Collects Hunspell's spelling suggestions for the word in \p wl.
/// The word is converted to the dictionary encoding for the query; every
/// result is converted back, passed through remap_result() and pushed onto
/// \p suggestions.
/// \param wl          word and language to get suggestions for.
/// \param suggestions receives the suggestions.
421 void HunspellChecker::suggest(WordLangTuple const & wl,
422 docstring_list & suggestions)
425 Hunspell * h = d->speller(wl.lang());
428 string const encoding = h->get_dic_encoding();
429 string const word_to_check = to_iconv_encoding(wl.word(), encoding);
// newer Hunspell API: results come back as a vector of strings
430 #ifdef HAVE_HUNSPELL_CXXABI
431 vector<string> wlst = h->suggest(word_to_check);
432 for (auto const & s : wlst)
433 suggestions.push_back(remap_result(from_iconv_encoding(s, encoding)));
// older Hunspell API: results come back as a C array that must be
// handed back to Hunspell with free_list()
435 char ** suggestion_list;
436 int const suggestion_number = h->suggest(&suggestion_list, word_to_check.c_str());
437 if (suggestion_number <= 0)
439 for (int i = 0; i != suggestion_number; ++i)
440 suggestions.push_back(remap_result(from_iconv_encoding(suggestion_list[i], encoding)));
441 h->free_list(&suggestion_list, suggestion_number);
/// Collects the stems Hunspell reports for the word in \p wl.
/// The word is converted to the dictionary encoding for the query; every
/// result is converted back and pushed onto \p suggestions. Unlike
/// suggest(), the results are not passed through remap_result().
/// \param wl          word and language to stem.
/// \param suggestions receives the stems.
446 void HunspellChecker::stem(WordLangTuple const & wl,
447 docstring_list & suggestions)
450 Hunspell * h = d->speller(wl.lang());
453 string const encoding = h->get_dic_encoding();
454 string const word_to_check = to_iconv_encoding(wl.word(), encoding);
// newer Hunspell API: results come back as a vector of strings
455 #ifdef HAVE_HUNSPELL_CXXABI
456 vector<string> wlst = h->stem(word_to_check);
457 for (auto const & s : wlst)
458 suggestions.push_back(from_iconv_encoding(s, encoding));
// older Hunspell API: results come back as a C array that must be
// handed back to Hunspell with free_list()
460 char ** suggestion_list;
461 int const suggestion_number = h->stem(&suggestion_list, word_to_check.c_str());
462 if (suggestion_number <= 0)
464 for (int i = 0; i != suggestion_number; ++i)
465 suggestions.push_back(from_iconv_encoding(suggestion_list[i], encoding));
466 h->free_list(&suggestion_list, suggestion_number);
/// Tells whether a Hunspell dictionary is available for \p lang; the answer
/// returned here is the one given by Private::haveDictionary(lang).
471 bool HunspellChecker::hasDictionary(Language const * lang) const
475 return d->haveDictionary(lang);
/// Number of spellers currently registered with a non-null instance;
/// forwards to Private::numDictionaries().
479 int HunspellChecker::numDictionaries() const
481 return d->numDictionaries();
485 docstring const HunspellChecker::error()