2 * \file HunspellChecker.cpp
3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
6 * \author Abdelrazak Younes
8 * Full author contact details are available in file CREDITS.
13 #include "HunspellChecker.h"
14 #include "PersonalWordList.h"
17 #include "WordLangTuple.h"
19 #include "support/debug.h"
20 #include "support/docstring_list.h"
21 #include "support/filetools.h"
22 #include "support/Package.h"
23 #include "support/FileName.h"
24 #include "support/lassert.h"
25 #include "support/lstrings.h"
27 #include <hunspell/hunspell.hxx>
34 using namespace lyx::support;
35 using namespace lyx::support::os;
/// Cache of Hunspell instances. The code in this file keys it by
/// Language::lang(); a null mapped value is also stored when no dictionary
/// could be found for a language.
41 typedef map<std::string, Hunspell *> Spellers;
/// Personal word lists, keyed by Language::lang() in this file.
42 typedef map<std::string, PersonalWordList *> LangPersonalWordList;
/// List of word/language pairs; isIgnored() walks the ignored_ member with
/// this type's iterator, and accept() appends to that member.
44 typedef vector<WordLangTuple> IgnoreList;
/// Post-process a string returned by Hunspell: passes \p s to subst() with
/// 0x2019 (RIGHT SINGLE QUOTATION MARK) as the value to replace and
/// 0x0027 (APOSTROPHE) as its replacement, and returns the result.
/// Used by HunspellChecker::suggest() on every suggestion.
46 docstring remap_result(docstring const & s)
48 // substitute RIGHT SINGLE QUOTATION MARK
50 return subst(s, 0x2019, 0x0027);
/// Implementation details of HunspellChecker: dictionary lookup, the cache of
/// Hunspell instances and the per-language personal word lists.
56 struct HunspellChecker::Private
/// react to a change of the configured hunspell directory
62 void setUserPath(std::string const & path);
/// directory to search for a given lookup selector
63 const string dictPath(int selector);
/// true if both hpath + ".aff" and hpath + ".dic" are readable files
64 bool haveLanguageFiles(string const & hpath);
/// look for a dictionary of \p lang below directory \p hpath.
/// NOTE(review): hpath is a non-const reference and addSpeller() appends
/// ".aff"/".dic" to it afterwards, so it is presumably updated to the
/// dictionary base path on success -- confirm.
65 bool haveDictionary(Language const * lang, string & hpath);
/// look for a dictionary of \p lang in every lookup location
66 bool haveDictionary(Language const * lang);
/// number of cached spellers that are not null
67 int numDictionaries() const;
/// create and cache a speller for \p lang from directory \p hpath
68 Hunspell * addSpeller(Language const * lang, string & hpath);
/// create a speller for \p lang from the first lookup location that works
69 Hunspell * addSpeller(Language const * lang);
/// cached speller for \p lang, created through addSpeller() if needed
70 Hunspell * speller(Language const * lang);
/// cached speller for \p lang, or nullptr if none is cached
71 Hunspell * lookup(Language const * lang);
/// check \p wl against the ignored_ list and against \p docdict
73 bool isIgnored(WordLangTuple const & wl,
74 std::vector<WordLangTuple> const & docdict) const;
75 /// personal word list interface
76 void remove(WordLangTuple const & wl);
77 void insert(WordLangTuple const & wl);
78 bool learned(WordLangTuple const & wl);
/// personal word list per language, keyed by Language::lang()
84 LangPersonalWordList personal_;
/// hunspell directory last seen by setUserPath() (presumably; the
/// assignment itself is not visible here -- confirm)
86 std::string user_path_;
88 /// the location below system/user directory
89 /// there the aff+dic files lookup will happen
90 const string dictDirectory(void) const { return "dicts"; }
/// upper bound (exclusive) of the selector values passed to dictPath()
91 int maxLookupSelector(void) const { return 5; }
/// base file name of the dictionary of \p lang: "code-variety" when the
/// language has a variety; the empty-variety branch is presumably the plain
/// code -- confirm.
92 const string HunspellDictionaryName(Language const * lang) {
93 return lang->variety().empty()
95 : lang->code() + "-" + lang->variety();
/// hard-coded directory of myspell dictionaries
97 const string myspellPackageDictDirectory(void) {
98 return "/usr/share/myspell";
/// hard-coded directory of hunspell dictionaries
100 const string hunspellPackageDictDirectory(void) {
101 return "/usr/share/hunspell";
/// Constructor: hands the currently configured lyxrc.hunspelldir_path to
/// setUserPath().
106 HunspellChecker::Private::Private()
108 setUserPath(lyxrc.hunspelldir_path);
/// Destructor.
/// NOTE(review): presumably releases the cached spellers and personal word
/// lists via cleanCache() -- confirm.
112 HunspellChecker::Private::~Private()
/// React to a change of the hunspell user directory.
/// \param path the directory to use.
/// NOTE(review): the change test compares user_path_ against
/// lyxrc.hunspelldir_path rather than against \p path. All callers in this
/// file pass lyxrc.hunspelldir_path, so both are the same value there.
118 void HunspellChecker::Private::setUserPath(std::string const & path)
// only act when the configured directory differs from the remembered one
120 if (user_path_ != lyxrc.hunspelldir_path) {
/// Walk both caches: every entry of spellers_ gets its pointer reset to
/// nullptr, then every entry of personal_ is visited.
127 void HunspellChecker::Private::cleanCache()
// first pass: the cached Hunspell instances
129 Spellers::iterator it = spellers_.begin();
130 Spellers::iterator end = spellers_.end();
132 for (; it != end; ++it) {
// NOTE(review): the slot is cleared here; presumably the Hunspell object
// is deleted right before this -- confirm.
134 it->second = nullptr;
// second pass: the personal word lists
137 LangPersonalWordList::const_iterator pdit = personal_.begin();
138 LangPersonalWordList::const_iterator pdet = personal_.end();
140 for (; pdit != pdet; ++pdit) {
// entries without a word list get special handling (presumably skipped)
141 if (pdit->second == nullptr)
// NOTE(review): what is done with pd (save/delete?) is not visible -- confirm.
143 PersonalWordList * pd = pdit->second;
/// Check that a dictionary is present.
/// \param hpath dictionary path without file extension.
/// \return true only if both \p hpath + ".aff" and \p hpath + ".dic" are
/// readable files.
150 bool HunspellChecker::Private::haveLanguageFiles(string const & hpath)
152 FileName const affix(hpath + ".aff");
153 FileName const dict(hpath + ".dic");
154 return affix.isReadableFile() && dict.isReadableFile();
/// Map a lookup selector to the directory to search for dictionaries.
/// The visible results are, in source order: the hunspell package directory,
/// the myspell package directory, the dictDirectory() subdirectory of the
/// system support directory and the same subdirectory of the user support
/// directory.
/// NOTE(review): which selector value picks which directory, and what is
/// returned for other values, is not visible here -- confirm.
158 const string HunspellChecker::Private::dictPath(int selector)
162 return hunspellPackageDictDirectory();
164 return myspellPackageDictDirectory();
// "dicts" below the system support directory
166 return addName(package().system_support().absFileName(),dictDirectory());
// "dicts" below the user support directory
168 return addName(package().user_support().absFileName(),dictDirectory());
/// Look for a dictionary of \p lang in directory \p hpath.
/// Two base names are tried: HunspellDictionaryName(lang) first, then the
/// language code with '_' replaced by '-'.
/// \param lang language to look up; a null pointer is rejected up front.
/// \param hpath directory to search; an empty string is rejected up front.
/// NOTE(review): hpath is a non-const reference and is presumably set to the
/// dictionary base path that was found -- confirm.
175 bool HunspellChecker::Private::haveDictionary(Language const * lang, string & hpath)
// nothing to search without a directory and a language
177 if (hpath.empty() || !lang)
// a speller that is already cached counts as "dictionary available"
180 if (lookup(lang)) return true;
182 string d_name = HunspellDictionaryName(lang);
184 LYXERR(Debug::FILES, "check hunspell path: " << hpath
185 << " for language " << lang->lang() << " with name " << d_name);
187 string h_path = addName(hpath, d_name);
188 // first we try lang code+variety
189 if (haveLanguageFiles(h_path)) {
190 LYXERR(Debug::FILES, " found " << h_path);
194 // another try with code, '_' replaced by '-'
195 h_path = addName(hpath, subst(lang->code(), '_', '-'));
// second attempt failed as well
196 if (!haveLanguageFiles(h_path))
198 LYXERR(Debug::FILES, " found " << h_path);
/// Look for a dictionary of \p lang in all lookup locations.
/// Selectors 0 .. maxLookupSelector()-1 are tried in ascending order and the
/// search stops at the first location for which the two-argument overload
/// reports success.
204 bool HunspellChecker::Private::haveDictionary(Language const * lang)
// pick up a changed hunspell directory before searching
208 setUserPath(lyxrc.hunspelldir_path);
209 for (int p = 0; !result && p < maxLookupSelector(); ++p) {
// local copy: the callee takes the path by non-const reference
210 string lpath = dictPath(p);
211 result = haveDictionary(lang, lpath);
/// Get the speller for \p lang, creating it on demand.
/// The cache is consulted first (a cached speller is presumably returned
/// directly -- confirm); otherwise addSpeller(lang) is asked to create one,
/// and its result, which may be nullptr, is returned.
217 Hunspell * HunspellChecker::Private::speller(Language const * lang)
219 Hunspell * h = lookup(lang);
// pick up a changed hunspell directory before creating a new speller
222 setUserPath(lyxrc.hunspelldir_path);
223 return addSpeller(lang);
/// Look up the cached speller for \p lang (keyed by lang->lang()).
/// \return the cached pointer, or nullptr when the language has no cache
/// entry. A cached entry may itself be nullptr, since addSpeller() stores
/// nullptr for languages without a dictionary.
227 Hunspell * HunspellChecker::Private::lookup(Language const * lang)
229 Spellers::iterator it = spellers_.find(lang->lang());
230 return it != spellers_.end() ? it->second : nullptr;
/// Create a Hunspell instance for \p lang from the dictionary located via
/// \p path and store it in spellers_ under lang->lang().
/// When haveDictionary() finds nothing, nullptr is stored for the language
/// instead.
/// \param path passed to haveDictionary() and afterwards used as the base
/// name to which ".aff" and ".dic" are appended.
234 Hunspell * HunspellChecker::Private::addSpeller(Language const * lang, string & path)
236 if (!haveDictionary(lang, path)) {
// remember the failure for this language
237 spellers_[lang->lang()] = nullptr;
241 FileName const affix(path + ".aff");
242 FileName const dict(path + ".dic");
// raw owning pointer; it is kept in spellers_ below
243 Hunspell * h = new Hunspell(affix.absFileName().c_str(), dict.absFileName().c_str());
// NOTE(review): this streams the Language pointer itself, lang->lang() was
// probably intended; the message also misspells "language".
244 LYXERR(Debug::FILES, "Hunspell speller for langage " << lang << " at " << dict << " added.");
245 spellers_[lang->lang()] = h;
/// Create a speller for \p lang from the first lookup location that yields
/// one, then create the personal word list of the language and add each of
/// its words to the new speller.
250 Hunspell * HunspellChecker::Private::addSpeller(Language const * lang)
252 Hunspell * h = nullptr;
// try the lookup locations in ascending selector order until one succeeds
253 for (int p = 0; p < maxLookupSelector() && nullptr == h; ++p) {
254 string lpath = dictPath(p);
255 h = addSpeller(lang, lpath);
// NOTE(review): h is still nullptr here when no location had a dictionary;
// confirm that a null check guards the code below.
258 string const encoding = h->get_dic_encoding();
259 PersonalWordList * pd = new PersonalWordList(lang->lang());
// raw owning pointer, kept in personal_ under the language name
261 personal_[lang->lang()] = pd;
262 docstring_list::const_iterator it = pd->begin();
263 docstring_list::const_iterator et = pd->end();
264 for (; it != et; ++it) {
// convert each word to the encoding reported by the dictionary
265 string const word_to_add = to_iconv_encoding(*it, encoding);
266 h->add(word_to_add.c_str());
/// Count the entries of spellers_ whose Hunspell pointer is not null.
273 int HunspellChecker::Private::numDictionaries() const
276 Spellers::const_iterator it = spellers_.begin();
277 Spellers::const_iterator et = spellers_.end();
279 for (; it != et; ++it)
// the comparison yields a bool, which adds 1 for every non-null speller
280 result += it->second != nullptr;
/// Check whether \p wl is an ignored word.
/// Two lists are scanned in turn: the ignored_ member and \p docdict. In
/// both scans the language code is compared first, then the word itself.
/// NOTE(review): a match on both presumably makes the function return true
/// -- confirm.
285 bool HunspellChecker::Private::isIgnored(WordLangTuple const & wl,
286 vector<WordLangTuple> const & docdict) const
// scan 1: the ignored_ member
288 IgnoreList::const_iterator it = ignored_.begin();
289 for (; it != ignored_.end(); ++it) {
// different language: this entry cannot match
290 if (it->lang()->code() != wl.lang()->code())
292 if (it->word() == wl.word())
// scan 2: the list passed in by the caller, reusing the same iterator
295 it = docdict.begin();
296 for (; it != docdict.end(); ++it) {
297 if (it->lang()->code() != wl.lang()->code())
299 if (it->word() == wl.word())
305 /// personal word list interface
/// Remove the word of \p wl from the speller of its language and from the
/// personal word list kept for that language.
306 void HunspellChecker::Private::remove(WordLangTuple const & wl)
308 Hunspell * h = speller(wl.lang());
// convert the word to the encoding reported by the dictionary
311 string const encoding = h->get_dic_encoding();
312 string const word_to_check = to_iconv_encoding(wl.word(), encoding);
313 h->remove(word_to_check.c_str());
// NOTE(review): map::operator[] inserts a null entry if the language has
// no personal word list yet; confirm pd is checked before use.
314 PersonalWordList * pd = personal_[wl.lang()->lang()];
317 pd->remove(wl.word());
/// Add the word of \p wl to the speller of its language and to the personal
/// word list kept for that language.
321 void HunspellChecker::Private::insert(WordLangTuple const & wl)
323 Hunspell * h = speller(wl.lang());
// convert the word to the encoding reported by the dictionary
326 string const encoding = h->get_dic_encoding();
327 string const word_to_check = to_iconv_encoding(wl.word(), encoding);
328 h->add(word_to_check.c_str());
// NOTE(review): map::operator[] inserts a null entry if the language has
// no personal word list yet; confirm pd is checked before use.
329 PersonalWordList * pd = personal_[wl.lang()->lang()];
332 pd->insert(wl.word());
/// Tell whether the word of \p wl is in the personal word list of its
/// language, as reported by PersonalWordList::exists().
336 bool HunspellChecker::Private::learned(WordLangTuple const & wl)
// NOTE(review): map::operator[] inserts a null entry for unknown
// languages; confirm pd is checked before use.
338 PersonalWordList * pd = personal_[wl.lang()->lang()];
341 return pd->exists(wl.word());
/// Default constructor.
/// NOTE(review): presumably creates the Private object behind the d
/// pointer used by the other members -- confirm.
345 HunspellChecker::HunspellChecker()
/// Destructor.
/// NOTE(review): presumably releases the Private object behind d -- confirm.
350 HunspellChecker::~HunspellChecker()
/// Spell-check the word of \p wl with the Hunspell speller of its language.
/// \param wl word and language to check.
/// \param docdict additional word list that is consulted together with the
/// ignored words before Hunspell is asked.
/// \return among the visible results: NO_DICTIONARY (presumably when no
/// speller is available for the language), and LEARNED_WORD or WORD_OK for a
/// word accepted by Hunspell, depending on whether it is in the personal
/// word list. The results for ignored and rejected words are not visible
/// here.
356 SpellChecker::Result HunspellChecker::check(WordLangTuple const & wl,
357 vector<WordLangTuple> const & docdict)
// ignored words never reach Hunspell
359 if (d->isIgnored(wl, docdict))
362 Hunspell * h = d->speller(wl.lang());
364 return NO_DICTIONARY;
// convert the word to the encoding reported by the dictionary
367 string const encoding = h->get_dic_encoding();
368 string const word_to_check = to_iconv_encoding(wl.word(), encoding);
370 LYXERR(Debug::GUI, "spellCheck: \"" <<
371 wl.word() << "\", lang = " << wl.lang()->lang()) ;
// the two branches differ only in how the word is passed to spell():
// as std::string or as C string
372 #ifdef HAVE_HUNSPELL_CXXABI
373 if (h->spell(word_to_check, &info))
375 if (h->spell(word_to_check.c_str(), &info))
377 return d->learned(wl) ? LEARNED_WORD : WORD_OK;
// from here on Hunspell did not accept the word; info holds its flags
379 if (info & SPELL_COMPOUND) {
380 // FIXME: What to do with that?
381 LYXERR(Debug::GUI, "Hunspell compound word found " << word_to_check);
383 if (info & SPELL_FORBIDDEN) {
384 // This was removed from personal dictionary
385 LYXERR(Debug::GUI, "Hunspell explicit forbidden word found " << word_to_check);
/// Called by insert(), remove() and accept() after they have changed the
/// word lists.
/// NOTE(review): presumably bumps a change counter, as the name suggests
/// -- confirm.
392 void HunspellChecker::advanceChangeNumber()
/// Learn the word of \p wl: logs the word and calls advanceChangeNumber().
/// NOTE(review): presumably forwards to d->insert(wl) first -- confirm.
398 void HunspellChecker::insert(WordLangTuple const & wl)
401 LYXERR(Debug::GUI, "learn word: \"" << wl.word() << "\"") ;
402 advanceChangeNumber();
/// Unlearn the word of \p wl: logs the word and calls advanceChangeNumber().
/// NOTE(review): presumably forwards to d->remove(wl) first -- confirm.
406 void HunspellChecker::remove(WordLangTuple const & wl)
409 LYXERR(Debug::GUI, "unlearn word: \"" << wl.word() << "\"") ;
410 advanceChangeNumber();
/// Ignore the word of \p wl from now on: it is appended to the ignored_
/// list that isIgnored() consults, the word is logged and
/// advanceChangeNumber() is called.
414 void HunspellChecker::accept(WordLangTuple const & wl)
416 d->ignored_.push_back(wl);
417 LYXERR(Debug::GUI, "ignore word: \"" << wl.word() << "\"") ;
418 advanceChangeNumber();
/// Ask Hunspell for spelling suggestions for the word of \p wl.
/// \param wl word and language.
/// \param suggestions receives the suggestions, each converted back from
/// the dictionary encoding and passed through remap_result().
422 void HunspellChecker::suggest(WordLangTuple const & wl,
423 docstring_list & suggestions)
426 Hunspell * h = d->speller(wl.lang());
// convert the word to the encoding reported by the dictionary
429 string const encoding = h->get_dic_encoding();
430 string const word_to_check = to_iconv_encoding(wl.word(), encoding);
// variant 1: suggest() returns a vector of std::string
431 #ifdef HAVE_HUNSPELL_CXXABI
432 vector<string> wlst = h->suggest(word_to_check);
433 for (auto const & s : wlst)
434 suggestions.push_back(remap_result(from_iconv_encoding(s, encoding)));
// variant 2: suggest() fills a C array of C strings and returns its length
436 char ** suggestion_list;
437 int const suggestion_number = h->suggest(&suggestion_list, word_to_check.c_str());
// nothing to collect
438 if (suggestion_number <= 0)
440 for (int i = 0; i != suggestion_number; ++i)
441 suggestions.push_back(remap_result(from_iconv_encoding(suggestion_list[i], encoding)));
// hand the array back to Hunspell
442 h->free_list(&suggestion_list, suggestion_number);
/// Ask Hunspell for the stems of the word of \p wl.
/// \param wl word and language.
/// \param suggestions receives the stems, each converted back from the
/// dictionary encoding. Unlike suggest(), remap_result() is not applied.
447 void HunspellChecker::stem(WordLangTuple const & wl,
448 docstring_list & suggestions)
451 Hunspell * h = d->speller(wl.lang());
// convert the word to the encoding reported by the dictionary
454 string const encoding = h->get_dic_encoding();
455 string const word_to_check = to_iconv_encoding(wl.word(), encoding);
// variant 1: stem() returns a vector of std::string
456 #ifdef HAVE_HUNSPELL_CXXABI
457 vector<string> wlst = h->stem(word_to_check);
458 for (auto const & s : wlst)
459 suggestions.push_back(from_iconv_encoding(s, encoding));
// variant 2: stem() fills a C array of C strings and returns its length
461 char ** suggestion_list;
462 int const suggestion_number = h->stem(&suggestion_list, word_to_check.c_str());
// nothing to collect
463 if (suggestion_number <= 0)
465 for (int i = 0; i != suggestion_number; ++i)
466 suggestions.push_back(from_iconv_encoding(suggestion_list[i], encoding));
// hand the array back to Hunspell
467 h->free_list(&suggestion_list, suggestion_number);
/// Tell whether a dictionary for \p lang can be found; the visible path
/// forwards to Private::haveDictionary(lang).
472 bool HunspellChecker::hasDictionary(Language const * lang) const
476 return d->haveDictionary(lang);
/// Number of spellers currently cached with a non-null Hunspell instance;
/// forwards to Private::numDictionaries().
480 int HunspellChecker::numDictionaries() const
482 return d->numDictionaries();
486 docstring const HunspellChecker::error()