2 * \file HunspellChecker.cpp
3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
6 * \author Abdelrazak Younes
8 * Full author contact details are available in file CREDITS.
13 #include "HunspellChecker.h"
14 #include "PersonalWordList.h"
17 #include "WordLangTuple.h"
19 #include "support/debug.h"
20 #include "support/docstring_list.h"
21 #include "support/filetools.h"
22 #include "support/Package.h"
23 #include "support/FileName.h"
24 #include "support/lassert.h"
25 #include "support/lstrings.h"
27 #include <hunspell/hunspell.hxx>
34 using namespace lyx::support;
35 using namespace lyx::support::os;
/// Cache of Hunspell instances; the code in this file keys it by
/// Language::lang(). An entry may hold nullptr (stored when no
/// dictionary was found for that language).
41 typedef map<std::string, Hunspell *> Spellers;
/// Personal word lists; the code in this file keys it by Language::lang().
42 typedef map<std::string, PersonalWordList *> LangPersonalWordList;
/// Post-process a string coming back from Hunspell.
/// \param s the string to remap.
/// \return the result of subst() with U+2019 (RIGHT SINGLE QUOTATION MARK)
/// replaced by U+0027 (APOSTROPHE).
44 docstring remap_result(docstring const & s)
46 // substitute RIGHT SINGLE QUOTATION MARK
48 return subst(s, 0x2019, 0x0027);
/// Private implementation data of HunspellChecker: the per-language
/// speller cache, the personal word lists, the ignored words and the
/// helpers that locate the dictionary files on disk.
54 struct HunspellChecker::Private
/// Set the user dictionary path.
60 void setUserPath(std::string const & path);
/// Directory to search for the given lookup selector.
61 const string dictPath(int selector);
/// True if both hpath + ".aff" and hpath + ".dic" are readable files.
62 bool haveLanguageFiles(string const & hpath);
/// Check for a dictionary of \p lang below the directory \p hpath.
63 bool haveDictionary(Language const * lang, string & hpath);
/// Check for a dictionary of \p lang in the lookup directories.
64 bool haveDictionary(Language const * lang);
/// Number of cached spellers that are not nullptr.
65 int numDictionaries() const;
/// Create and cache a speller for \p lang from the directory \p hpath.
66 Hunspell * addSpeller(Language const * lang, string & hpath);
/// Create and cache a speller for \p lang, trying the lookup directories
/// in selector order.
67 Hunspell * addSpeller(Language const * lang);
/// Cached speller for \p lang, created on demand.
68 Hunspell * speller(Language const * lang);
/// Cached speller for \p lang, or nullptr; never creates one.
69 Hunspell * lookup(Language const * lang);
/// True if \p wl matches an entry of ignored_ (same language code and word).
71 bool isIgnored(WordLangTuple const & wl) const;
72 /// personal word list interface
73 void remove(WordLangTuple const & wl);
74 void insert(WordLangTuple const & wl);
75 bool learned(WordLangTuple const & wl);
/// Words added through HunspellChecker::accept().
79 WordLangTable ignored_;
/// Personal word list per language, keyed by Language::lang().
81 LangPersonalWordList personal_;
/// User dictionary path; compared against lyxrc.hunspelldir_path
/// in setUserPath().
83 std::string user_path_;
85 /// the location below system/user directory
86 /// where the aff+dic files lookup will happen
87 const string dictDirectory(void) const { return "dicts"; }
/// Upper bound (exclusive) of the selectors passed to dictPath().
88 int maxLookupSelector(void) const { return 5; }
/// Base name of the dictionary files for \p lang: "<code>-<variety>" when
/// a variety is set.
/// NOTE(review): the branch taken for an empty variety is not visible
/// here -- presumably the bare code; confirm.
89 const string HunspellDictionaryName(Language const * lang) {
90 return lang->variety().empty()
92 : lang->code() + "-" + lang->variety();
/// Hard-coded directory of myspell dictionaries.
94 const string myspellPackageDictDirectory(void) {
95 return "/usr/share/myspell";
/// Hard-coded directory of hunspell dictionaries.
97 const string hunspellPackageDictDirectory(void) {
98 return "/usr/share/hunspell";
/// Constructor: initializes the user dictionary path from the
/// lyxrc.hunspelldir_path preference.
103 HunspellChecker::Private::Private()
105 setUserPath(lyxrc.hunspelldir_path);
/// Destructor of the private implementation data.
109 HunspellChecker::Private::~Private()
/// React to a change of the user dictionary path.
/// \param path the new user dictionary path.
/// NOTE(review): the comparison below reads lyxrc.hunspelldir_path and
/// ignores \p path. Every caller visible in this file passes
/// lyxrc.hunspelldir_path, so the two coincide today; confirm whether
/// `user_path_ != path` was intended.
115 void HunspellChecker::Private::setUserPath(std::string const & path)
117 if (user_path_ != lyxrc.hunspelldir_path) {
/// Reset the cached state: walks every entry of spellers_ and sets the
/// stored pointer to nullptr, then walks the personal word lists.
/// NOTE(review): presumably the pointed-to objects are released here as
/// well -- the corresponding statements are not visible; confirm.
124 void HunspellChecker::Private::cleanCache()
126 Spellers::iterator it = spellers_.begin();
127 Spellers::iterator end = spellers_.end();
129 for (; it != end; ++it) {
// the map entry is kept, only its pointer is cleared
131 it->second = nullptr;
134 LangPersonalWordList::const_iterator pdit = personal_.begin();
135 LangPersonalWordList::const_iterator pdet = personal_.end();
137 for (; pdit != pdet; ++pdit) {
// entries without a word list are tested for explicitly
138 if (pdit->second == nullptr)
140 PersonalWordList * pd = pdit->second;
/// Check that a complete dictionary file pair exists.
/// \param hpath path prefix of the dictionary, without extension.
/// \return true only if both hpath + ".aff" (affix file) and
/// hpath + ".dic" (word list) are readable files.
147 bool HunspellChecker::Private::haveLanguageFiles(string const & hpath)
149 FileName const affix(hpath + ".aff");
150 FileName const dict(hpath + ".dic");
151 return affix.isReadableFile() && dict.isReadableFile();
/// Map a lookup selector to a dictionary directory.
/// The candidates visible here are the hunspell and myspell package
/// directories and the dictDirectory() subdirectory of the system and
/// user support directories.
/// NOTE(review): which selector value yields which directory is not
/// visible here; confirm against the full function.
155 const string HunspellChecker::Private::dictPath(int selector)
159 return hunspellPackageDictDirectory();
161 return myspellPackageDictDirectory();
163 return addName(package().system_support().absFileName(),dictDirectory());
165 return addName(package().user_support().absFileName(),dictDirectory());
/// Check whether dictionary files for \p lang exist below \p hpath.
/// \param lang the language to look for; a null pointer is tested for.
/// \param hpath directory to search; an empty string is tested for.
/// NOTE(review): hpath is a non-const reference and addSpeller() later
/// appends ".aff"/".dic" to it, so it is presumably updated to the found
/// file prefix on success -- the assignment is not visible here.
/// \return true right away if a speller for \p lang is already cached.
172 bool HunspellChecker::Private::haveDictionary(Language const * lang, string & hpath)
// nothing to search with: no directory or no language
174 if (hpath.empty() || !lang)
// an existing cached speller implies the dictionary exists
177 if (lookup(lang)) return true;
179 string d_name = HunspellDictionaryName(lang);
181 LYXERR(Debug::FILES, "check hunspell path: " << hpath
182 << " for language " << lang->lang() << " with name " << d_name);
184 string h_path = addName(hpath, d_name);
185 // first we try lang code+variety
186 if (haveLanguageFiles(h_path)) {
187 LYXERR(Debug::FILES, " found " << h_path);
191 // another try with code, '_' replaced by '-'
192 h_path = addName(hpath, subst(lang->code(), '_', '-'));
193 if (!haveLanguageFiles(h_path))
195 LYXERR(Debug::FILES, " found " << h_path);
/// Check whether a dictionary for \p lang exists in any lookup directory.
/// Refreshes the user path from lyxrc.hunspelldir_path, then tries the
/// directories dictPath(0) .. dictPath(maxLookupSelector() - 1) in order
/// and stops at the first hit.
201 bool HunspellChecker::Private::haveDictionary(Language const * lang)
205 setUserPath(lyxrc.hunspelldir_path);
// the loop ends as soon as one directory yields a dictionary
206 for (int p = 0; !result && p < maxLookupSelector(); ++p) {
207 string lpath = dictPath(p);
208 result = haveDictionary(lang, lpath);
/// Get the speller for \p lang, creating it if needed.
/// Looks in the cache first; the fallback refreshes the user path from
/// lyxrc.hunspelldir_path and delegates to addSpeller(lang).
/// NOTE(review): presumably a cache hit is returned directly -- the
/// statement using `h` is not visible here.
/// \param lang the language; must not be null (lookup() dereferences it).
214 Hunspell * HunspellChecker::Private::speller(Language const * lang)
216 Hunspell * h = lookup(lang);
219 setUserPath(lyxrc.hunspelldir_path);
220 return addSpeller(lang);
/// Find the cached speller for \p lang without creating one.
/// \param lang the language; dereferenced unconditionally, so it must
/// not be null.
/// \return the pointer stored under lang->lang(), or nullptr if there is
/// no entry. A stored entry may itself be nullptr (addSpeller() records
/// failed lookups that way), so nullptr does not tell the two cases apart.
224 Hunspell * HunspellChecker::Private::lookup(Language const * lang)
226 Spellers::iterator it = spellers_.find(lang->lang());
227 return it != spellers_.end() ? it->second : nullptr;
/// Create a Hunspell instance for \p lang from the directory \p path and
/// store it in spellers_ under lang->lang().
/// \param lang the language to create a speller for.
/// \param path directory to search; passed by reference to
/// haveDictionary(), then used as the file prefix for ".aff"/".dic".
/// The instance is allocated with `new` and held as a raw pointer in
/// spellers_.
231 Hunspell * HunspellChecker::Private::addSpeller(Language const * lang, string & path)
233 if (!haveDictionary(lang, path)) {
// remember the failed lookup with a null entry
234 spellers_[lang->lang()] = nullptr;
238 FileName const affix(path + ".aff");
239 FileName const dict(path + ".dic");
240 Hunspell * h = new Hunspell(affix.absFileName().c_str(), dict.absFileName().c_str());
// NOTE(review): this message streams the `lang` pointer itself rather
// than lang->lang(), and spells "language" as "langage".
241 LYXERR(Debug::FILES, "Hunspell speller for langage " << lang << " at " << dict << " added.");
242 spellers_[lang->lang()] = h;
/// Create a speller for \p lang from the first lookup directory that
/// provides a dictionary, and feed it the personal word list.
/// Directories dictPath(0) .. dictPath(maxLookupSelector() - 1) are tried
/// in order until addSpeller(lang, path) returns a non-null speller.
/// A PersonalWordList for lang->lang() is then allocated with `new`,
/// stored in personal_, and each of its words is added to the speller
/// after conversion to the dictionary's encoding.
/// NOTE(review): `h` is dereferenced below; presumably a null check
/// guards that section on a line not visible here -- confirm.
247 Hunspell * HunspellChecker::Private::addSpeller(Language const * lang)
249 Hunspell * h = nullptr;
250 for (int p = 0; p < maxLookupSelector() && nullptr == h; ++p) {
251 string lpath = dictPath(p);
252 h = addSpeller(lang, lpath);
// Hunspell expects words in the encoding of its dictionary file
255 string const encoding = h->get_dic_encoding();
256 PersonalWordList * pd = new PersonalWordList(lang->lang());
258 personal_[lang->lang()] = pd;
259 docstring_list::const_iterator it = pd->begin();
260 docstring_list::const_iterator et = pd->end();
261 for (; it != et; ++it) {
262 string const word_to_add = to_iconv_encoding(*it, encoding);
263 h->add(word_to_add.c_str());
/// Count the entries of spellers_ that hold a non-null Hunspell pointer.
270 int HunspellChecker::Private::numDictionaries() const
273 Spellers::const_iterator it = spellers_.begin();
274 Spellers::const_iterator et = spellers_.end();
276 for (; it != et; ++it)
// the bool comparison adds 1 for a non-null entry, 0 otherwise
277 result += it->second != nullptr;
/// Linear search of ignored_ for \p wl.
/// An entry matches when its language code and its word both equal
/// those of \p wl.
282 bool HunspellChecker::Private::isIgnored(WordLangTuple const & wl) const
284 WordLangTable::const_iterator it = ignored_.begin();
285 for (; it != ignored_.end(); ++it) {
// compare the language code first, then the word
286 if (it->lang()->code() != wl.lang()->code())
288 if (it->word() == wl.word())
294 /// personal word list interface
/// Remove the word of \p wl from the Hunspell instance of its language
/// (after conversion to the dictionary encoding) and from that
/// language's personal word list.
295 void HunspellChecker::Private::remove(WordLangTuple const & wl)
297 Hunspell * h = speller(wl.lang());
300 string const encoding = h->get_dic_encoding();
301 string const word_to_check = to_iconv_encoding(wl.word(), encoding);
302 h->remove(word_to_check.c_str());
// map operator[] default-inserts a null entry when the language is absent
303 PersonalWordList * pd = personal_[wl.lang()->lang()];
306 pd->remove(wl.word());
/// Add the word of \p wl to the Hunspell instance of its language (after
/// conversion to the dictionary encoding) and to that language's
/// personal word list.
310 void HunspellChecker::Private::insert(WordLangTuple const & wl)
312 Hunspell * h = speller(wl.lang());
315 string const encoding = h->get_dic_encoding();
316 string const word_to_check = to_iconv_encoding(wl.word(), encoding);
317 h->add(word_to_check.c_str());
// map operator[] default-inserts a null entry when the language is absent
318 PersonalWordList * pd = personal_[wl.lang()->lang()];
321 pd->insert(wl.word());
/// Tell whether the word of \p wl is in the personal word list of its
/// language, as reported by PersonalWordList::exists().
325 bool HunspellChecker::Private::learned(WordLangTuple const & wl)
// map operator[] default-inserts a null entry when the language is absent
327 PersonalWordList * pd = personal_[wl.lang()->lang()];
330 return pd->exists(wl.word());
/// Default constructor of the Hunspell based spell checker.
334 HunspellChecker::HunspellChecker()
/// Destructor of the Hunspell based spell checker.
339 HunspellChecker::~HunspellChecker()
/// Spell check a single word.
/// \param wl the word and its language.
/// \param docdict the document's own word list; a matching entry (same
/// language code and word) yields DOCUMENT_LEARNED_WORD.
/// \return among the visible results: DOCUMENT_LEARNED_WORD, NO_DICTIONARY,
/// and -- when Hunspell accepts the word -- LEARNED_WORD if it is in the
/// personal word list, else WORD_OK. Results of the remaining paths are
/// not visible here.
345 SpellChecker::Result HunspellChecker::check(WordLangTuple const & wl,
346 vector<WordLangTuple> const & docdict)
// words ignored through accept() are tested before anything else
348 if (d->isIgnored(wl))
351 WordLangTable::const_iterator it = docdict.begin();
352 for (; it != docdict.end(); ++it) {
353 if (it->lang()->code() != wl.lang()->code())
355 if (it->word() == wl.word())
356 return DOCUMENT_LEARNED_WORD;
359 Hunspell * h = d->speller(wl.lang());
361 return NO_DICTIONARY;
// Hunspell expects the word in the encoding of its dictionary file
364 string const encoding = h->get_dic_encoding();
365 string const word_to_check = to_iconv_encoding(wl.word(), encoding);
367 LYXERR(Debug::GUI, "spellCheck: \"" <<
368 wl.word() << "\", lang = " << wl.lang()->lang()) ;
// two Hunspell API variants: std::string argument vs. C string argument
369 #ifdef HAVE_HUNSPELL_CXXABI
370 if (h->spell(word_to_check, &info))
372 if (h->spell(word_to_check.c_str(), &info))
374 return d->learned(wl) ? LEARNED_WORD : WORD_OK;
// `info` carries the flag bits filled in by Hunspell::spell()
376 if (info & SPELL_COMPOUND) {
377 // FIXME: What to do with that?
378 LYXERR(Debug::GUI, "Hunspell compound word found " << word_to_check);
380 if (info & SPELL_FORBIDDEN) {
381 // This was removed from personal dictionary
382 LYXERR(Debug::GUI, "Hunspell explicit forbidden word found " << word_to_check);
/// Called by insert(), remove() and accept() after they have changed the
/// checker's word state.
389 void HunspellChecker::advanceChangeNumber()
/// Learn the word of \p wl: logs the word and advances the change number.
/// NOTE(review): presumably forwards to d->insert(wl) on a line not
/// visible here -- confirm.
395 void HunspellChecker::insert(WordLangTuple const & wl)
398 LYXERR(Debug::GUI, "learn word: \"" << wl.word() << "\"") ;
399 advanceChangeNumber();
/// Unlearn the word of \p wl: logs the word and advances the change number.
/// NOTE(review): presumably forwards to d->remove(wl) on a line not
/// visible here -- confirm.
403 void HunspellChecker::remove(WordLangTuple const & wl)
406 LYXERR(Debug::GUI, "unlearn word: \"" << wl.word() << "\"") ;
407 advanceChangeNumber();
/// Ignore the word of \p wl from now on: appends it to the ignored word
/// list consulted by check(), logs it and advances the change number.
411 void HunspellChecker::accept(WordLangTuple const & wl)
413 d->ignored_.push_back(wl);
414 LYXERR(Debug::GUI, "ignore word: \"" << wl.word() << "\"") ;
415 advanceChangeNumber();
/// Collect spelling suggestions for the word of \p wl.
/// \param wl the word and its language.
/// \param suggestions receives the suggestions, converted back from the
/// dictionary encoding and passed through remap_result().
419 void HunspellChecker::suggest(WordLangTuple const & wl,
420 docstring_list & suggestions)
423 Hunspell * h = d->speller(wl.lang());
// Hunspell expects the word in the encoding of its dictionary file
426 string const encoding = h->get_dic_encoding();
427 string const word_to_check = to_iconv_encoding(wl.word(), encoding);
// variant 1: suggest() returns a vector of strings
428 #ifdef HAVE_HUNSPELL_CXXABI
429 vector<string> wlst = h->suggest(word_to_check);
430 for (auto const & s : wlst)
431 suggestions.push_back(remap_result(from_iconv_encoding(s, encoding)));
// variant 2: suggest() fills a C string array that is released with
// free_list() once the entries have been copied
433 char ** suggestion_list;
434 int const suggestion_number = h->suggest(&suggestion_list, word_to_check.c_str());
435 if (suggestion_number <= 0)
437 for (int i = 0; i != suggestion_number; ++i)
438 suggestions.push_back(remap_result(from_iconv_encoding(suggestion_list[i], encoding)));
439 h->free_list(&suggestion_list, suggestion_number);
/// Collect the stems Hunspell reports for the word of \p wl.
/// \param wl the word and its language.
/// \param suggestions receives the stems, converted back from the
/// dictionary encoding. Unlike suggest(), remap_result() is not applied.
444 void HunspellChecker::stem(WordLangTuple const & wl,
445 docstring_list & suggestions)
448 Hunspell * h = d->speller(wl.lang());
// Hunspell expects the word in the encoding of its dictionary file
451 string const encoding = h->get_dic_encoding();
452 string const word_to_check = to_iconv_encoding(wl.word(), encoding);
// variant 1: stem() returns a vector of strings
453 #ifdef HAVE_HUNSPELL_CXXABI
454 vector<string> wlst = h->stem(word_to_check);
455 for (auto const & s : wlst)
456 suggestions.push_back(from_iconv_encoding(s, encoding));
// variant 2: stem() fills a C string array that is released with
// free_list() once the entries have been copied
458 char ** suggestion_list;
459 int const suggestion_number = h->stem(&suggestion_list, word_to_check.c_str());
460 if (suggestion_number <= 0)
462 for (int i = 0; i != suggestion_number; ++i)
463 suggestions.push_back(from_iconv_encoding(suggestion_list[i], encoding));
464 h->free_list(&suggestion_list, suggestion_number);
/// Tell whether a dictionary for \p lang is available; the visible path
/// delegates to Private::haveDictionary(lang).
469 bool HunspellChecker::hasDictionary(Language const * lang) const
473 return d->haveDictionary(lang);
/// Number of cached spellers that are not null; forwards to
/// Private::numDictionaries().
477 int HunspellChecker::numDictionaries() const
479 return d->numDictionaries();
483 docstring const HunspellChecker::error()