2 * \file AspellChecker.cpp
3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
6 * \author Kevin Atkinson
9 * Full author contact details are available in file CREDITS.
14 #include "AspellChecker.h"
15 #include "PersonalWordList.h"
18 #include "WordLangTuple.h"
20 #include "support/lassert.h"
21 #include "support/debug.h"
22 #include "support/lstrings.h"
23 #include "support/docstring_list.h"
25 #include "support/filetools.h"
26 #include "support/Package.h"
27 #include "support/FileName.h"
28 #include "support/PathChanger.h"
36 using namespace lyx::support;
/// Aspell configuration handle from which the speller is created.
43 AspellConfig * config;
/// Handle returned by new_aspell_speller(); it can carry an error state.
44 AspellCanHaveError * e_speller;
/// Words accepted via accept(); re-added whenever the session dictionary is rebuilt.
46 docstring_list ignored_words_;
/// Speller entries keyed by the language name (Language::lang()).
49 typedef std::map<std::string, Speller> Spellers;
/// Personal word lists keyed by the language name (Language::lang()).
50 typedef map<std::string, PersonalWordList *> LangPersonalWordList;
/// Private implementation of AspellChecker: holds the per-language spellers
/// and personal word lists and the helpers that operate on them.
54 struct AspellChecker::Private
61 /// add a speller of the given language and variety
62 AspellSpeller * addSpeller(Language const * lang);
/// return the speller registered for lang; falls back to addSpeller()
65 AspellSpeller * speller(Language const * lang);
/// look up lang (and variety, unless empty) in aspell's dictionary list for config
67 bool isValidDictionary(AspellConfig * config,
68 string const & lang, string const & variety);
/// number of registered spellers that have a non-null speller handle
69 int numDictionaries() const;
/// probe basepath/dictpath and basepath/datapath for aspell data and
/// point config at them
70 bool checkAspellData(AspellConfig * config,
71 string const & basepath, string const & datapath, string const & dictpath,
72 string const & lang, string const & variety);
/// create a new aspell config, probing user, system and OS package directories
73 AspellConfig * getConfig(string const & lang, string const & variety);
/// convert word to UTF-8 with all '-' characters removed
75 string toAspellWord(docstring const & word) const;
/// check word with the aspell speller m
77 SpellChecker::Result check(AspellSpeller * m,
78 WordLangTuple const & word) const;
/// clear the aspell session and re-add the words of pd and the ignored words
80 void initSessionDictionary(Speller const & speller, PersonalWordList * pd);
/// add word to the session of the given speller; failures are only logged
81 void addToSession(AspellCanHaveError * speller, docstring const & word);
/// personal word list interface: add, remove and query a word
82 void insert(WordLangTuple const & word);
83 void remove(WordLangTuple const & word);
84 bool learned(WordLangTuple const & word);
/// remember word in the ignored words of speller
86 void accept(Speller & speller, WordLangTuple const & word);
/// personal word lists, keyed by language name
91 LangPersonalWordList personal_;
93 /// the location below system/user directory
94 /// where the rws files are looked up
95 const string dictDirectory(void)
/// the location below system/user directory
99 /// where the dat+cmap files are looked up
100 const string dataDirectory(void)
104 /// os package directory constants
105 /// macports on Mac OS X or
106 /// aspell rpms on Linux
107 const string osPackageBase(void)
109 #ifdef USE_MACOSX_PACKAGING
/// dictionary directory below osPackageBase(); differs for Mac OS X packaging
115 const string osPackageDictDirectory(void)
117 #ifdef USE_MACOSX_PACKAGING
118 return "/share/aspell";
120 return "/lib/aspell-0.60";
/// data directory below osPackageBase()
123 const string osPackageDataDirectory(void)
125 return "/lib/aspell-0.60";
/// Destructor: walks all registered spellers and personal word lists.
/// For a speller entry with a speller handle, all aspell word lists are
/// saved before the handle is deleted; the entry's aspell config is deleted
/// as well.
131 AspellChecker::Private::~Private()
133 Spellers::iterator it = spellers_.begin();
134 Spellers::iterator end = spellers_.end();
136 for (; it != end; ++it) {
// entries without a speller handle have nothing to save
137 if (it->second.e_speller) {
138 AspellSpeller * speller = to_aspell_speller(it->second.e_speller);
139 aspell_speller_save_all_word_lists(speller);
140 delete_aspell_can_have_error(it->second.e_speller);
142 delete_aspell_config(it->second.config);
145 LangPersonalWordList::const_iterator pdit = personal_.begin();
146 LangPersonalWordList::const_iterator pdet = personal_.end();
148 for (; pdit != pdet; ++pdit) {
// null entries are tested for explicitly
149 if ( 0 == pdit->second)
// NOTE(review): presumably pd is saved and deleted here -- confirm
151 PersonalWordList * pd = pdit->second;
/// Searches aspell's dictionary list for \p config for an entry whose code
/// equals \p lang and, unless \p variety is empty, whose jargon equals
/// \p variety. Every entry seen is logged at Debug::DEBUG level and the
/// outcome at Debug::FILES level.
158 bool AspellChecker::Private::isValidDictionary(AspellConfig * config,
159 string const & lang, string const & variety)
162 // code taken from aspell's list-dicts example
163 // the returned pointer should _not_ need to be deleted
164 AspellDictInfoList * dlist = get_aspell_dict_info_list(config);
165 AspellDictInfoEnumeration * dels = aspell_dict_info_list_elements(dlist);
166 const AspellDictInfo * entry;
// the enumeration yields 0 when it is exhausted
168 while (0 != (entry = aspell_dict_info_enumeration_next(dels))) {
169 LYXERR(Debug::DEBUG, "aspell dict:"
170 << " name=" << entry->name
171 << ",code=" << entry->code
172 << ",variety=" << entry->jargon);
// an empty variety matches any jargon
173 if (entry->code == lang && (variety.empty() || entry->jargon == variety)) {
// the enumeration (unlike the list) is released explicitly
178 delete_aspell_dict_info_enumeration(dels);
179 LYXERR(Debug::FILES, "aspell dictionary: " << lang << (have ? " yes" : " no"));
/// Probes \p basepath for aspell data: the dictionary directory is
/// \p basepath + \p dictpath and the data directory \p basepath + \p datapath.
/// Both must be directories; \p config then gets its "dict-dir" and
/// "data-dir" replaced by them and is queried with isValidDictionary().
/// NOTE(review): the guards between these steps and the return value are
/// presumably driven by have_dict -- confirm.
184 bool AspellChecker::Private::checkAspellData(AspellConfig * config,
185 string const & basepath, string const & datapath, string const & dictpath,
186 string const & lang, string const & variety)
188 FileName base(basepath);
189 bool have_dict = base.isDirectory() ;
192 FileName data(addPath(base.absFileName(), datapath));
193 FileName dict(addPath(base.absFileName(), dictpath));
194 have_dict = dict.isDirectory() && data.isDirectory();
196 LYXERR(Debug::FILES, "aspell dict-dir: " << dict);
197 LYXERR(Debug::FILES, "aspell data-dir: " << data);
198 aspell_config_replace(config, "dict-dir", dict.absFileName().c_str());
199 aspell_config_replace(config, "data-dir", data.absFileName().c_str());
200 have_dict = isValidDictionary(config, lang, variety);
/// Creates a new aspell config and probes for a dictionary of \p lang /
/// \p variety with checkAspellData(): in the LyX user support directory,
/// the LyX system support directory and the OS package installation.
/// NOTE(review): presumably each later location is only tried when the
/// earlier ones had no dictionary, and the new config is returned -- confirm.
/// hasDictionary() deletes the config it obtains from this function with
/// delete_aspell_config().
207 AspellConfig * AspellChecker::Private::getConfig(string const & lang, string const & variety)
209 AspellConfig * config = new_aspell_config();
210 bool have_dict = false;
211 string const sysdir = lyx::support::package().system_support().absFileName() ;
212 string const userdir = lyx::support::package().user_support().absFileName() ;
214 LYXERR(Debug::FILES, "aspell user dir: " << userdir);
215 have_dict = checkAspellData(config, userdir, dataDirectory(), dictDirectory(), lang, variety);
217 LYXERR(Debug::FILES, "aspell sysdir dir: " << sysdir);
218 have_dict = checkAspellData(config, sysdir, dataDirectory(), dictDirectory(), lang, variety);
221 // check for package data of OS installation
// the result of this last probe is not stored
222 checkAspellData(config, osPackageBase(), osPackageDataDirectory(), osPackageDictDirectory(), lang, variety);
/// Adds \p word, converted with toAspellWord(), to the session word list of
/// \p speller. A result other than 1 from aspell is not propagated; the
/// aspell error message is only logged at Debug::GUI level.
228 void AspellChecker::Private::addToSession(AspellCanHaveError * speller, docstring const & word)
230 string const word_to_add = toAspellWord(word);
// -1 is passed as the word size argument
231 if(1 != aspell_speller_add_to_session(to_aspell_speller(speller), word_to_add.c_str(), -1))
232 LYXERR(Debug::GUI, "aspell add to session: " << aspell_error_message(speller));
/// Rebuilds the aspell session of \p speller from scratch: the session is
/// cleared, then every word of the personal word list \p pd and every word
/// in the speller's ignored_words_ is added again with addToSession().
/// \p pd is dereferenced without a null check.
236 void AspellChecker::Private::initSessionDictionary(
237 Speller const & speller,
238 PersonalWordList * pd)
240 AspellSpeller * aspell = to_aspell_speller(speller.e_speller);
241 aspell_speller_clear_session(aspell);
// first pass: the personal word list
242 docstring_list::const_iterator it = pd->begin();
243 docstring_list::const_iterator et = pd->end();
244 for (; it != et; ++it) {
245 addToSession(speller.e_speller, *it);
// second pass: words accepted for this session
247 it = speller.ignored_words_.begin();
248 et = speller.ignored_words_.end();
249 for (; it != et; ++it) {
250 addToSession(speller.e_speller, *it);
/// Creates a speller entry for \p lang and registers it in spellers_ under
/// lang->lang(). The aspell config comes from getConfig() and is then given
/// the language code, the variety (if any), utf-8 encoding and the
/// run-together setting taken from lyxrc.spellchecker_accept_compound.
/// If aspell reports an error for the new speller, the message is logged and
/// both the speller handle and the config are deleted.
/// \return the aspell speller, or 0 if the entry has no speller handle.
255 AspellSpeller * AspellChecker::Private::addSpeller(Language const * lang)
258 string const code = lang->code();
259 string const variety = lang->variety();
260 m.config = getConfig(code, variety);
261 // Aspell supports both languages and varieties (such as German
262 // old vs. new spelling). The respective naming convention is
263 // lang_REGION-variety (e.g. de_DE-alt).
264 aspell_config_replace(m.config, "lang", code.c_str());
265 if (!variety.empty())
266 aspell_config_replace(m.config, "variety", variety.c_str());
267 // Set the encoding to utf-8.
268 // aspell does also understand "ucs-4", so we would not need a
269 // conversion in theory, but if this is used it expects all
270 // char const * arguments to be a cast from uint const *, and it
271 // seems that this uint is not compatible with our char_type on some
272 // platforms (cygwin, OS X). Therefore we use utf-8, that does
274 aspell_config_replace(m.config, "encoding", "utf-8");
275 if (lyxrc.spellchecker_accept_compound)
276 // Consider run-together words as legal compounds
277 aspell_config_replace(m.config, "run-together", "true");
279 // Report run-together words as errors
280 aspell_config_replace(m.config, "run-together", "false");
// remember the setting so that speller() can detect a later change
282 m.accept_compound = lyxrc.spellchecker_accept_compound;
283 m.e_speller = new_aspell_speller(m.config);
284 if (aspell_error_number(m.e_speller) != 0) {
285 // FIXME: We should indicate somehow that this language is not supported.
286 LYXERR(Debug::FILES, "aspell error: " << aspell_error_message(m.e_speller));
287 delete_aspell_can_have_error(m.e_speller);
288 delete_aspell_config(m.config);
// the personal word list is registered under the same key as the speller
292 PersonalWordList * pd = new PersonalWordList(lang->lang());
294 personal_[lang->lang()] = pd;
295 initSessionDictionary(m, pd);
298 spellers_[lang->lang()] = m;
299 return m.e_speller ? to_aspell_speller(m.e_speller) : 0;
/// Returns the aspell speller registered for \p lang. If no entry exists
/// for lang->lang(), one is created with addSpeller().
/// When lyxrc.spellchecker_accept_compound differs from the value stored in
/// the entry, the "run-together" option of the entry's config is updated, a
/// new speller is created from that config, the old one is deleted and the
/// modified entry is written back to spellers_.
/// NOTE(review): in the lines shown, the replacement speller is neither
/// checked with aspell_error_number() nor re-populated through
/// initSessionDictionary() -- confirm whether that is intended.
303 AspellSpeller * AspellChecker::Private::speller(Language const * lang)
305 Spellers::iterator it = spellers_.find(lang->lang());
306 if (it != spellers_.end()) {
// works on a copy of the entry (including its ignored_words_ list)
307 Speller aspell = it->second;
308 if (lyxrc.spellchecker_accept_compound != aspell.accept_compound) {
309 // spell checker setting changed... adjust run-together
310 aspell.accept_compound = lyxrc.spellchecker_accept_compound;
311 if (aspell.accept_compound)
312 // Consider run-together words as legal compounds
313 aspell_config_replace(aspell.config, "run-together", "true");
315 // Report run-together words as errors
316 aspell_config_replace(aspell.config, "run-together", "false");
// keep the old handle so it can be deleted after the new speller exists
317 AspellCanHaveError * e_speller = aspell.e_speller;
318 aspell.e_speller = new_aspell_speller(aspell.config);
319 delete_aspell_speller(to_aspell_speller(e_speller));
// store the modified copy back into the map
320 spellers_[lang->lang()] = aspell;
322 return to_aspell_speller(aspell.e_speller);
325 return addSpeller(lang);
/// Counts the entries of spellers_ whose speller handle is non-null.
329 int AspellChecker::Private::numDictionaries() const
332 Spellers::const_iterator it = spellers_.begin();
333 Spellers::const_iterator et = spellers_.end();
335 for (; it != et; ++it) {
// NOTE(review): the entry is copied here although it is only read
336 Speller aspell = it->second;
// the comparison yields a bool, so each usable speller adds 1
337 result += aspell.e_speller != 0;
/// Converts \p word to a UTF-8 string and removes every '-' character from
/// it, one occurrence per loop iteration.
343 string AspellChecker::Private::toAspellWord(docstring const & word) const
346 string word_str = to_utf8(word);
// find() restarts from the beginning after each erase
347 while ((mpos = word_str.find('-')) != word_str.npos) {
348 word_str.erase(mpos, 1);
/// Checks \p word with the aspell speller \p m.
/// The word is split at '-' and the parts are checked one after another for
/// as long as the result stays WORD_OK. The closing lines check the complete
/// word, which toAspellWord() passes on with its hyphens removed.
/// NOTE(review): the loop exit condition and the action taken when all parts
/// are OK are presumably an early return of WORD_OK -- confirm.
/// An aspell result of -1 trips LASSERT, whose recovery action is to return
/// UNKNOWN_WORD.
354 SpellChecker::Result AspellChecker::Private::check(
355 AspellSpeller * m, WordLangTuple const & word)
358 SpellChecker::Result result = WORD_OK;
360 LYXERR(Debug::GUI, "spellCheck: \"" <<
361 word.word() << "\", lang = " << word.lang()->lang()) ;
// w1 receives the first hyphen-separated part, rest the remainder
362 docstring rest = split(word.word(), w1, '-');
363 for (; result == WORD_OK;) {
364 string const word_str = toAspellWord(w1);
365 int const word_ok = aspell_speller_check(m, word_str.c_str(), -1);
366 LASSERT(word_ok != -1, return UNKNOWN_WORD);
367 result = (word_ok) ? WORD_OK : UNKNOWN_WORD;
// advance to the next part
370 rest = split(rest,w1,'-');
372 if (result == WORD_OK)
// check of the whole word
374 string const word_str = toAspellWord(word.word());
375 int const word_ok = aspell_speller_check(m, word_str.c_str(), -1);
376 LASSERT(word_ok != -1, return UNKNOWN_WORD);
377 return (word_ok) ? WORD_OK : UNKNOWN_WORD;
/// Appends the word of \p word to the ignored_words_ list of \p speller, so
/// that initSessionDictionary() adds it again when the session is rebuilt.
380 void AspellChecker::Private::accept(Speller & speller, WordLangTuple const & word)
382 speller.ignored_words_.push_back(word.word());
386 /// personal word list interface
/// Removes the word from the personal word list of its language and, if a
/// speller is registered for that language, rebuilds the speller's session
/// dictionary from the updated list.
387 void AspellChecker::Private::remove(WordLangTuple const & word)
// NOTE(review): operator[] inserts a null entry for an unknown language;
// presumably pd is null-checked before use -- confirm
389 PersonalWordList * pd = personal_[word.lang()->lang()];
392 pd->remove(word.word());
393 Spellers::iterator it = spellers_.find(word.lang()->lang());
394 if (it != spellers_.end()) {
395 initSessionDictionary(it->second, pd);
/// Adds the word to the session of the speller registered for its language
/// and inserts it into the personal word list of that language. Nothing of
/// this happens in the lines shown if no speller is registered.
400 void AspellChecker::Private::insert(WordLangTuple const & word)
402 Spellers::iterator it = spellers_.find(word.lang()->lang());
403 if (it != spellers_.end()) {
404 addToSession(it->second.e_speller, word.word());
// NOTE(review): presumably pd is null-checked before use -- confirm
405 PersonalWordList * pd = personal_[word.lang()->lang()];
408 pd->insert(word.word());
/// Tells whether the word exists in the personal word list of its language.
412 bool AspellChecker::Private::learned(WordLangTuple const & word)
// NOTE(review): operator[] inserts a null entry for an unknown language;
// presumably pd is null-checked before use -- confirm
414 PersonalWordList * pd = personal_[word.lang()->lang()];
417 return pd->exists(word.word());
/// Default constructor.
/// NOTE(review): presumably sets up the private implementation that the
/// other members reach through d -- confirm.
421 AspellChecker::AspellChecker()
/// Destructor.
/// NOTE(review): presumably releases the private implementation d -- confirm.
426 AspellChecker::~AspellChecker()
/// Spell-checks \p word with the speller of its language.
/// \return NO_DICTIONARY on the path without a usable speller; LEARNED_WORD
/// if the word checks as WORD_OK and is in the personal word list;
/// otherwise the result of Private::check().
/// NOTE(review): the guard on m and the result for an empty word are
/// presumably a null check and WORD_OK respectively -- confirm.
432 SpellChecker::Result AspellChecker::check(WordLangTuple const & word)
435 AspellSpeller * m = d->speller(word.lang());
438 return NO_DICTIONARY;
// empty words are special-cased and never reach aspell
440 if (word.word().empty())
441 // MSVC compiled Aspell doesn't like it.
444 SpellChecker::Result rc = d->check(m, word);
445 return (rc == WORD_OK && d->learned(word)) ? LEARNED_WORD : rc;
/// Called by insert(), accept() and remove() after they changed the word
/// lists.
/// NOTE(review): presumably increments a change counter -- confirm.
449 void AspellChecker::advanceChangeNumber()
/// Public entry point for adding \p word; ends by calling
/// advanceChangeNumber().
/// NOTE(review): presumably forwards to Private::insert() first -- confirm.
455 void AspellChecker::insert(WordLangTuple const & word)
458 advanceChangeNumber();
/// Accepts \p word for the current session. If a speller is registered for
/// the word's language, the word is added to the speller's aspell session
/// and recorded in its ignored words, and advanceChangeNumber() is called.
462 void AspellChecker::accept(WordLangTuple const & word)
464 Spellers::iterator it = d->spellers_.find(word.lang()->lang());
465 if (it != d->spellers_.end()) {
466 d->addToSession(it->second.e_speller, word.word());
467 d->accept(it->second, word);
468 advanceChangeNumber();
/// Asks the speller of the language of \p wl for replacement suggestions and
/// appends them, converted from UTF-8, to \p suggestions.
/// A null suggestion list from aspell trips LASSERT, whose recovery action
/// is to return.
/// NOTE(review): the guard on m and the loop around the enumeration are
/// presumably a null check and a loop to exhaustion -- confirm.
473 void AspellChecker::suggest(WordLangTuple const & wl,
474 docstring_list & suggestions)
477 AspellSpeller * m = d->speller(wl.lang());
// the word is passed on as UTF-8 with its hyphens removed
482 string const word = d->toAspellWord(wl.word());
483 AspellWordList const * sugs =
484 aspell_speller_suggest(m, word.c_str(), -1);
485 LASSERT(sugs != 0, return);
486 AspellStringEnumeration * els = aspell_word_list_elements(sugs);
487 if (!els || aspell_word_list_empty(sugs))
491 char const * str = aspell_string_enumeration_next(els);
494 suggestions.push_back(from_utf8(str));
// the enumeration is released explicitly
497 delete_aspell_string_enumeration(els);
/// Public entry point for removing \p word; ends by calling
/// advanceChangeNumber().
/// NOTE(review): presumably forwards to Private::remove() first -- confirm.
501 void AspellChecker::remove(WordLangTuple const & word)
504 advanceChangeNumber();
/// Tells whether aspell has a dictionary for the code and variety of
/// \p lang. The configs of the already registered spellers are tried first,
/// stopping at the first hit. The closing lines build a temporary config
/// with getConfig(), query it and delete it again.
/// NOTE(review): presumably the temporary config is only used when no
/// registered speller matched -- confirm.
508 bool AspellChecker::hasDictionary(Language const * lang) const
511 Spellers::iterator it = d->spellers_.begin();
512 Spellers::iterator end = d->spellers_.end();
515 for (; it != end && !have; ++it) {
// entries without a config are skipped by the short-circuit
516 have = it->second.config && d->isValidDictionary(it->second.config, lang->code(), lang->variety());
519 AspellConfig * config = d->getConfig(lang->code(), lang->variety());
520 have = d->isValidDictionary(config, lang->code(), lang->variety());
521 delete_aspell_config(config);
/// Returns the number of usable spellers, as counted by
/// Private::numDictionaries().
528 int AspellChecker::numDictionaries() const
530 return d->numDictionaries();
/// Returns the aspell error message of the first registered speller that
/// has a speller handle with a non-zero error number, or an empty docstring
/// if there is none.
534 docstring const AspellChecker::error()
536 Spellers::iterator it = d->spellers_.begin();
537 Spellers::iterator end = d->spellers_.end();
538 char const * err = 0;
// the loop stops as soon as a message has been found
540 for (; it != end && 0 == err; ++it) {
541 if (it->second.e_speller && aspell_error_number(it->second.e_speller) != 0)
542 err = aspell_error_message(it->second.e_speller);
545 // FIXME UNICODE: err is not in UTF8, but probably the locale encoding
546 return (err ? from_utf8(err) : docstring());