X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=src%2FAspellChecker.cpp;h=a5c54d8c74b6ae727e10526d29ef15f3d2cfddea;hb=1511516ef769cb3a11546a78cea6abd41cb91ffe;hp=71475544a4f8cc79b6cb7e1984277f0905e736d0;hpb=4c859c98351ed0119d1c6b81e78c3beb0a5f00c3;p=lyx.git diff --git a/src/AspellChecker.cpp b/src/AspellChecker.cpp index 71475544a4..a5c54d8c74 100644 --- a/src/AspellChecker.cpp +++ b/src/AspellChecker.cpp @@ -12,15 +12,20 @@ #include #include "AspellChecker.h" +#include "PersonalWordList.h" + #include "LyXRC.h" #include "WordLangTuple.h" #include "support/lassert.h" #include "support/debug.h" +#include "support/lstrings.h" #include "support/docstring_list.h" +#include "support/filetools.h" +#include "support/Package.h" #include "support/FileName.h" -#include "support/Path.h" +#include "support/PathChanger.h" #include @@ -28,96 +33,236 @@ #include using namespace std; +using namespace lyx::support; namespace lyx { namespace { struct Speller { - AspellSpeller * speller; AspellConfig * config; + AspellCanHaveError * e_speller; + bool accept_compound; + docstring_list ignored_words_; }; typedef std::map Spellers; +typedef map LangPersonalWordList; -} // anon namespace +} // namespace struct AspellChecker::Private { - Private(): spell_error_object(0) {} + Private() + {} ~Private(); /// add a speller of the given language and variety - AspellSpeller * addSpeller(string const & lang, - string const & variety = string()); + AspellSpeller * addSpeller(Language const * lang); /// - AspellSpeller * speller(string const & lang, - string const & variety); + AspellSpeller * speller(Language const * lang); + + bool isValidDictionary(AspellConfig * config, + string const & lang, string const & variety); + int numDictionaries() const; + bool checkAspellData(AspellConfig * config, + string const & basepath, string const & datapath, string const & dictpath, + string const & lang, string const & variety); + AspellConfig * getConfig(string const & lang, string const & variety); + + string toAspellWord(docstring const & word) const; + + SpellChecker::Result check(AspellSpeller * m, + WordLangTuple const & word) const; - /// create a unique ID from lang code and variety - string const spellerID(string const & lang, - string const & variety); + void initSessionDictionary(Speller const & speller, PersonalWordList * pd); + void addToSession(AspellCanHaveError * speller, docstring const & word); + void insert(WordLangTuple const & word); + void remove(WordLangTuple const & word); + bool learned(WordLangTuple const & word); + + void accept(Speller & speller, WordLangTuple const & word); /// the spellers Spellers spellers_; - /// FIXME - AspellCanHaveError * spell_error_object; + LangPersonalWordList personal_; + + /// the location below system/user directory + /// there the rws files lookup will happen + const string dictDirectory(void) + { + return "dicts"; + } + /// there the dat+cmap files lookup will happen + const string dataDirectory(void) + { + return "data"; + } + /// os package directory constants + /// macports on Mac OS X or + /// aspell rpms on Linux + const string osPackageBase(void) + { +#ifdef USE_MACOSX_PACKAGING + return "/opt/local"; +#else + return "/usr"; +#endif + } + const string osPackageDictDirectory(void) + { +#ifdef USE_MACOSX_PACKAGING + return "/share/aspell"; +#else + return "/lib/aspell-0.60"; +#endif + } + const string osPackageDataDirectory(void) + { + return "/lib/aspell-0.60"; + } }; AspellChecker::Private::~Private() { - if (spell_error_object) { - delete_aspell_can_have_error(spell_error_object); - spell_error_object = 0; - } - Spellers::iterator it = spellers_.begin(); Spellers::iterator end = spellers_.end(); for (; it != end; ++it) { - aspell_speller_save_all_word_lists(it->second.speller); - delete_aspell_speller(it->second.speller); + if (it->second.e_speller) { + AspellSpeller * speller = to_aspell_speller(it->second.e_speller); + aspell_speller_save_all_word_lists(speller); + delete_aspell_can_have_error(it->second.e_speller); + } delete_aspell_config(it->second.config); } + + LangPersonalWordList::const_iterator pdit = personal_.begin(); + LangPersonalWordList::const_iterator pdet = personal_.end(); + + for (; pdit != pdet; ++pdit) { + if (0 == pdit->second) + continue; + PersonalWordList * pd = pdit->second; + pd->save(); + delete pd; + } } -AspellConfig * getConfig() +bool AspellChecker::Private::isValidDictionary(AspellConfig * config, + string const & lang, string const & variety) { - AspellConfig * config = new_aspell_config(); -#ifdef __APPLE__ - char buf[2048] ; - - if ( getPrivateFrameworkPathName(buf, sizeof(buf), "Aspell.framework") ) { - lyx::support::FileName const base(buf); - lyx::support::FileName const data(base.absFilename() + "/lib/aspell-0.60"); - lyx::support::FileName const dict(base.absFilename() + "/share/aspell"); - LYXERR(Debug::FILES, "aspell bundle path: " << buf); - if (dict.isDirectory() && data.isDirectory()) { - aspell_config_replace(config, "dict-dir", dict.absFilename().c_str()); - aspell_config_replace(config, "data-dir", data.absFilename().c_str()); - LYXERR(Debug::FILES, "aspell dict: " << dict); + bool have = false; + // code taken from aspell's list-dicts example + // the returned pointer should _not_ need to be deleted + AspellDictInfoList * dlist = get_aspell_dict_info_list(config); + AspellDictInfoEnumeration * dels = aspell_dict_info_list_elements(dlist); + const AspellDictInfo * entry; + + while (0 != (entry = aspell_dict_info_enumeration_next(dels))) { + LYXERR(Debug::DEBUG, "aspell dict:" + << " name=" << entry->name + << ",code=" << entry->code + << ",variety=" << entry->jargon); + if (entry->code == lang && (variety.empty() || entry->jargon == variety)) { + have = true; + break; } } -#endif - return config ; + delete_aspell_dict_info_enumeration(dels); + LYXERR(Debug::FILES, "aspell dictionary: " << lang << (have ? " yes" : " no")); + return have; } -AspellSpeller * AspellChecker::Private::addSpeller(string const & lang, - string const & variety) +bool AspellChecker::Private::checkAspellData(AspellConfig * config, + string const & basepath, string const & datapath, string const & dictpath, + string const & lang, string const & variety) { - AspellConfig * config = getConfig(); + FileName base(basepath); + bool have_dict = base.isDirectory(); + + if (have_dict) { + FileName data(addPath(base.absFileName(), datapath)); + FileName dict(addPath(base.absFileName(), dictpath)); + have_dict = dict.isDirectory() && data.isDirectory(); + if (have_dict) { + LYXERR(Debug::FILES, "aspell dict-dir: " << dict); + LYXERR(Debug::FILES, "aspell data-dir: " << data); + aspell_config_replace(config, "dict-dir", dict.absFileName().c_str()); + aspell_config_replace(config, "data-dir", data.absFileName().c_str()); + have_dict = isValidDictionary(config, lang, variety); + } + } + return have_dict; +} + + +AspellConfig * AspellChecker::Private::getConfig(string const & lang, string const & variety) +{ + AspellConfig * config = new_aspell_config(); + bool have_dict = false; + string const sysdir = lyx::support::package().system_support().absFileName(); + string const userdir = lyx::support::package().user_support().absFileName(); + + LYXERR(Debug::FILES, "aspell user dir: " << userdir); + have_dict = checkAspellData(config, userdir, dataDirectory(), dictDirectory(), lang, variety); + if (!have_dict) { + LYXERR(Debug::FILES, "aspell sysdir dir: " << sysdir); + have_dict = checkAspellData(config, sysdir, dataDirectory(), dictDirectory(), lang, variety); + } + if (!have_dict) { + // check for package data of OS installation + checkAspellData(config, osPackageBase(), osPackageDataDirectory(), osPackageDictDirectory(), lang, variety); + } + return config; +} + + +void AspellChecker::Private::addToSession(AspellCanHaveError * speller, docstring const & word) +{ + string const word_to_add = toAspellWord(word); + if (1 != aspell_speller_add_to_session(to_aspell_speller(speller), word_to_add.c_str(), -1)) + LYXERR(Debug::GUI, "aspell add to session: " << aspell_error_message(speller)); +} + + +void AspellChecker::Private::initSessionDictionary( + Speller const & speller, + PersonalWordList * pd) +{ + AspellSpeller * aspell = to_aspell_speller(speller.e_speller); + aspell_speller_clear_session(aspell); + docstring_list::const_iterator it = pd->begin(); + docstring_list::const_iterator et = pd->end(); + for (; it != et; ++it) { + addToSession(speller.e_speller, *it); + } + it = speller.ignored_words_.begin(); + et = speller.ignored_words_.end(); + for (; it != et; ++it) { + addToSession(speller.e_speller, *it); + } +} + + +AspellSpeller * AspellChecker::Private::addSpeller(Language const * lang) +{ + Speller m; + string const code = lang->code(); + string const variety = lang->variety(); + m.config = getConfig(code, variety); // Aspell supports both languages and varieties (such as German // old vs. new spelling). The respective naming convention is // lang_REGION-variety (e.g. de_DE-alt). - aspell_config_replace(config, "lang", lang.c_str()); + aspell_config_replace(m.config, "lang", code.c_str()); if (!variety.empty()) - aspell_config_replace(config, "variety", variety.c_str()); + aspell_config_replace(m.config, "variety", variety.c_str()); // Set the encoding to utf-8. // aspell does also understand "ucs-4", so we would not need a // conversion in theory, but if this is used it expects all @@ -125,58 +270,157 @@ AspellSpeller * AspellChecker::Private::addSpeller(string const & lang, // seems that this uint is not compatible with our char_type on some // platforms (cygwin, OS X). Therefore we use utf-8, that does // always work. - aspell_config_replace(config, "encoding", "utf-8"); + aspell_config_replace(m.config, "encoding", "utf-8"); if (lyxrc.spellchecker_accept_compound) // Consider run-together words as legal compounds - aspell_config_replace(config, "run-together", "true"); + aspell_config_replace(m.config, "run-together", "true"); else // Report run-together words as errors - aspell_config_replace(config, "run-together", "false"); - - AspellCanHaveError * err = new_aspell_speller(config); - if (spell_error_object) - delete_aspell_can_have_error(spell_error_object); - spell_error_object = 0; - - if (aspell_error_number(err) != 0) { - // FIXME: We should we indicate somehow that this language is not - // supported. - spell_error_object = err; - LYXERR(Debug::FILES, "aspell error: " << aspell_error_message(err)); - return 0; + aspell_config_replace(m.config, "run-together", "false"); + + m.accept_compound = lyxrc.spellchecker_accept_compound; + m.e_speller = new_aspell_speller(m.config); + if (aspell_error_number(m.e_speller) != 0) { + // FIXME: We should indicate somehow that this language is not supported. + LYXERR(Debug::FILES, "aspell error: " << aspell_error_message(m.e_speller)); + delete_aspell_can_have_error(m.e_speller); + delete_aspell_config(m.config); + m.config = 0; + m.e_speller = 0; + } else { + PersonalWordList * pd = new PersonalWordList(lang->lang()); + pd->load(); + personal_[lang->lang()] = pd; + initSessionDictionary(m, pd); } - Speller m; - m.speller = to_aspell_speller(err); - m.config = config; - spellers_[spellerID(lang, variety)] = m; - return m.speller; + + spellers_[lang->lang()] = m; + return m.e_speller ? to_aspell_speller(m.e_speller) : 0; +} + + +AspellSpeller * AspellChecker::Private::speller(Language const * lang) +{ + Spellers::iterator it = spellers_.find(lang->lang()); + if (it != spellers_.end()) { + Speller aspell = it->second; + if (lyxrc.spellchecker_accept_compound != aspell.accept_compound) { + // spell checker setting changed... adjust run-together + aspell.accept_compound = lyxrc.spellchecker_accept_compound; + if (aspell.accept_compound) + // Consider run-together words as legal compounds + aspell_config_replace(aspell.config, "run-together", "true"); + else + // Report run-together words as errors + aspell_config_replace(aspell.config, "run-together", "false"); + AspellCanHaveError * e_speller = aspell.e_speller; + aspell.e_speller = new_aspell_speller(aspell.config); + delete_aspell_speller(to_aspell_speller(e_speller)); + spellers_[lang->lang()] = aspell; + } + return to_aspell_speller(aspell.e_speller); + } + + return addSpeller(lang); +} + + +int AspellChecker::Private::numDictionaries() const +{ + int result = 0; + Spellers::const_iterator it = spellers_.begin(); + Spellers::const_iterator et = spellers_.end(); + + for (; it != et; ++it) { + Speller aspell = it->second; + result += aspell.e_speller != 0; + } + return result; +} + + +string AspellChecker::Private::toAspellWord(docstring const & word) const +{ + size_t mpos; + string word_str = to_utf8(word); + while ((mpos = word_str.find('-')) != word_str.npos) { + word_str.erase(mpos, 1); + } + return word_str; } -AspellSpeller * AspellChecker::Private::speller(string const & lang, - string const & variety) +SpellChecker::Result AspellChecker::Private::check( + AspellSpeller * m, WordLangTuple const & word) + const { - Spellers::iterator it = spellers_.find(spellerID(lang, variety)); - if (it != spellers_.end()) - return it->second.speller; - - return addSpeller(lang, variety); + SpellChecker::Result result = WORD_OK; + docstring w1; + LYXERR(Debug::GUI, "spellCheck: \"" << + word.word() << "\", lang = " << word.lang()->lang()) ; + docstring rest = split(word.word(), w1, '-'); + for (; result == WORD_OK;) { + string const word_str = toAspellWord(w1); + int const word_ok = aspell_speller_check(m, word_str.c_str(), -1); + LASSERT(word_ok != -1, return UNKNOWN_WORD); + result = (word_ok) ? WORD_OK : UNKNOWN_WORD; + if (rest.empty()) + break; + rest = split(rest, w1, '-'); + } + if (result == WORD_OK) + return result; + string const word_str = toAspellWord(word.word()); + int const word_ok = aspell_speller_check(m, word_str.c_str(), -1); + LASSERT(word_ok != -1, return UNKNOWN_WORD); + return (word_ok) ? WORD_OK : UNKNOWN_WORD; } +void AspellChecker::Private::accept(Speller & speller, WordLangTuple const & word) +{ + speller.ignored_words_.push_back(word.word()); +} -string const AspellChecker::Private::spellerID(string const & lang, - string const & variety) + +/// personal word list interface +void AspellChecker::Private::remove(WordLangTuple const & word) { - if (variety.empty()) - return lang; - return lang + "-" + variety; + PersonalWordList * pd = personal_[word.lang()->lang()]; + if (!pd) + return; + pd->remove(word.word()); + Spellers::iterator it = spellers_.find(word.lang()->lang()); + if (it != spellers_.end()) { + initSessionDictionary(it->second, pd); + } } -AspellChecker::AspellChecker(): d(new Private) +void AspellChecker::Private::insert(WordLangTuple const & word) { + Spellers::iterator it = spellers_.find(word.lang()->lang()); + if (it != spellers_.end()) { + addToSession(it->second.e_speller, word.word()); + PersonalWordList * pd = personal_[word.lang()->lang()]; + if (!pd) + return; + pd->insert(word.word()); + } } +bool AspellChecker::Private::learned(WordLangTuple const & word) +{ + PersonalWordList * pd = personal_[word.lang()->lang()]; + if (!pd) + return false; + return pd->exists(word.word()); +} + + +AspellChecker::AspellChecker() + : d(new Private) +{} + AspellChecker::~AspellChecker() { @@ -184,41 +428,51 @@ AspellChecker::~AspellChecker() } -SpellChecker::Result AspellChecker::check(WordLangTuple const & word) +SpellChecker::Result AspellChecker::check(WordLangTuple const & word, + vector const & docdict) { - - AspellSpeller * m = - d->speller(word.lang()->code(), word.lang()->variety()); + AspellSpeller * m = d->speller(word.lang()); if (!m) - return OK; + return NO_DICTIONARY; if (word.word().empty()) // MSVC compiled Aspell doesn't like it. - return OK; + return WORD_OK; + + vector::const_iterator it = docdict.begin(); + for (; it != docdict.end(); ++it) { + if (it->lang()->code() != word.lang()->code()) + continue; + if (it->word() == word.word()) + return DOCUMENT_LEARNED_WORD; + } + SpellChecker::Result rc = d->check(m, word); + return (rc == WORD_OK && d->learned(word)) ? LEARNED_WORD : rc; +} - int const word_ok = aspell_speller_check(m, to_utf8(word.word()).c_str(), -1); - LASSERT(word_ok != -1, /**/); - return (word_ok) ? OK : UNKNOWN_WORD; +void AspellChecker::advanceChangeNumber() +{ + nextChangeNumber(); } void AspellChecker::insert(WordLangTuple const & word) { - Spellers::iterator it = d->spellers_.find( - d->spellerID(word.lang()->code(), word.lang()->variety())); - if (it != d->spellers_.end()) - aspell_speller_add_to_personal(it->second.speller, to_utf8(word.word()).c_str(), -1); + d->insert(word); + advanceChangeNumber(); } void AspellChecker::accept(WordLangTuple const & word) { - Spellers::iterator it = d->spellers_.find( - d->spellerID(word.lang()->code(), word.lang()->variety())); - if (it != d->spellers_.end()) - aspell_speller_add_to_session(it->second.speller, to_utf8(word.word()).c_str(), -1); + Spellers::iterator it = d->spellers_.find(word.lang()->lang()); + if (it != d->spellers_.end()) { + d->addToSession(it->second.e_speller, word.word()); + d->accept(it->second, word); + advanceChangeNumber(); + } } @@ -226,15 +480,15 @@ void AspellChecker::suggest(WordLangTuple const & wl, docstring_list & suggestions) { suggestions.clear(); - AspellSpeller * m = - d->speller(wl.lang()->code(), wl.lang()->variety()); + AspellSpeller * m = d->speller(wl.lang()); if (!m) return; + string const word = d->toAspellWord(wl.word()); AspellWordList const * sugs = - aspell_speller_suggest(m, to_utf8(wl.word()).c_str(), -1); - LASSERT(sugs != 0, /**/); + aspell_speller_suggest(m, word.c_str(), -1); + LASSERT(sugs != 0, return); AspellStringEnumeration * els = aspell_word_list_elements(sugs); if (!els || aspell_word_list_empty(sugs)) return; @@ -250,48 +504,49 @@ void AspellChecker::suggest(WordLangTuple const & wl, } -bool AspellChecker::hasDictionary(Language const * lang) const +void AspellChecker::remove(WordLangTuple const & word) { - if (!lang) - return false; - // code taken from aspell's list-dicts example - AspellConfig * config; - AspellDictInfoList * dlist; - AspellDictInfoEnumeration * dels; - const AspellDictInfo * entry; - - config = getConfig(); - - /* the returned pointer should _not_ need to be deleted */ - dlist = get_aspell_dict_info_list(config); - - /* config is no longer needed */ - delete_aspell_config(config); + d->remove(word); + advanceChangeNumber(); +} - dels = aspell_dict_info_list_elements(dlist); +bool AspellChecker::hasDictionary(Language const * lang) const +{ bool have = false; - while ((entry = aspell_dict_info_enumeration_next(dels)) != 0) - { - if (entry->code == lang->code() - && (lang->variety().empty() || entry->jargon == lang->variety())) { - have = true; - break; + Spellers::iterator it = d->spellers_.begin(); + Spellers::iterator end = d->spellers_.end(); + + if (lang) { + for (; it != end && !have; ++it) { + have = it->second.config && d->isValidDictionary(it->second.config, lang->code(), lang->variety()); + } + if (!have) { + AspellConfig * config = d->getConfig(lang->code(), lang->variety()); + have = d->isValidDictionary(config, lang->code(), lang->variety()); + delete_aspell_config(config); } } + return have; +} - delete_aspell_dict_info_enumeration(dels); - return have; +int AspellChecker::numDictionaries() const +{ + return d->numDictionaries(); } docstring const AspellChecker::error() { + Spellers::iterator it = d->spellers_.begin(); + Spellers::iterator end = d->spellers_.end(); char const * err = 0; - if (d->spell_error_object && aspell_error_number(d->spell_error_object) != 0) - err = aspell_error_message(d->spell_error_object); + for (; it != end && 0 == err; ++it) { + if (it->second.e_speller && aspell_error_number(it->second.e_speller) != 0) + err = aspell_error_message(it->second.e_speller); + } // FIXME UNICODE: err is not in UTF8, but probably the locale encoding return (err ? from_utf8(err) : docstring());