3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
7 * \author Jürgen Spitzmüller
9 * Full author contact details are available in file CREDITS.
14 #include "Thesaurus.h"
17 #include "support/lstrings.h"
18 #include "support/unicode.h"
30 #ifndef HAVE_LIBMYTHES
31 #ifdef HAVE_LIBAIKSAURUS
// Constructor used when HAVE_LIBAIKSAURUS is defined: initializes thes_
// with a newly allocated Aiksaurus object.
32 Thesaurus::Thesaurus()
33 : thes_(new Aiksaurus)
// Destructor of the Aiksaurus-backed Thesaurus.
// NOTE(review): thes_ is created with `new` in the constructor, so it
// presumably has to be released here -- confirm.
37 Thesaurus::~Thesaurus()
// Look up the word t with the Aiksaurus backend and collect the results
// in `meanings`, keyed by a meaning label. The list stored under each
// label is sorted before the function finishes.
43 Thesaurus::Meanings Thesaurus::lookup(docstring const & t)
47 // Aiksaurus is for English text only, therefore it does not work
48 // with non-ASCII strings.
49 // The interface of the Thesaurus class uses docstring because a
50 // non-English thesaurus is possible in theory.
51 if (!support::isAscii(t))
52 // to_ascii() would assert
// From here on t is known to be pure ASCII, so the conversion is safe.
55 string const text = to_ascii(t);
// Ask the backend for the word; a false result takes the failure branch.
56 if (!thes_->find(text.c_str()))
// Meaning index seen on the previous iteration. It starts at -1,
// presumably so that the first index handed back by next() always
// differs -- confirm.
61 int prev_meaning = -1;
65 // correct, returns "" at the end
66 string ret = thes_->next(cur_meaning);
// Consume results until next() returns the empty string.
68 while (!ret.empty()) {
// The meaning index changed: this result becomes the current meaning
// label, and the result following it is fetched right away.
69 if (cur_meaning != prev_meaning) {
70 meaning = from_ascii(ret);
71 ret = thes_->next(cur_meaning);
72 prev_meaning = cur_meaning;
// File the result under the current meaning label.
75 meanings[meaning].push_back(from_ascii(ret));
// Advance to the next result for the following iteration.
78 ret = thes_->next(cur_meaning);
// Sort the entries of every meaning with the default ordering.
81 for (Meanings::iterator it = meanings.begin();
82 it != meanings.end(); ++it)
83 sort(it->second.begin(), it->second.end());
88 #endif // HAVE_LIBAIKSAURUS
89 #endif // !HAVE_LIBMYTHES
// Convert the docstring s to a byte string: ucs4_to_eightbit() encodes
// the characters of s using `encoding`, and the resulting bytes are
// copied into the returned string.
// NOTE(review): `encoding` is presumably an iconv encoding name, as the
// function name suggests -- confirm.
96 string const to_iconv_encoding(docstring const & s, string const & encoding)
98 std::vector<char> const encoded =
99 ucs4_to_eightbit(s.data(), s.length(), encoding);
100 return string(encoded.begin(), encoded.end());
// Inverse direction of to_iconv_encoding(): eightbit_to_ucs4() decodes
// the bytes of s using `encoding`, and the resulting characters are
// copied into the returned docstring.
// NOTE(review): `encoding` is presumably an iconv encoding name, as the
// function name suggests -- confirm.
104 docstring const from_iconv_encoding(string const & s, string const & encoding)
106 std::vector<char_type> const ucs4 =
107 eightbit_to_ucs4(s.data(), s.length(), encoding);
108 return docstring(ucs4.begin(), ucs4.end());
// Constructor of the MyThes-backed Thesaurus: creates the MyThes object
// from an index file (.idx) and a data file (.dat) and stores it in thes_.
// NOTE(review): both file names are hard-coded absolute paths below a
// developer's home directory and name one fixed dictionary (th_de_DE_v2).
// They will not exist on other machines; the locations should come from
// configuration instead.
114 Thesaurus::Thesaurus()
116 string const idx("/home/juergen/updates/MyThes-1.0/th_de_DE_v2.idx");
117 string const data("/home/juergen/updates/MyThes-1.0/th_de_DE_v2.dat");
// MyThes is handed plain C strings; idx and data stay alive until the
// constructor call below has been made.
118 char const * af = idx.c_str();
119 char const * df = data.c_str();
120 thes_ = new MyThes(af, df);
// Destructor of the (presumably MyThes-backed) Thesaurus.
// NOTE(review): thes_ is created with `new` in the constructor, so it
// presumably has to be released here -- confirm.
124 Thesaurus::~Thesaurus()
// Look up the word t with the MyThes backend. The word is lower-cased,
// converted to the encoding reported by the thesaurus and passed to
// MyThes::Lookup(). The definitions and synonyms found are converted
// back to docstring and collected in `meanings`, keyed by definition.
// The list stored under each key is sorted before the function finishes.
131 Thesaurus::Meanings Thesaurus::lookup(docstring const & t)
// Encoding of the thesaurus data; used for the conversions in both
// directions below.
135 string const encoding = thes_->get_th_encoding();
138 string const text = to_iconv_encoding(support::lowercase(t), encoding);
// Length in bytes of the encoded word, up to its first NUL byte.
139 int len = strlen(text.c_str());
// Lookup() receives the address of pmean and returns the value used
// below as the number of entries.
140 int count = thes_->Lookup(text.c_str(), len, &pmean);
144 // don't change value of pmean or count
145 // they are needed for the CleanUpAfterLookup routine
// One iteration per entry returned by Lookup().
// NOTE(review): pm presumably refers to the current entry of the array
// behind pmean -- confirm where it is initialized and advanced.
149 for (int i = 0; i < count; i++) {
// The entry's definition text becomes the meaning label.
150 meaning = from_iconv_encoding(string(pm->defn), encoding);
// Labels starting with '-' are cleaned up; ltrim() presumably strips
// leading '-' and space characters -- confirm.
152 if (support::prefixIs(meaning, '-'))
153 meaning = support::ltrim(meaning, "- ");
// Convert each of the entry's pm->count synonyms and file it under the
// label.
154 for (int j = 0; j < pm->count; j++) {
155 ret = from_iconv_encoding(string(pm->psyns[j]), encoding);
157 meanings[meaning].push_back(ret);
160 // now clean up all allocated memory
161 thes_->CleanUpAfterLookup(&pmean, count);
// Sort the entries of every meaning with the default ordering.
163 for (Meanings::iterator it = meanings.begin();
164 it != meanings.end(); ++it)
165 sort(it->second.begin(), it->second.end());
// NOTE(review): presumably the constructor of the fallback
// implementation used when no thesaurus library is available -- confirm
// against the enclosing preprocessor branch.
172 Thesaurus::Thesaurus()
// NOTE(review): presumably the destructor of the fallback implementation
// used when no thesaurus library is available -- confirm against the
// enclosing preprocessor branch.
177 Thesaurus::~Thesaurus()
// lookup() variant whose parameter is unnamed, so the word passed in is
// never inspected.
// NOTE(review): presumably the fallback used when no thesaurus library
// is available -- confirm against the enclosing preprocessor branch.
182 Thesaurus::Meanings Thesaurus::lookup(docstring const &)
187 #endif // HAVE_LIBMYTHES