1 /* ***** BEGIN LICENSE BLOCK *****
2 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4 * Copyright (C) 2002-2017 Németh László
6 * The contents of this file are subject to the Mozilla Public License Version
7 * 1.1 (the "License"); you may not use this file except in compliance with
8 * the License. You may obtain a copy of the License at
9 * http://www.mozilla.org/MPL/
11 * Software distributed under the License is distributed on an "AS IS" basis,
12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 * for the specific language governing rights and limitations under the
16 * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
18 * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
19 * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
20 * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
21 * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
22 * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
24 * Alternatively, the contents of this file may be used under the terms of
25 * either the GNU General Public License Version 2 or later (the "GPL"), or
26 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27 * in which case the provisions of the GPL or the LGPL are applicable instead
28 * of those above. If you wish to allow use of your version of this file only
29 * under the terms of either the GPL or the LGPL, and not to allow others to
30 * use your version of this file under the terms of the MPL, indicate your
31 * decision by deleting the provisions above and replace them with the notice
32 * and other provisions required by the GPL or the LGPL. If you do not delete
33 * the provisions above, a recipient may use your version of this file under
34 * the terms of any one of the MPL, the GPL or the LGPL.
36 * ***** END LICENSE BLOCK ***** */
38 * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
39 * And Contributors. All rights reserved.
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
45 * 1. Redistributions of source code must retain the above copyright
46 * notice, this list of conditions and the following disclaimer.
48 * 2. Redistributions in binary form must reproduce the above copyright
49 * notice, this list of conditions and the following disclaimer in the
50 * documentation and/or other materials provided with the distribution.
52 * 3. All modifications to the source code must be clearly marked as
53 * such. Binary redistributions based on modified source code
54 * must be clearly marked as modified versions in the documentation
55 * and/or other materials provided with the distribution.
57 * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
58 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
59 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
60 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
61 * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
62 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
63 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
64 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
78 #include "hashmgr.hxx"
82 // build a hash table from a munched word list
// Constructor. tpath is the dictionary file, apath the affix file; key is
// handed unchanged to both loaders.
// load_config() runs first: it sets state such as flag_mode and forbiddenword
// that the dictionary loader relies on when decoding flags.
84 HashMgr::HashMgr(const char* tpath, const char* apath, const char* key)
90 forbiddenword(FORBIDDENWORD) // default forbidden-word flag; load_config() may replace it
99 load_config(apath, key);
100 int ec = load_tables(tpath, key);
102 /* error condition - what should we do here */
// the return code of load_tables() is reported through the warning channel
103 HUNSPELL_WARNING(stderr, "Hash Manager Error : %d\n", ec);
105 // keep tablesize at 1 to avoid a possible division by zero (hash() reduces modulo tablesize)
// zero-filled table of tablesize bucket pointers
107 tableptr = (struct hentry**)calloc(tablesize, sizeof(struct hentry*));
// Destructor: visits every bucket of the hash table, then the flag-alias
// (numaliasf entries) and morphological-alias (numaliasm entries) tables.
114 HashMgr::~HashMgr() {
116 // now pass through hash table freeing up everything
117 // go through column by column of the table
118 for (int i = 0; i < tablesize; i++) {
// pt is the entry being processed; nt presumably remembers its successor
// while pt is released - TODO confirm
119 struct hentry* pt = tableptr[i];
120 struct hentry* nt = NULL;
// Part of the per-entry condition: true when no flag-alias table exists or
// when the entry carries ONLYUPCASEFLAG. Presumably decides whether the
// entry owns its flag vector - TODO confirm.
124 (!aliasf || TESTAFF(pt->astr, ONLYUPCASEFLAG, pt->alen)))
// one pass over each alias table
135 for (int j = 0; j < (numaliasf); j++)
145 for (int j = 0; j < (numaliasm); j++)
// build-specific sections selected by the OPENOFFICEORG / MOZILLA_CLIENT macros
151 #ifndef OPENOFFICEORG
152 #ifndef MOZILLA_CLIENT
158 #ifdef MOZILLA_CLIENT
163 // lookup a root word in the hashtable
// word is a NUL-terminated string compared byte-for-byte (strcmp) against the
// stored words. Presumably returns the first matching entry, or NULL when the
// chain is exhausted - TODO confirm.
165 struct hentry* HashMgr::lookup(const char* word) const {
// start at the head of the bucket selected by the hash of the word
168 dp = tableptr[hash(word)];
// follow the collision chain through the `next` links
171 for (; dp != NULL; dp = dp->next) {
172 if (strcmp(word, dp->word) == 0)
179 // add a word to the hash table (private)
// Builds one variable-length hentry record (word text followed by optional
// morphological data) and links it into the table. An entry with the same
// word text already in the bucket gets the new record attached through
// next_homonym.
// in_desc may be NULL (no morphological description).
180 int HashMgr::add_word(const std::string& in_word,
184 const std::string* in_desc,
// By default work directly on the caller's strings; private copies are only
// made below when the text has to be modified.
186 const std::string* word = &in_word;
187 const std::string* desc = in_desc;
189 std::string *word_copy = NULL;
190 std::string *desc_copy = NULL;
// A modifiable copy is needed when ignored characters must be stripped or
// when complexprefixes requires the word to be stored reversed.
191 if (!ignorechars.empty() || complexprefixes) {
192 word_copy = new std::string(in_word);
194 if (!ignorechars.empty()) {
// two variants: one driven by the UTF-16 ignore list (also yields the
// character count wcl), one by the byte-string ignore list
196 wcl = remove_ignored_chars_utf(*word_copy, ignorechars_utf16);
198 remove_ignored_chars(*word_copy, ignorechars);
202 if (complexprefixes) {
204 wcl = reverseword_utf(*word_copy);
206 reverseword(*word_copy);
// The description is copied (and reversed) only when it is stored as text,
// i.e. when no morphological alias table (aliasm) is in use.
208 if (in_desc && !aliasm) {
209 desc_copy = new std::string(*in_desc);
211 if (complexprefixes) {
213 reverseword_utf(*desc_copy);
215 reverseword(*desc_copy);
224 bool upcasehomonym = false;
// Bytes reserved after the word for morphological data: one pointer when
// aliases are used, otherwise the description text plus its terminating NUL;
// nothing when there is no description.
225 int descl = desc ? (aliasm ? sizeof(char*) : desc->size() + 1) : 0;
226 // variable-length hash record with word and optional fields
// NOTE(review): the word's own terminating NUL is not added here; presumably
// struct hentry already reserves one byte for `word` - confirm.
228 (struct hentry*)malloc(sizeof(struct hentry) + word->size() + descl);
235 char* hpw = hp->word;
236 strcpy(hpw, word->c_str());
// lengths are narrowed by these casts: byte length and character length to
// unsigned char, flag count to short
240 hp->blen = (unsigned char)word->size();
241 hp->clen = (unsigned char)wcl;
242 hp->alen = (short)al;
245 hp->next_homonym = NULL;
247 // store the description string or its pointer
251 hp->var += H_OPT_ALIASM;
// alias mode: the description is a numeric index; the resolved alias pointer
// is stored right after the word's terminating NUL
252 store_pointer(hpw + word->size() + 1, get_aliasm(atoi(desc->c_str())));
// text mode: the description itself is copied after the word's NUL
254 strcpy(hpw + word->size() + 1, desc->c_str());
// mark entries whose morphological data contains the MORPH_PHON tag
256 if (strstr(HENTRY_DATA(hp), MORPH_PHON))
257 hp->var += H_OPT_PHON;
// dp starts at the head of bucket i
261 struct hentry* dp = tableptr[i];
// Walk the collision chain. An existing entry with identical text and no
// homonym attached yet is where the new record gets linked.
268 while (dp->next != NULL) {
269 if ((!dp->next_homonym) && (strcmp(hp->word, dp->word) == 0)) {
270 // remove hidden onlyupcase homonym
272 if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) {
281 dp->next_homonym = hp;
284 upcasehomonym = true;
// the same comparison repeated after the loop, presumably for the last
// element of the chain - TODO confirm
289 if (strcmp(hp->word, dp->word) == 0) {
290 // remove hidden onlyupcase homonym
292 if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) {
301 dp->next_homonym = hp;
304 upcasehomonym = true;
307 if (!upcasehomonym) {
310 // remove hidden onlyupcase homonym
// Adds an extra initial-capital variant of `word`, tagged with ONLYUPCASEFLAG,
// for mixed-case words and for all-caps words that carry flags. Words flagged
// as forbidden are skipped.
// flags / flagslen: flag vector of the original word; dp: optional
// morphological description passed through to add_word().
321 int HashMgr::add_hidden_capitalized_word(const std::string& word,
323 unsigned short* flags,
325 const std::string* dp,
330 // add inner capitalized forms to handle the following allcap forms:
331 // Mixed caps: OpenOffice.org -> OPENOFFICE.ORG
332 // Allcaps with suffixes: CIA's -> CIA'S
333 if (((captype == HUHCAP) || (captype == HUHINITCAP) ||
334 ((captype == ALLCAP) && (flagslen != 0))) &&
335 !((flagslen != 0) && TESTAFF(flags, forbiddenword, flagslen))) {
// copy of the flag vector with one extra slot for ONLYUPCASEFLAG
336 unsigned short* flags2 =
337 (unsigned short*)malloc(sizeof(unsigned short) * (flagslen + 1));
341 memcpy(flags2, flags, flagslen * sizeof(unsigned short));
342 flags2[flagslen] = ONLYUPCASEFLAG;
// UTF variant: lowercase everything, then capitalise the first character
345 std::vector<w_char> w;
347 mkallsmall_utf(w, langnum);
348 mkinitcap_utf(w, langnum);
// the final `true` argument is add_word()'s last parameter; presumably it
// marks the entry as a hidden only-upper-case form - TODO confirm
350 return add_word(st, wcl, flags2, flagslen + 1, dp, true);
// non-UTF variant: same transformation using the csconv case table
352 std::string new_word(word);
353 mkallsmall(new_word, csconv);
354 mkinitcap(new_word, csconv);
355 int ret = add_word(new_word, wcl, flags2, flagslen + 1, dp, true);
362 // detect captype and modify word length for UTF-8 encoding
// Writes the capitalisation type of `word` to *captype. workbuf is a
// caller-provided scratch buffer that receives the UTF-16 form of the word,
// so callers in a loop can reuse one buffer.
363 int HashMgr::get_clen_and_captype(const std::string& word, int* captype, std::vector<w_char> &workbuf) {
// UTF-8 variant: convert to UTF-16 and classify the converted text; len is
// the value returned by u8_u16()
366 len = u8_u16(workbuf, word);
367 *captype = get_captype_utf8(workbuf, langnum);
// non-UTF variant: classify the bytes using the csconv case table
370 *captype = get_captype(word, csconv);
// Convenience overload: supplies a temporary work buffer and forwards to the
// three-argument version.
375 int HashMgr::get_clen_and_captype(const std::string& word, int* captype) {
376 std::vector<w_char> workbuf;
377 return get_clen_and_captype(word, captype, workbuf);
380 // remove word (personal dictionary function for standalone applications)
// In the code shown here a word is "removed" by marking it: every homonym of
// the word that does not yet carry forbiddenword gets a new flag vector with
// forbiddenword appended.
381 int HashMgr::remove(const std::string& word) {
382 struct hentry* dp = lookup(word.c_str());
// only entries that are not already forbidden need a new vector
384 if (dp->alen == 0 || !TESTAFF(dp->astr, forbiddenword, dp->alen)) {
// room for the existing flags plus the forbidden-word flag
385 unsigned short* flags =
386 (unsigned short*)malloc(sizeof(unsigned short) * (dp->alen + 1));
389 for (int i = 0; i < dp->alen; i++)
390 flags[i] = dp->astr[i];
391 flags[dp->alen] = forbiddenword;
// flag vectors are kept sorted.
// NOTE(review): the sorted range ends at dp->alen; presumably alen was
// incremented before this point so the new flag is included - confirm
395 std::sort(flags, flags + dp->alen);
// continue with the next homonym of the same word
397 dp = dp->next_homonym;
402 /* remove forbidden flag to add a personal word to the hash */
// Visits every homonym of `word` and strips forbiddenword from its flag
// vector. add() treats a non-zero return value as "go on and insert the word".
403 int HashMgr::remove_forbidden_flag(const std::string& word) {
404 struct hentry* dp = lookup(word.c_str());
408 if (dp->astr && TESTAFF(dp->astr, forbiddenword, dp->alen)) {
// one variant simply empties the flag list (presumably when forbiddenword
// is the only flag - TODO confirm)
410 dp->alen = 0; // XXX forbidden words of personal dic.
// otherwise build a replacement vector that is one element shorter
412 unsigned short* flags2 =
413 (unsigned short*)malloc(sizeof(unsigned short) * (dp->alen - 1));
// copy every flag except the forbidden-word flag
417 for (i = 0; i < dp->alen; i++) {
418 if (dp->astr[i] != forbiddenword)
419 flags2[j++] = dp->astr[i];
// NOTE(review): the previous astr buffer is replaced here; whether it is
// released (or may be shared with an alias table) is not visible - confirm
423 dp->astr = flags2; // XXX allowed forbidden words
426 dp = dp->next_homonym;
431 // add a custom dic. word to the hash table (public)
// The word is inserted without flags and without a morphological description.
432 int HashMgr::add(const std::string& word) {
// First try to re-enable an existing (forbidden) entry; a new entry is only
// created when remove_forbidden_flag() returns non-zero.
433 if (remove_forbidden_flag(word)) {
436 unsigned short* flags = NULL;
437 int wcl = get_clen_and_captype(word, &captype);
// the word itself, then (depending on its capitalisation) the hidden
// initial-capital variant
438 add_word(word, wcl, flags, al, NULL, false);
439 return add_hidden_capitalized_word(word, wcl, flags, al, NULL,
// Adds `word` with the same flag vector as the already-known word `example`.
// Nothing is added in the visible code unless `example` is found and has flags.
445 int HashMgr::add_with_affix(const std::string& word, const std::string& example) {
// the entry of the example word supplies the flags
447 struct hentry* dp = lookup(example.c_str());
// lift a forbidden mark that may exist on `word`; the return value is ignored
448 remove_forbidden_flag(word);
449 if (dp && dp->astr) {
446 // detect captype and modify word length for UTF-8 encoding
451 int wcl = get_clen_and_captype(word, &captype);
// Two insertion variants: one shares the example's flag vector directly,
// the other inserts a private copy (presumably the shared form is used with
// a flag-alias table - TODO confirm).
453 add_word(word, wcl, dp->astr, dp->alen, NULL, false);
455 unsigned short* flags =
456 (unsigned short*)malloc(dp->alen * sizeof(unsigned short));
458 memcpy((void*)flags, (void*)dp->astr,
459 dp->alen * sizeof(unsigned short));
460 add_word(word, wcl, flags, dp->alen, NULL, false);
// finally add the hidden initial-capital variant where the captype calls for it
464 return add_hidden_capitalized_word(word, wcl, dp->astr,
465 dp->alen, NULL, captype);
470 // walk the hash table entry by entry - null at end
471 // initialize: col=-1; hp = NULL; hp = walk_hashtable(col, hp);
// col is passed by reference and is advanced by this function; hp is the
// entry returned by the previous call.
472 struct hentry* HashMgr::walk_hashtable(int& col, struct hentry* hp) const {
// stay in the current bucket while its chain has more entries
473 if (hp && hp->next != NULL)
// otherwise move on to the following buckets (presumably skipping empty
// ones - TODO confirm)
475 for (col++; col < tablesize; col++) {
477 return tableptr[col];
479 // null at end and reset to start
484 // load a munched word list and build a hash table on the fly
// tpath is the dictionary file; key is handed to FileMgr unchanged.
// File layout as parsed below: the first line holds the word count, every
// following line holds word[/flags][<whitespace or tab>morphological fields].
485 int HashMgr::load_tables(const char* tpath, const char* key) {
486 // open dictionary file
487 FileMgr* dict = new FileMgr(tpath, key);
491 // first read the first line of file to get hash table size
493 if (!dict->getline(ts)) {
494 HUNSPELL_WARNING(stderr, "error: empty dic file %s\n", tpath);
500 /* remove byte order mark */
// the three bytes EF BB BF are the UTF-8 encoded byte order mark
501 if (ts.compare(0, 3, "\xEF\xBB\xBF", 3) == 0) {
505 tablesize = atoi(ts.c_str());
// nExtra: additional slots on top of the declared word count (presumably
// added to tablesize to leave room for run-time user words - TODO confirm)
507 int nExtra = 5 + USERWORD;
// reject a missing/non-positive count and counts so large that the table
// size in bytes would not fit into an int
509 if (tablesize <= 0 ||
510 (tablesize >= (std::numeric_limits<int>::max() - 1 - nExtra) /
511 int(sizeof(struct hentry*)))) {
513 stderr, "error: line 1: missing or bad word count in the dic file\n");
// an even table size gets special treatment (presumably bumped to an odd
// value - TODO confirm)
518 if ((tablesize % 2) == 0)
521 // allocate the hash table
522 tableptr = (struct hentry**)calloc(tablesize, sizeof(struct hentry*));
528 // loop through all words of the munched word list and add to hash
529 // table and create word and affix strings
// scratch buffer reused for every line by get_clen_and_captype()
531 std::vector<w_char> workbuf;
533 while (dict->getline(ts)) {
535 // split each line into word and morphological description
// A morphological field is recognised by a ':' whose third preceding
// character is a space or tab (a two-letter tag such as " po:"). dp_pos is
// then moved back over the whole run of separating whitespace.
537 while ((dp_pos = ts.find(':', dp_pos)) != std::string::npos) {
538 if ((dp_pos > 3) && (ts[dp_pos - 3] == ' ' || ts[dp_pos - 3] == '\t')) {
539 for (dp_pos -= 3; dp_pos > 0 && (ts[dp_pos-1] == ' ' || ts[dp_pos-1] == '\t'); --dp_pos)
541 if (dp_pos == 0) { // missing word
542 dp_pos = std::string::npos;
551 // tabulator is the old morphological field separator
// a tab located before the detected field (or when none was detected)
// wins: the description starts right after it
552 size_t dp2_pos = ts.find('\t');
553 if (dp2_pos != std::string::npos && (dp_pos == std::string::npos || dp2_pos < dp_pos)) {
554 dp_pos = dp2_pos + 1;
// dp receives the description; ts is cut so that the separator character
// in front of it is dropped as well
558 if (dp_pos != std::string::npos) {
559 dp.assign(ts.substr(dp_pos));
560 ts.resize(dp_pos - 1);
563 // split each line into word and affix char strings
564 // "\/" signs slash in words (not affix separator)
565 // "/" at beginning of the line is word character (not affix separator)
566 size_t ap_pos = ts.find('/');
567 while (ap_pos != std::string::npos) {
571 } else if (ts[ap_pos - 1] != '\\')
573 // replace "\/" with "/"
// after erasing the backslash the slash sits at ap_pos - 1, so searching
// again from ap_pos continues behind it
574 ts.erase(ap_pos - 1, 1);
575 ap_pos = ts.find('/', ap_pos);
578 unsigned short* flags;
// everything after the separator is the flag field
580 if (ap_pos != std::string::npos && ap_pos != ts.size()) {
581 std::string ap(ts.substr(ap_pos + 1));
// variant 1: the field is a numeric index into the flag-alias table
584 int index = atoi(ap.c_str());
585 al = get_aliasf(index, &flags, dict);
587 HUNSPELL_WARNING(stderr, "error: line %d: bad flag vector alias\n",
// variant 2: the field is decoded into a newly allocated flag vector,
// which is then sorted
591 al = decode_flags(&flags, ap.c_str(), dict);
593 HUNSPELL_WARNING(stderr, "Can't allocate memory.\n");
597 std::sort(flags, flags + al);
605 int wcl = get_clen_and_captype(ts, &captype, workbuf);
// NULL signals "no morphological description" to add_word()
606 const std::string *dp_str = dp.empty() ? NULL : &dp;
607 // add the word and its index plus its capitalized form optionally
// a non-zero result from either call is treated as failure
608 if (add_word(ts, wcl, flags, al, dp_str, false) ||
609 add_hidden_capitalized_word(ts, wcl, flags, al, dp_str, captype)) {
619 // the hash function is a simple load and rotate
620 // algorithm borrowed
// Returns the bucket index for the NUL-terminated string `word`.
621 int HashMgr::hash(const char* word) const {
622 unsigned long hv = 0;
// load: pack up to the first four bytes of the word into hv
// NOTE(review): *word is a plain char; where char is signed, bytes >= 0x80
// are sign-extended before being OR-ed in - confirm this is acceptable
623 for (int i = 0; i < 4 && *word != 0; i++)
624 hv = (hv << 8) | (*word++);
// rotate step applied to the accumulator
626 ROTATE(hv, ROTATE_LEN);
// reduce to a bucket index; tablesize must therefore never be 0
629 return (unsigned long)hv % tablesize;
// Decodes the textual flag field `flags` into a newly malloc()ed array that is
// stored in *result. load_tables() and parse_aliasf() use the return value as
// the number of decoded flags. `af` is only used here to report the current
// line number in warnings. The encoding depends on the flag mode:
632 int HashMgr::decode_flags(unsigned short** result, const std::string& flags, FileMgr* af) const {
639 case FLAG_LONG: { // two-character flags (1x2yZz -> 1x 2y Zz)
642 HUNSPELL_WARNING(stderr, "error: line %d: bad flagvector\n",
645 *result = (unsigned short*)malloc(len * sizeof(unsigned short));
// each flag is built from two bytes: first byte high, second byte low
648 for (int i = 0; i < len; i++) {
649 (*result)[i] = ((unsigned short)((unsigned char)flags[i * 2]) << 8) +
650 (unsigned char)flags[i * 2 + 1];
654 case FLAG_NUM: { // decimal numbers separated by comma (4521,23,233 -> 4521
657 unsigned short* dest;
// first pass over the text (presumably counting separators to size the
// array - TODO confirm)
658 for (size_t i = 0; i < flags.size(); ++i) {
662 *result = (unsigned short*)malloc(len * sizeof(unsigned short));
// second pass: convert each number
666 const char* src = flags.c_str();
667 for (const char* p = src; *p; p++) {
// an id that is too large only produces a warning here; the value is
// still narrowed to unsigned short and stored
670 if (i >= DEFAULTFLAGS)
672 stderr, "error: line %d: flag id %d is too large (max: %d)\n",
673 af->getlinenum(), i, DEFAULTFLAGS - 1);
674 *dest = (unsigned short)i;
676 HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong flag id\n",
// same checks repeated, presumably for the last number, which has no
// trailing comma - TODO confirm
683 if (i >= DEFAULTFLAGS)
684 HUNSPELL_WARNING(stderr,
685 "error: line %d: flag id %d is too large (max: %d)\n",
686 af->getlinenum(), i, DEFAULTFLAGS - 1);
687 *dest = (unsigned short)i;
689 HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong flag id\n",
693 case FLAG_UNI: { // UTF-8 characters
// the converted w_char elements are copied as raw 16-bit values
694 std::vector<w_char> w;
697 *result = (unsigned short*)malloc(len * sizeof(unsigned short));
700 memcpy(*result, &w[0], len * sizeof(short));
703 default: { // Ispell's one-character flags (erfg -> e r f g)
704 unsigned short* dest;
706 *result = (unsigned short*)malloc(len * sizeof(unsigned short));
// one flag per byte, read as unsigned
710 for (size_t i = 0; i < flags.size(); ++i) {
711 *dest = (unsigned char)flags[i];
// Vector overload: decodes the textual flag field `flags` and APPENDS the
// flags to `result` (existing elements are kept). `af` is only used here to
// report the current line number in warnings.
719 bool HashMgr::decode_flags(std::vector<unsigned short>& result, const std::string& flags, FileMgr* af) const {
724 case FLAG_LONG: { // two-character flags (1x2yZz -> 1x 2y Zz)
725 size_t len = flags.size();
727 HUNSPELL_WARNING(stderr, "error: line %d: bad flagvector\n",
730 result.reserve(result.size() + len);
// each flag is built from two bytes: first byte high, second byte low
731 for (size_t i = 0; i < len; ++i) {
732 result.push_back(((unsigned short)((unsigned char)flags[i * 2]) << 8) +
733 (unsigned char)flags[i * 2 + 1]);
737 case FLAG_NUM: { // decimal numbers separated by comma (4521,23,233 -> 4521
739 const char* src = flags.c_str();
740 for (const char* p = src; *p; p++) {
// an id that is too large only produces a warning here; the value is
// still narrowed to unsigned short and appended
743 if (i >= DEFAULTFLAGS)
745 stderr, "error: line %d: flag id %d is too large (max: %d)\n",
746 af->getlinenum(), i, DEFAULTFLAGS - 1);
747 result.push_back((unsigned short)i);
748 if (result.back() == 0)
749 HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong flag id\n",
// same checks repeated, presumably for the last number, which has no
// trailing comma - TODO confirm
755 if (i >= DEFAULTFLAGS)
756 HUNSPELL_WARNING(stderr,
757 "error: line %d: flag id %d is too large (max: %d)\n",
758 af->getlinenum(), i, DEFAULTFLAGS - 1);
759 result.push_back((unsigned short)i);
760 if (result.back() == 0)
761 HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong flag id\n",
765 case FLAG_UNI: { // UTF-8 characters
766 std::vector<w_char> w;
// grow result by the number of converted characters and copy them behind
// the existing elements as raw 16-bit values
768 size_t len = w.size();
769 size_t origsize = result.size();
770 result.resize(origsize + len);
771 memcpy(&result[origsize], &w[0], len * sizeof(short));
774 default: { // Ispell's one-character flags (erfg -> e r f g)
775 result.reserve(flags.size());
// one flag per byte, read as unsigned
776 for (size_t i = 0; i < flags.size(); ++i) {
777 result.push_back((unsigned char)flags[i]);
// Decodes a single flag from the NUL-terminated text `f`. One variant per
// flag encoding is visible below; the result is collected in s.
784 unsigned short HashMgr::decode_flag(const char* f) const {
785 unsigned short s = 0;
// two-character flag: first byte high, second byte low
789 s = ((unsigned short)((unsigned char)f[0]) << 8) + (unsigned char)f[1];
// numeric flag: an id that is too large only produces a warning; the value
// is still narrowed to unsigned short
793 if (i >= DEFAULTFLAGS)
794 HUNSPELL_WARNING(stderr, "error: flag id %d is too large (max: %d)\n",
795 i, DEFAULTFLAGS - 1);
796 s = (unsigned short)i;
// UTF-8 flag: the first converted character is copied as a raw 16-bit value
799 std::vector<w_char> w;
802 memcpy(&s, &w[0], 1 * sizeof(short));
// single-character flag: the first byte, read as unsigned
806 s = *(unsigned char*)f;
// warning for a decoded value of 0 (presumably guarded by a check of s -
// TODO confirm)
809 HUNSPELL_WARNING(stderr, "error: 0 is wrong flag id\n");
// Converts the numeric flag f back into its textual form for the current
// flag_mode. The result is produced by mystrdup(); presumably the caller is
// responsible for releasing it - TODO confirm.
813 char* HashMgr::encode_flag(unsigned short f) const {
// placeholder text for a special flag value (the guarding condition is
// presumably f == 0 - TODO confirm)
815 return mystrdup("(NULL)");
817 if (flag_mode == FLAG_LONG) {
// two characters: high byte first, then low byte (f minus its high part)
818 ch.push_back((unsigned char)(f >> 8));
819 ch.push_back((unsigned char)(f - ((f >> 8) << 8)));
820 } else if (flag_mode == FLAG_NUM) {
// numeric mode formats through a string stream
821 std::ostringstream stream;
824 } else if (flag_mode == FLAG_UNI) {
// UTF-8 mode: treat the 16-bit value as one w_char
825 const w_char* w_c = (const w_char*)&f;
826 std::vector<w_char> w(w_c, w_c + 1);
// default mode: a single byte
829 ch.push_back((unsigned char)(f));
831 return mystrdup(ch.c_str());
834 // read in aff file and set flag mode
// Scans the affix file line by line and picks up the settings this class
// needs: FLAG, FORBIDDENWORD, SET, LANG, IGNORE, AF, AM, COMPLEXPREFIXES.
// affpath is the affix file; key is handed to FileMgr unchanged.
835 int HashMgr::load_config(const char* affpath, const char* key) {
838 // open the affix file
839 FileMgr* afflst = new FileMgr(affpath, key);
842 stderr, "Error - could not open affix description file %s\n", affpath);
846 // read in each line ignoring any that do not
847 // start with a known line type indicator
850 while (afflst->getline(line)) {
853 /* remove byte order mark */
// the three bytes EF BB BF are the UTF-8 encoded byte order mark
856 if (line.compare(0, 3, "\xEF\xBB\xBF", 3) == 0) {
861 /* parse the FLAG parameter, which selects the flag encoding */
// the keyword must be followed by whitespace so that longer keywords
// starting with "FLAG" do not match
862 if ((line.compare(0, 4, "FLAG", 4) == 0) && line.size() > 4 && isspace(line[4])) {
// FLAG_CHAR is the default; any other value means FLAG was already seen
863 if (flag_mode != FLAG_CHAR) {
864 HUNSPELL_WARNING(stderr,
865 "error: line %d: multiple definitions of the FLAG "
866 "affix file parameter\n",
867 afflst->getlinenum());
// the mode is detected by substring search over the whole line; when
// several keywords occur, the last test that matches wins
869 if (line.find("long") != std::string::npos)
870 flag_mode = FLAG_LONG;
871 if (line.find("num") != std::string::npos)
872 flag_mode = FLAG_NUM;
873 if (line.find("UTF-8") != std::string::npos)
874 flag_mode = FLAG_UNI;
// still the default: no recognised parameter was given
875 if (flag_mode == FLAG_CHAR) {
878 "error: line %d: FLAG needs `num', `long' or `UTF-8' parameter\n",
879 afflst->getlinenum());
// FORBIDDENWORD: replaces the default forbidden-word flag
883 if (line.compare(0, 13, "FORBIDDENWORD", 13) == 0) {
885 if (!parse_string(line, st, afflst->getlinenum())) {
889 forbiddenword = decode_flag(st.c_str());
// SET: character encoding of the dictionary
892 if (line.compare(0, 3, "SET", 3) == 0) {
893 if (!parse_string(line, enc, afflst->getlinenum())) {
897 if (enc == "UTF-8") {
899 #ifndef OPENOFFICEORG
900 #ifndef MOZILLA_CLIENT
901 initialize_utf_tbl();
// conversion table for the named encoding
905 csconv = get_current_cs(enc);
// LANG: language name, mapped to a numeric language id
908 if (line.compare(0, 4, "LANG", 4) == 0) {
909 if (!parse_string(line, lang, afflst->getlinenum())) {
913 langnum = get_lang_num(lang);
916 /* parse in the ignored characters (for example, Arabic optional diacritics
918 if (line.compare(0, 6, "IGNORE", 6) == 0) {
919 if (!parse_array(line, ignorechars, ignorechars_utf16,
920 utf8, afflst->getlinenum())) {
926 if ((line.compare(0, 2, "AF", 2) == 0) && line.size() > 2 && isspace(line[2])) {
927 if (!parse_aliasf(line, afflst)) {
933 if ((line.compare(0, 2, "AM", 2) == 0) && line.size() > 2 && isspace(line[2])) {
934 if (!parse_aliasm(line, afflst)) {
940 if (line.compare(0, 15, "COMPLEXPREFIXES", 15) == 0)
943 if (((line.compare(0, 3, "SFX", 3) == 0) ||
944 (line.compare(0, 3, "PFX", 3) == 0)) && line.size() > 3 && isspace(line[3]))
949 csconv = get_current_cs(SPELL_ENCODING);
954 /* parse in the flag alias (AF) table */
// `line` is the header line, which carries the number of entries; the entries
// themselves are read from `af`, one line each. Only one table is accepted
// per file.
955 bool HashMgr::parse_aliasf(const std::string& line, FileMgr* af) {
// a non-zero count means a table was already read
956 if (numaliasf != 0) {
957 HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n",
// split the header line into whitespace-separated pieces
963 std::string::const_iterator iter = line.begin();
964 std::string::const_iterator start_piece = mystrsep(line, iter);
965 while (start_piece != line.end()) {
// the piece holding the entry count
972 numaliasf = atoi(std::string(start_piece, iter).c_str());
977 HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n",
// two parallel arrays with numaliasf slots each: the flag vectors and
// their lengths
982 (unsigned short**)malloc(numaliasf * sizeof(unsigned short*));
984 (unsigned short*)malloc(numaliasf * sizeof(unsigned short));
985 if (!aliasf || !aliasflen) {
1002 start_piece = mystrsep(line, iter);
1010 HUNSPELL_WARNING(stderr, "error: line %d: missing data\n",
1015 /* now parse the numaliasf lines to read in the remainder of the table */
1016 for (int j = 0; j < numaliasf; j++) {
1018 if (!af->getline(nl))
1025 start_piece = mystrsep(nl, iter);
1026 while (start_piece != nl.end()) {
// every entry line has to start with the AF keyword
1029 if (nl.compare(start_piece - nl.begin(), 2, "AF", 2) != 0) {
1035 HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n",
// decode the flag text of entry j; the vector is kept sorted
1042 std::string piece(start_piece, iter);
1044 (unsigned short)decode_flags(&(aliasf[j]), piece, af);
1045 std::sort(aliasf[j], aliasf[j] + aliasflen[j]);
1052 start_piece = mystrsep(nl, iter);
1060 HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n",
// Returns non-zero when a flag alias table has been allocated (aliasf is set).
1068 int HashMgr::is_aliasf() const {
1069 return (aliasf != NULL);
// Looks up flag alias number `index` (1-based). On success *fvec is set to the
// vector stored in the alias table (the table's own buffer, not a copy) and
// its length is returned. `af` is only used to report the line number in the
// warning.
1072 int HashMgr::get_aliasf(int index, unsigned short** fvec, FileMgr* af) const {
// valid indices are 1..numaliasf
1073 if ((index > 0) && (index <= numaliasf)) {
1074 *fvec = aliasf[index - 1];
1075 return aliasflen[index - 1];
// out-of-range index
1077 HUNSPELL_WARNING(stderr, "error: line %d: bad flag alias index: %d\n",
1078 af->getlinenum(), index);
1083 /* parse morph alias definitions */
// `line` is the AM header line, which carries the number of entries; the
// entries themselves are read from `af`, one line each. Only one table is
// accepted per file.
1084 bool HashMgr::parse_aliasm(const std::string& line, FileMgr* af) {
// a non-zero count means a table was already read
1085 if (numaliasm != 0) {
1086 HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n",
// split the header line into whitespace-separated pieces
1092 std::string::const_iterator iter = line.begin();
1093 std::string::const_iterator start_piece = mystrsep(line, iter);
1094 while (start_piece != line.end()) {
// the piece holding the entry count; it must be at least 1
1101 numaliasm = atoi(std::string(start_piece, iter).c_str());
1102 if (numaliasm < 1) {
1103 HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n",
// one string pointer per alias
1107 aliasm = (char**)malloc(numaliasm * sizeof(char*));
1119 start_piece = mystrsep(line, iter);
1125 HUNSPELL_WARNING(stderr, "error: line %d: missing data\n",
1130 /* now parse the numaliasm lines to read in the remainder of the table */
1131 for (int j = 0; j < numaliasm; j++) {
1133 if (!af->getline(nl))
1139 start_piece = mystrsep(nl, iter);
1140 while (start_piece != nl.end()) {
// every entry line has to start with the AM keyword
1143 if (nl.compare(start_piece - nl.begin(), 2, "AM", 2) != 0) {
1144 HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n",
1154 // add the remaining of the line
// the alias text runs from the current piece to the end of the line
1155 std::string::const_iterator end = nl.end();
1156 std::string chunk(start_piece, end);
// with complexprefixes the alias text is stored reversed
1157 if (complexprefixes) {
1159 reverseword_utf(chunk);
// the table keeps its own duplicate of the text
1163 aliasm[j] = mystrdup(chunk.c_str());
1170 start_piece = mystrsep(nl, iter);
1176 HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n",
// Returns non-zero when a morphological alias table has been allocated
// (aliasm is set).
1184 int HashMgr::is_aliasm() const {
1185 return (aliasm != NULL);
// Returns the stored text of morphological alias number `index` (1-based).
// The pointer refers to the table's own string, not a copy.
1188 char* HashMgr::get_aliasm(int index) const {
// valid indices are 1..numaliasm
1189 if ((index > 0) && (index <= numaliasm))
1190 return aliasm[index - 1];
// out-of-range index
1191 HUNSPELL_WARNING(stderr, "error: bad morph. alias index: %d\n", index);