// Constructs a thesaurus object from the path of an index file and the path
// of a data file. The actual loading is delegated to thInitialize(); any
// result other than 1 is treated as a failure and reported on stderr.
11 MyThes::MyThes(const char* idxpath, const char * datpath)
19 if (thInitialize(idxpath, datpath) != 1) {
// Both paths are printed because thInitialize() opens both files.
// NOTE(review): thInitialize() also emits its own "bad index" and
// "bad memory allocation" diagnostics, so this "can't open" message can
// follow a failure that was not actually an open error.
20 fprintf(stderr,"Error - can't open %s or %s\n",idxpath, datpath);
23 // did not initialize properly - throw exception?
// Loads the index file at idxpath into the list/offst arrays and opens the
// data file at datpath into pdfile.
//
// Index file layout, as consumed by the code below:
//   line 1     - encoding name (duplicated into `encoding`)
//   line 2     - number of index entries (parsed with atoi)
//   lines 3..  - "<word>|<offset>" records, one per line
//
// The constructor treats a return value of 1 as success.
// NOTE(review): the return statements and the cleanup performed on the
// failure paths are not visible in this excerpt — confirm against the full
// file.
34 int MyThes::thInitialize(const char* idxpath, const char* datpath)
37 // open the index file
38 FILE * pifile = fopen(idxpath,"r");
43 // parse in encoding and index size
// Scratch line buffer of MAX_WD_LEN chars; wrd aliases its storage.
44 std::vector<char> buffer(MAX_WD_LEN);
45 char * wrd = &buffer[0];
46 readLine(pifile,wrd,MAX_WD_LEN);
47 encoding = mystrdup(wrd);
48 readLine(pifile,wrd,MAX_WD_LEN);
49 int idxsz = atoi(wrd);
51 // spitz: fix -Wsign-compare warning
// Reject non-positive counts and counts larger than INT_MAX / sizeof(char*),
// i.e. sizes whose pointer-array byte count would not fit in an int.
52 if (idxsz <= 0 || static_cast<unsigned int>(idxsz) > std::numeric_limits<int>::max() / sizeof(char*)) {
53 fprintf(stderr,"Error - bad index %d\n", idxsz);
58 // now allocate list, offst for the given size
// calloc zero-fills both arrays, so unused slots hold NULL / 0.
59 list = (char**) calloc(idxsz,sizeof(char*));
60 offst = (unsigned int*) calloc(idxsz,sizeof(unsigned int));
62 if ( (!(list)) || (!(offst)) ) {
63 fprintf(stderr,"Error - bad memory allocation\n");
68 // now parse the remaining lines of the index
69 int len = readLine(pifile,wrd,MAX_WD_LEN);
// np is the position of the '|' separating the word from its offset.
72 int np = mystr_indexOfChar(wrd,'|');
// np+1 zero-filled bytes: the memcpy of np bytes below therefore leaves the
// stored word NUL-terminated.
76 list[nw] = (char *)calloc(1,(np+1));
78 fprintf(stderr,"Error - bad memory allocation\n");
83 memcpy((list[nw]),wrd,np);
// The text after '|' is the entry's offset; Lookup() later passes this
// value to fseek() on the data file.
84 offst[nw] = atoi(wrd+np+1);
88 len = readLine(pifile,wrd,MAX_WD_LEN);
93 /* next open the data file */
94 pdfile = fopen(datpath,"r");
// Releases the resources held by this object: the data file, the strings
// stored in list, and the encoding and offst buffers.
// NOTE(review): the statements that close the file, free each list entry
// and free list itself are not visible in this excerpt.
103 void MyThes::thCleanup()
105 /* first close the data file */
113 /* now free up all the allocated strings on the list */
// Walks the first nw slots of list.
114 for (int i=0; i < nw; i++)
// encoding was produced by mystrdup() (malloc) and offst by calloc() in
// thInitialize(), so both are released with free().
124 if (encoding) free((void*)encoding);
125 if (offst) free((void*)offst);
135 // lookup text in index and count of meanings and a list of meaning entries
136 // with each entry having a synonym count and pointer to an
137 // array of char * (i.e. the synonyms)
139 // note: calling routine should call CleanUpAfterLookup with the original
140 // meaning pointer and count to properly deallocate memory
// Searches the loaded index for pText and, on a hit, reads that word's
// entry from the data file into a newly allocated array of mentry that is
// stored in *pme.
//   pText - word to search for; only its first len bytes are used
//   len   - number of bytes of pText copied into the search key
//   pme   - out-parameter; receives the malloc'd meaning array, or NULL
//           when the entry lists zero meanings
// Returns 0 when no data file is open or the word is not in the index.
// NOTE(review): the remaining return paths are not visible in this excerpt;
// presumably the number of meanings is returned on success — confirm.
142 int MyThes::Lookup(const char * pText, int len, mentry** pme)
147 // handle the case of missing file or file related errors
148 if (! pdfile) return 0;
152 /* copy search word and make sure null terminated */
// The vector is zero-initialised and one byte longer than len, so the byte
// after the copied text is already '\0'.
153 std::vector<char> buffer(len+1);
154 char * wrd = &buffer[0];
155 memcpy(wrd,pText,len);
157 /* find it in the list */
// binsearch() is skipped entirely when the index holds no words.
158 int idx = nw > 0 ? binsearch(wrd,list,nw) : -1;
159 if (idx < 0) return 0;
161 // now seek to the offset
// offst[idx] is the value parsed from the index line for this word.
162 offset = (long) offst[idx];
163 int rc = fseek(pdfile,offset,SEEK_SET);
168 // grab the count of the number of meanings
169 // and allocate a list of meaning entries
// Heap line buffer of MAX_LN_LEN bytes, reused for every line read below.
171 buf = (char *) malloc( MAX_LN_LEN );
173 readLine(pdfile, buf, (MAX_LN_LEN-1));
174 int np = mystr_indexOfChar(buf,'|');
// The number after the first '|' on the entry's first line is the meaning
// count.
179 int nmeanings = atoi(buf+np+1);
180 // spitz: fix -Wsign-compare warning
// Guards the allocation below: negative counts and counts above
// INT_MAX / sizeof(mentry) are rejected.
// NOTE(review): what happens when this test fires is not visible here.
181 if (nmeanings < 0 || static_cast<unsigned int>(nmeanings) > std::numeric_limits<int>::max() / sizeof(mentry))
// A zero count yields NULL instead of a malloc(0) result.
183 *pme = (mentry*)(nmeanings ? malloc(nmeanings * sizeof(mentry)) : NULL);
189 // now read in each meaning and parse it to get defn, count and synonym lists
// Scratch buffer in which the definition string is assembled further down.
191 char dfn[MAX_WD_LEN];
193 for (int j = 0; j < nmeanings; j++) {
194 readLine(pdfile, buf, (MAX_LN_LEN-1));
200 // store away the part of speech for later use
203 np = mystr_indexOfChar(p,'|');
212 // count the number of fields in the remaining line
215 np = mystr_indexOfChar(d,'|');
219 np = mystr_indexOfChar(d,'|');
// One char* slot per field counted above.
222 pm->psyns = (char **) malloc(nf*sizeof(char*));
224 // fill in the synonym list
226 for (int jj = 0; jj < nf; jj++)
228 np = mystr_indexOfChar(d,'|');
// NOTE(review): two alternative copies of the current field follow;
// presumably one handles a field terminated by '|' and the other the last
// field on the line — the branch conditions are not visible here.
232 pm->psyns[jj] = mystrdup(d);
237 pm->psyns[jj] = mystrdup(d);
241 // add pos to first synonym to create the definition
245 int m = strlen(pm->psyns[0]);
// Only build the combined string when it fits in dfn.
// NOTE(review): k is presumably the length of the part-of-speech text
// already placed in dfn — confirm.
246 if ((k+m) < (MAX_WD_LEN - 1)) {
// m+1 bytes: the synonym plus its terminating NUL, placed at dfn+k+1.
249 strncpy((dfn+k+1),(pm->psyns[0]),m+1);
250 pm->defn = mystrdup(dfn);
// Alternative path: the definition is a plain copy of the first synonym
// (presumably the else branch of the size test above).
252 pm->defn = mystrdup(pm->psyns[0]);
// Frees the memory hanging off a meaning array produced by Lookup(): for
// each of the nmeanings entries, every synonym string, the psyns pointer
// array and the defn string.
//   pme       - address of the meaning-array pointer filled in by Lookup()
//   nmeanings - number of entries in that array
// NOTE(review): how pm is initialised/advanced and whether *pme itself is
// freed and reset is not visible in this excerpt.
266 void MyThes::CleanUpAfterLookup(mentry ** pme, int nmeanings)
// Nothing to release for an empty result or a NULL array.
269 if (nmeanings == 0) return;
270 if ((*pme) == NULL) return;
274 for (int i = 0; i < nmeanings; i++) {
275 int count = pm->count;
276 for (int j = 0; j < count; j++) {
// Each synonym was allocated by mystrdup() in Lookup().
277 if (pm->psyns[j]) free(pm->psyns[j]);
// The pointer array itself was malloc'd in Lookup().
280 if (pm->psyns) free(pm->psyns);
282 if (pm->defn) free(pm->defn);
294 // read a line of text from a text file stripping
295 // off the line terminator and replacing it with
296 // a null string terminator.
297 // returns: -1 on error or the number of characters in
298 // the returned string
300 // A maximum of nc characters will be returned
// Reads one line from pf into buf using fgets().
//   pf  - open input stream
//   buf - destination buffer, at least nc bytes long
//   nc  - size limit handed to fgets()
// fgets() stores at most nc-1 characters followed by a terminating NUL, so
// the text placed in buf is never longer than nc-1 characters.
// NOTE(review): the line-terminator stripping and the return value are not
// visible in this excerpt.
302 int MyThes::readLine(FILE * pf, char * buf, int nc)
305 if (fgets(buf,nc,pf)) {
314 // performs a binary search on null terminated character
317 // returns: -1 on not found
318 // index of wrd in the list[]
// Binary search for the string sw among the nlst entries of _list, using
// strcmp() for ordering; _list must therefore already be sorted in strcmp
// order for the result to be meaningful.
// Returns -1 when sw is not found.
// NOTE(review): the initial values of lp/up and the loop that narrows them
// are not visible here; presumably lp starts at 0 and up at nlst-1, which
// would require nlst > 0 (Lookup() only calls this when nw > 0) — confirm.
320 int MyThes::binsearch(char * sw, char* _list[], int nlst)
322 int lp, up, mp, j, indx;
// Quick rejection: sw sorts before the entry at lp or after the entry at up.
326 if (strcmp(sw,_list[lp]) < 0) return -1;
327 if (strcmp(sw,_list[up]) > 0) return -1;
// Midpoint of the current [lp, up] range (shift right by one = divide by 2).
329 mp = (int)((lp+up) >> 1);
330 j = strcmp(sw,_list[mp]);
// The bounds have crossed, so the range is empty: not found.
338 if (lp > up) return -1;
// Accessor returning a char*.
// NOTE(review): the body is not visible in this excerpt; presumably it
// returns the encoding string read from the first line of the index file
// — confirm.
343 char * MyThes::get_th_encoding()
349 // string duplication routine
// Duplicates the C string s into a buffer obtained from malloc(); callers
// in this file release such copies with free().
// NOTE(review): the declaration of d, any NULL check on s and the return
// statement are not visible in this excerpt.
350 char * MyThes::mystrdup(const char * s)
// Length including the terminating NUL.
354 int sl = strlen(s)+1;
355 d = (char *) malloc(sl);
// Copy only when the allocation succeeded; the NUL is copied too.
356 if (d) memcpy(d,s,sl);
361 // remove cross-platform text line end characters
// Strips a trailing line terminator from s in place, covering "\n", "\r"
// and the two-character "\r\n".
// NOTE(review): k is presumably strlen(s); its declaration is not visible
// in this excerpt.
362 void MyThes::mychomp(char * s)
// Last character is CR or LF: overwrite it with NUL.
365 if ((k > 0) && ((*(s+k-1)=='\r') || (*(s+k-1)=='\n'))) *(s+k-1) = '\0';
// Second-to-last character is CR (the CR of a CRLF pair): overwrite it too.
// NOTE(review): this test does not depend on the previous line having
// removed anything, so a '\r' at position k-2 truncates the string even
// when the final character is ordinary text (e.g. "a\rb" becomes "a").
366 if ((k > 1) && (*(s+k-2) == '\r')) *(s+k-2) = '\0';
370 // return index of char in string
// Returns the zero-based position of the first occurrence of character c in
// the C string d.
// NOTE(review): the value returned when c is absent is not visible in this
// excerpt — confirm.
371 int MyThes::mystr_indexOfChar(const char * d, int c)
// strchr() yields a pointer to the first match or NULL; the cast merely
// drops const, and the visible code does not write through p.
373 char * p = strchr((char *)d,c);
// Pointer difference = index of the match within d.
374 if (p) return (int)(p-d);