11 MyThes::MyThes(const char* idxpath, const char * datpath)
19 if (thInitialize(idxpath, datpath) != 1) {
20 fprintf(stderr,"Error - can't open %s or %s\n",idxpath, datpath);
23 // did not initialize properly - throw exception?
34 int MyThes::thInitialize(const char* idxpath, const char* datpath)
37 // open the index file
38 FILE * pifile = fopen(idxpath,"r");
43 // parse in encoding and index size
44 std::vector<char> buffer(MAX_WD_LEN);
45 char * wrd = &buffer[0];
46 readLine(pifile,wrd,MAX_WD_LEN);
47 encoding = mystrdup(wrd);
48 readLine(pifile,wrd,MAX_WD_LEN);
49 int idxsz = atoi(wrd);
51 if (idxsz <= 0 || idxsz > std::numeric_limits<int>::max() / sizeof(char*)) { // reject non-positive counts and counts whose char* array byte size would exceed INT_MAX
52 fprintf(stderr,"Error - bad index %d\n", idxsz);
57 // now allocate list, offst for the given size
58 list = (char**) calloc(idxsz,sizeof(char*));
59 offst = (unsigned int*) calloc(idxsz,sizeof(unsigned int));
61 if ( (!(list)) || (!(offst)) ) {
62 fprintf(stderr,"Error - bad memory allocation\n");
67 // now parse the remaining lines of the index
68 int len = readLine(pifile,wrd,MAX_WD_LEN);
71 int np = mystr_indexOfChar(wrd,'|');
75 list[nw] = (char *)calloc(1,(np+1));
77 fprintf(stderr,"Error - bad memory allocation\n");
82 memcpy((list[nw]),wrd,np);
83 offst[nw] = atoi(wrd+np+1);
87 len = readLine(pifile,wrd,MAX_WD_LEN);
92 /* next open the data file */
93 pdfile = fopen(datpath,"r");
102 void MyThes::thCleanup()
104 /* first close the data file */
112 /* now free up all the allocated strings on the list */
113 for (int i=0; i < nw; i++)
123 if (encoding) free((void*)encoding);
124 if (offst) free((void*)offst);
134 // look up text in the index; yields a count of meanings and a list of meaning entries,
135 // with each entry having a synonym count and a pointer to an
136 // array of char * (i.e. the synonyms)
138 // note: the calling routine should call CleanUpAfterLookup with the original
139 // meaning pointer and count to properly deallocate memory
141 int MyThes::Lookup(const char * pText, int len, mentry** pme)
146 // handle the case of missing file or file related errors
147 if (! pdfile) return 0;
151 /* copy search word and make sure null terminated */
152 std::vector<char> buffer(len+1);
153 char * wrd = &buffer[0];
154 memcpy(wrd,pText,len);
156 /* find it in the list */
157 int idx = nw > 0 ? binsearch(wrd,list,nw) : -1;
158 if (idx < 0) return 0;
160 // now seek to the offset
161 offset = (long) offst[idx];
162 int rc = fseek(pdfile,offset,SEEK_SET);
167 // grab the count of the number of meanings
168 // and allocate a list of meaning entries
170 buf = (char *) malloc( MAX_LN_LEN );
172 readLine(pdfile, buf, (MAX_LN_LEN-1));
173 int np = mystr_indexOfChar(buf,'|');
178 int nmeanings = atoi(buf+np+1);
179 if (nmeanings < 0 || nmeanings > std::numeric_limits<int>::max() / sizeof(mentry))
181 *pme = (mentry*)(nmeanings ? malloc(nmeanings * sizeof(mentry)) : NULL);
187 // now read in each meaning and parse it to get defn, count and synonym lists
189 char dfn[MAX_WD_LEN];
191 for (int j = 0; j < nmeanings; j++) {
192 readLine(pdfile, buf, (MAX_LN_LEN-1));
198 // store away the part of speech for later use
201 np = mystr_indexOfChar(p,'|');
210 // count the number of fields in the remaining line
213 np = mystr_indexOfChar(d,'|');
217 np = mystr_indexOfChar(d,'|');
220 pm->psyns = (char **) malloc(nf*sizeof(char*));
222 // fill in the synonym list
224 for (int jj = 0; jj < nf; jj++)
226 np = mystr_indexOfChar(d,'|');
230 pm->psyns[jj] = mystrdup(d);
235 pm->psyns[jj] = mystrdup(d);
239 // add pos to first synonym to create the definition
243 int m = strlen(pm->psyns[0]);
244 if ((k+m) < (MAX_WD_LEN - 1)) {
247 strncpy((dfn+k+1),(pm->psyns[0]),m+1);
248 pm->defn = mystrdup(dfn);
250 pm->defn = mystrdup(pm->psyns[0]);
264 void MyThes::CleanUpAfterLookup(mentry ** pme, int nmeanings)
267 if (nmeanings == 0) return;
268 if ((*pme) == NULL) return;
272 for (int i = 0; i < nmeanings; i++) {
273 int count = pm->count;
274 for (int j = 0; j < count; j++) {
275 if (pm->psyns[j]) free(pm->psyns[j]);
278 if (pm->psyns) free(pm->psyns);
280 if (pm->defn) free(pm->defn);
292 // read a line of text from a text file stripping
293 // off the line terminator and replacing it with
294 // a null string terminator.
295 // returns: -1 on error or the number of characters
296 // in the returned string
298 // A maximum of nc characters will be returned
300 int MyThes::readLine(FILE * pf, char * buf, int nc)
303 if (fgets(buf,nc,pf)) {
312 // performs a binary search on null terminated character
315 // returns: -1 on not found
316 // index of wrd in the list[]
318 int MyThes::binsearch(char * sw, char* _list[], int nlst)
320 int lp, up, mp, j, indx;
324 if (strcmp(sw,_list[lp]) < 0) return -1;
325 if (strcmp(sw,_list[up]) > 0) return -1;
327 mp = (int)((lp+up) >> 1);
328 j = strcmp(sw,_list[mp]);
336 if (lp > up) return -1;
341 char * MyThes::get_th_encoding()
347 // string duplication routine
348 char * MyThes::mystrdup(const char * s)
352 int sl = strlen(s)+1;
353 d = (char *) malloc(sl);
354 if (d) memcpy(d,s,sl);
359 // remove cross-platform text line end characters
360 void MyThes::mychomp(char * s)
363 if ((k > 0) && ((*(s+k-1)=='\r') || (*(s+k-1)=='\n'))) *(s+k-1) = '\0';
364 if ((k > 1) && (*(s+k-2) == '\r')) *(s+k-2) = '\0';
368 // return index of char in string
369 int MyThes::mystr_indexOfChar(const char * d, int c)
371 char * p = strchr((char *)d,c);
372 if (p) return (int)(p-d);