1 #include "license.readme"
9 // some basic utility routines
// string duplication routine
//
// Copies the NUL-terminated string p into freshly malloc'ed storage.
//   p       : string to copy; a NULL pointer is tolerated
//   returns : heap copy of p (caller releases it with free()), or NULL
//             when p is NULL or the allocation fails
char * mythesstrdup(const char * p)
{
  if (!p) return NULL;

  // keep the byte count in size_t; narrowing it to int truncates long input
  size_t sl = strlen(p) + 1;
  char * d = (char *)malloc(sl);
  if (d) memcpy(d, p, sl);
  return d;
}
// return index of char in string
//
//   d       : NUL-terminated string to scan; NULL is treated as "not found"
//   c       : character to look for
//   returns : zero-based index of the first occurrence of c in d,
//             or -1 when c does not occur
int mystr_indexOfChar(const char * d, int c)
{
  if (!d) return -1;
  const char * p = strchr(d,c);
  if (p) return (int)(p-d);
  // every path must return a value: report "not found" explicitly
  return -1;
}
// remove cross-platform text line end characters
//
// Strips one trailing line terminator from s in place: "\n", "\r" or
// "\r\n" (also "\r\r"). Characters that are not at the very end of the
// string are never touched. A NULL pointer is ignored.
void mytheschomp(char * s)
{
  if (!s) return;

  size_t k = strlen(s);
  if ((k > 0) && ((s[k-1] == '\r') || (s[k-1] == '\n'))) {
    s[k-1] = '\0';
    // only look for the CR of a CR/LF pair once a terminator was really
    // removed; otherwise a line such as "a\rb" would be cut short
    if ((k > 1) && (s[k-2] == '\r')) s[k-2] = '\0';
  }
}
45 MyThes::MyThes(const char* idxpath, const char * datpath)
52 if (thInitialize(idxpath, datpath) != 1) {
53 fprintf(stderr,"Error - can't open %s or %s\n",idxpath, datpath);
55 if (encoding) free((void*)encoding);
56 if (list) free((void*)list);
57 if (offst) free((void*)offst);
58 // did not initialize properly - throw exception?
65 if (thCleanup() != 1) {
66 /* did not cleanup properly - throw exception? */
68 if (encoding) free((void*)encoding);
75 int MyThes::thInitialize(const char* idxpath, const char* datpath)
78 // open the index file
79 FILE * pifile = fopen(idxpath,"r");
85 // parse in encoding and index size */
87 wrd = (char *)calloc(1, MAX_WD_LEN);
88 int len = readLine(pifile,wrd,MAX_WD_LEN);
89 encoding = mythesstrdup(wrd);
90 len = readLine(pifile,wrd,MAX_WD_LEN);
91 int idxsz = atoi(wrd);
94 // now allocate list, offst for the given size
95 list = (char**) calloc(idxsz,sizeof(char*));
96 offst = (unsigned int*) calloc(idxsz,sizeof(unsigned int));
98 if ( (!(list)) || (!(offst)) ) {
99 fprintf(stderr,"Error - bad memory allocation\n");
104 // now parse the remaining lines of the index
105 len = readLine(pifile,wrd,MAX_WD_LEN);
108 int np = mystr_indexOfChar(wrd,'|');
112 list[nw] = (char *)calloc(1,(np+1));
113 memcpy((list[nw]),wrd,np);
114 offst[nw] = atoi(wrd+np+1);
118 len = readLine(pifile,wrd,MAX_WD_LEN);
125 /* next open the data file */
126 pdfile = fopen(datpath,"r");
136 int MyThes::thCleanup()
138 /* first close the data file */
144 /* now free up all the allocated strings on the list */
145 for (int i=0; i < nw; i++)
153 if (list) free((void*)list);
154 if (offst) free((void*)offst);
162 // lookup text in index and count of meanings and a list of meaning entries
163 // with each entry having a synonym count and pointer to an
164 // array of char * (i.e the synonyms)
166 // note: calling routine should call CleanUpAfterLookup with the original
167 // meaning point and count to properly deallocate memory
169 int MyThes::Lookup(const char * pText, int len, mentry** pme)
174 // handle the case of missing file or file related errors
175 if (! pdfile) return 0;
179 /* copy search word and make sure null terminated */
180 char * wrd = (char *) calloc(1,(len+1));
181 memcpy(wrd,pText,len);
183 /* find it in the list */
184 int idx = binsearch(wrd,list,nw);
186 if (idx < 0) return 0;
188 // now seek to the offset
189 offset = (long) offst[idx];
190 int rc = fseek(pdfile,offset,SEEK_SET);
195 // grab the count of the number of meanings
196 // and allocate a list of meaning entries
198 buf = (char *) malloc( MAX_LN_LEN );
200 readLine(pdfile, buf, (MAX_LN_LEN-1));
201 int np = mystr_indexOfChar(buf,'|');
206 int nmeanings = atoi(buf+np+1);
207 *pme = (mentry*) malloc( nmeanings * sizeof(mentry) );
213 // now read in each meaning and parse it to get defn, count and synonym lists
215 char dfn[MAX_WD_LEN];
217 for (int j = 0; j < nmeanings; j++) {
218 readLine(pdfile, buf, (MAX_LN_LEN-1));
224 // store away the part of speech for later use
227 np = mystr_indexOfChar(p,'|');
230 pos = mythesstrdup(p);
233 pos = mythesstrdup("");
236 // count the number of fields in the remaining line
239 np = mystr_indexOfChar(d,'|');
243 np = mystr_indexOfChar(d,'|');
246 pm->psyns = (char **) malloc(nf*sizeof(char*));
248 // fill in the synonym list
250 for (int j = 0; j < nf; j++) {
251 np = mystr_indexOfChar(d,'|');
254 pm->psyns[j] = mythesstrdup(d);
257 pm->psyns[j] = mythesstrdup(d);
261 // add pos to first synonym to create the definition
263 int m = strlen(pm->psyns[0]);
264 if ((k+m) < (MAX_WD_LEN - 1)) {
267 strncpy((dfn+k+1),(pm->psyns[0]),m+1);
268 pm->defn = mythesstrdup(dfn);
270 pm->defn = mythesstrdup(pm->psyns[0]);
283 void MyThes::CleanUpAfterLookup(mentry ** pme, int nmeanings)
286 if (nmeanings == 0) return;
287 if ((*pme) == NULL) return;
291 for (int i = 0; i < nmeanings; i++) {
292 int count = pm->count;
293 for (int j = 0; j < count; j++) {
294 if (pm->psyns[j]) free(pm->psyns[j]);
297 if (pm->psyns) free(pm->psyns);
299 if (pm->defn) free(pm->defn);
311 // read a line of text from a text file stripping
312 // off the line terminator and replacing it with
313 // a null string terminator.
314 // returns: -1 on error or the number of characters in
315 // in the returning string
317 // A maximum of nc characters will be returned
319 int MyThes::readLine(FILE * pf, char * buf, int nc)
322 if (fgets(buf,nc,pf)) {
331 // performs a binary search on null terminated character
334 // returns: -1 on not found
335 // index of wrd in the list[]
337 int MyThes::binsearch(char * sw, char* list[], int nlst)
339 int lp, up, mp, j, indx;
343 if (strcmp(sw,list[lp]) < 0) return -1;
344 if (strcmp(sw,list[up]) > 0) return -1;
346 mp = (int)((lp+up) >> 1);
347 j = strcmp(sw,list[mp]);
355 if (lp > up) return -1;
360 char * MyThes::get_th_encoding()
362 if (encoding) return encoding;