Move it to 3rdparty/ directory alongside the other ones.
include $(top_srcdir)/config/common.am
-DIST_SUBDIRS = boost hunspell libiconv zlib
+DIST_SUBDIRS = boost hunspell mythes libiconv zlib
if USE_INCLUDED_BOOST
if LYX_USE_STD_REGEX
HUNSPELL = hunspell
endif
+if USE_INCLUDED_MYTHES
+MYTHES = mythes
+endif
+
if USE_INCLUDED_ICONV
ICONV = libiconv
endif
ZLIB = zlib
endif
-SUBDIRS = $(BOOST) $(HUNSPELL) $(ICONV) $(ZLIB)
+SUBDIRS = $(BOOST) $(HUNSPELL) $(MYTHES) $(ICONV) $(ZLIB)
--- /dev/null
+Kevin Hendricks <kevin.hendricks@sympatico.ca>
+Németh László <nemeth@openoffice.org>
+Caolán McNamara <caolanm@redhat.com>
--- /dev/null
+/*
+ * Copyright 2003 Kevin B. Hendricks, Stratford, Ontario, Canada
+ * And Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. All modifications to the source code must be clearly marked as
+ * such. Binary redistributions based on modified source code
+ * must be clearly marked as modified versions in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
--- /dev/null
+MyThes is a simple thesaurus that uses a structured
+text data file and an index file with binary search
+to look up words and phrases and return information
+on part of speech, meanings, and synonyms
+
+MyThes was originally written to provide a thesaurus
+for the OpenOffice.org project
+
+The Main features of MyThes are:
+
+1. written in C++ to make it easier to interface with
+ LibreOffice, OpenOffice, AbiWord, Pspell, etc
+
+2. it is stateless, uses no static variables and
+ should be completely reentrant with no ifdefs
+
+3. it compiles with -ansi and -pedantic and -Wall
+ with no warnings so it should be quite portable
+
+4. it uses a simple perl program to read the structured
+ text file and create the index needed for binary
+ searching
+
+5. it is very simple with *lots* of comments.
+ The main "smarts" are in the structure of the
+ text file that makes up the thesaurus data
+
+6. It comes with a ready-to-go structured thesaurus
+ data file for en_US extracted from the WordNet-2.0 data.
+
+ Please see WordNet_license.txt and WordNet_readme.txt
+ for more information on the very useful project!
+
+ See http://www.danielnaber.de/wn2ooo/ for utilities to
+ regenerate an up to date English thesaurus from the most
+ recent WordNet data.
+
+7. The source code has a BSD license (and no advertising clause)
+
+
+MyThes comes with a simple example program that looks up some words and returns
+meanings and synonyms.
+
+To build it simply do the following:
+
+unzip mythes.zip
+cd mythes
+./configure
+make
+
+To run the example program:
+./example th_en_US_new.idx th_en_US_new.dat checkme.lst
+
+To run the example program with stemming and morphological generation:
+e.g. to check mouse, mice, rodents, eats, eaten, ate, eating etc. words
+./example morph.idx morph.dat morph.lst morph.aff morph.dic
+
+NOTE: this is only an example and test environment for dictionary developers,
+full English stemming and morphological generation needs an improved
+English Hunspell dictionary.
+
+László Németh <nemeth at OO.o>
+Kevin Hendricks <kevin.hendricks@sympatico.ca>
--- /dev/null
+#include "COPYING"
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <limits>
+#include <vector>
+
+#include "mythes.hxx"
+
+// Construct a thesaurus from an index file (idxpath) and a data file
+// (datpath).  All members start out empty; if thInitialize() reports
+// anything other than 1, an error is written to stderr and whatever was
+// partially loaded is released again, leaving an object on which
+// Lookup() finds nothing.
+MyThes::MyThes(const char* idxpath, const char * datpath)
+    : nw(0), list(NULL), offst(NULL), encoding(NULL), pdfile(NULL)
+{
+    const bool loaded = (thInitialize(idxpath, datpath) == 1);
+    if (loaded) return;
+
+    fprintf(stderr,"Error - can't open %s or %s\n",idxpath, datpath);
+    fflush(stderr);
+    thCleanup();
+    // did not initialize properly - throw exception?
+}
+
+
+// Destructor: hands all owned resources (data file handle, word list,
+// offset table, encoding string) back through thCleanup().
+MyThes::~MyThes()
+{
+    this->thCleanup();
+}
+
+
+// Load the index file into memory and open the data file.
+//
+// The index file is read line by line: the first line is stored as the
+// encoding name, the second line is the number of entries, and every
+// following line has the form "word|offset".  Reading stops at the first
+// empty line or at end of file.  Entries beyond the announced count and
+// lines without a '|' are ignored.
+//
+// returns: 1 on success, 0 on any failure (file cannot be opened, bad
+//          entry count, out of memory).  On failure the members may be
+//          partially populated; the caller is expected to run thCleanup().
+int MyThes::thInitialize(const char* idxpath, const char* datpath)
+{
+
+    // open the index file
+    FILE * pifile = fopen(idxpath,"r");
+    if (!pifile) {
+        return 0;
+    }
+
+    // parse in encoding and index size
+    std::vector<char> buffer(MAX_WD_LEN);
+    char * wrd = &buffer[0];
+    readLine(pifile,wrd,MAX_WD_LEN);
+    encoding = mystrdup(wrd);
+
+    // if the size line is missing, wrd still holds the encoding line;
+    // treat that as "no size" instead of parsing stale text
+    int idxsz = (readLine(pifile,wrd,MAX_WD_LEN) >= 0) ? atoi(wrd) : 0;
+
+    // refuse counts whose pointer table could not be sized with an int
+    const size_t maxEntries =
+        (size_t)std::numeric_limits<int>::max() / sizeof(char*);
+    if (idxsz <= 0 || (size_t)idxsz > maxEntries) {
+        fprintf(stderr,"Error - bad index %d\n", idxsz);
+        fclose(pifile);
+        return 0;
+    }
+
+    // now allocate list, offst for the given size
+    list = (char**) calloc(idxsz,sizeof(char*));
+    offst = (unsigned int*) calloc(idxsz,sizeof(unsigned int));
+
+    if ( (!(list)) || (!(offst)) ) {
+        fprintf(stderr,"Error - bad memory allocation\n");
+        fclose(pifile);
+        return 0;
+    }
+
+    // now parse the remaining lines of the index
+    int len = readLine(pifile,wrd,MAX_WD_LEN);
+    while (len > 0)
+    {
+        int np = mystr_indexOfChar(wrd,'|');
+        if (nw < idxsz) {
+            if (np >= 0) {
+                // split "word|offset" at the separator
+                *(wrd+np) = '\0';
+                list[nw] = (char *)calloc(1,(np+1));
+                if (!list[nw]) {
+                    fprintf(stderr,"Error - bad memory allocation\n");
+                    fflush(stderr);
+                    fclose(pifile);
+                    return 0;
+                }
+                memcpy((list[nw]),wrd,np);
+                offst[nw] = atoi(wrd+np+1);
+                nw++;
+            }
+        }
+        len = readLine(pifile,wrd,MAX_WD_LEN);
+    }
+
+    fclose(pifile);
+
+    /* next open the data file */
+    pdfile = fopen(datpath,"r");
+    if (!pdfile) {
+        return 0;
+    }
+
+    return 1;
+}
+
+
+// Release everything thInitialize() may have acquired and return the
+// object to its empty state.  Safe to call on a partially initialized
+// or already cleaned-up object.
+void MyThes::thCleanup()
+{
+    /* the data file goes first */
+    if (pdfile) {
+        fclose(pdfile);
+        pdfile = NULL;
+    }
+
+    /* then every word string, followed by the table that held them */
+    if (list) {
+        for (int entry = 0; entry < nw; ++entry) {
+            free(list[entry]);      // free(NULL) is a no-op
+            list[entry] = 0;
+        }
+        free((void*)list);
+        list = NULL;
+    }
+
+    free((void*)encoding);
+    encoding = NULL;
+
+    free((void*)offst);
+    offst = NULL;
+
+    nw = 0;
+}
+
+
+
+// Look up the first len bytes of pText in the index and, if found, parse
+// its record from the data file.
+//
+// A record is one header line "...|count" followed by count lines of the
+// form "pos|syn1|syn2|...".  For every such line one mentry is produced:
+// psyns holds the '|'-separated fields after pos, count is their number,
+// and defn is "pos syn1" (or just syn1 if that would not fit in
+// MAX_WD_LEN bytes).
+//
+// returns: the number of meanings stored in *pme, or 0 (with *pme set to
+//          NULL) if the word is unknown or anything goes wrong.  If the
+//          data file ends before all announced meanings were read, only
+//          the meanings actually read are returned.
+//
+// note: calling routine should call CleanUpAfterLookup with the original
+// meaning pointer and the returned count to properly deallocate memory
+
+int MyThes::Lookup(const char * pText, int len, mentry** pme)
+{
+
+    *pme = NULL;
+
+    // handle the case of missing file or file related errors
+    if (! pdfile) return 0;
+
+    // a missing or negative-length key cannot match anything
+    if (!pText || len < 0) return 0;
+
+    /* copy search word and make sure null terminated */
+    std::vector<char> buffer((size_t)len + 1);
+    char * wrd = &buffer[0];
+    memcpy(wrd,pText,len);
+
+    /* find it in the list */
+    int idx = nw > 0 ? binsearch(wrd,list,nw) : -1;
+    if (idx < 0) return 0;
+
+    // now seek to the offset
+    long offset = (long) offst[idx];
+    if (fseek(pdfile,offset,SEEK_SET)) {
+        return 0;
+    }
+
+    // grab the count of the number of meanings
+    // and allocate a list of meaning entries
+    char * buf = (char *) malloc( MAX_LN_LEN );
+    if (!buf) return 0;
+    // a failed read leaves buf uninitialized, so stop before parsing it
+    if (readLine(pdfile, buf, (MAX_LN_LEN-1)) < 0) {
+        free(buf);
+        return 0;
+    }
+    int np = mystr_indexOfChar(buf,'|');
+    if (np < 0) {
+        free(buf);
+        return 0;
+    }
+    int nmeanings = atoi(buf+np+1);
+    if (nmeanings < 0 ||
+        (size_t)nmeanings > (size_t)std::numeric_limits<int>::max() / sizeof(mentry))
+        nmeanings = 0;
+    *pme = (mentry*)(nmeanings ? malloc(nmeanings * sizeof(mentry)) : NULL);
+    if (!(*pme)) {
+        free(buf);
+        return 0;
+    }
+
+    // now read in each meaning and parse it to get defn, count and synonym lists
+    mentry* pm = *(pme);
+    char dfn[MAX_WD_LEN];
+    int nread = 0;          // entries of *pme that have been initialized
+    bool failed = false;    // set when an allocation fails mid-record
+
+    for (; nread < nmeanings; nread++, pm++) {
+
+        // data file ended early: keep what was read so far
+        if (readLine(pdfile, buf, (MAX_LN_LEN-1)) < 0) break;
+
+        pm->count = 0;
+        pm->psyns = NULL;
+        pm->defn = NULL;
+
+        // store away the part of speech for later use
+        char * p = buf;
+        char * pos = NULL;
+        np = mystr_indexOfChar(p,'|');
+        if (np >= 0) {
+            *(buf+np) = '\0';
+            pos = mystrdup(p);
+            p = p + np + 1;
+        } else {
+            pos = mystrdup("");
+        }
+
+        // count the number of fields in the remaining line
+        int nf = 1;
+        char * d = p;
+        np = mystr_indexOfChar(d,'|');
+        while ( np >= 0 ) {
+            nf++;
+            d = d + np + 1;
+            np = mystr_indexOfChar(d,'|');
+        }
+        pm->psyns = (char **) malloc(nf*sizeof(char*));
+
+        if (!pos || !pm->psyns) {
+            // out of memory: count stays 0, so the cleanup below only
+            // releases the (possibly NULL) psyns array of this entry
+            free(pos);
+            failed = true;
+            nread++;
+            break;
+        }
+        pm->count = nf;
+
+        // fill in the synonym list; the test must match the counting
+        // loop above (np >= 0) so that empty fields are split correctly
+        d = p;
+        for (int jj = 0; jj < nf; jj++)
+        {
+            np = mystr_indexOfChar(d,'|');
+            if (np >= 0)
+            {
+                *(d+np) = '\0';
+                pm->psyns[jj] = mystrdup(d);
+                d = d + np + 1;
+            }
+            else
+            {
+                pm->psyns[jj] = mystrdup(d);
+            }
+        }
+
+        // add pos to first synonym to create the definition
+        if (pm->psyns[0])
+        {
+            int k = strlen(pos);
+            int m = strlen(pm->psyns[0]);
+            if ((k+m) < (MAX_WD_LEN - 1)) {
+                memcpy(dfn,pos,k);
+                *(dfn+k) = ' ';
+                memcpy((dfn+k+1),(pm->psyns[0]),m+1);   // includes the '\0'
+                pm->defn = mystrdup(dfn);
+            } else {
+                pm->defn = mystrdup(pm->psyns[0]);
+            }
+        }
+        free(pos);
+    }
+    free(buf);
+
+    if (failed) {
+        // nread >= 1 here, so this frees every entry and the array itself
+        CleanUpAfterLookup(pme, nread);
+        return 0;
+    }
+
+    if (nread == 0) {
+        // CleanUpAfterLookup ignores a zero count, so release the array here
+        free(*pme);
+        *pme = NULL;
+    }
+
+    return nread;
+}
+
+
+
+// Free a meaning array produced by Lookup(): every synonym string, every
+// synonym table, every definition string and finally the array itself.
+// *pme is reset to NULL.  Nothing happens when nmeanings is 0 or *pme is
+// already NULL.
+void MyThes::CleanUpAfterLookup(mentry ** pme, int nmeanings)
+{
+    if (nmeanings == 0 || (*pme) == NULL) return;
+
+    mentry * cur = *pme;
+    int remaining = nmeanings;
+
+    while (remaining-- > 0) {
+        const int nsyn = cur->count;
+        for (int s = 0; s < nsyn; ++s) {
+            free(cur->psyns[s]);        // free(NULL) is a no-op
+            cur->psyns[s] = NULL;
+        }
+        free(cur->psyns);
+        cur->psyns = NULL;
+
+        free(cur->defn);
+        cur->defn = NULL;
+
+        cur->count = 0;
+        ++cur;
+    }
+
+    free(*pme);
+    *pme = NULL;
+}
+
+
+// Read one line from pf into buf and strip the trailing line terminator
+// (see mychomp).
+//
+// The read is done with fgets(buf, nc, pf), so buf must have room for
+// nc bytes and at most nc-1 characters of the line are stored.
+//
+// returns: the length of the string left in buf, or -1 if fgets
+//          could not read anything (end of file or error)
+
+int MyThes::readLine(FILE * pf, char * buf, int nc)
+{
+    if (!fgets(buf,nc,pf)) {
+        return -1;
+    }
+
+    mychomp(buf);
+    return strlen(buf);
+}
+
+
+
+// Binary search for sw in _list, an array of nlst null terminated
+// strings sorted in strcmp() order.
+//
+// returns: index of sw in _list[], or -1 if it is not present
+//          (including when the list is empty)
+
+int MyThes::binsearch(char * sw, char* _list[], int nlst)
+{
+    // an empty list has no first/last element to compare against
+    if (nlst <= 0) return -1;
+
+    int lp = 0;
+    int up = nlst-1;
+
+    // quick rejection of words outside the range covered by the list
+    if (strcmp(sw,_list[lp]) < 0) return -1;
+    if (strcmp(sw,_list[up]) > 0) return -1;
+
+    while (lp <= up) {
+        // lp + (up-lp)/2 cannot overflow, unlike (lp+up)/2
+        const int mp = lp + ((up - lp) >> 1);
+        const int j = strcmp(sw,_list[mp]);
+        if (j > 0) {
+            lp = mp + 1;
+        } else if (j < 0) {
+            up = mp - 1;
+        } else {
+            return mp;
+        }
+    }
+    return -1;
+}
+
+// Return the encoding name read from the index file.  The string is
+// owned by this object (it is freed in thCleanup) and is NULL when
+// nothing has been loaded.
+char * MyThes::get_th_encoding()
+{
+    return this->encoding;
+}
+
+
+// Duplicate s into freshly malloc'ed memory (terminator included).
+// returns: the copy, to be released with free(), or NULL when s is NULL
+//          or the allocation fails
+char * MyThes::mystrdup(const char * s)
+{
+    if (!s) return NULL;
+
+    const int bytes = strlen(s)+1;
+    char * copy = (char *) malloc(bytes);
+    if (copy) {
+        memcpy(copy,s,bytes);
+    }
+    return copy;
+}
+
+// Strip a trailing line terminator from s in place.  A final '\r' or
+// '\n' is overwritten with '\0', and so is a '\r' directly before it,
+// which covers "\n", "\r" and "\r\n" endings.
+void MyThes::mychomp(char * s)
+{
+    const int n = strlen(s);
+
+    if (n > 0) {
+        char & last = s[n-1];
+        if (last == '\r' || last == '\n') last = '\0';
+    }
+    if (n > 1 && s[n-2] == '\r') {
+        s[n-2] = '\0';
+    }
+}
+
+
+// Find the first occurrence of character c in string d.
+// returns: its zero-based index, or -1 if c does not occur
+int MyThes::mystr_indexOfChar(const char * d, int c)
+{
+    const char * hit = strchr(d,c);
+    return hit ? (int)(hit-d) : -1;
+}
+
--- /dev/null
+#ifndef _MYTHES_HXX_
+#define _MYTHES_HXX_
+
+// some maximum sizes for buffers
+#define MAX_WD_LEN 200
+#define MAX_LN_LEN 16384
+
+
+// one meaning of a looked-up word: definition, count of synonyms and synonym list
+struct mentry {
+ char* defn; /* definition string; may be NULL */
+ int count; /* number of entries in psyns */
+ char** psyns; /* array of count synonym strings */
+};
+
+
+class MyThes
+{
+ // Thesaurus reader: keeps the index in memory and the data file open.
+ int nw; /* number of entries in thesaurus */
+ char** list; /* stores word list */
+ unsigned int* offst; /* stores offset list (positions in the data file) */
+ char * encoding; /* stores text encoding; */
+ // NOTE: FILE is used below but this header includes nothing itself; include <stdio.h> first
+ FILE *pdfile; /* open data file, NULL if unavailable */
+
+ // disallow default-constructor, copy-constructor and assignment-operator for now
+ MyThes();
+ MyThes(const MyThes &);
+ MyThes & operator = (const MyThes &);
+
+public:
+ MyThes(const char* idxpath, const char* datpath);
+ ~MyThes();
+
+ // lookup text in index and return number of meanings
+ // each meaning entry has a definition, synonym count and pointer
+ // when complete return the *original* meaning entry and count via
+ // CleanUpAfterLookup to properly handle memory deallocation
+
+ int Lookup(const char * pText, int len, mentry** pme);
+ // free the meaning array returned through pme by Lookup and reset *pme to NULL
+ void CleanUpAfterLookup(mentry** pme, int nmean);
+ // encoding name read from the index file; owned by this object, do not free
+ char* get_th_encoding();
+
+private:
+ // Open index and dat files and load list array
+ int thInitialize (const char* indxpath, const char* datpath);
+
+ // internal close and cleanup dat and idx files
+ void thCleanup ();
+
+ // read a text line (\n terminated) stripping off line terminator
+ int readLine(FILE * pf, char * buf, int nc);
+
+ // binary search on null terminated character strings
+ int binsearch(char * wrd, char* list[], int nlst);
+
+ // string duplication routine
+ char * mystrdup(const char * p);
+
+ // remove cross-platform text line end characters
+ void mychomp(char * s);
+
+ // return index of char in string
+ int mystr_indexOfChar(const char * d, int c);
+
+};
+
+#endif
+
+
+
+
+
--- /dev/null
+include $(top_srcdir)/config/common.am
+
+# Static library built from the bundled MyThes 1.2.5 sources; not installed.
+noinst_LIBRARIES = liblyxmythes.a
+
+EXTRA_DIST = \
+	1.2.5/AUTHORS \
+	1.2.5/COPYING \
+	1.2.5/README
+
+# The bundled header is mythes.hxx, the name mythes.cxx includes.
+liblyxmythes_a_SOURCES = \
+	1.2.5/mythes.cxx \
+	1.2.5/mythes.hxx
break])
AC_LANG_POP(C++)
fi
- if test $use_included_mythes = no ; then
- AC_DEFINE(USE_EXTERNAL_MYTHES, 1, [Define as 1 to use an external MyThes library])
- AC_DEFINE_UNQUOTED(MYTHES_H_LOCATION,$mythes_h_location,[Location of mythes.hxx])
- AC_SUBST(MYTHES_LIBS)
- else
+ if test $use_included_mythes = yes ; then
+ mythes_h_location="<mythes.hxx>"
+ MYTHES_INCLUDES='-I$(top_srcdir)/3rdparty/mythes/1.2.5/'
+ MYTHES_LIBS='$(top_builddir)/3rdparty/mythes/liblyxmythes.a'
lyx_included_libs="$lyx_included_libs mythes"
fi
AM_CONDITIONAL(USE_INCLUDED_MYTHES, test x$use_included_mythes = xyes)
+ AC_DEFINE_UNQUOTED(MYTHES_H_LOCATION,$mythes_h_location,[Location of mythes.hxx])
+ AC_SUBST(MYTHES_INCLUDES)
+ AC_SUBST(MYTHES_LIBS)
AC_MSG_CHECKING([whether to use included MyThes library])
AC_MSG_RESULT([$use_included_mythes])
])
3rdparty/Makefile \
3rdparty/boost/Makefile \
3rdparty/hunspell/Makefile \
+ 3rdparty/mythes/Makefile \
3rdparty/libiconv/Makefile \
$ICONV_ICONV_H_IN \
3rdparty/zlib/Makefile \
AM_CPPFLAGS += -I$(top_srcdir)/src
AM_CPPFLAGS += $(BOOST_INCLUDES) $(ICONV_INCLUDES) $(ZLIB_INCLUDES)
-AM_CPPFLAGS += $(ENCHANT_CFLAGS) $(HUNSPELL_CFLAGS)
+AM_CPPFLAGS += $(ENCHANT_CFLAGS) $(HUNSPELL_CFLAGS) $(MYTHES_INCLUDES)
AM_CPPFLAGS += $(QT_CPPFLAGS) $(QT_CORE_INCLUDES)
if BUILD_CLIENT_SUBDIR
#include "support/lstrings.h"
#include "support/os.h"
-#ifdef USE_EXTERNAL_MYTHES
-#include MYTHES_H_LOCATION
-#else
#include <cstdio>
-#include "support/mythes/mythes.hxx"
-#endif
+#include MYTHES_H_LOCATION
#include "frontends/alert.h"
unicode.cpp \
unicode.h \
weighted_btree.h
-if USE_INCLUDED_MYTHES
-liblyxsupport_a_SOURCES += \
- mythes/mythes.cxx \
- mythes/mythes.hxx \
- mythes/license.readme
-endif
#if INSTALL_MACOSX
#liblyxsupport_a_SOURCES += \
+++ /dev/null
-/*
- * Copyright 2003 Kevin B. Hendricks, Stratford, Ontario, Canada
- * And Contributors. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * 3. All modifications to the source code must be clearly marked as
- * such. Binary redistributions based on modified source code
- * must be clearly marked as modified versions in the documentation
- * and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
- * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
- * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- */
+++ /dev/null
-#include "license.readme"
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-#include <errno.h>
-
-#include "mythes.hxx"
-
-// some basic utility routines
-
-
-// string duplication routine
-char * mythesstrdup(const char * p)
-{
-
- int sl = strlen(p) + 1;
- char * d = (char *)malloc(sl);
- if (d) {
- memcpy(d,p,sl);
- return d;
- }
- return NULL;
-}
-
-
-// return index of char in string
-int mystr_indexOfChar(const char * d, int c)
-{
- const char * p = strchr(d,c);
- if (p) return (int)(p-d);
- return -1;
-}
-
-
-// remove cross-platform text line end characters
-void mytheschomp(char * s)
-{
- int k = strlen(s);
- if ((k > 0) && ((*(s+k-1)=='\r') || (*(s+k-1)=='\n'))) *(s+k-1) = '\0';
- if ((k > 1) && (*(s+k-2) == '\r')) *(s+k-2) = '\0';
-}
-
-
-
-MyThes::MyThes(const char* idxpath, const char * datpath)
-{
- nw = 0;
- encoding = NULL;
- list = NULL;
- offst = NULL;
-
- if (thInitialize(idxpath, datpath) != 1) {
- fprintf(stderr,"Error - can't open %s or %s\n",idxpath, datpath);
- fflush(stderr);
- if (encoding) free((void*)encoding);
- if (list) free((void*)list);
- if (offst) free((void*)offst);
- // did not initialize properly - throw exception?
- }
-}
-
-
-MyThes::~MyThes()
-{
- if (thCleanup() != 1) {
- /* did not cleanup properly - throw exception? */
- }
- if (encoding) free((void*)encoding);
- encoding = NULL;
- list = NULL;
- offst = NULL;
-}
-
-
-int MyThes::thInitialize(const char* idxpath, const char* datpath)
-{
-
- // open the index file
- FILE * pifile = fopen(idxpath,"r");
- if (!pifile) {
- pifile = NULL;
- return 0;
- }
-
- // parse in encoding and index size */
- char * wrd;
- wrd = (char *)calloc(1, MAX_WD_LEN);
- int len = readLine(pifile,wrd,MAX_WD_LEN);
- encoding = mythesstrdup(wrd);
- len = readLine(pifile,wrd,MAX_WD_LEN);
- int idxsz = atoi(wrd);
-
-
- // now allocate list, offst for the given size
- list = (char**) calloc(idxsz,sizeof(char*));
- offst = (unsigned int*) calloc(idxsz,sizeof(unsigned int));
-
- if ( (!(list)) || (!(offst)) ) {
- fprintf(stderr,"Error - bad memory allocation\n");
- fflush(stderr);
- return 0;
- }
-
- // now parse the remaining lines of the index
- len = readLine(pifile,wrd,MAX_WD_LEN);
- while (len > 0)
- {
- int np = mystr_indexOfChar(wrd,'|');
- if (nw < idxsz) {
- if (np >= 0) {
- *(wrd+np) = '\0';
- list[nw] = (char *)calloc(1,(np+1));
- memcpy((list[nw]),wrd,np);
- offst[nw] = atoi(wrd+np+1);
- nw++;
- }
- }
- len = readLine(pifile,wrd,MAX_WD_LEN);
- }
-
- free((void *)wrd);
- fclose(pifile);
- pifile=NULL;
-
- /* next open the data file */
- pdfile = fopen(datpath,"r");
- if (!pdfile) {
- pdfile = NULL;
- return 0;
- }
-
- return 1;
-}
-
-
-int MyThes::thCleanup()
-{
- /* first close the data file */
- if (pdfile) {
- fclose(pdfile);
- pdfile=NULL;
- }
-
- /* now free up all the allocated strings on the list */
- for (int i=0; i < nw; i++)
- {
- if (list[i]) {
- free(list[i]);
- list[i] = 0;
- }
- }
-
- if (list) free((void*)list);
- if (offst) free((void*)offst);
-
- nw = 0;
- return 1;
-}
-
-
-
-// lookup text in index and count of meanings and a list of meaning entries
-// with each entry having a synonym count and pointer to an
-// array of char * (i.e the synonyms)
-//
-// note: calling routine should call CleanUpAfterLookup with the original
-// meaning point and count to properly deallocate memory
-
-int MyThes::Lookup(const char * pText, int len, mentry** pme)
-{
-
- *pme = NULL;
-
- // handle the case of missing file or file related errors
- if (! pdfile) return 0;
-
- long offset = 0;
-
- /* copy search word and make sure null terminated */
- char * wrd = (char *) calloc(1,(len+1));
- memcpy(wrd,pText,len);
-
- /* find it in the list */
- int idx = binsearch(wrd,list,nw);
- free(wrd);
- if (idx < 0) return 0;
-
- // now seek to the offset
- offset = (long) offst[idx];
- int rc = fseek(pdfile,offset,SEEK_SET);
- if (rc) {
- return 0;
- }
-
- // grab the count of the number of meanings
- // and allocate a list of meaning entries
- char * buf = NULL;
- buf = (char *) malloc( MAX_LN_LEN );
- if (!buf) return 0;
- readLine(pdfile, buf, (MAX_LN_LEN-1));
- int np = mystr_indexOfChar(buf,'|');
- if (np < 0) {
- free(buf);
- return 0;
- }
- int nmeanings = atoi(buf+np+1);
- *pme = (mentry*) malloc( nmeanings * sizeof(mentry) );
- if (!(*pme)) {
- free(buf);
- return 0;
- }
-
- // now read in each meaning and parse it to get defn, count and synonym lists
- mentry* pm = *(pme);
- char dfn[MAX_WD_LEN];
-
- for (int j = 0; j < nmeanings; j++) {
- readLine(pdfile, buf, (MAX_LN_LEN-1));
-
- pm->count = 0;
- pm->psyns = NULL;
- pm->defn = NULL;
-
- // store away the part of speech for later use
- char * p = buf;
- char * pos = NULL;
- np = mystr_indexOfChar(p,'|');
- if (np >= 0) {
- *(buf+np) = '\0';
- pos = mythesstrdup(p);
- p = p + np + 1;
- } else {
- pos = mythesstrdup("");
- }
-
- // count the number of fields in the remaining line
- int nf = 1;
- char * d = p;
- np = mystr_indexOfChar(d,'|');
- while ( np >= 0 ) {
- nf++;
- d = d + np + 1;
- np = mystr_indexOfChar(d,'|');
- }
- pm->count = nf;
- pm->psyns = (char **) malloc(nf*sizeof(char*));
-
- // fill in the synonym list
- d = p;
- for (int j = 0; j < nf; j++) {
- np = mystr_indexOfChar(d,'|');
- if (np > 0) {
- *(d+np) = '\0';
- pm->psyns[j] = mythesstrdup(d);
- d = d + np + 1;
- } else {
- pm->psyns[j] = mythesstrdup(d);
- }
- }
-
- // add pos to first synonym to create the definition
- int k = strlen(pos);
- int m = strlen(pm->psyns[0]);
- if ((k+m) < (MAX_WD_LEN - 1)) {
- strncpy(dfn,pos,k);
- *(dfn+k) = ' ';
- strncpy((dfn+k+1),(pm->psyns[0]),m+1);
- pm->defn = mythesstrdup(dfn);
- } else {
- pm->defn = mythesstrdup(pm->psyns[0]);
- }
- free(pos);
- pm++;
-
- }
- free(buf);
-
- return nmeanings;
-}
-
-
-
-void MyThes::CleanUpAfterLookup(mentry ** pme, int nmeanings)
-{
-
- if (nmeanings == 0) return;
- if ((*pme) == NULL) return;
-
- mentry * pm = *pme;
-
- for (int i = 0; i < nmeanings; i++) {
- int count = pm->count;
- for (int j = 0; j < count; j++) {
- if (pm->psyns[j]) free(pm->psyns[j]);
- pm->psyns[j] = NULL;
- }
- if (pm->psyns) free(pm->psyns);
- pm->psyns = NULL;
- if (pm->defn) free(pm->defn);
- pm->defn = NULL;
- pm->count = 0;
- pm++;
- }
- pm = *pme;
- free(pm);
- *pme = NULL;
- return;
-}
-
-
-// read a line of text from a text file stripping
-// off the line terminator and replacing it with
-// a null string terminator.
-// returns: -1 on error or the number of characters in
-// in the returning string
-
-// A maximum of nc characters will be returned
-
-int MyThes::readLine(FILE * pf, char * buf, int nc)
-{
-
- if (fgets(buf,nc,pf)) {
- mytheschomp(buf);
- return strlen(buf);
- }
- return -1;
-}
-
-
-
-// performs a binary search on null terminated character
-// strings
-//
-// returns: -1 on not found
-// index of wrd in the list[]
-
-int MyThes::binsearch(char * sw, char* list[], int nlst)
-{
- int lp, up, mp, j, indx;
- lp = 0;
- up = nlst-1;
- indx = -1;
- if (strcmp(sw,list[lp]) < 0) return -1;
- if (strcmp(sw,list[up]) > 0) return -1;
- while (indx < 0 ) {
- mp = (int)((lp+up) >> 1);
- j = strcmp(sw,list[mp]);
- if ( j > 0) {
- lp = mp + 1;
- } else if (j < 0 ) {
- up = mp - 1;
- } else {
- indx = mp;
- }
- if (lp > up) return -1;
- }
- return indx;
-}
-
-char * MyThes::get_th_encoding()
-{
- if (encoding) return encoding;
- return NULL;
-}
-
+++ /dev/null
-#ifndef _MYTHES_HXX_
-#define _MYTHES_HXX_
-
-// some maximum sizes for buffers
-#define MAX_WD_LEN 200
-#define MAX_LN_LEN 16384
-
-
-// a meaning with definition, count of synonyms and synonym list
-struct mentry {
- char* defn;
- int count;
- char** psyns;
-};
-
-
-class MyThes
-{
-
- int nw; /* number of entries in thesaurus */
- char** list; /* stores word list */
- unsigned int* offst; /* stores offset list */
- char * encoding; /* stores text encoding; */
-
- FILE *pdfile;
-
- // disallow copy-constructor and assignment-operator for now
- MyThes();
- MyThes(const MyThes &);
- MyThes & operator = (const MyThes &);
-
-public:
- MyThes(const char* idxpath, const char* datpath);
- ~MyThes();
-
- // lookup text in index and return number of meanings
- // each meaning entry has a defintion, synonym count and pointer
- // when complete return the *original* meaning entry and count via
- // CleanUpAfterLookup to properly handle memory deallocation
-
- int Lookup(const char * pText, int len, mentry** pme);
-
- void CleanUpAfterLookup(mentry** pme, int nmean);
-
- char* get_th_encoding();
-
-private:
- // Open index and dat files and load list array
- int thInitialize (const char* indxpath, const char* datpath);
-
- // internal close and cleanup dat and idx files
- int thCleanup ();
-
- // read a text line (\n terminated) stripping off line terminator
- int readLine(FILE * pf, char * buf, int nc);
-
- // binary search on null terminated character strings
- int binsearch(char * wrd, char* list[], int nlst);
-
-};
-
-#endif
-
-
-
-
-