From d04a8cf58fee7162bfab3d57191f6059c46b8e42 Mon Sep 17 00:00:00 2001 From: Jean-Marc Lasgouttes Date: Sun, 20 Dec 2020 19:39:35 +0100 Subject: [PATCH] Remove parsers in included hunspell --- .../hunspell/1.7.0/src/parsers/Makefile.am | 18 -- .../1.7.0/src/parsers/firstparser.cxx | 65 ---- .../1.7.0/src/parsers/firstparser.hxx | 56 ---- .../hunspell/1.7.0/src/parsers/htmlparser.cxx | 88 ----- .../hunspell/1.7.0/src/parsers/htmlparser.hxx | 56 ---- .../1.7.0/src/parsers/latexparser.cxx | 277 ---------------- .../1.7.0/src/parsers/latexparser.hxx | 65 ---- .../hunspell/1.7.0/src/parsers/manparser.cxx | 98 ------ .../hunspell/1.7.0/src/parsers/manparser.hxx | 58 ---- .../hunspell/1.7.0/src/parsers/odfparser.cxx | 86 ----- .../hunspell/1.7.0/src/parsers/odfparser.hxx | 57 ---- .../hunspell/1.7.0/src/parsers/testparser.cxx | 86 ----- .../hunspell/1.7.0/src/parsers/textparser.cxx | 302 ------------------ .../hunspell/1.7.0/src/parsers/textparser.hxx | 99 ------ .../hunspell/1.7.0/src/parsers/xmlparser.cxx | 253 --------------- .../hunspell/1.7.0/src/parsers/xmlparser.hxx | 76 ----- 3rdparty/hunspell/Makefile.am | 15 - 17 files changed, 1755 deletions(-) delete mode 100644 3rdparty/hunspell/1.7.0/src/parsers/Makefile.am delete mode 100644 3rdparty/hunspell/1.7.0/src/parsers/firstparser.cxx delete mode 100644 3rdparty/hunspell/1.7.0/src/parsers/firstparser.hxx delete mode 100644 3rdparty/hunspell/1.7.0/src/parsers/htmlparser.cxx delete mode 100644 3rdparty/hunspell/1.7.0/src/parsers/htmlparser.hxx delete mode 100644 3rdparty/hunspell/1.7.0/src/parsers/latexparser.cxx delete mode 100644 3rdparty/hunspell/1.7.0/src/parsers/latexparser.hxx delete mode 100644 3rdparty/hunspell/1.7.0/src/parsers/manparser.cxx delete mode 100644 3rdparty/hunspell/1.7.0/src/parsers/manparser.hxx delete mode 100644 3rdparty/hunspell/1.7.0/src/parsers/odfparser.cxx delete mode 100644 3rdparty/hunspell/1.7.0/src/parsers/odfparser.hxx delete mode 100644 3rdparty/hunspell/1.7.0/src/parsers/testparser.cxx delete mode 100644 3rdparty/hunspell/1.7.0/src/parsers/textparser.cxx delete mode 100644 3rdparty/hunspell/1.7.0/src/parsers/textparser.hxx delete mode 100644 3rdparty/hunspell/1.7.0/src/parsers/xmlparser.cxx delete mode 100644 3rdparty/hunspell/1.7.0/src/parsers/xmlparser.hxx diff --git a/3rdparty/hunspell/1.7.0/src/parsers/Makefile.am b/3rdparty/hunspell/1.7.0/src/parsers/Makefile.am deleted file mode 100644 index b27551498b..0000000000 --- a/3rdparty/hunspell/1.7.0/src/parsers/Makefile.am +++ /dev/null @@ -1,18 +0,0 @@ - -AM_CPPFLAGS=-I${top_builddir}/src/hunspell - -noinst_LIBRARIES=libparsers.a -libparsers_a_SOURCES=firstparser.cxx xmlparser.cxx \ - latexparser.cxx manparser.cxx \ - textparser.cxx htmlparser.cxx \ - odfparser.cxx - -noinst_PROGRAMS=testparser -testparser_SOURCES=firstparser.cxx firstparser.hxx xmlparser.cxx \ - xmlparser.hxx latexparser.cxx latexparser.hxx \ - manparser.cxx manparser.hxx testparser.cxx \ - textparser.cxx textparser.hxx htmlparser.cxx \ - htmlparser.hxx odfparser.hxx odfparser.cxx - -# need mystrdup() -LDADD = ../hunspell/libhunspell-1.7.la diff --git a/3rdparty/hunspell/1.7.0/src/parsers/firstparser.cxx b/3rdparty/hunspell/1.7.0/src/parsers/firstparser.cxx deleted file mode 100644 index 965037b6a4..0000000000 --- a/3rdparty/hunspell/1.7.0/src/parsers/firstparser.cxx +++ /dev/null @@ -1,65 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * Copyright (C) 2002-2017 Németh László - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. - * - * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, - * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, - * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, - * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, - * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -#include -#include -#include -#include - -#include "../hunspell/csutil.hxx" -#include "firstparser.hxx" - -#ifndef W32 -using namespace std; -#endif - -FirstParser::FirstParser(const char* wordchars) - : TextParser(wordchars) { -} - -FirstParser::~FirstParser() {} - -bool FirstParser::next_token(std::string& t) { - t.clear(); - const size_t tabpos = line[actual].find('\t'); - if (tabpos != std::string::npos && tabpos > token) { - token = tabpos; - t = line[actual].substr(0, tabpos); - return true; - } - return false; -} diff --git a/3rdparty/hunspell/1.7.0/src/parsers/firstparser.hxx b/3rdparty/hunspell/1.7.0/src/parsers/firstparser.hxx deleted file mode 100644 index 07f77455ad..0000000000 --- a/3rdparty/hunspell/1.7.0/src/parsers/firstparser.hxx +++ /dev/null @@ -1,56 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * Copyright (C) 2002-2017 Németh László - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. - * - * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, - * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, - * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, - * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, - * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -#ifndef FIRSTPARSER_HXX_ -#define FIRSTPARSER_HXX_ - -#include "textparser.hxx" - -/* - * Check first word of the input line - * - */ - -class FirstParser : public TextParser { - public: - explicit FirstParser(const char* wc); - virtual ~FirstParser(); - - virtual bool next_token(std::string&); -}; - -#endif diff --git a/3rdparty/hunspell/1.7.0/src/parsers/htmlparser.cxx b/3rdparty/hunspell/1.7.0/src/parsers/htmlparser.cxx deleted file mode 100644 index 1b6573368f..0000000000 --- a/3rdparty/hunspell/1.7.0/src/parsers/htmlparser.cxx +++ /dev/null @@ -1,88 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * Copyright (C) 2002-2017 Németh László - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. - * - * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, - * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, - * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, - * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, - * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -#include -#include -#include -#include - -#include "../hunspell/csutil.hxx" -#include "htmlparser.hxx" - -#ifndef W32 -using namespace std; -#endif - -static const char* PATTERN[][2] = {{""}, - {""}, - {""}, - {""}, - {""}, - {""}, - {""}, - {""}, - {""}, - {""}, - {"<[cdata[", "]]>"}, // XML comment - {"<", ">"}}; - -#define PATTERN_LEN (sizeof(PATTERN) / (sizeof(char*) * 2)) - -static const char* PATTERN2[][2] = { - {" -#include -#include -#include - -#include "../hunspell/csutil.hxx" -#include "latexparser.hxx" - -#ifndef W32 -using namespace std; -#endif - -#define UTF8_APOS "\xe2\x80\x99" -#define APOSTROPHE "'" - -static struct { - const char* pat[2]; - int arg; -} PATTERN[] = {{{"\\(", "\\)"}, 0}, - {{"$$", "$$"}, 0}, - {{"$", "$"}, 0}, - {{"\\begin{math}", "\\end{math}"}, 0}, - {{"\\[", "\\]"}, 0}, - {{"\\begin{displaymath}", "\\end{displaymath}"}, 0}, - {{"\\begin{equation}", "\\end{equation}"}, 0}, - {{"\\begin{equation*}", "\\end{equation*}"}, 0}, - {{"\\cite", NULL}, 1}, - {{"\\nocite", NULL}, 1}, - {{"\\index", NULL}, 1}, - {{"\\label", NULL}, 1}, - {{"\\ref", NULL}, 1}, - {{"\\pageref", NULL}, 1}, - {{"\\autoref", NULL}, 1}, - {{"\\parbox", NULL}, 1}, - {{"\\begin{verbatim}", "\\end{verbatim}"}, 0}, - {{"\\verb+", "+"}, 0}, - {{"\\verb|", "|"}, 0}, - {{"\\verb#", "#"}, 0}, - {{"\\verb*", "*"}, 0}, - {{"\\documentstyle", "\\begin{document}"}, 0}, - {{"\\documentclass", "\\begin{document}"}, 0}, - // { { "\\documentclass", NULL } , 1 }, - {{"\\usepackage", NULL}, 1}, - {{"\\includeonly", NULL}, 1}, - {{"\\include", NULL}, 1}, - {{"\\input", NULL}, 1}, - {{"\\vspace", NULL}, 1}, - {{"\\setlength", NULL}, 2}, - {{"\\addtolength", NULL}, 2}, - {{"\\settowidth", NULL}, 2}, - {{"\\rule", NULL}, 2}, - {{"\\hspace", NULL}, 1}, - {{"\\vspace", NULL}, 1}, - {{"\\\\[", "]"}, 0}, - {{"\\pagebreak[", "]"}, 0}, - {{"\\nopagebreak[", "]"}, 0}, - {{"\\enlargethispage", NULL}, 1}, - {{"\\begin{tabular}", NULL}, 1}, - {{"\\addcontentsline", NULL}, 2}, - {{"\\begin{thebibliography}", NULL}, 1}, - {{"\\bibliography", NULL}, 1}, - {{"\\bibliographystyle", NULL}, 1}, - {{"\\bibitem", NULL}, 1}, - {{"\\begin", NULL}, 1}, - {{"\\end", NULL}, 1}, - {{"\\pagestyle", NULL}, 1}, - {{"\\pagenumbering", NULL}, 1}, - {{"\\thispagestyle", NULL}, 1}, - {{"\\newtheorem", NULL}, 2}, - {{"\\newcommand", NULL}, 2}, - {{"\\renewcommand", NULL}, 2}, - {{"\\setcounter", NULL}, 2}, - {{"\\addtocounter", NULL}, 1}, - {{"\\stepcounter", NULL}, 1}, - {{"\\selectlanguage", NULL}, 1}, - {{"\\inputencoding", NULL}, 1}, - {{"\\hyphenation", NULL}, 1}, - {{"\\definecolor", NULL}, 3}, - {{"\\color", NULL}, 1}, - {{"\\textcolor", NULL}, 1}, - {{"\\pagecolor", NULL}, 1}, - {{"\\colorbox", NULL}, 2}, - {{"\\fcolorbox", NULL}, 2}, - {{"\\declaregraphicsextensions", NULL}, 1}, - {{"\\psfig", NULL}, 1}, - {{"\\url", NULL}, 1}, - {{"\\eqref", NULL}, 1}, - {{"\\vskip", NULL}, 1}, - {{"\\vglue", NULL}, 1}, - {{"\'\'", NULL}, 1}}; - -#define PATTERN_LEN (sizeof(PATTERN) / sizeof(PATTERN[0])) - -LaTeXParser::LaTeXParser(const char* wordchars) - : TextParser(wordchars) - , pattern_num(0), depth(0), arg(0), opt(0) { -} - -LaTeXParser::LaTeXParser(const w_char* wordchars, int len) - : TextParser(wordchars, len) - , pattern_num(0), depth(0), arg(0), opt(0) { -} - -LaTeXParser::~LaTeXParser() {} - -int LaTeXParser::look_pattern(int col) { - for (unsigned int i = 0; i < PATTERN_LEN; i++) { - const char* j = line[actual].c_str() + head; - const char* k = PATTERN[i].pat[col]; - if (!k) - continue; - while ((*k != '\0') && (tolower(*j) == *k)) { - j++; - k++; - } - if (*k == '\0') - return i; - } - return -1; -} - -/* - * LaTeXParser - * - * state 0: not wordchar - * state 1: wordchar - * state 2: comments - * state 3: commands - * state 4: commands with arguments - * state 5: % comment - * - */ - -bool LaTeXParser::next_token(std::string& t) { - t.clear(); - int i; - int slash = 0; - int apostrophe; - for (;;) { - // fprintf(stderr,"depth: %d, state: %d, , arg: %d, token: - // %s\n",depth,state,arg,line[actual]+head); - - switch (state) { - case 0: // non word chars - if ((pattern_num = look_pattern(0)) != -1) { - if (PATTERN[pattern_num].pat[1]) { - state = 2; - } else { - state = 4; - depth = 0; - arg = 0; - opt = 1; - } - head += strlen(PATTERN[pattern_num].pat[0]) - 1; - } else if (line[actual][head] == '%') { - state = 5; - } else if (is_wordchar(line[actual].c_str() + head)) { - state = 1; - token = head; - } else if (line[actual][head] == '\\') { - if (line[actual][head + 1] == '\\' || // \\ (linebreak) - (line[actual][head + 1] == '$') || // \$ (dollar sign) - (line[actual][head + 1] == '%')) { // \% (percent) - head++; - break; - } - state = 3; - } - break; - case 1: // wordchar - apostrophe = 0; - if ((is_wordchar((char*)APOSTROPHE) || - (is_utf8() && is_wordchar((char*)UTF8_APOS))) && - !line[actual].empty() && line[actual][head] == '\'' && - is_wordchar(line[actual].c_str() + head + 1)) { - head++; - } else if (is_utf8() && - is_wordchar((char*)APOSTROPHE) && // add Unicode apostrophe - // to the WORDCHARS, if - // needed - strncmp(line[actual].c_str() + head, UTF8_APOS, strlen(UTF8_APOS)) == - 0 && - is_wordchar(line[actual].c_str() + head + strlen(UTF8_APOS))) { - head += strlen(UTF8_APOS) - 1; - } else if (!is_wordchar(line[actual].c_str() + head) || - (line[actual][head] == '\'' && line[actual][head + 1] == '\'' && - ++apostrophe)) { - state = 0; - bool ok = alloc_token(token, &head, t); - if (apostrophe) - head += 2; - if (ok) - return true; - } - break; - case 2: // comment, labels, etc - if (((i = look_pattern(1)) != -1) && - (strcmp(PATTERN[i].pat[1], PATTERN[pattern_num].pat[1]) == 0)) { - state = 0; - head += strlen(PATTERN[pattern_num].pat[1]) - 1; - } - break; - case 3: // command - if ((tolower(line[actual][head]) < 'a') || - (tolower(line[actual][head]) > 'z')) { - state = 0; - head--; - } - break; - case 4: // command with arguments - if (slash && (line[actual][head] != '\0')) { - slash = 0; - head++; - break; - } else if (line[actual][head] == '\\') { - slash = 1; - } else if ((line[actual][head] == '{') || - ((opt) && (line[actual][head] == '['))) { - depth++; - opt = 0; - } else if (line[actual][head] == '}') { - depth--; - if (depth == 0) { - opt = 1; - arg++; - } - if (((depth == 0) && (arg == PATTERN[pattern_num].arg)) || - (depth < 0)) { - state = 0; // XXX not handles the last optional arg. - } - } else if (line[actual][head] == ']') - depth--; - } // case - if (next_char(line[actual].c_str(), &head)) { - if (state == 5) - state = 0; - return false; - } - } -} diff --git a/3rdparty/hunspell/1.7.0/src/parsers/latexparser.hxx b/3rdparty/hunspell/1.7.0/src/parsers/latexparser.hxx deleted file mode 100644 index 534a37bd3c..0000000000 --- a/3rdparty/hunspell/1.7.0/src/parsers/latexparser.hxx +++ /dev/null @@ -1,65 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * Copyright (C) 2002-2017 Németh László - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. - * - * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, - * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, - * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, - * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, - * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -#ifndef LATEXPARSER_HXX_ -#define LATEXPARSER_HXX_ - -#include "textparser.hxx" - -/* - * HTML Parser - * - */ - -class LaTeXParser : public TextParser { - int pattern_num; // number of comment - int depth; // depth of blocks - int arg; // arguments's number - int opt; // optional argument attrib. - - public: - explicit LaTeXParser(const char* wc); - LaTeXParser(const w_char* wordchars, int len); - virtual ~LaTeXParser(); - - virtual bool next_token(std::string&); - - private: - int look_pattern(int col); -}; - -#endif diff --git a/3rdparty/hunspell/1.7.0/src/parsers/manparser.cxx b/3rdparty/hunspell/1.7.0/src/parsers/manparser.cxx deleted file mode 100644 index bf3bdac4c3..0000000000 --- a/3rdparty/hunspell/1.7.0/src/parsers/manparser.cxx +++ /dev/null @@ -1,98 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * Copyright (C) 2002-2017 Németh László - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. - * - * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, - * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, - * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, - * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, - * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -#include -#include -#include -#include - -#include "../hunspell/csutil.hxx" -#include "manparser.hxx" - -#ifndef W32 -using namespace std; -#endif - -ManParser::ManParser(const char* wordchars) - : TextParser(wordchars) { -} - -ManParser::ManParser(const w_char* wordchars, int len) - : TextParser(wordchars, len) { -} - -ManParser::~ManParser() {} - -bool ManParser::next_token(std::string& t) { - for (;;) { - switch (state) { - case 1: // command arguments - if (line[actual][head] == ' ') - state = 2; - break; - case 0: // dot in begin of line - if (line[actual][0] == '.') { - state = 1; - break; - } else { - state = 2; - } - /* FALLTHROUGH */ - case 2: // non word chars - if (is_wordchar(line[actual].c_str() + head)) { - state = 3; - token = head; - } else if ((line[actual][head] == '\\') && - (line[actual][head + 1] == 'f') && - (line[actual][head + 2] != '\0')) { - head += 2; - } - break; - case 3: // wordchar - if (!is_wordchar(line[actual].c_str() + head)) { - state = 2; - if (alloc_token(token, &head, t)) - return true; - } - break; - } - if (next_char(line[actual].c_str(), &head)) { - state = 0; - return false; - } - } -} diff --git a/3rdparty/hunspell/1.7.0/src/parsers/manparser.hxx b/3rdparty/hunspell/1.7.0/src/parsers/manparser.hxx deleted file mode 100644 index 59279279d4..0000000000 --- a/3rdparty/hunspell/1.7.0/src/parsers/manparser.hxx +++ /dev/null @@ -1,58 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * Copyright (C) 2002-2017 Németh László - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. - * - * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, - * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, - * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, - * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, - * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -#ifndef MANPARSER_HXX_ -#define MANPARSER_HXX_ - -#include "textparser.hxx" - -/* - * Manparse Parser - * - */ - -class ManParser : public TextParser { - protected: - public: - explicit ManParser(const char* wc); - ManParser(const w_char* wordchars, int len); - virtual ~ManParser(); - - virtual bool next_token(std::string&); -}; - -#endif diff --git a/3rdparty/hunspell/1.7.0/src/parsers/odfparser.cxx b/3rdparty/hunspell/1.7.0/src/parsers/odfparser.cxx deleted file mode 100644 index caa10e3f3a..0000000000 --- a/3rdparty/hunspell/1.7.0/src/parsers/odfparser.cxx +++ /dev/null @@ -1,86 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * Copyright (C) 2002-2017 Németh László - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. - * - * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, - * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, - * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, - * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, - * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -#include -#include -#include -#include - -#include "../hunspell/csutil.hxx" -#include "odfparser.hxx" - -#ifndef W32 -using namespace std; -#endif - -static const char* PATTERN[][2] = { - {"", ""}, - {"", ""}, - {"", ""}, - {""}, - {"<[cdata[", "]]>"}, // XML comment - {"<", ">"}}; - -#define PATTERN_LEN (sizeof(PATTERN) / (sizeof(char*) * 2)) - -static const char* (*PATTERN2)[2] = NULL; - -#define PATTERN_LEN2 0 - -static const char* PATTERN3[][2] = { - {""}, // part of the reedited words - {""}}; // for example, an inserted letter - -#define PATTERN_LEN3 (sizeof(PATTERN3) / (sizeof(char*) * 2)) - -ODFParser::ODFParser(const char* wordchars) - : XMLParser(wordchars) { -} - -ODFParser::ODFParser(const w_char* wordchars, int len) - : XMLParser(wordchars, len) { -} - -bool ODFParser::next_token(std::string& t) { - return XMLParser::next_token(PATTERN, PATTERN_LEN, PATTERN2, PATTERN_LEN2, PATTERN3, PATTERN_LEN3, t); -} - -std::string ODFParser::get_word(const std::string &tok) { - return XMLParser::get_word2(PATTERN3, PATTERN_LEN3, tok); -} - -ODFParser::~ODFParser() {} diff --git a/3rdparty/hunspell/1.7.0/src/parsers/odfparser.hxx b/3rdparty/hunspell/1.7.0/src/parsers/odfparser.hxx deleted file mode 100644 index e96956a0b7..0000000000 --- a/3rdparty/hunspell/1.7.0/src/parsers/odfparser.hxx +++ /dev/null @@ -1,57 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * Copyright (C) 2002-2017 Németh László - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. - * - * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, - * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, - * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, - * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, - * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -#ifndef ODFPARSER_HXX_ -#define ODFPARSER_HXX_ - -#include "xmlparser.hxx" - -/* - * HTML Parser - * - */ - -class ODFParser : public XMLParser { - public: - explicit ODFParser(const char* wc); - ODFParser(const w_char* wordchars, int len); - virtual bool next_token(std::string&); - virtual std::string get_word(const std::string &tok); - virtual ~ODFParser(); -}; - -#endif diff --git a/3rdparty/hunspell/1.7.0/src/parsers/testparser.cxx b/3rdparty/hunspell/1.7.0/src/parsers/testparser.cxx deleted file mode 100644 index b9be460338..0000000000 --- a/3rdparty/hunspell/1.7.0/src/parsers/testparser.cxx +++ /dev/null @@ -1,86 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * Copyright (C) 2002-2017 Németh László - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. - * - * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, - * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, - * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, - * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, - * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -#include -#include -#include - -#include "textparser.hxx" -#include "htmlparser.hxx" -#include "latexparser.hxx" -#include "xmlparser.hxx" - -#ifndef W32 -using namespace std; -#endif - -int main(int argc, char** argv) { - FILE* f; - /* first parse the command line options */ - - if (argc < 2) { - fprintf(stderr, "correct syntax is:\n"); - fprintf(stderr, "testparser file\n"); - fprintf(stderr, "example: testparser /dev/stdin\n"); - exit(1); - } - - /* open the words to check list */ - f = fopen(argv[1], "r"); - if (!f) { - fprintf(stderr, "Error - could not open file of words to check\n"); - exit(1); - } - - TextParser* p = new TextParser( - "qwertzuiopasdfghjklyxcvbnmQWERTZUIOPASDFGHJKLYXCVBNM"); - - char buf[MAXLNLEN]; - - while (fgets(buf, MAXLNLEN, f)) { - p->put_line(buf); - p->set_url_checking(1); - std::string next; - while (p->next_token(next)) { - fprintf(stdout, "token: %s\n", next.c_str()); - } - } - - delete p; - fclose(f); - return 0; -} diff --git a/3rdparty/hunspell/1.7.0/src/parsers/textparser.cxx b/3rdparty/hunspell/1.7.0/src/parsers/textparser.cxx deleted file mode 100644 index d7ac599bb1..0000000000 --- a/3rdparty/hunspell/1.7.0/src/parsers/textparser.cxx +++ /dev/null @@ -1,302 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * Copyright (C) 2002-2017 Németh László - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. - * - * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, - * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, - * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, - * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, - * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -#include -#include -#include -#include - -#include "../hunspell/csutil.hxx" -#include "textparser.hxx" - -#include - -#ifndef W32 -using namespace std; -#endif - -// ISO-8859-1 HTML character entities - -static const char* LATIN1[] = { - "À", "Ã", "Å", "Æ", "È", "Ê", - "Ì", "Ï", "Ð", "Ñ", "Ò", "Ø", - "Ù", "Þ", "à", "ã", "å", "æ", - "è", "ê", "ì", "ï", "ð", "ñ", - "ò", "ø", "ù", "þ", "ÿ"}; - -#define LATIN1_LEN (sizeof(LATIN1) / sizeof(char*)) - -#define ENTITY_APOS "'" -#define UTF8_APOS "\xe2\x80\x99" -#define APOSTROPHE "'" - -TextParser::TextParser(const char* wordchars) { - init(wordchars); -} - -TextParser::TextParser(const w_char* wordchars, int len) { - init(wordchars, len); -} - -TextParser::~TextParser() {} - -int TextParser::is_wordchar(const char* w) { - if (*w == '\0') - return 0; - if (utf8) { - std::vector wc; - unsigned short idx; - u8_u16(wc, w); - if (wc.empty()) - return 0; - idx = (wc[0].h << 8) + wc[0].l; - return (unicodeisalpha(idx) || - (wordchars_utf16 && - std::binary_search(wordchars_utf16, wordchars_utf16 + wclen, wc[0]))); - } else { - return wordcharacters[(*w + 256) % 256]; - } -} - -const char* TextParser::get_latin1(const char* s) { - if (s[0] == '&') { - unsigned int i = 0; - while ((i < LATIN1_LEN) && strncmp(LATIN1[i], s, strlen(LATIN1[i]))) - i++; - if (i != LATIN1_LEN) - return LATIN1[i]; - } - return NULL; -} - -void TextParser::init(const char* wordchars) { - actual = 0; - head = 0; - token = 0; - state = 0; - utf8 = 0; - checkurl = 0; - wordchars_utf16 = NULL; - wclen = 0; - wordcharacters.resize(256, 0); - if (!wordchars) - wordchars = "qwertzuiopasdfghjklyxcvbnmQWERTZUIOPASDFGHJKLYXCVBNM"; - for (unsigned int j = 0; j < strlen(wordchars); ++j) { - wordcharacters[(wordchars[j] + 256) % 256] = 1; - } -} - -void TextParser::init(const w_char* wc, int len) { - actual = 0; - head = 0; - token = 0; - state = 0; - utf8 = 1; - checkurl = 0; - wordchars_utf16 = wc; - wclen = len; -} - -int TextParser::next_char(const char* ln, size_t* pos) { - if (*(ln + *pos) == '\0') - return 1; - if (utf8) { - if (*(ln + *pos) >> 7) { - // jump to next UTF-8 character - for ((*pos)++; (*(ln + *pos) & 0xc0) == 0x80; (*pos)++) - ; - } else { - (*pos)++; - } - } else - (*pos)++; - return 0; -} - -void TextParser::put_line(const char* word) { - actual = (actual + 1) % MAXPREVLINE; - line[actual].assign(word); - token = 0; - head = 0; - check_urls(); -} - -std::string TextParser::get_prevline(int n) const { - return line[(actual + MAXPREVLINE - n) % MAXPREVLINE]; -} - -std::string TextParser::get_line() const { - return get_prevline(0); -} - -bool TextParser::next_token(std::string &t) { - const char* latin1; - - for (;;) { - switch (state) { - case 0: // non word chars - if (is_wordchar(line[actual].c_str() + head)) { - state = 1; - token = head; - } else if ((latin1 = get_latin1(line[actual].c_str() + head))) { - state = 1; - token = head; - head += strlen(latin1); - } - break; - case 1: // wordchar - if ((latin1 = get_latin1(line[actual].c_str() + head))) { - head += strlen(latin1); - } else if ((is_wordchar((char*)APOSTROPHE) || - (is_utf8() && is_wordchar((char*)UTF8_APOS))) && - !line[actual].empty() && line[actual][head] == '\'' && - is_wordchar(line[actual].c_str() + head + 1)) { - head++; - } else if (is_utf8() && - is_wordchar((char*)APOSTROPHE) && // add Unicode apostrophe - // to the WORDCHARS, if - // needed - strncmp(line[actual].c_str() + head, UTF8_APOS, strlen(UTF8_APOS)) == - 0 && - is_wordchar(line[actual].c_str() + head + strlen(UTF8_APOS))) { - head += strlen(UTF8_APOS) - 1; - } else if (!is_wordchar(line[actual].c_str() + head)) { - state = 0; - if (alloc_token(token, &head, t)) - return true; - } - break; - } - if (next_char(line[actual].c_str(), &head)) - return false; - } -} - -size_t TextParser::get_tokenpos() { - return token; -} - -int TextParser::change_token(const char* word) { - if (word) { - std::string remainder(line[actual].substr(head)); - line[actual].resize(token); - line[actual].append(word); - line[actual].append(remainder); - head = token; - return 1; - } - return 0; -} - -std::string TextParser::get_word(const std::string &tok) { - return tok; -} - -void TextParser::check_urls() { - urlline.resize(line[actual].size() + 1); - int url_state = 0; - size_t url_head = 0; - size_t url_token = 0; - int url = 0; - for (;;) { - switch (url_state) { - case 0: // non word chars - if (is_wordchar(line[actual].c_str() + url_head)) { - url_state = 1; - url_token = url_head; - // Unix path - } else if (line[actual][url_head] == '/') { - url_state = 1; - url_token = url_head; - url = 1; - } - break; - case 1: // wordchar - char ch = line[actual][url_head]; - // e-mail address - if ((ch == '@') || - // MS-DOS, Windows path - (strncmp(line[actual].c_str() + url_head, ":\\", 2) == 0) || - // URL - (strncmp(line[actual].c_str() + url_head, "://", 3) == 0)) { - url = 1; - } else if (!(is_wordchar(line[actual].c_str() + url_head) || (ch == '-') || - (ch == '_') || (ch == '\\') || (ch == '.') || - (ch == ':') || (ch == '/') || (ch == '~') || (ch == '%') || - (ch == '*') || (ch == '$') || (ch == '[') || (ch == ']') || - (ch == '?') || (ch == '!') || - ((ch >= '0') && (ch <= '9')))) { - url_state = 0; - if (url == 1) { - for (size_t i = url_token; i < url_head; ++i) { - urlline[i] = true; - } - } - url = 0; - } - break; - } - urlline[url_head] = false; - if (next_char(line[actual].c_str(), &url_head)) - return; - } -} - -int TextParser::get_url(size_t token_pos, size_t* hd) { - for (size_t i = *hd; i < line[actual].size() && urlline[i]; i++, (*hd)++) - ; - return checkurl ? 0 : urlline[token_pos]; -} - -void TextParser::set_url_checking(int check) { - checkurl = check; -} - -bool TextParser::alloc_token(size_t tokn, size_t* hd, std::string& t) { - size_t url_head = *hd; - if (get_url(tokn, &url_head)) - return false; - t = line[actual].substr(tokn, *hd - tokn); - // remove colon for Finnish and Swedish language - if (!t.empty() && t[t.size() - 1] == ':') { - t.resize(t.size() - 1); - if (t.empty()) { - return false; - } - } - return true; -} diff --git a/3rdparty/hunspell/1.7.0/src/parsers/textparser.hxx b/3rdparty/hunspell/1.7.0/src/parsers/textparser.hxx deleted file mode 100644 index c063723318..0000000000 --- a/3rdparty/hunspell/1.7.0/src/parsers/textparser.hxx +++ /dev/null @@ -1,99 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * Copyright (C) 2002-2017 Németh László - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. - * - * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, - * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, - * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, - * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, - * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -#ifndef TEXTPARSER_HXX_ -#define TEXTPARSER_HXX_ - -// set sum of actual and previous lines -#define MAXPREVLINE 4 - -#ifndef MAXLNLEN -#define MAXLNLEN 8192 -#endif - -#include "../hunspell/w_char.hxx" - -#include - -/* - * Base Text Parser - * - */ - -class TextParser { - protected: - std::vector wordcharacters;// for detection of the word boundaries - std::string line[MAXPREVLINE]; // parsed and previous lines - std::vector urlline; // mask for url detection - int checkurl; - int actual; // actual line - size_t head; // head position - size_t token;// begin of token - int state; // state of automata - int utf8; // UTF-8 character encoding - int next_char(const char* line, size_t* pos); - const w_char* wordchars_utf16; - int wclen; - - public: - TextParser(const w_char* wordchars, int len); - explicit TextParser(const char* wc); - virtual ~TextParser(); - - void put_line(const char* line); - std::string get_line() const; - std::string get_prevline(int n) const; - virtual bool next_token(std::string&); - virtual std::string get_word(const std::string &tok); - virtual int change_token(const char* word); - void set_url_checking(int check); - - size_t get_tokenpos(); - int is_wordchar(const char* w); - inline int is_utf8() { return utf8; } - const char* get_latin1(const char* s); - char* next_char(); - int tokenize_urls(); - void check_urls(); - int get_url(size_t token_pos, size_t* head); - bool alloc_token(size_t token, size_t* head, std::string& out); -private: - void init(const char*); - void init(const w_char* wordchars, int len); -}; - -#endif diff --git a/3rdparty/hunspell/1.7.0/src/parsers/xmlparser.cxx b/3rdparty/hunspell/1.7.0/src/parsers/xmlparser.cxx deleted file mode 100644 index abd3a7578d..0000000000 --- a/3rdparty/hunspell/1.7.0/src/parsers/xmlparser.cxx +++ /dev/null @@ -1,253 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * Copyright (C) 2002-2017 Németh László - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. - * - * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, - * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, - * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, - * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, - * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -#include -#include -#include -#include - -#include "../hunspell/csutil.hxx" -#include "xmlparser.hxx" - -#ifndef W32 -using namespace std; -#endif - -enum { ST_NON_WORD, ST_WORD, ST_TAG, ST_CHAR_ENTITY, ST_OTHER_TAG, ST_ATTRIB }; - -static const char* __PATTERN__[][2] = {{""}, - {"<[cdata[", "]]>"}, // XML comment - {"<", ">"}}; - -#define __PATTERN_LEN__ (sizeof(__PATTERN__) / (sizeof(char*) * 2)) - -// for checking attributes, eg. text in HTML -static const char* (*__PATTERN2__)[2] = NULL; - -#define __PATTERN_LEN2__ 0 - -// for checking words with in-word patterns -// for example, "example" in ODT -static const char* (*__PATTERN3__)[2] = NULL; - -#define __PATTERN_LEN3__ 0 - -#define ENTITY_APOS "'" -#define UTF8_APOS "\xe2\x80\x99" -#define APOSTROPHE "'" - -XMLParser::XMLParser(const char* wordchars) - : TextParser(wordchars) - , pattern_num(0), pattern2_num(0), pattern3_num(0), prevstate(0), checkattr(0), quotmark(0) { -} - -XMLParser::XMLParser(const w_char* wordchars, int len) - : TextParser(wordchars, len) - , pattern_num(0), pattern2_num(0), pattern3_num(0), prevstate(0), checkattr(0), quotmark(0) { -} - -XMLParser::~XMLParser() {} - -int XMLParser::look_pattern(const char* p[][2], unsigned int len, int column) { - for (unsigned int i = 0; i < len; i++) { - const char* j = line[actual].c_str() + head; - const char* k = p[i][column]; - while ((*k != '\0') && (tolower(*j) == *k)) { - j++; - k++; - } - if (*k == '\0') - return i; - } - return -1; -} - -/* - * XML parser - * - */ - -bool XMLParser::next_token(const char* PATTERN[][2], - unsigned int PATTERN_LEN, - const char* PATTERN2[][2], - unsigned int PATTERN_LEN2, - const char* PATTERN3[][2], - unsigned int PATTERN_LEN3, - std::string& t) { - t.clear(); - const char* latin1; - - for (;;) { - switch (state) { - case ST_NON_WORD: // non word chars - prevstate = ST_NON_WORD; - if ((pattern_num = look_pattern(PATTERN, PATTERN_LEN, 0)) != -1) { - checkattr = 0; - if ((pattern2_num = look_pattern(PATTERN2, PATTERN_LEN2, 0)) != -1) { - checkattr = 1; - } - state = ST_TAG; - } else if (is_wordchar(line[actual].c_str() + head)) { - state = ST_WORD; - token = head; - } else if ((latin1 = get_latin1(line[actual].c_str() + head))) { - state = ST_WORD; - token = head; - head += strlen(latin1); - } else if (line[actual][head] == '&') { - state = ST_CHAR_ENTITY; - } - break; - case ST_WORD: // wordchar - if ((latin1 = get_latin1(line[actual].c_str() + head))) { - head += strlen(latin1); - } else if ((is_wordchar((char*)APOSTROPHE) || - (is_utf8() && is_wordchar((char*)UTF8_APOS))) && - strncmp(line[actual].c_str() + head, ENTITY_APOS, - strlen(ENTITY_APOS)) == 0 && - is_wordchar(line[actual].c_str() + head + strlen(ENTITY_APOS))) { - head += strlen(ENTITY_APOS) - 1; - } else if (is_utf8() && - is_wordchar((char*)APOSTROPHE) && // add Unicode apostrophe - // to the WORDCHARS, if - // needed - strncmp(line[actual].c_str() + head, UTF8_APOS, strlen(UTF8_APOS)) == - 0 && - is_wordchar(line[actual].c_str() + head + strlen(UTF8_APOS))) { - head += strlen(UTF8_APOS) - 1; - } else if (!is_wordchar(line[actual].c_str() + head)) { - // in-word patterns - if ((pattern3_num = look_pattern(PATTERN3, PATTERN_LEN3, 0)) != -1) { - size_t pos = line[actual].find(PATTERN3[pattern3_num][1], head); - if (pos != std::string::npos) { - size_t endpos = pos + strlen(PATTERN3[pattern3_num][1]) - 1; - if (is_wordchar(line[actual].c_str() + endpos + 1)) { - head = endpos; - break; - } - } - } - state = prevstate; - // return with the token, except in the case of in-word patterns - if (alloc_token(token, &head, t)) - return true; - } - break; - case ST_TAG: // comment, labels, etc - int i; - if ((checkattr == 1) && - ((i = look_pattern(PATTERN2, PATTERN_LEN2, 1)) != -1) && - (strcmp(PATTERN2[i][0], PATTERN2[pattern2_num][0]) == 0)) { - checkattr = 2; - } else if ((checkattr > 0) && (line[actual][head] == '>')) { - state = ST_NON_WORD; - } else if (((i = look_pattern(PATTERN, PATTERN_LEN, 1)) != -1) && - (strcmp(PATTERN[i][1], PATTERN[pattern_num][1]) == 0)) { - state = ST_NON_WORD; - head += strlen(PATTERN[pattern_num][1]) - 1; - } else if ((strcmp(PATTERN[pattern_num][0], "<") == 0) && - ((line[actual][head] == '"') || - (line[actual][head] == '\''))) { - quotmark = line[actual][head]; - state = ST_ATTRIB; - } - break; - case ST_ATTRIB: // non word chars - prevstate = ST_ATTRIB; - if (line[actual][head] == quotmark) { - state = ST_TAG; - if (checkattr == 2) - checkattr = 1; - // for IMG ALT - } else if (is_wordchar(line[actual].c_str() + head) && (checkattr == 2)) { - state = ST_WORD; - token = head; - } else if (line[actual][head] == '&') { - state = ST_CHAR_ENTITY; - } - break; - case ST_CHAR_ENTITY: // SGML element - if ((tolower(line[actual][head]) == ';')) { - state = prevstate; - head--; - } - } - if (next_char(line[actual].c_str(), &head)) - return false; - } - //FIXME No return, in function returning non-void -} - -bool XMLParser::next_token(std::string& t) { - return next_token(__PATTERN__, __PATTERN_LEN__, __PATTERN2__, - __PATTERN_LEN2__, __PATTERN3__, __PATTERN_LEN3__, t); -} - -// remove in-word patterns -std::string XMLParser::get_word2( - const char* PATTERN3[][2], - unsigned int PATTERN_LEN3, - const std::string &tok) { - std::string word = tok; - for (unsigned int i = 0; i < PATTERN_LEN3; i++) { - size_t pos; - while ((pos = word.find(PATTERN3[i][0])) != word.npos) { - size_t endpos = word.find(PATTERN3[i][1], pos); - if (endpos != word.npos) { - word.erase(pos, endpos + strlen(PATTERN3[i][1]) - pos); - } else - return word; - } - } - return word; -} - -int XMLParser::change_token(const char* word) { - if (strstr(word, APOSTROPHE) != NULL || strchr(word, '"') != NULL || - strchr(word, '&') != NULL || strchr(word, '<') != NULL || - strchr(word, '>') != NULL) { - std::string r(word); - mystrrep(r, "&", "__namp;__"); - mystrrep(r, "__namp;__", "&"); - mystrrep(r, APOSTROPHE, ENTITY_APOS); - mystrrep(r, "\"", """); - mystrrep(r, ">", ">"); - mystrrep(r, "<", "<"); - return TextParser::change_token(r.c_str()); - } - return TextParser::change_token(word); -} diff --git a/3rdparty/hunspell/1.7.0/src/parsers/xmlparser.hxx b/3rdparty/hunspell/1.7.0/src/parsers/xmlparser.hxx deleted file mode 100644 index 03f024574d..0000000000 --- a/3rdparty/hunspell/1.7.0/src/parsers/xmlparser.hxx +++ /dev/null @@ -1,76 +0,0 @@ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * Copyright (C) 2002-2017 Németh László - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. - * - * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, - * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, - * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, - * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, - * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -#ifndef XMLPARSER_HXX_ -#define XMLPARSER_HXX_ - -#include "textparser.hxx" - -/* - * XML Parser - * - */ - -class XMLParser : public TextParser { - public: - explicit XMLParser(const char* wc); - XMLParser(const w_char* wordchars, int len); - bool next_token(const char* p[][2], - unsigned int len, - const char* p2[][2], - unsigned int len2, - const char* p3[][2], - unsigned int len3, - std::string&); - virtual bool next_token(std::string&); - std::string get_word2(const char* p2[][2], - unsigned int len2, - const std::string &tok); - int change_token(const char* word); - virtual ~XMLParser(); - - private: - int look_pattern(const char* p[][2], unsigned int len, int column); - int pattern_num; - int pattern2_num; - int pattern3_num; - int prevstate; - int checkattr; - char quotmark; -}; - -#endif diff --git a/3rdparty/hunspell/Makefile.am b/3rdparty/hunspell/Makefile.am index 2644988eea..83da1b3ae1 100644 --- a/3rdparty/hunspell/Makefile.am +++ b/3rdparty/hunspell/Makefile.am @@ -45,19 +45,4 @@ liblyxhunspell_a_SOURCES = \ 1.7.0/src/hunspell/suggestmgr.hxx \ 1.7.0/src/hunspell/utf_info.hxx \ 1.7.0/src/hunspell/w_char.hxx \ - 1.7.0/src/parsers/firstparser.cxx \ - 1.7.0/src/parsers/firstparser.hxx \ - 1.7.0/src/parsers/htmlparser.cxx \ - 1.7.0/src/parsers/htmlparser.hxx \ - 1.7.0/src/parsers/latexparser.cxx \ - 1.7.0/src/parsers/latexparser.hxx \ - 1.7.0/src/parsers/manparser.cxx \ - 1.7.0/src/parsers/manparser.hxx \ - 1.7.0/src/parsers/odfparser.cxx \ - 1.7.0/src/parsers/odfparser.hxx \ - 1.7.0/src/parsers/testparser.cxx \ - 1.7.0/src/parsers/textparser.cxx \ - 1.7.0/src/parsers/textparser.hxx \ - 1.7.0/src/parsers/xmlparser.cxx \ - 1.7.0/src/parsers/xmlparser.hxx \ 1.7.0/src/win_api/config.h -- 2.39.5