+++ /dev/null
-
-# Add the hunspell directory under the build tree to the header search path.
-AM_CPPFLAGS=-I${top_builddir}/src/hunspell
-
-# Static library holding every parser; the noinst_ prefix means it is built
-# but not installed.
-noinst_LIBRARIES=libparsers.a
-libparsers_a_SOURCES=firstparser.cxx xmlparser.cxx \
- latexparser.cxx manparser.cxx \
- textparser.cxx htmlparser.cxx \
- odfparser.cxx
-
-# Uninstalled test program, compiled from the parser sources and headers plus
-# testparser.cxx.
-noinst_PROGRAMS=testparser
-testparser_SOURCES=firstparser.cxx firstparser.hxx xmlparser.cxx \
- xmlparser.hxx latexparser.cxx latexparser.hxx \
- manparser.cxx manparser.hxx testparser.cxx \
- textparser.cxx textparser.hxx htmlparser.cxx \
- htmlparser.hxx odfparser.hxx odfparser.cxx
-
-# Link against libhunspell: need mystrdup()
-LDADD = ../hunspell/libhunspell-1.7.la
+++ /dev/null
-/* ***** BEGIN LICENSE BLOCK *****
- * Version: MPL 1.1/GPL 2.0/LGPL 2.1
- *
- * Copyright (C) 2002-2017 Németh László
- *
- * The contents of this file are subject to the Mozilla Public License Version
- * 1.1 (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- * http://www.mozilla.org/MPL/
- *
- * Software distributed under the License is distributed on an "AS IS" basis,
- * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
- * for the specific language governing rights and limitations under the
- * License.
- *
- * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
- *
- * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
- * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
- * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
- * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
- * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
- *
- * Alternatively, the contents of this file may be used under the terms of
- * either the GNU General Public License Version 2 or later (the "GPL"), or
- * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
- * in which case the provisions of the GPL or the LGPL are applicable instead
- * of those above. If you wish to allow use of your version of this file only
- * under the terms of either the GPL or the LGPL, and not to allow others to
- * use your version of this file under the terms of the MPL, indicate your
- * decision by deleting the provisions above and replace them with the notice
- * and other provisions required by the GPL or the LGPL. If you do not delete
- * the provisions above, a recipient may use your version of this file under
- * the terms of any one of the MPL, the GPL or the LGPL.
- *
- * ***** END LICENSE BLOCK ***** */
-
-#include <cstdlib>
-#include <cstring>
-#include <cstdio>
-#include <ctype.h>
-
-#include "../hunspell/csutil.hxx"
-#include "firstparser.hxx"
-
-#ifndef W32
-using namespace std;
-#endif
-
-// Builds a FirstParser; the word-character set is passed unchanged to the
-// TextParser base class.
-FirstParser::FirstParser(const char* wordchars) : TextParser(wordchars) {}
-
-// FirstParser declares no data members of its own, so the body is empty.
-FirstParser::~FirstParser() {
-}
-
-// Delivers the part of the current line that precedes its first tab.
-//
-// t is cleared first. When the line holds a tab whose position lies beyond
-// the stored `token` offset, `token` is moved onto that tab, t receives the
-// text in front of it and the result is true. In every other case t stays
-// empty and the result is false.
-bool FirstParser::next_token(std::string& t) {
-  t.clear();
-  const size_t tab_at = line[actual].find('\t');
-  // No tab at all, or the tab is not past the recorded offset: nothing to do.
-  if (tab_at == std::string::npos || tab_at <= token)
-    return false;
-  token = tab_at;
-  t = line[actual].substr(0, tab_at);
-  return true;
-}
+++ /dev/null
-/* ***** BEGIN LICENSE BLOCK *****
- * Version: MPL 1.1/GPL 2.0/LGPL 2.1
- *
- * Copyright (C) 2002-2017 Németh László
- *
- * The contents of this file are subject to the Mozilla Public License Version
- * 1.1 (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- * http://www.mozilla.org/MPL/
- *
- * Software distributed under the License is distributed on an "AS IS" basis,
- * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
- * for the specific language governing rights and limitations under the
- * License.
- *
- * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
- *
- * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
- * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
- * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
- * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
- * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
- *
- * Alternatively, the contents of this file may be used under the terms of
- * either the GNU General Public License Version 2 or later (the "GPL"), or
- * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
- * in which case the provisions of the GPL or the LGPL are applicable instead
- * of those above. If you wish to allow use of your version of this file only
- * under the terms of either the GPL or the LGPL, and not to allow others to
- * use your version of this file under the terms of the MPL, indicate your
- * decision by deleting the provisions above and replace them with the notice
- * and other provisions required by the GPL or the LGPL. If you do not delete
- * the provisions above, a recipient may use your version of this file under
- * the terms of any one of the MPL, the GPL or the LGPL.
- *
- * ***** END LICENSE BLOCK ***** */
-
-#ifndef FIRSTPARSER_HXX_
-#define FIRSTPARSER_HXX_
-
-#include "textparser.hxx"
-
-/*
- * Check first word of the input line
- *
- */
-
-// TextParser variant whose next_token() yields the text that precedes the
-// first tab character of the current line.
-class FirstParser : public TextParser {
- public:
- // wc: word-character set, forwarded to the TextParser constructor.
- explicit FirstParser(const char* wc);
- virtual ~FirstParser();
-
- // Clears the argument, then stores the text in front of the first tab in it
- // and returns true; returns false when no tab lies beyond the stored token
- // offset.
- virtual bool next_token(std::string&);
-};
-
-#endif
+++ /dev/null
-/* ***** BEGIN LICENSE BLOCK *****
- * Version: MPL 1.1/GPL 2.0/LGPL 2.1
- *
- * Copyright (C) 2002-2017 Németh László
- *
- * The contents of this file are subject to the Mozilla Public License Version
- * 1.1 (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- * http://www.mozilla.org/MPL/
- *
- * Software distributed under the License is distributed on an "AS IS" basis,
- * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
- * for the specific language governing rights and limitations under the
- * License.
- *
- * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
- *
- * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
- * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
- * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
- * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
- * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
- *
- * Alternatively, the contents of this file may be used under the terms of
- * either the GNU General Public License Version 2 or later (the "GPL"), or
- * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
- * in which case the provisions of the GPL or the LGPL are applicable instead
- * of those above. If you wish to allow use of your version of this file only
- * under the terms of either the GPL or the LGPL, and not to allow others to
- * use your version of this file under the terms of the MPL, indicate your
- * decision by deleting the provisions above and replace them with the notice
- * and other provisions required by the GPL or the LGPL. If you do not delete
- * the provisions above, a recipient may use your version of this file under
- * the terms of any one of the MPL, the GPL or the LGPL.
- *
- * ***** END LICENSE BLOCK ***** */
-
-#include <cstdlib>
-#include <cstring>
-#include <cstdio>
-#include <ctype.h>
-
-#include "../hunspell/csutil.hxx"
-#include "htmlparser.hxx"
-
-#ifndef W32
-using namespace std;
-#endif
-
-// First pattern table handed to XMLParser::next_token(): rows of
-// {opening string, closing string}.
-// NOTE(review): presumably the text between an opening and a closing string
-// is skipped by the tokenizer -- confirm in xmlparser.cxx.
-static const char* PATTERN[][2] = {{"<script", "</script>"},
- {"<style", "</style>"},
- {"<code", "</code>"},
- {"<samp", "</samp>"},
- {"<kbd", "</kbd>"},
- {"<var", "</var>"},
- {"<listing", "</listing>"},
- {"<address", "</address>"},
- {"<pre", "</pre>"},
- {"<!--", "-->"}, // XML comment
- {"<[cdata[", "]]>"}, // CDATA section. NOTE(review): XML writes the opener
- // as "<![CDATA[" -- the '!' is absent here, confirm.
- {"<", ">"}};
-
-// Number of rows in PATTERN (each row is two char pointers).
-#define PATTERN_LEN (sizeof(PATTERN) / (sizeof(char*) * 2))
-
-// Second pattern table handed to XMLParser::next_token(): rows of
-// {tag opening, attribute name}.
-static const char* PATTERN2[][2] = {
- {"<img", "alt="}, // ALT and TITLE attributes are handled specially.
- {"<img", "title="},
- {"<a ", "title="}};
-
-// Number of rows in PATTERN2 (each row is two char pointers).
-#define PATTERN_LEN2 (sizeof(PATTERN2) / (sizeof(char*) * 2))
-
-// HTML supplies no third pattern table: a null pointer paired with length 0.
-static const char* (*PATTERN3)[2] = NULL;
-
-#define PATTERN_LEN3 0
-
-// Builds an HTMLParser; the word-character set goes unchanged to the
-// XMLParser base class.
-HTMLParser::HTMLParser(const char* wordchars) : XMLParser(wordchars) {}
-
-// Builds an HTMLParser from a w_char word-character array and its length;
-// both arguments go unchanged to the XMLParser base class.
-HTMLParser::HTMLParser(const w_char* wordchars, int len) : XMLParser(wordchars, len) {}
-
-// Fetches the next word into t by running the XMLParser tokenizer with the
-// three HTML pattern tables of this file, and returns that tokenizer's result.
-bool HTMLParser::next_token(std::string& t) {
-  const bool found = XMLParser::next_token(PATTERN, PATTERN_LEN,
-                                           PATTERN2, PATTERN_LEN2,
-                                           PATTERN3, PATTERN_LEN3, t);
-  return found;
-}
-
-// HTMLParser declares no data members of its own, so the body is empty.
-HTMLParser::~HTMLParser() {
-}
+++ /dev/null
-/* ***** BEGIN LICENSE BLOCK *****
- * Version: MPL 1.1/GPL 2.0/LGPL 2.1
- *
- * Copyright (C) 2002-2017 Németh László
- *
- * The contents of this file are subject to the Mozilla Public License Version
- * 1.1 (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- * http://www.mozilla.org/MPL/
- *
- * Software distributed under the License is distributed on an "AS IS" basis,
- * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
- * for the specific language governing rights and limitations under the
- * License.
- *
- * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
- *
- * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
- * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
- * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
- * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
- * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
- *
- * Alternatively, the contents of this file may be used under the terms of
- * either the GNU General Public License Version 2 or later (the "GPL"), or
- * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
- * in which case the provisions of the GPL or the LGPL are applicable instead
- * of those above. If you wish to allow use of your version of this file only
- * under the terms of either the GPL or the LGPL, and not to allow others to
- * use your version of this file under the terms of the MPL, indicate your
- * decision by deleting the provisions above and replace them with the notice
- * and other provisions required by the GPL or the LGPL. If you do not delete
- * the provisions above, a recipient may use your version of this file under
- * the terms of any one of the MPL, the GPL or the LGPL.
- *
- * ***** END LICENSE BLOCK ***** */
-
-#ifndef HTMLPARSER_HXX_
-#define HTMLPARSER_HXX_
-
-#include "xmlparser.hxx"
-
-/*
- * HTML Parser
- *
- */
-
-// XMLParser specialisation that tokenizes with the HTML pattern tables
-// defined in htmlparser.cxx.
-class HTMLParser : public XMLParser {
- public:
- // wc: word-character set, forwarded to the XMLParser constructor.
- explicit HTMLParser(const char* wc);
- // wordchars/len: w_char word-character array and its length, forwarded to
- // the XMLParser constructor.
- HTMLParser(const w_char* wordchars, int len);
- // Stores the next word in its argument; the return value is that of
- // XMLParser::next_token() called with the HTML tables.
- virtual bool next_token(std::string&);
- virtual ~HTMLParser();
-};
-
-#endif
+++ /dev/null
-/* ***** BEGIN LICENSE BLOCK *****
- * Version: MPL 1.1/GPL 2.0/LGPL 2.1
- *
- * Copyright (C) 2002-2017 Németh László
- *
- * The contents of this file are subject to the Mozilla Public License Version
- * 1.1 (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- * http://www.mozilla.org/MPL/
- *
- * Software distributed under the License is distributed on an "AS IS" basis,
- * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
- * for the specific language governing rights and limitations under the
- * License.
- *
- * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
- *
- * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
- * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
- * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
- * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
- * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
- *
- * Alternatively, the contents of this file may be used under the terms of
- * either the GNU General Public License Version 2 or later (the "GPL"), or
- * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
- * in which case the provisions of the GPL or the LGPL are applicable instead
- * of those above. If you wish to allow use of your version of this file only
- * under the terms of either the GPL or the LGPL, and not to allow others to
- * use your version of this file under the terms of the MPL, indicate your
- * decision by deleting the provisions above and replace them with the notice
- * and other provisions required by the GPL or the LGPL. If you do not delete
- * the provisions above, a recipient may use your version of this file under
- * the terms of any one of the MPL, the GPL or the LGPL.
- *
- * ***** END LICENSE BLOCK ***** */
-
-#include <cstdlib>
-#include <cstring>
-#include <cstdio>
-#include <ctype.h>
-
-#include "../hunspell/csutil.hxx"
-#include "latexparser.hxx"
-
-#ifndef W32
-using namespace std;
-#endif
-
-// UTF-8 byte sequence of U+2019 (right single quotation mark), used as a
-// typographic apostrophe.
-#define UTF8_APOS "\xe2\x80\x99"
-// Plain ASCII apostrophe.
-#define APOSTROPHE "'"
-
-// LaTeX constructs whose content must not be spell-checked.
-//
-// pat[0] is the opening string. When pat[1] is non-NULL it is the closing
-// string and everything up to it is skipped (state 2 of next_token()). When
-// pat[1] is NULL the entry is a command and `arg` is the number of {...}
-// arguments to skip (state 4 of next_token()).
-//
-// look_pattern() returns the first row that matches and only compares as many
-// characters as the row's string has, so row order matters: a longer string
-// must come before any row that is a prefix of it ("$$" before "$",
-// "\\includeonly" before "\\include"). Input is lower-cased before the
-// comparison, so the strings here are lower case.
-static struct {
- const char* pat[2];
- int arg;
-} PATTERN[] = {{{"\\(", "\\)"}, 0},
- {{"$$", "$$"}, 0},
- {{"$", "$"}, 0},
- {{"\\begin{math}", "\\end{math}"}, 0},
- {{"\\[", "\\]"}, 0},
- {{"\\begin{displaymath}", "\\end{displaymath}"}, 0},
- {{"\\begin{equation}", "\\end{equation}"}, 0},
- {{"\\begin{equation*}", "\\end{equation*}"}, 0},
- {{"\\cite", NULL}, 1},
- {{"\\nocite", NULL}, 1},
- {{"\\index", NULL}, 1},
- {{"\\label", NULL}, 1},
- {{"\\ref", NULL}, 1},
- {{"\\pageref", NULL}, 1},
- {{"\\autoref", NULL}, 1},
- {{"\\parbox", NULL}, 1},
- {{"\\begin{verbatim}", "\\end{verbatim}"}, 0},
- {{"\\verb+", "+"}, 0},
- {{"\\verb|", "|"}, 0},
- {{"\\verb#", "#"}, 0},
- {{"\\verb*", "*"}, 0},
- {{"\\documentstyle", "\\begin{document}"}, 0},
- {{"\\documentclass", "\\begin{document}"}, 0},
- // { { "\\documentclass", NULL } , 1 },
- {{"\\usepackage", NULL}, 1},
- {{"\\includeonly", NULL}, 1},
- {{"\\include", NULL}, 1},
- {{"\\input", NULL}, 1},
- {{"\\vspace", NULL}, 1},
- {{"\\setlength", NULL}, 2},
- {{"\\addtolength", NULL}, 2},
- {{"\\settowidth", NULL}, 2},
- {{"\\rule", NULL}, 2},
- {{"\\hspace", NULL}, 1},
- {{"\\vspace", NULL}, 1}, // NOTE(review): duplicate of the "\\vspace" row above; never reached.
- {{"\\\\[", "]"}, 0},
- {{"\\pagebreak[", "]"}, 0},
- {{"\\nopagebreak[", "]"}, 0},
- {{"\\enlargethispage", NULL}, 1},
- {{"\\begin{tabular}", NULL}, 1},
- {{"\\addcontentsline", NULL}, 2},
- {{"\\begin{thebibliography}", NULL}, 1},
- {{"\\bibliography", NULL}, 1},
- {{"\\bibliographystyle", NULL}, 1}, // NOTE(review): shadowed by the "\\bibliography" row above.
- {{"\\bibitem", NULL}, 1},
- {{"\\begin", NULL}, 1},
- {{"\\end", NULL}, 1},
- {{"\\pagestyle", NULL}, 1},
- {{"\\pagenumbering", NULL}, 1},
- {{"\\thispagestyle", NULL}, 1},
- {{"\\newtheorem", NULL}, 2},
- {{"\\newcommand", NULL}, 2},
- {{"\\renewcommand", NULL}, 2},
- {{"\\setcounter", NULL}, 2},
- {{"\\addtocounter", NULL}, 1},
- {{"\\stepcounter", NULL}, 1},
- {{"\\selectlanguage", NULL}, 1},
- {{"\\inputencoding", NULL}, 1},
- {{"\\hyphenation", NULL}, 1},
- {{"\\definecolor", NULL}, 3},
- {{"\\color", NULL}, 1},
- {{"\\textcolor", NULL}, 1},
- {{"\\pagecolor", NULL}, 1},
- {{"\\colorbox", NULL}, 2},
- {{"\\fcolorbox", NULL}, 2},
- {{"\\declaregraphicsextensions", NULL}, 1},
- {{"\\psfig", NULL}, 1},
- {{"\\url", NULL}, 1},
- {{"\\eqref", NULL}, 1},
- {{"\\vskip", NULL}, 1},
- {{"\\vglue", NULL}, 1},
- {{"\'\'", NULL}, 1}};
-
-// Number of rows in PATTERN.
-#define PATTERN_LEN (sizeof(PATTERN) / sizeof(PATTERN[0]))
-
-// Builds a LaTeXParser: the word-character set goes to the TextParser base
-// and every piece of scanner bookkeeping starts at zero.
-LaTeXParser::LaTeXParser(const char* wordchars)
-    : TextParser(wordchars),
-      pattern_num(0),
-      depth(0),
-      arg(0),
-      opt(0) {}
-
-// Builds a LaTeXParser from a w_char word-character array and its length:
-// both go to the TextParser base and every piece of scanner bookkeeping
-// starts at zero.
-LaTeXParser::LaTeXParser(const w_char* wordchars, int len)
-    : TextParser(wordchars, len),
-      pattern_num(0),
-      depth(0),
-      arg(0),
-      opt(0) {}
-
-// The class only holds plain int members, so the body is empty.
-LaTeXParser::~LaTeXParser() {
-}
-
-// Matches the text at line[actual][head] against one column of PATTERN.
-//
-// col selects the string compared in every row: 0 for the opening string,
-// 1 for the closing string. Rows whose selected string is NULL are skipped.
-// Each input byte is lower-cased before it is compared with the table string,
-// and only as many bytes as the table string holds are compared.
-//
-// Returns the index of the first matching row, or -1 when no row matches.
-int LaTeXParser::look_pattern(int col) {
-  // The scan position is the same for every row, so compute it once.
-  const char* const start = line[actual].c_str() + head;
-  for (unsigned int i = 0; i < PATTERN_LEN; i++) {
-    const char* k = PATTERN[i].pat[col];
-    if (!k)
-      continue;
-    const char* j = start;
-    // tolower() is only defined for unsigned char values and EOF; plain char
-    // may be negative for non-ASCII bytes, hence the cast. The terminating
-    // '\0' of the line never equals a non-'\0' table byte, so the loop cannot
-    // run past the end of the line.
-    while ((*k != '\0') && (tolower(static_cast<unsigned char>(*j)) == *k)) {
-      j++;
-      k++;
-    }
-    if (*k == '\0')
-      return i;
-  }
-  return -1;
-}
-
-/*
- * LaTeXParser
- *
- * state 0: not wordchar
- * state 1: wordchar
- * state 2: comments
- * state 3: commands
- * state 4: commands with arguments
- * state 5: % comment
- *
- */
-
-// Scans the current line from `head` and delivers the next word in t.
-//
-// t is cleared on entry. Returns true as soon as alloc_token() reports a
-// word; returns false when next_char() returns non-zero (presumably at the
-// end of the line -- confirm in textparser.cxx). `state` is kept between
-// calls, so a skipped region may continue on the next line; only the
-// '%' comment state (5) is reset when the line runs out.
-bool LaTeXParser::next_token(std::string& t) {
- t.clear();
- int i;
- int slash = 0; // set after a backslash inside command arguments (state 4)
- int apostrophe; // set when a word is ended by two consecutive apostrophes
- for (;;) {
- // fprintf(stderr,"depth: %d, state: %d, , arg: %d, token:
- // %s\n",depth,state,arg,line[actual]+head);
-
- switch (state) {
- case 0: // non word chars
- if ((pattern_num = look_pattern(0)) != -1) {
- // An opening string from PATTERN starts here.
- if (PATTERN[pattern_num].pat[1]) {
- state = 2; // skip until the matching closing string
- } else {
- state = 4; // command: skip its arguments
- depth = 0;
- arg = 0;
- opt = 1;
- }
- // Park head on the last character of the opening string.
- head += strlen(PATTERN[pattern_num].pat[0]) - 1;
- } else if (line[actual][head] == '%') {
- state = 5;
- } else if (is_wordchar(line[actual].c_str() + head)) {
- state = 1;
- token = head; // remember where the word starts
- } else if (line[actual][head] == '\\') {
- if (line[actual][head + 1] == '\\' || // \\ (linebreak)
- (line[actual][head + 1] == '$') || // \$ (dollar sign)
- (line[actual][head + 1] == '%')) { // \% (percent)
- head++; // step over the escaped character as well
- break;
- }
- state = 3;
- }
- break;
- case 1: // wordchar
- apostrophe = 0;
- // An apostrophe directly followed by a word character stays inside the
- // word, provided an apostrophe is among the word characters.
- if ((is_wordchar((char*)APOSTROPHE) ||
- (is_utf8() && is_wordchar((char*)UTF8_APOS))) &&
- !line[actual].empty() && line[actual][head] == '\'' &&
- is_wordchar(line[actual].c_str() + head + 1)) {
- head++;
- } else if (is_utf8() &&
- is_wordchar((char*)APOSTROPHE) && // add Unicode apostrophe
- // to the WORDCHARS, if
- // needed
- strncmp(line[actual].c_str() + head, UTF8_APOS, strlen(UTF8_APOS)) ==
- 0 &&
- is_wordchar(line[actual].c_str() + head + strlen(UTF8_APOS))) {
- head += strlen(UTF8_APOS) - 1;
- } else if (!is_wordchar(line[actual].c_str() + head) ||
- (line[actual][head] == '\'' && line[actual][head + 1] == '\'' &&
- ++apostrophe)) {
- // The word ends at a non-word character or at two apostrophes in a row.
- state = 0;
- bool ok = alloc_token(token, &head, t);
- if (apostrophe)
- head += 2; // step over both apostrophes
- if (ok)
- return true;
- }
- break;
- case 2: // comment, labels, etc
- // Leave the skipped region once the closing string of the pattern that
- // opened it is found.
- if (((i = look_pattern(1)) != -1) &&
- (strcmp(PATTERN[i].pat[1], PATTERN[pattern_num].pat[1]) == 0)) {
- state = 0;
- head += strlen(PATTERN[pattern_num].pat[1]) - 1;
- }
- break;
- case 3: // command
- // A command name runs while the characters are letters a-z (any case).
- // NOTE(review): tolower() is given a plain char here; negative values
- // (non-ASCII bytes) are outside its defined domain.
- if ((tolower(line[actual][head]) < 'a') ||
- (tolower(line[actual][head]) > 'z')) {
- state = 0;
- head--; // re-examine this character in state 0
- }
- break;
- case 4: // command with arguments
- if (slash && (line[actual][head] != '\0')) {
- // Character after a backslash: step over it without interpreting it.
- slash = 0;
- head++;
- break;
- } else if (line[actual][head] == '\\') {
- slash = 1;
- } else if ((line[actual][head] == '{') ||
- ((opt) && (line[actual][head] == '['))) {
- depth++;
- opt = 0;
- } else if (line[actual][head] == '}') {
- depth--;
- if (depth == 0) {
- // One complete top-level {...} argument has been consumed.
- opt = 1;
- arg++;
- }
- if (((depth == 0) && (arg == PATTERN[pattern_num].arg)) ||
- (depth < 0)) {
- state = 0; // XXX not handles the last optional arg.
- }
- } else if (line[actual][head] == ']')
- depth--;
- } // case
- if (next_char(line[actual].c_str(), &head)) {
- // A '%' comment lasts only until the end of its line.
- if (state == 5)
- state = 0;
- return false;
- }
- }
-}
+++ /dev/null
-/* ***** BEGIN LICENSE BLOCK *****
- * Version: MPL 1.1/GPL 2.0/LGPL 2.1
- *
- * Copyright (C) 2002-2017 Németh László
- *
- * The contents of this file are subject to the Mozilla Public License Version
- * 1.1 (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- * http://www.mozilla.org/MPL/
- *
- * Software distributed under the License is distributed on an "AS IS" basis,
- * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
- * for the specific language governing rights and limitations under the
- * License.
- *
- * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
- *
- * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
- * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
- * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
- * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
- * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
- *
- * Alternatively, the contents of this file may be used under the terms of
- * either the GNU General Public License Version 2 or later (the "GPL"), or
- * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
- * in which case the provisions of the GPL or the LGPL are applicable instead
- * of those above. If you wish to allow use of your version of this file only
- * under the terms of either the GPL or the LGPL, and not to allow others to
- * use your version of this file under the terms of the MPL, indicate your
- * decision by deleting the provisions above and replace them with the notice
- * and other provisions required by the GPL or the LGPL. If you do not delete
- * the provisions above, a recipient may use your version of this file under
- * the terms of any one of the MPL, the GPL or the LGPL.
- *
- * ***** END LICENSE BLOCK ***** */
-
-#ifndef LATEXPARSER_HXX_
-#define LATEXPARSER_HXX_
-
-#include "textparser.hxx"
-
-/*
- * LaTeX Parser
- *
- */
-
-// TextParser variant that skips LaTeX commands, their arguments, math and
-// '%' comments while extracting words.
-class LaTeXParser : public TextParser {
- int pattern_num; // PATTERN row found by the last look_pattern(0) call, -1 if none
- int depth; // current nesting depth of {...} / [...] in command arguments
- int arg; // number of completed top-level {...} arguments of the command
- int opt; // non-zero while a '[' may open an optional argument
-
- public:
- // wc: word-character set, forwarded to the TextParser constructor.
- explicit LaTeXParser(const char* wc);
- // wordchars/len: w_char word-character array and its length, forwarded to
- // the TextParser constructor.
- LaTeXParser(const w_char* wordchars, int len);
- virtual ~LaTeXParser();
-
- // Clears its argument and stores the next word in it; returns true when a
- // word was delivered, false otherwise.
- virtual bool next_token(std::string&);
-
- private:
- // Matches the text at the scan position against column col (0 = opening
- // string, 1 = closing string) of the pattern table; returns the row index
- // or -1.
- int look_pattern(int col);
-};
-
-#endif
+++ /dev/null
-/* ***** BEGIN LICENSE BLOCK *****
- * Version: MPL 1.1/GPL 2.0/LGPL 2.1
- *
- * Copyright (C) 2002-2017 Németh László
- *
- * The contents of this file are subject to the Mozilla Public License Version
- * 1.1 (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- * http://www.mozilla.org/MPL/
- *
- * Software distributed under the License is distributed on an "AS IS" basis,
- * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
- * for the specific language governing rights and limitations under the
- * License.
- *
- * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
- *
- * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
- * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
- * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
- * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
- * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
- *
- * Alternatively, the contents of this file may be used under the terms of
- * either the GNU General Public License Version 2 or later (the "GPL"), or
- * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
- * in which case the provisions of the GPL or the LGPL are applicable instead
- * of those above. If you wish to allow use of your version of this file only
- * under the terms of either the GPL or the LGPL, and not to allow others to
- * use your version of this file under the terms of the MPL, indicate your
- * decision by deleting the provisions above and replace them with the notice
- * and other provisions required by the GPL or the LGPL. If you do not delete
- * the provisions above, a recipient may use your version of this file under
- * the terms of any one of the MPL, the GPL or the LGPL.
- *
- * ***** END LICENSE BLOCK ***** */
-
-#include <cstdlib>
-#include <cstring>
-#include <cstdio>
-#include <ctype.h>
-
-#include "../hunspell/csutil.hxx"
-#include "manparser.hxx"
-
-#ifndef W32
-using namespace std;
-#endif
-
-// Builds a ManParser; the word-character set goes unchanged to the
-// TextParser base class.
-ManParser::ManParser(const char* wordchars) : TextParser(wordchars) {}
-
-// Builds a ManParser from a w_char word-character array and its length;
-// both arguments go unchanged to the TextParser base class.
-ManParser::ManParser(const w_char* wordchars, int len) : TextParser(wordchars, len) {}
-
-// ManParser declares no data members of its own, so the body is empty.
-ManParser::~ManParser() {
-}
-
-// Scans the current line from `head` and delivers the next word in t.
-//
-// States: 0 = start of line, 1 = inside the command word of a line that
-// begins with '.', 2 = between words, 3 = inside a word.
-// Returns true as soon as alloc_token() reports a word; returns false, with
-// the state reset to 0, when next_char() returns non-zero (presumably at the
-// end of the line -- confirm in textparser.cxx).
-// NOTE(review): unlike FirstParser::next_token and LaTeXParser::next_token,
-// t is not cleared on entry -- confirm callers never read t after a false
-// return.
-bool ManParser::next_token(std::string& t) {
- for (;;) {
- switch (state) {
- case 1: // command arguments
- // Skip the command word; the first space ends it.
- if (line[actual][head] == ' ')
- state = 2;
- break;
- case 0: // dot in begin of line
- // Note that column 0 of the line is tested here, not the position `head`.
- if (line[actual][0] == '.') {
- state = 1;
- break;
- } else {
- state = 2;
- }
- /* FALLTHROUGH */
- case 2: // non word chars
- if (is_wordchar(line[actual].c_str() + head)) {
- state = 3;
- token = head; // remember where the word starts
- } else if ((line[actual][head] == '\\') &&
- (line[actual][head + 1] == 'f') &&
- (line[actual][head + 2] != '\0')) {
- // Step over a three-character "\f?" escape (presumably a roff font
- // escape such as \fB -- confirm).
- head += 2;
- }
- break;
- case 3: // wordchar
- if (!is_wordchar(line[actual].c_str() + head)) {
- state = 2;
- if (alloc_token(token, &head, t))
- return true;
- }
- break;
- }
- if (next_char(line[actual].c_str(), &head)) {
- state = 0;
- return false;
- }
- }
-}
+++ /dev/null
-/* ***** BEGIN LICENSE BLOCK *****
- * Version: MPL 1.1/GPL 2.0/LGPL 2.1
- *
- * Copyright (C) 2002-2017 Németh László
- *
- * The contents of this file are subject to the Mozilla Public License Version
- * 1.1 (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- * http://www.mozilla.org/MPL/
- *
- * Software distributed under the License is distributed on an "AS IS" basis,
- * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
- * for the specific language governing rights and limitations under the
- * License.
- *
- * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
- *
- * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
- * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
- * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
- * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
- * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
- *
- * Alternatively, the contents of this file may be used under the terms of
- * either the GNU General Public License Version 2 or later (the "GPL"), or
- * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
- * in which case the provisions of the GPL or the LGPL are applicable instead
- * of those above. If you wish to allow use of your version of this file only
- * under the terms of either the GPL or the LGPL, and not to allow others to
- * use your version of this file under the terms of the MPL, indicate your
- * decision by deleting the provisions above and replace them with the notice
- * and other provisions required by the GPL or the LGPL. If you do not delete
- * the provisions above, a recipient may use your version of this file under
- * the terms of any one of the MPL, the GPL or the LGPL.
- *
- * ***** END LICENSE BLOCK ***** */
-
-#ifndef MANPARSER_HXX_
-#define MANPARSER_HXX_
-
-#include "textparser.hxx"
-
-/*
- * Man page Parser
- *
- */
-
-// TextParser variant that skips the command word of lines beginning with '.'
-// and "\f?" escapes while extracting words.
-class ManParser : public TextParser {
- protected:
- public:
- // wc: word-character set, forwarded to the TextParser constructor.
- explicit ManParser(const char* wc);
- // wordchars/len: w_char word-character array and its length, forwarded to
- // the TextParser constructor.
- ManParser(const w_char* wordchars, int len);
- virtual ~ManParser();
-
- // Stores the next word in its argument; returns true when a word was
- // delivered, false otherwise.
- virtual bool next_token(std::string&);
-};
-
-#endif
+++ /dev/null
-/* ***** BEGIN LICENSE BLOCK *****
- * Version: MPL 1.1/GPL 2.0/LGPL 2.1
- *
- * Copyright (C) 2002-2017 Németh László
- *
- * The contents of this file are subject to the Mozilla Public License Version
- * 1.1 (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- * http://www.mozilla.org/MPL/
- *
- * Software distributed under the License is distributed on an "AS IS" basis,
- * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
- * for the specific language governing rights and limitations under the
- * License.
- *
- * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
- *
- * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
- * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
- * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
- * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
- * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
- *
- * Alternatively, the contents of this file may be used under the terms of
- * either the GNU General Public License Version 2 or later (the "GPL"), or
- * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
- * in which case the provisions of the GPL or the LGPL are applicable instead
- * of those above. If you wish to allow use of your version of this file only
- * under the terms of either the GPL or the LGPL, and not to allow others to
- * use your version of this file under the terms of the MPL, indicate your
- * decision by deleting the provisions above and replace them with the notice
- * and other provisions required by the GPL or the LGPL. If you do not delete
- * the provisions above, a recipient may use your version of this file under
- * the terms of any one of the MPL, the GPL or the LGPL.
- *
- * ***** END LICENSE BLOCK ***** */
-
-#include <cstdlib>
-#include <cstring>
-#include <cstdio>
-#include <ctype.h>
-
-#include "../hunspell/csutil.hxx"
-#include "odfparser.hxx"
-
-#ifndef W32
-using namespace std;
-#endif
-
-// First pattern table handed to XMLParser::next_token(): rows of
-// {opening string, closing string}.
-// NOTE(review): presumably the text between an opening and a closing string
-// is skipped by the tokenizer -- confirm in xmlparser.cxx.
-static const char* PATTERN[][2] = {
- {"<office:meta>", "</office:meta>"},
- {"<office:settings>", "</office:settings>"},
- {"<office:binary-data>", "</office:binary-data>"},
- {"<!--", "-->"}, // XML comment
- {"<[cdata[", "]]>"}, // CDATA section. NOTE(review): XML writes the opener
- // as "<![CDATA[" -- the '!' is absent here, confirm.
- {"<", ">"}};
-
-// Number of rows in PATTERN (each row is two char pointers).
-#define PATTERN_LEN (sizeof(PATTERN) / (sizeof(char*) * 2))
-
-// ODF supplies no second pattern table: a null pointer paired with length 0.
-static const char* (*PATTERN2)[2] = NULL;
-
-#define PATTERN_LEN2 0
-
-// Third pattern table, handed to XMLParser::next_token() and to
-// XMLParser::get_word2(): opening and closing <text:span> tags.
-static const char* PATTERN3[][2] = {
- {"<text:span", ">"}, // part of the reedited words
- {"</text:span", ">"}}; // for example, an inserted letter
-
-// Number of rows in PATTERN3 (each row is two char pointers).
-#define PATTERN_LEN3 (sizeof(PATTERN3) / (sizeof(char*) * 2))
-
-// Builds an ODFParser; the word-character set goes unchanged to the
-// XMLParser base class.
-ODFParser::ODFParser(const char* wordchars) : XMLParser(wordchars) {}
-
-// Builds an ODFParser from a w_char word-character array and its length;
-// both arguments go unchanged to the XMLParser base class.
-ODFParser::ODFParser(const w_char* wordchars, int len) : XMLParser(wordchars, len) {}
-
-// Fetches the next word into t by running the XMLParser tokenizer with the
-// three ODF pattern tables of this file, and returns that tokenizer's result.
-bool ODFParser::next_token(std::string& t) {
-  const bool found = XMLParser::next_token(PATTERN, PATTERN_LEN,
-                                           PATTERN2, PATTERN_LEN2,
-                                           PATTERN3, PATTERN_LEN3, t);
-  return found;
-}
-
-// Returns what XMLParser::get_word2() produces for tok when it is given the
-// <text:span> pattern table (PATTERN3).
-std::string ODFParser::get_word(const std::string& tok) {
-  std::string word = XMLParser::get_word2(PATTERN3, PATTERN_LEN3, tok);
-  return word;
-}
-
-// ODFParser declares no data members of its own, so the body is empty.
-ODFParser::~ODFParser() {
-}
+++ /dev/null
-/* ***** BEGIN LICENSE BLOCK *****
- * Version: MPL 1.1/GPL 2.0/LGPL 2.1
- *
- * Copyright (C) 2002-2017 Németh László
- *
- * The contents of this file are subject to the Mozilla Public License Version
- * 1.1 (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- * http://www.mozilla.org/MPL/
- *
- * Software distributed under the License is distributed on an "AS IS" basis,
- * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
- * for the specific language governing rights and limitations under the
- * License.
- *
- * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
- *
- * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
- * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
- * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
- * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
- * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
- *
- * Alternatively, the contents of this file may be used under the terms of
- * either the GNU General Public License Version 2 or later (the "GPL"), or
- * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
- * in which case the provisions of the GPL or the LGPL are applicable instead
- * of those above. If you wish to allow use of your version of this file only
- * under the terms of either the GPL or the LGPL, and not to allow others to
- * use your version of this file under the terms of the MPL, indicate your
- * decision by deleting the provisions above and replace them with the notice
- * and other provisions required by the GPL or the LGPL. If you do not delete
- * the provisions above, a recipient may use your version of this file under
- * the terms of any one of the MPL, the GPL or the LGPL.
- *
- * ***** END LICENSE BLOCK ***** */
-
-#ifndef ODFPARSER_HXX_
-#define ODFPARSER_HXX_
-
-#include "xmlparser.hxx"
-
-/*
- * HTML Parser
- *
- */
-
-class ODFParser : public XMLParser {
- public:
- explicit ODFParser(const char* wc);
- ODFParser(const w_char* wordchars, int len);
- virtual bool next_token(std::string&);
- virtual std::string get_word(const std::string &tok);
- virtual ~ODFParser();
-};
-
-#endif
+++ /dev/null
-/* ***** BEGIN LICENSE BLOCK *****
- * Version: MPL 1.1/GPL 2.0/LGPL 2.1
- *
- * Copyright (C) 2002-2017 Németh László
- *
- * The contents of this file are subject to the Mozilla Public License Version
- * 1.1 (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- * http://www.mozilla.org/MPL/
- *
- * Software distributed under the License is distributed on an "AS IS" basis,
- * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
- * for the specific language governing rights and limitations under the
- * License.
- *
- * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
- *
- * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
- * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
- * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
- * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
- * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
- *
- * Alternatively, the contents of this file may be used under the terms of
- * either the GNU General Public License Version 2 or later (the "GPL"), or
- * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
- * in which case the provisions of the GPL or the LGPL are applicable instead
- * of those above. If you wish to allow use of your version of this file only
- * under the terms of either the GPL or the LGPL, and not to allow others to
- * use your version of this file under the terms of the MPL, indicate your
- * decision by deleting the provisions above and replace them with the notice
- * and other provisions required by the GPL or the LGPL. If you do not delete
- * the provisions above, a recipient may use your version of this file under
- * the terms of any one of the MPL, the GPL or the LGPL.
- *
- * ***** END LICENSE BLOCK ***** */
-
-#include <cstring>
-#include <cstdlib>
-#include <cstdio>
-
-#include "textparser.hxx"
-#include "htmlparser.hxx"
-#include "latexparser.hxx"
-#include "xmlparser.hxx"
-
-#ifndef W32
-using namespace std;
-#endif
-
-int main(int argc, char** argv) {
- FILE* f;
- /* first parse the command line options */
-
- if (argc < 2) {
- fprintf(stderr, "correct syntax is:\n");
- fprintf(stderr, "testparser file\n");
- fprintf(stderr, "example: testparser /dev/stdin\n");
- exit(1);
- }
-
- /* open the words to check list */
- f = fopen(argv[1], "r");
- if (!f) {
- fprintf(stderr, "Error - could not open file of words to check\n");
- exit(1);
- }
-
- TextParser* p = new TextParser(
- "qwertzuiopasdfghjklyxcvbnmQWERTZUIOPASDFGHJKLYXCVBNM");
-
- char buf[MAXLNLEN];
-
- while (fgets(buf, MAXLNLEN, f)) {
- p->put_line(buf);
- p->set_url_checking(1);
- std::string next;
- while (p->next_token(next)) {
- fprintf(stdout, "token: %s\n", next.c_str());
- }
- }
-
- delete p;
- fclose(f);
- return 0;
-}
+++ /dev/null
-/* ***** BEGIN LICENSE BLOCK *****
- * Version: MPL 1.1/GPL 2.0/LGPL 2.1
- *
- * Copyright (C) 2002-2017 Németh László
- *
- * The contents of this file are subject to the Mozilla Public License Version
- * 1.1 (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- * http://www.mozilla.org/MPL/
- *
- * Software distributed under the License is distributed on an "AS IS" basis,
- * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
- * for the specific language governing rights and limitations under the
- * License.
- *
- * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
- *
- * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
- * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
- * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
- * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
- * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
- *
- * Alternatively, the contents of this file may be used under the terms of
- * either the GNU General Public License Version 2 or later (the "GPL"), or
- * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
- * in which case the provisions of the GPL or the LGPL are applicable instead
- * of those above. If you wish to allow use of your version of this file only
- * under the terms of either the GPL or the LGPL, and not to allow others to
- * use your version of this file under the terms of the MPL, indicate your
- * decision by deleting the provisions above and replace them with the notice
- * and other provisions required by the GPL or the LGPL. If you do not delete
- * the provisions above, a recipient may use your version of this file under
- * the terms of any one of the MPL, the GPL or the LGPL.
- *
- * ***** END LICENSE BLOCK ***** */
-
-#include <cstdlib>
-#include <cstring>
-#include <cstdio>
-#include <ctype.h>
-
-#include "../hunspell/csutil.hxx"
-#include "textparser.hxx"
-
-#include <algorithm>
-
-#ifndef W32
-using namespace std;
-#endif
-
-// ISO-8859-1 HTML character entities
-
-static const char* LATIN1[] = {
- "À", "Ã", "Å", "Æ", "È", "Ê",
- "Ì", "Ï", "Ð", "Ñ", "Ò", "Ø",
- "Ù", "Þ", "à", "ã", "å", "æ",
- "è", "ê", "ì", "ï", "ð", "ñ",
- "ò", "ø", "ù", "þ", "ÿ"};
-
-#define LATIN1_LEN (sizeof(LATIN1) / sizeof(char*))
-
-#define ENTITY_APOS "'"
-#define UTF8_APOS "\xe2\x80\x99"
-#define APOSTROPHE "'"
-
-TextParser::TextParser(const char* wordchars) {
- init(wordchars);
-}
-
-TextParser::TextParser(const w_char* wordchars, int len) {
- init(wordchars, len);
-}
-
-TextParser::~TextParser() {}
-
-int TextParser::is_wordchar(const char* w) {
- if (*w == '\0')
- return 0;
- if (utf8) {
- std::vector<w_char> wc;
- unsigned short idx;
- u8_u16(wc, w);
- if (wc.empty())
- return 0;
- idx = (wc[0].h << 8) + wc[0].l;
- return (unicodeisalpha(idx) ||
- (wordchars_utf16 &&
- std::binary_search(wordchars_utf16, wordchars_utf16 + wclen, wc[0])));
- } else {
- return wordcharacters[(*w + 256) % 256];
- }
-}
-
-const char* TextParser::get_latin1(const char* s) {
- if (s[0] == '&') {
- unsigned int i = 0;
- while ((i < LATIN1_LEN) && strncmp(LATIN1[i], s, strlen(LATIN1[i])))
- i++;
- if (i != LATIN1_LEN)
- return LATIN1[i];
- }
- return NULL;
-}
-
-void TextParser::init(const char* wordchars) {
- actual = 0;
- head = 0;
- token = 0;
- state = 0;
- utf8 = 0;
- checkurl = 0;
- wordchars_utf16 = NULL;
- wclen = 0;
- wordcharacters.resize(256, 0);
- if (!wordchars)
- wordchars = "qwertzuiopasdfghjklyxcvbnmQWERTZUIOPASDFGHJKLYXCVBNM";
- for (unsigned int j = 0; j < strlen(wordchars); ++j) {
- wordcharacters[(wordchars[j] + 256) % 256] = 1;
- }
-}
-
-void TextParser::init(const w_char* wc, int len) {
- actual = 0;
- head = 0;
- token = 0;
- state = 0;
- utf8 = 1;
- checkurl = 0;
- wordchars_utf16 = wc;
- wclen = len;
-}
-
-int TextParser::next_char(const char* ln, size_t* pos) {
- if (*(ln + *pos) == '\0')
- return 1;
- if (utf8) {
- if (*(ln + *pos) >> 7) {
- // jump to next UTF-8 character
- for ((*pos)++; (*(ln + *pos) & 0xc0) == 0x80; (*pos)++)
- ;
- } else {
- (*pos)++;
- }
- } else
- (*pos)++;
- return 0;
-}
-
-void TextParser::put_line(const char* word) {
- actual = (actual + 1) % MAXPREVLINE;
- line[actual].assign(word);
- token = 0;
- head = 0;
- check_urls();
-}
-
-std::string TextParser::get_prevline(int n) const {
- return line[(actual + MAXPREVLINE - n) % MAXPREVLINE];
-}
-
-std::string TextParser::get_line() const {
- return get_prevline(0);
-}
-
-bool TextParser::next_token(std::string &t) {
- const char* latin1;
-
- for (;;) {
- switch (state) {
- case 0: // non word chars
- if (is_wordchar(line[actual].c_str() + head)) {
- state = 1;
- token = head;
- } else if ((latin1 = get_latin1(line[actual].c_str() + head))) {
- state = 1;
- token = head;
- head += strlen(latin1);
- }
- break;
- case 1: // wordchar
- if ((latin1 = get_latin1(line[actual].c_str() + head))) {
- head += strlen(latin1);
- } else if ((is_wordchar((char*)APOSTROPHE) ||
- (is_utf8() && is_wordchar((char*)UTF8_APOS))) &&
- !line[actual].empty() && line[actual][head] == '\'' &&
- is_wordchar(line[actual].c_str() + head + 1)) {
- head++;
- } else if (is_utf8() &&
- is_wordchar((char*)APOSTROPHE) && // add Unicode apostrophe
- // to the WORDCHARS, if
- // needed
- strncmp(line[actual].c_str() + head, UTF8_APOS, strlen(UTF8_APOS)) ==
- 0 &&
- is_wordchar(line[actual].c_str() + head + strlen(UTF8_APOS))) {
- head += strlen(UTF8_APOS) - 1;
- } else if (!is_wordchar(line[actual].c_str() + head)) {
- state = 0;
- if (alloc_token(token, &head, t))
- return true;
- }
- break;
- }
- if (next_char(line[actual].c_str(), &head))
- return false;
- }
-}
-
-size_t TextParser::get_tokenpos() {
- return token;
-}
-
-int TextParser::change_token(const char* word) {
- if (word) {
- std::string remainder(line[actual].substr(head));
- line[actual].resize(token);
- line[actual].append(word);
- line[actual].append(remainder);
- head = token;
- return 1;
- }
- return 0;
-}
-
-std::string TextParser::get_word(const std::string &tok) {
- return tok;
-}
-
-void TextParser::check_urls() {
- urlline.resize(line[actual].size() + 1);
- int url_state = 0;
- size_t url_head = 0;
- size_t url_token = 0;
- int url = 0;
- for (;;) {
- switch (url_state) {
- case 0: // non word chars
- if (is_wordchar(line[actual].c_str() + url_head)) {
- url_state = 1;
- url_token = url_head;
- // Unix path
- } else if (line[actual][url_head] == '/') {
- url_state = 1;
- url_token = url_head;
- url = 1;
- }
- break;
- case 1: // wordchar
- char ch = line[actual][url_head];
- // e-mail address
- if ((ch == '@') ||
- // MS-DOS, Windows path
- (strncmp(line[actual].c_str() + url_head, ":\\", 2) == 0) ||
- // URL
- (strncmp(line[actual].c_str() + url_head, "://", 3) == 0)) {
- url = 1;
- } else if (!(is_wordchar(line[actual].c_str() + url_head) || (ch == '-') ||
- (ch == '_') || (ch == '\\') || (ch == '.') ||
- (ch == ':') || (ch == '/') || (ch == '~') || (ch == '%') ||
- (ch == '*') || (ch == '$') || (ch == '[') || (ch == ']') ||
- (ch == '?') || (ch == '!') ||
- ((ch >= '0') && (ch <= '9')))) {
- url_state = 0;
- if (url == 1) {
- for (size_t i = url_token; i < url_head; ++i) {
- urlline[i] = true;
- }
- }
- url = 0;
- }
- break;
- }
- urlline[url_head] = false;
- if (next_char(line[actual].c_str(), &url_head))
- return;
- }
-}
-
-int TextParser::get_url(size_t token_pos, size_t* hd) {
- for (size_t i = *hd; i < line[actual].size() && urlline[i]; i++, (*hd)++)
- ;
- return checkurl ? 0 : urlline[token_pos];
-}
-
-void TextParser::set_url_checking(int check) {
- checkurl = check;
-}
-
-bool TextParser::alloc_token(size_t tokn, size_t* hd, std::string& t) {
- size_t url_head = *hd;
- if (get_url(tokn, &url_head))
- return false;
- t = line[actual].substr(tokn, *hd - tokn);
- // remove colon for Finnish and Swedish language
- if (!t.empty() && t[t.size() - 1] == ':') {
- t.resize(t.size() - 1);
- if (t.empty()) {
- return false;
- }
- }
- return true;
-}
+++ /dev/null
-/* ***** BEGIN LICENSE BLOCK *****
- * Version: MPL 1.1/GPL 2.0/LGPL 2.1
- *
- * Copyright (C) 2002-2017 Németh László
- *
- * The contents of this file are subject to the Mozilla Public License Version
- * 1.1 (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- * http://www.mozilla.org/MPL/
- *
- * Software distributed under the License is distributed on an "AS IS" basis,
- * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
- * for the specific language governing rights and limitations under the
- * License.
- *
- * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
- *
- * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
- * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
- * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
- * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
- * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
- *
- * Alternatively, the contents of this file may be used under the terms of
- * either the GNU General Public License Version 2 or later (the "GPL"), or
- * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
- * in which case the provisions of the GPL or the LGPL are applicable instead
- * of those above. If you wish to allow use of your version of this file only
- * under the terms of either the GPL or the LGPL, and not to allow others to
- * use your version of this file under the terms of the MPL, indicate your
- * decision by deleting the provisions above and replace them with the notice
- * and other provisions required by the GPL or the LGPL. If you do not delete
- * the provisions above, a recipient may use your version of this file under
- * the terms of any one of the MPL, the GPL or the LGPL.
- *
- * ***** END LICENSE BLOCK ***** */
-
-#ifndef TEXTPARSER_HXX_
-#define TEXTPARSER_HXX_
-
-// set sum of actual and previous lines
-#define MAXPREVLINE 4
-
-#ifndef MAXLNLEN
-#define MAXLNLEN 8192
-#endif
-
-#include "../hunspell/w_char.hxx"
-
-#include <vector>
-
-/*
- * Base Text Parser
- *
- */
-
-class TextParser {
- protected:
- std::vector<int> wordcharacters;// for detection of the word boundaries
- std::string line[MAXPREVLINE]; // parsed and previous lines
- std::vector<bool> urlline; // mask for url detection
- int checkurl;
- int actual; // actual line
- size_t head; // head position
- size_t token;// begin of token
- int state; // state of automata
- int utf8; // UTF-8 character encoding
- int next_char(const char* line, size_t* pos);
- const w_char* wordchars_utf16;
- int wclen;
-
- public:
- TextParser(const w_char* wordchars, int len);
- explicit TextParser(const char* wc);
- virtual ~TextParser();
-
- void put_line(const char* line);
- std::string get_line() const;
- std::string get_prevline(int n) const;
- virtual bool next_token(std::string&);
- virtual std::string get_word(const std::string &tok);
- virtual int change_token(const char* word);
- void set_url_checking(int check);
-
- size_t get_tokenpos();
- int is_wordchar(const char* w);
- inline int is_utf8() { return utf8; }
- const char* get_latin1(const char* s);
- char* next_char();
- int tokenize_urls();
- void check_urls();
- int get_url(size_t token_pos, size_t* head);
- bool alloc_token(size_t token, size_t* head, std::string& out);
-private:
- void init(const char*);
- void init(const w_char* wordchars, int len);
-};
-
-#endif
+++ /dev/null
-/* ***** BEGIN LICENSE BLOCK *****
- * Version: MPL 1.1/GPL 2.0/LGPL 2.1
- *
- * Copyright (C) 2002-2017 Németh László
- *
- * The contents of this file are subject to the Mozilla Public License Version
- * 1.1 (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- * http://www.mozilla.org/MPL/
- *
- * Software distributed under the License is distributed on an "AS IS" basis,
- * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
- * for the specific language governing rights and limitations under the
- * License.
- *
- * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
- *
- * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
- * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
- * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
- * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
- * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
- *
- * Alternatively, the contents of this file may be used under the terms of
- * either the GNU General Public License Version 2 or later (the "GPL"), or
- * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
- * in which case the provisions of the GPL or the LGPL are applicable instead
- * of those above. If you wish to allow use of your version of this file only
- * under the terms of either the GPL or the LGPL, and not to allow others to
- * use your version of this file under the terms of the MPL, indicate your
- * decision by deleting the provisions above and replace them with the notice
- * and other provisions required by the GPL or the LGPL. If you do not delete
- * the provisions above, a recipient may use your version of this file under
- * the terms of any one of the MPL, the GPL or the LGPL.
- *
- * ***** END LICENSE BLOCK ***** */
-
-#include <cstdlib>
-#include <cstring>
-#include <cstdio>
-#include <ctype.h>
-
-#include "../hunspell/csutil.hxx"
-#include "xmlparser.hxx"
-
-#ifndef W32
-using namespace std;
-#endif
-
-enum { ST_NON_WORD, ST_WORD, ST_TAG, ST_CHAR_ENTITY, ST_OTHER_TAG, ST_ATTRIB };
-
-static const char* __PATTERN__[][2] = {{"<!--", "-->"},
- {"<[cdata[", "]]>"}, // XML comment
- {"<", ">"}};
-
-#define __PATTERN_LEN__ (sizeof(__PATTERN__) / (sizeof(char*) * 2))
-
-// for checking attributes, eg. <img alt="text"> in HTML
-static const char* (*__PATTERN2__)[2] = NULL;
-
-#define __PATTERN_LEN2__ 0
-
-// for checking words with in-word patterns
-// for example, "exam<text:span>p</text:span>le" in ODT
-static const char* (*__PATTERN3__)[2] = NULL;
-
-#define __PATTERN_LEN3__ 0
-
-#define ENTITY_APOS "'"
-#define UTF8_APOS "\xe2\x80\x99"
-#define APOSTROPHE "'"
-
-XMLParser::XMLParser(const char* wordchars)
- : TextParser(wordchars)
- , pattern_num(0), pattern2_num(0), pattern3_num(0), prevstate(0), checkattr(0), quotmark(0) {
-}
-
-XMLParser::XMLParser(const w_char* wordchars, int len)
- : TextParser(wordchars, len)
- , pattern_num(0), pattern2_num(0), pattern3_num(0), prevstate(0), checkattr(0), quotmark(0) {
-}
-
-XMLParser::~XMLParser() {}
-
-int XMLParser::look_pattern(const char* p[][2], unsigned int len, int column) {
- for (unsigned int i = 0; i < len; i++) {
- const char* j = line[actual].c_str() + head;
- const char* k = p[i][column];
- while ((*k != '\0') && (tolower(*j) == *k)) {
- j++;
- k++;
- }
- if (*k == '\0')
- return i;
- }
- return -1;
-}
-
-/*
- * XML parser
- *
- */
-
-bool XMLParser::next_token(const char* PATTERN[][2],
- unsigned int PATTERN_LEN,
- const char* PATTERN2[][2],
- unsigned int PATTERN_LEN2,
- const char* PATTERN3[][2],
- unsigned int PATTERN_LEN3,
- std::string& t) {
- t.clear();
- const char* latin1;
-
- for (;;) {
- switch (state) {
- case ST_NON_WORD: // non word chars
- prevstate = ST_NON_WORD;
- if ((pattern_num = look_pattern(PATTERN, PATTERN_LEN, 0)) != -1) {
- checkattr = 0;
- if ((pattern2_num = look_pattern(PATTERN2, PATTERN_LEN2, 0)) != -1) {
- checkattr = 1;
- }
- state = ST_TAG;
- } else if (is_wordchar(line[actual].c_str() + head)) {
- state = ST_WORD;
- token = head;
- } else if ((latin1 = get_latin1(line[actual].c_str() + head))) {
- state = ST_WORD;
- token = head;
- head += strlen(latin1);
- } else if (line[actual][head] == '&') {
- state = ST_CHAR_ENTITY;
- }
- break;
- case ST_WORD: // wordchar
- if ((latin1 = get_latin1(line[actual].c_str() + head))) {
- head += strlen(latin1);
- } else if ((is_wordchar((char*)APOSTROPHE) ||
- (is_utf8() && is_wordchar((char*)UTF8_APOS))) &&
- strncmp(line[actual].c_str() + head, ENTITY_APOS,
- strlen(ENTITY_APOS)) == 0 &&
- is_wordchar(line[actual].c_str() + head + strlen(ENTITY_APOS))) {
- head += strlen(ENTITY_APOS) - 1;
- } else if (is_utf8() &&
- is_wordchar((char*)APOSTROPHE) && // add Unicode apostrophe
- // to the WORDCHARS, if
- // needed
- strncmp(line[actual].c_str() + head, UTF8_APOS, strlen(UTF8_APOS)) ==
- 0 &&
- is_wordchar(line[actual].c_str() + head + strlen(UTF8_APOS))) {
- head += strlen(UTF8_APOS) - 1;
- } else if (!is_wordchar(line[actual].c_str() + head)) {
- // in-word patterns
- if ((pattern3_num = look_pattern(PATTERN3, PATTERN_LEN3, 0)) != -1) {
- size_t pos = line[actual].find(PATTERN3[pattern3_num][1], head);
- if (pos != std::string::npos) {
- size_t endpos = pos + strlen(PATTERN3[pattern3_num][1]) - 1;
- if (is_wordchar(line[actual].c_str() + endpos + 1)) {
- head = endpos;
- break;
- }
- }
- }
- state = prevstate;
- // return with the token, except in the case of in-word patterns
- if (alloc_token(token, &head, t))
- return true;
- }
- break;
- case ST_TAG: // comment, labels, etc
- int i;
- if ((checkattr == 1) &&
- ((i = look_pattern(PATTERN2, PATTERN_LEN2, 1)) != -1) &&
- (strcmp(PATTERN2[i][0], PATTERN2[pattern2_num][0]) == 0)) {
- checkattr = 2;
- } else if ((checkattr > 0) && (line[actual][head] == '>')) {
- state = ST_NON_WORD;
- } else if (((i = look_pattern(PATTERN, PATTERN_LEN, 1)) != -1) &&
- (strcmp(PATTERN[i][1], PATTERN[pattern_num][1]) == 0)) {
- state = ST_NON_WORD;
- head += strlen(PATTERN[pattern_num][1]) - 1;
- } else if ((strcmp(PATTERN[pattern_num][0], "<") == 0) &&
- ((line[actual][head] == '"') ||
- (line[actual][head] == '\''))) {
- quotmark = line[actual][head];
- state = ST_ATTRIB;
- }
- break;
- case ST_ATTRIB: // non word chars
- prevstate = ST_ATTRIB;
- if (line[actual][head] == quotmark) {
- state = ST_TAG;
- if (checkattr == 2)
- checkattr = 1;
- // for IMG ALT
- } else if (is_wordchar(line[actual].c_str() + head) && (checkattr == 2)) {
- state = ST_WORD;
- token = head;
- } else if (line[actual][head] == '&') {
- state = ST_CHAR_ENTITY;
- }
- break;
- case ST_CHAR_ENTITY: // SGML element
- if ((tolower(line[actual][head]) == ';')) {
- state = prevstate;
- head--;
- }
- }
- if (next_char(line[actual].c_str(), &head))
- return false;
- }
- //FIXME No return, in function returning non-void
-}
-
-bool XMLParser::next_token(std::string& t) {
- return next_token(__PATTERN__, __PATTERN_LEN__, __PATTERN2__,
- __PATTERN_LEN2__, __PATTERN3__, __PATTERN_LEN3__, t);
-}
-
-// remove in-word patterns
-std::string XMLParser::get_word2(
- const char* PATTERN3[][2],
- unsigned int PATTERN_LEN3,
- const std::string &tok) {
- std::string word = tok;
- for (unsigned int i = 0; i < PATTERN_LEN3; i++) {
- size_t pos;
- while ((pos = word.find(PATTERN3[i][0])) != word.npos) {
- size_t endpos = word.find(PATTERN3[i][1], pos);
- if (endpos != word.npos) {
- word.erase(pos, endpos + strlen(PATTERN3[i][1]) - pos);
- } else
- return word;
- }
- }
- return word;
-}
-
-int XMLParser::change_token(const char* word) {
- if (strstr(word, APOSTROPHE) != NULL || strchr(word, '"') != NULL ||
- strchr(word, '&') != NULL || strchr(word, '<') != NULL ||
- strchr(word, '>') != NULL) {
- std::string r(word);
- mystrrep(r, "&", "__namp;__");
- mystrrep(r, "__namp;__", "&");
- mystrrep(r, APOSTROPHE, ENTITY_APOS);
- mystrrep(r, "\"", """);
- mystrrep(r, ">", ">");
- mystrrep(r, "<", "<");
- return TextParser::change_token(r.c_str());
- }
- return TextParser::change_token(word);
-}
+++ /dev/null
-/* ***** BEGIN LICENSE BLOCK *****
- * Version: MPL 1.1/GPL 2.0/LGPL 2.1
- *
- * Copyright (C) 2002-2017 Németh László
- *
- * The contents of this file are subject to the Mozilla Public License Version
- * 1.1 (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- * http://www.mozilla.org/MPL/
- *
- * Software distributed under the License is distributed on an "AS IS" basis,
- * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
- * for the specific language governing rights and limitations under the
- * License.
- *
- * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
- *
- * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
- * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
- * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
- * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
- * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
- *
- * Alternatively, the contents of this file may be used under the terms of
- * either the GNU General Public License Version 2 or later (the "GPL"), or
- * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
- * in which case the provisions of the GPL or the LGPL are applicable instead
- * of those above. If you wish to allow use of your version of this file only
- * under the terms of either the GPL or the LGPL, and not to allow others to
- * use your version of this file under the terms of the MPL, indicate your
- * decision by deleting the provisions above and replace them with the notice
- * and other provisions required by the GPL or the LGPL. If you do not delete
- * the provisions above, a recipient may use your version of this file under
- * the terms of any one of the MPL, the GPL or the LGPL.
- *
- * ***** END LICENSE BLOCK ***** */
-
-#ifndef XMLPARSER_HXX_
-#define XMLPARSER_HXX_
-
-#include "textparser.hxx"
-
-/*
- * XML Parser
- *
- */
-
-class XMLParser : public TextParser {
- public:
- explicit XMLParser(const char* wc);
- XMLParser(const w_char* wordchars, int len);
- bool next_token(const char* p[][2],
- unsigned int len,
- const char* p2[][2],
- unsigned int len2,
- const char* p3[][2],
- unsigned int len3,
- std::string&);
- virtual bool next_token(std::string&);
- std::string get_word2(const char* p2[][2],
- unsigned int len2,
- const std::string &tok);
- int change_token(const char* word);
- virtual ~XMLParser();
-
- private:
- int look_pattern(const char* p[][2], unsigned int len, int column);
- int pattern_num;
- int pattern2_num;
- int pattern3_num;
- int prevstate;
- int checkattr;
- char quotmark;
-};
-
-#endif
1.7.0/src/hunspell/suggestmgr.hxx \
1.7.0/src/hunspell/utf_info.hxx \
1.7.0/src/hunspell/w_char.hxx \
- 1.7.0/src/parsers/firstparser.cxx \
- 1.7.0/src/parsers/firstparser.hxx \
- 1.7.0/src/parsers/htmlparser.cxx \
- 1.7.0/src/parsers/htmlparser.hxx \
- 1.7.0/src/parsers/latexparser.cxx \
- 1.7.0/src/parsers/latexparser.hxx \
- 1.7.0/src/parsers/manparser.cxx \
- 1.7.0/src/parsers/manparser.hxx \
- 1.7.0/src/parsers/odfparser.cxx \
- 1.7.0/src/parsers/odfparser.hxx \
- 1.7.0/src/parsers/testparser.cxx \
- 1.7.0/src/parsers/textparser.cxx \
- 1.7.0/src/parsers/textparser.hxx \
- 1.7.0/src/parsers/xmlparser.cxx \
- 1.7.0/src/parsers/xmlparser.hxx \
1.7.0/src/win_api/config.h