1 /* ***** BEGIN LICENSE BLOCK *****
2 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4 * Copyright (C) 2002-2017 Németh László
6 * The contents of this file are subject to the Mozilla Public License Version
7 * 1.1 (the "License"); you may not use this file except in compliance with
8 * the License. You may obtain a copy of the License at
9 * http://www.mozilla.org/MPL/
11 * Software distributed under the License is distributed on an "AS IS" basis,
12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 * for the specific language governing rights and limitations under the
16 * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
18 * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
19 * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
20 * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
21 * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
22 * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
24 * Alternatively, the contents of this file may be used under the terms of
25 * either the GNU General Public License Version 2 or later (the "GPL"), or
26 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27 * in which case the provisions of the GPL or the LGPL are applicable instead
28 * of those above. If you wish to allow use of your version of this file only
29 * under the terms of either the GPL or the LGPL, and not to allow others to
30 * use your version of this file under the terms of the MPL, indicate your
31 * decision by deleting the provisions above and replace them with the notice
32 * and other provisions required by the GPL or the LGPL. If you do not delete
33 * the provisions above, a recipient may use your version of this file under
34 * the terms of any one of the MPL, the GPL or the LGPL.
36 * ***** END LICENSE BLOCK ***** */
43 #include "../hunspell/csutil.hxx"
44 #include "latexparser.hxx"
// PATTERN: table of LaTeX constructs recognised by look_pattern().
// Each entry is {{pat[0], pat[1]}, arg}:
//   pat[0] - string matched at the current position via look_pattern(0);
//   pat[1] - closing string matched via look_pattern(1), or NULL;
//   arg    - value compared against the parser's `arg` member in state 4
//            (presumably the number of arguments to consume - TODO confirm).
// In this table entries with a closing string use arg 0 and entries with a
// NULL closing string use a non-zero arg.
// look_pattern() lowercases the input character before comparing it with the
// pattern character, so every pattern here is written in lowercase.
53 } PATTERN[] = {{{"\\(", "\\)"}, 0},
56 {{"\\begin{math}", "\\end{math}"}, 0},
58 {{"\\begin{displaymath}", "\\end{displaymath}"}, 0},
59 {{"\\begin{equation}", "\\end{equation}"}, 0},
60 {{"\\begin{equation*}", "\\end{equation*}"}, 0},
// Commands without a closing string.
61 {{"\\cite", NULL}, 1},
62 {{"\\nocite", NULL}, 1},
63 {{"\\index", NULL}, 1},
64 {{"\\label", NULL}, 1},
66 {{"\\pageref", NULL}, 1},
67 {{"\\autoref", NULL}, 1},
68 {{"\\parbox", NULL}, 1},
69 {{"\\begin{verbatim}", "\\end{verbatim}"}, 0},
// \verb with each delimiter character listed explicitly.
70 {{"\\verb+", "+"}, 0},
71 {{"\\verb|", "|"}, 0},
72 {{"\\verb#", "#"}, 0},
73 {{"\\verb*", "*"}, 0},
// Region from \documentstyle / \documentclass up to \begin{document}.
74 {{"\\documentstyle", "\\begin{document}"}, 0},
75 {{"\\documentclass", "\\begin{document}"}, 0},
76 // { { "\\documentclass", NULL } , 1 },
77 {{"\\usepackage", NULL}, 1},
78 {{"\\includeonly", NULL}, 1},
79 {{"\\include", NULL}, 1},
80 {{"\\input", NULL}, 1},
81 {{"\\vspace", NULL}, 1},
82 {{"\\setlength", NULL}, 2},
83 {{"\\addtolength", NULL}, 2},
84 {{"\\settowidth", NULL}, 2},
85 {{"\\rule", NULL}, 2},
86 {{"\\hspace", NULL}, 1},
// NOTE(review): "\\vspace" already appears earlier in this table with the
// same arg value; this second entry looks redundant - confirm and remove.
87 {{"\\vspace", NULL}, 1},
89 {{"\\pagebreak[", "]"}, 0},
90 {{"\\nopagebreak[", "]"}, 0},
91 {{"\\enlargethispage", NULL}, 1},
92 {{"\\begin{tabular}", NULL}, 1},
93 {{"\\addcontentsline", NULL}, 2},
94 {{"\\begin{thebibliography}", NULL}, 1},
95 {{"\\bibliography", NULL}, 1},
96 {{"\\bibliographystyle", NULL}, 1},
97 {{"\\bibitem", NULL}, 1},
// Generic \begin, listed after the more specific \begin{...} entries above.
98 {{"\\begin", NULL}, 1},
100 {{"\\pagestyle", NULL}, 1},
101 {{"\\pagenumbering", NULL}, 1},
102 {{"\\thispagestyle", NULL}, 1},
103 {{"\\newtheorem", NULL}, 2},
104 {{"\\newcommand", NULL}, 2},
105 {{"\\renewcommand", NULL}, 2},
106 {{"\\setcounter", NULL}, 2},
107 {{"\\addtocounter", NULL}, 1},
108 {{"\\stepcounter", NULL}, 1},
109 {{"\\selectlanguage", NULL}, 1},
110 {{"\\inputencoding", NULL}, 1},
111 {{"\\hyphenation", NULL}, 1},
112 {{"\\definecolor", NULL}, 3},
113 {{"\\color", NULL}, 1},
114 {{"\\textcolor", NULL}, 1},
115 {{"\\pagecolor", NULL}, 1},
116 {{"\\colorbox", NULL}, 2},
117 {{"\\fcolorbox", NULL}, 2},
118 {{"\\declaregraphicsextensions", NULL}, 1},
119 {{"\\psfig", NULL}, 1},
120 {{"\\url", NULL}, 1},
121 {{"\\eqref", NULL}, 1},
122 {{"\\vskip", NULL}, 1},
123 {{"\\vglue", NULL}, 1},
// Two consecutive apostrophe characters.
124 {{"\'\'", NULL}, 1}};
// Number of entries in the PATTERN table (total array size divided by the
// size of one element).
126 #define PATTERN_LEN (sizeof(PATTERN) / sizeof(PATTERN[0]))
// Constructs a parser from a `const char*` word-character set, which is
// forwarded unchanged to the TextParser base class. The members pattern_num,
// depth, arg and opt all start at 0.
128 LaTeXParser::LaTeXParser(const char* wordchars)
129 : TextParser(wordchars)
130 , pattern_num(0), depth(0), arg(0), opt(0) {
// Constructs a parser from a `w_char` word-character array and its length
// `len`; both are forwarded unchanged to the TextParser base class. The
// members pattern_num, depth, arg and opt all start at 0.
133 LaTeXParser::LaTeXParser(const w_char* wordchars, int len)
134 : TextParser(wordchars, len)
135 , pattern_num(0), depth(0), arg(0), opt(0) {
// Destructor with an empty body: nothing is released explicitly here.
138 LaTeXParser::~LaTeXParser() {}
// Tries each PATTERN entry in turn, comparing the string in column `col`
// (0 or 1 of the entry's `pat` array) with the text at line[actual] + head.
// Callers in this file treat a return value of -1 as "no match" and any other
// value as an index into PATTERN.
140 int LaTeXParser::look_pattern(int col) {
141 for (unsigned int i = 0; i < PATTERN_LEN; i++) {
142 const char* j = line[actual].c_str() + head;
143 const char* k = PATTERN[i].pat[col];
// NOTE(review): for col == 1, k is NULL for every entry without a closing
// string; confirm that k is checked before the dereference in the loop below.
// The input character is lowercased before the comparison, the pattern
// character is not, so matching ignores the case of the input only.
// NOTE(review): *j is a plain char; tolower() is only defined for values
// representable as unsigned char (or EOF), so bytes >= 0x80 may need a cast
// to unsigned char on platforms where char is signed - confirm.
146 while ((*k != '\0') && (tolower(*j) == *k)) {
159 * state 0: not wordchar
163 * state 4: commands with arguments
// Scans line[actual] starting at offset `head` for the next token. The
// visible branches are labelled case 0, 2 and 4, and `t` is handed to
// alloc_token(), which presumably stores the extracted token text in it.
// NOTE(review): confirm the meaning of the bool return value against the
// complete function body.
168 bool LaTeXParser::next_token(std::string& t) {
174 // fprintf(stderr,"depth: %d, state: %d, , arg: %d, token:
175 // %s\n",depth,state,arg,line[actual]+head);
// A start pattern from PATTERN (column 0) is tested first; the '%',
// word-character and backslash checks are only reached when none matches.
178 case 0: // non word chars
179 if ((pattern_num = look_pattern(0)) != -1) {
// Entries with a closing string (pat[1] != NULL) are distinguished here from
// entries without one.
180 if (PATTERN[pattern_num].pat[1]) {
// Advance past the matched start string, less one character (presumably
// because the position is advanced once more afterwards - TODO confirm).
188 head += strlen(PATTERN[pattern_num].pat[0]) - 1;
189 } else if (line[actual][head] == '%') {
191 } else if (is_wordchar(line[actual].c_str() + head)) {
194 } else if (line[actual][head] == '\\') {
195 if (line[actual][head + 1] == '\\' || // \\ (linebreak)
196 (line[actual][head + 1] == '$') || // \$ (dollar sign)
197 (line[actual][head + 1] == '%')) { // \% (percent)
206 if (!is_wordchar(line[actual].c_str() + head) ||
207 (line[actual][head] == '\'' && line[actual][head + 1] == '\'' &&
210 bool ok = alloc_token(token, &head, t);
// A closing string (column 1) must match at the current position AND be
// textually equal to the closing string of the entry stored in pattern_num.
217 case 2: // comment, labels, etc
218 if (((i = look_pattern(1)) != -1) &&
219 (strcmp(PATTERN[i].pat[1], PATTERN[pattern_num].pat[1]) == 0)) {
// Advance past the matched closing string, less one character.
221 head += strlen(PATTERN[pattern_num].pat[1]) - 1;
// True when the current character, after lowercasing, is outside 'a'..'z'.
225 if ((tolower(line[actual][head]) < 'a') ||
226 (tolower(line[actual][head]) > 'z')) {
// '{' is always tested for in this state; '[' is only tested for when `opt`
// is non-zero.
231 case 4: // command with arguments
232 if (slash && (line[actual][head] != '\0')) {
236 } else if (line[actual][head] == '\\') {
238 } else if ((line[actual][head] == '{') ||
239 ((opt) && (line[actual][head] == '['))) {
242 } else if (line[actual][head] == '}') {
// One condition for returning to state 0: depth is 0 and `arg` equals the
// arg value of the matched PATTERN entry.
248 if (((depth == 0) && (arg == PATTERN[pattern_num].arg)) ||
250 state = 0; // XXX not handles the last optional arg.
252 } else if (line[actual][head] == ']')
255 if (next_char(line[actual].c_str(), &head)) {