]> git.lyx.org Git - features.git/blob - src/3rdparty/hunspell/1.3.3/src/parsers/latexparser.cxx
add stripped down hunspell 1.3.3
[features.git] / src / 3rdparty / hunspell / 1.3.3 / src / parsers / latexparser.cxx
1 #include <cstdlib>
2 #include <cstring>
3 #include <cstdio>
4 #include <ctype.h>
5
6 #include "../hunspell/csutil.hxx"
7 #include "latexparser.hxx"
8
9 #ifndef W32
10 using namespace std;
11 #endif
12
/*
 * LaTeX constructs recognised by look_pattern().
 *
 *   pat[0]  opening pattern, matched at the current input position. The input
 *           is lowercased before comparison, so patterns must be lowercase.
 *   pat[1]  closing pattern; when non-NULL the parser stays in state 2 until
 *           this pattern is seen. NULL means the construct is a command
 *           followed by brace arguments (state 4).
 *   arg     number of brace arguments consumed in state 4 (used only when
 *           pat[1] is NULL).
 *
 * look_pattern() returns the FIRST matching entry, so order matters: a
 * pattern that has another pattern as its prefix is only ever selected when
 * it is listed before that shorter pattern (e.g. "\\includeonly" precedes
 * "\\include", and the specific "\\begin{...}" entries precede "\\begin").
 */
static const struct {
        const char * pat[2];
        int arg;
} PATTERN[] = {
        /* math modes */
        { { "\\(", "\\)" } , 0 },
        { { "$$", "$$" } , 0 },
        { { "$", "$" } , 0 },
        { { "\\begin{math}", "\\end{math}" } , 0 },
        { { "\\[", "\\]" } , 0 },
        { { "\\begin{displaymath}", "\\end{displaymath}" } , 0 },
        { { "\\begin{equation}", "\\end{equation}" } , 0 },
        { { "\\begin{equation*}", "\\end{equation*}" } , 0 },
        /* references and labels */
        { { "\\cite", NULL } , 1 },
        { { "\\nocite", NULL } , 1 },
        { { "\\index", NULL } , 1 },
        { { "\\label", NULL } , 1 },
        { { "\\ref", NULL } , 1 },
        { { "\\pageref", NULL } , 1 },
        { { "\\parbox", NULL } , 1 },
        /* verbatim text */
        { { "\\begin{verbatim}", "\\end{verbatim}" } , 0 },
        { { "\\verb+", "+" } , 0 },
        { { "\\verb|", "|" } , 0 },
        { { "\\verb#", "#" } , 0 },
        { { "\\verb*", "*" } , 0 },
        /* preamble: everything up to \begin{document} */
        { { "\\documentstyle", "\\begin{document}" } , 0 },
        { { "\\documentclass", "\\begin{document}" } , 0 },
        { { "\\usepackage", NULL } , 1 },
        { { "\\includeonly", NULL } , 1 },
        { { "\\include", NULL } , 1 },
        { { "\\input", NULL } , 1 },
        /* lengths and spacing */
        { { "\\vspace", NULL } , 1 },
        { { "\\setlength", NULL } , 2 },
        { { "\\addtolength", NULL } , 2 },
        { { "\\settowidth", NULL } , 2 },
        { { "\\rule", NULL } , 2 },
        { { "\\hspace", NULL } , 1 },
        { { "\\\\[", "]" } , 0 },
        { { "\\pagebreak[", "]" } , 0 },
        { { "\\nopagebreak[", "]" } , 0 },
        { { "\\enlargethispage", NULL } , 1 },
        { { "\\begin{tabular}", NULL } , 1 },
        { { "\\addcontentsline", NULL } , 2 },
        /* bibliography */
        { { "\\begin{thebibliography}", NULL } , 1 },
        { { "\\bibliography", NULL } , 1 },
        { { "\\bibliographystyle", NULL } , 1 },
        { { "\\bibitem", NULL } , 1 },
        /* generic environments, page layout, definitions, counters */
        { { "\\begin", NULL } , 1 },
        { { "\\end", NULL } , 1 },
        { { "\\pagestyle", NULL } , 1 },
        { { "\\pagenumbering", NULL } , 1 },
        { { "\\thispagestyle", NULL } , 1 },
        { { "\\newtheorem", NULL } , 2 },
        { { "\\newcommand", NULL } , 2 },
        { { "\\renewcommand", NULL } , 2 },
        { { "\\setcounter", NULL } , 2 },
        { { "\\addtocounter", NULL } , 1 },
        { { "\\stepcounter", NULL } , 1 },
        { { "\\selectlanguage", NULL } , 1 },
        { { "\\inputencoding", NULL } , 1 },
        { { "\\hyphenation", NULL } , 1 },
        /* colors, graphics, links */
        { { "\\definecolor", NULL } , 3 },
        { { "\\color", NULL } , 1 },
        { { "\\textcolor", NULL } , 1 },
        { { "\\pagecolor", NULL } , 1 },
        { { "\\colorbox", NULL } , 2 },
        { { "\\fcolorbox", NULL } , 2 },
        { { "\\declaregraphicsextensions", NULL } , 1 },
        { { "\\psfig", NULL } , 1 },
        { { "\\url", NULL } , 1 },
        { { "\\eqref", NULL } , 1 },
        { { "\\vskip", NULL } , 1 },
        { { "\\vglue", NULL } , 1 },
        /* NOTE(review): two apostrophes are handled like a one-argument
         * command (no closing pattern, arg == 1) -- confirm this is intended. */
        { { "\'\'", NULL } , 1 }
};

/* Number of entries in PATTERN. */
#define PATTERN_LEN (sizeof(PATTERN) / sizeof(PATTERN[0]))
91
92 LaTeXParser::LaTeXParser(const char * wordchars)
93 {
94         init(wordchars);
95 }
96
97 LaTeXParser::LaTeXParser(unsigned short * wordchars, int len)
98 {
99         init(wordchars, len);
100 }
101
102 LaTeXParser::~LaTeXParser() 
103 {
104 }
105
106 int LaTeXParser::look_pattern(int col)
107 {
108         for (unsigned int i = 0; i < PATTERN_LEN; i++) {
109                 char * j = line[actual] + head;
110                 const char * k = PATTERN[i].pat[col];
111                 if (! k) continue;
112                 while ((*k != '\0') && (tolower(*j) == *k)) {
113                         j++;
114                         k++;
115                 }
116                 if (*k == '\0') return i;
117         }
118         return -1;
119 }
120
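/*
 * LaTeXParser states
 *
 * state 0: outside a word (non-word characters)
 * state 1: inside a word
 * state 2: inside a delimited region (a pattern with a closing pattern,
 *          e.g. math or verbatim), skipped up to its closing pattern
 * state 3: inside a command name
 * state 4: inside the arguments of a command
 * state 5: % comment, skipped to the end of the line
 *
 */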
132
133
134 char * LaTeXParser::next_token()
135 {
136         int i;
137         int slash = 0;
138         int apostrophe;
139         for (;;) {
140                 // fprintf(stderr,"depth: %d, state: %d, , arg: %d, token: %s\n",depth,state,arg,line[actual]+head);
141                 
142                 switch (state)
143                 {
144                 case 0: // non word chars
145                         if ((pattern_num = look_pattern(0)) != -1) {
146                                 if (PATTERN[pattern_num].pat[1]) {
147                                         state = 2;
148                                 } else {
149                                         state = 4;
150                                         depth = 0;
151                                         arg = 0;
152                                         opt = 1;
153                                 }
154                                 head += strlen(PATTERN[pattern_num].pat[0]) - 1;
155                         } else if ((line[actual][head] == '%')) {
156                                         state = 5;
157                         } else if (is_wordchar(line[actual] + head)) {
158                                 state = 1;
159                                 token = head;
160                         } else if (line[actual][head] == '\\') {
161                                 if (line[actual][head + 1] == '\\' ||  // \\ (linebreak)
162                                         (line[actual][head + 1] == '$') || // \$ (dollar sign)
163                                         (line[actual][head + 1] == '%')) { // \% (percent)
164                                         head++;
165                                         break;
166                                 }
167                                 state = 3;
168                         } else if (line[actual][head] == '%') {
169                                 if ((head==0) || (line[actual][head - 1] != '\\')) state = 5;
170                         }
171                         break;
172                 case 1: // wordchar
173                         apostrophe = 0;
174                         if (! is_wordchar(line[actual] + head) ||
175                           (line[actual][head] == '\'' && line[actual][head+1] == '\'' && ++apostrophe)) {
176                                 state = 0;
177                                 char * t = alloc_token(token, &head);
178                                 if (apostrophe) head += 2;
179                                 if (t) return t;
180                         }
181                         break;
182                 case 2: // comment, labels, etc
183                         if (((i = look_pattern(1)) != -1) && 
184                                 (strcmp(PATTERN[i].pat[1],PATTERN[pattern_num].pat[1]) == 0)) {
185                                         state = 0;
186                                         head += strlen(PATTERN[pattern_num].pat[1]) - 1;
187                         }
188                         break;
189                 case 3: // command
190                         if ((tolower(line[actual][head]) < 'a') || (tolower(line[actual][head]) > 'z')) {
191                                 state = 0;
192                                 head--;
193                         }
194                         break;
195                 case 4: // command with arguments
196                         if (slash && (line[actual][head] != '\0')) {
197                                 slash = 0;
198                                 head++;
199                                 break;
200                         } else if (line[actual][head]=='\\') {
201                                 slash = 1;
202                         } else if ((line[actual][head] == '{') ||
203                                 ((opt) && (line[actual][head] == '['))) {
204                                         depth++;
205                                         opt = 0;
206                         } else if (line[actual][head] == '}') {
207                                 depth--;
208                                 if (depth == 0) { 
209                                         opt = 1;
210                                         arg++;
211                                 }
212                                 if (((depth == 0) && (arg == PATTERN[pattern_num].arg)) ||
213                                         (depth < 0) ) {
214                                                 state = 0; // XXX not handles the last optional arg.
215                                 }
216                         } else if (line[actual][head] == ']') depth--;
217                 } // case
218                 if (next_char(line[actual], &head)) {
219                         if (state == 5) state = 0;
220                         return NULL;
221                 }
222         }
223 }