src/3rdparty/hunspell/1.3.3/src/parsers/latexparser.cxx

   1 #include <cstdlib>
   2 #include <cstring>
   3 #include <cstdio>
   4 #include <ctype.h>
   5
   6 #include "../hunspell/csutil.hxx"
   7 #include "latexparser.hxx"
   8
   9 #ifndef W32
  10 using namespace std;
  11 #endif
  12
  13 static struct {
  14         const char * pat[2];
  15         int arg;
  16 } PATTERN[] = {
  17         { { "\\(", "\\)" } , 0 },
  18         { { "$$", "$$" } , 0 },
  19         { { "$", "$" } , 0 },
  20         { { "\\begin{math}", "\\end{math}" } , 0 },
  21         { { "\\[", "\\]" } , 0 },
  22         { { "\\begin{displaymath}", "\\end{displaymath}" } , 0 },
  23         { { "\\begin{equation}", "\\end{equation}" } , 0 },
  24         { { "\\begin{equation*}", "\\end{equation*}" } , 0 },
  25         { { "\\cite", NULL } , 1 },
  26         { { "\\nocite", NULL } , 1 },
  27         { { "\\index", NULL } , 1 },
  28         { { "\\label", NULL } , 1 },
  29         { { "\\ref", NULL } , 1 },
  30         { { "\\pageref", NULL } , 1 },
  31         { { "\\parbox", NULL } , 1 },
  32         { { "\\begin{verbatim}", "\\end{verbatim}" } , 0 },
  33         { { "\\verb+", "+" } , 0 },
  34         { { "\\verb|", "|" } , 0 },
  35         { { "\\verb#", "#" } , 0 },
  36         { { "\\verb*", "*" } , 0 },
  37         { { "\\documentstyle", "\\begin{document}" } , 0 },
  38         { { "\\documentclass", "\\begin{document}" } , 0 },
  39 //      { { "\\documentclass", NULL } , 1 },
  40         { { "\\usepackage", NULL } , 1 },
  41         { { "\\includeonly", NULL } , 1 },
  42         { { "\\include", NULL } , 1 },
  43         { { "\\input", NULL } , 1 },
  44         { { "\\vspace", NULL } , 1 },
  45         { { "\\setlength", NULL } , 2 },
  46         { { "\\addtolength", NULL } , 2 },
  47         { { "\\settowidth", NULL } , 2 },
  48         { { "\\rule", NULL } , 2 },
  49         { { "\\hspace", NULL } , 1 } ,
  50         { { "\\vspace", NULL } , 1 } ,
  51         { { "\\\\[", "]" } , 0 },
  52         { { "\\pagebreak[", "]" } , 0 } ,
  53         { { "\\nopagebreak[", "]" } , 0 } ,
  54         { { "\\enlargethispage", NULL } , 1 } ,
  55         { { "\\begin{tabular}", NULL } , 1 } ,
  56         { { "\\addcontentsline", NULL } , 2 } ,
  57         { { "\\begin{thebibliography}", NULL } , 1 } ,
  58         { { "\\bibliography", NULL } , 1 } ,
  59         { { "\\bibliographystyle", NULL } , 1 } ,
  60         { { "\\bibitem", NULL } , 1 } ,
  61         { { "\\begin", NULL } , 1 } ,
  62         { { "\\end", NULL } , 1 } ,
  63         { { "\\pagestyle", NULL } , 1 } ,
  64         { { "\\pagenumbering", NULL } , 1 } ,
  65         { { "\\thispagestyle", NULL } , 1 } ,
  66         { { "\\newtheorem", NULL } , 2 },
  67         { { "\\newcommand", NULL } , 2 },
  68         { { "\\renewcommand", NULL } , 2 },
  69         { { "\\setcounter", NULL } , 2 },
  70         { { "\\addtocounter", NULL } , 1 },
  71         { { "\\stepcounter", NULL } , 1 },
  72         { { "\\selectlanguage", NULL } , 1 },
  73         { { "\\inputencoding", NULL } , 1 },
  74         { { "\\hyphenation", NULL } , 1 },
  75         { { "\\definecolor", NULL } , 3 },
  76         { { "\\color", NULL } , 1 },
  77         { { "\\textcolor", NULL } , 1 },
  78         { { "\\pagecolor", NULL } , 1 },
  79         { { "\\colorbox", NULL } , 2 },
  80         { { "\\fcolorbox", NULL } , 2 },
  81         { { "\\declaregraphicsextensions", NULL } , 1 },
  82         { { "\\psfig", NULL } , 1 },
  83         { { "\\url", NULL } , 1 },
  84         { { "\\eqref", NULL } , 1 },
  85         { { "\\vskip", NULL } , 1 },
  86         { { "\\vglue", NULL } , 1 },
  87         { { "\'\'", NULL } , 1 }
  88 };
  89
  90 #define PATTERN_LEN (sizeof(PATTERN) / sizeof(PATTERN[0]))
  91
  92 LaTeXParser::LaTeXParser(const char * wordchars)
  93 {
  94         init(wordchars);
  95 }
  96
  97 LaTeXParser::LaTeXParser(unsigned short * wordchars, int len)
  98 {
  99         init(wordchars, len);
 100 }
 101
 102 LaTeXParser::~LaTeXParser()
 103 {
 104 }
 105
 106 int LaTeXParser::look_pattern(int col)
 107 {
 108         for (unsigned int i = 0; i < PATTERN_LEN; i++) {
 109                 char * j = line[actual] + head;
 110                 const char * k = PATTERN[i].pat[col];
 111                 if (! k) continue;
 112                 while ((*k != '\0') && (tolower(*j) == *k)) {
 113                         j++;
 114                         k++;
 115                 }
 116                 if (*k == '\0') return i;
 117         }
 118         return -1;
 119 }
 120
 121 /*
 122  * LaTeXParser
 123  *
 124  * state 0: not wordchar
 125  * state 1: wordchar
 126  * state 2: comments
 127  * state 3: commands
 128  * state 4: commands with arguments
 129  * state 5: % comment
 130  *
 131  */
 132
 133
 134 char * LaTeXParser::next_token()
 135 {
 136         int i;
 137         int slash = 0;
 138         int apostrophe;
 139         for (;;) {
 140                 // fprintf(stderr,"depth: %d, state: %d, , arg: %d, token: %s\n",depth,state,arg,line[actual]+head);
 141
 142                 switch (state)
 143                 {
 144                 case 0: // non word chars
 145                         if ((pattern_num = look_pattern(0)) != -1) {
 146                                 if (PATTERN[pattern_num].pat[1]) {
 147                                         state = 2;
 148                                 } else {
 149                                         state = 4;
 150                                         depth = 0;
 151                                         arg = 0;
 152                                         opt = 1;
 153                                 }
 154                                 head += strlen(PATTERN[pattern_num].pat[0]) - 1;
 155                         } else if ((line[actual][head] == '%')) {
 156                                         state = 5;
 157                         } else if (is_wordchar(line[actual] + head)) {
 158                                 state = 1;
 159                                 token = head;
 160                         } else if (line[actual][head] == '\\') {
 161                                 if (line[actual][head + 1] == '\\' ||  // \\ (linebreak)
 162                                         (line[actual][head + 1] == '$') || // \$ (dollar sign)
 163                                         (line[actual][head + 1] == '%')) { // \% (percent)
 164                                         head++;
 165                                         break;
 166                                 }
 167                                 state = 3;
 168                         } else if (line[actual][head] == '%') {
 169                                 if ((head==0) || (line[actual][head - 1] != '\\')) state = 5;
 170                         }
 171                         break;
 172                 case 1: // wordchar
 173                         apostrophe = 0;
 174                         if (! is_wordchar(line[actual] + head) ||
 175                           (line[actual][head] == '\'' && line[actual][head+1] == '\'' && ++apostrophe)) {
 176                                 state = 0;
 177                                 char * t = alloc_token(token, &head);
 178                                 if (apostrophe) head += 2;
 179                                 if (t) return t;
 180                         }
 181                         break;
 182                 case 2: // comment, labels, etc
 183                         if (((i = look_pattern(1)) != -1) &&
 184                                 (strcmp(PATTERN[i].pat[1],PATTERN[pattern_num].pat[1]) == 0)) {
 185                                         state = 0;
 186                                         head += strlen(PATTERN[pattern_num].pat[1]) - 1;
 187                         }
 188                         break;
 189                 case 3: // command
 190                         if ((tolower(line[actual][head]) < 'a') || (tolower(line[actual][head]) > 'z')) {
 191                                 state = 0;
 192                                 head--;
 193                         }
 194                         break;
 195                 case 4: // command with arguments
 196                         if (slash && (line[actual][head] != '\0')) {
 197                                 slash = 0;
 198                                 head++;
 199                                 break;
 200                         } else if (line[actual][head]=='\\') {
 201                                 slash = 1;
 202                         } else if ((line[actual][head] == '{') ||
 203                                 ((opt) && (line[actual][head] == '['))) {
 204                                         depth++;
 205                                         opt = 0;
 206                         } else if (line[actual][head] == '}') {
 207                                 depth--;
 208                                 if (depth == 0) {
 209                                         opt = 1;
 210                                         arg++;
 211                                 }
 212                                 if (((depth == 0) && (arg == PATTERN[pattern_num].arg)) ||
 213                                         (depth < 0) ) {
 214                                                 state = 0; // XXX not handles the last optional arg.
 215                                 }
 216                         } else if (line[actual][head] == ']') depth--;
 217                 } // case
 218                 if (next_char(line[actual], &head)) {
 219                         if (state == 5) state = 0;
 220                         return NULL;
 221                 }
 222         }
 223 }