]> git.lyx.org Git - lyx.git/blob - src/tex2lyx/table.C
prevent tex2lyx segfault
[lyx.git] / src / tex2lyx / table.C
1 /**
2  * \file table.C
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author André Pönitz
7  * \author Jean-Marc Lasgouttes
8  *
9  * Full author contact details are available in file CREDITS.
10  */
11
12 // {[(
13
14 #include <config.h>
15
16 #include "tex2lyx.h"
17
18 #include <cctype>
19 #include <fstream>
20 #include <iostream>
21 #include <sstream>
22 #include <vector>
23 #include <map>
24
25 using std::cerr;
26 using std::endl;
27 using std::istringstream;
28 using std::ostream;
29 using std::ostringstream;
30 using std::string;
31 using std::vector;
32
33 #include "mathed/math_gridinfo.h"
34
// Maps the character of a user-defined column type to the number of
// brace-delimited arguments that follow it in a column specification
// (consumed by handle_colalign() below).
// filled in preamble.C
std::map<char, int> special_columns;
37
38
39 namespace {
40
/**
 * Parse a leading decimal integer from \p s.
 *
 * \param s      text to parse; trailing non-numeric characters are ignored
 * \param deflt  value returned when \p s does not start with an integer
 * \return the parsed number, or \p deflt if extraction fails
 */
int string2int(std::string const & s, int deflt = 0)
{
        std::istringstream is(s);
        int i = deflt;
        // Since C++11 a failed extraction stores 0 in the target, so the
        // default has to be returned explicitly instead of relying on the
        // initial value of i surviving.
        if (!(is >> i))
                return deflt;
        return i;
}
48
49
50 string read_hlines(Parser & p)
51 {
52         ostringstream os;
53         p.skip_spaces();
54         while (p.good()) {
55                 if (p.next_token().cs() == "hline") {
56                         p.get_token();
57                         os << "\\hline";
58                 } else if (p.next_token().cs() == "cline") {
59                         p.get_token();
60                         os << "\\cline{" << p.verbatim_item() << "}";
61                 } else
62                         break;
63                 p.skip_spaces();
64         };
65         //cerr << "read_hlines(), read: '" << os.str() << "'\n";
66         //cerr << "read_hlines(), next token: " << p.next_token() << "\n";
67         return os.str();
68 }
69
70
/* rather brutish way to code table structure in a string:

  \begin{tabular}{ccc}
    1 & 2 & 3\\ \hline
    \multicolumn{2}{c}{4} & 5 \\
    6 & 7 \\
  \end{tabular}

 gets "translated" to:

  1 TAB 2 TAB 3 LINE
  \hline HLINE \multicolumn{2}{c}{4} TAB 5 LINE
  6 TAB 7 LINE
*/
85
// In-band separator characters written by parse_table() and split on
// again by handle_tabular().
const char TAB   = '\x01';  // between two cells of a row
const char LINE  = '\x02';  // end of a table row
const char HLINE = '\x04';  // between horizontal-rule commands and cell text
89
90
91 void handle_colalign(Parser & p, vector<ColInfo> & colinfo)
92 {
93         if (p.get_token().cat() != catBegin)
94                 cerr << "wrong syntax for table column alignment. '{' expected\n";
95
96         char nextalign = 'b';
97         bool leftline = false;
98         for (Token t=p.get_token(); p.good() && t.cat() != catEnd; t = p.get_token()){
99 #ifdef FILEDEBUG
100                 cerr << "t: " << t << "  c: '" << t.character() << "'\n";
101 #endif
102
103                 // We cannot handle comments here
104                 if (t.cat() == catComment) {
105                         if (t.cs().empty()) {
106                                 // "%\n" combination
107                                 p.skip_spaces();
108                         } else
109                                 cerr << "Ignoring comment: " << t.asInput();
110                         continue;
111                 }
112
113                 switch (t.character()) {
114                         case 'c':
115                         case 'l':
116                         case 'r': {
117                                 ColInfo ci;
118                                 ci.align = t.character();
119                                 if (colinfo.size() && colinfo.back().rightline > 1) {
120                                         ci.leftline = true;
121                                         --colinfo.back().rightline;
122                                 }
123                                 colinfo.push_back(ci);
124                                 break;
125                         }
126                         case 'p':
127                                 colinfo.push_back(ColInfo());
128                                 colinfo.back().align = nextalign;
129                                 colinfo.back().width = p.verbatim_item();
130                                 nextalign = 'b';
131                                 break;
132                         case '|':
133                                 if (colinfo.empty())
134                                         leftline = true;
135                                 else
136                                         ++colinfo.back().rightline;
137                                 break;
138                         case '>': {
139                                 string s = p.verbatim_item();
140                                 if (s == "\\raggedleft ")
141                                         nextalign = 'l';
142                                 else if (s == "\\raggedright ")
143                                         nextalign = 'r';
144                                 else
145                                         cerr << "unknown '>' column '" << s << "'\n";
146                                 break;
147                         }
148                         default:
149                                 if (special_columns.find(t.character()) != special_columns.end()) {
150                                         ColInfo ci;
151                                         ci.align = 'c';
152                                         ci.special += t.character();
153                                         int const nargs = special_columns[t.character()];
154                                         for (int i = 0; i < nargs; ++i)
155                                                 ci.special += "{" + p.verbatim_item() + "}";
156                                         //cerr << "handling special column '" << t << "' " << nargs
157                                         //      << "  '" << ci.special << "'\n";
158                                         colinfo.push_back(ci);
159                                 } else {
160                                         cerr << "ignoring special separator '" << t << "'\n";
161                                 }
162                                 break;
163                         }
164         }
165         if (colinfo.size() && leftline)
166                 colinfo[0].leftline = true;
167 }
168
169
170 } // anonymous namespace
171
172
173 void parse_table(Parser & p, ostream & os, unsigned flags)
174 {
175         string hlines;
176
177         while (p.good()) {
178                 Token const & t = p.get_token();
179
180 #ifdef FILEDEBUG
181                 cerr << "t: " << t << " flags: " << flags << "\n";
182 #endif
183
184                 //
185                 // cat codes
186                 //
187                 if (t.cat() == catMath) {
188                         // we are inside some text mode thingy, so opening new math is allowed
189                         Token const & n = p.get_token();
190                         if (n.cat() == catMath) {
191                                 // TeX's $$...$$ syntax for displayed math
192                                 os << "\\[";
193                                 parse_math(p, os, FLAG_SIMPLE, MATH_MODE);
194                                 os << "\\]";
195                                 p.get_token(); // skip the second '$' token
196                         } else {
197                                 // simple $...$  stuff
198                                 p.putback();
199                                 os << '$';
200                                 parse_math(p, os, FLAG_SIMPLE, MATH_MODE);
201                                 os << '$';
202                         }
203                 }
204
205                 else if (t.cat() == catSpace || t.cat() == catNewline)
206                                 os << t.cs();
207
208                 else if (t.cat() == catLetter ||
209                                t.cat() == catSuper ||
210                                t.cat() == catSub ||
211                                t.cat() == catOther ||
212                                t.cat() == catActive ||
213                                t.cat() == catParameter)
214                         os << t.character();
215
216                 else if (t.cat() == catBegin) {
217                         os << '{';
218                         parse_table(p, os, FLAG_BRACE_LAST);
219                         os << '}';
220                 }
221
222                 else if (t.cat() == catEnd) {
223                         if (flags & FLAG_BRACE_LAST)
224                                 return;
225                         cerr << "unexpected '}'\n";
226                 }
227
228                 else if (t.cat() == catAlign) {
229                         os << TAB;
230                         p.skip_spaces();
231                 }
232
233                 else if (t.cs() == "tabularnewline" || t.cs() == "\\" ||
234                          t.cs() == "cr") {
235                         if (t.cs() == "cr")
236                                 cerr << "Warning: Converting TeX '\\cr' to LaTeX '\\\\'."
237                                      << endl;
238                         // stuff before the line break
239                         // and look ahead for stuff after the line break
240                         os << HLINE << hlines << HLINE << LINE << read_hlines(p) << HLINE;
241                         hlines.erase();
242                 }
243
244                 else if (t.cs() == "hline")
245                         hlines += "\\hline";
246
247                 else if (t.cs() == "cline")
248                         hlines += "\\cline{" + p.verbatim_item() + '}';
249
250                 else if (t.cat() == catComment)
251                         os << t.asInput();
252
253                 else if (t.cs() == "(") {
254                         os << "\\(";
255                         parse_math(p, os, FLAG_SIMPLE2, MATH_MODE);
256                         os << "\\)";
257                 }
258
259                 else if (t.cs() == "[") {
260                         os << "\\[";
261                         parse_math(p, os, FLAG_EQUATION, MATH_MODE);
262                         os << "\\]";
263                 }
264
265                 else if (t.cs() == "begin") {
266                         string const name = p.getArg('{', '}');
267                         active_environments.push_back(name);
268                         os << "\\begin{" << name << '}';
269                         if (is_math_env(name)) {
270                                 parse_math(p, os, FLAG_END, MATH_MODE);
271                         } else {
272                                 parse_table(p, os, FLAG_END);
273                         }
274                         os << "\\end{" << name << '}';
275                         active_environments.pop_back();
276                 }
277
278                 else if (t.cs() == "end") {
279                         if (flags & FLAG_END) {
280                                 // eat environment name
281                                 string const name = p.getArg('{', '}');
282                                 if (name != active_environment())
283                                         p.error("\\end{" + name + "} does not match \\begin{"
284                                                 + active_environment() + "}");
285                                 return;
286                         }
287                         p.error("found 'end' unexpectedly");
288                 }
289
290                 else
291                         os << t.asInput();
292         }
293 }
294
295
296 void handle_hline_above(RowInfo & ri, vector<CellInfo> & ci)
297 {
298         ri.topline = true;
299         for (size_t col = 0; col < ci.size(); ++col)
300                 ci[col].topline = true;
301 }
302
303
304 void handle_hline_below(RowInfo & ri, vector<CellInfo> & ci)
305 {
306         ri.bottomline = true;
307         for (size_t col = 0; col < ci.size(); ++col)
308                 ci[col].bottomline = true;
309 }
310
311
312 void handle_tabular(Parser & p, ostream & os,
313                     Context & context)
314 {
315         string posopts = p.getOpt();
316         if (!posopts.empty())
317                 cerr << "vertical tabular positioning '" << posopts << "' ignored\n";
318
319         vector<ColInfo>            colinfo;
320
321         // handle column formatting
322         handle_colalign(p, colinfo);
323
324         // handle initial hlines
325
326         // first scan of cells
327         // use table mode to keep it minimal-invasive
328         // not exactly what's TeX doing...
329         vector<string> lines;
330         ostringstream ss;
331         ss << read_hlines(p) << HLINE; // handle initial hlines
332         parse_table(p, ss, FLAG_END);
333         split(ss.str(), lines, LINE);
334
335         vector< vector<CellInfo> > cellinfo(lines.size());
336         vector<RowInfo> rowinfo(lines.size());
337
338         // split into rows
339         //cerr << "// split into rows\n";
340         for (size_t row = 0; row < rowinfo.size(); ++row) {
341
342                 // init row
343                 cellinfo[row].resize(colinfo.size());
344
345                 // split row
346                 vector<string> dummy;
347                 //cerr << "\n########### LINE: " << lines[row] << "########\n";
348                 split(lines[row], dummy, HLINE);
349
350                 // handle horizontal line fragments
351                 if (dummy.size() != 3) {
352                         if (dummy.size() != 1)
353                                 cerr << "unexpected dummy size: " << dummy.size()
354                                         << " content: " << lines[row] << "\n";
355                         dummy.resize(3);
356                 }
357                 lines[row] = dummy[1];
358
359                 //cerr << "line: " << row << " above 0: " << dummy[0] << "\n";
360                 //cerr << "line: " << row << " below 2: " << dummy[2] <<  "\n";
361                 //cerr << "line: " << row << " cells 1: " << dummy[1] <<  "\n";
362
363                 for (int i = 0; i <= 2; i += 2) {
364                         //cerr << "   reading from line string '" << dummy[i] << "'\n";
365                         Parser p1(dummy[i]);
366                         while (p1.good()) {
367                                 Token t = p1.get_token();
368                                 //cerr << "read token: " << t << "\n";
369                                 if (t.cs() == "hline") {
370                                         if (i == 0) {
371                                                 if (rowinfo[row].topline) {
372                                                         if (row > 0) // extra bottomline above
373                                                                 handle_hline_below(rowinfo[row - 1], cellinfo[row - 1]);
374                                                         else
375                                                                 cerr << "dropping extra hline\n";
376                                                         //cerr << "below row: " << row-1 << endl;
377                                                 } else {
378                                                         handle_hline_above(rowinfo[row], cellinfo[row]);
379                                                         //cerr << "above row: " << row << endl;
380                                                 }
381                                         } else {
382                                                 //cerr << "below row: " << row << endl;
383                                                 handle_hline_below(rowinfo[row], cellinfo[row]);
384                                         }
385                                 } else if (t.cs() == "cline") {
386                                         string arg = p1.verbatim_item();
387                                         //cerr << "read cline arg: '" << arg << "'\n";
388                                         vector<string> t;
389                                         split(arg, t, '-');
390                                         t.resize(2);
391                                         size_t from = string2int(t[0]) - 1;
392                                         if (from >= colinfo.size()) {
393                                                 cerr << "cline starts at non "
394                                                         "existing column "
395                                                      << from << endl;
396                                                 from = colinfo.size() - 1;
397                                         }
398                                         size_t to = string2int(t[1]);
399                                         if (to >= colinfo.size()) {
400                                                 cerr << "cline ends at non "
401                                                         "existing column "
402                                                      << to << endl;
403                                                 to = colinfo.size() - 1;
404                                         }
405                                         for (size_t col = from; col < to; ++col) {
406                                                 //cerr << "row: " << row << " col: " << col << " i: " << i << endl;
407                                                 if (i == 0) {
408                                                         rowinfo[row].topline = true;
409                                                         cellinfo[row][col].topline = true;
410                                                 } else {
411                                                         rowinfo[row].bottomline = true;
412                                                         cellinfo[row][col].bottomline = true;
413                                                 }
414                                         }
415                                 } else {
416                                         cerr << "unexpected line token: " << t << endl;
417                                 }
418                         }
419                 }
420
421                 // split into cells
422                 vector<string> cells;
423                 split(lines[row], cells, TAB);
424                 // Has the last multicolumn cell a rightline?
425                 bool last_rightline = false;
426                 for (size_t col = 0, cell = 0;
427                                 cell < cells.size() && col < colinfo.size(); ++col, ++cell) {
428                         //cerr << "cell content: '" << cells[cell] << "'\n";
429                         Parser p(cells[cell]);
430                         p.skip_spaces();
431                         //cells[cell] << "'\n";
432                         if (p.next_token().cs() == "multicolumn") {
433                                 // how many cells?
434                                 p.get_token();
435                                 size_t const ncells = string2int(p.verbatim_item());
436
437                                 // special cell properties alignment
438                                 vector<ColInfo> t;
439                                 handle_colalign(p, t);
440                                 cellinfo[row][col].multi     = 1;
441                                 cellinfo[row][col].align     = t.front().align;
442                                 ostringstream os;
443                                 parse_text_in_inset(p, os, FLAG_ITEM, false, context);
444                                 cellinfo[row][col].content   = os.str();
445
446                                 // multicolumn cells are tricky: This
447                                 // \multicolumn{2}{|c|}{col1-2}&
448                                 // \multicolumn{2}{|c|}{col3-4} "\\"
449                                 // gives | col1-2 | col3-4 | and not
450                                 //       | col1-2 || col3-4 |
451                                 // So:
452                                 if (last_rightline && t.front().leftline) {
453                                         t.front().leftline = false;
454                                 }
455                                 last_rightline = t.front().rightline;
456
457                                 // multicolumn lines override normal cell lines
458                                 cellinfo[row][col].leftline  = t.front().leftline;
459                                 cellinfo[row][col].rightline = t.front().rightline;
460
461                                 // add dummy cells for multicol
462                                 for (size_t i = 0; i < ncells - 1 && col < colinfo.size(); ++i) {
463                                         ++col;
464                                         cellinfo[row][col].multi = 2;
465                                         cellinfo[row][col].align = 'c';
466                                 }
467
468                                 // more than one line on the right?
469                                 if (t.front().rightline > 1)
470                                         cellinfo[row][col + 1].leftline = true;
471
472                         } else {
473                                 // FLAG_END is a hack, we need to read all of it
474                                 cellinfo[row][col].leftline = colinfo[col].leftline;
475                                 cellinfo[row][col].rightline = colinfo[col].rightline;
476                                 cellinfo[row][col].align = colinfo[col].align;
477                                 ostringstream os;
478                                 parse_text_in_inset(p, os, FLAG_CELL, false, context);
479                                 cellinfo[row][col].content   = os.str();
480                                 last_rightline = false;
481                         }
482                 }
483
484                 //cerr << "//  handle almost empty last row what we have\n";
485                 // handle almost empty last row
486                 if (row && lines[row].empty() && row + 1 == rowinfo.size()) {
487                         //cerr << "remove empty last line\n";
488                         if (rowinfo[row].topline)
489                                 rowinfo[row - 1].bottomline = true;
490                         for (size_t col = 0; col < colinfo.size(); ++col)
491                                 if (cellinfo[row][col].topline)
492                                         cellinfo[row - 1][col].bottomline = true;
493                         rowinfo.pop_back();
494                 }
495
496         }
497
498         //cerr << "// output what we have\n";
499         // output what we have
500         os << "\n<lyxtabular version=\"3\" rows=\"" << rowinfo.size()
501                  << "\" columns=\"" << colinfo.size() << "\">\n"
502                  << "<features>\n";
503
504         //cerr << "// after header\n";
505         for (size_t col = 0; col < colinfo.size(); ++col) {
506                 os << "<column alignment=\""
507                    << verbose_align(colinfo[col].align) << "\"";
508                 os << " valignment=\"top\"";
509                 if (colinfo[col].leftline)
510                         os << " leftline=\"true\"";
511                 if (colinfo[col].rightline)
512                         os << " rightline=\"true\"";
513                 if (!colinfo[col].width.empty())
514                         os << " width=\"" << colinfo[col].width << "\"";
515                 if (!colinfo[col].special.empty())
516                         os << " special=\"" << colinfo[col].special << "\"";
517                 os << ">\n";
518         }
519         //cerr << "// after cols\n";
520
521         for (size_t row = 0; row < rowinfo.size(); ++row) {
522                 os << "<row";
523                 if (rowinfo[row].topline)
524                         os << " topline=\"true\"";
525                 if (rowinfo[row].bottomline)
526                         os << " bottomline=\"true\"";
527                 os << ">\n";
528                 for (size_t col = 0; col < colinfo.size(); ++col) {
529                         CellInfo const & cell = cellinfo[row][col];
530                         os << "<cell";
531                         if (cell.multi)
532                                 os << " multicolumn=\"" << cell.multi << "\"";
533                         os << " alignment=\"" << verbose_align(cell.align)
534                            << "\""
535                            << " valignment=\"top\"";
536                         if (cell.topline)
537                                 os << " topline=\"true\"";
538                         if (cell.bottomline)
539                                 os << " bottomline=\"true\"";
540                         if (cell.leftline)
541                                 os << " leftline=\"true\"";
542                         if (cell.rightline)
543                                 os << " rightline=\"true\"";
544                         //cerr << "\nrow: " << row << " col: " << col;
545                         //if (cell.topline)
546                         //      cerr << " topline=\"true\"";
547                         //if (cell.bottomline)
548                         //      cerr << " bottomline=\"true\"";
549                         os << " usebox=\"none\""
550                            << ">"
551                            << "\n\\begin_inset Text\n"
552                            << cell.content
553                            << "\n\\end_inset\n"
554                            << "</cell>\n";
555                 }
556                 os << "</row>\n";
557         }
558
559         os << "</lyxtabular>\n";
560 }
561
562
563
564
565 // }])