]> git.lyx.org Git - lyx.git/blob - src/tex2lyx/table.cpp
fix bug 580: reading of some ill-formed tables
[lyx.git] / src / tex2lyx / table.cpp
1 /**
2  * \file table.cpp
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author André Pönitz
7  * \author Jean-Marc Lasgouttes
8  * \author Georg Baum
9  *
10  * Full author contact details are available in file CREDITS.
11  */
12
13 // {[(
14
15 #include <config.h>
16
17 #include "tex2lyx.h"
18
19 #include "support/convert.h"
20 #include "support/lstrings.h"
21
22 #include <iostream>
23 #include <sstream>
24 #include <vector>
25 #include <map>
26
27
28 namespace lyx {
29
30 using std::cerr;
31 using std::endl;
32 using std::istringstream;
33 using std::ostream;
34 using std::ostringstream;
35 using std::string;
36 using std::vector;
37
38
// User defined column types: the key is the column letter as it appears
// in a column specification, the mapped value is the number of braced
// arguments that follow the letter (see the default case in
// handle_colalign()).
// filled in preamble.cpp
std::map<char, int> special_columns;
41
42
43 namespace {
44
/// Formatting of one tabular column, as read from the column specification
class ColInfo {
public:
	/// A column without alignment, width, special text and rules
	ColInfo()
		: align('n'),
		  valign('n'),
		  rightlines(0),
		  leftlines(0)
	{}

	/// horizontal alignment ('n' if not set)
	char align;
	/// vertical alignment ('n' if not set)
	char valign;
	/// width of the column (empty if it has no fixed width)
	string width;
	/// column specification that can't be expressed by the other members
	string special;
	/// how many vertical rules are at the right side
	int rightlines;
	/// how many vertical rules are at the left side
	int leftlines;
};
61
62
/// The kind of a row in a longtable
enum LTRowType
{
	LT_NORMAL,     ///< normal row
	LT_HEAD,       ///< row belongs to the head
	LT_FIRSTHEAD,  ///< row belongs to the head on the first page
	LT_FOOT,       ///< row belongs to the foot
	LT_LASTFOOT    ///< row belongs to the foot on the last page
};
77
78
79 class RowInfo {
80 public:
81         RowInfo() : topline(false), bottomline(false), type(LT_NORMAL),
82                     newpage(false) {}
83         /// horizontal line above
84         bool topline;
85         /// horizontal line below
86         bool bottomline;
87         /// These are for longtabulars only
88         /// row type (head, foot, firsthead etc.)
89         LTRowType type;
90         /// row for a pagebreak
91         bool newpage;
92 };
93
94
/// Role of a cell with respect to multicolumn cells
enum Multicolumn {
	/// An ordinary cell that spans one column
	CELL_NORMAL = 0,
	/// The first cell of a multicolumn. It spans <tt>1 + number of
	/// CELL_PART_OF_MULTICOLUMN cells</tt> that follow directly
	CELL_BEGIN_OF_MULTICOLUMN,
	/// A placeholder cell that is covered by a multicolumn cell
	CELL_PART_OF_MULTICOLUMN
};
104
105
106 class CellInfo {
107 public:
108         CellInfo() : multi(CELL_NORMAL), align('n'), valign('n'),
109                      leftlines(0), rightlines(0), topline(false),
110                      bottomline(false), rotate(false) {}
111         /// cell content
112         string content;
113         /// multicolumn flag
114         Multicolumn multi;
115         /// cell alignment
116         char align;
117         /// vertical cell alignment
118         char valign;
119         /// number of lines on the left
120         int leftlines;
121         /// number of lines on the right
122         int rightlines;
123         /// do we have a line above?
124         bool topline;
125         /// do we have a line below?
126         bool bottomline;
127         /// is the cell rotated?
128         bool rotate;
129         /// width for multicolumn cells
130         string width;
131         /// special formatting for multicolumn cells
132         string special;
133 };
134
135
/// Return the LyX name of the horizontal alignment \p c (as stored in
/// ColInfo and CellInfo). Unknown values yield "none".
inline char const * verbose_align(char c)
{
	if (c == 'c')
		return "center";
	if (c == 'r')
		return "right";
	if (c == 'l')
		return "left";
	return "none";
}
150
151
/// Return the LyX name of the vertical alignment \p c (as stored in
/// ColInfo and CellInfo).
inline char const * verbose_valign(char c)
{
	if (c == 'm')
		return "middle";
	if (c == 'b')
		return "bottom";
	// 'p' and everything else: "top" is the value that LyX uses if no
	// special alignment is set.
	return "top";
}
166
167
// Reduced variant of the attribute writer of the tabular code. Only the
// overloads for bools and strings are needed here.

/// Format the boolean attribute \p name. A false value is not written at
/// all in order to keep the file format of tabulars small.
string const write_attribute(string const & name, bool const & b)
{
	if (!b)
		return string();
	return ' ' + name + "=\"true\"";
}
176
177
/// Format the string attribute \p name. An empty value \p s is not
/// written at all.
string const write_attribute(string const & name, string const & s)
{
	if (s.empty())
		return string();
	return ' ' + name + "=\"" + s + '"';
}
182
183
/*! The table structure is coded in a string in a rather brutish way.
 Every row consists of the hline stuff before the row, a HLINE, the cells
 (separated by TAB), a HLINE, the hline stuff after the row and a final
 HLINE. The rows are terminated by LINE.

\verbatim
  \begin{tabular}{ccc}
    1 & 2 & 3\\ \hline
    \multicolumn{2}{c}{4} & 5 \\
    6 & 7 \\
    8 \endhead
  \end{tabular}
\endverbatim

 gets "translated" to:

\verbatim
         HLINE 1                     TAB 2 TAB 3 HLINE          HLINE LINE
  \hline HLINE \multicolumn{2}{c}{4} TAB 5       HLINE          HLINE LINE
         HLINE 6                     TAB 7       HLINE          HLINE LINE
         HLINE 8                                 HLINE \endhead HLINE LINE
\endverbatim
 */

/// separator between two cells of a row
char const TAB   = '\x01';
/// terminator of a row
char const LINE  = '\x02';
/// separator between the hline stuff and the cells of a row
char const HLINE = '\x04';
208
209
210 /*!
211  * Move the information in leftlines, rightlines, align and valign to the
212  * special field. This is necessary if the special field is not empty,
213  * because LyX ignores leftlines, rightlines, align and valign in this case.
214  */
215 void ci2special(ColInfo & ci)
216 {
217         if (ci.width.empty() && ci.align == 'n')
218                 // The alignment setting is already in special, since
219                 // handle_colalign() never stores ci with these settings
220                 // and ensures that leftlines == 0 and rightlines == 0 in
221                 // this case.
222                 return;
223
224         if (!ci.width.empty()) {
225                 switch (ci.align) {
226                 case 'l':
227                         ci.special += ">{\\raggedright}";
228                         break;
229                 case 'r':
230                         ci.special += ">{\\raggedleft}";
231                         break;
232                 case 'c':
233                         ci.special += ">{\\centering}";
234                         break;
235                 }
236                 if (ci.valign == 'n')
237                         ci.special += 'p';
238                 else
239                         ci.special += ci.valign;
240                 ci.special += '{' + ci.width + '}';
241                 ci.width.erase();
242         } else
243                 ci.special += ci.align;
244
245         for (int i = 0; i < ci.leftlines; ++i)
246                 ci.special.insert(0, "|");
247         for (int i = 0; i < ci.rightlines; ++i)
248                 ci.special += '|';
249         ci.leftlines = 0;
250         ci.rightlines = 0;
251         ci.align = 'n';
252         ci.valign = 'n';
253 }
254
255
256 /*!
257  * Handle column specifications for tabulars and multicolumns.
258  * The next token of the parser \p p must be an opening brace, and we read
259  * everything until the matching closing brace.
260  * The resulting column specifications are filled into \p colinfo. This is
261  * in an intermediate form. fix_colalign() makes it suitable for LyX output.
262  */
263 void handle_colalign(Parser & p, vector<ColInfo> & colinfo,
264                      ColInfo const & start)
265 {
266         if (p.get_token().cat() != catBegin)
267                 cerr << "Wrong syntax for table column alignment.\n"
268                         "Expected '{', got '" << p.curr_token().asInput()
269                      << "'.\n";
270
271         ColInfo next = start;
272         for (Token t = p.get_token(); p.good() && t.cat() != catEnd;
273              t = p.get_token()) {
274 #ifdef FILEDEBUG
275                 cerr << "t: " << t << "  c: '" << t.character() << "'\n";
276 #endif
277
278                 // We cannot handle comments here
279                 if (t.cat() == catComment) {
280                         if (t.cs().empty()) {
281                                 // "%\n" combination
282                                 p.skip_spaces();
283                         } else
284                                 cerr << "Ignoring comment: " << t.asInput();
285                         continue;
286                 }
287
288                 switch (t.character()) {
289                         case 'c':
290                         case 'l':
291                         case 'r':
292                                 // new column, horizontal aligned
293                                 next.align = t.character();
294                                 if (!next.special.empty())
295                                         ci2special(next);
296                                 colinfo.push_back(next);
297                                 next = ColInfo();
298                                 break;
299                         case 'p':
300                         case 'b':
301                         case 'm':
302                                 // new column, vertical aligned box
303                                 next.valign = t.character();
304                                 next.width = p.verbatim_item();
305                                 if (!next.special.empty())
306                                         ci2special(next);
307                                 colinfo.push_back(next);
308                                 next = ColInfo();
309                                 break;
310                         case '|':
311                                 // vertical rule
312                                 if (colinfo.empty()) {
313                                         if (next.special.empty())
314                                                 ++next.leftlines;
315                                         else
316                                                 next.special += '|';
317                                 } else if (colinfo.back().special.empty())
318                                         ++colinfo.back().rightlines;
319                                 else if (next.special.empty())
320                                         ++next.leftlines;
321                                 else
322                                         colinfo.back().special += '|';
323                                 break;
324                         case '>': {
325                                 // text before the next column
326                                 string const s = trim(p.verbatim_item());
327                                 if (next.special.empty() &&
328                                     next.align == 'n') {
329                                         // Maybe this can be converted to a
330                                         // horizontal alignment setting for
331                                         // fixed width columns
332                                         if (s == "\\raggedleft")
333                                                 next.align = 'r';
334                                         else if (s == "\\raggedright")
335                                                 next.align = 'l';
336                                         else if (s == "\\centering")
337                                                 next.align = 'c';
338                                         else
339                                                 next.special = ">{" + s + '}';
340                                 } else
341                                         next.special += ">{" + s + '}';
342                                 break;
343                         }
344                         case '<': {
345                                 // text after the last column
346                                 string const s = trim(p.verbatim_item());
347                                 if (colinfo.empty())
348                                         // This is not possible in LaTeX.
349                                         cerr << "Ignoring separator '<{"
350                                              << s << "}'." << endl;
351                                 else {
352                                         ColInfo & ci = colinfo.back();
353                                         ci2special(ci);
354                                         ci.special += "<{" + s + '}';
355                                 }
356                                 break;
357                         }
358                         case '*': {
359                                 // *{n}{arg} means 'n' columns of type 'arg'
360                                 string const num = p.verbatim_item();
361                                 string const arg = p.verbatim_item();
362                                 size_t const n = convert<unsigned int>(num);
363                                 if (!arg.empty() && n > 0) {
364                                         string s("{");
365                                         for (size_t i = 0; i < n; ++i)
366                                                 s += arg;
367                                         s += '}';
368                                         Parser p2(s);
369                                         handle_colalign(p2, colinfo, next);
370                                         next = ColInfo();
371                                 } else {
372                                         cerr << "Ignoring column specification"
373                                                 " '*{" << num << "}{"
374                                              << arg << "}'." << endl;
375                                 }
376                                 break;
377                         }
378                         case '@':
379                                 // text instead of the column spacing
380                         case '!':
381                                 // text in addition to the column spacing
382                                 next.special += t.character();
383                                 next.special += '{' + p.verbatim_item() + '}';
384                                 break;
385                         default:
386                                 // try user defined column types
387                                 if (special_columns.find(t.character()) !=
388                                     special_columns.end()) {
389                                         ci2special(next);
390                                         next.special += t.character();
391                                         int const nargs =
392                                                 special_columns[t.character()];
393                                         for (int i = 0; i < nargs; ++i)
394                                                 next.special += '{' +
395                                                         p.verbatim_item() +
396                                                         '}';
397                                         colinfo.push_back(next);
398                                         next = ColInfo();
399                                 } else
400                                         cerr << "Ignoring column specification"
401                                                 " '" << t << "'." << endl;
402                                 break;
403                         }
404         }
405
406         // Maybe we have some column separators that need to be added to the
407         // last column?
408         ci2special(next);
409         if (!next.special.empty()) {
410                 ColInfo & ci = colinfo.back();
411                 ci2special(ci);
412                 ci.special += next.special;
413                 next.special.erase();
414         }
415 }
416
417
418 /*!
419  * Move the left and right lines and alignment settings of the column \p ci
420  * to the special field if necessary.
421  */
422 void fix_colalign(ColInfo & ci)
423 {
424         if (ci.leftlines > 1 || ci.rightlines > 1)
425                 ci2special(ci);
426 }
427
428
429 /*!
430  * LyX can't handle more than one vertical line at the left or right side
431  * of a column.
432  * This function moves the left and right lines and alignment settings of all
433  * columns in \p colinfo to the special field if necessary.
434  */
435 void fix_colalign(vector<ColInfo> & colinfo)
436 {
437         // Try to move extra leftlines to the previous column.
438         // We do this only if both special fields are empty, otherwise we
439         // can't tell wether the result will be the same.
440         for (size_t col = 0; col < colinfo.size(); ++col) {
441                 if (colinfo[col].leftlines > 1 &&
442                     colinfo[col].special.empty() && col > 0 &&
443                     colinfo[col - 1].rightlines == 0 &&
444                     colinfo[col - 1].special.empty()) {
445                         ++colinfo[col - 1].rightlines;
446                         --colinfo[col].leftlines;
447                 }
448         }
449         // Try to move extra rightlines to the next column
450         for (size_t col = 0; col < colinfo.size(); ++col) {
451                 if (colinfo[col].rightlines > 1 &&
452                     colinfo[col].special.empty() &&
453                     col < colinfo.size() - 1 &&
454                     colinfo[col + 1].leftlines == 0 &&
455                     colinfo[col + 1].special.empty()) {
456                         ++colinfo[col + 1].leftlines;
457                         --colinfo[col].rightlines;
458                 }
459         }
460         // Move the lines and alignment settings to the special field if
461         // necessary
462         for (size_t col = 0; col < colinfo.size(); ++col)
463                 fix_colalign(colinfo[col]);
464 }
465
466
467 /*!
468  * Parse hlines and similar stuff.
469  * \returns wether the token \p t was parsed
470  */
471 bool parse_hlines(Parser & p, Token const & t, string & hlines,
472                   bool is_long_tabular)
473 {
474         BOOST_ASSERT(t.cat() == catEscape);
475
476         if (t.cs() == "hline")
477                 hlines += "\\hline";
478
479         else if (t.cs() == "cline")
480                 hlines += "\\cline{" + p.verbatim_item() + '}';
481
482         else if (is_long_tabular && t.cs() == "newpage")
483                 hlines += "\\newpage";
484
485         else
486                 return false;
487
488         return true;
489 }
490
491
/// Where we are in a row while the table structure is parsed
enum RowPosition {
	/// Before the first token of the row
	ROW_START,
	/// The first token has been read, but no column token yet
	IN_HLINES_START,
	/// The first column token has been read. This is the only position
	/// where comments and whitespace are treated as tokens
	IN_COLUMNS,
	/// The first non-column token at the end of the row has been read
	IN_HLINES_END
};
504
505
506 /*!
507  * Parse table structure.
508  * We parse tables in a two-pass process: This function extracts the table
509  * structure (rows, columns, hlines etc.), but does not change the cell
510  * content. The cell content is parsed in a second step in handle_tabular().
511  */
512 void parse_table(Parser & p, ostream & os, bool is_long_tabular,
513                  RowPosition & pos, unsigned flags)
514 {
515         // table structure commands such as \hline
516         string hlines;
517
518         // comments that occur at places where we can't handle them
519         string comments;
520
521         while (p.good()) {
522                 Token const & t = p.get_token();
523
524 #ifdef FILEDEBUG
525                 cerr << "t: " << t << " flags: " << flags << "\n";
526 #endif
527
528                 // comments and whitespace in hlines
529                 switch (pos) {
530                 case ROW_START:
531                 case IN_HLINES_START:
532                 case IN_HLINES_END:
533                         if (t.cat() == catComment) {
534                                 if (t.cs().empty())
535                                         // line continuation
536                                         p.skip_spaces();
537                                 else
538                                         // We can't handle comments here,
539                                         // store them for later use
540                                         comments += t.asInput();
541                                 continue;
542                         } else if (t.cat() == catSpace ||
543                                    t.cat() == catNewline) {
544                                 // whitespace is irrelevant here, we
545                                 // need to recognize hline stuff
546                                 p.skip_spaces();
547                                 continue;
548                         }
549                         break;
550                 case IN_COLUMNS:
551                         break;
552                 }
553
554                 // We need to handle structure stuff first in order to
555                 // determine wether we need to output a HLINE separator
556                 // before the row or not.
557                 if (t.cat() == catEscape) {
558                         if (parse_hlines(p, t, hlines, is_long_tabular)) {
559                                 switch (pos) {
560                                 case ROW_START:
561                                         pos = IN_HLINES_START;
562                                         break;
563                                 case IN_COLUMNS:
564                                         pos = IN_HLINES_END;
565                                         break;
566                                 case IN_HLINES_START:
567                                 case IN_HLINES_END:
568                                         break;
569                                 }
570                                 continue;
571                         }
572
573                         else if (t.cs() == "tabularnewline" ||
574                                  t.cs() == "\\" ||
575                                  t.cs() == "cr") {
576                                 if (t.cs() == "cr")
577                                         cerr << "Warning: Converting TeX "
578                                                 "'\\cr' to LaTeX '\\\\'."
579                                              << endl;
580                                 // stuff before the line break
581                                 os << comments << HLINE << hlines << HLINE
582                                    << LINE;
583                                 //cerr << "hlines: " << hlines << endl;
584                                 hlines.erase();
585                                 comments.erase();
586                                 pos = ROW_START;
587                                 continue;
588                         }
589
590                         else if (is_long_tabular &&
591                                  (t.cs() == "endhead" ||
592                                   t.cs() == "endfirsthead" ||
593                                   t.cs() == "endfoot" ||
594                                   t.cs() == "endlastfoot")) {
595                                 hlines += t.asInput();
596                                 switch (pos) {
597                                 case IN_COLUMNS:
598                                 case IN_HLINES_END:
599                                         // these commands are implicit line
600                                         // breaks
601                                         os << comments << HLINE << hlines
602                                            << HLINE << LINE;
603                                         hlines.erase();
604                                         comments.erase();
605                                         pos = ROW_START;
606                                         break;
607                                 case ROW_START:
608                                         pos = IN_HLINES_START;
609                                         break;
610                                 case IN_HLINES_START:
611                                         break;
612                                 }
613                                 continue;
614                         }
615
616                 }
617
618                 // We need a HLINE separator if we either have no hline
619                 // stuff at all and are just starting a row or if we just
620                 // got the first non-hline token.
621                 switch (pos) {
622                 case ROW_START:
623                         // no hline tokens exist, first token at row start
624                 case IN_HLINES_START:
625                         // hline tokens exist, first non-hline token at row
626                         // start
627                         os << hlines << HLINE << comments;
628                         hlines.erase();
629                         comments.erase();
630                         pos = IN_COLUMNS;
631                         break;
632                 case IN_HLINES_END:
633                         // Oops, there is still cell content after hline
634                         // stuff. This does not work in LaTeX, so we ignore
635                         // the hlines.
636                         cerr << "Ignoring '" << hlines << "' in a cell"
637                              << endl;
638                         os << comments;
639                         hlines.erase();
640                         comments.erase();
641                         pos = IN_COLUMNS;
642                         break;
643                 case IN_COLUMNS:
644                         break;
645                 }
646
647                 // If we come here we have normal cell content
648                 //
649                 // cat codes
650                 //
651                 if (t.cat() == catMath) {
652                         // we are inside some text mode thingy, so opening new math is allowed
653                         Token const & n = p.get_token();
654                         if (n.cat() == catMath) {
655                                 // TeX's $$...$$ syntax for displayed math
656                                 os << "\\[";
657                                 // This does only work because parse_math outputs TeX
658                                 parse_math(p, os, FLAG_SIMPLE, MATH_MODE);
659                                 os << "\\]";
660                                 p.get_token(); // skip the second '$' token
661                         } else {
662                                 // simple $...$  stuff
663                                 p.putback();
664                                 os << '$';
665                                 // This does only work because parse_math outputs TeX
666                                 parse_math(p, os, FLAG_SIMPLE, MATH_MODE);
667                                 os << '$';
668                         }
669                 }
670
671                 else if (t.cat() == catSpace || t.cat() == catNewline)
672                                 os << t.cs();
673
674                 else if (t.cat() == catLetter ||
675                                t.cat() == catSuper ||
676                                t.cat() == catSub ||
677                                t.cat() == catOther ||
678                                t.cat() == catActive ||
679                                t.cat() == catParameter)
680                         os << t.character();
681
682                 else if (t.cat() == catBegin) {
683                         os << '{';
684                         parse_table(p, os, is_long_tabular, pos,
685                                     FLAG_BRACE_LAST);
686                         os << '}';
687                 }
688
689                 else if (t.cat() == catEnd) {
690                         if (flags & FLAG_BRACE_LAST)
691                                 return;
692                         cerr << "unexpected '}'\n";
693                 }
694
695                 else if (t.cat() == catAlign) {
696                         os << TAB;
697                         p.skip_spaces();
698                 }
699
700                 else if (t.cat() == catComment)
701                         os << t.asInput();
702
703                 else if (t.cs() == "(") {
704                         os << "\\(";
705                         // This does only work because parse_math outputs TeX
706                         parse_math(p, os, FLAG_SIMPLE2, MATH_MODE);
707                         os << "\\)";
708                 }
709
710                 else if (t.cs() == "[") {
711                         os << "\\[";
712                         // This does only work because parse_math outputs TeX
713                         parse_math(p, os, FLAG_EQUATION, MATH_MODE);
714                         os << "\\]";
715                 }
716
717                 else if (t.cs() == "begin") {
718                         string const name = p.getArg('{', '}');
719                         active_environments.push_back(name);
720                         os << "\\begin{" << name << '}';
721                         // treat the nested environment as a block, don't
722                         // parse &, \\ etc, because they don't belong to our
723                         // table if they appear.
724                         os << p.verbatimEnvironment(name);
725                         os << "\\end{" << name << '}';
726                         active_environments.pop_back();
727                 }
728
729                 else if (t.cs() == "end") {
730                         if (flags & FLAG_END) {
731                                 // eat environment name
732                                 string const name = p.getArg('{', '}');
733                                 if (name != active_environment())
734                                         p.error("\\end{" + name + "} does not match \\begin{"
735                                                 + active_environment() + "}");
736                                 return;
737                         }
738                         p.error("found 'end' unexpectedly");
739                 }
740
741                 else
742                         os << t.asInput();
743         }
744
745         // We can have comments if the last line is incomplete
746         os << comments;
747
748         // We can have hline stuff if the last line is incomplete
749         if (!hlines.empty()) {
750                 // this does not work in LaTeX, so we ignore it
751                 cerr << "Ignoring '" << hlines << "' at end of tabular"
752                      << endl;
753         }
754 }
755
756
757 void handle_hline_above(RowInfo & ri, vector<CellInfo> & ci)
758 {
759         ri.topline = true;
760         for (size_t col = 0; col < ci.size(); ++col)
761                 ci[col].topline = true;
762 }
763
764
765 void handle_hline_below(RowInfo & ri, vector<CellInfo> & ci)
766 {
767         ri.bottomline = true;
768         for (size_t col = 0; col < ci.size(); ++col)
769                 ci[col].bottomline = true;
770 }
771
772
773 } // anonymous namespace
774
775
776 void handle_tabular(Parser & p, ostream & os, bool is_long_tabular,
777                     Context & context)
778 {
779         string posopts = p.getOpt();
780         if (!posopts.empty()) {
781                 // FIXME: Convert this to ERT
782                 if (is_long_tabular)
783                         cerr << "horizontal longtable";
784                 else
785                         cerr << "vertical tabular";
786                 cerr << " positioning '" << posopts << "' ignored\n";
787         }
788
789         vector<ColInfo> colinfo;
790
791         // handle column formatting
792         handle_colalign(p, colinfo, ColInfo());
793         fix_colalign(colinfo);
794
795         // first scan of cells
796         // use table mode to keep it minimal-invasive
797         // not exactly what's TeX doing...
798         vector<string> lines;
799         ostringstream ss;
800         RowPosition rowpos = ROW_START;
801         parse_table(p, ss, is_long_tabular, rowpos, FLAG_END);
802         split(ss.str(), lines, LINE);
803
804         vector< vector<CellInfo> > cellinfo(lines.size());
805         vector<RowInfo> rowinfo(lines.size());
806
807         // split into rows
808         //cerr << "// split into rows\n";
809         for (size_t row = 0; row < rowinfo.size(); ++row) {
810
811                 // init row
812                 cellinfo[row].resize(colinfo.size());
813
814                 // split row
815                 vector<string> dummy;
816                 //cerr << "\n########### LINE: " << lines[row] << "########\n";
817                 split(lines[row], dummy, HLINE);
818
819                 // handle horizontal line fragments
820                 // we do only expect this for a last line without '\\'
821                 if (dummy.size() != 3) {
822                         if ((dummy.size() != 1 && dummy.size() != 2) ||
823                             row != rowinfo.size() - 1)
824                                 cerr << "unexpected dummy size: " << dummy.size()
825                                         << " content: " << lines[row] << "\n";
826                         dummy.resize(3);
827                 }
828                 lines[row] = dummy[1];
829
830                 //cerr << "line: " << row << " above 0: " << dummy[0] << "\n";
831                 //cerr << "line: " << row << " below 2: " << dummy[2] <<  "\n";
832                 //cerr << "line: " << row << " cells 1: " << dummy[1] <<  "\n";
833
834                 for (int i = 0; i <= 2; i += 2) {
835                         //cerr << "   reading from line string '" << dummy[i] << "'\n";
836                         Parser p1(dummy[i]);
837                         while (p1.good()) {
838                                 Token t = p1.get_token();
839                                 //cerr << "read token: " << t << "\n";
840                                 if (t.cs() == "hline") {
841                                         if (i == 0) {
842                                                 if (rowinfo[row].topline) {
843                                                         if (row > 0) // extra bottomline above
844                                                                 handle_hline_below(rowinfo[row - 1], cellinfo[row - 1]);
845                                                         else
846                                                                 cerr << "dropping extra hline\n";
847                                                         //cerr << "below row: " << row-1 << endl;
848                                                 } else {
849                                                         handle_hline_above(rowinfo[row], cellinfo[row]);
850                                                         //cerr << "above row: " << row << endl;
851                                                 }
852                                         } else {
853                                                 //cerr << "below row: " << row << endl;
854                                                 handle_hline_below(rowinfo[row], cellinfo[row]);
855                                         }
856                                 } else if (t.cs() == "cline") {
857                                         string arg = p1.verbatim_item();
858                                         //cerr << "read cline arg: '" << arg << "'\n";
859                                         vector<string> t;
860                                         split(arg, t, '-');
861                                         t.resize(2);
862                                         size_t from = convert<unsigned int>(t[0]);
863                                         if (from == 0)
864                                                 cerr << "Could not parse "
865                                                         "cline start column."
866                                                      << endl;
867                                         else
868                                                 // 1 based index -> 0 based
869                                                 --from;
870                                         if (from >= colinfo.size()) {
871                                                 cerr << "cline starts at non "
872                                                         "existing column "
873                                                      << (from + 1) << endl;
874                                                 from = colinfo.size() - 1;
875                                         }
876                                         size_t to = convert<unsigned int>(t[1]);
877                                         if (to == 0)
878                                                 cerr << "Could not parse "
879                                                         "cline end column."
880                                                      << endl;
881                                         else
882                                                 // 1 based index -> 0 based
883                                                 --to;
884                                         if (to >= colinfo.size()) {
885                                                 cerr << "cline ends at non "
886                                                         "existing column "
887                                                      << (to + 1) << endl;
888                                                 to = colinfo.size() - 1;
889                                         }
890                                         for (size_t col = from; col <= to; ++col) {
891                                                 //cerr << "row: " << row << " col: " << col << " i: " << i << endl;
892                                                 if (i == 0) {
893                                                         rowinfo[row].topline = true;
894                                                         cellinfo[row][col].topline = true;
895                                                 } else {
896                                                         rowinfo[row].bottomline = true;
897                                                         cellinfo[row][col].bottomline = true;
898                                                 }
899                                         }
900                                 } else if (t.cs() == "endhead") {
901                                         if (i > 0)
902                                                 rowinfo[row].type = LT_HEAD;
903                                         for (int r = row - 1; r >= 0; --r) {
904                                                 if (rowinfo[r].type != LT_NORMAL)
905                                                         break;
906                                                 rowinfo[r].type = LT_HEAD;
907                                         }
908                                 } else if (t.cs() == "endfirsthead") {
909                                         if (i > 0)
910                                                 rowinfo[row].type = LT_FIRSTHEAD;
911                                         for (int r = row - 1; r >= 0; --r) {
912                                                 if (rowinfo[r].type != LT_NORMAL)
913                                                         break;
914                                                 rowinfo[r].type = LT_FIRSTHEAD;
915                                         }
916                                 } else if (t.cs() == "endfoot") {
917                                         if (i > 0)
918                                                 rowinfo[row].type = LT_FOOT;
919                                         for (int r = row - 1; r >= 0; --r) {
920                                                 if (rowinfo[r].type != LT_NORMAL)
921                                                         break;
922                                                 rowinfo[r].type = LT_FOOT;
923                                         }
924                                 } else if (t.cs() == "endlastfoot") {
925                                         if (i > 0)
926                                                 rowinfo[row].type = LT_LASTFOOT;
927                                         for (int r = row - 1; r >= 0; --r) {
928                                                 if (rowinfo[r].type != LT_NORMAL)
929                                                         break;
930                                                 rowinfo[r].type = LT_LASTFOOT;
931                                         }
932                                 } else if (t.cs() == "newpage") {
933                                         if (i == 0) {
934                                                 if (row > 0)
935                                                         rowinfo[row - 1].newpage = true;
936                                                 else
937                                                         // This does not work in LaTeX
938                                                         cerr << "Ignoring "
939                                                                 "'\\newpage' "
940                                                                 "before rows."
941                                                              << endl;
942                                         } else
943                                                 rowinfo[row].newpage = true;
944                                 } else {
945                                         cerr << "unexpected line token: " << t << endl;
946                                 }
947                         }
948                 }
949
950                 // split into cells
951                 vector<string> cells;
952                 split(lines[row], cells, TAB);
953                 for (size_t col = 0, cell = 0; cell < cells.size();
954                      ++col, ++cell) {
955                         //cerr << "cell content: '" << cells[cell] << "'\n";
956                         if (col >= colinfo.size()) {
957                                 // This does not work in LaTeX
958                                 cerr << "Ignoring extra cell '"
959                                      << cells[cell] << "'." << endl;
960                                 continue;
961                         }
962                         Parser p(cells[cell]);
963                         p.skip_spaces();
964                         //cells[cell] << "'\n";
965                         if (p.next_token().cs() == "multicolumn") {
966                                 // how many cells?
967                                 p.get_token();
968                                 size_t const ncells =
969                                         convert<unsigned int>(p.verbatim_item());
970
971                                 // special cell properties alignment
972                                 vector<ColInfo> t;
973                                 handle_colalign(p, t, ColInfo());
974                                 ColInfo & ci = t.front();
975
976                                 // The logic of LyX for multicolumn vertical
977                                 // lines is too complicated to reproduce it
978                                 // here (see LyXTabular::TeXCellPreamble()).
979                                 // Therefore we simply put everything in the
980                                 // special field.
981                                 ci2special(ci);
982
983                                 cellinfo[row][col].multi      = CELL_BEGIN_OF_MULTICOLUMN;
984                                 cellinfo[row][col].align      = ci.align;
985                                 cellinfo[row][col].special    = ci.special;
986                                 cellinfo[row][col].leftlines  = ci.leftlines;
987                                 cellinfo[row][col].rightlines = ci.rightlines;
988                                 ostringstream os;
989                                 parse_text_in_inset(p, os, FLAG_ITEM, false, context);
990                                 if (!cellinfo[row][col].content.empty()) {
991                                         // This may or may not work in LaTeX,
992                                         // but it does not work in LyX.
993                                         // FIXME: Handle it correctly!
994                                         cerr << "Moving cell content '"
995                                              << cells[cell]
996                                              << "' into a multicolumn cell. "
997                                                 "This will probably not work."
998                                              << endl;
999                                 }
1000                                 cellinfo[row][col].content += os.str();
1001
1002                                 // add dummy cells for multicol
1003                                 for (size_t i = 0; i < ncells - 1 && col < colinfo.size(); ++i) {
1004                                         ++col;
1005                                         cellinfo[row][col].multi = CELL_PART_OF_MULTICOLUMN;
1006                                         cellinfo[row][col].align = 'c';
1007                                 }
1008
1009                         } else {
1010                                 cellinfo[row][col].leftlines  = colinfo[col].leftlines;
1011                                 cellinfo[row][col].rightlines = colinfo[col].rightlines;
1012                                 cellinfo[row][col].align      = colinfo[col].align;
1013                                 ostringstream os;
1014                                 parse_text_in_inset(p, os, FLAG_CELL, false, context);
1015                                 cellinfo[row][col].content += os.str();
1016                         }
1017                 }
1018
1019                 //cerr << "//  handle almost empty last row what we have\n";
1020                 // handle almost empty last row
1021                 if (row && lines[row].empty() && row + 1 == rowinfo.size()) {
1022                         //cerr << "remove empty last line\n";
1023                         if (rowinfo[row].topline)
1024                                 rowinfo[row - 1].bottomline = true;
1025                         for (size_t col = 0; col < colinfo.size(); ++col)
1026                                 if (cellinfo[row][col].topline)
1027                                         cellinfo[row - 1][col].bottomline = true;
1028                         rowinfo.pop_back();
1029                 }
1030         }
1031
1032         // Now we have the table structure and content in rowinfo, colinfo
1033         // and cellinfo.
1034         // Unfortunately LyX has some limitations that we need to work around.
1035
1036         // Convert cells with special content to multicolumn cells
1037         // (LyX ignores the special field for non-multicolumn cells).
1038         for (size_t row = 0; row < rowinfo.size(); ++row) {
1039                 for (size_t col = 0; col < cellinfo[row].size(); ++col) {
1040                         if (cellinfo[row][col].multi == CELL_NORMAL &&
1041                             !cellinfo[row][col].special.empty())
1042                                 cellinfo[row][col].multi = CELL_BEGIN_OF_MULTICOLUMN;
1043                 }
1044         }
1045
1046         //cerr << "// output what we have\n";
1047         // output what we have
1048         os << "\n<lyxtabular version=\"3\" rows=\"" << rowinfo.size()
1049            << "\" columns=\"" << colinfo.size() << "\">\n";
1050         os << "<features"
1051            << write_attribute("rotate", false)
1052            << write_attribute("islongtable", is_long_tabular)
1053            << ">\n";
1054
1055         //cerr << "// after header\n";
1056         for (size_t col = 0; col < colinfo.size(); ++col) {
1057                 os << "<column alignment=\""
1058                    << verbose_align(colinfo[col].align) << "\""
1059                    << " valignment=\""
1060                    << verbose_valign(colinfo[col].valign) << "\""
1061                    << write_attribute("leftline", colinfo[col].leftlines > 0)
1062                    << write_attribute("rightline", colinfo[col].rightlines > 0)
1063                    << write_attribute("width", translate_len(colinfo[col].width))
1064                    << write_attribute("special", colinfo[col].special)
1065                    << ">\n";
1066         }
1067         //cerr << "// after cols\n";
1068
1069         for (size_t row = 0; row < rowinfo.size(); ++row) {
1070                 os << "<row"
1071                    << write_attribute("topline", rowinfo[row].topline)
1072                    << write_attribute("bottomline", rowinfo[row].bottomline)
1073                    << write_attribute("endhead",
1074                                       rowinfo[row].type == LT_HEAD)
1075                    << write_attribute("endfirsthead",
1076                                       rowinfo[row].type == LT_FIRSTHEAD)
1077                    << write_attribute("endfoot",
1078                                       rowinfo[row].type == LT_FOOT)
1079                    << write_attribute("endlastfoot",
1080                                       rowinfo[row].type == LT_LASTFOOT)
1081                    << write_attribute("newpage", rowinfo[row].newpage)
1082                    << ">\n";
1083                 for (size_t col = 0; col < colinfo.size(); ++col) {
1084                         CellInfo const & cell = cellinfo[row][col];
1085                         os << "<cell";
1086                         if (cell.multi != CELL_NORMAL)
1087                                 os << " multicolumn=\"" << cell.multi << "\"";
1088                         os << " alignment=\"" << verbose_align(cell.align)
1089                            << "\""
1090                            << " valignment=\"" << verbose_valign(cell.valign)
1091                            << "\""
1092                            << write_attribute("topline", cell.topline)
1093                            << write_attribute("bottomline", cell.bottomline)
1094                            << write_attribute("leftline", cell.leftlines > 0)
1095                            << write_attribute("rightline", cell.rightlines > 0)
1096                            << write_attribute("rotate", cell.rotate);
1097                         //cerr << "\nrow: " << row << " col: " << col;
1098                         //if (cell.topline)
1099                         //      cerr << " topline=\"true\"";
1100                         //if (cell.bottomline)
1101                         //      cerr << " bottomline=\"true\"";
1102                         os << " usebox=\"none\""
1103                            << write_attribute("width", translate_len(cell.width));
1104                         if (cell.multi != CELL_NORMAL)
1105                                 os << write_attribute("special", cell.special);
1106                         os << ">"
1107                            << "\n\\begin_inset Text\n"
1108                            << cell.content
1109                            << "\n\\end_inset\n"
1110                            << "</cell>\n";
1111                 }
1112                 os << "</row>\n";
1113         }
1114
1115         os << "</lyxtabular>\n";
1116 }
1117
1118
1119
1120
1121 // }])
1122
1123
1124 } // namespace lyx