]> git.lyx.org Git - lyx.git/blob - src/tex2lyx/table.cpp
hyperref support for tex2lyx
[lyx.git] / src / tex2lyx / table.cpp
1 /**
2  * \file table.cpp
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author André Pönitz
7  * \author Jean-Marc Lasgouttes
8  * \author Georg Baum
9  *
10  * Full author contact details are available in file CREDITS.
11  */
12
13 // {[(
14
15 #include <config.h>
16
17 #include "tex2lyx.h"
18
19 #include "support/lassert.h"
20 #include "support/convert.h"
21 #include "support/lstrings.h"
22
23 #include <iostream>
24 #include <sstream>
25 #include <vector>
26 #include <map>
27
28 using namespace std;
29
30 namespace lyx {
31
/// User defined column types: maps the column type character to the
/// number of arguments it takes. The map is filled in preamble.cpp.
std::map<char, int> special_columns;
34
35
36 namespace {
37
/// Formatting information of one table column.
/// The value 'n' in \c align and \c valign means "not set".
class ColInfo {
public:
	ColInfo()
		: align('n'),
		  valign('n'),
		  rightlines(0),
		  leftlines(0)
	{}

	char align;      ///< horizontal alignment of the column
	char valign;     ///< vertical alignment of the column
	string width;    ///< width of the column
	string special;  ///< special column alignment
	int rightlines;  ///< how many lines are on the right
	int leftlines;   ///< how many lines are on the left
};
54
55
/// Kind of a row in a longtable
enum LTRowType
{
	LT_NORMAL,     ///< ordinary row
	LT_HEAD,       ///< row belongs to the head
	LT_FIRSTHEAD,  ///< row belongs to the head on the first page
	LT_FOOT,       ///< row belongs to the foot
	LT_LASTFOOT    ///< row belongs to the foot on the last page
};
70
71
72 class RowInfo {
73 public:
74         RowInfo() : topline(false), bottomline(false), type(LT_NORMAL),
75                     caption(false), newpage(false) {}
76         /// horizontal line above
77         bool topline;
78         /// horizontal line below
79         bool bottomline;
80         /// These are for longtabulars only
81         /// row type (head, foot, firsthead etc.)
82         LTRowType type;
83         /// row for a caption
84         bool caption;
85         /// row for a newpage
86         bool newpage;
87 };
88
89
/// Multicolumn state of a cell.
/// Do not change the numeric values, they are part of the file format!
enum Multicolumn {
	/// An ordinary cell
	CELL_NORMAL = 0,
	/// First cell of a multicolumn. It spans <tt>1 + number of
	/// CELL_PART_OF_MULTICOLUMN cells</tt> that follow directly
	CELL_BEGIN_OF_MULTICOLUMN = 1,
	/// Dummy cell that is covered by a multicolumn cell
	CELL_PART_OF_MULTICOLUMN = 2
};
100
101
102 class CellInfo {
103 public:
104         CellInfo() : multi(CELL_NORMAL), align('n'), valign('n'),
105                      leftlines(0), rightlines(0), topline(false),
106                      bottomline(false), rotate(false) {}
107         /// cell content
108         string content;
109         /// multicolumn flag
110         Multicolumn multi;
111         /// cell alignment
112         char align;
113         /// vertical cell alignment
114         char valign;
115         /// number of lines on the left
116         int leftlines;
117         /// number of lines on the right
118         int rightlines;
119         /// do we have a line above?
120         bool topline;
121         /// do we have a line below?
122         bool bottomline;
123         /// is the cell rotated?
124         bool rotate;
125         /// width for multicolumn cells
126         string width;
127         /// special formatting for multicolumn cells
128         string special;
129 };
130
131
/// Convert the one-letter horizontal alignment code used in ColInfo and
/// CellInfo to the name LyX uses. Unknown codes map to "none".
inline char const * verbose_align(char c)
{
	if (c == 'c')
		return "center";
	if (c == 'r')
		return "right";
	if (c == 'l')
		return "left";
	return "none";
}
146
147
/// Convert the one-letter vertical alignment code used in ColInfo and
/// CellInfo to the name LyX uses.
inline char const * verbose_valign(char c)
{
	if (c == 'm')
		return "middle";
	if (c == 'b')
		return "bottom";
	// 'p' and everything else (no special alignment) means "top".
	return "top";
}
162
163
// Stripped down variant of the tabular attribute writer. Currently it is
// only needed for bools and strings.
/// \returns <tt> name="true"</tt> (with a leading space) if \p b is set,
/// otherwise an empty string
string const write_attribute(string const & name, bool const & b)
{
	// False values are not written at all. This removes a bit of the
	// file format bloat for tabulars.
	if (!b)
		return string();
	return ' ' + name + "=\"true\"";
}
172
173
/// \returns <tt> name="s"</tt> (with a leading space) if \p s is not
/// empty, otherwise an empty string
string const write_attribute(string const & name, string const & s)
{
	if (s.empty())
		return string();
	return ' ' + name + "=\"" + s + '"';
}
178
179
/*! The table structure is coded in a string in a rather brutish way,
 *  using the separator characters defined below:

\verbatim
  \begin{tabular}{ccc}
    1 & 2 & 3\\ \hline
    \multicolumn{2}{c}{4} & 5 \\
    6 & 7 \\
    8 \endhead
  \end{tabular}
\endverbatim

 is "translated" to:

\verbatim
         HLINE 1                     TAB 2 TAB 3 HLINE          HLINE LINE
  \hline HLINE \multicolumn{2}{c}{4} TAB 5       HLINE          HLINE LINE
         HLINE 6                     TAB 7       HLINE          HLINE LINE
         HLINE 8                                 HLINE \endhead HLINE LINE
\endverbatim
 */

/// separates the cells of a row
const char TAB   = '\001';
/// terminates a row
const char LINE  = '\002';
/// separates the hline stuff at both ends of a row from the cells
const char HLINE = '\004';
204
205
206 /*!
207  * Move the information in leftlines, rightlines, align and valign to the
208  * special field. This is necessary if the special field is not empty,
209  * because LyX ignores leftlines > 1, rightlines > 1, align and valign in
210  * this case.
211  */
212 void ci2special(ColInfo & ci)
213 {
214         if (ci.width.empty() && ci.align == 'n')
215                 // The alignment setting is already in special, since
216                 // handle_colalign() never stores ci with these settings
217                 // and ensures that leftlines == 0 and rightlines == 0 in
218                 // this case.
219                 return;
220
221         if (!ci.width.empty()) {
222                 switch (ci.align) {
223                 case 'l':
224                         ci.special += ">{\\raggedright}";
225                         break;
226                 case 'r':
227                         ci.special += ">{\\raggedleft}";
228                         break;
229                 case 'c':
230                         ci.special += ">{\\centering}";
231                         break;
232                 }
233                 if (ci.valign == 'n')
234                         ci.special += 'p';
235                 else
236                         ci.special += ci.valign;
237                 ci.special += '{' + ci.width + '}';
238                 ci.width.erase();
239         } else
240                 ci.special += ci.align;
241
242         // LyX can only have one left and one right line.
243         for (int i = 1; i < ci.leftlines; ++i)
244                 ci.special.insert(0, "|");
245         for (int i = 1; i < ci.rightlines; ++i)
246                 ci.special += '|';
247         ci.leftlines = min(ci.leftlines, 1);
248         ci.rightlines = min(ci.rightlines, 1);
249         ci.align = 'n';
250         ci.valign = 'n';
251 }
252
253
254 /*!
255  * Handle column specifications for tabulars and multicolumns.
256  * The next token of the parser \p p must be an opening brace, and we read
257  * everything until the matching closing brace.
258  * The resulting column specifications are filled into \p colinfo. This is
259  * in an intermediate form. fix_colalign() makes it suitable for LyX output.
260  */
261 void handle_colalign(Parser & p, vector<ColInfo> & colinfo,
262                      ColInfo const & start)
263 {
264         if (p.get_token().cat() != catBegin)
265                 cerr << "Wrong syntax for table column alignment.\n"
266                         "Expected '{', got '" << p.curr_token().asInput()
267                      << "'.\n";
268
269         ColInfo next = start;
270         for (Token t = p.get_token(); p.good() && t.cat() != catEnd;
271              t = p.get_token()) {
272 #ifdef FILEDEBUG
273                 cerr << "t: " << t << "  c: '" << t.character() << "'\n";
274 #endif
275
276                 // We cannot handle comments here
277                 if (t.cat() == catComment) {
278                         if (t.cs().empty()) {
279                                 // "%\n" combination
280                                 p.skip_spaces();
281                         } else
282                                 cerr << "Ignoring comment: " << t.asInput();
283                         continue;
284                 }
285
286                 switch (t.character()) {
287                         case 'c':
288                         case 'l':
289                         case 'r':
290                                 // new column, horizontal aligned
291                                 next.align = t.character();
292                                 if (!next.special.empty())
293                                         ci2special(next);
294                                 colinfo.push_back(next);
295                                 next = ColInfo();
296                                 break;
297                         case 'p':
298                         case 'b':
299                         case 'm':
300                                 // new column, vertical aligned box
301                                 next.valign = t.character();
302                                 next.width = p.verbatim_item();
303                                 if (!next.special.empty())
304                                         ci2special(next);
305                                 colinfo.push_back(next);
306                                 next = ColInfo();
307                                 break;
308                         case '|':
309                                 // vertical rule
310                                 if (colinfo.empty()) {
311                                         if (next.special.empty())
312                                                 ++next.leftlines;
313                                         else
314                                                 next.special += '|';
315                                 } else if (colinfo.back().special.empty())
316                                         ++colinfo.back().rightlines;
317                                 else if (next.special.empty())
318                                         ++next.leftlines;
319                                 else
320                                         colinfo.back().special += '|';
321                                 break;
322                         case '>': {
323                                 // text before the next column
324                                 string const s = trim(p.verbatim_item());
325                                 if (next.special.empty() &&
326                                     next.align == 'n') {
327                                         // Maybe this can be converted to a
328                                         // horizontal alignment setting for
329                                         // fixed width columns
330                                         if (s == "\\raggedleft")
331                                                 next.align = 'r';
332                                         else if (s == "\\raggedright")
333                                                 next.align = 'l';
334                                         else if (s == "\\centering")
335                                                 next.align = 'c';
336                                         else
337                                                 next.special = ">{" + s + '}';
338                                 } else
339                                         next.special += ">{" + s + '}';
340                                 break;
341                         }
342                         case '<': {
343                                 // text after the last column
344                                 string const s = trim(p.verbatim_item());
345                                 if (colinfo.empty())
346                                         // This is not possible in LaTeX.
347                                         cerr << "Ignoring separator '<{"
348                                              << s << "}'." << endl;
349                                 else {
350                                         ColInfo & ci = colinfo.back();
351                                         ci2special(ci);
352                                         ci.special += "<{" + s + '}';
353                                 }
354                                 break;
355                         }
356                         case '*': {
357                                 // *{n}{arg} means 'n' columns of type 'arg'
358                                 string const num = p.verbatim_item();
359                                 string const arg = p.verbatim_item();
360                                 size_t const n = convert<unsigned int>(num);
361                                 if (!arg.empty() && n > 0) {
362                                         string s("{");
363                                         for (size_t i = 0; i < n; ++i)
364                                                 s += arg;
365                                         s += '}';
366                                         Parser p2(s);
367                                         handle_colalign(p2, colinfo, next);
368                                         next = ColInfo();
369                                 } else {
370                                         cerr << "Ignoring column specification"
371                                                 " '*{" << num << "}{"
372                                              << arg << "}'." << endl;
373                                 }
374                                 break;
375                         }
376                         case '@':
377                                 // text instead of the column spacing
378                         case '!':
379                                 // text in addition to the column spacing
380                                 next.special += t.character();
381                                 next.special += '{' + p.verbatim_item() + '}';
382                                 break;
383                         default:
384                                 // try user defined column types
385                                 if (special_columns.find(t.character()) !=
386                                     special_columns.end()) {
387                                         ci2special(next);
388                                         next.special += t.character();
389                                         int const nargs =
390                                                 special_columns[t.character()];
391                                         for (int i = 0; i < nargs; ++i)
392                                                 next.special += '{' +
393                                                         p.verbatim_item() +
394                                                         '}';
395                                         colinfo.push_back(next);
396                                         next = ColInfo();
397                                 } else
398                                         cerr << "Ignoring column specification"
399                                                 " '" << t << "'." << endl;
400                                 break;
401                         }
402         }
403
404         // Maybe we have some column separators that need to be added to the
405         // last column?
406         ci2special(next);
407         if (!next.special.empty()) {
408                 ColInfo & ci = colinfo.back();
409                 ci2special(ci);
410                 ci.special += next.special;
411                 next.special.erase();
412         }
413 }
414
415
416 /*!
417  * Move the left and right lines and alignment settings of the column \p ci
418  * to the special field if necessary.
419  */
420 void fix_colalign(ColInfo & ci)
421 {
422         if (ci.leftlines > 1 || ci.rightlines > 1)
423                 ci2special(ci);
424 }
425
426
427 /*!
428  * LyX can't handle more than one vertical line at the left or right side
429  * of a column.
430  * This function moves the left and right lines and alignment settings of all
431  * columns in \p colinfo to the special field if necessary.
432  */
433 void fix_colalign(vector<ColInfo> & colinfo)
434 {
435         // Try to move extra leftlines to the previous column.
436         // We do this only if both special fields are empty, otherwise we
437         // can't tell wether the result will be the same.
438         for (size_t col = 0; col < colinfo.size(); ++col) {
439                 if (colinfo[col].leftlines > 1 &&
440                     colinfo[col].special.empty() && col > 0 &&
441                     colinfo[col - 1].rightlines == 0 &&
442                     colinfo[col - 1].special.empty()) {
443                         ++colinfo[col - 1].rightlines;
444                         --colinfo[col].leftlines;
445                 }
446         }
447         // Try to move extra rightlines to the next column
448         for (size_t col = 0; col < colinfo.size(); ++col) {
449                 if (colinfo[col].rightlines > 1 &&
450                     colinfo[col].special.empty() &&
451                     col < colinfo.size() - 1 &&
452                     colinfo[col + 1].leftlines == 0 &&
453                     colinfo[col + 1].special.empty()) {
454                         ++colinfo[col + 1].leftlines;
455                         --colinfo[col].rightlines;
456                 }
457         }
458         // Move the lines and alignment settings to the special field if
459         // necessary
460         for (size_t col = 0; col < colinfo.size(); ++col)
461                 fix_colalign(colinfo[col]);
462 }
463
464
465 /*!
466  * Parse hlines and similar stuff.
467  * \returns wether the token \p t was parsed
468  */
469 bool parse_hlines(Parser & p, Token const & t, string & hlines,
470                   bool is_long_tabular)
471 {
472         LASSERT(t.cat() == catEscape, return false);
473
474         if (t.cs() == "hline")
475                 hlines += "\\hline";
476
477         else if (t.cs() == "cline")
478                 hlines += "\\cline{" + p.verbatim_item() + '}';
479
480         else if (is_long_tabular && t.cs() == "newpage")
481                 hlines += "\\newpage";
482
483         else
484                 return false;
485
486         return true;
487 }
488
489
/// Where in a row the table parser currently is
enum RowPosition {
	/// Before the first token of the row
	ROW_START,
	/// The first token has been read, but no column token yet
	IN_HLINES_START,
	/// The first column token has been read. This is the only position
	/// where comments and whitespace are treated as tokens
	IN_COLUMNS,
	/// The first non-column token at the end has been read
	IN_HLINES_END
};
502
503
504 /*!
505  * Parse table structure.
506  * We parse tables in a two-pass process: This function extracts the table
507  * structure (rows, columns, hlines etc.), but does not change the cell
508  * content. The cell content is parsed in a second step in handle_tabular().
509  */
510 void parse_table(Parser & p, ostream & os, bool is_long_tabular,
511                  RowPosition & pos, unsigned flags)
512 {
513         // table structure commands such as \hline
514         string hlines;
515
516         // comments that occur at places where we can't handle them
517         string comments;
518
519         while (p.good()) {
520                 Token const & t = p.get_token();
521
522 #ifdef FILEDEBUG
523                 debugToken(cerr, t, flags);
524 #endif
525
526                 // comments and whitespace in hlines
527                 switch (pos) {
528                 case ROW_START:
529                 case IN_HLINES_START:
530                 case IN_HLINES_END:
531                         if (t.cat() == catComment) {
532                                 if (t.cs().empty())
533                                         // line continuation
534                                         p.skip_spaces();
535                                 else
536                                         // We can't handle comments here,
537                                         // store them for later use
538                                         comments += t.asInput();
539                                 continue;
540                         } else if (t.cat() == catSpace ||
541                                    t.cat() == catNewline) {
542                                 // whitespace is irrelevant here, we
543                                 // need to recognize hline stuff
544                                 p.skip_spaces();
545                                 continue;
546                         }
547                         break;
548                 case IN_COLUMNS:
549                         break;
550                 }
551
552                 // We need to handle structure stuff first in order to
553                 // determine wether we need to output a HLINE separator
554                 // before the row or not.
555                 if (t.cat() == catEscape) {
556                         if (parse_hlines(p, t, hlines, is_long_tabular)) {
557                                 switch (pos) {
558                                 case ROW_START:
559                                         pos = IN_HLINES_START;
560                                         break;
561                                 case IN_COLUMNS:
562                                         pos = IN_HLINES_END;
563                                         break;
564                                 case IN_HLINES_START:
565                                 case IN_HLINES_END:
566                                         break;
567                                 }
568                                 continue;
569                         }
570
571                         else if (t.cs() == "tabularnewline" ||
572                                  t.cs() == "\\" ||
573                                  t.cs() == "cr") {
574                                 if (t.cs() == "cr")
575                                         cerr << "Warning: Converting TeX "
576                                                 "'\\cr' to LaTeX '\\\\'."
577                                              << endl;
578                                 // stuff before the line break
579                                 os << comments << HLINE << hlines << HLINE
580                                    << LINE;
581                                 //cerr << "hlines: " << hlines << endl;
582                                 hlines.erase();
583                                 comments.erase();
584                                 pos = ROW_START;
585                                 continue;
586                         }
587
588                         else if (is_long_tabular &&
589                                  (t.cs() == "endhead" ||
590                                   t.cs() == "endfirsthead" ||
591                                   t.cs() == "endfoot" ||
592                                   t.cs() == "endlastfoot")) {
593                                 hlines += t.asInput();
594                                 switch (pos) {
595                                 case IN_COLUMNS:
596                                 case IN_HLINES_END:
597                                         // these commands are implicit line
598                                         // breaks
599                                         os << comments << HLINE << hlines
600                                            << HLINE << LINE;
601                                         hlines.erase();
602                                         comments.erase();
603                                         pos = ROW_START;
604                                         break;
605                                 case ROW_START:
606                                         pos = IN_HLINES_START;
607                                         break;
608                                 case IN_HLINES_START:
609                                         break;
610                                 }
611                                 continue;
612                         }
613
614                 }
615
616                 // We need a HLINE separator if we either have no hline
617                 // stuff at all and are just starting a row or if we just
618                 // got the first non-hline token.
619                 switch (pos) {
620                 case ROW_START:
621                         // no hline tokens exist, first token at row start
622                 case IN_HLINES_START:
623                         // hline tokens exist, first non-hline token at row
624                         // start
625                         os << hlines << HLINE << comments;
626                         hlines.erase();
627                         comments.erase();
628                         pos = IN_COLUMNS;
629                         break;
630                 case IN_HLINES_END:
631                         // Oops, there is still cell content after hline
632                         // stuff. This does not work in LaTeX, so we ignore
633                         // the hlines.
634                         cerr << "Ignoring '" << hlines << "' in a cell"
635                              << endl;
636                         os << comments;
637                         hlines.erase();
638                         comments.erase();
639                         pos = IN_COLUMNS;
640                         break;
641                 case IN_COLUMNS:
642                         break;
643                 }
644
645                 // If we come here we have normal cell content
646                 //
647                 // cat codes
648                 //
649                 if (t.cat() == catMath) {
650                         // we are inside some text mode thingy, so opening new math is allowed
651                         Token const & n = p.get_token();
652                         if (n.cat() == catMath) {
653                                 // TeX's $$...$$ syntax for displayed math
654                                 os << "\\[";
655                                 // This does only work because parse_math outputs TeX
656                                 parse_math(p, os, FLAG_SIMPLE, MATH_MODE);
657                                 os << "\\]";
658                                 p.get_token(); // skip the second '$' token
659                         } else {
660                                 // simple $...$  stuff
661                                 p.putback();
662                                 os << '$';
663                                 // This does only work because parse_math outputs TeX
664                                 parse_math(p, os, FLAG_SIMPLE, MATH_MODE);
665                                 os << '$';
666                         }
667                 }
668
669                 else if (t.cat() == catSpace 
670                          || t.cat() == catNewline
671                          || t.cat() == catLetter 
672                          || t.cat() == catSuper 
673                          || t.cat() == catSub 
674                          || t.cat() == catOther 
675                          || t.cat() == catActive 
676                          || t.cat() == catParameter)
677                         os << t.cs();
678
679                 else if (t.cat() == catBegin) {
680                         os << '{';
681                         parse_table(p, os, is_long_tabular, pos,
682                                     FLAG_BRACE_LAST);
683                         os << '}';
684                 }
685
686                 else if (t.cat() == catEnd) {
687                         if (flags & FLAG_BRACE_LAST)
688                                 return;
689                         cerr << "unexpected '}'\n";
690                 }
691
692                 else if (t.cat() == catAlign) {
693                         os << TAB;
694                         p.skip_spaces();
695                 }
696
697                 else if (t.cat() == catComment)
698                         os << t.asInput();
699
700                 else if (t.cs() == "(") {
701                         os << "\\(";
702                         // This does only work because parse_math outputs TeX
703                         parse_math(p, os, FLAG_SIMPLE2, MATH_MODE);
704                         os << "\\)";
705                 }
706
707                 else if (t.cs() == "[") {
708                         os << "\\[";
709                         // This does only work because parse_math outputs TeX
710                         parse_math(p, os, FLAG_EQUATION, MATH_MODE);
711                         os << "\\]";
712                 }
713
714                 else if (t.cs() == "begin") {
715                         string const name = p.getArg('{', '}');
716                         active_environments.push_back(name);
717                         os << "\\begin{" << name << '}';
718                         // treat the nested environment as a block, don't
719                         // parse &, \\ etc, because they don't belong to our
720                         // table if they appear.
721                         os << p.verbatimEnvironment(name);
722                         os << "\\end{" << name << '}';
723                         active_environments.pop_back();
724                 }
725
726                 else if (t.cs() == "end") {
727                         if (flags & FLAG_END) {
728                                 // eat environment name
729                                 string const name = p.getArg('{', '}');
730                                 if (name != active_environment())
731                                         p.error("\\end{" + name + "} does not match \\begin{"
732                                                 + active_environment() + "}");
733                                 return;
734                         }
735                         p.error("found 'end' unexpectedly");
736                 }
737
738                 else
739                         os << t.asInput();
740         }
741
742         // We can have comments if the last line is incomplete
743         os << comments;
744
745         // We can have hline stuff if the last line is incomplete
746         if (!hlines.empty()) {
747                 // this does not work in LaTeX, so we ignore it
748                 cerr << "Ignoring '" << hlines << "' at end of tabular"
749                      << endl;
750         }
751 }
752
753
754 void handle_hline_above(RowInfo & ri, vector<CellInfo> & ci)
755 {
756         ri.topline = true;
757         for (size_t col = 0; col < ci.size(); ++col)
758                 ci[col].topline = true;
759 }
760
761
762 void handle_hline_below(RowInfo & ri, vector<CellInfo> & ci)
763 {
764         ri.bottomline = true;
765         for (size_t col = 0; col < ci.size(); ++col)
766                 ci[col].bottomline = true;
767 }
768
769
770 } // anonymous namespace
771
772
773 void handle_tabular(Parser & p, ostream & os, bool is_long_tabular,
774                     Context & context)
775 {
776         string posopts = p.getOpt();
777         if (!posopts.empty()) {
778                 // FIXME: Convert this to ERT
779                 if (is_long_tabular)
780                         cerr << "horizontal longtable";
781                 else
782                         cerr << "vertical tabular";
783                 cerr << " positioning '" << posopts << "' ignored\n";
784         }
785
786         vector<ColInfo> colinfo;
787
788         // handle column formatting
789         handle_colalign(p, colinfo, ColInfo());
790         fix_colalign(colinfo);
791
792         // first scan of cells
793         // use table mode to keep it minimal-invasive
794         // not exactly what's TeX doing...
795         vector<string> lines;
796         ostringstream ss;
797         RowPosition rowpos = ROW_START;
798         parse_table(p, ss, is_long_tabular, rowpos, FLAG_END);
799         split(ss.str(), lines, LINE);
800
801         vector< vector<CellInfo> > cellinfo(lines.size());
802         vector<RowInfo> rowinfo(lines.size());
803
804         // split into rows
805         //cerr << "// split into rows\n";
806         for (size_t row = 0; row < rowinfo.size(); ++row) {
807
808                 // init row
809                 cellinfo[row].resize(colinfo.size());
810
811                 // split row
812                 vector<string> dummy;
813                 //cerr << "\n########### LINE: " << lines[row] << "########\n";
814                 split(lines[row], dummy, HLINE);
815
816                 // handle horizontal line fragments
817                 // we do only expect this for a last line without '\\'
818                 if (dummy.size() != 3) {
819                         if ((dummy.size() != 1 && dummy.size() != 2) ||
820                             row != rowinfo.size() - 1)
821                                 cerr << "unexpected dummy size: " << dummy.size()
822                                         << " content: " << lines[row] << "\n";
823                         dummy.resize(3);
824                 }
825                 lines[row] = dummy[1];
826
827                 //cerr << "line: " << row << " above 0: " << dummy[0] << "\n";
828                 //cerr << "line: " << row << " below 2: " << dummy[2] <<  "\n";
829                 //cerr << "line: " << row << " cells 1: " << dummy[1] <<  "\n";
830
831                 for (int i = 0; i <= 2; i += 2) {
832                         //cerr << "   reading from line string '" << dummy[i] << "'\n";
833                         Parser p1(dummy[i]);
834                         while (p1.good()) {
835                                 Token t = p1.get_token();
836                                 //cerr << "read token: " << t << "\n";
837                                 if (t.cs() == "hline") {
838                                         if (i == 0) {
839                                                 if (rowinfo[row].topline) {
840                                                         if (row > 0) // extra bottomline above
841                                                                 handle_hline_below(rowinfo[row - 1], cellinfo[row - 1]);
842                                                         else
843                                                                 cerr << "dropping extra hline\n";
844                                                         //cerr << "below row: " << row-1 << endl;
845                                                 } else {
846                                                         handle_hline_above(rowinfo[row], cellinfo[row]);
847                                                         //cerr << "above row: " << row << endl;
848                                                 }
849                                         } else {
850                                                 //cerr << "below row: " << row << endl;
851                                                 handle_hline_below(rowinfo[row], cellinfo[row]);
852                                         }
853                                 } else if (t.cs() == "cline") {
854                                         string arg = p1.verbatim_item();
855                                         //cerr << "read cline arg: '" << arg << "'\n";
856                                         vector<string> t;
857                                         split(arg, t, '-');
858                                         t.resize(2);
859                                         size_t from = convert<unsigned int>(t[0]);
860                                         if (from == 0)
861                                                 cerr << "Could not parse "
862                                                         "cline start column."
863                                                      << endl;
864                                         else
865                                                 // 1 based index -> 0 based
866                                                 --from;
867                                         if (from >= colinfo.size()) {
868                                                 cerr << "cline starts at non "
869                                                         "existing column "
870                                                      << (from + 1) << endl;
871                                                 from = colinfo.size() - 1;
872                                         }
873                                         size_t to = convert<unsigned int>(t[1]);
874                                         if (to == 0)
875                                                 cerr << "Could not parse "
876                                                         "cline end column."
877                                                      << endl;
878                                         else
879                                                 // 1 based index -> 0 based
880                                                 --to;
881                                         if (to >= colinfo.size()) {
882                                                 cerr << "cline ends at non "
883                                                         "existing column "
884                                                      << (to + 1) << endl;
885                                                 to = colinfo.size() - 1;
886                                         }
887                                         for (size_t col = from; col <= to; ++col) {
888                                                 //cerr << "row: " << row << " col: " << col << " i: " << i << endl;
889                                                 if (i == 0) {
890                                                         rowinfo[row].topline = true;
891                                                         cellinfo[row][col].topline = true;
892                                                 } else {
893                                                         rowinfo[row].bottomline = true;
894                                                         cellinfo[row][col].bottomline = true;
895                                                 }
896                                         }
897                                 } else if (t.cs() == "endhead") {
898                                         if (i > 0)
899                                                 rowinfo[row].type = LT_HEAD;
900                                         for (int r = row - 1; r >= 0; --r) {
901                                                 if (rowinfo[r].type != LT_NORMAL)
902                                                         break;
903                                                 rowinfo[r].type = LT_HEAD;
904                                         }
905                                 } else if (t.cs() == "endfirsthead") {
906                                         if (i > 0)
907                                                 rowinfo[row].type = LT_FIRSTHEAD;
908                                         for (int r = row - 1; r >= 0; --r) {
909                                                 if (rowinfo[r].type != LT_NORMAL)
910                                                         break;
911                                                 rowinfo[r].type = LT_FIRSTHEAD;
912                                         }
913                                 } else if (t.cs() == "endfoot") {
914                                         if (i > 0)
915                                                 rowinfo[row].type = LT_FOOT;
916                                         for (int r = row - 1; r >= 0; --r) {
917                                                 if (rowinfo[r].type != LT_NORMAL)
918                                                         break;
919                                                 rowinfo[r].type = LT_FOOT;
920                                         }
921                                 } else if (t.cs() == "endlastfoot") {
922                                         if (i > 0)
923                                                 rowinfo[row].type = LT_LASTFOOT;
924                                         for (int r = row - 1; r >= 0; --r) {
925                                                 if (rowinfo[r].type != LT_NORMAL)
926                                                         break;
927                                                 rowinfo[r].type = LT_LASTFOOT;
928                                         }
929                                 } else if (t.cs() == "newpage") {
930                                         if (i == 0) {
931                                                 if (row > 0)
932                                                         rowinfo[row - 1].newpage = true;
933                                                 else
934                                                         // This does not work in LaTeX
935                                                         cerr << "Ignoring "
936                                                                 "'\\newpage' "
937                                                                 "before rows."
938                                                              << endl;
939                                         } else
940                                                 rowinfo[row].newpage = true;
941                                 } else {
942                                         cerr << "unexpected line token: " << t << endl;
943                                 }
944                         }
945                 }
946
947                 // split into cells
948                 vector<string> cells;
949                 split(lines[row], cells, TAB);
950                 for (size_t col = 0, cell = 0; cell < cells.size();
951                      ++col, ++cell) {
952                         //cerr << "cell content: '" << cells[cell] << "'\n";
953                         if (col >= colinfo.size()) {
954                                 // This does not work in LaTeX
955                                 cerr << "Ignoring extra cell '"
956                                      << cells[cell] << "'." << endl;
957                                 continue;
958                         }
959                         Parser p(cells[cell]);
960                         p.skip_spaces();
961                         //cells[cell] << "'\n";
962                         if (p.next_token().cs() == "multicolumn") {
963                                 // how many cells?
964                                 p.get_token();
965                                 size_t const ncells =
966                                         convert<unsigned int>(p.verbatim_item());
967
968                                 // special cell properties alignment
969                                 vector<ColInfo> t;
970                                 handle_colalign(p, t, ColInfo());
971                                 p.skip_spaces(true);
972                                 ColInfo & ci = t.front();
973
974                                 // The logic of LyX for multicolumn vertical
975                                 // lines is too complicated to reproduce it
976                                 // here (see LyXTabular::TeXCellPreamble()).
977                                 // Therefore we simply put everything in the
978                                 // special field.
979                                 ci2special(ci);
980
981                                 cellinfo[row][col].multi      = CELL_BEGIN_OF_MULTICOLUMN;
982                                 cellinfo[row][col].align      = ci.align;
983                                 cellinfo[row][col].special    = ci.special;
984                                 cellinfo[row][col].leftlines  = ci.leftlines;
985                                 cellinfo[row][col].rightlines = ci.rightlines;
986                                 ostringstream os;
987                                 parse_text_in_inset(p, os, FLAG_ITEM, false, context);
988                                 if (!cellinfo[row][col].content.empty()) {
989                                         // This may or may not work in LaTeX,
990                                         // but it does not work in LyX.
991                                         // FIXME: Handle it correctly!
992                                         cerr << "Moving cell content '"
993                                              << cells[cell]
994                                              << "' into a multicolumn cell. "
995                                                 "This will probably not work."
996                                              << endl;
997                                 }
998                                 cellinfo[row][col].content += os.str();
999
1000                                 // add dummy cells for multicol
1001                                 for (size_t i = 0; i < ncells - 1 && col < colinfo.size(); ++i) {
1002                                         ++col;
1003                                         cellinfo[row][col].multi = CELL_PART_OF_MULTICOLUMN;
1004                                         cellinfo[row][col].align = 'c';
1005                                 }
1006
1007                         } else if (col == 0 && is_long_tabular &&
1008                                    p.next_token().cs() == "caption") {
1009                                 // longtable caption support in LyX is a hack:
1010                                 // Captions require a row of their own with
1011                                 // the caption flag set to true, having only
1012                                 // one multicolumn cell. The contents of that
1013                                 // cell must contain exactly one caption inset
1014                                 // and nothing else.
1015                                 rowinfo[row].caption = true;
1016                                 for (size_t c = 1; c < cells.size(); ++c) {
1017                                         if (!cells[c].empty()) {
1018                                                 cerr << "Moving cell content '"
1019                                                      << cells[c]
1020                                                      << "' into the caption cell. "
1021                                                         "This will probably not work."
1022                                                      << endl;
1023                                                 cells[0] += cells[c];
1024                                         }
1025                                 }
1026                                 cells.resize(1);
1027                                 cellinfo[row][col].align      = colinfo[col].align;
1028                                 cellinfo[row][col].multi      = CELL_BEGIN_OF_MULTICOLUMN;
1029                                 ostringstream os;
1030                                 parse_text_in_inset(p, os, FLAG_CELL, false, context);
1031                                 cellinfo[row][col].content += os.str();
1032                                 // add dummy multicolumn cells
1033                                 for (size_t c = 1; c < colinfo.size(); ++c)
1034                                         cellinfo[row][c].multi = CELL_PART_OF_MULTICOLUMN;
1035
1036                         } else {
1037                                 cellinfo[row][col].leftlines  = colinfo[col].leftlines;
1038                                 cellinfo[row][col].rightlines = colinfo[col].rightlines;
1039                                 cellinfo[row][col].align      = colinfo[col].align;
1040                                 ostringstream os;
1041                                 parse_text_in_inset(p, os, FLAG_CELL, false, context);
1042                                 cellinfo[row][col].content += os.str();
1043                         }
1044                 }
1045
1046                 //cerr << "//  handle almost empty last row what we have\n";
1047                 // handle almost empty last row
1048                 if (row && lines[row].empty() && row + 1 == rowinfo.size()) {
1049                         //cerr << "remove empty last line\n";
1050                         if (rowinfo[row].topline)
1051                                 rowinfo[row - 1].bottomline = true;
1052                         for (size_t col = 0; col < colinfo.size(); ++col)
1053                                 if (cellinfo[row][col].topline)
1054                                         cellinfo[row - 1][col].bottomline = true;
1055                         rowinfo.pop_back();
1056                 }
1057         }
1058
1059         // Now we have the table structure and content in rowinfo, colinfo
1060         // and cellinfo.
1061         // Unfortunately LyX has some limitations that we need to work around.
1062
1063         // Convert cells with special content to multicolumn cells
1064         // (LyX ignores the special field for non-multicolumn cells).
1065         for (size_t row = 0; row < rowinfo.size(); ++row) {
1066                 for (size_t col = 0; col < cellinfo[row].size(); ++col) {
1067                         if (cellinfo[row][col].multi == CELL_NORMAL &&
1068                             !cellinfo[row][col].special.empty())
1069                                 cellinfo[row][col].multi = CELL_BEGIN_OF_MULTICOLUMN;
1070                 }
1071         }
1072
1073         // Distribute lines from rows/columns to cells
1074         // The code was stolen from convert_tablines() in lyx2lyx/lyx_1_6.py.
1075         // Each standard cell inherits the settings of the corresponding
1076         // rowinfo/colinfo. This works because all cells with individual
1077         // settings were converted to multicolumn cells above.
1078         // Each multicolumn cell inherits the settings of the rowinfo/colinfo
1079         // corresponding to the first column of the multicolumn cell (default
1080         // of the multicol package). This works because the special field
1081         // overrides the line fields.
1082         for (size_t row = 0; row < rowinfo.size(); ++row) {
1083                 for (size_t col = 0; col < cellinfo[row].size(); ++col) {
1084                         if (cellinfo[row][col].multi == CELL_NORMAL) {
1085                                 cellinfo[row][col].topline = rowinfo[row].topline;
1086                                 cellinfo[row][col].bottomline = rowinfo[row].bottomline;
1087                                 cellinfo[row][col].leftlines = colinfo[col].leftlines;
1088                                 cellinfo[row][col].rightlines = colinfo[col].rightlines;
1089                         } else if (cellinfo[row][col].multi == CELL_BEGIN_OF_MULTICOLUMN) {
1090                                 size_t s = col + 1;
1091                                 while (s < cellinfo[row].size() &&
1092                                        cellinfo[row][s].multi == CELL_PART_OF_MULTICOLUMN)
1093                                         s++;
1094                                 if (s < cellinfo[row].size() &&
1095                                     cellinfo[row][s].multi != CELL_BEGIN_OF_MULTICOLUMN)
1096                                         cellinfo[row][col].rightlines = colinfo[col].rightlines;
1097                                 if (col > 0 && cellinfo[row][col-1].multi == CELL_NORMAL)
1098                                         cellinfo[row][col].leftlines = colinfo[col].leftlines;
1099                         }
1100                 }
1101         }
1102
1103         //cerr << "// output what we have\n";
1104         // output what we have
1105         os << "\n<lyxtabular version=\"3\" rows=\"" << rowinfo.size()
1106            << "\" columns=\"" << colinfo.size() << "\">\n";
1107         os << "<features"
1108            << write_attribute("rotate", false)
1109            << write_attribute("islongtable", is_long_tabular)
1110            << ">\n";
1111
1112         //cerr << "// after header\n";
1113         for (size_t col = 0; col < colinfo.size(); ++col) {
1114                 os << "<column alignment=\""
1115                    << verbose_align(colinfo[col].align) << "\""
1116                    << " valignment=\""
1117                    << verbose_valign(colinfo[col].valign) << "\""
1118                    << write_attribute("width", translate_len(colinfo[col].width))
1119                    << write_attribute("special", colinfo[col].special)
1120                    << ">\n";
1121         }
1122         //cerr << "// after cols\n";
1123
1124         for (size_t row = 0; row < rowinfo.size(); ++row) {
1125                 os << "<row"
1126                    << write_attribute("endhead",
1127                                       rowinfo[row].type == LT_HEAD)
1128                    << write_attribute("endfirsthead",
1129                                       rowinfo[row].type == LT_FIRSTHEAD)
1130                    << write_attribute("endfoot",
1131                                       rowinfo[row].type == LT_FOOT)
1132                    << write_attribute("endlastfoot",
1133                                       rowinfo[row].type == LT_LASTFOOT)
1134                    << write_attribute("newpage", rowinfo[row].newpage)
1135                    << write_attribute("caption", rowinfo[row].caption)
1136                    << ">\n";
1137                 for (size_t col = 0; col < colinfo.size(); ++col) {
1138                         CellInfo const & cell = cellinfo[row][col];
1139                         os << "<cell";
1140                         if (cell.multi != CELL_NORMAL)
1141                                 os << " multicolumn=\"" << cell.multi << "\"";
1142                         os << " alignment=\"" << verbose_align(cell.align)
1143                            << "\""
1144                            << " valignment=\"" << verbose_valign(cell.valign)
1145                            << "\""
1146                            << write_attribute("topline", cell.topline)
1147                            << write_attribute("bottomline", cell.bottomline)
1148                            << write_attribute("leftline", cell.leftlines > 0)
1149                            << write_attribute("rightline", cell.rightlines > 0)
1150                            << write_attribute("rotate", cell.rotate);
1151                         //cerr << "\nrow: " << row << " col: " << col;
1152                         //if (cell.topline)
1153                         //      cerr << " topline=\"true\"";
1154                         //if (cell.bottomline)
1155                         //      cerr << " bottomline=\"true\"";
1156                         os << " usebox=\"none\""
1157                            << write_attribute("width", translate_len(cell.width));
1158                         if (cell.multi != CELL_NORMAL)
1159                                 os << write_attribute("special", cell.special);
1160                         os << ">"
1161                            << "\n\\begin_inset Text\n"
1162                            << cell.content
1163                            << "\n\\end_inset\n"
1164                            << "</cell>\n";
1165                 }
1166                 os << "</row>\n";
1167         }
1168
1169         os << "</lyxtabular>\n";
1170 }
1171
1172
1173
1174
1175 // }])
1176
1177
1178 } // namespace lyx