]> git.lyx.org Git - lyx.git/blob - src/tex2lyx/table.C
architectural changes to tex2lyx
[lyx.git] / src / tex2lyx / table.C
1 /** The .tex to .lyx converter
2     \author André Pönitz (2003)
3  */
4
5 // {[(
6
7 #include <config.h>
8
9 #include "tex2lyx.h"
10
11 #include <cctype>
12 #include <fstream>
13 #include <iostream>
14 #include <sstream>
15 #include <vector>
16 #include <map>
17
18 using std::cerr;
19 using std::endl;
20 using std::istringstream;
21 using std::ostream;
22 using std::ostringstream;
23 using std::string;
24 using std::vector;
25 using std::map;
26
27 #include "mathed/math_gridinfo.h"
28
29 // filled in preamble.C
// Maps a column specifier character to the number of brace-delimited
// arguments that handle_colalign() reads after it in a column format string.
30 std::map<char, int> special_columns;
31
32
33 namespace {
34
// Convert the leading integer in \p s to an int.
//
// Leading whitespace is skipped and trailing garbage is ignored
// ("12abc" yields 12). If no integer can be extracted at all (empty
// string, non-numeric text, out-of-range value) \p deflt is returned.
int string2int(std::string const & s, int deflt = 0)
{
	std::istringstream is(s);
	int value = 0;
	// Do not rely on a failed extraction leaving the target untouched:
	// since C++11 the stream stores 0 (or the clamped limit on overflow)
	// into it, which would silently discard the caller's default.
	if (is >> value)
		return value;
	return deflt;
}
42
43
44 string read_hlines(Parser & p)
45 {
46         ostringstream os;
47         p.skip_spaces();
48         while (p.good()) {
49                 if (p.next_token().cs() == "hline") {
50                         p.get_token();
51                         os << "\\hline";
52                 } else if (p.next_token().cs() == "cline") {
53                         p.get_token();
54                         os << "\\cline{" << p.verbatim_item() << "}";
55                 } else
56                         break;
57                 p.skip_spaces();
58         };
59         //cerr << "read_hlines(), read: '" << os.str() << "'\n";
60         //cerr << "read_hlines(), next token: " << p.next_token() << "\n";
61         return os.str();
62 }
63
64
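/* rather brutish way to code table structure in a string:

  \begin{tabular}{ccc}
    1 & 2 & 3\\ \hline
    \multicolumn{2}{c}{4} & 5 \\
    6 & 7 \\
  \end{tabular}

 gets "translated" (schematically, whitespace aside) to:

  HLINE 1 TAB 2 TAB 3 HLINE HLINE LINE
  \hline HLINE \multicolumn{2}{c}{4} TAB 5 HLINE HLINE LINE
  HLINE 6 TAB 7 HLINE HLINE LINE
  HLINE

 i.e. every row has the form

  <lines above> HLINE <cells> HLINE <lines below> HLINE

 with the cells separated by TAB and the rows terminated by LINE.
*/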
79
// In-band separator characters for the intermediate table string:
// TAB   is written for every alignment character (&),
// LINE  is written for every row break (\\ or \tabularnewline),
// HLINE delimits the horizontal line commands above/below a row.
char const TAB   = '\x01';
char const LINE  = '\x02';
char const HLINE = '\x04';
83
84
85 void handle_colalign(Parser & p, vector<ColInfo> & colinfo)
86 {
87         if (p.get_token().cat() != catBegin)
88                 cerr << "wrong syntax for table column alignment. '{' expected\n";
89
90         char nextalign = 'b';
91         bool leftline = false;
92         for (Token t=p.get_token(); p.good() && t.cat() != catEnd; t = p.get_token()){
93 #ifdef FILEDEBUG
94                 cerr << "t: " << t << "  c: '" << t.character() << "'\n";
95 #endif
96
97                 switch (t.character()) {
98                         case 'c':
99                         case 'l':
100                         case 'r': {
101                                 ColInfo ci;
102                                 ci.align = t.character();
103                                 if (colinfo.size() && colinfo.back().rightline > 1) {
104                                         ci.leftline = true;
105                                         --colinfo.back().rightline;
106                                 }
107                                 colinfo.push_back(ci);
108                                 break;
109                         }
110                         case 'p':
111                                 colinfo.push_back(ColInfo());
112                                 colinfo.back().align = nextalign;
113                                 colinfo.back().width = p.verbatim_item();
114                                 nextalign = 'b';
115                                 break;
116                         case '|':
117                                 if (colinfo.empty())
118                                         leftline = true;
119                                 else
120                                         ++colinfo.back().rightline;
121                                 break;
122                         case '>': {
123                                 string s = p.verbatim_item();
124                                 if (s == "\\raggedleft ")
125                                         nextalign = 'l';
126                                 else if (s == "\\raggedright ")
127                                         nextalign = 'r';
128                                 else
129                                         cerr << "unknown '>' column '" << s << "'\n";
130                                 break;
131                         }
132                         default:
133                                 if (special_columns.find(t.character()) != special_columns.end()) {
134                                         ColInfo ci;
135                                         ci.align = 'c';
136                                         ci.special += t.character();
137                                         int const nargs = special_columns[t.character()];
138                                         for (int i = 0; i < nargs; ++i)
139                                                 ci.special += "{" + p.verbatim_item() + "}"; 
140                                         //cerr << "handling special column '" << t << "' " << nargs
141                                         //      << "  '" << ci.special << "'\n";
142                                         colinfo.push_back(ci);
143                                 } else {
144                                         cerr << "ignoring special separator '" << t << "'\n";
145                                 }
146                                 break;
147                         }
148         }
149         if (colinfo.size() && leftline)
150                 colinfo[0].leftline = true;
151 }
152
153
154 } // anonymous namespace
155
156
157 void parse_table(Parser & p, ostream & os, unsigned flags)
158 {
159         string hlines;
160
161         while (p.good()) {
162                 Token const & t = p.get_token();
163
164 #ifdef FILEDEBUG
165                 cerr << "t: " << t << " flags: " << flags << "\n";
166 #endif
167
168                 //
169                 // cat codes
170                 //
171                 if (t.cat() == catMath) {
172                                 // we are inside some text mode thingy, so opening new math is allowed
173                         Token const & n = p.get_token();
174                         if (n.cat() == catMath) {
175                                 // TeX's $$...$$ syntax for displayed math
176                                 os << "\\[";
177                                 parse_math(p, os, FLAG_SIMPLE, MATH_MODE);
178                                 os << "\\]";
179                                 p.get_token(); // skip the second '$' token
180                         } else {
181                                 // simple $...$  stuff
182                                 p.putback();
183                                 os << '$';
184                                 parse_math(p, os, FLAG_SIMPLE, MATH_MODE);
185                                 os << '$';
186                         }
187                 }
188
189                 else if (t.cat() == catLetter ||
190                                t.cat() == catSpace ||
191                                t.cat() == catSuper ||
192                                t.cat() == catSub ||
193                                t.cat() == catOther ||
194                                t.cat() == catActive ||
195                                t.cat() == catNewline ||
196                                t.cat() == catParameter)
197                         os << t.character();
198
199                 else if (t.cat() == catBegin) {
200                         os << '{';
201                         parse_table(p, os, FLAG_BRACE_LAST);
202                         os << '}';
203                 }
204
205                 else if (t.cat() == catEnd) {
206                         if (flags & FLAG_BRACE_LAST)
207                                 return;
208                         cerr << "unexpected '}'\n";
209                 }
210
211                 else if (t.cat() == catAlign) {
212                         os << TAB;
213                 }
214
215                 else if (t.cs() == "tabularnewline" || t.cs() == "\\") {
216                         // stuff before the line break
217                         // and look ahead for stuff after the line break
218                         os << HLINE << hlines << HLINE << LINE << read_hlines(p) << HLINE;
219                         hlines.erase();
220                 }
221
222                 else if (t.cs() == "hline")
223                         hlines += "\\hline";
224
225                 else if (t.cs() == "cline")
226                         hlines += "\\cline{" + p.verbatim_item() + '}';
227
228                 else if (t.cat() == catComment)
229                         handle_comment(p);
230
231                 else if (t.cs() == "(") {
232                         os << "\\(";
233                         parse_math(p, os, FLAG_SIMPLE2, MATH_MODE);
234                         os << "\\)";
235                 }
236
237                 else if (t.cs() == "[") {
238                         os << "\\[";
239                         parse_math(p, os, FLAG_EQUATION, MATH_MODE);
240                         os << "\\]";
241                 }
242
243                 else if (t.cs() == "begin") {
244                         string const name = p.getArg('{', '}');
245                         active_environments.push_back(name);
246                         parse_table(p, os, FLAG_END);
247                 }
248
249                 else if (t.cs() == "end") {
250                         if (flags & FLAG_END) {
251                                 // eat environment name
252                                 string const name = p.getArg('{', '}');
253                                 if (name != active_environment())
254                                         p.error("\\end{" + name + "} does not match \\begin{"
255                                                 + active_environment() + "}");
256                                 active_environments.pop_back();
257                                 return;
258                         }
259                         p.error("found 'end' unexpectedly");
260                 }
261
262                 else 
263                         os << t.asInput();
264         }
265 }
266
267
268 void handle_hline_above(RowInfo & ri, vector<CellInfo> & ci)
269 {
270         ri.topline = true;
271         for (size_t col = 0; col < ci.size(); ++col)
272                 ci[col].topline = true;
273 }
274
275
276 void handle_hline_below(RowInfo & ri, vector<CellInfo> & ci)
277 {
278         ri.bottomline = true;
279         for (size_t col = 0; col < ci.size(); ++col)
280                 ci[col].bottomline = true;
281 }
282
283
284 void handle_tabular(Parser & p, ostream & os,
285                     Context & context)
286 {
287         string posopts = p.getOpt();
288         if (posopts.size())
289                 cerr << "vertical tabular positioning '" << posopts << "' ignored\n";
290
291         vector<ColInfo>            colinfo;
292
293         // handle column formatting
294         handle_colalign(p, colinfo);
295
296         // handle initial hlines
297
298         // first scan of cells
299         // use table mode to keep it minimal-invasive
300         // not exactly what's TeX doing...
301         vector<string> lines;
302         ostringstream ss;
303         ss << read_hlines(p) << HLINE; // handle initial hlines
304         parse_table(p, ss, FLAG_END);
305         split(ss.str(), lines, LINE);
306
307         vector< vector<CellInfo> > cellinfo(lines.size());
308         vector<RowInfo> rowinfo(lines.size());
309         
310         // split into rows
311         //cerr << "// split into rows\n";
312         for (size_t row = 0; row < rowinfo.size(); ++row) {
313
314                 // init row
315                 cellinfo[row].resize(colinfo.size());
316
317                 // split row    
318                 vector<string> dummy;
319                 //cerr << "\n########### LINE: " << lines[row] << "########\n";
320                 split(lines[row], dummy, HLINE);
321
322                 // handle horizontal line fragments
323                 if (dummy.size() != 3) {
324                         if (dummy.size() != 1)
325                                 cerr << "unexpected dummy size: " << dummy.size()
326                                         << " content: " << lines[row] << "\n";
327                         dummy.resize(3);
328                 }
329                 lines[row] = dummy[1];
330
331                 //cerr << "line: " << row << " above 0: " << dummy[0] << "\n";
332                 //cerr << "line: " << row << " below 2: " << dummy[2] <<  "\n";
333                 //cerr << "line: " << row << " cells 1: " << dummy[1] <<  "\n";
334
335                 for (int i = 0; i <= 2; i += 2) {       
336                         //cerr << "   reading from line string '" << dummy[i] << "'\n";
337                         Parser p1(dummy[i]);
338                         while (p1.good()) {
339                                 Token t = p1.get_token();
340                                 //cerr << "read token: " << t << "\n";
341                                 if (t.cs() == "hline") {
342                                         if (i == 0) {
343                                                 if (rowinfo[row].topline) {
344                                                         if (row > 0) // extra bottomline above
345                                                                 handle_hline_below(rowinfo[row - 1], cellinfo[row - 1]);
346                                                         else
347                                                                 cerr << "dropping extra hline\n";
348                                                         //cerr << "below row: " << row-1 << endl;
349                                                 } else {
350                                                         handle_hline_above(rowinfo[row], cellinfo[row]);
351                                                         //cerr << "above row: " << row << endl;
352                                                 }
353                                         } else {        
354                                                 //cerr << "below row: " << row << endl;
355                                                 handle_hline_below(rowinfo[row], cellinfo[row]);
356                                         }
357                                 } else if (t.cs() == "cline") {
358                                         string arg = p1.verbatim_item();
359                                         //cerr << "read cline arg: '" << arg << "'\n";
360                                         vector<string> t;
361                                         split(arg, t, '-');
362                                         t.resize(2);
363                                         size_t from = string2int(t[0]) - 1;
364                                         size_t to = string2int(t[1]);
365                                         for (size_t col = from; col < to; ++col) {
366                                                 //cerr << "row: " << row << " col: " << col << " i: " << i << endl;
367                                                 if (i == 0) {
368                                                         rowinfo[row].topline = true;
369                                                         cellinfo[row][col].topline = true;
370                                                 } else {
371                                                         rowinfo[row].bottomline = true;
372                                                         cellinfo[row][col].bottomline = true;
373                                                 }
374                                         }
375                                 } else {
376                                         cerr << "unexpected line token: " << t << endl;
377                                 }
378                         }
379                 }
380
381                 // split into cells
382                 vector<string> cells;
383                 split(lines[row], cells, TAB);
384                 for (size_t col = 0, cell = 0;
385                                 cell < cells.size() && col < colinfo.size(); ++col, ++cell) {
386                         //cerr << "cell content: '" << cells[cell] << "'\n";
387                         Parser p(cells[cell]);
388                         p.skip_spaces();        
389                         //cells[cell] << "'\n";
390                         if (p.next_token().cs() == "multicolumn") {
391                                 // how many cells?
392                                 p.get_token();
393                                 size_t const ncells = string2int(p.verbatim_item());
394
395                                 // special cell properties alignment    
396                                 vector<ColInfo> t;
397                                 handle_colalign(p, t);
398                                 cellinfo[row][col].multi     = 1;
399                                 cellinfo[row][col].align     = t.front().align;
400                                 ostringstream os;
401                                 parse_text_in_inset(p, os, FLAG_ITEM, false, context);
402                                 cellinfo[row][col].content   = os.str();
403                                 cellinfo[row][col].leftline  |= t.front().leftline;
404                                 cellinfo[row][col].rightline |= t.front().rightline;
405
406                                 // add dummy cells for multicol
407                                 for (size_t i = 0; i < ncells - 1 && col < colinfo.size(); ++i) {
408                                         ++col;
409                                         cellinfo[row][col].multi = 2;
410                                         cellinfo[row][col].align = 'c';
411                                 }
412
413                                 // more than one line on the right?
414                                 if (t.front().rightline > 1)
415                                         cellinfo[row][col + 1].leftline = true;
416
417                         } else {        
418                                 // FLAG_END is a hack, we need to read all of it
419                                 cellinfo[row][col].leftline = colinfo[col].leftline;
420                                 cellinfo[row][col].rightline = colinfo[col].rightline;
421                                 cellinfo[row][col].align = colinfo[col].align;
422                                 ostringstream os;
423                                 parse_text_in_inset(p, os, FLAG_CELL, false, context);
424                                 cellinfo[row][col].content   = os.str();
425                         }
426                 }
427
428                 //cerr << "//  handle almost empty last row what we have\n";
429                 // handle almost empty last row
430                 if (row && lines[row].empty() && row + 1 == rowinfo.size()) {
431                         //cerr << "remove empty last line\n";
432                         if (rowinfo[row].topline)
433                                 rowinfo[row - 1].bottomline = true;
434                         for (size_t col = 0; col < colinfo.size(); ++col)
435                                 if (cellinfo[row][col].topline)
436                                         cellinfo[row - 1][col].bottomline = true;
437                         rowinfo.pop_back();
438                 }
439
440         }
441
442         //cerr << "// output what we have\n";
443         // output what we have
444         os << "\n<lyxtabular version=\"3\" rows=\"" << rowinfo.size()
445                  << "\" columns=\"" << colinfo.size() << "\">\n"
446                  << "<features>\n";
447
448         //cerr << "// after header\n";
449         for (size_t col = 0; col < colinfo.size(); ++col) {
450                 os << "<column alignment=\""
451                    << verbose_align(colinfo[col].align) << "\"";
452                 os << " valignment=\"top\"";
453                 if (colinfo[col].leftline)
454                         os << " leftline=\"true\"";
455                 if (colinfo[col].rightline)
456                         os << " rightline=\"true\"";
457                 if (colinfo[col].width.size())
458                         os << " width=\"" << colinfo[col].width << "\"";
459                 if (colinfo[col].special.size())
460                         os << " special=\"" << colinfo[col].special << "\"";
461                 os << ">\n";
462         }
463         //cerr << "// after cols\n";
464
465         for (size_t row = 0; row < rowinfo.size(); ++row) {
466                 os << "<row";
467                 if (rowinfo[row].topline)
468                         os << " topline=\"true\"";
469                 if (rowinfo[row].bottomline)
470                         os << " bottomline=\"true\"";
471                 os << ">\n";
472                 for (size_t col = 0; col < colinfo.size(); ++col) {
473                         CellInfo const & cell = cellinfo[row][col];
474                         os << "<cell";
475                         if (cell.multi)
476                                 os << " multicolumn=\"" << cell.multi << "\"";
477                         os << " alignment=\"" << verbose_align(cell.align) 
478                            << "\""
479                            << " valignment=\"top\"";
480                         if (cell.topline)
481                                 os << " topline=\"true\"";
482                         if (cell.bottomline)
483                                 os << " bottomline=\"true\"";
484                         if (cell.leftline)
485                                 os << " leftline=\"true\"";
486                         if (cell.rightline)
487                                 os << " rightline=\"true\"";
488                         //cerr << "\nrow: " << row << " col: " << col;
489                         //if (cell.topline)
490                         //      cerr << " topline=\"true\"";
491                         //if (cell.bottomline)
492                         //      cerr << " bottomline=\"true\"";
493                         os << " usebox=\"none\""
494                            << ">"
495                            << "\n\\begin_inset Text\n"
496                            << cell.content
497                            << "\n\\end_inset \n"
498                            << "</cell>\n";
499                 }
500                 os << "</row>\n";
501         }
502                         
503         os << "</lyxtabular>\n";
504 }
505
506
507
508
509 // }])