src/tex2lyx/table.C

   1 /** The .tex to .lyx converter
   2     \author André Pönitz (2003)
   3  */
   4
   5 // {[(
   6
   7 #include <config.h>
   8
   9 #include "tex2lyx.h"
  10
  11 #include <cctype>
  12 #include <fstream>
  13 #include <iostream>
  14 #include <sstream>
  15 #include <vector>
  16 #include <map>
  17
  18 using std::cerr;
  19 using std::endl;
  20 using std::istringstream;
  21 using std::ostream;
  22 using std::ostringstream;
  23 using std::string;
  24 using std::vector;
  25 using std::map;
  26
  27 #include "mathed/math_gridinfo.h"
  28
// Extra column-specifier characters, mapped to the number of brace
// arguments each one takes: handle_colalign() below reads that many
// verbatim items after the character. Filled in preamble.C.
std::map<char, int> special_columns;
  31
  32
  33 namespace {
  34
  35 int string2int(string const & s, int deflt = 0)
  36 {
  37         istringstream is(s);
  38         int i = deflt;
  39         is >> i;
  40         return i;
  41 }
  42
  43
  44 string read_hlines(Parser & p)
  45 {
  46         ostringstream os;
  47         p.skip_spaces();
  48         while (p.good()) {
  49                 if (p.next_token().cs() == "hline") {
  50                         p.get_token();
  51                         os << "\\hline";
  52                 } else if (p.next_token().cs() == "cline") {
  53                         p.get_token();
  54                         os << "\\cline{" << p.verbatim_item() << "}";
  55                 } else
  56                         break;
  57                 p.skip_spaces();
  58         };
  59         //cerr << "read_hlines(), read: '" << os.str() << "'\n";
  60         //cerr << "read_hlines(), next token: " << p.next_token() << "\n";
  61         return os.str();
  62 }
  63
  64
  65 /* rather brutish way to code table structure in a string:
  66
  67   \begin{tabular}{ccc}
  68     1 & 2 & 3\\ \hline
  69     \multicolumn{2}{c}{4} & 5 //
  70     6 & 7 \\
  71   \end{tabular}
  72
  73  gets "translated" to:
  74
  75   1 TAB 2 TAB 3 LINE
  76   \hline HLINE  TAB 5 LINE
  77   5 TAB 7 LINE
  78 */
  79
  80 char const TAB   = '\001';
  81 char const LINE  = '\002';
  82 char const HLINE = '\004';
  83
  84
  85 void handle_colalign(Parser & p, vector<ColInfo> & colinfo)
  86 {
  87         if (p.get_token().cat() != catBegin)
  88                 cerr << "wrong syntax for table column alignment. '{' expected\n";
  89
  90         char nextalign = 'b';
  91         bool leftline = false;
  92         for (Token t=p.get_token(); p.good() && t.cat() != catEnd; t = p.get_token()){
  93 #ifdef FILEDEBUG
  94                 cerr << "t: " << t << "  c: '" << t.character() << "'\n";
  95 #endif
  96
  97                 switch (t.character()) {
  98                         case 'c':
  99                         case 'l':
 100                         case 'r': {
 101                                 ColInfo ci;
 102                                 ci.align = t.character();
 103                                 if (colinfo.size() && colinfo.back().rightline > 1) {
 104                                         ci.leftline = true;
 105                                         --colinfo.back().rightline;
 106                                 }
 107                                 colinfo.push_back(ci);
 108                                 break;
 109                         }
 110                         case 'p':
 111                                 colinfo.push_back(ColInfo());
 112                                 colinfo.back().align = nextalign;
 113                                 colinfo.back().width = p.verbatim_item();
 114                                 nextalign = 'b';
 115                                 break;
 116                         case '|':
 117                                 if (colinfo.empty())
 118                                         leftline = true;
 119                                 else
 120                                         ++colinfo.back().rightline;
 121                                 break;
 122                         case '>': {
 123                                 string s = p.verbatim_item();
 124                                 if (s == "\\raggedleft ")
 125                                         nextalign = 'l';
 126                                 else if (s == "\\raggedright ")
 127                                         nextalign = 'r';
 128                                 else
 129                                         cerr << "unknown '>' column '" << s << "'\n";
 130                                 break;
 131                         }
 132                         default:
 133                                 if (special_columns.find(t.character()) != special_columns.end()) {
 134                                         ColInfo ci;
 135                                         ci.align = 'c';
 136                                         ci.special += t.character();
 137                                         int const nargs = special_columns[t.character()];
 138                                         for (int i = 0; i < nargs; ++i)
 139                                                 ci.special += "{" + p.verbatim_item() + "}";
 140                                         //cerr << "handling special column '" << t << "' " << nargs
 141                                         //      << "  '" << ci.special << "'\n";
 142                                         colinfo.push_back(ci);
 143                                 } else {
 144                                         cerr << "ignoring special separator '" << t << "'\n";
 145                                 }
 146                                 break;
 147                         }
 148         }
 149         if (colinfo.size() && leftline)
 150                 colinfo[0].leftline = true;
 151 }
 152
 153
 154 } // anonymous namespace
 155
 156
 157 void parse_table(Parser & p, ostream & os, unsigned flags)
 158 {
 159         string hlines;
 160
 161         while (p.good()) {
 162                 Token const & t = p.get_token();
 163
 164 #ifdef FILEDEBUG
 165                 cerr << "t: " << t << " flags: " << flags << "\n";
 166 #endif
 167
 168                 //
 169                 // cat codes
 170                 //
 171                 if (t.cat() == catMath) {
 172                                 // we are inside some text mode thingy, so opening new math is allowed
 173                         Token const & n = p.get_token();
 174                         if (n.cat() == catMath) {
 175                                 // TeX's $$...$$ syntax for displayed math
 176                                 os << "\\[";
 177                                 parse_math(p, os, FLAG_SIMPLE, MATH_MODE);
 178                                 os << "\\]";
 179                                 p.get_token(); // skip the second '$' token
 180                         } else {
 181                                 // simple $...$  stuff
 182                                 p.putback();
 183                                 os << '$';
 184                                 parse_math(p, os, FLAG_SIMPLE, MATH_MODE);
 185                                 os << '$';
 186                         }
 187                 }
 188
 189                 else if (t.cat() == catLetter ||
 190                                t.cat() == catSpace ||
 191                                t.cat() == catSuper ||
 192                                t.cat() == catSub ||
 193                                t.cat() == catOther ||
 194                                t.cat() == catActive ||
 195                                t.cat() == catNewline ||
 196                                t.cat() == catParameter)
 197                         os << t.character();
 198
 199                 else if (t.cat() == catBegin) {
 200                         os << '{';
 201                         parse_table(p, os, FLAG_BRACE_LAST);
 202                         os << '}';
 203                 }
 204
 205                 else if (t.cat() == catEnd) {
 206                         if (flags & FLAG_BRACE_LAST)
 207                                 return;
 208                         cerr << "unexpected '}'\n";
 209                 }
 210
 211                 else if (t.cat() == catAlign) {
 212                         os << TAB;
 213                 }
 214
 215                 else if (t.cs() == "tabularnewline" || t.cs() == "\\") {
 216                         // stuff before the line break
 217                         // and look ahead for stuff after the line break
 218                         os << HLINE << hlines << HLINE << LINE << read_hlines(p) << HLINE;
 219                         hlines.erase();
 220                 }
 221
 222                 else if (t.cs() == "hline")
 223                         hlines += "\\hline";
 224
 225                 else if (t.cs() == "cline")
 226                         hlines += "\\cline{" + p.verbatim_item() + '}';
 227
 228                 else if (t.cat() == catComment)
 229                         handle_comment(p);
 230
 231                 else if (t.cs() == "(") {
 232                         os << "\\(";
 233                         parse_math(p, os, FLAG_SIMPLE2, MATH_MODE);
 234                         os << "\\)";
 235                 }
 236
 237                 else if (t.cs() == "[") {
 238                         os << "\\[";
 239                         parse_math(p, os, FLAG_EQUATION, MATH_MODE);
 240                         os << "\\]";
 241                 }
 242
 243                 else if (t.cs() == "begin") {
 244                         string const name = p.getArg('{', '}');
 245                         active_environments.push_back(name);
 246                         parse_table(p, os, FLAG_END);
 247                 }
 248
 249                 else if (t.cs() == "end") {
 250                         if (flags & FLAG_END) {
 251                                 // eat environment name
 252                                 string const name = p.getArg('{', '}');
 253                                 if (name != active_environment())
 254                                         p.error("\\end{" + name + "} does not match \\begin{"
 255                                                 + active_environment() + "}");
 256                                 active_environments.pop_back();
 257                                 return;
 258                         }
 259                         p.error("found 'end' unexpectedly");
 260                 }
 261
 262                 else
 263                         os << t.asInput();
 264         }
 265 }
 266
 267
 268 void handle_hline_above(RowInfo & ri, vector<CellInfo> & ci)
 269 {
 270         ri.topline = true;
 271         for (size_t col = 0; col < ci.size(); ++col)
 272                 ci[col].topline = true;
 273 }
 274
 275
 276 void handle_hline_below(RowInfo & ri, vector<CellInfo> & ci)
 277 {
 278         ri.bottomline = true;
 279         for (size_t col = 0; col < ci.size(); ++col)
 280                 ci[col].bottomline = true;
 281 }
 282
 283
 284 void handle_tabular(Parser & p, ostream & os,
 285                     Context & context)
 286 {
 287         string posopts = p.getOpt();
 288         if (posopts.size())
 289                 cerr << "vertical tabular positioning '" << posopts << "' ignored\n";
 290
 291         vector<ColInfo>            colinfo;
 292
 293         // handle column formatting
 294         handle_colalign(p, colinfo);
 295
 296         // handle initial hlines
 297
 298         // first scan of cells
 299         // use table mode to keep it minimal-invasive
 300         // not exactly what's TeX doing...
 301         vector<string> lines;
 302         ostringstream ss;
 303         ss << read_hlines(p) << HLINE; // handle initial hlines
 304         parse_table(p, ss, FLAG_END);
 305         split(ss.str(), lines, LINE);
 306
 307         vector< vector<CellInfo> > cellinfo(lines.size());
 308         vector<RowInfo> rowinfo(lines.size());
 309
 310         // split into rows
 311         //cerr << "// split into rows\n";
 312         for (size_t row = 0; row < rowinfo.size(); ++row) {
 313
 314                 // init row
 315                 cellinfo[row].resize(colinfo.size());
 316
 317                 // split row
 318                 vector<string> dummy;
 319                 //cerr << "\n########### LINE: " << lines[row] << "########\n";
 320                 split(lines[row], dummy, HLINE);
 321
 322                 // handle horizontal line fragments
 323                 if (dummy.size() != 3) {
 324                         if (dummy.size() != 1)
 325                                 cerr << "unexpected dummy size: " << dummy.size()
 326                                         << " content: " << lines[row] << "\n";
 327                         dummy.resize(3);
 328                 }
 329                 lines[row] = dummy[1];
 330
 331                 //cerr << "line: " << row << " above 0: " << dummy[0] << "\n";
 332                 //cerr << "line: " << row << " below 2: " << dummy[2] <<  "\n";
 333                 //cerr << "line: " << row << " cells 1: " << dummy[1] <<  "\n";
 334
 335                 for (int i = 0; i <= 2; i += 2) {
 336                         //cerr << "   reading from line string '" << dummy[i] << "'\n";
 337                         Parser p1(dummy[i]);
 338                         while (p1.good()) {
 339                                 Token t = p1.get_token();
 340                                 //cerr << "read token: " << t << "\n";
 341                                 if (t.cs() == "hline") {
 342                                         if (i == 0) {
 343                                                 if (rowinfo[row].topline) {
 344                                                         if (row > 0) // extra bottomline above
 345                                                                 handle_hline_below(rowinfo[row - 1], cellinfo[row - 1]);
 346                                                         else
 347                                                                 cerr << "dropping extra hline\n";
 348                                                         //cerr << "below row: " << row-1 << endl;
 349                                                 } else {
 350                                                         handle_hline_above(rowinfo[row], cellinfo[row]);
 351                                                         //cerr << "above row: " << row << endl;
 352                                                 }
 353                                         } else {
 354                                                 //cerr << "below row: " << row << endl;
 355                                                 handle_hline_below(rowinfo[row], cellinfo[row]);
 356                                         }
 357                                 } else if (t.cs() == "cline") {
 358                                         string arg = p1.verbatim_item();
 359                                         //cerr << "read cline arg: '" << arg << "'\n";
 360                                         vector<string> t;
 361                                         split(arg, t, '-');
 362                                         t.resize(2);
 363                                         size_t from = string2int(t[0]) - 1;
 364                                         size_t to = string2int(t[1]);
 365                                         for (size_t col = from; col < to; ++col) {
 366                                                 //cerr << "row: " << row << " col: " << col << " i: " << i << endl;
 367                                                 if (i == 0) {
 368                                                         rowinfo[row].topline = true;
 369                                                         cellinfo[row][col].topline = true;
 370                                                 } else {
 371                                                         rowinfo[row].bottomline = true;
 372                                                         cellinfo[row][col].bottomline = true;
 373                                                 }
 374                                         }
 375                                 } else {
 376                                         cerr << "unexpected line token: " << t << endl;
 377                                 }
 378                         }
 379                 }
 380
 381                 // split into cells
 382                 vector<string> cells;
 383                 split(lines[row], cells, TAB);
 384                 for (size_t col = 0, cell = 0;
 385                                 cell < cells.size() && col < colinfo.size(); ++col, ++cell) {
 386                         //cerr << "cell content: '" << cells[cell] << "'\n";
 387                         Parser p(cells[cell]);
 388                         p.skip_spaces();
 389                         //cells[cell] << "'\n";
 390                         if (p.next_token().cs() == "multicolumn") {
 391                                 // how many cells?
 392                                 p.get_token();
 393                                 size_t const ncells = string2int(p.verbatim_item());
 394
 395                                 // special cell properties alignment
 396                                 vector<ColInfo> t;
 397                                 handle_colalign(p, t);
 398                                 cellinfo[row][col].multi     = 1;
 399                                 cellinfo[row][col].align     = t.front().align;
 400                                 ostringstream os;
 401                                 parse_text_in_inset(p, os, FLAG_ITEM, false, context);
 402                                 cellinfo[row][col].content   = os.str();
 403                                 cellinfo[row][col].leftline  |= t.front().leftline;
 404                                 cellinfo[row][col].rightline |= t.front().rightline;
 405
 406                                 // add dummy cells for multicol
 407                                 for (size_t i = 0; i < ncells - 1 && col < colinfo.size(); ++i) {
 408                                         ++col;
 409                                         cellinfo[row][col].multi = 2;
 410                                         cellinfo[row][col].align = 'c';
 411                                 }
 412
 413                                 // more than one line on the right?
 414                                 if (t.front().rightline > 1)
 415                                         cellinfo[row][col + 1].leftline = true;
 416
 417                         } else {
 418                                 // FLAG_END is a hack, we need to read all of it
 419                                 cellinfo[row][col].leftline = colinfo[col].leftline;
 420                                 cellinfo[row][col].rightline = colinfo[col].rightline;
 421                                 cellinfo[row][col].align = colinfo[col].align;
 422                                 ostringstream os;
 423                                 parse_text_in_inset(p, os, FLAG_CELL, false, context);
 424                                 cellinfo[row][col].content   = os.str();
 425                         }
 426                 }
 427
 428                 //cerr << "//  handle almost empty last row what we have\n";
 429                 // handle almost empty last row
 430                 if (row && lines[row].empty() && row + 1 == rowinfo.size()) {
 431                         //cerr << "remove empty last line\n";
 432                         if (rowinfo[row].topline)
 433                                 rowinfo[row - 1].bottomline = true;
 434                         for (size_t col = 0; col < colinfo.size(); ++col)
 435                                 if (cellinfo[row][col].topline)
 436                                         cellinfo[row - 1][col].bottomline = true;
 437                         rowinfo.pop_back();
 438                 }
 439
 440         }
 441
 442         //cerr << "// output what we have\n";
 443         // output what we have
 444         os << "\n<lyxtabular version=\"3\" rows=\"" << rowinfo.size()
 445                  << "\" columns=\"" << colinfo.size() << "\">\n"
 446                  << "<features>\n";
 447
 448         //cerr << "// after header\n";
 449         for (size_t col = 0; col < colinfo.size(); ++col) {
 450                 os << "<column alignment=\""
 451                    << verbose_align(colinfo[col].align) << "\"";
 452                 os << " valignment=\"top\"";
 453                 if (colinfo[col].leftline)
 454                         os << " leftline=\"true\"";
 455                 if (colinfo[col].rightline)
 456                         os << " rightline=\"true\"";
 457                 if (colinfo[col].width.size())
 458                         os << " width=\"" << colinfo[col].width << "\"";
 459                 if (colinfo[col].special.size())
 460                         os << " special=\"" << colinfo[col].special << "\"";
 461                 os << ">\n";
 462         }
 463         //cerr << "// after cols\n";
 464
 465         for (size_t row = 0; row < rowinfo.size(); ++row) {
 466                 os << "<row";
 467                 if (rowinfo[row].topline)
 468                         os << " topline=\"true\"";
 469                 if (rowinfo[row].bottomline)
 470                         os << " bottomline=\"true\"";
 471                 os << ">\n";
 472                 for (size_t col = 0; col < colinfo.size(); ++col) {
 473                         CellInfo const & cell = cellinfo[row][col];
 474                         os << "<cell";
 475                         if (cell.multi)
 476                                 os << " multicolumn=\"" << cell.multi << "\"";
 477                         os << " alignment=\"" << verbose_align(cell.align)
 478                            << "\""
 479                            << " valignment=\"top\"";
 480                         if (cell.topline)
 481                                 os << " topline=\"true\"";
 482                         if (cell.bottomline)
 483                                 os << " bottomline=\"true\"";
 484                         if (cell.leftline)
 485                                 os << " leftline=\"true\"";
 486                         if (cell.rightline)
 487                                 os << " rightline=\"true\"";
 488                         //cerr << "\nrow: " << row << " col: " << col;
 489                         //if (cell.topline)
 490                         //      cerr << " topline=\"true\"";
 491                         //if (cell.bottomline)
 492                         //      cerr << " bottomline=\"true\"";
 493                         os << " usebox=\"none\""
 494                            << ">"
 495                            << "\n\\begin_inset Text\n"
 496                            << cell.content
 497                            << "\n\\end_inset \n"
 498                            << "</cell>\n";
 499                 }
 500                 os << "</row>\n";
 501         }
 502
 503         os << "</lyxtabular>\n";
 504 }
 505
 506
 507
 508
 509 // }])