src/insets/InsetBibtex.cpp

   1 /**
   2  * \file InsetBibtex.cpp
   3  * This file is part of LyX, the document processor.
   4  * Licence details can be found in the file COPYING.
   5  *
   6  * \author Alejandro Aguilar Sierra
   7  *
   8  * Full author contact details are available in file CREDITS.
   9  */
  10
  11 #include <config.h>
  12
  13 #include "InsetBibtex.h"
  14
  15 #include "Buffer.h"
  16 #include "BufferParams.h"
  17 #include "DispatchResult.h"
  18 #include "debug.h"
  19 #include "Encoding.h"
  20 #include "FuncRequest.h"
  21 #include "gettext.h"
  22 #include "LaTeXFeatures.h"
  23 #include "MetricsInfo.h"
  24 #include "OutputParams.h"
  25
  26 #include "frontends/alert.h"
  27
  28 #include "support/filetools.h"
  29 #include "support/lstrings.h"
  30 #include "support/lyxlib.h"
  31 #include "support/os.h"
  32 #include "support/Path.h"
  33 #include "support/textutils.h"
  34
  35 #include <boost/tokenizer.hpp>
  36
  37
  38 namespace lyx {
  39
  40 using support::absolutePath;
  41 using support::ascii_lowercase;
  42 using support::changeExtension;
  43 using support::contains;
  44 using support::copy;
  45 using support::DocFileName;
  46 using support::FileName;
  47 using support::findtexfile;
  48 using support::isFileReadable;
  49 using support::isValidLaTeXFilename;
  50 using support::latex_path;
  51 using support::ltrim;
  52 using support::makeAbsPath;
  53 using support::makeRelPath;
  54 using support::prefixIs;
  55 using support::removeExtension;
  56 using support::rtrim;
  57 using support::split;
  58 using support::subst;
  59 using support::tokenPos;
  60 using support::trim;
  61 using support::lowercase;
  62
  63 namespace Alert = frontend::Alert;
  64 namespace os = support::os;
  65
  66 using std::endl;
  67 using std::getline;
  68 using std::string;
  69 using std::ostream;
  70 using std::pair;
  71 using std::vector;
  72 using std::map;
  73
  74
  75 InsetBibtex::InsetBibtex(InsetCommandParams const & p)
  76         : InsetCommand(p, "bibtex")
  77 {}
  78
  79
  80 std::auto_ptr<Inset> InsetBibtex::doClone() const
  81 {
  82         return std::auto_ptr<Inset>(new InsetBibtex(*this));
  83 }
  84
  85
  86 void InsetBibtex::doDispatch(Cursor & cur, FuncRequest & cmd)
  87 {
  88         switch (cmd.action) {
  89
  90         case LFUN_INSET_MODIFY: {
  91                 InsetCommandParams p("bibtex");
  92                 InsetCommandMailer::string2params("bibtex", to_utf8(cmd.argument()), p);
  93                 if (!p.getCmdName().empty()) {
  94                         setParams(p);
  95                         cur.buffer().updateBibfilesCache();
  96                 } else
  97                         cur.noUpdate();
  98                 break;
  99         }
 100
 101         default:
 102                 InsetCommand::doDispatch(cur, cmd);
 103                 break;
 104         }
 105 }
 106
 107
 108 docstring const InsetBibtex::getScreenLabel(Buffer const &) const
 109 {
 110         return _("BibTeX Generated Bibliography");
 111 }
 112
 113
 114 namespace {
 115
 116 string normalize_name(Buffer const & buffer, OutputParams const & runparams,
 117                       string const & name, string const & ext)
 118 {
 119         string const fname = makeAbsPath(name, buffer.filePath()).absFilename();
 120         if (absolutePath(name) || !isFileReadable(FileName(fname + ext)))
 121                 return name;
 122         else if (!runparams.nice)
 123                 return fname;
 124         else
 125                 // FIXME UNICODE
 126                 return to_utf8(makeRelPath(from_utf8(fname),
 127                                            from_utf8(buffer.getMasterBuffer()->filePath())));
 128 }
 129
 130 }
 131
 132
 133 int InsetBibtex::latex(Buffer const & buffer, odocstream & os,
 134                        OutputParams const & runparams) const
 135 {
 136         // the sequence of the commands:
 137         // 1. \bibliographystyle{style}
 138         // 2. \addcontentsline{...} - if option bibtotoc set
 139         // 3. \bibliography{database}
 140         // and with bibtopic:
 141         // 1. \bibliographystyle{style}
 142         // 2. \begin{btSect}{database}
 143         // 3. \btPrint{Cited|NotCited|All}
 144         // 4. \end{btSect}
 145
 146         // Database(s)
 147         // If we are processing the LaTeX file in a temp directory then
 148         // copy the .bib databases to this temp directory, mangling their
 149         // names in the process. Store this mangled name in the list of
 150         // all databases.
 151         // (We need to do all this because BibTeX *really*, *really*
 152         // can't handle "files with spaces" and Windows users tend to
 153         // use such filenames.)
 154         // Otherwise, store the (maybe absolute) path to the original,
 155         // unmangled database name.
 156         typedef boost::char_separator<char_type> Separator;
 157         typedef boost::tokenizer<Separator, docstring::const_iterator, docstring> Tokenizer;
 158
 159         Separator const separator(from_ascii(",").c_str());
 160         // The tokenizer must not be called with temporary strings, since
 161         // it does not make a copy and uses iterators of the string further
 162         // down. getParam returns a reference, so this is OK.
 163         Tokenizer const tokens(getParam("bibfiles"), separator);
 164         Tokenizer::const_iterator const begin = tokens.begin();
 165         Tokenizer::const_iterator const end = tokens.end();
 166
 167         odocstringstream dbs;
 168         for (Tokenizer::const_iterator it = begin; it != end; ++it) {
 169                 docstring const input = trim(*it);
 170                 // FIXME UNICODE
 171                 string utf8input(to_utf8(input));
 172                 string database =
 173                         normalize_name(buffer, runparams, utf8input, ".bib");
 174                 FileName const try_in_file(makeAbsPath(database + ".bib", buffer.filePath()));
 175                 bool const not_from_texmf = isFileReadable(try_in_file);
 176
 177                 if (!runparams.inComment && !runparams.dryrun && !runparams.nice &&
 178                     not_from_texmf) {
 179
 180                         // mangledFilename() needs the extension
 181                         DocFileName const in_file = DocFileName(try_in_file);
 182                         database = removeExtension(in_file.mangledFilename());
 183                         FileName const out_file(makeAbsPath(database + ".bib",
 184                                         buffer.getMasterBuffer()->temppath()));
 185
 186                         bool const success = copy(in_file, out_file);
 187                         if (!success) {
 188                                 lyxerr << "Failed to copy '" << in_file
 189                                        << "' to '" << out_file << "'"
 190                                        << endl;
 191                         }
 192                 } else if (!runparams.inComment && runparams.nice && not_from_texmf &&
 193                            !isValidLaTeXFilename(database)) {
 194                                 frontend::Alert::warning(_("Invalid filename"),
 195                                                          _("The following filename is likely to cause trouble "
 196                                                            "when running the exported file through LaTeX: ") +
 197                                                             from_utf8(database));
 198                 }
 199
 200                 if (it != begin)
 201                         dbs << ',';
 202                 // FIXME UNICODE
 203                 dbs << from_utf8(latex_path(database));
 204         }
 205         docstring const db_out = dbs.str();
 206
 207         // Post this warning only once.
 208         static bool warned_about_spaces = false;
 209         if (!warned_about_spaces &&
 210             runparams.nice && db_out.find(' ') != docstring::npos) {
 211                 warned_about_spaces = true;
 212
 213                 Alert::warning(_("Export Warning!"),
 214                                _("There are spaces in the paths to your BibTeX databases.\n"
 215                                               "BibTeX will be unable to find them."));
 216
 217         }
 218
 219         // Style-Options
 220         string style = to_utf8(getParam("options")); // maybe empty! and with bibtotoc
 221         string bibtotoc;
 222         if (prefixIs(style, "bibtotoc")) {
 223                 bibtotoc = "bibtotoc";
 224                 if (contains(style, ',')) {
 225                         style = split(style, bibtotoc, ',');
 226                 }
 227         }
 228
 229         // line count
 230         int nlines = 0;
 231
 232         if (!style.empty()) {
 233                 string base =
 234                         normalize_name(buffer, runparams, style, ".bst");
 235                 FileName const try_in_file(makeAbsPath(base + ".bst", buffer.filePath()));
 236                 bool const not_from_texmf = isFileReadable(try_in_file);
 237                 // If this style does not come from texmf and we are not
 238                 // exporting to .tex copy it to the tmp directory.
 239                 // This prevents problems with spaces and 8bit charcaters
 240                 // in the file name.
 241                 if (!runparams.inComment && !runparams.dryrun && !runparams.nice &&
 242                     not_from_texmf) {
 243                         // use new style name
 244                         DocFileName const in_file = DocFileName(try_in_file);
 245                         base = removeExtension(in_file.mangledFilename());
 246                         FileName const out_file(makeAbsPath(base + ".bst",
 247                                         buffer.getMasterBuffer()->temppath()));
 248                         bool const success = copy(in_file, out_file);
 249                         if (!success) {
 250                                 lyxerr << "Failed to copy '" << in_file
 251                                        << "' to '" << out_file << "'"
 252                                        << endl;
 253                         }
 254                 }
 255                 // FIXME UNICODE
 256                 os << "\\bibliographystyle{"
 257                    << from_utf8(latex_path(normalize_name(buffer, runparams, base, ".bst")))
 258                    << "}\n";
 259                 nlines += 1;
 260         }
 261
 262         // Post this warning only once.
 263         static bool warned_about_bst_spaces = false;
 264         if (!warned_about_bst_spaces && runparams.nice && contains(style, ' ')) {
 265                 warned_about_bst_spaces = true;
 266                 Alert::warning(_("Export Warning!"),
 267                                _("There are spaces in the path to your BibTeX style file.\n"
 268                                               "BibTeX will be unable to find it."));
 269         }
 270
 271         if (!db_out.empty() && buffer.params().use_bibtopic){
 272                 os << "\\begin{btSect}{" << db_out << "}\n";
 273                 docstring btprint = getParam("btprint");
 274                 if (btprint.empty())
 275                         // default
 276                         btprint = from_ascii("btPrintCited");
 277                 os << "\\" << btprint << "\n"
 278                    << "\\end{btSect}\n";
 279                 nlines += 3;
 280         }
 281
 282         // bibtotoc-Option
 283         if (!bibtotoc.empty() && !buffer.params().use_bibtopic) {
 284                 // maybe a problem when a textclass has no "art" as
 285                 // part of its name, because it's than book.
 286                 // For the "official" lyx-layouts it's no problem to support
 287                 // all well
 288                 if (!contains(buffer.params().getTextClass().name(),
 289                               "art")) {
 290                         if (buffer.params().sides == TextClass::OneSide) {
 291                                 // oneside
 292                                 os << "\\clearpage";
 293                         } else {
 294                                 // twoside
 295                                 os << "\\cleardoublepage";
 296                         }
 297
 298                         // bookclass
 299                         os << "\\addcontentsline{toc}{chapter}{\\bibname}";
 300
 301                 } else {
 302                         // article class
 303                         os << "\\addcontentsline{toc}{section}{\\refname}";
 304                 }
 305         }
 306
 307         if (!db_out.empty() && !buffer.params().use_bibtopic){
 308                 os << "\\bibliography{" << db_out << "}\n";
 309                 nlines += 1;
 310         }
 311
 312         return nlines;
 313 }
 314
 315
 316 vector<FileName> const InsetBibtex::getFiles(Buffer const & buffer) const
 317 {
 318         FileName path(buffer.filePath());
 319         support::Path p(path);
 320
 321         vector<FileName> vec;
 322
 323         string tmp;
 324         // FIXME UNICODE
 325         string bibfiles = to_utf8(getParam("bibfiles"));
 326         bibfiles = split(bibfiles, tmp, ',');
 327         while (!tmp.empty()) {
 328                 FileName const file = findtexfile(changeExtension(tmp, "bib"), "bib");
 329                 LYXERR(Debug::LATEX) << "Bibfile: " << file << endl;
 330
 331                 // If we didn't find a matching file name just fail silently
 332                 if (!file.empty())
 333                         vec.push_back(file);
 334
 335                 // Get next file name
 336                 bibfiles = split(bibfiles, tmp, ',');
 337         }
 338
 339         return vec;
 340 }
 341
 342 namespace {
 343
 344         // methods for parsing bibtex files
 345
 346         typedef map<docstring, docstring> VarMap;
 347
 348         /// remove whitespace characters, optionally a single comma,
 349         /// and further whitespace characters from the stream.
 350         /// @return true if a comma was found, false otherwise
 351         ///
 352         bool removeWSAndComma(idocfstream & ifs) {
 353                 char_type ch;
 354
 355                 if (!ifs)
 356                         return false;
 357
 358                 // skip whitespace
 359                 do {
 360                         ifs.get(ch);
 361                 } while (ifs && isSpace(ch));
 362
 363                 if (!ifs)
 364                         return false;
 365
 366                 if (ch != ',') {
 367                         ifs.putback(ch);
 368                         return false;
 369                 }
 370
 371                 // skip whitespace
 372                 do {
 373                         ifs.get(ch);
 374                 } while (ifs && isSpace(ch));
 375
 376                 if (ifs) {
 377                         ifs.putback(ch);
 378                 }
 379
 380                 return true;
 381         }
 382
 383
 384         enum charCase {
 385                 makeLowerCase,
 386                 keepCase
 387         };
 388
 389         /// remove whitespace characters, read characer sequence
 390         /// not containing whitespace characters or characters in
 391         /// delimChars, and remove further whitespace characters.
 392         ///
 393         /// @return true if a string of length > 0 could be read.
 394         ///
 395         bool readTypeOrKey(docstring & val, idocfstream & ifs,
 396                 docstring const & delimChars, charCase chCase) {
 397
 398                 char_type ch;
 399
 400                 val.clear();
 401
 402                 if (!ifs)
 403                         return false;
 404
 405                 // skip whitespace
 406                 do {
 407                         ifs.get(ch);
 408                 } while (ifs && isSpace(ch));
 409
 410                 if (!ifs)
 411                         return false;
 412
 413                 // read value
 414                 while (ifs && !isSpace(ch) && delimChars.find(ch) == docstring::npos) {
 415                         if (chCase == makeLowerCase) {
 416                                 val += lowercase(ch);
 417                         } else {
 418                                 val += ch;
 419                         }
 420                         ifs.get(ch);
 421                 }
 422
 423                 // skip whitespace
 424                 while (ifs && isSpace(ch)) {
 425                         ifs.get(ch);
 426                 }
 427
 428                 if (ifs) {
 429                         ifs.putback(ch);
 430                 }
 431
 432                 return val.length() > 0;
 433         }
 434
 435         /// read subsequent bibtex values that are delimited with a #-character.
 436         /// Concatenate all parts and replace names with the associated string in
 437         /// the variable strings.
 438         /// @return true if reading was successfull (all single parts were delimited
 439         /// correctly)
 440         bool readValue(docstring & val, idocfstream & ifs, const VarMap & strings) {
 441
 442                 char_type ch;
 443
 444                 val.clear();
 445
 446                 if (!ifs)
 447                         return false;
 448
 449                 do {
 450                         // skip whitespace
 451                         do {
 452                                 ifs.get(ch);
 453                         } while (ifs && isSpace(ch));
 454
 455                         if (!ifs)
 456                                 return false;
 457
 458                         // check for field type
 459                         if (isDigit(ch)) {
 460
 461                                 // read integer value
 462                                 do {
 463                                         val += ch;
 464                                         ifs.get(ch);
 465                                 } while (ifs && isDigit(ch));
 466
 467                                 if (!ifs)
 468                                         return false;
 469
 470                         } else if (ch == '"' || ch == '{') {
 471
 472                                 // read delimited text - set end delimiter
 473                                 char_type delim = ch == '"'? '"': '}';
 474
 475                                 // inside this delimited text braces must match.
 476                                 // Thus we can have a closing delimiter only
 477                                 // when nestLevel == 0
 478                                 int nestLevel = 0;
 479
 480                                 ifs.get(ch);
 481                                 while (ifs && (nestLevel > 0 || ch != delim)) {
 482                                         val += ch;
 483
 484                                         // update nesting level
 485                                         switch (ch) {
 486                                                 case '{':
 487                                                         ++nestLevel;
 488                                                         break;
 489                                                 case '}':
 490                                                         --nestLevel;
 491                                                         if (nestLevel < 0) return false;
 492                                                         break;
 493                                         }
 494
 495                                         ifs.get(ch);
 496                                 }
 497
 498                                 if (!ifs)
 499                                         return false;
 500
 501                                 ifs.get(ch);
 502
 503                                 if (!ifs)
 504                                         return false;
 505
 506                         } else {
 507
 508                                 // reading a string name
 509                                 docstring strName;
 510
 511                                 while (ifs && !isSpace(ch) && ch != '#' && ch != ',' && ch != '}' && ch != ')') {
 512                                         strName += lowercase(ch);
 513                                         ifs.get(ch);
 514                                 }
 515
 516                                 if (!ifs)
 517                                         return false;
 518
 519                                 // replace the string with its assigned value or
 520                                 // discard it if it's not assigned
 521                                 if (strName.length()) {
 522                                         VarMap::const_iterator pos = strings.find(strName);
 523                                         if (pos != strings.end()) {
 524                                                 val += pos->second;
 525                                         }
 526                                 }
 527                         }
 528
 529                         // skip WS
 530                         while (ifs && isSpace(ch)) {
 531                                 ifs.get(ch);
 532                         }
 533
 534                         if (!ifs)
 535                                 return false;
 536
 537                         // continue reading next value on concatenate with '#'
 538                 } while (ch == '#');
 539
 540                 ifs.putback(ch);
 541
 542                 return true;
 543         }
 544 }
 545
 546
 547 // This method returns a comma separated list of Bibtex entries
 548 void InsetBibtex::fillWithBibKeys(Buffer const & buffer,
 549                 std::vector<std::pair<string, docstring> > & keys) const
 550 {
 551         vector<FileName> const files = getFiles(buffer);
 552         for (vector<FileName>::const_iterator it = files.begin();
 553              it != files.end(); ++ it) {
 554                 // This bibtex parser is a first step to parse bibtex files
 555                 // more precisely.
 556                 //
 557                 // - it reads the whole bibtex entry and does a syntax check
 558                 //   (matching delimiters, missing commas,...
 559                 // - it recovers from errors starting with the next @-character
 560                 // - it reads @string definitions and replaces them in the
 561                 //   field values.
 562                 // - it accepts more characters in keys or value names than
 563                 //   bibtex does.
 564                 //
 565                 // TODOS:
 566                 // - the entries are split into name = value pairs by the
 567                 //   parser. These have to be merged again because of the
 568                 //   way lyx treats the entries ( pair<...>(...) ). The citation
 569                 //   mechanism in lyx should be changed such that it can use
 570                 //   the split entries.
 571                 // - messages on parsing errors can be generated.
 572                 //
 573
 574                 // Officially bibtex does only support ASCII, but in practice
 575                 // you can use the encoding of the main document as long as
 576                 // some elements like keys and names are pure ASCII. Therefore
 577                 // we convert the file from the buffer encoding.
 578                 // We don't restrict keys to ASCII in LyX, since our own
 579                 // InsetBibitem can generate non-ASCII keys, and nonstandard
 580                 // 8bit clean bibtex forks exist.
 581                 idocfstream ifs(it->toFilesystemEncoding().c_str(),
 582                                 std::ios_base::in,
 583                                 buffer.params().encoding().iconvName());
 584
 585                 char_type ch;
 586                 VarMap strings;
 587
 588                 while (ifs) {
 589
 590                         ifs.get(ch);
 591                         if (!ifs)
 592                                 break;
 593
 594                         if (ch != '@')
 595                                 continue;
 596
 597                         docstring entryType;
 598
 599                         if (!readTypeOrKey(entryType, ifs, from_ascii("{("), makeLowerCase) || !ifs)
 600                                 continue;
 601
 602                         if (entryType == from_ascii("comment")) {
 603
 604                                 ifs.ignore(std::numeric_limits<int>::max(), '\n');
 605                                 continue;
 606                         }
 607
 608                         ifs.get(ch);
 609                         if (!ifs)
 610                                 break;
 611
 612                         if ((ch != '(') && (ch != '{')) {
 613                                 // invalid entry delimiter
 614                                 ifs.putback(ch);
 615                                 continue;
 616                         }
 617
 618                         // process the entry
 619                         if (entryType == from_ascii("string")) {
 620
 621                                 // read string and add it to the strings map
 622                                 // (or replace it's old value)
 623                                 docstring name;
 624                                 docstring value;
 625
 626                                 if (!readTypeOrKey(name, ifs, from_ascii("#=}),"), makeLowerCase) || !ifs)
 627                                         continue;
 628
 629                                 ifs.get(ch);
 630                                 if (!ifs || ch != '=')
 631                                         continue;
 632
 633                                 if (!readValue(value, ifs, strings))
 634                                         continue;
 635
 636                                 strings[name] = value;
 637
 638                         } else if (entryType == from_ascii("preamble")) {
 639
 640                                 // preamble definitions are discarded.
 641                                 // can they be of any use in lyx?
 642                                 docstring value;
 643
 644                                 if (!readValue(value, ifs, strings))
 645                                         continue;
 646
 647                         } else {
 648
 649                                 // Citation entry. Read the key and all name = value pairs
 650                                 docstring key;
 651                                 docstring fields;
 652                                 docstring name;
 653                                 docstring value;
 654                                 docstring commaNewline;
 655
 656                                 if (!readTypeOrKey(key, ifs, from_ascii(",})"), keepCase) || !ifs)
 657                                         continue;
 658
 659                                 // now we have a key, so we will add an entry
 660                                 // (even if it's empty, as bibtex does)
 661                                 //
 662                                 // all items must be separated by a comma. If
 663                                 // it is missing the scanning of this entry is
 664                                 // stopped and the next is searched.
 665                                 bool readNext = removeWSAndComma(ifs);
 666
 667                                 while (ifs && readNext) {
 668
 669                                         // read field name
 670                                         if (!readTypeOrKey(name, ifs, from_ascii("=}),"), makeLowerCase) || !ifs)
 671                                                 break;
 672
 673                                         // next char must be an equal sign
 674                                         ifs.get(ch);
 675                                         if (!ifs)
 676                                                 break;
 677                                         if (ch != '=') {
 678                                                 ifs.putback(ch);
 679                                                 break;
 680                                         }
 681
 682                                         // read field value
 683                                         if (!readValue(value, ifs, strings))
 684                                                 break;
 685
 686                                         // append field to the total entry string.
 687                                         //
 688                                         // TODO: Here is where the fields can be put in
 689                                         //       a more intelligent structure that preserves
 690                                         //           the already known parts.
 691                                         fields += commaNewline;
 692                                         fields += name + from_ascii(" = {") + value + '}';
 693
 694                                         if (!commaNewline.length())
 695                                                 commaNewline = from_ascii(",\n");
 696
 697                                         readNext = removeWSAndComma(ifs);
 698                                 }
 699
 700                                 // add the new entry
 701                                 keys.push_back(pair<string, docstring>(
 702                                 to_utf8(key), fields));
 703                         }
 704
 705                 } //< searching '@'
 706
 707         } //< for loop over files
 708 }
 709
 710
 711
 712 bool InsetBibtex::addDatabase(string const & db)
 713 {
 714         // FIXME UNICODE
 715         string bibfiles(to_utf8(getParam("bibfiles")));
 716         if (tokenPos(bibfiles, ',', db) == -1) {
 717                 if (!bibfiles.empty())
 718                         bibfiles += ',';
 719                 setParam("bibfiles", from_utf8(bibfiles + db));
 720                 return true;
 721         }
 722         return false;
 723 }
 724
 725
 726 bool InsetBibtex::delDatabase(string const & db)
 727 {
 728         // FIXME UNICODE
 729         string bibfiles(to_utf8(getParam("bibfiles")));
 730         if (contains(bibfiles, db)) {
 731                 int const n = tokenPos(bibfiles, ',', db);
 732                 string bd = db;
 733                 if (n > 0) {
 734                         // this is not the first database
 735                         string tmp = ',' + bd;
 736                         setParam("bibfiles", from_utf8(subst(bibfiles, tmp, string())));
 737                 } else if (n == 0)
 738                         // this is the first (or only) database
 739                         setParam("bibfiles", from_utf8(split(bibfiles, bd, ',')));
 740                 else
 741                         return false;
 742         }
 743         return true;
 744 }
 745
 746
 747 void InsetBibtex::validate(LaTeXFeatures & features) const
 748 {
 749         if (features.bufferParams().use_bibtopic)
 750                 features.require("bibtopic");
 751 }
 752
 753
 754 } // namespace lyx