src/insets/InsetBibtex.cpp

   1 /**
   2  * \file InsetBibtex.cpp
   3  * This file is part of LyX, the document processor.
   4  * Licence details can be found in the file COPYING.
   5  *
   6  * \author Alejandro Aguilar Sierra
   7  *
   8  * Full author contact details are available in file CREDITS.
   9  */
  10
  11 #include <config.h>
  12
  13 #include "InsetBibtex.h"
  14
  15 #include "Buffer.h"
  16 #include "BufferParams.h"
  17 #include "DispatchResult.h"
  18 #include "debug.h"
  19 #include "Encoding.h"
  20 #include "FuncRequest.h"
  21 #include "gettext.h"
  22 #include "LaTeXFeatures.h"
  23 #include "MetricsInfo.h"
  24 #include "OutputParams.h"
  25
  26 #include "frontends/Alert.h"
  27
  28 #include "support/filetools.h"
  29 #include "support/lstrings.h"
  30 #include "support/lyxlib.h"
  31 #include "support/os.h"
  32 #include "support/Path.h"
  33 #include "support/textutils.h"
  34
  35 #include <boost/tokenizer.hpp>
  36
  37
  38 namespace lyx {
  39
  40 using support::absolutePath;
  41 using support::ascii_lowercase;
  42 using support::changeExtension;
  43 using support::contains;
  44 using support::copy;
  45 using support::DocFileName;
  46 using support::FileName;
  47 using support::findtexfile;
  48 using support::isFileReadable;
  49 using support::latex_path;
  50 using support::ltrim;
  51 using support::makeAbsPath;
  52 using support::makeRelPath;
  53 using support::prefixIs;
  54 using support::removeExtension;
  55 using support::rtrim;
  56 using support::split;
  57 using support::subst;
  58 using support::tokenPos;
  59 using support::trim;
  60 using support::lowercase;
  61
  62 namespace Alert = frontend::Alert;
  63 namespace os = support::os;
  64
  65 using std::endl;
  66 using std::getline;
  67 using std::string;
  68 using std::ostream;
  69 using std::pair;
  70 using std::vector;
  71 using std::map;
  72
  73
  74 InsetBibtex::InsetBibtex(InsetCommandParams const & p)
  75         : InsetCommand(p, "bibtex")
  76 {}
  77
  78
  79 std::auto_ptr<InsetBase> InsetBibtex::doClone() const
  80 {
  81         return std::auto_ptr<InsetBase>(new InsetBibtex(*this));
  82 }
  83
  84
  85 void InsetBibtex::doDispatch(Cursor & cur, FuncRequest & cmd)
  86 {
  87         switch (cmd.action) {
  88
  89         case LFUN_INSET_MODIFY: {
  90                 InsetCommandParams p("bibtex");
  91                 InsetCommandMailer::string2params("bibtex", to_utf8(cmd.argument()), p);
  92                 if (!p.getCmdName().empty()) {
  93                         setParams(p);
  94                         cur.buffer().updateBibfilesCache();
  95                 } else
  96                         cur.noUpdate();
  97                 break;
  98         }
  99
 100         default:
 101                 InsetCommand::doDispatch(cur, cmd);
 102                 break;
 103         }
 104 }
 105
 106
 107 docstring const InsetBibtex::getScreenLabel(Buffer const &) const
 108 {
 109         return _("BibTeX Generated Bibliography");
 110 }
 111
 112
 113 namespace {
 114
 115 string normalize_name(Buffer const & buffer, OutputParams const & runparams,
 116                       string const & name, string const & ext)
 117 {
 118         string const fname = makeAbsPath(name, buffer.filePath()).absFilename();
 119         if (absolutePath(name) || !isFileReadable(FileName(fname + ext)))
 120                 return name;
 121         else if (!runparams.nice)
 122                 return fname;
 123         else
 124                 // FIXME UNICODE
 125                 return to_utf8(makeRelPath(from_utf8(fname),
 126                                            from_utf8(buffer.getMasterBuffer()->filePath())));
 127 }
 128
 129 }
 130
 131
 132 int InsetBibtex::latex(Buffer const & buffer, odocstream & os,
 133                        OutputParams const & runparams) const
 134 {
 135         // the sequence of the commands:
 136         // 1. \bibliographystyle{style}
 137         // 2. \addcontentsline{...} - if option bibtotoc set
 138         // 3. \bibliography{database}
 139         // and with bibtopic:
 140         // 1. \bibliographystyle{style}
 141         // 2. \begin{btSect}{database}
 142         // 3. \btPrint{Cited|NotCited|All}
 143         // 4. \end{btSect}
 144
 145         // Database(s)
 146         // If we are processing the LaTeX file in a temp directory then
 147         // copy the .bib databases to this temp directory, mangling their
 148         // names in the process. Store this mangled name in the list of
 149         // all databases.
 150         // (We need to do all this because BibTeX *really*, *really*
 151         // can't handle "files with spaces" and Windows users tend to
 152         // use such filenames.)
 153         // Otherwise, store the (maybe absolute) path to the original,
 154         // unmangled database name.
 155         typedef boost::char_separator<char_type> Separator;
 156         typedef boost::tokenizer<Separator, docstring::const_iterator, docstring> Tokenizer;
 157
 158         Separator const separator(from_ascii(",").c_str());
 159         // The tokenizer must not be called with temporary strings, since
 160         // it does not make a copy and uses iterators of the string further
 161         // down. getParam returns a reference, so this is OK.
 162         Tokenizer const tokens(getParam("bibfiles"), separator);
 163         Tokenizer::const_iterator const begin = tokens.begin();
 164         Tokenizer::const_iterator const end = tokens.end();
 165
 166         odocstringstream dbs;
 167         for (Tokenizer::const_iterator it = begin; it != end; ++it) {
 168                 docstring const input = trim(*it);
 169                 // FIXME UNICODE
 170                 string utf8input(to_utf8(input));
 171                 string database =
 172                         normalize_name(buffer, runparams, utf8input, ".bib");
 173                 FileName const try_in_file(makeAbsPath(database + ".bib", buffer.filePath()));
 174                 bool const not_from_texmf = isFileReadable(try_in_file);
 175
 176                 if (!runparams.inComment && !runparams.dryrun && !runparams.nice &&
 177                     not_from_texmf) {
 178
 179                         // mangledFilename() needs the extension
 180                         DocFileName const in_file = DocFileName(try_in_file);
 181                         database = removeExtension(in_file.mangledFilename());
 182                         FileName const out_file(makeAbsPath(database + ".bib",
 183                                         buffer.getMasterBuffer()->temppath()));
 184
 185                         bool const success = copy(in_file, out_file);
 186                         if (!success) {
 187                                 lyxerr << "Failed to copy '" << in_file
 188                                        << "' to '" << out_file << "'"
 189                                        << endl;
 190                         }
 191                 }
 192
 193                 if (it != begin)
 194                         dbs << ',';
 195                 // FIXME UNICODE
 196                 dbs << from_utf8(latex_path(database));
 197         }
 198         docstring const db_out = dbs.str();
 199
 200         // Post this warning only once.
 201         static bool warned_about_spaces = false;
 202         if (!warned_about_spaces &&
 203             runparams.nice && db_out.find(' ') != docstring::npos) {
 204                 warned_about_spaces = true;
 205
 206                 Alert::warning(_("Export Warning!"),
 207                                _("There are spaces in the paths to your BibTeX databases.\n"
 208                                               "BibTeX will be unable to find them."));
 209
 210         }
 211
 212         // Style-Options
 213         string style = to_utf8(getParam("options")); // maybe empty! and with bibtotoc
 214         string bibtotoc;
 215         if (prefixIs(style, "bibtotoc")) {
 216                 bibtotoc = "bibtotoc";
 217                 if (contains(style, ',')) {
 218                         style = split(style, bibtotoc, ',');
 219                 }
 220         }
 221
 222         // line count
 223         int nlines = 0;
 224
 225         if (!style.empty()) {
 226                 string base =
 227                         normalize_name(buffer, runparams, style, ".bst");
 228                 FileName const try_in_file(makeAbsPath(base + ".bst", buffer.filePath()));
 229                 bool const not_from_texmf = isFileReadable(try_in_file);
 230                 // If this style does not come from texmf and we are not
 231                 // exporting to .tex copy it to the tmp directory.
 232                 // This prevents problems with spaces and 8bit charcaters
 233                 // in the file name.
 234                 if (!runparams.inComment && !runparams.dryrun && !runparams.nice &&
 235                     not_from_texmf) {
 236                         // use new style name
 237                         DocFileName const in_file = DocFileName(try_in_file);
 238                         base = removeExtension(in_file.mangledFilename());
 239                         FileName const out_file(makeAbsPath(base + ".bst",
 240                                         buffer.getMasterBuffer()->temppath()));
 241                         bool const success = copy(in_file, out_file);
 242                         if (!success) {
 243                                 lyxerr << "Failed to copy '" << in_file
 244                                        << "' to '" << out_file << "'"
 245                                        << endl;
 246                         }
 247                 }
 248                 // FIXME UNICODE
 249                 os << "\\bibliographystyle{"
 250                    << from_utf8(latex_path(normalize_name(buffer, runparams, base, ".bst")))
 251                    << "}\n";
 252                 nlines += 1;
 253         }
 254
 255         // Post this warning only once.
 256         static bool warned_about_bst_spaces = false;
 257         if (!warned_about_bst_spaces && runparams.nice && contains(style, ' ')) {
 258                 warned_about_bst_spaces = true;
 259                 Alert::warning(_("Export Warning!"),
 260                                _("There are spaces in the path to your BibTeX style file.\n"
 261                                               "BibTeX will be unable to find it."));
 262         }
 263
 264         if (!db_out.empty() && buffer.params().use_bibtopic){
 265                 os << "\\begin{btSect}{" << db_out << "}\n";
 266                 docstring btprint = getParam("btprint");
 267                 if (btprint.empty())
 268                         // default
 269                         btprint = from_ascii("btPrintCited");
 270                 os << "\\" << btprint << "\n"
 271                    << "\\end{btSect}\n";
 272                 nlines += 3;
 273         }
 274
 275         // bibtotoc-Option
 276         if (!bibtotoc.empty() && !buffer.params().use_bibtopic) {
 277                 // maybe a problem when a textclass has no "art" as
 278                 // part of its name, because it's than book.
 279                 // For the "official" lyx-layouts it's no problem to support
 280                 // all well
 281                 if (!contains(buffer.params().getLyXTextClass().name(),
 282                               "art")) {
 283                         if (buffer.params().sides == LyXTextClass::OneSide) {
 284                                 // oneside
 285                                 os << "\\clearpage";
 286                         } else {
 287                                 // twoside
 288                                 os << "\\cleardoublepage";
 289                         }
 290
 291                         // bookclass
 292                         os << "\\addcontentsline{toc}{chapter}{\\bibname}";
 293
 294                 } else {
 295                         // article class
 296                         os << "\\addcontentsline{toc}{section}{\\refname}";
 297                 }
 298         }
 299
 300         if (!db_out.empty() && !buffer.params().use_bibtopic){
 301                 os << "\\bibliography{" << db_out << "}\n";
 302                 nlines += 1;
 303         }
 304
 305         return nlines;
 306 }
 307
 308
 309 vector<FileName> const InsetBibtex::getFiles(Buffer const & buffer) const
 310 {
 311         FileName path(buffer.filePath());
 312         support::Path p(path);
 313
 314         vector<FileName> vec;
 315
 316         string tmp;
 317         // FIXME UNICODE
 318         string bibfiles = to_utf8(getParam("bibfiles"));
 319         bibfiles = split(bibfiles, tmp, ',');
 320         while (!tmp.empty()) {
 321                 FileName const file = findtexfile(changeExtension(tmp, "bib"), "bib");
 322                 LYXERR(Debug::LATEX) << "Bibfile: " << file << endl;
 323
 324                 // If we didn't find a matching file name just fail silently
 325                 if (!file.empty())
 326                         vec.push_back(file);
 327
 328                 // Get next file name
 329                 bibfiles = split(bibfiles, tmp, ',');
 330         }
 331
 332         return vec;
 333 }
 334
 335 namespace {
 336
 337         // methods for parsing bibtex files
 338
 339         typedef map<docstring, docstring> VarMap;
 340
 341         /// remove whitespace characters, optionally a single comma,
 342         /// and further whitespace characters from the stream.
 343         /// @return true if a comma was found, false otherwise
 344         ///
 345         bool removeWSAndComma(idocfstream & ifs) {
 346                 char_type ch;
 347
 348                 if (!ifs)
 349                         return false;
 350
 351                 // skip whitespace
 352                 do {
 353                         ifs.get(ch);
 354                 } while (ifs && isSpace(ch));
 355
 356                 if (!ifs)
 357                         return false;
 358
 359                 if (ch != ',') {
 360                         ifs.putback(ch);
 361                         return false;
 362                 }
 363
 364                 // skip whitespace
 365                 do {
 366                         ifs.get(ch);
 367                 } while (ifs && isSpace(ch));
 368
 369                 if (ifs) {
 370                         ifs.putback(ch);
 371                 }
 372
 373                 return true;
 374         }
 375
 376
 377         enum charCase {
 378                 makeLowerCase,
 379                 keepCase
 380         };
 381
 382         /// remove whitespace characters, read characer sequence
 383         /// not containing whitespace characters or characters in
 384         /// delimChars, and remove further whitespace characters.
 385         ///
 386         /// @return true if a string of length > 0 could be read.
 387         ///
 388         bool readTypeOrKey(docstring & val, idocfstream & ifs,
 389                 docstring const & delimChars, charCase chCase) {
 390
 391                 char_type ch;
 392
 393                 val.clear();
 394
 395                 if (!ifs)
 396                         return false;
 397
 398                 // skip whitespace
 399                 do {
 400                         ifs.get(ch);
 401                 } while (ifs && isSpace(ch));
 402
 403                 if (!ifs)
 404                         return false;
 405
 406                 // read value
 407                 while (ifs && !isSpace(ch) && delimChars.find(ch) == docstring::npos) {
 408                         if (chCase == makeLowerCase) {
 409                                 val += lowercase(ch);
 410                         } else {
 411                                 val += ch;
 412                         }
 413                         ifs.get(ch);
 414                 }
 415
 416                 // skip whitespace
 417                 while (ifs && isSpace(ch)) {
 418                         ifs.get(ch);
 419                 }
 420
 421                 if (ifs) {
 422                         ifs.putback(ch);
 423                 }
 424
 425                 return val.length() > 0;
 426         }
 427
 428         /// read subsequent bibtex values that are delimited with a #-character.
 429         /// Concatenate all parts and replace names with the associated string in
 430         /// the variable strings.
 431         /// @return true if reading was successfull (all single parts were delimited
 432         /// correctly)
 433         bool readValue(docstring & val, idocfstream & ifs, const VarMap & strings) {
 434
 435                 char_type ch;
 436
 437                 val.clear();
 438
 439                 if (!ifs)
 440                         return false;
 441
 442                 do {
 443                         // skip whitespace
 444                         do {
 445                                 ifs.get(ch);
 446                         } while (ifs && isSpace(ch));
 447
 448                         if (!ifs)
 449                                 return false;
 450
 451                         // check for field type
 452                         if (isDigit(ch)) {
 453
 454                                 // read integer value
 455                                 do {
 456                                         val += ch;
 457                                         ifs.get(ch);
 458                                 } while (ifs && isDigit(ch));
 459
 460                                 if (!ifs)
 461                                         return false;
 462
 463                         } else if (ch == '"' || ch == '{') {
 464
 465                                 // read delimited text - set end delimiter
 466                                 char_type delim = ch == '"'? '"': '}';
 467
 468                                 // inside this delimited text braces must match.
 469                                 // Thus we can have a closing delimiter only
 470                                 // when nestLevel == 0
 471                                 int nestLevel = 0;
 472
 473                                 ifs.get(ch);
 474                                 while (ifs && (nestLevel > 0 || ch != delim)) {
 475                                         val += ch;
 476
 477                                         // update nesting level
 478                                         switch (ch) {
 479                                                 case '{':
 480                                                         ++nestLevel;
 481                                                         break;
 482                                                 case '}':
 483                                                         --nestLevel;
 484                                                         if (nestLevel < 0) return false;
 485                                                         break;
 486                                         }
 487
 488                                         ifs.get(ch);
 489                                 }
 490
 491                                 if (!ifs)
 492                                         return false;
 493
 494                                 ifs.get(ch);
 495
 496                                 if (!ifs)
 497                                         return false;
 498
 499                         } else {
 500
 501                                 // reading a string name
 502                                 docstring strName;
 503
 504                                 while (ifs && !isSpace(ch) && ch != '#' && ch != ',' && ch != '}' && ch != ')') {
 505                                         strName += lowercase(ch);
 506                                         ifs.get(ch);
 507                                 }
 508
 509                                 if (!ifs)
 510                                         return false;
 511
 512                                 // replace the string with its assigned value or
 513                                 // discard it if it's not assigned
 514                                 if (strName.length()) {
 515                                         VarMap::const_iterator pos = strings.find(strName);
 516                                         if (pos != strings.end()) {
 517                                                 val += pos->second;
 518                                         }
 519                                 }
 520                         }
 521
 522                         // skip WS
 523                         while (ifs && isSpace(ch)) {
 524                                 ifs.get(ch);
 525                         }
 526
 527                         if (!ifs)
 528                                 return false;
 529
 530                         // continue reading next value on concatenate with '#'
 531                 } while (ch == '#');
 532
 533                 ifs.putback(ch);
 534
 535                 return true;
 536         }
 537 }
 538
 539
 540 // This method returns a comma separated list of Bibtex entries
 541 void InsetBibtex::fillWithBibKeys(Buffer const & buffer,
 542                 std::vector<std::pair<string, docstring> > & keys) const
 543 {
 544         vector<FileName> const files = getFiles(buffer);
 545         for (vector<FileName>::const_iterator it = files.begin();
 546              it != files.end(); ++ it) {
 547                 // This bibtex parser is a first step to parse bibtex files
 548                 // more precisely.
 549                 //
 550                 // - it reads the whole bibtex entry and does a syntax check
 551                 //   (matching delimiters, missing commas,...
 552                 // - it recovers from errors starting with the next @-character
 553                 // - it reads @string definitions and replaces them in the
 554                 //   field values.
 555                 // - it accepts more characters in keys or value names than
 556                 //   bibtex does.
 557                 //
 558                 // TODOS:
 559                 // - the entries are split into name = value pairs by the
 560                 //   parser. These have to be merged again because of the
 561                 //   way lyx treats the entries ( pair<...>(...) ). The citation
 562                 //   mechanism in lyx should be changed such that it can use
 563                 //   the split entries.
 564                 // - messages on parsing errors can be generated.
 565                 //
 566
 567                 // Officially bibtex does only support ASCII, but in practice
 568                 // you can use the encoding of the main document as long as
 569                 // some elements like keys and names are pure ASCII. Therefore
 570                 // we convert the file from the buffer encoding.
 571                 // We don't restrict keys to ASCII in LyX, since our own
 572                 // InsetBibitem can generate non-ASCII keys, and nonstandard
 573                 // 8bit clean bibtex forks exist.
 574                 idocfstream ifs(it->toFilesystemEncoding().c_str(),
 575                                 std::ios_base::in,
 576                                 buffer.params().encoding().iconvName());
 577
 578                 char_type ch;
 579                 VarMap strings;
 580
 581                 while (ifs) {
 582
 583                         ifs.get(ch);
 584                         if (!ifs)
 585                                 break;
 586
 587                         if (ch != '@')
 588                                 continue;
 589
 590                         docstring entryType;
 591
 592                         if (!readTypeOrKey(entryType, ifs, from_ascii("{("), makeLowerCase) || !ifs)
 593                                 continue;
 594
 595                         if (entryType == from_ascii("comment")) {
 596
 597                                 ifs.ignore(std::numeric_limits<int>::max(), '\n');
 598                                 continue;
 599                         }
 600
 601                         ifs.get(ch);
 602                         if (!ifs)
 603                                 break;
 604
 605                         if ((ch != '(') && (ch != '{')) {
 606                                 // invalid entry delimiter
 607                                 ifs.putback(ch);
 608                                 continue;
 609                         }
 610
 611                         // process the entry
 612                         if (entryType == from_ascii("string")) {
 613
 614                                 // read string and add it to the strings map
 615                                 // (or replace it's old value)
 616                                 docstring name;
 617                                 docstring value;
 618
 619                                 if (!readTypeOrKey(name, ifs, from_ascii("#=}),"), makeLowerCase) || !ifs)
 620                                         continue;
 621
 622                                 ifs.get(ch);
 623                                 if (!ifs || ch != '=')
 624                                         continue;
 625
 626                                 if (!readValue(value, ifs, strings))
 627                                         continue;
 628
 629                                 strings[name] = value;
 630
 631                         } else if (entryType == from_ascii("preamble")) {
 632
 633                                 // preamble definitions are discarded.
 634                                 // can they be of any use in lyx?
 635                                 docstring value;
 636
 637                                 if (!readValue(value, ifs, strings))
 638                                         continue;
 639
 640                         } else {
 641
 642                                 // Citation entry. Read the key and all name = value pairs
 643                                 docstring key;
 644                                 docstring fields;
 645                                 docstring name;
 646                                 docstring value;
 647                                 docstring commaNewline;
 648
 649                                 if (!readTypeOrKey(key, ifs, from_ascii(",})"), keepCase) || !ifs)
 650                                         continue;
 651
 652                                 // now we have a key, so we will add an entry
 653                                 // (even if it's empty, as bibtex does)
 654                                 //
 655                                 // all items must be separated by a comma. If
 656                                 // it is missing the scanning of this entry is
 657                                 // stopped and the next is searched.
 658                                 bool readNext = removeWSAndComma(ifs);
 659
 660                                 while (ifs && readNext) {
 661
 662                                         // read field name
 663                                         if (!readTypeOrKey(name, ifs, from_ascii("=}),"), makeLowerCase) || !ifs)
 664                                                 break;
 665
 666                                         // next char must be an equal sign
 667                                         ifs.get(ch);
 668                                         if (!ifs)
 669                                                 break;
 670                                         if (ch != '=') {
 671                                                 ifs.putback(ch);
 672                                                 break;
 673                                         }
 674
 675                                         // read field value
 676                                         if (!readValue(value, ifs, strings))
 677                                                 break;
 678
 679                                         // append field to the total entry string.
 680                                         //
 681                                         // TODO: Here is where the fields can be put in
 682                                         //       a more intelligent structure that preserves
 683                                         //           the already known parts.
 684                                         fields += commaNewline;
 685                                         fields += name + from_ascii(" = {") + value + '}';
 686
 687                                         if (!commaNewline.length())
 688                                                 commaNewline = from_ascii(",\n");
 689
 690                                         readNext = removeWSAndComma(ifs);
 691                                 }
 692
 693                                 // add the new entry
 694                                 keys.push_back(pair<string, docstring>(
 695                                 to_utf8(key), fields));
 696                         }
 697
 698                 } //< searching '@'
 699
 700         } //< for loop over files
 701 }
 702
 703
 704
 705 bool InsetBibtex::addDatabase(string const & db)
 706 {
 707         // FIXME UNICODE
 708         string bibfiles(to_utf8(getParam("bibfiles")));
 709         if (tokenPos(bibfiles, ',', db) == -1) {
 710                 if (!bibfiles.empty())
 711                         bibfiles += ',';
 712                 setParam("bibfiles", from_utf8(bibfiles + db));
 713                 return true;
 714         }
 715         return false;
 716 }
 717
 718
 719 bool InsetBibtex::delDatabase(string const & db)
 720 {
 721         // FIXME UNICODE
 722         string bibfiles(to_utf8(getParam("bibfiles")));
 723         if (contains(bibfiles, db)) {
 724                 int const n = tokenPos(bibfiles, ',', db);
 725                 string bd = db;
 726                 if (n > 0) {
 727                         // this is not the first database
 728                         string tmp = ',' + bd;
 729                         setParam("bibfiles", from_utf8(subst(bibfiles, tmp, string())));
 730                 } else if (n == 0)
 731                         // this is the first (or only) database
 732                         setParam("bibfiles", from_utf8(split(bibfiles, bd, ',')));
 733                 else
 734                         return false;
 735         }
 736         return true;
 737 }
 738
 739
 740 void InsetBibtex::validate(LaTeXFeatures & features) const
 741 {
 742         if (features.bufferParams().use_bibtopic)
 743                 features.require("bibtopic");
 744 }
 745
 746
 747 } // namespace lyx