src/BiblioInfo.cpp

   1 /**
   2  * \file BiblioInfo.cpp
   3  * This file is part of LyX, the document processor.
   4  * Licence details can be found in the file COPYING.
   5  *
   6  * \author Angus Leeming
   7  * \author Herbert Voß
   8  * \author Richard Heck
   9  *
  10  * Full author contact details are available in file CREDITS.
  11  */
  12
  13 #include <config.h>
  14
  15 #include "BiblioInfo.h"
  16 #include "Buffer.h"
  17 #include "BufferParams.h"
  18 #include "buffer_funcs.h"
  19 #include "Encoding.h"
  20 #include "InsetIterator.h"
  21 #include "Paragraph.h"
  22 #include "TocBackend.h"
  23
  24 #include "insets/Inset.h"
  25 #include "insets/InsetBibitem.h"
  26 #include "insets/InsetBibtex.h"
  27 #include "insets/InsetInclude.h"
  28
  29 #include "support/docstream.h"
  30 #include "support/gettext.h"
  31 #include "support/lassert.h"
  32 #include "support/lstrings.h"
  33 #include "support/textutils.h"
  34
  35 #include "boost/regex.hpp"
  36
  37 #include <set>
  38
  39 using namespace std;
  40 using namespace lyx::support;
  41
  42
  43 namespace lyx {
  44
  45 namespace {
  46
  47 // gets the "family name" from an author-type string
  48 docstring familyName(docstring const & name)
  49 {
  50         if (name.empty())
  51                 return docstring();
  52
  53         // first we look for a comma, and take the last name to be everything
  54         // preceding the right-most one, so that we also get the "jr" part.
  55         docstring::size_type idx = name.rfind(',');
  56         if (idx != docstring::npos)
  57                 return ltrim(name.substr(0, idx));
  58
  59         // OK, so now we want to look for the last name. We're going to
  60         // include the "von" part. This isn't perfect.
  61         // Split on spaces, to get various tokens.
  62         vector<docstring> pieces = getVectorFromString(name, from_ascii(" "));
  63         // If we only get two, assume the last one is the last name
  64         if (pieces.size() <= 2)
  65                 return pieces.back();
  66
  67         // Now we look for the first token that begins with a lower case letter.
  68         vector<docstring>::const_iterator it = pieces.begin();
  69         vector<docstring>::const_iterator en = pieces.end();
  70         for (; it != en; ++it) {
  71                 if ((*it).size() == 0)
  72                         continue;
  73                 char_type const c = (*it)[0];
  74                 if (isLower(c))
  75                         break;
  76         }
  77
  78         if (it == en) // we never found a "von"
  79                 return pieces.back();
  80
  81         // reconstruct what we need to return
  82         docstring retval;
  83         bool first = true;
  84         for (; it != en; ++it) {
  85                 if (!first)
  86                         retval += " ";
  87                 else
  88                         first = false;
  89                 retval += *it;
  90         }
  91         return retval;
  92 }
  93
  94 // converts a string containing LaTeX commands into unicode
  95 // for display.
  96 docstring convertLaTeXCommands(docstring const & str)
  97 {
  98         docstring val = str;
  99         docstring ret;
 100
 101         bool scanning_cmd = false;
 102         bool scanning_math = false;
 103         bool escaped = false; // used to catch \$, etc.
 104         while (val.size()) {
 105                 char_type const ch = val[0];
 106
 107                 // if we're scanning math, we output everything until we
 108                 // find an unescaped $, at which point we break out.
 109                 if (scanning_math) {
 110                         if (escaped)
 111                                 escaped = false;
 112                         else if (ch == '\\')
 113                                 escaped = true;
 114                         else if (ch == '$')
 115                                 scanning_math = false;
 116                         ret += ch;
 117                         val = val.substr(1);
 118                         continue;
 119                 }
 120
 121                 // if we're scanning a command name, then we just
 122                 // discard characters until we hit something that
 123                 // isn't alpha.
 124                 if (scanning_cmd) {
 125                         if (isAlphaASCII(ch)) {
 126                                 val = val.substr(1);
 127                                 escaped = false;
 128                                 continue;
 129                         }
 130                         // so we're done with this command.
 131                         // now we fall through and check this character.
 132                         scanning_cmd = false;
 133                 }
 134
 135                 // was the last character a \? If so, then this is something like:
 136                 // \\ or \$, so we'll just output it. That's probably not always right...
 137                 if (escaped) {
 138                         // exception: output \, as THIN SPACE
 139                         if (ch == ',')
 140                                 ret.push_back(0x2009);
 141                         else
 142                                 ret += ch;
 143                         val = val.substr(1);
 144                         escaped = false;
 145                         continue;
 146                 }
 147
 148                 if (ch == '$') {
 149                         ret += ch;
 150                         val = val.substr(1);
 151                         scanning_math = true;
 152                         continue;
 153                 }
 154
 155                 // we just ignore braces
 156                 if (ch == '{' || ch == '}') {
 157                         val = val.substr(1);
 158                         continue;
 159                 }
 160
 161                 // we're going to check things that look like commands, so if
 162                 // this doesn't, just output it.
 163                 if (ch != '\\') {
 164                         ret += ch;
 165                         val = val.substr(1);
 166                         continue;
 167                 }
 168
 169                 // ok, could be a command of some sort
 170                 // let's see if it corresponds to some unicode
 171                 // unicodesymbols has things in the form: \"{u},
 172                 // whereas we may see things like: \"u. So we'll
 173                 // look for that and change it, if necessary.
 174                 static boost::regex const reg("^\\\\\\W\\w");
 175                 if (boost::regex_search(to_utf8(val), reg)) {
 176                         val.insert(3, from_ascii("}"));
 177                         val.insert(2, from_ascii("{"));
 178                 }
 179                 docstring rem;
 180                 docstring const cnvtd = Encodings::fromLaTeXCommand(val, rem,
 181                                                         Encodings::TEXT_CMD);
 182                 if (!cnvtd.empty()) {
 183                         // it did, so we'll take that bit and proceed with what's left
 184                         ret += cnvtd;
 185                         val = rem;
 186                         continue;
 187                 }
 188                 // it's a command of some sort
 189                 scanning_cmd = true;
 190                 escaped = true;
 191                 val = val.substr(1);
 192         }
 193         return ret;
 194 }
 195
 196 } // anon namespace
 197
 198
 199 //////////////////////////////////////////////////////////////////////
 200 //
 201 // BibTeXInfo
 202 //
 203 //////////////////////////////////////////////////////////////////////
 204
 205 BibTeXInfo::BibTeXInfo(docstring const & key, docstring const & type)
 206         : is_bibtex_(true), bib_key_(key), entry_type_(type), info_()
 207 {}
 208
 209
 210 bool BibTeXInfo::hasField(docstring const & field) const
 211 {
 212         return count(field) == 1;
 213 }
 214
 215
 216 docstring const BibTeXInfo::getAbbreviatedAuthor() const
 217 {
 218         if (!is_bibtex_) {
 219                 docstring const opt = label();
 220                 if (opt.empty())
 221                         return docstring();
 222
 223                 docstring authors;
 224                 split(opt, authors, '(');
 225                 return authors;
 226         }
 227
 228         docstring author = convertLaTeXCommands(operator[]("author"));
 229         if (author.empty()) {
 230                 author = convertLaTeXCommands(operator[]("editor"));
 231                 if (author.empty())
 232                         return bib_key_;
 233         }
 234
 235         // OK, we've got some names. Let's format them.
 236         // Try to split the author list on " and "
 237         vector<docstring> const authors =
 238                 getVectorFromString(author, from_ascii(" and "));
 239
 240         if (authors.size() == 2)
 241                 return bformat(_("%1$s and %2$s"),
 242                         familyName(authors[0]), familyName(authors[1]));
 243
 244         if (authors.size() > 2)
 245                 return bformat(_("%1$s et al."), familyName(authors[0]));
 246
 247         return familyName(authors[0]);
 248 }
 249
 250
 251 docstring const BibTeXInfo::getYear() const
 252 {
 253         if (is_bibtex_)
 254                 return operator[]("year");
 255
 256         docstring const opt = label();
 257         if (opt.empty())
 258                 return docstring();
 259
 260         docstring authors;
 261         docstring const tmp = split(opt, authors, '(');
 262         docstring year;
 263         split(tmp, year, ')');
 264         return year;
 265 }
 266
 267
 268 docstring const BibTeXInfo::getXRef() const
 269 {
 270         if (!is_bibtex_)
 271                 return docstring();
 272         return operator[]("crossref");
 273 }
 274
 275
 276 docstring const & BibTeXInfo::getInfo(BibTeXInfo const * const xref) const
 277 {
 278         if (!info_.empty())
 279                 return info_;
 280
 281         if (!is_bibtex_) {
 282                 BibTeXInfo::const_iterator it = find(from_ascii("ref"));
 283                 info_ = it->second;
 284                 return info_;
 285         }
 286
 287         // FIXME
 288         // This could be made a lot better using the entry_type_
 289         // field to customize the output based upon entry type.
 290
 291         // Search for all possible "required" fields
 292         docstring author = getValueForKey("author", xref);
 293         if (author.empty())
 294                 author = getValueForKey("editor", xref);
 295
 296         docstring year   = getValueForKey("year", xref);
 297         docstring title  = getValueForKey("title", xref);
 298         docstring docLoc = getValueForKey("pages", xref);
 299         if (docLoc.empty()) {
 300                 docLoc = getValueForKey("chapter", xref);
 301                 if (!docLoc.empty())
 302                         docLoc = _("Ch. ") + docLoc;
 303         }       else {
 304                 docLoc = _("pp. ") + docLoc;
 305         }
 306
 307         docstring media = getValueForKey("journal", xref);
 308         if (media.empty()) {
 309                 media = getValueForKey("publisher", xref);
 310                 if (media.empty()) {
 311                         media = getValueForKey("school", xref);
 312                         if (media.empty())
 313                                 media = getValueForKey("institution");
 314                 }
 315         }
 316         docstring volume = getValueForKey("volume", xref);
 317
 318         odocstringstream result;
 319         if (!author.empty())
 320                 result << author << ", ";
 321         if (!title.empty())
 322                 result << title;
 323         if (!media.empty())
 324                 result << ", " << media;
 325         if (!year.empty())
 326                 result << " (" << year << ")";
 327         if (!docLoc.empty())
 328                 result << ", " << docLoc;
 329
 330         docstring const result_str = rtrim(result.str());
 331         if (!result_str.empty()) {
 332                 info_ = convertLaTeXCommands(result_str);
 333                 return info_;
 334         }
 335
 336         // This should never happen (or at least be very unusual!)
 337         static docstring e = docstring();
 338         return e;
 339 }
 340
 341
 342 docstring const & BibTeXInfo::operator[](docstring const & field) const
 343 {
 344         BibTeXInfo::const_iterator it = find(field);
 345         if (it != end())
 346                 return it->second;
 347         static docstring const empty_value = docstring();
 348         return empty_value;
 349 }
 350
 351
 352 docstring const & BibTeXInfo::operator[](string const & field) const
 353 {
 354         return operator[](from_ascii(field));
 355 }
 356
 357
 358 docstring BibTeXInfo::getValueForKey(string const & key,
 359                 BibTeXInfo const * const xref) const
 360 {
 361         docstring const ret = operator[](key);
 362         if (!ret.empty() || !xref)
 363                 return ret;
 364         return (*xref)[key];
 365 }
 366
 367
 368 //////////////////////////////////////////////////////////////////////
 369 //
 370 // BiblioInfo
 371 //
 372 //////////////////////////////////////////////////////////////////////
 373
 374 namespace {
 375 // A functor for use with sort, leading to case insensitive sorting
 376         class compareNoCase: public binary_function<docstring, docstring, bool>
 377         {
 378                 public:
 379                         bool operator()(docstring const & s1, docstring const & s2) const {
 380                                 return compare_no_case(s1, s2) < 0;
 381                         }
 382         };
 383 } // namespace anon
 384
 385
 386 vector<docstring> const BiblioInfo::getKeys() const
 387 {
 388         vector<docstring> bibkeys;
 389         BiblioInfo::const_iterator it  = begin();
 390         for (; it != end(); ++it)
 391                 bibkeys.push_back(it->first);
 392         sort(bibkeys.begin(), bibkeys.end(), compareNoCase());
 393         return bibkeys;
 394 }
 395
 396
 397 vector<docstring> const BiblioInfo::getFields() const
 398 {
 399         vector<docstring> bibfields;
 400         set<docstring>::const_iterator it = field_names_.begin();
 401         set<docstring>::const_iterator end = field_names_.end();
 402         for (; it != end; ++it)
 403                 bibfields.push_back(*it);
 404         sort(bibfields.begin(), bibfields.end());
 405         return bibfields;
 406 }
 407
 408
 409 vector<docstring> const BiblioInfo::getEntries() const
 410 {
 411         vector<docstring> bibentries;
 412         set<docstring>::const_iterator it = entry_types_.begin();
 413         set<docstring>::const_iterator end = entry_types_.end();
 414         for (; it != end; ++it)
 415                 bibentries.push_back(*it);
 416         sort(bibentries.begin(), bibentries.end());
 417         return bibentries;
 418 }
 419
 420
 421 docstring const BiblioInfo::getAbbreviatedAuthor(docstring const & key) const
 422 {
 423         BiblioInfo::const_iterator it = find(key);
 424         if (it == end())
 425                 return docstring();
 426         BibTeXInfo const & data = it->second;
 427         return data.getAbbreviatedAuthor();
 428 }
 429
 430
 431 docstring const BiblioInfo::getYear(docstring const & key) const
 432 {
 433         BiblioInfo::const_iterator it = find(key);
 434         if (it == end())
 435                 return docstring();
 436         BibTeXInfo const & data = it->second;
 437         docstring year = data.getYear();
 438         if (!year.empty())
 439                 return year;
 440         // let's try the crossref
 441         docstring const xref = data.getXRef();
 442         if (xref.empty())
 443                 return _("No year"); // no luck
 444         BiblioInfo::const_iterator const xrefit = find(xref);
 445         if (xrefit == end())
 446                 return _("No year"); // no luck again
 447         BibTeXInfo const & xref_data = xrefit->second;
 448         return xref_data.getYear();
 449         return data.getYear();
 450 }
 451
 452
 453 docstring const BiblioInfo::getInfo(docstring const & key) const
 454 {
 455         BiblioInfo::const_iterator it = find(key);
 456         if (it == end())
 457                 return docstring();
 458         BibTeXInfo const & data = it->second;
 459         BibTeXInfo const * xrefptr = 0;
 460         docstring const xref = data.getXRef();
 461         if (!xref.empty()) {
 462                 BiblioInfo::const_iterator const xrefit = find(xref);
 463                 if (xrefit != end())
 464                         xrefptr = &(xrefit->second);
 465         }
 466         return data.getInfo(xrefptr);
 467 }
 468
 469
 470 vector<docstring> const BiblioInfo::getCiteStrings(
 471         docstring const & key, Buffer const & buf) const
 472 {
 473         CiteEngine const engine = buf.params().citeEngine();
 474         if (engine == ENGINE_BASIC || engine == ENGINE_NATBIB_NUMERICAL)
 475                 return getNumericalStrings(key, buf);
 476         else
 477                 return getAuthorYearStrings(key, buf);
 478 }
 479
 480
 481 vector<docstring> const BiblioInfo::getNumericalStrings(
 482         docstring const & key, Buffer const & buf) const
 483 {
 484         if (empty())
 485                 return vector<docstring>();
 486
 487         docstring const author = getAbbreviatedAuthor(key);
 488         docstring const year   = getYear(key);
 489         if (author.empty() || year.empty())
 490                 return vector<docstring>();
 491
 492         vector<CiteStyle> const & styles = citeStyles(buf.params().citeEngine());
 493
 494         vector<docstring> vec(styles.size());
 495         for (size_t i = 0; i != vec.size(); ++i) {
 496                 docstring str;
 497
 498                 switch (styles[i]) {
 499                         case CITE:
 500                         case CITEP:
 501                                 str = from_ascii("[#ID]");
 502                                 break;
 503
 504                         case NOCITE:
 505                                 str = _("Add to bibliography only.");
 506                                 break;
 507
 508                         case CITET:
 509                                 str = author + " [#ID]";
 510                                 break;
 511
 512                         case CITEALT:
 513                                 str = author + " #ID";
 514                                 break;
 515
 516                         case CITEALP:
 517                                 str = from_ascii("#ID");
 518                                 break;
 519
 520                         case CITEAUTHOR:
 521                                 str = author;
 522                                 break;
 523
 524                         case CITEYEAR:
 525                                 str = year;
 526                                 break;
 527
 528                         case CITEYEARPAR:
 529                                 str = '(' + year + ')';
 530                                 break;
 531                 }
 532
 533                 vec[i] = str;
 534         }
 535
 536         return vec;
 537 }
 538
 539
 540 vector<docstring> const BiblioInfo::getAuthorYearStrings(
 541         docstring const & key, Buffer const & buf) const
 542 {
 543         if (empty())
 544                 return vector<docstring>();
 545
 546         docstring const author = getAbbreviatedAuthor(key);
 547         docstring const year   = getYear(key);
 548         if (author.empty() || year.empty())
 549                 return vector<docstring>();
 550
 551         vector<CiteStyle> const & styles = citeStyles(buf.params().citeEngine());
 552
 553         vector<docstring> vec(styles.size());
 554         for (size_t i = 0; i != vec.size(); ++i) {
 555                 docstring str;
 556
 557                 switch (styles[i]) {
 558                         case CITE:
 559                 // jurabib only: Author/Annotator
 560                 // (i.e. the "before" field, 2nd opt arg)
 561                                 str = author + "/<" + _("before") + '>';
 562                                 break;
 563
 564                         case NOCITE:
 565                                 str = _("Add to bibliography only.");
 566                                 break;
 567
 568                         case CITET:
 569                                 str = author + " (" + year + ')';
 570                                 break;
 571
 572                         case CITEP:
 573                                 str = '(' + author + ", " + year + ')';
 574                                 break;
 575
 576                         case CITEALT:
 577                                 str = author + ' ' + year ;
 578                                 break;
 579
 580                         case CITEALP:
 581                                 str = author + ", " + year ;
 582                                 break;
 583
 584                         case CITEAUTHOR:
 585                                 str = author;
 586                                 break;
 587
 588                         case CITEYEAR:
 589                                 str = year;
 590                                 break;
 591
 592                         case CITEYEARPAR:
 593                                 str = '(' + year + ')';
 594                                 break;
 595                 }
 596                 vec[i] = str;
 597         }
 598         return vec;
 599 }
 600
 601
 602 void BiblioInfo::mergeBiblioInfo(BiblioInfo const & info)
 603 {
 604         bimap_.insert(info.begin(), info.end());
 605 }
 606
 607
 608 namespace {
 609         // used in xhtml to sort a list of BibTeXInfo objects
 610         bool lSorter(BibTeXInfo const * lhs, BibTeXInfo const * rhs)
 611         {
 612                 return lhs->getAbbreviatedAuthor() < rhs->getAbbreviatedAuthor();
 613         }
 614 }
 615
 616
 617 void BiblioInfo::collectCitedEntries(Buffer const & buf)
 618 {
 619         cited_entries_.clear();
 620         // We are going to collect all the citation keys used in the document,
 621         // getting them from the TOC.
 622         // FIXME We may want to collect these differently, in the first case,
 623         // so that we might have them in order of appearance.
 624         set<docstring> citekeys;
 625         Toc const & toc = buf.tocBackend().toc("citation");
 626         Toc::const_iterator it = toc.begin();
 627         Toc::const_iterator const en = toc.end();
 628         for (; it != en; ++it) {
 629                 if (it->str().empty())
 630                         continue;
 631                 vector<docstring> const keys = getVectorFromString(it->str());
 632                 citekeys.insert(keys.begin(), keys.end());
 633         }
 634         if (citekeys.empty())
 635                 return;
 636
 637         // We have a set of the keys used in this document.
 638         // We will now convert it to a list of the BibTeXInfo objects used in
 639         // this document...
 640         vector<BibTeXInfo const *> bi;
 641         set<docstring>::const_iterator cit = citekeys.begin();
 642         set<docstring>::const_iterator const cen = citekeys.end();
 643         for (; cit != cen; ++cit) {
 644                 BiblioInfo::const_iterator const bt = find(*cit);
 645                 if (bt == end() || !bt->second.isBibTeX())
 646                         continue;
 647                 bi.push_back(&(bt->second));
 648         }
 649         // ...and sort it.
 650         sort(bi.begin(), bi.end(), lSorter);
 651
 652         // Now we can write the sorted keys
 653         vector<BibTeXInfo const *>::const_iterator bit = bi.begin();
 654         vector<BibTeXInfo const *>::const_iterator ben = bi.end();
 655         for (; bit != ben; ++bit)
 656                 cited_entries_.push_back((*bit)->key());
 657 }
 658
 659
 660 //////////////////////////////////////////////////////////////////////
 661 //
 662 // CitationStyle
 663 //
 664 //////////////////////////////////////////////////////////////////////
 665
 666 namespace {
 667
 668
 669 char const * const citeCommands[] = {
 670         "cite", "citet", "citep", "citealt", "citealp",
 671         "citeauthor", "citeyear", "citeyearpar", "nocite" };
 672
 673 unsigned int const nCiteCommands =
 674                 sizeof(citeCommands) / sizeof(char *);
 675
 676 CiteStyle const citeStylesArray[] = {
 677         CITE, CITET, CITEP, CITEALT, CITEALP,
 678         CITEAUTHOR, CITEYEAR, CITEYEARPAR, NOCITE };
 679
 680 unsigned int const nCiteStyles =
 681                 sizeof(citeStylesArray) / sizeof(CiteStyle);
 682
 683 CiteStyle const citeStylesFull[] = {
 684         CITET, CITEP, CITEALT, CITEALP, CITEAUTHOR };
 685
 686 unsigned int const nCiteStylesFull =
 687                 sizeof(citeStylesFull) / sizeof(CiteStyle);
 688
 689 CiteStyle const citeStylesUCase[] = {
 690         CITET, CITEP, CITEALT, CITEALP, CITEAUTHOR };
 691
 692 unsigned int const nCiteStylesUCase =
 693         sizeof(citeStylesUCase) / sizeof(CiteStyle);
 694
 695 } // namespace anon
 696
 697
 698 CitationStyle citationStyleFromString(string const & command)
 699 {
 700         CitationStyle s;
 701         if (command.empty())
 702                 return s;
 703
 704         string cmd = command;
 705         if (cmd[0] == 'C') {
 706                 s.forceUpperCase = true;
 707                 cmd[0] = 'c';
 708         }
 709
 710         size_t const n = cmd.size() - 1;
 711         if (cmd != "cite" && cmd[n] == '*') {
 712                 s.full = true;
 713                 cmd = cmd.substr(0, n);
 714         }
 715
 716         char const * const * const last = citeCommands + nCiteCommands;
 717         char const * const * const ptr = find(citeCommands, last, cmd);
 718
 719         if (ptr != last) {
 720                 size_t idx = ptr - citeCommands;
 721                 s.style = citeStylesArray[idx];
 722         }
 723         return s;
 724 }
 725
 726
 727 string citationStyleToString(const CitationStyle & s)
 728 {
 729         string cite = citeCommands[s.style];
 730         if (s.full) {
 731                 CiteStyle const * last = citeStylesFull + nCiteStylesFull;
 732                 if (std::find(citeStylesFull, last, s.style) != last)
 733                         cite += '*';
 734         }
 735
 736         if (s.forceUpperCase) {
 737                 CiteStyle const * last = citeStylesUCase + nCiteStylesUCase;
 738                 if (std::find(citeStylesUCase, last, s.style) != last)
 739                         cite[0] = 'C';
 740         }
 741
 742         return cite;
 743 }
 744
 745 vector<CiteStyle> citeStyles(CiteEngine engine)
 746 {
 747         unsigned int nStyles = 0;
 748         unsigned int start = 0;
 749
 750         switch (engine) {
 751                 case ENGINE_BASIC:
 752                         nStyles = 2;
 753                         start = 0;
 754                         break;
 755                 case ENGINE_NATBIB_AUTHORYEAR:
 756                 case ENGINE_NATBIB_NUMERICAL:
 757                         nStyles = nCiteStyles - 1;
 758                         start = 1;
 759                         break;
 760                 case ENGINE_JURABIB:
 761                         nStyles = nCiteStyles;
 762                         start = 0;
 763                         break;
 764         }
 765
 766         vector<CiteStyle> styles(nStyles);
 767         size_t i = 0;
 768         int j = start;
 769         for (; i != styles.size(); ++i, ++j)
 770                 styles[i] = citeStylesArray[j];
 771
 772         return styles;
 773 }
 774
 775 } // namespace lyx
 776