src/BiblioInfo.cpp

   1 /**
   2  * \file BiblioInfo.cpp
   3  * This file is part of LyX, the document processor.
   4  * Licence details can be found in the file COPYING.
   5  *
   6  * \author Angus Leeming
   7  * \author Herbert Voß
   8  * \author Richard Heck
   9  *
  10  * Full author contact details are available in file CREDITS.
  11  */
  12
  13 #include <config.h>
  14
  15 #include "BiblioInfo.h"
  16 #include "Buffer.h"
  17 #include "BufferParams.h"
  18 #include "buffer_funcs.h"
  19 #include "Encoding.h"
  20 #include "InsetIterator.h"
  21 #include "Paragraph.h"
  22
  23 #include "insets/Inset.h"
  24 #include "insets/InsetBibitem.h"
  25 #include "insets/InsetBibtex.h"
  26 #include "insets/InsetInclude.h"
  27
  28 #include "support/docstream.h"
  29 #include "support/gettext.h"
  30 #include "support/lassert.h"
  31 #include "support/lstrings.h"
  32 #include "support/textutils.h"
  33
  34 #include "boost/regex.hpp"
  35
  36 using namespace std;
  37 using namespace lyx::support;
  38
  39
  40 namespace lyx {
  41
  42 namespace {
  43
  44 // gets the "family name" from an author-type string
  45 docstring familyName(docstring const & name)
  46 {
  47         if (name.empty())
  48                 return docstring();
  49
  50         // first we look for a comma, and take the last name to be everything
  51         // preceding the right-most one, so that we also get the "jr" part.
  52         docstring::size_type idx = name.rfind(',');
  53         if (idx != docstring::npos)
  54                 return ltrim(name.substr(0, idx));
  55
  56         // OK, so now we want to look for the last name. We're going to
  57         // include the "von" part. This isn't perfect.
  58         // Split on spaces, to get various tokens.
  59         vector<docstring> pieces = getVectorFromString(name, from_ascii(" "));
  60         // If we only get two, assume the last one is the last name
  61         if (pieces.size() <= 2)
  62                 return pieces.back();
  63
  64         // Now we look for the first token that begins with a lower case letter.
  65         vector<docstring>::const_iterator it = pieces.begin();
  66         vector<docstring>::const_iterator en = pieces.end();
  67         for (; it != en; ++it) {
  68                 if ((*it).size() == 0)
  69                         continue;
  70                 char_type const c = (*it)[0];
  71                 if (isLower(c))
  72                         break;
  73         }
  74
  75         if (it == en) // we never found a "von"
  76                 return pieces.back();
  77
  78         // reconstruct what we need to return
  79         docstring retval;
  80         bool first = true;
  81         for (; it != en; ++it) {
  82                 if (!first)
  83                         retval += " ";
  84                 else
  85                         first = false;
  86                 retval += *it;
  87         }
  88         return retval;
  89 }
  90
// Converts a string containing LaTeX commands into unicode for display.
//
// The input is consumed one character (or one recognized command) at a
// time from the front of a working copy, and the displayable text is
// accumulated in the return value. Three flags carry state between
// iterations:
//   scanning_math - we are inside $...$; characters are copied verbatim
//   scanning_cmd  - we are skipping over the letters of a command name
//                   that could not be converted
//   escaped       - the previous character was a backslash
docstring convertLaTeXCommands(docstring const & str)
{
        docstring val = str;
        docstring ret;

        bool scanning_cmd = false;
        bool scanning_math = false;
        bool escaped = false; // used to catch \$, etc.
        while (val.size()) {
                char_type const ch = val[0];

                // if we're scanning math, we output everything until we
                // find an unescaped $, at which point we leave math mode.
                // The closing $ itself is still output.
                if (scanning_math) {
                        if (escaped)
                                escaped = false;
                        else if (ch == '\\')
                                escaped = true;
                        else if (ch == '$')
                                scanning_math = false;
                        ret += ch;
                        val = val.substr(1);
                        continue;
                }

                // if we're scanning a command name, then we just
                // discard characters until we hit something that
                // isn't an ASCII letter.
                if (scanning_cmd) {
                        if (isAlphaASCII(ch)) {
                                val = val.substr(1);
                                // at least one letter followed the backslash,
                                // so this is not an escaped single character
                                escaped = false;
                                continue;
                        }
                        // so we're done with this command.
                        // now we fall through and check this character.
                        scanning_cmd = false;
                }

                // was the last character a \? If so, then this is something like:
                // \\ or \$, so we'll just output it. That's probably not always right...
                if (escaped) {
                        // exception: output \, as THIN SPACE (U+2009)
                        if (ch == ',')
                                ret.push_back(0x2009);
                        else
                                ret += ch;
                        val = val.substr(1);
                        escaped = false;
                        continue;
                }

                // an unescaped $ opens math mode; the $ itself is output
                if (ch == '$') {
                        ret += ch;
                        val = val.substr(1);
                        scanning_math = true;
                        continue;
                }

                // we just ignore braces
                if (ch == '{' || ch == '}') {
                        val = val.substr(1);
                        continue;
                }

                // we're going to check things that look like commands, so if
                // this doesn't, just output it.
                if (ch != '\\') {
                        ret += ch;
                        val = val.substr(1);
                        continue;
                }

                // ok, could be a command of some sort
                // let's see if it corresponds to some unicode
                // unicodesymbols has things in the form: \"{u},
                // whereas we may see things like: \"u. So we'll
                // look for that and change it, if necessary.
                // The pattern is: backslash, one non-word character, one word
                // character, anchored at the start of the remaining input.
                static boost::regex const reg("^\\\\\\W\\w");
                if (boost::regex_search(to_utf8(val), reg)) {
                        // insert the closing brace first so that the index
                        // used for the opening brace is not shifted
                        val.insert(3, from_ascii("}"));
                        val.insert(2, from_ascii("{"));
                }
                docstring rem;
                docstring const cnvtd = Encodings::fromLaTeXCommand(val, rem,
                                                        Encodings::TEXT_CMD);
                if (!cnvtd.empty()) {
                        // it did, so we'll take that bit and proceed with what's left
                        ret += cnvtd;
                        val = rem;
                        continue;
                }
                // it's a command of some sort that we could not convert:
                // drop the backslash and let the flags above decide what to
                // do with the characters that follow.
                scanning_cmd = true;
                escaped = true;
                val = val.substr(1);
        }
        return ret;
}
 192
 193 } // anon namespace
 194
 195
 196 //////////////////////////////////////////////////////////////////////
 197 //
 198 // BibTeXInfo
 199 //
 200 //////////////////////////////////////////////////////////////////////
 201
 202 BibTeXInfo::BibTeXInfo(docstring const & key, docstring const & type)
 203         : is_bibtex_(true), bib_key_(key), entry_type_(type), info_()
 204 {}
 205
 206
 207 bool BibTeXInfo::hasField(docstring const & field) const
 208 {
 209         return count(field) == 1;
 210 }
 211
 212
 213 docstring const BibTeXInfo::getAbbreviatedAuthor() const
 214 {
 215         if (!is_bibtex_) {
 216                 docstring const opt = label();
 217                 if (opt.empty())
 218                         return docstring();
 219
 220                 docstring authors;
 221                 split(opt, authors, '(');
 222                 return authors;
 223         }
 224
 225         docstring author = convertLaTeXCommands(operator[]("author"));
 226         if (author.empty()) {
 227                 author = convertLaTeXCommands(operator[]("editor"));
 228                 if (author.empty())
 229                         return bib_key_;
 230         }
 231
 232         // OK, we've got some names. Let's format them.
 233         // Try to split the author list on " and "
 234         vector<docstring> const authors =
 235                 getVectorFromString(author, from_ascii(" and "));
 236
 237         if (authors.size() == 2)
 238                 return bformat(_("%1$s and %2$s"),
 239                         familyName(authors[0]), familyName(authors[1]));
 240
 241         if (authors.size() > 2)
 242                 return bformat(_("%1$s et al."), familyName(authors[0]));
 243
 244         return familyName(authors[0]);
 245 }
 246
 247
 248 docstring const BibTeXInfo::getYear() const
 249 {
 250         if (is_bibtex_)
 251                 return operator[]("year");
 252
 253         docstring const opt = label();
 254         if (opt.empty())
 255                 return docstring();
 256
 257         docstring authors;
 258         docstring const tmp = split(opt, authors, '(');
 259         docstring year;
 260         split(tmp, year, ')');
 261         return year;
 262 }
 263
 264
 265 docstring const BibTeXInfo::getXRef() const
 266 {
 267         if (!is_bibtex_)
 268                 return docstring();
 269         return operator[]("crossref");
 270 }
 271
 272
 273 docstring const & BibTeXInfo::getInfo(BibTeXInfo const * const xref) const
 274 {
 275         if (!info_.empty())
 276                 return info_;
 277
 278         if (!is_bibtex_) {
 279                 BibTeXInfo::const_iterator it = find(from_ascii("ref"));
 280                 info_ = it->second;
 281                 return info_;
 282         }
 283
 284         // FIXME
 285         // This could be made a lot better using the entry_type_
 286         // field to customize the output based upon entry type.
 287
 288         // Search for all possible "required" fields
 289         docstring author = getValueForKey("author", xref);
 290         if (author.empty())
 291                 author = getValueForKey("editor", xref);
 292
 293         docstring year   = getValueForKey("year", xref);
 294         docstring title  = getValueForKey("title", xref);
 295         docstring docLoc = getValueForKey("pages", xref);
 296         if (docLoc.empty()) {
 297                 docLoc = getValueForKey("chapter", xref);
 298                 if (!docLoc.empty())
 299                         docLoc = _("Ch. ") + docLoc;
 300         }       else {
 301                 docLoc = _("pp. ") + docLoc;
 302         }
 303
 304         docstring media = getValueForKey("journal", xref);
 305         if (media.empty()) {
 306                 media = getValueForKey("publisher", xref);
 307                 if (media.empty()) {
 308                         media = getValueForKey("school", xref);
 309                         if (media.empty())
 310                                 media = getValueForKey("institution");
 311                 }
 312         }
 313         docstring volume = getValueForKey("volume", xref);
 314
 315         odocstringstream result;
 316         if (!author.empty())
 317                 result << author << ", ";
 318         if (!title.empty())
 319                 result << title;
 320         if (!media.empty())
 321                 result << ", " << media;
 322         if (!year.empty())
 323                 result << " (" << year << ")";
 324         if (!docLoc.empty())
 325                 result << ", " << docLoc;
 326
 327         docstring const result_str = rtrim(result.str());
 328         if (!result_str.empty()) {
 329                 info_ = convertLaTeXCommands(result_str);
 330                 return info_;
 331         }
 332
 333         // This should never happen (or at least be very unusual!)
 334         static docstring e = docstring();
 335         return e;
 336 }
 337
 338
 339 docstring const & BibTeXInfo::operator[](docstring const & field) const
 340 {
 341         BibTeXInfo::const_iterator it = find(field);
 342         if (it != end())
 343                 return it->second;
 344         static docstring const empty_value = docstring();
 345         return empty_value;
 346 }
 347
 348
 349 docstring const & BibTeXInfo::operator[](string const & field) const
 350 {
 351         return operator[](from_ascii(field));
 352 }
 353
 354
 355 docstring BibTeXInfo::getValueForKey(string const & key,
 356                 BibTeXInfo const * const xref) const
 357 {
 358         docstring const ret = operator[](key);
 359         if (!ret.empty() || !xref)
 360                 return ret;
 361         return (*xref)[key];
 362 }
 363
 364
 365 //////////////////////////////////////////////////////////////////////
 366 //
 367 // BiblioInfo
 368 //
 369 //////////////////////////////////////////////////////////////////////
 370
 371 namespace {
 372 // A functor for use with sort, leading to case insensitive sorting
 373         class compareNoCase: public binary_function<docstring, docstring, bool>
 374         {
 375                 public:
 376                         bool operator()(docstring const & s1, docstring const & s2) const {
 377                                 return compare_no_case(s1, s2) < 0;
 378                         }
 379         };
 380 } // namespace anon
 381
 382
 383 vector<docstring> const BiblioInfo::getKeys() const
 384 {
 385         vector<docstring> bibkeys;
 386         BiblioInfo::const_iterator it  = begin();
 387         for (; it != end(); ++it)
 388                 bibkeys.push_back(it->first);
 389         sort(bibkeys.begin(), bibkeys.end(), compareNoCase());
 390         return bibkeys;
 391 }
 392
 393
 394 vector<docstring> const BiblioInfo::getFields() const
 395 {
 396         vector<docstring> bibfields;
 397         set<docstring>::const_iterator it = field_names_.begin();
 398         set<docstring>::const_iterator end = field_names_.end();
 399         for (; it != end; ++it)
 400                 bibfields.push_back(*it);
 401         sort(bibfields.begin(), bibfields.end());
 402         return bibfields;
 403 }
 404
 405
 406 vector<docstring> const BiblioInfo::getEntries() const
 407 {
 408         vector<docstring> bibentries;
 409         set<docstring>::const_iterator it = entry_types_.begin();
 410         set<docstring>::const_iterator end = entry_types_.end();
 411         for (; it != end; ++it)
 412                 bibentries.push_back(*it);
 413         sort(bibentries.begin(), bibentries.end());
 414         return bibentries;
 415 }
 416
 417
 418 docstring const BiblioInfo::getAbbreviatedAuthor(docstring const & key) const
 419 {
 420         BiblioInfo::const_iterator it = find(key);
 421         if (it == end())
 422                 return docstring();
 423         BibTeXInfo const & data = it->second;
 424         return data.getAbbreviatedAuthor();
 425 }
 426
 427
 428 docstring const BiblioInfo::getYear(docstring const & key) const
 429 {
 430         BiblioInfo::const_iterator it = find(key);
 431         if (it == end())
 432                 return docstring();
 433         BibTeXInfo const & data = it->second;
 434         docstring year = data.getYear();
 435         if (!year.empty())
 436                 return year;
 437         // let's try the crossref
 438         docstring const xref = data.getXRef();
 439         if (xref.empty())
 440                 return _("No year"); // no luck
 441         BiblioInfo::const_iterator const xrefit = find(xref);
 442         if (xrefit == end())
 443                 return _("No year"); // no luck again
 444         BibTeXInfo const & xref_data = xrefit->second;
 445         return xref_data.getYear();
 446         return data.getYear();
 447 }
 448
 449
 450 docstring const BiblioInfo::getInfo(docstring const & key) const
 451 {
 452         BiblioInfo::const_iterator it = find(key);
 453         if (it == end())
 454                 return docstring();
 455         BibTeXInfo const & data = it->second;
 456         BibTeXInfo const * xrefptr = 0;
 457         docstring const xref = data.getXRef();
 458         if (!xref.empty()) {
 459                 BiblioInfo::const_iterator const xrefit = find(xref);
 460                 if (xrefit != end())
 461                         xrefptr = &(xrefit->second);
 462         }
 463         return data.getInfo(xrefptr);
 464 }
 465
 466
 467 vector<docstring> const BiblioInfo::getCiteStrings(
 468         docstring const & key, Buffer const & buf) const
 469 {
 470         CiteEngine const engine = buf.params().citeEngine();
 471         if (engine == ENGINE_BASIC || engine == ENGINE_NATBIB_NUMERICAL)
 472                 return getNumericalStrings(key, buf);
 473         else
 474                 return getAuthorYearStrings(key, buf);
 475 }
 476
 477
 478 vector<docstring> const BiblioInfo::getNumericalStrings(
 479         docstring const & key, Buffer const & buf) const
 480 {
 481         if (empty())
 482                 return vector<docstring>();
 483
 484         docstring const author = getAbbreviatedAuthor(key);
 485         docstring const year   = getYear(key);
 486         if (author.empty() || year.empty())
 487                 return vector<docstring>();
 488
 489         vector<CiteStyle> const & styles = citeStyles(buf.params().citeEngine());
 490
 491         vector<docstring> vec(styles.size());
 492         for (size_t i = 0; i != vec.size(); ++i) {
 493                 docstring str;
 494
 495                 switch (styles[i]) {
 496                         case CITE:
 497                         case CITEP:
 498                                 str = from_ascii("[#ID]");
 499                                 break;
 500
 501                         case NOCITE:
 502                                 str = _("Add to bibliography only.");
 503                                 break;
 504
 505                         case CITET:
 506                                 str = author + " [#ID]";
 507                                 break;
 508
 509                         case CITEALT:
 510                                 str = author + " #ID";
 511                                 break;
 512
 513                         case CITEALP:
 514                                 str = from_ascii("#ID");
 515                                 break;
 516
 517                         case CITEAUTHOR:
 518                                 str = author;
 519                                 break;
 520
 521                         case CITEYEAR:
 522                                 str = year;
 523                                 break;
 524
 525                         case CITEYEARPAR:
 526                                 str = '(' + year + ')';
 527                                 break;
 528                 }
 529
 530                 vec[i] = str;
 531         }
 532
 533         return vec;
 534 }
 535
 536
 537 vector<docstring> const BiblioInfo::getAuthorYearStrings(
 538         docstring const & key, Buffer const & buf) const
 539 {
 540         if (empty())
 541                 return vector<docstring>();
 542
 543         docstring const author = getAbbreviatedAuthor(key);
 544         docstring const year   = getYear(key);
 545         if (author.empty() || year.empty())
 546                 return vector<docstring>();
 547
 548         vector<CiteStyle> const & styles = citeStyles(buf.params().citeEngine());
 549
 550         vector<docstring> vec(styles.size());
 551         for (size_t i = 0; i != vec.size(); ++i) {
 552                 docstring str;
 553
 554                 switch (styles[i]) {
 555                         case CITE:
 556                 // jurabib only: Author/Annotator
 557                 // (i.e. the "before" field, 2nd opt arg)
 558                                 str = author + "/<" + _("before") + '>';
 559                                 break;
 560
 561                         case NOCITE:
 562                                 str = _("Add to bibliography only.");
 563                                 break;
 564
 565                         case CITET:
 566                                 str = author + " (" + year + ')';
 567                                 break;
 568
 569                         case CITEP:
 570                                 str = '(' + author + ", " + year + ')';
 571                                 break;
 572
 573                         case CITEALT:
 574                                 str = author + ' ' + year ;
 575                                 break;
 576
 577                         case CITEALP:
 578                                 str = author + ", " + year ;
 579                                 break;
 580
 581                         case CITEAUTHOR:
 582                                 str = author;
 583                                 break;
 584
 585                         case CITEYEAR:
 586                                 str = year;
 587                                 break;
 588
 589                         case CITEYEARPAR:
 590                                 str = '(' + year + ')';
 591                                 break;
 592                 }
 593                 vec[i] = str;
 594         }
 595         return vec;
 596 }
 597
 598
 599 void BiblioInfo::mergeBiblioInfo(BiblioInfo const & info)
 600 {
 601         bimap_.insert(info.begin(), info.end());
 602 }
 603
 604
 605 //////////////////////////////////////////////////////////////////////
 606 //
 607 // CitationStyle
 608 //
 609 //////////////////////////////////////////////////////////////////////
 610
 611 namespace {
 612
 613
 614 char const * const citeCommands[] = {
 615         "cite", "citet", "citep", "citealt", "citealp",
 616         "citeauthor", "citeyear", "citeyearpar", "nocite" };
 617
 618 unsigned int const nCiteCommands =
 619                 sizeof(citeCommands) / sizeof(char *);
 620
 621 CiteStyle const citeStylesArray[] = {
 622         CITE, CITET, CITEP, CITEALT, CITEALP,
 623         CITEAUTHOR, CITEYEAR, CITEYEARPAR, NOCITE };
 624
 625 unsigned int const nCiteStyles =
 626                 sizeof(citeStylesArray) / sizeof(CiteStyle);
 627
 628 CiteStyle const citeStylesFull[] = {
 629         CITET, CITEP, CITEALT, CITEALP, CITEAUTHOR };
 630
 631 unsigned int const nCiteStylesFull =
 632                 sizeof(citeStylesFull) / sizeof(CiteStyle);
 633
 634 CiteStyle const citeStylesUCase[] = {
 635         CITET, CITEP, CITEALT, CITEALP, CITEAUTHOR };
 636
 637 unsigned int const nCiteStylesUCase =
 638         sizeof(citeStylesUCase) / sizeof(CiteStyle);
 639
 640 } // namespace anon
 641
 642
 643 CitationStyle citationStyleFromString(string const & command)
 644 {
 645         CitationStyle s;
 646         if (command.empty())
 647                 return s;
 648
 649         string cmd = command;
 650         if (cmd[0] == 'C') {
 651                 s.forceUpperCase = true;
 652                 cmd[0] = 'c';
 653         }
 654
 655         size_t const n = cmd.size() - 1;
 656         if (cmd != "cite" && cmd[n] == '*') {
 657                 s.full = true;
 658                 cmd = cmd.substr(0, n);
 659         }
 660
 661         char const * const * const last = citeCommands + nCiteCommands;
 662         char const * const * const ptr = find(citeCommands, last, cmd);
 663
 664         if (ptr != last) {
 665                 size_t idx = ptr - citeCommands;
 666                 s.style = citeStylesArray[idx];
 667         }
 668         return s;
 669 }
 670
 671
 672 string citationStyleToString(const CitationStyle & s)
 673 {
 674         string cite = citeCommands[s.style];
 675         if (s.full) {
 676                 CiteStyle const * last = citeStylesFull + nCiteStylesFull;
 677                 if (std::find(citeStylesFull, last, s.style) != last)
 678                         cite += '*';
 679         }
 680
 681         if (s.forceUpperCase) {
 682                 CiteStyle const * last = citeStylesUCase + nCiteStylesUCase;
 683                 if (std::find(citeStylesUCase, last, s.style) != last)
 684                         cite[0] = 'C';
 685         }
 686
 687         return cite;
 688 }
 689
 690 vector<CiteStyle> citeStyles(CiteEngine engine)
 691 {
 692         unsigned int nStyles = 0;
 693         unsigned int start = 0;
 694
 695         switch (engine) {
 696                 case ENGINE_BASIC:
 697                         nStyles = 2;
 698                         start = 0;
 699                         break;
 700                 case ENGINE_NATBIB_AUTHORYEAR:
 701                 case ENGINE_NATBIB_NUMERICAL:
 702                         nStyles = nCiteStyles - 1;
 703                         start = 1;
 704                         break;
 705                 case ENGINE_JURABIB:
 706                         nStyles = nCiteStyles;
 707                         start = 0;
 708                         break;
 709         }
 710
 711         vector<CiteStyle> styles(nStyles);
 712         size_t i = 0;
 713         int j = start;
 714         for (; i != styles.size(); ++i, ++j)
 715                 styles[i] = citeStylesArray[j];
 716
 717         return styles;
 718 }
 719
 720 } // namespace lyx
 721