src/frontends/controllers/biblio.C

   1 /**
   2  * \file biblio.C
   3  * This file is part of LyX, the document processor.
   4  * Licence details can be found in the file COPYING.
   5  *
   6  * \author Angus Leeming
   7  * \author Herbert Voß
   8  *
   9  * Full author contact details are available in file CREDITS.
  10  */
  11
  12 #include <config.h>
  13
  14 #include "biblio.h"
  15
  16 #include "buffer.h"
  17 #include "bufferparams.h"
  18 #include "gettext.h"
  19
  20 #include "support/lstrings.h"
  21 #include "support/std_sstream.h"
  22
  23 #include <boost/regex.hpp>
  24
  25 #include <algorithm>
  26
  27 using lyx::support::ascii_lowercase;
  28 using lyx::support::bformat;
  29 using lyx::support::compare_ascii_no_case;
  30 using lyx::support::contains;
  31 using lyx::support::getVectorFromString;
  32 using lyx::support::ltrim;
  33 using lyx::support::rtrim;
  34 using lyx::support::split;
  35 using lyx::support::subst;
  36 using lyx::support::token;
  37 using lyx::support::trim;
  38
  39 using std::string;
  40 using std::ostringstream;
  41 using std::vector;
  42
  43
  44 namespace biblio {
  45
  46 string const familyName(string const & name)
  47 {
  48         // Very simple parser
  49         string fname = name;
  50
  51         // possible authorname combinations are:
  52         // "Surname, FirstName"
  53         // "Surname, F."
  54         // "FirstName Surname"
  55         // "F. Surname"
  56         string::size_type idx = fname.find(',');
  57         if (idx != string::npos)
  58                 return ltrim(fname.substr(0, idx));
  59         idx = fname.rfind('.');
  60         if (idx != string::npos)
  61                 fname = ltrim(fname.substr(idx + 1));
  62         // test if we have a LaTeX Space in front
  63         if (fname[0] == '\\')
  64                 return fname.substr(2);
  65
  66         return rtrim(fname);
  67 }
  68
  69
  70 string const getAbbreviatedAuthor(InfoMap const & map, string const & key)
  71 {
  72         BOOST_ASSERT(!map.empty());
  73
  74         InfoMap::const_iterator it = map.find(key);
  75         if (it == map.end())
  76                 return string();
  77         string const & data = it->second;
  78
  79         // Is the entry a BibTeX one or one from lyx-layout "bibliography"?
  80         string::size_type const pos = data.find("TheBibliographyRef");
  81         if (pos != string::npos) {
  82                 if (pos <= 2) {
  83                         return string();
  84                 }
  85
  86                 string const opt = trim(data.substr(0, pos - 1));
  87                 if (opt.empty())
  88                         return string();
  89
  90                 string authors;
  91                 split(opt, authors, '(');
  92                 return authors;
  93         }
  94
  95         string author = parseBibTeX(data, "author");
  96
  97         if (author.empty())
  98                 author = parseBibTeX(data, "editor");
  99
 100         if (author.empty()) {
 101                 author = parseBibTeX(data, "key");
 102                 if (author.empty())
 103                         author = key;
 104                 return author;
 105         }
 106
 107         vector<string> const authors = getVectorFromString(author, " and ");
 108         if (authors.empty())
 109                 return author;
 110
 111         if (authors.size() == 2)
 112                 return bformat(_("%1$s and %2$s"),
 113                         familyName(authors[0]), familyName(authors[1]));
 114
 115         if (authors.size() > 2)
 116                 return bformat(_("%1$s et al."), familyName(authors[0]));
 117
 118         return familyName(authors[0]);
 119 }
 120
 121
 122 string const getYear(InfoMap const & map, string const & key)
 123 {
 124         BOOST_ASSERT(!map.empty());
 125
 126         InfoMap::const_iterator it = map.find(key);
 127         if (it == map.end())
 128                 return string();
 129         string const & data = it->second;
 130
 131         // Is the entry a BibTeX one or one from lyx-layout "bibliography"?
 132         string::size_type const pos = data.find("TheBibliographyRef");
 133         if (pos != string::npos) {
 134                 if (pos <= 2) {
 135                         return string();
 136                 }
 137
 138                 string const opt =
 139                         trim(data.substr(0, pos - 1));
 140                 if (opt.empty())
 141                         return string();
 142
 143                 string authors;
 144                 string const tmp = split(opt, authors, '(');
 145                 string year;
 146                 split(tmp, year, ')');
 147                 return year;
 148
 149         }
 150
 151         string year = parseBibTeX(data, "year");
 152         if (year.empty())
 153                 year = _("No year");
 154
 155         return year;
 156 }
 157
 158
 159 namespace {
 160
 161 // A functor for use with std::sort, leading to case insensitive sorting
 162 struct compareNoCase: public std::binary_function<string, string, bool>
 163 {
 164         bool operator()(string const & s1, string const & s2) const {
 165                 return compare_ascii_no_case(s1, s2) < 0;
 166         }
 167 };
 168
 169 } // namespace anon
 170
 171
 172 vector<string> const getKeys(InfoMap const & map)
 173 {
 174         vector<string> bibkeys;
 175         InfoMap::const_iterator it  = map.begin();
 176         InfoMap::const_iterator end = map.end();
 177         for (; it != end; ++it) {
 178                 bibkeys.push_back(it->first);
 179         }
 180
 181         std::sort(bibkeys.begin(), bibkeys.end(), compareNoCase());
 182         return bibkeys;
 183 }
 184
 185
 186 string const getInfo(InfoMap const & map, string const & key)
 187 {
 188         BOOST_ASSERT(!map.empty());
 189
 190         InfoMap::const_iterator it = map.find(key);
 191         if (it == map.end())
 192                 return string();
 193         string const & data = it->second;
 194
 195         // is the entry a BibTeX one or one from lyx-layout "bibliography"?
 196         string const separator("TheBibliographyRef");
 197         string::size_type const pos = data.find(separator);
 198         if (pos != string::npos) {
 199                 string::size_type const pos2 = pos + separator.size();
 200                 string const info = trim(data.substr(pos2));
 201                 return info;
 202         }
 203
 204         // Search for all possible "required" keys
 205         string author = parseBibTeX(data, "author");
 206         if (author.empty())
 207                 author = parseBibTeX(data, "editor");
 208
 209         string year       = parseBibTeX(data, "year");
 210         string title      = parseBibTeX(data, "title");
 211         string booktitle  = parseBibTeX(data, "booktitle");
 212         string chapter    = parseBibTeX(data, "chapter");
 213         string number     = parseBibTeX(data, "number");
 214         string volume     = parseBibTeX(data, "volume");
 215         string pages      = parseBibTeX(data, "pages");
 216
 217         string media      = parseBibTeX(data, "journal");
 218         if (media.empty())
 219                 media = parseBibTeX(data, "publisher");
 220         if (media.empty())
 221                 media = parseBibTeX(data, "school");
 222         if (media.empty())
 223                 media = parseBibTeX(data, "institution");
 224
 225         ostringstream result;
 226         if (!author.empty())
 227                 result << author << ", ";
 228         if (!title.empty())
 229                 result << title;
 230         if (!booktitle.empty())
 231                 result << ", in " << booktitle;
 232         if (!chapter.empty())
 233                 result << ", Ch. " << chapter;
 234         if (!media.empty())
 235                 result << ", " << media;
 236         if (!volume.empty())
 237                 result << ", vol. " << volume;
 238         if (!number.empty())
 239                 result << ", no. " << number;
 240         if (!pages.empty())
 241                 result << ", pp. " << pages;
 242         if (!year.empty())
 243                 result << ", " << year;
 244
 245         string const result_str = rtrim(result.str());
 246         if (!result_str.empty())
 247                 return result_str;
 248
 249         // This should never happen (or at least be very unusual!)
 250         return data;
 251 }
 252
 253
 254 namespace {
 255
 256 // Escape special chars.
 257 // All characters are literals except: '.|*?+(){}[]^$\'
 258 // These characters are literals when preceded by a "\", which is done here
 259 string const escape_special_chars(string const & expr)
 260 {
 261         // Search for all chars '.|*?+(){}[^$]\'
 262         // Note that '[' and '\' must be escaped.
 263         // This is a limitation of boost::regex, but all other chars in BREs
 264         // are assumed literal.
 265         boost::RegEx reg("[].|*?+(){}^$\\[\\\\]");
 266
 267         // $& is a perl-like expression that expands to all of the current match
 268         // The '$' must be prefixed with the escape character '\' for
 269         // boost to treat it as a literal.
 270         // Thus, to prefix a matched expression with '\', we use:
 271         return reg.Merge(expr, "\\\\$&");
 272 }
 273
 274
 275 // A functor for use with std::find_if, used to ascertain whether a
 276 // data entry matches the required regex_
 277 struct RegexMatch : public std::unary_function<string, bool>
 278 {
 279         // re and icase are used to construct an instance of boost::RegEx.
 280         // if icase is true, then matching is insensitive to case
 281         RegexMatch(InfoMap const & m, string const & re, bool icase)
 282                 : map_(m), regex_(re, icase) {}
 283
 284         bool operator()(string const & key) const {
 285                 if (!validRE())
 286                         return false;
 287
 288                 // the data searched is the key + its associated BibTeX/biblio
 289                 // fields
 290                 string data = key;
 291                 InfoMap::const_iterator info = map_.find(key);
 292                 if (info != map_.end())
 293                         data += ' ' + info->second;
 294
 295                 // Attempts to find a match for the current RE
 296                 // somewhere in data.
 297                 return regex_.Search(data);
 298         }
 299
 300         bool validRE() const { return regex_.error_code() == 0; }
 301
 302 private:
 303         InfoMap const map_;
 304         mutable boost::RegEx regex_;
 305 };
 306
 307 } // namespace anon
 308
 309
 310 vector<string>::const_iterator
 311 searchKeys(InfoMap const & theMap,
 312            vector<string> const & keys,
 313            string const & search_expr,
 314            vector<string>::const_iterator start,
 315            Search type,
 316            Direction dir,
 317            bool caseSensitive)
 318 {
 319         // Preliminary checks
 320         if (start < keys.begin() || start >= keys.end())
 321                 return keys.end();
 322
 323         string expr = trim(search_expr);
 324         if (expr.empty())
 325                 return keys.end();
 326
 327         if (type == SIMPLE)
 328                 // We must escape special chars in the search_expr so that
 329                 // it is treated as a simple string by boost::regex.
 330                 expr = escape_special_chars(expr);
 331
 332         // Build the functor that will be passed to find_if.
 333         RegexMatch const match(theMap, expr, !caseSensitive);
 334         if (!match.validRE())
 335                 return keys.end();
 336
 337         // Search the vector of 'keys' from 'start' for one that matches the
 338         // predicate 'match'. Searching can be forward or backward from start.
 339         if (dir == FORWARD)
 340                 return std::find_if(start, keys.end(), match);
 341
 342         vector<string>::const_reverse_iterator rit(start);
 343         vector<string>::const_reverse_iterator rend = keys.rend();
 344         rit = std::find_if(rit, rend, match);
 345
 346         if (rit == rend)
 347                 return keys.end();
 348         // This is correct and always safe.
 349         // (See Meyer's Effective STL, Item 28.)
 350         return (++rit).base();
 351 }
 352
 353
/**
 * Extract the value of the field \p findkey from the BibTeX record
 * \p data.
 *
 * The record is first normalised (tabs turned into spaces, everything
 * from a '%' to the end of its line dropped, lines joined), then cut
 * into pieces at the commas to locate the first piece whose lower-cased
 * text contains \p findkey. The text following the '=' of that piece is
 * finally stripped of its enclosing {} or "" delimiters.
 *
 * \param data     the raw BibTeX record; taken by value and reused as
 *                 scratch space.
 * \param findkey  the field name to look for. It is compared with the
 *                 lower-cased record text, so it presumably has to be
 *                 given in lower case -- TODO confirm.
 * \return the value of the field, or an empty string when the record is
 *         empty, the field is not found, or no valid "= value" follows.
 */
string const parseBibTeX(string data, string const & findkey)
{
        string keyvalue;
        // At first we delete all characters right of '%',
        // replace tabs by a space and remove leading spaces.
        // We read the data line by line so that the \n are
        // ignored, too.
        // NOTE(review): the loop stops at the first empty token; whether
        // an empty line in the middle of the record ends it early depends
        // on token() -- TODO confirm.
        string data_;
        int Entries = 0;
        string dummy = token(data,'\n', Entries);
        while (!dummy.empty()) {
                dummy = subst(dummy, '\t', ' ');        // no tabs
                dummy = ltrim(dummy);           // no leading spaces
                // ignore lines beginning with '%', or everything right
                // of a '%' elsewhere in the line
                string::size_type const idx =
                        dummy.empty() ? string::npos : dummy.find('%');
                if (idx != string::npos)
                        dummy.erase(idx, string::npos);
                // Do we have a new field (the line contains '=') or a
                // continuation line of the previous one? In the first
                // case we simply drop the \n, in the second we replace
                // it with a space.
                if (!dummy.empty()) {
                        if (!contains(dummy, '='))
                                data_ += ' ' + dummy;
                        else
                                data_ += dummy;
                }
                dummy = token(data, '\n', ++Entries);
        }

        // replace double commas with "" for easy scanning
        // (they are restored once the field has been isolated)
        data = subst(data_, ",,", "\"\"");

        // unlikely!
        if (data.empty())
                return string();

        // now get only the important line of the bibtex entry.
        // all entries are divided by ',' except the last one.
        data += ',';  // now we have same behaviour for all entries
                      // because the last one is "blah ... }"
        Entries = 0;
        bool found = false;
        // parsing of title and booktitle is different from the
        // others, because booktitle contains title
        // NOTE(review): findkey is looked for anywhere in the piece, not
        // only in front of the '=', so it looks as if a field whose value
        // happens to contain findkey could be picked up -- confirm against
        // contains().
        do {
                dummy = token(data, ',', Entries++);
                if (!dummy.empty()) {
                        found = contains(ascii_lowercase(dummy), findkey);
                        if (findkey == "title" &&
                                contains(ascii_lowercase(dummy), "booktitle"))
                                found = false;
                }
        } while (!found && !dummy.empty());
        if (dummy.empty())
                // no such keyword
                return string();

        // We are not sure if we got everything, because a value may
        // itself contain commas, as in key = "blah, blah".
        // Therefore we read all until the next "=" character, which follows a
        // new keyword
        keyvalue = dummy;
        dummy = token(data, ',', Entries++);
        while (!contains(dummy, '=') && !dummy.empty()) {
                keyvalue += ',' + dummy;
                dummy = token(data, ',', Entries++);
        }

        // replace double "" with originals ,, (two commas)
        // leaving us with the all-important line
        data = subst(keyvalue, "\"\"", ",,");

        // Clean-up.
        // 1. Spaces
        data = rtrim(data);
        // 2. if there is no opening '{' then a closing '}' is probably cruft.
        if (!contains(data, '{'))
                data = rtrim(data, "}");
        // An empty string here happens when this was the last keyword.
        string::size_type const idx =
                !data.empty() ? data.find('=') : string::npos;

        if (idx == string::npos)
                return string();

        // Keep only "= value".
        data = trim(data.substr(idx));

        if (data.length() < 2 || data[0] != '=') {      // a valid entry?
                return string();
        } else {
                // delete '=' and the following spaces
                data = ltrim(data, " =");
                if (data.length() < 2) {
                        return data;    // not long enough to find delimiters
                } else {
                        // keypos is an index into data; it starts just
                        // behind the opening delimiter.
                        string::size_type keypos = 1;
                        // the character that closes the value
                        char enclosing;
                        if (data[0] == '{') {
                                enclosing = '}';
                        } else if (data[0] == '"') {
                                enclosing = '"';
                        } else {
                                // no {} and no "", pure data but with a
                                // possible ',' at the end
                                return rtrim(data, ",");
                        }
                        // tmp is always the part of data from keypos on.
                        // As long as a '{' comes before both the next '}'
                        // and the next closing delimiter, we are looking
                        // at an inner {...} group: step over it.
                        string tmp = data.substr(keypos);
                        while (tmp.find('{') != string::npos &&
                               tmp.find('}') != string::npos &&
                               tmp.find('{') < tmp.find('}') &&
                               tmp.find('{') < tmp.find(enclosing)) {

                                // move behind the inner '{' ...
                                keypos += tmp.find('{') + 1;
                                tmp = data.substr(keypos);
                                // ... and then behind the '}' following it
                                keypos += tmp.find('}') + 1;
                                tmp = data.substr(keypos);
                        }
                        // No closing delimiter left: return everything,
                        // including the opening delimiter.
                        if (tmp.find(enclosing) == string::npos)
                                return data;
                        else {
                                // Return what lies between the opening
                                // and the closing delimiter.
                                keypos += tmp.find(enclosing);
                                return data.substr(1, keypos - 1);
                        }
                }
        }
}
 482
 483
 484 namespace {
 485
 486
 487 char const * const citeCommands[] = {
 488         "cite", "citet", "citep", "citealt", "citealp", "citeauthor",
 489         "citeyear", "citeyearpar" };
 490
 491 unsigned int const nCiteCommands =
 492         sizeof(citeCommands) / sizeof(char *);
 493
 494 CiteStyle const citeStyles[] = {
 495         CITE, CITET, CITEP, CITEALT, CITEALP,
 496         CITEAUTHOR, CITEYEAR, CITEYEARPAR };
 497
 498 unsigned int const nCiteStyles =
 499         sizeof(citeStyles) / sizeof(CiteStyle);
 500
 501 CiteStyle const citeStylesFull[] = {
 502         CITET, CITEP, CITEALT, CITEALP, CITEAUTHOR };
 503
 504 unsigned int const nCiteStylesFull =
 505         sizeof(citeStylesFull) / sizeof(CiteStyle);
 506
 507 CiteStyle const citeStylesUCase[] = {
 508         CITET, CITEP, CITEALT, CITEALP, CITEAUTHOR };
 509
 510 unsigned int const nCiteStylesUCase =
 511         sizeof(citeStylesUCase) / sizeof(CiteStyle);
 512
 513 } // namespace anon
 514
 515
 516 CitationStyle const getCitationStyle(string const & command)
 517 {
 518         if (command.empty()) return CitationStyle();
 519
 520         CitationStyle cs;
 521         string cmd = command;
 522
 523         if (cmd[0] == 'C') {
 524                 cs.forceUCase = true;
 525                 cmd[0] = 'c';
 526         }
 527
 528         size_t n = cmd.size() - 1;
 529         if (cmd[n] == '*') {
 530                 cs.full = true;
 531                 cmd = cmd.substr(0,n);
 532         }
 533
 534         char const * const * const last = citeCommands + nCiteCommands;
 535         char const * const * const ptr = std::find(citeCommands, last, cmd);
 536
 537         if (ptr != last) {
 538                 size_t idx = ptr - citeCommands;
 539                 cs.style = citeStyles[idx];
 540         }
 541
 542         return cs;
 543 }
 544
 545
 546 string const getCiteCommand(CiteStyle command, bool full, bool forceUCase)
 547 {
 548         string cite = citeCommands[command];
 549         if (full) {
 550                 CiteStyle const * last = citeStylesFull + nCiteStylesFull;
 551                 if (std::find(citeStylesFull, last, command) != last)
 552                         cite += '*';
 553         }
 554
 555         if (forceUCase) {
 556                 CiteStyle const * last = citeStylesUCase + nCiteStylesUCase;
 557                 if (std::find(citeStylesUCase, last, command) != last)
 558                         cite[0] = 'C';
 559         }
 560
 561         return cite;
 562 }
 563
 564
 565 CiteEngine getEngine(Buffer const & buffer)
 566 {
 567         CiteEngine engine = ENGINE_BASIC;
 568
 569         if (buffer.params().use_natbib) {
 570                 if (buffer.params().use_numerical_citations) {
 571                         engine = ENGINE_NATBIB_NUMERICAL;
 572                 } else {
 573                         engine = ENGINE_NATBIB_AUTHORYEAR;
 574                 }
 575         }
 576
 577         if (buffer.params().use_jurabib)
 578                 engine = ENGINE_JURABIB;
 579
 580         return engine;
 581 }
 582
 583
 584 vector<CiteStyle> const getCiteStyles(CiteEngine engine)
 585 {
 586         unsigned int nStyles = 0;
 587         unsigned int start = 0;
 588
 589         switch (engine) {
 590         case ENGINE_BASIC:
 591                 nStyles = 1;
 592                 start = 0;
 593                 break;
 594         case ENGINE_NATBIB_AUTHORYEAR:
 595         case ENGINE_NATBIB_NUMERICAL:
 596                 nStyles = nCiteStyles - 1;
 597                 start = 1;
 598                 break;
 599         case ENGINE_JURABIB:
 600                 nStyles = nCiteStyles;
 601                 start = 0;
 602                 break;
 603         }
 604
 605         typedef vector<CiteStyle> cite_vec;
 606
 607         cite_vec styles(nStyles);
 608         cite_vec::size_type i = 0;
 609         int j = start;
 610         for (; i != styles.size(); ++i, ++j)
 611                 styles[i] = citeStyles[j];
 612
 613         return styles;
 614 }
 615
 616
 617 vector<string> const
 618 getNumericalStrings(string const & key,
 619                     InfoMap const & map, vector<CiteStyle> const & styles)
 620 {
 621         if (map.empty()) {
 622                 return vector<string>();
 623         }
 624
 625         string const author = getAbbreviatedAuthor(map, key);
 626         string const year   = getYear(map, key);
 627         if (author.empty() || year.empty())
 628                 return vector<string>();
 629
 630         vector<string> vec(styles.size());
 631         for (vector<string>::size_type i = 0; i != vec.size(); ++i) {
 632                 string str;
 633
 634                 switch (styles[i]) {
 635                 case CITE:
 636                 case CITEP:
 637                         str = "[#ID]";
 638                         break;
 639
 640                 case CITET:
 641                         str = author + " [#ID]";
 642                         break;
 643
 644                 case CITEALT:
 645                         str = author + " #ID";
 646                         break;
 647
 648                 case CITEALP:
 649                         str = "#ID";
 650                         break;
 651
 652                 case CITEAUTHOR:
 653                         str = author;
 654                         break;
 655
 656                 case CITEYEAR:
 657                         str = year;
 658                         break;
 659
 660                 case CITEYEARPAR:
 661                         str = '(' + year + ')';
 662                         break;
 663                 }
 664
 665                 vec[i] = str;
 666         }
 667
 668         return vec;
 669 }
 670
 671
 672 vector<string> const
 673 getAuthorYearStrings(string const & key,
 674                     InfoMap const & map, vector<CiteStyle> const & styles)
 675 {
 676         if (map.empty()) {
 677                 return vector<string>();
 678         }
 679
 680         string const author = getAbbreviatedAuthor(map, key);
 681         string const year   = getYear(map, key);
 682         if (author.empty() || year.empty())
 683                 return vector<string>();
 684
 685         vector<string> vec(styles.size());
 686         for (vector<string>::size_type i = 0; i != vec.size(); ++i) {
 687                 string str;
 688
 689                 switch (styles[i]) {
 690                 case CITE:
 691                         // jurabib only: Author/Annotator
 692                         // (i.e. the "before" field, 2nd opt arg)
 693                         str = author + "/<" + _("before") + '>';
 694                         break;
 695
 696                 case CITET:
 697                         str = author + " (" + year + ')';
 698                         break;
 699
 700                 case CITEP:
 701                         str = '(' + author + ", " + year + ')';
 702                         break;
 703
 704                 case CITEALT:
 705                         str = author + ' ' + year ;
 706                         break;
 707
 708                 case CITEALP:
 709                         str = author + ", " + year ;
 710                         break;
 711
 712                 case CITEAUTHOR:
 713                         str = author;
 714                         break;
 715
 716                 case CITEYEAR:
 717                         str = year;
 718                         break;
 719
 720                 case CITEYEARPAR:
 721                         str = '(' + year + ')';
 722                         break;
 723                 }
 724
 725                 vec[i] = str;
 726         }
 727
 728         return vec;
 729 }
 730
 731 } // namespace biblio