]> git.lyx.org Git - lyx.git/blob - src/Biblio.cpp
Re-write of the BibTeX representation. The main change is that we now have
[lyx.git] / src / Biblio.cpp
1 /**
2  * \file Biblio.cpp
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author Angus Leeming
7  * \author Herbert Voß
8  * \author Richard Heck (re-write of BibTeX representation)
9  *
10  * Full author contact details are available in file CREDITS.
11  */
12
13 #include <config.h>
14
15 #include "Biblio.h"
16
17 #include "buffer_funcs.h"
18 #include "gettext.h"
19 #include "InsetIterator.h"
20 #include "Paragraph.h"
21
22 #include "insets/Inset.h"
23 #include "insets/InsetBibitem.h"
24 #include "insets/InsetBibtex.h"
25 #include "insets/InsetInclude.h"
26
27 #include "support/lstrings.h"
28
29 #include "boost/regex.hpp"
30
31 using std::string;
32 using std::vector;
33 using std::pair;
34 using std::endl;
35
36 namespace lyx {
37 using support::ascii_lowercase;
38 using support::bformat;
39 using support::compare_ascii_no_case;
40 using support::contains;
41 using support::getVectorFromString;
42 using support::ltrim;
43 using support::prefixIs;
44 using support::rtrim;
45 using support::split;
46 using support::subst;
47 using support::token;
48 using support::trim;
49
50 namespace biblio {
51
52         
53 BibTeXInfo::BibTeXInfo(): isBibTeX(true)
54 {}
55
56         
57 BibTeXInfo::BibTeXInfo(bool isBibTeX): isBibTeX(isBibTeX)
58 {};
59
60
61 bool BibTeXInfo::hasKey(docstring const & key) 
62 {
63         const_iterator it = find(key);
64         return it == end();
65 }
66
67
68 namespace {
69
/// Builds the list of all citation command names accepted by
/// is_possible_cite_command(), in the order they are listed below.
vector<string> const init_possible_cite_commands()
{
	char const * const names[] = {
		"cite", "citet", "citep", "citealt", "citealp",
		"citeauthor", "citeyear", "citeyearpar",
		"citet*", "citep*", "citealt*", "citealp*", "citeauthor*",
		"Citet",  "Citep",  "Citealt",  "Citealp",  "Citeauthor",
		"Citet*", "Citep*", "Citealt*", "Citealp*", "Citeauthor*",
		"fullcite",
		"footcite", "footcitet", "footcitep", "footcitealt",
		"footcitealp", "footciteauthor", "footciteyear", "footciteyearpar",
		"citefield", "citetitle", "cite*"
	};
	char const * const * const names_end =
		names + sizeof(names) / sizeof(names[0]);

	vector<string> commands;
	for (char const * const * p = names; p != names_end; ++p)
		commands.push_back(*p);
	return commands;
}
87
88
89         vector<string> const & possible_cite_commands()
90         {
91                 static vector<string> const pos = init_possible_cite_commands();
92                 return pos;
93         }
94
95
96         bool is_possible_cite_command(string const & input)
97         {
98                 vector<string> const & possibles = possible_cite_commands();
99                 vector<string>::const_iterator const end = possibles.end();
100                 return std::find(possibles.begin(), end, input) != end;
101         }
102
103
104         string const default_cite_command(CiteEngine engine)
105         {
106                 string str;
107                 switch (engine) {
108                         case ENGINE_BASIC:
109                                 str = "cite";
110                                 break;
111                         case ENGINE_NATBIB_AUTHORYEAR:
112                                 str = "citet";
113                                 break;
114                         case ENGINE_NATBIB_NUMERICAL:
115                                 str = "citep";
116                                 break;
117                         case ENGINE_JURABIB:
118                                 str = "cite";
119                                 break;
120                 }
121                 return str;
122         }
123
124
125 } // namespace anon
126
127
128 string const asValidLatexCommand(string const & input,
129                                  CiteEngine const engine)
130 {
131         string const default_str = default_cite_command(engine);
132         if (!is_possible_cite_command(input))
133                 return default_str;
134
135         string output;
136         switch (engine) {
137                 case ENGINE_BASIC:
138                         output = default_str;
139                         break;
140
141                 case ENGINE_NATBIB_AUTHORYEAR:
142                 case ENGINE_NATBIB_NUMERICAL:
143                         if (input == "cite" || input == "citefield" ||
144                                                         input == "citetitle" || input == "cite*")
145                                 output = default_str;
146                         else if (prefixIs(input, "foot"))
147                                 output = input.substr(4);
148                         else
149                                 output = input;
150                         break;
151
152                         case ENGINE_JURABIB: {
153         // Jurabib does not support the 'uppercase' natbib style.
154                                 if (input[0] == 'C')
155                                         output = string(1, 'c') + input.substr(1);
156                                 else
157                                         output = input;
158
159         // Jurabib does not support the 'full' natbib style.
160                                 string::size_type const n = output.size() - 1;
161                                 if (output != "cite*" && output[n] == '*')
162                                         output = output.substr(0, n);
163
164                                 break;
165                         }
166         }
167
168         return output;
169 }
170
171
172 docstring const familyName(docstring const & name)
173 {
174         if (name.empty())
175                 return docstring();
176
177 // Very simple parser
178         docstring fname = name;
179
180 // possible authorname combinations are:
181 // "Surname, FirstName"
182 // "Surname, F."
183 // "FirstName Surname"
184 // "F. Surname"
185         docstring::size_type idx = fname.find(',');
186         if (idx != docstring::npos)
187                 return ltrim(fname.substr(0, idx));
188         idx = fname.rfind('.');
189         if (idx != docstring::npos && idx + 1 < fname.size())
190                 fname = ltrim(fname.substr(idx + 1));
191 // test if we have a LaTeX Space in front
192         if (fname[0] == '\\')
193                 return fname.substr(2);
194
195         return rtrim(fname);
196 }
197
198
199 docstring const getAbbreviatedAuthor(BibKeyList const & map, string const & key)
200 {
201         BOOST_ASSERT(!map.empty());
202         BibKeyList::const_iterator it = map.find(key);
203         if (it == map.end())
204                 return docstring();
205         BibTeXInfo const & data = it->second;
206  
207         if (!data.isBibTeX) 
208                 return docstring();
209  
210         docstring author = getValueForKey(data, "author");
211  
212         if (author.empty()) {
213                 author = getValueForKey(data, "editor");
214                 if (author.empty()) {
215                         author = getValueForKey(data, "key");
216                         if (author.empty())
217                                 // FIXME UNICODE
218                                 return from_utf8(key);
219                         else 
220                                 return author; //this is the key
221                 }
222         }
223
224         //OK, we've got some names. Let's format them.
225         //try to split the author list on " and "
226         vector<docstring> const authors = getVectorFromString(author, from_ascii(" and "));
227         
228         if (authors.size() == 2)
229                 return bformat(_("%1$s and %2$s"),
230                                                                          familyName(authors[0]), familyName(authors[1]));
231         else if (authors.size() > 2)
232                 return bformat(_("%1$s et al."), familyName(authors[0]));
233         else  
234                 return familyName(authors[0]);
235 }
236
237
238 docstring const getYear(BibKeyList const & map, string const & key)
239 {
240         BOOST_ASSERT(!map.empty());
241  
242         BibKeyList::const_iterator it = map.find(key);
243         if (it == map.end())
244                 return docstring();
245         BibTeXInfo const & data = it->second;
246  
247         if (!data.isBibTeX) 
248                 return docstring();
249  
250         docstring year = getValueForKey(data, "year");
251         if (year.empty())
252                 year = _("No year");
253
254         return year;
255 }
256
257
258 namespace {
259 // A functor for use with std::sort, leading to case insensitive sorting
260 class compareNoCase: public std::binary_function<string, string, bool>
261 {
262         public:
263                 bool operator()(string const & s1, string const & s2) const {
264                         return compare_ascii_no_case(s1, s2) < 0;
265                 }
266 };
267 } // namespace anon
268
269
270 vector<string> const getKeys(BibKeyList const & map)
271 {
272         vector<string> bibkeys;
273         BibKeyList::const_iterator it  = map.begin();
274         BibKeyList::const_iterator end = map.end();
275         for (; it != end; ++it) {
276                 bibkeys.push_back(it->first);
277         }
278
279         std::sort(bibkeys.begin(), bibkeys.end(), compareNoCase());
280         return bibkeys;
281 }
282
283
284 docstring const getInfo(BibKeyList const & map, string const & key)
285 {
286         BOOST_ASSERT(!map.empty());
287  
288         BibKeyList::const_iterator it = map.find(key);
289         if (it == map.end())
290                 return docstring();
291         BibTeXInfo const & data = it->second;
292  
293         if (!data.isBibTeX) {
294                 BibTeXInfo::const_iterator it3 = data.find(from_ascii("ref"));
295                 return it3->second;
296         }
297  
298         //FIXME
299         //This could be made alot better using the biblio::TheEntryType
300         //field to customize the output based upon entry type.
301         
302         //Search for all possible "required" fields
303         docstring author = getValueForKey(data, "author");
304         if (author.empty())
305                 author = getValueForKey(data, "editor");
306  
307         docstring year      = getValueForKey(data, "year");
308         docstring title     = getValueForKey(data, "title");
309         docstring docLoc    = getValueForKey(data, "pages");
310         if (docLoc.empty()) {
311                 docLoc = getValueForKey(data, "chapter");
312                 if (!docLoc.empty())
313                         docLoc = from_ascii("Ch. ") + docLoc;
314         }       else 
315                 docLoc = from_ascii("pp. ") + docLoc;
316                 docstring media     = getValueForKey(data, "journal");
317                 if (media.empty()) {
318                         media = getValueForKey(data, "publisher");
319                         if (media.empty()) {
320                                 media = getValueForKey(data, "school");
321                                 if (media.empty())
322                                         media = getValueForKey(data, "institution");
323                         }
324                 }
325                 docstring volume = getValueForKey(data, "volume");
326  
327                 odocstringstream result;
328                 if (!author.empty())
329                         result << author << ", ";
330                 if (!title.empty())
331                         result << title;
332                 if (!media.empty())
333                         result << ", " << media;
334                 if (!year.empty())
335                         result << ", " << year;
336                 if (!docLoc.empty())
337                         result << ", " << docLoc;
338  
339                 docstring const result_str = rtrim(result.str());
340                 if (!result_str.empty())
341                         return result_str;
342  
343         // This should never happen (or at least be very unusual!)
344         return docstring();
345 }
346
347
348 namespace {
349
350 // Escape special chars.
351 // All characters are literals except: '.|*?+(){}[]^$\'
352 // These characters are literals when preceded by a "\", which is done here
353 // @todo: This function should be moved to support, and then the test in tests
354 //        should be moved there as well.
355 string const escape_special_chars(string const & expr)
356 {
357         // Search for all chars '.|*?+(){}[^$]\'
358         // Note that '[' and '\' must be escaped.
359         // This is a limitation of boost::regex, but all other chars in BREs
360         // are assumed literal.
361                 boost::regex reg("[].|*?+(){}^$\\[\\\\]");
362
363         // $& is a perl-like expression that expands to all
364         // of the current match
365         // The '$' must be prefixed with the escape character '\' for
366         // boost to treat it as a literal.
367         // Thus, to prefix a matched expression with '\', we use:
368                 return boost::regex_replace(expr, reg, "\\\\$&");
369 }
370
371
372 // A functor for use with std::find_if, used to ascertain whether a
373 // data entry matches the required regex_
374 // @throws: boost::regex_error if the supplied regex pattern is not valid
375 // @todo: This function should be moved to support.
376 class RegexMatch : public std::unary_function<string, bool>
377 {
378         public:
379 // re and icase are used to construct an instance of boost::RegEx.
380 // if icase is true, then matching is insensitive to case
381                 RegexMatch(BibKeyList const & m, string const & re, bool icase)
382                 : map_(m), regex_(re, icase) {}
383
384                 bool operator()(string const & key) const {
385                         //FIXME This should search the monolith.
386                         BibKeyList::const_iterator info = map_.find(key);
387                         if (info == map_.end())
388                                 return false;
389  
390                         BibTeXInfo const kvm = info->second;
391                         string const data = key + ' ' + to_utf8(kvm.allData);
392                         
393                         return boost::regex_search(data, regex_);
394                 }
395         private:
396                 BibKeyList const map_;
397                 mutable boost::regex regex_;
398 };
399
400 } // namespace anon
401
402
403 vector<string>::const_iterator searchKeys(BibKeyList const & theMap,
404                 vector<string> const & keys,
405                 string const & search_expr,
406                 vector<string>::const_iterator start,
407                 Search type, Direction dir,     bool caseSensitive)
408 {
409         // Preliminary checks
410         if (start < keys.begin() || start >= keys.end())
411                 return keys.end();
412
413         string expr = trim(search_expr);
414         if (expr.empty())
415                 return keys.end();
416
417         if (type == SIMPLE)
418         // We must escape special chars in the search_expr so that
419         // it is treated as a simple string by boost::regex.
420         expr = escape_special_chars(expr);
421
422         try {
423                 // Build the functor that will be passed to find_if.
424                 RegexMatch const match(theMap, expr, !caseSensitive);
425
426                 // Search the vector of 'keys' from 'start' for one
427                 // that matches the predicate 'match'. Searching can
428                 // be forward or backward from start.
429                 if (dir == FORWARD)
430                         return std::find_if(start, keys.end(), match);
431
432                 vector<string>::const_reverse_iterator rit(start);
433                 vector<string>::const_reverse_iterator rend = keys.rend();
434                 rit = std::find_if(rit, rend, match);
435
436                 if (rit == rend)
437                         return keys.end();
438                 // This is correct and always safe.
439                 // (See Meyer's Effective STL, Item 28.)
440                 return (++rit).base();
441         }
442         catch (boost::regex_error &) {
443                 return keys.end();
444         }
445 }
446
447
448 docstring const getValueForKey(BibTeXInfo const & data, string const & findkey)
449 {
450         docstring key = from_ascii(findkey);
451         BibTeXInfo::const_iterator it = data.find(key);
452         if (it == data.end())
453                 return docstring();
454         //FIXME ?? return it->second??
455         BibTeXInfo & data2 = const_cast<BibTeXInfo &>(data);
456         return data2[key];
457 }
458
459 namespace {
460
461
462 char const * const citeCommands[] = {
463         "cite", "citet", "citep", "citealt", "citealp", "citeauthor",
464         "citeyear", "citeyearpar" };
465
466 unsigned int const nCiteCommands =
467                 sizeof(citeCommands) / sizeof(char *);
468
469 CiteStyle const citeStyles[] = {
470         CITE, CITET, CITEP, CITEALT, CITEALP,
471 CITEAUTHOR, CITEYEAR, CITEYEARPAR };
472
473 unsigned int const nCiteStyles =
474                 sizeof(citeStyles) / sizeof(CiteStyle);
475
476 CiteStyle const citeStylesFull[] = {
477         CITET, CITEP, CITEALT, CITEALP, CITEAUTHOR };
478
479 unsigned int const nCiteStylesFull =
480                 sizeof(citeStylesFull) / sizeof(CiteStyle);
481
482 CiteStyle const citeStylesUCase[] = {
483         CITET, CITEP, CITEALT, CITEALP, CITEAUTHOR };
484
485 unsigned int const nCiteStylesUCase =
486         sizeof(citeStylesUCase) / sizeof(CiteStyle);
487
488 } // namespace anon
489
490
491 CitationStyle::CitationStyle(string const & command)
492         : style(CITE), full(false), forceUCase(false)
493 {
494         if (command.empty())
495                 return;
496
497         string cmd = command;
498         if (cmd[0] == 'C') {
499                 forceUCase = true;
500                 cmd[0] = 'c';
501         }
502
503         string::size_type const n = cmd.size() - 1;
504         if (cmd != "cite" && cmd[n] == '*') {
505                 full = true;
506                 cmd = cmd.substr(0,n);
507         }
508
509         char const * const * const last = citeCommands + nCiteCommands;
510         char const * const * const ptr = std::find(citeCommands, last, cmd);
511
512         if (ptr != last) {
513                 size_t idx = ptr - citeCommands;
514                 style = citeStyles[idx];
515         }
516 }
517
518
519 string const CitationStyle::asLatexStr() const
520 {
521         string cite = citeCommands[style];
522         if (full) {
523                 CiteStyle const * last = citeStylesFull + nCiteStylesFull;
524                 if (std::find(citeStylesFull, last, style) != last)
525                         cite += '*';
526         }
527
528         if (forceUCase) {
529                 CiteStyle const * last = citeStylesUCase + nCiteStylesUCase;
530                 if (std::find(citeStylesUCase, last, style) != last)
531                         cite[0] = 'C';
532         }
533
534         return cite;
535 }
536
537
538 vector<CiteStyle> const getCiteStyles(CiteEngine const engine)
539 {
540         unsigned int nStyles = 0;
541         unsigned int start = 0;
542
543         switch (engine) {
544                 case ENGINE_BASIC:
545                         nStyles = 1;
546                         start = 0;
547                         break;
548                 case ENGINE_NATBIB_AUTHORYEAR:
549                 case ENGINE_NATBIB_NUMERICAL:
550                         nStyles = nCiteStyles - 1;
551                         start = 1;
552                         break;
553                 case ENGINE_JURABIB:
554                         nStyles = nCiteStyles;
555                         start = 0;
556                         break;
557         }
558
559         typedef vector<CiteStyle> cite_vec;
560
561         cite_vec styles(nStyles);
562         cite_vec::size_type i = 0;
563         int j = start;
564         for (; i != styles.size(); ++i, ++j)
565                 styles[i] = citeStyles[j];
566
567         return styles;
568 }
569
570
571 vector<docstring> const
572         getNumericalStrings(string const & key,
573         BibKeyList const & map, vector<CiteStyle> const & styles)
574 {
575         if (map.empty())
576                 return vector<docstring>();
577
578         docstring const author = getAbbreviatedAuthor(map, key);
579         docstring const year   = getYear(map, key);
580         if (author.empty() || year.empty())
581                 return vector<docstring>();
582
583         vector<docstring> vec(styles.size());
584         for (vector<docstring>::size_type i = 0; i != vec.size(); ++i) {
585                 docstring str;
586
587                 switch (styles[i]) {
588                         case CITE:
589                         case CITEP:
590                                 str = from_ascii("[#ID]");
591                                 break;
592
593                         case CITET:
594                                 str = author + " [#ID]";
595                                 break;
596
597                         case CITEALT:
598                                 str = author + " #ID";
599                                 break;
600
601                         case CITEALP:
602                                 str = from_ascii("#ID");
603                                 break;
604
605                         case CITEAUTHOR:
606                                 str = author;
607                                 break;
608
609                         case CITEYEAR:
610                                 str = year;
611                                 break;
612
613                         case CITEYEARPAR:
614                                 str = '(' + year + ')';
615                                 break;
616                 }
617
618                 vec[i] = str;
619         }
620
621         return vec;
622 }
623
624
625 vector<docstring> const
626                 getAuthorYearStrings(string const & key,
627                         BibKeyList const & map, vector<CiteStyle> const & styles)
628 {
629         if (map.empty())
630                 return vector<docstring>();
631
632         docstring const author = getAbbreviatedAuthor(map, key);
633         docstring const year   = getYear(map, key);
634         if (author.empty() || year.empty())
635                 return vector<docstring>();
636
637         vector<docstring> vec(styles.size());
638         for (vector<docstring>::size_type i = 0; i != vec.size(); ++i) {
639                 docstring str;
640
641                 switch (styles[i]) {
642                         case CITE:
643                 // jurabib only: Author/Annotator
644                 // (i.e. the "before" field, 2nd opt arg)
645                                 str = author + "/<" + _("before") + '>';
646                                 break;
647
648                         case CITET:
649                                 str = author + " (" + year + ')';
650                                 break;
651
652                         case CITEP:
653                                 str = '(' + author + ", " + year + ')';
654                                 break;
655
656                         case CITEALT:
657                                 str = author + ' ' + year ;
658                                 break;
659
660                         case CITEALP:
661                                 str = author + ", " + year ;
662                                 break;
663
664                         case CITEAUTHOR:
665                                 str = author;
666                                 break;
667
668                         case CITEYEAR:
669                                 str = year;
670                                 break;
671
672                         case CITEYEARPAR:
673                                 str = '(' + year + ')';
674                                 break;
675                 }
676
677                 vec[i] = str;
678         }
679
680         return vec;
681 }
682
683
684 void fillWithBibKeys(Buffer const * const buf, 
685                      BibKeyList & keys)
686 {       
687         /// if this is a child document and the parent is already loaded
688         /// use the parent's list instead  [ale990412]
689         Buffer const * const tmp = buf->getMasterBuffer();
690         BOOST_ASSERT(tmp);
691         if (tmp != buf) {
692                 fillWithBibKeys(tmp, keys);
693                 return;
694         }
695
696         // Pre-load all child documents.
697         loadChildDocuments(*buf);
698
699         for (InsetIterator it = inset_iterator_begin(buf->inset()); it; ++it)
700                         it->fillWithBibKeys(*buf, keys, it);
701 }
702 } // namespace biblio
703 } // namespace lyx
704