]> git.lyx.org Git - lyx.git/blob - src/BiblioInfo.cpp
d98fbf666e3e46b3d7e5504260e93daeaa5cbe8e
[lyx.git] / src / BiblioInfo.cpp
1 /**
2  * \file BiblioInfo.cpp
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author Angus Leeming
7  * \author Herbert Voß
8  * \author Richard Heck
9  *
10  * Full author contact details are available in file CREDITS.
11  */
12
13 #include <config.h>
14
15 #include "BiblioInfo.h"
16 #include "Buffer.h"
17 #include "BufferParams.h"
18 #include "buffer_funcs.h"
19 #include "Encoding.h"
20 #include "InsetIterator.h"
21 #include "Paragraph.h"
22
23 #include "insets/Inset.h"
24 #include "insets/InsetBibitem.h"
25 #include "insets/InsetBibtex.h"
26 #include "insets/InsetInclude.h"
27
28 #include "support/docstream.h"
29 #include "support/gettext.h"
30 #include "support/lassert.h"
31 #include "support/lstrings.h"
32 #include "support/textutils.h"
33
34 #include "boost/regex.hpp"
35
36 using namespace std;
37 using namespace lyx::support;
38
39
40 namespace lyx {
41
42 //////////////////////////////////////////////////////////////////////
43 //
44 // BibTeXInfo
45 //
46 //////////////////////////////////////////////////////////////////////
47
48 BibTeXInfo::BibTeXInfo(docstring const & key, docstring const & type)
49         : is_bibtex_(true), bib_key_(key), entry_type_(type), info_()
50 {}
51
52
53 bool BibTeXInfo::hasField(docstring const & field) const
54 {
55         return count(field) == 1;
56 }
57
58
59 docstring const & BibTeXInfo::operator[](docstring const & field) const
60 {
61         BibTeXInfo::const_iterator it = find(field);
62         if (it != end())
63                 return it->second;
64         static docstring const empty_value = docstring();
65         return empty_value;
66 }
67         
68         
69 docstring const & BibTeXInfo::operator[](string const & field) const
70 {
71         return operator[](from_ascii(field));
72 }
73
74
75 docstring BibTeXInfo::getValueForKey(string const & key, 
76                 BibTeXInfo const * const xref) const
77 {
78         docstring const ret = operator[](key);
79         if (!ret.empty() || !xref)
80                 return ret;
81         return (*xref)[key];
82 }
83
84
85 docstring familyName(docstring const & name)
86 {
87         if (name.empty())
88                 return docstring();
89
90         // first we look for a comma, and take the last name to be everything
91         // preceding the right-most one, so that we also get the "jr" part.
92         docstring::size_type idx = name.rfind(',');
93         if (idx != docstring::npos)
94                 return ltrim(name.substr(0, idx));
95
96         // OK, so now we want to look for the last name. We're going to
97         // include the "von" part. This isn't perfect.
98         // Split on spaces, to get various tokens.
99         vector<docstring> pieces = getVectorFromString(name, from_ascii(" "));
100         // If we only get two, assume the last one is the last name
101         if (pieces.size() <= 2)
102                 return pieces.back();
103
104         // Now we look for the first token that begins with a lower case letter.
105         vector<docstring>::const_iterator it = pieces.begin();
106         vector<docstring>::const_iterator en = pieces.end();
107         for (; it != en; ++it) {
108                 if ((*it).size() == 0)
109                         continue;
110                 char_type const c = (*it)[0];
111                 if (isLower(c))
112                         break;
113         }
114
115         if (it == en) // we never found a "von"
116                 return pieces.back();
117
118         // reconstruct what we need to return
119         docstring retval;
120         bool first = true;
121         for (; it != en; ++it) {
122                 if (!first)
123                         retval += " ";
124                 else 
125                         first = false;
126                 retval += *it;
127         }
128         return retval;
129 }
130
131 docstring const BibTeXInfo::getAbbreviatedAuthor() const
132 {
133         if (!is_bibtex_) {
134                 docstring const opt = trim(operator[]("label"));
135                 if (opt.empty())
136                         return docstring();
137
138                 docstring authors;
139                 split(opt, authors, '(');
140                 return authors;
141         }
142
143         docstring author = operator[]("author");
144         if (author.empty()) {
145                 author = operator[]("editor");
146                 if (author.empty())
147                         return bib_key_;
148         }
149
150         // OK, we've got some names. Let's format them.
151         // Try to split the author list on " and "
152         vector<docstring> const authors =
153                 getVectorFromString(author, from_ascii(" and "));
154
155         if (authors.size() == 2)
156                 return bformat(_("%1$s and %2$s"),
157                         familyName(authors[0]), familyName(authors[1]));
158
159         if (authors.size() > 2)
160                 return bformat(_("%1$s et al."), familyName(authors[0]));
161
162         return familyName(authors[0]);
163 }
164
165
166 docstring const BibTeXInfo::getYear() const
167 {
168         if (is_bibtex_) 
169                 return operator[]("year");
170
171         docstring const opt = trim(operator[]("label"));
172         if (opt.empty())
173                 return docstring();
174
175         docstring authors;
176         docstring const tmp = split(opt, authors, '(');
177         docstring year;
178         split(tmp, year, ')');
179         return year;
180 }
181
182
183 docstring const BibTeXInfo::getXRef() const
184 {
185         if (!is_bibtex_)
186                 return docstring();
187         return operator[]("crossref");
188 }
189
190
191 namespace {
192
193         docstring convertLaTeXCommands(docstring const & str)
194         {
195                 docstring val = str;
196                 docstring ret;
197         
198                 bool scanning_cmd = false;
199                 bool scanning_math = false;
200                 bool escaped = false; // used to catch \$, etc.
201                 while (val.size()) {
202                         char_type const ch = val[0];
203
204                         // if we're scanning math, we output everything until we
205                         // find an unescaped $, at which point we break out.
206                         if (scanning_math) {
207                                 if (escaped)
208                                         escaped = false;
209                                 else if (ch == '\\')
210                                         escaped = true;
211                                 else if (ch == '$') 
212                                         scanning_math = false;
213                                 ret += ch;
214                                 val = val.substr(1);
215                                 continue;
216                         }
217
218                         // if we're scanning a command name, then we just
219                         // discard characters until we hit something that
220                         // isn't alpha.
221                         if (scanning_cmd) {
222                                 if (isAlphaASCII(ch)) {
223                                         val = val.substr(1);
224                                         escaped = false;
225                                         continue;
226                                 }
227                                 // so we're done with this command.
228                                 // now we fall through and check this character.
229                                 scanning_cmd = false;
230                         }
231
232                         // was the last character a \? If so, then this is something like: \\,
233                         // or \$, so we'll just output it. That's probably not always right...
234                         if (escaped) {
235                                 ret += ch;
236                                 val = val.substr(1);
237                                 escaped = false;
238                                 continue;
239                         }
240
241                         if (ch == '$') {
242                                 ret += ch;
243                                 val = val.substr(1);
244                                 scanning_math = true;
245                                 continue;
246                         }
247
248                         // we just ignore braces
249                         if (ch == '{' || ch == '}') {
250                                 val = val.substr(1);
251                                 continue;
252                         }
253
254                         // we're going to check things that look like commands, so if
255                         // this doesn't, just output it.
256                         if (ch != '\\') {
257                                 ret += ch;
258                                 val = val.substr(1);
259                                 continue;
260                         }
261
262                         // ok, could be a command of some sort
263                         // let's see if it corresponds to some unicode
264                         // unicodesymbols has things in the form: \"{u},
265                         // whereas we may see things like: \"u. So we'll
266                         // look for that and change it, if necessary.
267                         static boost::regex const reg("^\\\\\\W\\w");
268                         if (boost::regex_search(to_utf8(val), reg)) {
269                                 val.insert(3, from_ascii("}"));
270                                 val.insert(2, from_ascii("{"));
271                         }
272                         docstring rem;
273                         docstring const cnvtd = Encodings::fromLaTeXCommand(val, rem);
274                         if (!cnvtd.empty()) {
275                                 // it did, so we'll take that bit and proceed with what's left
276                                 ret += cnvtd;
277                                 val = rem;
278                                 continue;
279                         }
280                         // it's a command of some sort
281                         scanning_cmd = true;
282                         escaped = true;
283                         val = val.substr(1);
284                 }
285                 return ret;
286         }
287
288 } // anon namespace
289
290
291 docstring const & BibTeXInfo::getInfo(BibTeXInfo const * const xref) const
292 {
293         if (!info_.empty())
294                 return info_;
295
296         if (!is_bibtex_) {
297                 BibTeXInfo::const_iterator it = find(from_ascii("ref"));
298                 info_ = it->second;
299                 return info_;
300         }
301  
302         // FIXME
303         // This could be made a lot better using the entry_type_
304         // field to customize the output based upon entry type.
305         
306         // Search for all possible "required" fields
307         docstring author = getValueForKey("author", xref);
308         if (author.empty())
309                 author = getValueForKey("editor", xref);
310  
311         docstring year   = getValueForKey("year", xref);
312         docstring title  = getValueForKey("title", xref);
313         docstring docLoc = getValueForKey("pages", xref);
314         if (docLoc.empty()) {
315                 docLoc = getValueForKey("chapter", xref);
316                 if (!docLoc.empty())
317                         docLoc = _("Ch. ") + docLoc;
318         }       else {
319                 docLoc = _("pp. ") + docLoc;
320         }
321
322         docstring media = getValueForKey("journal", xref);
323         if (media.empty()) {
324                 media = getValueForKey("publisher", xref);
325                 if (media.empty()) {
326                         media = getValueForKey("school", xref);
327                         if (media.empty())
328                                 media = getValueForKey("institution");
329                 }
330         }
331         docstring volume = getValueForKey("volume", xref);
332
333         odocstringstream result;
334         if (!author.empty())
335                 result << author << ", ";
336         if (!title.empty())
337                 result << title;
338         if (!media.empty())
339                 result << ", " << media;
340         if (!year.empty())
341                 result << " (" << year << ")";
342         if (!docLoc.empty())
343                 result << ", " << docLoc;
344
345         docstring const result_str = rtrim(result.str());
346         if (!result_str.empty()) {
347                 info_ = convertLaTeXCommands(result_str);
348                 return info_;
349         }
350
351         // This should never happen (or at least be very unusual!)
352         static docstring e = docstring();
353         return e;
354 }
355
356
357 //////////////////////////////////////////////////////////////////////
358 //
359 // BiblioInfo
360 //
361 //////////////////////////////////////////////////////////////////////
362
363 namespace {
364 // A functor for use with sort, leading to case insensitive sorting
365         class compareNoCase: public binary_function<docstring, docstring, bool>
366         {
367                 public:
368                         bool operator()(docstring const & s1, docstring const & s2) const {
369                                 return compare_no_case(s1, s2) < 0;
370                         }
371         };
372 } // namespace anon
373
374
375 vector<docstring> const BiblioInfo::getKeys() const
376 {
377         vector<docstring> bibkeys;
378         BiblioInfo::const_iterator it  = begin();
379         for (; it != end(); ++it)
380                 bibkeys.push_back(it->first);
381         sort(bibkeys.begin(), bibkeys.end(), compareNoCase());
382         return bibkeys;
383 }
384
385
386 vector<docstring> const BiblioInfo::getFields() const
387 {
388         vector<docstring> bibfields;
389         set<docstring>::const_iterator it = field_names_.begin();
390         set<docstring>::const_iterator end = field_names_.end();
391         for (; it != end; ++it)
392                 bibfields.push_back(*it);
393         sort(bibfields.begin(), bibfields.end());
394         return bibfields;
395 }
396
397
398 vector<docstring> const BiblioInfo::getEntries() const
399 {
400         vector<docstring> bibentries;
401         set<docstring>::const_iterator it = entry_types_.begin();
402         set<docstring>::const_iterator end = entry_types_.end();
403         for (; it != end; ++it)
404                 bibentries.push_back(*it);
405         sort(bibentries.begin(), bibentries.end());
406         return bibentries;
407 }
408
409
410 docstring const BiblioInfo::getAbbreviatedAuthor(docstring const & key) const
411 {
412         BiblioInfo::const_iterator it = find(key);
413         if (it == end())
414                 return docstring();
415         BibTeXInfo const & data = it->second;
416         return data.getAbbreviatedAuthor();
417 }
418
419
420 docstring const BiblioInfo::getYear(docstring const & key) const
421 {
422         BiblioInfo::const_iterator it = find(key);
423         if (it == end())
424                 return docstring();
425         BibTeXInfo const & data = it->second;
426         docstring year = data.getYear();
427         if (!year.empty())
428                 return year;
429         // let's try the crossref
430         docstring const xref = data.getXRef();
431         if (xref.empty())
432                 return year; // no luck
433         BiblioInfo::const_iterator const xrefit = find(xref);
434         if (xrefit == end())
435                 return year; // no luck again
436         BibTeXInfo const & xref_data = xrefit->second;
437         return xref_data.getYear();
438         return data.getYear();
439 }
440
441
442 docstring const BiblioInfo::getInfo(docstring const & key) const
443 {
444         BiblioInfo::const_iterator it = find(key);
445         if (it == end())
446                 return docstring();
447         BibTeXInfo const & data = it->second;
448         BibTeXInfo const * xrefptr = 0;
449         docstring const xref = data.getXRef();
450         if (!xref.empty()) {
451                 BiblioInfo::const_iterator const xrefit = find(xref);
452                 if (xrefit != end())
453                         xrefptr = &(xrefit->second);
454         }
455         return data.getInfo(xrefptr);
456 }
457
458
459 vector<docstring> const BiblioInfo::getCiteStrings(
460         docstring const & key, Buffer const & buf) const
461 {
462         CiteEngine const engine = buf.params().citeEngine();
463         if (engine == ENGINE_BASIC || engine == ENGINE_NATBIB_NUMERICAL)
464                 return getNumericalStrings(key, buf);
465         else
466                 return getAuthorYearStrings(key, buf);
467 }
468
469
470 vector<docstring> const BiblioInfo::getNumericalStrings(
471         docstring const & key, Buffer const & buf) const
472 {
473         if (empty())
474                 return vector<docstring>();
475
476         docstring const author = getAbbreviatedAuthor(key);
477         docstring const year   = getYear(key);
478         if (author.empty() || year.empty())
479                 return vector<docstring>();
480
481         vector<CiteStyle> const & styles = citeStyles(buf.params().citeEngine());
482         
483         vector<docstring> vec(styles.size());
484         for (size_t i = 0; i != vec.size(); ++i) {
485                 docstring str;
486
487                 switch (styles[i]) {
488                         case CITE:
489                         case CITEP:
490                                 str = from_ascii("[#ID]");
491                                 break;
492
493                         case NOCITE:
494                                 str = _("Add to bibliography only.");
495                                 break;
496
497                         case CITET:
498                                 str = author + " [#ID]";
499                                 break;
500
501                         case CITEALT:
502                                 str = author + " #ID";
503                                 break;
504
505                         case CITEALP:
506                                 str = from_ascii("#ID");
507                                 break;
508
509                         case CITEAUTHOR:
510                                 str = author;
511                                 break;
512
513                         case CITEYEAR:
514                                 str = year;
515                                 break;
516
517                         case CITEYEARPAR:
518                                 str = '(' + year + ')';
519                                 break;
520                 }
521
522                 vec[i] = str;
523         }
524
525         return vec;
526 }
527
528
529 vector<docstring> const BiblioInfo::getAuthorYearStrings(
530         docstring const & key, Buffer const & buf) const
531 {
532         if (empty())
533                 return vector<docstring>();
534
535         docstring const author = getAbbreviatedAuthor(key);
536         docstring const year   = getYear(key);
537         if (author.empty() || year.empty())
538                 return vector<docstring>();
539
540         vector<CiteStyle> const & styles = citeStyles(buf.params().citeEngine());
541         
542         vector<docstring> vec(styles.size());
543         for (size_t i = 0; i != vec.size(); ++i) {
544                 docstring str;
545
546                 switch (styles[i]) {
547                         case CITE:
548                 // jurabib only: Author/Annotator
549                 // (i.e. the "before" field, 2nd opt arg)
550                                 str = author + "/<" + _("before") + '>';
551                                 break;
552
553                         case NOCITE:
554                                 str = _("Add to bibliography only.");
555                                 break;
556
557                         case CITET:
558                                 str = author + " (" + year + ')';
559                                 break;
560
561                         case CITEP:
562                                 str = '(' + author + ", " + year + ')';
563                                 break;
564
565                         case CITEALT:
566                                 str = author + ' ' + year ;
567                                 break;
568
569                         case CITEALP:
570                                 str = author + ", " + year ;
571                                 break;
572
573                         case CITEAUTHOR:
574                                 str = author;
575                                 break;
576
577                         case CITEYEAR:
578                                 str = year;
579                                 break;
580
581                         case CITEYEARPAR:
582                                 str = '(' + year + ')';
583                                 break;
584                 }
585                 vec[i] = str;
586         }
587         return vec;
588 }
589
590
591 void BiblioInfo::mergeBiblioInfo(BiblioInfo const & info)
592 {
593         bimap_.insert(info.begin(), info.end());
594 }
595
596
597 //////////////////////////////////////////////////////////////////////
598 //
599 // CitationStyle
600 //
601 //////////////////////////////////////////////////////////////////////
602
603 namespace {
604
605
606 char const * const citeCommands[] = {
607         "cite", "citet", "citep", "citealt", "citealp",
608         "citeauthor", "citeyear", "citeyearpar", "nocite" };
609
610 unsigned int const nCiteCommands =
611                 sizeof(citeCommands) / sizeof(char *);
612
613 CiteStyle const citeStylesArray[] = {
614         CITE, CITET, CITEP, CITEALT, CITEALP, 
615         CITEAUTHOR, CITEYEAR, CITEYEARPAR, NOCITE };
616
617 unsigned int const nCiteStyles =
618                 sizeof(citeStylesArray) / sizeof(CiteStyle);
619
620 CiteStyle const citeStylesFull[] = {
621         CITET, CITEP, CITEALT, CITEALP, CITEAUTHOR };
622
623 unsigned int const nCiteStylesFull =
624                 sizeof(citeStylesFull) / sizeof(CiteStyle);
625
626 CiteStyle const citeStylesUCase[] = {
627         CITET, CITEP, CITEALT, CITEALP, CITEAUTHOR };
628
629 unsigned int const nCiteStylesUCase =
630         sizeof(citeStylesUCase) / sizeof(CiteStyle);
631
632 } // namespace anon
633
634
635 CitationStyle citationStyleFromString(string const & command)
636 {
637         CitationStyle s;
638         if (command.empty())
639                 return s;
640
641         string cmd = command;
642         if (cmd[0] == 'C') {
643                 s.forceUpperCase = true;
644                 cmd[0] = 'c';
645         }
646
647         size_t const n = cmd.size() - 1;
648         if (cmd != "cite" && cmd[n] == '*') {
649                 s.full = true;
650                 cmd = cmd.substr(0, n);
651         }
652
653         char const * const * const last = citeCommands + nCiteCommands;
654         char const * const * const ptr = find(citeCommands, last, cmd);
655
656         if (ptr != last) {
657                 size_t idx = ptr - citeCommands;
658                 s.style = citeStylesArray[idx];
659         }
660         return s;
661 }
662
663
664 string citationStyleToString(const CitationStyle & s)
665 {
666         string cite = citeCommands[s.style];
667         if (s.full) {
668                 CiteStyle const * last = citeStylesFull + nCiteStylesFull;
669                 if (find(citeStylesFull, last, s.style) != last)
670                         cite += '*';
671         }
672
673         if (s.forceUpperCase) {
674                 CiteStyle const * last = citeStylesUCase + nCiteStylesUCase;
675                 if (find(citeStylesUCase, last, s.style) != last)
676                         cite[0] = 'C';
677         }
678
679         return cite;
680 }
681
682 vector<CiteStyle> citeStyles(CiteEngine engine)
683 {
684         unsigned int nStyles = 0;
685         unsigned int start = 0;
686
687         switch (engine) {
688                 case ENGINE_BASIC:
689                         nStyles = 2;
690                         start = 0;
691                         break;
692                 case ENGINE_NATBIB_AUTHORYEAR:
693                 case ENGINE_NATBIB_NUMERICAL:
694                         nStyles = nCiteStyles - 1;
695                         start = 1;
696                         break;
697                 case ENGINE_JURABIB:
698                         nStyles = nCiteStyles;
699                         start = 0;
700                         break;
701         }
702
703         vector<CiteStyle> styles(nStyles);
704         size_t i = 0;
705         int j = start;
706         for (; i != styles.size(); ++i, ++j)
707                 styles[i] = citeStylesArray[j];
708
709         return styles;
710 }
711
712 } // namespace lyx
713