]> git.lyx.org Git - lyx.git/blob - src/BiblioInfo.cpp
be8c369ab65912289700af63a0280be34855960d
[lyx.git] / src / BiblioInfo.cpp
1 /**
2  * \file BiblioInfo.cpp
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author Angus Leeming
7  * \author Herbert Voß
8  * \author Richard Heck
9  *
10  * Full author contact details are available in file CREDITS.
11  */
12
13 #include <config.h>
14
15 #include "BiblioInfo.h"
16 #include "Buffer.h"
17 #include "BufferParams.h"
18 #include "buffer_funcs.h"
19 #include "Encoding.h"
20 #include "InsetIterator.h"
21 #include "Paragraph.h"
22
23 #include "insets/Inset.h"
24 #include "insets/InsetBibitem.h"
25 #include "insets/InsetBibtex.h"
26 #include "insets/InsetInclude.h"
27
28 #include "support/docstream.h"
29 #include "support/gettext.h"
30 #include "support/lassert.h"
31 #include "support/lstrings.h"
32 #include "support/textutils.h"
33
34 #include "boost/regex.hpp"
35
36 using namespace std;
37 using namespace lyx::support;
38
39
40 namespace lyx {
41
42 //////////////////////////////////////////////////////////////////////
43 //
44 // BibTeXInfo
45 //
46 //////////////////////////////////////////////////////////////////////
47
48 BibTeXInfo::BibTeXInfo(docstring const & key, docstring const & type)
49         : is_bibtex_(true), bib_key_(key), entry_type_(type), info_()
50 {}
51
52
53 bool BibTeXInfo::hasField(docstring const & field) const
54 {
55         return count(field) == 1;
56 }
57
58
59 docstring const & BibTeXInfo::operator[](docstring const & field) const
60 {
61         BibTeXInfo::const_iterator it = find(field);
62         if (it != end())
63                 return it->second;
64         static docstring const empty_value = docstring();
65         return empty_value;
66 }
67         
68         
69 docstring const & BibTeXInfo::operator[](string const & field) const
70 {
71         return operator[](from_ascii(field));
72 }
73
74
75 docstring BibTeXInfo::getValueForKey(string const & key, 
76                 BibTeXInfo const * const xref) const
77 {
78         docstring const ret = operator[](key);
79         if (!ret.empty() or !xref)
80                 return ret;
81         return (*xref)[key];
82 }
83
84
85 docstring familyName(docstring const & name)
86 {
87         if (name.empty())
88                 return docstring();
89
90         // first we look for a comma, and take the last name to be everything
91         // preceding the right-most one, so that we also get the "jr" part.
92         docstring::size_type idx = name.rfind(',');
93         if (idx != docstring::npos)
94                 return ltrim(name.substr(0, idx));
95
96         // OK, so now we want to look for the last name. We're going to
97         // include the "von" part. This isn't perfect.
98         // Split on spaces, to get various tokens.
99         vector<docstring> pieces = getVectorFromString(name, from_ascii(" "));
100         // If we only get two, assume the last one is the last name
101         if (pieces.size() <= 2)
102                 return pieces.back();
103
104         // Now we look for the first token that begins with a lower case letter.
105         vector<docstring>::const_iterator it = pieces.begin();
106         vector<docstring>::const_iterator en = pieces.end();
107         for (; it != en; ++it) {
108                 if ((*it).size() == 0)
109                         continue;
110                 char_type const c = (*it)[0];
111                 if (isLower(c))
112                         break;
113         }
114
115         if (it == en) // we never found a "von"
116                 return pieces.back();
117
118         // reconstruct what we need to return
119         docstring retval;
120         bool first = true;
121         for (; it != en; ++it) {
122                 if (!first)
123                         retval += " ";
124                 else 
125                         first = false;
126                 retval += *it;
127         }
128         return retval;
129 }
130
131 docstring const BibTeXInfo::getAbbreviatedAuthor() const
132 {
133         if (!is_bibtex_) {
134                 docstring const opt = trim(operator[]("label"));
135                 if (opt.empty())
136                         return docstring();
137
138                 docstring authors;
139                 split(opt, authors, '(');
140                 return authors;
141         }
142
143         docstring author = operator[]("author");
144         if (author.empty()) {
145                 author = operator[]("editor");
146                 if (author.empty())
147                         return bib_key_;
148         }
149
150         // OK, we've got some names. Let's format them.
151         // Try to split the author list on " and "
152         vector<docstring> const authors =
153                 getVectorFromString(author, from_ascii(" and "));
154
155         if (authors.size() == 2)
156                 return bformat(_("%1$s and %2$s"),
157                         familyName(authors[0]), familyName(authors[1]));
158
159         if (authors.size() > 2)
160                 return bformat(_("%1$s et al."), familyName(authors[0]));
161
162         return familyName(authors[0]);
163 }
164
165
166 docstring const BibTeXInfo::getYear() const
167 {
168         if (!is_bibtex_) {
169                 docstring const opt = trim(operator[]("label"));
170                 if (opt.empty())
171                         return docstring();
172
173                 docstring authors;
174                 docstring const tmp = split(opt, authors, '(');
175                 docstring year;
176                 split(tmp, year, ')');
177                 return year;
178         }
179
180         docstring year = operator[]("year");
181         if (year.empty())
182                 year = _("No year");
183         return year;
184 }
185
186
187 docstring const BibTeXInfo::getXRef() const
188 {
189         if (!is_bibtex_)
190                 return docstring();
191         return operator[]("crossref");
192 }
193
194
195 namespace {
196
197         docstring convertLaTeXCommands(docstring const & str)
198         {
199                 docstring val = str;
200                 docstring ret;
201         
202                 bool scanning_cmd = false;
203                 bool scanning_math = false;
204                 bool escaped = false; // used to catch \$, etc.
205                 while (val.size()) {
206                         char_type const ch = val[0];
207
208                         // if we're scanning math, we output everything until we
209                         // find an unescaped $, at which point we break out.
210                         if (scanning_math) {
211                                 if (escaped)
212                                         escaped = false;
213                                 else if (ch == '\\')
214                                         escaped = true;
215                                 else if (ch == '$') 
216                                         scanning_math = false;
217                                 ret += ch;
218                                 val = val.substr(1);
219                                 continue;
220                         }
221
222                         // if we're scanning a command name, then we just
223                         // discard characters until we hit something that
224                         // isn't alpha.
225                         if (scanning_cmd) {
226                                 if (isAlphaASCII(ch)) {
227                                         val = val.substr(1);
228                                         escaped = false;
229                                         continue;
230                                 }
231                                 // so we're done with this command.
232                                 // now we fall through and check this character.
233                                 scanning_cmd = false;
234                         }
235
236                         // was the last character a \? If so, then this is something like: \\,
237                         // or \$, so we'll just output it. That's probably not always right...
238                         if (escaped) {
239                                 ret += ch;
240                                 val = val.substr(1);
241                                 escaped = false;
242                                 continue;
243                         }
244
245                         if (ch == '$') {
246                                 ret += ch;
247                                 val = val.substr(1);
248                                 scanning_math = true;
249                                 continue;
250                         }
251
252                         // we just ignore braces
253                         if (ch == '{' || ch == '}') {
254                                 val = val.substr(1);
255                                 continue;
256                         }
257
258                         // we're going to check things that look like commands, so if
259                         // this doesn't, just output it.
260                         if (ch != '\\') {
261                                 ret += ch;
262                                 val = val.substr(1);
263                                 continue;
264                         }
265
266                         // ok, could be a command of some sort
267                         // let's see if it corresponds to some unicode
268                         // unicodesymbols has things in the form: \"{u},
269                         // whereas we may see things like: \"u. So we'll
270                         // look for that and change it, if necessary.
271                         static boost::regex const reg("^\\\\\\W\\w");
272                         if (boost::regex_search(to_utf8(val), reg)) {
273                                 val.insert(3, from_ascii("}"));
274                                 val.insert(2, from_ascii("{"));
275                         }
276                         docstring rem;
277                         docstring const cnvtd = Encodings::fromLaTeXCommand(val, rem);
278                         if (!cnvtd.empty()) {
279                                 // it did, so we'll take that bit and proceed with what's left
280                                 ret += cnvtd;
281                                 val = rem;
282                                 continue;
283                         }
284                         // it's a command of some sort
285                         scanning_cmd = true;
286                         escaped = true;
287                         val = val.substr(1);
288                 }
289                 return ret;
290         }
291
292 } // anon namespace
293
294
295 docstring const BibTeXInfo::getInfo(BibTeXInfo const * const xref) const
296 {
297         if (!info_.empty())
298                 return info_;
299
300         if (!is_bibtex_) {
301                 BibTeXInfo::const_iterator it = find(from_ascii("ref"));
302                 info_ = it->second;
303                 return info_;
304         }
305  
306         // FIXME
307         // This could be made a lot better using the entryType
308         // field to customize the output based upon entry type.
309         
310         // Search for all possible "required" fields
311         docstring author = getValueForKey("author", xref);
312         if (author.empty())
313                 author = getValueForKey("editor", xref);
314  
315         docstring year   = getValueForKey("year", xref);
316         docstring title  = getValueForKey("title", xref);
317         docstring docLoc = getValueForKey("pages", xref);
318         if (docLoc.empty()) {
319                 docLoc = getValueForKey("chapter", xref);
320                 if (!docLoc.empty())
321                         docLoc = from_ascii("Ch. ") + docLoc;
322         }       else {
323                 docLoc = from_ascii("pp. ") + docLoc;
324         }
325
326         docstring media = getValueForKey("journal", xref);
327         if (media.empty()) {
328                 media = getValueForKey("publisher", xref);
329                 if (media.empty()) {
330                         media = getValueForKey("school", xref);
331                         if (media.empty())
332                                 media = getValueForKey("institution");
333                 }
334         }
335         docstring volume = getValueForKey("volume", xref);
336
337         odocstringstream result;
338         if (!author.empty())
339                 result << author << ", ";
340         if (!title.empty())
341                 result << title;
342         if (!media.empty())
343                 result << ", " << media;
344         if (!year.empty())
345                 result << " (" << year << ")";
346         if (!docLoc.empty())
347                 result << ", " << docLoc;
348
349         docstring const result_str = rtrim(result.str());
350         if (!result_str.empty()) {
351                 info_ = convertLaTeXCommands(result_str);
352                 return info_;
353         }
354
355         // This should never happen (or at least be very unusual!)
356         return docstring();
357 }
358
359
360 //////////////////////////////////////////////////////////////////////
361 //
362 // BiblioInfo
363 //
364 //////////////////////////////////////////////////////////////////////
365
366 namespace {
367 // A functor for use with sort, leading to case insensitive sorting
368         class compareNoCase: public binary_function<docstring, docstring, bool>
369         {
370                 public:
371                         bool operator()(docstring const & s1, docstring const & s2) const {
372                                 return compare_no_case(s1, s2) < 0;
373                         }
374         };
375 } // namespace anon
376
377
378 vector<docstring> const BiblioInfo::getKeys() const
379 {
380         vector<docstring> bibkeys;
381         BiblioInfo::const_iterator it  = begin();
382         for (; it != end(); ++it)
383                 bibkeys.push_back(it->first);
384         sort(bibkeys.begin(), bibkeys.end(), compareNoCase());
385         return bibkeys;
386 }
387
388
389 vector<docstring> const BiblioInfo::getFields() const
390 {
391         vector<docstring> bibfields;
392         set<docstring>::const_iterator it = field_names_.begin();
393         set<docstring>::const_iterator end = field_names_.end();
394         for (; it != end; ++it)
395                 bibfields.push_back(*it);
396         sort(bibfields.begin(), bibfields.end());
397         return bibfields;
398 }
399
400
401 vector<docstring> const BiblioInfo::getEntries() const
402 {
403         vector<docstring> bibentries;
404         set<docstring>::const_iterator it = entry_types_.begin();
405         set<docstring>::const_iterator end = entry_types_.end();
406         for (; it != end; ++it)
407                 bibentries.push_back(*it);
408         sort(bibentries.begin(), bibentries.end());
409         return bibentries;
410 }
411
412
413 docstring const BiblioInfo::getAbbreviatedAuthor(docstring const & key) const
414 {
415         BiblioInfo::const_iterator it = find(key);
416         if (it == end())
417                 return docstring();
418         BibTeXInfo const & data = it->second;
419         return data.getAbbreviatedAuthor();
420 }
421
422
423 docstring const BiblioInfo::getYear(docstring const & key) const
424 {
425         BiblioInfo::const_iterator it = find(key);
426         if (it == end())
427                 return docstring();
428         BibTeXInfo const & data = it->second;
429         docstring year = data.getYear();
430         if (!year.empty())
431                 return year;
432         // let's try the crossref
433         docstring const xref = data.getXRef();
434         if (xref.empty())
435                 return year; // no luck
436         BiblioInfo::const_iterator const xrefit = find(xref);
437         if (xrefit == end())
438                 return year; // no luck again
439         BibTeXInfo const & xref_data = xrefit->second;
440         return xref_data.getYear();
441         return data.getYear();
442 }
443
444
445 docstring const BiblioInfo::getInfo(docstring const & key) const
446 {
447         BiblioInfo::const_iterator it = find(key);
448         if (it == end())
449                 return docstring();
450         BibTeXInfo const & data = it->second;
451         BibTeXInfo const * xrefptr = 0;
452         docstring const xref = data.getXRef();
453         if (!xref.empty()) {
454                 BiblioInfo::const_iterator const xrefit = find(xref);
455                 if (xrefit != end())
456                         xrefptr = &(xrefit->second);
457         }
458         return data.getInfo(xrefptr);
459 }
460
461
462 vector<docstring> const BiblioInfo::getCiteStrings(
463         docstring const & key, Buffer const & buf) const
464 {
465         CiteEngine const engine = buf.params().citeEngine();
466         if (engine == ENGINE_BASIC || engine == ENGINE_NATBIB_NUMERICAL)
467                 return getNumericalStrings(key, buf);
468         else
469                 return getAuthorYearStrings(key, buf);
470 }
471
472
473 vector<docstring> const BiblioInfo::getNumericalStrings(
474         docstring const & key, Buffer const & buf) const
475 {
476         if (empty())
477                 return vector<docstring>();
478
479         docstring const author = getAbbreviatedAuthor(key);
480         docstring const year   = getYear(key);
481         if (author.empty() || year.empty())
482                 return vector<docstring>();
483
484         vector<CiteStyle> const & styles = citeStyles(buf.params().citeEngine());
485         
486         vector<docstring> vec(styles.size());
487         for (size_t i = 0; i != vec.size(); ++i) {
488                 docstring str;
489
490                 switch (styles[i]) {
491                         case CITE:
492                         case CITEP:
493                                 str = from_ascii("[#ID]");
494                                 break;
495
496                         case NOCITE:
497                                 str = _("Add to bibliography only.");
498                                 break;
499
500                         case CITET:
501                                 str = author + " [#ID]";
502                                 break;
503
504                         case CITEALT:
505                                 str = author + " #ID";
506                                 break;
507
508                         case CITEALP:
509                                 str = from_ascii("#ID");
510                                 break;
511
512                         case CITEAUTHOR:
513                                 str = author;
514                                 break;
515
516                         case CITEYEAR:
517                                 str = year;
518                                 break;
519
520                         case CITEYEARPAR:
521                                 str = '(' + year + ')';
522                                 break;
523                 }
524
525                 vec[i] = str;
526         }
527
528         return vec;
529 }
530
531
532 vector<docstring> const BiblioInfo::getAuthorYearStrings(
533         docstring const & key, Buffer const & buf) const
534 {
535         if (empty())
536                 return vector<docstring>();
537
538         docstring const author = getAbbreviatedAuthor(key);
539         docstring const year   = getYear(key);
540         if (author.empty() || year.empty())
541                 return vector<docstring>();
542
543         vector<CiteStyle> const & styles = citeStyles(buf.params().citeEngine());
544         
545         vector<docstring> vec(styles.size());
546         for (size_t i = 0; i != vec.size(); ++i) {
547                 docstring str;
548
549                 switch (styles[i]) {
550                         case CITE:
551                 // jurabib only: Author/Annotator
552                 // (i.e. the "before" field, 2nd opt arg)
553                                 str = author + "/<" + _("before") + '>';
554                                 break;
555
556                         case NOCITE:
557                                 str = _("Add to bibliography only.");
558                                 break;
559
560                         case CITET:
561                                 str = author + " (" + year + ')';
562                                 break;
563
564                         case CITEP:
565                                 str = '(' + author + ", " + year + ')';
566                                 break;
567
568                         case CITEALT:
569                                 str = author + ' ' + year ;
570                                 break;
571
572                         case CITEALP:
573                                 str = author + ", " + year ;
574                                 break;
575
576                         case CITEAUTHOR:
577                                 str = author;
578                                 break;
579
580                         case CITEYEAR:
581                                 str = year;
582                                 break;
583
584                         case CITEYEARPAR:
585                                 str = '(' + year + ')';
586                                 break;
587                 }
588                 vec[i] = str;
589         }
590         return vec;
591 }
592
593
594 void BiblioInfo::mergeBiblioInfo(BiblioInfo const & info)
595 {
596         bimap_.insert(info.begin(), info.end());
597 }
598
599
600 //////////////////////////////////////////////////////////////////////
601 //
602 // CitationStyle
603 //
604 //////////////////////////////////////////////////////////////////////
605
606 namespace {
607
608
609 char const * const citeCommands[] = {
610         "cite", "citet", "citep", "citealt", "citealp",
611         "citeauthor", "citeyear", "citeyearpar", "nocite" };
612
613 unsigned int const nCiteCommands =
614                 sizeof(citeCommands) / sizeof(char *);
615
616 CiteStyle const citeStylesArray[] = {
617         CITE, CITET, CITEP, CITEALT, CITEALP, 
618         CITEAUTHOR, CITEYEAR, CITEYEARPAR, NOCITE };
619
620 unsigned int const nCiteStyles =
621                 sizeof(citeStylesArray) / sizeof(CiteStyle);
622
623 CiteStyle const citeStylesFull[] = {
624         CITET, CITEP, CITEALT, CITEALP, CITEAUTHOR };
625
626 unsigned int const nCiteStylesFull =
627                 sizeof(citeStylesFull) / sizeof(CiteStyle);
628
629 CiteStyle const citeStylesUCase[] = {
630         CITET, CITEP, CITEALT, CITEALP, CITEAUTHOR };
631
632 unsigned int const nCiteStylesUCase =
633         sizeof(citeStylesUCase) / sizeof(CiteStyle);
634
635 } // namespace anon
636
637
638 CitationStyle citationStyleFromString(string const & command)
639 {
640         CitationStyle s;
641         if (command.empty())
642                 return s;
643
644         string cmd = command;
645         if (cmd[0] == 'C') {
646                 s.forceUpperCase = true;
647                 cmd[0] = 'c';
648         }
649
650         size_t const n = cmd.size() - 1;
651         if (cmd != "cite" && cmd[n] == '*') {
652                 s.full = true;
653                 cmd = cmd.substr(0, n);
654         }
655
656         char const * const * const last = citeCommands + nCiteCommands;
657         char const * const * const ptr = find(citeCommands, last, cmd);
658
659         if (ptr != last) {
660                 size_t idx = ptr - citeCommands;
661                 s.style = citeStylesArray[idx];
662         }
663         return s;
664 }
665
666
667 string citationStyleToString(const CitationStyle & s)
668 {
669         string cite = citeCommands[s.style];
670         if (s.full) {
671                 CiteStyle const * last = citeStylesFull + nCiteStylesFull;
672                 if (find(citeStylesFull, last, s.style) != last)
673                         cite += '*';
674         }
675
676         if (s.forceUpperCase) {
677                 CiteStyle const * last = citeStylesUCase + nCiteStylesUCase;
678                 if (find(citeStylesUCase, last, s.style) != last)
679                         cite[0] = 'C';
680         }
681
682         return cite;
683 }
684
685 vector<CiteStyle> citeStyles(CiteEngine engine)
686 {
687         unsigned int nStyles = 0;
688         unsigned int start = 0;
689
690         switch (engine) {
691                 case ENGINE_BASIC:
692                         nStyles = 2;
693                         start = 0;
694                         break;
695                 case ENGINE_NATBIB_AUTHORYEAR:
696                 case ENGINE_NATBIB_NUMERICAL:
697                         nStyles = nCiteStyles - 1;
698                         start = 1;
699                         break;
700                 case ENGINE_JURABIB:
701                         nStyles = nCiteStyles;
702                         start = 0;
703                         break;
704         }
705
706         vector<CiteStyle> styles(nStyles);
707         size_t i = 0;
708         int j = start;
709         for (; i != styles.size(); ++i, ++j)
710                 styles[i] = citeStylesArray[j];
711
712         return styles;
713 }
714
715 } // namespace lyx
716