]> git.lyx.org Git - lyx.git/blob - src/BiblioInfo.cpp
fix "make check" with gcc 4.3
[lyx.git] / src / BiblioInfo.cpp
1 /**
2  * \file BiblioInfo.cpp
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author Angus Leeming
7  * \author Herbert Voß
8  * \author Richard Heck
9  *
10  * Full author contact details are available in file CREDITS.
11  */
12
13 #include <config.h>
14
15 #include "BiblioInfo.h"
16 #include "Buffer.h"
17 #include "BufferParams.h"
18 #include "buffer_funcs.h"
19 #include "Encoding.h"
20 #include "InsetIterator.h"
21 #include "Paragraph.h"
22
23 #include "insets/Inset.h"
24 #include "insets/InsetBibitem.h"
25 #include "insets/InsetBibtex.h"
26 #include "insets/InsetInclude.h"
27
28 #include "support/docstream.h"
29 #include "support/gettext.h"
30 #include "support/lassert.h"
31 #include "support/lstrings.h"
32 #include "support/textutils.h"
33
34 #include "boost/regex.hpp"
35
36 using namespace std;
37 using namespace lyx::support;
38
39
40 namespace lyx {
41
42 //////////////////////////////////////////////////////////////////////
43 //
44 // BibTeXInfo
45 //
46 //////////////////////////////////////////////////////////////////////
47
48 BibTeXInfo::BibTeXInfo(docstring const & key, docstring const & type)
49         : is_bibtex_(true), bib_key_(key), entry_type_(type), info_()
50 {}
51
52
53 bool BibTeXInfo::hasField(docstring const & field) const
54 {
55         return count(field) == 1;
56 }
57
58
59 docstring const & BibTeXInfo::operator[](docstring const & field) const
60 {
61         BibTeXInfo::const_iterator it = find(field);
62         if (it != end())
63                 return it->second;
64         static docstring const empty_value = docstring();
65         return empty_value;
66 }
67         
68         
69 docstring const & BibTeXInfo::operator[](string const & field) const
70 {
71         return operator[](from_ascii(field));
72 }
73
74
75 docstring BibTeXInfo::getValueForKey(string const & key, 
76                 BibTeXInfo const * const xref) const
77 {
78         docstring const ret = operator[](key);
79         if (!ret.empty() || !xref)
80                 return ret;
81         return (*xref)[key];
82 }
83
84
85 docstring familyName(docstring const & name)
86 {
87         if (name.empty())
88                 return docstring();
89
90         // first we look for a comma, and take the last name to be everything
91         // preceding the right-most one, so that we also get the "jr" part.
92         docstring::size_type idx = name.rfind(',');
93         if (idx != docstring::npos)
94                 return ltrim(name.substr(0, idx));
95
96         // OK, so now we want to look for the last name. We're going to
97         // include the "von" part. This isn't perfect.
98         // Split on spaces, to get various tokens.
99         vector<docstring> pieces = getVectorFromString(name, from_ascii(" "));
100         // If we only get two, assume the last one is the last name
101         if (pieces.size() <= 2)
102                 return pieces.back();
103
104         // Now we look for the first token that begins with a lower case letter.
105         vector<docstring>::const_iterator it = pieces.begin();
106         vector<docstring>::const_iterator en = pieces.end();
107         for (; it != en; ++it) {
108                 if ((*it).size() == 0)
109                         continue;
110                 char_type const c = (*it)[0];
111                 if (isLower(c))
112                         break;
113         }
114
115         if (it == en) // we never found a "von"
116                 return pieces.back();
117
118         // reconstruct what we need to return
119         docstring retval;
120         bool first = true;
121         for (; it != en; ++it) {
122                 if (!first)
123                         retval += " ";
124                 else 
125                         first = false;
126                 retval += *it;
127         }
128         return retval;
129 }
130
131 docstring const BibTeXInfo::getAbbreviatedAuthor() const
132 {
133         if (!is_bibtex_) {
134                 docstring const opt = trim(operator[]("label"));
135                 if (opt.empty())
136                         return docstring();
137
138                 docstring authors;
139                 split(opt, authors, '(');
140                 return authors;
141         }
142
143         docstring author = operator[]("author");
144         if (author.empty()) {
145                 author = operator[]("editor");
146                 if (author.empty())
147                         return bib_key_;
148         }
149
150         // OK, we've got some names. Let's format them.
151         // Try to split the author list on " and "
152         vector<docstring> const authors =
153                 getVectorFromString(author, from_ascii(" and "));
154
155         if (authors.size() == 2)
156                 return bformat(_("%1$s and %2$s"),
157                         familyName(authors[0]), familyName(authors[1]));
158
159         if (authors.size() > 2)
160                 return bformat(_("%1$s et al."), familyName(authors[0]));
161
162         return familyName(authors[0]);
163 }
164
165
166 docstring const BibTeXInfo::getYear() const
167 {
168         if (!is_bibtex_) {
169                 docstring const opt = trim(operator[]("label"));
170                 if (opt.empty())
171                         return docstring();
172
173                 docstring authors;
174                 docstring const tmp = split(opt, authors, '(');
175                 docstring year;
176                 split(tmp, year, ')');
177                 return year;
178         }
179
180         docstring year = operator[]("year");
181         if (year.empty())
182                 year = _("No year");
183         return year;
184 }
185
186
187 docstring const BibTeXInfo::getXRef() const
188 {
189         if (!is_bibtex_)
190                 return docstring();
191         return operator[]("crossref");
192 }
193
194
195 namespace {
196
197         docstring convertLaTeXCommands(docstring const & str)
198         {
199                 docstring val = str;
200                 docstring ret;
201         
202                 bool scanning_cmd = false;
203                 bool scanning_math = false;
204                 bool escaped = false; // used to catch \$, etc.
205                 while (val.size()) {
206                         char_type const ch = val[0];
207
208                         // if we're scanning math, we output everything until we
209                         // find an unescaped $, at which point we break out.
210                         if (scanning_math) {
211                                 if (escaped)
212                                         escaped = false;
213                                 else if (ch == '\\')
214                                         escaped = true;
215                                 else if (ch == '$') 
216                                         scanning_math = false;
217                                 ret += ch;
218                                 val = val.substr(1);
219                                 continue;
220                         }
221
222                         // if we're scanning a command name, then we just
223                         // discard characters until we hit something that
224                         // isn't alpha.
225                         if (scanning_cmd) {
226                                 if (isAlphaASCII(ch)) {
227                                         val = val.substr(1);
228                                         escaped = false;
229                                         continue;
230                                 }
231                                 // so we're done with this command.
232                                 // now we fall through and check this character.
233                                 scanning_cmd = false;
234                         }
235
236                         // was the last character a \? If so, then this is something like: \\,
237                         // or \$, so we'll just output it. That's probably not always right...
238                         if (escaped) {
239                                 ret += ch;
240                                 val = val.substr(1);
241                                 escaped = false;
242                                 continue;
243                         }
244
245                         if (ch == '$') {
246                                 ret += ch;
247                                 val = val.substr(1);
248                                 scanning_math = true;
249                                 continue;
250                         }
251
252                         // we just ignore braces
253                         if (ch == '{' || ch == '}') {
254                                 val = val.substr(1);
255                                 continue;
256                         }
257
258                         // we're going to check things that look like commands, so if
259                         // this doesn't, just output it.
260                         if (ch != '\\') {
261                                 ret += ch;
262                                 val = val.substr(1);
263                                 continue;
264                         }
265
266                         // ok, could be a command of some sort
267                         // let's see if it corresponds to some unicode
268                         // unicodesymbols has things in the form: \"{u},
269                         // whereas we may see things like: \"u. So we'll
270                         // look for that and change it, if necessary.
271                         static boost::regex const reg("^\\\\\\W\\w");
272                         if (boost::regex_search(to_utf8(val), reg)) {
273                                 val.insert(3, from_ascii("}"));
274                                 val.insert(2, from_ascii("{"));
275                         }
276                         docstring rem;
277                         docstring const cnvtd = Encodings::fromLaTeXCommand(val, rem);
278                         if (!cnvtd.empty()) {
279                                 // it did, so we'll take that bit and proceed with what's left
280                                 ret += cnvtd;
281                                 val = rem;
282                                 continue;
283                         }
284                         // it's a command of some sort
285                         scanning_cmd = true;
286                         escaped = true;
287                         val = val.substr(1);
288                 }
289                 return ret;
290         }
291
292 } // anon namespace
293
294
295 docstring const & BibTeXInfo::getInfo(BibTeXInfo const * const xref) const
296 {
297         if (!info_.empty())
298                 return info_;
299
300         if (!is_bibtex_) {
301                 BibTeXInfo::const_iterator it = find(from_ascii("ref"));
302                 info_ = it->second;
303                 return info_;
304         }
305  
306         // FIXME
307         // This could be made a lot better using the entry_type_
308         // field to customize the output based upon entry type.
309         
310         // Search for all possible "required" fields
311         docstring author = getValueForKey("author", xref);
312         if (author.empty())
313                 author = getValueForKey("editor", xref);
314  
315         docstring year   = getValueForKey("year", xref);
316         docstring title  = getValueForKey("title", xref);
317         docstring docLoc = getValueForKey("pages", xref);
318         if (docLoc.empty()) {
319                 docLoc = getValueForKey("chapter", xref);
320                 if (!docLoc.empty())
321                         docLoc = _("Ch. ") + docLoc;
322         }       else {
323                 docLoc = _("pp. ") + docLoc;
324         }
325
326         docstring media = getValueForKey("journal", xref);
327         if (media.empty()) {
328                 media = getValueForKey("publisher", xref);
329                 if (media.empty()) {
330                         media = getValueForKey("school", xref);
331                         if (media.empty())
332                                 media = getValueForKey("institution");
333                 }
334         }
335         docstring volume = getValueForKey("volume", xref);
336
337         odocstringstream result;
338         if (!author.empty())
339                 result << author << ", ";
340         if (!title.empty())
341                 result << title;
342         if (!media.empty())
343                 result << ", " << media;
344         if (!year.empty())
345                 result << " (" << year << ")";
346         if (!docLoc.empty())
347                 result << ", " << docLoc;
348
349         docstring const result_str = rtrim(result.str());
350         if (!result_str.empty()) {
351                 info_ = convertLaTeXCommands(result_str);
352                 return info_;
353         }
354
355         // This should never happen (or at least be very unusual!)
356         static docstring e = docstring();
357         return e;
358 }
359
360
361 //////////////////////////////////////////////////////////////////////
362 //
363 // BiblioInfo
364 //
365 //////////////////////////////////////////////////////////////////////
366
367 namespace {
368 // A functor for use with sort, leading to case insensitive sorting
369         class compareNoCase: public binary_function<docstring, docstring, bool>
370         {
371                 public:
372                         bool operator()(docstring const & s1, docstring const & s2) const {
373                                 return compare_no_case(s1, s2) < 0;
374                         }
375         };
376 } // namespace anon
377
378
379 vector<docstring> const BiblioInfo::getKeys() const
380 {
381         vector<docstring> bibkeys;
382         BiblioInfo::const_iterator it  = begin();
383         for (; it != end(); ++it)
384                 bibkeys.push_back(it->first);
385         sort(bibkeys.begin(), bibkeys.end(), compareNoCase());
386         return bibkeys;
387 }
388
389
390 vector<docstring> const BiblioInfo::getFields() const
391 {
392         vector<docstring> bibfields;
393         set<docstring>::const_iterator it = field_names_.begin();
394         set<docstring>::const_iterator end = field_names_.end();
395         for (; it != end; ++it)
396                 bibfields.push_back(*it);
397         sort(bibfields.begin(), bibfields.end());
398         return bibfields;
399 }
400
401
402 vector<docstring> const BiblioInfo::getEntries() const
403 {
404         vector<docstring> bibentries;
405         set<docstring>::const_iterator it = entry_types_.begin();
406         set<docstring>::const_iterator end = entry_types_.end();
407         for (; it != end; ++it)
408                 bibentries.push_back(*it);
409         sort(bibentries.begin(), bibentries.end());
410         return bibentries;
411 }
412
413
414 docstring const BiblioInfo::getAbbreviatedAuthor(docstring const & key) const
415 {
416         BiblioInfo::const_iterator it = find(key);
417         if (it == end())
418                 return docstring();
419         BibTeXInfo const & data = it->second;
420         return data.getAbbreviatedAuthor();
421 }
422
423
424 docstring const BiblioInfo::getYear(docstring const & key) const
425 {
426         BiblioInfo::const_iterator it = find(key);
427         if (it == end())
428                 return docstring();
429         BibTeXInfo const & data = it->second;
430         docstring year = data.getYear();
431         if (!year.empty())
432                 return year;
433         // let's try the crossref
434         docstring const xref = data.getXRef();
435         if (xref.empty())
436                 return year; // no luck
437         BiblioInfo::const_iterator const xrefit = find(xref);
438         if (xrefit == end())
439                 return year; // no luck again
440         BibTeXInfo const & xref_data = xrefit->second;
441         return xref_data.getYear();
442         return data.getYear();
443 }
444
445
446 docstring const BiblioInfo::getInfo(docstring const & key) const
447 {
448         BiblioInfo::const_iterator it = find(key);
449         if (it == end())
450                 return docstring();
451         BibTeXInfo const & data = it->second;
452         BibTeXInfo const * xrefptr = 0;
453         docstring const xref = data.getXRef();
454         if (!xref.empty()) {
455                 BiblioInfo::const_iterator const xrefit = find(xref);
456                 if (xrefit != end())
457                         xrefptr = &(xrefit->second);
458         }
459         return data.getInfo(xrefptr);
460 }
461
462
463 vector<docstring> const BiblioInfo::getCiteStrings(
464         docstring const & key, Buffer const & buf) const
465 {
466         CiteEngine const engine = buf.params().citeEngine();
467         if (engine == ENGINE_BASIC || engine == ENGINE_NATBIB_NUMERICAL)
468                 return getNumericalStrings(key, buf);
469         else
470                 return getAuthorYearStrings(key, buf);
471 }
472
473
474 vector<docstring> const BiblioInfo::getNumericalStrings(
475         docstring const & key, Buffer const & buf) const
476 {
477         if (empty())
478                 return vector<docstring>();
479
480         docstring const author = getAbbreviatedAuthor(key);
481         docstring const year   = getYear(key);
482         if (author.empty() || year.empty())
483                 return vector<docstring>();
484
485         vector<CiteStyle> const & styles = citeStyles(buf.params().citeEngine());
486         
487         vector<docstring> vec(styles.size());
488         for (size_t i = 0; i != vec.size(); ++i) {
489                 docstring str;
490
491                 switch (styles[i]) {
492                         case CITE:
493                         case CITEP:
494                                 str = from_ascii("[#ID]");
495                                 break;
496
497                         case NOCITE:
498                                 str = _("Add to bibliography only.");
499                                 break;
500
501                         case CITET:
502                                 str = author + " [#ID]";
503                                 break;
504
505                         case CITEALT:
506                                 str = author + " #ID";
507                                 break;
508
509                         case CITEALP:
510                                 str = from_ascii("#ID");
511                                 break;
512
513                         case CITEAUTHOR:
514                                 str = author;
515                                 break;
516
517                         case CITEYEAR:
518                                 str = year;
519                                 break;
520
521                         case CITEYEARPAR:
522                                 str = '(' + year + ')';
523                                 break;
524                 }
525
526                 vec[i] = str;
527         }
528
529         return vec;
530 }
531
532
533 vector<docstring> const BiblioInfo::getAuthorYearStrings(
534         docstring const & key, Buffer const & buf) const
535 {
536         if (empty())
537                 return vector<docstring>();
538
539         docstring const author = getAbbreviatedAuthor(key);
540         docstring const year   = getYear(key);
541         if (author.empty() || year.empty())
542                 return vector<docstring>();
543
544         vector<CiteStyle> const & styles = citeStyles(buf.params().citeEngine());
545         
546         vector<docstring> vec(styles.size());
547         for (size_t i = 0; i != vec.size(); ++i) {
548                 docstring str;
549
550                 switch (styles[i]) {
551                         case CITE:
552                 // jurabib only: Author/Annotator
553                 // (i.e. the "before" field, 2nd opt arg)
554                                 str = author + "/<" + _("before") + '>';
555                                 break;
556
557                         case NOCITE:
558                                 str = _("Add to bibliography only.");
559                                 break;
560
561                         case CITET:
562                                 str = author + " (" + year + ')';
563                                 break;
564
565                         case CITEP:
566                                 str = '(' + author + ", " + year + ')';
567                                 break;
568
569                         case CITEALT:
570                                 str = author + ' ' + year ;
571                                 break;
572
573                         case CITEALP:
574                                 str = author + ", " + year ;
575                                 break;
576
577                         case CITEAUTHOR:
578                                 str = author;
579                                 break;
580
581                         case CITEYEAR:
582                                 str = year;
583                                 break;
584
585                         case CITEYEARPAR:
586                                 str = '(' + year + ')';
587                                 break;
588                 }
589                 vec[i] = str;
590         }
591         return vec;
592 }
593
594
595 void BiblioInfo::mergeBiblioInfo(BiblioInfo const & info)
596 {
597         bimap_.insert(info.begin(), info.end());
598 }
599
600
601 //////////////////////////////////////////////////////////////////////
602 //
603 // CitationStyle
604 //
605 //////////////////////////////////////////////////////////////////////
606
607 namespace {
608
609
610 char const * const citeCommands[] = {
611         "cite", "citet", "citep", "citealt", "citealp",
612         "citeauthor", "citeyear", "citeyearpar", "nocite" };
613
614 unsigned int const nCiteCommands =
615                 sizeof(citeCommands) / sizeof(char *);
616
617 CiteStyle const citeStylesArray[] = {
618         CITE, CITET, CITEP, CITEALT, CITEALP, 
619         CITEAUTHOR, CITEYEAR, CITEYEARPAR, NOCITE };
620
621 unsigned int const nCiteStyles =
622                 sizeof(citeStylesArray) / sizeof(CiteStyle);
623
624 CiteStyle const citeStylesFull[] = {
625         CITET, CITEP, CITEALT, CITEALP, CITEAUTHOR };
626
627 unsigned int const nCiteStylesFull =
628                 sizeof(citeStylesFull) / sizeof(CiteStyle);
629
630 CiteStyle const citeStylesUCase[] = {
631         CITET, CITEP, CITEALT, CITEALP, CITEAUTHOR };
632
633 unsigned int const nCiteStylesUCase =
634         sizeof(citeStylesUCase) / sizeof(CiteStyle);
635
636 } // namespace anon
637
638
639 CitationStyle citationStyleFromString(string const & command)
640 {
641         CitationStyle s;
642         if (command.empty())
643                 return s;
644
645         string cmd = command;
646         if (cmd[0] == 'C') {
647                 s.forceUpperCase = true;
648                 cmd[0] = 'c';
649         }
650
651         size_t const n = cmd.size() - 1;
652         if (cmd != "cite" && cmd[n] == '*') {
653                 s.full = true;
654                 cmd = cmd.substr(0, n);
655         }
656
657         char const * const * const last = citeCommands + nCiteCommands;
658         char const * const * const ptr = find(citeCommands, last, cmd);
659
660         if (ptr != last) {
661                 size_t idx = ptr - citeCommands;
662                 s.style = citeStylesArray[idx];
663         }
664         return s;
665 }
666
667
668 string citationStyleToString(const CitationStyle & s)
669 {
670         string cite = citeCommands[s.style];
671         if (s.full) {
672                 CiteStyle const * last = citeStylesFull + nCiteStylesFull;
673                 if (find(citeStylesFull, last, s.style) != last)
674                         cite += '*';
675         }
676
677         if (s.forceUpperCase) {
678                 CiteStyle const * last = citeStylesUCase + nCiteStylesUCase;
679                 if (find(citeStylesUCase, last, s.style) != last)
680                         cite[0] = 'C';
681         }
682
683         return cite;
684 }
685
686 vector<CiteStyle> citeStyles(CiteEngine engine)
687 {
688         unsigned int nStyles = 0;
689         unsigned int start = 0;
690
691         switch (engine) {
692                 case ENGINE_BASIC:
693                         nStyles = 2;
694                         start = 0;
695                         break;
696                 case ENGINE_NATBIB_AUTHORYEAR:
697                 case ENGINE_NATBIB_NUMERICAL:
698                         nStyles = nCiteStyles - 1;
699                         start = 1;
700                         break;
701                 case ENGINE_JURABIB:
702                         nStyles = nCiteStyles;
703                         start = 0;
704                         break;
705         }
706
707         vector<CiteStyle> styles(nStyles);
708         size_t i = 0;
709         int j = start;
710         for (; i != styles.size(); ++i, ++j)
711                 styles[i] = citeStylesArray[j];
712
713         return styles;
714 }
715
716 } // namespace lyx
717