]> git.lyx.org Git - lyx.git/blob - src/BiblioInfo.cpp
1e32445891f00a0a8ea4dbe3ef9a48a7acc8c9f5
[lyx.git] / src / BiblioInfo.cpp
1 /**
2  * \file BiblioInfo.cpp
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author Angus Leeming
7  * \author Herbert Voß
8  * \author Richard Heck
9  *
10  * Full author contact details are available in file CREDITS.
11  */
12
13 #include <config.h>
14
15 #include "BiblioInfo.h"
16 #include "Buffer.h"
17 #include "BufferParams.h"
18 #include "buffer_funcs.h"
19 #include "Encoding.h"
20 #include "InsetIterator.h"
21 #include "Paragraph.h"
22
23 #include "insets/Inset.h"
24 #include "insets/InsetBibitem.h"
25 #include "insets/InsetBibtex.h"
26 #include "insets/InsetInclude.h"
27
28 #include "support/docstream.h"
29 #include "support/gettext.h"
30 #include "support/lassert.h"
31 #include "support/lstrings.h"
32 #include "support/textutils.h"
33
34 #include "boost/regex.hpp"
35
36 using namespace std;
37 using namespace lyx::support;
38
39
40 namespace lyx {
41
42 //////////////////////////////////////////////////////////////////////
43 //
44 // BibTeXInfo
45 //
46 //////////////////////////////////////////////////////////////////////
47
48 BibTeXInfo::BibTeXInfo(docstring const & key, docstring const & type)
49         : is_bibtex_(true), bib_key_(key), entry_type_(type), info_()
50 {}
51
52
53 bool BibTeXInfo::hasField(docstring const & field) const
54 {
55         return count(field) == 1;
56 }
57
58
59 docstring const & BibTeXInfo::operator[](docstring const & field) const
60 {
61         BibTeXInfo::const_iterator it = find(field);
62         if (it != end())
63                 return it->second;
64         static docstring const empty_value = docstring();
65         return empty_value;
66 }
67         
68         
69 docstring const & BibTeXInfo::operator[](string const & field) const
70 {
71         return operator[](from_ascii(field));
72 }
73
74
75 docstring familyName(docstring const & name)
76 {
77         if (name.empty())
78                 return docstring();
79
80         // first we look for a comma, and take the last name to be everything
81         // preceding the right-most one, so that we also get the "jr" part.
82         docstring::size_type idx = name.rfind(',');
83         if (idx != docstring::npos)
84                 return ltrim(name.substr(0, idx));
85
86         // OK, so now we want to look for the last name. We're going to
87         // include the "von" part. This isn't perfect.
88         // Split on spaces, to get various tokens.
89         vector<docstring> pieces = getVectorFromString(name, from_ascii(" "));
90         // If we only get two, assume the last one is the last name
91         if (pieces.size() <= 2)
92                 return pieces.back();
93
94         // Now we look for the first token that begins with a lower case letter.
95         vector<docstring>::const_iterator it = pieces.begin();
96         vector<docstring>::const_iterator en = pieces.end();
97         for (; it != en; ++it) {
98                 if ((*it).size() == 0)
99                         continue;
100                 char_type const c = (*it)[0];
101                 if (isLower(c))
102                         break;
103         }
104
105         if (it == en) // we never found a "von"
106                 return pieces.back();
107
108         // reconstruct what we need to return
109         docstring retval;
110         bool first = true;
111         for (; it != en; ++it) {
112                 if (!first)
113                         retval += " ";
114                 else 
115                         first = false;
116                 retval += *it;
117         }
118         return retval;
119 }
120
121 docstring const BibTeXInfo::getAbbreviatedAuthor() const
122 {
123         if (!is_bibtex_) {
124                 docstring const opt = trim(operator[]("label"));
125                 if (opt.empty())
126                         return docstring();
127
128                 docstring authors;
129                 split(opt, authors, '(');
130                 return authors;
131         }
132
133         docstring author = operator[]("author");
134         if (author.empty()) {
135                 author = operator[]("editor");
136                 if (author.empty())
137                         return bib_key_;
138         }
139
140         // OK, we've got some names. Let's format them.
141         // Try to split the author list on " and "
142         vector<docstring> const authors =
143                 getVectorFromString(author, from_ascii(" and "));
144
145         if (authors.size() == 2)
146                 return bformat(_("%1$s and %2$s"),
147                         familyName(authors[0]), familyName(authors[1]));
148
149         if (authors.size() > 2)
150                 return bformat(_("%1$s et al."), familyName(authors[0]));
151
152         return familyName(authors[0]);
153 }
154
155
156 docstring const BibTeXInfo::getYear() const
157 {
158         if (!is_bibtex_) {
159                 docstring const opt = trim(operator[]("label"));
160                 if (opt.empty())
161                         return docstring();
162
163                 docstring authors;
164                 docstring const tmp = split(opt, authors, '(');
165                 docstring year;
166                 split(tmp, year, ')');
167                 return year;
168         }
169
170         docstring year = operator[]("year");
171         if (year.empty())
172                 year = _("No year");
173         return year;
174 }
175
176
177 namespace {
178
179         docstring convertLaTeXCommands(docstring const & str)
180         {
181                 docstring val = str;
182                 docstring ret;
183         
184                 bool scanning_cmd = false;
185                 bool scanning_math = false;
186                 bool escaped = false; // used to catch \$, etc.
187                 while (val.size()) {
188                         char_type const ch = val[0];
189
190                         // if we're scanning math, we output everything until we
191                         // find an unescaped $, at which point we break out.
192                         if (scanning_math) {
193                                 if (escaped)
194                                         escaped = false;
195                                 else if (ch == '\\')
196                                         escaped = true;
197                                 else if (ch == '$') 
198                                         scanning_math = false;
199                                 ret += ch;
200                                 val = val.substr(1);
201                                 continue;
202                         }
203
204                         // if we're scanning a command name, then we just
205                         // discard characters until we hit something that
206                         // isn't alpha.
207                         if (scanning_cmd) {
208                                 if (isAlphaASCII(ch)) {
209                                         val = val.substr(1);
210                                         escaped = false;
211                                         continue;
212                                 }
213                                 // so we're done with this command.
214                                 // now we fall through and check this character.
215                                 scanning_cmd = false;
216                         }
217
218                         // was the last character a \? If so, then this is something like: \\,
219                         // or \$, so we'll just output it. That's probably not always right...
220                         if (escaped) {
221                                 ret += ch;
222                                 val = val.substr(1);
223                                 escaped = false;
224                                 continue;
225                         }
226
227                         if (ch == '$') {
228                                 ret += ch;
229                                 val = val.substr(1);
230                                 scanning_math = true;
231                                 continue;
232                         }
233
234                         // we just ignore braces
235                         if (ch == '{' || ch == '}') {
236                                 val = val.substr(1);
237                                 continue;
238                         }
239
240                         // we're going to check things that look like commands, so if
241                         // this doesn't, just output it.
242                         if (ch != '\\') {
243                                 ret += ch;
244                                 val = val.substr(1);
245                                 continue;
246                         }
247
248                         // ok, could be a command of some sort
249                         // let's see if it corresponds to some unicode
250                         // unicodesymbols has things in the form: \"{u},
251                         // whereas we may see things like: \"u. So we'll
252                         // look for that and change it, if necessary.
253                         static boost::regex const reg("^\\\\\\W\\w");
254                         if (boost::regex_search(to_utf8(val), reg)) {
255                                 val.insert(3, from_ascii("}"));
256                                 val.insert(2, from_ascii("{"));
257                         }
258                         docstring rem;
259                         docstring const cnvtd = Encodings::fromLaTeXCommand(val, rem);
260                         if (!cnvtd.empty()) {
261                                 // it did, so we'll take that bit and proceed with what's left
262                                 ret += cnvtd;
263                                 val = rem;
264                                 continue;
265                         }
266                         // it's a command of some sort
267                         scanning_cmd = true;
268                         escaped = true;
269                         val = val.substr(1);
270                 }
271                 return ret;
272         }
273
274 } // anon namespace
275
276
277 docstring const BibTeXInfo::getInfo() const
278 {
279         if (!info_.empty())
280                 return info_;
281
282         if (!is_bibtex_) {
283                 BibTeXInfo::const_iterator it = find(from_ascii("ref"));
284                 info_ = it->second;
285                 return info_;
286         }
287  
288         // FIXME
289         // This could be made a lot better using the entryType
290         // field to customize the output based upon entry type.
291         
292         // Search for all possible "required" fields
293         docstring author = operator[]("author");
294         if (author.empty())
295                 author = operator[]("editor");
296  
297         docstring year   = operator[]("year");
298         docstring title  = operator[]("title");
299         docstring docLoc = operator[]("pages");
300         if (docLoc.empty()) {
301                 docLoc = operator[]("chapter");
302                 if (!docLoc.empty())
303                         docLoc = from_ascii("Ch. ") + docLoc;
304         }       else {
305                 docLoc = from_ascii("pp. ") + docLoc;
306         }
307
308         docstring media = operator[]("journal");
309         if (media.empty()) {
310                 media = operator[]("publisher");
311                 if (media.empty()) {
312                         media = operator[]("school");
313                         if (media.empty())
314                                 media = operator[]("institution");
315                 }
316         }
317         docstring volume = operator[]("volume");
318
319         odocstringstream result;
320         if (!author.empty())
321                 result << author << ", ";
322         if (!title.empty())
323                 result << title;
324         if (!media.empty())
325                 result << ", " << media;
326         if (!year.empty())
327                 result << ", " << year;
328         if (!docLoc.empty())
329                 result << ", " << docLoc;
330
331         docstring const result_str = rtrim(result.str());
332         if (!result_str.empty()) {
333                 info_ = convertLaTeXCommands(result_str);
334                 return info_;
335         }
336
337         // This should never happen (or at least be very unusual!)
338         return docstring();
339 }
340
341
342 //////////////////////////////////////////////////////////////////////
343 //
344 // BiblioInfo
345 //
346 //////////////////////////////////////////////////////////////////////
347
348 namespace {
349 // A functor for use with sort, leading to case insensitive sorting
350         class compareNoCase: public binary_function<docstring, docstring, bool>
351         {
352                 public:
353                         bool operator()(docstring const & s1, docstring const & s2) const {
354                                 return compare_no_case(s1, s2) < 0;
355                         }
356         };
357 } // namespace anon
358
359
360 vector<docstring> const BiblioInfo::getKeys() const
361 {
362         vector<docstring> bibkeys;
363         BiblioInfo::const_iterator it  = begin();
364         for (; it != end(); ++it)
365                 bibkeys.push_back(it->first);
366         sort(bibkeys.begin(), bibkeys.end(), compareNoCase());
367         return bibkeys;
368 }
369
370
371 vector<docstring> const BiblioInfo::getFields() const
372 {
373         vector<docstring> bibfields;
374         set<docstring>::const_iterator it = field_names_.begin();
375         set<docstring>::const_iterator end = field_names_.end();
376         for (; it != end; ++it)
377                 bibfields.push_back(*it);
378         sort(bibfields.begin(), bibfields.end());
379         return bibfields;
380 }
381
382
383 vector<docstring> const BiblioInfo::getEntries() const
384 {
385         vector<docstring> bibentries;
386         set<docstring>::const_iterator it = entry_types_.begin();
387         set<docstring>::const_iterator end = entry_types_.end();
388         for (; it != end; ++it)
389                 bibentries.push_back(*it);
390         sort(bibentries.begin(), bibentries.end());
391         return bibentries;
392 }
393
394
395 docstring const BiblioInfo::getAbbreviatedAuthor(docstring const & key) const
396 {
397         BiblioInfo::const_iterator it = find(key);
398         if (it == end())
399                 return docstring();
400         BibTeXInfo const & data = it->second;
401         return data.getAbbreviatedAuthor();
402 }
403
404
405 docstring const BiblioInfo::getYear(docstring const & key) const
406 {
407         BiblioInfo::const_iterator it = find(key);
408         if (it == end())
409                 return docstring();
410         BibTeXInfo const & data = it->second;
411         return data.getYear();
412 }
413
414
415 docstring const BiblioInfo::getInfo(docstring const & key) const
416 {
417         BiblioInfo::const_iterator it = find(key);
418         if (it == end())
419                 return docstring();
420         BibTeXInfo const & data = it->second;
421         return data.getInfo();
422 }
423
424
425 vector<docstring> const BiblioInfo::getCiteStrings(
426         docstring const & key, Buffer const & buf) const
427 {
428         CiteEngine const engine = buf.params().citeEngine();
429         if (engine == ENGINE_BASIC || engine == ENGINE_NATBIB_NUMERICAL)
430                 return getNumericalStrings(key, buf);
431         else
432                 return getAuthorYearStrings(key, buf);
433 }
434
435
436 vector<docstring> const BiblioInfo::getNumericalStrings(
437         docstring const & key, Buffer const & buf) const
438 {
439         if (empty())
440                 return vector<docstring>();
441
442         docstring const author = getAbbreviatedAuthor(key);
443         docstring const year   = getYear(key);
444         if (author.empty() || year.empty())
445                 return vector<docstring>();
446
447         vector<CiteStyle> const & styles = citeStyles(buf.params().citeEngine());
448         
449         vector<docstring> vec(styles.size());
450         for (size_t i = 0; i != vec.size(); ++i) {
451                 docstring str;
452
453                 switch (styles[i]) {
454                         case CITE:
455                         case CITEP:
456                                 str = from_ascii("[#ID]");
457                                 break;
458
459                         case NOCITE:
460                                 str = _("Add to bibliography only.");
461                                 break;
462
463                         case CITET:
464                                 str = author + " [#ID]";
465                                 break;
466
467                         case CITEALT:
468                                 str = author + " #ID";
469                                 break;
470
471                         case CITEALP:
472                                 str = from_ascii("#ID");
473                                 break;
474
475                         case CITEAUTHOR:
476                                 str = author;
477                                 break;
478
479                         case CITEYEAR:
480                                 str = year;
481                                 break;
482
483                         case CITEYEARPAR:
484                                 str = '(' + year + ')';
485                                 break;
486                 }
487
488                 vec[i] = str;
489         }
490
491         return vec;
492 }
493
494
495 vector<docstring> const BiblioInfo::getAuthorYearStrings(
496         docstring const & key, Buffer const & buf) const
497 {
498         if (empty())
499                 return vector<docstring>();
500
501         docstring const author = getAbbreviatedAuthor(key);
502         docstring const year   = getYear(key);
503         if (author.empty() || year.empty())
504                 return vector<docstring>();
505
506         vector<CiteStyle> const & styles = citeStyles(buf.params().citeEngine());
507         
508         vector<docstring> vec(styles.size());
509         for (size_t i = 0; i != vec.size(); ++i) {
510                 docstring str;
511
512                 switch (styles[i]) {
513                         case CITE:
514                 // jurabib only: Author/Annotator
515                 // (i.e. the "before" field, 2nd opt arg)
516                                 str = author + "/<" + _("before") + '>';
517                                 break;
518
519                         case NOCITE:
520                                 str = _("Add to bibliography only.");
521                                 break;
522
523                         case CITET:
524                                 str = author + " (" + year + ')';
525                                 break;
526
527                         case CITEP:
528                                 str = '(' + author + ", " + year + ')';
529                                 break;
530
531                         case CITEALT:
532                                 str = author + ' ' + year ;
533                                 break;
534
535                         case CITEALP:
536                                 str = author + ", " + year ;
537                                 break;
538
539                         case CITEAUTHOR:
540                                 str = author;
541                                 break;
542
543                         case CITEYEAR:
544                                 str = year;
545                                 break;
546
547                         case CITEYEARPAR:
548                                 str = '(' + year + ')';
549                                 break;
550                 }
551                 vec[i] = str;
552         }
553         return vec;
554 }
555
556
557 void BiblioInfo::mergeBiblioInfo(BiblioInfo const & info)
558 {
559         bimap_.insert(info.begin(), info.end());
560 }
561
562
563 //////////////////////////////////////////////////////////////////////
564 //
565 // CitationStyle
566 //
567 //////////////////////////////////////////////////////////////////////
568
569 namespace {
570
571
572 char const * const citeCommands[] = {
573         "cite", "citet", "citep", "citealt", "citealp",
574         "citeauthor", "citeyear", "citeyearpar", "nocite" };
575
576 unsigned int const nCiteCommands =
577                 sizeof(citeCommands) / sizeof(char *);
578
579 CiteStyle const citeStylesArray[] = {
580         CITE, CITET, CITEP, CITEALT, CITEALP, 
581         CITEAUTHOR, CITEYEAR, CITEYEARPAR, NOCITE };
582
583 unsigned int const nCiteStyles =
584                 sizeof(citeStylesArray) / sizeof(CiteStyle);
585
586 CiteStyle const citeStylesFull[] = {
587         CITET, CITEP, CITEALT, CITEALP, CITEAUTHOR };
588
589 unsigned int const nCiteStylesFull =
590                 sizeof(citeStylesFull) / sizeof(CiteStyle);
591
592 CiteStyle const citeStylesUCase[] = {
593         CITET, CITEP, CITEALT, CITEALP, CITEAUTHOR };
594
595 unsigned int const nCiteStylesUCase =
596         sizeof(citeStylesUCase) / sizeof(CiteStyle);
597
598 } // namespace anon
599
600
601 CitationStyle citationStyleFromString(string const & command)
602 {
603         CitationStyle s;
604         if (command.empty())
605                 return s;
606
607         string cmd = command;
608         if (cmd[0] == 'C') {
609                 s.forceUpperCase = true;
610                 cmd[0] = 'c';
611         }
612
613         size_t const n = cmd.size() - 1;
614         if (cmd != "cite" && cmd[n] == '*') {
615                 s.full = true;
616                 cmd = cmd.substr(0, n);
617         }
618
619         char const * const * const last = citeCommands + nCiteCommands;
620         char const * const * const ptr = find(citeCommands, last, cmd);
621
622         if (ptr != last) {
623                 size_t idx = ptr - citeCommands;
624                 s.style = citeStylesArray[idx];
625         }
626         return s;
627 }
628
629
630 string citationStyleToString(const CitationStyle & s)
631 {
632         string cite = citeCommands[s.style];
633         if (s.full) {
634                 CiteStyle const * last = citeStylesFull + nCiteStylesFull;
635                 if (find(citeStylesFull, last, s.style) != last)
636                         cite += '*';
637         }
638
639         if (s.forceUpperCase) {
640                 CiteStyle const * last = citeStylesUCase + nCiteStylesUCase;
641                 if (find(citeStylesUCase, last, s.style) != last)
642                         cite[0] = 'C';
643         }
644
645         return cite;
646 }
647
648 vector<CiteStyle> citeStyles(CiteEngine engine)
649 {
650         unsigned int nStyles = 0;
651         unsigned int start = 0;
652
653         switch (engine) {
654                 case ENGINE_BASIC:
655                         nStyles = 2;
656                         start = 0;
657                         break;
658                 case ENGINE_NATBIB_AUTHORYEAR:
659                 case ENGINE_NATBIB_NUMERICAL:
660                         nStyles = nCiteStyles - 1;
661                         start = 1;
662                         break;
663                 case ENGINE_JURABIB:
664                         nStyles = nCiteStyles;
665                         start = 0;
666                         break;
667         }
668
669         vector<CiteStyle> styles(nStyles);
670         size_t i = 0;
671         int j = start;
672         for (; i != styles.size(); ++i, ++j)
673                 styles[i] = citeStylesArray[j];
674
675         return styles;
676 }
677
678 } // namespace lyx
679