git.lyx.org Git - lyx.git/blob - src/BiblioInfo.cpp
Not so soon, I guess, since that FIXME was from r6305.
[lyx.git] / src / BiblioInfo.cpp
1 /**
2  * \file BiblioInfo.cpp
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author Angus Leeming
7  * \author Herbert Voß
8  * \author Richard Heck
9  *
10  * Full author contact details are available in file CREDITS.
11  */
12
13 #include <config.h>
14
15 #include "BiblioInfo.h"
16 #include "Buffer.h"
17 #include "BufferParams.h"
18 #include "buffer_funcs.h"
19 #include "Encoding.h"
20 #include "InsetIterator.h"
21 #include "Paragraph.h"
22
23 #include "insets/Inset.h"
24 #include "insets/InsetBibitem.h"
25 #include "insets/InsetBibtex.h"
26 #include "insets/InsetInclude.h"
27
28 #include "support/docstream.h"
29 #include "support/gettext.h"
30 #include "support/lassert.h"
31 #include "support/lstrings.h"
32 #include "support/textutils.h"
33
34 #include "boost/regex.hpp"
35
36 using namespace std;
37 using namespace lyx::support;
38
39
40 namespace lyx {
41
42 namespace {
43
44 // gets the "family name" from an author-type string
45 docstring familyName(docstring const & name)
46 {
47         if (name.empty())
48                 return docstring();
49
50         // first we look for a comma, and take the last name to be everything
51         // preceding the right-most one, so that we also get the "jr" part.
52         docstring::size_type idx = name.rfind(',');
53         if (idx != docstring::npos)
54                 return ltrim(name.substr(0, idx));
55
56         // OK, so now we want to look for the last name. We're going to
57         // include the "von" part. This isn't perfect.
58         // Split on spaces, to get various tokens.
59         vector<docstring> pieces = getVectorFromString(name, from_ascii(" "));
60         // If we only get two, assume the last one is the last name
61         if (pieces.size() <= 2)
62                 return pieces.back();
63
64         // Now we look for the first token that begins with a lower case letter.
65         vector<docstring>::const_iterator it = pieces.begin();
66         vector<docstring>::const_iterator en = pieces.end();
67         for (; it != en; ++it) {
68                 if ((*it).size() == 0)
69                         continue;
70                 char_type const c = (*it)[0];
71                 if (isLower(c))
72                         break;
73         }
74
75         if (it == en) // we never found a "von"
76                 return pieces.back();
77
78         // reconstruct what we need to return
79         docstring retval;
80         bool first = true;
81         for (; it != en; ++it) {
82                 if (!first)
83                         retval += " ";
84                 else 
85                         first = false;
86                 retval += *it;
87         }
88         return retval;
89 }
90
91 // converts a string containing LaTeX commands into unicode
92 // for display.
93 docstring convertLaTeXCommands(docstring const & str)
94 {
95         docstring val = str;
96         docstring ret;
97
98         bool scanning_cmd = false;
99         bool scanning_math = false;
100         bool escaped = false; // used to catch \$, etc.
101         while (val.size()) {
102                 char_type const ch = val[0];
103
104                 // if we're scanning math, we output everything until we
105                 // find an unescaped $, at which point we break out.
106                 if (scanning_math) {
107                         if (escaped)
108                                 escaped = false;
109                         else if (ch == '\\')
110                                 escaped = true;
111                         else if (ch == '$') 
112                                 scanning_math = false;
113                         ret += ch;
114                         val = val.substr(1);
115                         continue;
116                 }
117
118                 // if we're scanning a command name, then we just
119                 // discard characters until we hit something that
120                 // isn't alpha.
121                 if (scanning_cmd) {
122                         if (isAlphaASCII(ch)) {
123                                 val = val.substr(1);
124                                 escaped = false;
125                                 continue;
126                         }
127                         // so we're done with this command.
128                         // now we fall through and check this character.
129                         scanning_cmd = false;
130                 }
131
132                 // was the last character a \? If so, then this is something like: \\,
133                 // or \$, so we'll just output it. That's probably not always right...
134                 if (escaped) {
135                         ret += ch;
136                         val = val.substr(1);
137                         escaped = false;
138                         continue;
139                 }
140
141                 if (ch == '$') {
142                         ret += ch;
143                         val = val.substr(1);
144                         scanning_math = true;
145                         continue;
146                 }
147
148                 // we just ignore braces
149                 if (ch == '{' || ch == '}') {
150                         val = val.substr(1);
151                         continue;
152                 }
153
154                 // we're going to check things that look like commands, so if
155                 // this doesn't, just output it.
156                 if (ch != '\\') {
157                         ret += ch;
158                         val = val.substr(1);
159                         continue;
160                 }
161
162                 // ok, could be a command of some sort
163                 // let's see if it corresponds to some unicode
164                 // unicodesymbols has things in the form: \"{u},
165                 // whereas we may see things like: \"u. So we'll
166                 // look for that and change it, if necessary.
167                 static boost::regex const reg("^\\\\\\W\\w");
168                 if (boost::regex_search(to_utf8(val), reg)) {
169                         val.insert(3, from_ascii("}"));
170                         val.insert(2, from_ascii("{"));
171                 }
172                 docstring rem;
173                 docstring const cnvtd = Encodings::fromLaTeXCommand(val, rem,
174                                                         Encodings::TEXT_CMD);
175                 if (!cnvtd.empty()) {
176                         // it did, so we'll take that bit and proceed with what's left
177                         ret += cnvtd;
178                         val = rem;
179                         continue;
180                 }
181                 // it's a command of some sort
182                 scanning_cmd = true;
183                 escaped = true;
184                 val = val.substr(1);
185         }
186         return ret;
187 }
188
189 } // anon namespace
190
191
192 //////////////////////////////////////////////////////////////////////
193 //
194 // BibTeXInfo
195 //
196 //////////////////////////////////////////////////////////////////////
197
198 BibTeXInfo::BibTeXInfo(docstring const & key, docstring const & type)
199         : is_bibtex_(true), bib_key_(key), entry_type_(type), info_()
200 {}
201
202
203 bool BibTeXInfo::hasField(docstring const & field) const
204 {
205         return count(field) == 1;
206 }
207
208
209 docstring const BibTeXInfo::getAbbreviatedAuthor() const
210 {
211         if (!is_bibtex_) {
212                 docstring const opt = label();
213                 if (opt.empty())
214                         return docstring();
215
216                 docstring authors;
217                 split(opt, authors, '(');
218                 return authors;
219         }
220
221         docstring author = convertLaTeXCommands(operator[]("author"));
222         if (author.empty()) {
223                 author = convertLaTeXCommands(operator[]("editor"));
224                 if (author.empty())
225                         return bib_key_;
226         }
227
228         // OK, we've got some names. Let's format them.
229         // Try to split the author list on " and "
230         vector<docstring> const authors =
231                 getVectorFromString(author, from_ascii(" and "));
232
233         if (authors.size() == 2)
234                 return bformat(_("%1$s and %2$s"),
235                         familyName(authors[0]), familyName(authors[1]));
236
237         if (authors.size() > 2)
238                 return bformat(_("%1$s et al."), familyName(authors[0]));
239
240         return familyName(authors[0]);
241 }
242
243
244 docstring const BibTeXInfo::getYear() const
245 {
246         if (is_bibtex_) 
247                 return operator[]("year");
248
249         docstring const opt = label();
250         if (opt.empty())
251                 return docstring();
252
253         docstring authors;
254         docstring const tmp = split(opt, authors, '(');
255         docstring year;
256         split(tmp, year, ')');
257         return year;
258 }
259
260
261 docstring const BibTeXInfo::getXRef() const
262 {
263         if (!is_bibtex_)
264                 return docstring();
265         return operator[]("crossref");
266 }
267
268
269 docstring const & BibTeXInfo::getInfo(BibTeXInfo const * const xref) const
270 {
271         if (!info_.empty())
272                 return info_;
273
274         if (!is_bibtex_) {
275                 BibTeXInfo::const_iterator it = find(from_ascii("ref"));
276                 info_ = it->second;
277                 return info_;
278         }
279  
280         // FIXME
281         // This could be made a lot better using the entry_type_
282         // field to customize the output based upon entry type.
283         
284         // Search for all possible "required" fields
285         docstring author = getValueForKey("author", xref);
286         if (author.empty())
287                 author = getValueForKey("editor", xref);
288  
289         docstring year   = getValueForKey("year", xref);
290         docstring title  = getValueForKey("title", xref);
291         docstring docLoc = getValueForKey("pages", xref);
292         if (docLoc.empty()) {
293                 docLoc = getValueForKey("chapter", xref);
294                 if (!docLoc.empty())
295                         docLoc = _("Ch. ") + docLoc;
296         }       else {
297                 docLoc = _("pp. ") + docLoc;
298         }
299
300         docstring media = getValueForKey("journal", xref);
301         if (media.empty()) {
302                 media = getValueForKey("publisher", xref);
303                 if (media.empty()) {
304                         media = getValueForKey("school", xref);
305                         if (media.empty())
306                                 media = getValueForKey("institution");
307                 }
308         }
309         docstring volume = getValueForKey("volume", xref);
310
311         odocstringstream result;
312         if (!author.empty())
313                 result << author << ", ";
314         if (!title.empty())
315                 result << title;
316         if (!media.empty())
317                 result << ", " << media;
318         if (!year.empty())
319                 result << " (" << year << ")";
320         if (!docLoc.empty())
321                 result << ", " << docLoc;
322
323         docstring const result_str = rtrim(result.str());
324         if (!result_str.empty()) {
325                 info_ = convertLaTeXCommands(result_str);
326                 return info_;
327         }
328
329         // This should never happen (or at least be very unusual!)
330         static docstring e = docstring();
331         return e;
332 }
333
334
335 docstring const & BibTeXInfo::operator[](docstring const & field) const
336 {
337         BibTeXInfo::const_iterator it = find(field);
338         if (it != end())
339                 return it->second;
340         static docstring const empty_value = docstring();
341         return empty_value;
342 }
343         
344         
345 docstring const & BibTeXInfo::operator[](string const & field) const
346 {
347         return operator[](from_ascii(field));
348 }
349
350
351 docstring BibTeXInfo::getValueForKey(string const & key, 
352                 BibTeXInfo const * const xref) const
353 {
354         docstring const ret = operator[](key);
355         if (!ret.empty() || !xref)
356                 return ret;
357         return (*xref)[key];
358 }
359
360
361 //////////////////////////////////////////////////////////////////////
362 //
363 // BiblioInfo
364 //
365 //////////////////////////////////////////////////////////////////////
366
367 namespace {
368 // A functor for use with sort, leading to case insensitive sorting
369         class compareNoCase: public binary_function<docstring, docstring, bool>
370         {
371                 public:
372                         bool operator()(docstring const & s1, docstring const & s2) const {
373                                 return compare_no_case(s1, s2) < 0;
374                         }
375         };
376 } // namespace anon
377
378
379 vector<docstring> const BiblioInfo::getKeys() const
380 {
381         vector<docstring> bibkeys;
382         BiblioInfo::const_iterator it  = begin();
383         for (; it != end(); ++it)
384                 bibkeys.push_back(it->first);
385         sort(bibkeys.begin(), bibkeys.end(), compareNoCase());
386         return bibkeys;
387 }
388
389
390 vector<docstring> const BiblioInfo::getFields() const
391 {
392         vector<docstring> bibfields;
393         set<docstring>::const_iterator it = field_names_.begin();
394         set<docstring>::const_iterator end = field_names_.end();
395         for (; it != end; ++it)
396                 bibfields.push_back(*it);
397         sort(bibfields.begin(), bibfields.end());
398         return bibfields;
399 }
400
401
402 vector<docstring> const BiblioInfo::getEntries() const
403 {
404         vector<docstring> bibentries;
405         set<docstring>::const_iterator it = entry_types_.begin();
406         set<docstring>::const_iterator end = entry_types_.end();
407         for (; it != end; ++it)
408                 bibentries.push_back(*it);
409         sort(bibentries.begin(), bibentries.end());
410         return bibentries;
411 }
412
413
414 docstring const BiblioInfo::getAbbreviatedAuthor(docstring const & key) const
415 {
416         BiblioInfo::const_iterator it = find(key);
417         if (it == end())
418                 return docstring();
419         BibTeXInfo const & data = it->second;
420         return data.getAbbreviatedAuthor();
421 }
422
423
424 docstring const BiblioInfo::getYear(docstring const & key) const
425 {
426         BiblioInfo::const_iterator it = find(key);
427         if (it == end())
428                 return docstring();
429         BibTeXInfo const & data = it->second;
430         docstring year = data.getYear();
431         if (!year.empty())
432                 return year;
433         // let's try the crossref
434         docstring const xref = data.getXRef();
435         if (xref.empty())
436                 return _("No year"); // no luck
437         BiblioInfo::const_iterator const xrefit = find(xref);
438         if (xrefit == end())
439                 return _("No year"); // no luck again
440         BibTeXInfo const & xref_data = xrefit->second;
441         return xref_data.getYear();
442         return data.getYear();
443 }
444
445
446 docstring const BiblioInfo::getInfo(docstring const & key) const
447 {
448         BiblioInfo::const_iterator it = find(key);
449         if (it == end())
450                 return docstring();
451         BibTeXInfo const & data = it->second;
452         BibTeXInfo const * xrefptr = 0;
453         docstring const xref = data.getXRef();
454         if (!xref.empty()) {
455                 BiblioInfo::const_iterator const xrefit = find(xref);
456                 if (xrefit != end())
457                         xrefptr = &(xrefit->second);
458         }
459         return data.getInfo(xrefptr);
460 }
461
462
463 vector<docstring> const BiblioInfo::getCiteStrings(
464         docstring const & key, Buffer const & buf) const
465 {
466         CiteEngine const engine = buf.params().citeEngine();
467         if (engine == ENGINE_BASIC || engine == ENGINE_NATBIB_NUMERICAL)
468                 return getNumericalStrings(key, buf);
469         else
470                 return getAuthorYearStrings(key, buf);
471 }
472
473
474 vector<docstring> const BiblioInfo::getNumericalStrings(
475         docstring const & key, Buffer const & buf) const
476 {
477         if (empty())
478                 return vector<docstring>();
479
480         docstring const author = getAbbreviatedAuthor(key);
481         docstring const year   = getYear(key);
482         if (author.empty() || year.empty())
483                 return vector<docstring>();
484
485         vector<CiteStyle> const & styles = citeStyles(buf.params().citeEngine());
486         
487         vector<docstring> vec(styles.size());
488         for (size_t i = 0; i != vec.size(); ++i) {
489                 docstring str;
490
491                 switch (styles[i]) {
492                         case CITE:
493                         case CITEP:
494                                 str = from_ascii("[#ID]");
495                                 break;
496
497                         case NOCITE:
498                                 str = _("Add to bibliography only.");
499                                 break;
500
501                         case CITET:
502                                 str = author + " [#ID]";
503                                 break;
504
505                         case CITEALT:
506                                 str = author + " #ID";
507                                 break;
508
509                         case CITEALP:
510                                 str = from_ascii("#ID");
511                                 break;
512
513                         case CITEAUTHOR:
514                                 str = author;
515                                 break;
516
517                         case CITEYEAR:
518                                 str = year;
519                                 break;
520
521                         case CITEYEARPAR:
522                                 str = '(' + year + ')';
523                                 break;
524                 }
525
526                 vec[i] = str;
527         }
528
529         return vec;
530 }
531
532
533 vector<docstring> const BiblioInfo::getAuthorYearStrings(
534         docstring const & key, Buffer const & buf) const
535 {
536         if (empty())
537                 return vector<docstring>();
538
539         docstring const author = getAbbreviatedAuthor(key);
540         docstring const year   = getYear(key);
541         if (author.empty() || year.empty())
542                 return vector<docstring>();
543
544         vector<CiteStyle> const & styles = citeStyles(buf.params().citeEngine());
545         
546         vector<docstring> vec(styles.size());
547         for (size_t i = 0; i != vec.size(); ++i) {
548                 docstring str;
549
550                 switch (styles[i]) {
551                         case CITE:
552                 // jurabib only: Author/Annotator
553                 // (i.e. the "before" field, 2nd opt arg)
554                                 str = author + "/<" + _("before") + '>';
555                                 break;
556
557                         case NOCITE:
558                                 str = _("Add to bibliography only.");
559                                 break;
560
561                         case CITET:
562                                 str = author + " (" + year + ')';
563                                 break;
564
565                         case CITEP:
566                                 str = '(' + author + ", " + year + ')';
567                                 break;
568
569                         case CITEALT:
570                                 str = author + ' ' + year ;
571                                 break;
572
573                         case CITEALP:
574                                 str = author + ", " + year ;
575                                 break;
576
577                         case CITEAUTHOR:
578                                 str = author;
579                                 break;
580
581                         case CITEYEAR:
582                                 str = year;
583                                 break;
584
585                         case CITEYEARPAR:
586                                 str = '(' + year + ')';
587                                 break;
588                 }
589                 vec[i] = str;
590         }
591         return vec;
592 }
593
594
595 void BiblioInfo::mergeBiblioInfo(BiblioInfo const & info)
596 {
597         bimap_.insert(info.begin(), info.end());
598 }
599
600
601 //////////////////////////////////////////////////////////////////////
602 //
603 // CitationStyle
604 //
605 //////////////////////////////////////////////////////////////////////
606
607 namespace {
608
609
610 char const * const citeCommands[] = {
611         "cite", "citet", "citep", "citealt", "citealp",
612         "citeauthor", "citeyear", "citeyearpar", "nocite" };
613
614 unsigned int const nCiteCommands =
615                 sizeof(citeCommands) / sizeof(char *);
616
617 CiteStyle const citeStylesArray[] = {
618         CITE, CITET, CITEP, CITEALT, CITEALP, 
619         CITEAUTHOR, CITEYEAR, CITEYEARPAR, NOCITE };
620
621 unsigned int const nCiteStyles =
622                 sizeof(citeStylesArray) / sizeof(CiteStyle);
623
624 CiteStyle const citeStylesFull[] = {
625         CITET, CITEP, CITEALT, CITEALP, CITEAUTHOR };
626
627 unsigned int const nCiteStylesFull =
628                 sizeof(citeStylesFull) / sizeof(CiteStyle);
629
630 CiteStyle const citeStylesUCase[] = {
631         CITET, CITEP, CITEALT, CITEALP, CITEAUTHOR };
632
633 unsigned int const nCiteStylesUCase =
634         sizeof(citeStylesUCase) / sizeof(CiteStyle);
635
636 } // namespace anon
637
638
639 CitationStyle citationStyleFromString(string const & command)
640 {
641         CitationStyle s;
642         if (command.empty())
643                 return s;
644
645         string cmd = command;
646         if (cmd[0] == 'C') {
647                 s.forceUpperCase = true;
648                 cmd[0] = 'c';
649         }
650
651         size_t const n = cmd.size() - 1;
652         if (cmd != "cite" && cmd[n] == '*') {
653                 s.full = true;
654                 cmd = cmd.substr(0, n);
655         }
656
657         char const * const * const last = citeCommands + nCiteCommands;
658         char const * const * const ptr = find(citeCommands, last, cmd);
659
660         if (ptr != last) {
661                 size_t idx = ptr - citeCommands;
662                 s.style = citeStylesArray[idx];
663         }
664         return s;
665 }
666
667
668 string citationStyleToString(const CitationStyle & s)
669 {
670         string cite = citeCommands[s.style];
671         if (s.full) {
672                 CiteStyle const * last = citeStylesFull + nCiteStylesFull;
673                 if (std::find(citeStylesFull, last, s.style) != last)
674                         cite += '*';
675         }
676
677         if (s.forceUpperCase) {
678                 CiteStyle const * last = citeStylesUCase + nCiteStylesUCase;
679                 if (std::find(citeStylesUCase, last, s.style) != last)
680                         cite[0] = 'C';
681         }
682
683         return cite;
684 }
685
686 vector<CiteStyle> citeStyles(CiteEngine engine)
687 {
688         unsigned int nStyles = 0;
689         unsigned int start = 0;
690
691         switch (engine) {
692                 case ENGINE_BASIC:
693                         nStyles = 2;
694                         start = 0;
695                         break;
696                 case ENGINE_NATBIB_AUTHORYEAR:
697                 case ENGINE_NATBIB_NUMERICAL:
698                         nStyles = nCiteStyles - 1;
699                         start = 1;
700                         break;
701                 case ENGINE_JURABIB:
702                         nStyles = nCiteStyles;
703                         start = 0;
704                         break;
705         }
706
707         vector<CiteStyle> styles(nStyles);
708         size_t i = 0;
709         int j = start;
710         for (; i != styles.size(); ++i, ++j)
711                 styles[i] = citeStylesArray[j];
712
713         return styles;
714 }
715
716 } // namespace lyx
717