]> git.lyx.org Git - lyx.git/blob - src/BiblioInfo.cpp
Fix compilation on win
[lyx.git] / src / BiblioInfo.cpp
1 /**
2  * \file BiblioInfo.cpp
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author Angus Leeming
7  * \author Herbert Voß
8  * \author Richard Heck
9  *
10  * Full author contact details are available in file CREDITS.
11  */
12
13 #include <config.h>
14
15 #include "BiblioInfo.h"
16 #include "Buffer.h"
17 #include "BufferParams.h"
18 #include "buffer_funcs.h"
19 #include "Encoding.h"
20 #include "InsetIterator.h"
21 #include "Paragraph.h"
22
23 #include "insets/Inset.h"
24 #include "insets/InsetBibitem.h"
25 #include "insets/InsetBibtex.h"
26 #include "insets/InsetInclude.h"
27
28 #include "support/docstream.h"
29 #include "support/gettext.h"
30 #include "support/lassert.h"
31 #include "support/lstrings.h"
32 #include "support/textutils.h"
33
34 #include "boost/regex.hpp"
35
36 using namespace std;
37 using namespace lyx::support;
38
39
40 namespace lyx {
41
42 namespace {
43
44 // gets the "family name" from an author-type string
45 docstring familyName(docstring const & name)
46 {
47         if (name.empty())
48                 return docstring();
49
50         // first we look for a comma, and take the last name to be everything
51         // preceding the right-most one, so that we also get the "jr" part.
52         docstring::size_type idx = name.rfind(',');
53         if (idx != docstring::npos)
54                 return ltrim(name.substr(0, idx));
55
56         // OK, so now we want to look for the last name. We're going to
57         // include the "von" part. This isn't perfect.
58         // Split on spaces, to get various tokens.
59         vector<docstring> pieces = getVectorFromString(name, from_ascii(" "));
60         // If we only get two, assume the last one is the last name
61         if (pieces.size() <= 2)
62                 return pieces.back();
63
64         // Now we look for the first token that begins with a lower case letter.
65         vector<docstring>::const_iterator it = pieces.begin();
66         vector<docstring>::const_iterator en = pieces.end();
67         for (; it != en; ++it) {
68                 if ((*it).size() == 0)
69                         continue;
70                 char_type const c = (*it)[0];
71                 if (isLower(c))
72                         break;
73         }
74
75         if (it == en) // we never found a "von"
76                 return pieces.back();
77
78         // reconstruct what we need to return
79         docstring retval;
80         bool first = true;
81         for (; it != en; ++it) {
82                 if (!first)
83                         retval += " ";
84                 else 
85                         first = false;
86                 retval += *it;
87         }
88         return retval;
89 }
90
91 // converts a string containing LaTeX commands into unicode
92 // for display.
93 docstring convertLaTeXCommands(docstring const & str)
94 {
95         docstring val = str;
96         docstring ret;
97
98         bool scanning_cmd = false;
99         bool scanning_math = false;
100         bool escaped = false; // used to catch \$, etc.
101         while (val.size()) {
102                 char_type const ch = val[0];
103
104                 // if we're scanning math, we output everything until we
105                 // find an unescaped $, at which point we break out.
106                 if (scanning_math) {
107                         if (escaped)
108                                 escaped = false;
109                         else if (ch == '\\')
110                                 escaped = true;
111                         else if (ch == '$') 
112                                 scanning_math = false;
113                         ret += ch;
114                         val = val.substr(1);
115                         continue;
116                 }
117
118                 // if we're scanning a command name, then we just
119                 // discard characters until we hit something that
120                 // isn't alpha.
121                 if (scanning_cmd) {
122                         if (isAlphaASCII(ch)) {
123                                 val = val.substr(1);
124                                 escaped = false;
125                                 continue;
126                         }
127                         // so we're done with this command.
128                         // now we fall through and check this character.
129                         scanning_cmd = false;
130                 }
131
132                 // was the last character a \? If so, then this is something like: \\,
133                 // or \$, so we'll just output it. That's probably not always right...
134                 if (escaped) {
135                         ret += ch;
136                         val = val.substr(1);
137                         escaped = false;
138                         continue;
139                 }
140
141                 if (ch == '$') {
142                         ret += ch;
143                         val = val.substr(1);
144                         scanning_math = true;
145                         continue;
146                 }
147
148                 // we just ignore braces
149                 if (ch == '{' || ch == '}') {
150                         val = val.substr(1);
151                         continue;
152                 }
153
154                 // we're going to check things that look like commands, so if
155                 // this doesn't, just output it.
156                 if (ch != '\\') {
157                         ret += ch;
158                         val = val.substr(1);
159                         continue;
160                 }
161
162                 // ok, could be a command of some sort
163                 // let's see if it corresponds to some unicode
164                 // unicodesymbols has things in the form: \"{u},
165                 // whereas we may see things like: \"u. So we'll
166                 // look for that and change it, if necessary.
167                 static boost::regex const reg("^\\\\\\W\\w");
168                 if (boost::regex_search(to_utf8(val), reg)) {
169                         val.insert(3, from_ascii("}"));
170                         val.insert(2, from_ascii("{"));
171                 }
172                 docstring rem;
173                 docstring const cnvtd = Encodings::fromLaTeXCommand(val, rem);
174                 if (!cnvtd.empty()) {
175                         // it did, so we'll take that bit and proceed with what's left
176                         ret += cnvtd;
177                         val = rem;
178                         continue;
179                 }
180                 // it's a command of some sort
181                 scanning_cmd = true;
182                 escaped = true;
183                 val = val.substr(1);
184         }
185         return ret;
186 }
187
188 } // anon namespace
189
190
191 //////////////////////////////////////////////////////////////////////
192 //
193 // BibTeXInfo
194 //
195 //////////////////////////////////////////////////////////////////////
196
197 BibTeXInfo::BibTeXInfo(docstring const & key, docstring const & type)
198         : is_bibtex_(true), bib_key_(key), entry_type_(type), info_()
199 {}
200
201
202 bool BibTeXInfo::hasField(docstring const & field) const
203 {
204         return count(field) == 1;
205 }
206
207
208 docstring const BibTeXInfo::getAbbreviatedAuthor() const
209 {
210         if (!is_bibtex_) {
211                 docstring const opt = trim(operator[]("label"));
212                 if (opt.empty())
213                         return docstring();
214
215                 docstring authors;
216                 split(opt, authors, '(');
217                 return authors;
218         }
219
220         docstring author = operator[]("author");
221         if (author.empty()) {
222                 author = operator[]("editor");
223                 if (author.empty())
224                         return bib_key_;
225         }
226
227         // OK, we've got some names. Let's format them.
228         // Try to split the author list on " and "
229         vector<docstring> const authors =
230                 getVectorFromString(author, from_ascii(" and "));
231
232         if (authors.size() == 2)
233                 return bformat(_("%1$s and %2$s"),
234                         familyName(authors[0]), familyName(authors[1]));
235
236         if (authors.size() > 2)
237                 return bformat(_("%1$s et al."), familyName(authors[0]));
238
239         return familyName(authors[0]);
240 }
241
242
243 docstring const BibTeXInfo::getYear() const
244 {
245         if (is_bibtex_) 
246                 return operator[]("year");
247
248         docstring const opt = trim(operator[]("label"));
249         if (opt.empty())
250                 return docstring();
251
252         docstring authors;
253         docstring const tmp = split(opt, authors, '(');
254         docstring year;
255         split(tmp, year, ')');
256         return year;
257 }
258
259
260 docstring const BibTeXInfo::getXRef() const
261 {
262         if (!is_bibtex_)
263                 return docstring();
264         return operator[]("crossref");
265 }
266
267
268 docstring const & BibTeXInfo::getInfo(BibTeXInfo const * const xref) const
269 {
270         if (!info_.empty())
271                 return info_;
272
273         if (!is_bibtex_) {
274                 BibTeXInfo::const_iterator it = find(from_ascii("ref"));
275                 info_ = it->second;
276                 return info_;
277         }
278  
279         // FIXME
280         // This could be made a lot better using the entry_type_
281         // field to customize the output based upon entry type.
282         
283         // Search for all possible "required" fields
284         docstring author = getValueForKey("author", xref);
285         if (author.empty())
286                 author = getValueForKey("editor", xref);
287  
288         docstring year   = getValueForKey("year", xref);
289         docstring title  = getValueForKey("title", xref);
290         docstring docLoc = getValueForKey("pages", xref);
291         if (docLoc.empty()) {
292                 docLoc = getValueForKey("chapter", xref);
293                 if (!docLoc.empty())
294                         docLoc = _("Ch. ") + docLoc;
295         }       else {
296                 docLoc = _("pp. ") + docLoc;
297         }
298
299         docstring media = getValueForKey("journal", xref);
300         if (media.empty()) {
301                 media = getValueForKey("publisher", xref);
302                 if (media.empty()) {
303                         media = getValueForKey("school", xref);
304                         if (media.empty())
305                                 media = getValueForKey("institution");
306                 }
307         }
308         docstring volume = getValueForKey("volume", xref);
309
310         odocstringstream result;
311         if (!author.empty())
312                 result << author << ", ";
313         if (!title.empty())
314                 result << title;
315         if (!media.empty())
316                 result << ", " << media;
317         if (!year.empty())
318                 result << " (" << year << ")";
319         if (!docLoc.empty())
320                 result << ", " << docLoc;
321
322         docstring const result_str = rtrim(result.str());
323         if (!result_str.empty()) {
324                 info_ = convertLaTeXCommands(result_str);
325                 return info_;
326         }
327
328         // This should never happen (or at least be very unusual!)
329         static docstring e = docstring();
330         return e;
331 }
332
333
334 docstring const & BibTeXInfo::operator[](docstring const & field) const
335 {
336         BibTeXInfo::const_iterator it = find(field);
337         if (it != end())
338                 return it->second;
339         static docstring const empty_value = docstring();
340         return empty_value;
341 }
342         
343         
344 docstring const & BibTeXInfo::operator[](string const & field) const
345 {
346         return operator[](from_ascii(field));
347 }
348
349
350 docstring BibTeXInfo::getValueForKey(string const & key, 
351                 BibTeXInfo const * const xref) const
352 {
353         docstring const ret = operator[](key);
354         if (!ret.empty() || !xref)
355                 return ret;
356         return (*xref)[key];
357 }
358
359
360 //////////////////////////////////////////////////////////////////////
361 //
362 // BiblioInfo
363 //
364 //////////////////////////////////////////////////////////////////////
365
366 namespace {
367 // A functor for use with sort, leading to case insensitive sorting
368         class compareNoCase: public binary_function<docstring, docstring, bool>
369         {
370                 public:
371                         bool operator()(docstring const & s1, docstring const & s2) const {
372                                 return compare_no_case(s1, s2) < 0;
373                         }
374         };
375 } // namespace anon
376
377
378 vector<docstring> const BiblioInfo::getKeys() const
379 {
380         vector<docstring> bibkeys;
381         BiblioInfo::const_iterator it  = begin();
382         for (; it != end(); ++it)
383                 bibkeys.push_back(it->first);
384         sort(bibkeys.begin(), bibkeys.end(), compareNoCase());
385         return bibkeys;
386 }
387
388
389 vector<docstring> const BiblioInfo::getFields() const
390 {
391         vector<docstring> bibfields;
392         set<docstring>::const_iterator it = field_names_.begin();
393         set<docstring>::const_iterator end = field_names_.end();
394         for (; it != end; ++it)
395                 bibfields.push_back(*it);
396         sort(bibfields.begin(), bibfields.end());
397         return bibfields;
398 }
399
400
401 vector<docstring> const BiblioInfo::getEntries() const
402 {
403         vector<docstring> bibentries;
404         set<docstring>::const_iterator it = entry_types_.begin();
405         set<docstring>::const_iterator end = entry_types_.end();
406         for (; it != end; ++it)
407                 bibentries.push_back(*it);
408         sort(bibentries.begin(), bibentries.end());
409         return bibentries;
410 }
411
412
413 docstring const BiblioInfo::getAbbreviatedAuthor(docstring const & key) const
414 {
415         BiblioInfo::const_iterator it = find(key);
416         if (it == end())
417                 return docstring();
418         BibTeXInfo const & data = it->second;
419         return data.getAbbreviatedAuthor();
420 }
421
422
423 docstring const BiblioInfo::getYear(docstring const & key) const
424 {
425         BiblioInfo::const_iterator it = find(key);
426         if (it == end())
427                 return docstring();
428         BibTeXInfo const & data = it->second;
429         docstring year = data.getYear();
430         if (!year.empty())
431                 return year;
432         // let's try the crossref
433         docstring const xref = data.getXRef();
434         if (xref.empty())
435                 return _("No year"); // no luck
436         BiblioInfo::const_iterator const xrefit = find(xref);
437         if (xrefit == end())
438                 return _("No year"); // no luck again
439         BibTeXInfo const & xref_data = xrefit->second;
440         return xref_data.getYear();
441         return data.getYear();
442 }
443
444
445 docstring const BiblioInfo::getInfo(docstring const & key) const
446 {
447         BiblioInfo::const_iterator it = find(key);
448         if (it == end())
449                 return docstring();
450         BibTeXInfo const & data = it->second;
451         BibTeXInfo const * xrefptr = 0;
452         docstring const xref = data.getXRef();
453         if (!xref.empty()) {
454                 BiblioInfo::const_iterator const xrefit = find(xref);
455                 if (xrefit != end())
456                         xrefptr = &(xrefit->second);
457         }
458         return data.getInfo(xrefptr);
459 }
460
461
462 vector<docstring> const BiblioInfo::getCiteStrings(
463         docstring const & key, Buffer const & buf) const
464 {
465         CiteEngine const engine = buf.params().citeEngine();
466         if (engine == ENGINE_BASIC || engine == ENGINE_NATBIB_NUMERICAL)
467                 return getNumericalStrings(key, buf);
468         else
469                 return getAuthorYearStrings(key, buf);
470 }
471
472
473 vector<docstring> const BiblioInfo::getNumericalStrings(
474         docstring const & key, Buffer const & buf) const
475 {
476         if (empty())
477                 return vector<docstring>();
478
479         docstring const author = getAbbreviatedAuthor(key);
480         docstring const year   = getYear(key);
481         if (author.empty() || year.empty())
482                 return vector<docstring>();
483
484         vector<CiteStyle> const & styles = citeStyles(buf.params().citeEngine());
485         
486         vector<docstring> vec(styles.size());
487         for (size_t i = 0; i != vec.size(); ++i) {
488                 docstring str;
489
490                 switch (styles[i]) {
491                         case CITE:
492                         case CITEP:
493                                 str = from_ascii("[#ID]");
494                                 break;
495
496                         case NOCITE:
497                                 str = _("Add to bibliography only.");
498                                 break;
499
500                         case CITET:
501                                 str = author + " [#ID]";
502                                 break;
503
504                         case CITEALT:
505                                 str = author + " #ID";
506                                 break;
507
508                         case CITEALP:
509                                 str = from_ascii("#ID");
510                                 break;
511
512                         case CITEAUTHOR:
513                                 str = author;
514                                 break;
515
516                         case CITEYEAR:
517                                 str = year;
518                                 break;
519
520                         case CITEYEARPAR:
521                                 str = '(' + year + ')';
522                                 break;
523                 }
524
525                 vec[i] = str;
526         }
527
528         return vec;
529 }
530
531
532 vector<docstring> const BiblioInfo::getAuthorYearStrings(
533         docstring const & key, Buffer const & buf) const
534 {
535         if (empty())
536                 return vector<docstring>();
537
538         docstring const author = getAbbreviatedAuthor(key);
539         docstring const year   = getYear(key);
540         if (author.empty() || year.empty())
541                 return vector<docstring>();
542
543         vector<CiteStyle> const & styles = citeStyles(buf.params().citeEngine());
544         
545         vector<docstring> vec(styles.size());
546         for (size_t i = 0; i != vec.size(); ++i) {
547                 docstring str;
548
549                 switch (styles[i]) {
550                         case CITE:
551                 // jurabib only: Author/Annotator
552                 // (i.e. the "before" field, 2nd opt arg)
553                                 str = author + "/<" + _("before") + '>';
554                                 break;
555
556                         case NOCITE:
557                                 str = _("Add to bibliography only.");
558                                 break;
559
560                         case CITET:
561                                 str = author + " (" + year + ')';
562                                 break;
563
564                         case CITEP:
565                                 str = '(' + author + ", " + year + ')';
566                                 break;
567
568                         case CITEALT:
569                                 str = author + ' ' + year ;
570                                 break;
571
572                         case CITEALP:
573                                 str = author + ", " + year ;
574                                 break;
575
576                         case CITEAUTHOR:
577                                 str = author;
578                                 break;
579
580                         case CITEYEAR:
581                                 str = year;
582                                 break;
583
584                         case CITEYEARPAR:
585                                 str = '(' + year + ')';
586                                 break;
587                 }
588                 vec[i] = str;
589         }
590         return vec;
591 }
592
593
594 void BiblioInfo::mergeBiblioInfo(BiblioInfo const & info)
595 {
596         bimap_.insert(info.begin(), info.end());
597 }
598
599
600 //////////////////////////////////////////////////////////////////////
601 //
602 // CitationStyle
603 //
604 //////////////////////////////////////////////////////////////////////
605
606 namespace {
607
608
609 char const * const citeCommands[] = {
610         "cite", "citet", "citep", "citealt", "citealp",
611         "citeauthor", "citeyear", "citeyearpar", "nocite" };
612
613 unsigned int const nCiteCommands =
614                 sizeof(citeCommands) / sizeof(char *);
615
616 CiteStyle const citeStylesArray[] = {
617         CITE, CITET, CITEP, CITEALT, CITEALP, 
618         CITEAUTHOR, CITEYEAR, CITEYEARPAR, NOCITE };
619
620 unsigned int const nCiteStyles =
621                 sizeof(citeStylesArray) / sizeof(CiteStyle);
622
623 CiteStyle const citeStylesFull[] = {
624         CITET, CITEP, CITEALT, CITEALP, CITEAUTHOR };
625
626 unsigned int const nCiteStylesFull =
627                 sizeof(citeStylesFull) / sizeof(CiteStyle);
628
629 CiteStyle const citeStylesUCase[] = {
630         CITET, CITEP, CITEALT, CITEALP, CITEAUTHOR };
631
632 unsigned int const nCiteStylesUCase =
633         sizeof(citeStylesUCase) / sizeof(CiteStyle);
634
635 } // namespace anon
636
637
638 CitationStyle citationStyleFromString(string const & command)
639 {
640         CitationStyle s;
641         if (command.empty())
642                 return s;
643
644         string cmd = command;
645         if (cmd[0] == 'C') {
646                 s.forceUpperCase = true;
647                 cmd[0] = 'c';
648         }
649
650         size_t const n = cmd.size() - 1;
651         if (cmd != "cite" && cmd[n] == '*') {
652                 s.full = true;
653                 cmd = cmd.substr(0, n);
654         }
655
656         char const * const * const last = citeCommands + nCiteCommands;
657         char const * const * const ptr = find(citeCommands, last, cmd);
658
659         if (ptr != last) {
660                 size_t idx = ptr - citeCommands;
661                 s.style = citeStylesArray[idx];
662         }
663         return s;
664 }
665
666
667 string citationStyleToString(const CitationStyle & s)
668 {
669         string cite = citeCommands[s.style];
670         if (s.full) {
671                 CiteStyle const * last = citeStylesFull + nCiteStylesFull;
672                 if (find(citeStylesFull, last, s.style) != last)
673                         cite += '*';
674         }
675
676         if (s.forceUpperCase) {
677                 CiteStyle const * last = citeStylesUCase + nCiteStylesUCase;
678                 if (find(citeStylesUCase, last, s.style) != last)
679                         cite[0] = 'C';
680         }
681
682         return cite;
683 }
684
685 vector<CiteStyle> citeStyles(CiteEngine engine)
686 {
687         unsigned int nStyles = 0;
688         unsigned int start = 0;
689
690         switch (engine) {
691                 case ENGINE_BASIC:
692                         nStyles = 2;
693                         start = 0;
694                         break;
695                 case ENGINE_NATBIB_AUTHORYEAR:
696                 case ENGINE_NATBIB_NUMERICAL:
697                         nStyles = nCiteStyles - 1;
698                         start = 1;
699                         break;
700                 case ENGINE_JURABIB:
701                         nStyles = nCiteStyles;
702                         start = 0;
703                         break;
704         }
705
706         vector<CiteStyle> styles(nStyles);
707         size_t i = 0;
708         int j = start;
709         for (; i != styles.size(); ++i, ++j)
710                 styles[i] = citeStylesArray[j];
711
712         return styles;
713 }
714
715 } // namespace lyx
716