]> git.lyx.org Git - features.git/blob - src/BiblioInfo.cpp
0faf241eb8b53eecc7fb51b07d1966ff6fa5f7a9
[features.git] / src / BiblioInfo.cpp
1 /**
2  * \file BiblioInfo.cpp
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author Angus Leeming
7  * \author Herbert Voß
8  * \author Richard Heck
9  *
10  * Full author contact details are available in file CREDITS.
11  */
12
13 #include <config.h>
14
15 #include "BiblioInfo.h"
16 #include "Buffer.h"
17 #include "BufferParams.h"
18 #include "buffer_funcs.h"
19 #include "Encoding.h"
20 #include "InsetIterator.h"
21 #include "Paragraph.h"
22 #include "TocBackend.h"
23
24 #include "insets/Inset.h"
25 #include "insets/InsetBibitem.h"
26 #include "insets/InsetBibtex.h"
27 #include "insets/InsetInclude.h"
28
29 #include "support/convert.h"
30 #include "support/debug.h"
31 #include "support/docstream.h"
32 #include "support/gettext.h"
33 #include "support/lassert.h"
34 #include "support/lstrings.h"
35 #include "support/textutils.h"
36
37 #include "boost/regex.hpp"
38
39 #include <set>
40
41 using namespace std;
42 using namespace lyx::support;
43
44
45 namespace lyx {
46
47 namespace {
48
49 // gets the "family name" from an author-type string
50 docstring familyName(docstring const & name)
51 {
52         if (name.empty())
53                 return docstring();
54
55         // first we look for a comma, and take the last name to be everything
56         // preceding the right-most one, so that we also get the "jr" part.
57         docstring::size_type idx = name.rfind(',');
58         if (idx != docstring::npos)
59                 return ltrim(name.substr(0, idx));
60
61         // OK, so now we want to look for the last name. We're going to
62         // include the "von" part. This isn't perfect.
63         // Split on spaces, to get various tokens.
64         vector<docstring> pieces = getVectorFromString(name, from_ascii(" "));
65         // If we only get two, assume the last one is the last name
66         if (pieces.size() <= 2)
67                 return pieces.back();
68
69         // Now we look for the first token that begins with a lower case letter.
70         vector<docstring>::const_iterator it = pieces.begin();
71         vector<docstring>::const_iterator en = pieces.end();
72         for (; it != en; ++it) {
73                 if ((*it).size() == 0)
74                         continue;
75                 char_type const c = (*it)[0];
76                 if (isLower(c))
77                         break;
78         }
79
80         if (it == en) // we never found a "von"
81                 return pieces.back();
82
83         // reconstruct what we need to return
84         docstring retval;
85         bool first = true;
86         for (; it != en; ++it) {
87                 if (!first)
88                         retval += " ";
89                 else 
90                         first = false;
91                 retval += *it;
92         }
93         return retval;
94 }
95
96 // converts a string containing LaTeX commands into unicode
97 // for display.
98 docstring convertLaTeXCommands(docstring const & str)
99 {
100         docstring val = str;
101         docstring ret;
102
103         bool scanning_cmd = false;
104         bool scanning_math = false;
105         bool escaped = false; // used to catch \$, etc.
106         while (val.size()) {
107                 char_type const ch = val[0];
108
109                 // if we're scanning math, we output everything until we
110                 // find an unescaped $, at which point we break out.
111                 if (scanning_math) {
112                         if (escaped)
113                                 escaped = false;
114                         else if (ch == '\\')
115                                 escaped = true;
116                         else if (ch == '$') 
117                                 scanning_math = false;
118                         ret += ch;
119                         val = val.substr(1);
120                         continue;
121                 }
122
123                 // if we're scanning a command name, then we just
124                 // discard characters until we hit something that
125                 // isn't alpha.
126                 if (scanning_cmd) {
127                         if (isAlphaASCII(ch)) {
128                                 val = val.substr(1);
129                                 escaped = false;
130                                 continue;
131                         }
132                         // so we're done with this command.
133                         // now we fall through and check this character.
134                         scanning_cmd = false;
135                 }
136
137                 // was the last character a \? If so, then this is something like:
138                 // \\ or \$, so we'll just output it. That's probably not always right...
139                 if (escaped) {
140                         // exception: output \, as THIN SPACE
141                         if (ch == ',')
142                                 ret.push_back(0x2009);
143                         else
144                                 ret += ch;
145                         val = val.substr(1);
146                         escaped = false;
147                         continue;
148                 }
149
150                 if (ch == '$') {
151                         ret += ch;
152                         val = val.substr(1);
153                         scanning_math = true;
154                         continue;
155                 }
156
157                 // we just ignore braces
158                 if (ch == '{' || ch == '}') {
159                         val = val.substr(1);
160                         continue;
161                 }
162
163                 // we're going to check things that look like commands, so if
164                 // this doesn't, just output it.
165                 if (ch != '\\') {
166                         ret += ch;
167                         val = val.substr(1);
168                         continue;
169                 }
170
171                 // ok, could be a command of some sort
172                 // let's see if it corresponds to some unicode
173                 // unicodesymbols has things in the form: \"{u},
174                 // whereas we may see things like: \"u. So we'll
175                 // look for that and change it, if necessary.
176                 static boost::regex const reg("^\\\\\\W\\w");
177                 if (boost::regex_search(to_utf8(val), reg)) {
178                         val.insert(3, from_ascii("}"));
179                         val.insert(2, from_ascii("{"));
180                 }
181                 docstring rem;
182                 docstring const cnvtd = Encodings::fromLaTeXCommand(val, rem,
183                                                         Encodings::TEXT_CMD);
184                 if (!cnvtd.empty()) {
185                         // it did, so we'll take that bit and proceed with what's left
186                         ret += cnvtd;
187                         val = rem;
188                         continue;
189                 }
190                 // it's a command of some sort
191                 scanning_cmd = true;
192                 escaped = true;
193                 val = val.substr(1);
194         }
195         return ret;
196 }
197
198 } // anon namespace
199
200
201 //////////////////////////////////////////////////////////////////////
202 //
203 // BibTeXInfo
204 //
205 //////////////////////////////////////////////////////////////////////
206
207 BibTeXInfo::BibTeXInfo(docstring const & key, docstring const & type)
208         : is_bibtex_(true), bib_key_(key), entry_type_(type), info_(),
209           modifier_(0)
210 {}
211
212
213 docstring const BibTeXInfo::getAbbreviatedAuthor() const
214 {
215         if (!is_bibtex_) {
216                 docstring const opt = label();
217                 if (opt.empty())
218                         return docstring();
219
220                 docstring authors;
221                 docstring const remainder = trim(split(opt, authors, '('));
222                 if (remainder.empty())
223                         // in this case, we didn't find a "(", 
224                         // so we don't have author (year)
225                         return docstring();
226                 return authors;
227         }
228
229         docstring author = convertLaTeXCommands(operator[]("author"));
230         if (author.empty()) {
231                 author = convertLaTeXCommands(operator[]("editor"));
232                 if (author.empty())
233                         return bib_key_;
234         }
235
236         // FIXME Move this to a separate routine that can
237         // be called from elsewhere.
238         // 
239         // OK, we've got some names. Let's format them.
240         // Try to split the author list on " and "
241         vector<docstring> const authors =
242                 getVectorFromString(author, from_ascii(" and "));
243
244         if (authors.size() == 2)
245                 return bformat(_("%1$s and %2$s"),
246                         familyName(authors[0]), familyName(authors[1]));
247
248         if (authors.size() > 2)
249                 return bformat(_("%1$s et al."), familyName(authors[0]));
250
251         return familyName(authors[0]);
252 }
253
254
255 docstring const BibTeXInfo::getYear() const
256 {
257         if (is_bibtex_) 
258                 return operator[]("year");
259
260         docstring const opt = label();
261         if (opt.empty())
262                 return docstring();
263
264         docstring authors;
265         docstring tmp = split(opt, authors, '(');
266         if (tmp.empty()) 
267                 // we don't have author (year)
268                 return docstring();
269         docstring year;
270         tmp = split(tmp, year, ')');
271         return year;
272 }
273
274
275 docstring const BibTeXInfo::getXRef() const
276 {
277         if (!is_bibtex_)
278                 return docstring();
279         return operator[]("crossref");
280 }
281
282
283 namespace {
284         string parseOptions(string const & format, string & optkey, 
285                         string & ifpart, string & elsepart);
286
287         /// Calls parseOptions to deal with an embedded option, such as:
288         ///   {%number%[[, no.~%number%]]}
289         /// which must appear at the start of format. ifelsepart gets the 
290         /// whole of the option, and we return what's left after the option.
291         /// we return format if there is an error.
292         string parseEmbeddedOption(string const & format, string & ifelsepart)
293         {
294                 LASSERT(format[0] == '{' && format[1] == '%', return format);
295                 string optkey;
296                 string ifpart;
297                 string elsepart;
298                 string const rest = parseOptions(format, optkey, ifpart, elsepart);
299                 if (format == rest) { // parse error
300                         LYXERR0("ERROR! Couldn't parse `" << format <<"'.");
301                         return format;
302                 }
303                 LASSERT(rest.size() <= format.size(), /* */);
304                 ifelsepart = format.substr(0, format.size() - rest.size());
305                 return rest;
306         }
307         
308         
309         // Gets a "clause" from a format string, where the clause is 
310         // delimited by '[[' and ']]'. Returns what is left after the
311         // clause is removed, and returns format if there is an error.
312         string getClause(string const & format, string & clause)
313         {
314                 string fmt = format;
315                 // remove '[['
316                 fmt = fmt.substr(2);
317                 // we'll remove characters from the front of fmt as we 
318                 // deal with them
319                 while (fmt.size()) { 
320                         if (fmt[0] == ']' && fmt.size() > 1 && fmt[1] == ']') {
321                           // that's the end
322                                 fmt = fmt.substr(2);
323                                 break;
324                         }
325                         // check for an embedded option
326                         if (fmt[0] == '{' && fmt.size() > 1 && fmt[1] == '%') {
327                                 string part;
328                                 string const rest = parseEmbeddedOption(fmt, part);
329                                 if (fmt == rest) {
330                                         LYXERR0("ERROR! Couldn't parse `" << format <<"'.");
331                                         return format;
332                                 }
333                                 clause += part;
334                                 fmt = rest;
335                         } else { // it's just a normal character
336                                 clause += fmt[0];
337                                 fmt = fmt.substr(1);
338                         }
339                 }
340                 return fmt;
341         }
342
343
344         /// parse an options string, which must appear at the start of the
345         /// format parameter. puts the parsed bits in optkey, ifpart, and
346         /// elsepart and returns what's left after the option is removed.
347         /// if there's an error, it returns format itself.
348         string parseOptions(string const & format, string & optkey, 
349                         string & ifpart, string & elsepart) 
350         {
351                 LASSERT(format[0] == '{' && format[1] == '%', return format);
352                 // strip '{%'
353                 string fmt = format.substr(2);
354                 size_t pos = fmt.find('%'); // end of key
355                 if (pos == string::npos) {
356                         LYXERR0("Error parsing  `" << format <<"'. Can't find end of key.");
357                         return format;
358                 }
359                 optkey = fmt.substr(0,pos);
360                 fmt = fmt.substr(pos + 1);
361                 // [[format]] should be next
362                 if (fmt[0] != '[' || fmt[1] != '[') {
363                         LYXERR0("Error parsing  `" << format <<"'. Can't find '[[' after key.");
364                         return format;
365                 }
366
367                 string curfmt = fmt;
368                 fmt = getClause(curfmt, ifpart);
369                 if (fmt == curfmt) {
370                         LYXERR0("Error parsing  `" << format <<"'. Couldn't get if clause.");
371                         return format;
372                 }
373
374                 if (fmt[0] == '}') // we're done, no else clause
375                         return fmt.substr(1);
376         
377                 // else part should follow
378                 if (fmt[0] != '[' || fmt[1] != '[') {
379                         LYXERR0("Error parsing  `" << format <<"'. Can't find else clause.");
380                         return format;
381                 }
382                 
383                 curfmt = fmt;
384                 fmt = getClause(curfmt, elsepart);
385                 // we should be done
386                 if (fmt == curfmt || fmt[0] != '}') {
387                         LYXERR0("Error parsing  `" << format <<"'. Can't find end of option.");
388                         return format;
389                 }
390                 return fmt.substr(1);
391 }
392
393 } // anon namespace
394
395
396 docstring BibTeXInfo::expandFormat(string const & format, 
397                 BibTeXInfo const * const xref, bool richtext) const
398 {
399         // return value
400         docstring ret;
401         string key;
402         bool scanning_key = false;
403         bool scanning_rich = false;
404
405         string fmt = format;
406         // we'll remove characters from the front of fmt as we 
407         // deal with them
408         while (fmt.size()) {
409                 char_type thischar = fmt[0];
410                 if (thischar == '%') { 
411                         // beginning or end of key
412                         if (scanning_key) { 
413                                 // end of key
414                                 scanning_key = false;
415                                 // so we replace the key with its value, which may be empty
416                                 docstring const val = getValueForKey(key, xref);
417                                 key.clear();
418                                 ret += val;
419                         } else {
420                                 // beginning of key
421                                 scanning_key = true;
422                         }
423                 } 
424                 else if (thischar == '{') { 
425                         // beginning of option?
426                         if (scanning_key) {
427                                 LYXERR0("ERROR: Found `{' when scanning key in `" << format << "'.");
428                                 return _("ERROR!");
429                         }
430                         if (fmt.size() > 1) {
431                                 if (fmt[1] == '%') {
432                                         // it is the beginning of an optional format
433                                         string optkey;
434                                         string ifpart;
435                                         string elsepart;
436                                         string const newfmt = 
437                                                 parseOptions(fmt, optkey, ifpart, elsepart);
438                                         if (newfmt == fmt) // parse error
439                                                 return _("ERROR!");
440                                         fmt = newfmt;
441                                         docstring const val = getValueForKey(optkey, xref);
442                                         if (!val.empty())
443                                                 ret += expandFormat(ifpart, xref, richtext);
444                                         else if (!elsepart.empty())
445                                                 ret += expandFormat(elsepart, xref, richtext);
446                                         // fmt will have been shortened for us already
447                                         continue; 
448                                 }
449                                 if (fmt[1] == '!') {
450                                         // beginning of rich text
451                                         scanning_rich = true;
452                                         fmt = fmt.substr(2);
453                                         continue;
454                                 }
455                         }
456                         // we are here if the '{' was at the end of the format. hmm.
457                         ret += thischar;
458                 }
459                 else if (scanning_rich && thischar == '!' 
460                          && fmt.size() > 1 && fmt[1] == '}') {
461                         // end of rich text
462                         scanning_rich = false;
463                         fmt = fmt.substr(2);
464                         continue;
465                 }
466                 else if (scanning_key)
467                         key += thischar;
468                 else if (richtext || !scanning_rich)
469                         ret += thischar;
470                 // else the character is discarded, which will happen only if
471                 // richtext == false and we are scanning rich text
472                 fmt = fmt.substr(1);
473         } // for loop
474         if (scanning_key) {
475                 LYXERR0("Never found end of key in `" << format << "'!");
476                 return _("ERROR!");
477         }
478         if (scanning_rich) {
479                 LYXERR0("Never found end of rich text in `" << format << "'!");
480                 return _("ERROR!");
481         }
482         return ret;
483 }
484
485
486 namespace {
487
// Format strings used by BibTeXInfo::getInfo(), one per kind of entry.
// Syntax, as understood by BibTeXInfo::expandFormat():
//   %key%                       the value of the field "key"
//   {%key%[[if]]}               "if" when "key" has a value
//   {%key%[[if]][[else]]}       ditto, "else" when it has none
//   {!...!}                     rich text, used only in rich text mode
//
// FIXME These would be better read from a file, so that they
// could be customized.

// used for "article"
string articleFormat("%author%, \"%title%\", {!<i>!}%journal%{!</i>!} {%volume%[[ %volume%{%number%[[, %number%]]}]]} (%year%){%pages%[[, pp. %pages%]]}.{%note%[[ %note%]]}");

// used for "book"
string bookFormat("{%author%[[%author%]][[%editor%, ed.]]}, {!<i>!}%title%{!</i>!}{%volume%[[ vol. %volume%]][[{%number%[[no. %number%]]}]]}{%edition%[[%edition%]]} ({%address%[[%address%: ]]}%publisher%, %year%).{%note%[[ %note%]]}");

// used for every entry type beginning with "in"
string inSomething("%author%, \"%title%\", in{%editor%[[ %editor%, ed.,]]} {!<i>!}%booktitle%{!</i>!}{%volume%[[ vol. %volume%]][[{%number%[[no. %number%]]}]]}{%edition%[[%edition%]]} ({%address%[[%address%: ]]}%publisher%, %year%){%pages%[[, pp. %pages%]]}.{%note%[[ %note%]]}");

// used for "phdthesis" and "mastersthesis"
string thesis("%author%, %title% ({%address%[[%address%: ]]}%school%, %year%).{%note%[[ %note%]]}");

// used for everything else
string defaultFormat("{%author%[[%author%, ]][[{%editor%[[%editor%, ed., ]]}]]}\"%title%\"{%journal%[[, {!<i>!}%journal%{!</i>!}]][[{%publisher%[[, %publisher%]][[{%institution%[[, %institution%]]}]]}]]}{%year%[[ (%year%)]]}{%pages%[[, %pages%]]}.");
500
501 }
502
503 docstring const & BibTeXInfo::getInfo(BibTeXInfo const * const xref,
504         bool richtext) const
505 {
506         if (!info_.empty())
507                 return info_;
508
509         if (!is_bibtex_) {
510                 BibTeXInfo::const_iterator it = find(from_ascii("ref"));
511                 info_ = it->second;
512                 return info_;
513         }
514
515         if (entry_type_ == "article")
516                 info_ = expandFormat(articleFormat, xref, richtext);
517         else if (entry_type_ == "book")
518                 info_ = expandFormat(bookFormat, xref, richtext);
519         else if (entry_type_.substr(0,2) == "in")
520                 info_ = expandFormat(inSomething, xref, richtext);
521         else if (entry_type_ == "phdthesis" || entry_type_ == "mastersthesis")
522                 info_ = expandFormat(thesis, xref, richtext);
523         else 
524                 info_ = expandFormat(defaultFormat, xref, richtext);
525
526         if (!info_.empty())
527                 info_ = convertLaTeXCommands(info_);
528         return info_;
529 }
530
531
532 docstring const & BibTeXInfo::operator[](docstring const & field) const
533 {
534         BibTeXInfo::const_iterator it = find(field);
535         if (it != end())
536                 return it->second;
537         static docstring const empty_value = docstring();
538         return empty_value;
539 }
540         
541         
542 docstring const & BibTeXInfo::operator[](string const & field) const
543 {
544         return operator[](from_ascii(field));
545 }
546
547
548 docstring BibTeXInfo::getValueForKey(string const & key, 
549                 BibTeXInfo const * const xref) const
550 {
551         docstring const ret = operator[](key);
552         if (!ret.empty() || !xref)
553                 return ret;
554         return (*xref)[key];
555 }
556
557
558 //////////////////////////////////////////////////////////////////////
559 //
560 // BiblioInfo
561 //
562 //////////////////////////////////////////////////////////////////////
563
564 namespace {
565 // A functor for use with sort, leading to case insensitive sorting
566         class compareNoCase: public binary_function<docstring, docstring, bool>
567         {
568                 public:
569                         bool operator()(docstring const & s1, docstring const & s2) const {
570                                 return compare_no_case(s1, s2) < 0;
571                         }
572         };
573 } // namespace anon
574
575
576 vector<docstring> const BiblioInfo::getKeys() const
577 {
578         vector<docstring> bibkeys;
579         BiblioInfo::const_iterator it  = begin();
580         for (; it != end(); ++it)
581                 bibkeys.push_back(it->first);
582         sort(bibkeys.begin(), bibkeys.end(), compareNoCase());
583         return bibkeys;
584 }
585
586
587 vector<docstring> const BiblioInfo::getFields() const
588 {
589         vector<docstring> bibfields;
590         set<docstring>::const_iterator it = field_names_.begin();
591         set<docstring>::const_iterator end = field_names_.end();
592         for (; it != end; ++it)
593                 bibfields.push_back(*it);
594         sort(bibfields.begin(), bibfields.end());
595         return bibfields;
596 }
597
598
599 vector<docstring> const BiblioInfo::getEntries() const
600 {
601         vector<docstring> bibentries;
602         set<docstring>::const_iterator it = entry_types_.begin();
603         set<docstring>::const_iterator end = entry_types_.end();
604         for (; it != end; ++it)
605                 bibentries.push_back(*it);
606         sort(bibentries.begin(), bibentries.end());
607         return bibentries;
608 }
609
610
611 docstring const BiblioInfo::getAbbreviatedAuthor(docstring const & key) const
612 {
613         BiblioInfo::const_iterator it = find(key);
614         if (it == end())
615                 return docstring();
616         BibTeXInfo const & data = it->second;
617         return data.getAbbreviatedAuthor();
618 }
619
620
621 docstring const BiblioInfo::getCiteNumber(docstring const & key) const
622 {
623         BiblioInfo::const_iterator it = find(key);
624         if (it == end())
625                 return docstring();
626         BibTeXInfo const & data = it->second;
627         return data.citeNumber();
628 }
629
630
631 docstring const BiblioInfo::getYear(docstring const & key, bool use_modifier) const
632 {
633         BiblioInfo::const_iterator it = find(key);
634         if (it == end())
635                 return docstring();
636         BibTeXInfo const & data = it->second;
637         docstring year = data.getYear();
638         if (year.empty()) {
639                 // let's try the crossref
640                 docstring const xref = data.getXRef();
641                 if (xref.empty())
642                         return _("No year"); // no luck
643                 BiblioInfo::const_iterator const xrefit = find(xref);
644                 if (xrefit == end())
645                         return _("No year"); // no luck again
646                 BibTeXInfo const & xref_data = xrefit->second;
647                 year = xref_data.getYear();
648         }
649         if (use_modifier && data.modifier() != 0)
650                 year += data.modifier();
651         return year;
652 }
653
654
655 docstring const BiblioInfo::getInfo(docstring const & key, bool richtext) const
656 {
657         BiblioInfo::const_iterator it = find(key);
658         if (it == end())
659                 return docstring();
660         BibTeXInfo const & data = it->second;
661         BibTeXInfo const * xrefptr = 0;
662         docstring const xref = data.getXRef();
663         if (!xref.empty()) {
664                 BiblioInfo::const_iterator const xrefit = find(xref);
665                 if (xrefit != end())
666                         xrefptr = &(xrefit->second);
667         }
668         return data.getInfo(xrefptr, richtext);
669 }
670
671
672 bool BiblioInfo::isBibtex(docstring const & key) const
673 {
674         BiblioInfo::const_iterator it = find(key);
675         if (it == end())
676                 return false;
677         return it->second.isBibTeX();
678 }
679
680
681
682 vector<docstring> const BiblioInfo::getCiteStrings(
683         docstring const & key, Buffer const & buf) const
684 {
685         CiteEngine const engine = buf.params().citeEngine();
686         if (engine == ENGINE_BASIC || engine == ENGINE_NATBIB_NUMERICAL)
687                 return getNumericalStrings(key, buf);
688         else
689                 return getAuthorYearStrings(key, buf);
690 }
691
692
693 vector<docstring> const BiblioInfo::getNumericalStrings(
694         docstring const & key, Buffer const & buf) const
695 {
696         if (empty())
697                 return vector<docstring>();
698
699         docstring const author = getAbbreviatedAuthor(key);
700         docstring const year   = getYear(key);
701         if (author.empty() || year.empty())
702                 return vector<docstring>();
703
704         vector<CiteStyle> const & styles = citeStyles(buf.params().citeEngine());
705         
706         vector<docstring> vec(styles.size());
707         for (size_t i = 0; i != vec.size(); ++i) {
708                 docstring str;
709
710                 switch (styles[i]) {
711                         case CITE:
712                         case CITEP:
713                                 str = from_ascii("[#ID]");
714                                 break;
715
716                         case NOCITE:
717                                 str = _("Add to bibliography only.");
718                                 break;
719
720                         case CITET:
721                                 str = author + " [#ID]";
722                                 break;
723
724                         case CITEALT:
725                                 str = author + " #ID";
726                                 break;
727
728                         case CITEALP:
729                                 str = from_ascii("#ID");
730                                 break;
731
732                         case CITEAUTHOR:
733                                 str = author;
734                                 break;
735
736                         case CITEYEAR:
737                                 str = year;
738                                 break;
739
740                         case CITEYEARPAR:
741                                 str = '(' + year + ')';
742                                 break;
743                 }
744
745                 vec[i] = str;
746         }
747
748         return vec;
749 }
750
751
752 vector<docstring> const BiblioInfo::getAuthorYearStrings(
753         docstring const & key, Buffer const & buf) const
754 {
755         if (empty())
756                 return vector<docstring>();
757
758         docstring const author = getAbbreviatedAuthor(key);
759         docstring const year   = getYear(key);
760         if (author.empty() || year.empty())
761                 return vector<docstring>();
762
763         vector<CiteStyle> const & styles = citeStyles(buf.params().citeEngine());
764         
765         vector<docstring> vec(styles.size());
766         for (size_t i = 0; i != vec.size(); ++i) {
767                 docstring str;
768
769                 switch (styles[i]) {
770                         case CITE:
771                 // jurabib only: Author/Annotator
772                 // (i.e. the "before" field, 2nd opt arg)
773                                 str = author + "/<" + _("before") + '>';
774                                 break;
775
776                         case NOCITE:
777                                 str = _("Add to bibliography only.");
778                                 break;
779
780                         case CITET:
781                                 str = author + " (" + year + ')';
782                                 break;
783
784                         case CITEP:
785                                 str = '(' + author + ", " + year + ')';
786                                 break;
787
788                         case CITEALT:
789                                 str = author + ' ' + year ;
790                                 break;
791
792                         case CITEALP:
793                                 str = author + ", " + year ;
794                                 break;
795
796                         case CITEAUTHOR:
797                                 str = author;
798                                 break;
799
800                         case CITEYEAR:
801                                 str = year;
802                                 break;
803
804                         case CITEYEARPAR:
805                                 str = '(' + year + ')';
806                                 break;
807                 }
808                 vec[i] = str;
809         }
810         return vec;
811 }
812
813
814 void BiblioInfo::mergeBiblioInfo(BiblioInfo const & info)
815 {
816         bimap_.insert(info.begin(), info.end());
817 }
818
819
820 namespace {
821         // used in xhtml to sort a list of BibTeXInfo objects
822         bool lSorter(BibTeXInfo const * lhs, BibTeXInfo const * rhs)
823         {
824                 docstring const lauth = lhs->getAbbreviatedAuthor();
825                 docstring const rauth = rhs->getAbbreviatedAuthor();
826                 docstring const lyear = lhs->getYear();
827                 docstring const ryear = rhs->getYear();
828                 docstring const ltitl = lhs->operator[]("title");
829                 docstring const rtitl = rhs->operator[]("title");
830                 return  (lauth < rauth)
831                                 || (lauth == rauth && lyear < ryear)
832                                 || (lauth == rauth && lyear == ryear && ltitl < rtitl);
833         }
834 }
835
836
837 void BiblioInfo::collectCitedEntries(Buffer const & buf)
838 {
839         cited_entries_.clear();
840         // We are going to collect all the citation keys used in the document,
841         // getting them from the TOC.
842         // FIXME We may want to collect these differently, in the first case,
843         // so that we might have them in order of appearance.
844         set<docstring> citekeys;
845         Toc const & toc = buf.tocBackend().toc("citation");
846         Toc::const_iterator it = toc.begin();
847         Toc::const_iterator const en = toc.end();
848         for (; it != en; ++it) {
849                 if (it->str().empty())
850                         continue;
851                 vector<docstring> const keys = getVectorFromString(it->str());
852                 citekeys.insert(keys.begin(), keys.end());
853         }
854         if (citekeys.empty())
855                 return;
856         
857         // We have a set of the keys used in this document.
858         // We will now convert it to a list of the BibTeXInfo objects used in 
859         // this document...
860         vector<BibTeXInfo const *> bi;
861         set<docstring>::const_iterator cit = citekeys.begin();
862         set<docstring>::const_iterator const cen = citekeys.end();
863         for (; cit != cen; ++cit) {
864                 BiblioInfo::const_iterator const bt = find(*cit);
865                 if (bt == end() || !bt->second.isBibTeX())
866                         continue;
867                 bi.push_back(&(bt->second));
868         }
869         // ...and sort it.
870         sort(bi.begin(), bi.end(), lSorter);
871         
872         // Now we can write the sorted keys
873         vector<BibTeXInfo const *>::const_iterator bit = bi.begin();
874         vector<BibTeXInfo const *>::const_iterator ben = bi.end();
875         for (; bit != ben; ++bit)
876                 cited_entries_.push_back((*bit)->key());
877 }
878
879
880 void BiblioInfo::makeCitationLabels(Buffer const & buf)
881 {
882         collectCitedEntries(buf);
883         CiteEngine const engine = buf.params().citeEngine();
884         bool const numbers = 
885                 (engine == ENGINE_BASIC || engine == ENGINE_NATBIB_NUMERICAL);
886
887         int keynumber = 0;
888         char modifier = 0;
889         // used to remember the last one we saw
890         // we'll be comparing entries to see if we need to add
891         // modifiers, like "1984a"
892         map<docstring, BibTeXInfo>::iterator last;
893
894         vector<docstring>::const_iterator it = cited_entries_.begin();
895         vector<docstring>::const_iterator const en = cited_entries_.end();
896         for (; it != en; ++it) {
897                 map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(*it);
898                 // this shouldn't happen, but...
899                 if (biit == bimap_.end())
900                         // ...fail gracefully, anyway.
901                         continue;
902                 BibTeXInfo & entry = biit->second;
903                 if (numbers) {
904                         docstring const num = convert<docstring>(++keynumber);
905                         entry.setCiteNumber(num);
906                 } else {
907                         if (it != cited_entries_.begin()
908                             && entry.getAbbreviatedAuthor() == last->second.getAbbreviatedAuthor()
909                             // we access the year via getYear() so as to get it from the xref,
910                             // if we need to do so
911                             && getYear(entry.key()) == getYear(last->second.key())) {
912                                 if (modifier == 0) {
913                                         // so the last one should have been 'a'
914                                         last->second.setModifier('a');
915                                         modifier = 'b';
916                                 } else if (modifier == 'z')
917                                         modifier = 'A';
918                                 else
919                                         modifier++;
920                         } else {
921                                 modifier = 0;
922                         }
923                         entry.setModifier(modifier);                            
924                         // remember the last one
925                         last = biit;
926                 }
927         }
928 }
929
930
931 //////////////////////////////////////////////////////////////////////
932 //
933 // CitationStyle
934 //
935 //////////////////////////////////////////////////////////////////////
936
937 namespace {
938
939
940 char const * const citeCommands[] = {
941         "cite", "citet", "citep", "citealt", "citealp",
942         "citeauthor", "citeyear", "citeyearpar", "nocite" };
943
944 unsigned int const nCiteCommands =
945                 sizeof(citeCommands) / sizeof(char *);
946
947 CiteStyle const citeStylesArray[] = {
948         CITE, CITET, CITEP, CITEALT, CITEALP, 
949         CITEAUTHOR, CITEYEAR, CITEYEARPAR, NOCITE };
950
951 unsigned int const nCiteStyles =
952                 sizeof(citeStylesArray) / sizeof(CiteStyle);
953
954 CiteStyle const citeStylesFull[] = {
955         CITET, CITEP, CITEALT, CITEALP, CITEAUTHOR };
956
957 unsigned int const nCiteStylesFull =
958                 sizeof(citeStylesFull) / sizeof(CiteStyle);
959
960 CiteStyle const citeStylesUCase[] = {
961         CITET, CITEP, CITEALT, CITEALP, CITEAUTHOR };
962
963 unsigned int const nCiteStylesUCase =
964         sizeof(citeStylesUCase) / sizeof(CiteStyle);
965
966 } // namespace anon
967
968
969 CitationStyle citationStyleFromString(string const & command)
970 {
971         CitationStyle s;
972         if (command.empty())
973                 return s;
974
975         string cmd = command;
976         if (cmd[0] == 'C') {
977                 s.forceUpperCase = true;
978                 cmd[0] = 'c';
979         }
980
981         size_t const n = cmd.size() - 1;
982         if (cmd != "cite" && cmd[n] == '*') {
983                 s.full = true;
984                 cmd = cmd.substr(0, n);
985         }
986
987         char const * const * const last = citeCommands + nCiteCommands;
988         char const * const * const ptr = find(citeCommands, last, cmd);
989
990         if (ptr != last) {
991                 size_t idx = ptr - citeCommands;
992                 s.style = citeStylesArray[idx];
993         }
994         return s;
995 }
996
997
998 string citationStyleToString(const CitationStyle & s)
999 {
1000         string cite = citeCommands[s.style];
1001         if (s.full) {
1002                 CiteStyle const * last = citeStylesFull + nCiteStylesFull;
1003                 if (std::find(citeStylesFull, last, s.style) != last)
1004                         cite += '*';
1005         }
1006
1007         if (s.forceUpperCase) {
1008                 CiteStyle const * last = citeStylesUCase + nCiteStylesUCase;
1009                 if (std::find(citeStylesUCase, last, s.style) != last)
1010                         cite[0] = 'C';
1011         }
1012
1013         return cite;
1014 }
1015
1016 vector<CiteStyle> citeStyles(CiteEngine engine)
1017 {
1018         unsigned int nStyles = 0;
1019         unsigned int start = 0;
1020
1021         switch (engine) {
1022                 case ENGINE_BASIC:
1023                         nStyles = 2;
1024                         start = 0;
1025                         break;
1026                 case ENGINE_NATBIB_AUTHORYEAR:
1027                 case ENGINE_NATBIB_NUMERICAL:
1028                         nStyles = nCiteStyles - 1;
1029                         start = 1;
1030                         break;
1031                 case ENGINE_JURABIB:
1032                         nStyles = nCiteStyles;
1033                         start = 0;
1034                         break;
1035         }
1036
1037         vector<CiteStyle> styles(nStyles);
1038         size_t i = 0;
1039         int j = start;
1040         for (; i != styles.size(); ++i, ++j)
1041                 styles[i] = citeStylesArray[j];
1042
1043         return styles;
1044 }
1045
1046 } // namespace lyx
1047