]> git.lyx.org Git - lyx.git/blob - src/insets/InsetBibtex.cpp
Support for \nocite* from Berhard Reiter. Increments file format to 210.
[lyx.git] / src / insets / InsetBibtex.cpp
1 /**
2  * \file InsetBibtex.cpp
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author Alejandro Aguilar Sierra
7  * \author Richard Heck (BibTeX parser improvements)
8  *
9  * Full author contact details are available in file CREDITS.
10  */
11
12 #include <config.h>
13
14 #include "InsetBibtex.h"
15
16 #include "Buffer.h"
17 #include "BufferParams.h"
18 #include "DispatchResult.h"
19 #include "support/debug.h"
20 #include "Encoding.h"
21 #include "FuncRequest.h"
22 #include "support/gettext.h"
23 #include "LaTeXFeatures.h"
24 #include "MetricsInfo.h"
25 #include "OutputParams.h"
26 #include "TextClass.h"
27
28 #include "frontends/alert.h"
29
30 #include "support/ExceptionMessage.h"
31 #include "support/docstream.h"
32 #include "support/FileNameList.h"
33 #include "support/filetools.h"
34 #include "support/lstrings.h"
35 #include "support/os.h"
36 #include "support/Path.h"
37 #include "support/textutils.h"
38
39 #include <boost/tokenizer.hpp>
40
41 using namespace std;
42 using namespace lyx::support;
43
44 namespace lyx {
45
46 namespace Alert = frontend::Alert;
47 namespace os = support::os;
48
49
50 InsetBibtex::InsetBibtex(InsetCommandParams const & p)
51         : InsetCommand(p, "bibtex")
52 {}
53
54
55 CommandInfo const * InsetBibtex::findInfo(string const & /* cmdName */)
56 {
57         static const char * const paramnames[] = 
58                 {"options", "btprint", "bibfiles", ""};
59         static const bool isoptional[] = {true, true, false};
60         static const CommandInfo info = {3, paramnames, isoptional};
61         return &info;
62 }
63
64
65 Inset * InsetBibtex::clone() const
66 {
67         return new InsetBibtex(*this);
68 }
69
70
71 void InsetBibtex::doDispatch(Cursor & cur, FuncRequest & cmd)
72 {
73         switch (cmd.action) {
74
75         case LFUN_INSET_MODIFY: {
76                 InsetCommandParams p(BIBTEX_CODE);
77                 try {
78                         if (!InsetCommandMailer::string2params("bibtex", 
79                                         to_utf8(cmd.argument()), p)) {
80                                 cur.noUpdate();
81                                 break;
82                         }
83                 } catch (ExceptionMessage const & message) {
84                         if (message.type_ == WarningException) {
85                                 Alert::warning(message.title_, message.details_);
86                                 cur.noUpdate();
87                         } else 
88                                 throw message;
89                         break;
90                 }
91                 setParams(p);
92                 cur.buffer().updateBibfilesCache();
93                 break;
94         }
95
96         default:
97                 InsetCommand::doDispatch(cur, cmd);
98                 break;
99         }
100 }
101
102
103 docstring const InsetBibtex::getScreenLabel(Buffer const &) const
104 {
105         return _("BibTeX Generated Bibliography");
106 }
107
108
109 namespace {
110
111 string normalizeName(Buffer const & buffer, OutputParams const & runparams,
112                       string const & name, string const & ext)
113 {
114         string const fname = makeAbsPath(name, buffer.filePath()).absFilename();
115         if (FileName(name).isAbsolute() || !FileName(fname + ext).isReadableFile())
116                 return name;
117         if (!runparams.nice)
118                 return fname;
119
120         // FIXME UNICODE
121         return to_utf8(makeRelPath(from_utf8(fname),
122                                          from_utf8(buffer.masterBuffer()->filePath())));
123 }
124
125 }
126
127
128 int InsetBibtex::latex(Buffer const & buffer, odocstream & os,
129                        OutputParams const & runparams) const
130 {
131         // the sequence of the commands:
132         // 1. \bibliographystyle{style}
133         // 2. \addcontentsline{...} - if option bibtotoc set
134         // 3. \bibliography{database}
135         // and with bibtopic:
136         // 1. \bibliographystyle{style}
137         // 2. \begin{btSect}{database}
138         // 3. \btPrint{Cited|NotCited|All}
139         // 4. \end{btSect}
140
141         // Database(s)
142         // If we are processing the LaTeX file in a temp directory then
143         // copy the .bib databases to this temp directory, mangling their
144         // names in the process. Store this mangled name in the list of
145         // all databases.
146         // (We need to do all this because BibTeX *really*, *really*
147         // can't handle "files with spaces" and Windows users tend to
148         // use such filenames.)
149         // Otherwise, store the (maybe absolute) path to the original,
150         // unmangled database name.
151         typedef boost::char_separator<char_type> Separator;
152         typedef boost::tokenizer<Separator, docstring::const_iterator, docstring> Tokenizer;
153
154         Separator const separator(from_ascii(",").c_str());
155         // The tokenizer must not be called with temporary strings, since
156         // it does not make a copy and uses iterators of the string further
157         // down. getParam returns a reference, so this is OK.
158         Tokenizer const tokens(getParam("bibfiles"), separator);
159         Tokenizer::const_iterator const begin = tokens.begin();
160         Tokenizer::const_iterator const end = tokens.end();
161
162         odocstringstream dbs;
163         for (Tokenizer::const_iterator it = begin; it != end; ++it) {
164                 docstring const input = trim(*it);
165                 // FIXME UNICODE
166                 string utf8input = to_utf8(input);
167                 string database =
168                         normalizeName(buffer, runparams, utf8input, ".bib");
169                 FileName const try_in_file(makeAbsPath(database + ".bib", buffer.filePath()));
170                 bool const not_from_texmf = try_in_file.isReadableFile();
171
172                 if (!runparams.inComment && !runparams.dryrun && !runparams.nice &&
173                     not_from_texmf) {
174
175                         // mangledFilename() needs the extension
176                         DocFileName const in_file = DocFileName(try_in_file);
177                         database = removeExtension(in_file.mangledFilename());
178                         FileName const out_file = makeAbsPath(database + ".bib",
179                                         buffer.masterBuffer()->temppath());
180
181                         bool const success = in_file.copyTo(out_file);
182                         if (!success) {
183                                 lyxerr << "Failed to copy '" << in_file
184                                        << "' to '" << out_file << "'"
185                                        << endl;
186                         }
187                 } else if (!runparams.inComment && runparams.nice && not_from_texmf &&
188                            !isValidLaTeXFilename(database)) {
189                                 frontend::Alert::warning(_("Invalid filename"),
190                                                          _("The following filename is likely to cause trouble "
191                                                            "when running the exported file through LaTeX: ") +
192                                                             from_utf8(database));
193                 }
194
195                 if (it != begin)
196                         dbs << ',';
197                 // FIXME UNICODE
198                 dbs << from_utf8(latex_path(database));
199         }
200         docstring const db_out = dbs.str();
201
202         // Post this warning only once.
203         static bool warned_about_spaces = false;
204         if (!warned_about_spaces &&
205             runparams.nice && db_out.find(' ') != docstring::npos) {
206                 warned_about_spaces = true;
207
208                 Alert::warning(_("Export Warning!"),
209                                _("There are spaces in the paths to your BibTeX databases.\n"
210                                               "BibTeX will be unable to find them."));
211         }
212
213         // Style-Options
214         string style = to_utf8(getParam("options")); // maybe empty! and with bibtotoc
215         string bibtotoc;
216         if (prefixIs(style, "bibtotoc")) {
217                 bibtotoc = "bibtotoc";
218                 if (contains(style, ','))
219                         style = split(style, bibtotoc, ',');
220         }
221
222         // line count
223         int nlines = 0;
224
225         if (!style.empty()) {
226                 string base = normalizeName(buffer, runparams, style, ".bst");
227                 FileName const try_in_file(makeAbsPath(base + ".bst", buffer.filePath()));
228                 bool const not_from_texmf = try_in_file.isReadableFile();
229                 // If this style does not come from texmf and we are not
230                 // exporting to .tex copy it to the tmp directory.
231                 // This prevents problems with spaces and 8bit charcaters
232                 // in the file name.
233                 if (!runparams.inComment && !runparams.dryrun && !runparams.nice &&
234                     not_from_texmf) {
235                         // use new style name
236                         DocFileName const in_file = DocFileName(try_in_file);
237                         base = removeExtension(in_file.mangledFilename());
238                         FileName const out_file(makeAbsPath(base + ".bst",
239                                         buffer.masterBuffer()->temppath()));
240                         bool const success = in_file.copyTo(out_file);
241                         if (!success) {
242                                 lyxerr << "Failed to copy '" << in_file
243                                        << "' to '" << out_file << "'"
244                                        << endl;
245                         }
246                 }
247                 // FIXME UNICODE
248                 os << "\\bibliographystyle{"
249                    << from_utf8(latex_path(normalizeName(buffer, runparams, base, ".bst")))
250                    << "}\n";
251                 nlines += 1;
252         }
253
254         // Post this warning only once.
255         static bool warned_about_bst_spaces = false;
256         if (!warned_about_bst_spaces && runparams.nice && contains(style, ' ')) {
257                 warned_about_bst_spaces = true;
258                 Alert::warning(_("Export Warning!"),
259                                _("There are spaces in the path to your BibTeX style file.\n"
260                                               "BibTeX will be unable to find it."));
261         }
262
263         if (!db_out.empty() && buffer.params().use_bibtopic){
264                 os << "\\begin{btSect}{" << db_out << "}\n";
265                 docstring btprint = getParam("btprint");
266                 if (btprint.empty())
267                         // default
268                         btprint = from_ascii("btPrintCited");
269                 os << "\\" << btprint << "\n"
270                    << "\\end{btSect}\n";
271                 nlines += 3;
272         }
273
274         // bibtotoc-Option
275         if (!bibtotoc.empty() && !buffer.params().use_bibtopic) {
276                 // maybe a problem when a textclass has no "art" as
277                 // part of its name, because it's than book.
278                 // For the "official" lyx-layouts it's no problem to support
279                 // all well
280                 if (!contains(buffer.params().getTextClass().name(),
281                               "art")) {
282                         if (buffer.params().sides == OneSide) {
283                                 // oneside
284                                 os << "\\clearpage";
285                         } else {
286                                 // twoside
287                                 os << "\\cleardoublepage";
288                         }
289
290                         // bookclass
291                         os << "\\addcontentsline{toc}{chapter}{\\bibname}";
292
293                 } else {
294                         // article class
295                         os << "\\addcontentsline{toc}{section}{\\refname}";
296                 }
297         }
298
299         if (!db_out.empty() && !buffer.params().use_bibtopic){
300                 docstring btprint = getParam("btprint");
301                 if (btprint == "btPrintAll") {
302                         os << "\\nocite{*}\n";
303                         nlines += 1;
304                 }
305                 os << "\\bibliography{" << db_out << "}\n";
306                 nlines += 1;
307         }
308
309         return nlines;
310 }
311
312
313 FileNameList const InsetBibtex::getFiles(Buffer const & buffer) const
314 {
315         FileName path(buffer.filePath());
316         PathChanger p(path);
317
318         FileNameList vec;
319
320         string tmp;
321         // FIXME UNICODE
322         string bibfiles = to_utf8(getParam("bibfiles"));
323         bibfiles = split(bibfiles, tmp, ',');
324         while (!tmp.empty()) {
325                 FileName const file = findtexfile(changeExtension(tmp, "bib"), "bib");
326                 LYXERR(Debug::LATEX, "Bibfile: " << file);
327
328                 // If we didn't find a matching file name just fail silently
329                 if (!file.empty())
330                         vec.push_back(file);
331
332                 // Get next file name
333                 bibfiles = split(bibfiles, tmp, ',');
334         }
335
336         return vec;
337 }
338
339 namespace {
340
341         // methods for parsing bibtex files
342
343         typedef map<docstring, docstring> VarMap;
344
345         /// remove whitespace characters, optionally a single comma,
346         /// and further whitespace characters from the stream.
347         /// @return true if a comma was found, false otherwise
348         ///
349         bool removeWSAndComma(idocfstream & ifs) {
350                 char_type ch;
351
352                 if (!ifs)
353                         return false;
354
355                 // skip whitespace
356                 do {
357                         ifs.get(ch);
358                 } while (ifs && isSpace(ch));
359
360                 if (!ifs)
361                         return false;
362
363                 if (ch != ',') {
364                         ifs.putback(ch);
365                         return false;
366                 }
367
368                 // skip whitespace
369                 do {
370                         ifs.get(ch);
371                 } while (ifs && isSpace(ch));
372
373                 if (ifs) {
374                         ifs.putback(ch);
375                 }
376
377                 return true;
378         }
379
380
381         enum charCase {
382                 makeLowerCase,
383                 keepCase
384         };
385
386         /// remove whitespace characters, read characer sequence
387         /// not containing whitespace characters or characters in
388         /// delimChars, and remove further whitespace characters.
389         ///
390         /// @return true if a string of length > 0 could be read.
391         ///
392         bool readTypeOrKey(docstring & val, idocfstream & ifs,
393                 docstring const & delimChars, docstring const &illegalChars, 
394                 charCase chCase) {
395
396                 char_type ch;
397
398                 val.clear();
399
400                 if (!ifs)
401                         return false;
402
403                 // skip whitespace
404                 do {
405                         ifs.get(ch);
406                 } while (ifs && isSpace(ch));
407
408                 if (!ifs)
409                         return false;
410
411                 // read value
412                 bool legalChar = true;
413                 while (ifs && !isSpace(ch) && 
414                                                  delimChars.find(ch) == docstring::npos &&
415                                                  (legalChar = (illegalChars.find(ch) == docstring::npos))
416                                         ) 
417                 {
418                         if (chCase == makeLowerCase)
419                                 val += lowercase(ch);
420                         else
421                                 val += ch;
422                         ifs.get(ch);
423                 }
424                 
425                 if (!legalChar) {
426                         ifs.putback(ch);
427                         return false;
428                 }
429
430                 // skip whitespace
431                 while (ifs && isSpace(ch)) {
432                         ifs.get(ch);
433                 }
434
435                 if (ifs) {
436                         ifs.putback(ch);
437                 }
438
439                 return val.length() > 0;
440         }
441
442         /// read subsequent bibtex values that are delimited with a #-character.
443         /// Concatenate all parts and replace names with the associated string in
444         /// the variable strings.
445         /// @return true if reading was successfull (all single parts were delimited
446         /// correctly)
447         bool readValue(docstring & val, idocfstream & ifs, const VarMap & strings) {
448
449                 char_type ch;
450
451                 val.clear();
452
453                 if (!ifs)
454                         return false;
455
456                 do {
457                         // skip whitespace
458                         do {
459                                 ifs.get(ch);
460                         } while (ifs && isSpace(ch));
461
462                         if (!ifs)
463                                 return false;
464
465                         // check for field type
466                         if (isDigit(ch)) {
467
468                                 // read integer value
469                                 do {
470                                         val += ch;
471                                         ifs.get(ch);
472                                 } while (ifs && isDigit(ch));
473
474                                 if (!ifs)
475                                         return false;
476
477                         } else if (ch == '"' || ch == '{') {
478                                 // set end delimiter
479                                 char_type delim = ch == '"' ? '"': '}';
480
481                                 //Skip whitespace
482                                 do {
483                                         ifs.get(ch);
484                                 } while (ifs && isSpace(ch));
485                                 
486                                 if (!ifs)
487                                         return false;
488                                 
489                                 //We now have the first non-whitespace character
490                                 //We'll collapse adjacent whitespace.
491                                 bool lastWasWhiteSpace = false;
492                                 
493                                 // inside this delimited text braces must match.
494                                 // Thus we can have a closing delimiter only
495                                 // when nestLevel == 0
496                                 int nestLevel = 0;
497  
498                                 while (ifs && (nestLevel > 0 || ch != delim)) {
499                                         if (isSpace(ch)) {
500                                                 lastWasWhiteSpace = true;
501                                                 ifs.get(ch);
502                                                 continue;
503                                         }
504                                         //We output the space only after we stop getting 
505                                         //whitespace so as not to output any whitespace
506                                         //at the end of the value.
507                                         if (lastWasWhiteSpace) {
508                                                 lastWasWhiteSpace = false;
509                                                 val += ' ';
510                                         }
511                                         
512                                         val += ch;
513
514                                         // update nesting level
515                                         switch (ch) {
516                                                 case '{':
517                                                         ++nestLevel;
518                                                         break;
519                                                 case '}':
520                                                         --nestLevel;
521                                                         if (nestLevel < 0) return false;
522                                                         break;
523                                         }
524
525                                         ifs.get(ch);
526                                 }
527
528                                 if (!ifs)
529                                         return false;
530
531                                 ifs.get(ch);
532
533                                 if (!ifs)
534                                         return false;
535
536                         } else {
537
538                                 // reading a string name
539                                 docstring strName;
540
541                                 while (ifs && !isSpace(ch) && ch != '#' && ch != ',' && ch != '}' && ch != ')') {
542                                         strName += lowercase(ch);
543                                         ifs.get(ch);
544                                 }
545
546                                 if (!ifs)
547                                         return false;
548
549                                 // replace the string with its assigned value or
550                                 // discard it if it's not assigned
551                                 if (strName.length()) {
552                                         VarMap::const_iterator pos = strings.find(strName);
553                                         if (pos != strings.end()) {
554                                                 val += pos->second;
555                                         }
556                                 }
557                         }
558
559                         // skip WS
560                         while (ifs && isSpace(ch)) {
561                                 ifs.get(ch);
562                         }
563
564                         if (!ifs)
565                                 return false;
566
567                         // continue reading next value on concatenate with '#'
568                 } while (ch == '#');
569
570                 ifs.putback(ch);
571
572                 return true;
573         }
574 }
575
576
577 // This method returns a comma separated list of Bibtex entries
578 void InsetBibtex::fillWithBibKeys(Buffer const & buffer,
579                 BiblioInfo & keylist, InsetIterator const & /*di*/) const
580 {
581         FileNameList const files = getFiles(buffer);
582         for (vector<FileName>::const_iterator it = files.begin();
583              it != files.end(); ++ it) {
584                 // This bibtex parser is a first step to parse bibtex files
585                 // more precisely.
586                 //
587                 // - it reads the whole bibtex entry and does a syntax check
588                 //   (matching delimiters, missing commas,...
589                 // - it recovers from errors starting with the next @-character
590                 // - it reads @string definitions and replaces them in the
591                 //   field values.
592                 // - it accepts more characters in keys or value names than
593                 //   bibtex does.
594                 //
595                 // Officially bibtex does only support ASCII, but in practice
596                 // you can use the encoding of the main document as long as
597                 // some elements like keys and names are pure ASCII. Therefore
598                 // we convert the file from the buffer encoding.
599                 // We don't restrict keys to ASCII in LyX, since our own
600                 // InsetBibitem can generate non-ASCII keys, and nonstandard
601                 // 8bit clean bibtex forks exist.
602                 
603                 idocfstream ifs(it->toFilesystemEncoding().c_str(),
604                         ios_base::in,
605                         buffer.params().encoding().iconvName());
606
607                 char_type ch;
608                 VarMap strings;
609
610                 while (ifs) {
611
612                         ifs.get(ch);
613                         if (!ifs)
614                                 break;
615
616                         if (ch != '@')
617                                 continue;
618
619                         docstring entryType;
620
621                         if (!readTypeOrKey(entryType, ifs, from_ascii("{("), 
622                                            docstring(), makeLowerCase) || !ifs)
623                                 continue;
624
625                         if (entryType == from_ascii("comment")) {
626
627                                 ifs.ignore(numeric_limits<int>::max(), '\n');
628                                 continue;
629                         }
630
631                         ifs.get(ch);
632                         if (!ifs)
633                                 break;
634
635                         if ((ch != '(') && (ch != '{')) {
636                                 // invalid entry delimiter
637                                 ifs.putback(ch);
638                                 continue;
639                         }
640
641                         // process the entry
642                         if (entryType == from_ascii("string")) {
643
644                                 // read string and add it to the strings map
645                                 // (or replace it's old value)
646                                 docstring name;
647                                 docstring value;
648
649                                 if (!readTypeOrKey(name, ifs, from_ascii("="), 
650                                                    from_ascii("#{}(),"), makeLowerCase) || !ifs)
651                                         continue;
652
653                                 // next char must be an equal sign
654                                 ifs.get(ch);
655                                 if (!ifs || ch != '=')
656                                         continue;
657
658                                 if (!readValue(value, ifs, strings))
659                                         continue;
660
661                                 strings[name] = value;
662
663                         } else if (entryType == from_ascii("preamble")) {
664
665                                 // preamble definitions are discarded.
666                                 // can they be of any use in lyx?
667                                 docstring value;
668
669                                 if (!readValue(value, ifs, strings))
670                                         continue;
671
672                         } else {
673
674                                 // Citation entry. Try to read the key.
675                                 docstring key;
676
677                                 if (!readTypeOrKey(key, ifs, from_ascii(","), 
678                                                    from_ascii("}"), keepCase) || !ifs)
679                                         continue;
680
681                                 /////////////////////////////////////////////
682                                 // now we have a key, so we will add an entry 
683                                 // (even if it's empty, as bibtex does)
684                                 //
685                                 // we now read the field = value pairs.
686                                 // all items must be separated by a comma. If
687                                 // it is missing the scanning of this entry is
688                                 // stopped and the next is searched.
689                                 docstring fields;
690                                 docstring name;
691                                 docstring value;
692                                 docstring commaNewline;
693                                 docstring data;
694                                 BibTeXInfo keyvalmap;
695                                 keyvalmap.entryType = entryType;
696                                 
697                                 bool readNext = removeWSAndComma(ifs);
698  
699                                 while (ifs && readNext) {
700
701                                         // read field name
702                                         if (!readTypeOrKey(name, ifs, from_ascii("="), 
703                                                            from_ascii("{}(),"), makeLowerCase) || !ifs)
704                                                 break;
705
706                                         // next char must be an equal sign
707                                         ifs.get(ch);
708                                         if (!ifs)
709                                                 break;
710                                         if (ch != '=') {
711                                                 ifs.putback(ch);
712                                                 break;
713                                         }
714
715                                         // read field value
716                                         if (!readValue(value, ifs, strings))
717                                                 break;
718
719                                         keyvalmap[name] = value;
720                                         data += "\n\n" + value;
721                                         keylist.fieldNames.insert(name);
722                                         readNext = removeWSAndComma(ifs);
723                                 }
724
725                                 // add the new entry
726                                 keylist.entryTypes.insert(entryType);
727                                 keyvalmap.allData = data;
728                                 keyvalmap.isBibTeX = true;
729                                 keyvalmap.bibKey = key;
730                                 keylist[key] = keyvalmap;
731                         }
732                 } //< searching '@'
733         } //< for loop over files
734 }
735
736
737
738 bool InsetBibtex::addDatabase(string const & db)
739 {
740         // FIXME UNICODE
741         string bibfiles(to_utf8(getParam("bibfiles")));
742         if (tokenPos(bibfiles, ',', db) == -1) {
743                 if (!bibfiles.empty())
744                         bibfiles += ',';
745                 setParam("bibfiles", from_utf8(bibfiles + db));
746                 return true;
747         }
748         return false;
749 }
750
751
752 bool InsetBibtex::delDatabase(string const & db)
753 {
754         // FIXME UNICODE
755         string bibfiles(to_utf8(getParam("bibfiles")));
756         if (contains(bibfiles, db)) {
757                 int const n = tokenPos(bibfiles, ',', db);
758                 string bd = db;
759                 if (n > 0) {
760                         // this is not the first database
761                         string tmp = ',' + bd;
762                         setParam("bibfiles", from_utf8(subst(bibfiles, tmp, string())));
763                 } else if (n == 0)
764                         // this is the first (or only) database
765                         setParam("bibfiles", from_utf8(split(bibfiles, bd, ',')));
766                 else
767                         return false;
768         }
769         return true;
770 }
771
772
773 void InsetBibtex::validate(LaTeXFeatures & features) const
774 {
775         if (features.bufferParams().use_bibtopic)
776                 features.require("bibtopic");
777 }
778
779
780 } // namespace lyx