]> git.lyx.org Git - lyx.git/blob - src/insets/InsetBibtex.cpp
200a3c5841ee2ccdaeb721690c51cd6c08bb8ef7
[lyx.git] / src / insets / InsetBibtex.cpp
1 /**
2  * \file InsetBibtex.cpp
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author Alejandro Aguilar Sierra
7  * \author Richard Heck (BibTeX parser improvements)
8  *
9  * Full author contact details are available in file CREDITS.
10  */
11
12 #include <config.h>
13
14 #include "InsetBibtex.h"
15
16 #include "Buffer.h"
17 #include "BufferParams.h"
18 #include "DispatchResult.h"
19 #include "Encoding.h"
20 #include "FuncRequest.h"
21 #include "LaTeXFeatures.h"
22 #include "MetricsInfo.h"
23 #include "OutputParams.h"
24 #include "TextClass.h"
25
26 #include "frontends/alert.h"
27
28 #include "support/debug.h"
29 #include "support/docstream.h"
30 #include "support/ExceptionMessage.h"
31 #include "support/filetools.h"
32 #include "support/gettext.h"
33 #include "support/lstrings.h"
34 #include "support/os.h"
35 #include "support/Path.h"
36 #include "support/textutils.h"
37
38 #include <limits>
39
40 using namespace std;
41 using namespace lyx::support;
42
43 namespace lyx {
44
45 namespace Alert = frontend::Alert;
46 namespace os = support::os;
47
48
/// Construct the inset by forwarding \p p, together with the fixed
/// name "bibtex", to the InsetCommand base class.
InsetBibtex::InsetBibtex(InsetCommandParams const & p)
	: InsetCommand(p, "bibtex")
{}
52
53
54 ParamInfo const & InsetBibtex::findInfo(string const & /* cmdName */)
55 {
56         static ParamInfo param_info_;
57         if (param_info_.empty()) {
58                 param_info_.add("btprint", ParamInfo::LATEX_OPTIONAL);
59                 param_info_.add("bibfiles", ParamInfo::LATEX_REQUIRED);
60                 param_info_.add("options", ParamInfo::LYX_INTERNAL);
61         }
62         return param_info_;
63 }
64
65
66 void InsetBibtex::doDispatch(Cursor & cur, FuncRequest & cmd)
67 {
68         switch (cmd.action) {
69
70         case LFUN_INSET_MODIFY: {
71                 InsetCommandParams p(BIBTEX_CODE);
72                 try {
73                         if (!InsetCommand::string2params("bibtex", 
74                                         to_utf8(cmd.argument()), p)) {
75                                 cur.noUpdate();
76                                 break;
77                         }
78                 } catch (ExceptionMessage const & message) {
79                         if (message.type_ == WarningException) {
80                                 Alert::warning(message.title_, message.details_);
81                                 cur.noUpdate();
82                         } else 
83                                 throw message;
84                         break;
85                 }
86                 //
87                 setParams(p);
88                 buffer().updateBibfilesCache();
89                 break;
90         }
91
92         default:
93                 InsetCommand::doDispatch(cur, cmd);
94                 break;
95         }
96 }
97
98
/// @return the translated, fixed label text for this inset.
docstring InsetBibtex::screenLabel() const
{
	return _("BibTeX Generated Bibliography");
}
103
104
105 docstring InsetBibtex::toolTip(BufferView const & /*bv*/, int /*x*/, int /*y*/) const
106 {
107         docstring item = from_ascii("* ");
108         docstring tip = _("Databases:\n");
109         vector<docstring> bibfilelist = getVectorFromString(getParam("bibfiles"));
110
111         if (bibfilelist.empty()) {
112                 tip += item;
113                 tip += _("none");
114         } else {
115                 vector<docstring>::const_iterator it = bibfilelist.begin();
116                 vector<docstring>::const_iterator en = bibfilelist.end();
117                 for (; it != en; ++it) {
118                         tip += item;
119                         tip += *it + "\n";
120                 }
121         }
122
123         // Style-Options
124         docstring style = getParam("options"); // maybe empty! and with bibtotoc
125         docstring bibtotoc = from_ascii("bibtotoc");
126         if (prefixIs(style, bibtotoc)) {
127                 if (contains(style, char_type(',')))
128                         style = split(style, bibtotoc, char_type(','));
129         }
130
131         tip += _("Style File:\n");
132         tip += item;
133         if (!style.empty())
134                 tip += style;
135         else
136                 tip += _("none");
137
138         return tip;
139 }
140
141
142 static string normalizeName(Buffer const & buffer,
143         OutputParams const & runparams, string const & name, string const & ext)
144 {
145         string const fname = makeAbsPath(name, buffer.filePath()).absFilename();
146         if (FileName(name).isAbsolute() || !FileName(fname + ext).isReadableFile())
147                 return name;
148         if (!runparams.nice)
149                 return fname;
150
151         // FIXME UNICODE
152         return to_utf8(makeRelPath(from_utf8(fname),
153                                          from_utf8(buffer.masterBuffer()->filePath())));
154 }
155
156
157 int InsetBibtex::latex(odocstream & os, OutputParams const & runparams) const
158 {
159         // the sequence of the commands:
160         // 1. \bibliographystyle{style}
161         // 2. \addcontentsline{...} - if option bibtotoc set
162         // 3. \bibliography{database}
163         // and with bibtopic:
164         // 1. \bibliographystyle{style}
165         // 2. \begin{btSect}{database}
166         // 3. \btPrint{Cited|NotCited|All}
167         // 4. \end{btSect}
168
169         // Database(s)
170         // If we are processing the LaTeX file in a temp directory then
171         // copy the .bib databases to this temp directory, mangling their
172         // names in the process. Store this mangled name in the list of
173         // all databases.
174         // (We need to do all this because BibTeX *really*, *really*
175         // can't handle "files with spaces" and Windows users tend to
176         // use such filenames.)
177         // Otherwise, store the (maybe absolute) path to the original,
178         // unmangled database name.
179         vector<docstring> bibfilelist = getVectorFromString(getParam("bibfiles"));
180         vector<docstring>::const_iterator it = bibfilelist.begin();
181         vector<docstring>::const_iterator en = bibfilelist.end();
182         odocstringstream dbs;
183         bool didone = false;
184
185         for (; it != en; ++it) {
186                 string utf8input = to_utf8(*it);
187                 string database =
188                         normalizeName(buffer(), runparams, utf8input, ".bib");
189                 FileName const try_in_file =
190                         makeAbsPath(database + ".bib", buffer().filePath());
191                 bool const not_from_texmf = try_in_file.isReadableFile();
192
193                 if (!runparams.inComment && !runparams.dryrun && !runparams.nice &&
194                     not_from_texmf) {
195
196                         // mangledFilename() needs the extension
197                         DocFileName const in_file = DocFileName(try_in_file);
198                         database = removeExtension(in_file.mangledFilename());
199                         FileName const out_file = makeAbsPath(database + ".bib",
200                                         buffer().masterBuffer()->temppath());
201
202                         bool const success = in_file.copyTo(out_file);
203                         if (!success) {
204                                 lyxerr << "Failed to copy '" << in_file
205                                        << "' to '" << out_file << "'"
206                                        << endl;
207                         }
208                 } else if (!runparams.inComment && runparams.nice && not_from_texmf &&
209                            !isValidLaTeXFilename(database)) {
210                                 frontend::Alert::warning(_("Invalid filename"),
211                                                          _("The following filename is likely to cause trouble "
212                                                            "when running the exported file through LaTeX: ") +
213                                                             from_utf8(database));
214                 }
215
216                 if (didone)
217                         dbs << ',';
218                 else 
219                         didone = true;
220                 // FIXME UNICODE
221                 dbs << from_utf8(latex_path(database));
222         }
223         docstring const db_out = dbs.str();
224
225         // Post this warning only once.
226         static bool warned_about_spaces = false;
227         if (!warned_about_spaces &&
228             runparams.nice && db_out.find(' ') != docstring::npos) {
229                 warned_about_spaces = true;
230
231                 Alert::warning(_("Export Warning!"),
232                                _("There are spaces in the paths to your BibTeX databases.\n"
233                                               "BibTeX will be unable to find them."));
234         }
235         // Style-Options
236         string style = to_utf8(getParam("options")); // maybe empty! and with bibtotoc
237         string bibtotoc;
238         if (prefixIs(style, "bibtotoc")) {
239                 bibtotoc = "bibtotoc";
240                 if (contains(style, ','))
241                         style = split(style, bibtotoc, ',');
242         }
243
244
245         // line count
246         int nlines = 0;
247
248         if (!style.empty()) {
249                 string base = normalizeName(buffer(), runparams, style, ".bst");
250                 FileName const try_in_file = 
251                         makeAbsPath(base + ".bst", buffer().filePath());
252                 bool const not_from_texmf = try_in_file.isReadableFile();
253                 // If this style does not come from texmf and we are not
254                 // exporting to .tex copy it to the tmp directory.
255                 // This prevents problems with spaces and 8bit charcaters
256                 // in the file name.
257                 if (!runparams.inComment && !runparams.dryrun && !runparams.nice &&
258                     not_from_texmf) {
259                         // use new style name
260                         DocFileName const in_file = DocFileName(try_in_file);
261                         base = removeExtension(in_file.mangledFilename());
262                         FileName const out_file = makeAbsPath(base + ".bst",
263                                         buffer().masterBuffer()->temppath());
264                         bool const success = in_file.copyTo(out_file);
265                         if (!success) {
266                                 lyxerr << "Failed to copy '" << in_file
267                                        << "' to '" << out_file << "'"
268                                        << endl;
269                         }
270                 }
271                 // FIXME UNICODE
272                 os << "\\bibliographystyle{"
273                    << from_utf8(latex_path(normalizeName(buffer(), runparams, base, ".bst")))
274                    << "}\n";
275                 nlines += 1;
276         }
277
278         // Post this warning only once.
279         static bool warned_about_bst_spaces = false;
280         if (!warned_about_bst_spaces && runparams.nice && contains(style, ' ')) {
281                 warned_about_bst_spaces = true;
282                 Alert::warning(_("Export Warning!"),
283                                _("There are spaces in the path to your BibTeX style file.\n"
284                                               "BibTeX will be unable to find it."));
285         }
286
287         if (!db_out.empty() && buffer().params().use_bibtopic) {
288                 os << "\\begin{btSect}{" << db_out << "}\n";
289                 docstring btprint = getParam("btprint");
290                 if (btprint.empty())
291                         // default
292                         btprint = from_ascii("btPrintCited");
293                 os << "\\" << btprint << "\n"
294                    << "\\end{btSect}\n";
295                 nlines += 3;
296         }
297
298         // bibtotoc-Option
299         if (!bibtotoc.empty() && !buffer().params().use_bibtopic) {
300                 if (buffer().params().documentClass().hasLaTeXLayout("chapter")) {
301                         if (buffer().params().sides == OneSide) {
302                                 // oneside
303                                 os << "\\clearpage";
304                         } else {
305                                 // twoside
306                                 os << "\\cleardoublepage";
307                         }
308                         os << "\\addcontentsline{toc}{chapter}{\\bibname}";
309                 } else if (buffer().params().documentClass().hasLaTeXLayout("section"))
310                         os << "\\addcontentsline{toc}{section}{\\refname}";
311         }
312
313         if (!db_out.empty() && !buffer().params().use_bibtopic) {
314                 docstring btprint = getParam("btprint");
315                 if (btprint == "btPrintAll") {
316                         os << "\\nocite{*}\n";
317                         nlines += 1;
318                 }
319                 os << "\\bibliography{" << db_out << "}\n";
320                 nlines += 1;
321         }
322
323         return nlines;
324 }
325
326
327 support::FileNameList InsetBibtex::getBibFiles() const
328 {
329         FileName path(buffer().filePath());
330         support::PathChanger p(path);
331         
332         support::FileNameList vec;
333         
334         vector<docstring> bibfilelist = getVectorFromString(getParam("bibfiles"));
335         vector<docstring>::const_iterator it = bibfilelist.begin();
336         vector<docstring>::const_iterator en = bibfilelist.end();
337         for (; it != en; ++it) {
338                 FileName const file = 
339                         findtexfile(changeExtension(to_utf8(*it), "bib"), "bib");
340                 
341                 // If we didn't find a matching file name just fail silently
342                 if (!file.empty())
343                         vec.push_back(file);
344         }
345         
346         return vec;
347
348 }
349
350 namespace {
351
352         // methods for parsing bibtex files
353
354         typedef map<docstring, docstring> VarMap;
355
356         /// remove whitespace characters, optionally a single comma,
357         /// and further whitespace characters from the stream.
358         /// @return true if a comma was found, false otherwise
359         ///
360         bool removeWSAndComma(idocfstream & ifs) {
361                 char_type ch;
362
363                 if (!ifs)
364                         return false;
365
366                 // skip whitespace
367                 do {
368                         ifs.get(ch);
369                 } while (ifs && isSpace(ch));
370
371                 if (!ifs)
372                         return false;
373
374                 if (ch != ',') {
375                         ifs.putback(ch);
376                         return false;
377                 }
378
379                 // skip whitespace
380                 do {
381                         ifs.get(ch);
382                 } while (ifs && isSpace(ch));
383
384                 if (ifs) {
385                         ifs.putback(ch);
386                 }
387
388                 return true;
389         }
390
391
392         enum charCase {
393                 makeLowerCase,
394                 keepCase
395         };
396
397         /// remove whitespace characters, read characer sequence
398         /// not containing whitespace characters or characters in
399         /// delimChars, and remove further whitespace characters.
400         ///
401         /// @return true if a string of length > 0 could be read.
402         ///
403         bool readTypeOrKey(docstring & val, idocfstream & ifs,
404                 docstring const & delimChars, docstring const &illegalChars, 
405                 charCase chCase) {
406
407                 char_type ch;
408
409                 val.clear();
410
411                 if (!ifs)
412                         return false;
413
414                 // skip whitespace
415                 do {
416                         ifs.get(ch);
417                 } while (ifs && isSpace(ch));
418
419                 if (!ifs)
420                         return false;
421
422                 // read value
423                 bool legalChar = true;
424                 while (ifs && !isSpace(ch) && 
425                                                  delimChars.find(ch) == docstring::npos &&
426                                                  (legalChar = (illegalChars.find(ch) == docstring::npos))
427                                         ) 
428                 {
429                         if (chCase == makeLowerCase)
430                                 val += lowercase(ch);
431                         else
432                                 val += ch;
433                         ifs.get(ch);
434                 }
435                 
436                 if (!legalChar) {
437                         ifs.putback(ch);
438                         return false;
439                 }
440
441                 // skip whitespace
442                 while (ifs && isSpace(ch)) {
443                         ifs.get(ch);
444                 }
445
446                 if (ifs) {
447                         ifs.putback(ch);
448                 }
449
450                 return val.length() > 0;
451         }
452
/// Read subsequent bibtex values that are delimited with a #-character.
/// Concatenate all parts and replace names with the associated string in
/// the variable map \p strings. Names are looked up in lower case; a name
/// without an entry in \p strings contributes nothing.
/// A part is either a run of digits, a text delimited by "..." or {...},
/// or a string name. The first character after the last part is put back
/// into the stream.
/// @return true if reading was successful (all single parts were delimited
/// correctly); false if the stream failed or braces did not match
bool readValue(docstring & val, idocfstream & ifs, const VarMap & strings) {

	char_type ch;

	val.clear();

	if (!ifs)
		return false;

	// one iteration per '#'-separated part
	do {
		// skip whitespace
		do {
			ifs.get(ch);
		} while (ifs && isSpace(ch));

		if (!ifs)
			return false;

		// check for field type
		if (isDigit(ch)) {

			// read integer value
			do {
				val += ch;
				ifs.get(ch);
			} while (ifs && isDigit(ch));

			if (!ifs)
				return false;

		} else if (ch == '"' || ch == '{') {
			// set end delimiter
			char_type delim = ch == '"' ? '"': '}';

			// skip whitespace at the start of the text
			do {
				ifs.get(ch);
			} while (ifs && isSpace(ch));

			if (!ifs)
				return false;

			// We now have the first non-whitespace character.
			// We'll collapse adjacent whitespace.
			bool lastWasWhiteSpace = false;

			// inside this delimited text braces must match.
			// Thus we can have a closing delimiter only
			// when nestLevel == 0
			int nestLevel = 0;

			while (ifs && (nestLevel > 0 || ch != delim)) {
				if (isSpace(ch)) {
					lastWasWhiteSpace = true;
					ifs.get(ch);
					continue;
				}
				// We output the space only after we stop getting
				// whitespace so as not to output any whitespace
				// at the end of the value.
				if (lastWasWhiteSpace) {
					lastWasWhiteSpace = false;
					val += ' ';
				}

				val += ch;

				// update nesting level
				switch (ch) {
					case '{':
						++nestLevel;
						break;
					case '}':
						--nestLevel;
						// closing brace without a matching opening one
						if (nestLevel < 0) return false;
						break;
				}

				ifs.get(ch);
			}

			if (!ifs)
				return false;

			// ch is the closing delimiter; fetch the character after it
			ifs.get(ch);

			if (!ifs)
				return false;

		} else {

			// reading a string name
			docstring strName;

			while (ifs && !isSpace(ch) && ch != '#' && ch != ',' && ch != '}' && ch != ')') {
				strName += lowercase(ch);
				ifs.get(ch);
			}

			if (!ifs)
				return false;

			// replace the string with its assigned value or
			// discard it if it's not assigned
			if (strName.length()) {
				VarMap::const_iterator pos = strings.find(strName);
				if (pos != strings.end()) {
					val += pos->second;
				}
			}
		}

		// skip WS
		while (ifs && isSpace(ch)) {
			ifs.get(ch);
		}

		if (!ifs)
			return false;

		// a '#' means another part follows and is concatenated
	} while (ch == '#');

	// ch was not part of the value
	ifs.putback(ch);

	return true;
}
585 }
586
587
588 // This method returns a comma separated list of Bibtex entries
589 void InsetBibtex::fillWithBibKeys(BiblioInfo & keylist,
590         InsetIterator const & /*di*/) const
591 {
592         // This bibtex parser is a first step to parse bibtex files
593         // more precisely.
594         //
595         // - it reads the whole bibtex entry and does a syntax check
596         //   (matching delimiters, missing commas,...
597         // - it recovers from errors starting with the next @-character
598         // - it reads @string definitions and replaces them in the
599         //   field values.
600         // - it accepts more characters in keys or value names than
601         //   bibtex does.
602         //
603         // Officially bibtex does only support ASCII, but in practice
604         // you can use the encoding of the main document as long as
605         // some elements like keys and names are pure ASCII. Therefore
606         // we convert the file from the buffer encoding.
607         // We don't restrict keys to ASCII in LyX, since our own
608         // InsetBibitem can generate non-ASCII keys, and nonstandard
609         // 8bit clean bibtex forks exist.
610         support::FileNameList const files = getBibFiles();
611         support::FileNameList::const_iterator it = files.begin();
612         support::FileNameList::const_iterator en = files.end();
613         for (; it != en; ++ it) {
614                 idocfstream ifs(it->toFilesystemEncoding().c_str(),
615                         ios_base::in, buffer().params().encoding().iconvName());
616
617                 char_type ch;
618                 VarMap strings;
619
620                 while (ifs) {
621
622                         ifs.get(ch);
623                         if (!ifs)
624                                 break;
625
626                         if (ch != '@')
627                                 continue;
628
629                         docstring entryType;
630
631                         if (!readTypeOrKey(entryType, ifs, from_ascii("{("), 
632                                            docstring(), makeLowerCase) || !ifs)
633                                 continue;
634
635                         if (entryType == from_ascii("comment")) {
636
637                                 ifs.ignore(numeric_limits<int>::max(), '\n');
638                                 continue;
639                         }
640
641                         ifs.get(ch);
642                         if (!ifs)
643                                 break;
644
645                         if ((ch != '(') && (ch != '{')) {
646                                 // invalid entry delimiter
647                                 ifs.putback(ch);
648                                 continue;
649                         }
650
651                         // process the entry
652                         if (entryType == from_ascii("string")) {
653
654                                 // read string and add it to the strings map
655                                 // (or replace it's old value)
656                                 docstring name;
657                                 docstring value;
658
659                                 if (!readTypeOrKey(name, ifs, from_ascii("="), 
660                                                    from_ascii("#{}(),"), makeLowerCase) || !ifs)
661                                         continue;
662
663                                 // next char must be an equal sign
664                                 ifs.get(ch);
665                                 if (!ifs || ch != '=')
666                                         continue;
667
668                                 if (!readValue(value, ifs, strings))
669                                         continue;
670
671                                 strings[name] = value;
672
673                         } else if (entryType == from_ascii("preamble")) {
674
675                                 // preamble definitions are discarded.
676                                 // can they be of any use in lyx?
677                                 docstring value;
678
679                                 if (!readValue(value, ifs, strings))
680                                         continue;
681
682                         } else {
683
684                                 // Citation entry. Try to read the key.
685                                 docstring key;
686
687                                 if (!readTypeOrKey(key, ifs, from_ascii(","), 
688                                                    from_ascii("}"), keepCase) || !ifs)
689                                         continue;
690
691                                 /////////////////////////////////////////////
692                                 // now we have a key, so we will add an entry 
693                                 // (even if it's empty, as bibtex does)
694                                 //
695                                 // we now read the field = value pairs.
696                                 // all items must be separated by a comma. If
697                                 // it is missing the scanning of this entry is
698                                 // stopped and the next is searched.
699                                 docstring fields;
700                                 docstring name;
701                                 docstring value;
702                                 docstring commaNewline;
703                                 docstring data;
704                                 BibTeXInfo keyvalmap(key, entryType);
705                                 
706                                 bool readNext = removeWSAndComma(ifs);
707  
708                                 while (ifs && readNext) {
709
710                                         // read field name
711                                         if (!readTypeOrKey(name, ifs, from_ascii("="), 
712                                                            from_ascii("{}(),"), makeLowerCase) || !ifs)
713                                                 break;
714
715                                         // next char must be an equal sign
716                                         ifs.get(ch);
717                                         if (!ifs)
718                                                 break;
719                                         if (ch != '=') {
720                                                 ifs.putback(ch);
721                                                 break;
722                                         }
723
724                                         // read field value
725                                         if (!readValue(value, ifs, strings))
726                                                 break;
727
728                                         keyvalmap[name] = value;
729                                         data += "\n\n" + value;
730                                         keylist.addFieldName(name);
731                                         readNext = removeWSAndComma(ifs);
732                                 }
733
734                                 // add the new entry
735                                 keylist.addEntryType(entryType);
736                                 keyvalmap.setAllData(data);
737                                 keylist[key] = keyvalmap;
738                         }
739                 } //< searching '@'
740         } //< for loop over files
741 }
742
743
744 FileName InsetBibtex::getBibTeXPath(docstring const & filename, Buffer const & buf)
745 {
746         string texfile = changeExtension(to_utf8(filename), "bib");
747         // note that, if the filename can be found directly from the path, 
748         // findtexfile will just return a FileName object for that path.
749         FileName file(findtexfile(texfile, "bib"));
750         if (file.empty())
751                 file = FileName(makeAbsPath(texfile, buf.filePath()));
752         return file;
753 }
754  
755
756 bool InsetBibtex::addDatabase(docstring const & db)
757 {
758         docstring bibfiles = getParam("bibfiles");
759         if (tokenPos(bibfiles, ',', db) != -1)
760                 return false;
761         if (!bibfiles.empty())
762                 bibfiles += ',';
763         setParam("bibfiles", bibfiles + db);
764         return true;
765 }
766
767
768 bool InsetBibtex::delDatabase(docstring const & db)
769 {
770         docstring bibfiles = getParam("bibfiles");
771         if (contains(bibfiles, db)) {
772                 int const n = tokenPos(bibfiles, ',', db);
773                 docstring bd = db;
774                 if (n > 0) {
775                         // this is not the first database
776                         docstring tmp = ',' + bd;
777                         setParam("bibfiles", subst(bibfiles, tmp, docstring()));
778                 } else if (n == 0)
779                         // this is the first (or only) database
780                         setParam("bibfiles", split(bibfiles, bd, ','));
781                 else
782                         return false;
783         }
784         return true;
785 }
786
787
788 void InsetBibtex::validate(LaTeXFeatures & features) const
789 {
790         if (features.bufferParams().use_bibtopic)
791                 features.require("bibtopic");
792 }
793
794
795 } // namespace lyx