]> git.lyx.org Git - features.git/blob - src/insets/InsetBibtex.cpp
04124e1bb2527e47c4b0abe1642b77f04aa37cb4
[features.git] / src / insets / InsetBibtex.cpp
1 /**
2  * \file InsetBibtex.cpp
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author Alejandro Aguilar Sierra
7  * \author Richard Heck (BibTeX parser improvements)
8  *
9  * Full author contact details are available in file CREDITS.
10  */
11
12 #include <config.h>
13
14 #include "InsetBibtex.h"
15
16 #include "Buffer.h"
17 #include "BufferParams.h"
18 #include "DispatchResult.h"
19 #include "debug.h"
20 #include "Encoding.h"
21 #include "FuncRequest.h"
22 #include "gettext.h"
23 #include "LaTeXFeatures.h"
24 #include "MetricsInfo.h"
25 #include "OutputParams.h"
26 #include "TextClass.h"
27
28 #include "frontends/alert.h"
29
30 #include "support/ExceptionMessage.h"
31 #include "support/docstream.h"
32 #include "support/filetools.h"
33 #include "support/lstrings.h"
34 #include "support/lyxlib.h"
35 #include "support/os.h"
36 #include "support/Path.h"
37 #include "support/textutils.h"
38
39 #include <boost/tokenizer.hpp>
40
41
42 namespace lyx {
43
44 using support::absolutePath;
45 using support::ascii_lowercase;
46 using support::changeExtension;
47 using support::contains;
48 using support::copy;
49 using support::DocFileName;
50 using support::FileName;
51 using support::findtexfile;
52 using support::isValidLaTeXFilename;
53 using support::latex_path;
54 using support::ltrim;
55 using support::makeAbsPath;
56 using support::makeRelPath;
57 using support::prefixIs;
58 using support::removeExtension;
59 using support::rtrim;
60 using support::split;
61 using support::subst;
62 using support::tokenPos;
63 using support::trim;
64 using support::lowercase;
65
66 namespace Alert = frontend::Alert;
67 namespace os = support::os;
68
69 using std::endl;
70 using std::getline;
71 using std::string;
72 using std::ostream;
73 using std::pair;
74 using std::vector;
75 using std::map;
76
77
78 InsetBibtex::InsetBibtex(InsetCommandParams const & p)
79         : InsetCommand(p, "bibtex")
80 {}
81
82
83 CommandInfo const * InsetBibtex::findInfo(std::string const & /* cmdName */)
84 {
85         static const char * const paramnames[] = 
86                 {"options", "btprint", "bibfiles", ""};
87         static const bool isoptional[] = {true, true, false};
88         static const CommandInfo info = {3, paramnames, isoptional};
89         return &info;
90 }
91
92
93 Inset * InsetBibtex::clone() const
94 {
95         return new InsetBibtex(*this);
96 }
97
98
99 void InsetBibtex::doDispatch(Cursor & cur, FuncRequest & cmd)
100 {
101         switch (cmd.action) {
102
103         case LFUN_INSET_MODIFY: {
104                 InsetCommandParams p(BIBTEX_CODE);
105                 try {
106                         if (!InsetCommandMailer::string2params("bibtex", 
107                                         to_utf8(cmd.argument()), p)) {
108                                 cur.noUpdate();
109                                 break;
110                         }
111                 } catch (support::ExceptionMessage const & message) {
112                         if (message.type_ == support::WarningException) {
113                                 Alert::warning(message.title_, message.details_);
114                                 cur.noUpdate();
115                         } else 
116                                 throw message;
117                         break;
118                 }
119                 setParams(p);
120                 cur.buffer().updateBibfilesCache();
121                 break;
122         }
123
124         default:
125                 InsetCommand::doDispatch(cur, cmd);
126                 break;
127         }
128 }
129
130
131 docstring const InsetBibtex::getScreenLabel(Buffer const &) const
132 {
133         return _("BibTeX Generated Bibliography");
134 }
135
136
137 namespace {
138
139 string normalizeName(Buffer const & buffer, OutputParams const & runparams,
140                       string const & name, string const & ext)
141 {
142         string const fname = makeAbsPath(name, buffer.filePath()).absFilename();
143         if (absolutePath(name) || !FileName(fname + ext).isFileReadable())
144                 return name;
145         if (!runparams.nice)
146                 return fname;
147
148         // FIXME UNICODE
149         return to_utf8(makeRelPath(from_utf8(fname),
150                                          from_utf8(buffer.masterBuffer()->filePath())));
151 }
152
153 }
154
155
156 int InsetBibtex::latex(Buffer const & buffer, odocstream & os,
157                        OutputParams const & runparams) const
158 {
159         // the sequence of the commands:
160         // 1. \bibliographystyle{style}
161         // 2. \addcontentsline{...} - if option bibtotoc set
162         // 3. \bibliography{database}
163         // and with bibtopic:
164         // 1. \bibliographystyle{style}
165         // 2. \begin{btSect}{database}
166         // 3. \btPrint{Cited|NotCited|All}
167         // 4. \end{btSect}
168
169         // Database(s)
170         // If we are processing the LaTeX file in a temp directory then
171         // copy the .bib databases to this temp directory, mangling their
172         // names in the process. Store this mangled name in the list of
173         // all databases.
174         // (We need to do all this because BibTeX *really*, *really*
175         // can't handle "files with spaces" and Windows users tend to
176         // use such filenames.)
177         // Otherwise, store the (maybe absolute) path to the original,
178         // unmangled database name.
179         typedef boost::char_separator<char_type> Separator;
180         typedef boost::tokenizer<Separator, docstring::const_iterator, docstring> Tokenizer;
181
182         Separator const separator(from_ascii(",").c_str());
183         // The tokenizer must not be called with temporary strings, since
184         // it does not make a copy and uses iterators of the string further
185         // down. getParam returns a reference, so this is OK.
186         Tokenizer const tokens(getParam("bibfiles"), separator);
187         Tokenizer::const_iterator const begin = tokens.begin();
188         Tokenizer::const_iterator const end = tokens.end();
189
190         odocstringstream dbs;
191         for (Tokenizer::const_iterator it = begin; it != end; ++it) {
192                 docstring const input = trim(*it);
193                 // FIXME UNICODE
194                 string utf8input = to_utf8(input);
195                 string database =
196                         normalizeName(buffer, runparams, utf8input, ".bib");
197                 FileName const try_in_file(makeAbsPath(database + ".bib", buffer.filePath()));
198                 bool const not_from_texmf = try_in_file.isFileReadable();
199
200                 if (!runparams.inComment && !runparams.dryrun && !runparams.nice &&
201                     not_from_texmf) {
202
203                         // mangledFilename() needs the extension
204                         DocFileName const in_file = DocFileName(try_in_file);
205                         database = removeExtension(in_file.mangledFilename());
206                         FileName const out_file = makeAbsPath(database + ".bib",
207                                         buffer.masterBuffer()->temppath());
208
209                         bool const success = copy(in_file, out_file);
210                         if (!success) {
211                                 lyxerr << "Failed to copy '" << in_file
212                                        << "' to '" << out_file << "'"
213                                        << endl;
214                         }
215                 } else if (!runparams.inComment && runparams.nice && not_from_texmf &&
216                            !isValidLaTeXFilename(database)) {
217                                 frontend::Alert::warning(_("Invalid filename"),
218                                                          _("The following filename is likely to cause trouble "
219                                                            "when running the exported file through LaTeX: ") +
220                                                             from_utf8(database));
221                 }
222
223                 if (it != begin)
224                         dbs << ',';
225                 // FIXME UNICODE
226                 dbs << from_utf8(latex_path(database));
227         }
228         docstring const db_out = dbs.str();
229
230         // Post this warning only once.
231         static bool warned_about_spaces = false;
232         if (!warned_about_spaces &&
233             runparams.nice && db_out.find(' ') != docstring::npos) {
234                 warned_about_spaces = true;
235
236                 Alert::warning(_("Export Warning!"),
237                                _("There are spaces in the paths to your BibTeX databases.\n"
238                                               "BibTeX will be unable to find them."));
239         }
240
241         // Style-Options
242         string style = to_utf8(getParam("options")); // maybe empty! and with bibtotoc
243         string bibtotoc;
244         if (prefixIs(style, "bibtotoc")) {
245                 bibtotoc = "bibtotoc";
246                 if (contains(style, ','))
247                         style = split(style, bibtotoc, ',');
248         }
249
250         // line count
251         int nlines = 0;
252
253         if (!style.empty()) {
254                 string base = normalizeName(buffer, runparams, style, ".bst");
255                 FileName const try_in_file(makeAbsPath(base + ".bst", buffer.filePath()));
256                 bool const not_from_texmf = try_in_file.isFileReadable();
257                 // If this style does not come from texmf and we are not
258                 // exporting to .tex copy it to the tmp directory.
259                 // This prevents problems with spaces and 8bit charcaters
260                 // in the file name.
261                 if (!runparams.inComment && !runparams.dryrun && !runparams.nice &&
262                     not_from_texmf) {
263                         // use new style name
264                         DocFileName const in_file = DocFileName(try_in_file);
265                         base = removeExtension(in_file.mangledFilename());
266                         FileName const out_file(makeAbsPath(base + ".bst",
267                                         buffer.masterBuffer()->temppath()));
268                         bool const success = copy(in_file, out_file);
269                         if (!success) {
270                                 lyxerr << "Failed to copy '" << in_file
271                                        << "' to '" << out_file << "'"
272                                        << endl;
273                         }
274                 }
275                 // FIXME UNICODE
276                 os << "\\bibliographystyle{"
277                    << from_utf8(latex_path(normalizeName(buffer, runparams, base, ".bst")))
278                    << "}\n";
279                 nlines += 1;
280         }
281
282         // Post this warning only once.
283         static bool warned_about_bst_spaces = false;
284         if (!warned_about_bst_spaces && runparams.nice && contains(style, ' ')) {
285                 warned_about_bst_spaces = true;
286                 Alert::warning(_("Export Warning!"),
287                                _("There are spaces in the path to your BibTeX style file.\n"
288                                               "BibTeX will be unable to find it."));
289         }
290
291         if (!db_out.empty() && buffer.params().use_bibtopic){
292                 os << "\\begin{btSect}{" << db_out << "}\n";
293                 docstring btprint = getParam("btprint");
294                 if (btprint.empty())
295                         // default
296                         btprint = from_ascii("btPrintCited");
297                 os << "\\" << btprint << "\n"
298                    << "\\end{btSect}\n";
299                 nlines += 3;
300         }
301
302         // bibtotoc-Option
303         if (!bibtotoc.empty() && !buffer.params().use_bibtopic) {
304                 // maybe a problem when a textclass has no "art" as
305                 // part of its name, because it's than book.
306                 // For the "official" lyx-layouts it's no problem to support
307                 // all well
308                 if (!contains(buffer.params().getTextClass().name(),
309                               "art")) {
310                         if (buffer.params().sides == OneSide) {
311                                 // oneside
312                                 os << "\\clearpage";
313                         } else {
314                                 // twoside
315                                 os << "\\cleardoublepage";
316                         }
317
318                         // bookclass
319                         os << "\\addcontentsline{toc}{chapter}{\\bibname}";
320
321                 } else {
322                         // article class
323                         os << "\\addcontentsline{toc}{section}{\\refname}";
324                 }
325         }
326
327         if (!db_out.empty() && !buffer.params().use_bibtopic){
328                 os << "\\bibliography{" << db_out << "}\n";
329                 nlines += 1;
330         }
331
332         return nlines;
333 }
334
335
336 vector<FileName> const InsetBibtex::getFiles(Buffer const & buffer) const
337 {
338         FileName path(buffer.filePath());
339         support::PathChanger p(path);
340
341         vector<FileName> vec;
342
343         string tmp;
344         // FIXME UNICODE
345         string bibfiles = to_utf8(getParam("bibfiles"));
346         bibfiles = split(bibfiles, tmp, ',');
347         while (!tmp.empty()) {
348                 FileName const file = findtexfile(changeExtension(tmp, "bib"), "bib");
349                 LYXERR(Debug::LATEX) << "Bibfile: " << file << endl;
350
351                 // If we didn't find a matching file name just fail silently
352                 if (!file.empty())
353                         vec.push_back(file);
354
355                 // Get next file name
356                 bibfiles = split(bibfiles, tmp, ',');
357         }
358
359         return vec;
360 }
361
362 namespace {
363
364         // methods for parsing bibtex files
365
366         typedef map<docstring, docstring> VarMap;
367
368         /// remove whitespace characters, optionally a single comma,
369         /// and further whitespace characters from the stream.
370         /// @return true if a comma was found, false otherwise
371         ///
372         bool removeWSAndComma(idocfstream & ifs) {
373                 char_type ch;
374
375                 if (!ifs)
376                         return false;
377
378                 // skip whitespace
379                 do {
380                         ifs.get(ch);
381                 } while (ifs && isSpace(ch));
382
383                 if (!ifs)
384                         return false;
385
386                 if (ch != ',') {
387                         ifs.putback(ch);
388                         return false;
389                 }
390
391                 // skip whitespace
392                 do {
393                         ifs.get(ch);
394                 } while (ifs && isSpace(ch));
395
396                 if (ifs) {
397                         ifs.putback(ch);
398                 }
399
400                 return true;
401         }
402
403
404         enum charCase {
405                 makeLowerCase,
406                 keepCase
407         };
408
409         /// remove whitespace characters, read characer sequence
410         /// not containing whitespace characters or characters in
411         /// delimChars, and remove further whitespace characters.
412         ///
413         /// @return true if a string of length > 0 could be read.
414         ///
415         bool readTypeOrKey(docstring & val, idocfstream & ifs,
416                 docstring const & delimChars, docstring const &illegalChars, 
417                 charCase chCase) {
418
419                 char_type ch;
420
421                 val.clear();
422
423                 if (!ifs)
424                         return false;
425
426                 // skip whitespace
427                 do {
428                         ifs.get(ch);
429                 } while (ifs && isSpace(ch));
430
431                 if (!ifs)
432                         return false;
433
434                 // read value
435                 bool legalChar = true;
436                 while (ifs && !isSpace(ch) && 
437                                                  delimChars.find(ch) == docstring::npos &&
438                                                  (legalChar = (illegalChars.find(ch) == docstring::npos))
439                                         ) 
440                 {
441                         if (chCase == makeLowerCase)
442                                 val += lowercase(ch);
443                         else
444                                 val += ch;
445                         ifs.get(ch);
446                 }
447                 
448                 if (!legalChar) {
449                         ifs.putback(ch);
450                         return false;
451                 }
452
453                 // skip whitespace
454                 while (ifs && isSpace(ch)) {
455                         ifs.get(ch);
456                 }
457
458                 if (ifs) {
459                         ifs.putback(ch);
460                 }
461
462                 return val.length() > 0;
463         }
464
465         /// read subsequent bibtex values that are delimited with a #-character.
466         /// Concatenate all parts and replace names with the associated string in
467         /// the variable strings.
468         /// @return true if reading was successfull (all single parts were delimited
469         /// correctly)
470         bool readValue(docstring & val, idocfstream & ifs, const VarMap & strings) {
471
472                 char_type ch;
473
474                 val.clear();
475
476                 if (!ifs)
477                         return false;
478
479                 do {
480                         // skip whitespace
481                         do {
482                                 ifs.get(ch);
483                         } while (ifs && isSpace(ch));
484
485                         if (!ifs)
486                                 return false;
487
488                         // check for field type
489                         if (isDigit(ch)) {
490
491                                 // read integer value
492                                 do {
493                                         val += ch;
494                                         ifs.get(ch);
495                                 } while (ifs && isDigit(ch));
496
497                                 if (!ifs)
498                                         return false;
499
500                         } else if (ch == '"' || ch == '{') {
501                                 // set end delimiter
502                                 char_type delim = ch == '"' ? '"': '}';
503
504                                 //Skip whitespace
505                                 do {
506                                         ifs.get(ch);
507                                 } while (ifs && isSpace(ch));
508                                 
509                                 if (!ifs)
510                                         return false;
511                                 
512                                 //We now have the first non-whitespace character
513                                 //We'll collapse adjacent whitespace.
514                                 bool lastWasWhiteSpace = false;
515                                 
516                                 // inside this delimited text braces must match.
517                                 // Thus we can have a closing delimiter only
518                                 // when nestLevel == 0
519                                 int nestLevel = 0;
520  
521                                 while (ifs && (nestLevel > 0 || ch != delim)) {
522                                         if (isSpace(ch)) {
523                                                 lastWasWhiteSpace = true;
524                                                 ifs.get(ch);
525                                                 continue;
526                                         }
527                                         //We output the space only after we stop getting 
528                                         //whitespace so as not to output any whitespace
529                                         //at the end of the value.
530                                         if (lastWasWhiteSpace) {
531                                                 lastWasWhiteSpace = false;
532                                                 val += ' ';
533                                         }
534                                         
535                                         val += ch;
536
537                                         // update nesting level
538                                         switch (ch) {
539                                                 case '{':
540                                                         ++nestLevel;
541                                                         break;
542                                                 case '}':
543                                                         --nestLevel;
544                                                         if (nestLevel < 0) return false;
545                                                         break;
546                                         }
547
548                                         ifs.get(ch);
549                                 }
550
551                                 if (!ifs)
552                                         return false;
553
554                                 ifs.get(ch);
555
556                                 if (!ifs)
557                                         return false;
558
559                         } else {
560
561                                 // reading a string name
562                                 docstring strName;
563
564                                 while (ifs && !isSpace(ch) && ch != '#' && ch != ',' && ch != '}' && ch != ')') {
565                                         strName += lowercase(ch);
566                                         ifs.get(ch);
567                                 }
568
569                                 if (!ifs)
570                                         return false;
571
572                                 // replace the string with its assigned value or
573                                 // discard it if it's not assigned
574                                 if (strName.length()) {
575                                         VarMap::const_iterator pos = strings.find(strName);
576                                         if (pos != strings.end()) {
577                                                 val += pos->second;
578                                         }
579                                 }
580                         }
581
582                         // skip WS
583                         while (ifs && isSpace(ch)) {
584                                 ifs.get(ch);
585                         }
586
587                         if (!ifs)
588                                 return false;
589
590                         // continue reading next value on concatenate with '#'
591                 } while (ch == '#');
592
593                 ifs.putback(ch);
594
595                 return true;
596         }
597 }
598
599
600 // This method returns a comma separated list of Bibtex entries
601 void InsetBibtex::fillWithBibKeys(Buffer const & buffer,
602                 BiblioInfo & keylist, InsetIterator const & /*di*/) const
603 {
604         vector<FileName> const files = getFiles(buffer);
605         for (vector<FileName>::const_iterator it = files.begin();
606              it != files.end(); ++ it) {
607                 // This bibtex parser is a first step to parse bibtex files
608                 // more precisely.
609                 //
610                 // - it reads the whole bibtex entry and does a syntax check
611                 //   (matching delimiters, missing commas,...
612                 // - it recovers from errors starting with the next @-character
613                 // - it reads @string definitions and replaces them in the
614                 //   field values.
615                 // - it accepts more characters in keys or value names than
616                 //   bibtex does.
617                 //
618                 // Officially bibtex does only support ASCII, but in practice
619                 // you can use the encoding of the main document as long as
620                 // some elements like keys and names are pure ASCII. Therefore
621                 // we convert the file from the buffer encoding.
622                 // We don't restrict keys to ASCII in LyX, since our own
623                 // InsetBibitem can generate non-ASCII keys, and nonstandard
624                 // 8bit clean bibtex forks exist.
625                 
626                 idocfstream ifs(it->toFilesystemEncoding().c_str(),
627                         std::ios_base::in,
628                         buffer.params().encoding().iconvName());
629
630                 char_type ch;
631                 VarMap strings;
632
633                 while (ifs) {
634
635                         ifs.get(ch);
636                         if (!ifs)
637                                 break;
638
639                         if (ch != '@')
640                                 continue;
641
642                         docstring entryType;
643
644                         if (!readTypeOrKey(entryType, ifs, from_ascii("{("), 
645                                            docstring(), makeLowerCase) || !ifs)
646                                 continue;
647
648                         if (entryType == from_ascii("comment")) {
649
650                                 ifs.ignore(std::numeric_limits<int>::max(), '\n');
651                                 continue;
652                         }
653
654                         ifs.get(ch);
655                         if (!ifs)
656                                 break;
657
658                         if ((ch != '(') && (ch != '{')) {
659                                 // invalid entry delimiter
660                                 ifs.putback(ch);
661                                 continue;
662                         }
663
664                         // process the entry
665                         if (entryType == from_ascii("string")) {
666
667                                 // read string and add it to the strings map
668                                 // (or replace it's old value)
669                                 docstring name;
670                                 docstring value;
671
672                                 if (!readTypeOrKey(name, ifs, from_ascii("="), 
673                                                    from_ascii("#{}(),"), makeLowerCase) || !ifs)
674                                         continue;
675
676                                 // next char must be an equal sign
677                                 ifs.get(ch);
678                                 if (!ifs || ch != '=')
679                                         continue;
680
681                                 if (!readValue(value, ifs, strings))
682                                         continue;
683
684                                 strings[name] = value;
685
686                         } else if (entryType == from_ascii("preamble")) {
687
688                                 // preamble definitions are discarded.
689                                 // can they be of any use in lyx?
690                                 docstring value;
691
692                                 if (!readValue(value, ifs, strings))
693                                         continue;
694
695                         } else {
696
697                                 // Citation entry. Try to read the key.
698                                 docstring key;
699
700                                 if (!readTypeOrKey(key, ifs, from_ascii(","), 
701                                                    from_ascii("}"), keepCase) || !ifs)
702                                         continue;
703
704                                 /////////////////////////////////////////////
705                                 // now we have a key, so we will add an entry 
706                                 // (even if it's empty, as bibtex does)
707                                 //
708                                 // we now read the field = value pairs.
709                                 // all items must be separated by a comma. If
710                                 // it is missing the scanning of this entry is
711                                 // stopped and the next is searched.
712                                 docstring fields;
713                                 docstring name;
714                                 docstring value;
715                                 docstring commaNewline;
716                                 docstring data;
717                                 BibTeXInfo keyvalmap;
718                                 keyvalmap.entryType = entryType;
719                                 
720                                 bool readNext = removeWSAndComma(ifs);
721  
722                                 while (ifs && readNext) {
723
724                                         // read field name
725                                         if (!readTypeOrKey(name, ifs, from_ascii("="), 
726                                                            from_ascii("{}(),"), makeLowerCase) || !ifs)
727                                                 break;
728
729                                         // next char must be an equal sign
730                                         ifs.get(ch);
731                                         if (!ifs)
732                                                 break;
733                                         if (ch != '=') {
734                                                 ifs.putback(ch);
735                                                 break;
736                                         }
737
738                                         // read field value
739                                         if (!readValue(value, ifs, strings))
740                                                 break;
741
742                                         keyvalmap[name] = value;
743                                         data += "\n\n" + value;
744                                         keylist.fieldNames.insert(name);
745                                         readNext = removeWSAndComma(ifs);
746                                 }
747
748                                 // add the new entry
749                                 keylist.entryTypes.insert(entryType);
750                                 keyvalmap.allData = data;
751                                 keyvalmap.isBibTeX = true;
752                                 keyvalmap.bibKey = key;
753                                 keylist[key] = keyvalmap;
754                         }
755                 } //< searching '@'
756         } //< for loop over files
757 }
758
759
760
761 bool InsetBibtex::addDatabase(string const & db)
762 {
763         // FIXME UNICODE
764         string bibfiles(to_utf8(getParam("bibfiles")));
765         if (tokenPos(bibfiles, ',', db) == -1) {
766                 if (!bibfiles.empty())
767                         bibfiles += ',';
768                 setParam("bibfiles", from_utf8(bibfiles + db));
769                 return true;
770         }
771         return false;
772 }
773
774
775 bool InsetBibtex::delDatabase(string const & db)
776 {
777         // FIXME UNICODE
778         string bibfiles(to_utf8(getParam("bibfiles")));
779         if (contains(bibfiles, db)) {
780                 int const n = tokenPos(bibfiles, ',', db);
781                 string bd = db;
782                 if (n > 0) {
783                         // this is not the first database
784                         string tmp = ',' + bd;
785                         setParam("bibfiles", from_utf8(subst(bibfiles, tmp, string())));
786                 } else if (n == 0)
787                         // this is the first (or only) database
788                         setParam("bibfiles", from_utf8(split(bibfiles, bd, ',')));
789                 else
790                         return false;
791         }
792         return true;
793 }
794
795
796 void InsetBibtex::validate(LaTeXFeatures & features) const
797 {
798         if (features.bufferParams().use_bibtopic)
799                 features.require("bibtopic");
800 }
801
802
803 } // namespace lyx