]> git.lyx.org Git - lyx.git/blob - src/insets/InsetBibtex.cpp
Move the findInfo() and defaultCommand() routines out of InsetCommand and into its...
[lyx.git] / src / insets / InsetBibtex.cpp
1 /**
2  * \file InsetBibtex.cpp
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author Alejandro Aguilar Sierra
7  * \author Richard Heck (BibTeX parser improvements)
8  *
9  * Full author contact details are available in file CREDITS.
10  */
11
12 #include <config.h>
13
14 #include "InsetBibtex.h"
15
16 #include "Buffer.h"
17 #include "BufferParams.h"
18 #include "DispatchResult.h"
19 #include "debug.h"
20 #include "Encoding.h"
21 #include "FuncRequest.h"
22 #include "gettext.h"
23 #include "LaTeXFeatures.h"
24 #include "MetricsInfo.h"
25 #include "OutputParams.h"
26
27 #include "frontends/alert.h"
28
29 #include "support/ExceptionMessage.h"
30 #include "support/filetools.h"
31 #include "support/lstrings.h"
32 #include "support/lyxlib.h"
33 #include "support/os.h"
34 #include "support/Path.h"
35 #include "support/textutils.h"
36
37 #include <boost/tokenizer.hpp>
38
39
40 namespace lyx {
41
42 using support::absolutePath;
43 using support::ascii_lowercase;
44 using support::changeExtension;
45 using support::contains;
46 using support::copy;
47 using support::DocFileName;
48 using support::FileName;
49 using support::findtexfile;
50 using support::isValidLaTeXFilename;
51 using support::latex_path;
52 using support::ltrim;
53 using support::makeAbsPath;
54 using support::makeRelPath;
55 using support::prefixIs;
56 using support::removeExtension;
57 using support::rtrim;
58 using support::split;
59 using support::subst;
60 using support::tokenPos;
61 using support::trim;
62 using support::lowercase;
63
64 namespace Alert = frontend::Alert;
65 namespace os = support::os;
66
67 using std::endl;
68 using std::getline;
69 using std::string;
70 using std::ostream;
71 using std::pair;
72 using std::vector;
73 using std::map;
74
75
76 InsetBibtex::InsetBibtex(InsetCommandParams const & p)
77         : InsetCommand(p, "bibtex")
78 {}
79
80
81 CommandInfo const * InsetBibtex::findInfo(std::string const & /* cmdName */)
82 {
83         static const char * const paramnames[] = 
84                 {"options", "btprint", "bibfiles", ""};
85         static const bool isoptional[] = {true, true, false};
86         static const CommandInfo info = {3, paramnames, isoptional};
87         return &info;
88 }
89
90
91 Inset * InsetBibtex::clone() const
92 {
93         return new InsetBibtex(*this);
94 }
95
96
97 void InsetBibtex::doDispatch(Cursor & cur, FuncRequest & cmd)
98 {
99         switch (cmd.action) {
100
101         case LFUN_INSET_MODIFY: {
102                 InsetCommandParams p(BIBTEX_CODE);
103                 try {
104                         if (!InsetCommandMailer::string2params("bibtex", 
105                                         to_utf8(cmd.argument()), p)) {
106                                 cur.noUpdate();
107                                 break;
108                         }
109                 } catch (support::ExceptionMessage const & message) {
110                         if (message.type_ == support::WarningException) {
111                                 Alert::warning(message.title_, message.details_);
112                                 cur.noUpdate();
113                         } else 
114                                 throw message;
115                         break;
116                 }
117                 setParams(p);
118                 cur.buffer().updateBibfilesCache();
119                 break;
120         }
121
122         default:
123                 InsetCommand::doDispatch(cur, cmd);
124                 break;
125         }
126 }
127
128
129 docstring const InsetBibtex::getScreenLabel(Buffer const &) const
130 {
131         return _("BibTeX Generated Bibliography");
132 }
133
134
135 namespace {
136
137 string normalizeName(Buffer const & buffer, OutputParams const & runparams,
138                       string const & name, string const & ext)
139 {
140         string const fname = makeAbsPath(name, buffer.filePath()).absFilename();
141         if (absolutePath(name) || !FileName(fname + ext).isFileReadable())
142                 return name;
143         if (!runparams.nice)
144                 return fname;
145
146         // FIXME UNICODE
147         return to_utf8(makeRelPath(from_utf8(fname),
148                                          from_utf8(buffer.masterBuffer()->filePath())));
149 }
150
151 }
152
153
154 int InsetBibtex::latex(Buffer const & buffer, odocstream & os,
155                        OutputParams const & runparams) const
156 {
157         // the sequence of the commands:
158         // 1. \bibliographystyle{style}
159         // 2. \addcontentsline{...} - if option bibtotoc set
160         // 3. \bibliography{database}
161         // and with bibtopic:
162         // 1. \bibliographystyle{style}
163         // 2. \begin{btSect}{database}
164         // 3. \btPrint{Cited|NotCited|All}
165         // 4. \end{btSect}
166
167         // Database(s)
168         // If we are processing the LaTeX file in a temp directory then
169         // copy the .bib databases to this temp directory, mangling their
170         // names in the process. Store this mangled name in the list of
171         // all databases.
172         // (We need to do all this because BibTeX *really*, *really*
173         // can't handle "files with spaces" and Windows users tend to
174         // use such filenames.)
175         // Otherwise, store the (maybe absolute) path to the original,
176         // unmangled database name.
177         typedef boost::char_separator<char_type> Separator;
178         typedef boost::tokenizer<Separator, docstring::const_iterator, docstring> Tokenizer;
179
180         Separator const separator(from_ascii(",").c_str());
181         // The tokenizer must not be called with temporary strings, since
182         // it does not make a copy and uses iterators of the string further
183         // down. getParam returns a reference, so this is OK.
184         Tokenizer const tokens(getParam("bibfiles"), separator);
185         Tokenizer::const_iterator const begin = tokens.begin();
186         Tokenizer::const_iterator const end = tokens.end();
187
188         odocstringstream dbs;
189         for (Tokenizer::const_iterator it = begin; it != end; ++it) {
190                 docstring const input = trim(*it);
191                 // FIXME UNICODE
192                 string utf8input = to_utf8(input);
193                 string database =
194                         normalizeName(buffer, runparams, utf8input, ".bib");
195                 FileName const try_in_file(makeAbsPath(database + ".bib", buffer.filePath()));
196                 bool const not_from_texmf = try_in_file.isFileReadable();
197
198                 if (!runparams.inComment && !runparams.dryrun && !runparams.nice &&
199                     not_from_texmf) {
200
201                         // mangledFilename() needs the extension
202                         DocFileName const in_file = DocFileName(try_in_file);
203                         database = removeExtension(in_file.mangledFilename());
204                         FileName const out_file = makeAbsPath(database + ".bib",
205                                         buffer.masterBuffer()->temppath());
206
207                         bool const success = copy(in_file, out_file);
208                         if (!success) {
209                                 lyxerr << "Failed to copy '" << in_file
210                                        << "' to '" << out_file << "'"
211                                        << endl;
212                         }
213                 } else if (!runparams.inComment && runparams.nice && not_from_texmf &&
214                            !isValidLaTeXFilename(database)) {
215                                 frontend::Alert::warning(_("Invalid filename"),
216                                                          _("The following filename is likely to cause trouble "
217                                                            "when running the exported file through LaTeX: ") +
218                                                             from_utf8(database));
219                 }
220
221                 if (it != begin)
222                         dbs << ',';
223                 // FIXME UNICODE
224                 dbs << from_utf8(latex_path(database));
225         }
226         docstring const db_out = dbs.str();
227
228         // Post this warning only once.
229         static bool warned_about_spaces = false;
230         if (!warned_about_spaces &&
231             runparams.nice && db_out.find(' ') != docstring::npos) {
232                 warned_about_spaces = true;
233
234                 Alert::warning(_("Export Warning!"),
235                                _("There are spaces in the paths to your BibTeX databases.\n"
236                                               "BibTeX will be unable to find them."));
237         }
238
239         // Style-Options
240         string style = to_utf8(getParam("options")); // maybe empty! and with bibtotoc
241         string bibtotoc;
242         if (prefixIs(style, "bibtotoc")) {
243                 bibtotoc = "bibtotoc";
244                 if (contains(style, ','))
245                         style = split(style, bibtotoc, ',');
246         }
247
248         // line count
249         int nlines = 0;
250
251         if (!style.empty()) {
252                 string base = normalizeName(buffer, runparams, style, ".bst");
253                 FileName const try_in_file(makeAbsPath(base + ".bst", buffer.filePath()));
254                 bool const not_from_texmf = try_in_file.isFileReadable();
255                 // If this style does not come from texmf and we are not
256                 // exporting to .tex copy it to the tmp directory.
257                 // This prevents problems with spaces and 8bit charcaters
258                 // in the file name.
259                 if (!runparams.inComment && !runparams.dryrun && !runparams.nice &&
260                     not_from_texmf) {
261                         // use new style name
262                         DocFileName const in_file = DocFileName(try_in_file);
263                         base = removeExtension(in_file.mangledFilename());
264                         FileName const out_file(makeAbsPath(base + ".bst",
265                                         buffer.masterBuffer()->temppath()));
266                         bool const success = copy(in_file, out_file);
267                         if (!success) {
268                                 lyxerr << "Failed to copy '" << in_file
269                                        << "' to '" << out_file << "'"
270                                        << endl;
271                         }
272                 }
273                 // FIXME UNICODE
274                 os << "\\bibliographystyle{"
275                    << from_utf8(latex_path(normalizeName(buffer, runparams, base, ".bst")))
276                    << "}\n";
277                 nlines += 1;
278         }
279
280         // Post this warning only once.
281         static bool warned_about_bst_spaces = false;
282         if (!warned_about_bst_spaces && runparams.nice && contains(style, ' ')) {
283                 warned_about_bst_spaces = true;
284                 Alert::warning(_("Export Warning!"),
285                                _("There are spaces in the path to your BibTeX style file.\n"
286                                               "BibTeX will be unable to find it."));
287         }
288
289         if (!db_out.empty() && buffer.params().use_bibtopic){
290                 os << "\\begin{btSect}{" << db_out << "}\n";
291                 docstring btprint = getParam("btprint");
292                 if (btprint.empty())
293                         // default
294                         btprint = from_ascii("btPrintCited");
295                 os << "\\" << btprint << "\n"
296                    << "\\end{btSect}\n";
297                 nlines += 3;
298         }
299
300         // bibtotoc-Option
301         if (!bibtotoc.empty() && !buffer.params().use_bibtopic) {
302                 // maybe a problem when a textclass has no "art" as
303                 // part of its name, because it's than book.
304                 // For the "official" lyx-layouts it's no problem to support
305                 // all well
306                 if (!contains(buffer.params().getTextClass().name(),
307                               "art")) {
308                         if (buffer.params().sides == TextClass::OneSide) {
309                                 // oneside
310                                 os << "\\clearpage";
311                         } else {
312                                 // twoside
313                                 os << "\\cleardoublepage";
314                         }
315
316                         // bookclass
317                         os << "\\addcontentsline{toc}{chapter}{\\bibname}";
318
319                 } else {
320                         // article class
321                         os << "\\addcontentsline{toc}{section}{\\refname}";
322                 }
323         }
324
325         if (!db_out.empty() && !buffer.params().use_bibtopic){
326                 os << "\\bibliography{" << db_out << "}\n";
327                 nlines += 1;
328         }
329
330         return nlines;
331 }
332
333
334 vector<FileName> const InsetBibtex::getFiles(Buffer const & buffer) const
335 {
336         FileName path(buffer.filePath());
337         support::Path p(path);
338
339         vector<FileName> vec;
340
341         string tmp;
342         // FIXME UNICODE
343         string bibfiles = to_utf8(getParam("bibfiles"));
344         bibfiles = split(bibfiles, tmp, ',');
345         while (!tmp.empty()) {
346                 FileName const file = findtexfile(changeExtension(tmp, "bib"), "bib");
347                 LYXERR(Debug::LATEX) << "Bibfile: " << file << endl;
348
349                 // If we didn't find a matching file name just fail silently
350                 if (!file.empty())
351                         vec.push_back(file);
352
353                 // Get next file name
354                 bibfiles = split(bibfiles, tmp, ',');
355         }
356
357         return vec;
358 }
359
360 namespace {
361
362         // methods for parsing bibtex files
363
364         typedef map<docstring, docstring> VarMap;
365
366         /// remove whitespace characters, optionally a single comma,
367         /// and further whitespace characters from the stream.
368         /// @return true if a comma was found, false otherwise
369         ///
370         bool removeWSAndComma(idocfstream & ifs) {
371                 char_type ch;
372
373                 if (!ifs)
374                         return false;
375
376                 // skip whitespace
377                 do {
378                         ifs.get(ch);
379                 } while (ifs && isSpace(ch));
380
381                 if (!ifs)
382                         return false;
383
384                 if (ch != ',') {
385                         ifs.putback(ch);
386                         return false;
387                 }
388
389                 // skip whitespace
390                 do {
391                         ifs.get(ch);
392                 } while (ifs && isSpace(ch));
393
394                 if (ifs) {
395                         ifs.putback(ch);
396                 }
397
398                 return true;
399         }
400
401
402         enum charCase {
403                 makeLowerCase,
404                 keepCase
405         };
406
407         /// remove whitespace characters, read characer sequence
408         /// not containing whitespace characters or characters in
409         /// delimChars, and remove further whitespace characters.
410         ///
411         /// @return true if a string of length > 0 could be read.
412         ///
413         bool readTypeOrKey(docstring & val, idocfstream & ifs,
414                 docstring const & delimChars, docstring const &illegalChars, 
415                 charCase chCase) {
416
417                 char_type ch;
418
419                 val.clear();
420
421                 if (!ifs)
422                         return false;
423
424                 // skip whitespace
425                 do {
426                         ifs.get(ch);
427                 } while (ifs && isSpace(ch));
428
429                 if (!ifs)
430                         return false;
431
432                 // read value
433                 bool legalChar = true;
434                 while (ifs && !isSpace(ch) && 
435                                                  delimChars.find(ch) == docstring::npos &&
436                                                  (legalChar = (illegalChars.find(ch) == docstring::npos))
437                                         ) 
438                 {
439                         if (chCase == makeLowerCase)
440                                 val += lowercase(ch);
441                         else
442                                 val += ch;
443                         ifs.get(ch);
444                 }
445                 
446                 if (!legalChar) {
447                         ifs.putback(ch);
448                         return false;
449                 }
450
451                 // skip whitespace
452                 while (ifs && isSpace(ch)) {
453                         ifs.get(ch);
454                 }
455
456                 if (ifs) {
457                         ifs.putback(ch);
458                 }
459
460                 return val.length() > 0;
461         }
462
463         /// read subsequent bibtex values that are delimited with a #-character.
464         /// Concatenate all parts and replace names with the associated string in
465         /// the variable strings.
466         /// @return true if reading was successfull (all single parts were delimited
467         /// correctly)
468         bool readValue(docstring & val, idocfstream & ifs, const VarMap & strings) {
469
470                 char_type ch;
471
472                 val.clear();
473
474                 if (!ifs)
475                         return false;
476
477                 do {
478                         // skip whitespace
479                         do {
480                                 ifs.get(ch);
481                         } while (ifs && isSpace(ch));
482
483                         if (!ifs)
484                                 return false;
485
486                         // check for field type
487                         if (isDigit(ch)) {
488
489                                 // read integer value
490                                 do {
491                                         val += ch;
492                                         ifs.get(ch);
493                                 } while (ifs && isDigit(ch));
494
495                                 if (!ifs)
496                                         return false;
497
498                         } else if (ch == '"' || ch == '{') {
499                                 // set end delimiter
500                                 char_type delim = ch == '"' ? '"': '}';
501
502                                 //Skip whitespace
503                                 do {
504                                         ifs.get(ch);
505                                 } while (ifs && isSpace(ch));
506                                 
507                                 if (!ifs)
508                                         return false;
509                                 
510                                 //We now have the first non-whitespace character
511                                 //We'll collapse adjacent whitespace.
512                                 bool lastWasWhiteSpace = false;
513                                 
514                                 // inside this delimited text braces must match.
515                                 // Thus we can have a closing delimiter only
516                                 // when nestLevel == 0
517                                 int nestLevel = 0;
518  
519                                 while (ifs && (nestLevel > 0 || ch != delim)) {
520                                         if (isSpace(ch)) {
521                                                 lastWasWhiteSpace = true;
522                                                 ifs.get(ch);
523                                                 continue;
524                                         }
525                                         //We output the space only after we stop getting 
526                                         //whitespace so as not to output any whitespace
527                                         //at the end of the value.
528                                         if (lastWasWhiteSpace) {
529                                                 lastWasWhiteSpace = false;
530                                                 val += ' ';
531                                         }
532                                         
533                                         val += ch;
534
535                                         // update nesting level
536                                         switch (ch) {
537                                                 case '{':
538                                                         ++nestLevel;
539                                                         break;
540                                                 case '}':
541                                                         --nestLevel;
542                                                         if (nestLevel < 0) return false;
543                                                         break;
544                                         }
545
546                                         ifs.get(ch);
547                                 }
548
549                                 if (!ifs)
550                                         return false;
551
552                                 ifs.get(ch);
553
554                                 if (!ifs)
555                                         return false;
556
557                         } else {
558
559                                 // reading a string name
560                                 docstring strName;
561
562                                 while (ifs && !isSpace(ch) && ch != '#' && ch != ',' && ch != '}' && ch != ')') {
563                                         strName += lowercase(ch);
564                                         ifs.get(ch);
565                                 }
566
567                                 if (!ifs)
568                                         return false;
569
570                                 // replace the string with its assigned value or
571                                 // discard it if it's not assigned
572                                 if (strName.length()) {
573                                         VarMap::const_iterator pos = strings.find(strName);
574                                         if (pos != strings.end()) {
575                                                 val += pos->second;
576                                         }
577                                 }
578                         }
579
580                         // skip WS
581                         while (ifs && isSpace(ch)) {
582                                 ifs.get(ch);
583                         }
584
585                         if (!ifs)
586                                 return false;
587
588                         // continue reading next value on concatenate with '#'
589                 } while (ch == '#');
590
591                 ifs.putback(ch);
592
593                 return true;
594         }
595 }
596
597
598 // This method returns a comma separated list of Bibtex entries
599 void InsetBibtex::fillWithBibKeys(Buffer const & buffer,
600                 BiblioInfo & keylist, InsetIterator const & /*di*/) const
601 {
602         vector<FileName> const files = getFiles(buffer);
603         for (vector<FileName>::const_iterator it = files.begin();
604              it != files.end(); ++ it) {
605                 // This bibtex parser is a first step to parse bibtex files
606                 // more precisely.
607                 //
608                 // - it reads the whole bibtex entry and does a syntax check
609                 //   (matching delimiters, missing commas,...
610                 // - it recovers from errors starting with the next @-character
611                 // - it reads @string definitions and replaces them in the
612                 //   field values.
613                 // - it accepts more characters in keys or value names than
614                 //   bibtex does.
615                 //
616                 // Officially bibtex does only support ASCII, but in practice
617                 // you can use the encoding of the main document as long as
618                 // some elements like keys and names are pure ASCII. Therefore
619                 // we convert the file from the buffer encoding.
620                 // We don't restrict keys to ASCII in LyX, since our own
621                 // InsetBibitem can generate non-ASCII keys, and nonstandard
622                 // 8bit clean bibtex forks exist.
623                 
624                 idocfstream ifs(it->toFilesystemEncoding().c_str(),
625                         std::ios_base::in,
626                         buffer.params().encoding().iconvName());
627
628                 char_type ch;
629                 VarMap strings;
630
631                 while (ifs) {
632
633                         ifs.get(ch);
634                         if (!ifs)
635                                 break;
636
637                         if (ch != '@')
638                                 continue;
639
640                         docstring entryType;
641
642                         if (!readTypeOrKey(entryType, ifs, from_ascii("{("), 
643                                            docstring(), makeLowerCase) || !ifs)
644                                 continue;
645
646                         if (entryType == from_ascii("comment")) {
647
648                                 ifs.ignore(std::numeric_limits<int>::max(), '\n');
649                                 continue;
650                         }
651
652                         ifs.get(ch);
653                         if (!ifs)
654                                 break;
655
656                         if ((ch != '(') && (ch != '{')) {
657                                 // invalid entry delimiter
658                                 ifs.putback(ch);
659                                 continue;
660                         }
661
662                         // process the entry
663                         if (entryType == from_ascii("string")) {
664
665                                 // read string and add it to the strings map
666                                 // (or replace it's old value)
667                                 docstring name;
668                                 docstring value;
669
670                                 if (!readTypeOrKey(name, ifs, from_ascii("="), 
671                                                    from_ascii("#{}(),"), makeLowerCase) || !ifs)
672                                         continue;
673
674                                 // next char must be an equal sign
675                                 ifs.get(ch);
676                                 if (!ifs || ch != '=')
677                                         continue;
678
679                                 if (!readValue(value, ifs, strings))
680                                         continue;
681
682                                 strings[name] = value;
683
684                         } else if (entryType == from_ascii("preamble")) {
685
686                                 // preamble definitions are discarded.
687                                 // can they be of any use in lyx?
688                                 docstring value;
689
690                                 if (!readValue(value, ifs, strings))
691                                         continue;
692
693                         } else {
694
695                                 // Citation entry. Try to read the key.
696                                 docstring key;
697
698                                 if (!readTypeOrKey(key, ifs, from_ascii(","), 
699                                                    from_ascii("}"), keepCase) || !ifs)
700                                         continue;
701
702                                 /////////////////////////////////////////////
703                                 // now we have a key, so we will add an entry 
704                                 // (even if it's empty, as bibtex does)
705                                 //
706                                 // we now read the field = value pairs.
707                                 // all items must be separated by a comma. If
708                                 // it is missing the scanning of this entry is
709                                 // stopped and the next is searched.
710                                 docstring fields;
711                                 docstring name;
712                                 docstring value;
713                                 docstring commaNewline;
714                                 docstring data;
715                                 BibTeXInfo keyvalmap;
716                                 keyvalmap.entryType = entryType;
717                                 
718                                 bool readNext = removeWSAndComma(ifs);
719  
720                                 while (ifs && readNext) {
721
722                                         // read field name
723                                         if (!readTypeOrKey(name, ifs, from_ascii("="), 
724                                                            from_ascii("{}(),"), makeLowerCase) || !ifs)
725                                                 break;
726
727                                         // next char must be an equal sign
728                                         ifs.get(ch);
729                                         if (!ifs)
730                                                 break;
731                                         if (ch != '=') {
732                                                 ifs.putback(ch);
733                                                 break;
734                                         }
735
736                                         // read field value
737                                         if (!readValue(value, ifs, strings))
738                                                 break;
739
740                                         keyvalmap[name] = value;
741                                         data += "\n\n" + value;
742                                         keylist.fieldNames.insert(name);
743                                         readNext = removeWSAndComma(ifs);
744                                 }
745
746                                 // add the new entry
747                                 keylist.entryTypes.insert(entryType);
748                                 keyvalmap.allData = data;
749                                 keyvalmap.isBibTeX = true;
750                                 keyvalmap.bibKey = key;
751                                 keylist[key] = keyvalmap;
752                         }
753                 } //< searching '@'
754         } //< for loop over files
755 }
756
757
758
759 bool InsetBibtex::addDatabase(string const & db)
760 {
761         // FIXME UNICODE
762         string bibfiles(to_utf8(getParam("bibfiles")));
763         if (tokenPos(bibfiles, ',', db) == -1) {
764                 if (!bibfiles.empty())
765                         bibfiles += ',';
766                 setParam("bibfiles", from_utf8(bibfiles + db));
767                 return true;
768         }
769         return false;
770 }
771
772
773 bool InsetBibtex::delDatabase(string const & db)
774 {
775         // FIXME UNICODE
776         string bibfiles(to_utf8(getParam("bibfiles")));
777         if (contains(bibfiles, db)) {
778                 int const n = tokenPos(bibfiles, ',', db);
779                 string bd = db;
780                 if (n > 0) {
781                         // this is not the first database
782                         string tmp = ',' + bd;
783                         setParam("bibfiles", from_utf8(subst(bibfiles, tmp, string())));
784                 } else if (n == 0)
785                         // this is the first (or only) database
786                         setParam("bibfiles", from_utf8(split(bibfiles, bd, ',')));
787                 else
788                         return false;
789         }
790         return true;
791 }
792
793
794 void InsetBibtex::validate(LaTeXFeatures & features) const
795 {
796         if (features.bufferParams().use_bibtopic)
797                 features.require("bibtopic");
798 }
799
800
801 } // namespace lyx