]> git.lyx.org Git - lyx.git/blob - src/insets/InsetBibtex.cpp
some more FileName shuffling
[lyx.git] / src / insets / InsetBibtex.cpp
1 /**
2  * \file InsetBibtex.cpp
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author Alejandro Aguilar Sierra
7  * \author Richard Heck (BibTeX parser improvements)
8  *
9  * Full author contact details are available in file CREDITS.
10  */
11
12 #include <config.h>
13
14 #include "InsetBibtex.h"
15
16 #include "Buffer.h"
17 #include "BufferParams.h"
18 #include "DispatchResult.h"
19 #include "debug.h"
20 #include "Encoding.h"
21 #include "FuncRequest.h"
22 #include "gettext.h"
23 #include "LaTeXFeatures.h"
24 #include "MetricsInfo.h"
25 #include "OutputParams.h"
26
27 #include "frontends/alert.h"
28
29 #include "support/ExceptionMessage.h"
30 #include "support/filetools.h"
31 #include "support/lstrings.h"
32 #include "support/lyxlib.h"
33 #include "support/os.h"
34 #include "support/Path.h"
35 #include "support/textutils.h"
36
37 #include <boost/tokenizer.hpp>
38
39
40 namespace lyx {
41
42 using support::absolutePath;
43 using support::ascii_lowercase;
44 using support::changeExtension;
45 using support::contains;
46 using support::copy;
47 using support::DocFileName;
48 using support::FileName;
49 using support::findtexfile;
50 using support::isValidLaTeXFilename;
51 using support::latex_path;
52 using support::ltrim;
53 using support::makeAbsPath;
54 using support::makeRelPath;
55 using support::prefixIs;
56 using support::removeExtension;
57 using support::rtrim;
58 using support::split;
59 using support::subst;
60 using support::tokenPos;
61 using support::trim;
62 using support::lowercase;
63
64 namespace Alert = frontend::Alert;
65 namespace os = support::os;
66
67 using std::endl;
68 using std::getline;
69 using std::string;
70 using std::ostream;
71 using std::pair;
72 using std::vector;
73 using std::map;
74
75
76 InsetBibtex::InsetBibtex(InsetCommandParams const & p)
77         : InsetCommand(p, "bibtex")
78 {}
79
80
81 Inset * InsetBibtex::clone() const
82 {
83         return new InsetBibtex(*this);
84 }
85
86
87 void InsetBibtex::doDispatch(Cursor & cur, FuncRequest & cmd)
88 {
89         switch (cmd.action) {
90
91         case LFUN_INSET_MODIFY: {
92                 InsetCommandParams p("bibtex");
93                 try {
94                         if (!InsetCommandMailer::string2params("bibtex", 
95                                         to_utf8(cmd.argument()), p)) {
96                                 cur.noUpdate();
97                                 break;
98                         }
99                 } catch (support::ExceptionMessage const & message) {
100                         if (message.type_ == support::WarningException) {
101                                 Alert::warning(message.title_, message.details_);
102                                 cur.noUpdate();
103                         } else 
104                                 throw message;
105                         break;
106                 }
107                 setParams(p);
108                 cur.buffer().updateBibfilesCache();
109                 break;
110         }
111
112         default:
113                 InsetCommand::doDispatch(cur, cmd);
114                 break;
115         }
116 }
117
118
119 docstring const InsetBibtex::getScreenLabel(Buffer const &) const
120 {
121         return _("BibTeX Generated Bibliography");
122 }
123
124
125 namespace {
126
127 string normalize_name(Buffer const & buffer, OutputParams const & runparams,
128                       string const & name, string const & ext)
129 {
130         string const fname = makeAbsPath(name, buffer.filePath()).absFilename();
131         if (absolutePath(name) || !FileName(fname + ext).isFileReadable())
132                 return name;
133         if (!runparams.nice)
134                 return fname;
135
136         // FIXME UNICODE
137         return to_utf8(makeRelPath(from_utf8(fname),
138                                          from_utf8(buffer.getMasterBuffer()->filePath())));
139 }
140
141 }
142
143
144 int InsetBibtex::latex(Buffer const & buffer, odocstream & os,
145                        OutputParams const & runparams) const
146 {
147         // the sequence of the commands:
148         // 1. \bibliographystyle{style}
149         // 2. \addcontentsline{...} - if option bibtotoc set
150         // 3. \bibliography{database}
151         // and with bibtopic:
152         // 1. \bibliographystyle{style}
153         // 2. \begin{btSect}{database}
154         // 3. \btPrint{Cited|NotCited|All}
155         // 4. \end{btSect}
156
157         // Database(s)
158         // If we are processing the LaTeX file in a temp directory then
159         // copy the .bib databases to this temp directory, mangling their
160         // names in the process. Store this mangled name in the list of
161         // all databases.
162         // (We need to do all this because BibTeX *really*, *really*
163         // can't handle "files with spaces" and Windows users tend to
164         // use such filenames.)
165         // Otherwise, store the (maybe absolute) path to the original,
166         // unmangled database name.
167         typedef boost::char_separator<char_type> Separator;
168         typedef boost::tokenizer<Separator, docstring::const_iterator, docstring> Tokenizer;
169
170         Separator const separator(from_ascii(",").c_str());
171         // The tokenizer must not be called with temporary strings, since
172         // it does not make a copy and uses iterators of the string further
173         // down. getParam returns a reference, so this is OK.
174         Tokenizer const tokens(getParam("bibfiles"), separator);
175         Tokenizer::const_iterator const begin = tokens.begin();
176         Tokenizer::const_iterator const end = tokens.end();
177
178         odocstringstream dbs;
179         for (Tokenizer::const_iterator it = begin; it != end; ++it) {
180                 docstring const input = trim(*it);
181                 // FIXME UNICODE
182                 string utf8input(to_utf8(input));
183                 string database =
184                         normalize_name(buffer, runparams, utf8input, ".bib");
185                 FileName const try_in_file(makeAbsPath(database + ".bib", buffer.filePath()));
186                 bool const not_from_texmf = try_in_file.isFileReadable();
187
188                 if (!runparams.inComment && !runparams.dryrun && !runparams.nice &&
189                     not_from_texmf) {
190
191                         // mangledFilename() needs the extension
192                         DocFileName const in_file = DocFileName(try_in_file);
193                         database = removeExtension(in_file.mangledFilename());
194                         FileName const out_file(makeAbsPath(database + ".bib",
195                                         buffer.getMasterBuffer()->temppath()));
196
197                         bool const success = copy(in_file, out_file);
198                         if (!success) {
199                                 lyxerr << "Failed to copy '" << in_file
200                                        << "' to '" << out_file << "'"
201                                        << endl;
202                         }
203                 } else if (!runparams.inComment && runparams.nice && not_from_texmf &&
204                            !isValidLaTeXFilename(database)) {
205                                 frontend::Alert::warning(_("Invalid filename"),
206                                                          _("The following filename is likely to cause trouble "
207                                                            "when running the exported file through LaTeX: ") +
208                                                             from_utf8(database));
209                 }
210
211                 if (it != begin)
212                         dbs << ',';
213                 // FIXME UNICODE
214                 dbs << from_utf8(latex_path(database));
215         }
216         docstring const db_out = dbs.str();
217
218         // Post this warning only once.
219         static bool warned_about_spaces = false;
220         if (!warned_about_spaces &&
221             runparams.nice && db_out.find(' ') != docstring::npos) {
222                 warned_about_spaces = true;
223
224                 Alert::warning(_("Export Warning!"),
225                                _("There are spaces in the paths to your BibTeX databases.\n"
226                                               "BibTeX will be unable to find them."));
227
228         }
229
230         // Style-Options
231         string style = to_utf8(getParam("options")); // maybe empty! and with bibtotoc
232         string bibtotoc;
233         if (prefixIs(style, "bibtotoc")) {
234                 bibtotoc = "bibtotoc";
235                 if (contains(style, ',')) {
236                         style = split(style, bibtotoc, ',');
237                 }
238         }
239
240         // line count
241         int nlines = 0;
242
243         if (!style.empty()) {
244                 string base =
245                         normalize_name(buffer, runparams, style, ".bst");
246                 FileName const try_in_file(makeAbsPath(base + ".bst", buffer.filePath()));
247                 bool const not_from_texmf = try_in_file.isFileReadable();
248                 // If this style does not come from texmf and we are not
249                 // exporting to .tex copy it to the tmp directory.
250                 // This prevents problems with spaces and 8bit charcaters
251                 // in the file name.
252                 if (!runparams.inComment && !runparams.dryrun && !runparams.nice &&
253                     not_from_texmf) {
254                         // use new style name
255                         DocFileName const in_file = DocFileName(try_in_file);
256                         base = removeExtension(in_file.mangledFilename());
257                         FileName const out_file(makeAbsPath(base + ".bst",
258                                         buffer.getMasterBuffer()->temppath()));
259                         bool const success = copy(in_file, out_file);
260                         if (!success) {
261                                 lyxerr << "Failed to copy '" << in_file
262                                        << "' to '" << out_file << "'"
263                                        << endl;
264                         }
265                 }
266                 // FIXME UNICODE
267                 os << "\\bibliographystyle{"
268                    << from_utf8(latex_path(normalize_name(buffer, runparams, base, ".bst")))
269                    << "}\n";
270                 nlines += 1;
271         }
272
273         // Post this warning only once.
274         static bool warned_about_bst_spaces = false;
275         if (!warned_about_bst_spaces && runparams.nice && contains(style, ' ')) {
276                 warned_about_bst_spaces = true;
277                 Alert::warning(_("Export Warning!"),
278                                _("There are spaces in the path to your BibTeX style file.\n"
279                                               "BibTeX will be unable to find it."));
280         }
281
282         if (!db_out.empty() && buffer.params().use_bibtopic){
283                 os << "\\begin{btSect}{" << db_out << "}\n";
284                 docstring btprint = getParam("btprint");
285                 if (btprint.empty())
286                         // default
287                         btprint = from_ascii("btPrintCited");
288                 os << "\\" << btprint << "\n"
289                    << "\\end{btSect}\n";
290                 nlines += 3;
291         }
292
293         // bibtotoc-Option
294         if (!bibtotoc.empty() && !buffer.params().use_bibtopic) {
295                 // maybe a problem when a textclass has no "art" as
296                 // part of its name, because it's than book.
297                 // For the "official" lyx-layouts it's no problem to support
298                 // all well
299                 if (!contains(buffer.params().getTextClass().name(),
300                               "art")) {
301                         if (buffer.params().sides == TextClass::OneSide) {
302                                 // oneside
303                                 os << "\\clearpage";
304                         } else {
305                                 // twoside
306                                 os << "\\cleardoublepage";
307                         }
308
309                         // bookclass
310                         os << "\\addcontentsline{toc}{chapter}{\\bibname}";
311
312                 } else {
313                         // article class
314                         os << "\\addcontentsline{toc}{section}{\\refname}";
315                 }
316         }
317
318         if (!db_out.empty() && !buffer.params().use_bibtopic){
319                 os << "\\bibliography{" << db_out << "}\n";
320                 nlines += 1;
321         }
322
323         return nlines;
324 }
325
326
327 vector<FileName> const InsetBibtex::getFiles(Buffer const & buffer) const
328 {
329         FileName path(buffer.filePath());
330         support::Path p(path);
331
332         vector<FileName> vec;
333
334         string tmp;
335         // FIXME UNICODE
336         string bibfiles = to_utf8(getParam("bibfiles"));
337         bibfiles = split(bibfiles, tmp, ',');
338         while (!tmp.empty()) {
339                 FileName const file = findtexfile(changeExtension(tmp, "bib"), "bib");
340                 LYXERR(Debug::LATEX) << "Bibfile: " << file << endl;
341
342                 // If we didn't find a matching file name just fail silently
343                 if (!file.empty())
344                         vec.push_back(file);
345
346                 // Get next file name
347                 bibfiles = split(bibfiles, tmp, ',');
348         }
349
350         return vec;
351 }
352
353 namespace {
354
355         // methods for parsing bibtex files
356
357         typedef map<docstring, docstring> VarMap;
358
359         /// remove whitespace characters, optionally a single comma,
360         /// and further whitespace characters from the stream.
361         /// @return true if a comma was found, false otherwise
362         ///
363         bool removeWSAndComma(idocfstream & ifs) {
364                 char_type ch;
365
366                 if (!ifs)
367                         return false;
368
369                 // skip whitespace
370                 do {
371                         ifs.get(ch);
372                 } while (ifs && isSpace(ch));
373
374                 if (!ifs)
375                         return false;
376
377                 if (ch != ',') {
378                         ifs.putback(ch);
379                         return false;
380                 }
381
382                 // skip whitespace
383                 do {
384                         ifs.get(ch);
385                 } while (ifs && isSpace(ch));
386
387                 if (ifs) {
388                         ifs.putback(ch);
389                 }
390
391                 return true;
392         }
393
394
/// Tells readTypeOrKey() how to store the characters it reads.
enum charCase {
	/// convert every character with lowercase() before storing it
	makeLowerCase = 0,
	/// store the characters unchanged
	keepCase = 1
};
399
400         /// remove whitespace characters, read characer sequence
401         /// not containing whitespace characters or characters in
402         /// delimChars, and remove further whitespace characters.
403         ///
404         /// @return true if a string of length > 0 could be read.
405         ///
406         bool readTypeOrKey(docstring & val, idocfstream & ifs,
407                 docstring const & delimChars, docstring const &illegalChars, 
408                 charCase chCase) {
409
410                 char_type ch;
411
412                 val.clear();
413
414                 if (!ifs)
415                         return false;
416
417                 // skip whitespace
418                 do {
419                         ifs.get(ch);
420                 } while (ifs && isSpace(ch));
421
422                 if (!ifs)
423                         return false;
424
425                 // read value
426                 bool legalChar = true;
427                 while (ifs && !isSpace(ch) && 
428                                                  delimChars.find(ch) == docstring::npos &&
429                                                  (legalChar = (illegalChars.find(ch) == docstring::npos))
430                                         ) 
431                 {
432                         if (chCase == makeLowerCase)
433                                 val += lowercase(ch);
434                         else
435                                 val += ch;
436                         ifs.get(ch);
437                 }
438                 
439                 if (!legalChar) {
440                         ifs.putback(ch);
441                         return false;
442                 }
443
444                 // skip whitespace
445                 while (ifs && isSpace(ch)) {
446                         ifs.get(ch);
447                 }
448
449                 if (ifs) {
450                         ifs.putback(ch);
451                 }
452
453                 return val.length() > 0;
454         }
455
456         /// read subsequent bibtex values that are delimited with a #-character.
457         /// Concatenate all parts and replace names with the associated string in
458         /// the variable strings.
459         /// @return true if reading was successfull (all single parts were delimited
460         /// correctly)
461         bool readValue(docstring & val, idocfstream & ifs, const VarMap & strings) {
462
463                 char_type ch;
464
465                 val.clear();
466
467                 if (!ifs)
468                         return false;
469
470                 do {
471                         // skip whitespace
472                         do {
473                                 ifs.get(ch);
474                         } while (ifs && isSpace(ch));
475
476                         if (!ifs)
477                                 return false;
478
479                         // check for field type
480                         if (isDigit(ch)) {
481
482                                 // read integer value
483                                 do {
484                                         val += ch;
485                                         ifs.get(ch);
486                                 } while (ifs && isDigit(ch));
487
488                                 if (!ifs)
489                                         return false;
490
491                         } else if (ch == '"' || ch == '{') {
492                                 // set end delimiter
493                                 char_type delim = ch == '"' ? '"': '}';
494
495                                 //Skip whitespace
496                                 do {
497                                         ifs.get(ch);
498                                 } while (ifs && isSpace(ch));
499                                 
500                                 if (!ifs)
501                                         return false;
502                                 
503                                 //We now have the first non-whitespace character
504                                 //We'll collapse adjacent whitespace.
505                                 bool lastWasWhiteSpace = false;
506                                 
507                                 // inside this delimited text braces must match.
508                                 // Thus we can have a closing delimiter only
509                                 // when nestLevel == 0
510                                 int nestLevel = 0;
511  
512                                 while (ifs && (nestLevel > 0 || ch != delim)) {
513                                         if (isSpace(ch)) {
514                                                 lastWasWhiteSpace = true;
515                                                 ifs.get(ch);
516                                                 continue;
517                                         }
518                                         //We output the space only after we stop getting 
519                                         //whitespace so as not to output any whitespace
520                                         //at the end of the value.
521                                         if (lastWasWhiteSpace) {
522                                                 lastWasWhiteSpace = false;
523                                                 val += ' ';
524                                         }
525                                         
526                                         val += ch;
527
528                                         // update nesting level
529                                         switch (ch) {
530                                                 case '{':
531                                                         ++nestLevel;
532                                                         break;
533                                                 case '}':
534                                                         --nestLevel;
535                                                         if (nestLevel < 0) return false;
536                                                         break;
537                                         }
538
539                                         ifs.get(ch);
540                                 }
541
542                                 if (!ifs)
543                                         return false;
544
545                                 ifs.get(ch);
546
547                                 if (!ifs)
548                                         return false;
549
550                         } else {
551
552                                 // reading a string name
553                                 docstring strName;
554
555                                 while (ifs && !isSpace(ch) && ch != '#' && ch != ',' && ch != '}' && ch != ')') {
556                                         strName += lowercase(ch);
557                                         ifs.get(ch);
558                                 }
559
560                                 if (!ifs)
561                                         return false;
562
563                                 // replace the string with its assigned value or
564                                 // discard it if it's not assigned
565                                 if (strName.length()) {
566                                         VarMap::const_iterator pos = strings.find(strName);
567                                         if (pos != strings.end()) {
568                                                 val += pos->second;
569                                         }
570                                 }
571                         }
572
573                         // skip WS
574                         while (ifs && isSpace(ch)) {
575                                 ifs.get(ch);
576                         }
577
578                         if (!ifs)
579                                 return false;
580
581                         // continue reading next value on concatenate with '#'
582                 } while (ch == '#');
583
584                 ifs.putback(ch);
585
586                 return true;
587         }
588 }
589
590
591 // This method returns a comma separated list of Bibtex entries
592 void InsetBibtex::fillWithBibKeys(Buffer const & buffer,
593                 BiblioInfo & keylist, InsetIterator const & /*di*/) const
594 {
595         vector<FileName> const files = getFiles(buffer);
596         for (vector<FileName>::const_iterator it = files.begin();
597              it != files.end(); ++ it) {
598                 // This bibtex parser is a first step to parse bibtex files
599                 // more precisely.
600                 //
601                 // - it reads the whole bibtex entry and does a syntax check
602                 //   (matching delimiters, missing commas,...
603                 // - it recovers from errors starting with the next @-character
604                 // - it reads @string definitions and replaces them in the
605                 //   field values.
606                 // - it accepts more characters in keys or value names than
607                 //   bibtex does.
608                 //
609                 // Officially bibtex does only support ASCII, but in practice
610                 // you can use the encoding of the main document as long as
611                 // some elements like keys and names are pure ASCII. Therefore
612                 // we convert the file from the buffer encoding.
613                 // We don't restrict keys to ASCII in LyX, since our own
614                 // InsetBibitem can generate non-ASCII keys, and nonstandard
615                 // 8bit clean bibtex forks exist.
616                 
617                 idocfstream ifs(it->toFilesystemEncoding().c_str(),
618                         std::ios_base::in,
619                         buffer.params().encoding().iconvName());
620
621                 char_type ch;
622                 VarMap strings;
623
624                 while (ifs) {
625
626                         ifs.get(ch);
627                         if (!ifs)
628                                 break;
629
630                         if (ch != '@')
631                                 continue;
632
633                         docstring entryType;
634
635                         if (!readTypeOrKey(entryType, ifs, from_ascii("{("), 
636                                            docstring(), makeLowerCase) || !ifs)
637                                 continue;
638
639                         if (entryType == from_ascii("comment")) {
640
641                                 ifs.ignore(std::numeric_limits<int>::max(), '\n');
642                                 continue;
643                         }
644
645                         ifs.get(ch);
646                         if (!ifs)
647                                 break;
648
649                         if ((ch != '(') && (ch != '{')) {
650                                 // invalid entry delimiter
651                                 ifs.putback(ch);
652                                 continue;
653                         }
654
655                         // process the entry
656                         if (entryType == from_ascii("string")) {
657
658                                 // read string and add it to the strings map
659                                 // (or replace it's old value)
660                                 docstring name;
661                                 docstring value;
662
663                                 if (!readTypeOrKey(name, ifs, from_ascii("="), 
664                                                    from_ascii("#{}(),"), makeLowerCase) || !ifs)
665                                         continue;
666
667                                 // next char must be an equal sign
668                                 ifs.get(ch);
669                                 if (!ifs || ch != '=')
670                                         continue;
671
672                                 if (!readValue(value, ifs, strings))
673                                         continue;
674
675                                 strings[name] = value;
676
677                         } else if (entryType == from_ascii("preamble")) {
678
679                                 // preamble definitions are discarded.
680                                 // can they be of any use in lyx?
681                                 docstring value;
682
683                                 if (!readValue(value, ifs, strings))
684                                         continue;
685
686                         } else {
687
688                                 // Citation entry. Try to read the key.
689                                 docstring key;
690
691                                 if (!readTypeOrKey(key, ifs, from_ascii(","), 
692                                                    from_ascii("}"), keepCase) || !ifs)
693                                         continue;
694
695                                 /////////////////////////////////////////////
696                                 // now we have a key, so we will add an entry 
697                                 // (even if it's empty, as bibtex does)
698                                 //
699                                 // we now read the field = value pairs.
700                                 // all items must be separated by a comma. If
701                                 // it is missing the scanning of this entry is
702                                 // stopped and the next is searched.
703                                 docstring fields;
704                                 docstring name;
705                                 docstring value;
706                                 docstring commaNewline;
707                                 docstring data;
708                                 BibTeXInfo keyvalmap;
709                                 keyvalmap.entryType = entryType;
710                                 
711                                 bool readNext = removeWSAndComma(ifs);
712  
713                                 while (ifs && readNext) {
714
715                                         // read field name
716                                         if (!readTypeOrKey(name, ifs, from_ascii("="), 
717                                                            from_ascii("{}(),"), makeLowerCase) || !ifs)
718                                                 break;
719
720                                         // next char must be an equal sign
721                                         ifs.get(ch);
722                                         if (!ifs)
723                                                 break;
724                                         if (ch != '=') {
725                                                 ifs.putback(ch);
726                                                 break;
727                                         }
728
729                                         // read field value
730                                         if (!readValue(value, ifs, strings))
731                                                 break;
732
733                                         keyvalmap[name] = value;
734                                         data += "\n\n" + value;
735                                         keylist.fieldNames.insert(name);
736                                         readNext = removeWSAndComma(ifs);
737                                 }
738
739                                 // add the new entry
740                                 keylist.entryTypes.insert(entryType);
741                                 keyvalmap.allData = data;
742                                 keyvalmap.isBibTeX = true;
743                                 keyvalmap.bibKey = key;
744                                 keylist[key] = keyvalmap;
745                         }
746                 } //< searching '@'
747         } //< for loop over files
748 }
749
750
751
752 bool InsetBibtex::addDatabase(string const & db)
753 {
754         // FIXME UNICODE
755         string bibfiles(to_utf8(getParam("bibfiles")));
756         if (tokenPos(bibfiles, ',', db) == -1) {
757                 if (!bibfiles.empty())
758                         bibfiles += ',';
759                 setParam("bibfiles", from_utf8(bibfiles + db));
760                 return true;
761         }
762         return false;
763 }
764
765
766 bool InsetBibtex::delDatabase(string const & db)
767 {
768         // FIXME UNICODE
769         string bibfiles(to_utf8(getParam("bibfiles")));
770         if (contains(bibfiles, db)) {
771                 int const n = tokenPos(bibfiles, ',', db);
772                 string bd = db;
773                 if (n > 0) {
774                         // this is not the first database
775                         string tmp = ',' + bd;
776                         setParam("bibfiles", from_utf8(subst(bibfiles, tmp, string())));
777                 } else if (n == 0)
778                         // this is the first (or only) database
779                         setParam("bibfiles", from_utf8(split(bibfiles, bd, ',')));
780                 else
781                         return false;
782         }
783         return true;
784 }
785
786
787 void InsetBibtex::validate(LaTeXFeatures & features) const
788 {
789         if (features.bufferParams().use_bibtopic)
790                 features.require("bibtopic");
791 }
792
793
794 } // namespace lyx