]> git.lyx.org Git - lyx.git/blob - src/insets/InsetBibtex.cpp
pimpl not needed here
[lyx.git] / src / insets / InsetBibtex.cpp
1 /**
2  * \file InsetBibtex.cpp
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author Alejandro Aguilar Sierra
7  * \author Richard Heck (BibTeX parser improvements)
8  *
9  * Full author contact details are available in file CREDITS.
10  */
11
12 #include <config.h>
13
14 #include "InsetBibtex.h"
15
16 #include "Buffer.h"
17 #include "BufferParams.h"
18 #include "DispatchResult.h"
19 #include "debug.h"
20 #include "Encoding.h"
21 #include "FuncRequest.h"
22 #include "gettext.h"
23 #include "LaTeXFeatures.h"
24 #include "MetricsInfo.h"
25 #include "OutputParams.h"
26
27 #include "frontends/alert.h"
28
29 #include "support/ExceptionMessage.h"
30 #include "support/docstream.h"
31 #include "support/filetools.h"
32 #include "support/lstrings.h"
33 #include "support/lyxlib.h"
34 #include "support/os.h"
35 #include "support/Path.h"
36 #include "support/textutils.h"
37
38 #include <boost/tokenizer.hpp>
39
40
41 namespace lyx {
42
43 using support::absolutePath;
44 using support::ascii_lowercase;
45 using support::changeExtension;
46 using support::contains;
47 using support::copy;
48 using support::DocFileName;
49 using support::FileName;
50 using support::findtexfile;
51 using support::isValidLaTeXFilename;
52 using support::latex_path;
53 using support::ltrim;
54 using support::makeAbsPath;
55 using support::makeRelPath;
56 using support::prefixIs;
57 using support::removeExtension;
58 using support::rtrim;
59 using support::split;
60 using support::subst;
61 using support::tokenPos;
62 using support::trim;
63 using support::lowercase;
64
65 namespace Alert = frontend::Alert;
66 namespace os = support::os;
67
68 using std::endl;
69 using std::getline;
70 using std::string;
71 using std::ostream;
72 using std::pair;
73 using std::vector;
74 using std::map;
75
76
77 InsetBibtex::InsetBibtex(InsetCommandParams const & p)
78         : InsetCommand(p, "bibtex")
79 {}
80
81
82 CommandInfo const * InsetBibtex::findInfo(std::string const & /* cmdName */)
83 {
84         static const char * const paramnames[] = 
85                 {"options", "btprint", "bibfiles", ""};
86         static const bool isoptional[] = {true, true, false};
87         static const CommandInfo info = {3, paramnames, isoptional};
88         return &info;
89 }
90
91
92 Inset * InsetBibtex::clone() const
93 {
94         return new InsetBibtex(*this);
95 }
96
97
98 void InsetBibtex::doDispatch(Cursor & cur, FuncRequest & cmd)
99 {
100         switch (cmd.action) {
101
102         case LFUN_INSET_MODIFY: {
103                 InsetCommandParams p(BIBTEX_CODE);
104                 try {
105                         if (!InsetCommandMailer::string2params("bibtex", 
106                                         to_utf8(cmd.argument()), p)) {
107                                 cur.noUpdate();
108                                 break;
109                         }
110                 } catch (support::ExceptionMessage const & message) {
111                         if (message.type_ == support::WarningException) {
112                                 Alert::warning(message.title_, message.details_);
113                                 cur.noUpdate();
114                         } else 
115                                 throw message;
116                         break;
117                 }
118                 setParams(p);
119                 cur.buffer().updateBibfilesCache();
120                 break;
121         }
122
123         default:
124                 InsetCommand::doDispatch(cur, cmd);
125                 break;
126         }
127 }
128
129
130 docstring const InsetBibtex::getScreenLabel(Buffer const &) const
131 {
132         return _("BibTeX Generated Bibliography");
133 }
134
135
136 namespace {
137
138 string normalizeName(Buffer const & buffer, OutputParams const & runparams,
139                       string const & name, string const & ext)
140 {
141         string const fname = makeAbsPath(name, buffer.filePath()).absFilename();
142         if (absolutePath(name) || !FileName(fname + ext).isFileReadable())
143                 return name;
144         if (!runparams.nice)
145                 return fname;
146
147         // FIXME UNICODE
148         return to_utf8(makeRelPath(from_utf8(fname),
149                                          from_utf8(buffer.masterBuffer()->filePath())));
150 }
151
152 }
153
154
155 int InsetBibtex::latex(Buffer const & buffer, odocstream & os,
156                        OutputParams const & runparams) const
157 {
158         // the sequence of the commands:
159         // 1. \bibliographystyle{style}
160         // 2. \addcontentsline{...} - if option bibtotoc set
161         // 3. \bibliography{database}
162         // and with bibtopic:
163         // 1. \bibliographystyle{style}
164         // 2. \begin{btSect}{database}
165         // 3. \btPrint{Cited|NotCited|All}
166         // 4. \end{btSect}
167
168         // Database(s)
169         // If we are processing the LaTeX file in a temp directory then
170         // copy the .bib databases to this temp directory, mangling their
171         // names in the process. Store this mangled name in the list of
172         // all databases.
173         // (We need to do all this because BibTeX *really*, *really*
174         // can't handle "files with spaces" and Windows users tend to
175         // use such filenames.)
176         // Otherwise, store the (maybe absolute) path to the original,
177         // unmangled database name.
178         typedef boost::char_separator<char_type> Separator;
179         typedef boost::tokenizer<Separator, docstring::const_iterator, docstring> Tokenizer;
180
181         Separator const separator(from_ascii(",").c_str());
182         // The tokenizer must not be called with temporary strings, since
183         // it does not make a copy and uses iterators of the string further
184         // down. getParam returns a reference, so this is OK.
185         Tokenizer const tokens(getParam("bibfiles"), separator);
186         Tokenizer::const_iterator const begin = tokens.begin();
187         Tokenizer::const_iterator const end = tokens.end();
188
189         odocstringstream dbs;
190         for (Tokenizer::const_iterator it = begin; it != end; ++it) {
191                 docstring const input = trim(*it);
192                 // FIXME UNICODE
193                 string utf8input = to_utf8(input);
194                 string database =
195                         normalizeName(buffer, runparams, utf8input, ".bib");
196                 FileName const try_in_file(makeAbsPath(database + ".bib", buffer.filePath()));
197                 bool const not_from_texmf = try_in_file.isFileReadable();
198
199                 if (!runparams.inComment && !runparams.dryrun && !runparams.nice &&
200                     not_from_texmf) {
201
202                         // mangledFilename() needs the extension
203                         DocFileName const in_file = DocFileName(try_in_file);
204                         database = removeExtension(in_file.mangledFilename());
205                         FileName const out_file = makeAbsPath(database + ".bib",
206                                         buffer.masterBuffer()->temppath());
207
208                         bool const success = copy(in_file, out_file);
209                         if (!success) {
210                                 lyxerr << "Failed to copy '" << in_file
211                                        << "' to '" << out_file << "'"
212                                        << endl;
213                         }
214                 } else if (!runparams.inComment && runparams.nice && not_from_texmf &&
215                            !isValidLaTeXFilename(database)) {
216                                 frontend::Alert::warning(_("Invalid filename"),
217                                                          _("The following filename is likely to cause trouble "
218                                                            "when running the exported file through LaTeX: ") +
219                                                             from_utf8(database));
220                 }
221
222                 if (it != begin)
223                         dbs << ',';
224                 // FIXME UNICODE
225                 dbs << from_utf8(latex_path(database));
226         }
227         docstring const db_out = dbs.str();
228
229         // Post this warning only once.
230         static bool warned_about_spaces = false;
231         if (!warned_about_spaces &&
232             runparams.nice && db_out.find(' ') != docstring::npos) {
233                 warned_about_spaces = true;
234
235                 Alert::warning(_("Export Warning!"),
236                                _("There are spaces in the paths to your BibTeX databases.\n"
237                                               "BibTeX will be unable to find them."));
238         }
239
240         // Style-Options
241         string style = to_utf8(getParam("options")); // maybe empty! and with bibtotoc
242         string bibtotoc;
243         if (prefixIs(style, "bibtotoc")) {
244                 bibtotoc = "bibtotoc";
245                 if (contains(style, ','))
246                         style = split(style, bibtotoc, ',');
247         }
248
249         // line count
250         int nlines = 0;
251
252         if (!style.empty()) {
253                 string base = normalizeName(buffer, runparams, style, ".bst");
254                 FileName const try_in_file(makeAbsPath(base + ".bst", buffer.filePath()));
255                 bool const not_from_texmf = try_in_file.isFileReadable();
256                 // If this style does not come from texmf and we are not
257                 // exporting to .tex copy it to the tmp directory.
258                 // This prevents problems with spaces and 8bit charcaters
259                 // in the file name.
260                 if (!runparams.inComment && !runparams.dryrun && !runparams.nice &&
261                     not_from_texmf) {
262                         // use new style name
263                         DocFileName const in_file = DocFileName(try_in_file);
264                         base = removeExtension(in_file.mangledFilename());
265                         FileName const out_file(makeAbsPath(base + ".bst",
266                                         buffer.masterBuffer()->temppath()));
267                         bool const success = copy(in_file, out_file);
268                         if (!success) {
269                                 lyxerr << "Failed to copy '" << in_file
270                                        << "' to '" << out_file << "'"
271                                        << endl;
272                         }
273                 }
274                 // FIXME UNICODE
275                 os << "\\bibliographystyle{"
276                    << from_utf8(latex_path(normalizeName(buffer, runparams, base, ".bst")))
277                    << "}\n";
278                 nlines += 1;
279         }
280
281         // Post this warning only once.
282         static bool warned_about_bst_spaces = false;
283         if (!warned_about_bst_spaces && runparams.nice && contains(style, ' ')) {
284                 warned_about_bst_spaces = true;
285                 Alert::warning(_("Export Warning!"),
286                                _("There are spaces in the path to your BibTeX style file.\n"
287                                               "BibTeX will be unable to find it."));
288         }
289
290         if (!db_out.empty() && buffer.params().use_bibtopic){
291                 os << "\\begin{btSect}{" << db_out << "}\n";
292                 docstring btprint = getParam("btprint");
293                 if (btprint.empty())
294                         // default
295                         btprint = from_ascii("btPrintCited");
296                 os << "\\" << btprint << "\n"
297                    << "\\end{btSect}\n";
298                 nlines += 3;
299         }
300
301         // bibtotoc-Option
302         if (!bibtotoc.empty() && !buffer.params().use_bibtopic) {
303                 // maybe a problem when a textclass has no "art" as
304                 // part of its name, because it's than book.
305                 // For the "official" lyx-layouts it's no problem to support
306                 // all well
307                 if (!contains(buffer.params().getTextClass().name(),
308                               "art")) {
309                         if (buffer.params().sides == TextClass::OneSide) {
310                                 // oneside
311                                 os << "\\clearpage";
312                         } else {
313                                 // twoside
314                                 os << "\\cleardoublepage";
315                         }
316
317                         // bookclass
318                         os << "\\addcontentsline{toc}{chapter}{\\bibname}";
319
320                 } else {
321                         // article class
322                         os << "\\addcontentsline{toc}{section}{\\refname}";
323                 }
324         }
325
326         if (!db_out.empty() && !buffer.params().use_bibtopic){
327                 os << "\\bibliography{" << db_out << "}\n";
328                 nlines += 1;
329         }
330
331         return nlines;
332 }
333
334
335 vector<FileName> const InsetBibtex::getFiles(Buffer const & buffer) const
336 {
337         FileName path(buffer.filePath());
338         support::Path p(path);
339
340         vector<FileName> vec;
341
342         string tmp;
343         // FIXME UNICODE
344         string bibfiles = to_utf8(getParam("bibfiles"));
345         bibfiles = split(bibfiles, tmp, ',');
346         while (!tmp.empty()) {
347                 FileName const file = findtexfile(changeExtension(tmp, "bib"), "bib");
348                 LYXERR(Debug::LATEX) << "Bibfile: " << file << endl;
349
350                 // If we didn't find a matching file name just fail silently
351                 if (!file.empty())
352                         vec.push_back(file);
353
354                 // Get next file name
355                 bibfiles = split(bibfiles, tmp, ',');
356         }
357
358         return vec;
359 }
360
361 namespace {
362
363         // methods for parsing bibtex files
364
365         typedef map<docstring, docstring> VarMap;
366
367         /// remove whitespace characters, optionally a single comma,
368         /// and further whitespace characters from the stream.
369         /// @return true if a comma was found, false otherwise
370         ///
371         bool removeWSAndComma(idocfstream & ifs) {
372                 char_type ch;
373
374                 if (!ifs)
375                         return false;
376
377                 // skip whitespace
378                 do {
379                         ifs.get(ch);
380                 } while (ifs && isSpace(ch));
381
382                 if (!ifs)
383                         return false;
384
385                 if (ch != ',') {
386                         ifs.putback(ch);
387                         return false;
388                 }
389
390                 // skip whitespace
391                 do {
392                         ifs.get(ch);
393                 } while (ifs && isSpace(ch));
394
395                 if (ifs) {
396                         ifs.putback(ch);
397                 }
398
399                 return true;
400         }
401
402
403         enum charCase {
404                 makeLowerCase,
405                 keepCase
406         };
407
408         /// remove whitespace characters, read characer sequence
409         /// not containing whitespace characters or characters in
410         /// delimChars, and remove further whitespace characters.
411         ///
412         /// @return true if a string of length > 0 could be read.
413         ///
414         bool readTypeOrKey(docstring & val, idocfstream & ifs,
415                 docstring const & delimChars, docstring const &illegalChars, 
416                 charCase chCase) {
417
418                 char_type ch;
419
420                 val.clear();
421
422                 if (!ifs)
423                         return false;
424
425                 // skip whitespace
426                 do {
427                         ifs.get(ch);
428                 } while (ifs && isSpace(ch));
429
430                 if (!ifs)
431                         return false;
432
433                 // read value
434                 bool legalChar = true;
435                 while (ifs && !isSpace(ch) && 
436                                                  delimChars.find(ch) == docstring::npos &&
437                                                  (legalChar = (illegalChars.find(ch) == docstring::npos))
438                                         ) 
439                 {
440                         if (chCase == makeLowerCase)
441                                 val += lowercase(ch);
442                         else
443                                 val += ch;
444                         ifs.get(ch);
445                 }
446                 
447                 if (!legalChar) {
448                         ifs.putback(ch);
449                         return false;
450                 }
451
452                 // skip whitespace
453                 while (ifs && isSpace(ch)) {
454                         ifs.get(ch);
455                 }
456
457                 if (ifs) {
458                         ifs.putback(ch);
459                 }
460
461                 return val.length() > 0;
462         }
463
464         /// read subsequent bibtex values that are delimited with a #-character.
465         /// Concatenate all parts and replace names with the associated string in
466         /// the variable strings.
467         /// @return true if reading was successfull (all single parts were delimited
468         /// correctly)
469         bool readValue(docstring & val, idocfstream & ifs, const VarMap & strings) {
470
471                 char_type ch;
472
473                 val.clear();
474
475                 if (!ifs)
476                         return false;
477
478                 do {
479                         // skip whitespace
480                         do {
481                                 ifs.get(ch);
482                         } while (ifs && isSpace(ch));
483
484                         if (!ifs)
485                                 return false;
486
487                         // check for field type
488                         if (isDigit(ch)) {
489
490                                 // read integer value
491                                 do {
492                                         val += ch;
493                                         ifs.get(ch);
494                                 } while (ifs && isDigit(ch));
495
496                                 if (!ifs)
497                                         return false;
498
499                         } else if (ch == '"' || ch == '{') {
500                                 // set end delimiter
501                                 char_type delim = ch == '"' ? '"': '}';
502
503                                 //Skip whitespace
504                                 do {
505                                         ifs.get(ch);
506                                 } while (ifs && isSpace(ch));
507                                 
508                                 if (!ifs)
509                                         return false;
510                                 
511                                 //We now have the first non-whitespace character
512                                 //We'll collapse adjacent whitespace.
513                                 bool lastWasWhiteSpace = false;
514                                 
515                                 // inside this delimited text braces must match.
516                                 // Thus we can have a closing delimiter only
517                                 // when nestLevel == 0
518                                 int nestLevel = 0;
519  
520                                 while (ifs && (nestLevel > 0 || ch != delim)) {
521                                         if (isSpace(ch)) {
522                                                 lastWasWhiteSpace = true;
523                                                 ifs.get(ch);
524                                                 continue;
525                                         }
526                                         //We output the space only after we stop getting 
527                                         //whitespace so as not to output any whitespace
528                                         //at the end of the value.
529                                         if (lastWasWhiteSpace) {
530                                                 lastWasWhiteSpace = false;
531                                                 val += ' ';
532                                         }
533                                         
534                                         val += ch;
535
536                                         // update nesting level
537                                         switch (ch) {
538                                                 case '{':
539                                                         ++nestLevel;
540                                                         break;
541                                                 case '}':
542                                                         --nestLevel;
543                                                         if (nestLevel < 0) return false;
544                                                         break;
545                                         }
546
547                                         ifs.get(ch);
548                                 }
549
550                                 if (!ifs)
551                                         return false;
552
553                                 ifs.get(ch);
554
555                                 if (!ifs)
556                                         return false;
557
558                         } else {
559
560                                 // reading a string name
561                                 docstring strName;
562
563                                 while (ifs && !isSpace(ch) && ch != '#' && ch != ',' && ch != '}' && ch != ')') {
564                                         strName += lowercase(ch);
565                                         ifs.get(ch);
566                                 }
567
568                                 if (!ifs)
569                                         return false;
570
571                                 // replace the string with its assigned value or
572                                 // discard it if it's not assigned
573                                 if (strName.length()) {
574                                         VarMap::const_iterator pos = strings.find(strName);
575                                         if (pos != strings.end()) {
576                                                 val += pos->second;
577                                         }
578                                 }
579                         }
580
581                         // skip WS
582                         while (ifs && isSpace(ch)) {
583                                 ifs.get(ch);
584                         }
585
586                         if (!ifs)
587                                 return false;
588
589                         // continue reading next value on concatenate with '#'
590                 } while (ch == '#');
591
592                 ifs.putback(ch);
593
594                 return true;
595         }
596 }
597
598
599 // This method returns a comma separated list of Bibtex entries
600 void InsetBibtex::fillWithBibKeys(Buffer const & buffer,
601                 BiblioInfo & keylist, InsetIterator const & /*di*/) const
602 {
603         vector<FileName> const files = getFiles(buffer);
604         for (vector<FileName>::const_iterator it = files.begin();
605              it != files.end(); ++ it) {
606                 // This bibtex parser is a first step to parse bibtex files
607                 // more precisely.
608                 //
609                 // - it reads the whole bibtex entry and does a syntax check
610                 //   (matching delimiters, missing commas,...
611                 // - it recovers from errors starting with the next @-character
612                 // - it reads @string definitions and replaces them in the
613                 //   field values.
614                 // - it accepts more characters in keys or value names than
615                 //   bibtex does.
616                 //
617                 // Officially bibtex does only support ASCII, but in practice
618                 // you can use the encoding of the main document as long as
619                 // some elements like keys and names are pure ASCII. Therefore
620                 // we convert the file from the buffer encoding.
621                 // We don't restrict keys to ASCII in LyX, since our own
622                 // InsetBibitem can generate non-ASCII keys, and nonstandard
623                 // 8bit clean bibtex forks exist.
624                 
625                 idocfstream ifs(it->toFilesystemEncoding().c_str(),
626                         std::ios_base::in,
627                         buffer.params().encoding().iconvName());
628
629                 char_type ch;
630                 VarMap strings;
631
632                 while (ifs) {
633
634                         ifs.get(ch);
635                         if (!ifs)
636                                 break;
637
638                         if (ch != '@')
639                                 continue;
640
641                         docstring entryType;
642
643                         if (!readTypeOrKey(entryType, ifs, from_ascii("{("), 
644                                            docstring(), makeLowerCase) || !ifs)
645                                 continue;
646
647                         if (entryType == from_ascii("comment")) {
648
649                                 ifs.ignore(std::numeric_limits<int>::max(), '\n');
650                                 continue;
651                         }
652
653                         ifs.get(ch);
654                         if (!ifs)
655                                 break;
656
657                         if ((ch != '(') && (ch != '{')) {
658                                 // invalid entry delimiter
659                                 ifs.putback(ch);
660                                 continue;
661                         }
662
663                         // process the entry
664                         if (entryType == from_ascii("string")) {
665
666                                 // read string and add it to the strings map
667                                 // (or replace it's old value)
668                                 docstring name;
669                                 docstring value;
670
671                                 if (!readTypeOrKey(name, ifs, from_ascii("="), 
672                                                    from_ascii("#{}(),"), makeLowerCase) || !ifs)
673                                         continue;
674
675                                 // next char must be an equal sign
676                                 ifs.get(ch);
677                                 if (!ifs || ch != '=')
678                                         continue;
679
680                                 if (!readValue(value, ifs, strings))
681                                         continue;
682
683                                 strings[name] = value;
684
685                         } else if (entryType == from_ascii("preamble")) {
686
687                                 // preamble definitions are discarded.
688                                 // can they be of any use in lyx?
689                                 docstring value;
690
691                                 if (!readValue(value, ifs, strings))
692                                         continue;
693
694                         } else {
695
696                                 // Citation entry. Try to read the key.
697                                 docstring key;
698
699                                 if (!readTypeOrKey(key, ifs, from_ascii(","), 
700                                                    from_ascii("}"), keepCase) || !ifs)
701                                         continue;
702
703                                 /////////////////////////////////////////////
704                                 // now we have a key, so we will add an entry 
705                                 // (even if it's empty, as bibtex does)
706                                 //
707                                 // we now read the field = value pairs.
708                                 // all items must be separated by a comma. If
709                                 // it is missing the scanning of this entry is
710                                 // stopped and the next is searched.
711                                 docstring fields;
712                                 docstring name;
713                                 docstring value;
714                                 docstring commaNewline;
715                                 docstring data;
716                                 BibTeXInfo keyvalmap;
717                                 keyvalmap.entryType = entryType;
718                                 
719                                 bool readNext = removeWSAndComma(ifs);
720  
721                                 while (ifs && readNext) {
722
723                                         // read field name
724                                         if (!readTypeOrKey(name, ifs, from_ascii("="), 
725                                                            from_ascii("{}(),"), makeLowerCase) || !ifs)
726                                                 break;
727
728                                         // next char must be an equal sign
729                                         ifs.get(ch);
730                                         if (!ifs)
731                                                 break;
732                                         if (ch != '=') {
733                                                 ifs.putback(ch);
734                                                 break;
735                                         }
736
737                                         // read field value
738                                         if (!readValue(value, ifs, strings))
739                                                 break;
740
741                                         keyvalmap[name] = value;
742                                         data += "\n\n" + value;
743                                         keylist.fieldNames.insert(name);
744                                         readNext = removeWSAndComma(ifs);
745                                 }
746
747                                 // add the new entry
748                                 keylist.entryTypes.insert(entryType);
749                                 keyvalmap.allData = data;
750                                 keyvalmap.isBibTeX = true;
751                                 keyvalmap.bibKey = key;
752                                 keylist[key] = keyvalmap;
753                         }
754                 } //< searching '@'
755         } //< for loop over files
756 }
757
758
759
760 bool InsetBibtex::addDatabase(string const & db)
761 {
762         // FIXME UNICODE
763         string bibfiles(to_utf8(getParam("bibfiles")));
764         if (tokenPos(bibfiles, ',', db) == -1) {
765                 if (!bibfiles.empty())
766                         bibfiles += ',';
767                 setParam("bibfiles", from_utf8(bibfiles + db));
768                 return true;
769         }
770         return false;
771 }
772
773
774 bool InsetBibtex::delDatabase(string const & db)
775 {
776         // FIXME UNICODE
777         string bibfiles(to_utf8(getParam("bibfiles")));
778         if (contains(bibfiles, db)) {
779                 int const n = tokenPos(bibfiles, ',', db);
780                 string bd = db;
781                 if (n > 0) {
782                         // this is not the first database
783                         string tmp = ',' + bd;
784                         setParam("bibfiles", from_utf8(subst(bibfiles, tmp, string())));
785                 } else if (n == 0)
786                         // this is the first (or only) database
787                         setParam("bibfiles", from_utf8(split(bibfiles, bd, ',')));
788                 else
789                         return false;
790         }
791         return true;
792 }
793
794
795 void InsetBibtex::validate(LaTeXFeatures & features) const
796 {
797         if (features.bufferParams().use_bibtopic)
798                 features.require("bibtopic");
799 }
800
801
802 } // namespace lyx