]> git.lyx.org Git - lyx.git/blob - src/insets/InsetBibtex.cpp
mainly cosmetics
[lyx.git] / src / insets / InsetBibtex.cpp
1 /**
2  * \file InsetBibtex.cpp
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author Alejandro Aguilar Sierra
7  * \author Richard Heck (BibTeX parser improvements)
8  *
9  * Full author contact details are available in file CREDITS.
10  */
11
#include <config.h>

#include "InsetBibtex.h"

#include "Buffer.h"
#include "BufferParams.h"
#include "DispatchResult.h"
#include "debug.h"
#include "Encoding.h"
#include "FuncRequest.h"
#include "gettext.h"
#include "LaTeXFeatures.h"
#include "MetricsInfo.h"
#include "OutputParams.h"

#include "frontends/alert.h"

#include "support/ExceptionMessage.h"
#include "support/filetools.h"
#include "support/lstrings.h"
#include "support/lyxlib.h"
#include "support/os.h"
#include "support/Path.h"
#include "support/textutils.h"

#include <boost/tokenizer.hpp>

#include <limits>
38
39
40 namespace lyx {
41
42 using support::absolutePath;
43 using support::ascii_lowercase;
44 using support::changeExtension;
45 using support::contains;
46 using support::copy;
47 using support::DocFileName;
48 using support::FileName;
49 using support::findtexfile;
50 using support::isValidLaTeXFilename;
51 using support::latex_path;
52 using support::ltrim;
53 using support::makeAbsPath;
54 using support::makeRelPath;
55 using support::prefixIs;
56 using support::removeExtension;
57 using support::rtrim;
58 using support::split;
59 using support::subst;
60 using support::tokenPos;
61 using support::trim;
62 using support::lowercase;
63
64 namespace Alert = frontend::Alert;
65 namespace os = support::os;
66
67 using std::endl;
68 using std::getline;
69 using std::string;
70 using std::ostream;
71 using std::pair;
72 using std::vector;
73 using std::map;
74
75
76 InsetBibtex::InsetBibtex(InsetCommandParams const & p)
77         : InsetCommand(p, "bibtex")
78 {}
79
80
81 Inset * InsetBibtex::clone() const
82 {
83         return new InsetBibtex(*this);
84 }
85
86
87 void InsetBibtex::doDispatch(Cursor & cur, FuncRequest & cmd)
88 {
89         switch (cmd.action) {
90
91         case LFUN_INSET_MODIFY: {
92                 InsetCommandParams p(BIBTEX_CODE);
93                 try {
94                         if (!InsetCommandMailer::string2params("bibtex", 
95                                         to_utf8(cmd.argument()), p)) {
96                                 cur.noUpdate();
97                                 break;
98                         }
99                 } catch (support::ExceptionMessage const & message) {
100                         if (message.type_ == support::WarningException) {
101                                 Alert::warning(message.title_, message.details_);
102                                 cur.noUpdate();
103                         } else 
104                                 throw message;
105                         break;
106                 }
107                 setParams(p);
108                 cur.buffer().updateBibfilesCache();
109                 break;
110         }
111
112         default:
113                 InsetCommand::doDispatch(cur, cmd);
114                 break;
115         }
116 }
117
118
119 docstring const InsetBibtex::getScreenLabel(Buffer const &) const
120 {
121         return _("BibTeX Generated Bibliography");
122 }
123
124
125 namespace {
126
127 string normalizeName(Buffer const & buffer, OutputParams const & runparams,
128                       string const & name, string const & ext)
129 {
130         string const fname = makeAbsPath(name, buffer.filePath()).absFilename();
131         if (absolutePath(name) || !FileName(fname + ext).isFileReadable())
132                 return name;
133         if (!runparams.nice)
134                 return fname;
135
136         // FIXME UNICODE
137         return to_utf8(makeRelPath(from_utf8(fname),
138                                          from_utf8(buffer.masterBuffer()->filePath())));
139 }
140
141 }
142
143
144 int InsetBibtex::latex(Buffer const & buffer, odocstream & os,
145                        OutputParams const & runparams) const
146 {
147         // the sequence of the commands:
148         // 1. \bibliographystyle{style}
149         // 2. \addcontentsline{...} - if option bibtotoc set
150         // 3. \bibliography{database}
151         // and with bibtopic:
152         // 1. \bibliographystyle{style}
153         // 2. \begin{btSect}{database}
154         // 3. \btPrint{Cited|NotCited|All}
155         // 4. \end{btSect}
156
157         // Database(s)
158         // If we are processing the LaTeX file in a temp directory then
159         // copy the .bib databases to this temp directory, mangling their
160         // names in the process. Store this mangled name in the list of
161         // all databases.
162         // (We need to do all this because BibTeX *really*, *really*
163         // can't handle "files with spaces" and Windows users tend to
164         // use such filenames.)
165         // Otherwise, store the (maybe absolute) path to the original,
166         // unmangled database name.
167         typedef boost::char_separator<char_type> Separator;
168         typedef boost::tokenizer<Separator, docstring::const_iterator, docstring> Tokenizer;
169
170         Separator const separator(from_ascii(",").c_str());
171         // The tokenizer must not be called with temporary strings, since
172         // it does not make a copy and uses iterators of the string further
173         // down. getParam returns a reference, so this is OK.
174         Tokenizer const tokens(getParam("bibfiles"), separator);
175         Tokenizer::const_iterator const begin = tokens.begin();
176         Tokenizer::const_iterator const end = tokens.end();
177
178         odocstringstream dbs;
179         for (Tokenizer::const_iterator it = begin; it != end; ++it) {
180                 docstring const input = trim(*it);
181                 // FIXME UNICODE
182                 string utf8input = to_utf8(input);
183                 string database =
184                         normalizeName(buffer, runparams, utf8input, ".bib");
185                 FileName const try_in_file(makeAbsPath(database + ".bib", buffer.filePath()));
186                 bool const not_from_texmf = try_in_file.isFileReadable();
187
188                 if (!runparams.inComment && !runparams.dryrun && !runparams.nice &&
189                     not_from_texmf) {
190
191                         // mangledFilename() needs the extension
192                         DocFileName const in_file = DocFileName(try_in_file);
193                         database = removeExtension(in_file.mangledFilename());
194                         FileName const out_file = makeAbsPath(database + ".bib",
195                                         buffer.masterBuffer()->temppath());
196
197                         bool const success = copy(in_file, out_file);
198                         if (!success) {
199                                 lyxerr << "Failed to copy '" << in_file
200                                        << "' to '" << out_file << "'"
201                                        << endl;
202                         }
203                 } else if (!runparams.inComment && runparams.nice && not_from_texmf &&
204                            !isValidLaTeXFilename(database)) {
205                                 frontend::Alert::warning(_("Invalid filename"),
206                                                          _("The following filename is likely to cause trouble "
207                                                            "when running the exported file through LaTeX: ") +
208                                                             from_utf8(database));
209                 }
210
211                 if (it != begin)
212                         dbs << ',';
213                 // FIXME UNICODE
214                 dbs << from_utf8(latex_path(database));
215         }
216         docstring const db_out = dbs.str();
217
218         // Post this warning only once.
219         static bool warned_about_spaces = false;
220         if (!warned_about_spaces &&
221             runparams.nice && db_out.find(' ') != docstring::npos) {
222                 warned_about_spaces = true;
223
224                 Alert::warning(_("Export Warning!"),
225                                _("There are spaces in the paths to your BibTeX databases.\n"
226                                               "BibTeX will be unable to find them."));
227         }
228
229         // Style-Options
230         string style = to_utf8(getParam("options")); // maybe empty! and with bibtotoc
231         string bibtotoc;
232         if (prefixIs(style, "bibtotoc")) {
233                 bibtotoc = "bibtotoc";
234                 if (contains(style, ','))
235                         style = split(style, bibtotoc, ',');
236         }
237
238         // line count
239         int nlines = 0;
240
241         if (!style.empty()) {
242                 string base = normalizeName(buffer, runparams, style, ".bst");
243                 FileName const try_in_file(makeAbsPath(base + ".bst", buffer.filePath()));
244                 bool const not_from_texmf = try_in_file.isFileReadable();
245                 // If this style does not come from texmf and we are not
246                 // exporting to .tex copy it to the tmp directory.
247                 // This prevents problems with spaces and 8bit charcaters
248                 // in the file name.
249                 if (!runparams.inComment && !runparams.dryrun && !runparams.nice &&
250                     not_from_texmf) {
251                         // use new style name
252                         DocFileName const in_file = DocFileName(try_in_file);
253                         base = removeExtension(in_file.mangledFilename());
254                         FileName const out_file(makeAbsPath(base + ".bst",
255                                         buffer.masterBuffer()->temppath()));
256                         bool const success = copy(in_file, out_file);
257                         if (!success) {
258                                 lyxerr << "Failed to copy '" << in_file
259                                        << "' to '" << out_file << "'"
260                                        << endl;
261                         }
262                 }
263                 // FIXME UNICODE
264                 os << "\\bibliographystyle{"
265                    << from_utf8(latex_path(normalizeName(buffer, runparams, base, ".bst")))
266                    << "}\n";
267                 nlines += 1;
268         }
269
270         // Post this warning only once.
271         static bool warned_about_bst_spaces = false;
272         if (!warned_about_bst_spaces && runparams.nice && contains(style, ' ')) {
273                 warned_about_bst_spaces = true;
274                 Alert::warning(_("Export Warning!"),
275                                _("There are spaces in the path to your BibTeX style file.\n"
276                                               "BibTeX will be unable to find it."));
277         }
278
279         if (!db_out.empty() && buffer.params().use_bibtopic){
280                 os << "\\begin{btSect}{" << db_out << "}\n";
281                 docstring btprint = getParam("btprint");
282                 if (btprint.empty())
283                         // default
284                         btprint = from_ascii("btPrintCited");
285                 os << "\\" << btprint << "\n"
286                    << "\\end{btSect}\n";
287                 nlines += 3;
288         }
289
290         // bibtotoc-Option
291         if (!bibtotoc.empty() && !buffer.params().use_bibtopic) {
292                 // maybe a problem when a textclass has no "art" as
293                 // part of its name, because it's than book.
294                 // For the "official" lyx-layouts it's no problem to support
295                 // all well
296                 if (!contains(buffer.params().getTextClass().name(),
297                               "art")) {
298                         if (buffer.params().sides == TextClass::OneSide) {
299                                 // oneside
300                                 os << "\\clearpage";
301                         } else {
302                                 // twoside
303                                 os << "\\cleardoublepage";
304                         }
305
306                         // bookclass
307                         os << "\\addcontentsline{toc}{chapter}{\\bibname}";
308
309                 } else {
310                         // article class
311                         os << "\\addcontentsline{toc}{section}{\\refname}";
312                 }
313         }
314
315         if (!db_out.empty() && !buffer.params().use_bibtopic){
316                 os << "\\bibliography{" << db_out << "}\n";
317                 nlines += 1;
318         }
319
320         return nlines;
321 }
322
323
324 vector<FileName> const InsetBibtex::getFiles(Buffer const & buffer) const
325 {
326         FileName path(buffer.filePath());
327         support::Path p(path);
328
329         vector<FileName> vec;
330
331         string tmp;
332         // FIXME UNICODE
333         string bibfiles = to_utf8(getParam("bibfiles"));
334         bibfiles = split(bibfiles, tmp, ',');
335         while (!tmp.empty()) {
336                 FileName const file = findtexfile(changeExtension(tmp, "bib"), "bib");
337                 LYXERR(Debug::LATEX) << "Bibfile: " << file << endl;
338
339                 // If we didn't find a matching file name just fail silently
340                 if (!file.empty())
341                         vec.push_back(file);
342
343                 // Get next file name
344                 bibfiles = split(bibfiles, tmp, ',');
345         }
346
347         return vec;
348 }
349
350 namespace {
351
352         // methods for parsing bibtex files
353
354         typedef map<docstring, docstring> VarMap;
355
356         /// remove whitespace characters, optionally a single comma,
357         /// and further whitespace characters from the stream.
358         /// @return true if a comma was found, false otherwise
359         ///
360         bool removeWSAndComma(idocfstream & ifs) {
361                 char_type ch;
362
363                 if (!ifs)
364                         return false;
365
366                 // skip whitespace
367                 do {
368                         ifs.get(ch);
369                 } while (ifs && isSpace(ch));
370
371                 if (!ifs)
372                         return false;
373
374                 if (ch != ',') {
375                         ifs.putback(ch);
376                         return false;
377                 }
378
379                 // skip whitespace
380                 do {
381                         ifs.get(ch);
382                 } while (ifs && isSpace(ch));
383
384                 if (ifs) {
385                         ifs.putback(ch);
386                 }
387
388                 return true;
389         }
390
391
/// Tells readTypeOrKey() what to do with the case of the characters
/// it reads.
enum charCase {
	/// store each character converted to lower case
	makeLowerCase = 0,
	/// store each character as it was read
	keepCase = 1
};
396
397         /// remove whitespace characters, read characer sequence
398         /// not containing whitespace characters or characters in
399         /// delimChars, and remove further whitespace characters.
400         ///
401         /// @return true if a string of length > 0 could be read.
402         ///
403         bool readTypeOrKey(docstring & val, idocfstream & ifs,
404                 docstring const & delimChars, docstring const &illegalChars, 
405                 charCase chCase) {
406
407                 char_type ch;
408
409                 val.clear();
410
411                 if (!ifs)
412                         return false;
413
414                 // skip whitespace
415                 do {
416                         ifs.get(ch);
417                 } while (ifs && isSpace(ch));
418
419                 if (!ifs)
420                         return false;
421
422                 // read value
423                 bool legalChar = true;
424                 while (ifs && !isSpace(ch) && 
425                                                  delimChars.find(ch) == docstring::npos &&
426                                                  (legalChar = (illegalChars.find(ch) == docstring::npos))
427                                         ) 
428                 {
429                         if (chCase == makeLowerCase)
430                                 val += lowercase(ch);
431                         else
432                                 val += ch;
433                         ifs.get(ch);
434                 }
435                 
436                 if (!legalChar) {
437                         ifs.putback(ch);
438                         return false;
439                 }
440
441                 // skip whitespace
442                 while (ifs && isSpace(ch)) {
443                         ifs.get(ch);
444                 }
445
446                 if (ifs) {
447                         ifs.putback(ch);
448                 }
449
450                 return val.length() > 0;
451         }
452
453         /// read subsequent bibtex values that are delimited with a #-character.
454         /// Concatenate all parts and replace names with the associated string in
455         /// the variable strings.
456         /// @return true if reading was successfull (all single parts were delimited
457         /// correctly)
458         bool readValue(docstring & val, idocfstream & ifs, const VarMap & strings) {
459
460                 char_type ch;
461
462                 val.clear();
463
464                 if (!ifs)
465                         return false;
466
467                 do {
468                         // skip whitespace
469                         do {
470                                 ifs.get(ch);
471                         } while (ifs && isSpace(ch));
472
473                         if (!ifs)
474                                 return false;
475
476                         // check for field type
477                         if (isDigit(ch)) {
478
479                                 // read integer value
480                                 do {
481                                         val += ch;
482                                         ifs.get(ch);
483                                 } while (ifs && isDigit(ch));
484
485                                 if (!ifs)
486                                         return false;
487
488                         } else if (ch == '"' || ch == '{') {
489                                 // set end delimiter
490                                 char_type delim = ch == '"' ? '"': '}';
491
492                                 //Skip whitespace
493                                 do {
494                                         ifs.get(ch);
495                                 } while (ifs && isSpace(ch));
496                                 
497                                 if (!ifs)
498                                         return false;
499                                 
500                                 //We now have the first non-whitespace character
501                                 //We'll collapse adjacent whitespace.
502                                 bool lastWasWhiteSpace = false;
503                                 
504                                 // inside this delimited text braces must match.
505                                 // Thus we can have a closing delimiter only
506                                 // when nestLevel == 0
507                                 int nestLevel = 0;
508  
509                                 while (ifs && (nestLevel > 0 || ch != delim)) {
510                                         if (isSpace(ch)) {
511                                                 lastWasWhiteSpace = true;
512                                                 ifs.get(ch);
513                                                 continue;
514                                         }
515                                         //We output the space only after we stop getting 
516                                         //whitespace so as not to output any whitespace
517                                         //at the end of the value.
518                                         if (lastWasWhiteSpace) {
519                                                 lastWasWhiteSpace = false;
520                                                 val += ' ';
521                                         }
522                                         
523                                         val += ch;
524
525                                         // update nesting level
526                                         switch (ch) {
527                                                 case '{':
528                                                         ++nestLevel;
529                                                         break;
530                                                 case '}':
531                                                         --nestLevel;
532                                                         if (nestLevel < 0) return false;
533                                                         break;
534                                         }
535
536                                         ifs.get(ch);
537                                 }
538
539                                 if (!ifs)
540                                         return false;
541
542                                 ifs.get(ch);
543
544                                 if (!ifs)
545                                         return false;
546
547                         } else {
548
549                                 // reading a string name
550                                 docstring strName;
551
552                                 while (ifs && !isSpace(ch) && ch != '#' && ch != ',' && ch != '}' && ch != ')') {
553                                         strName += lowercase(ch);
554                                         ifs.get(ch);
555                                 }
556
557                                 if (!ifs)
558                                         return false;
559
560                                 // replace the string with its assigned value or
561                                 // discard it if it's not assigned
562                                 if (strName.length()) {
563                                         VarMap::const_iterator pos = strings.find(strName);
564                                         if (pos != strings.end()) {
565                                                 val += pos->second;
566                                         }
567                                 }
568                         }
569
570                         // skip WS
571                         while (ifs && isSpace(ch)) {
572                                 ifs.get(ch);
573                         }
574
575                         if (!ifs)
576                                 return false;
577
578                         // continue reading next value on concatenate with '#'
579                 } while (ch == '#');
580
581                 ifs.putback(ch);
582
583                 return true;
584         }
585 }
586
587
588 // This method returns a comma separated list of Bibtex entries
589 void InsetBibtex::fillWithBibKeys(Buffer const & buffer,
590                 BiblioInfo & keylist, InsetIterator const & /*di*/) const
591 {
592         vector<FileName> const files = getFiles(buffer);
593         for (vector<FileName>::const_iterator it = files.begin();
594              it != files.end(); ++ it) {
595                 // This bibtex parser is a first step to parse bibtex files
596                 // more precisely.
597                 //
598                 // - it reads the whole bibtex entry and does a syntax check
599                 //   (matching delimiters, missing commas,...
600                 // - it recovers from errors starting with the next @-character
601                 // - it reads @string definitions and replaces them in the
602                 //   field values.
603                 // - it accepts more characters in keys or value names than
604                 //   bibtex does.
605                 //
606                 // Officially bibtex does only support ASCII, but in practice
607                 // you can use the encoding of the main document as long as
608                 // some elements like keys and names are pure ASCII. Therefore
609                 // we convert the file from the buffer encoding.
610                 // We don't restrict keys to ASCII in LyX, since our own
611                 // InsetBibitem can generate non-ASCII keys, and nonstandard
612                 // 8bit clean bibtex forks exist.
613                 
614                 idocfstream ifs(it->toFilesystemEncoding().c_str(),
615                         std::ios_base::in,
616                         buffer.params().encoding().iconvName());
617
618                 char_type ch;
619                 VarMap strings;
620
621                 while (ifs) {
622
623                         ifs.get(ch);
624                         if (!ifs)
625                                 break;
626
627                         if (ch != '@')
628                                 continue;
629
630                         docstring entryType;
631
632                         if (!readTypeOrKey(entryType, ifs, from_ascii("{("), 
633                                            docstring(), makeLowerCase) || !ifs)
634                                 continue;
635
636                         if (entryType == from_ascii("comment")) {
637
638                                 ifs.ignore(std::numeric_limits<int>::max(), '\n');
639                                 continue;
640                         }
641
642                         ifs.get(ch);
643                         if (!ifs)
644                                 break;
645
646                         if ((ch != '(') && (ch != '{')) {
647                                 // invalid entry delimiter
648                                 ifs.putback(ch);
649                                 continue;
650                         }
651
652                         // process the entry
653                         if (entryType == from_ascii("string")) {
654
655                                 // read string and add it to the strings map
656                                 // (or replace it's old value)
657                                 docstring name;
658                                 docstring value;
659
660                                 if (!readTypeOrKey(name, ifs, from_ascii("="), 
661                                                    from_ascii("#{}(),"), makeLowerCase) || !ifs)
662                                         continue;
663
664                                 // next char must be an equal sign
665                                 ifs.get(ch);
666                                 if (!ifs || ch != '=')
667                                         continue;
668
669                                 if (!readValue(value, ifs, strings))
670                                         continue;
671
672                                 strings[name] = value;
673
674                         } else if (entryType == from_ascii("preamble")) {
675
676                                 // preamble definitions are discarded.
677                                 // can they be of any use in lyx?
678                                 docstring value;
679
680                                 if (!readValue(value, ifs, strings))
681                                         continue;
682
683                         } else {
684
685                                 // Citation entry. Try to read the key.
686                                 docstring key;
687
688                                 if (!readTypeOrKey(key, ifs, from_ascii(","), 
689                                                    from_ascii("}"), keepCase) || !ifs)
690                                         continue;
691
692                                 /////////////////////////////////////////////
693                                 // now we have a key, so we will add an entry 
694                                 // (even if it's empty, as bibtex does)
695                                 //
696                                 // we now read the field = value pairs.
697                                 // all items must be separated by a comma. If
698                                 // it is missing the scanning of this entry is
699                                 // stopped and the next is searched.
700                                 docstring fields;
701                                 docstring name;
702                                 docstring value;
703                                 docstring commaNewline;
704                                 docstring data;
705                                 BibTeXInfo keyvalmap;
706                                 keyvalmap.entryType = entryType;
707                                 
708                                 bool readNext = removeWSAndComma(ifs);
709  
710                                 while (ifs && readNext) {
711
712                                         // read field name
713                                         if (!readTypeOrKey(name, ifs, from_ascii("="), 
714                                                            from_ascii("{}(),"), makeLowerCase) || !ifs)
715                                                 break;
716
717                                         // next char must be an equal sign
718                                         ifs.get(ch);
719                                         if (!ifs)
720                                                 break;
721                                         if (ch != '=') {
722                                                 ifs.putback(ch);
723                                                 break;
724                                         }
725
726                                         // read field value
727                                         if (!readValue(value, ifs, strings))
728                                                 break;
729
730                                         keyvalmap[name] = value;
731                                         data += "\n\n" + value;
732                                         keylist.fieldNames.insert(name);
733                                         readNext = removeWSAndComma(ifs);
734                                 }
735
736                                 // add the new entry
737                                 keylist.entryTypes.insert(entryType);
738                                 keyvalmap.allData = data;
739                                 keyvalmap.isBibTeX = true;
740                                 keyvalmap.bibKey = key;
741                                 keylist[key] = keyvalmap;
742                         }
743                 } //< searching '@'
744         } //< for loop over files
745 }
746
747
748
749 bool InsetBibtex::addDatabase(string const & db)
750 {
751         // FIXME UNICODE
752         string bibfiles(to_utf8(getParam("bibfiles")));
753         if (tokenPos(bibfiles, ',', db) == -1) {
754                 if (!bibfiles.empty())
755                         bibfiles += ',';
756                 setParam("bibfiles", from_utf8(bibfiles + db));
757                 return true;
758         }
759         return false;
760 }
761
762
763 bool InsetBibtex::delDatabase(string const & db)
764 {
765         // FIXME UNICODE
766         string bibfiles(to_utf8(getParam("bibfiles")));
767         if (contains(bibfiles, db)) {
768                 int const n = tokenPos(bibfiles, ',', db);
769                 string bd = db;
770                 if (n > 0) {
771                         // this is not the first database
772                         string tmp = ',' + bd;
773                         setParam("bibfiles", from_utf8(subst(bibfiles, tmp, string())));
774                 } else if (n == 0)
775                         // this is the first (or only) database
776                         setParam("bibfiles", from_utf8(split(bibfiles, bd, ',')));
777                 else
778                         return false;
779         }
780         return true;
781 }
782
783
784 void InsetBibtex::validate(LaTeXFeatures & features) const
785 {
786         if (features.bufferParams().use_bibtopic)
787                 features.require("bibtopic");
788 }
789
790
791 } // namespace lyx