]> git.lyx.org Git - lyx.git/blob - src/insets/InsetBibtex.cpp
adjust
[lyx.git] / src / insets / InsetBibtex.cpp
1 /**
2  * \file InsetBibtex.cpp
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author Alejandro Aguilar Sierra
7  * \author Richard Heck (BibTeX parser improvements)
8  *
9  * Full author contact details are available in file CREDITS.
10  */
11
12 #include <config.h>
13
14 #include "InsetBibtex.h"
15
16 #include "Buffer.h"
17 #include "BufferParams.h"
18 #include "DispatchResult.h"
19 #include "debug.h"
20 #include "Encoding.h"
21 #include "FuncRequest.h"
22 #include "gettext.h"
23 #include "LaTeXFeatures.h"
24 #include "MetricsInfo.h"
25 #include "OutputParams.h"
26
27 #include "frontends/alert.h"
28
29 #include "support/ExceptionMessage.h"
30 #include "support/filetools.h"
31 #include "support/lstrings.h"
32 #include "support/lyxlib.h"
33 #include "support/os.h"
34 #include "support/Path.h"
35 #include "support/textutils.h"
36
37 #include <boost/tokenizer.hpp>
38
39
40 namespace lyx {
41
42 using support::absolutePath;
43 using support::ascii_lowercase;
44 using support::changeExtension;
45 using support::contains;
46 using support::copy;
47 using support::DocFileName;
48 using support::FileName;
49 using support::findtexfile;
50 using support::isFileReadable;
51 using support::isValidLaTeXFilename;
52 using support::latex_path;
53 using support::ltrim;
54 using support::makeAbsPath;
55 using support::makeRelPath;
56 using support::prefixIs;
57 using support::removeExtension;
58 using support::rtrim;
59 using support::split;
60 using support::subst;
61 using support::tokenPos;
62 using support::trim;
63 using support::lowercase;
64
65 namespace Alert = frontend::Alert;
66 namespace os = support::os;
67
68 using std::endl;
69 using std::getline;
70 using std::string;
71 using std::ostream;
72 using std::pair;
73 using std::vector;
74 using std::map;
75
76
77 InsetBibtex::InsetBibtex(InsetCommandParams const & p)
78         : InsetCommand(p, "bibtex")
79 {}
80
81
82 Inset * InsetBibtex::clone() const
83 {
84         return new InsetBibtex(*this);
85 }
86
87
88 void InsetBibtex::doDispatch(Cursor & cur, FuncRequest & cmd)
89 {
90         switch (cmd.action) {
91
92         case LFUN_INSET_MODIFY: {
93                 InsetCommandParams p("bibtex");
94                 try {
95                         if (!InsetCommandMailer::string2params("bibtex", 
96                                         to_utf8(cmd.argument()), p)) {
97                                 cur.noUpdate();
98                                 break;
99                         }
100                 } catch (support::ExceptionMessage const & message) {
101                         if (message.type_ == support::WarningException) {
102                                 Alert::warning(message.title_, message.details_);
103                                 cur.noUpdate();
104                         } else 
105                                 throw message;
106                         break;
107                 }
108                 setParams(p);
109                 cur.buffer().updateBibfilesCache();
110                 break;
111         }
112
113         default:
114                 InsetCommand::doDispatch(cur, cmd);
115                 break;
116         }
117 }
118
119
120 docstring const InsetBibtex::getScreenLabel(Buffer const &) const
121 {
122         return _("BibTeX Generated Bibliography");
123 }
124
125
126 namespace {
127
128 string normalize_name(Buffer const & buffer, OutputParams const & runparams,
129                       string const & name, string const & ext)
130 {
131         string const fname = makeAbsPath(name, buffer.filePath()).absFilename();
132         if (absolutePath(name) || !isFileReadable(FileName(fname + ext)))
133                 return name;
134         else if (!runparams.nice)
135                 return fname;
136         else
137                 // FIXME UNICODE
138                 return to_utf8(makeRelPath(from_utf8(fname),
139                                            from_utf8(buffer.getMasterBuffer()->filePath())));
140 }
141
142 }
143
144
145 int InsetBibtex::latex(Buffer const & buffer, odocstream & os,
146                        OutputParams const & runparams) const
147 {
148         // the sequence of the commands:
149         // 1. \bibliographystyle{style}
150         // 2. \addcontentsline{...} - if option bibtotoc set
151         // 3. \bibliography{database}
152         // and with bibtopic:
153         // 1. \bibliographystyle{style}
154         // 2. \begin{btSect}{database}
155         // 3. \btPrint{Cited|NotCited|All}
156         // 4. \end{btSect}
157
158         // Database(s)
159         // If we are processing the LaTeX file in a temp directory then
160         // copy the .bib databases to this temp directory, mangling their
161         // names in the process. Store this mangled name in the list of
162         // all databases.
163         // (We need to do all this because BibTeX *really*, *really*
164         // can't handle "files with spaces" and Windows users tend to
165         // use such filenames.)
166         // Otherwise, store the (maybe absolute) path to the original,
167         // unmangled database name.
168         typedef boost::char_separator<char_type> Separator;
169         typedef boost::tokenizer<Separator, docstring::const_iterator, docstring> Tokenizer;
170
171         Separator const separator(from_ascii(",").c_str());
172         // The tokenizer must not be called with temporary strings, since
173         // it does not make a copy and uses iterators of the string further
174         // down. getParam returns a reference, so this is OK.
175         Tokenizer const tokens(getParam("bibfiles"), separator);
176         Tokenizer::const_iterator const begin = tokens.begin();
177         Tokenizer::const_iterator const end = tokens.end();
178
179         odocstringstream dbs;
180         for (Tokenizer::const_iterator it = begin; it != end; ++it) {
181                 docstring const input = trim(*it);
182                 // FIXME UNICODE
183                 string utf8input(to_utf8(input));
184                 string database =
185                         normalize_name(buffer, runparams, utf8input, ".bib");
186                 FileName const try_in_file(makeAbsPath(database + ".bib", buffer.filePath()));
187                 bool const not_from_texmf = isFileReadable(try_in_file);
188
189                 if (!runparams.inComment && !runparams.dryrun && !runparams.nice &&
190                     not_from_texmf) {
191
192                         // mangledFilename() needs the extension
193                         DocFileName const in_file = DocFileName(try_in_file);
194                         database = removeExtension(in_file.mangledFilename());
195                         FileName const out_file(makeAbsPath(database + ".bib",
196                                         buffer.getMasterBuffer()->temppath()));
197
198                         bool const success = copy(in_file, out_file);
199                         if (!success) {
200                                 lyxerr << "Failed to copy '" << in_file
201                                        << "' to '" << out_file << "'"
202                                        << endl;
203                         }
204                 } else if (!runparams.inComment && runparams.nice && not_from_texmf &&
205                            !isValidLaTeXFilename(database)) {
206                                 frontend::Alert::warning(_("Invalid filename"),
207                                                          _("The following filename is likely to cause trouble "
208                                                            "when running the exported file through LaTeX: ") +
209                                                             from_utf8(database));
210                 }
211
212                 if (it != begin)
213                         dbs << ',';
214                 // FIXME UNICODE
215                 dbs << from_utf8(latex_path(database));
216         }
217         docstring const db_out = dbs.str();
218
219         // Post this warning only once.
220         static bool warned_about_spaces = false;
221         if (!warned_about_spaces &&
222             runparams.nice && db_out.find(' ') != docstring::npos) {
223                 warned_about_spaces = true;
224
225                 Alert::warning(_("Export Warning!"),
226                                _("There are spaces in the paths to your BibTeX databases.\n"
227                                               "BibTeX will be unable to find them."));
228
229         }
230
231         // Style-Options
232         string style = to_utf8(getParam("options")); // maybe empty! and with bibtotoc
233         string bibtotoc;
234         if (prefixIs(style, "bibtotoc")) {
235                 bibtotoc = "bibtotoc";
236                 if (contains(style, ',')) {
237                         style = split(style, bibtotoc, ',');
238                 }
239         }
240
241         // line count
242         int nlines = 0;
243
244         if (!style.empty()) {
245                 string base =
246                         normalize_name(buffer, runparams, style, ".bst");
247                 FileName const try_in_file(makeAbsPath(base + ".bst", buffer.filePath()));
248                 bool const not_from_texmf = isFileReadable(try_in_file);
249                 // If this style does not come from texmf and we are not
250                 // exporting to .tex copy it to the tmp directory.
251                 // This prevents problems with spaces and 8bit charcaters
252                 // in the file name.
253                 if (!runparams.inComment && !runparams.dryrun && !runparams.nice &&
254                     not_from_texmf) {
255                         // use new style name
256                         DocFileName const in_file = DocFileName(try_in_file);
257                         base = removeExtension(in_file.mangledFilename());
258                         FileName const out_file(makeAbsPath(base + ".bst",
259                                         buffer.getMasterBuffer()->temppath()));
260                         bool const success = copy(in_file, out_file);
261                         if (!success) {
262                                 lyxerr << "Failed to copy '" << in_file
263                                        << "' to '" << out_file << "'"
264                                        << endl;
265                         }
266                 }
267                 // FIXME UNICODE
268                 os << "\\bibliographystyle{"
269                    << from_utf8(latex_path(normalize_name(buffer, runparams, base, ".bst")))
270                    << "}\n";
271                 nlines += 1;
272         }
273
274         // Post this warning only once.
275         static bool warned_about_bst_spaces = false;
276         if (!warned_about_bst_spaces && runparams.nice && contains(style, ' ')) {
277                 warned_about_bst_spaces = true;
278                 Alert::warning(_("Export Warning!"),
279                                _("There are spaces in the path to your BibTeX style file.\n"
280                                               "BibTeX will be unable to find it."));
281         }
282
283         if (!db_out.empty() && buffer.params().use_bibtopic){
284                 os << "\\begin{btSect}{" << db_out << "}\n";
285                 docstring btprint = getParam("btprint");
286                 if (btprint.empty())
287                         // default
288                         btprint = from_ascii("btPrintCited");
289                 os << "\\" << btprint << "\n"
290                    << "\\end{btSect}\n";
291                 nlines += 3;
292         }
293
294         // bibtotoc-Option
295         if (!bibtotoc.empty() && !buffer.params().use_bibtopic) {
296                 // maybe a problem when a textclass has no "art" as
297                 // part of its name, because it's than book.
298                 // For the "official" lyx-layouts it's no problem to support
299                 // all well
300                 if (!contains(buffer.params().getTextClass().name(),
301                               "art")) {
302                         if (buffer.params().sides == TextClass::OneSide) {
303                                 // oneside
304                                 os << "\\clearpage";
305                         } else {
306                                 // twoside
307                                 os << "\\cleardoublepage";
308                         }
309
310                         // bookclass
311                         os << "\\addcontentsline{toc}{chapter}{\\bibname}";
312
313                 } else {
314                         // article class
315                         os << "\\addcontentsline{toc}{section}{\\refname}";
316                 }
317         }
318
319         if (!db_out.empty() && !buffer.params().use_bibtopic){
320                 os << "\\bibliography{" << db_out << "}\n";
321                 nlines += 1;
322         }
323
324         return nlines;
325 }
326
327
328 vector<FileName> const InsetBibtex::getFiles(Buffer const & buffer) const
329 {
330         FileName path(buffer.filePath());
331         support::Path p(path);
332
333         vector<FileName> vec;
334
335         string tmp;
336         // FIXME UNICODE
337         string bibfiles = to_utf8(getParam("bibfiles"));
338         bibfiles = split(bibfiles, tmp, ',');
339         while (!tmp.empty()) {
340                 FileName const file = findtexfile(changeExtension(tmp, "bib"), "bib");
341                 LYXERR(Debug::LATEX) << "Bibfile: " << file << endl;
342
343                 // If we didn't find a matching file name just fail silently
344                 if (!file.empty())
345                         vec.push_back(file);
346
347                 // Get next file name
348                 bibfiles = split(bibfiles, tmp, ',');
349         }
350
351         return vec;
352 }
353
354 namespace {
355
356         // methods for parsing bibtex files
357
358         typedef map<docstring, docstring> VarMap;
359
360         /// remove whitespace characters, optionally a single comma,
361         /// and further whitespace characters from the stream.
362         /// @return true if a comma was found, false otherwise
363         ///
364         bool removeWSAndComma(idocfstream & ifs) {
365                 char_type ch;
366
367                 if (!ifs)
368                         return false;
369
370                 // skip whitespace
371                 do {
372                         ifs.get(ch);
373                 } while (ifs && isSpace(ch));
374
375                 if (!ifs)
376                         return false;
377
378                 if (ch != ',') {
379                         ifs.putback(ch);
380                         return false;
381                 }
382
383                 // skip whitespace
384                 do {
385                         ifs.get(ch);
386                 } while (ifs && isSpace(ch));
387
388                 if (ifs) {
389                         ifs.putback(ch);
390                 }
391
392                 return true;
393         }
394
395
396         enum charCase {
397                 makeLowerCase,
398                 keepCase
399         };
400
401         /// remove whitespace characters, read characer sequence
402         /// not containing whitespace characters or characters in
403         /// delimChars, and remove further whitespace characters.
404         ///
405         /// @return true if a string of length > 0 could be read.
406         ///
407         bool readTypeOrKey(docstring & val, idocfstream & ifs,
408                 docstring const & delimChars, docstring const &illegalChars, 
409                 charCase chCase) {
410
411                 char_type ch;
412
413                 val.clear();
414
415                 if (!ifs)
416                         return false;
417
418                 // skip whitespace
419                 do {
420                         ifs.get(ch);
421                 } while (ifs && isSpace(ch));
422
423                 if (!ifs)
424                         return false;
425
426                 // read value
427                 bool legalChar = true;
428                 while (ifs && !isSpace(ch) && 
429                                                  delimChars.find(ch) == docstring::npos &&
430                                                  (legalChar = (illegalChars.find(ch) == docstring::npos))
431                                         ) 
432                 {
433                         if (chCase == makeLowerCase)
434                                 val += lowercase(ch);
435                         else
436                                 val += ch;
437                         ifs.get(ch);
438                 }
439                 
440                 if (!legalChar) {
441                         ifs.putback(ch);
442                         return false;
443                 }
444
445                 // skip whitespace
446                 while (ifs && isSpace(ch)) {
447                         ifs.get(ch);
448                 }
449
450                 if (ifs) {
451                         ifs.putback(ch);
452                 }
453
454                 return val.length() > 0;
455         }
456
457         /// read subsequent bibtex values that are delimited with a #-character.
458         /// Concatenate all parts and replace names with the associated string in
459         /// the variable strings.
460         /// @return true if reading was successfull (all single parts were delimited
461         /// correctly)
462         bool readValue(docstring & val, idocfstream & ifs, const VarMap & strings) {
463
464                 char_type ch;
465
466                 val.clear();
467
468                 if (!ifs)
469                         return false;
470
471                 do {
472                         // skip whitespace
473                         do {
474                                 ifs.get(ch);
475                         } while (ifs && isSpace(ch));
476
477                         if (!ifs)
478                                 return false;
479
480                         // check for field type
481                         if (isDigit(ch)) {
482
483                                 // read integer value
484                                 do {
485                                         val += ch;
486                                         ifs.get(ch);
487                                 } while (ifs && isDigit(ch));
488
489                                 if (!ifs)
490                                         return false;
491
492                         } else if (ch == '"' || ch == '{') {
493                                 // set end delimiter
494                                 char_type delim = ch == '"' ? '"': '}';
495
496                                 //Skip whitespace
497                                 do {
498                                         ifs.get(ch);
499                                 } while (ifs && isSpace(ch));
500                                 
501                                 if (!ifs)
502                                         return false;
503                                 
504                                 //We now have the first non-whitespace character
505                                 //We'll collapse adjacent whitespace.
506                                 bool lastWasWhiteSpace = false;
507                                 
508                                 // inside this delimited text braces must match.
509                                 // Thus we can have a closing delimiter only
510                                 // when nestLevel == 0
511                                 int nestLevel = 0;
512  
513                                 while (ifs && (nestLevel > 0 || ch != delim)) {
514                                         if (isSpace(ch)) {
515                                                 lastWasWhiteSpace = true;
516                                                 ifs.get(ch);
517                                                 continue;
518                                         }
519                                         //We output the space only after we stop getting 
520                                         //whitespace so as not to output any whitespace
521                                         //at the end of the value.
522                                         if (lastWasWhiteSpace) {
523                                                 lastWasWhiteSpace = false;
524                                                 val += ' ';
525                                         }
526                                         
527                                         val += ch;
528
529                                         // update nesting level
530                                         switch (ch) {
531                                                 case '{':
532                                                         ++nestLevel;
533                                                         break;
534                                                 case '}':
535                                                         --nestLevel;
536                                                         if (nestLevel < 0) return false;
537                                                         break;
538                                         }
539
540                                         ifs.get(ch);
541                                 }
542
543                                 if (!ifs)
544                                         return false;
545
546                                 ifs.get(ch);
547
548                                 if (!ifs)
549                                         return false;
550
551                         } else {
552
553                                 // reading a string name
554                                 docstring strName;
555
556                                 while (ifs && !isSpace(ch) && ch != '#' && ch != ',' && ch != '}' && ch != ')') {
557                                         strName += lowercase(ch);
558                                         ifs.get(ch);
559                                 }
560
561                                 if (!ifs)
562                                         return false;
563
564                                 // replace the string with its assigned value or
565                                 // discard it if it's not assigned
566                                 if (strName.length()) {
567                                         VarMap::const_iterator pos = strings.find(strName);
568                                         if (pos != strings.end()) {
569                                                 val += pos->second;
570                                         }
571                                 }
572                         }
573
574                         // skip WS
575                         while (ifs && isSpace(ch)) {
576                                 ifs.get(ch);
577                         }
578
579                         if (!ifs)
580                                 return false;
581
582                         // continue reading next value on concatenate with '#'
583                 } while (ch == '#');
584
585                 ifs.putback(ch);
586
587                 return true;
588         }
589 }
590
591
592 // This method returns a comma separated list of Bibtex entries
593 void InsetBibtex::fillWithBibKeys(Buffer const & buffer,
594                 BiblioInfo & keylist, InsetIterator const & /*di*/) const
595 {
596         vector<FileName> const files = getFiles(buffer);
597         for (vector<FileName>::const_iterator it = files.begin();
598              it != files.end(); ++ it) {
599                 // This bibtex parser is a first step to parse bibtex files
600                 // more precisely.
601                 //
602                 // - it reads the whole bibtex entry and does a syntax check
603                 //   (matching delimiters, missing commas,...
604                 // - it recovers from errors starting with the next @-character
605                 // - it reads @string definitions and replaces them in the
606                 //   field values.
607                 // - it accepts more characters in keys or value names than
608                 //   bibtex does.
609                 //
610                 // Officially bibtex does only support ASCII, but in practice
611                 // you can use the encoding of the main document as long as
612                 // some elements like keys and names are pure ASCII. Therefore
613                 // we convert the file from the buffer encoding.
614                 // We don't restrict keys to ASCII in LyX, since our own
615                 // InsetBibitem can generate non-ASCII keys, and nonstandard
616                 // 8bit clean bibtex forks exist.
617                 
618                 idocfstream ifs(it->toFilesystemEncoding().c_str(),
619                         std::ios_base::in,
620                         buffer.params().encoding().iconvName());
621
622                 char_type ch;
623                 VarMap strings;
624
625                 while (ifs) {
626
627                         ifs.get(ch);
628                         if (!ifs)
629                                 break;
630
631                         if (ch != '@')
632                                 continue;
633
634                         docstring entryType;
635
636                         if (!readTypeOrKey(entryType, ifs, from_ascii("{("), 
637                                            docstring(), makeLowerCase) || !ifs)
638                                 continue;
639
640                         if (entryType == from_ascii("comment")) {
641
642                                 ifs.ignore(std::numeric_limits<int>::max(), '\n');
643                                 continue;
644                         }
645
646                         ifs.get(ch);
647                         if (!ifs)
648                                 break;
649
650                         if ((ch != '(') && (ch != '{')) {
651                                 // invalid entry delimiter
652                                 ifs.putback(ch);
653                                 continue;
654                         }
655
656                         // process the entry
657                         if (entryType == from_ascii("string")) {
658
659                                 // read string and add it to the strings map
660                                 // (or replace it's old value)
661                                 docstring name;
662                                 docstring value;
663
664                                 if (!readTypeOrKey(name, ifs, from_ascii("="), 
665                                                    from_ascii("#{}(),"), makeLowerCase) || !ifs)
666                                         continue;
667
668                                 // next char must be an equal sign
669                                 ifs.get(ch);
670                                 if (!ifs || ch != '=')
671                                         continue;
672
673                                 if (!readValue(value, ifs, strings))
674                                         continue;
675
676                                 strings[name] = value;
677
678                         } else if (entryType == from_ascii("preamble")) {
679
680                                 // preamble definitions are discarded.
681                                 // can they be of any use in lyx?
682                                 docstring value;
683
684                                 if (!readValue(value, ifs, strings))
685                                         continue;
686
687                         } else {
688
689                                 // Citation entry. Try to read the key.
690                                 docstring key;
691
692                                 if (!readTypeOrKey(key, ifs, from_ascii(","), 
693                                                    from_ascii("}"), keepCase) || !ifs)
694                                         continue;
695
696                                 /////////////////////////////////////////////
697                                 // now we have a key, so we will add an entry 
698                                 // (even if it's empty, as bibtex does)
699                                 //
700                                 // we now read the field = value pairs.
701                                 // all items must be separated by a comma. If
702                                 // it is missing the scanning of this entry is
703                                 // stopped and the next is searched.
704                                 docstring fields;
705                                 docstring name;
706                                 docstring value;
707                                 docstring commaNewline;
708                                 docstring data;
709                                 BibTeXInfo keyvalmap;
710                                 keyvalmap.entryType = entryType;
711                                 
712                                 bool readNext = removeWSAndComma(ifs);
713  
714                                 while (ifs && readNext) {
715
716                                         // read field name
717                                         if (!readTypeOrKey(name, ifs, from_ascii("="), 
718                                                            from_ascii("{}(),"), makeLowerCase) || !ifs)
719                                                 break;
720
721                                         // next char must be an equal sign
722                                         ifs.get(ch);
723                                         if (!ifs)
724                                                 break;
725                                         if (ch != '=') {
726                                                 ifs.putback(ch);
727                                                 break;
728                                         }
729
730                                         // read field value
731                                         if (!readValue(value, ifs, strings))
732                                                 break;
733
734                                         keyvalmap[name] = value;
735                                         data += "\n\n" + value;
736                                         keylist.fieldNames.insert(name);
737                                         readNext = removeWSAndComma(ifs);
738                                 }
739
740                                 // add the new entry
741                                 keylist.entryTypes.insert(entryType);
742                                 keyvalmap.allData = data;
743                                 keyvalmap.isBibTeX = true;
744                                 keyvalmap.bibKey = key;
745                                 keylist[key] = keyvalmap;
746                         }
747                 } //< searching '@'
748         } //< for loop over files
749 }
750
751
752
753 bool InsetBibtex::addDatabase(string const & db)
754 {
755         // FIXME UNICODE
756         string bibfiles(to_utf8(getParam("bibfiles")));
757         if (tokenPos(bibfiles, ',', db) == -1) {
758                 if (!bibfiles.empty())
759                         bibfiles += ',';
760                 setParam("bibfiles", from_utf8(bibfiles + db));
761                 return true;
762         }
763         return false;
764 }
765
766
767 bool InsetBibtex::delDatabase(string const & db)
768 {
769         // FIXME UNICODE
770         string bibfiles(to_utf8(getParam("bibfiles")));
771         if (contains(bibfiles, db)) {
772                 int const n = tokenPos(bibfiles, ',', db);
773                 string bd = db;
774                 if (n > 0) {
775                         // this is not the first database
776                         string tmp = ',' + bd;
777                         setParam("bibfiles", from_utf8(subst(bibfiles, tmp, string())));
778                 } else if (n == 0)
779                         // this is the first (or only) database
780                         setParam("bibfiles", from_utf8(split(bibfiles, bd, ',')));
781                 else
782                         return false;
783         }
784         return true;
785 }
786
787
788 void InsetBibtex::validate(LaTeXFeatures & features) const
789 {
790         if (features.bufferParams().use_bibtopic)
791                 features.require("bibtopic");
792 }
793
794
795 } // namespace lyx