]> git.lyx.org Git - lyx.git/blob - src/insets/InsetBibtex.cpp
InsetBibtex.cpp: compilation fix
[lyx.git] / src / insets / InsetBibtex.cpp
1 /**
2  * \file InsetBibtex.cpp
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author Alejandro Aguilar Sierra
7  * \author Richard Heck (BibTeX parser improvements)
8  *
9  * Full author contact details are available in file CREDITS.
10  */
11
12 #include <config.h>
13
14 #include "InsetBibtex.h"
15
16 #include "Buffer.h"
17 #include "BufferParams.h"
18 #include "DispatchResult.h"
19 #include "Encoding.h"
20 #include "FuncRequest.h"
21 #include "LaTeXFeatures.h"
22 #include "MetricsInfo.h"
23 #include "OutputParams.h"
24 #include "TextClass.h"
25
26 #include "frontends/alert.h"
27
28 #include "support/debug.h"
29 #include "support/docstream.h"
30 #include "support/ExceptionMessage.h"
31 #include "support/filetools.h"
32 #include "support/gettext.h"
33 #include "support/lstrings.h"
34 #include "support/os.h"
35 #include "support/Path.h"
36 #include "support/textutils.h"
37
38 #include <limits>
39
40 using namespace std;
41 using namespace lyx::support;
42
43 namespace lyx {
44
45 namespace Alert = frontend::Alert;
46 namespace os = support::os;
47
48
49 InsetBibtex::InsetBibtex(InsetCommandParams const & p)
50         : InsetCommand(p, "bibtex")
51 {}
52
53
54 ParamInfo const & InsetBibtex::findInfo(string const & /* cmdName */)
55 {
56         static ParamInfo param_info_;
57         if (param_info_.empty()) {
58                 param_info_.add("btprint", ParamInfo::LATEX_OPTIONAL);
59                 param_info_.add("bibfiles", ParamInfo::LATEX_REQUIRED);
60                 param_info_.add("options", ParamInfo::LYX_INTERNAL);
61         }
62         return param_info_;
63 }
64
65
66 void InsetBibtex::doDispatch(Cursor & cur, FuncRequest & cmd)
67 {
68         switch (cmd.action) {
69
70         case LFUN_INSET_MODIFY: {
71                 InsetCommandParams p(BIBTEX_CODE);
72                 try {
73                         if (!InsetCommand::string2params("bibtex", 
74                                         to_utf8(cmd.argument()), p)) {
75                                 cur.noUpdate();
76                                 break;
77                         }
78                 } catch (ExceptionMessage const & message) {
79                         if (message.type_ == WarningException) {
80                                 Alert::warning(message.title_, message.details_);
81                                 cur.noUpdate();
82                         } else 
83                                 throw message;
84                         break;
85                 }
86                 //
87                 setParams(p);
88                 buffer().updateBibfilesCache();
89                 break;
90         }
91
92         default:
93                 InsetCommand::doDispatch(cur, cmd);
94                 break;
95         }
96 }
97
98
99 docstring InsetBibtex::screenLabel() const
100 {
101         return _("BibTeX Generated Bibliography");
102 }
103
104
105 static string normalizeName(Buffer const & buffer,
106         OutputParams const & runparams, string const & name, string const & ext)
107 {
108         string const fname = makeAbsPath(name, buffer.filePath()).absFilename();
109         if (FileName(name).isAbsolute() || !FileName(fname + ext).isReadableFile())
110                 return name;
111         if (!runparams.nice)
112                 return fname;
113
114         // FIXME UNICODE
115         return to_utf8(makeRelPath(from_utf8(fname),
116                                          from_utf8(buffer.masterBuffer()->filePath())));
117 }
118
119
120 int InsetBibtex::latex(odocstream & os, OutputParams const & runparams) const
121 {
122         // the sequence of the commands:
123         // 1. \bibliographystyle{style}
124         // 2. \addcontentsline{...} - if option bibtotoc set
125         // 3. \bibliography{database}
126         // and with bibtopic:
127         // 1. \bibliographystyle{style}
128         // 2. \begin{btSect}{database}
129         // 3. \btPrint{Cited|NotCited|All}
130         // 4. \end{btSect}
131
132         // Database(s)
133         // If we are processing the LaTeX file in a temp directory then
134         // copy the .bib databases to this temp directory, mangling their
135         // names in the process. Store this mangled name in the list of
136         // all databases.
137         // (We need to do all this because BibTeX *really*, *really*
138         // can't handle "files with spaces" and Windows users tend to
139         // use such filenames.)
140         // Otherwise, store the (maybe absolute) path to the original,
141         // unmangled database name.
142         vector<docstring> bibfilelist = getVectorFromString(getParam("bibfiles"));
143         vector<docstring>::const_iterator it = bibfilelist.begin();
144         vector<docstring>::const_iterator en = bibfilelist.end();
145         odocstringstream dbs;
146         bool didone = false;
147
148         for (; it != en; ++it) {
149                 string utf8input = to_utf8(*it);
150                 string database =
151                         normalizeName(buffer(), runparams, utf8input, ".bib");
152                 FileName const try_in_file =
153                         makeAbsPath(database + ".bib", buffer().filePath());
154                 bool const not_from_texmf = try_in_file.isReadableFile();
155
156                 if (!runparams.inComment && !runparams.dryrun && !runparams.nice &&
157                     not_from_texmf) {
158
159                         // mangledFilename() needs the extension
160                         DocFileName const in_file = DocFileName(try_in_file);
161                         database = removeExtension(in_file.mangledFilename());
162                         FileName const out_file = makeAbsPath(database + ".bib",
163                                         buffer().masterBuffer()->temppath());
164
165                         bool const success = in_file.copyTo(out_file);
166                         if (!success) {
167                                 lyxerr << "Failed to copy '" << in_file
168                                        << "' to '" << out_file << "'"
169                                        << endl;
170                         }
171                 } else if (!runparams.inComment && runparams.nice && not_from_texmf &&
172                            !isValidLaTeXFilename(database)) {
173                                 frontend::Alert::warning(_("Invalid filename"),
174                                                          _("The following filename is likely to cause trouble "
175                                                            "when running the exported file through LaTeX: ") +
176                                                             from_utf8(database));
177                 }
178
179                 if (didone)
180                         dbs << ',';
181                 else 
182                         didone =- true;
183                 // FIXME UNICODE
184                 dbs << from_utf8(latex_path(database));
185         }
186         docstring const db_out = dbs.str();
187
188         // Post this warning only once.
189         static bool warned_about_spaces = false;
190         if (!warned_about_spaces &&
191             runparams.nice && db_out.find(' ') != docstring::npos) {
192                 warned_about_spaces = true;
193
194                 Alert::warning(_("Export Warning!"),
195                                _("There are spaces in the paths to your BibTeX databases.\n"
196                                               "BibTeX will be unable to find them."));
197         }
198
199         // Style-Options
200         string style = to_utf8(getParam("options")); // maybe empty! and with bibtotoc
201         string bibtotoc;
202         if (prefixIs(style, "bibtotoc")) {
203                 bibtotoc = "bibtotoc";
204                 if (contains(style, ','))
205                         style = split(style, bibtotoc, ',');
206         }
207
208         // line count
209         int nlines = 0;
210
211         if (!style.empty()) {
212                 string base = normalizeName(buffer(), runparams, style, ".bst");
213                 FileName const try_in_file = 
214                         makeAbsPath(base + ".bst", buffer().filePath());
215                 bool const not_from_texmf = try_in_file.isReadableFile();
216                 // If this style does not come from texmf and we are not
217                 // exporting to .tex copy it to the tmp directory.
218                 // This prevents problems with spaces and 8bit charcaters
219                 // in the file name.
220                 if (!runparams.inComment && !runparams.dryrun && !runparams.nice &&
221                     not_from_texmf) {
222                         // use new style name
223                         DocFileName const in_file = DocFileName(try_in_file);
224                         base = removeExtension(in_file.mangledFilename());
225                         FileName const out_file = makeAbsPath(base + ".bst",
226                                         buffer().masterBuffer()->temppath());
227                         bool const success = in_file.copyTo(out_file);
228                         if (!success) {
229                                 lyxerr << "Failed to copy '" << in_file
230                                        << "' to '" << out_file << "'"
231                                        << endl;
232                         }
233                 }
234                 // FIXME UNICODE
235                 os << "\\bibliographystyle{"
236                    << from_utf8(latex_path(normalizeName(buffer(), runparams, base, ".bst")))
237                    << "}\n";
238                 nlines += 1;
239         }
240
241         // Post this warning only once.
242         static bool warned_about_bst_spaces = false;
243         if (!warned_about_bst_spaces && runparams.nice && contains(style, ' ')) {
244                 warned_about_bst_spaces = true;
245                 Alert::warning(_("Export Warning!"),
246                                _("There are spaces in the path to your BibTeX style file.\n"
247                                               "BibTeX will be unable to find it."));
248         }
249
250         if (!db_out.empty() && buffer().params().use_bibtopic) {
251                 os << "\\begin{btSect}{" << db_out << "}\n";
252                 docstring btprint = getParam("btprint");
253                 if (btprint.empty())
254                         // default
255                         btprint = from_ascii("btPrintCited");
256                 os << "\\" << btprint << "\n"
257                    << "\\end{btSect}\n";
258                 nlines += 3;
259         }
260
261         // bibtotoc-Option
262         if (!bibtotoc.empty() && !buffer().params().use_bibtopic) {
263                 if (buffer().params().documentClass().hasLaTeXLayout("chapter")) {
264                         if (buffer().params().sides == OneSide) {
265                                 // oneside
266                                 os << "\\clearpage";
267                         } else {
268                                 // twoside
269                                 os << "\\cleardoublepage";
270                         }
271                         os << "\\addcontentsline{toc}{chapter}{\\bibname}";
272                 } else if (buffer().params().documentClass().hasLaTeXLayout("section"))
273                         os << "\\addcontentsline{toc}{section}{\\refname}";
274         }
275
276         if (!db_out.empty() && !buffer().params().use_bibtopic) {
277                 docstring btprint = getParam("btprint");
278                 if (btprint == "btPrintAll") {
279                         os << "\\nocite{*}\n";
280                         nlines += 1;
281                 }
282                 os << "\\bibliography{" << db_out << "}\n";
283                 nlines += 1;
284         }
285
286         return nlines;
287 }
288
289
290 support::FileNameList InsetBibtex::getBibFiles() const
291 {
292         FileName path(buffer().filePath());
293         support::PathChanger p(path);
294         
295         support::FileNameList vec;
296         
297         vector<docstring> bibfilelist = getVectorFromString(getParam("bibfiles"));
298         vector<docstring>::const_iterator it = bibfilelist.begin();
299         vector<docstring>::const_iterator en = bibfilelist.end();
300         for (; it != en; ++it) {
301                 FileName const file = 
302                         findtexfile(changeExtension(to_utf8(*it), "bib"), "bib");
303                 
304                 // If we didn't find a matching file name just fail silently
305                 if (!file.empty())
306                         vec.push_back(file);
307         }
308         
309         return vec;
310
311 }
312
313 namespace {
314
315         // methods for parsing bibtex files
316
317         typedef map<docstring, docstring> VarMap;
318
319         /// remove whitespace characters, optionally a single comma,
320         /// and further whitespace characters from the stream.
321         /// @return true if a comma was found, false otherwise
322         ///
323         bool removeWSAndComma(idocfstream & ifs) {
324                 char_type ch;
325
326                 if (!ifs)
327                         return false;
328
329                 // skip whitespace
330                 do {
331                         ifs.get(ch);
332                 } while (ifs && isSpace(ch));
333
334                 if (!ifs)
335                         return false;
336
337                 if (ch != ',') {
338                         ifs.putback(ch);
339                         return false;
340                 }
341
342                 // skip whitespace
343                 do {
344                         ifs.get(ch);
345                 } while (ifs && isSpace(ch));
346
347                 if (ifs) {
348                         ifs.putback(ch);
349                 }
350
351                 return true;
352         }
353
354
355         enum charCase {
356                 makeLowerCase,
357                 keepCase
358         };
359
360         /// remove whitespace characters, read characer sequence
361         /// not containing whitespace characters or characters in
362         /// delimChars, and remove further whitespace characters.
363         ///
364         /// @return true if a string of length > 0 could be read.
365         ///
366         bool readTypeOrKey(docstring & val, idocfstream & ifs,
367                 docstring const & delimChars, docstring const &illegalChars, 
368                 charCase chCase) {
369
370                 char_type ch;
371
372                 val.clear();
373
374                 if (!ifs)
375                         return false;
376
377                 // skip whitespace
378                 do {
379                         ifs.get(ch);
380                 } while (ifs && isSpace(ch));
381
382                 if (!ifs)
383                         return false;
384
385                 // read value
386                 bool legalChar = true;
387                 while (ifs && !isSpace(ch) && 
388                                                  delimChars.find(ch) == docstring::npos &&
389                                                  (legalChar = (illegalChars.find(ch) == docstring::npos))
390                                         ) 
391                 {
392                         if (chCase == makeLowerCase)
393                                 val += lowercase(ch);
394                         else
395                                 val += ch;
396                         ifs.get(ch);
397                 }
398                 
399                 if (!legalChar) {
400                         ifs.putback(ch);
401                         return false;
402                 }
403
404                 // skip whitespace
405                 while (ifs && isSpace(ch)) {
406                         ifs.get(ch);
407                 }
408
409                 if (ifs) {
410                         ifs.putback(ch);
411                 }
412
413                 return val.length() > 0;
414         }
415
416         /// read subsequent bibtex values that are delimited with a #-character.
417         /// Concatenate all parts and replace names with the associated string in
418         /// the variable strings.
419         /// @return true if reading was successfull (all single parts were delimited
420         /// correctly)
421         bool readValue(docstring & val, idocfstream & ifs, const VarMap & strings) {
422
423                 char_type ch;
424
425                 val.clear();
426
427                 if (!ifs)
428                         return false;
429
430                 do {
431                         // skip whitespace
432                         do {
433                                 ifs.get(ch);
434                         } while (ifs && isSpace(ch));
435
436                         if (!ifs)
437                                 return false;
438
439                         // check for field type
440                         if (isDigit(ch)) {
441
442                                 // read integer value
443                                 do {
444                                         val += ch;
445                                         ifs.get(ch);
446                                 } while (ifs && isDigit(ch));
447
448                                 if (!ifs)
449                                         return false;
450
451                         } else if (ch == '"' || ch == '{') {
452                                 // set end delimiter
453                                 char_type delim = ch == '"' ? '"': '}';
454
455                                 //Skip whitespace
456                                 do {
457                                         ifs.get(ch);
458                                 } while (ifs && isSpace(ch));
459                                 
460                                 if (!ifs)
461                                         return false;
462                                 
463                                 //We now have the first non-whitespace character
464                                 //We'll collapse adjacent whitespace.
465                                 bool lastWasWhiteSpace = false;
466                                 
467                                 // inside this delimited text braces must match.
468                                 // Thus we can have a closing delimiter only
469                                 // when nestLevel == 0
470                                 int nestLevel = 0;
471  
472                                 while (ifs && (nestLevel > 0 || ch != delim)) {
473                                         if (isSpace(ch)) {
474                                                 lastWasWhiteSpace = true;
475                                                 ifs.get(ch);
476                                                 continue;
477                                         }
478                                         //We output the space only after we stop getting 
479                                         //whitespace so as not to output any whitespace
480                                         //at the end of the value.
481                                         if (lastWasWhiteSpace) {
482                                                 lastWasWhiteSpace = false;
483                                                 val += ' ';
484                                         }
485                                         
486                                         val += ch;
487
488                                         // update nesting level
489                                         switch (ch) {
490                                                 case '{':
491                                                         ++nestLevel;
492                                                         break;
493                                                 case '}':
494                                                         --nestLevel;
495                                                         if (nestLevel < 0) return false;
496                                                         break;
497                                         }
498
499                                         ifs.get(ch);
500                                 }
501
502                                 if (!ifs)
503                                         return false;
504
505                                 ifs.get(ch);
506
507                                 if (!ifs)
508                                         return false;
509
510                         } else {
511
512                                 // reading a string name
513                                 docstring strName;
514
515                                 while (ifs && !isSpace(ch) && ch != '#' && ch != ',' && ch != '}' && ch != ')') {
516                                         strName += lowercase(ch);
517                                         ifs.get(ch);
518                                 }
519
520                                 if (!ifs)
521                                         return false;
522
523                                 // replace the string with its assigned value or
524                                 // discard it if it's not assigned
525                                 if (strName.length()) {
526                                         VarMap::const_iterator pos = strings.find(strName);
527                                         if (pos != strings.end()) {
528                                                 val += pos->second;
529                                         }
530                                 }
531                         }
532
533                         // skip WS
534                         while (ifs && isSpace(ch)) {
535                                 ifs.get(ch);
536                         }
537
538                         if (!ifs)
539                                 return false;
540
541                         // continue reading next value on concatenate with '#'
542                 } while (ch == '#');
543
544                 ifs.putback(ch);
545
546                 return true;
547         }
548 }
549
550
551 // This method returns a comma separated list of Bibtex entries
552 void InsetBibtex::fillWithBibKeys(BiblioInfo & keylist,
553         InsetIterator const & /*di*/) const
554 {
555         // This bibtex parser is a first step to parse bibtex files
556         // more precisely.
557         //
558         // - it reads the whole bibtex entry and does a syntax check
559         //   (matching delimiters, missing commas,...
560         // - it recovers from errors starting with the next @-character
561         // - it reads @string definitions and replaces them in the
562         //   field values.
563         // - it accepts more characters in keys or value names than
564         //   bibtex does.
565         //
566         // Officially bibtex does only support ASCII, but in practice
567         // you can use the encoding of the main document as long as
568         // some elements like keys and names are pure ASCII. Therefore
569         // we convert the file from the buffer encoding.
570         // We don't restrict keys to ASCII in LyX, since our own
571         // InsetBibitem can generate non-ASCII keys, and nonstandard
572         // 8bit clean bibtex forks exist.
573         support::FileNameList const files = getBibFiles();
574         support::FileNameList::const_iterator it = files.begin();
575         support::FileNameList::const_iterator en = files.end();
576         for (; it != en; ++ it) {
577                 idocfstream ifs(it->toFilesystemEncoding().c_str(),
578                         ios_base::in, buffer().params().encoding().iconvName());
579
580                 char_type ch;
581                 VarMap strings;
582
583                 while (ifs) {
584
585                         ifs.get(ch);
586                         if (!ifs)
587                                 break;
588
589                         if (ch != '@')
590                                 continue;
591
592                         docstring entryType;
593
594                         if (!readTypeOrKey(entryType, ifs, from_ascii("{("), 
595                                            docstring(), makeLowerCase) || !ifs)
596                                 continue;
597
598                         if (entryType == from_ascii("comment")) {
599
600                                 ifs.ignore(numeric_limits<int>::max(), '\n');
601                                 continue;
602                         }
603
604                         ifs.get(ch);
605                         if (!ifs)
606                                 break;
607
608                         if ((ch != '(') && (ch != '{')) {
609                                 // invalid entry delimiter
610                                 ifs.putback(ch);
611                                 continue;
612                         }
613
614                         // process the entry
615                         if (entryType == from_ascii("string")) {
616
617                                 // read string and add it to the strings map
618                                 // (or replace it's old value)
619                                 docstring name;
620                                 docstring value;
621
622                                 if (!readTypeOrKey(name, ifs, from_ascii("="), 
623                                                    from_ascii("#{}(),"), makeLowerCase) || !ifs)
624                                         continue;
625
626                                 // next char must be an equal sign
627                                 ifs.get(ch);
628                                 if (!ifs || ch != '=')
629                                         continue;
630
631                                 if (!readValue(value, ifs, strings))
632                                         continue;
633
634                                 strings[name] = value;
635
636                         } else if (entryType == from_ascii("preamble")) {
637
638                                 // preamble definitions are discarded.
639                                 // can they be of any use in lyx?
640                                 docstring value;
641
642                                 if (!readValue(value, ifs, strings))
643                                         continue;
644
645                         } else {
646
647                                 // Citation entry. Try to read the key.
648                                 docstring key;
649
650                                 if (!readTypeOrKey(key, ifs, from_ascii(","), 
651                                                    from_ascii("}"), keepCase) || !ifs)
652                                         continue;
653
654                                 /////////////////////////////////////////////
655                                 // now we have a key, so we will add an entry 
656                                 // (even if it's empty, as bibtex does)
657                                 //
658                                 // we now read the field = value pairs.
659                                 // all items must be separated by a comma. If
660                                 // it is missing the scanning of this entry is
661                                 // stopped and the next is searched.
662                                 docstring fields;
663                                 docstring name;
664                                 docstring value;
665                                 docstring commaNewline;
666                                 docstring data;
667                                 BibTeXInfo keyvalmap(key, entryType);
668                                 
669                                 bool readNext = removeWSAndComma(ifs);
670  
671                                 while (ifs && readNext) {
672
673                                         // read field name
674                                         if (!readTypeOrKey(name, ifs, from_ascii("="), 
675                                                            from_ascii("{}(),"), makeLowerCase) || !ifs)
676                                                 break;
677
678                                         // next char must be an equal sign
679                                         ifs.get(ch);
680                                         if (!ifs)
681                                                 break;
682                                         if (ch != '=') {
683                                                 ifs.putback(ch);
684                                                 break;
685                                         }
686
687                                         // read field value
688                                         if (!readValue(value, ifs, strings))
689                                                 break;
690
691                                         keyvalmap[name] = value;
692                                         data += "\n\n" + value;
693                                         keylist.addFieldName(name);
694                                         readNext = removeWSAndComma(ifs);
695                                 }
696
697                                 // add the new entry
698                                 keylist.addEntryType(entryType);
699                                 keyvalmap.setAllData(data);
700                                 keylist[key] = keyvalmap;
701                         }
702                 } //< searching '@'
703         } //< for loop over files
704 }
705
706
707 FileName InsetBibtex::getBibTeXPath(docstring const & filename, Buffer const & buf)
708 {
709         string texfile = changeExtension(to_utf8(filename), "bib");
710         // note that, if the filename can be found directly from the path, 
711         // findtexfile will just return a FileName object for that path.
712         FileName file(findtexfile(texfile, "bib"));
713         if (file.empty())
714                 file = FileName(makeAbsPath(texfile, buf.filePath()));
715         return file;
716 }
717  
718
719 bool InsetBibtex::addDatabase(docstring const & db)
720 {
721         docstring bibfiles = getParam("bibfiles");
722         if (tokenPos(bibfiles, ',', db) != -1)
723                 return false;
724         if (!bibfiles.empty())
725                 bibfiles += ',';
726         setParam("bibfiles", bibfiles + db);
727         return true;
728 }
729
730
731 bool InsetBibtex::delDatabase(docstring const & db)
732 {
733         docstring bibfiles = getParam("bibfiles");
734         if (contains(bibfiles, db)) {
735                 int const n = tokenPos(bibfiles, ',', db);
736                 docstring bd = db;
737                 if (n > 0) {
738                         // this is not the first database
739                         docstring tmp = ',' + bd;
740                         setParam("bibfiles", subst(bibfiles, tmp, docstring()));
741                 } else if (n == 0)
742                         // this is the first (or only) database
743                         setParam("bibfiles", split(bibfiles, bd, ','));
744                 else
745                         return false;
746         }
747         return true;
748 }
749
750
751 void InsetBibtex::validate(LaTeXFeatures & features) const
752 {
753         if (features.bufferParams().use_bibtopic)
754                 features.require("bibtopic");
755 }
756
757
758 } // namespace lyx