]> git.lyx.org Git - lyx.git/blob - src/insets/InsetBibtex.cpp
* Text3.cpp (doDispatch): fix the behaviour of word-delete-forward,
[lyx.git] / src / insets / InsetBibtex.cpp
1 /**
2  * \file InsetBibtex.cpp
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author Alejandro Aguilar Sierra
7  *
8  * Full author contact details are available in file CREDITS.
9  */
10
11 #include <config.h>
12
13 #include "InsetBibtex.h"
14
15 #include "Buffer.h"
16 #include "BufferParams.h"
17 #include "DispatchResult.h"
18 #include "debug.h"
19 #include "Encoding.h"
20 #include "FuncRequest.h"
21 #include "gettext.h"
22 #include "LaTeXFeatures.h"
23 #include "MetricsInfo.h"
24 #include "OutputParams.h"
25
26 #include "frontends/alert.h"
27
28 #include "support/filetools.h"
29 #include "support/lstrings.h"
30 #include "support/lyxlib.h"
31 #include "support/os.h"
32 #include "support/Path.h"
33 #include "support/textutils.h"
34
35 #include <boost/tokenizer.hpp>
36
37
38 namespace lyx {
39
40 using support::absolutePath;
41 using support::ascii_lowercase;
42 using support::changeExtension;
43 using support::contains;
44 using support::copy;
45 using support::DocFileName;
46 using support::FileName;
47 using support::findtexfile;
48 using support::isFileReadable;
49 using support::isValidLaTeXFilename;
50 using support::latex_path;
51 using support::ltrim;
52 using support::makeAbsPath;
53 using support::makeRelPath;
54 using support::prefixIs;
55 using support::removeExtension;
56 using support::rtrim;
57 using support::split;
58 using support::subst;
59 using support::tokenPos;
60 using support::trim;
61 using support::lowercase;
62
63 namespace Alert = frontend::Alert;
64 namespace os = support::os;
65
66 using std::endl;
67 using std::getline;
68 using std::string;
69 using std::ostream;
70 using std::pair;
71 using std::vector;
72 using std::map;
73
74
75 InsetBibtex::InsetBibtex(InsetCommandParams const & p)
76         : InsetCommand(p, "bibtex")
77 {}
78
79
80 std::auto_ptr<Inset> InsetBibtex::doClone() const
81 {
82         return std::auto_ptr<Inset>(new InsetBibtex(*this));
83 }
84
85
86 void InsetBibtex::doDispatch(Cursor & cur, FuncRequest & cmd)
87 {
88         switch (cmd.action) {
89
90         case LFUN_INSET_MODIFY: {
91                 InsetCommandParams p("bibtex");
92                 InsetCommandMailer::string2params("bibtex", to_utf8(cmd.argument()), p);
93                 if (!p.getCmdName().empty()) {
94                         setParams(p);
95                         cur.buffer().updateBibfilesCache();
96                 } else
97                         cur.noUpdate();
98                 break;
99         }
100
101         default:
102                 InsetCommand::doDispatch(cur, cmd);
103                 break;
104         }
105 }
106
107
108 docstring const InsetBibtex::getScreenLabel(Buffer const &) const
109 {
110         return _("BibTeX Generated Bibliography");
111 }
112
113
114 namespace {
115
116 string normalize_name(Buffer const & buffer, OutputParams const & runparams,
117                       string const & name, string const & ext)
118 {
119         string const fname = makeAbsPath(name, buffer.filePath()).absFilename();
120         if (absolutePath(name) || !isFileReadable(FileName(fname + ext)))
121                 return name;
122         else if (!runparams.nice)
123                 return fname;
124         else
125                 // FIXME UNICODE
126                 return to_utf8(makeRelPath(from_utf8(fname),
127                                            from_utf8(buffer.getMasterBuffer()->filePath())));
128 }
129
130 }
131
132
133 int InsetBibtex::latex(Buffer const & buffer, odocstream & os,
134                        OutputParams const & runparams) const
135 {
136         // the sequence of the commands:
137         // 1. \bibliographystyle{style}
138         // 2. \addcontentsline{...} - if option bibtotoc set
139         // 3. \bibliography{database}
140         // and with bibtopic:
141         // 1. \bibliographystyle{style}
142         // 2. \begin{btSect}{database}
143         // 3. \btPrint{Cited|NotCited|All}
144         // 4. \end{btSect}
145
146         // Database(s)
147         // If we are processing the LaTeX file in a temp directory then
148         // copy the .bib databases to this temp directory, mangling their
149         // names in the process. Store this mangled name in the list of
150         // all databases.
151         // (We need to do all this because BibTeX *really*, *really*
152         // can't handle "files with spaces" and Windows users tend to
153         // use such filenames.)
154         // Otherwise, store the (maybe absolute) path to the original,
155         // unmangled database name.
156         typedef boost::char_separator<char_type> Separator;
157         typedef boost::tokenizer<Separator, docstring::const_iterator, docstring> Tokenizer;
158
159         Separator const separator(from_ascii(",").c_str());
160         // The tokenizer must not be called with temporary strings, since
161         // it does not make a copy and uses iterators of the string further
162         // down. getParam returns a reference, so this is OK.
163         Tokenizer const tokens(getParam("bibfiles"), separator);
164         Tokenizer::const_iterator const begin = tokens.begin();
165         Tokenizer::const_iterator const end = tokens.end();
166
167         odocstringstream dbs;
168         for (Tokenizer::const_iterator it = begin; it != end; ++it) {
169                 docstring const input = trim(*it);
170                 // FIXME UNICODE
171                 string utf8input(to_utf8(input));
172                 string database =
173                         normalize_name(buffer, runparams, utf8input, ".bib");
174                 FileName const try_in_file(makeAbsPath(database + ".bib", buffer.filePath()));
175                 bool const not_from_texmf = isFileReadable(try_in_file);
176
177                 if (!runparams.inComment && !runparams.dryrun && !runparams.nice &&
178                     not_from_texmf) {
179
180                         // mangledFilename() needs the extension
181                         DocFileName const in_file = DocFileName(try_in_file);
182                         database = removeExtension(in_file.mangledFilename());
183                         FileName const out_file(makeAbsPath(database + ".bib",
184                                         buffer.getMasterBuffer()->temppath()));
185
186                         bool const success = copy(in_file, out_file);
187                         if (!success) {
188                                 lyxerr << "Failed to copy '" << in_file
189                                        << "' to '" << out_file << "'"
190                                        << endl;
191                         }
192                 } else if (!runparams.inComment && runparams.nice && not_from_texmf &&
193                            !isValidLaTeXFilename(database)) {
194                                 frontend::Alert::warning(_("Invalid filename"),
195                                                          _("The following filename is likely to cause trouble "
196                                                            "when running the exported file through LaTeX: ") +
197                                                             from_utf8(database));
198                 }
199
200                 if (it != begin)
201                         dbs << ',';
202                 // FIXME UNICODE
203                 dbs << from_utf8(latex_path(database));
204         }
205         docstring const db_out = dbs.str();
206
207         // Post this warning only once.
208         static bool warned_about_spaces = false;
209         if (!warned_about_spaces &&
210             runparams.nice && db_out.find(' ') != docstring::npos) {
211                 warned_about_spaces = true;
212
213                 Alert::warning(_("Export Warning!"),
214                                _("There are spaces in the paths to your BibTeX databases.\n"
215                                               "BibTeX will be unable to find them."));
216
217         }
218
219         // Style-Options
220         string style = to_utf8(getParam("options")); // maybe empty! and with bibtotoc
221         string bibtotoc;
222         if (prefixIs(style, "bibtotoc")) {
223                 bibtotoc = "bibtotoc";
224                 if (contains(style, ',')) {
225                         style = split(style, bibtotoc, ',');
226                 }
227         }
228
229         // line count
230         int nlines = 0;
231
232         if (!style.empty()) {
233                 string base =
234                         normalize_name(buffer, runparams, style, ".bst");
235                 FileName const try_in_file(makeAbsPath(base + ".bst", buffer.filePath()));
236                 bool const not_from_texmf = isFileReadable(try_in_file);
237                 // If this style does not come from texmf and we are not
238                 // exporting to .tex copy it to the tmp directory.
239                 // This prevents problems with spaces and 8bit charcaters
240                 // in the file name.
241                 if (!runparams.inComment && !runparams.dryrun && !runparams.nice &&
242                     not_from_texmf) {
243                         // use new style name
244                         DocFileName const in_file = DocFileName(try_in_file);
245                         base = removeExtension(in_file.mangledFilename());
246                         FileName const out_file(makeAbsPath(base + ".bst",
247                                         buffer.getMasterBuffer()->temppath()));
248                         bool const success = copy(in_file, out_file);
249                         if (!success) {
250                                 lyxerr << "Failed to copy '" << in_file
251                                        << "' to '" << out_file << "'"
252                                        << endl;
253                         }
254                 }
255                 // FIXME UNICODE
256                 os << "\\bibliographystyle{"
257                    << from_utf8(latex_path(normalize_name(buffer, runparams, base, ".bst")))
258                    << "}\n";
259                 nlines += 1;
260         }
261
262         // Post this warning only once.
263         static bool warned_about_bst_spaces = false;
264         if (!warned_about_bst_spaces && runparams.nice && contains(style, ' ')) {
265                 warned_about_bst_spaces = true;
266                 Alert::warning(_("Export Warning!"),
267                                _("There are spaces in the path to your BibTeX style file.\n"
268                                               "BibTeX will be unable to find it."));
269         }
270
271         if (!db_out.empty() && buffer.params().use_bibtopic){
272                 os << "\\begin{btSect}{" << db_out << "}\n";
273                 docstring btprint = getParam("btprint");
274                 if (btprint.empty())
275                         // default
276                         btprint = from_ascii("btPrintCited");
277                 os << "\\" << btprint << "\n"
278                    << "\\end{btSect}\n";
279                 nlines += 3;
280         }
281
282         // bibtotoc-Option
283         if (!bibtotoc.empty() && !buffer.params().use_bibtopic) {
284                 // maybe a problem when a textclass has no "art" as
285                 // part of its name, because it's than book.
286                 // For the "official" lyx-layouts it's no problem to support
287                 // all well
288                 if (!contains(buffer.params().getTextClass().name(),
289                               "art")) {
290                         if (buffer.params().sides == TextClass::OneSide) {
291                                 // oneside
292                                 os << "\\clearpage";
293                         } else {
294                                 // twoside
295                                 os << "\\cleardoublepage";
296                         }
297
298                         // bookclass
299                         os << "\\addcontentsline{toc}{chapter}{\\bibname}";
300
301                 } else {
302                         // article class
303                         os << "\\addcontentsline{toc}{section}{\\refname}";
304                 }
305         }
306
307         if (!db_out.empty() && !buffer.params().use_bibtopic){
308                 os << "\\bibliography{" << db_out << "}\n";
309                 nlines += 1;
310         }
311
312         return nlines;
313 }
314
315
316 vector<FileName> const InsetBibtex::getFiles(Buffer const & buffer) const
317 {
318         FileName path(buffer.filePath());
319         support::Path p(path);
320
321         vector<FileName> vec;
322
323         string tmp;
324         // FIXME UNICODE
325         string bibfiles = to_utf8(getParam("bibfiles"));
326         bibfiles = split(bibfiles, tmp, ',');
327         while (!tmp.empty()) {
328                 FileName const file = findtexfile(changeExtension(tmp, "bib"), "bib");
329                 LYXERR(Debug::LATEX) << "Bibfile: " << file << endl;
330
331                 // If we didn't find a matching file name just fail silently
332                 if (!file.empty())
333                         vec.push_back(file);
334
335                 // Get next file name
336                 bibfiles = split(bibfiles, tmp, ',');
337         }
338
339         return vec;
340 }
341
342 namespace {
343
344         // methods for parsing bibtex files
345
346         typedef map<docstring, docstring> VarMap;
347
348         /// remove whitespace characters, optionally a single comma,
349         /// and further whitespace characters from the stream.
350         /// @return true if a comma was found, false otherwise
351         ///
352         bool removeWSAndComma(idocfstream & ifs) {
353                 char_type ch;
354
355                 if (!ifs)
356                         return false;
357
358                 // skip whitespace
359                 do {
360                         ifs.get(ch);
361                 } while (ifs && isSpace(ch));
362
363                 if (!ifs)
364                         return false;
365
366                 if (ch != ',') {
367                         ifs.putback(ch);
368                         return false;
369                 }
370
371                 // skip whitespace
372                 do {
373                         ifs.get(ch);
374                 } while (ifs && isSpace(ch));
375
376                 if (ifs) {
377                         ifs.putback(ch);
378                 }
379
380                 return true;
381         }
382
383
/// Tells readTypeOrKey() how to store the characters it reads.
enum charCase {
	/// convert each character to lower case before storing it
	makeLowerCase,
	/// store each character exactly as read
	keepCase
};
388
389         /// remove whitespace characters, read characer sequence
390         /// not containing whitespace characters or characters in
391         /// delimChars, and remove further whitespace characters.
392         ///
393         /// @return true if a string of length > 0 could be read.
394         ///
395         bool readTypeOrKey(docstring & val, idocfstream & ifs,
396                 docstring const & delimChars, docstring const &illegalChars, 
397                 charCase chCase) {
398
399                 char_type ch;
400
401                 val.clear();
402
403                 if (!ifs)
404                         return false;
405
406                 // skip whitespace
407                 do {
408                         ifs.get(ch);
409                 } while (ifs && isSpace(ch));
410
411                 if (!ifs)
412                         return false;
413
414                 // read value
415                 bool legalChar = true;
416                 while (ifs && !isSpace(ch) && 
417                            delimChars.find(ch) == docstring::npos &&
418                            (legalChar = illegalChars.find(ch) == docstring::npos)
419                            ) {
420                         if (chCase == makeLowerCase) {
421                                 val += lowercase(ch);
422                         } else {
423                                 val += ch;
424                         }
425                         ifs.get(ch);
426                 }
427                 
428                 if (!legalChar) {
429                         ifs.putback(ch);
430                         return false;
431                 }
432
433                 // skip whitespace
434                 while (ifs && isSpace(ch)) {
435                         ifs.get(ch);
436                 }
437
438                 if (ifs) {
439                         ifs.putback(ch);
440                 }
441
442                 return val.length() > 0;
443         }
444
445         /// read subsequent bibtex values that are delimited with a #-character.
446         /// Concatenate all parts and replace names with the associated string in
447         /// the variable strings.
448         /// @return true if reading was successfull (all single parts were delimited
449         /// correctly)
450         bool readValue(docstring & val, idocfstream & ifs, const VarMap & strings) {
451
452                 char_type ch;
453
454                 val.clear();
455
456                 if (!ifs)
457                         return false;
458
459                 do {
460                         // skip whitespace
461                         do {
462                                 ifs.get(ch);
463                         } while (ifs && isSpace(ch));
464
465                         if (!ifs)
466                                 return false;
467
468                         // check for field type
469                         if (isDigit(ch)) {
470
471                                 // read integer value
472                                 do {
473                                         val += ch;
474                                         ifs.get(ch);
475                                 } while (ifs && isDigit(ch));
476
477                                 if (!ifs)
478                                         return false;
479
480                         } else if (ch == '"' || ch == '{') {
481
482                                 // read delimited text - set end delimiter
483                                 char_type delim = ch == '"'? '"': '}';
484
485                                 // inside this delimited text braces must match.
486                                 // Thus we can have a closing delimiter only
487                                 // when nestLevel == 0
488                                 int nestLevel = 0;
489
490                                 ifs.get(ch);
491                                 while (ifs && (nestLevel > 0 || ch != delim)) {
492                                         val += ch;
493
494                                         // update nesting level
495                                         switch (ch) {
496                                                 case '{':
497                                                         ++nestLevel;
498                                                         break;
499                                                 case '}':
500                                                         --nestLevel;
501                                                         if (nestLevel < 0) return false;
502                                                         break;
503                                         }
504
505                                         ifs.get(ch);
506                                 }
507
508                                 if (!ifs)
509                                         return false;
510
511                                 ifs.get(ch);
512
513                                 if (!ifs)
514                                         return false;
515
516                         } else {
517
518                                 // reading a string name
519                                 docstring strName;
520
521                                 while (ifs && !isSpace(ch) && ch != '#' && ch != ',' && ch != '}' && ch != ')') {
522                                         strName += lowercase(ch);
523                                         ifs.get(ch);
524                                 }
525
526                                 if (!ifs)
527                                         return false;
528
529                                 // replace the string with its assigned value or
530                                 // discard it if it's not assigned
531                                 if (strName.length()) {
532                                         VarMap::const_iterator pos = strings.find(strName);
533                                         if (pos != strings.end()) {
534                                                 val += pos->second;
535                                         }
536                                 }
537                         }
538
539                         // skip WS
540                         while (ifs && isSpace(ch)) {
541                                 ifs.get(ch);
542                         }
543
544                         if (!ifs)
545                                 return false;
546
547                         // continue reading next value on concatenate with '#'
548                 } while (ch == '#');
549
550                 ifs.putback(ch);
551
552                 return true;
553         }
554 }
555
556
557 // This method returns a comma separated list of Bibtex entries
558 void InsetBibtex::fillWithBibKeys(Buffer const & buffer,
559                 std::vector<std::pair<string, docstring> > & keys) const
560 {
561         vector<FileName> const files = getFiles(buffer);
562         for (vector<FileName>::const_iterator it = files.begin();
563              it != files.end(); ++ it) {
564                 // This bibtex parser is a first step to parse bibtex files
565                 // more precisely.
566                 //
567                 // - it reads the whole bibtex entry and does a syntax check
568                 //   (matching delimiters, missing commas,...
569                 // - it recovers from errors starting with the next @-character
570                 // - it reads @string definitions and replaces them in the
571                 //   field values.
572                 // - it accepts more characters in keys or value names than
573                 //   bibtex does.
574                 //
575                 // TODOS:
576                 // - the entries are split into name = value pairs by the
577                 //   parser. These have to be merged again because of the
578                 //   way lyx treats the entries ( pair<...>(...) ). The citation
579                 //   mechanism in lyx should be changed such that it can use
580                 //   the split entries.
581                 // - messages on parsing errors can be generated.
582                 //
583
584                 // Officially bibtex does only support ASCII, but in practice
585                 // you can use the encoding of the main document as long as
586                 // some elements like keys and names are pure ASCII. Therefore
587                 // we convert the file from the buffer encoding.
588                 // We don't restrict keys to ASCII in LyX, since our own
589                 // InsetBibitem can generate non-ASCII keys, and nonstandard
590                 // 8bit clean bibtex forks exist.
591                 idocfstream ifs(it->toFilesystemEncoding().c_str(),
592                                 std::ios_base::in,
593                                 buffer.params().encoding().iconvName());
594
595                 char_type ch;
596                 VarMap strings;
597
598                 while (ifs) {
599
600                         ifs.get(ch);
601                         if (!ifs)
602                                 break;
603
604                         if (ch != '@')
605                                 continue;
606
607                         docstring entryType;
608
609                         if (!readTypeOrKey(entryType, ifs, from_ascii("{("), 
610                                            docstring(), makeLowerCase) || !ifs)
611                                 continue;
612
613                         if (entryType == from_ascii("comment")) {
614
615                                 ifs.ignore(std::numeric_limits<int>::max(), '\n');
616                                 continue;
617                         }
618
619                         ifs.get(ch);
620                         if (!ifs)
621                                 break;
622
623                         if ((ch != '(') && (ch != '{')) {
624                                 // invalid entry delimiter
625                                 ifs.putback(ch);
626                                 continue;
627                         }
628
629                         // process the entry
630                         if (entryType == from_ascii("string")) {
631
632                                 // read string and add it to the strings map
633                                 // (or replace it's old value)
634                                 docstring name;
635                                 docstring value;
636
637                                 if (!readTypeOrKey(name, ifs, from_ascii("="), 
638                                                    from_ascii("#{}(),"), makeLowerCase) || !ifs)
639                                         continue;
640
641                                 // next char must be an equal sign
642                                 ifs.get(ch);
643                                 if (!ifs || ch != '=')
644                                         continue;
645
646                                 if (!readValue(value, ifs, strings))
647                                         continue;
648
649                                 strings[name] = value;
650
651                         } else if (entryType == from_ascii("preamble")) {
652
653                                 // preamble definitions are discarded.
654                                 // can they be of any use in lyx?
655                                 docstring value;
656
657                                 if (!readValue(value, ifs, strings))
658                                         continue;
659
660                         } else {
661
662                                 // Citation entry. Read the key and all name = value pairs
663                                 docstring key;
664                                 docstring fields;
665                                 docstring name;
666                                 docstring value;
667                                 docstring commaNewline;
668
669                                 if (!readTypeOrKey(key, ifs, from_ascii(","), 
670                                                    from_ascii("}"), keepCase) || !ifs)
671                                         continue;
672
673                                 // now we have a key, so we will add an entry
674                                 // (even if it's empty, as bibtex does)
675                                 //
676                                 // all items must be separated by a comma. If
677                                 // it is missing the scanning of this entry is
678                                 // stopped and the next is searched.
679                                 bool readNext = removeWSAndComma(ifs);
680
681                                 while (ifs && readNext) {
682
683                                         // read field name
684                                         if (!readTypeOrKey(name, ifs, from_ascii("="), 
685                                                            from_ascii("{}(),"), makeLowerCase) || !ifs)
686                                                 break;
687
688                                         // next char must be an equal sign
689                                         ifs.get(ch);
690                                         if (!ifs)
691                                                 break;
692                                         if (ch != '=') {
693                                                 ifs.putback(ch);
694                                                 break;
695                                         }
696
697                                         // read field value
698                                         if (!readValue(value, ifs, strings))
699                                                 break;
700
701                                         // append field to the total entry string.
702                                         //
703                                         // TODO: Here is where the fields can be put in
704                                         //       a more intelligent structure that preserves
705                                         //           the already known parts.
706                                         fields += commaNewline;
707                                         fields += name + from_ascii(" = {") + value + '}';
708
709                                         if (!commaNewline.length())
710                                                 commaNewline = from_ascii(",\n");
711
712                                         readNext = removeWSAndComma(ifs);
713                                 }
714
715                                 // add the new entry
716                                 keys.push_back(pair<string, docstring>(
717                                 to_utf8(key), fields));
718                         }
719
720                 } //< searching '@'
721
722         } //< for loop over files
723 }
724
725
726
727 bool InsetBibtex::addDatabase(string const & db)
728 {
729         // FIXME UNICODE
730         string bibfiles(to_utf8(getParam("bibfiles")));
731         if (tokenPos(bibfiles, ',', db) == -1) {
732                 if (!bibfiles.empty())
733                         bibfiles += ',';
734                 setParam("bibfiles", from_utf8(bibfiles + db));
735                 return true;
736         }
737         return false;
738 }
739
740
741 bool InsetBibtex::delDatabase(string const & db)
742 {
743         // FIXME UNICODE
744         string bibfiles(to_utf8(getParam("bibfiles")));
745         if (contains(bibfiles, db)) {
746                 int const n = tokenPos(bibfiles, ',', db);
747                 string bd = db;
748                 if (n > 0) {
749                         // this is not the first database
750                         string tmp = ',' + bd;
751                         setParam("bibfiles", from_utf8(subst(bibfiles, tmp, string())));
752                 } else if (n == 0)
753                         // this is the first (or only) database
754                         setParam("bibfiles", from_utf8(split(bibfiles, bd, ',')));
755                 else
756                         return false;
757         }
758         return true;
759 }
760
761
762 void InsetBibtex::validate(LaTeXFeatures & features) const
763 {
764         if (features.bufferParams().use_bibtopic)
765                 features.require("bibtopic");
766 }
767
768
769 } // namespace lyx