]> git.lyx.org Git - lyx.git/blob - src/insets/InsetBibtex.cpp
Change semantics of FileName::copyTo(): we now overwrite the target file unconditionally.
[lyx.git] / src / insets / InsetBibtex.cpp
1 /**
2  * \file InsetBibtex.cpp
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author Alejandro Aguilar Sierra
7  * \author Richard Heck (BibTeX parser improvements)
8  *
9  * Full author contact details are available in file CREDITS.
10  */
11
12 #include <config.h>
13
14 #include "InsetBibtex.h"
15
16 #include "Buffer.h"
17 #include "BufferParams.h"
18 #include "DispatchResult.h"
19 #include "support/debug.h"
20 #include "Encoding.h"
21 #include "FuncRequest.h"
22 #include "support/gettext.h"
23 #include "LaTeXFeatures.h"
24 #include "MetricsInfo.h"
25 #include "OutputParams.h"
26 #include "TextClass.h"
27
28 #include "frontends/alert.h"
29
30 #include "support/ExceptionMessage.h"
31 #include "support/docstream.h"
32 #include "support/FileNameList.h"
33 #include "support/filetools.h"
34 #include "support/lstrings.h"
35 #include "support/lyxlib.h"
36 #include "support/os.h"
37 #include "support/Path.h"
38 #include "support/textutils.h"
39
40 #include <boost/tokenizer.hpp>
41
42 using namespace std;
43 using namespace lyx::support;
44
45 namespace lyx {
46
47 namespace Alert = frontend::Alert;
48 namespace os = support::os;
49
50
51 InsetBibtex::InsetBibtex(InsetCommandParams const & p)
52         : InsetCommand(p, "bibtex")
53 {}
54
55
56 CommandInfo const * InsetBibtex::findInfo(string const & /* cmdName */)
57 {
58         static const char * const paramnames[] = 
59                 {"options", "btprint", "bibfiles", ""};
60         static const bool isoptional[] = {true, true, false};
61         static const CommandInfo info = {3, paramnames, isoptional};
62         return &info;
63 }
64
65
66 Inset * InsetBibtex::clone() const
67 {
68         return new InsetBibtex(*this);
69 }
70
71
72 void InsetBibtex::doDispatch(Cursor & cur, FuncRequest & cmd)
73 {
74         switch (cmd.action) {
75
76         case LFUN_INSET_MODIFY: {
77                 InsetCommandParams p(BIBTEX_CODE);
78                 try {
79                         if (!InsetCommandMailer::string2params("bibtex", 
80                                         to_utf8(cmd.argument()), p)) {
81                                 cur.noUpdate();
82                                 break;
83                         }
84                 } catch (ExceptionMessage const & message) {
85                         if (message.type_ == WarningException) {
86                                 Alert::warning(message.title_, message.details_);
87                                 cur.noUpdate();
88                         } else 
89                                 throw message;
90                         break;
91                 }
92                 setParams(p);
93                 cur.buffer().updateBibfilesCache();
94                 break;
95         }
96
97         default:
98                 InsetCommand::doDispatch(cur, cmd);
99                 break;
100         }
101 }
102
103
104 docstring const InsetBibtex::getScreenLabel(Buffer const &) const
105 {
106         return _("BibTeX Generated Bibliography");
107 }
108
109
110 namespace {
111
112 string normalizeName(Buffer const & buffer, OutputParams const & runparams,
113                       string const & name, string const & ext)
114 {
115         string const fname = makeAbsPath(name, buffer.filePath()).absFilename();
116         if (absolutePath(name) || !FileName(fname + ext).isReadableFile())
117                 return name;
118         if (!runparams.nice)
119                 return fname;
120
121         // FIXME UNICODE
122         return to_utf8(makeRelPath(from_utf8(fname),
123                                          from_utf8(buffer.masterBuffer()->filePath())));
124 }
125
126 }
127
128
129 int InsetBibtex::latex(Buffer const & buffer, odocstream & os,
130                        OutputParams const & runparams) const
131 {
132         // the sequence of the commands:
133         // 1. \bibliographystyle{style}
134         // 2. \addcontentsline{...} - if option bibtotoc set
135         // 3. \bibliography{database}
136         // and with bibtopic:
137         // 1. \bibliographystyle{style}
138         // 2. \begin{btSect}{database}
139         // 3. \btPrint{Cited|NotCited|All}
140         // 4. \end{btSect}
141
142         // Database(s)
143         // If we are processing the LaTeX file in a temp directory then
144         // copy the .bib databases to this temp directory, mangling their
145         // names in the process. Store this mangled name in the list of
146         // all databases.
147         // (We need to do all this because BibTeX *really*, *really*
148         // can't handle "files with spaces" and Windows users tend to
149         // use such filenames.)
150         // Otherwise, store the (maybe absolute) path to the original,
151         // unmangled database name.
152         typedef boost::char_separator<char_type> Separator;
153         typedef boost::tokenizer<Separator, docstring::const_iterator, docstring> Tokenizer;
154
155         Separator const separator(from_ascii(",").c_str());
156         // The tokenizer must not be called with temporary strings, since
157         // it does not make a copy and uses iterators of the string further
158         // down. getParam returns a reference, so this is OK.
159         Tokenizer const tokens(getParam("bibfiles"), separator);
160         Tokenizer::const_iterator const begin = tokens.begin();
161         Tokenizer::const_iterator const end = tokens.end();
162
163         odocstringstream dbs;
164         for (Tokenizer::const_iterator it = begin; it != end; ++it) {
165                 docstring const input = trim(*it);
166                 // FIXME UNICODE
167                 string utf8input = to_utf8(input);
168                 string database =
169                         normalizeName(buffer, runparams, utf8input, ".bib");
170                 FileName const try_in_file(makeAbsPath(database + ".bib", buffer.filePath()));
171                 bool const not_from_texmf = try_in_file.isReadableFile();
172
173                 if (!runparams.inComment && !runparams.dryrun && !runparams.nice &&
174                     not_from_texmf) {
175
176                         // mangledFilename() needs the extension
177                         DocFileName const in_file = DocFileName(try_in_file);
178                         database = removeExtension(in_file.mangledFilename());
179                         FileName const out_file = makeAbsPath(database + ".bib",
180                                         buffer.masterBuffer()->temppath());
181
182                         bool const success = in_file.copyTo(out_file);
183                         if (!success) {
184                                 lyxerr << "Failed to copy '" << in_file
185                                        << "' to '" << out_file << "'"
186                                        << endl;
187                         }
188                 } else if (!runparams.inComment && runparams.nice && not_from_texmf &&
189                            !isValidLaTeXFilename(database)) {
190                                 frontend::Alert::warning(_("Invalid filename"),
191                                                          _("The following filename is likely to cause trouble "
192                                                            "when running the exported file through LaTeX: ") +
193                                                             from_utf8(database));
194                 }
195
196                 if (it != begin)
197                         dbs << ',';
198                 // FIXME UNICODE
199                 dbs << from_utf8(latex_path(database));
200         }
201         docstring const db_out = dbs.str();
202
203         // Post this warning only once.
204         static bool warned_about_spaces = false;
205         if (!warned_about_spaces &&
206             runparams.nice && db_out.find(' ') != docstring::npos) {
207                 warned_about_spaces = true;
208
209                 Alert::warning(_("Export Warning!"),
210                                _("There are spaces in the paths to your BibTeX databases.\n"
211                                               "BibTeX will be unable to find them."));
212         }
213
214         // Style-Options
215         string style = to_utf8(getParam("options")); // maybe empty! and with bibtotoc
216         string bibtotoc;
217         if (prefixIs(style, "bibtotoc")) {
218                 bibtotoc = "bibtotoc";
219                 if (contains(style, ','))
220                         style = split(style, bibtotoc, ',');
221         }
222
223         // line count
224         int nlines = 0;
225
226         if (!style.empty()) {
227                 string base = normalizeName(buffer, runparams, style, ".bst");
228                 FileName const try_in_file(makeAbsPath(base + ".bst", buffer.filePath()));
229                 bool const not_from_texmf = try_in_file.isReadableFile();
230                 // If this style does not come from texmf and we are not
231                 // exporting to .tex copy it to the tmp directory.
232                 // This prevents problems with spaces and 8bit charcaters
233                 // in the file name.
234                 if (!runparams.inComment && !runparams.dryrun && !runparams.nice &&
235                     not_from_texmf) {
236                         // use new style name
237                         DocFileName const in_file = DocFileName(try_in_file);
238                         base = removeExtension(in_file.mangledFilename());
239                         FileName const out_file(makeAbsPath(base + ".bst",
240                                         buffer.masterBuffer()->temppath()));
241                         bool const success = in_file.copyTo(out_file);
242                         if (!success) {
243                                 lyxerr << "Failed to copy '" << in_file
244                                        << "' to '" << out_file << "'"
245                                        << endl;
246                         }
247                 }
248                 // FIXME UNICODE
249                 os << "\\bibliographystyle{"
250                    << from_utf8(latex_path(normalizeName(buffer, runparams, base, ".bst")))
251                    << "}\n";
252                 nlines += 1;
253         }
254
255         // Post this warning only once.
256         static bool warned_about_bst_spaces = false;
257         if (!warned_about_bst_spaces && runparams.nice && contains(style, ' ')) {
258                 warned_about_bst_spaces = true;
259                 Alert::warning(_("Export Warning!"),
260                                _("There are spaces in the path to your BibTeX style file.\n"
261                                               "BibTeX will be unable to find it."));
262         }
263
264         if (!db_out.empty() && buffer.params().use_bibtopic){
265                 os << "\\begin{btSect}{" << db_out << "}\n";
266                 docstring btprint = getParam("btprint");
267                 if (btprint.empty())
268                         // default
269                         btprint = from_ascii("btPrintCited");
270                 os << "\\" << btprint << "\n"
271                    << "\\end{btSect}\n";
272                 nlines += 3;
273         }
274
275         // bibtotoc-Option
276         if (!bibtotoc.empty() && !buffer.params().use_bibtopic) {
277                 // maybe a problem when a textclass has no "art" as
278                 // part of its name, because it's than book.
279                 // For the "official" lyx-layouts it's no problem to support
280                 // all well
281                 if (!contains(buffer.params().getTextClass().name(),
282                               "art")) {
283                         if (buffer.params().sides == OneSide) {
284                                 // oneside
285                                 os << "\\clearpage";
286                         } else {
287                                 // twoside
288                                 os << "\\cleardoublepage";
289                         }
290
291                         // bookclass
292                         os << "\\addcontentsline{toc}{chapter}{\\bibname}";
293
294                 } else {
295                         // article class
296                         os << "\\addcontentsline{toc}{section}{\\refname}";
297                 }
298         }
299
300         if (!db_out.empty() && !buffer.params().use_bibtopic){
301                 os << "\\bibliography{" << db_out << "}\n";
302                 nlines += 1;
303         }
304
305         return nlines;
306 }
307
308
309 FileNameList const InsetBibtex::getFiles(Buffer const & buffer) const
310 {
311         FileName path(buffer.filePath());
312         PathChanger p(path);
313
314         FileNameList vec;
315
316         string tmp;
317         // FIXME UNICODE
318         string bibfiles = to_utf8(getParam("bibfiles"));
319         bibfiles = split(bibfiles, tmp, ',');
320         while (!tmp.empty()) {
321                 FileName const file = findtexfile(changeExtension(tmp, "bib"), "bib");
322                 LYXERR(Debug::LATEX, "Bibfile: " << file);
323
324                 // If we didn't find a matching file name just fail silently
325                 if (!file.empty())
326                         vec.push_back(file);
327
328                 // Get next file name
329                 bibfiles = split(bibfiles, tmp, ',');
330         }
331
332         return vec;
333 }
334
335 namespace {
336
337         // methods for parsing bibtex files
338
339         typedef map<docstring, docstring> VarMap;
340
341         /// remove whitespace characters, optionally a single comma,
342         /// and further whitespace characters from the stream.
343         /// @return true if a comma was found, false otherwise
344         ///
345         bool removeWSAndComma(idocfstream & ifs) {
346                 char_type ch;
347
348                 if (!ifs)
349                         return false;
350
351                 // skip whitespace
352                 do {
353                         ifs.get(ch);
354                 } while (ifs && isSpace(ch));
355
356                 if (!ifs)
357                         return false;
358
359                 if (ch != ',') {
360                         ifs.putback(ch);
361                         return false;
362                 }
363
364                 // skip whitespace
365                 do {
366                         ifs.get(ch);
367                 } while (ifs && isSpace(ch));
368
369                 if (ifs) {
370                         ifs.putback(ch);
371                 }
372
373                 return true;
374         }
375
376
377         enum charCase {
378                 makeLowerCase,
379                 keepCase
380         };
381
382         /// remove whitespace characters, read characer sequence
383         /// not containing whitespace characters or characters in
384         /// delimChars, and remove further whitespace characters.
385         ///
386         /// @return true if a string of length > 0 could be read.
387         ///
388         bool readTypeOrKey(docstring & val, idocfstream & ifs,
389                 docstring const & delimChars, docstring const &illegalChars, 
390                 charCase chCase) {
391
392                 char_type ch;
393
394                 val.clear();
395
396                 if (!ifs)
397                         return false;
398
399                 // skip whitespace
400                 do {
401                         ifs.get(ch);
402                 } while (ifs && isSpace(ch));
403
404                 if (!ifs)
405                         return false;
406
407                 // read value
408                 bool legalChar = true;
409                 while (ifs && !isSpace(ch) && 
410                                                  delimChars.find(ch) == docstring::npos &&
411                                                  (legalChar = (illegalChars.find(ch) == docstring::npos))
412                                         ) 
413                 {
414                         if (chCase == makeLowerCase)
415                                 val += lowercase(ch);
416                         else
417                                 val += ch;
418                         ifs.get(ch);
419                 }
420                 
421                 if (!legalChar) {
422                         ifs.putback(ch);
423                         return false;
424                 }
425
426                 // skip whitespace
427                 while (ifs && isSpace(ch)) {
428                         ifs.get(ch);
429                 }
430
431                 if (ifs) {
432                         ifs.putback(ch);
433                 }
434
435                 return val.length() > 0;
436         }
437
        /// Read a BibTeX value made of one or more parts joined by the
        /// #-character and store the concatenation of all parts in \p val.
        /// A part is either a run of digits, a text delimited by "..." or
        /// {...}, or a name that is looked up (lowercased) in \p strings;
        /// names without an entry in \p strings contribute nothing.
        /// The character following the value is put back into the stream.
        /// @return true if reading was successful (all single parts were
        /// delimited correctly). False is also returned whenever the
        /// stream becomes unreadable, including directly after a part.
        bool readValue(docstring & val, idocfstream & ifs, const VarMap & strings) {

                char_type ch;

                val.clear();

                if (!ifs)
                        return false;

                // one iteration per '#'-separated part
                do {
                        // skip whitespace
                        do {
                                ifs.get(ch);
                        } while (ifs && isSpace(ch));

                        if (!ifs)
                                return false;

                        // check for field type
                        if (isDigit(ch)) {

                                // read integer value
                                do {
                                        val += ch;
                                        ifs.get(ch);
                                } while (ifs && isDigit(ch));

                                if (!ifs)
                                        return false;

                        } else if (ch == '"' || ch == '{') {
                                // set end delimiter
                                char_type delim = ch == '"' ? '"': '}';

                                // Skip whitespace directly after the opening
                                // delimiter
                                do {
                                        ifs.get(ch);
                                } while (ifs && isSpace(ch));
                                
                                if (!ifs)
                                        return false;
                                
                                // We now have the first non-whitespace character.
                                // We'll collapse adjacent whitespace.
                                bool lastWasWhiteSpace = false;
                                
                                // inside this delimited text braces must match.
                                // Thus we can have a closing delimiter only
                                // when nestLevel == 0
                                int nestLevel = 0;
 
                                while (ifs && (nestLevel > 0 || ch != delim)) {
                                        if (isSpace(ch)) {
                                                lastWasWhiteSpace = true;
                                                ifs.get(ch);
                                                continue;
                                        }
                                        // We output the space only after we stop getting
                                        // whitespace so as not to output any whitespace
                                        // at the end of the value.
                                        if (lastWasWhiteSpace) {
                                                lastWasWhiteSpace = false;
                                                val += ' ';
                                        }
                                        
                                        val += ch;

                                        // update nesting level
                                        switch (ch) {
                                                case '{':
                                                        ++nestLevel;
                                                        break;
                                                case '}':
                                                        --nestLevel;
                                                        // a closing brace without an
                                                        // opening one is an error
                                                        if (nestLevel < 0) return false;
                                                        break;
                                        }

                                        ifs.get(ch);
                                }

                                if (!ifs)
                                        return false;

                                // ch is the closing delimiter; read the
                                // character that follows it
                                ifs.get(ch);

                                if (!ifs)
                                        return false;

                        } else {

                                // reading a string name
                                docstring strName;

                                while (ifs && !isSpace(ch) && ch != '#' && ch != ',' && ch != '}' && ch != ')') {
                                        strName += lowercase(ch);
                                        ifs.get(ch);
                                }

                                if (!ifs)
                                        return false;

                                // replace the string with its assigned value or
                                // discard it if it's not assigned
                                if (strName.length()) {
                                        VarMap::const_iterator pos = strings.find(strName);
                                        if (pos != strings.end()) {
                                                val += pos->second;
                                        }
                                }
                        }

                        // skip WS
                        while (ifs && isSpace(ch)) {
                                ifs.get(ch);
                        }

                        if (!ifs)
                                return false;

                        // continue reading next value on concatenate with '#'
                } while (ch == '#');

                // ch is the first character after the value
                ifs.putback(ch);

                return true;
        }
570 }
571
572
573 // This method returns a comma separated list of Bibtex entries
574 void InsetBibtex::fillWithBibKeys(Buffer const & buffer,
575                 BiblioInfo & keylist, InsetIterator const & /*di*/) const
576 {
577         FileNameList const files = getFiles(buffer);
578         for (vector<FileName>::const_iterator it = files.begin();
579              it != files.end(); ++ it) {
580                 // This bibtex parser is a first step to parse bibtex files
581                 // more precisely.
582                 //
583                 // - it reads the whole bibtex entry and does a syntax check
584                 //   (matching delimiters, missing commas,...
585                 // - it recovers from errors starting with the next @-character
586                 // - it reads @string definitions and replaces them in the
587                 //   field values.
588                 // - it accepts more characters in keys or value names than
589                 //   bibtex does.
590                 //
591                 // Officially bibtex does only support ASCII, but in practice
592                 // you can use the encoding of the main document as long as
593                 // some elements like keys and names are pure ASCII. Therefore
594                 // we convert the file from the buffer encoding.
595                 // We don't restrict keys to ASCII in LyX, since our own
596                 // InsetBibitem can generate non-ASCII keys, and nonstandard
597                 // 8bit clean bibtex forks exist.
598                 
599                 idocfstream ifs(it->toFilesystemEncoding().c_str(),
600                         ios_base::in,
601                         buffer.params().encoding().iconvName());
602
603                 char_type ch;
604                 VarMap strings;
605
606                 while (ifs) {
607
608                         ifs.get(ch);
609                         if (!ifs)
610                                 break;
611
612                         if (ch != '@')
613                                 continue;
614
615                         docstring entryType;
616
617                         if (!readTypeOrKey(entryType, ifs, from_ascii("{("), 
618                                            docstring(), makeLowerCase) || !ifs)
619                                 continue;
620
621                         if (entryType == from_ascii("comment")) {
622
623                                 ifs.ignore(numeric_limits<int>::max(), '\n');
624                                 continue;
625                         }
626
627                         ifs.get(ch);
628                         if (!ifs)
629                                 break;
630
631                         if ((ch != '(') && (ch != '{')) {
632                                 // invalid entry delimiter
633                                 ifs.putback(ch);
634                                 continue;
635                         }
636
637                         // process the entry
638                         if (entryType == from_ascii("string")) {
639
640                                 // read string and add it to the strings map
641                                 // (or replace it's old value)
642                                 docstring name;
643                                 docstring value;
644
645                                 if (!readTypeOrKey(name, ifs, from_ascii("="), 
646                                                    from_ascii("#{}(),"), makeLowerCase) || !ifs)
647                                         continue;
648
649                                 // next char must be an equal sign
650                                 ifs.get(ch);
651                                 if (!ifs || ch != '=')
652                                         continue;
653
654                                 if (!readValue(value, ifs, strings))
655                                         continue;
656
657                                 strings[name] = value;
658
659                         } else if (entryType == from_ascii("preamble")) {
660
661                                 // preamble definitions are discarded.
662                                 // can they be of any use in lyx?
663                                 docstring value;
664
665                                 if (!readValue(value, ifs, strings))
666                                         continue;
667
668                         } else {
669
670                                 // Citation entry. Try to read the key.
671                                 docstring key;
672
673                                 if (!readTypeOrKey(key, ifs, from_ascii(","), 
674                                                    from_ascii("}"), keepCase) || !ifs)
675                                         continue;
676
677                                 /////////////////////////////////////////////
678                                 // now we have a key, so we will add an entry 
679                                 // (even if it's empty, as bibtex does)
680                                 //
681                                 // we now read the field = value pairs.
682                                 // all items must be separated by a comma. If
683                                 // it is missing the scanning of this entry is
684                                 // stopped and the next is searched.
685                                 docstring fields;
686                                 docstring name;
687                                 docstring value;
688                                 docstring commaNewline;
689                                 docstring data;
690                                 BibTeXInfo keyvalmap;
691                                 keyvalmap.entryType = entryType;
692                                 
693                                 bool readNext = removeWSAndComma(ifs);
694  
695                                 while (ifs && readNext) {
696
697                                         // read field name
698                                         if (!readTypeOrKey(name, ifs, from_ascii("="), 
699                                                            from_ascii("{}(),"), makeLowerCase) || !ifs)
700                                                 break;
701
702                                         // next char must be an equal sign
703                                         ifs.get(ch);
704                                         if (!ifs)
705                                                 break;
706                                         if (ch != '=') {
707                                                 ifs.putback(ch);
708                                                 break;
709                                         }
710
711                                         // read field value
712                                         if (!readValue(value, ifs, strings))
713                                                 break;
714
715                                         keyvalmap[name] = value;
716                                         data += "\n\n" + value;
717                                         keylist.fieldNames.insert(name);
718                                         readNext = removeWSAndComma(ifs);
719                                 }
720
721                                 // add the new entry
722                                 keylist.entryTypes.insert(entryType);
723                                 keyvalmap.allData = data;
724                                 keyvalmap.isBibTeX = true;
725                                 keyvalmap.bibKey = key;
726                                 keylist[key] = keyvalmap;
727                         }
728                 } //< searching '@'
729         } //< for loop over files
730 }
731
732
733
734 bool InsetBibtex::addDatabase(string const & db)
735 {
736         // FIXME UNICODE
737         string bibfiles(to_utf8(getParam("bibfiles")));
738         if (tokenPos(bibfiles, ',', db) == -1) {
739                 if (!bibfiles.empty())
740                         bibfiles += ',';
741                 setParam("bibfiles", from_utf8(bibfiles + db));
742                 return true;
743         }
744         return false;
745 }
746
747
748 bool InsetBibtex::delDatabase(string const & db)
749 {
750         // FIXME UNICODE
751         string bibfiles(to_utf8(getParam("bibfiles")));
752         if (contains(bibfiles, db)) {
753                 int const n = tokenPos(bibfiles, ',', db);
754                 string bd = db;
755                 if (n > 0) {
756                         // this is not the first database
757                         string tmp = ',' + bd;
758                         setParam("bibfiles", from_utf8(subst(bibfiles, tmp, string())));
759                 } else if (n == 0)
760                         // this is the first (or only) database
761                         setParam("bibfiles", from_utf8(split(bibfiles, bd, ',')));
762                 else
763                         return false;
764         }
765         return true;
766 }
767
768
769 void InsetBibtex::validate(LaTeXFeatures & features) const
770 {
771         if (features.bufferParams().use_bibtopic)
772                 features.require("bibtopic");
773 }
774
775
776 } // namespace lyx