git.lyx.org Git - lyx.git/blob - src/insets/InsetBibtex.cpp
header cleanup.
[lyx.git] / src / insets / InsetBibtex.cpp
1 /**
2  * \file InsetBibtex.cpp
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author Alejandro Aguilar Sierra
7  * \author Richard Heck (BibTeX parser improvements)
8  *
9  * Full author contact details are available in file CREDITS.
10  */
11
12 #include <config.h>
13
14 #include "InsetBibtex.h"
15
16 #include "Buffer.h"
17 #include "BufferParams.h"
18 #include "DispatchResult.h"
19 #include "support/debug.h"
20 #include "Encoding.h"
21 #include "FuncRequest.h"
22 #include "support/gettext.h"
23 #include "LaTeXFeatures.h"
24 #include "MetricsInfo.h"
25 #include "OutputParams.h"
26 #include "TextClass.h"
27
28 #include "frontends/alert.h"
29
30 #include "support/ExceptionMessage.h"
31 #include "support/docstream.h"
32 #include "support/FileNameList.h"
33 #include "support/filetools.h"
34 #include "support/lstrings.h"
35 #include "support/os.h"
36 #include "support/Path.h"
37 #include "support/textutils.h"
38
39 #include <boost/tokenizer.hpp>
40
41 using namespace std;
42 using namespace lyx::support;
43
44 namespace lyx {
45
46 namespace Alert = frontend::Alert;
47 namespace os = support::os;
48
49
50 InsetBibtex::InsetBibtex(InsetCommandParams const & p)
51         : InsetCommand(p, "bibtex")
52 {}
53
54
55 CommandInfo const * InsetBibtex::findInfo(string const & /* cmdName */)
56 {
57         static const char * const paramnames[] = 
58                 {"options", "btprint", "bibfiles", ""};
59         static const bool isoptional[] = {true, true, false};
60         static const CommandInfo info = {3, paramnames, isoptional};
61         return &info;
62 }
63
64
65 Inset * InsetBibtex::clone() const
66 {
67         return new InsetBibtex(*this);
68 }
69
70
71 void InsetBibtex::doDispatch(Cursor & cur, FuncRequest & cmd)
72 {
73         switch (cmd.action) {
74
75         case LFUN_INSET_MODIFY: {
76                 InsetCommandParams p(BIBTEX_CODE);
77                 try {
78                         if (!InsetCommandMailer::string2params("bibtex", 
79                                         to_utf8(cmd.argument()), p)) {
80                                 cur.noUpdate();
81                                 break;
82                         }
83                 } catch (ExceptionMessage const & message) {
84                         if (message.type_ == WarningException) {
85                                 Alert::warning(message.title_, message.details_);
86                                 cur.noUpdate();
87                         } else 
88                                 throw message;
89                         break;
90                 }
91                 setParams(p);
92                 cur.buffer().updateBibfilesCache();
93                 break;
94         }
95
96         default:
97                 InsetCommand::doDispatch(cur, cmd);
98                 break;
99         }
100 }
101
102
103 docstring const InsetBibtex::getScreenLabel(Buffer const &) const
104 {
105         return _("BibTeX Generated Bibliography");
106 }
107
108
109 namespace {
110
111 string normalizeName(Buffer const & buffer, OutputParams const & runparams,
112                       string const & name, string const & ext)
113 {
114         string const fname = makeAbsPath(name, buffer.filePath()).absFilename();
115         if (absolutePath(name) || !FileName(fname + ext).isReadableFile())
116                 return name;
117         if (!runparams.nice)
118                 return fname;
119
120         // FIXME UNICODE
121         return to_utf8(makeRelPath(from_utf8(fname),
122                                          from_utf8(buffer.masterBuffer()->filePath())));
123 }
124
125 }
126
127
128 int InsetBibtex::latex(Buffer const & buffer, odocstream & os,
129                        OutputParams const & runparams) const
130 {
131         // the sequence of the commands:
132         // 1. \bibliographystyle{style}
133         // 2. \addcontentsline{...} - if option bibtotoc set
134         // 3. \bibliography{database}
135         // and with bibtopic:
136         // 1. \bibliographystyle{style}
137         // 2. \begin{btSect}{database}
138         // 3. \btPrint{Cited|NotCited|All}
139         // 4. \end{btSect}
140
141         // Database(s)
142         // If we are processing the LaTeX file in a temp directory then
143         // copy the .bib databases to this temp directory, mangling their
144         // names in the process. Store this mangled name in the list of
145         // all databases.
146         // (We need to do all this because BibTeX *really*, *really*
147         // can't handle "files with spaces" and Windows users tend to
148         // use such filenames.)
149         // Otherwise, store the (maybe absolute) path to the original,
150         // unmangled database name.
151         typedef boost::char_separator<char_type> Separator;
152         typedef boost::tokenizer<Separator, docstring::const_iterator, docstring> Tokenizer;
153
154         Separator const separator(from_ascii(",").c_str());
155         // The tokenizer must not be called with temporary strings, since
156         // it does not make a copy and uses iterators of the string further
157         // down. getParam returns a reference, so this is OK.
158         Tokenizer const tokens(getParam("bibfiles"), separator);
159         Tokenizer::const_iterator const begin = tokens.begin();
160         Tokenizer::const_iterator const end = tokens.end();
161
162         odocstringstream dbs;
163         for (Tokenizer::const_iterator it = begin; it != end; ++it) {
164                 docstring const input = trim(*it);
165                 // FIXME UNICODE
166                 string utf8input = to_utf8(input);
167                 string database =
168                         normalizeName(buffer, runparams, utf8input, ".bib");
169                 FileName const try_in_file(makeAbsPath(database + ".bib", buffer.filePath()));
170                 bool const not_from_texmf = try_in_file.isReadableFile();
171
172                 if (!runparams.inComment && !runparams.dryrun && !runparams.nice &&
173                     not_from_texmf) {
174
175                         // mangledFilename() needs the extension
176                         DocFileName const in_file = DocFileName(try_in_file);
177                         database = removeExtension(in_file.mangledFilename());
178                         FileName const out_file = makeAbsPath(database + ".bib",
179                                         buffer.masterBuffer()->temppath());
180
181                         bool const success = in_file.copyTo(out_file);
182                         if (!success) {
183                                 lyxerr << "Failed to copy '" << in_file
184                                        << "' to '" << out_file << "'"
185                                        << endl;
186                         }
187                 } else if (!runparams.inComment && runparams.nice && not_from_texmf &&
188                            !isValidLaTeXFilename(database)) {
189                                 frontend::Alert::warning(_("Invalid filename"),
190                                                          _("The following filename is likely to cause trouble "
191                                                            "when running the exported file through LaTeX: ") +
192                                                             from_utf8(database));
193                 }
194
195                 if (it != begin)
196                         dbs << ',';
197                 // FIXME UNICODE
198                 dbs << from_utf8(latex_path(database));
199         }
200         docstring const db_out = dbs.str();
201
202         // Post this warning only once.
203         static bool warned_about_spaces = false;
204         if (!warned_about_spaces &&
205             runparams.nice && db_out.find(' ') != docstring::npos) {
206                 warned_about_spaces = true;
207
208                 Alert::warning(_("Export Warning!"),
209                                _("There are spaces in the paths to your BibTeX databases.\n"
210                                               "BibTeX will be unable to find them."));
211         }
212
213         // Style-Options
214         string style = to_utf8(getParam("options")); // maybe empty! and with bibtotoc
215         string bibtotoc;
216         if (prefixIs(style, "bibtotoc")) {
217                 bibtotoc = "bibtotoc";
218                 if (contains(style, ','))
219                         style = split(style, bibtotoc, ',');
220         }
221
222         // line count
223         int nlines = 0;
224
225         if (!style.empty()) {
226                 string base = normalizeName(buffer, runparams, style, ".bst");
227                 FileName const try_in_file(makeAbsPath(base + ".bst", buffer.filePath()));
228                 bool const not_from_texmf = try_in_file.isReadableFile();
229                 // If this style does not come from texmf and we are not
230                 // exporting to .tex copy it to the tmp directory.
231                 // This prevents problems with spaces and 8bit charcaters
232                 // in the file name.
233                 if (!runparams.inComment && !runparams.dryrun && !runparams.nice &&
234                     not_from_texmf) {
235                         // use new style name
236                         DocFileName const in_file = DocFileName(try_in_file);
237                         base = removeExtension(in_file.mangledFilename());
238                         FileName const out_file(makeAbsPath(base + ".bst",
239                                         buffer.masterBuffer()->temppath()));
240                         bool const success = in_file.copyTo(out_file);
241                         if (!success) {
242                                 lyxerr << "Failed to copy '" << in_file
243                                        << "' to '" << out_file << "'"
244                                        << endl;
245                         }
246                 }
247                 // FIXME UNICODE
248                 os << "\\bibliographystyle{"
249                    << from_utf8(latex_path(normalizeName(buffer, runparams, base, ".bst")))
250                    << "}\n";
251                 nlines += 1;
252         }
253
254         // Post this warning only once.
255         static bool warned_about_bst_spaces = false;
256         if (!warned_about_bst_spaces && runparams.nice && contains(style, ' ')) {
257                 warned_about_bst_spaces = true;
258                 Alert::warning(_("Export Warning!"),
259                                _("There are spaces in the path to your BibTeX style file.\n"
260                                               "BibTeX will be unable to find it."));
261         }
262
263         if (!db_out.empty() && buffer.params().use_bibtopic){
264                 os << "\\begin{btSect}{" << db_out << "}\n";
265                 docstring btprint = getParam("btprint");
266                 if (btprint.empty())
267                         // default
268                         btprint = from_ascii("btPrintCited");
269                 os << "\\" << btprint << "\n"
270                    << "\\end{btSect}\n";
271                 nlines += 3;
272         }
273
274         // bibtotoc-Option
275         if (!bibtotoc.empty() && !buffer.params().use_bibtopic) {
276                 // maybe a problem when a textclass has no "art" as
277                 // part of its name, because it's than book.
278                 // For the "official" lyx-layouts it's no problem to support
279                 // all well
280                 if (!contains(buffer.params().getTextClass().name(),
281                               "art")) {
282                         if (buffer.params().sides == OneSide) {
283                                 // oneside
284                                 os << "\\clearpage";
285                         } else {
286                                 // twoside
287                                 os << "\\cleardoublepage";
288                         }
289
290                         // bookclass
291                         os << "\\addcontentsline{toc}{chapter}{\\bibname}";
292
293                 } else {
294                         // article class
295                         os << "\\addcontentsline{toc}{section}{\\refname}";
296                 }
297         }
298
299         if (!db_out.empty() && !buffer.params().use_bibtopic){
300                 os << "\\bibliography{" << db_out << "}\n";
301                 nlines += 1;
302         }
303
304         return nlines;
305 }
306
307
308 FileNameList const InsetBibtex::getFiles(Buffer const & buffer) const
309 {
310         FileName path(buffer.filePath());
311         PathChanger p(path);
312
313         FileNameList vec;
314
315         string tmp;
316         // FIXME UNICODE
317         string bibfiles = to_utf8(getParam("bibfiles"));
318         bibfiles = split(bibfiles, tmp, ',');
319         while (!tmp.empty()) {
320                 FileName const file = findtexfile(changeExtension(tmp, "bib"), "bib");
321                 LYXERR(Debug::LATEX, "Bibfile: " << file);
322
323                 // If we didn't find a matching file name just fail silently
324                 if (!file.empty())
325                         vec.push_back(file);
326
327                 // Get next file name
328                 bibfiles = split(bibfiles, tmp, ',');
329         }
330
331         return vec;
332 }
333
334 namespace {
335
336         // methods for parsing bibtex files
337
338         typedef map<docstring, docstring> VarMap;
339
340         /// remove whitespace characters, optionally a single comma,
341         /// and further whitespace characters from the stream.
342         /// @return true if a comma was found, false otherwise
343         ///
344         bool removeWSAndComma(idocfstream & ifs) {
345                 char_type ch;
346
347                 if (!ifs)
348                         return false;
349
350                 // skip whitespace
351                 do {
352                         ifs.get(ch);
353                 } while (ifs && isSpace(ch));
354
355                 if (!ifs)
356                         return false;
357
358                 if (ch != ',') {
359                         ifs.putback(ch);
360                         return false;
361                 }
362
363                 // skip whitespace
364                 do {
365                         ifs.get(ch);
366                 } while (ifs && isSpace(ch));
367
368                 if (ifs) {
369                         ifs.putback(ch);
370                 }
371
372                 return true;
373         }
374
375
376         enum charCase {
377                 makeLowerCase,
378                 keepCase
379         };
380
381         /// remove whitespace characters, read characer sequence
382         /// not containing whitespace characters or characters in
383         /// delimChars, and remove further whitespace characters.
384         ///
385         /// @return true if a string of length > 0 could be read.
386         ///
387         bool readTypeOrKey(docstring & val, idocfstream & ifs,
388                 docstring const & delimChars, docstring const &illegalChars, 
389                 charCase chCase) {
390
391                 char_type ch;
392
393                 val.clear();
394
395                 if (!ifs)
396                         return false;
397
398                 // skip whitespace
399                 do {
400                         ifs.get(ch);
401                 } while (ifs && isSpace(ch));
402
403                 if (!ifs)
404                         return false;
405
406                 // read value
407                 bool legalChar = true;
408                 while (ifs && !isSpace(ch) && 
409                                                  delimChars.find(ch) == docstring::npos &&
410                                                  (legalChar = (illegalChars.find(ch) == docstring::npos))
411                                         ) 
412                 {
413                         if (chCase == makeLowerCase)
414                                 val += lowercase(ch);
415                         else
416                                 val += ch;
417                         ifs.get(ch);
418                 }
419                 
420                 if (!legalChar) {
421                         ifs.putback(ch);
422                         return false;
423                 }
424
425                 // skip whitespace
426                 while (ifs && isSpace(ch)) {
427                         ifs.get(ch);
428                 }
429
430                 if (ifs) {
431                         ifs.putback(ch);
432                 }
433
434                 return val.length() > 0;
435         }
436
437         /// read subsequent bibtex values that are delimited with a #-character.
438         /// Concatenate all parts and replace names with the associated string in
439         /// the variable strings.
440         /// @return true if reading was successfull (all single parts were delimited
441         /// correctly)
442         bool readValue(docstring & val, idocfstream & ifs, const VarMap & strings) {
443
444                 char_type ch;
445
446                 val.clear();
447
448                 if (!ifs)
449                         return false;
450
451                 do {
452                         // skip whitespace
453                         do {
454                                 ifs.get(ch);
455                         } while (ifs && isSpace(ch));
456
457                         if (!ifs)
458                                 return false;
459
460                         // check for field type
461                         if (isDigit(ch)) {
462
463                                 // read integer value
464                                 do {
465                                         val += ch;
466                                         ifs.get(ch);
467                                 } while (ifs && isDigit(ch));
468
469                                 if (!ifs)
470                                         return false;
471
472                         } else if (ch == '"' || ch == '{') {
473                                 // set end delimiter
474                                 char_type delim = ch == '"' ? '"': '}';
475
476                                 //Skip whitespace
477                                 do {
478                                         ifs.get(ch);
479                                 } while (ifs && isSpace(ch));
480                                 
481                                 if (!ifs)
482                                         return false;
483                                 
484                                 //We now have the first non-whitespace character
485                                 //We'll collapse adjacent whitespace.
486                                 bool lastWasWhiteSpace = false;
487                                 
488                                 // inside this delimited text braces must match.
489                                 // Thus we can have a closing delimiter only
490                                 // when nestLevel == 0
491                                 int nestLevel = 0;
492  
493                                 while (ifs && (nestLevel > 0 || ch != delim)) {
494                                         if (isSpace(ch)) {
495                                                 lastWasWhiteSpace = true;
496                                                 ifs.get(ch);
497                                                 continue;
498                                         }
499                                         //We output the space only after we stop getting 
500                                         //whitespace so as not to output any whitespace
501                                         //at the end of the value.
502                                         if (lastWasWhiteSpace) {
503                                                 lastWasWhiteSpace = false;
504                                                 val += ' ';
505                                         }
506                                         
507                                         val += ch;
508
509                                         // update nesting level
510                                         switch (ch) {
511                                                 case '{':
512                                                         ++nestLevel;
513                                                         break;
514                                                 case '}':
515                                                         --nestLevel;
516                                                         if (nestLevel < 0) return false;
517                                                         break;
518                                         }
519
520                                         ifs.get(ch);
521                                 }
522
523                                 if (!ifs)
524                                         return false;
525
526                                 ifs.get(ch);
527
528                                 if (!ifs)
529                                         return false;
530
531                         } else {
532
533                                 // reading a string name
534                                 docstring strName;
535
536                                 while (ifs && !isSpace(ch) && ch != '#' && ch != ',' && ch != '}' && ch != ')') {
537                                         strName += lowercase(ch);
538                                         ifs.get(ch);
539                                 }
540
541                                 if (!ifs)
542                                         return false;
543
544                                 // replace the string with its assigned value or
545                                 // discard it if it's not assigned
546                                 if (strName.length()) {
547                                         VarMap::const_iterator pos = strings.find(strName);
548                                         if (pos != strings.end()) {
549                                                 val += pos->second;
550                                         }
551                                 }
552                         }
553
554                         // skip WS
555                         while (ifs && isSpace(ch)) {
556                                 ifs.get(ch);
557                         }
558
559                         if (!ifs)
560                                 return false;
561
562                         // continue reading next value on concatenate with '#'
563                 } while (ch == '#');
564
565                 ifs.putback(ch);
566
567                 return true;
568         }
569 }
570
571
572 // This method returns a comma separated list of Bibtex entries
573 void InsetBibtex::fillWithBibKeys(Buffer const & buffer,
574                 BiblioInfo & keylist, InsetIterator const & /*di*/) const
575 {
576         FileNameList const files = getFiles(buffer);
577         for (vector<FileName>::const_iterator it = files.begin();
578              it != files.end(); ++ it) {
579                 // This bibtex parser is a first step to parse bibtex files
580                 // more precisely.
581                 //
582                 // - it reads the whole bibtex entry and does a syntax check
583                 //   (matching delimiters, missing commas,...
584                 // - it recovers from errors starting with the next @-character
585                 // - it reads @string definitions and replaces them in the
586                 //   field values.
587                 // - it accepts more characters in keys or value names than
588                 //   bibtex does.
589                 //
590                 // Officially bibtex does only support ASCII, but in practice
591                 // you can use the encoding of the main document as long as
592                 // some elements like keys and names are pure ASCII. Therefore
593                 // we convert the file from the buffer encoding.
594                 // We don't restrict keys to ASCII in LyX, since our own
595                 // InsetBibitem can generate non-ASCII keys, and nonstandard
596                 // 8bit clean bibtex forks exist.
597                 
598                 idocfstream ifs(it->toFilesystemEncoding().c_str(),
599                         ios_base::in,
600                         buffer.params().encoding().iconvName());
601
602                 char_type ch;
603                 VarMap strings;
604
605                 while (ifs) {
606
607                         ifs.get(ch);
608                         if (!ifs)
609                                 break;
610
611                         if (ch != '@')
612                                 continue;
613
614                         docstring entryType;
615
616                         if (!readTypeOrKey(entryType, ifs, from_ascii("{("), 
617                                            docstring(), makeLowerCase) || !ifs)
618                                 continue;
619
620                         if (entryType == from_ascii("comment")) {
621
622                                 ifs.ignore(numeric_limits<int>::max(), '\n');
623                                 continue;
624                         }
625
626                         ifs.get(ch);
627                         if (!ifs)
628                                 break;
629
630                         if ((ch != '(') && (ch != '{')) {
631                                 // invalid entry delimiter
632                                 ifs.putback(ch);
633                                 continue;
634                         }
635
636                         // process the entry
637                         if (entryType == from_ascii("string")) {
638
639                                 // read string and add it to the strings map
640                                 // (or replace it's old value)
641                                 docstring name;
642                                 docstring value;
643
644                                 if (!readTypeOrKey(name, ifs, from_ascii("="), 
645                                                    from_ascii("#{}(),"), makeLowerCase) || !ifs)
646                                         continue;
647
648                                 // next char must be an equal sign
649                                 ifs.get(ch);
650                                 if (!ifs || ch != '=')
651                                         continue;
652
653                                 if (!readValue(value, ifs, strings))
654                                         continue;
655
656                                 strings[name] = value;
657
658                         } else if (entryType == from_ascii("preamble")) {
659
660                                 // preamble definitions are discarded.
661                                 // can they be of any use in lyx?
662                                 docstring value;
663
664                                 if (!readValue(value, ifs, strings))
665                                         continue;
666
667                         } else {
668
669                                 // Citation entry. Try to read the key.
670                                 docstring key;
671
672                                 if (!readTypeOrKey(key, ifs, from_ascii(","), 
673                                                    from_ascii("}"), keepCase) || !ifs)
674                                         continue;
675
676                                 /////////////////////////////////////////////
677                                 // now we have a key, so we will add an entry 
678                                 // (even if it's empty, as bibtex does)
679                                 //
680                                 // we now read the field = value pairs.
681                                 // all items must be separated by a comma. If
682                                 // it is missing the scanning of this entry is
683                                 // stopped and the next is searched.
684                                 docstring fields;
685                                 docstring name;
686                                 docstring value;
687                                 docstring commaNewline;
688                                 docstring data;
689                                 BibTeXInfo keyvalmap;
690                                 keyvalmap.entryType = entryType;
691                                 
692                                 bool readNext = removeWSAndComma(ifs);
693  
694                                 while (ifs && readNext) {
695
696                                         // read field name
697                                         if (!readTypeOrKey(name, ifs, from_ascii("="), 
698                                                            from_ascii("{}(),"), makeLowerCase) || !ifs)
699                                                 break;
700
701                                         // next char must be an equal sign
702                                         ifs.get(ch);
703                                         if (!ifs)
704                                                 break;
705                                         if (ch != '=') {
706                                                 ifs.putback(ch);
707                                                 break;
708                                         }
709
710                                         // read field value
711                                         if (!readValue(value, ifs, strings))
712                                                 break;
713
714                                         keyvalmap[name] = value;
715                                         data += "\n\n" + value;
716                                         keylist.fieldNames.insert(name);
717                                         readNext = removeWSAndComma(ifs);
718                                 }
719
720                                 // add the new entry
721                                 keylist.entryTypes.insert(entryType);
722                                 keyvalmap.allData = data;
723                                 keyvalmap.isBibTeX = true;
724                                 keyvalmap.bibKey = key;
725                                 keylist[key] = keyvalmap;
726                         }
727                 } //< searching '@'
728         } //< for loop over files
729 }
730
731
732
733 bool InsetBibtex::addDatabase(string const & db)
734 {
735         // FIXME UNICODE
736         string bibfiles(to_utf8(getParam("bibfiles")));
737         if (tokenPos(bibfiles, ',', db) == -1) {
738                 if (!bibfiles.empty())
739                         bibfiles += ',';
740                 setParam("bibfiles", from_utf8(bibfiles + db));
741                 return true;
742         }
743         return false;
744 }
745
746
747 bool InsetBibtex::delDatabase(string const & db)
748 {
749         // FIXME UNICODE
750         string bibfiles(to_utf8(getParam("bibfiles")));
751         if (contains(bibfiles, db)) {
752                 int const n = tokenPos(bibfiles, ',', db);
753                 string bd = db;
754                 if (n > 0) {
755                         // this is not the first database
756                         string tmp = ',' + bd;
757                         setParam("bibfiles", from_utf8(subst(bibfiles, tmp, string())));
758                 } else if (n == 0)
759                         // this is the first (or only) database
760                         setParam("bibfiles", from_utf8(split(bibfiles, bd, ',')));
761                 else
762                         return false;
763         }
764         return true;
765 }
766
767
768 void InsetBibtex::validate(LaTeXFeatures & features) const
769 {
770         if (features.bufferParams().use_bibtopic)
771                 features.require("bibtopic");
772 }
773
774
775 } // namespace lyx