]> git.lyx.org Git - lyx.git/blob - src/insets/InsetBibtex.cpp
'using namespace std' instead of 'using std::xxx'
[lyx.git] / src / insets / InsetBibtex.cpp
1 /**
2  * \file InsetBibtex.cpp
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author Alejandro Aguilar Sierra
7  * \author Richard Heck (BibTeX parser improvements)
8  *
9  * Full author contact details are available in file CREDITS.
10  */
11
12 #include <config.h>
13
14 #include "InsetBibtex.h"
15
16 #include "Buffer.h"
17 #include "BufferParams.h"
18 #include "DispatchResult.h"
19 #include "support/debug.h"
20 #include "Encoding.h"
21 #include "FuncRequest.h"
22 #include "support/gettext.h"
23 #include "LaTeXFeatures.h"
24 #include "MetricsInfo.h"
25 #include "OutputParams.h"
26 #include "TextClass.h"
27
28 #include "frontends/alert.h"
29
30 #include "support/ExceptionMessage.h"
31 #include "support/docstream.h"
32 #include "support/FileNameList.h"
33 #include "support/filetools.h"
34 #include "support/lstrings.h"
35 #include "support/lyxlib.h"
36 #include "support/os.h"
37 #include "support/Path.h"
38 #include "support/textutils.h"
39
40 #include <boost/tokenizer.hpp>
41
42 using namespace std;
43
44 namespace lyx {
45
46 using support::absolutePath;
47 using support::ascii_lowercase;
48 using support::changeExtension;
49 using support::contains;
50 using support::copy;
51 using support::DocFileName;
52 using support::FileName;
53 using support::FileNameList;
54 using support::findtexfile;
55 using support::isValidLaTeXFilename;
56 using support::latex_path;
57 using support::ltrim;
58 using support::makeAbsPath;
59 using support::makeRelPath;
60 using support::prefixIs;
61 using support::removeExtension;
62 using support::rtrim;
63 using support::split;
64 using support::subst;
65 using support::tokenPos;
66 using support::trim;
67 using support::lowercase;
68
69 namespace Alert = frontend::Alert;
70 namespace os = support::os;
71
72
73 InsetBibtex::InsetBibtex(InsetCommandParams const & p)
74         : InsetCommand(p, "bibtex")
75 {}
76
77
78 CommandInfo const * InsetBibtex::findInfo(std::string const & /* cmdName */)
79 {
80         static const char * const paramnames[] = 
81                 {"options", "btprint", "bibfiles", ""};
82         static const bool isoptional[] = {true, true, false};
83         static const CommandInfo info = {3, paramnames, isoptional};
84         return &info;
85 }
86
87
88 Inset * InsetBibtex::clone() const
89 {
90         return new InsetBibtex(*this);
91 }
92
93
94 void InsetBibtex::doDispatch(Cursor & cur, FuncRequest & cmd)
95 {
96         switch (cmd.action) {
97
98         case LFUN_INSET_MODIFY: {
99                 InsetCommandParams p(BIBTEX_CODE);
100                 try {
101                         if (!InsetCommandMailer::string2params("bibtex", 
102                                         to_utf8(cmd.argument()), p)) {
103                                 cur.noUpdate();
104                                 break;
105                         }
106                 } catch (support::ExceptionMessage const & message) {
107                         if (message.type_ == support::WarningException) {
108                                 Alert::warning(message.title_, message.details_);
109                                 cur.noUpdate();
110                         } else 
111                                 throw message;
112                         break;
113                 }
114                 setParams(p);
115                 cur.buffer().updateBibfilesCache();
116                 break;
117         }
118
119         default:
120                 InsetCommand::doDispatch(cur, cmd);
121                 break;
122         }
123 }
124
125
126 docstring const InsetBibtex::getScreenLabel(Buffer const &) const
127 {
128         return _("BibTeX Generated Bibliography");
129 }
130
131
132 namespace {
133
134 string normalizeName(Buffer const & buffer, OutputParams const & runparams,
135                       string const & name, string const & ext)
136 {
137         string const fname = makeAbsPath(name, buffer.filePath()).absFilename();
138         if (absolutePath(name) || !FileName(fname + ext).isReadableFile())
139                 return name;
140         if (!runparams.nice)
141                 return fname;
142
143         // FIXME UNICODE
144         return to_utf8(makeRelPath(from_utf8(fname),
145                                          from_utf8(buffer.masterBuffer()->filePath())));
146 }
147
148 }
149
150
// Write the LaTeX code for this bibliography inset to os.
//
// The databases come from the "bibfiles" parameter, the style (optionally
// preceded by "bibtotoc,") from the "options" parameter and the bibtopic
// print command from the "btprint" parameter.
//
// Besides writing to os this function may copy .bib/.bst files into the
// temporary directory of the master buffer and may pop up warnings; two
// of these warnings are guarded by function-local static flags so that
// they are shown only once.
//
// Returns the number of newline-terminated lines written to os (the
// clearpage/addcontentsline output of bibtotoc carries no newline and is
// not counted).
151 int InsetBibtex::latex(Buffer const & buffer, odocstream & os,
152                        OutputParams const & runparams) const
153 {
154         // the sequence of the commands:
155         // 1. \bibliographystyle{style}
156         // 2. \addcontentsline{...} - if option bibtotoc set
157         // 3. \bibliography{database}
158         // and with bibtopic:
159         // 1. \bibliographystyle{style}
160         // 2. \begin{btSect}{database}
161         // 3. \btPrint{Cited|NotCited|All}
162         // 4. \end{btSect}
163
164         // Database(s)
165         // If we are processing the LaTeX file in a temp directory then
166         // copy the .bib databases to this temp directory, mangling their
167         // names in the process. Store this mangled name in the list of
168         // all databases.
169         // (We need to do all this because BibTeX *really*, *really*
170         // can't handle "files with spaces" and Windows users tend to
171         // use such filenames.)
172         // Otherwise, store the (maybe absolute) path to the original,
173         // unmangled database name.
174         typedef boost::char_separator<char_type> Separator;
175         typedef boost::tokenizer<Separator, docstring::const_iterator, docstring> Tokenizer;
176
177         Separator const separator(from_ascii(",").c_str());
178         // The tokenizer must not be called with temporary strings, since
179         // it does not make a copy and uses iterators of the string further
180         // down. getParam returns a reference, so this is OK.
181         Tokenizer const tokens(getParam("bibfiles"), separator);
182         Tokenizer::const_iterator const begin = tokens.begin();
183         Tokenizer::const_iterator const end = tokens.end();
184
            // dbs collects the comma-separated database list for the output
185         odocstringstream dbs;
186         for (Tokenizer::const_iterator it = begin; it != end; ++it) {
187                 docstring const input = trim(*it);
188                 // FIXME UNICODE
189                 string utf8input = to_utf8(input);
190                 string database =
191                         normalizeName(buffer, runparams, utf8input, ".bib");
                    // A readable <database>.bib relative to the buffer directory
                    // is taken to mean the file does not come from texmf.
192                 FileName const try_in_file(makeAbsPath(database + ".bib", buffer.filePath()));
193                 bool const not_from_texmf = try_in_file.isReadableFile();
194
195                 if (!runparams.inComment && !runparams.dryrun && !runparams.nice &&
196                     not_from_texmf) {
197
198                         // mangledFilename() needs the extension
199                         DocFileName const in_file = DocFileName(try_in_file);
200                         database = removeExtension(in_file.mangledFilename());
201                         FileName const out_file = makeAbsPath(database + ".bib",
202                                         buffer.masterBuffer()->temppath());
203
                            // A failed copy is only logged; output continues.
204                         bool const success = copy(in_file, out_file);
205                         if (!success) {
206                                 lyxerr << "Failed to copy '" << in_file
207                                        << "' to '" << out_file << "'"
208                                        << endl;
209                         }
210                 } else if (!runparams.inComment && runparams.nice && not_from_texmf &&
211                            !isValidLaTeXFilename(database)) {
212                                 frontend::Alert::warning(_("Invalid filename"),
213                                                          _("The following filename is likely to cause trouble "
214                                                            "when running the exported file through LaTeX: ") +
215                                                             from_utf8(database));
216                 }
217
218                 if (it != begin)
219                         dbs << ',';
220                 // FIXME UNICODE
221                 dbs << from_utf8(latex_path(database));
222         }
223         docstring const db_out = dbs.str();
224
225         // Post this warning only once.
226         static bool warned_about_spaces = false;
227         if (!warned_about_spaces &&
228             runparams.nice && db_out.find(' ') != docstring::npos) {
229                 warned_about_spaces = true;
230
231                 Alert::warning(_("Export Warning!"),
232                                _("There are spaces in the paths to your BibTeX databases.\n"
233                                               "BibTeX will be unable to find them."));
234         }
235
236         // Style-Options
            // NOTE(review): when "options" is exactly "bibtotoc" (no comma),
            // style is not changed here and still reads "bibtotoc" when it is
            // written out as the style name below -- confirm that callers
            // never pass that form.
237         string style = to_utf8(getParam("options")); // maybe empty! and with bibtotoc
238         string bibtotoc;
239         if (prefixIs(style, "bibtotoc")) {
240                 bibtotoc = "bibtotoc";
241                 if (contains(style, ','))
242                         style = split(style, bibtotoc, ',');
243         }
244
245         // line count
246         int nlines = 0;
247
248         if (!style.empty()) {
249                 string base = normalizeName(buffer, runparams, style, ".bst");
250                 FileName const try_in_file(makeAbsPath(base + ".bst", buffer.filePath()));
251                 bool const not_from_texmf = try_in_file.isReadableFile();
252                 // If this style does not come from texmf and we are not
253                 // exporting to .tex copy it to the tmp directory.
254                 // This prevents problems with spaces and 8bit characters
255                 // in the file name.
256                 if (!runparams.inComment && !runparams.dryrun && !runparams.nice &&
257                     not_from_texmf) {
258                         // use new style name
259                         DocFileName const in_file = DocFileName(try_in_file);
260                         base = removeExtension(in_file.mangledFilename());
261                         FileName const out_file(makeAbsPath(base + ".bst",
262                                         buffer.masterBuffer()->temppath()));
263                         bool const success = copy(in_file, out_file);
264                         if (!success) {
265                                 lyxerr << "Failed to copy '" << in_file
266                                        << "' to '" << out_file << "'"
267                                        << endl;
268                         }
269                 }
270                 // FIXME UNICODE
271                 os << "\\bibliographystyle{"
272                    << from_utf8(latex_path(normalizeName(buffer, runparams, base, ".bst")))
273                    << "}\n";
274                 nlines += 1;
275         }
276
277         // Post this warning only once.
            // The check looks at the style name as given in "options", not at
            // the path that was written above.
278         static bool warned_about_bst_spaces = false;
279         if (!warned_about_bst_spaces && runparams.nice && contains(style, ' ')) {
280                 warned_about_bst_spaces = true;
281                 Alert::warning(_("Export Warning!"),
282                                _("There are spaces in the path to your BibTeX style file.\n"
283                                               "BibTeX will be unable to find it."));
284         }
285
            // bibtopic variant: three lines (begin, print command, end)
286         if (!db_out.empty() && buffer.params().use_bibtopic){
287                 os << "\\begin{btSect}{" << db_out << "}\n";
288                 docstring btprint = getParam("btprint");
289                 if (btprint.empty())
290                         // default
291                         btprint = from_ascii("btPrintCited");
292                 os << "\\" << btprint << "\n"
293                    << "\\end{btSect}\n";
294                 nlines += 3;
295         }
296
297         // bibtotoc-Option
298         if (!bibtotoc.empty() && !buffer.params().use_bibtopic) {
299                 // maybe a problem when a textclass has no "art" as
300                 // part of its name, because it's then book.
301                 // For the "official" lyx-layouts it's no problem to support
302                 // all well
303                 if (!contains(buffer.params().getTextClass().name(),
304                               "art")) {
305                         if (buffer.params().sides == OneSide) {
306                                 // oneside
307                                 os << "\\clearpage";
308                         } else {
309                                 // twoside
310                                 os << "\\cleardoublepage";
311                         }
312
313                         // bookclass
314                         os << "\\addcontentsline{toc}{chapter}{\\bibname}";
315
316                 } else {
317                         // article class
318                         os << "\\addcontentsline{toc}{section}{\\refname}";
319                 }
320         }
321
            // plain BibTeX variant: a single \bibliography line
322         if (!db_out.empty() && !buffer.params().use_bibtopic){
323                 os << "\\bibliography{" << db_out << "}\n";
324                 nlines += 1;
325         }
326
327         return nlines;
328 }
329
330
331 FileNameList const InsetBibtex::getFiles(Buffer const & buffer) const
332 {
333         FileName path(buffer.filePath());
334         support::PathChanger p(path);
335
336         FileNameList vec;
337
338         string tmp;
339         // FIXME UNICODE
340         string bibfiles = to_utf8(getParam("bibfiles"));
341         bibfiles = split(bibfiles, tmp, ',');
342         while (!tmp.empty()) {
343                 FileName const file = findtexfile(changeExtension(tmp, "bib"), "bib");
344                 LYXERR(Debug::LATEX, "Bibfile: " << file);
345
346                 // If we didn't find a matching file name just fail silently
347                 if (!file.empty())
348                         vec.push_back(file);
349
350                 // Get next file name
351                 bibfiles = split(bibfiles, tmp, ',');
352         }
353
354         return vec;
355 }
356
357 namespace {
358
359         // methods for parsing bibtex files
360
/// Table of @string definitions: the lower-cased name of a definition
/// mapped to the value that is substituted for it in later values.
361         typedef map<docstring, docstring> VarMap;
362
363         /// remove whitespace characters, optionally a single comma,
364         /// and further whitespace characters from the stream.
365         /// @return true if a comma was found, false otherwise
366         ///
367         bool removeWSAndComma(idocfstream & ifs) {
368                 char_type ch;
369
370                 if (!ifs)
371                         return false;
372
373                 // skip whitespace
374                 do {
375                         ifs.get(ch);
376                 } while (ifs && isSpace(ch));
377
378                 if (!ifs)
379                         return false;
380
381                 if (ch != ',') {
382                         ifs.putback(ch);
383                         return false;
384                 }
385
386                 // skip whitespace
387                 do {
388                         ifs.get(ch);
389                 } while (ifs && isSpace(ch));
390
391                 if (ifs) {
392                         ifs.putback(ch);
393                 }
394
395                 return true;
396         }
397
398
/// Selects how readTypeOrKey() stores the characters it reads.
399         enum charCase {
                    // store lowercase(ch) for every character
400                 makeLowerCase,
                    // store every character unchanged
401                 keepCase
402         };
403
404         /// remove whitespace characters, read characer sequence
405         /// not containing whitespace characters or characters in
406         /// delimChars, and remove further whitespace characters.
407         ///
408         /// @return true if a string of length > 0 could be read.
409         ///
410         bool readTypeOrKey(docstring & val, idocfstream & ifs,
411                 docstring const & delimChars, docstring const &illegalChars, 
412                 charCase chCase) {
413
414                 char_type ch;
415
416                 val.clear();
417
418                 if (!ifs)
419                         return false;
420
421                 // skip whitespace
422                 do {
423                         ifs.get(ch);
424                 } while (ifs && isSpace(ch));
425
426                 if (!ifs)
427                         return false;
428
429                 // read value
430                 bool legalChar = true;
431                 while (ifs && !isSpace(ch) && 
432                                                  delimChars.find(ch) == docstring::npos &&
433                                                  (legalChar = (illegalChars.find(ch) == docstring::npos))
434                                         ) 
435                 {
436                         if (chCase == makeLowerCase)
437                                 val += lowercase(ch);
438                         else
439                                 val += ch;
440                         ifs.get(ch);
441                 }
442                 
443                 if (!legalChar) {
444                         ifs.putback(ch);
445                         return false;
446                 }
447
448                 // skip whitespace
449                 while (ifs && isSpace(ch)) {
450                         ifs.get(ch);
451                 }
452
453                 if (ifs) {
454                         ifs.putback(ch);
455                 }
456
457                 return val.length() > 0;
458         }
459
460         /// read subsequent bibtex values that are delimited with a #-character.
461         /// Concatenate all parts and replace names with the associated string in
462         /// the variable strings.
            ///
            /// A part is either a run of digits, a text delimited by "..." or
            /// {...} (braces inside it must balance; runs of whitespace are
            /// collapsed to one blank and leading/trailing whitespace is dropped),
            /// or the name of an @string definition. Names are lower-cased before
            /// the lookup; a name that is not in \p strings contributes nothing.
            ///
            /// On success the character that follows the value has been put back
            /// into the stream. Reaching the end of the input at any point counts
            /// as a failure.
            ///
            /// @param val     receives the concatenated value; cleared on entry.
            /// @param ifs     stream to read from.
            /// @param strings the @string definitions known so far.
463         /// @return true if reading was successful (all single parts were delimited
464         /// correctly)
465         bool readValue(docstring & val, idocfstream & ifs, const VarMap & strings) {
466
467                 char_type ch;
468
469                 val.clear();
470
471                 if (!ifs)
472                         return false;
473
                    // one iteration per '#'-separated part
474                 do {
475                         // skip whitespace
476                         do {
477                                 ifs.get(ch);
478                         } while (ifs && isSpace(ch));
479
480                         if (!ifs)
481                                 return false;
482
483                         // check for field type
484                         if (isDigit(ch)) {
485
486                                 // read integer value
487                                 do {
488                                         val += ch;
489                                         ifs.get(ch);
490                                 } while (ifs && isDigit(ch));
491
492                                 if (!ifs)
493                                         return false;
494
495                         } else if (ch == '"' || ch == '{') {
496                                 // set end delimiter
497                                 char_type delim = ch == '"' ? '"': '}';
498
499                                 //Skip whitespace
500                                 do {
501                                         ifs.get(ch);
502                                 } while (ifs && isSpace(ch));
503                                 
504                                 if (!ifs)
505                                         return false;
506                                 
507                                 //We now have the first non-whitespace character
508                                 //We'll collapse adjacent whitespace.
509                                 bool lastWasWhiteSpace = false;
510                                 
511                                 // inside this delimited text braces must match.
512                                 // Thus we can have a closing delimiter only
513                                 // when nestLevel == 0
514                                 int nestLevel = 0;
515  
516                                 while (ifs && (nestLevel > 0 || ch != delim)) {
517                                         if (isSpace(ch)) {
518                                                 lastWasWhiteSpace = true;
519                                                 ifs.get(ch);
520                                                 continue;
521                                         }
522                                         //We output the space only after we stop getting 
523                                         //whitespace so as not to output any whitespace
524                                         //at the end of the value.
525                                         if (lastWasWhiteSpace) {
526                                                 lastWasWhiteSpace = false;
527                                                 val += ' ';
528                                         }
529                                         
530                                         val += ch;
531
532                                         // update nesting level
533                                         switch (ch) {
534                                                 case '{':
535                                                         ++nestLevel;
536                                                         break;
537                                                 case '}':
538                                                         --nestLevel;
                                                            // a closing brace without an opening one
539                                                         if (nestLevel < 0) return false;
540                                                         break;
541                                         }
542
543                                         ifs.get(ch);
544                                 }
545
546                                 if (!ifs)
547                                         return false;
548
                                    // ch is the closing delimiter here; read the
                                    // character that follows it
549                                 ifs.get(ch);
550
551                                 if (!ifs)
552                                         return false;
553
554                         } else {
555
556                                 // reading a string name
557                                 docstring strName;
558
559                                 while (ifs && !isSpace(ch) && ch != '#' && ch != ',' && ch != '}' && ch != ')') {
560                                         strName += lowercase(ch);
561                                         ifs.get(ch);
562                                 }
563
564                                 if (!ifs)
565                                         return false;
566
567                                 // replace the string with its assigned value or
568                                 // discard it if it's not assigned
569                                 if (strName.length()) {
570                                         VarMap::const_iterator pos = strings.find(strName);
571                                         if (pos != strings.end()) {
572                                                 val += pos->second;
573                                         }
574                                 }
575                         }
576
577                         // skip WS
578                         while (ifs && isSpace(ch)) {
579                                 ifs.get(ch);
580                         }
581
582                         if (!ifs)
583                                 return false;
584
585                         // continue reading next value on concatenate with '#'
586                 } while (ch == '#');
587
                    // leave the character that ended the value for the caller
588                 ifs.putback(ch);
589
590                 return true;
591         }
592 }
593
594
595 // This method returns a comma separated list of Bibtex entries
596 void InsetBibtex::fillWithBibKeys(Buffer const & buffer,
597                 BiblioInfo & keylist, InsetIterator const & /*di*/) const
598 {
599         FileNameList const files = getFiles(buffer);
600         for (vector<FileName>::const_iterator it = files.begin();
601              it != files.end(); ++ it) {
602                 // This bibtex parser is a first step to parse bibtex files
603                 // more precisely.
604                 //
605                 // - it reads the whole bibtex entry and does a syntax check
606                 //   (matching delimiters, missing commas,...
607                 // - it recovers from errors starting with the next @-character
608                 // - it reads @string definitions and replaces them in the
609                 //   field values.
610                 // - it accepts more characters in keys or value names than
611                 //   bibtex does.
612                 //
613                 // Officially bibtex does only support ASCII, but in practice
614                 // you can use the encoding of the main document as long as
615                 // some elements like keys and names are pure ASCII. Therefore
616                 // we convert the file from the buffer encoding.
617                 // We don't restrict keys to ASCII in LyX, since our own
618                 // InsetBibitem can generate non-ASCII keys, and nonstandard
619                 // 8bit clean bibtex forks exist.
620                 
621                 idocfstream ifs(it->toFilesystemEncoding().c_str(),
622                         std::ios_base::in,
623                         buffer.params().encoding().iconvName());
624
625                 char_type ch;
626                 VarMap strings;
627
628                 while (ifs) {
629
630                         ifs.get(ch);
631                         if (!ifs)
632                                 break;
633
634                         if (ch != '@')
635                                 continue;
636
637                         docstring entryType;
638
639                         if (!readTypeOrKey(entryType, ifs, from_ascii("{("), 
640                                            docstring(), makeLowerCase) || !ifs)
641                                 continue;
642
643                         if (entryType == from_ascii("comment")) {
644
645                                 ifs.ignore(std::numeric_limits<int>::max(), '\n');
646                                 continue;
647                         }
648
649                         ifs.get(ch);
650                         if (!ifs)
651                                 break;
652
653                         if ((ch != '(') && (ch != '{')) {
654                                 // invalid entry delimiter
655                                 ifs.putback(ch);
656                                 continue;
657                         }
658
659                         // process the entry
660                         if (entryType == from_ascii("string")) {
661
662                                 // read string and add it to the strings map
663                                 // (or replace it's old value)
664                                 docstring name;
665                                 docstring value;
666
667                                 if (!readTypeOrKey(name, ifs, from_ascii("="), 
668                                                    from_ascii("#{}(),"), makeLowerCase) || !ifs)
669                                         continue;
670
671                                 // next char must be an equal sign
672                                 ifs.get(ch);
673                                 if (!ifs || ch != '=')
674                                         continue;
675
676                                 if (!readValue(value, ifs, strings))
677                                         continue;
678
679                                 strings[name] = value;
680
681                         } else if (entryType == from_ascii("preamble")) {
682
683                                 // preamble definitions are discarded.
684                                 // can they be of any use in lyx?
685                                 docstring value;
686
687                                 if (!readValue(value, ifs, strings))
688                                         continue;
689
690                         } else {
691
692                                 // Citation entry. Try to read the key.
693                                 docstring key;
694
695                                 if (!readTypeOrKey(key, ifs, from_ascii(","), 
696                                                    from_ascii("}"), keepCase) || !ifs)
697                                         continue;
698
699                                 /////////////////////////////////////////////
700                                 // now we have a key, so we will add an entry 
701                                 // (even if it's empty, as bibtex does)
702                                 //
703                                 // we now read the field = value pairs.
704                                 // all items must be separated by a comma. If
705                                 // it is missing the scanning of this entry is
706                                 // stopped and the next is searched.
707                                 docstring fields;
708                                 docstring name;
709                                 docstring value;
710                                 docstring commaNewline;
711                                 docstring data;
712                                 BibTeXInfo keyvalmap;
713                                 keyvalmap.entryType = entryType;
714                                 
715                                 bool readNext = removeWSAndComma(ifs);
716  
717                                 while (ifs && readNext) {
718
719                                         // read field name
720                                         if (!readTypeOrKey(name, ifs, from_ascii("="), 
721                                                            from_ascii("{}(),"), makeLowerCase) || !ifs)
722                                                 break;
723
724                                         // next char must be an equal sign
725                                         ifs.get(ch);
726                                         if (!ifs)
727                                                 break;
728                                         if (ch != '=') {
729                                                 ifs.putback(ch);
730                                                 break;
731                                         }
732
733                                         // read field value
734                                         if (!readValue(value, ifs, strings))
735                                                 break;
736
737                                         keyvalmap[name] = value;
738                                         data += "\n\n" + value;
739                                         keylist.fieldNames.insert(name);
740                                         readNext = removeWSAndComma(ifs);
741                                 }
742
743                                 // add the new entry
744                                 keylist.entryTypes.insert(entryType);
745                                 keyvalmap.allData = data;
746                                 keyvalmap.isBibTeX = true;
747                                 keyvalmap.bibKey = key;
748                                 keylist[key] = keyvalmap;
749                         }
750                 } //< searching '@'
751         } //< for loop over files
752 }
753
754
755
756 bool InsetBibtex::addDatabase(string const & db)
757 {
758         // FIXME UNICODE
759         string bibfiles(to_utf8(getParam("bibfiles")));
760         if (tokenPos(bibfiles, ',', db) == -1) {
761                 if (!bibfiles.empty())
762                         bibfiles += ',';
763                 setParam("bibfiles", from_utf8(bibfiles + db));
764                 return true;
765         }
766         return false;
767 }
768
769
770 bool InsetBibtex::delDatabase(string const & db)
771 {
772         // FIXME UNICODE
773         string bibfiles(to_utf8(getParam("bibfiles")));
774         if (contains(bibfiles, db)) {
775                 int const n = tokenPos(bibfiles, ',', db);
776                 string bd = db;
777                 if (n > 0) {
778                         // this is not the first database
779                         string tmp = ',' + bd;
780                         setParam("bibfiles", from_utf8(subst(bibfiles, tmp, string())));
781                 } else if (n == 0)
782                         // this is the first (or only) database
783                         setParam("bibfiles", from_utf8(split(bibfiles, bd, ',')));
784                 else
785                         return false;
786         }
787         return true;
788 }
789
790
791 void InsetBibtex::validate(LaTeXFeatures & features) const
792 {
793         if (features.bufferParams().use_bibtopic)
794                 features.require("bibtopic");
795 }
796
797
798 } // namespace lyx