]> git.lyx.org Git - lyx.git/blob - src/insets/InsetBibtex.cpp
Last (?) batch of renames:
[lyx.git] / src / insets / InsetBibtex.cpp
1 /**
2  * \file InsetBibtex.cpp
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author Alejandro Aguilar Sierra
7  *
8  * Full author contact details are available in file CREDITS.
9  */
10
11 #include <config.h>
12
13 #include "InsetBibtex.h"
14
15 #include "Buffer.h"
16 #include "BufferParams.h"
17 #include "DispatchResult.h"
18 #include "debug.h"
19 #include "Encoding.h"
20 #include "FuncRequest.h"
21 #include "gettext.h"
22 #include "LaTeXFeatures.h"
23 #include "MetricsInfo.h"
24 #include "OutputParams.h"
25
26 #include "frontends/alert.h"
27
28 #include "support/filetools.h"
29 #include "support/lstrings.h"
30 #include "support/lyxlib.h"
31 #include "support/os.h"
32 #include "support/Path.h"
33 #include "support/textutils.h"
34
35 #include <boost/tokenizer.hpp>
36
37
38 namespace lyx {
39
40 using support::absolutePath;
41 using support::ascii_lowercase;
42 using support::changeExtension;
43 using support::contains;
44 using support::copy;
45 using support::DocFileName;
46 using support::FileName;
47 using support::findtexfile;
48 using support::isFileReadable;
49 using support::latex_path;
50 using support::ltrim;
51 using support::makeAbsPath;
52 using support::makeRelPath;
53 using support::prefixIs;
54 using support::removeExtension;
55 using support::rtrim;
56 using support::split;
57 using support::subst;
58 using support::tokenPos;
59 using support::trim;
60 using support::lowercase;
61
62 namespace Alert = frontend::Alert;
63 namespace os = support::os;
64
65 using std::endl;
66 using std::getline;
67 using std::string;
68 using std::ostream;
69 using std::pair;
70 using std::vector;
71 using std::map;
72
73
74 InsetBibtex::InsetBibtex(InsetCommandParams const & p)
75         : InsetCommand(p, "bibtex")
76 {}
77
78
79 std::auto_ptr<InsetBase> InsetBibtex::doClone() const
80 {
81         return std::auto_ptr<InsetBase>(new InsetBibtex(*this));
82 }
83
84
85 void InsetBibtex::doDispatch(Cursor & cur, FuncRequest & cmd)
86 {
87         switch (cmd.action) {
88
89         case LFUN_INSET_MODIFY: {
90                 InsetCommandParams p("bibtex");
91                 InsetCommandMailer::string2params("bibtex", to_utf8(cmd.argument()), p);
92                 if (!p.getCmdName().empty()) {
93                         setParams(p);
94                         cur.buffer().updateBibfilesCache();
95                 } else
96                         cur.noUpdate();
97                 break;
98         }
99
100         default:
101                 InsetCommand::doDispatch(cur, cmd);
102                 break;
103         }
104 }
105
106
107 docstring const InsetBibtex::getScreenLabel(Buffer const &) const
108 {
109         return _("BibTeX Generated Bibliography");
110 }
111
112
113 namespace {
114
115 string normalize_name(Buffer const & buffer, OutputParams const & runparams,
116                       string const & name, string const & ext)
117 {
118         string const fname = makeAbsPath(name, buffer.filePath()).absFilename();
119         if (absolutePath(name) || !isFileReadable(FileName(fname + ext)))
120                 return name;
121         else if (!runparams.nice)
122                 return fname;
123         else
124                 // FIXME UNICODE
125                 return to_utf8(makeRelPath(from_utf8(fname),
126                                            from_utf8(buffer.getMasterBuffer()->filePath())));
127 }
128
129 }
130
131
132 int InsetBibtex::latex(Buffer const & buffer, odocstream & os,
133                        OutputParams const & runparams) const
134 {
135         // the sequence of the commands:
136         // 1. \bibliographystyle{style}
137         // 2. \addcontentsline{...} - if option bibtotoc set
138         // 3. \bibliography{database}
139         // and with bibtopic:
140         // 1. \bibliographystyle{style}
141         // 2. \begin{btSect}{database}
142         // 3. \btPrint{Cited|NotCited|All}
143         // 4. \end{btSect}
144
145         // Database(s)
146         // If we are processing the LaTeX file in a temp directory then
147         // copy the .bib databases to this temp directory, mangling their
148         // names in the process. Store this mangled name in the list of
149         // all databases.
150         // (We need to do all this because BibTeX *really*, *really*
151         // can't handle "files with spaces" and Windows users tend to
152         // use such filenames.)
153         // Otherwise, store the (maybe absolute) path to the original,
154         // unmangled database name.
155         typedef boost::char_separator<char_type> Separator;
156         typedef boost::tokenizer<Separator, docstring::const_iterator, docstring> Tokenizer;
157
158         Separator const separator(from_ascii(",").c_str());
159         // The tokenizer must not be called with temporary strings, since
160         // it does not make a copy and uses iterators of the string further
161         // down. getParam returns a reference, so this is OK.
162         Tokenizer const tokens(getParam("bibfiles"), separator);
163         Tokenizer::const_iterator const begin = tokens.begin();
164         Tokenizer::const_iterator const end = tokens.end();
165
166         odocstringstream dbs;
167         for (Tokenizer::const_iterator it = begin; it != end; ++it) {
168                 docstring const input = trim(*it);
169                 // FIXME UNICODE
170                 string utf8input(to_utf8(input));
171                 string database =
172                         normalize_name(buffer, runparams, utf8input, ".bib");
173                 FileName const try_in_file(makeAbsPath(database + ".bib", buffer.filePath()));
174                 bool const not_from_texmf = isFileReadable(try_in_file);
175
176                 if (!runparams.inComment && !runparams.dryrun && !runparams.nice &&
177                     not_from_texmf) {
178
179                         // mangledFilename() needs the extension
180                         DocFileName const in_file = DocFileName(try_in_file);
181                         database = removeExtension(in_file.mangledFilename());
182                         FileName const out_file(makeAbsPath(database + ".bib",
183                                         buffer.getMasterBuffer()->temppath()));
184
185                         bool const success = copy(in_file, out_file);
186                         if (!success) {
187                                 lyxerr << "Failed to copy '" << in_file
188                                        << "' to '" << out_file << "'"
189                                        << endl;
190                         }
191                 }
192
193                 if (it != begin)
194                         dbs << ',';
195                 // FIXME UNICODE
196                 dbs << from_utf8(latex_path(database));
197         }
198         docstring const db_out = dbs.str();
199
200         // Post this warning only once.
201         static bool warned_about_spaces = false;
202         if (!warned_about_spaces &&
203             runparams.nice && db_out.find(' ') != docstring::npos) {
204                 warned_about_spaces = true;
205
206                 Alert::warning(_("Export Warning!"),
207                                _("There are spaces in the paths to your BibTeX databases.\n"
208                                               "BibTeX will be unable to find them."));
209
210         }
211
212         // Style-Options
213         string style = to_utf8(getParam("options")); // maybe empty! and with bibtotoc
214         string bibtotoc;
215         if (prefixIs(style, "bibtotoc")) {
216                 bibtotoc = "bibtotoc";
217                 if (contains(style, ',')) {
218                         style = split(style, bibtotoc, ',');
219                 }
220         }
221
222         // line count
223         int nlines = 0;
224
225         if (!style.empty()) {
226                 string base =
227                         normalize_name(buffer, runparams, style, ".bst");
228                 FileName const try_in_file(makeAbsPath(base + ".bst", buffer.filePath()));
229                 bool const not_from_texmf = isFileReadable(try_in_file);
230                 // If this style does not come from texmf and we are not
231                 // exporting to .tex copy it to the tmp directory.
232                 // This prevents problems with spaces and 8bit charcaters
233                 // in the file name.
234                 if (!runparams.inComment && !runparams.dryrun && !runparams.nice &&
235                     not_from_texmf) {
236                         // use new style name
237                         DocFileName const in_file = DocFileName(try_in_file);
238                         base = removeExtension(in_file.mangledFilename());
239                         FileName const out_file(makeAbsPath(base + ".bst",
240                                         buffer.getMasterBuffer()->temppath()));
241                         bool const success = copy(in_file, out_file);
242                         if (!success) {
243                                 lyxerr << "Failed to copy '" << in_file
244                                        << "' to '" << out_file << "'"
245                                        << endl;
246                         }
247                 }
248                 // FIXME UNICODE
249                 os << "\\bibliographystyle{"
250                    << from_utf8(latex_path(normalize_name(buffer, runparams, base, ".bst")))
251                    << "}\n";
252                 nlines += 1;
253         }
254
255         // Post this warning only once.
256         static bool warned_about_bst_spaces = false;
257         if (!warned_about_bst_spaces && runparams.nice && contains(style, ' ')) {
258                 warned_about_bst_spaces = true;
259                 Alert::warning(_("Export Warning!"),
260                                _("There are spaces in the path to your BibTeX style file.\n"
261                                               "BibTeX will be unable to find it."));
262         }
263
264         if (!db_out.empty() && buffer.params().use_bibtopic){
265                 os << "\\begin{btSect}{" << db_out << "}\n";
266                 docstring btprint = getParam("btprint");
267                 if (btprint.empty())
268                         // default
269                         btprint = from_ascii("btPrintCited");
270                 os << "\\" << btprint << "\n"
271                    << "\\end{btSect}\n";
272                 nlines += 3;
273         }
274
275         // bibtotoc-Option
276         if (!bibtotoc.empty() && !buffer.params().use_bibtopic) {
277                 // maybe a problem when a textclass has no "art" as
278                 // part of its name, because it's than book.
279                 // For the "official" lyx-layouts it's no problem to support
280                 // all well
281                 if (!contains(buffer.params().getLyXTextClass().name(),
282                               "art")) {
283                         if (buffer.params().sides == LyXTextClass::OneSide) {
284                                 // oneside
285                                 os << "\\clearpage";
286                         } else {
287                                 // twoside
288                                 os << "\\cleardoublepage";
289                         }
290
291                         // bookclass
292                         os << "\\addcontentsline{toc}{chapter}{\\bibname}";
293
294                 } else {
295                         // article class
296                         os << "\\addcontentsline{toc}{section}{\\refname}";
297                 }
298         }
299
300         if (!db_out.empty() && !buffer.params().use_bibtopic){
301                 os << "\\bibliography{" << db_out << "}\n";
302                 nlines += 1;
303         }
304
305         return nlines;
306 }
307
308
309 vector<FileName> const InsetBibtex::getFiles(Buffer const & buffer) const
310 {
311         FileName path(buffer.filePath());
312         support::Path p(path);
313
314         vector<FileName> vec;
315
316         string tmp;
317         // FIXME UNICODE
318         string bibfiles = to_utf8(getParam("bibfiles"));
319         bibfiles = split(bibfiles, tmp, ',');
320         while (!tmp.empty()) {
321                 FileName const file = findtexfile(changeExtension(tmp, "bib"), "bib");
322                 LYXERR(Debug::LATEX) << "Bibfile: " << file << endl;
323
324                 // If we didn't find a matching file name just fail silently
325                 if (!file.empty())
326                         vec.push_back(file);
327
328                 // Get next file name
329                 bibfiles = split(bibfiles, tmp, ',');
330         }
331
332         return vec;
333 }
334
335 namespace {
336
337         // methods for parsing bibtex files
338
339         typedef map<docstring, docstring> VarMap;
340
341         /// remove whitespace characters, optionally a single comma, 
342         /// and further whitespace characters from the stream.
343         /// @return true if a comma was found, false otherwise
344         ///
345         bool removeWSAndComma(idocfstream & ifs) {
346                 char_type ch;
347
348                 if (!ifs) 
349                         return false;
350
351                 // skip whitespace
352                 do {
353                         ifs.get(ch);
354                 } while (ifs && isSpace(ch));
355
356                 if (!ifs) 
357                         return false;
358
359                 if (ch != ',') {
360                         ifs.putback(ch);
361                         return false;
362                 }
363
364                 // skip whitespace
365                 do {
366                         ifs.get(ch);
367                 } while (ifs && isSpace(ch));
368
369                 if (ifs) {
370                         ifs.putback(ch);
371                 }
372
373                 return true;
374         }
375
376
377         enum charCase {
378                 makeLowerCase,
379                 keepCase
380         };
381
382         /// remove whitespace characters, read characer sequence
383         /// not containing whitespace characters or characters in
384         /// delimChars, and remove further whitespace characters.
385         ///
386         /// @return true if a string of length > 0 could be read.
387         /// 
388         bool readTypeOrKey(docstring & val, idocfstream & ifs, 
389                 docstring const & delimChars, charCase chCase) {
390
391                 char_type ch;
392
393                 val.clear();
394
395                 if (!ifs) 
396                         return false;
397
398                 // skip whitespace
399                 do {
400                         ifs.get(ch);
401                 } while (ifs && isSpace(ch));
402
403                 if (!ifs) 
404                         return false;
405
406                 // read value 
407                 while (ifs && !isSpace(ch) && delimChars.find(ch) == docstring::npos) {
408                         if (chCase == makeLowerCase) {
409                                 val += lowercase(ch);
410                         } else {
411                                 val += ch;
412                         }
413                         ifs.get(ch);
414                 }
415
416                 // skip whitespace
417                 while (ifs && isSpace(ch)) {
418                         ifs.get(ch);
419                 }
420
421                 if (ifs) {
422                         ifs.putback(ch);
423                 }
424
425                 return val.length() > 0;
426         }
427
428         /// read subsequent bibtex values that are delimited with a #-character.
429         /// Concatenate all parts and replace names with the associated string in 
430         /// the variable strings.
431         /// @return true if reading was successfull (all single parts were delimited
432         /// correctly)
433         bool readValue(docstring & val, idocfstream & ifs, const VarMap & strings) {
434
435                 char_type ch;
436
437                 val.clear();
438
439                 if (!ifs) 
440                         return false;
441
442                 do {
443                         // skip whitespace
444                         do {
445                                 ifs.get(ch);
446                         } while (ifs && isSpace(ch));
447
448                         if (!ifs)
449                                 return false;
450
451                         // check for field type
452                         if (isDigit(ch)) {
453
454                                 // read integer value
455                                 do {
456                                         val += ch;
457                                         ifs.get(ch);
458                                 } while (ifs && isDigit(ch));
459
460                                 if (!ifs)
461                                         return false;
462
463                         } else if (ch == '"' || ch == '{') {
464
465                                 // read delimited text - set end delimiter
466                                 char_type delim = ch == '"'? '"': '}';
467
468                                 // inside this delimited text braces must match.
469                                 // Thus we can have a closing delimiter only
470                                 // when nestLevel == 0
471                                 int nestLevel = 0;
472
473                                 ifs.get(ch);
474                                 while (ifs && (nestLevel > 0 || ch != delim)) {
475                                         val += ch;
476                                         
477                                         // update nesting level
478                                         switch (ch) {
479                                                 case '{':
480                                                         ++nestLevel;
481                                                         break;
482                                                 case '}':
483                                                         --nestLevel;
484                                                         if (nestLevel < 0) return false;
485                                                         break;
486                                         }
487
488                                         ifs.get(ch);
489                                 }
490
491                                 if (!ifs)
492                                         return false;
493
494                                 ifs.get(ch);
495
496                                 if (!ifs)
497                                         return false;
498
499                         } else {
500
501                                 // reading a string name
502                                 docstring strName;
503
504                                 while (ifs && !isSpace(ch) && ch != '#' && ch != ',' && ch != '}' && ch != ')') {
505                                         strName += lowercase(ch);
506                                         ifs.get(ch);
507                                 }
508
509                                 if (!ifs)
510                                         return false;
511
512                                 // replace the string with its assigned value or
513                                 // discard it if it's not assigned
514                                 if (strName.length()) {
515                                         VarMap::const_iterator pos = strings.find(strName);
516                                         if (pos != strings.end()) {
517                                                 val += pos->second;
518                                         }
519                                 }
520                         }
521
522                         // skip WS
523                         while (ifs && isSpace(ch)) {
524                                 ifs.get(ch);
525                         }
526
527                         if (!ifs)
528                                 return false;
529
530                         // continue reading next value on concatenate with '#'
531                 } while (ch == '#');  
532
533                 ifs.putback(ch);
534
535                 return true;
536         }
537 }
538
539
540 // This method returns a comma separated list of Bibtex entries
541 void InsetBibtex::fillWithBibKeys(Buffer const & buffer,
542                 std::vector<std::pair<string, docstring> > & keys) const
543 {
544         vector<FileName> const files = getFiles(buffer);
545         for (vector<FileName>::const_iterator it = files.begin();
546              it != files.end(); ++ it) {
547                 // This bibtex parser is a first step to parse bibtex files
548                 // more precisely. 
549                 // 
550                 // - it reads the whole bibtex entry and does a syntax check
551                 //   (matching delimiters, missing commas,...
552                 // - it recovers from errors starting with the next @-character
553                 // - it reads @string definitions and replaces them in the 
554                 //   field values.
555                 // - it accepts more characters in keys or value names than 
556                 //   bibtex does.
557                 //
558                 // TODOS:
559                 // - the entries are split into name = value pairs by the 
560                 //   parser. These have to be merged again because of the 
561                 //   way lyx treats the entries ( pair<...>(...) ). The citation
562                 //   mechanism in lyx should be changed such that it can use
563                 //   the split entries.
564                 // - messages on parsing errors can be generated.
565                 //
566
567                 // Officially bibtex does only support ASCII, but in practice
568                 // you can use the encoding of the main document as long as
569                 // some elements like keys and names are pure ASCII. Therefore
570                 // we convert the file from the buffer encoding.
571                 // We don't restrict keys to ASCII in LyX, since our own
572                 // InsetBibitem can generate non-ASCII keys, and nonstandard
573                 // 8bit clean bibtex forks exist.
574                 idocfstream ifs(it->toFilesystemEncoding().c_str(),
575                                 std::ios_base::in,
576                                 buffer.params().encoding().iconvName());
577                 
578                 char_type ch;
579                 VarMap strings;
580
581                 while (ifs) {
582
583                         ifs.get(ch);
584                         if (!ifs) 
585                                 break;
586
587                         if (ch != '@') 
588                                 continue;
589
590                         docstring entryType;
591
592                         if (!readTypeOrKey(entryType, ifs, from_ascii("{("), makeLowerCase) || !ifs)
593                                 continue;
594
595                         if (entryType == from_ascii("comment")) {
596
597                                 ifs.ignore(std::numeric_limits<int>::max(), '\n');
598                                 continue;
599                         } 
600
601                         ifs.get(ch);
602                         if (!ifs) 
603                                 break;
604
605                         if ((ch != '(') && (ch != '{')) {
606                                 // invalid entry delimiter
607                                 ifs.putback(ch);
608                                 continue;
609                         }
610
611                         // process the entry
612                         if (entryType == from_ascii("string")) {
613
614                                 // read string and add it to the strings map 
615                                 // (or replace it's old value)
616                                 docstring name;
617                                 docstring value;
618
619                                 if (!readTypeOrKey(name, ifs, from_ascii("#=}),"), makeLowerCase) || !ifs)
620                                         continue;
621
622                                 ifs.get(ch);
623                                 if (!ifs || ch != '=')
624                                         continue;
625
626                                 if (!readValue(value, ifs, strings))
627                                         continue;
628
629                                 strings[name] = value;
630
631                         } else if (entryType == from_ascii("preamble")) {
632
633                                 // preamble definitions are discarded. 
634                                 // can they be of any use in lyx?
635                                 docstring value;
636
637                                 if (!readValue(value, ifs, strings))
638                                         continue;
639
640                         } else {
641
642                                 // Citation entry. Read the key and all name = value pairs
643                                 docstring key;
644                                 docstring fields;
645                                 docstring name;
646                                 docstring value;
647                                 docstring commaNewline;
648
649                                 if (!readTypeOrKey(key, ifs, from_ascii(",})"), keepCase) || !ifs)
650                                         continue;
651
652                                 // now we have a key, so we will add an entry 
653                                 // (even if it's empty, as bibtex does)
654                                 // 
655                                 // all items must be separated by a comma. If
656                                 // it is missing the scanning of this entry is
657                                 // stopped and the next is searched.
658                                 bool readNext = removeWSAndComma(ifs);
659
660                                 while (ifs && readNext) {
661
662                                         // read field name
663                                         if (!readTypeOrKey(name, ifs, from_ascii("=}),"), makeLowerCase) || !ifs)
664                                                 break;
665
666                                         // next char must be an equal sign
667                                         ifs.get(ch);
668                                         if (!ifs)
669                                                 break;
670                                         if (ch != '=') {
671                                                 ifs.putback(ch);
672                                                 break;
673                                         }
674
675                                         // read field value
676                                         if (!readValue(value, ifs, strings)) 
677                                                 break;
678
679                                         // append field to the total entry string.
680                                         //
681                                         // TODO: Here is where the fields can be put in 
682                                         //       a more intelligent structure that preserves
683                                         //           the already known parts.
684                                         fields += commaNewline;
685                                         fields += name + from_ascii(" = {") + value + '}';
686
687                                         if (!commaNewline.length()) 
688                                                 commaNewline = from_ascii(",\n"); 
689
690                                         readNext = removeWSAndComma(ifs);
691                                 }
692
693                                 // add the new entry
694                                 keys.push_back(pair<string, docstring>(
695                                 to_utf8(key), fields));
696                         }
697
698                 } //< searching '@'
699
700         } //< for loop over files
701 }
702
703
704
705 bool InsetBibtex::addDatabase(string const & db)
706 {
707         // FIXME UNICODE
708         string bibfiles(to_utf8(getParam("bibfiles")));
709         if (tokenPos(bibfiles, ',', db) == -1) {
710                 if (!bibfiles.empty())
711                         bibfiles += ',';
712                 setParam("bibfiles", from_utf8(bibfiles + db));
713                 return true;
714         }
715         return false;
716 }
717
718
719 bool InsetBibtex::delDatabase(string const & db)
720 {
721         // FIXME UNICODE
722         string bibfiles(to_utf8(getParam("bibfiles")));
723         if (contains(bibfiles, db)) {
724                 int const n = tokenPos(bibfiles, ',', db);
725                 string bd = db;
726                 if (n > 0) {
727                         // this is not the first database
728                         string tmp = ',' + bd;
729                         setParam("bibfiles", from_utf8(subst(bibfiles, tmp, string())));
730                 } else if (n == 0)
731                         // this is the first (or only) database
732                         setParam("bibfiles", from_utf8(split(bibfiles, bd, ',')));
733                 else
734                         return false;
735         }
736         return true;
737 }
738
739
740 void InsetBibtex::validate(LaTeXFeatures & features) const
741 {
742         if (features.bufferParams().use_bibtopic)
743                 features.require("bibtopic");
744 }
745
746
747 } // namespace lyx