2 * \file InsetBibtex.cpp
3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
6 * \author Alejandro Aguilar Sierra
7 * \author Richard Heck (BibTeX parser improvements)
9 * Full author contact details are available in file CREDITS.
14 #include "InsetBibtex.h"
17 #include "BufferParams.h"
18 #include "DispatchResult.h"
19 #include "support/debug.h"
21 #include "FuncRequest.h"
22 #include "support/gettext.h"
23 #include "LaTeXFeatures.h"
24 #include "MetricsInfo.h"
25 #include "OutputParams.h"
26 #include "TextClass.h"
28 #include "frontends/alert.h"
30 #include "support/ExceptionMessage.h"
31 #include "support/docstream.h"
32 #include "support/FileNameList.h"
33 #include "support/filetools.h"
34 #include "support/lstrings.h"
35 #include "support/lyxlib.h"
36 #include "support/os.h"
37 #include "support/Path.h"
38 #include "support/textutils.h"
40 #include <boost/tokenizer.hpp>
46 using support::absolutePath;
47 using support::ascii_lowercase;
48 using support::changeExtension;
49 using support::contains;
51 using support::DocFileName;
52 using support::FileName;
53 using support::FileNameList;
54 using support::findtexfile;
55 using support::isValidLaTeXFilename;
56 using support::latex_path;
58 using support::makeAbsPath;
59 using support::makeRelPath;
60 using support::prefixIs;
61 using support::removeExtension;
65 using support::tokenPos;
67 using support::lowercase;
69 namespace Alert = frontend::Alert;
70 namespace os = support::os;
/// Constructs the inset from the command parameters \p p, forwarding them
/// to the InsetCommand base together with the name "bibtex".
73 InsetBibtex::InsetBibtex(InsetCommandParams const & p)
74 : InsetCommand(p, "bibtex")
/// Describes the parameters of this inset in a function-local static
/// CommandInfo: three parameters named "options", "btprint" and "bibfiles",
/// of which the first two are flagged optional. The command name argument
/// is unused.
78 CommandInfo const * InsetBibtex::findInfo(std::string const & /* cmdName */)
// three real names; the trailing empty string is presumably a terminator
// (the count stored in info below is 3) -- confirm against CommandInfo users
80 static const char * const paramnames[] =
81 {"options", "btprint", "bibfiles", ""};
// one flag per parameter name, in the same order
82 static const bool isoptional[] = {true, true, false};
83 static const CommandInfo info = {3, paramnames, isoptional};
/// Returns a copy of this inset allocated with new (copy-constructed
/// from *this).
88 Inset * InsetBibtex::clone() const
90 return new InsetBibtex(*this);
/// Handles requests addressed to this inset. LFUN_INSET_MODIFY is treated
/// here; InsetCommand::doDispatch is called for delegation.
/// \param cur cursor whose buffer has its bibfiles cache updated
/// \param cmd the request; its argument carries the new parameters as a string
94 void InsetBibtex::doDispatch(Cursor & cur, FuncRequest & cmd)
98 case LFUN_INSET_MODIFY: {
// parse the request argument into a fresh BIBTEX_CODE parameter set
99 InsetCommandParams p(BIBTEX_CODE);
101 if (!InsetCommandMailer::string2params("bibtex",
102 to_utf8(cmd.argument()), p)) {
106 } catch (support::ExceptionMessage const & message) {
// exceptions of warning type are shown to the user as an alert
107 if (message.type_ == support::WarningException) {
108 Alert::warning(message.title_, message.details_);
// ask the buffer to update its bibfiles cache
115 cur.buffer().updateBibfilesCache();
// hand the request to the base class implementation
120 InsetCommand::doDispatch(cur, cmd);
/// Returns the label text for this inset, passed through the _()
/// translation helper. The Buffer argument is unused.
126 docstring const InsetBibtex::getScreenLabel(Buffer const &) const
128 return _("BibTeX Generated Bibliography");
/// Normalizes the file name \p name (given without the extension \p ext)
/// for use in the LaTeX output.
/// \param buffer    supplies the directory that \p name is resolved against
/// \param runparams output parameters (not used in the lines shown here)
/// \param name      database or style name as entered by the user
/// \param ext       extension appended when probing for the file
134 string normalizeName(Buffer const & buffer, OutputParams const & runparams,
135 string const & name, string const & ext)
// absolute form of name, resolved against the buffer's directory
137 string const fname = makeAbsPath(name, buffer.filePath()).absFilename();
// special case: name is already absolute, or name + ext is not a readable
// file at the resolved location
138 if (absolutePath(name) || !FileName(fname + ext).isReadableFile())
// final case: express the resolved path relative to the master buffer's
// directory
144 return to_utf8(makeRelPath(from_utf8(fname),
145 from_utf8(buffer.masterBuffer()->filePath())));
/// Writes the LaTeX commands for this bibliography inset to \p os.
/// \param buffer    buffer the inset lives in; supplies paths, the master
///                  buffer's temp directory and the document parameters
/// \param os        output stream receiving the LaTeX code
/// \param runparams output flags (inComment, dryrun, nice) that decide
///                  whether files are copied and which warnings are posted
151 int InsetBibtex::latex(Buffer const & buffer, odocstream & os,
152 OutputParams const & runparams) const
154 // the sequence of the commands:
155 // 1. \bibliographystyle{style}
156 // 2. \addcontentsline{...} - if option bibtotoc set
157 // 3. \bibliography{database}
158 // and with bibtopic:
159 // 1. \bibliographystyle{style}
160 // 2. \begin{btSect}{database}
161 // 3. \btPrint{Cited|NotCited|All}
165 // If we are processing the LaTeX file in a temp directory then
166 // copy the .bib databases to this temp directory, mangling their
167 // names in the process. Store this mangled name in the list of
169 // (We need to do all this because BibTeX *really*, *really*
170 // can't handle "files with spaces" and Windows users tend to
171 // use such filenames.)
172 // Otherwise, store the (maybe absolute) path to the original,
173 // unmangled database name.
174 typedef boost::char_separator<char_type> Separator;
175 typedef boost::tokenizer<Separator, docstring::const_iterator, docstring> Tokenizer;
// the "bibfiles" parameter is a comma-separated list of database names
177 Separator const separator(from_ascii(",").c_str());
178 // The tokenizer must not be called with temporary strings, since
179 // it does not make a copy and uses iterators of the string further
180 // down. getParam returns a reference, so this is OK.
181 Tokenizer const tokens(getParam("bibfiles"), separator);
182 Tokenizer::const_iterator const begin = tokens.begin();
183 Tokenizer::const_iterator const end = tokens.end();
// dbs accumulates the database names that end up in the LaTeX output
185 odocstringstream dbs;
186 for (Tokenizer::const_iterator it = begin; it != end; ++it) {
187 docstring const input = trim(*it);
189 string utf8input = to_utf8(input);
191 normalizeName(buffer, runparams, utf8input, ".bib");
// a database readable relative to the buffer's directory is treated as
// not coming from texmf
192 FileName const try_in_file(makeAbsPath(database + ".bib", buffer.filePath()));
193 bool const not_from_texmf = try_in_file.isReadableFile();
195 if (!runparams.inComment && !runparams.dryrun && !runparams.nice &&
198 // mangledFilename() needs the extension
199 DocFileName const in_file = DocFileName(try_in_file);
200 database = removeExtension(in_file.mangledFilename());
// the copy goes to the master buffer's temp directory under the mangled name
201 FileName const out_file = makeAbsPath(database + ".bib",
202 buffer.masterBuffer()->temppath());
204 bool const success = copy(in_file, out_file);
206 lyxerr << "Failed to copy '" << in_file
207 << "' to '" << out_file << "'"
// for "nice" output the original name is kept, so warn about names that
// isValidLaTeXFilename rejects
210 } else if (!runparams.inComment && runparams.nice && not_from_texmf &&
211 !isValidLaTeXFilename(database)) {
212 frontend::Alert::warning(_("Invalid filename"),
213 _("The following filename is likely to cause trouble "
214 "when running the exported file through LaTeX: ") +
215 from_utf8(database));
221 dbs << from_utf8(latex_path(database));
223 docstring const db_out = dbs.str();
225 // Post this warning only once.
226 static bool warned_about_spaces = false;
227 if (!warned_about_spaces &&
228 runparams.nice && db_out.find(' ') != docstring::npos) {
229 warned_about_spaces = true;
231 Alert::warning(_("Export Warning!"),
232 _("There are spaces in the paths to your BibTeX databases.\n"
233 "BibTeX will be unable to find them."));
237 string style = to_utf8(getParam("options")); // maybe empty! and with bibtotoc
// a leading "bibtotoc" is an option, not part of the style name; when a
// comma follows, the remainder after the comma becomes the style
239 if (prefixIs(style, "bibtotoc")) {
240 bibtotoc = "bibtotoc";
241 if (contains(style, ','))
242 style = split(style, bibtotoc, ',');
248 if (!style.empty()) {
249 string base = normalizeName(buffer, runparams, style, ".bst");
250 FileName const try_in_file(makeAbsPath(base + ".bst", buffer.filePath()));
251 bool const not_from_texmf = try_in_file.isReadableFile();
252 // If this style does not come from texmf and we are not
253 // exporting to .tex copy it to the tmp directory.
254 // This prevents problems with spaces and 8bit characters
256 if (!runparams.inComment && !runparams.dryrun && !runparams.nice &&
258 // use new style name
259 DocFileName const in_file = DocFileName(try_in_file);
260 base = removeExtension(in_file.mangledFilename());
261 FileName const out_file(makeAbsPath(base + ".bst",
262 buffer.masterBuffer()->temppath()));
263 bool const success = copy(in_file, out_file);
265 lyxerr << "Failed to copy '" << in_file
266 << "' to '" << out_file << "'"
271 os << "\\bibliographystyle{"
272 << from_utf8(latex_path(normalizeName(buffer, runparams, base, ".bst")))
277 // Post this warning only once.
278 static bool warned_about_bst_spaces = false;
279 if (!warned_about_bst_spaces && runparams.nice && contains(style, ' ')) {
280 warned_about_bst_spaces = true;
281 Alert::warning(_("Export Warning!"),
282 _("There are spaces in the path to your BibTeX style file.\n"
283 "BibTeX will be unable to find it."));
// bibtopic variant: the databases go into a btSect environment
286 if (!db_out.empty() && buffer.params().use_bibtopic){
287 os << "\\begin{btSect}{" << db_out << "}\n";
288 docstring btprint = getParam("btprint");
// NOTE(review): "btPrintCited" looks like the fallback for an empty
// btprint parameter; the guarding condition is not shown here -- confirm
291 btprint = from_ascii("btPrintCited");
292 os << "\\" << btprint << "\n"
293 << "\\end{btSect}\n";
// table-of-contents entry, only without bibtopic
298 if (!bibtotoc.empty() && !buffer.params().use_bibtopic) {
299 // maybe a problem when a textclass has no "art" as
300 // part of its name, because it's then book.
301 // For the "official" lyx-layouts it's no problem to support
303 if (!contains(buffer.params().getTextClass().name(),
305 if (buffer.params().sides == OneSide) {
310 os << "\\cleardoublepage";
314 os << "\\addcontentsline{toc}{chapter}{\\bibname}";
318 os << "\\addcontentsline{toc}{section}{\\refname}";
// plain BibTeX variant: a single \bibliography command with all databases
322 if (!db_out.empty() && !buffer.params().use_bibtopic){
323 os << "\\bibliography{" << db_out << "}\n";
/// Looks up the databases named in the comma-separated "bibfiles"
/// parameter with findtexfile, forcing the "bib" extension on each name.
/// \param buffer supplies the directory used for the PathChanger below
331 FileNameList const InsetBibtex::getFiles(Buffer const & buffer) const
// NOTE(review): PathChanger presumably switches the working directory to
// the buffer's path for the lifetime of p -- confirm in support/Path.h
333 FileName path(buffer.filePath());
334 support::PathChanger p(path);
340 string bibfiles = to_utf8(getParam("bibfiles"));
// split() puts the first name into tmp and returns the rest of the list
341 bibfiles = split(bibfiles, tmp, ',');
342 while (!tmp.empty()) {
343 FileName const file = findtexfile(changeExtension(tmp, "bib"), "bib");
344 LYXERR(Debug::LATEX, "Bibfile: " << file);
346 // If we didn't find a matching file name just fail silently
350 // Get next file name
351 bibfiles = split(bibfiles, tmp, ',');
359 // methods for parsing bibtex files
/// Map type used by the parser for @string definitions: fillWithBibKeys
/// stores name -> value pairs in it and readValue looks names up in it.
361 typedef map<docstring, docstring> VarMap;
363 /// remove whitespace characters, optionally a single comma,
364 /// and further whitespace characters from the stream.
365 /// @return true if a comma was found, false otherwise
/// \param ifs stream positioned after a value; read character by character
367 bool removeWSAndComma(idocfstream & ifs) {
// end of a loop that reads while the stream is good and the character is
// whitespace
376 } while (ifs && isSpace(ch));
// end of a second loop of the same form
389 } while (ifs && isSpace(ch));
404 /// remove whitespace characters, read character sequence
405 /// not containing whitespace characters or characters in
406 /// delimChars, and remove further whitespace characters.
408 /// @return true if a string of length > 0 could be read.
/// \param val          receives the characters that were read
/// \param ifs          input stream, consumed character by character
/// \param delimChars   characters that end the sequence
/// \param illegalChars characters that must not occur in the sequence
410 bool readTypeOrKey(docstring & val, idocfstream & ifs,
411 docstring const & delimChars, docstring const &illegalChars,
// end of the loop that skips leading whitespace
424 } while (ifs && isSpace(ch));
// Collect characters until whitespace, a delimiter or an illegal character
// is met; legalChar is set to false when an illegal character stops the loop.
430 bool legalChar = true;
431 while (ifs && !isSpace(ch) &&
432 delimChars.find(ch) == docstring::npos &&
433 (legalChar = (illegalChars.find(ch) == docstring::npos))
436 if (chCase == makeLowerCase)
437 val += lowercase(ch);
// skip whitespace following the sequence
449 while (ifs && isSpace(ch)) {
// an empty sequence counts as failure
457 return val.length() > 0;
460 /// read subsequent bibtex values that are delimited with a #-character.
461 /// Concatenate all parts and replace names with the associated string in
462 /// the variable strings.
463 /// @return true if reading was successful (all single parts were delimited
/// \param val     receives the value that was read
/// \param ifs     input stream, consumed character by character
/// \param strings previously defined @string names and their values, used
///                to resolve string names that appear in the value
465 bool readValue(docstring & val, idocfstream & ifs, const VarMap & strings) {
478 } while (ifs && isSpace(ch));
483 // check for field type
486 // read integer value
490 } while (ifs && isDigit(ch));
495 } else if (ch == '"' || ch == '{') {
// a value opened with '"' is closed by '"', one opened with '{' by '}'
497 char_type delim = ch == '"' ? '"': '}';
502 } while (ifs && isSpace(ch));
507 //We now have the first non-whitespace character
508 //We'll collapse adjacent whitespace.
509 bool lastWasWhiteSpace = false;
511 // inside this delimited text braces must match.
512 // Thus we can have a closing delimiter only
513 // when nestLevel == 0
516 while (ifs && (nestLevel > 0 || ch != delim)) {
518 lastWasWhiteSpace = true;
522 //We output the space only after we stop getting
523 //whitespace so as not to output any whitespace
524 //at the end of the value.
525 if (lastWasWhiteSpace) {
526 lastWasWhiteSpace = false;
532 // update nesting level
// a negative nesting level makes the whole read fail
539 if (nestLevel < 0) return false;
556 // reading a string name
// the name ends at whitespace, '#', ',', '}' or ')'; it is lower-cased
// before the lookup in strings
559 while (ifs && !isSpace(ch) && ch != '#' && ch != ',' && ch != '}' && ch != ')') {
560 strName += lowercase(ch);
567 // replace the string with its assigned value or
568 // discard it if it's not assigned
569 if (strName.length()) {
570 VarMap::const_iterator pos = strings.find(strName);
571 if (pos != strings.end()) {
// skip whitespace after this part of the value
578 while (ifs && isSpace(ch)) {
585 // continue reading next value on concatenate with '#'
595 // This method fills keylist with the entries parsed from the BibTeX databases
/// Parses every file returned by getFiles(buffer) and records the entries
/// found in \p keylist.
/// \param buffer  supplies the database files and the encoding used to
///                read them
/// \param keylist receives one BibTeXInfo per citation key, plus the field
///                names and entry types that were seen
/// The InsetIterator argument is unused.
596 void InsetBibtex::fillWithBibKeys(Buffer const & buffer,
597 BiblioInfo & keylist, InsetIterator const & /*di*/) const
599 FileNameList const files = getFiles(buffer);
600 for (vector<FileName>::const_iterator it = files.begin();
601 it != files.end(); ++ it) {
602 // This bibtex parser is a first step to parse bibtex files
605 // - it reads the whole bibtex entry and does a syntax check
606 // (matching delimiters, missing commas,...
607 // - it recovers from errors starting with the next @-character
608 // - it reads @string definitions and replaces them in the
610 // - it accepts more characters in keys or value names than
613 // Officially bibtex does only support ASCII, but in practice
614 // you can use the encoding of the main document as long as
615 // some elements like keys and names are pure ASCII. Therefore
616 // we convert the file from the buffer encoding.
617 // We don't restrict keys to ASCII in LyX, since our own
618 // InsetBibitem can generate non-ASCII keys, and nonstandard
619 // 8bit clean bibtex forks exist.
621 idocfstream ifs(it->toFilesystemEncoding().c_str(),
623 buffer.params().encoding().iconvName());
// the entry type is read lower-cased and ends at '{' or '('
639 if (!readTypeOrKey(entryType, ifs, from_ascii("{("),
640 docstring(), makeLowerCase) || !ifs)
// @comment: the rest of the line is ignored
643 if (entryType == from_ascii("comment")) {
645 ifs.ignore(std::numeric_limits<int>::max(), '\n');
653 if ((ch != '(') && (ch != '{')) {
654 // invalid entry delimiter
660 if (entryType == from_ascii("string")) {
662 // read string and add it to the strings map
663 // (or replace its old value)
667 if (!readTypeOrKey(name, ifs, from_ascii("="),
668 from_ascii("#{}(),"), makeLowerCase) || !ifs)
671 // next char must be an equal sign
673 if (!ifs || ch != '=')
676 if (!readValue(value, ifs, strings))
679 strings[name] = value;
681 } else if (entryType == from_ascii("preamble")) {
683 // preamble definitions are discarded.
684 // can they be of any use in lyx?
687 if (!readValue(value, ifs, strings))
692 // Citation entry. Try to read the key.
// the key keeps its case, ends at ',' and must not contain '}'
695 if (!readTypeOrKey(key, ifs, from_ascii(","),
696 from_ascii("}"), keepCase) || !ifs)
699 /////////////////////////////////////////////
700 // now we have a key, so we will add an entry
701 // (even if it's empty, as bibtex does)
703 // we now read the field = value pairs.
704 // all items must be separated by a comma. If
705 // it is missing the scanning of this entry is
706 // stopped and the next is searched.
710 docstring commaNewline;
712 BibTeXInfo keyvalmap;
713 keyvalmap.entryType = entryType;
715 bool readNext = removeWSAndComma(ifs);
// one iteration per field = value pair; continues while a comma was found
717 while (ifs && readNext) {
720 if (!readTypeOrKey(name, ifs, from_ascii("="),
721 from_ascii("{}(),"), makeLowerCase) || !ifs)
724 // next char must be an equal sign
734 if (!readValue(value, ifs, strings))
// store the field and append its value to the entry's combined data string
737 keyvalmap[name] = value;
738 data += "\n\n" + value;
739 keylist.fieldNames.insert(name);
740 readNext = removeWSAndComma(ifs);
// publish the finished entry under its key
744 keylist.entryTypes.insert(entryType);
745 keyvalmap.allData = data;
746 keyvalmap.isBibTeX = true;
747 keyvalmap.bibKey = key;
748 keylist[key] = keyvalmap;
751 } //< for loop over files
/// Adds the database \p db to the comma-separated "bibfiles" parameter
/// when tokenPos does not find it in the current list.
/// \param db database name to add
756 bool InsetBibtex::addDatabase(string const & db)
759 string bibfiles(to_utf8(getParam("bibfiles")));
// -1 from tokenPos means db is not among the comma-separated tokens
760 if (tokenPos(bibfiles, ',', db) == -1) {
761 if (!bibfiles.empty())
763 setParam("bibfiles", from_utf8(bibfiles + db));
/// Removes the database \p db from the comma-separated "bibfiles"
/// parameter.
/// \param db database name to remove
770 bool InsetBibtex::delDatabase(string const & db)
773 string bibfiles(to_utf8(getParam("bibfiles")));
// NOTE(review): contains() looks like a plain substring test, so db may
// also match inside a longer database name; n below is the token-based
// position -- confirm that the cases agree
774 if (contains(bibfiles, db)) {
775 int const n = tokenPos(bibfiles, ',', db);
778 // this is not the first database
// NOTE(review): bd has no visible declaration in this block; presumably a
// copy of db -- confirm. subst() presumably replaces every occurrence of
// ',' + bd, which would also hit names that merely start with bd -- verify
779 string tmp = ',' + bd;
780 setParam("bibfiles", from_utf8(subst(bibfiles, tmp, string())));
782 // this is the first (or only) database
783 setParam("bibfiles", from_utf8(split(bibfiles, bd, ',')));
/// Registers the LaTeX requirements of this inset: "bibtopic" is required
/// when the buffer parameters have use_bibtopic set.
791 void InsetBibtex::validate(LaTeXFeatures & features) const
793 if (features.bufferParams().use_bibtopic)
794 features.require("bibtopic");