#include "bufferparams.h"
#include "dispatchresult.h"
#include "debug.h"
+#include "encoding.h"
#include "funcrequest.h"
-#include "LaTeXFeatures.h"
#include "gettext.h"
+#include "LaTeXFeatures.h"
#include "metricsinfo.h"
#include "outputparams.h"
+#include "frontends/Alert.h"
+
#include "support/filetools.h"
#include "support/lstrings.h"
+#include "support/lyxlib.h"
#include "support/os.h"
#include "support/path.h"
-
-#include <fstream>
-
-using lyx::support::AbsolutePath;
-using lyx::support::ascii_lowercase;
-using lyx::support::ChangeExtension;
-using lyx::support::contains;
-using lyx::support::findtexfile;
-using lyx::support::IsFileReadable;
-using lyx::support::ltrim;
-using lyx::support::MakeAbsPath;
-using lyx::support::MakeRelPath;
-using lyx::support::Path;
-using lyx::support::prefixIs;
-using lyx::support::rtrim;
-using lyx::support::split;
-using lyx::support::subst;
-using lyx::support::tokenPos;
-using lyx::support::trim;
-
-namespace os = lyx::support::os;
+#include "support/textutils.h"
+
+#include <boost/tokenizer.hpp>
+
+
+namespace lyx {
+
+using support::absolutePath;
+using support::ascii_lowercase;
+using support::changeExtension;
+using support::contains;
+using support::copy;
+using support::DocFileName;
+using support::FileName;
+using support::findtexfile;
+using support::isFileReadable;
+using support::latex_path;
+using support::ltrim;
+using support::makeAbsPath;
+using support::makeRelPath;
+using support::prefixIs;
+using support::removeExtension;
+using support::rtrim;
+using support::split;
+using support::subst;
+using support::tokenPos;
+using support::trim;
+using support::lowercase;
+
+namespace Alert = frontend::Alert;
+namespace os = support::os;
using std::endl;
using std::getline;
using std::string;
-using std::ifstream;
using std::ostream;
using std::pair;
using std::vector;
+using std::map;
InsetBibtex::InsetBibtex(InsetCommandParams const & p)
switch (cmd.action) {
case LFUN_INSET_MODIFY: {
- InsetCommandParams p;
- InsetCommandMailer::string2params("bibtex", cmd.argument, p);
- if (!p.getCmdName().empty())
+ InsetCommandParams p("bibtex");
+ InsetCommandMailer::string2params("bibtex", to_utf8(cmd.argument()), p);
+ if (!p.getCmdName().empty()) {
setParams(p);
+ cur.buffer().updateBibfilesCache();
+ } else
+ cur.noUpdate();
break;
}
}
-string const InsetBibtex::getScreenLabel(Buffer const &) const
+docstring const InsetBibtex::getScreenLabel(Buffer const &) const
{
- return _("BibTeX Generated References");
+ return _("BibTeX Generated Bibliography");
}
/// Normalize a (bib/bst) file name for use in the exported LaTeX file.
///
/// \param buffer    document the inset belongs to (supplies the base path)
/// \param runparams export parameters; \c nice means "export next to the
///                  document" rather than into a temp directory
/// \param name      file name as entered by the user (may be relative)
/// \param ext       extension to probe for readability (e.g. ".bib")
/// \return \p name unchanged if it is absolute or not readable relative to
///         the buffer (then it is presumably found via texmf); otherwise an
///         absolute path (temp-dir export) or a path relative to the master
///         buffer (nice export).
string normalize_name(Buffer const & buffer, OutputParams const & runparams,
                      string const & name, string const & ext)
{
	string const fname = makeAbsPath(name, buffer.filePath()).absFilename();
	if (absolutePath(name) || !isFileReadable(FileName(fname + ext)))
		return name;
	else if (!runparams.nice)
		return fname;
	else
		// FIXME UNICODE
		return to_utf8(makeRelPath(from_utf8(fname),
		                           from_utf8(buffer.getMasterBuffer()->filePath())));
}
}
-int InsetBibtex::latex(Buffer const & buffer, ostream & os,
+int InsetBibtex::latex(Buffer const & buffer, odocstream & os,
OutputParams const & runparams) const
{
// the sequence of the commands:
// 3. \btPrint{Cited|NotCited|All}
// 4. \end{btSect}
- // the database string
- string adb;
- string db_in = getContents();
- db_in = split(db_in, adb, ',');
- // If we generate in a temp dir, we might need to give an
- // absolute path there. This is a bit complicated since we can
- // have a comma-separated list of bibliographies
- string db_out;
- while (!adb.empty()) {
- db_out += os::external_path(normalize_name(buffer, runparams,
- adb, ".bib"));
- db_out += ',';
- db_in = split(db_in, adb,',');
+ // Database(s)
+ // If we are processing the LaTeX file in a temp directory then
+ // copy the .bib databases to this temp directory, mangling their
+ // names in the process. Store this mangled name in the list of
+ // all databases.
+ // (We need to do all this because BibTeX *really*, *really*
+ // can't handle "files with spaces" and Windows users tend to
+ // use such filenames.)
+ // Otherwise, store the (maybe absolute) path to the original,
+ // unmangled database name.
+ typedef boost::char_separator<char_type> Separator;
+ typedef boost::tokenizer<Separator, docstring::const_iterator, docstring> Tokenizer;
+
+ Separator const separator(from_ascii(",").c_str());
+ // The tokenizer must not be called with temporary strings, since
+ // it does not make a copy and uses iterators of the string further
+ // down. getParam returns a reference, so this is OK.
+ Tokenizer const tokens(getParam("bibfiles"), separator);
+ Tokenizer::const_iterator const begin = tokens.begin();
+ Tokenizer::const_iterator const end = tokens.end();
+
+ odocstringstream dbs;
+ for (Tokenizer::const_iterator it = begin; it != end; ++it) {
+ docstring const input = trim(*it);
+ // FIXME UNICODE
+ string utf8input(to_utf8(input));
+ string database =
+ normalize_name(buffer, runparams, utf8input, ".bib");
+ FileName const try_in_file(makeAbsPath(database + ".bib", buffer.filePath()));
+ bool const not_from_texmf = isFileReadable(try_in_file);
+
+ if (!runparams.inComment && !runparams.dryrun && !runparams.nice &&
+ not_from_texmf) {
+
+ // mangledFilename() needs the extension
+ DocFileName const in_file = DocFileName(try_in_file);
+ database = removeExtension(in_file.mangledFilename());
+ FileName const out_file(makeAbsPath(database + ".bib",
+ buffer.getMasterBuffer()->temppath()));
+
+ bool const success = copy(in_file, out_file);
+ if (!success) {
+ lyxerr << "Failed to copy '" << in_file
+ << "' to '" << out_file << "'"
+ << endl;
+ }
+ }
+
+ if (it != begin)
+ dbs << ',';
+ // FIXME UNICODE
+ dbs << from_utf8(latex_path(database));
+ }
+ docstring const db_out = dbs.str();
+
+ // Post this warning only once.
+ static bool warned_about_spaces = false;
+ if (!warned_about_spaces &&
+ runparams.nice && db_out.find(' ') != docstring::npos) {
+ warned_about_spaces = true;
+
+ Alert::warning(_("Export Warning!"),
+ _("There are spaces in the paths to your BibTeX databases.\n"
+ "BibTeX will be unable to find them."));
+
}
- db_out = rtrim(db_out, ",");
// Style-Options
- string style = getOptions(); // maybe empty! and with bibtotoc
+ string style = to_utf8(getParam("options")); // maybe empty! and with bibtotoc
string bibtotoc;
if (prefixIs(style, "bibtotoc")) {
bibtotoc = "bibtotoc";
}
// line count
- int i = 0;
+ int nlines = 0;
if (!style.empty()) {
+ string base =
+ normalize_name(buffer, runparams, style, ".bst");
+ FileName const try_in_file(makeAbsPath(base + ".bst", buffer.filePath()));
+ bool const not_from_texmf = isFileReadable(try_in_file);
+ // If this style does not come from texmf and we are not
+ // exporting to .tex copy it to the tmp directory.
+ // This prevents problems with spaces and 8bit charcaters
+ // in the file name.
+ if (!runparams.inComment && !runparams.dryrun && !runparams.nice &&
+ not_from_texmf) {
+ // use new style name
+ DocFileName const in_file = DocFileName(try_in_file);
+ base = removeExtension(in_file.mangledFilename());
+ FileName const out_file(makeAbsPath(base + ".bst",
+ buffer.getMasterBuffer()->temppath()));
+ bool const success = copy(in_file, out_file);
+ if (!success) {
+ lyxerr << "Failed to copy '" << in_file
+ << "' to '" << out_file << "'"
+ << endl;
+ }
+ }
+ // FIXME UNICODE
os << "\\bibliographystyle{"
- << os::external_path(normalize_name(buffer, runparams,
- style, ".bst"))
+ << from_utf8(latex_path(normalize_name(buffer, runparams, base, ".bst")))
<< "}\n";
- i += 1;
+ nlines += 1;
}
- if (buffer.params().use_bibtopic){
+ // Post this warning only once.
+ static bool warned_about_bst_spaces = false;
+ if (!warned_about_bst_spaces && runparams.nice && contains(style, ' ')) {
+ warned_about_bst_spaces = true;
+ Alert::warning(_("Export Warning!"),
+ _("There are spaces in the path to your BibTeX style file.\n"
+ "BibTeX will be unable to find it."));
+ }
+
+ if (!db_out.empty() && buffer.params().use_bibtopic){
os << "\\begin{btSect}{" << db_out << "}\n";
- string btprint = getSecOptions();
+ docstring btprint = getParam("btprint");
if (btprint.empty())
// default
- btprint = "btPrintCited";
+ btprint = from_ascii("btPrintCited");
os << "\\" << btprint << "\n"
<< "\\end{btSect}\n";
- i += 3;
+ nlines += 3;
}
// bibtotoc-Option
}
}
- if (!buffer.params().use_bibtopic){
+ if (!db_out.empty() && !buffer.params().use_bibtopic){
os << "\\bibliography{" << db_out << "}\n";
- i += 1;
+ nlines += 1;
}
- return i;
+ return nlines;
}
-vector<string> const InsetBibtex::getFiles(Buffer const & buffer) const
+vector<FileName> const InsetBibtex::getFiles(Buffer const & buffer) const
{
- Path p(buffer.filePath());
+ FileName path(buffer.filePath());
+ support::Path p(path);
- vector<string> vec;
+ vector<FileName> vec;
string tmp;
- string bibfiles = getContents();
+ // FIXME UNICODE
+ string bibfiles = to_utf8(getParam("bibfiles"));
bibfiles = split(bibfiles, tmp, ',');
while (!tmp.empty()) {
- string file = findtexfile(ChangeExtension(tmp, "bib"), "bib");
- lyxerr[Debug::LATEX] << "Bibfile: " << file << endl;
+ FileName const file = findtexfile(changeExtension(tmp, "bib"), "bib");
+ LYXERR(Debug::LATEX) << "Bibfile: " << file << endl;
// If we didn't find a matching file name just fail silently
if (!file.empty())
return vec;
}
+namespace {
+
+ // methods for parsing bibtex files
+
+ typedef map<docstring, docstring> VarMap;
+
+ /// remove whitespace characters, optionally a single comma,
+ /// and further whitespace characters from the stream.
+ /// @return true if a comma was found, false otherwise
+ ///
+ bool removeWSAndComma(idocfstream & ifs) {
+ char_type ch;
+
+ if (!ifs)
+ return false;
+
+ // skip whitespace
+ do {
+ ifs.get(ch);
+ } while (ifs && isSpace(ch));
+
+ if (!ifs)
+ return false;
+
+ if (ch != ',') {
+ ifs.putback(ch);
+ return false;
+ }
+
+ // skip whitespace
+ do {
+ ifs.get(ch);
+ } while (ifs && isSpace(ch));
+
+ if (ifs) {
+ ifs.putback(ch);
+ }
+
+ return true;
+ }
+
+ /// remove whitespace characters, read characer sequence
+ /// not containing whitespace characters or characters in
+ /// delimChars, and remove further whitespace characters.
+ ///
+ /// @return true if a string of length > 0 could be read.
+ ///
+ bool readTypeOrKey(docstring & val, idocfstream & ifs, docstring const & delimChars) {
+
+ char_type ch;
+
+ val.clear();
+
+ if (!ifs)
+ return false;
+
+ // skip whitespace
+ do {
+ ifs.get(ch);
+ } while (ifs && isSpace(ch));
+
+ if (!ifs)
+ return false;
+
+ // read value
+ while (ifs && !isSpace(ch) && delimChars.find(ch) == docstring::npos) {
+ val += lowercase(ch);
+ ifs.get(ch);
+ }
+
+ // skip whitespace
+ while (ifs && isSpace(ch)) {
+ ifs.get(ch);
+ }
+
+ if (ifs) {
+ ifs.putback(ch);
+ }
+
+ return val.length() > 0;
+ }
+
+ /// read subsequent bibtex values that are delimited with a #-character.
+ /// Concatenate all parts and replace names with the associated string in
+ /// the variable strings.
+ /// @return true if reading was successfull (all single parts were delimited
+ /// correctly)
+ bool readValue(docstring & val, idocfstream & ifs, const VarMap & strings) {
+
+ char_type ch;
+
+ val.clear();
+
+ if (!ifs)
+ return false;
+
+ do {
+ // skip whitespace
+ do {
+ ifs.get(ch);
+ } while (ifs && isSpace(ch));
+
+ if (!ifs)
+ return false;
+
+ // check for field type
+ if (isDigit(ch)) {
+
+ // read integer value
+ do {
+ val += ch;
+ ifs.get(ch);
+ } while (ifs && isDigit(ch));
+
+ if (!ifs)
+ return false;
+
+ } else if (ch == '"' || ch == '{') {
+
+ // read delimited text - set end delimiter
+ char_type delim = ch == '"'? '"': '}';
+
+ // inside this delimited text braces must match.
+ // Thus we can have a closing delimiter only
+ // when nestLevel == 0
+ int nestLevel = 0;
+
+ ifs.get(ch);
+ while (ifs && (nestLevel > 0 || ch != delim)) {
+ val += ch;
+
+ // update nesting level
+ switch (ch) {
+ case '{':
+ ++nestLevel;
+ break;
+ case '}':
+ --nestLevel;
+ if (nestLevel < 0) return false;
+ break;
+ }
+
+ ifs.get(ch);
+ }
+
+ if (!ifs)
+ return false;
+
+ ifs.get(ch);
+
+ if (!ifs)
+ return false;
+
+ } else {
+
+ // reading a string name
+ docstring strName;
+
+ while (ifs && !isSpace(ch) && ch != '#' && ch != ',' && ch != '}' && ch != ')') {
+ strName += lowercase(ch);
+ ifs.get(ch);
+ }
+
+ if (!ifs)
+ return false;
+
+ // replace the string with its assigned value or
+ // discard it if it's not assigned
+ if (strName.length()) {
+ VarMap::const_iterator pos = strings.find(strName);
+ if (pos != strings.end()) {
+ val += pos->second;
+ }
+ }
+ }
+
+ // skip WS
+ while (ifs && isSpace(ch)) {
+ ifs.get(ch);
+ }
+
+ if (!ifs)
+ return false;
+
+ // continue reading next value on concatenate with '#'
+ } while (ch == '#');
+
+ ifs.putback(ch);
+
+ return true;
+ }
+}
+
// This method returns a comma separated list of Bibtex entries
void InsetBibtex::fillWithBibKeys(Buffer const & buffer,
- std::vector<std::pair<string, string> > & keys) const
+ std::vector<std::pair<string, docstring> > & keys) const
{
- vector<string> const files = getFiles(buffer);
- for (vector<string>::const_iterator it = files.begin();
+ vector<FileName> const files = getFiles(buffer);
+ for (vector<FileName>::const_iterator it = files.begin();
it != files.end(); ++ it) {
- // This is a _very_ simple parser for Bibtex database
- // files. All it does is to look for lines starting
- // in @ and not being @preamble and @string entries.
- // It does NOT do any syntax checking!
- ifstream ifs(it->c_str());
- string linebuf0;
- while (getline(ifs, linebuf0)) {
- string linebuf = trim(linebuf0);
- if (linebuf.empty()) continue;
- if (prefixIs(linebuf, "@")) {
- linebuf = subst(linebuf, '{', '(');
- string tmp;
- linebuf = split(linebuf, tmp, '(');
- tmp = ascii_lowercase(tmp);
- if (!prefixIs(tmp, "@string")
- && !prefixIs(tmp, "@preamble")) {
- linebuf = split(linebuf, tmp, ',');
- tmp = ltrim(tmp, " \t");
- if (!tmp.empty()) {
- keys.push_back(pair<string,string>(tmp,string()));
+ // This bibtex parser is a first step to parse bibtex files
+ // more precisely.
+ //
+ // - it reads the whole bibtex entry and does a syntax check
+ // (matching delimiters, missing commas,...
+ // - it recovers from errors starting with the next @-character
+ // - it reads @string definitions and replaces them in the
+ // field values.
+ // - it accepts more characters in keys or value names than
+ // bibtex does.
+ //
+ // TODOS:
+ // - the entries are split into name = value pairs by the
+ // parser. These have to be merged again because of the
+ // way lyx treats the entries ( pair<...>(...) ). The citation
+ // mechanism in lyx should be changed such that it can use
+ // the split entries.
+ // - messages on parsing errors can be generated.
+ //
+
+ // Officially bibtex does only support ASCII, but in practice
+ // you can use the encoding of the main document as long as
+ // some elements like keys and names are pure ASCII. Therefore
+ // we convert the file from the buffer encoding.
+ // We don't restrict keys to ASCII in LyX, since our own
+ // InsetBibitem can generate non-ASCII keys, and nonstandard
+ // 8bit clean bibtex forks exist.
+ idocfstream ifs(it->toFilesystemEncoding().c_str(),
+ std::ios_base::in,
+ buffer.params().encoding().iconvName());
+
+ char_type ch;
+ VarMap strings;
+
+ while (ifs) {
+
+ ifs.get(ch);
+ if (!ifs)
+ break;
+
+ if (ch != '@')
+ continue;
+
+ docstring entryType;
+
+ if (!readTypeOrKey(entryType, ifs, from_ascii("{(")) || !ifs)
+ continue;
+
+ if (entryType == from_ascii("comment")) {
+
+ ifs.ignore(std::numeric_limits<int>::max(), '\n');
+ continue;
+ }
+
+ // check entry delimiter
+ char_type entryDelim;
+
+ ifs.get(ch);
+ if (!ifs)
+ break;
+
+ if (ch == '(') entryDelim = ')';
+ else if (ch == '{') entryDelim = ')';
+ else {
+ // invalid entry delimiter
+ ifs.putback(ch);
+ continue;
+ }
+
+ // process the entry
+ if (entryType == from_ascii("string")) {
+
+ // read string and add it to the strings map
+ // (or replace it's old value)
+ docstring name;
+ docstring value;
+
+ if (!readTypeOrKey(name, ifs, from_ascii("#=}),")) || !ifs)
+ continue;
+
+ ifs.get(ch);
+ if (!ifs || ch != '=')
+ continue;
+
+ if (!readValue(value, ifs, strings))
+ continue;
+
+ strings[name] = value;
+
+ } else if (entryType == from_ascii("preamble")) {
+
+ // preamble definitions are discarded.
+ // can they be of any use in lyx?
+ docstring value;
+
+ if (!readValue(value, ifs, strings))
+ continue;
+
+ } else {
+
+ // Citation entry. Read the key and all name = value pairs
+ docstring key;
+ docstring fields;
+ docstring name;
+ docstring value;
+ docstring commaNewline;
+
+ if (!readTypeOrKey(key, ifs, from_ascii(",})")) || !ifs)
+ continue;
+
+ // now we have a key, so we will add an entry
+ // (even if it's empty, as bibtex does)
+ //
+ // all items must be separated by a comma. If
+ // it is missing the scanning of this entry is
+ // stopped and the next is searched.
+ bool readNext = removeWSAndComma(ifs);
+
+ while (ifs && readNext) {
+
+ // read field name
+ if (!readTypeOrKey(name, ifs, from_ascii("=}),")) || !ifs)
+ break;
+
+ // next char must be an equal sign
+ ifs.get(ch);
+ if (!ifs)
+ break;
+ if (ch != '=') {
+ ifs.putback(ch);
+ break;
}
+
+ // read field value
+ if (!readValue(value, ifs, strings))
+ break;
+
+ // append field to the total entry string.
+ //
+ // TODO: Here is where the fields can be put in
+ // a more intelligent structure that preserves
+ // the already known parts.
+ fields += commaNewline;
+ fields += name + from_ascii(" = {") + value + '}';
+
+ if (!commaNewline.length())
+ commaNewline = from_ascii(",\n");
+
+ readNext = removeWSAndComma(ifs);
}
- } else if (!keys.empty()) {
- keys.back().second += linebuf + "\n";
+
+ // add the new entry
+ keys.push_back(pair<string, docstring>(
+ to_utf8(key), fields));
}
- }
- }
+
+ } //< searching '@'
+
+ } //< for loop over files
}
+
/// Append database \p db to the comma-separated "bibfiles" parameter,
/// unless it is already present as a token.
/// @return true if the database was added, false if it was already listed.
bool InsetBibtex::addDatabase(string const & db)
{
	// FIXME UNICODE
	string bibfiles(to_utf8(getParam("bibfiles")));
	// tokenPos == -1 means db is not yet one of the comma-separated tokens
	if (tokenPos(bibfiles, ',', db) == -1) {
		if (!bibfiles.empty())
			bibfiles += ',';
		setParam("bibfiles", from_utf8(bibfiles + db));
		return true;
	}
	return false;
}
bool InsetBibtex::delDatabase(string const & db)
{
- if (contains(getContents(), db)) {
+ // FIXME UNICODE
+ string bibfiles(to_utf8(getParam("bibfiles")));
+ if (contains(bibfiles, db)) {
+ int const n = tokenPos(bibfiles, ',', db);
string bd = db;
- int const n = tokenPos(getContents(), ',', bd);
if (n > 0) {
- // Weird code, would someone care to explain this?(Lgb)
- string tmp(", ");
- tmp += bd;
- setContents(subst(getContents(), tmp, ", "));
+ // this is not the first database
+ string tmp = ',' + bd;
+ setParam("bibfiles", from_utf8(subst(bibfiles, tmp, string())));
} else if (n == 0)
- setContents(split(getContents(), bd, ','));
+ // this is the first (or only) database
+ setParam("bibfiles", from_utf8(split(bibfiles, bd, ',')));
else
return false;
}
if (features.bufferParams().use_bibtopic)
features.require("bibtopic");
}
+
+
+} // namespace lyx