#include "bufferparams.h"
#include "dispatchresult.h"
#include "debug.h"
+#include "encoding.h"
#include "funcrequest.h"
#include "gettext.h"
#include "LaTeXFeatures.h"
#include "support/lyxlib.h"
#include "support/os.h"
#include "support/path.h"
+#include "support/textutils.h"
#include <boost/tokenizer.hpp>
-#include <fstream>
-#include <sstream>
-
namespace lyx {
using support::ltrim;
using support::makeAbsPath;
using support::makeRelPath;
-using support::Path;
using support::prefixIs;
using support::removeExtension;
using support::rtrim;
using support::subst;
using support::tokenPos;
using support::trim;
+using support::lowercase;
namespace Alert = frontend::Alert;
namespace os = support::os;
using std::endl;
using std::getline;
using std::string;
-using std::ifstream;
using std::ostream;
using std::pair;
using std::vector;
+using std::map;
InsetBibtex::InsetBibtex(InsetCommandParams const & p)
string normalize_name(Buffer const & buffer, OutputParams const & runparams,
	string const & name, string const & ext)
{
	// Absolute file name as seen from the buffer directory.
	string const fname = makeAbsPath(name, buffer.filePath()).absFilename();

	// Leave the name untouched if it is already absolute, or if no
	// matching file is readable (it may live in the texmf tree).
	if (absolutePath(name) || !isFileReadable(FileName(fname + ext)))
		return name;

	// Non-nice export runs from the temp directory, so an absolute
	// path is the safe choice.
	if (!runparams.nice)
		return fname;

	// Nice export: use a path relative to the master document.
	// FIXME UNICODE
	return to_utf8(makeRelPath(from_utf8(fname),
		from_utf8(buffer.getMasterBuffer()->filePath())));
}
}
string utf8input(to_utf8(input));
string database =
normalize_name(buffer, runparams, utf8input, ".bib");
- string const try_in_file = makeAbsPath(database + ".bib", buffer.filePath());
- bool const not_from_texmf = isFileReadable(FileName(try_in_file));
+ FileName const try_in_file(makeAbsPath(database + ".bib", buffer.filePath()));
+ bool const not_from_texmf = isFileReadable(try_in_file);
if (!runparams.inComment && !runparams.dryrun && !runparams.nice &&
not_from_texmf) {
// mangledFilename() needs the extension
DocFileName const in_file = DocFileName(try_in_file);
database = removeExtension(in_file.mangledFilename());
- FileName const out_file = FileName(makeAbsPath(database + ".bib",
+ FileName const out_file(makeAbsPath(database + ".bib",
buffer.getMasterBuffer()->temppath()));
bool const success = copy(in_file, out_file);
if (!style.empty()) {
string base =
normalize_name(buffer, runparams, style, ".bst");
- string const try_in_file = makeAbsPath(base + ".bst", buffer.filePath());
- bool const not_from_texmf = isFileReadable(FileName(try_in_file));
+ FileName const try_in_file(makeAbsPath(base + ".bst", buffer.filePath()));
+ bool const not_from_texmf = isFileReadable(try_in_file);
// If this style does not come from texmf and we are not
// exporting to .tex copy it to the tmp directory.
	// This prevents problems with spaces and 8bit characters
// use new style name
DocFileName const in_file = DocFileName(try_in_file);
base = removeExtension(in_file.mangledFilename());
- FileName const out_file = FileName(makeAbsPath(base + ".bst",
+ FileName const out_file(makeAbsPath(base + ".bst",
buffer.getMasterBuffer()->temppath()));
bool const success = copy(in_file, out_file);
if (!success) {
vector<FileName> const InsetBibtex::getFiles(Buffer const & buffer) const
{
- Path p(buffer.filePath());
+ FileName path(buffer.filePath());
+ support::Path p(path);
vector<FileName> vec;
bibfiles = split(bibfiles, tmp, ',');
while (!tmp.empty()) {
FileName const file = findtexfile(changeExtension(tmp, "bib"), "bib");
- lyxerr[Debug::LATEX] << "Bibfile: " << file << endl;
+ LYXERR(Debug::LATEX) << "Bibfile: " << file << endl;
// If we didn't find a matching file name just fail silently
if (!file.empty())
return vec;
}
+namespace {
+
+ // methods for parsing bibtex files
+
+ typedef map<docstring, docstring> VarMap;
+
+ /// remove whitespace characters, optionally a single comma,
+ /// and further whitespace characters from the stream.
+ /// @return true if a comma was found, false otherwise
+ ///
+ bool removeWSAndComma(idocfstream & ifs) {
+ char_type ch;
+
+ if (!ifs)
+ return false;
+
+ // skip whitespace
+ do {
+ ifs.get(ch);
+ } while (ifs && isSpace(ch));
+
+ if (!ifs)
+ return false;
+
+ if (ch != ',') {
+ ifs.putback(ch);
+ return false;
+ }
+
+ // skip whitespace
+ do {
+ ifs.get(ch);
+ } while (ifs && isSpace(ch));
+
+ if (ifs) {
+ ifs.putback(ch);
+ }
+
+ return true;
+ }
+
+ /// remove whitespace characters, read characer sequence
+ /// not containing whitespace characters or characters in
+ /// delimChars, and remove further whitespace characters.
+ ///
+ /// @return true if a string of length > 0 could be read.
+ ///
+ bool readTypeOrKey(docstring & val, idocfstream & ifs, docstring const & delimChars) {
+
+ char_type ch;
+
+ val.clear();
+
+ if (!ifs)
+ return false;
+
+ // skip whitespace
+ do {
+ ifs.get(ch);
+ } while (ifs && isSpace(ch));
+
+ if (!ifs)
+ return false;
+
+ // read value
+ while (ifs && !isSpace(ch) && delimChars.find(ch) == docstring::npos) {
+ val += lowercase(ch);
+ ifs.get(ch);
+ }
+
+ // skip whitespace
+ while (ifs && isSpace(ch)) {
+ ifs.get(ch);
+ }
+
+ if (ifs) {
+ ifs.putback(ch);
+ }
+
+ return val.length() > 0;
+ }
+
+ /// read subsequent bibtex values that are delimited with a #-character.
+ /// Concatenate all parts and replace names with the associated string in
+ /// the variable strings.
+ /// @return true if reading was successfull (all single parts were delimited
+ /// correctly)
+ bool readValue(docstring & val, idocfstream & ifs, const VarMap & strings) {
+
+ char_type ch;
+
+ val.clear();
+
+ if (!ifs)
+ return false;
+
+ do {
+ // skip whitespace
+ do {
+ ifs.get(ch);
+ } while (ifs && isSpace(ch));
+
+ if (!ifs)
+ return false;
+
+ // check for field type
+ if (isDigit(ch)) {
+
+ // read integer value
+ do {
+ val += ch;
+ ifs.get(ch);
+ } while (ifs && isDigit(ch));
+
+ if (!ifs)
+ return false;
+
+ } else if (ch == '"' || ch == '{') {
+
+ // read delimited text - set end delimiter
+ char_type delim = ch == '"'? '"': '}';
+
+ // inside this delimited text braces must match.
+ // Thus we can have a closing delimiter only
+ // when nestLevel == 0
+ int nestLevel = 0;
+
+ ifs.get(ch);
+ while (ifs && (nestLevel > 0 || ch != delim)) {
+ val += ch;
+
+ // update nesting level
+ switch (ch) {
+ case '{':
+ ++nestLevel;
+ break;
+ case '}':
+ --nestLevel;
+ if (nestLevel < 0) return false;
+ break;
+ }
+
+ ifs.get(ch);
+ }
+
+ if (!ifs)
+ return false;
+
+ ifs.get(ch);
+
+ if (!ifs)
+ return false;
+
+ } else {
+
+ // reading a string name
+ docstring strName;
+
+ while (ifs && !isSpace(ch) && ch != '#' && ch != ',' && ch != '}' && ch != ')') {
+ strName += lowercase(ch);
+ ifs.get(ch);
+ }
+
+ if (!ifs)
+ return false;
+
+ // replace the string with its assigned value or
+ // discard it if it's not assigned
+ if (strName.length()) {
+ VarMap::const_iterator pos = strings.find(strName);
+ if (pos != strings.end()) {
+ val += pos->second;
+ }
+ }
+ }
+
+ // skip WS
+ while (ifs && isSpace(ch)) {
+ ifs.get(ch);
+ }
+
+ if (!ifs)
+ return false;
+
+ // continue reading next value on concatenate with '#'
+ } while (ch == '#');
+
+ ifs.putback(ch);
+
+ return true;
+ }
+}
+
// This method returns a comma separated list of Bibtex entries
void InsetBibtex::fillWithBibKeys(Buffer const & buffer,
- std::vector<std::pair<string, string> > & keys) const
+ std::vector<std::pair<string, docstring> > & keys) const
{
vector<FileName> const files = getFiles(buffer);
for (vector<FileName>::const_iterator it = files.begin();
it != files.end(); ++ it) {
- // This is a _very_ simple parser for Bibtex database
- // files. All it does is to look for lines starting
- // in @ and not being @preamble and @string entries.
- // It does NOT do any syntax checking!
- ifstream ifs(it->toFilesystemEncoding().c_str());
- string linebuf0;
- while (getline(ifs, linebuf0)) {
- string linebuf = trim(linebuf0);
- if (linebuf.empty()) continue;
- if (prefixIs(linebuf, "@")) {
- linebuf = subst(linebuf, '{', '(');
- string tmp;
- linebuf = split(linebuf, tmp, '(');
- tmp = ascii_lowercase(tmp);
- if (!prefixIs(tmp, "@string")
- && !prefixIs(tmp, "@preamble")) {
- linebuf = split(linebuf, tmp, ',');
- tmp = ltrim(tmp, " \t");
- if (!tmp.empty()) {
- keys.push_back(pair<string,string>(tmp,string()));
+ // This bibtex parser is a first step to parse bibtex files
+ // more precisely.
+ //
+ // - it reads the whole bibtex entry and does a syntax check
+ // (matching delimiters, missing commas,...
+ // - it recovers from errors starting with the next @-character
+ // - it reads @string definitions and replaces them in the
+ // field values.
+ // - it accepts more characters in keys or value names than
+ // bibtex does.
+ //
+ // TODOS:
+ // - the entries are split into name = value pairs by the
+ // parser. These have to be merged again because of the
+ // way lyx treats the entries ( pair<...>(...) ). The citation
+ // mechanism in lyx should be changed such that it can use
+ // the split entries.
+ // - messages on parsing errors can be generated.
+ //
+
+ // Officially bibtex does only support ASCII, but in practice
+ // you can use the encoding of the main document as long as
+ // some elements like keys and names are pure ASCII. Therefore
+ // we convert the file from the buffer encoding.
+ // We don't restrict keys to ASCII in LyX, since our own
+ // InsetBibitem can generate non-ASCII keys, and nonstandard
+ // 8bit clean bibtex forks exist.
+ idocfstream ifs(it->toFilesystemEncoding().c_str(),
+ std::ios_base::in,
+ buffer.params().encoding().iconvName());
+
+ char_type ch;
+ VarMap strings;
+
+ while (ifs) {
+
+ ifs.get(ch);
+ if (!ifs)
+ break;
+
+ if (ch != '@')
+ continue;
+
+ docstring entryType;
+
+ if (!readTypeOrKey(entryType, ifs, from_ascii("{(")) || !ifs)
+ continue;
+
+ if (entryType == from_ascii("comment")) {
+
+ ifs.ignore(std::numeric_limits<int>::max(), '\n');
+ continue;
+ }
+
+ // check entry delimiter
+ char_type entryDelim;
+
+ ifs.get(ch);
+ if (!ifs)
+ break;
+
+ if (ch == '(') entryDelim = ')';
+ else if (ch == '{') entryDelim = ')';
+ else {
+ // invalid entry delimiter
+ ifs.putback(ch);
+ continue;
+ }
+
+ // process the entry
+ if (entryType == from_ascii("string")) {
+
+ // read string and add it to the strings map
+ // (or replace it's old value)
+ docstring name;
+ docstring value;
+
+ if (!readTypeOrKey(name, ifs, from_ascii("#=}),")) || !ifs)
+ continue;
+
+ ifs.get(ch);
+ if (!ifs || ch != '=')
+ continue;
+
+ if (!readValue(value, ifs, strings))
+ continue;
+
+ strings[name] = value;
+
+ } else if (entryType == from_ascii("preamble")) {
+
+ // preamble definitions are discarded.
+ // can they be of any use in lyx?
+ docstring value;
+
+ if (!readValue(value, ifs, strings))
+ continue;
+
+ } else {
+
+ // Citation entry. Read the key and all name = value pairs
+ docstring key;
+ docstring fields;
+ docstring name;
+ docstring value;
+ docstring commaNewline;
+
+ if (!readTypeOrKey(key, ifs, from_ascii(",})")) || !ifs)
+ continue;
+
+ // now we have a key, so we will add an entry
+ // (even if it's empty, as bibtex does)
+ //
+ // all items must be separated by a comma. If
+ // it is missing the scanning of this entry is
+ // stopped and the next is searched.
+ bool readNext = removeWSAndComma(ifs);
+
+ while (ifs && readNext) {
+
+ // read field name
+ if (!readTypeOrKey(name, ifs, from_ascii("=}),")) || !ifs)
+ break;
+
+ // next char must be an equal sign
+ ifs.get(ch);
+ if (!ifs)
+ break;
+ if (ch != '=') {
+ ifs.putback(ch);
+ break;
}
+
+ // read field value
+ if (!readValue(value, ifs, strings))
+ break;
+
+ // append field to the total entry string.
+ //
+ // TODO: Here is where the fields can be put in
+ // a more intelligent structure that preserves
+ // the already known parts.
+ fields += commaNewline;
+ fields += name + from_ascii(" = {") + value + '}';
+
+ if (!commaNewline.length())
+ commaNewline = from_ascii(",\n");
+
+ readNext = removeWSAndComma(ifs);
}
- } else if (!keys.empty()) {
- keys.back().second += linebuf + "\n";
+
+ // add the new entry
+ keys.push_back(pair<string, docstring>(
+ to_utf8(key), fields));
}
- }
- }
+
+ } //< searching '@'
+
+ } //< for loop over files
}
+
bool InsetBibtex::addDatabase(string const & db)
{
// FIXME UNICODE