From e4c3ce462791c85922d919f8859e3408f57d10fa Mon Sep 17 00:00:00 2001
From: Georg Baum <Georg.Baum@post.rwth-aachen.de>
Date: Sun, 17 Dec 2006 10:52:04 +0000
Subject: [PATCH] Convert most of the bibtex machinery to docstring. Don't
 limit citation keys to ASCII anymore, since InsetBibitem can generate
 non-ASCII keys.

git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@16310 a592a061-630c-0410-9148-cb99ea01b6c8
---
 src/buffer.C                                |  21 +-
 src/buffer.h                                |   2 +-
 src/frontends/controllers/ControlCitation.C |  14 +-
 src/frontends/controllers/ControlCitation.h |   2 +-
 src/frontends/controllers/biblio.C          | 234 ++++++++++----------
 src/frontends/controllers/biblio.h          |  23 +-
 src/frontends/qt4/QCitation.C               |   6 +-
 src/insets/insetbibtex.C                    |  20 +-
 src/insets/insetbibtex.h                    |   2 +-
 src/insets/insetcite.C                      |  76 +++----
 src/insets/insetinclude.C                   |   2 +-
 src/insets/insetinclude.h                   |   2 +-
 src/support/lstrings.C                      |  60 ++++-
 src/support/lstrings.h                      |   2 +
 14 files changed, 256 insertions(+), 210 deletions(-)
diff --git a/src/buffer.C b/src/buffer.C
index 691cd5fa17..e81bcc1d5f 100644
--- a/src/buffer.C
+++ b/src/buffer.C
@@ -1210,7 +1210,7 @@ void Buffer::getLabelList(vector<docstring> & list) const
 
 
 // This is also a buffer property (ale)
-void Buffer::fillWithBibKeys(vector<pair<string, string> > & keys)
+void Buffer::fillWithBibKeys(vector<pair<string, docstring> > & keys)
 	const
 {
 	/// if this is a child document and the parent is already loaded
@@ -1234,11 +1234,12 @@ void Buffer::fillWithBibKeys(vector<pair<string, string> > & keys)
 		} else if (it->lyxCode() == InsetBase::BIBITEM_CODE) {
 			InsetBibitem const & inset =
 				dynamic_cast<InsetBibitem const &>(*it);
-			string const key = inset.getContents();
-			string const opt = inset.getOptions();
-			string const ref; // = pit->asString(this, false);
-			string const info = opt + "TheBibliographyRef" + ref;
-			keys.push_back(pair<string, string>(key, info));
+			// FIXME UNICODE
+			string const key = to_utf8(inset.getParam("key"));
+			docstring const label = inset.getParam("label");
+			docstring const ref; // = pit->asString(this, false);
+			docstring const info = label + "TheBibliographyRef" + ref;
+			keys.push_back(pair<string, docstring>(key, info));
 		}
 	}
 }
@@ -1617,10 +1618,10 @@ void Buffer::changeRefsIfUnique(docstring const & from, docstring const & to,
 	vector<docstring> labels;
 
 	if (code == InsetBase::CITE_CODE) {
-		vector<pair<string, string> > keys;
+		vector<pair<string, docstring> > keys;
 		fillWithBibKeys(keys);
-		vector<pair<string, string> >::const_iterator bit  = keys.begin();
-		vector<pair<string, string> >::const_iterator bend = keys.end();
+		vector<pair<string, docstring> >::const_iterator bit  = keys.begin();
+		vector<pair<string, docstring> >::const_iterator bend = keys.end();
 
 		for (; bit != bend; ++bit)
 			// FIXME UNICODE
@@ -1681,7 +1682,7 @@ void Buffer::getSourceCode(odocstream & os, pit_type par_begin,
 ErrorList const & Buffer::errorList(string const & type) const
 {
 	static ErrorList const emptyErrorList;
-	std::map<std::string, ErrorList>::const_iterator I = errorLists_.find(type);
+	std::map<string, ErrorList>::const_iterator I = errorLists_.find(type);
 	if (I == errorLists_.end())
 		return emptyErrorList;
 
diff --git a/src/buffer.h b/src/buffer.h
index f37336790b..e8b519e15d 100644
--- a/src/buffer.h
+++ b/src/buffer.h
@@ -255,7 +255,7 @@ public:
 	void validate(LaTeXFeatures &) const;
 
 	/// return all bibkeys from buffer and its childs
-	void fillWithBibKeys(std::vector<std::pair<std::string, std::string> > & keys) const;
+	void fillWithBibKeys(std::vector<std::pair<std::string, docstring> > & keys) const;
 	/// Update the cache with all bibfiles in use (including bibfiles
 	/// of loaded child documents).
 	void updateBibfilesCache();
diff --git a/src/frontends/controllers/ControlCitation.C b/src/frontends/controllers/ControlCitation.C
index 2ea86ee45a..2c43aa63ee 100644
--- a/src/frontends/controllers/ControlCitation.C
+++ b/src/frontends/controllers/ControlCitation.C
@@ -36,14 +36,14 @@ bool ControlCitation::initialiseParams(string const & data)
 {
 	ControlCommand::initialiseParams(data);
 
-	vector<pair<string, string> > blist;
+	vector<pair<string, docstring> > blist;
 	kernel().buffer().fillWithBibKeys(blist);
 
 	biblio::CiteEngine const engine = biblio::getEngine(kernel().buffer());
 
 	bool use_styles = engine != biblio::ENGINE_BASIC;
 
-	typedef std::map<string, string>::value_type InfoMapValue;
+	typedef std::map<string, docstring>::value_type InfoMapValue;
 
 	for (vector<pair<string,string> >::size_type i = 0;
 	     i < blist.size(); ++i) {
@@ -83,19 +83,15 @@ biblio::CiteEngine_enum ControlCitation::getEngine() const
 }
 
 
-vector<string> const ControlCitation::getCiteStrings(string const & key) const
+vector<docstring> const ControlCitation::getCiteStrings(string const & key) const
 {
-	vector<string> styles;
-
 	biblio::CiteEngine const engine = biblio::getEngine(kernel().buffer());
 	vector<biblio::CiteStyle> const cs = biblio::getCiteStyles(engine);
 
 	if (engine == biblio::ENGINE_NATBIB_NUMERICAL)
-		styles = biblio::getNumericalStrings(key, bibkeysInfo_, cs);
+		return biblio::getNumericalStrings(key, bibkeysInfo_, cs);
 	else
-		styles = biblio::getAuthorYearStrings(key, bibkeysInfo_, cs);
-
-	return styles;
+		return biblio::getAuthorYearStrings(key, bibkeysInfo_, cs);
 }
 
 } // namespace frontend
diff --git a/src/frontends/controllers/ControlCitation.h b/src/frontends/controllers/ControlCitation.h
index 5dc10ad44e..174328625f 100644
--- a/src/frontends/controllers/ControlCitation.h
+++ b/src/frontends/controllers/ControlCitation.h
@@ -43,7 +43,7 @@ public:
 	biblio::CiteEngine_enum getEngine() const;
 
 	/// Possible citations based on this key
-	std::vector<std::string> const getCiteStrings(std::string const & key) const;
+	std::vector<docstring> const getCiteStrings(std::string const & key) const;
 
 	/// available CiteStyle-s (depends on availability of Natbib/Jurabib)
 	static std::vector<biblio::CiteStyle> const & getCiteStyles() {
diff --git a/src/frontends/controllers/biblio.C b/src/frontends/controllers/biblio.C
index 5a7fb20d81..d60bc40b39 100644
--- a/src/frontends/controllers/biblio.C
+++ b/src/frontends/controllers/biblio.C
@@ -22,10 +22,8 @@
 #include <boost/regex.hpp>
 
 #include <algorithm>
-#include <sstream>
 
 using std::string;
-using std::ostringstream;
 using std::vector;
 
 
@@ -106,6 +104,9 @@ string const default_cite_command(CiteEngine engine)
 	return str;
 }
 
+
+static const docstring TheBibliographyRef(from_ascii("TheBibliographyRef"));
+
 } // namespace anon
 
 
@@ -153,21 +154,21 @@ string const asValidLatexCommand(string const & input,
 }
 
 
-string const familyName(string const & name)
+docstring const familyName(docstring const & name)
 {
 	// Very simple parser
-	string fname = name;
+	docstring fname = name;
 
 	// possible authorname combinations are:
 	// "Surname, FirstName"
 	// "Surname, F."
 	// "FirstName Surname"
 	// "F. Surname"
-	string::size_type idx = fname.find(',');
-	if (idx != string::npos)
+	docstring::size_type idx = fname.find(',');
+	if (idx != docstring::npos)
 		return ltrim(fname.substr(0, idx));
 	idx = fname.rfind('.');
-	if (idx != string::npos)
+	if (idx != docstring::npos)
 		fname = ltrim(fname.substr(idx + 1));
 	// test if we have a LaTeX Space in front
 	if (fname[0] == '\\')
@@ -177,32 +178,32 @@ string const familyName(string const & name)
 }
 
 
-string const getAbbreviatedAuthor(InfoMap const & map, string const & key)
+docstring const getAbbreviatedAuthor(InfoMap const & map, string const & key)
 {
 	BOOST_ASSERT(!map.empty());
 
 	InfoMap::const_iterator it = map.find(key);
 	if (it == map.end())
-		return string();
-	string const & data = it->second;
+		return docstring();
+	docstring const & data = it->second;
 
 	// Is the entry a BibTeX one or one from lyx-layout "bibliography"?
-	string::size_type const pos = data.find("TheBibliographyRef");
-	if (pos != string::npos) {
+	docstring::size_type const pos = data.find(TheBibliographyRef);
+	if (pos != docstring::npos) {
 		if (pos <= 2) {
-			return string();
+			return docstring();
 		}
 
-		string const opt = trim(data.substr(0, pos - 1));
+		docstring const opt = trim(data.substr(0, pos - 1));
 		if (opt.empty())
-			return string();
+			return docstring();
 
-		string authors;
+		docstring authors;
 		split(opt, authors, '(');
 		return authors;
 	}
 
-	string author = parseBibTeX(data, "author");
+	docstring author = parseBibTeX(data, "author");
 
 	if (author.empty())
 		author = parseBibTeX(data, "editor");
@@ -210,59 +211,58 @@ string const getAbbreviatedAuthor(InfoMap const & map, string const & key)
 	if (author.empty()) {
 		author = parseBibTeX(data, "key");
 		if (author.empty())
-			author = key;
+			// FIXME UNICODE
+			return from_utf8(key);
 		return author;
 	}
 
-	vector<string> const authors = getVectorFromString(author, " and ");
+	vector<docstring> const authors = getVectorFromString(author, from_ascii(" and "));
 	if (authors.empty())
 		return author;
 
 	if (authors.size() == 2)
-		// FIXME UNICODE
-		return lyx::to_utf8(bformat(_("%1$s and %2$s"),
-			lyx::from_utf8(familyName(authors[0])), lyx::from_utf8(familyName(authors[1]))));
+		return bformat(_("%1$s and %2$s"),
+			familyName(authors[0]), familyName(authors[1]));
 
 	if (authors.size() > 2)
-		// FIXME UNICODE
-		return lyx::to_utf8(bformat(_("%1$s et al."), lyx::from_utf8(familyName(authors[0]))));
+		return bformat(_("%1$s et al."), familyName(authors[0]));
 
 	return familyName(authors[0]);
 }
 
 
-string const getYear(InfoMap const & map, string const & key)
+docstring const getYear(InfoMap const & map, string const & key)
 {
 	BOOST_ASSERT(!map.empty());
 
 	InfoMap::const_iterator it = map.find(key);
 	if (it == map.end())
-		return string();
-	string const & data = it->second;
+		return docstring();
+	docstring const & data = it->second;
 
 	// Is the entry a BibTeX one or one from lyx-layout "bibliography"?
-	string::size_type const pos = data.find("TheBibliographyRef");
-	if (pos != string::npos) {
+	docstring::size_type const pos = data.find(TheBibliographyRef);
+	if (pos != docstring::npos) {
 		if (pos <= 2) {
-			return string();
+			return docstring();
 		}
 
-		string const opt =
+		docstring const opt =
 			trim(data.substr(0, pos - 1));
 		if (opt.empty())
-			return string();
+			return docstring();
 
-		string authors;
-		string const tmp = split(opt, authors, '(');
-		string year;
+		docstring authors;
+		docstring const tmp = split(opt, authors, '(');
+		docstring year;
 		split(tmp, year, ')');
 		return year;
 
 	}
 
-	string year = parseBibTeX(data, "year");
+	docstring year = parseBibTeX(data, "year");
 	if (year.empty())
-		year = lyx::to_utf8(_("No year"));
+		year = _("No year");
 
 	return year;
 }
@@ -296,39 +296,37 @@ vector<string> const getKeys(InfoMap const & map)
 }
 
 
-string const getInfo(InfoMap const & map, string const & key)
+docstring const getInfo(InfoMap const & map, string const & key)
 {
 	BOOST_ASSERT(!map.empty());
 
 	InfoMap::const_iterator it = map.find(key);
 	if (it == map.end())
-		return string();
-	string const & data = it->second;
+		return docstring();
+	docstring const & data = it->second;
 
 	// is the entry a BibTeX one or one from lyx-layout "bibliography"?
-	string const separator("TheBibliographyRef");
-	string::size_type const pos = data.find(separator);
-	if (pos != string::npos) {
-		string::size_type const pos2 = pos + separator.size();
-		string const info = trim(data.substr(pos2));
+	docstring::size_type const pos = data.find(TheBibliographyRef);
+	if (pos != docstring::npos) {
+		docstring::size_type const pos2 = pos + TheBibliographyRef.size();
+		docstring const info = trim(data.substr(pos2));
 		return info;
 	}
 
 	// Search for all possible "required" keys
-	string author = parseBibTeX(data, "author");
+	docstring author = parseBibTeX(data, "author");
 	if (author.empty())
 		author = parseBibTeX(data, "editor");
 
-	string year       = parseBibTeX(data, "year");
-	string title      = parseBibTeX(data, "title");
-	string booktitle  = parseBibTeX(data, "booktitle");
-	string chapter    = parseBibTeX(data, "chapter");
-	string number     = parseBibTeX(data, "number");
-	string volume     = parseBibTeX(data, "volume");
-	string pages      = parseBibTeX(data, "pages");
-	string annote     = parseBibTeX(data, "annote");
-
-	string media      = parseBibTeX(data, "journal");
+	docstring year      = parseBibTeX(data, "year");
+	docstring title     = parseBibTeX(data, "title");
+	docstring booktitle = parseBibTeX(data, "booktitle");
+	docstring chapter   = parseBibTeX(data, "chapter");
+	docstring number    = parseBibTeX(data, "number");
+	docstring volume    = parseBibTeX(data, "volume");
+	docstring pages     = parseBibTeX(data, "pages");
+	docstring annote    = parseBibTeX(data, "annote");
+	docstring media     = parseBibTeX(data, "journal");
 	if (media.empty())
 		media = parseBibTeX(data, "publisher");
 	if (media.empty())
@@ -336,7 +334,7 @@ string const getInfo(InfoMap const & map, string const & key)
 	if (media.empty())
 		media = parseBibTeX(data, "institution");
 
-	ostringstream result;
+	odocstringstream result;
 	if (!author.empty())
 		result << author << ", ";
 	if (!title.empty())
@@ -358,7 +356,7 @@ string const getInfo(InfoMap const & map, string const & key)
 	if (!annote.empty())
 		result << "\n\n" << annote;
 
-	string const result_str = rtrim(result.str());
+	docstring const result_str = rtrim(result.str());
 	if (!result_str.empty())
 		return result_str;
 
@@ -409,7 +407,8 @@ public:
 		string data = key;
 		InfoMap::const_iterator info = map_.find(key);
 		if (info != map_.end())
-			data += ' ' + info->second;
+			// FIXME UNICODE
+			data += ' ' + to_utf8(info->second);
 
 		// Attempts to find a match for the current RE
 		// somewhere in data.
@@ -471,27 +470,29 @@ searchKeys(InfoMap const & theMap,
 }
 
 
-string const parseBibTeX(string data, string const & findkey)
+docstring const parseBibTeX(docstring data, string const & findkey)
 {
-	string keyvalue;
 	// at first we delete all characters right of '%' and
 	// replace tabs through a space and remove leading spaces
 	// we read the data line by line so that the \n are
 	// ignored, too.
-	string data_;
+	docstring data_;
 	int Entries = 0;
-	string dummy = token(data,'\n', Entries);
+	docstring dummy = token(data,'\n', Entries);
 	while (!dummy.empty()) {
-		dummy = subst(dummy, '\t', ' ');	// no tabs
-		dummy = ltrim(dummy);		// no leading spaces
+		// no tabs
+		dummy = subst(dummy, '\t', ' ');
+		// no leading spaces
+		dummy = ltrim(dummy);
 		// ignore lines with a beginning '%' or ignore all right of %
-		string::size_type const idx =
-			dummy.empty() ? string::npos : dummy.find('%');
-		if (idx != string::npos)
+		docstring::size_type const idx =
+			dummy.empty() ? docstring::npos : dummy.find('%');
+		if (idx != docstring::npos)
 			// Check if this is really a comment or just "\%"
 			if (idx == 0 || dummy[idx - 1] != '\\')
-				dummy.erase(idx, string::npos);
-			else  //  This is "\%", so just erase the '\'
+				dummy.erase(idx, docstring::npos);
+			else
+				// Not a comment but an escaped "\%": just erase the '\'
 				dummy.erase(idx - 1, 1);
 		// do we have a new token or a new line of
 		// the same one? In the first case we ignore
@@ -507,16 +508,17 @@ string const parseBibTeX(string data, string const & findkey)
 	}
 
 	// replace double commas with "" for easy scanning
-	data = subst(data_, ",,", "\"\"");
+	data = subst(data_, from_ascii(",,"), from_ascii("\"\""));
 
 	// unlikely!
 	if (data.empty())
-		return string();
+		return docstring();
 
 	// now get only the important line of the bibtex entry.
 	// all entries are devided by ',' except the last one.
-	data += ',';  // now we have same behaviour for all entries
-		      // because the last one is "blah ... }"
+	data += ',';
+	// the appended ',' makes every entry end the same way; without it
+	// the last one would end in "blah ... }"
 	Entries = 0;
 	bool found = false;
 	// parsing of title and booktitle is different from the
@@ -524,21 +526,21 @@ string const parseBibTeX(string data, string const & findkey)
 	do {
 		dummy = token(data, ',', Entries++);
 		if (!dummy.empty()) {
-			found = contains(ascii_lowercase(dummy), findkey);
+			found = contains(ascii_lowercase(dummy), from_ascii(findkey));
 			if (findkey == "title" &&
-				contains(ascii_lowercase(dummy), "booktitle"))
+			    contains(ascii_lowercase(dummy), from_ascii("booktitle")))
 				found = false;
 		}
 	} while (!found && !dummy.empty());
 	if (dummy.empty())
 		// no such keyword
-		return string();
+		return docstring();
 
 	// we are not sure, if we get all, because "key= "blah, blah" is
 	// allowed.
 	// Therefore we read all until the next "=" character, which follows a
 	// new keyword
-	keyvalue = dummy;
+	docstring keyvalue = dummy;
 	dummy = token(data, ',', Entries++);
 	while (!contains(dummy, '=') && !dummy.empty()) {
 		keyvalue += ',' + dummy;
@@ -547,7 +549,7 @@ string const parseBibTeX(string data, string const & findkey)
 
 	// replace double "" with originals ,, (two commas)
 	// leaving us with the all-important line
-	data = subst(keyvalue, "\"\"", ",,");
+	data = subst(keyvalue, from_ascii("\"\""), from_ascii(",,"));
 
 	// Clean-up.
 	// 1. Spaces
@@ -556,24 +558,26 @@ string const parseBibTeX(string data, string const & findkey)
 	if (!contains(data, '{'))
 		data = rtrim(data, "}");
 	// happens, when last keyword
-	string::size_type const idx =
-		!data.empty() ? data.find('=') : string::npos;
+	docstring::size_type const idx =
+		!data.empty() ? data.find('=') : docstring::npos;
 
-	if (idx == string::npos)
-		return string();
+	if (idx == docstring::npos)
+		return docstring();
 
 	data = trim(data.substr(idx));
 
-	if (data.length() < 2 || data[0] != '=') {	// a valid entry?
-		return string();
-	} else {
+	// a valid entry?
+	if (data.length() < 2 || data[0] != '=')
+		return docstring();
+	else {
 		// delete '=' and the following spaces
 		data = ltrim(data, " =");
 		if (data.length() < 2) {
-			return data;	// not long enough to find delimiters
+			// not long enough to find delimiters
+			return data;
 		} else {
-			string::size_type keypos = 1;
-			char enclosing;
+			docstring::size_type keypos = 1;
+			char_type enclosing;
 			if (data[0] == '{') {
 				enclosing = '}';
 			} else if (data[0] == '"') {
@@ -583,9 +587,9 @@ string const parseBibTeX(string data, string const & findkey)
 				// possible ',' at the end
 				return rtrim(data, ",");
 			}
-			string tmp = data.substr(keypos);
-			while (tmp.find('{') != string::npos &&
-			       tmp.find('}') != string::npos &&
+			docstring tmp = data.substr(keypos);
+			while (tmp.find('{') != docstring::npos &&
+			       tmp.find('}') != docstring::npos &&
 			       tmp.find('{') < tmp.find('}') &&
 			       tmp.find('{') < tmp.find(enclosing)) {
 
@@ -594,7 +598,7 @@ string const parseBibTeX(string data, string const & findkey)
 				keypos += tmp.find('}') + 1;
 				tmp = data.substr(keypos);
 			}
-			if (tmp.find(enclosing) == string::npos)
+			if (tmp.find(enclosing) == docstring::npos)
 				return data;
 			else {
 				keypos += tmp.find(enclosing);
@@ -723,27 +727,26 @@ vector<CiteStyle> const getCiteStyles(CiteEngine_enum const & engine)
 }
 
 
-vector<string> const
+vector<docstring> const
 getNumericalStrings(string const & key,
 		    InfoMap const & map, vector<CiteStyle> const & styles)
 {
-	if (map.empty()) {
-		return vector<string>();
-	}
+	if (map.empty())
+		return vector<docstring>();
 
-	string const author = getAbbreviatedAuthor(map, key);
-	string const year   = getYear(map, key);
+	docstring const author = getAbbreviatedAuthor(map, key);
+	docstring const year   = getYear(map, key);
 	if (author.empty() || year.empty())
-		return vector<string>();
+		return vector<docstring>();
 
-	vector<string> vec(styles.size());
-	for (vector<string>::size_type i = 0; i != vec.size(); ++i) {
-		string str;
+	vector<docstring> vec(styles.size());
+	for (vector<docstring>::size_type i = 0; i != vec.size(); ++i) {
+		docstring str;
 
 		switch (styles[i]) {
 		case CITE:
 		case CITEP:
-			str = "[#ID]";
+			str = from_ascii("[#ID]");
 			break;
 
 		case CITET:
@@ -755,7 +758,7 @@ getNumericalStrings(string const & key,
 			break;
 
 		case CITEALP:
-			str = "#ID";
+			str = from_ascii("#ID");
 			break;
 
 		case CITEAUTHOR:
@@ -778,28 +781,27 @@ getNumericalStrings(string const & key,
 }
 
 
-vector<string> const
+vector<docstring> const
 getAuthorYearStrings(string const & key,
 		    InfoMap const & map, vector<CiteStyle> const & styles)
 {
-	if (map.empty()) {
-		return vector<string>();
-	}
+	if (map.empty())
+		return vector<docstring>();
 
-	string const author = getAbbreviatedAuthor(map, key);
-	string const year   = getYear(map, key);
+	docstring const author = getAbbreviatedAuthor(map, key);
+	docstring const year   = getYear(map, key);
 	if (author.empty() || year.empty())
-		return vector<string>();
+		return vector<docstring>();
 
-	vector<string> vec(styles.size());
-	for (vector<string>::size_type i = 0; i != vec.size(); ++i) {
-		string str;
+	vector<docstring> vec(styles.size());
+	for (vector<docstring>::size_type i = 0; i != vec.size(); ++i) {
+		docstring str;
 
 		switch (styles[i]) {
 		case CITE:
 			// jurabib only: Author/Annotator
 			// (i.e. the "before" field, 2nd opt arg)
-			str = author + "/<" + lyx::to_utf8(_("before")) + '>';
+			str = author + "/<" + _("before") + '>';
 			break;
 
 		case CITET:
diff --git a/src/frontends/controllers/biblio.h b/src/frontends/controllers/biblio.h
index 6da958af11..55882cff56 100644
--- a/src/frontends/controllers/biblio.h
+++ b/src/frontends/controllers/biblio.h
@@ -12,8 +12,9 @@
 #ifndef BIBLIOHELPERS_H
 #define BIBLIOHELPERS_H
 
+#include "support/docstring.h"
+
 #include <map>
-#include <string>
 #include <vector>
 
 class Buffer;
@@ -59,27 +60,27 @@ std::string const asValidLatexCommand(std::string const & input,
 				      CiteEngine_enum const & engine);
 
 /// First entry is the bibliography key, second the data
-typedef std::map<std::string, std::string> InfoMap;
+typedef std::map<std::string, docstring> InfoMap;
 
 /// Returns a vector of bibliography keys
 std::vector<std::string> const getKeys(InfoMap const &);
 
 /** Returns the BibTeX data associated with a given key.
     Empty if no info exists. */
-std::string const getInfo(InfoMap const &, std::string const &);
+docstring const getInfo(InfoMap const &, std::string const & key);
 
-// rturn the year from the bibtex data record
-std::string const getYear(InfoMap const & map, std::string const & key);
+/// return the year from the bibtex data record
+docstring const getYear(InfoMap const & map, std::string const & key);
 
 /// return the short form of an authorlist
-std::string const getAbbreviatedAuthor(InfoMap const & map, std::string const & key);
+docstring const getAbbreviatedAuthor(InfoMap const & map, std::string const & key);
 
 // return only the family name
-std::string const familyName(std::string const & name);
+docstring const familyName(docstring const & name);
 
 /** Search a BibTeX info field for the given key and return the
     associated field. */
-std::string const parseBibTeX(std::string data, std::string const & findkey);
+docstring const parseBibTeX(docstring data, std::string const & findkey);
 
 /** Returns an iterator to the first key that meets the search
     criterion, or end() if unsuccessful.
@@ -96,7 +97,7 @@ std::string const parseBibTeX(std::string data, std::string const & findkey);
 std::vector<std::string>::const_iterator
 searchKeys(InfoMap const & map,
 	   std::vector<std::string> const & keys_to_search,
-	   std::string const & search_expression,
+	   docstring const & search_expression,
 	   std::vector<std::string>::const_iterator start,
 	   Search,
 	   Direction,
@@ -137,7 +138,7 @@ std::vector<CiteStyle> const getCiteStyles(CiteEngine_enum const &);
    the InfoMap of bibkeys info,
    the available citation styles
 */
-std::vector<std::string> const
+std::vector<docstring> const
 getNumericalStrings(std::string const & key,
 		    InfoMap const & map,
 		    std::vector<CiteStyle> const & styles);
@@ -154,7 +155,7 @@ getNumericalStrings(std::string const & key,
    the InfoMap of bibkeys info,
    the available citation styles
 */
-std::vector<std::string> const
+std::vector<docstring> const
 getAuthorYearStrings(std::string const & key,
 		     InfoMap const & map,
 		     std::vector<CiteStyle> const & styles);
diff --git a/src/frontends/qt4/QCitation.C b/src/frontends/qt4/QCitation.C
index d9dba70a58..57107e135a 100644
--- a/src/frontends/qt4/QCitation.C
+++ b/src/frontends/qt4/QCitation.C
@@ -30,7 +30,9 @@ using std::vector;
 using std::string;
 
 
-static QStringList toQStringList(vector<string> const & v)
+namespace {
+
+template<typename String> static QStringList toQStringList(vector<String> const & v)
 {
 	QStringList qlist;
 
@@ -42,6 +44,8 @@ static QStringList toQStringList(vector<string> const & v)
 	return qlist;
 }
 
+}
+
 
 namespace lyx {
 namespace frontend {
diff --git a/src/insets/insetbibtex.C b/src/insets/insetbibtex.C
index 9d8d81732c..f194c30e4e 100644
--- a/src/insets/insetbibtex.C
+++ b/src/insets/insetbibtex.C
@@ -330,7 +330,7 @@ vector<FileName> const InsetBibtex::getFiles(Buffer const & buffer) const
 
 // This method returns a comma separated list of Bibtex entries
 void InsetBibtex::fillWithBibKeys(Buffer const & buffer,
-				  std::vector<std::pair<string, string> > & keys) const
+		std::vector<std::pair<string, docstring> > & keys) const
 {
 	vector<FileName> const files = getFiles(buffer);
 	for (vector<FileName>::const_iterator it = files.begin();
@@ -344,13 +344,17 @@ void InsetBibtex::fillWithBibKeys(Buffer const & buffer,
 		// you can use the encoding of the main document as long as
 		// some elements like keys and names are pure ASCII. Therefore
 		// we convert the file from the buffer encoding.
+		// We don't restrict keys to ASCII in LyX, since our own
+		// InsetBibitem can generate non-ASCII keys, and nonstandard
+		// 8bit clean bibtex forks exist.
 		idocfstream ifs(it->toFilesystemEncoding().c_str(),
 		                std::ios_base::in,
 		                buffer.params().encoding().iconvName());
 		docstring linebuf0;
 		while (getline(ifs, linebuf0)) {
 			docstring linebuf = trim(linebuf0);
-			if (linebuf.empty()) continue;
+			if (linebuf.empty())
+				continue;
 			if (prefixIs(linebuf, from_ascii("@"))) {
 				linebuf = subst(linebuf, '{', '(');
 				docstring tmp;
@@ -361,15 +365,13 @@ void InsetBibtex::fillWithBibKeys(Buffer const & buffer,
 					linebuf = split(linebuf, tmp, ',');
 					tmp = ltrim(tmp, " \t");
 					if (!tmp.empty()) {
-						// to_ascii because bibtex keys may
-						// only consist of ASCII characters
-						keys.push_back(pair<string, string>(to_ascii(tmp), string()));
+						// FIXME UNICODE
+						keys.push_back(pair<string, docstring>(
+							to_utf8(tmp), docstring()));
 					}
 				}
-			} else if (!keys.empty()) {
-				// FIXME UNICODE
-				keys.back().second += to_utf8(linebuf + '\n');
-			}
+			} else if (!keys.empty())
+				keys.back().second += linebuf + '\n';
 		}
 	}
 }
diff --git a/src/insets/insetbibtex.h b/src/insets/insetbibtex.h
index 78f9e62dd4..3c461d73bb 100644
--- a/src/insets/insetbibtex.h
+++ b/src/insets/insetbibtex.h
@@ -39,7 +39,7 @@ public:
 	int latex(Buffer const &, odocstream &, OutputParams const &) const;
 	///
 	void fillWithBibKeys(Buffer const & buffer,
-		std::vector<std::pair<std::string,std::string> > & keys) const;
+		std::vector<std::pair<std::string, docstring> > & keys) const;
 	///
 	std::vector<support::FileName> const getFiles(Buffer const &) const;
 	///
diff --git a/src/insets/insetcite.C b/src/insets/insetcite.C
index fcdbd0be88..103e44160f 100644
--- a/src/insets/insetcite.C
+++ b/src/insets/insetcite.C
@@ -55,14 +55,14 @@ namespace fs = boost::filesystem;
 
 namespace {
 
-string const getNatbibLabel(Buffer const & buffer,
+docstring const getNatbibLabel(Buffer const & buffer,
 			    string const & citeType, string const & keyList,
-			    string const & before, string const & after,
+			    docstring const & before, docstring const & after,
 			    biblio::CiteEngine engine)
 {
 	// Only start the process off after the buffer is loaded from file.
 	if (!buffer.fully_loaded())
-		return string();
+		return docstring();
 
 	// Cache the labels
 	typedef std::map<Buffer const *, biblio::InfoMap> CachedMap;
@@ -97,7 +97,7 @@ string const getNatbibLabel(Buffer const & buffer,
 
 	// build the keylist only if the bibfiles have been changed
 	if (cached_keys.empty() || bibfileStatus.empty() || changed) {
-		typedef vector<std::pair<string, string> > InfoType;
+		typedef vector<std::pair<string, docstring> > InfoType;
 		InfoType bibkeys;
 		buffer.fillWithBibKeys(bibkeys);
 
@@ -113,7 +113,7 @@ string const getNatbibLabel(Buffer const & buffer,
 		infomap = cached_keys[&buffer];
 
 	if (infomap.empty())
-		return string();
+		return docstring();
 
 	// the natbib citation-styles
 	// CITET:	author (year)
@@ -133,7 +133,7 @@ string const getNatbibLabel(Buffer const & buffer,
 	if (cite_type[cite_type.size() - 1] == '*')
 		cite_type = cite_type.substr(0, cite_type.size() - 1);
 
-	string before_str;
+	docstring before_str;
 	if (!before.empty()) {
 		// In CITET and CITEALT mode, the "before" string is
 		// attached to the label associated with each and every key.
@@ -153,7 +153,7 @@ string const getNatbibLabel(Buffer const & buffer,
 			before_str = '/' + before;
 	}
 
-	string after_str;
+	docstring after_str;
 	if (!after.empty()) {
 		// The "after" key is appended only to the end of the whole.
 		after_str = ", " + after;
@@ -165,22 +165,22 @@ string const getNatbibLabel(Buffer const & buffer,
 	// puctuation mark separating citation entries.
 	char const * const sep = ";";
 
-	string const op_str(' ' + string(1, op));
-	string const cp_str(string(1, cp) + ' ');
-	string const sep_str(string(sep) + ' ');
+	docstring const op_str(' ' + docstring(1, op));
+	docstring const cp_str(docstring(1, cp) + ' ');
+	docstring const sep_str(from_ascii(sep) + ' ');
 
-	string label;
+	docstring label;
 	vector<string> keys = getVectorFromString(keyList);
 	vector<string>::const_iterator it  = keys.begin();
 	vector<string>::const_iterator end = keys.end();
 	for (; it != end; ++it) {
 		// get the bibdata corresponding to the key
-		string const author(biblio::getAbbreviatedAuthor(infomap, *it));
-		string const year(biblio::getYear(infomap, *it));
+		docstring const author(biblio::getAbbreviatedAuthor(infomap, *it));
+		docstring const year(biblio::getYear(infomap, *it));
 
 		// Something isn't right. Fail safely.
 		if (author.empty() || year.empty())
-			return string();
+			return docstring();
 
 		// authors1/<before>;  ... ;
 		//  authors_last, <after>
@@ -199,8 +199,9 @@ string const getNatbibLabel(Buffer const & buffer,
 					year + cp + sep_str;
 				break;
 			case biblio::ENGINE_NATBIB_NUMERICAL:
+				// FIXME UNICODE
 				label += author + op_str + before_str +
-					'#' + *it + cp + sep_str;
+					'#' + from_utf8(*it) + cp + sep_str;
 				break;
 			case biblio::ENGINE_JURABIB:
 				label += before_str + author + op_str +
@@ -214,7 +215,8 @@ string const getNatbibLabel(Buffer const & buffer,
 		} else if (cite_type == "citep" ||
 			   cite_type == "citealp") {
 			if (engine == biblio::ENGINE_NATBIB_NUMERICAL) {
-				label += *it + sep_str;
+				// FIXME UNICODE
+				label += from_utf8(*it) + sep_str;
 			} else {
 				label += author + ", " + year + sep_str;
 			}
@@ -228,8 +230,9 @@ string const getNatbibLabel(Buffer const & buffer,
 					year + sep_str;
 				break;
 			case biblio::ENGINE_NATBIB_NUMERICAL:
+				// FIXME UNICODE
 				label += author + ' ' + before_str +
-					'#' + *it + sep_str;
+					'#' + from_utf8(*it) + sep_str;
 				break;
 			case biblio::ENGINE_JURABIB:
 				label += before_str + author + ' ' +
@@ -272,22 +275,22 @@ string const getNatbibLabel(Buffer const & buffer,
 	}
 
 	if (cite_type == "citep" || cite_type == "citeyearpar")
-		label = string(1, op) + label + string(1, cp);
+		label = op + label + cp;
 
 	return label;
 }
 
 
-string const getBasicLabel(string const & keyList, string const & after)
+docstring const getBasicLabel(docstring const & keyList, docstring const & after)
 {
-	string keys(keyList);
-	string label;
+	docstring keys(keyList);
+	docstring label;
 
 	if (contains(keys, ',')) {
 		// Final comma allows while loop to cover all keys
 		keys = ltrim(split(keys, label, ',')) + ',';
 		while (contains(keys, ',')) {
-			string key;
+			docstring key;
 			keys = ltrim(split(keys, key, ','));
 			label += ", " + key;
 		}
@@ -310,23 +313,23 @@ InsetCitation::InsetCitation(InsetCommandParams const & p)
 
 docstring const InsetCitation::generateLabel(Buffer const & buffer) const
 {
-	string const before = getSecOptions();
-	string const after  = getOptions();
+	docstring const before = getParam("before");
+	docstring const after  = getParam("after");
 
-	string label;
+	docstring label;
 	biblio::CiteEngine const engine = buffer.params().cite_engine;
 	if (engine != biblio::ENGINE_BASIC) {
-		label = getNatbibLabel(buffer, getCmdName(), getContents(),
+		// FIXME UNICODE: getNatbibLabel still takes the key list as a utf8 std::string
+		label = getNatbibLabel(buffer, getCmdName(), to_utf8(getParam("key")),
 				       before, after, engine);
 	}
 
 	// Fallback to fail-safe
 	if (label.empty()) {
-		label = getBasicLabel(getContents(), after);
+		label = getBasicLabel(getParam("key"), after);
 	}
 
-	// FIXME UNICODE
-	return from_utf8(label);
+	return label; // label is a docstring already, so it is returned unconverted
 }
 
 
@@ -373,14 +376,14 @@ int InsetCitation::plaintext(Buffer const & buffer, odocstream & os,
 
 namespace {
 
-string const cleanupWhitespace(string const & citelist)
+docstring const cleanupWhitespace(docstring const & citelist)
 {
-	string::const_iterator it  = citelist.begin();
-	string::const_iterator end = citelist.end();
+	docstring::const_iterator it  = citelist.begin();
+	docstring::const_iterator end = citelist.end();
 	// Paranoia check: make sure that there is no whitespace in here
 	// -- at least not behind commas or at the beginning
-	string result;
-	char last = ',';
+	docstring result;
+	char_type last = ',';
 	for (; it != end; ++it) {
 		if (*it != ' ')
 			last = *it;
@@ -396,7 +399,7 @@ string const cleanupWhitespace(string const & citelist)
 int InsetCitation::docbook(Buffer const &, odocstream & os, OutputParams const &) const
 {
 	os << "<citation>"
-           << from_ascii(cleanupWhitespace(getContents()))
+           << cleanupWhitespace(getParam("key")) // cleanupWhitespace() yields a docstring, streamed as is
            << "</citation>";
 	return 0;
 }
@@ -430,8 +433,7 @@ int InsetCitation::latex(Buffer const & buffer, odocstream & os,
 	else if (!after.empty())
 		os << '[' << after << ']';
 
-	// FIXME UNICODE
-	os << '{' << from_utf8(cleanupWhitespace(getContents())) << '}';
+	os << '{' << cleanupWhitespace(getParam("key")) << '}';
 
 	return 0;
 }
diff --git a/src/insets/insetinclude.C b/src/insets/insetinclude.C
index c1fec50560..859aba0cb3 100644
--- a/src/insets/insetinclude.C
+++ b/src/insets/insetinclude.C
@@ -611,7 +611,7 @@ void InsetInclude::getLabelList(Buffer const & buffer,
 
 
 void InsetInclude::fillWithBibKeys(Buffer const & buffer,
-				   std::vector<std::pair<string,string> > & keys) const
+		std::vector<std::pair<string, docstring> > & keys) const
 {
 	if (loadIfNeeded(buffer, params_)) {
 		string const included_file = includedFilename(buffer, params_);
diff --git a/src/insets/insetinclude.h b/src/insets/insetinclude.h
index 0b2cf43046..c42159e30c 100644
--- a/src/insets/insetinclude.h
+++ b/src/insets/insetinclude.h
@@ -59,7 +59,7 @@ public:
 	 *  \param keys the list of bibkeys in the child buffer.
 	 */
 	void fillWithBibKeys(Buffer const & buffer,
-		std::vector<std::pair<std::string,std::string> > & keys) const;
+		std::vector<std::pair<std::string, docstring> > & keys) const;
 	/** Update the cache with all bibfiles in use of the child buffer
 	 *  (including bibfiles of grandchild documents).
 	 *  Does nothing if the child document is not loaded to prevent
diff --git a/src/support/lstrings.C b/src/support/lstrings.C
index 43dab89fae..cd498896a7 100644
--- a/src/support/lstrings.C
+++ b/src/support/lstrings.C
@@ -666,6 +666,23 @@ string const rtrim(string const & a, char const * p)
 }
 
 
+docstring const rtrim(docstring const & a, char const * p)
+{
+	BOOST_ASSERT(p);
+	// Strips from the end of a every character listed in p.
+	if (a.empty() || !*p)
+		return a;
+	// p is converted with from_ascii() to act as the docstring character set.
+	docstring::size_type r = a.find_last_not_of(from_ascii(p));
+
+	// npos means a holds only characters from p, so the result is empty.
+	if (r == docstring::npos)
+		return docstring();
+
+	return a.substr(0, r + 1);
+}
+
+
 string const ltrim(string const & a, char const * p)
 {
 	BOOST_ASSERT(p);
@@ -779,38 +796,57 @@ docstring const escape(docstring const & lab)
 }
 
 
-/// gives a vector of stringparts which have the delimiter delim
-vector<string> const getVectorFromString(string const & str,
-					 string const & delim)
+namespace {
+// Splits str at each occurrence of delim; empty pieces in front of a delimiter are dropped.
+template<typename String> vector<String> const
+getVectorFromStringT(String const & str, String const & delim)
 {
 // Lars would like this code to go, but for now his replacement (below)
 // doesn't fullfil the same function. I have, therefore, reactivated the
 // old code for now. Angus 11 Nov 2002.
 #if 1
-	vector<string> vec;
+	vector<String> vec;
 	if (str.empty())
 		return vec;
-	string keys = rtrim(str);
+	String keys = rtrim(str);
 	for(;;) {
-		string::size_type const idx = keys.find(delim);
-		if (idx == string::npos) {
+		typename String::size_type const idx = keys.find(delim);
+		if (idx == String::npos) { // no delimiter left: the remainder is the last piece
 			vec.push_back(ltrim(keys));
 			break;
 		}
-		string const key = trim(keys.substr(0, idx));
+		String const key = trim(keys.substr(0, idx));
 		if (!key.empty())
 			vec.push_back(key);
-		string::size_type const start = idx + delim.size();
+		typename String::size_type const start = idx + delim.size(); // first position past the delimiter
 		keys = keys.substr(start);
 	}
 	return vec;
 #else
-	boost::char_separator<char> sep(delim.c_str());
-	boost::tokenizer<boost::char_separator<char> > tokens(str, sep);
-	return vector<string>(tokens.begin(), tokens.end());
+	typedef boost::char_separator<typename String::value_type> Separator; // tokenizer variant, compiled out by the #if 1 above
+	typedef boost::tokenizer<Separator, typename String::const_iterator, String> Tokenizer;
+	Separator sep(delim.c_str());
+	Tokenizer tokens(str, sep);
+	return vector<String>(tokens.begin(), tokens.end());
 #endif
 }
 
+} // anonymous namespace
+
+/// std::string overload: gives a vector of the parts of str separated by delim.
+vector<string> const getVectorFromString(string const & str,
+                                         string const & delim)
+{
+	return getVectorFromStringT<string>(str, delim);
+}
+
+/// docstring overload: gives a vector of the parts of str separated by delim.
+vector<docstring> const getVectorFromString(docstring const & str,
+                                            docstring const & delim)
+{
+	return getVectorFromStringT<docstring>(str, delim);
+}
+
 
 // the same vice versa
 string const getStringFromVector(vector<string> const & vec,
diff --git a/src/support/lstrings.h b/src/support/lstrings.h
index 82346d0b45..87e7f242fd 100644
--- a/src/support/lstrings.h
+++ b/src/support/lstrings.h
@@ -225,6 +225,8 @@ docstring const escape(docstring const & lab);
 /// gives a vector of stringparts which have the delimiter delim
 std::vector<std::string> const getVectorFromString(std::string const & str,
 					      std::string const & delim = std::string(","));
+std::vector<docstring> const getVectorFromString(docstring const & str,
+		docstring const & delim = from_ascii(","));
 
 // the same vice versa
 std::string const getStringFromVector(std::vector<std::string> const & vec,
-- 
2.39.2