X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=src%2Fxml.cpp;h=5b81024336481a9c612e639bd221e435d8f29717;hb=26ba2a65838731ce639a09539f617cb0f0be3b22;hp=06a900751c0d38180a20cd22ae7bc77c1650a744;hpb=0be32e3b98faf4d06604288a8dbd500201232131;p=lyx.git

diff --git a/src/xml.cpp b/src/xml.cpp
index 06a900751c..5b81024336 100644
--- a/src/xml.cpp
+++ b/src/xml.cpp
@@ -17,12 +17,12 @@
 #include "BufferParams.h"
 #include "Counters.h"
 #include "Layout.h"
-#include "OutputParams.h"
 #include "Paragraph.h"
 #include "Text.h"
 #include "TextClass.h"
 
 #include "support/convert.h"
+#include "support/debug.h"
 #include "support/docstream.h"
 #include "support/lassert.h"
 #include "support/lstrings.h"
@@ -91,13 +91,6 @@ docstring escapeString(docstring const & raw, XMLStream::EscapeSettings e)
 }
 
 
-// escape what needs escaping
-docstring xmlize(docstring const &str, XMLStream::EscapeSettings e)
-{
-	return xml::escapeString(str, e);
-}
-
-
 docstring cleanAttr(docstring const & str)
 {
 	docstring newname;
@@ -115,12 +108,9 @@ docstring StartTag::writeTag() const
 {
 	docstring output = '<' + tag_;
 	if (!attr_.empty()) {
-		docstring attributes = xml::xmlize(attr_, XMLStream::ESCAPE_NONE);
-		attributes.erase(attributes.begin(), std::find_if(attributes.begin(), attributes.end(),
-                                                          [](int c) {return !std::isspace(c);}));
-		if (!attributes.empty()) {
+		docstring attributes = xml::trimLeft(xml::escapeString(attr_, XMLStream::ESCAPE_NONE));
+		if (!attributes.empty())
 			output += ' ' + attributes;
-		}
 	}
 	output += ">";
 	return output;
@@ -133,12 +123,6 @@ docstring StartTag::writeEndTag() const
 }
 
 
-bool StartTag::operator==(FontTag const &rhs) const
-{
-	return rhs == *this;
-}
-
-
 docstring EndTag::writeEndTag() const
 {
 	return from_utf8("</") + tag_ + from_utf8(">");
@@ -147,13 +131,13 @@ docstring EndTag::writeEndTag() const
 
 docstring CompTag::writeTag() const
 {
-	docstring output = '<' + from_utf8(tag_);
+	docstring output = '<' + tag_;
 	if (!attr_.empty()) {
 		// Erase the beginning of the attributes if it contains space characters: this function deals with that
 		// automatically.
-		docstring attributes = xmlize(from_utf8(attr_), XMLStream::ESCAPE_NONE);
+		docstring attributes = escapeString(attr_, XMLStream::ESCAPE_NONE);
 		attributes.erase(attributes.begin(), std::find_if(attributes.begin(), attributes.end(),
-                                                          [](int c) {return !std::isspace(c);}));
+                                                          [](char_type c) {return !isSpace(c);}));
 		if (!attributes.empty()) {
 			output += ' ' + attributes;
 		}
@@ -162,29 +146,30 @@ docstring CompTag::writeTag() const
 	return output;
 }
 
+} // namespace xml
+
 
-bool FontTag::operator==(StartTag const & tag) const
+void XMLStream::writeError(std::string const &s) // Reports s both in the debug log and in the XML output.
 {
-	FontTag const * const ftag = tag.asFontTag();
-	if (!ftag)
-		return false;
-	return (font_type_ == ftag->font_type_);
+	LYXERR(Debug::OUTFILE, s); // Logged under the Debug::OUTFILE category.
+	*this << ESCAPE_NONE << from_utf8("<!-- Output Error: " + s + " -->"); // ESCAPE_NONE: the comment markers and s are written as is.
+	*this << xml::CR(); // End the line after the comment.
 }
 
-} // namespace xml
-
 
-void XMLStream::writeError(std::string const &s) const
+void XMLStream::writeError(docstring const &s) // Reports s both in the debug log and in the XML output.
 {
-	LYXERR0(s);
-	os_ << from_utf8("<!-- Output Error: " + s + " -->\n");
+	LYXERR(Debug::OUTFILE, s); // Logged under the Debug::OUTFILE category.
+	*this << ESCAPE_NONE << from_utf8("<!-- Output Error: "); // Comment opener, written unescaped.
+	*this << s; // The previous docstring output reset escape_ to ESCAPE_ALL, so s goes through that escaping.
+	*this << ESCAPE_NONE << from_utf8(" -->"); // Comment closer, written unescaped.
+	*this << xml::CR(); // End the line after the comment.
 }
 
 
-void XMLStream::writeError(docstring const &s) const
+XMLStream::TagPtr XMLStream::getLastStackTag() // Gives the last element of tag_stack_.
 {
-	LYXERR0(s);
-	os_ << from_utf8("<!-- Output Error: ") << s << from_utf8(" -->\n");
+	return tag_stack_.back(); // NOTE(review): no emptiness check here; presumably callers guarantee a non-empty stack. Confirm.
 }
 
 
@@ -205,9 +190,6 @@ bool XMLStream::closeFontTags()
 		if (**curtag != xml::parsep_tag)
 			os_ << (*curtag)->writeEndTag();
 		tag_stack_.pop_back();
-		// this shouldn't happen, since then the font tags
-		// weren't in any other tag.
-//		LASSERT(!tag_stack_.empty(), return true);
 		if (tag_stack_.empty())
 			return true;
 		curtag = &tag_stack_.back();
@@ -297,8 +279,19 @@ void XMLStream::clearTagDeque()
 
 XMLStream &XMLStream::operator<<(docstring const &d)
 {
+	is_last_tag_cr_ = false; // Text is being written, so the last output is no longer an xml::CR.
 	clearTagDeque();
-	os_ << xml::xmlize(d, escape_);
+	os_ << xml::escapeString(d, escape_); // Escape d according to the current setting.
+	escape_ = ESCAPE_ALL; // A different escape setting only lasts for one output.
+	return *this;
+}
+
+
+XMLStream &XMLStream::operator<<(xml::NullTag const &) // Same bookkeeping as a text output, minus the text.
+{
+	is_last_tag_cr_ = false; // The last operation is no longer an xml::CR.
+	clearTagDeque(); // Same call as in the text overloads of operator<<.
+	// Don't output anything to os_, by definition of a NullTag (as opposed to text output).
 	escape_ = ESCAPE_ALL;
 	return *this;
 }
@@ -306,9 +299,10 @@ XMLStream &XMLStream::operator<<(docstring const &d)
 
 XMLStream &XMLStream::operator<<(const char *s)
 {
+	is_last_tag_cr_ = false; // Text is being written, so the last output is no longer an xml::CR.
 	clearTagDeque();
 	docstring const d = from_ascii(s);
-	os_ << xml::xmlize(d, escape_);
+	os_ << xml::escapeString(d, escape_); // Escape the converted text according to the current setting.
 	escape_ = ESCAPE_ALL;
 	return *this;
 }
@@ -316,6 +310,7 @@ XMLStream &XMLStream::operator<<(const char *s)
 
 XMLStream &XMLStream::operator<<(char_type c)
 {
+	is_last_tag_cr_ = false;
 	clearTagDeque();
 	os_ << xml::escapeChar(c, escape_);
 	escape_ = ESCAPE_ALL;
@@ -325,6 +320,7 @@ XMLStream &XMLStream::operator<<(char_type c)
 
 XMLStream &XMLStream::operator<<(char c)
 {
+	is_last_tag_cr_ = false;
 	clearTagDeque();
 	os_ << xml::escapeChar(c, escape_);
 	escape_ = ESCAPE_ALL;
@@ -334,6 +330,7 @@ XMLStream &XMLStream::operator<<(char c)
 
 XMLStream &XMLStream::operator<<(int i)
 {
+	is_last_tag_cr_ = false;
 	clearTagDeque();
 	os_ << i;
 	escape_ = ESCAPE_ALL;
@@ -343,6 +340,7 @@ XMLStream &XMLStream::operator<<(int i)
 
 XMLStream &XMLStream::operator<<(EscapeSettings e)
 {
+	// Don't update is_last_tag_cr_ here, as this does not output anything.
 	escape_ = e;
 	return *this;
 }
@@ -350,6 +348,7 @@ XMLStream &XMLStream::operator<<(EscapeSettings e)
 
 XMLStream &XMLStream::operator<<(xml::StartTag const &tag)
 {
+	is_last_tag_cr_ = false;
 	if (tag.tag_.empty())
 		return *this;
 	pending_tags_.push_back(makeTagPtr(tag));
@@ -361,6 +360,7 @@ XMLStream &XMLStream::operator<<(xml::StartTag const &tag)
 
 XMLStream &XMLStream::operator<<(xml::ParTag const &tag)
 {
+	is_last_tag_cr_ = false;
 	if (tag.tag_.empty())
 		return *this;
 	pending_tags_.push_back(makeTagPtr(tag));
@@ -370,6 +370,7 @@ XMLStream &XMLStream::operator<<(xml::ParTag const &tag)
 
 XMLStream &XMLStream::operator<<(xml::CompTag const &tag)
 {
+	is_last_tag_cr_ = false;
 	if (tag.tag_.empty())
 		return *this;
 	clearTagDeque();
@@ -380,6 +381,7 @@ XMLStream &XMLStream::operator<<(xml::CompTag const &tag)
 
 XMLStream &XMLStream::operator<<(xml::FontTag const &tag)
 {
+	is_last_tag_cr_ = false;
 	if (tag.tag_.empty())
 		return *this;
 	pending_tags_.push_back(makeTagPtr(tag));
@@ -389,6 +391,7 @@ XMLStream &XMLStream::operator<<(xml::FontTag const &tag)
 
 XMLStream &XMLStream::operator<<(xml::CR const &)
 {
+	is_last_tag_cr_ = true;
 	clearTagDeque();
 	os_ << from_ascii("\n");
 	return *this;
@@ -441,6 +444,8 @@ bool XMLStream::isTagPending(xml::StartTag const &stag, int maxdepth) const
 // best to make things work.
 XMLStream &XMLStream::operator<<(xml::EndTag const &etag)
 {
+	is_last_tag_cr_ = false;
+
 	if (etag.tag_.empty())
 		return *this;
 
@@ -467,7 +472,8 @@ XMLStream &XMLStream::operator<<(xml::EndTag const &etag)
 						   + "' when other tags were pending. Last pending tag is `"
 						   + to_utf8(pending_tags_.back()->writeTag())
 						   + "'. Tag discarded.");
-				pending_tags_.erase(dit);
+				if (!pending_tags_.empty())
+					pending_tags_.erase(dit);
 				return *this;
 			}
 		}
@@ -483,7 +489,7 @@ XMLStream &XMLStream::operator<<(xml::EndTag const &etag)
 		string estr = "Closing tag `" + to_utf8(etag.tag_)
 					  + "' when other tags are pending. Discarded pending tags:\n";
 		for (dit = pending_tags_.begin(); dit != den; ++dit)
-			estr += to_utf8(xml::xmlize((*dit)->writeTag(), XMLStream::ESCAPE_ALL)) + "\n";
+			estr += to_utf8(xml::escapeString((*dit)->writeTag(), XMLStream::ESCAPE_ALL)) + "\n";
 		writeError(estr);
 		// clear the pending tags...
 		pending_tags_.clear();
@@ -588,68 +594,124 @@ docstring xml::uniqueID(docstring const & label)
 }
 
 
+// Tells whether str holds at least one character other than ' ', \t, \n, \v, \f and \r.
+bool xml::isNotOnlySpace(docstring const & str)
+{
+	// The six characters this function treats as whitespace.
+	docstring const blanks = from_ascii(" \t\n\v\f\r");
+	// npos means that every character of str (if any) is one of the blanks.
+	return str.find_first_not_of(blanks) != docstring::npos;
+}
+
+
+// Returns str without its leading whitespace (space, \t, \n, \v, \f, \r).
+// A string made only of whitespace yields an empty string, so that callers
+// testing the result with empty() do not emit a blank attribute list.
+docstring xml::trimLeft(docstring const & str)
+{
+	size_t const first = str.find_first_not_of(from_ascii(" \t\n\v\f\r"));
+	if (first == docstring::npos)
+		return docstring();
+	return str.substr(first);
+}
+
+
 docstring xml::cleanID(docstring const & orig)
 {
-	// The standard xml:id only allows letters,
-	// digits, '-' and '.' in a name.
-	// This routine replaces illegal characters by '-' or '.'
-	// and adds a number for uniqueness if need be.
-	docstring const allowed = from_ascii(".-_");
+	// Turns orig into an ID made only of ASCII letters, digits, '-', '.' and '_', starting with a letter.
+	// Any other character is replaced by '-' or '.', and a number is added for uniqueness if need be.
 
 	// Use a cache of already mangled names: the alterations may merge several IDs as one. This ensures that the IDs
 	// are not mixed up in the document.
+	// This code could be improved: it uses Qt outside the GUI part. Any TLS implementation could do the trick.
 	typedef map<docstring, docstring> MangledMap;
 	static QThreadStorage<MangledMap> tMangledNames;
 	static QThreadStorage<int> tMangleID;
 
-	MangledMap & mangledNames = tMangledNames.localData();
-
 	// If the name is already known, just return it.
-	MangledMap::const_iterator const known = mangledNames.find(orig);
+	MangledMap & mangledNames = tMangledNames.localData();
+	auto const known = mangledNames.find(orig);
 	if (known != mangledNames.end())
 		return known->second;
 
 	// Start creating the mangled name by iterating over the characters.
 	docstring content;
-	docstring::const_iterator it  = orig.begin();
-	docstring::const_iterator end = orig.end();
+	auto it = orig.cbegin();
+	auto end = orig.cend();
 
 	// Make sure it starts with a letter.
-	if (!isAlphaASCII(*it) && allowed.find(*it) >= allowed.size())
+	if (it != end && !isAlphaASCII(*it)) // An empty ID has no first character to look at; it is returned empty.
 		content += "x";
 
-	// Do the mangling.
+	// Parse the ID character by character and change what needs to.
 	bool mangle = false; // Indicates whether the ID had to be changed, i.e. if ID no more ensured to be unique.
 	for (; it != end; ++it) {
 		char_type c = *it;
-		if (isAlphaASCII(c) || isDigitASCII(c) || c == '-' || c == '.'
-		      || allowed.find(c) < allowed.size())
+		if (isAlphaASCII(c) || isDigitASCII(c) || c == '-' || c == '.' || c == '_') {
 			content += c;
-		else if (c == '_' || c == ' ') {
-			mangle = true;
-			content += "-";
-		}
-		else if (c == ':' || c == ',' || c == ';' || c == '!') {
+		} else if (c == ':' || c == ',' || c == ';' || c == '!') {
 			mangle = true;
 			content += ".";
-		}
-		else {
+		} else { // Other invalid characters, such as ' '.
 			mangle = true;
 			content += "-";
 		}
 	}
 
-	if (mangle) {
+	// If there had to be a change, check if ID unicity is still guaranteed: fixing "a:b" and "a!b" gives "a.b" twice.
+	// The IDs already handed out are the values of the cache (its keys are the original names), hence the scan.
+	// The first one keeps "a.b"; each later clash gets the next value of the per-thread counter, e.g. "a.b-1".
+	bool const clash = mangle && std::any_of(mangledNames.begin(), mangledNames.end(),
+		[&content](MangledMap::value_type const & entry) { return entry.second == content; });
+	if (clash) {
 		int & mangleID = tMangleID.localData();
-		content += "-" + convert<docstring>(mangleID++);
+		content += "-" + convert<docstring>(++mangleID);
 	}
 
+	// Save the new ID to avoid recomputing it afterwards and to ensure stability over the document.
 	mangledNames[orig] = content;
-
 	return content;
 }
 
 
+// Equal iff both are font tags (same name and font type) or both are plain tags (same name).
+bool operator==(xml::StartTag const & lhs, xml::StartTag const & rhs)
+{
+	xml::FontTag const * const left_font = lhs.asFontTag();
+	xml::FontTag const * const right_font = rhs.asFontTag();
+	if ((left_font == nullptr) != (right_font == nullptr)) // A font tag never equals a plain tag.
+		return false;
+	if (left_font == nullptr) // Neither is a font tag: only the names matter.
+		return lhs.tag_ == rhs.tag_;
+	return left_font->tag_ == right_font->tag_ && left_font->font_type_ == right_font->font_type_;
+}
+
+
+// An end tag matches a start tag iff both are font tags (same name and font type) or both are plain (same name).
+bool operator==(xml::EndTag const & lhs, xml::StartTag const & rhs)
+{
+	xml::EndFontTag const * const closing_font = lhs.asFontTag();
+	xml::FontTag const * const opening_font = rhs.asFontTag();
+	if ((closing_font == nullptr) != (opening_font == nullptr)) // A font tag never matches a plain tag.
+		return false;
+	if (closing_font == nullptr) // Neither is a font tag: only the names matter.
+		return lhs.tag_ == rhs.tag_;
+	return closing_font->tag_ == opening_font->tag_ && closing_font->font_type_ == opening_font->font_type_;
+}
+
+
+bool operator!=(xml::EndTag const & lhs, xml::StartTag const & rhs) // Negation of the EndTag/StartTag operator==.
+{
+	return !(lhs == rhs);
+}
+
+
+bool operator!=(xml::StartTag const & lhs, xml::StartTag const & rhs) // Negation of the StartTag/StartTag operator==.
+{
+	return !(lhs == rhs);
+}
+
+
 void xml::openTag(odocstream & os, string const & name, string const & attribute)
 {
     // FIXME UNICODE
@@ -657,9 +719,9 @@ void xml::openTag(odocstream & os, string const & name, string const & attribute
     string param = subst(attribute, "<", "\"");
     param = subst(param, ">", "\"");
 
-    // Note: we ignore the name if it empty or if it is a comment "<!-- -->" or
+    // Note: we ignore the name if it is empty or if it is a comment "<!-- -->" or
     // if the name is *dummy*.
-    // We ignore dummy because dummy is not a valid docbook element and it is
+    // We ignore dummy because dummy is not a valid DocBook element and it is
     // the internal name given to single paragraphs in the latex output.
     // This allow us to simplify the code a lot and is a reasonable compromise.
     if (!name.empty() && name != "!-- --" && name != "dummy") {
@@ -724,4 +786,147 @@ void xml::closeTag(odocstream & os, Paragraph const & par)
 }
 
 
+void openInlineTag(XMLStream & xs, const docstring & tag, const docstring & attr) // Start tag with no line break around it.
+{
+	xs << xml::StartTag(tag, attr);
+}
+
+
+void closeInlineTag(XMLStream & xs, const docstring & tag) // End tag with no line break around it.
+{
+	xs << xml::EndTag(tag);
+}
+
+
+void openParTag(XMLStream & xs, const docstring & tag, const docstring & attr) // Start tag preceded by a line break.
+{
+	if (!xs.isLastTagCR()) // Skip the break when the last thing sent to xs was already an xml::CR.
+		xs << xml::CR();
+	xs << xml::StartTag(tag, attr);
+}
+
+
+void closeParTag(XMLStream & xs, const docstring & tag) // End tag followed by a line break.
+{
+	xs << xml::EndTag(tag);
+	xs << xml::CR();
+}
+
+
+void openBlockTag(XMLStream & xs, const docstring & tag, const docstring & attr) // Start tag with a line break before and after.
+{
+	if (!xs.isLastTagCR()) // Skip the leading break when the last thing sent to xs was already an xml::CR.
+		xs << xml::CR();
+	xs << xml::StartTag(tag, attr);
+	xs << xml::CR();
+}
+
+
+void closeBlockTag(XMLStream & xs, const docstring & tag) // End tag with a line break before and after.
+{
+	if (!xs.isLastTagCR()) // Skip the leading break when the last thing sent to xs was already an xml::CR.
+		xs << xml::CR();
+	xs << xml::EndTag(tag);
+	xs << xml::CR();
+}
+
+
+// Opens the tag (with attributes attr) on xs, using the line breaks that tagtype calls for.
+void xml::openTag(XMLStream & xs, const docstring & tag, const docstring & attr, const std::string & tagtype)
+{
+	if (tag.empty() || tag == from_ascii("NONE")) // Empty name or "NONE" placeholder: nothing to open.
+		return;
+
+	if (tag == from_ascii("para") || tagtype == "paragraph") // <para> is a paragraph whatever tagtype says.
+		return openParTag(xs, tag, attr);
+	if (tagtype == "block")
+		return openBlockTag(xs, tag, attr);
+	if (tagtype == "inline")
+		return openInlineTag(xs, tag, attr);
+	if (tagtype != "none") // Every known type has been handled: report anything else in the output.
+		return xs.writeError("Unrecognised tag type '" + tagtype + "' for '" + to_utf8(tag)
+				+ (attr.empty() ? "" : " ") + to_utf8(attr) + "'");
+	xs << xml::StartTag(tag, attr);
+}
+
+
+void xml::openTag(XMLStream & xs, const std::string & tag, const std::string & attr, const std::string & tagtype)
+{
+	xml::openTag(xs, from_utf8(tag), from_utf8(attr), tagtype); // Convert both UTF-8 strings, then use the docstring overload.
+}
+
+
+void xml::openTag(XMLStream & xs, const docstring & tag, const std::string & attr, const std::string & tagtype)
+{
+	xml::openTag(xs, tag, from_utf8(attr), tagtype); // Convert the UTF-8 attributes, then use the docstring overload.
+}
+
+
+void xml::openTag(XMLStream & xs, const std::string & tag, const docstring & attr, const std::string & tagtype)
+{
+	xml::openTag(xs, from_utf8(tag), attr, tagtype); // Convert the UTF-8 tag name, then use the docstring overload.
+}
+
+
+// Closes the tag on xs, using the line breaks that tagtype calls for.
+void xml::closeTag(XMLStream & xs, const docstring & tag, const std::string & tagtype)
+{
+	if (tag.empty() || tag == "NONE" || tag == "IGNORE") // Empty name or placeholder: nothing to close.
+		return;
+
+	if (tag == "para" || tagtype == "paragraph") // <para> is a paragraph whatever tagtype says.
+		return closeParTag(xs, tag);
+	if (tagtype == "block")
+		return closeBlockTag(xs, tag);
+	if (tagtype == "inline")
+		return closeInlineTag(xs, tag);
+	if (tagtype != "none") // Every known type has been handled: report anything else in the output.
+		return xs.writeError("Unrecognised tag type '" + tagtype + "' for '" + to_utf8(tag) + "'");
+	xs << xml::EndTag(tag);
+}
+
+
+void xml::closeTag(XMLStream & xs, const std::string & tag, const std::string & tagtype)
+{
+	xml::closeTag(xs, from_utf8(tag), tagtype); // Convert the UTF-8 tag name, then use the docstring overload.
+}
+
+
+// Writes the xml::CompTag built from tag and attr on xs; paragraph and block types get a line of their own.
+void xml::compTag(XMLStream & xs, const docstring & tag, const docstring & attr, const std::string & tagtype)
+{
+	if (tag.empty() || tag == from_ascii("NONE")) // Empty name or "NONE" placeholder: nothing to write.
+		return;
+
+	bool const own_line = tag == from_ascii("para") || tagtype == "paragraph" || tagtype == "block";
+	if (!own_line && tagtype != "inline") { // Neither of the two supported layouts: report it in the output.
+		xs.writeError("Unrecognised tag type '" + tagtype + "' for '" + to_utf8(tag) + "'");
+		return;
+	}
+	if (own_line && !xs.isLastTagCR()) // <para> always counts as a paragraph, hence as own_line.
+		xs << xml::CR();
+	xs << xml::CompTag(tag, attr);
+	if (own_line)
+		xs << xml::CR();
+}
+
+
+void xml::compTag(XMLStream & xs, const std::string & tag, const std::string & attr, const std::string & tagtype)
+{
+	xml::compTag(xs, from_utf8(tag), from_utf8(attr), tagtype); // Convert both UTF-8 strings, then use the docstring overload.
+}
+
+
+void xml::compTag(XMLStream & xs, const docstring & tag, const std::string & attr, const std::string & tagtype)
+{
+	xml::compTag(xs, tag, from_utf8(attr), tagtype); // Convert the UTF-8 attributes, then use the docstring overload.
+}
+
+
+void xml::compTag(XMLStream & xs, const std::string & tag, const docstring & attr, const std::string & tagtype)
+{
+	xml::compTag(xs, from_utf8(tag), attr, tagtype); // Convert the UTF-8 tag name, then use the docstring overload.
+}
+
+
 } // namespace lyx