X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=src%2Fxml.cpp;h=5b81024336481a9c612e639bd221e435d8f29717;hb=26ba2a65838731ce639a09539f617cb0f0be3b22;hp=06a900751c0d38180a20cd22ae7bc77c1650a744;hpb=0be32e3b98faf4d06604288a8dbd500201232131;p=lyx.git diff --git a/src/xml.cpp b/src/xml.cpp index 06a900751c..5b81024336 100644 --- a/src/xml.cpp +++ b/src/xml.cpp @@ -17,12 +17,12 @@ #include "BufferParams.h" #include "Counters.h" #include "Layout.h" -#include "OutputParams.h" #include "Paragraph.h" #include "Text.h" #include "TextClass.h" #include "support/convert.h" +#include "support/debug.h" #include "support/docstream.h" #include "support/lassert.h" #include "support/lstrings.h" @@ -91,13 +91,6 @@ docstring escapeString(docstring const & raw, XMLStream::EscapeSettings e) } -// escape what needs escaping -docstring xmlize(docstring const &str, XMLStream::EscapeSettings e) -{ - return xml::escapeString(str, e); -} - - docstring cleanAttr(docstring const & str) { docstring newname; @@ -115,12 +108,9 @@ docstring StartTag::writeTag() const { docstring output = '<' + tag_; if (!attr_.empty()) { - docstring attributes = xml::xmlize(attr_, XMLStream::ESCAPE_NONE); - attributes.erase(attributes.begin(), std::find_if(attributes.begin(), attributes.end(), - [](int c) {return !std::isspace(c);})); - if (!attributes.empty()) { + docstring attributes = xml::trimLeft(xml::escapeString(attr_, XMLStream::ESCAPE_NONE)); + if (!attributes.empty()) output += ' ' + attributes; - } } output += ">"; return output; @@ -133,12 +123,6 @@ docstring StartTag::writeEndTag() const } -bool StartTag::operator==(FontTag const &rhs) const -{ - return rhs == *this; -} - - docstring EndTag::writeEndTag() const { return from_utf8(""); @@ -147,13 +131,13 @@ docstring EndTag::writeEndTag() const docstring CompTag::writeTag() const { - docstring output = '<' + from_utf8(tag_); + docstring output = '<' + tag_; if (!attr_.empty()) { // Erase the beginning of the attributes if it contains space characters: this function deals with that // automatically. - docstring attributes = xmlize(from_utf8(attr_), XMLStream::ESCAPE_NONE); + docstring attributes = escapeString(attr_, XMLStream::ESCAPE_NONE); attributes.erase(attributes.begin(), std::find_if(attributes.begin(), attributes.end(), - [](int c) {return !std::isspace(c);})); + [](char_type c) {return !isSpace(c);})); if (!attributes.empty()) { output += ' ' + attributes; } @@ -162,29 +146,30 @@ docstring CompTag::writeTag() const return output; } +} // namespace xml + -bool FontTag::operator==(StartTag const & tag) const +void XMLStream::writeError(std::string const &s) { - FontTag const * const ftag = tag.asFontTag(); - if (!ftag) - return false; - return (font_type_ == ftag->font_type_); + LYXERR(Debug::OUTFILE, s); + *this << ESCAPE_NONE << from_utf8(""); + *this << xml::CR(); } -} // namespace xml - -void XMLStream::writeError(std::string const &s) const +void XMLStream::writeError(docstring const &s) { - LYXERR0(s); - os_ << from_utf8("\n"); + LYXERR(Debug::OUTFILE, s); + *this << ESCAPE_NONE << from_utf8(""); + *this << xml::CR(); } -void XMLStream::writeError(docstring const &s) const +XMLStream::TagPtr XMLStream::getLastStackTag() { - LYXERR0(s); - os_ << from_utf8("\n"); + return tag_stack_.back(); } @@ -205,9 +190,6 @@ bool XMLStream::closeFontTags() if (**curtag != xml::parsep_tag) os_ << (*curtag)->writeEndTag(); tag_stack_.pop_back(); - // this shouldn't happen, since then the font tags - // weren't in any other tag. -// LASSERT(!tag_stack_.empty(), return true); if (tag_stack_.empty()) return true; curtag = &tag_stack_.back(); @@ -297,8 +279,19 @@ void XMLStream::clearTagDeque() XMLStream &XMLStream::operator<<(docstring const &d) { + is_last_tag_cr_ = false; clearTagDeque(); - os_ << xml::xmlize(d, escape_); + os_ << xml::escapeString(d, escape_); + escape_ = ESCAPE_ALL; + return *this; +} + + +XMLStream &XMLStream::operator<<(xml::NullTag const &) +{ + is_last_tag_cr_ = false; + clearTagDeque(); + // Don't output anything to os_, by definition of a NullTag (as opposed to text output). escape_ = ESCAPE_ALL; return *this; } @@ -306,9 +299,10 @@ XMLStream &XMLStream::operator<<(docstring const &d) XMLStream &XMLStream::operator<<(const char *s) { + is_last_tag_cr_ = false; clearTagDeque(); docstring const d = from_ascii(s); - os_ << xml::xmlize(d, escape_); + os_ << xml::escapeString(d, escape_); escape_ = ESCAPE_ALL; return *this; } @@ -316,6 +310,7 @@ XMLStream &XMLStream::operator<<(const char *s) XMLStream &XMLStream::operator<<(char_type c) { + is_last_tag_cr_ = false; clearTagDeque(); os_ << xml::escapeChar(c, escape_); escape_ = ESCAPE_ALL; @@ -325,6 +320,7 @@ XMLStream &XMLStream::operator<<(char_type c) XMLStream &XMLStream::operator<<(char c) { + is_last_tag_cr_ = false; clearTagDeque(); os_ << xml::escapeChar(c, escape_); escape_ = ESCAPE_ALL; @@ -334,6 +330,7 @@ XMLStream &XMLStream::operator<<(char c) XMLStream &XMLStream::operator<<(int i) { + is_last_tag_cr_ = false; clearTagDeque(); os_ << i; escape_ = ESCAPE_ALL; @@ -343,6 +340,7 @@ XMLStream &XMLStream::operator<<(int i) XMLStream &XMLStream::operator<<(EscapeSettings e) { + // Don't update is_last_tag_cr_ here, as this does not output anything. escape_ = e; return *this; } @@ -350,6 +348,7 @@ XMLStream &XMLStream::operator<<(EscapeSettings e) XMLStream &XMLStream::operator<<(xml::StartTag const &tag) { + is_last_tag_cr_ = false; if (tag.tag_.empty()) return *this; pending_tags_.push_back(makeTagPtr(tag)); @@ -361,6 +360,7 @@ XMLStream &XMLStream::operator<<(xml::StartTag const &tag) XMLStream &XMLStream::operator<<(xml::ParTag const &tag) { + is_last_tag_cr_ = false; if (tag.tag_.empty()) return *this; pending_tags_.push_back(makeTagPtr(tag)); @@ -370,6 +370,7 @@ XMLStream &XMLStream::operator<<(xml::ParTag const &tag) XMLStream &XMLStream::operator<<(xml::CompTag const &tag) { + is_last_tag_cr_ = false; if (tag.tag_.empty()) return *this; clearTagDeque(); @@ -380,6 +381,7 @@ XMLStream &XMLStream::operator<<(xml::CompTag const &tag) XMLStream &XMLStream::operator<<(xml::FontTag const &tag) { + is_last_tag_cr_ = false; if (tag.tag_.empty()) return *this; pending_tags_.push_back(makeTagPtr(tag)); @@ -389,6 +391,7 @@ XMLStream &XMLStream::operator<<(xml::FontTag const &tag) XMLStream &XMLStream::operator<<(xml::CR const &) { + is_last_tag_cr_ = true; clearTagDeque(); os_ << from_ascii("\n"); return *this; @@ -441,6 +444,8 @@ bool XMLStream::isTagPending(xml::StartTag const &stag, int maxdepth) const // best to make things work. XMLStream &XMLStream::operator<<(xml::EndTag const &etag) { + is_last_tag_cr_ = false; + if (etag.tag_.empty()) return *this; @@ -467,7 +472,8 @@ XMLStream &XMLStream::operator<<(xml::EndTag const &etag) + "' when other tags were pending. Last pending tag is `" + to_utf8(pending_tags_.back()->writeTag()) + "'. Tag discarded."); - pending_tags_.erase(dit); + if (!pending_tags_.empty()) + pending_tags_.erase(dit); return *this; } } @@ -483,7 +489,7 @@ XMLStream &XMLStream::operator<<(xml::EndTag const &etag) string estr = "Closing tag `" + to_utf8(etag.tag_) + "' when other tags are pending. Discarded pending tags:\n"; for (dit = pending_tags_.begin(); dit != den; ++dit) - estr += to_utf8(xml::xmlize((*dit)->writeTag(), XMLStream::ESCAPE_ALL)) + "\n"; + estr += to_utf8(xml::escapeString((*dit)->writeTag(), XMLStream::ESCAPE_ALL)) + "\n"; writeError(estr); // clear the pending tags... pending_tags_.clear(); @@ -588,68 +594,124 @@ docstring xml::uniqueID(docstring const & label) } +bool xml::isNotOnlySpace(docstring const & str) +{ + for (auto const & c: str) { + if (c != ' ' && c != '\t' && c != '\n' && c != '\v' && c != '\f' && c != '\r') + return true; + } + return false; +} + + +docstring xml::trimLeft(docstring const & str) +{ + size_t i = 0; + for (auto const & c: str) { + if (c != ' ' && c != '\t' && c != '\n' && c != '\v' && c != '\f' && c != '\r') + return str.substr(i, docstring::npos); + i++; + } + return str; +} + + docstring xml::cleanID(docstring const & orig) { - // The standard xml:id only allows letters, - // digits, '-' and '.' in a name. - // This routine replaces illegal characters by '-' or '.' - // and adds a number for uniqueness if need be. - docstring const allowed = from_ascii(".-_"); + // The standard xml:id only allows letters, digits, '-' and '.' in a name. + // This routine replaces illegal characters by '-' or '.' and adds a number for uniqueness if need be. // Use a cache of already mangled names: the alterations may merge several IDs as one. This ensures that the IDs // are not mixed up in the document. + // This code could be improved: it uses Qt outside the GUI part. Any TLS implementation could do the trick. typedef map MangledMap; static QThreadStorage tMangledNames; static QThreadStorage tMangleID; - MangledMap & mangledNames = tMangledNames.localData(); - // If the name is already known, just return it. - MangledMap::const_iterator const known = mangledNames.find(orig); + MangledMap & mangledNames = tMangledNames.localData(); + auto const known = mangledNames.find(orig); if (known != mangledNames.end()) return known->second; // Start creating the mangled name by iterating over the characters. docstring content; - docstring::const_iterator it = orig.begin(); - docstring::const_iterator end = orig.end(); + auto it = orig.cbegin(); + auto end = orig.cend(); // Make sure it starts with a letter. - if (!isAlphaASCII(*it) && allowed.find(*it) >= allowed.size()) + if (!isAlphaASCII(*it)) content += "x"; - // Do the mangling. + // Parse the ID character by character and change what needs to. bool mangle = false; // Indicates whether the ID had to be changed, i.e. if ID no more ensured to be unique. for (; it != end; ++it) { char_type c = *it; - if (isAlphaASCII(c) || isDigitASCII(c) || c == '-' || c == '.' - || allowed.find(c) < allowed.size()) + if (isAlphaASCII(c) || isDigitASCII(c) || c == '-' || c == '.' || c == '_') { content += c; - else if (c == '_' || c == ' ') { - mangle = true; - content += "-"; - } - else if (c == ':' || c == ',' || c == ';' || c == '!') { + } else if (c == ':' || c == ',' || c == ';' || c == '!') { mangle = true; content += "."; - } - else { + } else { // Other invalid characters, such as ' '. mangle = true; content += "-"; } } - if (mangle) { + // If there had to be a change, check if ID unicity is still guaranteed. + // This avoids having a clash if satisfying XML requirements for ID makes two IDs identical, like "a:b" and "a!b", + // as both of them would be transformed as "a.b". With this procedure, one will become "a.b" and the other "a.b-1". + if (mangle && mangledNames.find(content) != mangledNames.end()) { int & mangleID = tMangleID.localData(); - content += "-" + convert(mangleID++); + if (mangleID > 0) + content += "-" + convert(mangleID); + mangleID += 1; } + // Save the new ID to avoid recomputing it afterwards and to ensure stability over the document. mangledNames[orig] = content; - return content; } +bool operator==(xml::StartTag const & lhs, xml::StartTag const & rhs) +{ + xml::FontTag const * const lhs_ft = lhs.asFontTag(); + xml::FontTag const * const rhs_ft = rhs.asFontTag(); + + if ((!lhs_ft && rhs_ft) || (lhs_ft && !rhs_ft)) + return false; + if (!lhs_ft && !rhs_ft) + return lhs.tag_ == rhs.tag_; + return lhs_ft->tag_ == rhs_ft->tag_ && lhs_ft->font_type_ == rhs_ft->font_type_; +} + + +bool operator==(xml::EndTag const & lhs, xml::StartTag const & rhs) +{ + xml::EndFontTag const * const lhs_ft = lhs.asFontTag(); + xml::FontTag const * const rhs_ft = rhs.asFontTag(); + + if ((!lhs_ft && rhs_ft) || (lhs_ft && !rhs_ft)) + return false; + if (!lhs_ft && !rhs_ft) + return lhs.tag_ == rhs.tag_; + return lhs_ft->tag_ == rhs_ft->tag_ && lhs_ft->font_type_ == rhs_ft->font_type_; +} + + +bool operator!=(xml::EndTag const & lhs, xml::StartTag const & rhs) +{ + return !(lhs == rhs); +} + + +bool operator!=(xml::StartTag const & lhs, xml::StartTag const & rhs) +{ + return !(lhs == rhs); +} + + void xml::openTag(odocstream & os, string const & name, string const & attribute) { // FIXME UNICODE @@ -657,9 +719,9 @@ void xml::openTag(odocstream & os, string const & name, string const & attribute string param = subst(attribute, "<", "\""); param = subst(param, ">", "\""); - // Note: we ignore the name if it empty or if it is a comment "" or + // Note: we ignore the name if it is empty or if it is a comment "" or // if the name is *dummy*. - // We ignore dummy because dummy is not a valid docbook element and it is + // We ignore dummy because dummy is not a valid DocBook element and it is // the internal name given to single paragraphs in the latex output. // This allow us to simplify the code a lot and is a reasonable compromise. if (!name.empty() && name != "!-- --" && name != "dummy") { @@ -724,4 +786,147 @@ void xml::closeTag(odocstream & os, Paragraph const & par) } +void openInlineTag(XMLStream & xs, const docstring & tag, const docstring & attr) +{ + xs << xml::StartTag(tag, attr); +} + + +void closeInlineTag(XMLStream & xs, const docstring & tag) +{ + xs << xml::EndTag(tag); +} + + +void openParTag(XMLStream & xs, const docstring & tag, const docstring & attr) +{ + if (!xs.isLastTagCR()) + xs << xml::CR(); + xs << xml::StartTag(tag, attr); +} + + +void closeParTag(XMLStream & xs, const docstring & tag) +{ + xs << xml::EndTag(tag); + xs << xml::CR(); +} + + +void openBlockTag(XMLStream & xs, const docstring & tag, const docstring & attr) +{ + if (!xs.isLastTagCR()) + xs << xml::CR(); + xs << xml::StartTag(tag, attr); + xs << xml::CR(); +} + + +void closeBlockTag(XMLStream & xs, const docstring & tag) +{ + if (!xs.isLastTagCR()) + xs << xml::CR(); + xs << xml::EndTag(tag); + xs << xml::CR(); +} + + +void xml::openTag(XMLStream & xs, const docstring & tag, const docstring & attr, const std::string & tagtype) +{ + if (tag.empty() || tag == from_ascii("NONE")) // Common check to be performed elsewhere, if it was not here. + return; + + if (tag == from_ascii("para") || tagtype == "paragraph") // Special case for : always considered as a paragraph. + openParTag(xs, tag, attr); + else if (tagtype == "block") + openBlockTag(xs, tag, attr); + else if (tagtype == "inline") + openInlineTag(xs, tag, attr); + else if (tagtype == "none") + xs << xml::StartTag(tag, attr); + else + xs.writeError("Unrecognised tag type '" + tagtype + "' for '" + to_utf8(tag) + (attr.empty() ? "" : " ") + + to_utf8(attr) + "'"); +} + + +void xml::openTag(XMLStream & xs, const std::string & tag, const std::string & attr, const std::string & tagtype) +{ + xml::openTag(xs, from_utf8(tag), from_utf8(attr), tagtype); +} + + +void xml::openTag(XMLStream & xs, const docstring & tag, const std::string & attr, const std::string & tagtype) +{ + xml::openTag(xs, tag, from_utf8(attr), tagtype); +} + + +void xml::openTag(XMLStream & xs, const std::string & tag, const docstring & attr, const std::string & tagtype) +{ + xml::openTag(xs, from_utf8(tag), attr, tagtype); +} + + +void xml::closeTag(XMLStream & xs, const docstring & tag, const std::string & tagtype) +{ + if (tag.empty() || tag == "NONE" || tag == "IGNORE") + return; + + if (tag == "para" || tagtype == "paragraph") // Special case for : always considered as a paragraph. + closeParTag(xs, tag); + else if (tagtype == "block") + closeBlockTag(xs, tag); + else if (tagtype == "inline") + closeInlineTag(xs, tag); + else if (tagtype == "none") + xs << xml::EndTag(tag); + else + xs.writeError("Unrecognised tag type '" + tagtype + "' for '" + to_utf8(tag) + "'"); +} + + +void xml::closeTag(XMLStream & xs, const std::string & tag, const std::string & tagtype) +{ + xml::closeTag(xs, from_utf8(tag), tagtype); +} + + +void xml::compTag(XMLStream & xs, const docstring & tag, const docstring & attr, const std::string & tagtype) +{ + if (tag.empty() || tag == from_ascii("NONE")) + return; + + // Special case for : always considered as a paragraph. + if (tag == from_ascii("para") || tagtype == "paragraph" || tagtype == "block") { + if (!xs.isLastTagCR()) + xs << xml::CR(); + xs << xml::CompTag(tag, attr); + xs << xml::CR(); + } else if (tagtype == "inline") { + xs << xml::CompTag(tag, attr); + } else { + xs.writeError("Unrecognised tag type '" + tagtype + "' for '" + to_utf8(tag) + "'"); + } +} + + +void xml::compTag(XMLStream & xs, const std::string & tag, const std::string & attr, const std::string & tagtype) +{ + xml::compTag(xs, from_utf8(tag), from_utf8(attr), tagtype); +} + + +void xml::compTag(XMLStream & xs, const docstring & tag, const std::string & attr, const std::string & tagtype) +{ + xml::compTag(xs, tag, from_utf8(attr), tagtype); +} + + +void xml::compTag(XMLStream & xs, const std::string & tag, const docstring & attr, const std::string & tagtype) +{ + xml::compTag(xs, from_utf8(tag), attr, tagtype); +} + + } // namespace lyx