X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;ds=sidebyside;f=src%2Fxml.cpp;h=5b81024336481a9c612e639bd221e435d8f29717;hb=26ba2a65838731ce639a09539f617cb0f0be3b22;hp=16173d0949398d573002f0b84a47ce02fc8544fb;hpb=8dd2e7e6816a714b2c19eb7466dcb80fa582a0e4;p=lyx.git diff --git a/src/xml.cpp b/src/xml.cpp index 16173d0949..5b81024336 100644 --- a/src/xml.cpp +++ b/src/xml.cpp @@ -17,12 +17,12 @@ #include "BufferParams.h" #include "Counters.h" #include "Layout.h" -#include "OutputParams.h" #include "Paragraph.h" #include "Text.h" #include "TextClass.h" #include "support/convert.h" +#include "support/debug.h" #include "support/docstream.h" #include "support/lassert.h" #include "support/lstrings.h" @@ -44,43 +44,50 @@ docstring escapeChar(char_type c, XMLStream::EscapeSettings e) { docstring str; switch (e) { // For HTML: always ESCAPE_NONE. For XML: it depends, hence the parameter. - case XMLStream::ESCAPE_NONE: - str += c; + case XMLStream::ESCAPE_NONE: + case XMLStream::ESCAPE_COMMENTS: + str += c; + break; + case XMLStream::ESCAPE_ALL: + if (c == '<') { + str += "<"; break; - case XMLStream::ESCAPE_ALL: - if (c == '<') { - str += "<"; - break; - } else if (c == '>') { - str += ">"; - break; - } - // fall through - case XMLStream::ESCAPE_AND: - if (c == '&') - str += "&"; - else - str +=c ; + } else if (c == '>') { + str += ">"; break; + } + // fall through + case XMLStream::ESCAPE_AND: + if (c == '&') + str += "&"; + else + str +=c ; + break; } return str; } -// escape what needs escaping -docstring xmlize(docstring const &str, XMLStream::EscapeSettings e) { - odocstringstream d; - docstring::const_iterator it = str.begin(); - docstring::const_iterator en = str.end(); - for (; it != en; ++it) - d << escapeChar(*it, e); - return d.str(); +docstring escapeChar(char c, XMLStream::EscapeSettings e) +{ + LATTEST(static_cast(c) < 0x80); + return escapeChar(static_cast(c), e); } -docstring escapeChar(char c, XMLStream::EscapeSettings e) { - LATTEST(static_cast(c) < 0x80); - return escapeChar(static_cast(c), e); +docstring escapeString(docstring const & raw, XMLStream::EscapeSettings e) +{ + docstring bin; + bin.reserve(raw.size() * 2); // crude approximation is sufficient + for (size_t i = 0; i != raw.size(); ++i) { + char_type c = raw[i]; + if (e == XMLStream::ESCAPE_COMMENTS && c == '-' && i > 0 && raw[i - 1] == '-') + bin += "-"; + else + bin += xml::escapeChar(c, e); + } + + return bin; } @@ -97,44 +104,40 @@ docstring cleanAttr(docstring const & str) } -docstring StartTag::writeTag() const { +docstring StartTag::writeTag() const +{ docstring output = '<' + tag_; if (!attr_.empty()) { - docstring attributes = xml::xmlize(attr_, XMLStream::ESCAPE_NONE); - attributes.erase(attributes.begin(), std::find_if(attributes.begin(), attributes.end(), - [](int c) {return !std::isspace(c);})); - if (!attributes.empty()) { + docstring attributes = xml::trimLeft(xml::escapeString(attr_, XMLStream::ESCAPE_NONE)); + if (!attributes.empty()) output += ' ' + attributes; - } } output += ">"; return output; } -docstring StartTag::writeEndTag() const { +docstring StartTag::writeEndTag() const +{ return from_utf8(""); } -bool StartTag::operator==(FontTag const &rhs) const { - return rhs == *this; -} - - -docstring EndTag::writeEndTag() const { +docstring EndTag::writeEndTag() const +{ return from_utf8(""); } -docstring CompTag::writeTag() const { - docstring output = '<' + from_utf8(tag_); +docstring CompTag::writeTag() const +{ + docstring output = '<' + tag_; if (!attr_.empty()) { // Erase the beginning of the attributes if it contains space characters: this function deals with that // automatically. - docstring attributes = xmlize(from_utf8(attr_), XMLStream::ESCAPE_NONE); + docstring attributes = escapeString(attr_, XMLStream::ESCAPE_NONE); attributes.erase(attributes.begin(), std::find_if(attributes.begin(), attributes.end(), - [](int c) {return !std::isspace(c);})); + [](char_type c) {return !isSpace(c);})); if (!attributes.empty()) { output += ' ' + attributes; } @@ -143,31 +146,35 @@ docstring CompTag::writeTag() const { return output; } +} // namespace xml -bool FontTag::operator==(StartTag const & tag) const + +void XMLStream::writeError(std::string const &s) { - FontTag const * const ftag = tag.asFontTag(); - if (!ftag) - return false; - return (font_type_ == ftag->font_type_); + LYXERR(Debug::OUTFILE, s); + *this << ESCAPE_NONE << from_utf8(""); + *this << xml::CR(); } -} // namespace xml - -void XMLStream::writeError(std::string const &s) const { - LYXERR0(s); - os_ << from_utf8("\n"); +void XMLStream::writeError(docstring const &s) +{ + LYXERR(Debug::OUTFILE, s); + *this << ESCAPE_NONE << from_utf8(""); + *this << xml::CR(); } -void XMLStream::writeError(docstring const &s) const { - LYXERR0(s); - os_ << from_utf8("\n"); +XMLStream::TagPtr XMLStream::getLastStackTag() +{ + return tag_stack_.back(); } -bool XMLStream::closeFontTags() { +bool XMLStream::closeFontTags() +{ if (isTagPending(xml::parsep_tag)) // we haven't had any content return true; @@ -183,9 +190,6 @@ bool XMLStream::closeFontTags() { if (**curtag != xml::parsep_tag) os_ << (*curtag)->writeEndTag(); tag_stack_.pop_back(); - // this shouldn't happen, since then the font tags - // weren't in any other tag. -// LASSERT(!tag_stack_.empty(), return true); if (tag_stack_.empty()) return true; curtag = &tag_stack_.back(); @@ -208,14 +212,16 @@ bool XMLStream::closeFontTags() { } -void XMLStream::startDivision(bool keep_empty) { +void XMLStream::startDivision(bool keep_empty) +{ pending_tags_.push_back(makeTagPtr(xml::StartTag(xml::parsep_tag))); if (keep_empty) clearTagDeque(); } -void XMLStream::endDivision() { +void XMLStream::endDivision() +{ if (isTagPending(xml::parsep_tag)) { // this case is normal. it just means we didn't have content, // so the parsep_tag never got moved onto the tag stack. @@ -258,7 +264,8 @@ void XMLStream::endDivision() { } -void XMLStream::clearTagDeque() { +void XMLStream::clearTagDeque() +{ while (!pending_tags_.empty()) { TagPtr const & tag = pending_tags_.front(); if (*tag != xml::parsep_tag) @@ -270,24 +277,40 @@ void XMLStream::clearTagDeque() { } -XMLStream &XMLStream::operator<<(docstring const &d) { +XMLStream &XMLStream::operator<<(docstring const &d) +{ + is_last_tag_cr_ = false; clearTagDeque(); - os_ << xml::xmlize(d, escape_); + os_ << xml::escapeString(d, escape_); escape_ = ESCAPE_ALL; return *this; } -XMLStream &XMLStream::operator<<(const char *s) { +XMLStream &XMLStream::operator<<(xml::NullTag const &) +{ + is_last_tag_cr_ = false; + clearTagDeque(); + // Don't output anything to os_, by definition of a NullTag (as opposed to text output). + escape_ = ESCAPE_ALL; + return *this; +} + + +XMLStream &XMLStream::operator<<(const char *s) +{ + is_last_tag_cr_ = false; clearTagDeque(); docstring const d = from_ascii(s); - os_ << xml::xmlize(d, escape_); + os_ << xml::escapeString(d, escape_); escape_ = ESCAPE_ALL; return *this; } -XMLStream &XMLStream::operator<<(char_type c) { +XMLStream &XMLStream::operator<<(char_type c) +{ + is_last_tag_cr_ = false; clearTagDeque(); os_ << xml::escapeChar(c, escape_); escape_ = ESCAPE_ALL; @@ -295,7 +318,9 @@ XMLStream &XMLStream::operator<<(char_type c) { } -XMLStream &XMLStream::operator<<(char c) { +XMLStream &XMLStream::operator<<(char c) +{ + is_last_tag_cr_ = false; clearTagDeque(); os_ << xml::escapeChar(c, escape_); escape_ = ESCAPE_ALL; @@ -303,7 +328,9 @@ XMLStream &XMLStream::operator<<(char c) { } -XMLStream &XMLStream::operator<<(int i) { +XMLStream &XMLStream::operator<<(int i) +{ + is_last_tag_cr_ = false; clearTagDeque(); os_ << i; escape_ = ESCAPE_ALL; @@ -311,13 +338,17 @@ XMLStream &XMLStream::operator<<(int i) { } -XMLStream &XMLStream::operator<<(EscapeSettings e) { +XMLStream &XMLStream::operator<<(EscapeSettings e) +{ + // Don't update is_last_tag_cr_ here, as this does not output anything. escape_ = e; return *this; } -XMLStream &XMLStream::operator<<(xml::StartTag const &tag) { +XMLStream &XMLStream::operator<<(xml::StartTag const &tag) +{ + is_last_tag_cr_ = false; if (tag.tag_.empty()) return *this; pending_tags_.push_back(makeTagPtr(tag)); @@ -327,7 +358,9 @@ XMLStream &XMLStream::operator<<(xml::StartTag const &tag) { } -XMLStream &XMLStream::operator<<(xml::ParTag const &tag) { +XMLStream &XMLStream::operator<<(xml::ParTag const &tag) +{ + is_last_tag_cr_ = false; if (tag.tag_.empty()) return *this; pending_tags_.push_back(makeTagPtr(tag)); @@ -335,7 +368,9 @@ XMLStream &XMLStream::operator<<(xml::ParTag const &tag) { } -XMLStream &XMLStream::operator<<(xml::CompTag const &tag) { +XMLStream &XMLStream::operator<<(xml::CompTag const &tag) +{ + is_last_tag_cr_ = false; if (tag.tag_.empty()) return *this; clearTagDeque(); @@ -344,7 +379,9 @@ XMLStream &XMLStream::operator<<(xml::CompTag const &tag) { } -XMLStream &XMLStream::operator<<(xml::FontTag const &tag) { +XMLStream &XMLStream::operator<<(xml::FontTag const &tag) +{ + is_last_tag_cr_ = false; if (tag.tag_.empty()) return *this; pending_tags_.push_back(makeTagPtr(tag)); @@ -352,14 +389,17 @@ XMLStream &XMLStream::operator<<(xml::FontTag const &tag) { } -XMLStream &XMLStream::operator<<(xml::CR const &) { +XMLStream &XMLStream::operator<<(xml::CR const &) +{ + is_last_tag_cr_ = true; clearTagDeque(); os_ << from_ascii("\n"); return *this; } -bool XMLStream::isTagOpen(xml::StartTag const &stag, int maxdepth) const { +bool XMLStream::isTagOpen(xml::StartTag const &stag, int maxdepth) const +{ auto sit = tag_stack_.begin(); auto sen = tag_stack_.cend(); for (; sit != sen && maxdepth != 0; ++sit) { @@ -371,7 +411,8 @@ bool XMLStream::isTagOpen(xml::StartTag const &stag, int maxdepth) const { } -bool XMLStream::isTagOpen(xml::EndTag const &etag, int maxdepth) const { +bool XMLStream::isTagOpen(xml::EndTag const &etag, int maxdepth) const +{ auto sit = tag_stack_.begin(); auto sen = tag_stack_.cend(); for (; sit != sen && maxdepth != 0; ++sit) { @@ -383,7 +424,8 @@ bool XMLStream::isTagOpen(xml::EndTag const &etag, int maxdepth) const { } -bool XMLStream::isTagPending(xml::StartTag const &stag, int maxdepth) const { +bool XMLStream::isTagPending(xml::StartTag const &stag, int maxdepth) const +{ auto sit = pending_tags_.begin(); auto sen = pending_tags_.cend(); for (; sit != sen && maxdepth != 0; ++sit) { @@ -400,7 +442,10 @@ bool XMLStream::isTagPending(xml::StartTag const &stag, int maxdepth) const { // sure of that, but we won't assert (yet) if we run into // a problem. we'll just output error messages and try our // best to make things work. -XMLStream &XMLStream::operator<<(xml::EndTag const &etag) { +XMLStream &XMLStream::operator<<(xml::EndTag const &etag) +{ + is_last_tag_cr_ = false; + if (etag.tag_.empty()) return *this; @@ -427,7 +472,8 @@ XMLStream &XMLStream::operator<<(xml::EndTag const &etag) { + "' when other tags were pending. Last pending tag is `" + to_utf8(pending_tags_.back()->writeTag()) + "'. Tag discarded."); - pending_tags_.erase(dit); + if (!pending_tags_.empty()) + pending_tags_.erase(dit); return *this; } } @@ -443,7 +489,7 @@ XMLStream &XMLStream::operator<<(xml::EndTag const &etag) { string estr = "Closing tag `" + to_utf8(etag.tag_) + "' when other tags are pending. Discarded pending tags:\n"; for (dit = pending_tags_.begin(); dit != den; ++dit) - estr += to_utf8(xml::xmlize((*dit)->writeTag(), XMLStream::ESCAPE_ALL)) + "\n"; + estr += to_utf8(xml::escapeString((*dit)->writeTag(), XMLStream::ESCAPE_ALL)) + "\n"; writeError(estr); // clear the pending tags... pending_tags_.clear(); @@ -540,87 +586,132 @@ XMLStream &XMLStream::operator<<(xml::EndTag const &etag) { } -docstring xml::escapeString(docstring const & raw, XMLStream::EscapeSettings e) +docstring xml::uniqueID(docstring const & label) { - docstring bin; - bin.reserve(raw.size() * 2); // crude approximation is sufficient - for (size_t i = 0; i != raw.size(); ++i) - bin += xml::escapeChar(raw[i], e); + // thread-safe + static atomic_uint seed(1000); + return label + convert(++seed); +} - return bin; + +bool xml::isNotOnlySpace(docstring const & str) +{ + for (auto const & c: str) { + if (c != ' ' && c != '\t' && c != '\n' && c != '\v' && c != '\f' && c != '\r') + return true; + } + return false; } -docstring const xml::uniqueID(docstring const & label) +docstring xml::trimLeft(docstring const & str) { - // thread-safe - static atomic_uint seed(1000); - return label + convert(++seed); + size_t i = 0; + for (auto const & c: str) { + if (c != ' ' && c != '\t' && c != '\n' && c != '\v' && c != '\f' && c != '\r') + return str.substr(i, docstring::npos); + i++; + } + return str; } docstring xml::cleanID(docstring const & orig) { - // The standard xml:id only allows letters, - // digits, '-' and '.' in a name. - // This routine replaces illegal characters by '-' or '.' - // and adds a number for uniqueness if need be. - docstring const allowed = from_ascii(".-_"); + // The standard xml:id only allows letters, digits, '-' and '.' in a name. + // This routine replaces illegal characters by '-' or '.' and adds a number for uniqueness if need be. // Use a cache of already mangled names: the alterations may merge several IDs as one. This ensures that the IDs // are not mixed up in the document. + // This code could be improved: it uses Qt outside the GUI part. Any TLS implementation could do the trick. typedef map MangledMap; static QThreadStorage tMangledNames; static QThreadStorage tMangleID; - MangledMap & mangledNames = tMangledNames.localData(); - // If the name is already known, just return it. - MangledMap::const_iterator const known = mangledNames.find(orig); + MangledMap & mangledNames = tMangledNames.localData(); + auto const known = mangledNames.find(orig); if (known != mangledNames.end()) return known->second; // Start creating the mangled name by iterating over the characters. docstring content; - docstring::const_iterator it = orig.begin(); - docstring::const_iterator end = orig.end(); + auto it = orig.cbegin(); + auto end = orig.cend(); // Make sure it starts with a letter. - if (!isAlphaASCII(*it) && allowed.find(*it) >= allowed.size()) + if (!isAlphaASCII(*it)) content += "x"; - // Do the mangling. + // Parse the ID character by character and change what needs to. bool mangle = false; // Indicates whether the ID had to be changed, i.e. if ID no more ensured to be unique. for (; it != end; ++it) { char_type c = *it; - if (isAlphaASCII(c) || isDigitASCII(c) || c == '-' || c == '.' - || allowed.find(c) < allowed.size()) + if (isAlphaASCII(c) || isDigitASCII(c) || c == '-' || c == '.' || c == '_') { content += c; - else if (c == '_' || c == ' ') { - mangle = true; - content += "-"; - } - else if (c == ':' || c == ',' || c == ';' || c == '!') { + } else if (c == ':' || c == ',' || c == ';' || c == '!') { mangle = true; content += "."; - } - else { + } else { // Other invalid characters, such as ' '. mangle = true; content += "-"; } } - if (mangle) { + // If there had to be a change, check if ID unicity is still guaranteed. + // This avoids having a clash if satisfying XML requirements for ID makes two IDs identical, like "a:b" and "a!b", + // as both of them would be transformed as "a.b". With this procedure, one will become "a.b" and the other "a.b-1". + if (mangle && mangledNames.find(content) != mangledNames.end()) { int & mangleID = tMangleID.localData(); - content += "-" + convert(mangleID++); + if (mangleID > 0) + content += "-" + convert(mangleID); + mangleID += 1; } + // Save the new ID to avoid recomputing it afterwards and to ensure stability over the document. mangledNames[orig] = content; - return content; } +bool operator==(xml::StartTag const & lhs, xml::StartTag const & rhs) +{ + xml::FontTag const * const lhs_ft = lhs.asFontTag(); + xml::FontTag const * const rhs_ft = rhs.asFontTag(); + + if ((!lhs_ft && rhs_ft) || (lhs_ft && !rhs_ft)) + return false; + if (!lhs_ft && !rhs_ft) + return lhs.tag_ == rhs.tag_; + return lhs_ft->tag_ == rhs_ft->tag_ && lhs_ft->font_type_ == rhs_ft->font_type_; +} + + +bool operator==(xml::EndTag const & lhs, xml::StartTag const & rhs) +{ + xml::EndFontTag const * const lhs_ft = lhs.asFontTag(); + xml::FontTag const * const rhs_ft = rhs.asFontTag(); + + if ((!lhs_ft && rhs_ft) || (lhs_ft && !rhs_ft)) + return false; + if (!lhs_ft && !rhs_ft) + return lhs.tag_ == rhs.tag_; + return lhs_ft->tag_ == rhs_ft->tag_ && lhs_ft->font_type_ == rhs_ft->font_type_; +} + + +bool operator!=(xml::EndTag const & lhs, xml::StartTag const & rhs) +{ + return !(lhs == rhs); +} + + +bool operator!=(xml::StartTag const & lhs, xml::StartTag const & rhs) +{ + return !(lhs == rhs); +} + + void xml::openTag(odocstream & os, string const & name, string const & attribute) { // FIXME UNICODE @@ -628,9 +719,9 @@ void xml::openTag(odocstream & os, string const & name, string const & attribute string param = subst(attribute, "<", "\""); param = subst(param, ">", "\""); - // Note: we ignore the name if it empty or if it is a comment "" or + // Note: we ignore the name if it is empty or if it is a comment "" or // if the name is *dummy*. - // We ignore dummy because dummy is not a valid docbook element and it is + // We ignore dummy because dummy is not a valid DocBook element and it is // the internal name given to single paragraphs in the latex output. // This allow us to simplify the code a lot and is a reasonable compromise. if (!name.empty() && name != "!-- --" && name != "dummy") { @@ -695,4 +786,147 @@ void xml::closeTag(odocstream & os, Paragraph const & par) } +void openInlineTag(XMLStream & xs, const docstring & tag, const docstring & attr) +{ + xs << xml::StartTag(tag, attr); +} + + +void closeInlineTag(XMLStream & xs, const docstring & tag) +{ + xs << xml::EndTag(tag); +} + + +void openParTag(XMLStream & xs, const docstring & tag, const docstring & attr) +{ + if (!xs.isLastTagCR()) + xs << xml::CR(); + xs << xml::StartTag(tag, attr); +} + + +void closeParTag(XMLStream & xs, const docstring & tag) +{ + xs << xml::EndTag(tag); + xs << xml::CR(); +} + + +void openBlockTag(XMLStream & xs, const docstring & tag, const docstring & attr) +{ + if (!xs.isLastTagCR()) + xs << xml::CR(); + xs << xml::StartTag(tag, attr); + xs << xml::CR(); +} + + +void closeBlockTag(XMLStream & xs, const docstring & tag) +{ + if (!xs.isLastTagCR()) + xs << xml::CR(); + xs << xml::EndTag(tag); + xs << xml::CR(); +} + + +void xml::openTag(XMLStream & xs, const docstring & tag, const docstring & attr, const std::string & tagtype) +{ + if (tag.empty() || tag == from_ascii("NONE")) // Common check to be performed elsewhere, if it was not here. + return; + + if (tag == from_ascii("para") || tagtype == "paragraph") // Special case for : always considered as a paragraph. + openParTag(xs, tag, attr); + else if (tagtype == "block") + openBlockTag(xs, tag, attr); + else if (tagtype == "inline") + openInlineTag(xs, tag, attr); + else if (tagtype == "none") + xs << xml::StartTag(tag, attr); + else + xs.writeError("Unrecognised tag type '" + tagtype + "' for '" + to_utf8(tag) + (attr.empty() ? "" : " ") + + to_utf8(attr) + "'"); +} + + +void xml::openTag(XMLStream & xs, const std::string & tag, const std::string & attr, const std::string & tagtype) +{ + xml::openTag(xs, from_utf8(tag), from_utf8(attr), tagtype); +} + + +void xml::openTag(XMLStream & xs, const docstring & tag, const std::string & attr, const std::string & tagtype) +{ + xml::openTag(xs, tag, from_utf8(attr), tagtype); +} + + +void xml::openTag(XMLStream & xs, const std::string & tag, const docstring & attr, const std::string & tagtype) +{ + xml::openTag(xs, from_utf8(tag), attr, tagtype); +} + + +void xml::closeTag(XMLStream & xs, const docstring & tag, const std::string & tagtype) +{ + if (tag.empty() || tag == "NONE" || tag == "IGNORE") + return; + + if (tag == "para" || tagtype == "paragraph") // Special case for : always considered as a paragraph. + closeParTag(xs, tag); + else if (tagtype == "block") + closeBlockTag(xs, tag); + else if (tagtype == "inline") + closeInlineTag(xs, tag); + else if (tagtype == "none") + xs << xml::EndTag(tag); + else + xs.writeError("Unrecognised tag type '" + tagtype + "' for '" + to_utf8(tag) + "'"); +} + + +void xml::closeTag(XMLStream & xs, const std::string & tag, const std::string & tagtype) +{ + xml::closeTag(xs, from_utf8(tag), tagtype); +} + + +void xml::compTag(XMLStream & xs, const docstring & tag, const docstring & attr, const std::string & tagtype) +{ + if (tag.empty() || tag == from_ascii("NONE")) + return; + + // Special case for : always considered as a paragraph. + if (tag == from_ascii("para") || tagtype == "paragraph" || tagtype == "block") { + if (!xs.isLastTagCR()) + xs << xml::CR(); + xs << xml::CompTag(tag, attr); + xs << xml::CR(); + } else if (tagtype == "inline") { + xs << xml::CompTag(tag, attr); + } else { + xs.writeError("Unrecognised tag type '" + tagtype + "' for '" + to_utf8(tag) + "'"); + } +} + + +void xml::compTag(XMLStream & xs, const std::string & tag, const std::string & attr, const std::string & tagtype) +{ + xml::compTag(xs, from_utf8(tag), from_utf8(attr), tagtype); +} + + +void xml::compTag(XMLStream & xs, const docstring & tag, const std::string & attr, const std::string & tagtype) +{ + xml::compTag(xs, tag, from_utf8(attr), tagtype); +} + + +void xml::compTag(XMLStream & xs, const std::string & tag, const docstring & attr, const std::string & tagtype) +{ + xml::compTag(xs, from_utf8(tag), attr, tagtype); +} + + } // namespace lyx