inputenc only expects one option

[lyx.git] / src / xml.cpp
diff --git a/src/xml.cpp b/src/xml.cpp

index 3864a3f4009426bc3be163d423e5d35bcfb0182e..7ee8d9137a754749b08134bdc39dcfb29431113c 100644 (file)
--- a/src/xml.cpp
+++ b/src/xml.cpp
@@ -17,12 +17,12 @@
  #include "BufferParams.h"
  #include "Counters.h"
  #include "Layout.h"
-#include "OutputParams.h"
  #include "Paragraph.h"
  #include "Text.h"
  #include "TextClass.h"
  
  #include "support/convert.h"
+#include "support/debug.h"
  #include "support/docstream.h"
  #include "support/lassert.h"
  #include "support/lstrings.h"
@@ -44,40 +44,30 @@ docstring escapeChar(char_type c, XMLStream::EscapeSettings e)
  {
         docstring str;
         switch (e) { // For HTML: always ESCAPE_NONE. For XML: it depends, hence the parameter.
-               case XMLStream::ESCAPE_NONE:
-                       str += c;
+       case XMLStream::ESCAPE_NONE:
+       case XMLStream::ESCAPE_COMMENTS:
+               str += c;
+               break;
+       case XMLStream::ESCAPE_ALL:
+               if (c == '<') {
+                       str += "&lt;";
                         break;
-               case XMLStream::ESCAPE_ALL:
-                       if (c == '<') {
-                               str += "&lt;";
-                               break;
-                       } else if (c == '>') {
-                               str += "&gt;";
-                               break;
-                       }
-                       // fall through
-               case XMLStream::ESCAPE_AND:
-                       if (c == '&')
-                               str += "&amp;";
-                       else
-                               str     +=c ;
+               } else if (c == '>') {
+                       str += "&gt;";
                         break;
+               }
+               // fall through
+       case XMLStream::ESCAPE_AND:
+               if (c == '&')
+                       str += "&amp;";
+               else
+                       str     +=c ;
+               break;
         }
         return str;
  }
  
  
-// escape what needs escaping
-docstring xmlize(docstring const &str, XMLStream::EscapeSettings e) {
-       odocstringstream d;
-       docstring::const_iterator it = str.begin();
-       docstring::const_iterator en = str.end();
-       for (; it != en; ++it)
-               d << escapeChar(*it, e);
-       return d.str();
-}
-
-
  docstring escapeChar(char c, XMLStream::EscapeSettings e)
  {
         LATTEST(static_cast<unsigned char>(c) < 0x80);
@@ -85,6 +75,22 @@ docstring escapeChar(char c, XMLStream::EscapeSettings e)
  }
  
  
+// Escapes raw character by character with escapeChar(), following the
+// setting e. With ESCAPE_COMMENTS, a '-' that directly follows another '-'
+// is written as the character reference "&#45;" instead, so that the result
+// never contains two consecutive dashes (which XML forbids inside comments).
+docstring escapeString(docstring const & raw, XMLStream::EscapeSettings e)
+{
+       bool const in_comment = (e == XMLStream::ESCAPE_COMMENTS);
+       docstring escaped;
+       escaped.reserve(raw.size() * 2); // crude estimate, only meant to limit reallocations
+       bool prev_is_dash = false; // was the previous character of raw a '-'?
+       for (char_type const ch : raw) {
+               if (in_comment && prev_is_dash && ch == '-')
+                       escaped += "&#45;";
+               else
+                       escaped += xml::escapeChar(ch, e);
+               prev_is_dash = (ch == '-');
+       }
+
+       return escaped;
+}
+
+
  docstring cleanAttr(docstring const & str)
  {
         docstring newname;
@@ -102,12 +108,9 @@ docstring StartTag::writeTag() const
  {
         docstring output = '<' + tag_;
         if (!attr_.empty()) {
-               docstring attributes = xml::xmlize(attr_, XMLStream::ESCAPE_NONE);
-               attributes.erase(attributes.begin(), std::find_if(attributes.begin(), attributes.end(),
-                                                          [](int c) {return !std::isspace(c);}));
-               if (!attributes.empty()) {
+               docstring attributes = xml::trimLeft(xml::escapeString(attr_, XMLStream::ESCAPE_NONE));
+               if (!attributes.empty())
                         output += ' ' + attributes;
-               }
         }
         output += ">";
         return output;
@@ -134,13 +137,13 @@ docstring EndTag::writeEndTag() const
  
  docstring CompTag::writeTag() const
  {
-       docstring output = '<' + from_utf8(tag_);
+       docstring output = '<' + tag_;
         if (!attr_.empty()) {
                 // Erase the beginning of the attributes if it contains space characters: this function deals with that
                 // automatically.
-               docstring attributes = xmlize(from_utf8(attr_), XMLStream::ESCAPE_NONE);
+               docstring attributes = escapeString(attr_, XMLStream::ESCAPE_NONE);
                 attributes.erase(attributes.begin(), std::find_if(attributes.begin(), attributes.end(),
-                                                          [](int c) {return !std::isspace(c);}));
+                                                          [](char_type c) {return !isSpace(c);}));
                 if (!attributes.empty()) {
                         output += ' ' + attributes;
                 }
@@ -161,17 +164,27 @@ bool FontTag::operator==(StartTag const & tag) const
  } // namespace xml
  
  
+// Reports an output error: s is logged on the Debug::OUTFILE channel and
+// written to this stream as an XML comment "<!-- Output Error: ... -->",
+// followed by a line break.
+// The whole comment, s included, is streamed with ESCAPE_NONE, so s is
+// written as is; callers have to escape it beforehand if needed.
-void XMLStream::writeError(std::string const &s) const
+void XMLStream::writeError(std::string const &s)
  {
-       LYXERR0(s);
-       os_ << from_utf8("<!-- Output Error: " + s + " -->\n");
+       LYXERR(Debug::OUTFILE, s);
+       *this << ESCAPE_NONE << from_utf8("<!-- Output Error: " + s + " -->");
+       *this << xml::CR();
  }
  
  
+// Reports an output error: s is logged on the Debug::OUTFILE channel and
+// written to this stream between "<!-- Output Error: " and " -->", followed
+// by a line break.
+// Only the comment delimiters are streamed with ESCAPE_NONE.
+// NOTE(review): s is streamed without an explicit setting; as the text
+// operator<< overloads reset the setting to ESCAPE_ALL after each write, s
+// looks to be escaped with ESCAPE_ALL here, unlike in the std::string
+// overload -- confirm that this difference is intended.
-void XMLStream::writeError(docstring const &s) const
+void XMLStream::writeError(docstring const &s)
  {
-       LYXERR0(s);
-       os_ << from_utf8("<!-- Output Error: ") << s << from_utf8(" -->\n");
+       LYXERR(Debug::OUTFILE, s);
+       *this << ESCAPE_NONE << from_utf8("<!-- Output Error: ");
+       *this << s;
+       *this << ESCAPE_NONE << from_utf8(" -->");
+       *this << xml::CR();
+}
+
+
+// Returns the element at the back of tag_stack_.
+// NOTE(review): there is no emptiness check; assuming tag_stack_ is a
+// standard container, calling back() on it while empty is undefined
+// behaviour, so callers presumably must make sure a tag is on the stack
+// first -- confirm.
+XMLStream::TagPtr XMLStream::getLastStackTag()
+{
+       return tag_stack_.back();
  }
  
  
@@ -192,9 +205,6 @@ bool XMLStream::closeFontTags()
                 if (**curtag != xml::parsep_tag)
                         os_ << (*curtag)->writeEndTag();
                 tag_stack_.pop_back();
-               // this shouldn't happen, since then the font tags
-               // weren't in any other tag.
-//             LASSERT(!tag_stack_.empty(), return true);
                 if (tag_stack_.empty())
                         return true;
                 curtag = &tag_stack_.back();
@@ -284,8 +294,9 @@ void XMLStream::clearTagDeque()
  
  XMLStream &XMLStream::operator<<(docstring const &d)
  {
+       // Writes d to the output, escaped according to the current escape
+       // setting. The setting is reset to ESCAPE_ALL afterwards, so a setting
+       // streamed in beforehand only applies to this one write.
+       // What is written here is not an xml::CR, hence the flag is cleared.
+       is_last_tag_cr_ = false;
         clearTagDeque();
-       os_ << xml::xmlize(d, escape_);
+       os_ << xml::escapeString(d, escape_);
         escape_ = ESCAPE_ALL;
         return *this;
  }
@@ -293,9 +304,10 @@ XMLStream &XMLStream::operator<<(docstring const &d)
  
  XMLStream &XMLStream::operator<<(const char *s)
  {
+       // Same as the docstring overload, for a C string: s is converted with
+       // from_ascii() (so it is presumably expected to be pure ASCII), written
+       // with the current escape setting, and the setting is then reset to
+       // ESCAPE_ALL.
+       // What is written here is not an xml::CR, hence the flag is cleared.
+       is_last_tag_cr_ = false;
         clearTagDeque();
         docstring const d = from_ascii(s);
-       os_ << xml::xmlize(d, escape_);
+       os_ << xml::escapeString(d, escape_);
         escape_ = ESCAPE_ALL;
         return *this;
  }
@@ -303,6 +315,7 @@ XMLStream &XMLStream::operator<<(const char *s)
  
  XMLStream &XMLStream::operator<<(char_type c)
  {
+       is_last_tag_cr_ = false;
         clearTagDeque();
         os_ << xml::escapeChar(c, escape_);
         escape_ = ESCAPE_ALL;
@@ -312,6 +325,7 @@ XMLStream &XMLStream::operator<<(char_type c)
  
  XMLStream &XMLStream::operator<<(char c)
  {
+       is_last_tag_cr_ = false;
         clearTagDeque();
         os_ << xml::escapeChar(c, escape_);
         escape_ = ESCAPE_ALL;
@@ -321,6 +335,7 @@ XMLStream &XMLStream::operator<<(char c)
  
  XMLStream &XMLStream::operator<<(int i)
  {
+       is_last_tag_cr_ = false;
         clearTagDeque();
         os_ << i;
         escape_ = ESCAPE_ALL;
@@ -330,6 +345,7 @@ XMLStream &XMLStream::operator<<(int i)
  
  XMLStream &XMLStream::operator<<(EscapeSettings e)
  {
+       // Selects the escape setting used by the next write; the overloads that
+       // write text or numbers put it back to ESCAPE_ALL once they are done.
+       // Don't update is_last_tag_cr_ here, as this does not output anything.
         escape_ = e;
         return *this;
  }
@@ -337,6 +353,7 @@ XMLStream &XMLStream::operator<<(EscapeSettings e)
  
  XMLStream &XMLStream::operator<<(xml::StartTag const &tag)
  {
+       is_last_tag_cr_ = false;
         if (tag.tag_.empty())
                 return *this;
         pending_tags_.push_back(makeTagPtr(tag));
@@ -348,6 +365,7 @@ XMLStream &XMLStream::operator<<(xml::StartTag const &tag)
  
  XMLStream &XMLStream::operator<<(xml::ParTag const &tag)
  {
+       is_last_tag_cr_ = false;
         if (tag.tag_.empty())
                 return *this;
         pending_tags_.push_back(makeTagPtr(tag));
@@ -357,6 +375,7 @@ XMLStream &XMLStream::operator<<(xml::ParTag const &tag)
  
  XMLStream &XMLStream::operator<<(xml::CompTag const &tag)
  {
+       is_last_tag_cr_ = false;
         if (tag.tag_.empty())
                 return *this;
         clearTagDeque();
@@ -367,6 +386,7 @@ XMLStream &XMLStream::operator<<(xml::CompTag const &tag)
  
  XMLStream &XMLStream::operator<<(xml::FontTag const &tag)
  {
+       is_last_tag_cr_ = false;
         if (tag.tag_.empty())
                 return *this;
         pending_tags_.push_back(makeTagPtr(tag));
@@ -376,6 +396,7 @@ XMLStream &XMLStream::operator<<(xml::FontTag const &tag)
  
  XMLStream &XMLStream::operator<<(xml::CR const &)
  {
+       is_last_tag_cr_ = true;
         clearTagDeque();
         os_ << from_ascii("\n");
         return *this;
@@ -428,6 +449,8 @@ bool XMLStream::isTagPending(xml::StartTag const &stag, int maxdepth) const
  // best to make things work.
  XMLStream &XMLStream::operator<<(xml::EndTag const &etag)
  {
+       is_last_tag_cr_ = false;
+
         if (etag.tag_.empty())
                 return *this;
  
@@ -454,7 +477,8 @@ XMLStream &XMLStream::operator<<(xml::EndTag const &etag)
                                                    + "' when other tags were pending. Last pending tag is `"
                                                    + to_utf8(pending_tags_.back()->writeTag())
                                                    + "'. Tag discarded.");
-                               pending_tags_.erase(dit);
+                               if (!pending_tags_.empty())
+                                       pending_tags_.erase(dit);
                                 return *this;
                         }
                 }
@@ -470,7 +494,7 @@ XMLStream &XMLStream::operator<<(xml::EndTag const &etag)
                 string estr = "Closing tag `" + to_utf8(etag.tag_)
                                           + "' when other tags are pending. Discarded pending tags:\n";
                 for (dit = pending_tags_.begin(); dit != den; ++dit)
-                       estr += to_utf8(xml::xmlize((*dit)->writeTag(), XMLStream::ESCAPE_ALL)) + "\n";
+                       estr += to_utf8(xml::escapeString((*dit)->writeTag(), XMLStream::ESCAPE_ALL)) + "\n";
                 writeError(estr);
                 // clear the pending tags...
                 pending_tags_.clear();
@@ -567,83 +591,90 @@ XMLStream &XMLStream::operator<<(xml::EndTag const &etag)
  }
  
  
-docstring xml::escapeString(docstring const & raw, XMLStream::EscapeSettings e)
+// Returns label followed by a number taken from a counter shared by all
+// calls. The counter starts at 1000 and is incremented before being used,
+// so the first suffix handed out is 1001.
+docstring xml::uniqueID(docstring const & label)
  {
-       docstring bin;
-       bin.reserve(raw.size() * 2); // crude approximation is sufficient
-       for (size_t i = 0; i != raw.size(); ++i)
-               bin += xml::escapeChar(raw[i], e);
+       // thread-safe
+       static atomic_uint seed(1000);
+       return label + convert<docstring>(++seed);
+}
  
-       return bin;
+
+// Tells whether str contains at least one character that is not ASCII
+// white space (space, tab, line feed, vertical tab, form feed, carriage
+// return). An empty string therefore yields false.
+bool xml::isNotOnlySpace(docstring const & str)
+{
+       for (char_type const ch : str) {
+               bool const is_space = ch == ' ' || ch == '\t' || ch == '\n'
+                       || ch == '\v' || ch == '\f' || ch == '\r';
+               if (!is_space)
+                       return true;
+       }
+       return false;
  }
  
  
-docstring const xml::uniqueID(docstring const & label)
+// Returns a copy of str without its leading ASCII white space (space, tab,
+// line feed, vertical tab, form feed, carriage return). A string that is
+// empty or made of white space only gives an empty result.
+docstring xml::trimLeft(docstring const & str)
  {
-       // thread-safe
-       static atomic_uint seed(1000);
-       return label + convert<docstring>(++seed);
+       size_t i = 0;
+       for (auto const & c: str) {
+               if (c != ' ' && c != '\t' && c != '\n' && c != '\v' && c != '\f' && c != '\r')
+                       return str.substr(i, docstring::npos);
+               ++i;
+       }
+       // No other character was found: everything is trimmed away. Returning str itself here would
+       // hand back the white space untouched, and blank attributes would then be written into tags.
+       return docstring();
  }
  
  
  docstring xml::cleanID(docstring const & orig)
  {
-       // The standard xml:id only allows letters,
-       // digits, '-' and '.' in a name.
-       // This routine replaces illegal characters by '-' or '.'
-       // and adds a number for uniqueness if need be.
-       docstring const allowed = from_ascii(".-_");
+       // The standard xml:id only allows letters, digits, '-' and '.' in a name.
+       // This routine replaces illegal characters by '-' or '.' and adds a number for uniqueness if need be.
  
         // Use a cache of already mangled names: the alterations may merge several IDs as one. This ensures that the IDs
         // are not mixed up in the document.
+       // This code could be improved: it uses Qt outside the GUI part. Any TLS implementation could do the trick.
         typedef map<docstring, docstring> MangledMap;
         static QThreadStorage<MangledMap> tMangledNames;
         static QThreadStorage<int> tMangleID;
  
-       MangledMap & mangledNames = tMangledNames.localData();
-
         // If the name is already known, just return it.
-       MangledMap::const_iterator const known = mangledNames.find(orig);
+       MangledMap & mangledNames = tMangledNames.localData();
+       auto const known = mangledNames.find(orig);
         if (known != mangledNames.end())
                 return known->second;
  
         // Start creating the mangled name by iterating over the characters.
         docstring content;
-       docstring::const_iterator it  = orig.begin();
-       docstring::const_iterator end = orig.end();
+       auto it = orig.cbegin();
+       auto end = orig.cend();
  
         // Make sure it starts with a letter.
-       if (!isAlphaASCII(*it) && allowed.find(*it) >= allowed.size())
+       if (!isAlphaASCII(*it))
                 content += "x";
  
-       // Do the mangling.
+       // Parse the ID character by character and change what needs to.
         bool mangle = false; // Indicates whether the ID had to be changed, i.e. if ID no more ensured to be unique.
         for (; it != end; ++it) {
                 char_type c = *it;
-               if (isAlphaASCII(c) || isDigitASCII(c) || c == '-' || c == '.'
-                     || allowed.find(c) < allowed.size())
+               if (isAlphaASCII(c) || isDigitASCII(c) || c == '-' || c == '.' || c == '_') {
                         content += c;
-               else if (c == '_' || c == ' ') {
-                       mangle = true;
-                       content += "-";
-               }
-               else if (c == ':' || c == ',' || c == ';' || c == '!') {
+               } else if (c == ':' || c == ',' || c == ';' || c == '!') {
                         mangle = true;
                         content += ".";
-               }
-               else {
+               } else { // Other invalid characters, such as ' '.
                         mangle = true;
                         content += "-";
                 }
         }
  
-       if (mangle) {
+       // If there had to be a change, check if ID unicity is still guaranteed.
+       // This avoids having a clash if satisfying XML requirements for ID makes two IDs identical, like "a:b" and "a!b",
+       // as both of them would be transformed as "a.b". With this procedure, one will become "a.b" and the other "a.b-1".
+       if (mangle && mangledNames.find(content) != mangledNames.end()) {
                 int & mangleID = tMangleID.localData();
-               content += "-" + convert<docstring>(mangleID++);
+               if (mangleID > 0)
+                       content += "-" + convert<docstring>(mangleID);
+               mangleID += 1;
         }
  
+       // Save the new ID to avoid recomputing it afterwards and to ensure stability over the document.
         mangledNames[orig] = content;
-
         return content;
  }
  
@@ -655,9 +686,9 @@ void xml::openTag(odocstream & os, string const & name, string const & attribute
      string param = subst(attribute, "<", "\"");
      param = subst(param, ">", "\"");
  
-    // Note: we ignore the name if it empty or if it is a comment "<!-- -->" or
+    // Note: we ignore the name if it is empty or if it is a comment "<!-- -->" or
      // if the name is *dummy*.
-    // We ignore dummy because dummy is not a valid docbook element and it is
+    // We ignore dummy because dummy is not a valid DocBook element and it is
      // the internal name given to single paragraphs in the latex output.
      // This allow us to simplify the code a lot and is a reasonable compromise.
      if (!name.empty() && name != "!-- --" && name != "dummy") {
@@ -722,4 +753,146 @@ void xml::closeTag(odocstream & os, Paragraph const & par)
  }
  
  
+// Streams the start tag built from tag and attr into xs; no line break is
+// written around it.
+void openInlineTag(XMLStream & xs, const docstring & tag, const docstring & attr)
+{
+       xml::StartTag const start(tag, attr);
+       xs << start;
+}
+
+
+// Streams the end tag built from tag into xs; no line break is written
+// around it.
+void closeInlineTag(XMLStream & xs, const docstring & tag)
+{
+       xml::EndTag const end(tag);
+       xs << end;
+}
+
+
+// Opens a paragraph-like element: a line break is written first, unless
+// xs.isLastTagCR() reports that one has just been written, then the start
+// tag. Nothing is written after the tag.
+void openParTag(XMLStream & xs, const docstring & tag, const docstring & attr)
+{
+       bool const at_line_start = xs.isLastTagCR();
+       if (!at_line_start)
+               xs << xml::CR();
+       xs << xml::StartTag(tag, attr);
+}
+
+
+// Closes a paragraph-like element: the end tag is written right after the
+// current content and is followed by a line break.
+void closeParTag(XMLStream & xs, const docstring & tag)
+{
+       xs << xml::EndTag(tag) << xml::CR();
+}
+
+
+// Opens a block element: the start tag is written alone on its line, i.e.
+// preceded by a line break (unless xs.isLastTagCR() reports that one has
+// just been written) and always followed by one.
+void openBlockTag(XMLStream & xs, const docstring & tag, const docstring & attr)
+{
+       bool const at_line_start = xs.isLastTagCR();
+       if (!at_line_start)
+               xs << xml::CR();
+       xs << xml::StartTag(tag, attr) << xml::CR();
+}
+
+
+// Closes a block element: the end tag is written alone on its line, i.e.
+// preceded by a line break (unless xs.isLastTagCR() reports that one has
+// just been written) and always followed by one.
+void closeBlockTag(XMLStream & xs, const docstring & tag)
+{
+       bool const at_line_start = xs.isLastTagCR();
+       if (!at_line_start)
+               xs << xml::CR();
+       xs << xml::EndTag(tag) << xml::CR();
+}
+
+
+// Opens the element tag, with attributes attr, in xs. The line breaks written around the start tag
+// depend on tagtype:
+//  - "paragraph" (or any element named "para"): starts on a new line;
+//  - "block": alone on its own line;
+//  - "inline" and "none": no line break at all.
+// Any other tagtype writes an error comment into xs instead of the tag.
+// Nothing is written when tag is empty or is one of the placeholders "NONE" and "IGNORE".
+void xml::openTag(XMLStream & xs, const docstring & tag, const docstring & attr, const std::string & tagtype)
+{
+       // Common check to be performed elsewhere, if it was not here.
+       // "IGNORE" is skipped like "NONE" because xml::closeTag skips both: opening it here would start
+       // an <IGNORE> element that never gets closed.
+       if (tag.empty() || tag == "NONE" || tag == "IGNORE")
+               return;
+
+       if (tag == "para" || tagtype == "paragraph") // Special case for <para>: always considered as a paragraph.
+               openParTag(xs, tag, attr);
+       else if (tagtype == "block")
+               openBlockTag(xs, tag, attr);
+       else if (tagtype == "inline")
+               openInlineTag(xs, tag, attr);
+       else if (tagtype == "none")
+               xs << xml::StartTag(tag, attr);
+       else
+               xs.writeError("Unrecognised tag type '" + tagtype + "' for '" + to_utf8(tag) + " " + to_utf8(attr) + "'");
+}
+
+
+// Convenience overload: tag and attr are given as UTF-8 std::string and are
+// converted with from_utf8() before calling the docstring version.
+void xml::openTag(XMLStream & xs, const std::string & tag, const std::string & attr, const std::string & tagtype)
+{
+       docstring const tag_ds = from_utf8(tag);
+       docstring const attr_ds = from_utf8(attr);
+       xml::openTag(xs, tag_ds, attr_ds, tagtype);
+}
+
+
+// Convenience overload: attr is given as a UTF-8 std::string and is
+// converted with from_utf8() before calling the docstring version.
+void xml::openTag(XMLStream & xs, const docstring & tag, const std::string & attr, const std::string & tagtype)
+{
+       docstring const attr_ds = from_utf8(attr);
+       xml::openTag(xs, tag, attr_ds, tagtype);
+}
+
+
+// Convenience overload: tag is given as a UTF-8 std::string and is
+// converted with from_utf8() before calling the docstring version.
+void xml::openTag(XMLStream & xs, const std::string & tag, const docstring & attr, const std::string & tagtype)
+{
+       docstring const tag_ds = from_utf8(tag);
+       xml::openTag(xs, tag_ds, attr, tagtype);
+}
+
+
+// Closes the element tag in xs. The line breaks written around the end tag
+// depend on tagtype:
+//  - "paragraph" (or any element named "para"): followed by a line break;
+//  - "block": alone on its own line;
+//  - "inline" and "none": no line break at all.
+// Any other tagtype writes an error comment into xs instead of the tag.
+// Nothing is written when tag is empty, "NONE" or "IGNORE".
+void xml::closeTag(XMLStream & xs, const docstring & tag, const std::string & tagtype)
+{
+       bool const nothing_to_close = tag.empty() || tag == "NONE" || tag == "IGNORE";
+       if (nothing_to_close)
+               return;
+
+       // Special case for <para>: always considered as a paragraph.
+       if (tag == "para" || tagtype == "paragraph") {
+               closeParTag(xs, tag);
+               return;
+       }
+       if (tagtype == "block") {
+               closeBlockTag(xs, tag);
+               return;
+       }
+       if (tagtype == "inline") {
+               closeInlineTag(xs, tag);
+               return;
+       }
+       if (tagtype == "none") {
+               xs << xml::EndTag(tag);
+               return;
+       }
+       xs.writeError("Unrecognised tag type '" + tagtype + "' for '" + to_utf8(tag) + "'");
+}
+
+
+// Convenience overload: tag is given as a UTF-8 std::string and is
+// converted with from_utf8() before calling the docstring version.
+void xml::closeTag(XMLStream & xs, const std::string & tag, const std::string & tagtype)
+{
+       docstring const tag_ds = from_utf8(tag);
+       xml::closeTag(xs, tag_ds, tagtype);
+}
+
+
+// Writes the element tag, with attributes attr, as a xml::CompTag (an element without content) in xs.
+// The line breaks written around it depend on tagtype:
+//  - "paragraph", "block" (or any element named "para"): alone on its own line;
+//  - "inline" and "none": no line break at all.
+// Any other tagtype writes an error comment into xs instead of the tag.
+// Nothing is written when tag is empty or "NONE".
+void xml::compTag(XMLStream & xs, const docstring & tag, const docstring & attr, const std::string & tagtype)
+{
+       if (tag.empty() || tag == from_ascii("NONE"))
+               return;
+
+       // Special case for <para>: always considered as a paragraph.
+       if (tag == from_ascii("para") || tagtype == "paragraph" || tagtype == "block") {
+               if (!xs.isLastTagCR())
+                       xs << xml::CR();
+               xs << xml::CompTag(tag, attr);
+               xs << xml::CR();
+       } else if (tagtype == "inline" || tagtype == "none") {
+               // "none" is accepted like in xml::openTag and xml::closeTag: the tag is written bare.
+               xs << xml::CompTag(tag, attr);
+       } else {
+               xs.writeError("Unrecognised tag type '" + tagtype + "' for '" + to_utf8(tag) + "'");
+       }
+}
+
+
+// Convenience overload: tag and attr are given as UTF-8 std::string and are
+// converted with from_utf8() before calling the docstring version.
+void xml::compTag(XMLStream & xs, const std::string & tag, const std::string & attr, const std::string & tagtype)
+{
+       docstring const tag_ds = from_utf8(tag);
+       docstring const attr_ds = from_utf8(attr);
+       xml::compTag(xs, tag_ds, attr_ds, tagtype);
+}
+
+
+// Convenience overload: attr is given as a UTF-8 std::string and is
+// converted with from_utf8() before calling the docstring version.
+void xml::compTag(XMLStream & xs, const docstring & tag, const std::string & attr, const std::string & tagtype)
+{
+       docstring const attr_ds = from_utf8(attr);
+       xml::compTag(xs, tag, attr_ds, tagtype);
+}
+
+
+// Convenience overload: tag is given as a UTF-8 std::string and is
+// converted with from_utf8() before calling the docstring version.
+void xml::compTag(XMLStream & xs, const std::string & tag, const docstring & attr, const std::string & tagtype)
+{
+       docstring const tag_ds = from_utf8(tag);
+       xml::compTag(xs, tag_ds, attr, tagtype);
+}
+
+
  } // namespace lyx