3 * This file is part of LyX, the document processor.
4 * License details can be found in the file COPYING.
9 * Full author contact details are available in file CREDITS.
17 #include "BufferParams.h"
20 #include "Paragraph.h"
22 #include "TextClass.h"
24 #include "support/convert.h"
25 #include "support/debug.h"
26 #include "support/docstream.h"
27 #include "support/lassert.h"
28 #include "support/lstrings.h"
29 #include "support/textutils.h"
34 #include <QThreadStorage>
37 using namespace lyx::support;
// Escapes the single character `c` according to the escape setting `e`.
// The switch has cases for ESCAPE_NONE, ESCAPE_COMMENTS, ESCAPE_ALL and ESCAPE_AND.
// NOTE(review): the statements inside most cases are not visible in this
// listing, so the replacement text produced per case cannot be confirmed here.
43 docstring escapeChar(char_type c, XMLStream::EscapeSettings e)
46 switch (e) { // For HTML: always ESCAPE_NONE. For XML: it depends, hence the parameter.
47 case XMLStream::ESCAPE_NONE:
48 case XMLStream::ESCAPE_COMMENTS:
51 case XMLStream::ESCAPE_ALL:
// One branch of the ESCAPE_ALL handling tests for '>'.
55 } else if (c == '>') {
60 case XMLStream::ESCAPE_AND:
// Narrow-character overload: checks (via LATTEST) that `c` is below 0x80,
// then forwards to the char_type overload with the same escape setting.
71 docstring escapeChar(char c, XMLStream::EscapeSettings e)
73 LATTEST(static_cast<unsigned char>(c) < 0x80);
74 return escapeChar(static_cast<char_type>(c), e);
// Builds an escaped version of `raw` in `bin` by walking it index by index
// and appending xml::escapeChar(c, e).
// In ESCAPE_COMMENTS mode a '-' that directly follows another '-' is
// special-cased; NOTE(review): the action taken for it is on a line that is
// not visible in this listing.
78 docstring escapeString(docstring const & raw, XMLStream::EscapeSettings e)
81 bin.reserve(raw.size() * 2); // crude approximation is sufficient
82 for (size_t i = 0; i != raw.size(); ++i) {
84 if (e == XMLStream::ESCAPE_COMMENTS && c == '-' && i > 0 && raw[i - 1] == '-')
87 bin += xml::escapeChar(c, e);
// Builds `newname` from `str`: ASCII alphanumeric characters are kept and
// every other character is replaced by '_'.
94 docstring cleanAttr(docstring const & str)
97 docstring::const_iterator it = str.begin();
98 docstring::const_iterator en = str.end();
99 for (; it != en; ++it) {
100 char_type const c = *it;
101 newname += isAlnumASCII(c) ? c : char_type('_');
// Builds the text of the opening tag: '<' followed by tag_ and, when attr_
// is not empty, a space plus the attributes. The attributes go through
// escapeString with ESCAPE_NONE and are left-trimmed first; nothing is
// appended if they end up empty.
// NOTE(review): the closing '>' and the return are not visible in this listing.
107 docstring StartTag::writeTag() const
109 docstring output = '<' + tag_;
110 if (!attr_.empty()) {
111 docstring attributes = xml::trimLeft(xml::escapeString(attr_, XMLStream::ESCAPE_NONE));
112 if (!attributes.empty())
113 output += ' ' + attributes;
// Returns the closing-tag text for this start tag: "</" + tag_ + ">".
120 docstring StartTag::writeEndTag() const
122 return from_utf8("</") + tag_ + from_utf8(">");
// Returns the closing-tag text: "</" + tag_ + ">".
126 docstring EndTag::writeEndTag() const
128 return from_utf8("</") + tag_ + from_utf8(">");
// Builds the text of a complete (self-contained) tag: '<' followed by tag_
// and, when attr_ is not empty, a space plus the attributes. Leading
// whitespace of the attribute string is erased before it is appended, and
// nothing is appended if the result is empty.
// NOTE(review): the tag terminator and the return are not visible in this listing.
132 docstring CompTag::writeTag() const
134 docstring output = '<' + tag_;
135 if (!attr_.empty()) {
136 // Erase the beginning of the attributes if it contains space characters: this function deals with that
138 docstring attributes = escapeString(attr_, XMLStream::ESCAPE_NONE);
139 attributes.erase(attributes.begin(), std::find_if(attributes.begin(), attributes.end(),
140 [](char_type c) {return !isSpace(c);}));
141 if (!attributes.empty()) {
142 output += ' ' + attributes;
// Reports an output error: logs `s` on the Debug::OUTFILE channel and writes
// it into the stream as "<!-- Output Error: ... -->" using ESCAPE_NONE.
152 void XMLStream::writeError(std::string const &s)
154 LYXERR(Debug::OUTFILE, s);
155 *this << ESCAPE_NONE << from_utf8("<!-- Output Error: " + s + " -->");
// docstring variant of writeError: logs `s` on Debug::OUTFILE and writes the
// "<!-- Output Error: " prefix and " -->" suffix with ESCAPE_NONE.
// NOTE(review): the line that writes `s` itself between the two is not
// visible in this listing.
160 void XMLStream::writeError(docstring const &s)
162 LYXERR(Debug::OUTFILE, s);
163 *this << ESCAPE_NONE << from_utf8("<!-- Output Error: ");
165 *this << ESCAPE_NONE << from_utf8(" -->");
// Returns the tag at the back of tag_stack_.
// NOTE(review): back() is called with no visible emptiness check, so callers
// presumably must ensure the stack is not empty — confirm.
170 XMLStream::TagPtr XMLStream::getLastStackTag()
172 return tag_stack_.back();
// Closes font tags sitting on top of tag_stack_: each font tag is popped and
// its end tag written to os_ (unless it compares equal to parsep_tag), until
// a non-font tag is reached or the stack is empty. If the loop stops on a tag
// other than parsep_tag, an error listing the still-open tags is written.
// NOTE(review): the return statements of the individual branches are on
// lines not visible in this listing, so the returned values are not
// documented here.
176 bool XMLStream::closeFontTags()
178 if (isTagPending(xml::parsep_tag))
179 // we haven't had any content
182 // this may be a useless check, since we ought at least to have
183 // the parsep_tag. but it can't hurt too much to be careful.
184 if (tag_stack_.empty())
187 // first, we close any open font tags we can close
188 TagPtr *curtag = &tag_stack_.back();
189 while ((*curtag)->asFontTag()) {
190 if (**curtag != xml::parsep_tag)
191 os_ << (*curtag)->writeEndTag();
192 tag_stack_.pop_back();
193 if (tag_stack_.empty())
// re-point at the new top of the stack after the pop
195 curtag = &tag_stack_.back();
198 if (**curtag == xml::parsep_tag)
201 // so we've hit a non-font tag.
202 writeError("Tags still open in closeFontTags(). Probably not a problem,\n"
203 "but you might want to check these tags:");
// walk the stack from the most recently opened tag downwards
204 TagDeque::const_reverse_iterator it = tag_stack_.rbegin();
205 TagDeque::const_reverse_iterator const en = tag_stack_.rend();
206 for (; it != en; ++it) {
207 if (**it == xml::parsep_tag)
209 writeError((*it)->tag_);
// Marks the start of a division by pushing a StartTag built from parsep_tag
// onto pending_tags_.
// NOTE(review): how `keep_empty` is used is not visible in this listing.
215 void XMLStream::startDivision(bool keep_empty)
217 pending_tags_.push_back(makeTagPtr(xml::StartTag(xml::parsep_tag)));
// Ends the current division. Two situations are handled:
//  - parsep_tag is still pending (no content was written): pending tags are
//    popped from the back, the loop testing each one against parsep_tag;
//  - otherwise parsep_tag is expected on tag_stack_: tags are popped from the
//    back, and any tag other than parsep_tag is reported as still open and
//    its end tag is written to os_.
// An error is written if parsep_tag is not open at all.
// NOTE(review): the break/return statements of the loops are on lines not
// visible in this listing.
223 void XMLStream::endDivision()
225 if (isTagPending(xml::parsep_tag)) {
226 // this case is normal. it just means we didn't have content,
227 // so the parsep_tag never got moved onto the tag stack.
228 while (!pending_tags_.empty()) {
229 // clear all pending tags up to and including the parsep tag.
230 // note that we work from the back, because we want to get rid
231 // of everything that hasn't been used.
232 TagPtr const cur_tag = pending_tags_.back();
233 pending_tags_.pop_back();
234 if (*cur_tag == xml::parsep_tag)
239 dumpTagStack("EndDivision");
245 if (!isTagOpen(xml::parsep_tag)) {
246 writeError("No division separation tag found in endDivision().");
250 // this case is also normal, if the parsep tag is the last one
251 // on the stack. otherwise, it's an error.
252 while (!tag_stack_.empty()) {
253 TagPtr const cur_tag = tag_stack_.back();
254 tag_stack_.pop_back();
255 if (*cur_tag == xml::parsep_tag)
257 writeError("Tag `" + cur_tag->tag_ + "' still open at end of paragraph. Closing.");
258 os_ << cur_tag->writeEndTag();
262 dumpTagStack("EndDivision");
// Drains pending_tags_ from the front: each pending tag is pushed onto
// tag_stack_ and removed from the pending queue, and opening-tag text is
// written to os_.
// NOTE(review): a line between the parsep_tag test and the write is not
// visible in this listing; presumably the test guards the write so that
// parsep_tag itself is never output — confirm against the full file.
267 void XMLStream::clearTagDeque()
269 while (!pending_tags_.empty()) {
270 TagPtr const & tag = pending_tags_.front();
271 if (*tag != xml::parsep_tag)
273 os_ << tag->writeTag();
274 tag_stack_.push_back(tag);
275 pending_tags_.pop_front();
// Writes the text `d` to os_, escaped with the current escape_ setting, then
// resets escape_ to ESCAPE_ALL. Also clears is_last_tag_cr_.
280 XMLStream &XMLStream::operator<<(docstring const &d)
282 is_last_tag_cr_ = false;
284 os_ << xml::escapeString(d, escape_);
285 escape_ = ESCAPE_ALL;
// Handles a NullTag: clears is_last_tag_cr_ and resets escape_ to ESCAPE_ALL
// without writing anything to os_.
290 XMLStream &XMLStream::operator<<(xml::NullTag const &)
292 is_last_tag_cr_ = false;
294 // Don't output anything to os_, by definition of a NullTag (as opposed to text output).
295 escape_ = ESCAPE_ALL;
// Writes the C string `s` (converted with from_ascii) to os_, escaped with
// the current escape_ setting, then resets escape_ to ESCAPE_ALL. Also clears
// is_last_tag_cr_.
300 XMLStream &XMLStream::operator<<(const char *s)
302 is_last_tag_cr_ = false;
304 docstring const d = from_ascii(s);
305 os_ << xml::escapeString(d, escape_);
306 escape_ = ESCAPE_ALL;
// Writes the single character `c` to os_, escaped with the current escape_
// setting, then resets escape_ to ESCAPE_ALL. Also clears is_last_tag_cr_.
311 XMLStream &XMLStream::operator<<(char_type c)
313 is_last_tag_cr_ = false;
315 os_ << xml::escapeChar(c, escape_);
316 escape_ = ESCAPE_ALL;
// Narrow-character variant: writes `c` to os_, escaped with the current
// escape_ setting, then resets escape_ to ESCAPE_ALL. Also clears
// is_last_tag_cr_.
321 XMLStream &XMLStream::operator<<(char c)
323 is_last_tag_cr_ = false;
325 os_ << xml::escapeChar(c, escape_);
326 escape_ = ESCAPE_ALL;
// Integer variant: clears is_last_tag_cr_ and resets escape_ to ESCAPE_ALL.
// NOTE(review): the line that writes `i` to the stream is not visible in
// this listing.
331 XMLStream &XMLStream::operator<<(int i)
333 is_last_tag_cr_ = false;
336 escape_ = ESCAPE_ALL;
// Accepts an EscapeSettings value on the stream.
// NOTE(review): the statements of this function are not visible in this
// listing; presumably it stores `e` for the next output operation — confirm.
341 XMLStream &XMLStream::operator<<(EscapeSettings e)
343 // Don't update is_last_tag_cr_ here, as this does not output anything.
// Queues a start tag: clears is_last_tag_cr_, special-cases an empty tag name
// (the action is on a line not visible here), and appends a copy of `tag` to
// pending_tags_ rather than writing it immediately.
349 XMLStream &XMLStream::operator<<(xml::StartTag const &tag)
351 is_last_tag_cr_ = false;
352 if (tag.tag_.empty())
354 pending_tags_.push_back(makeTagPtr(tag));
// Queues a paragraph tag: clears is_last_tag_cr_, special-cases an empty tag
// name (the action is on a line not visible here), and appends a copy of
// `tag` to pending_tags_.
361 XMLStream &XMLStream::operator<<(xml::ParTag const &tag)
363 is_last_tag_cr_ = false;
364 if (tag.tag_.empty())
366 pending_tags_.push_back(makeTagPtr(tag));
// Writes a complete tag directly to os_ via tag.writeTag(). Clears
// is_last_tag_cr_ and special-cases an empty tag name (the action is on a
// line not visible here).
371 XMLStream &XMLStream::operator<<(xml::CompTag const &tag)
373 is_last_tag_cr_ = false;
374 if (tag.tag_.empty())
377 os_ << tag.writeTag();
// Queues a font tag: clears is_last_tag_cr_, special-cases an empty tag name
// (the action is on a line not visible here), and appends a copy of `tag` to
// pending_tags_.
382 XMLStream &XMLStream::operator<<(xml::FontTag const &tag)
384 is_last_tag_cr_ = false;
385 if (tag.tag_.empty())
387 pending_tags_.push_back(makeTagPtr(tag));
// Writes a newline to os_ and sets is_last_tag_cr_, which the is-last-tag-CR
// checks elsewhere in this file read.
392 XMLStream &XMLStream::operator<<(xml::CR const &)
394 is_last_tag_cr_ = true;
396 os_ << from_ascii("\n");
// Searches tag_stack_ for `stag`, iterating from begin(); the loop ends at
// the end of the stack or when `maxdepth` reaches 0.
// NOTE(review): the loop body (comparison, maxdepth update) and the return
// statements are not visible in this listing.
401 bool XMLStream::isTagOpen(xml::StartTag const &stag, int maxdepth) const
403 auto sit = tag_stack_.begin();
404 auto sen = tag_stack_.cend();
405 for (; sit != sen && maxdepth != 0; ++sit) {
// Searches tag_stack_ for a tag matching `etag`, iterating from begin(); the
// loop ends at the end of the stack or when `maxdepth` reaches 0.
// NOTE(review): the loop body and the return statements are not visible in
// this listing.
414 bool XMLStream::isTagOpen(xml::EndTag const &etag, int maxdepth) const
416 auto sit = tag_stack_.begin();
417 auto sen = tag_stack_.cend();
418 for (; sit != sen && maxdepth != 0; ++sit) {
// Searches pending_tags_ for `stag`, iterating from begin(); the loop ends at
// the end of the queue or when `maxdepth` reaches 0.
// NOTE(review): the loop body and the return statements are not visible in
// this listing.
427 bool XMLStream::isTagPending(xml::StartTag const &stag, int maxdepth) const
429 auto sit = pending_tags_.begin();
430 auto sen = pending_tags_.cend();
431 for (; sit != sen && maxdepth != 0; ++sit) {
440 // this is complicated, because we want to make sure that
441 // everything is properly nested. the code ought to make
442 // sure of that, but we won't assert (yet) if we run into
443 // a problem. we'll just output error messages and try our
444 // best to make things work.
// Closes the tag named by `etag`, trying to keep the output properly nested.
// Visible stages, in order:
//  1. If tags are pending: a pending tag equal to etag at the back is simply
//     dropped (empty element); an etag found elsewhere among the pending
//     tags is reported and erased; an etag that is not open at all is
//     reported; otherwise the pending tags are listed in an error message
//     and cleared.
//  2. If etag matches the top of tag_stack_, its end tag is written and the
//     stack is popped.
//  3. If etag is a font tag buried under other font tags, the intervening
//     tags are closed, etag is closed, and the intervening tags are pushed
//     back onto pending_tags_ so they get re-opened.
//  4. Otherwise every tag above etag is reported and closed, then etag itself.
// NOTE(review): the return/break statements separating these stages are on
// lines not visible in this listing.
445 XMLStream &XMLStream::operator<<(xml::EndTag const &etag)
447 is_last_tag_cr_ = false;
449 if (etag.tag_.empty())
452 // if this tag is pending, we can simply discard it.
453 if (!pending_tags_.empty()) {
454 if (etag == *pending_tags_.back()) {
455 // we have <tag></tag>, so we discard it and remove it
456 // from the pending_tags_.
457 pending_tags_.pop_back();
461 // there is a pending tag that isn't the one we are trying
464 // is this tag itself pending?
465 // non-const iterators because we may call erase().
466 TagDeque::iterator dit = pending_tags_.begin();
467 TagDeque::iterator const den = pending_tags_.end();
468 for (; dit != den; ++dit) {
470 // it was pending, so we just erase it
471 writeError("Tried to close pending tag `" + to_utf8(etag.tag_)
472 + "' when other tags were pending. Last pending tag is `"
473 + to_utf8(pending_tags_.back()->writeTag())
474 + "'. Tag discarded.");
475 if (!pending_tags_.empty())
476 pending_tags_.erase(dit);
480 // so etag isn't itself pending. is it even open?
481 if (!isTagOpen(etag)) {
482 writeError("Tried to close `" + to_utf8(etag.tag_)
483 + "' when tag was not open. Tag discarded.");
486 // ok, so etag is open.
487 // our strategy will be as below: we will do what we need to
488 // do to close this tag.
489 string estr = "Closing tag `" + to_utf8(etag.tag_)
490 + "' when other tags are pending. Discarded pending tags:\n";
491 for (dit = pending_tags_.begin(); dit != den; ++dit)
492 estr += to_utf8(xml::escapeString((*dit)->writeTag(), XMLStream::ESCAPE_ALL)) + "\n";
494 // clear the pending tags...
495 pending_tags_.clear();
496 // ...and then just fall through.
499 // make sure there are tags to be closed
500 if (tag_stack_.empty()) {
501 writeError("Tried to close `" + etag.tag_
502 + "' when no tags were open!");
506 // is the tag we are closing the last one we opened?
507 if (etag == *tag_stack_.back()) {
509 os_ << etag.writeEndTag();
510 // ...and forget about it
511 tag_stack_.pop_back();
515 // we are trying to close a tag other than the one last opened.
516 // let's first see if this particular tag is still open somehow.
517 if (!isTagOpen(etag)) {
518 writeError("Tried to close `" + etag.tag_
519 + "' when tag was not open. Tag discarded.");
523 // so the tag was opened, but other tags have been opened since
524 // and not yet closed.
525 // if it's a font tag, though...
526 if (etag.asFontTag()) {
527 // it won't be a problem if the other tags open since this one
528 // are also font tags.
529 TagDeque::const_reverse_iterator rit = tag_stack_.rbegin();
530 TagDeque::const_reverse_iterator ren = tag_stack_.rend();
531 for (; rit != ren; ++rit) {
534 if (!(*rit)->asFontTag()) {
535 // we'll just leave it and, presumably, have to close it later.
536 writeError("Unable to close font tag `" + etag.tag_
537 + "' due to open non-font tag `" + (*rit)->tag_ + "'.");
543 // <em>this is <strong>bold
544 // and are being asked to close em. we want:
545 // <em>this is <strong>bold</strong></em><strong>
546 // first, we close the intervening tags...
547 TagPtr *curtag = &tag_stack_.back();
548 // ...remembering them in a stack.
// NOTE(review): this loop calls back() after each pop with no emptiness
// check; it relies on the isTagOpen(etag) test above having succeeded.
550 while (etag != **curtag) {
551 os_ << (*curtag)->writeEndTag();
552 fontstack.push_back(*curtag);
553 tag_stack_.pop_back();
554 curtag = &tag_stack_.back();
556 os_ << etag.writeEndTag();
557 tag_stack_.pop_back();
559 // ...and restore the other tags.
560 rit = fontstack.rbegin();
561 ren = fontstack.rend();
562 for (; rit != ren; ++rit)
563 pending_tags_.push_back(*rit);
567 // it wasn't a font tag.
568 // so other tags were opened before this one and not properly closed.
569 // so we'll close them, too. that may cause other issues later, but it
570 // at least guarantees proper nesting.
571 writeError("Closing tag `" + etag.tag_
572 + "' when other tags are open, namely:");
573 TagPtr *curtag = &tag_stack_.back();
// NOTE(review): as above, back() is called after each pop with no emptiness
// check; correctness depends on etag being present in tag_stack_.
574 while (etag != **curtag) {
575 writeError((*curtag)->tag_);
576 if (**curtag != xml::parsep_tag)
577 os_ << (*curtag)->writeEndTag();
578 tag_stack_.pop_back();
579 curtag = &tag_stack_.back();
581 // curtag is now the one we actually want.
582 os_ << (*curtag)->writeEndTag();
583 tag_stack_.pop_back();
// Returns `label` followed by a number taken from a function-local static
// atomic counter. The counter starts at 1000 and is pre-incremented on each
// call, so the first suffix produced is 1001.
589 docstring xml::uniqueID(docstring const & label)
592 static atomic_uint seed(1000);
593 return label + convert<docstring>(++seed);
// Scans `str` for a character that is not one of: space, tab, newline,
// vertical tab, form feed, carriage return.
// NOTE(review): the return statements are not visible in this listing; from
// the name, presumably true is returned when such a character is found.
597 bool xml::isNotOnlySpace(docstring const & str)
599 for (auto const & c: str) {
600 if (c != ' ' && c != '\t' && c != '\n' && c != '\v' && c != '\f' && c != '\r')
// Returns the part of `str` starting at index `i` once a character that is
// not space, tab, newline, vertical tab, form feed or carriage return is met.
// NOTE(review): the declaration and update of `i`, and the return used when
// no such character exists, are not visible in this listing.
607 docstring xml::trimLeft(docstring const & str)
610 for (auto const & c: str) {
611 if (c != ' ' && c != '\t' && c != '\n' && c != '\v' && c != '\f' && c != '\r')
612 return str.substr(i, docstring::npos);
// Turns `orig` into a string usable as an XML ID and caches the result.
// A per-thread map (QThreadStorage) from original to mangled name is
// consulted first, so the same input yields the same output on a thread.
// When mangling was necessary and the mangled text is already present as a
// key of that map, a "-<number>" suffix based on a per-thread counter is
// appended.
// NOTE(review): the statements that append to `content`, set `mangle`,
// update the counter and return the result are not visible in this listing.
619 docstring xml::cleanID(docstring const & orig)
621 // The standard xml:id only allows letters, digits, '-' and '.' in a name.
622 // This routine replaces illegal characters by '-' or '.' and adds a number for uniqueness if need be.
624 // Use a cache of already mangled names: the alterations may merge several IDs as one. This ensures that the IDs
625 // are not mixed up in the document.
626 // This code could be improved: it uses Qt outside the GUI part. Any TLS implementation could do the trick.
627 typedef map<docstring, docstring> MangledMap;
628 static QThreadStorage<MangledMap> tMangledNames;
629 static QThreadStorage<int> tMangleID;
631 // If the name is already known, just return it.
632 MangledMap & mangledNames = tMangledNames.localData();
633 auto const known = mangledNames.find(orig);
634 if (known != mangledNames.end())
635 return known->second;
637 // Start creating the mangled name by iterating over the characters.
639 auto it = orig.cbegin();
640 auto end = orig.cend();
642 // Make sure it starts with a letter.
// NOTE(review): *it is dereferenced with no visible check that `orig` is
// non-empty — confirm that callers never pass an empty ID.
643 if (!isAlphaASCII(*it))
646 // Parse the ID character by character and change what needs to.
647 bool mangle = false; // Indicates whether the ID had to be changed, i.e. if ID no more ensured to be unique.
648 for (; it != end; ++it) {
650 if (isAlphaASCII(c) || isDigitASCII(c) || c == '-' || c == '.' || c == '_') {
652 } else if (c == ':' || c == ',' || c == ';' || c == '!') {
655 } else { // Other invalid characters, such as ' '.
661 // If there had to be a change, check if ID unicity is still guaranteed.
662 // This avoids having a clash if satisfying XML requirements for ID makes two IDs identical, like "a:b" and "a!b",
663 // as both of them would be transformed as "a.b". With this procedure, one will become "a.b" and the other "a.b-1".
664 if (mangle && mangledNames.find(content) != mangledNames.end()) {
665 int & mangleID = tMangleID.localData();
667 content += "-" + convert<docstring>(mangleID);
671 // Save the new ID to avoid recomputing it afterwards and to ensure stability over the document.
672 mangledNames[orig] = content;
// Equality of two start tags. If neither is a font tag, the tag names are
// compared. If both are font tags, the tag name and font_type_ must match.
// The mixed case (exactly one is a font tag) is tested separately;
// NOTE(review): its result is on a line not visible in this listing.
677 bool operator==(xml::StartTag const & lhs, xml::StartTag const & rhs)
679 xml::FontTag const * const lhs_ft = lhs.asFontTag();
680 xml::FontTag const * const rhs_ft = rhs.asFontTag();
682 if ((!lhs_ft && rhs_ft) || (lhs_ft && !rhs_ft))
684 if (!lhs_ft && !rhs_ft)
685 return lhs.tag_ == rhs.tag_;
686 return lhs_ft->tag_ == rhs_ft->tag_ && lhs_ft->font_type_ == rhs_ft->font_type_;
// Tests whether an end tag matches a start tag. If neither is a font tag,
// the tag names are compared. If both are font tags, the tag name and
// font_type_ must match. The mixed case (exactly one is a font tag) is tested
// separately; NOTE(review): its result is on a line not visible in this listing.
690 bool operator==(xml::EndTag const & lhs, xml::StartTag const & rhs)
692 xml::EndFontTag const * const lhs_ft = lhs.asFontTag();
693 xml::FontTag const * const rhs_ft = rhs.asFontTag();
695 if ((!lhs_ft && rhs_ft) || (lhs_ft && !rhs_ft))
697 if (!lhs_ft && !rhs_ft)
698 return lhs.tag_ == rhs.tag_;
699 return lhs_ft->tag_ == rhs_ft->tag_ && lhs_ft->font_type_ == rhs_ft->font_type_;
// Negation of operator==(EndTag, StartTag).
703 bool operator!=(xml::EndTag const & lhs, xml::StartTag const & rhs)
705 return !(lhs == rhs);
// Negation of operator==(StartTag, StartTag).
709 bool operator!=(xml::StartTag const & lhs, xml::StartTag const & rhs)
711 return !(lhs == rhs);
// Writes an opening tag "<name ..." to `os`. Any '<' or '>' in `attribute`
// is first replaced by a double quote. Nothing is written when `name` is
// empty, "!-- --" or "dummy".
// NOTE(review): the condition guarding the attribute output and the closing
// '>' are on lines not visible in this listing.
715 void xml::openTag(odocstream & os, string const & name, string const & attribute)
718 // This should be fixed in layout files later.
719 string param = subst(attribute, "<", "\"");
720 param = subst(param, ">", "\"");
722 // Note: we ignore the name if it is empty or if it is a comment "<!-- -->" or
723 // if the name is *dummy*.
724 // We ignore dummy because dummy is not a valid DocBook element and it is
725 // the internal name given to single paragraphs in the latex output.
726 // This allow us to simplify the code a lot and is a reasonable compromise.
727 if (!name.empty() && name != "!-- --" && name != "dummy") {
728 os << '<' << from_ascii(name);
730 os << ' ' << from_ascii(param);
// Writes the closing tag "</name>" to `os`, unless `name` is empty,
// "!-- --" or "dummy" (the same names that the matching openTag skips).
736 void xml::closeTag(odocstream & os, string const & name)
738 if (!name.empty() && name != "!-- --" && name != "dummy")
739 os << "</" << from_ascii(name) << '>';
// Opens the tag for paragraph `par`, taking the tag name and parameter string
// from the paragraph's layout (latexname / latexparam) and then calling the
// (os, name, attribute) overload.
// Visible handling of the attribute string:
//  - with a paragraph ID: if the parameter contains '#', the span from
//    "id=<" to the first '>' is erased, and the attribute becomes
//    id + ' ' + param;
//  - otherwise, if the parameter contains '#', a counter is stepped and '#'
//    is replaced by the value of the counter named after the tag.
// NOTE(review): the conditions selecting between these branches are on lines
// not visible in this listing.
743 void xml::openTag(Buffer const & buf, odocstream & os,
744 OutputParams const & runparams, Paragraph const & par)
746 Layout const & style = par.layout();
747 string const & name = style.latexname();
748 string param = style.latexparam();
749 Counters & counters = buf.params().documentClass().counters();
751 string id = par.getID(buf, runparams);
755 if (param.find('#') != string::npos) {
756 string::size_type pos = param.find("id=<");
757 string::size_type end = param.find(">");
758 if( pos != string::npos && end != string::npos)
759 param.erase(pos, end-pos + 1);
761 attribute = id + ' ' + param;
763 if (param.find('#') != string::npos) {
765 if (!style.counter.empty())
766 // This uses InternalUpdate at the moment because xml output
767 // does not do anything with tracked counters, and it would need
768 // to track layouts if it did want to use them.
769 counters.step(style.counter, InternalUpdate);
771 counters.step(from_ascii(name), InternalUpdate);
772 int i = counters.value(from_ascii(name));
773 attribute = subst(param, "#", convert<string>(i));
778 openTag(os, name, attribute);
// Closes the tag of paragraph `par`: looks up the layout's latexname and
// forwards to closeTag(os, name).
782 void xml::closeTag(odocstream & os, Paragraph const & par)
784 Layout const & style = par.layout();
785 closeTag(os, style.latexname());
// Sends a StartTag(tag, attr) to `xs`.
789 void openInlineTag(XMLStream & xs, const docstring & tag, const docstring & attr)
791 xs << xml::StartTag(tag, attr);
// Sends an EndTag(tag) to `xs`.
795 void closeInlineTag(XMLStream & xs, const docstring & tag)
797 xs << xml::EndTag(tag);
// Opens a paragraph-level tag: sends StartTag(tag, attr) to `xs`.
// NOTE(review): the statement guarded by !xs.isLastTagCR() is on a line not
// visible in this listing; presumably it emits a line break first — confirm.
801 void openParTag(XMLStream & xs, const docstring & tag, const docstring & attr)
803 if (!xs.isLastTagCR())
805 xs << xml::StartTag(tag, attr);
// Closes a paragraph-level tag: sends EndTag(tag) to `xs`.
// NOTE(review): any statement following the EndTag is not visible in this listing.
809 void closeParTag(XMLStream & xs, const docstring & tag)
811 xs << xml::EndTag(tag);
// Opens a block-level tag: sends StartTag(tag, attr) to `xs`.
// NOTE(review): the statement guarded by !xs.isLastTagCR() is on a line not
// visible in this listing; presumably it emits a line break first — confirm.
816 void openBlockTag(XMLStream & xs, const docstring & tag, const docstring & attr)
818 if (!xs.isLastTagCR())
820 xs << xml::StartTag(tag, attr);
// Closes a block-level tag: sends EndTag(tag) to `xs`.
// NOTE(review): the statement guarded by !xs.isLastTagCR() is on a line not
// visible in this listing; presumably it emits a line break first — confirm.
825 void closeBlockTag(XMLStream & xs, const docstring & tag)
827 if (!xs.isLastTagCR())
829 xs << xml::EndTag(tag);
// Opens `tag` on `xs`, dispatching on `tagtype`:
//   "paragraph" (or a tag named "para") -> openParTag
//   "block"                             -> openBlockTag
//   "inline"                            -> openInlineTag
//   "none"                              -> a bare StartTag
// Any other tag type is reported through xs.writeError. An empty tag or the
// tag "NONE" is special-cased first (the action is on a line not visible here).
834 void xml::openTag(XMLStream & xs, const docstring & tag, const docstring & attr, const std::string & tagtype)
836 if (tag.empty() || tag == from_ascii("NONE")) // Common check to be performed elsewhere, if it was not here.
839 if (tag == from_ascii("para") || tagtype == "paragraph") // Special case for <para>: always considered as a paragraph.
840 openParTag(xs, tag, attr);
841 else if (tagtype == "block")
842 openBlockTag(xs, tag, attr);
843 else if (tagtype == "inline")
844 openInlineTag(xs, tag, attr);
845 else if (tagtype == "none")
846 xs << xml::StartTag(tag, attr);
848 xs.writeError("Unrecognised tag type '" + tagtype + "' for '" + to_utf8(tag) + (attr.empty() ? "" : " ") +
849 to_utf8(attr) + "'");
// Convenience overload: converts `tag` and `attr` with from_utf8 and forwards
// to the docstring/docstring overload.
853 void xml::openTag(XMLStream & xs, const std::string & tag, const std::string & attr, const std::string & tagtype)
855 xml::openTag(xs, from_utf8(tag), from_utf8(attr), tagtype);
// Convenience overload: converts `attr` with from_utf8 and forwards to the
// docstring/docstring overload.
859 void xml::openTag(XMLStream & xs, const docstring & tag, const std::string & attr, const std::string & tagtype)
861 xml::openTag(xs, tag, from_utf8(attr), tagtype);
// Convenience overload: converts `tag` with from_utf8 and forwards to the
// docstring/docstring overload.
865 void xml::openTag(XMLStream & xs, const std::string & tag, const docstring & attr, const std::string & tagtype)
867 xml::openTag(xs, from_utf8(tag), attr, tagtype);
// Closes `tag` on `xs`, dispatching on `tagtype`:
//   "paragraph" (or a tag named "para") -> closeParTag
//   "block"                             -> closeBlockTag
//   "inline"                            -> closeInlineTag
//   "none"                              -> a bare EndTag
// Any other tag type is reported through xs.writeError. An empty tag, "NONE"
// or "IGNORE" is special-cased first (the action is on a line not visible here).
871 void xml::closeTag(XMLStream & xs, const docstring & tag, const std::string & tagtype)
873 if (tag.empty() || tag == "NONE" || tag == "IGNORE")
876 if (tag == "para" || tagtype == "paragraph") // Special case for <para>: always considered as a paragraph.
877 closeParTag(xs, tag);
878 else if (tagtype == "block")
879 closeBlockTag(xs, tag);
880 else if (tagtype == "inline")
881 closeInlineTag(xs, tag);
882 else if (tagtype == "none")
883 xs << xml::EndTag(tag);
885 xs.writeError("Unrecognised tag type '" + tagtype + "' for '" + to_utf8(tag) + "'");
// Convenience overload: converts `tag` with from_utf8 and forwards to the
// docstring overload.
889 void xml::closeTag(XMLStream & xs, const std::string & tag, const std::string & tagtype)
891 xml::closeTag(xs, from_utf8(tag), tagtype);
// Writes a complete tag CompTag(tag, attr) to `xs`. Tag types "paragraph" and
// "block" (and any tag named "para") share one branch that also tests
// xs.isLastTagCR(); "inline" writes the tag alone; any other tag type is
// reported through xs.writeError. An empty tag or "NONE" is special-cased
// first.
// NOTE(review): the statements guarded by the empty/NONE test and by
// isLastTagCR() are on lines not visible in this listing.
895 void xml::compTag(XMLStream & xs, const docstring & tag, const docstring & attr, const std::string & tagtype)
897 if (tag.empty() || tag == from_ascii("NONE"))
900 // Special case for <para>: always considered as a paragraph.
901 if (tag == from_ascii("para") || tagtype == "paragraph" || tagtype == "block") {
902 if (!xs.isLastTagCR())
904 xs << xml::CompTag(tag, attr);
906 } else if (tagtype == "inline") {
907 xs << xml::CompTag(tag, attr);
909 xs.writeError("Unrecognised tag type '" + tagtype + "' for '" + to_utf8(tag) + "'");
// Convenience overload: converts `tag` and `attr` with from_utf8 and forwards
// to the docstring/docstring overload.
914 void xml::compTag(XMLStream & xs, const std::string & tag, const std::string & attr, const std::string & tagtype)
916 xml::compTag(xs, from_utf8(tag), from_utf8(attr), tagtype);
// Convenience overload: converts `attr` with from_utf8 and forwards to the
// docstring/docstring overload.
920 void xml::compTag(XMLStream & xs, const docstring & tag, const std::string & attr, const std::string & tagtype)
922 xml::compTag(xs, tag, from_utf8(attr), tagtype);
// Convenience overload: converts `tag` with from_utf8 and forwards to the
// docstring/docstring overload.
926 void xml::compTag(XMLStream & xs, const std::string & tag, const docstring & attr, const std::string & tagtype)
928 xml::compTag(xs, from_utf8(tag), attr, tagtype);