3 * This file is part of LyX, the document processor.
4 * License details can be found in the file COPYING.
9 * Full author contact details are available in file CREDITS.
17 #include "BufferParams.h"
20 #include "Paragraph.h"
22 #include "TextClass.h"
24 #include "support/convert.h"
25 #include "support/debug.h"
26 #include "support/docstream.h"
27 #include "support/lassert.h"
28 #include "support/lstrings.h"
29 #include "support/textutils.h"
34 #include <QThreadStorage>
37 using namespace lyx::support;
// Maps a single character c to the docstring that is emitted for it under
// escape setting e. Only part of the body is visible in this view: the switch
// dispatches on e, ESCAPE_NONE and ESCAPE_COMMENTS share one branch, and the
// ESCAPE_ALL branch tests c against '>' as the second arm of an if/else chain.
// The case for ESCAPE_AND follows the ESCAPE_ALL one.
43 docstring escapeChar(char_type c, XMLStream::EscapeSettings e)
46 switch (e) { // For HTML: always ESCAPE_NONE. For XML: it depends, hence the parameter.
47 case XMLStream::ESCAPE_NONE:
48 case XMLStream::ESCAPE_COMMENTS:
51 case XMLStream::ESCAPE_ALL:
55 } else if (c == '>') {
60 case XMLStream::ESCAPE_AND:
// Narrow-char overload. Checks through LATTEST that c, viewed as an unsigned
// char, is below 0x80 (i.e. 7-bit), then forwards to the char_type overload
// with the same escape setting.
71 docstring escapeChar(char c, XMLStream::EscapeSettings e)
73 LATTEST(static_cast<unsigned char>(c) < 0x80);
74 return escapeChar(static_cast<char_type>(c), e);
// Escapes a whole string: raw is processed one character at a time by index
// and the result is accumulated in bin, whose capacity is reserved up front at
// twice the input length. Under ESCAPE_COMMENTS a '-' that directly follows
// another '-' is singled out (the statement for that case is not visible in
// this view); the visible path appends xml::escapeChar(c, e).
78 docstring escapeString(docstring const & raw, XMLStream::EscapeSettings e)
81 bin.reserve(raw.size() * 2); // crude approximation is sufficient
82 for (size_t i = 0; i != raw.size(); ++i) {
84 if (e == XMLStream::ESCAPE_COMMENTS && c == '-' && i > 0 && raw[i - 1] == '-')
87 bin += xml::escapeChar(c, e);
// Builds a sanitized copy of str in newname: every character for which
// isAlnumASCII() is true is kept, every other character is replaced by '_'.
// The declaration of newname and the return are not visible in this view.
94 docstring cleanAttr(docstring const & str)
97 docstring::const_iterator it = str.begin();
98 docstring::const_iterator en = str.end();
99 for (; it != en; ++it) {
100 char_type const c = *it;
101 newname += isAlnumASCII(c) ? c : char_type('_');
// Serializes the opening of this tag: '<' followed by tag_, then, when attr_
// is non-empty, a single space and the attributes. The attributes go through
// xml::escapeString with ESCAPE_NONE and xml::trimLeft; nothing is appended if
// the result is empty. The rest of the function is not visible in this view.
107 docstring StartTag::writeTag() const
109 docstring output = '<' + tag_;
110 if (!attr_.empty()) {
111 docstring attributes = xml::trimLeft(xml::escapeString(attr_, XMLStream::ESCAPE_NONE));
112 if (!attributes.empty())
113 output += ' ' + attributes;
// Returns the closing markup matching this start tag: "</", tag_, ">".
120 docstring StartTag::writeEndTag() const
122 return from_utf8("</") + tag_ + from_utf8(">");
// Returns the closing markup for this end tag: "</", tag_, ">".
126 docstring EndTag::writeEndTag() const
128 return from_utf8("</") + tag_ + from_utf8(">");
// Serializes this tag starting with '<' and tag_. When attr_ is non-empty it
// is passed through escapeString with ESCAPE_NONE, its leading characters for
// which isSpace() is true are erased (std::find_if locates the first
// non-space), and the remainder, if any, is appended after a single space.
// How the tag is terminated is not visible in this view.
132 docstring CompTag::writeTag() const
134 docstring output = '<' + tag_;
135 if (!attr_.empty()) {
136 // Erase the beginning of the attributes if it contains space characters: this function deals with that
138 docstring attributes = escapeString(attr_, XMLStream::ESCAPE_NONE);
139 attributes.erase(attributes.begin(), std::find_if(attributes.begin(), attributes.end(),
140 [](char_type c) {return !isSpace(c);}));
141 if (!attributes.empty()) {
142 output += ' ' + attributes;
// Reports an output problem: logs s on the Debug::OUTFILE channel and writes
// "<!-- Output Error: " + s + " -->" into the stream in a single write that is
// preceded by the ESCAPE_NONE setting.
// NOTE(review): the message s is part of that same ESCAPE_NONE write. If s can
// contain "--" or "-->", the emitted XML comment would be malformed; confirm
// what callers pass, or whether ESCAPE_COMMENTS should apply to s.
152 void XMLStream::writeError(std::string const &s)
154 LYXERR(Debug::OUTFILE, s);
155 *this << ESCAPE_NONE << from_utf8("<!-- Output Error: " + s + " -->");
// docstring overload of writeError. Logs s on the Debug::OUTFILE channel, then
// writes the comment opener "<!-- Output Error: " and the closer " -->" as two
// separate writes, each preceded by ESCAPE_NONE. The write of s between them
// is not visible in this view.
160 void XMLStream::writeError(docstring const &s)
162 LYXERR(Debug::OUTFILE, s);
163 *this << ESCAPE_NONE << from_utf8("<!-- Output Error: ");
165 *this << ESCAPE_NONE << from_utf8(" -->");
// Returns the entry at the back of tag_stack_, i.e. the most recently pushed
// open tag.
// NOTE(review): no emptiness check is visible; presumably callers guarantee
// that at least one tag is open before calling this. Confirm.
170 XMLStream::TagPtr XMLStream::getLastStackTag()
172 return tag_stack_.back();
// Closes the font tags sitting on top of tag_stack_.
// Starting from the back of the stack, while the current entry is a font tag
// (asFontTag() is non-null) its end tag is written to os_ (unless it equals
// xml::parsep_tag) and the entry is popped. If the walk ends on something
// other than xml::parsep_tag, an error comment is written, followed by one
// error comment per remaining stack entry, visited from the top of the stack.
// The return statements (and thus the meaning of the bool result) are not
// visible in this view.
176 bool XMLStream::closeFontTags()
178 if (isTagPending(xml::parsep_tag))
179 // we haven't had any content
182 // this may be a useless check, since we ought at least to have
183 // the parsep_tag. but it can't hurt too much to be careful.
184 if (tag_stack_.empty())
187 // first, we close any open font tags we can close
188 TagPtr *curtag = &tag_stack_.back();
189 while ((*curtag)->asFontTag()) {
190 if (**curtag != xml::parsep_tag)
191 os_ << (*curtag)->writeEndTag();
192 tag_stack_.pop_back();
193 if (tag_stack_.empty())
// the pointer is refreshed after each pop because back() has changed
195 curtag = &tag_stack_.back();
198 if (**curtag == xml::parsep_tag)
201 // so we've hit a non-font tag.
202 writeError("Tags still open in closeFontTags(). Probably not a problem,\n"
203 "but you might want to check these tags:");
204 TagDeque::const_reverse_iterator it = tag_stack_.rbegin();
205 TagDeque::const_reverse_iterator const en = tag_stack_.rend();
206 for (; it != en; ++it) {
207 if (**it == xml::parsep_tag)
209 writeError((*it)->tag_);
// Marks the start of a division by queuing a StartTag built from
// xml::parsep_tag at the back of pending_tags_. How keep_empty is used is not
// visible in this view.
215 void XMLStream::startDivision(bool keep_empty)
217 pending_tags_.push_back(makeTagPtr(xml::StartTag(xml::parsep_tag)));
// Ends a division delimited by xml::parsep_tag. Three situations are handled:
//  - parsep_tag is still pending (no content was written): pending tags are
//    popped from the back, up to and including parsep_tag;
//  - parsep_tag is neither pending nor open: an error comment is written;
//  - parsep_tag is on tag_stack_: entries are popped from the top of the
//    stack; each one met before parsep_tag is reported with writeError() and
//    its end tag is written to os_.
// dumpTagStack("EndDivision") is called on both of the popping paths.
223 void XMLStream::endDivision()
225 if (isTagPending(xml::parsep_tag)) {
226 // this case is normal. it just means we didn't have content,
227 // so the parsep_tag never got moved onto the tag stack.
228 while (!pending_tags_.empty()) {
229 // clear all pending tags up to and including the parsep tag.
230 // note that we work from the back, because we want to get rid
231 // of everything that hasn't been used.
232 TagPtr const cur_tag = pending_tags_.back();
233 pending_tags_.pop_back();
234 if (*cur_tag == xml::parsep_tag)
239 dumpTagStack("EndDivision");
245 if (!isTagOpen(xml::parsep_tag)) {
246 writeError("No division separation tag found in endDivision().");
250 // this case is also normal, if the parsep tag is the last one
251 // on the stack. otherwise, it's an error.
252 while (!tag_stack_.empty()) {
// cur_tag is a copy of the pointer, so it stays usable after pop_back()
253 TagPtr const cur_tag = tag_stack_.back();
254 tag_stack_.pop_back();
255 if (*cur_tag == xml::parsep_tag)
257 writeError("Tag `" + cur_tag->tag_ + "' still open at end of paragraph. Closing.");
258 os_ << cur_tag->writeEndTag();
262 dumpTagStack("EndDivision");
// Drains pending_tags_ from the front: each pending tag is pushed onto
// tag_stack_ and then removed from the pending deque, so the tags become
// "open" in the order they were queued. writeTag() output goes to os_.
// NOTE(review): the line directly after the parsep_tag test is not visible in
// this view, so it cannot be told from here whether the write to os_ is the
// statement guarded by that test.
267 void XMLStream::clearTagDeque()
269 while (!pending_tags_.empty()) {
// reference into the deque; it is only used before pop_front() below
270 TagPtr const & tag = pending_tags_.front();
271 if (*tag != xml::parsep_tag)
273 os_ << tag->writeTag();
274 tag_stack_.push_back(tag);
275 pending_tags_.pop_front();
// Writes text d to os_ after escaping it with xml::escapeString under the
// current escape_ setting, then resets escape_ to ESCAPE_ALL, so a setting
// streamed in beforehand applies to this one write only. Also clears
// is_last_tag_cr_. One statement between the flag reset and the write is not
// visible in this view.
280 XMLStream &XMLStream::operator<<(docstring const &d)
282 is_last_tag_cr_ = false;
284 os_ << xml::escapeString(d, escape_);
285 escape_ = ESCAPE_ALL;
// C-string overload: s is converted with from_ascii(), escaped with
// xml::escapeString under the current escape_ setting and written to os_;
// escape_ is then reset to ESCAPE_ALL and is_last_tag_cr_ is cleared.
// assumes s is plain ASCII, since from_ascii() is used -- TODO confirm
290 XMLStream &XMLStream::operator<<(const char *s)
292 is_last_tag_cr_ = false;
294 docstring const d = from_ascii(s);
295 os_ << xml::escapeString(d, escape_);
296 escape_ = ESCAPE_ALL;
// Single char_type overload: writes xml::escapeChar(c, escape_) to os_, resets
// escape_ to ESCAPE_ALL and clears is_last_tag_cr_.
301 XMLStream &XMLStream::operator<<(char_type c)
303 is_last_tag_cr_ = false;
305 os_ << xml::escapeChar(c, escape_);
306 escape_ = ESCAPE_ALL;
// Narrow char overload: writes xml::escapeChar(c, escape_) to os_, resets
// escape_ to ESCAPE_ALL and clears is_last_tag_cr_. The char overload of
// escapeChar checks that c is below 0x80.
311 XMLStream &XMLStream::operator<<(char c)
313 is_last_tag_cr_ = false;
315 os_ << xml::escapeChar(c, escape_);
316 escape_ = ESCAPE_ALL;
// Integer overload: clears is_last_tag_cr_ and resets escape_ to ESCAPE_ALL,
// like the text overloads. The statement that actually writes i is not
// visible in this view.
321 XMLStream &XMLStream::operator<<(int i)
323 is_last_tag_cr_ = false;
326 escape_ = ESCAPE_ALL;
// Overload taking an escape setting e; used in the form
// `*this << ESCAPE_NONE << text` elsewhere in this file. The body is not
// visible in this view apart from the note below; presumably it stores e for
// the next write -- TODO confirm.
331 XMLStream &XMLStream::operator<<(EscapeSettings e)
333 // Don't update is_last_tag_cr_ here, as this does not output anything.
// Start-tag overload: the tag is not written immediately but queued at the
// back of pending_tags_ (wrapped by makeTagPtr). Clears is_last_tag_cr_.
// A tag with an empty tag_ is special-cased; the statement taken in that case
// is not visible in this view.
339 XMLStream &XMLStream::operator<<(xml::StartTag const &tag)
341 is_last_tag_cr_ = false;
342 if (tag.tag_.empty())
344 pending_tags_.push_back(makeTagPtr(tag));
// ParTag overload: queues the tag at the back of pending_tags_ (wrapped by
// makeTagPtr) and clears is_last_tag_cr_. A tag with an empty tag_ is
// special-cased; the statement taken in that case is not visible in this view.
351 XMLStream &XMLStream::operator<<(xml::ParTag const &tag)
353 is_last_tag_cr_ = false;
354 if (tag.tag_.empty())
356 pending_tags_.push_back(makeTagPtr(tag));
// CompTag overload: unlike the start-tag overloads, the serialized tag
// (tag.writeTag()) is written to os_ here rather than queued. Clears
// is_last_tag_cr_. A tag with an empty tag_ is special-cased; the statement
// taken in that case, and the lines before the write, are not visible here.
361 XMLStream &XMLStream::operator<<(xml::CompTag const &tag)
363 is_last_tag_cr_ = false;
364 if (tag.tag_.empty())
367 os_ << tag.writeTag();
// FontTag overload: queues the tag at the back of pending_tags_ (wrapped by
// makeTagPtr) and clears is_last_tag_cr_. A tag with an empty tag_ is
// special-cased; the statement taken in that case is not visible in this view.
372 XMLStream &XMLStream::operator<<(xml::FontTag const &tag)
374 is_last_tag_cr_ = false;
375 if (tag.tag_.empty())
377 pending_tags_.push_back(makeTagPtr(tag));
// Line-break overload: writes "\n" to os_ and sets is_last_tag_cr_ to true.
// This is the only visible writer that sets the flag; the other operator<<
// overloads in this file clear it. One statement between the flag update and
// the write is not visible in this view.
382 XMLStream &XMLStream::operator<<(xml::CR const &)
384 is_last_tag_cr_ = true;
386 os_ << from_ascii("\n");
// Looks for stag among the open tags. The scan walks tag_stack_ from begin()
// and stops at the end of the stack or as soon as maxdepth is 0. The loop body
// (the comparison, and whatever changes maxdepth) is not visible in this view.
391 bool XMLStream::isTagOpen(xml::StartTag const &stag, int maxdepth) const
393 auto sit = tag_stack_.begin();
394 auto sen = tag_stack_.cend();
395 for (; sit != sen && maxdepth != 0; ++sit) {
// EndTag overload of isTagOpen: same scan of tag_stack_ from begin(), stopping
// at the end of the stack or as soon as maxdepth is 0. The loop body is not
// visible in this view.
404 bool XMLStream::isTagOpen(xml::EndTag const &etag, int maxdepth) const
406 auto sit = tag_stack_.begin();
407 auto sen = tag_stack_.cend();
408 for (; sit != sen && maxdepth != 0; ++sit) {
// Looks for stag among the tags that are queued but not yet written. The scan
// walks pending_tags_ from begin() and stops at the end or as soon as maxdepth
// is 0. The loop body is not visible in this view.
417 bool XMLStream::isTagPending(xml::StartTag const &stag, int maxdepth) const
419 auto sit = pending_tags_.begin();
420 auto sen = pending_tags_.cend();
421 for (; sit != sen && maxdepth != 0; ++sit) {
// Closes a tag. Visible stages, in order:
//  1. pending tags: if etag matches the last pending tag, that tag is simply
//     dropped (nothing was written for it). If it matches another pending
//     tag, an error is written and that entry is erased. If it is not pending
//     and not open, an error is written. If it is open, the pending tags are
//     listed in an error string and cleared.
//  2. open tags: if the stack is empty, an error is written. If etag matches
//     the top of tag_stack_, its end tag is written and the entry is popped.
//     If it is not open at all, an error is written.
//  3. mis-nested font tag: when only font tags were opened since, they are
//     closed, etag is closed, and the closed font tags are re-queued on
//     pending_tags_; a non-font tag in between produces an error instead.
//  4. mis-nested non-font tag: every tag opened since is reported and closed
//     (no end tag is written for parsep_tag), then etag itself is closed.
// The early returns between these stages are not visible in this view.
430 // this is complicated, because we want to make sure that
431 // everything is properly nested. the code ought to make
432 // sure of that, but we won't assert (yet) if we run into
433 // a problem. we'll just output error messages and try our
434 // best to make things work.
435 XMLStream &XMLStream::operator<<(xml::EndTag const &etag)
437 is_last_tag_cr_ = false;
439 if (etag.tag_.empty())
442 // if this tag is pending, we can simply discard it.
443 if (!pending_tags_.empty()) {
444 if (etag == *pending_tags_.back()) {
445 // we have <tag></tag>, so we discard it and remove it
446 // from the pending_tags_.
447 pending_tags_.pop_back();
451 // there is a pending tag that isn't the one we are trying
454 // is this tag itself pending?
455 // non-const iterators because we may call erase().
456 TagDeque::iterator dit = pending_tags_.begin();
457 TagDeque::iterator const den = pending_tags_.end();
458 for (; dit != den; ++dit) {
460 // it was pending, so we just erase it
// NOTE(review): writeError() writes through operator<< on this stream. If
// that path modifies pending_tags_ (not visible here), dit and den would be
// invalidated before the erase() below; the emptiness re-check that follows
// suggests the deque can indeed change. Confirm.
461 writeError("Tried to close pending tag `" + to_utf8(etag.tag_)
462 + "' when other tags were pending. Last pending tag is `"
463 + to_utf8(pending_tags_.back()->writeTag())
464 + "'. Tag discarded.");
465 if (!pending_tags_.empty())
466 pending_tags_.erase(dit);
470 // so etag isn't itself pending. is it even open?
471 if (!isTagOpen(etag)) {
472 writeError("Tried to close `" + to_utf8(etag.tag_)
473 + "' when tag was not open. Tag discarded.");
476 // ok, so etag is open.
477 // our strategy will be as below: we will do what we need to
478 // do to close this tag.
479 string estr = "Closing tag `" + to_utf8(etag.tag_)
480 + "' when other tags are pending. Discarded pending tags:\n";
// the serialized tags are escaped with ESCAPE_ALL before being added to estr
481 for (dit = pending_tags_.begin(); dit != den; ++dit)
482 estr += to_utf8(xml::escapeString((*dit)->writeTag(), XMLStream::ESCAPE_ALL)) + "\n";
484 // clear the pending tags...
485 pending_tags_.clear();
486 // ...and then just fall through.
489 // make sure there are tags to be closed
490 if (tag_stack_.empty()) {
491 writeError("Tried to close `" + etag.tag_
492 + "' when no tags were open!");
496 // is the tag we are closing the last one we opened?
497 if (etag == *tag_stack_.back()) {
499 os_ << etag.writeEndTag();
500 // ...and forget about it
501 tag_stack_.pop_back();
505 // we are trying to close a tag other than the one last opened.
506 // let's first see if this particular tag is still open somehow.
507 if (!isTagOpen(etag)) {
508 writeError("Tried to close `" + etag.tag_
509 + "' when tag was not open. Tag discarded.");
513 // so the tag was opened, but other tags have been opened since
514 // and not yet closed.
515 // if it's a font tag, though...
516 if (etag.asFontTag()) {
517 // it won't be a problem if the other tags open since this one
518 // are also font tags.
519 TagDeque::const_reverse_iterator rit = tag_stack_.rbegin();
520 TagDeque::const_reverse_iterator ren = tag_stack_.rend();
521 for (; rit != ren; ++rit) {
524 if (!(*rit)->asFontTag()) {
525 // we'll just leave it and, presumably, have to close it later.
526 writeError("Unable to close font tag `" + etag.tag_
527 + "' due to open non-font tag `" + (*rit)->tag_ + "'.");
533 // <em>this is <strong>bold
534 // and are being asked to close em. we want:
535 // <em>this is <strong>bold</strong></em><strong>
536 // first, we close the intervening tags...
537 TagPtr *curtag = &tag_stack_.back();
538 // ...remembering them in a stack.
// this loop relies on etag being on the stack (checked with isTagOpen above)
540 while (etag != **curtag) {
541 os_ << (*curtag)->writeEndTag();
542 fontstack.push_back(*curtag);
543 tag_stack_.pop_back();
544 curtag = &tag_stack_.back();
546 os_ << etag.writeEndTag();
547 tag_stack_.pop_back();
549 // ...and restore the other tags.
// reverse iteration re-queues the tags in the order they were first opened
550 rit = fontstack.rbegin();
551 ren = fontstack.rend();
552 for (; rit != ren; ++rit)
553 pending_tags_.push_back(*rit);
557 // it wasn't a font tag.
558 // so other tags were opened before this one and not properly closed.
559 // so we'll close them, too. that may cause other issues later, but it
560 // at least guarantees proper nesting.
561 writeError("Closing tag `" + etag.tag_
562 + "' when other tags are open, namely:");
563 TagPtr *curtag = &tag_stack_.back();
564 while (etag != **curtag) {
565 writeError((*curtag)->tag_);
566 if (**curtag != xml::parsep_tag)
567 os_ << (*curtag)->writeEndTag();
568 tag_stack_.pop_back();
569 curtag = &tag_stack_.back();
571 // curtag is now the one we actually want.
572 os_ << (*curtag)->writeEndTag();
573 tag_stack_.pop_back();
// Returns label followed by a number taken from a counter shared by all
// calls. The counter is a function-local static atomic_uint initialised to
// 1000 and pre-incremented on each call, so the first suffix handed out is
// 1001 and no two calls receive the same number.
579 docstring xml::uniqueID(docstring const & label)
582 static atomic_uint seed(1000);
583 return label + convert<docstring>(++seed);
// Scans str for a character that is none of ' ', '\t', '\n', '\v', '\f', '\r'.
// The return statements are not visible in this view; going by the name, the
// result is presumably true when such a character exists -- TODO confirm.
587 bool xml::isNotOnlySpace(docstring const & str)
589 for (auto const & c: str) {
590 if (c != ' ' && c != '\t' && c != '\n' && c != '\v' && c != '\f' && c != '\r')
// Strips leading whitespace (' ', '\t', '\n', '\v', '\f', '\r') from str: on
// the first character outside that set, the substring starting at index i is
// returned. The declaration and update of i, and the value returned when every
// character is whitespace, are not visible in this view.
597 docstring xml::trimLeft(docstring const & str)
600 for (auto const & c: str) {
601 if (c != ' ' && c != '\t' && c != '\n' && c != '\v' && c != '\f' && c != '\r')
602 return str.substr(i, docstring::npos);
// Turns orig into a string usable as an xml:id and remembers the result.
// Results are cached per thread (QThreadStorage) in a map from the original
// name to the mangled one, so a name seen before is returned unchanged from
// the cache. Otherwise the name is rebuilt character by character (three
// classes of character are distinguished below; what is appended for each is
// not visible in this view), a per-thread counter supplies a "-<n>" suffix
// when a changed name collides with one already in the map, and the result
// is stored under orig.
609 docstring xml::cleanID(docstring const & orig)
611 // The standard xml:id only allows letters, digits, '-' and '.' in a name.
612 // This routine replaces illegal characters by '-' or '.' and adds a number for uniqueness if need be.
614 // Use a cache of already mangled names: the alterations may merge several IDs as one. This ensures that the IDs
615 // are not mixed up in the document.
616 // This code could be improved: it uses Qt outside the GUI part. Any TLS implementation could do the trick.
617 typedef map<docstring, docstring> MangledMap;
618 static QThreadStorage<MangledMap> tMangledNames;
619 static QThreadStorage<int> tMangleID;
621 // If the name is already known, just return it.
622 MangledMap & mangledNames = tMangledNames.localData();
623 auto const known = mangledNames.find(orig);
624 if (known != mangledNames.end())
625 return known->second;
627 // Start creating the mangled name by iterating over the characters.
629 auto it = orig.cbegin();
630 auto end = orig.cend();
632 // Make sure it starts with a letter.
// NOTE(review): *it is read here with no visible check that orig is
// non-empty; for an empty orig this would dereference the end iterator.
// Confirm that callers never pass an empty ID.
633 if (!isAlphaASCII(*it))
636 // Parse the ID character by character and change what needs to.
637 bool mangle = false; // Indicates whether the ID had to be changed, i.e. if ID no more ensured to be unique.
638 for (; it != end; ++it) {
640 if (isAlphaASCII(c) || isDigitASCII(c) || c == '-' || c == '.' || c == '_') {
642 } else if (c == ':' || c == ',' || c == ';' || c == '!') {
645 } else { // Other invalid characters, such as ' '.
651 // If there had to be a change, check if ID uniqueness is still guaranteed.
652 // This avoids having a clash if satisfying XML requirements for ID makes two IDs identical, like "a:b" and "a!b",
653 // as both of them would be transformed as "a.b". With this procedure, one will become "a.b" and the other "a.b-1".
// NOTE(review): this find() looks content up among the map's keys (original
// names), whereas mangled names are stored as values; confirm this detects
// the collision described above.
654 if (mangle && mangledNames.find(content) != mangledNames.end()) {
655 int & mangleID = tMangleID.localData();
657 content += "-" + convert<docstring>(mangleID);
661 // Save the new ID to avoid recomputing it afterwards and to ensure stability over the document.
662 mangledNames[orig] = content;
// Equality of two start tags. Each side is probed with asFontTag():
//  - exactly one of them is a font tag: handled by the first test (its result
//    is not visible in this view; presumably "not equal");
//  - neither is a font tag: equal when the tag_ names match;
//  - both are font tags: equal when tag_ and font_type_ both match.
667 bool operator==(xml::StartTag const & lhs, xml::StartTag const & rhs)
669 xml::FontTag const * const lhs_ft = lhs.asFontTag();
670 xml::FontTag const * const rhs_ft = rhs.asFontTag();
672 if ((!lhs_ft && rhs_ft) || (lhs_ft && !rhs_ft))
674 if (!lhs_ft && !rhs_ft)
675 return lhs.tag_ == rhs.tag_;
676 return lhs_ft->tag_ == rhs_ft->tag_ && lhs_ft->font_type_ == rhs_ft->font_type_;
// Tests whether an end tag matches a start tag. Each side is probed with
// asFontTag():
//  - exactly one of them is a font tag: handled by the first test (its result
//    is not visible in this view; presumably "no match");
//  - neither is a font tag: they match when the tag_ names are equal;
//  - both are font tags: they match when tag_ and font_type_ are both equal.
680 bool operator==(xml::EndTag const & lhs, xml::StartTag const & rhs)
682 xml::EndFontTag const * const lhs_ft = lhs.asFontTag();
683 xml::FontTag const * const rhs_ft = rhs.asFontTag();
685 if ((!lhs_ft && rhs_ft) || (lhs_ft && !rhs_ft))
687 if (!lhs_ft && !rhs_ft)
688 return lhs.tag_ == rhs.tag_;
689 return lhs_ft->tag_ == rhs_ft->tag_ && lhs_ft->font_type_ == rhs_ft->font_type_;
// Negation of operator==(EndTag, StartTag).
693 bool operator!=(xml::EndTag const & lhs, xml::StartTag const & rhs)
695 return !(lhs == rhs);
// Negation of operator==(StartTag, StartTag).
699 bool operator!=(xml::StartTag const & lhs, xml::StartTag const & rhs)
701 return !(lhs == rhs);
// Writes an opening tag for name directly to os. Every '<' and '>' in
// attribute is first replaced by a double quote. Nothing is written when name
// is empty, is the comment marker "!-- --" or is "dummy"; otherwise '<' and
// the name are written, and the adjusted attribute string after a space. The
// condition guarding the attribute write and the closing of the tag are not
// visible in this view.
705 void xml::openTag(odocstream & os, string const & name, string const & attribute)
708 // This should be fixed in layout files later.
709 string param = subst(attribute, "<", "\"");
710 param = subst(param, ">", "\"");
712 // Note: we ignore the name if it is empty or if it is a comment "<!-- -->" or
713 // if the name is *dummy*.
714 // We ignore dummy because dummy is not a valid DocBook element and it is
715 // the internal name given to single paragraphs in the latex output.
716 // This allows us to simplify the code a lot and is a reasonable compromise.
717 if (!name.empty() && name != "!-- --" && name != "dummy") {
718 os << '<' << from_ascii(name);
720 os << ' ' << from_ascii(param);
// Writes the closing tag "</name>" directly to os. Nothing is written when
// name is empty, is the comment marker "!-- --" or is "dummy", mirroring the
// names skipped by the odocstream overload of openTag.
726 void xml::closeTag(odocstream & os, string const & name)
728 if (!name.empty() && name != "!-- --" && name != "dummy")
729 os << "</" << from_ascii(name) << '>';
// Opens the tag associated with paragraph par. The tag name and the parameter
// string come from the paragraph's layout (latexname() / latexparam()), and
// the paragraph ID from par.getID(). Two ways of building the attribute string
// are visible:
//  - with the ID: an "id=<...>" fragment is removed from param when param
//    contains '#', and the attribute becomes id + ' ' + param;
//  - without it: when param contains '#', a counter is stepped and every '#'
//    in param is replaced by the counter value for the tag name.
// The conditions selecting between the two are not visible in this view. The
// result is handed to the odocstream overload of openTag.
733 void xml::openTag(Buffer const & buf, odocstream & os,
734 OutputParams const & runparams, Paragraph const & par)
736 Layout const & style = par.layout();
737 string const & name = style.latexname();
738 string param = style.latexparam();
739 Counters & counters = buf.params().documentClass().counters();
741 string id = par.getID(buf, runparams);
745 if (param.find('#') != string::npos) {
746 string::size_type pos = param.find("id=<");
// NOTE(review): this searches for the first '>' in the whole string, not the
// first one after pos. If a '>' occurs before "id=<", end - pos + 1 wraps
// around (unsigned) and the erase below removes the wrong span. Confirm
// that layout parameters never have that shape.
747 string::size_type end = param.find(">");
748 if( pos != string::npos && end != string::npos)
749 param.erase(pos, end-pos + 1);
751 attribute = id + ' ' + param;
753 if (param.find('#') != string::npos) {
755 if (!style.counter.empty())
756 // This uses InternalUpdate at the moment because xml output
757 // does not do anything with tracked counters, and it would need
758 // to track layouts if it did want to use them.
759 counters.step(style.counter, InternalUpdate);
761 counters.step(from_ascii(name), InternalUpdate);
762 int i = counters.value(from_ascii(name));
763 attribute = subst(param, "#", convert<string>(i));
768 openTag(os, name, attribute);
// Closes the tag associated with paragraph par: the tag name is the
// latexname() of the paragraph's layout, passed to the odocstream overload of
// closeTag.
772 void xml::closeTag(odocstream & os, Paragraph const & par)
774 Layout const & style = par.layout();
775 closeTag(os, style.latexname());
// Opens an inline tag: streams a StartTag built from tag and attr into xs,
// with no line-break handling around it.
779 void openInlineTag(XMLStream & xs, const docstring & tag, const docstring & attr)
781 xs << xml::StartTag(tag, attr);
// Closes an inline tag: streams an EndTag built from tag into xs, with no
// line-break handling around it.
785 void closeInlineTag(XMLStream & xs, const docstring & tag)
787 xs << xml::EndTag(tag);
// Opens a paragraph-level tag: streams a StartTag built from tag and attr into
// xs. Beforehand, something is done when the last thing written to xs was not
// a line break (isLastTagCR() is false); that statement is not visible in
// this view -- presumably it emits a line break, TODO confirm.
791 void openParTag(XMLStream & xs, const docstring & tag, const docstring & attr)
793 if (!xs.isLastTagCR())
795 xs << xml::StartTag(tag, attr);
// Closes a paragraph-level tag: streams an EndTag built from tag into xs.
// One further statement of this function is not visible in this view.
799 void closeParTag(XMLStream & xs, const docstring & tag)
801 xs << xml::EndTag(tag);
// Opens a block-level tag: streams a StartTag built from tag and attr into xs.
// Beforehand, something is done when the last thing written to xs was not a
// line break (isLastTagCR() is false); that statement, and whatever follows
// the tag, are not visible in this view.
806 void openBlockTag(XMLStream & xs, const docstring & tag, const docstring & attr)
808 if (!xs.isLastTagCR())
810 xs << xml::StartTag(tag, attr);
// Closes a block-level tag: streams an EndTag built from tag into xs.
// Beforehand, something is done when the last thing written to xs was not a
// line break (isLastTagCR() is false); that statement, and whatever follows
// the tag, are not visible in this view.
815 void closeBlockTag(XMLStream & xs, const docstring & tag)
817 if (!xs.isLastTagCR())
819 xs << xml::EndTag(tag);
// Opens tag on xs, choosing the layout helper from tagtype:
//   "paragraph" (or any tag named "para") -> openParTag
//   "block"                               -> openBlockTag
//   "inline"                              -> openInlineTag
//   "none"                                -> a bare StartTag, no helper
// Any other tagtype is reported with writeError(). An empty tag or the tag
// "NONE" is special-cased first (the statement taken is not visible here).
// NOTE(review): the docstring overload of closeTag also special-cases the tag
// "IGNORE", which this function does not; confirm the asymmetry is intended.
824 void xml::openTag(XMLStream & xs, const docstring & tag, const docstring & attr, const std::string & tagtype)
826 if (tag.empty() || tag == from_ascii("NONE")) // Common check to be performed elsewhere, if it was not here.
829 if (tag == from_ascii("para") || tagtype == "paragraph") // Special case for <para>: always considered as a paragraph.
830 openParTag(xs, tag, attr);
831 else if (tagtype == "block")
832 openBlockTag(xs, tag, attr);
833 else if (tagtype == "inline")
834 openInlineTag(xs, tag, attr);
835 else if (tagtype == "none")
836 xs << xml::StartTag(tag, attr);
838 xs.writeError("Unrecognised tag type '" + tagtype + "' for '" + to_utf8(tag) + (attr.empty() ? "" : " ") +
839 to_utf8(attr) + "'");
// Convenience overload: converts tag and attr from UTF-8 std::string to
// docstring and forwards to the docstring/docstring overload.
843 void xml::openTag(XMLStream & xs, const std::string & tag, const std::string & attr, const std::string & tagtype)
845 xml::openTag(xs, from_utf8(tag), from_utf8(attr), tagtype);
// Convenience overload: converts attr from UTF-8 std::string to docstring and
// forwards to the docstring/docstring overload.
849 void xml::openTag(XMLStream & xs, const docstring & tag, const std::string & attr, const std::string & tagtype)
851 xml::openTag(xs, tag, from_utf8(attr), tagtype);
// Convenience overload: converts tag from UTF-8 std::string to docstring and
// forwards to the docstring/docstring overload.
855 void xml::openTag(XMLStream & xs, const std::string & tag, const docstring & attr, const std::string & tagtype)
857 xml::openTag(xs, from_utf8(tag), attr, tagtype);
// Closes tag on xs, choosing the layout helper from tagtype:
//   "paragraph" (or any tag named "para") -> closeParTag
//   "block"                               -> closeBlockTag
//   "inline"                              -> closeInlineTag
//   "none"                                -> a bare EndTag, no helper
// Any other tagtype is reported with writeError(). An empty tag and the tags
// "NONE" and "IGNORE" are special-cased first (the statement taken is not
// visible in this view).
861 void xml::closeTag(XMLStream & xs, const docstring & tag, const std::string & tagtype)
863 if (tag.empty() || tag == "NONE" || tag == "IGNORE")
866 if (tag == "para" || tagtype == "paragraph") // Special case for <para>: always considered as a paragraph.
867 closeParTag(xs, tag);
868 else if (tagtype == "block")
869 closeBlockTag(xs, tag);
870 else if (tagtype == "inline")
871 closeInlineTag(xs, tag);
872 else if (tagtype == "none")
873 xs << xml::EndTag(tag);
875 xs.writeError("Unrecognised tag type '" + tagtype + "' for '" + to_utf8(tag) + "'");
// Convenience overload: converts tag from UTF-8 std::string to docstring and
// forwards to the docstring overload.
879 void xml::closeTag(XMLStream & xs, const std::string & tag, const std::string & tagtype)
881 xml::closeTag(xs, from_utf8(tag), tagtype);
// Writes a CompTag built from tag and attr to xs. For tagtype "paragraph" or
// "block" (or any tag named "para") something is done first when the last
// thing written was not a line break; that statement is not visible in this
// view. For "inline" the tag is written as is. A writeError() call reports an
// unrecognised tagtype. An empty tag or the tag "NONE" is special-cased first.
// NOTE(review): openTag/closeTag also accept tagtype "none"; no such branch is
// visible here. Confirm whether "none" is meant to be an error for compTag.
885 void xml::compTag(XMLStream & xs, const docstring & tag, const docstring & attr, const std::string & tagtype)
887 if (tag.empty() || tag == from_ascii("NONE"))
890 // Special case for <para>: always considered as a paragraph.
891 if (tag == from_ascii("para") || tagtype == "paragraph" || tagtype == "block") {
892 if (!xs.isLastTagCR())
894 xs << xml::CompTag(tag, attr);
896 } else if (tagtype == "inline") {
897 xs << xml::CompTag(tag, attr);
899 xs.writeError("Unrecognised tag type '" + tagtype + "' for '" + to_utf8(tag) + "'");
// Convenience overload: converts tag and attr from UTF-8 std::string to
// docstring and forwards to the docstring/docstring overload.
904 void xml::compTag(XMLStream & xs, const std::string & tag, const std::string & attr, const std::string & tagtype)
906 xml::compTag(xs, from_utf8(tag), from_utf8(attr), tagtype);
// Convenience overload: converts attr from UTF-8 std::string to docstring and
// forwards to the docstring/docstring overload.
910 void xml::compTag(XMLStream & xs, const docstring & tag, const std::string & attr, const std::string & tagtype)
912 xml::compTag(xs, tag, from_utf8(attr), tagtype);
// Convenience overload: converts tag from UTF-8 std::string to docstring and
// forwards to the docstring/docstring overload.
916 void xml::compTag(XMLStream & xs, const std::string & tag, const docstring & attr, const std::string & tagtype)
918 xml::compTag(xs, from_utf8(tag), attr, tagtype);