3 * This file is part of LyX, the document processor.
4 * License details can be found in the file COPYING.
9 * Full author contact details are available in file CREDITS.
17 #include "BufferParams.h"
20 #include "OutputParams.h"
21 #include "Paragraph.h"
23 #include "TextClass.h"
25 #include "support/convert.h"
26 #include "support/debug.h"
27 #include "support/docstream.h"
28 #include "support/lassert.h"
29 #include "support/lstrings.h"
30 #include "support/textutils.h"
35 #include <QThreadStorage>
38 using namespace lyx::support;
// Returns the text to output for the single character c. The escaping rules
// are selected by switching on the escape setting e.
44 docstring escapeChar(char_type c, XMLStream::EscapeSettings e)
47 switch (e) { // For HTML: always ESCAPE_NONE. For XML: it depends, hence the parameter.
48 case XMLStream::ESCAPE_NONE:
49 case XMLStream::ESCAPE_COMMENTS:
52 case XMLStream::ESCAPE_ALL:
56 } else if (c == '>') {
61 case XMLStream::ESCAPE_AND:
// Narrow-char overload: checks with LATTEST that c is below 0x80, then
// forwards to the char_type overload.
72 docstring escapeChar(char c, XMLStream::EscapeSettings e)
74 LATTEST(static_cast<unsigned char>(c) < 0x80);
75 return escapeChar(static_cast<char_type>(c), e);
// Builds an escaped copy of raw by walking it index by index and appending
// xml::escapeChar() results. With ESCAPE_COMMENTS, a '-' that directly
// follows another '-' is detected and special-cased.
79 docstring escapeString(docstring const & raw, XMLStream::EscapeSettings e)
82 bin.reserve(raw.size() * 2); // crude approximation is sufficient
83 for (size_t i = 0; i != raw.size(); ++i) {
85 if (e == XMLStream::ESCAPE_COMMENTS && c == '-' && i > 0 && raw[i - 1] == '-')
88 bin += xml::escapeChar(c, e);
// Copies str character by character, keeping ASCII alphanumeric characters
// and replacing every other character with '_'.
95 docstring cleanAttr(docstring const & str)
98 docstring::const_iterator it = str.begin();
99 docstring::const_iterator en = str.end();
100 for (; it != en; ++it) {
101 char_type const c = *it;
102 newname += isAlnumASCII(c) ? c : char_type('_');
// Builds the opening tag text: '<' followed by tag_, then (when attr_ is
// non-empty and something remains after left-trimming) a space and the
// attributes. The attributes are passed through escapeString() with
// ESCAPE_NONE.
108 docstring StartTag::writeTag() const
110 docstring output = '<' + tag_;
111 if (!attr_.empty()) {
112 docstring attributes = xml::trimLeft(xml::escapeString(attr_, XMLStream::ESCAPE_NONE));
113 if (!attributes.empty())
114 output += ' ' + attributes;
// Returns the closing tag text for this tag: "</" + tag_ + ">".
121 docstring StartTag::writeEndTag() const
123 return from_utf8("</") + tag_ + from_utf8(">");
// Equality comparison between this start tag and a font tag.
127 bool StartTag::operator==(FontTag const &rhs) const
// Returns the closing tag text: "</" + tag_ + ">".
133 docstring EndTag::writeEndTag() const
135 return from_utf8("</") + tag_ + from_utf8(">");
// Builds the tag text starting with '<' + tag_. When attr_ is non-empty, the
// attributes (escaped with ESCAPE_NONE, leading whitespace removed) are
// appended after a single space, provided something remains.
139 docstring CompTag::writeTag() const
141 docstring output = '<' + tag_;
142 if (!attr_.empty()) {
143 // Erase the beginning of the attributes if it contains space characters: this function deals with that
145 docstring attributes = escapeString(attr_, XMLStream::ESCAPE_NONE);
// Drop everything before the first non-space character.
146 attributes.erase(attributes.begin(), std::find_if(attributes.begin(), attributes.end(),
147 [](char_type c) {return !isSpace(c);}));
148 if (!attributes.empty()) {
149 output += ' ' + attributes;
// Equality against a generic StartTag: obtains the FontTag view of tag via
// asFontTag() and compares the font types.
157 bool FontTag::operator==(StartTag const & tag) const
159 FontTag const * const ftag = tag.asFontTag();
162 return (font_type_ == ftag->font_type_);
// Writes the message s to the stream wrapped in an XML comment of the form
// "<!-- Output Error: ... -->". ESCAPE_NONE is set first so the comment
// markup is not escaped.
168 void XMLStream::writeError(std::string const &s)
171 *this << ESCAPE_NONE << from_utf8("<!-- Output Error: " + s + " -->");
// docstring overload: writes the opening and closing parts of an
// "<!-- Output Error: ... -->" comment, each preceded by ESCAPE_NONE so the
// comment markup is not escaped.
176 void XMLStream::writeError(docstring const &s)
179 *this << ESCAPE_NONE << from_utf8("<!-- Output Error: ");
181 *this << ESCAPE_NONE << from_utf8(" -->");
// Returns the tag at the back of tag_stack_.
// NOTE(review): back() on an empty container is undefined; presumably callers
// guarantee the stack is non-empty -- confirm.
186 XMLStream::TagPtr XMLStream::getLastStackTag()
188 return tag_stack_.back();
// Closes font tags from the back of tag_stack_: each font tag is popped and,
// unless it equals parsep_tag, its end tag is written to os_. If a non-font
// tag other than parsep_tag is reached, the tags remaining on the stack are
// reported through writeError().
192 bool XMLStream::closeFontTags()
194 if (isTagPending(xml::parsep_tag))
195 // we haven't had any content
198 // this may be a useless check, since we ought at least to have
199 // the parsep_tag. but it can't hurt too much to be careful.
200 if (tag_stack_.empty())
203 // first, we close any open font tags we can close
204 TagPtr *curtag = &tag_stack_.back();
205 while ((*curtag)->asFontTag()) {
206 if (**curtag != xml::parsep_tag)
207 os_ << (*curtag)->writeEndTag();
208 tag_stack_.pop_back();
209 if (tag_stack_.empty())
// curtag pointed at the popped element; re-point it at the new back.
211 curtag = &tag_stack_.back();
214 if (**curtag == xml::parsep_tag)
217 // so we've hit a non-font tag.
218 writeError("Tags still open in closeFontTags(). Probably not a problem,\n"
219 "but you might want to check these tags:");
// Walk the stack from the most recently opened tag backwards.
220 TagDeque::const_reverse_iterator it = tag_stack_.rbegin();
221 TagDeque::const_reverse_iterator const en = tag_stack_.rend();
222 for (; it != en; ++it) {
223 if (**it == xml::parsep_tag)
225 writeError((*it)->tag_);
// Marks the start of a division by queueing a StartTag built from
// xml::parsep_tag on pending_tags_.
231 void XMLStream::startDivision(bool keep_empty)
233 pending_tags_.push_back(makeTagPtr(xml::StartTag(xml::parsep_tag)));
// Ends the current division. If the parsep_tag is still pending, pending
// tags are discarded from the back up to and including it. Otherwise the
// parsep_tag must be open on tag_stack_: tags are popped from the back until
// it is found, and each other tag popped on the way is reported with
// writeError() and closed on os_.
239 void XMLStream::endDivision()
241 if (isTagPending(xml::parsep_tag)) {
242 // this case is normal. it just means we didn't have content,
243 // so the parsep_tag never got moved onto the tag stack.
244 while (!pending_tags_.empty()) {
245 // clear all pending tags up to and including the parsep tag.
246 // note that we work from the back, because we want to get rid
247 // of everything that hasn't been used.
248 TagPtr const cur_tag = pending_tags_.back();
249 pending_tags_.pop_back();
250 if (*cur_tag == xml::parsep_tag)
255 dumpTagStack("EndDivision");
261 if (!isTagOpen(xml::parsep_tag)) {
262 writeError("No division separation tag found in endDivision().");
266 // this case is also normal, if the parsep tag is the last one
267 // on the stack. otherwise, it's an error.
268 while (!tag_stack_.empty()) {
269 TagPtr const cur_tag = tag_stack_.back();
270 tag_stack_.pop_back();
271 if (*cur_tag == xml::parsep_tag)
273 writeError("Tag `" + cur_tag->tag_ + "' still open at end of paragraph. Closing.");
274 os_ << cur_tag->writeEndTag();
278 dumpTagStack("EndDivision");
// Flushes pending_tags_ in FIFO order: each tag is taken from the front,
// pushed onto tag_stack_ and removed from the pending queue. The tag text is
// written to os_; the parsep_tag check guards the output step.
283 void XMLStream::clearTagDeque()
285 while (!pending_tags_.empty()) {
286 TagPtr const & tag = pending_tags_.front();
287 if (*tag != xml::parsep_tag)
289 os_ << tag->writeTag();
290 tag_stack_.push_back(tag);
291 pending_tags_.pop_front();
// Writes d to os_, escaped according to the current escape_ setting, then
// resets escape_ to ESCAPE_ALL. Also clears the "last output was a CR" flag.
296 XMLStream &XMLStream::operator<<(docstring const &d)
298 is_last_tag_cr_ = false;
300 os_ << xml::escapeString(d, escape_);
301 escape_ = ESCAPE_ALL;
// C-string overload: converts s with from_ascii(), writes it to os_ escaped
// according to escape_, then resets escape_ to ESCAPE_ALL. Also clears the
// "last output was a CR" flag.
306 XMLStream &XMLStream::operator<<(const char *s)
308 is_last_tag_cr_ = false;
310 docstring const d = from_ascii(s);
311 os_ << xml::escapeString(d, escape_);
312 escape_ = ESCAPE_ALL;
// Writes the single character c to os_, escaped according to escape_, then
// resets escape_ to ESCAPE_ALL. Also clears the "last output was a CR" flag.
317 XMLStream &XMLStream::operator<<(char_type c)
319 is_last_tag_cr_ = false;
321 os_ << xml::escapeChar(c, escape_);
322 escape_ = ESCAPE_ALL;
// Narrow-char overload: writes c to os_ through xml::escapeChar() using the
// current escape_ setting, then resets escape_ to ESCAPE_ALL. Also clears the
// "last output was a CR" flag.
327 XMLStream &XMLStream::operator<<(char c)
329 is_last_tag_cr_ = false;
331 os_ << xml::escapeChar(c, escape_);
332 escape_ = ESCAPE_ALL;
// Integer overload: clears the "last output was a CR" flag and resets
// escape_ to ESCAPE_ALL once the value has been handled.
337 XMLStream &XMLStream::operator<<(int i)
339 is_last_tag_cr_ = false;
342 escape_ = ESCAPE_ALL;
// Stream manipulator taking an escape setting; it produces no output itself.
347 XMLStream &XMLStream::operator<<(EscapeSettings e)
349 // Don't update is_last_tag_cr_ here, as this does not output anything.
// Queues a start tag on pending_tags_ rather than writing it immediately.
// A tag with an empty name is checked for before queueing.
355 XMLStream &XMLStream::operator<<(xml::StartTag const &tag)
357 is_last_tag_cr_ = false;
358 if (tag.tag_.empty())
360 pending_tags_.push_back(makeTagPtr(tag));
// Queues a paragraph tag on pending_tags_ rather than writing it
// immediately. A tag with an empty name is checked for before queueing.
367 XMLStream &XMLStream::operator<<(xml::ParTag const &tag)
369 is_last_tag_cr_ = false;
370 if (tag.tag_.empty())
372 pending_tags_.push_back(makeTagPtr(tag));
// Writes a CompTag directly to os_ (it is not queued on pending_tags_).
// A tag with an empty name is checked for first.
377 XMLStream &XMLStream::operator<<(xml::CompTag const &tag)
379 is_last_tag_cr_ = false;
380 if (tag.tag_.empty())
383 os_ << tag.writeTag();
// Queues a font tag on pending_tags_ rather than writing it immediately.
// A tag with an empty name is checked for before queueing.
388 XMLStream &XMLStream::operator<<(xml::FontTag const &tag)
390 is_last_tag_cr_ = false;
391 if (tag.tag_.empty())
393 pending_tags_.push_back(makeTagPtr(tag));
// Writes a newline to os_ and records that the last output was a CR, which
// is what isLastTagCR() callers use to avoid duplicate line breaks.
398 XMLStream &XMLStream::operator<<(xml::CR const &)
400 is_last_tag_cr_ = true;
402 os_ << from_ascii("\n");
// Searches tag_stack_ for stag, iterating from begin(). The loop also stops
// once maxdepth reaches zero.
407 bool XMLStream::isTagOpen(xml::StartTag const &stag, int maxdepth) const
409 auto sit = tag_stack_.begin();
410 auto sen = tag_stack_.cend();
411 for (; sit != sen && maxdepth != 0; ++sit) {
// EndTag overload: searches tag_stack_ for a tag matching etag, iterating
// from begin(). The loop also stops once maxdepth reaches zero.
420 bool XMLStream::isTagOpen(xml::EndTag const &etag, int maxdepth) const
422 auto sit = tag_stack_.begin();
423 auto sen = tag_stack_.cend();
424 for (; sit != sen && maxdepth != 0; ++sit) {
// Searches pending_tags_ for stag, iterating from begin(). The loop also
// stops once maxdepth reaches zero.
433 bool XMLStream::isTagPending(xml::StartTag const &stag, int maxdepth) const
435 auto sit = pending_tags_.begin();
436 auto sen = pending_tags_.cend();
437 for (; sit != sen && maxdepth != 0; ++sit) {
// Handles a request to close a tag. In order, the code below deals with:
// the tag still being pending (discarded without output), the tag not being
// open at all (error reported), the tag being the last one opened (simply
// closed), a font tag buried under other font tags (intervening tags are
// closed and re-queued as pending), and finally any other tag buried under
// open tags (the intervening tags are closed with error messages).
446 // this is complicated, because we want to make sure that
447 // everything is properly nested. the code ought to make
448 // sure of that, but we won't assert (yet) if we run into
449 // a problem. we'll just output error messages and try our
450 // best to make things work.
451 XMLStream &XMLStream::operator<<(xml::EndTag const &etag)
453 is_last_tag_cr_ = false;
455 if (etag.tag_.empty())
458 // if this tag is pending, we can simply discard it.
459 if (!pending_tags_.empty()) {
460 if (etag == *pending_tags_.back()) {
461 // we have <tag></tag>, so we discard it and remove it
462 // from the pending_tags_.
463 pending_tags_.pop_back();
467 // there is a pending tag that isn't the one we are trying
470 // is this tag itself pending?
471 // non-const iterators because we may call erase().
472 TagDeque::iterator dit = pending_tags_.begin();
473 TagDeque::iterator const den = pending_tags_.end();
474 for (; dit != den; ++dit) {
476 // it was pending, so we just erase it
477 writeError("Tried to close pending tag `" + to_utf8(etag.tag_)
478 + "' when other tags were pending. Last pending tag is `"
479 + to_utf8(pending_tags_.back()->writeTag())
480 + "'. Tag discarded.");
481 if (!pending_tags_.empty())
482 pending_tags_.erase(dit);
486 // so etag isn't itself pending. is it even open?
487 if (!isTagOpen(etag)) {
488 writeError("Tried to close `" + to_utf8(etag.tag_)
489 + "' when tag was not open. Tag discarded.");
492 // ok, so etag is open.
493 // our strategy will be as below: we will do what we need to
494 // do to close this tag.
495 string estr = "Closing tag `" + to_utf8(etag.tag_)
496 + "' when other tags are pending. Discarded pending tags:\n";
497 for (dit = pending_tags_.begin(); dit != den; ++dit)
498 estr += to_utf8(xml::escapeString((*dit)->writeTag(), XMLStream::ESCAPE_ALL)) + "\n";
500 // clear the pending tags...
501 pending_tags_.clear();
502 // ...and then just fall through.
505 // make sure there are tags to be closed
506 if (tag_stack_.empty()) {
507 writeError("Tried to close `" + etag.tag_
508 + "' when no tags were open!");
512 // is the tag we are closing the last one we opened?
513 if (etag == *tag_stack_.back()) {
515 os_ << etag.writeEndTag();
516 // ...and forget about it
517 tag_stack_.pop_back();
521 // we are trying to close a tag other than the one last opened.
522 // let's first see if this particular tag is still open somehow.
523 if (!isTagOpen(etag)) {
524 writeError("Tried to close `" + etag.tag_
525 + "' when tag was not open. Tag discarded.");
529 // so the tag was opened, but other tags have been opened since
530 // and not yet closed.
531 // if it's a font tag, though...
532 if (etag.asFontTag()) {
533 // it won't be a problem if the other tags open since this one
534 // are also font tags.
535 TagDeque::const_reverse_iterator rit = tag_stack_.rbegin();
536 TagDeque::const_reverse_iterator ren = tag_stack_.rend();
537 for (; rit != ren; ++rit) {
540 if (!(*rit)->asFontTag()) {
541 // we'll just leave it and, presumably, have to close it later.
542 writeError("Unable to close font tag `" + etag.tag_
543 + "' due to open non-font tag `" + (*rit)->tag_ + "'.");
549 // <em>this is <strong>bold
550 // and are being asked to close em. we want:
551 // <em>this is <strong>bold</strong></em><strong>
552 // first, we close the intervening tags...
553 TagPtr *curtag = &tag_stack_.back();
554 // ...remembering them in a stack.
556 while (etag != **curtag) {
557 os_ << (*curtag)->writeEndTag();
558 fontstack.push_back(*curtag);
559 tag_stack_.pop_back();
560 curtag = &tag_stack_.back();
562 os_ << etag.writeEndTag();
563 tag_stack_.pop_back();
565 // ...and restore the other tags.
566 rit = fontstack.rbegin();
567 ren = fontstack.rend();
568 for (; rit != ren; ++rit)
569 pending_tags_.push_back(*rit);
573 // it wasn't a font tag.
574 // so other tags were opened before this one and not properly closed.
575 // so we'll close them, too. that may cause other issues later, but it
576 // at least guarantees proper nesting.
577 writeError("Closing tag `" + etag.tag_
578 + "' when other tags are open, namely:");
579 TagPtr *curtag = &tag_stack_.back();
580 while (etag != **curtag) {
581 writeError((*curtag)->tag_);
582 if (**curtag != xml::parsep_tag)
583 os_ << (*curtag)->writeEndTag();
584 tag_stack_.pop_back();
585 curtag = &tag_stack_.back();
587 // curtag is now the one we actually want.
588 os_ << (*curtag)->writeEndTag();
589 tag_stack_.pop_back();
// Returns label with a number appended. The number comes from a static
// atomic counter initialised to 1000 and pre-incremented on every call, so
// the first suffix produced is 1001.
595 docstring xml::uniqueID(docstring const & label)
598 static atomic_uint seed(1000);
599 return label + convert<docstring>(++seed);
// Scans str for a character that is not one of the whitespace characters
// ' ', '\t', '\n', '\v', '\f' or '\r'.
603 bool xml::isNotOnlySpace(docstring const & str)
605 for (auto const & c: str) {
606 if (c != ' ' && c != '\t' && c != '\n' && c != '\v' && c != '\f' && c != '\r')
// Returns the part of str that starts at its first character that is not
// one of ' ', '\t', '\n', '\v', '\f' or '\r', i.e. str with leading
// whitespace removed.
613 docstring xml::trimLeft(docstring const & str)
616 for (auto const & c: str) {
617 if (c != ' ' && c != '\t' && c != '\n' && c != '\v' && c != '\f' && c != '\r')
618 return str.substr(i, docstring::npos);
// Converts orig into an identifier usable as an XML ID. Results are cached
// per thread (keyed by the original string), so the same input always maps
// to the same output within a thread.
625 docstring xml::cleanID(docstring const & orig)
627 // The standard xml:id only allows letters, digits, '-' and '.' in a name.
// Note that the character check below also accepts '_' unchanged.
628 // This routine replaces illegal characters by '-' or '.' and adds a number for uniqueness if need be.
630 // Use a cache of already mangled names: the alterations may merge several IDs as one. This ensures that the IDs
631 // are not mixed up in the document.
632 // This code could be improved: it uses Qt outside the GUI part. Any TLS implementation could do the trick.
633 typedef map<docstring, docstring> MangledMap;
634 static QThreadStorage<MangledMap> tMangledNames;
635 static QThreadStorage<int> tMangleID;
637 // If the name is already known, just return it.
638 MangledMap & mangledNames = tMangledNames.localData();
639 auto const known = mangledNames.find(orig);
640 if (known != mangledNames.end())
641 return known->second;
643 // Start creating the mangled name by iterating over the characters.
645 auto it = orig.cbegin();
646 auto end = orig.cend();
648 // Make sure it starts with a letter.
// NOTE(review): *it is dereferenced here; confirm that an empty orig cannot
// reach this point.
649 if (!isAlphaASCII(*it))
652 // Parse the ID character by character and change what needs to be changed.
653 bool mangle = false; // Indicates whether the ID had to be changed, i.e. whether it is no longer guaranteed to be unique.
654 for (; it != end; ++it) {
656 if (isAlphaASCII(c) || isDigitASCII(c) || c == '-' || c == '.' || c == '_') {
658 } else if (c == ':' || c == ',' || c == ';' || c == '!') {
661 } else { // Other invalid characters, such as ' '.
667 // If there had to be a change, check if ID uniqueness is still guaranteed.
668 // This avoids having a clash if satisfying XML requirements for ID makes two IDs identical, like "a:b" and "a!b",
669 // as both of them would be transformed as "a.b". With this procedure, one will become "a.b" and the other "a.b-1".
670 if (mangle && mangledNames.find(content) != mangledNames.end()) {
671 int & mangleID = tMangleID.localData();
673 content += "-" + convert<docstring>(mangleID);
677 // Save the new ID to avoid recomputing it afterwards and to ensure stability over the document.
678 mangledNames[orig] = content;
// Writes an opening tag for name to os, followed by the attribute text.
// Any '<' or '>' in attribute is first replaced by a double quote.
683 void xml::openTag(odocstream & os, string const & name, string const & attribute)
686 // This should be fixed in layout files later.
687 string param = subst(attribute, "<", "\"");
688 param = subst(param, ">", "\"");
690 // Note: we ignore the name if it is empty or if it is a comment "<!-- -->" or
691 // if the name is *dummy*.
692 // We ignore dummy because dummy is not a valid docbook element and it is
693 // the internal name given to single paragraphs in the latex output.
694 // This allows us to simplify the code a lot and is a reasonable compromise.
695 if (!name.empty() && name != "!-- --" && name != "dummy") {
696 os << '<' << from_ascii(name);
698 os << ' ' << from_ascii(param);
// Writes "</name>" to os, unless name is empty, the comment marker "!-- --"
// or "dummy" (the same names that the matching openTag() ignores).
704 void xml::closeTag(odocstream & os, string const & name)
706 if (!name.empty() && name != "!-- --" && name != "dummy")
707 os << "</" << from_ascii(name) << '>';
// Opens the tag for paragraph par, using its layout's latexname() as tag
// name and latexparam() as the attribute template. The paragraph ID and a
// counter value (substituted for '#' in the template) are used to build the
// final attribute string, which is passed to
// openTag(os, name, attribute).
711 void xml::openTag(Buffer const & buf, odocstream & os,
712 OutputParams const & runparams, Paragraph const & par)
714 Layout const & style = par.layout();
715 string const & name = style.latexname();
716 string param = style.latexparam();
717 Counters & counters = buf.params().documentClass().counters();
719 string id = par.getID(buf, runparams);
723 if (param.find('#') != string::npos) {
// Remove the "id=<...>" placeholder from the template.
724 string::size_type pos = param.find("id=<");
725 string::size_type end = param.find(">");
726 if( pos != string::npos && end != string::npos)
727 param.erase(pos, end-pos + 1);
729 attribute = id + ' ' + param;
731 if (param.find('#') != string::npos) {
733 if (!style.counter.empty())
734 // This uses InternalUpdate at the moment because xml output
735 // does not do anything with tracked counters, and it would need
736 // to track layouts if it did want to use them.
737 counters.step(style.counter, InternalUpdate);
739 counters.step(from_ascii(name), InternalUpdate);
740 int i = counters.value(from_ascii(name));
// Replace every '#' in the template with the counter value.
741 attribute = subst(param, "#", convert<string>(i));
746 openTag(os, name, attribute);
// Closes the tag for paragraph par: the tag name is the latexname() of the
// paragraph's layout.
750 void xml::closeTag(odocstream & os, Paragraph const & par)
752 Layout const & style = par.layout();
753 closeTag(os, style.latexname());
// Opens an inline tag: sends a StartTag built from tag and attr to xs.
757 void openInlineTag(XMLStream & xs, const docstring & tag, const docstring & attr)
759 xs << xml::StartTag(tag, attr);
// Closes an inline tag: sends an EndTag for tag to xs.
763 void closeInlineTag(XMLStream & xs, const docstring & tag)
765 xs << xml::EndTag(tag);
// Opens a paragraph-level tag: checks whether the last output was a CR,
// then sends a StartTag built from tag and attr to xs.
769 void openParTag(XMLStream & xs, const docstring & tag, const docstring & attr)
771 if (!xs.isLastTagCR())
773 xs << xml::StartTag(tag, attr);
// Closes a paragraph-level tag: sends an EndTag for tag to xs.
777 void closeParTag(XMLStream & xs, const docstring & tag)
779 xs << xml::EndTag(tag);
// Opens a block-level tag: checks whether the last output was a CR, then
// sends a StartTag built from tag and attr to xs.
784 void openBlockTag(XMLStream & xs, const docstring & tag, const docstring & attr)
786 if (!xs.isLastTagCR())
788 xs << xml::StartTag(tag, attr);
// Closes a block-level tag: checks whether the last output was a CR, then
// sends an EndTag for tag to xs.
793 void closeBlockTag(XMLStream & xs, const docstring & tag)
795 if (!xs.isLastTagCR())
797 xs << xml::EndTag(tag);
// Opens tag on xs, dispatching on tagtype: "paragraph" (or any tag named
// "para"), "block", "inline" or "none". Any other tagtype is reported via
// xs.writeError(). Empty tags and the tag "NONE" are checked for first.
802 void xml::openTag(XMLStream & xs, const docstring & tag, const docstring & attr, const std::string & tagtype)
804 if (tag.empty() || tag == "NONE") // Common check to be performed elsewhere, if it was not here.
807 if (tag == "para" || tagtype == "paragraph") // Special case for <para>: always considered as a paragraph.
808 openParTag(xs, tag, attr);
809 else if (tagtype == "block")
810 openBlockTag(xs, tag, attr);
811 else if (tagtype == "inline")
812 openInlineTag(xs, tag, attr);
813 else if (tagtype == "none")
814 xs << xml::StartTag(tag, attr);
816 xs.writeError("Unrecognised tag type '" + tagtype + "' for '" + to_utf8(tag) + " " + to_utf8(attr) + "'");
// Convenience overload: converts tag and attr from UTF-8 and forwards to the
// docstring version.
820 void xml::openTag(XMLStream & xs, const std::string & tag, const std::string & attr, const std::string & tagtype)
822 xml::openTag(xs, from_utf8(tag), from_utf8(attr), tagtype);
// Convenience overload: converts attr from UTF-8 and forwards to the
// docstring version.
826 void xml::openTag(XMLStream & xs, const docstring & tag, const std::string & attr, const std::string & tagtype)
828 xml::openTag(xs, tag, from_utf8(attr), tagtype);
// Convenience overload: converts tag from UTF-8 and forwards to the
// docstring version.
832 void xml::openTag(XMLStream & xs, const std::string & tag, const docstring & attr, const std::string & tagtype)
834 xml::openTag(xs, from_utf8(tag), attr, tagtype);
// Closes tag on xs, dispatching on tagtype: "paragraph" (or any tag named
// "para"), "block", "inline" or "none". Any other tagtype is reported via
// xs.writeError(). Empty tags and the tag "NONE" are checked for first.
838 void xml::closeTag(XMLStream & xs, const docstring & tag, const std::string & tagtype)
840 if (tag.empty() || tag == "NONE")
843 if (tag == "para" || tagtype == "paragraph") // Special case for <para>: always considered as a paragraph.
844 closeParTag(xs, tag);
845 else if (tagtype == "block")
846 closeBlockTag(xs, tag);
847 else if (tagtype == "inline")
848 closeInlineTag(xs, tag);
849 else if (tagtype == "none")
850 xs << xml::EndTag(tag);
852 xs.writeError("Unrecognised tag type '" + tagtype + "' for '" + to_utf8(tag) + "'");
// Convenience overload: converts tag from UTF-8 and forwards to the
// docstring version.
856 void xml::closeTag(XMLStream & xs, const std::string & tag, const std::string & tagtype)
858 xml::closeTag(xs, from_utf8(tag), tagtype);
// Writes a CompTag built from tag and attr to xs. Tags of type "paragraph"
// or "block" (and any tag named "para") share one branch, which also checks
// whether the last output was a CR; "inline" tags are written directly. Any
// other tagtype is reported via xs.writeError(). Empty tags and the tag
// "NONE" are checked for first.
862 void xml::compTag(XMLStream & xs, const docstring & tag, const docstring & attr, const std::string & tagtype)
864 if (tag.empty() || tag == from_ascii("NONE"))
867 // Special case for <para>: always considered as a paragraph.
868 if (tag == from_ascii("para") || tagtype == "paragraph" || tagtype == "block") {
869 if (!xs.isLastTagCR())
871 xs << xml::CompTag(tag, attr);
873 } else if (tagtype == "inline") {
874 xs << xml::CompTag(tag, attr);
876 xs.writeError("Unrecognised tag type '" + tagtype + "' for '" + to_utf8(tag) + "'");
// Convenience overload: converts tag and attr from UTF-8 and forwards to the
// docstring version.
881 void xml::compTag(XMLStream & xs, const std::string & tag, const std::string & attr, const std::string & tagtype)
883 xml::compTag(xs, from_utf8(tag), from_utf8(attr), tagtype);
// Convenience overload: converts attr from UTF-8 and forwards to the
// docstring version.
887 void xml::compTag(XMLStream & xs, const docstring & tag, const std::string & attr, const std::string & tagtype)
889 xml::compTag(xs, tag, from_utf8(attr), tagtype);
// Convenience overload: converts tag from UTF-8 and forwards to the
// docstring version.
893 void xml::compTag(XMLStream & xs, const std::string & tag, const docstring & attr, const std::string & tagtype)
895 xml::compTag(xs, from_utf8(tag), attr, tagtype);