3 * This file is part of LyX, the document processor.
4 * License details can be found in the file COPYING.
9 * Full author contact details are available in file CREDITS.
17 #include "BufferParams.h"
20 #include "OutputParams.h"
21 #include "Paragraph.h"
23 #include "TextClass.h"
25 #include "support/convert.h"
26 #include "support/docstream.h"
27 #include "support/lassert.h"
28 #include "support/lstrings.h"
29 #include "support/textutils.h"
34 #include <QThreadStorage>
37 using namespace lyx::support;
// Escapes a single character for XML/HTML output.
// The escape setting `e` selects the behaviour through the switch below.
// ESCAPE_NONE and ESCAPE_COMMENTS are adjacent case labels and therefore
// share one case body; ESCAPE_ALL and ESCAPE_AND each have their own.
43 docstring escapeChar(char_type c, XMLStream::EscapeSettings e)
46 switch (e) { // For HTML: always ESCAPE_NONE. For XML: it depends, hence the parameter.
47 case XMLStream::ESCAPE_NONE:
48 case XMLStream::ESCAPE_COMMENTS:
51 case XMLStream::ESCAPE_ALL:
// One branch of this case tests for '>' (other branches are not visible here).
55 } else if (c == '>') {
60 case XMLStream::ESCAPE_AND:
// Overload for a narrow `char`: forwards to the char_type overload.
71 docstring escapeChar(char c, XMLStream::EscapeSettings e)
// The tested condition is that `c`, viewed as unsigned, lies below 0x80
// (i.e. is 7-bit ASCII) before it is widened to char_type.
73 LATTEST(static_cast<unsigned char>(c) < 0x80);
74 return escapeChar(static_cast<char_type>(c), e);
// Escapes `raw` character by character with xml::escapeChar(), using the
// escape setting `e`, and accumulates the result in `bin`.
78 docstring escapeString(docstring const & raw, XMLStream::EscapeSettings e)
81 bin.reserve(raw.size() * 2); // crude approximation is sufficient
82 for (size_t i = 0; i != raw.size(); ++i) {
// Under ESCAPE_COMMENTS, a '-' that directly follows another '-' is treated
// specially (the handling itself is not visible here).
// NOTE(review): presumably this avoids "--" inside an XML comment -- confirm.
84 if (e == XMLStream::ESCAPE_COMMENTS && c == '-' && i > 0 && raw[i - 1] == '-')
87 bin += xml::escapeChar(c, e);
// Builds `newname` from `str`: ASCII alphanumeric characters are copied
// unchanged, every other character is replaced by '_'.
94 docstring cleanAttr(docstring const & str)
97 docstring::const_iterator it = str.begin();
98 docstring::const_iterator en = str.end();
99 for (; it != en; ++it) {
100 char_type const c = *it;
101 newname += isAlnumASCII(c) ? c : char_type('_');
// Builds the text of the opening tag: '<' followed by tag_ and, when attr_
// is not empty, a single space and the attribute text.
107 docstring StartTag::writeTag() const
109 docstring output = '<' + tag_;
110 if (!attr_.empty()) {
// The attributes go through escapeString() with ESCAPE_NONE.
111 docstring attributes = xml::escapeString(attr_, XMLStream::ESCAPE_NONE);
// Strip leading whitespace: erase everything before the first non-space character.
// NOTE(review): std::isspace() receives a char_type value converted to int;
// its behaviour is undefined for values not representable as unsigned char
// (other than EOF). Confirm the attribute text cannot start with such a
// character, or use a char_type-aware test.
112 attributes.erase(attributes.begin(), std::find_if(attributes.begin(), attributes.end(),
113 [](int c) {return !std::isspace(c);}));
// Attributes consisting only of whitespace are dropped entirely.
114 if (!attributes.empty()) {
115 output += ' ' + attributes;
// Returns the closing tag text for this tag: "</" + tag_ + ">".
123 docstring StartTag::writeEndTag() const
125 return from_utf8("</") + tag_ + from_utf8(">");
// Equality comparison of a StartTag against a FontTag.
// (Only the signature is visible in this excerpt.)
129 bool StartTag::operator==(FontTag const &rhs) const
// Returns the closing tag text: "</" + tag_ + ">".
135 docstring EndTag::writeEndTag() const
137 return from_utf8("</") + tag_ + from_utf8(">");
// Builds the text of a complete tag, starting with '<' + tag_ and, when
// attr_ is not empty, a single space and the attribute text.
// (The closing part of the tag is written on lines not visible here.)
141 docstring CompTag::writeTag() const
143 docstring output = '<' + tag_;
144 if (!attr_.empty()) {
145 // Erase the beginning of the attributes if it contains space characters: this function deals with that
147 docstring attributes = escapeString(attr_, XMLStream::ESCAPE_NONE);
// NOTE(review): std::isspace() receives a char_type value converted to int;
// its behaviour is undefined for values not representable as unsigned char
// (other than EOF). Confirm, or use a char_type-aware whitespace test.
148 attributes.erase(attributes.begin(), std::find_if(attributes.begin(), attributes.end(),
149 [](int c) {return !std::isspace(c);}));
// Attributes consisting only of whitespace are dropped entirely.
150 if (!attributes.empty()) {
151 output += ' ' + attributes;
// Compares this font tag with another start tag: the other tag is viewed
// as a FontTag through asFontTag() and the two font types are compared.
159 bool FontTag::operator==(StartTag const & tag) const
161 FontTag const * const ftag = tag.asFontTag();
// NOTE(review): ftag is dereferenced here; presumably the lines not visible
// between the two statements handle a null result -- confirm.
164 return (font_type_ == ftag->font_type_);
// Reports an output error by writing the message into the stream as an XML
// comment of the form "<!-- Output Error: ... -->".
170 void XMLStream::writeError(std::string const &s)
// ESCAPE_NONE: the whole comment, including `s`, is written unescaped.
// NOTE(review): a message containing "--" would therefore yield a comment
// that is not well-formed XML -- confirm callers never pass such text.
173 *this << ESCAPE_NONE << from_utf8("<!-- Output Error: " + s + " -->");
// docstring overload: writes the opening and closing comment markers
// unescaped; the message itself is written on a line not visible here.
178 void XMLStream::writeError(docstring const &s)
181 *this << ESCAPE_NONE << from_utf8("<!-- Output Error: ");
183 *this << ESCAPE_NONE << from_utf8(" -->");
// Returns the most recently pushed entry of tag_stack_.
// NOTE(review): back() on an empty container is undefined behaviour and no
// emptiness check is visible here -- callers presumably guarantee a
// non-empty stack; confirm.
188 XMLStream::TagPtr XMLStream::getLastStackTag()
190 return tag_stack_.back();
// Closes the font tags sitting on top of tag_stack_: each one is popped and
// its end tag is written, until a non-font tag is reached. If that tag is
// not the paragraph-separation tag, an error comment listing the tags that
// remain open is written. Returns a bool (most return statements are not
// visible in this excerpt).
194 bool XMLStream::closeFontTags()
196 if (isTagPending(xml::parsep_tag))
197 // we haven't had any content
200 // this may be a useless check, since we ought at least to have
201 // the parsep_tag. but it can't hurt too much to be careful.
202 if (tag_stack_.empty())
205 // first, we close any open font tags we can close
206 TagPtr *curtag = &tag_stack_.back();
207 while ((*curtag)->asFontTag()) {
208 if (**curtag != xml::parsep_tag)
209 os_ << (*curtag)->writeEndTag();
210 tag_stack_.pop_back();
211 // this shouldn't happen, since then the font tags
212 // weren't in any other tag.
213 LASSERT(!tag_stack_.empty(), return true);
214 if (tag_stack_.empty())
// re-point curtag at the new top of the stack after the pop.
216 curtag = &tag_stack_.back();
219 if (**curtag == xml::parsep_tag)
222 // so we've hit a non-font tag.
223 writeError("Tags still open in closeFontTags(). Probably not a problem,\n"
224 "but you might want to check these tags:");
// walk the stack from the top down, reporting each tag that is still open.
225 TagDeque::const_reverse_iterator it = tag_stack_.rbegin();
226 TagDeque::const_reverse_iterator const en = tag_stack_.rend();
227 for (; it != en; ++it) {
228 if (**it == xml::parsep_tag)
230 writeError((*it)->tag_);
// Starts a division by queueing a paragraph-separation start tag
// (xml::parsep_tag) on pending_tags_.
// (How keep_empty is used is not visible in this excerpt.)
236 void XMLStream::startDivision(bool keep_empty)
238 pending_tags_.push_back(makeTagPtr(xml::StartTag(xml::parsep_tag)));
// Ends the current division. Two situations are handled:
//  - the paragraph-separation tag is still pending (no content was written):
//    pending tags are removed from the back until that tag is found;
//  - otherwise the tag is looked for on tag_stack_; tags popped before it is
//    found are reported with writeError() and their end tags are written.
244 void XMLStream::endDivision()
246 if (isTagPending(xml::parsep_tag)) {
247 // this case is normal. it just means we didn't have content,
248 // so the parsep_tag never got moved onto the tag stack.
249 while (!pending_tags_.empty()) {
250 // clear all pending tags up to and including the parsep tag.
251 // note that we work from the back, because we want to get rid
252 // of everything that hasn't been used.
253 TagPtr const cur_tag = pending_tags_.back();
254 pending_tags_.pop_back();
255 if (*cur_tag == xml::parsep_tag)
260 dumpTagStack("EndDivision");
// no separation tag on the stack at all: report it.
266 if (!isTagOpen(xml::parsep_tag)) {
267 writeError("No division separation tag found in endDivision().");
271 // this case is also normal, if the parsep tag is the last one
272 // on the stack. otherwise, it's an error.
273 while (!tag_stack_.empty()) {
274 TagPtr const cur_tag = tag_stack_.back();
275 tag_stack_.pop_back();
276 if (*cur_tag == xml::parsep_tag)
278 writeError("Tag `" + cur_tag->tag_ + "' still open at end of paragraph. Closing.");
279 os_ << cur_tag->writeEndTag();
283 dumpTagStack("EndDivision");
// Flushes pending_tags_: tags are taken from the front, in the order they
// were queued, and moved onto tag_stack_ until the queue is empty.
288 void XMLStream::clearTagDeque()
290 while (!pending_tags_.empty()) {
291 TagPtr const & tag = pending_tags_.front();
// the paragraph-separation tag is tested for here; presumably it is the one
// tag whose opening text is not written (intervening line not visible).
292 if (*tag != xml::parsep_tag)
294 os_ << tag->writeTag();
// `tag` refers to the front element, so it is copied onto the stack
// before pop_front() removes it.
295 tag_stack_.push_back(tag);
296 pending_tags_.pop_front();
// Writes a docstring, escaped according to the current escape_ setting.
301 XMLStream &XMLStream::operator<<(docstring const &d)
303 is_last_tag_cr_ = false;
305 os_ << xml::escapeString(d, escape_);
// escape_ is reset after each insertion, so a setting other than ESCAPE_ALL
// applies to one item only.
306 escape_ = ESCAPE_ALL;
// Writes a C string, escaped according to the current escape_ setting.
// The string is converted with from_ascii().
311 XMLStream &XMLStream::operator<<(const char *s)
313 is_last_tag_cr_ = false;
315 docstring const d = from_ascii(s);
316 os_ << xml::escapeString(d, escape_);
// escape_ is reset after each insertion.
317 escape_ = ESCAPE_ALL;
// Writes a single char_type, escaped according to the current escape_ setting.
322 XMLStream &XMLStream::operator<<(char_type c)
324 is_last_tag_cr_ = false;
326 os_ << xml::escapeChar(c, escape_);
// escape_ is reset after each insertion.
327 escape_ = ESCAPE_ALL;
// Writes a single narrow char, escaped according to the current escape_ setting.
332 XMLStream &XMLStream::operator<<(char c)
334 is_last_tag_cr_ = false;
336 os_ << xml::escapeChar(c, escape_);
// escape_ is reset after each insertion.
337 escape_ = ESCAPE_ALL;
// Integer overload. The line that writes `i` is not visible in this excerpt;
// like the other overloads it clears is_last_tag_cr_ and resets escape_.
342 XMLStream &XMLStream::operator<<(int i)
344 is_last_tag_cr_ = false;
347 escape_ = ESCAPE_ALL;
// Overload taking an escape setting; it produces no output itself.
// NOTE(review): presumably stores `e` in escape_ for the next insertion
// (the assignment is not visible here) -- confirm.
352 XMLStream &XMLStream::operator<<(EscapeSettings e)
354 // Don't update is_last_tag_cr_ here, as this does not output anything.
// Queues a start tag on pending_tags_ rather than writing it immediately.
360 XMLStream &XMLStream::operator<<(xml::StartTag const &tag)
362 is_last_tag_cr_ = false;
// tags with an empty name are tested for first (their handling is not
// visible here; presumably they are ignored).
363 if (tag.tag_.empty())
365 pending_tags_.push_back(makeTagPtr(tag));
// Queues a paragraph tag on pending_tags_ rather than writing it immediately.
372 XMLStream &XMLStream::operator<<(xml::ParTag const &tag)
374 is_last_tag_cr_ = false;
// tags with an empty name are tested for first (handling not visible here).
375 if (tag.tag_.empty())
377 pending_tags_.push_back(makeTagPtr(tag));
// Writes a complete tag straight to the output stream; unlike start tags
// it is not queued on pending_tags_.
382 XMLStream &XMLStream::operator<<(xml::CompTag const &tag)
384 is_last_tag_cr_ = false;
// tags with an empty name are tested for first (handling not visible here).
385 if (tag.tag_.empty())
388 os_ << tag.writeTag();
// Queues a font tag on pending_tags_ rather than writing it immediately.
393 XMLStream &XMLStream::operator<<(xml::FontTag const &tag)
395 is_last_tag_cr_ = false;
// tags with an empty name are tested for first (handling not visible here).
396 if (tag.tag_.empty())
398 pending_tags_.push_back(makeTagPtr(tag));
// Writes a line break ("\n") and records in is_last_tag_cr_ that the last
// thing written was a line break.
403 XMLStream &XMLStream::operator<<(xml::CR const &)
405 is_last_tag_cr_ = true;
407 os_ << from_ascii("\n");
// Searches tag_stack_ for `stag`, iterating from begin() towards end().
// The loop also stops when maxdepth reaches 0 (the loop body, including any
// change to maxdepth, is not visible here).
412 bool XMLStream::isTagOpen(xml::StartTag const &stag, int maxdepth) const
414 auto sit = tag_stack_.begin();
415 auto sen = tag_stack_.cend();
416 for (; sit != sen && maxdepth != 0; ++sit) {
// EndTag overload: searches tag_stack_ for a tag matching `etag`, iterating
// from begin() towards end(). The loop also stops when maxdepth reaches 0
// (the loop body is not visible here).
425 bool XMLStream::isTagOpen(xml::EndTag const &etag, int maxdepth) const
427 auto sit = tag_stack_.begin();
428 auto sen = tag_stack_.cend();
429 for (; sit != sen && maxdepth != 0; ++sit) {
// Searches pending_tags_ for `stag`, iterating from begin() towards end().
// The loop also stops when maxdepth reaches 0 (the loop body is not visible
// here).
438 bool XMLStream::isTagPending(xml::StartTag const &stag, int maxdepth) const
440 auto sit = pending_tags_.begin();
441 auto sen = pending_tags_.cend();
442 for (; sit != sen && maxdepth != 0; ++sit) {
// Closes a tag. Outline of the visible logic:
//  1. if pending tags exist: a matching last pending tag is simply dropped;
//     a matching tag elsewhere in the queue is erased with an error comment;
//     otherwise the pending tags are listed in an error message and cleared;
//  2. if the tag is on top of tag_stack_, its end tag is written and it is popped;
//  3. if it is a font tag below other font tags, the intervening font tags
//     are closed, the tag is closed, and the others are queued again;
//  4. otherwise every tag above it is closed (with error comments) and then
//     the tag itself.
451 // this is complicated, because we want to make sure that
452 // everything is properly nested. the code ought to make
453 // sure of that, but we won't assert (yet) if we run into
454 // a problem. we'll just output error messages and try our
455 // best to make things work.
456 XMLStream &XMLStream::operator<<(xml::EndTag const &etag)
458 is_last_tag_cr_ = false;
460 if (etag.tag_.empty())
463 // if this tag is pending, we can simply discard it.
464 if (!pending_tags_.empty()) {
465 if (etag == *pending_tags_.back()) {
466 // we have <tag></tag>, so we discard it and remove it
467 // from the pending_tags_.
468 pending_tags_.pop_back();
472 // there is a pending tag that isn't the one we are trying
475 // is this tag itself pending?
476 // non-const iterators because we may call erase().
477 TagDeque::iterator dit = pending_tags_.begin();
478 TagDeque::iterator const den = pending_tags_.end();
479 for (; dit != den; ++dit) {
481 // it was pending, so we just erase it
482 writeError("Tried to close pending tag `" + to_utf8(etag.tag_)
483 + "' when other tags were pending. Last pending tag is `"
484 + to_utf8(pending_tags_.back()->writeTag())
485 + "'. Tag discarded.");
486 pending_tags_.erase(dit);
490 // so etag isn't itself pending. is it even open?
491 if (!isTagOpen(etag)) {
492 writeError("Tried to close `" + to_utf8(etag.tag_)
493 + "' when tag was not open. Tag discarded.");
496 // ok, so etag is open.
497 // our strategy will be as below: we will do what we need to
498 // do to close this tag.
499 string estr = "Closing tag `" + to_utf8(etag.tag_)
500 + "' when other tags are pending. Discarded pending tags:\n";
// the discarded tags are escaped with ESCAPE_ALL before being added to the message.
501 for (dit = pending_tags_.begin(); dit != den; ++dit)
502 estr += to_utf8(xml::escapeString((*dit)->writeTag(), XMLStream::ESCAPE_ALL)) + "\n";
504 // clear the pending tags...
505 pending_tags_.clear();
506 // ...and then just fall through.
509 // make sure there are tags to be closed
510 if (tag_stack_.empty()) {
511 writeError("Tried to close `" + etag.tag_
512 + "' when no tags were open!");
516 // is the tag we are closing the last one we opened?
517 if (etag == *tag_stack_.back()) {
519 os_ << etag.writeEndTag();
520 // ...and forget about it
521 tag_stack_.pop_back();
525 // we are trying to close a tag other than the one last opened.
526 // let's first see if this particular tag is still open somehow.
527 if (!isTagOpen(etag)) {
528 writeError("Tried to close `" + etag.tag_
529 + "' when tag was not open. Tag discarded.");
533 // so the tag was opened, but other tags have been opened since
534 // and not yet closed.
535 // if it's a font tag, though...
536 if (etag.asFontTag()) {
537 // it won't be a problem if the other tags open since this one
538 // are also font tags.
539 TagDeque::const_reverse_iterator rit = tag_stack_.rbegin();
540 TagDeque::const_reverse_iterator ren = tag_stack_.rend();
541 for (; rit != ren; ++rit) {
544 if (!(*rit)->asFontTag()) {
545 // we'll just leave it and, presumably, have to close it later.
546 writeError("Unable to close font tag `" + etag.tag_
547 + "' due to open non-font tag `" + (*rit)->tag_ + "'.");
553 // <em>this is <strong>bold
554 // and are being asked to close em. we want:
555 // <em>this is <strong>bold</strong></em><strong>
556 // first, we close the intervening tags...
557 TagPtr *curtag = &tag_stack_.back();
558 // ...remembering them in a stack.
560 while (etag != **curtag) {
561 os_ << (*curtag)->writeEndTag();
562 fontstack.push_back(*curtag);
563 tag_stack_.pop_back();
564 curtag = &tag_stack_.back();
566 os_ << etag.writeEndTag();
567 tag_stack_.pop_back();
569 // ...and restore the other tags.
// fontstack is replayed in reverse, so the tags are queued again in their
// original opening order.
570 rit = fontstack.rbegin();
571 ren = fontstack.rend();
572 for (; rit != ren; ++rit)
573 pending_tags_.push_back(*rit);
577 // it wasn't a font tag.
578 // so other tags were opened before this one and not properly closed.
579 // so we'll close them, too. that may cause other issues later, but it
580 // at least guarantees proper nesting.
581 writeError("Closing tag `" + etag.tag_
582 + "' when other tags are open, namely:");
583 TagPtr *curtag = &tag_stack_.back();
584 while (etag != **curtag) {
585 writeError((*curtag)->tag_);
// the paragraph-separation tag has no end tag written for it.
586 if (**curtag != xml::parsep_tag)
587 os_ << (*curtag)->writeEndTag();
588 tag_stack_.pop_back();
589 curtag = &tag_stack_.back();
591 // curtag is now the one we actually want.
592 os_ << (*curtag)->writeEndTag();
593 tag_stack_.pop_back();
// Returns `label` followed by a number taken from a counter shared by all
// callers. The counter is atomic, starts at 1000 and is incremented before
// use, so the first suffix handed out is 1001.
599 docstring xml::uniqueID(docstring const & label)
602 static atomic_uint seed(1000);
603 return label + convert<docstring>(++seed);
// Turns `orig` into a string usable as an XML ID and caches the result, so
// that the same input always yields the same output within one thread (the
// cache is kept in QThreadStorage).
607 docstring xml::cleanID(docstring const & orig)
609 // The standard xml:id only allows letters, digits, '-' and '.' in a name.
610 // This routine replaces illegal characters by '-' or '.' and adds a number for uniqueness if need be.
612 // Use a cache of already mangled names: the alterations may merge several IDs as one. This ensures that the IDs
613 // are not mixed up in the document.
614 // This code could be improved: it uses Qt outside the GUI part. Any TLS implementation could do the trick.
615 typedef map<docstring, docstring> MangledMap;
616 static QThreadStorage<MangledMap> tMangledNames;
617 static QThreadStorage<int> tMangleID;
619 // If the name is already known, just return it.
620 MangledMap & mangledNames = tMangledNames.localData();
621 auto const known = mangledNames.find(orig);
622 if (known != mangledNames.end())
623 return known->second;
625 // Start creating the mangled name by iterating over the characters.
627 auto it = orig.cbegin();
628 auto end = orig.cend();
630 // Make sure it starts with a letter.
// NOTE(review): *it is read with no visible check that orig is non-empty;
// dereferencing cbegin() of an empty string is not valid -- confirm callers
// never pass an empty ID, or guard with it != end.
631 if (!isAlphaASCII(*it))
634 // Parse the ID character by character and change what needs to.
635 bool mangle = false; // Indicates whether the ID had to be changed, i.e. whether it is no longer guaranteed to be unique.
636 for (; it != end; ++it) {
638 if (isAlphaASCII(c) || isDigitASCII(c) || c == '-' || c == '.' || c == '_') {
640 } else if (c == ':' || c == ',' || c == ';' || c == '!') {
643 } else { // Other invalid characters, such as ' '.
649 // If there had to be a change, check if ID uniqueness is still guaranteed.
650 // This avoids having a clash if satisfying XML requirements for ID makes two IDs identical, like "a:b" and "a!b",
651 // as both of them would be transformed as "a.b". With this procedure, one will become "a.b" and the other "a.b-1".
// NOTE(review): the map is keyed by the ORIGINAL name (see the store below),
// so this find() checks whether `content` was previously seen as an input,
// not whether it was previously produced as an output. For the "a:b"/"a!b"
// example the lookup would only hit if "a.b" itself had been passed in
// earlier -- verify that this matches the intent described above.
652 if (mangle && mangledNames.find(content) != mangledNames.end()) {
653 int & mangleID = tMangleID.localData();
654 content += "-" + convert<docstring>(mangleID);
658 // Save the new ID to avoid recomputing it afterwards and to ensure stability over the document.
659 mangledNames[orig] = content;
// Writes an opening tag "<name" to `os`, followed by a space and the
// attribute text on the line below (the condition guarding that line and
// the closing '>' are not visible here).
664 void xml::openTag(odocstream & os, string const & name, string const & attribute)
// Both '<' and '>' in the attribute text are turned into double quotes.
667 // This should be fixed in layout files later.
668 string param = subst(attribute, "<", "\"");
669 param = subst(param, ">", "\"");
671 // Note: we ignore the name if it is empty or if it is a comment "<!-- -->" or
672 // if the name is *dummy*.
673 // We ignore dummy because dummy is not a valid docbook element and it is
674 // the internal name given to single paragraphs in the latex output.
675 // This allows us to simplify the code a lot and is a reasonable compromise.
676 if (!name.empty() && name != "!-- --" && name != "dummy") {
677 os << '<' << from_ascii(name);
679 os << ' ' << from_ascii(param);
// Writes the closing tag "</name>" to `os`. Nothing is written when the
// name is empty, is the comment marker "!-- --", or is "dummy".
685 void xml::closeTag(odocstream & os, string const & name)
687 if (!name.empty() && name != "!-- --" && name != "dummy")
688 os << "</" << from_ascii(name) << '>';
// Opens the tag for a paragraph. The tag name and attribute template come
// from the paragraph's layout (latexname() / latexparam()); the attribute
// text is built from the paragraph ID and/or a counter value, then passed
// to the three-argument openTag().
692 void xml::openTag(Buffer const & buf, odocstream & os,
693 OutputParams const & runparams, Paragraph const & par)
695 Layout const & style = par.layout();
696 string const & name = style.latexname();
697 string param = style.latexparam();
698 Counters & counters = buf.params().documentClass().counters();
700 string id = par.getID(buf, runparams);
// The template contains a '#' placeholder: remove its "id=<...>" part, since
// the paragraph's own id is prepended instead.
704 if (param.find('#') != string::npos) {
705 string::size_type pos = param.find("id=<");
// NOTE(review): this finds the FIRST '>' in param, not the first one after
// `pos`. If a '>' occurs before "id=<", end < pos and the unsigned
// expression end-pos + 1 wraps around -- confirm layouts never produce that.
706 string::size_type end = param.find(">");
707 if( pos != string::npos && end != string::npos)
708 param.erase(pos, end-pos + 1);
710 attribute = id + ' ' + param;
712 if (param.find('#') != string::npos) {
714 if (!style.counter.empty())
715 // This uses InternalUpdate at the moment because xml output
716 // does not do anything with tracked counters, and it would need
717 // to track layouts if it did want to use them.
718 counters.step(style.counter, InternalUpdate);
720 counters.step(from_ascii(name), InternalUpdate);
// every '#' in the template is replaced by the current value of the counter
// named after the tag.
721 int i = counters.value(from_ascii(name));
722 attribute = subst(param, "#", convert<string>(i));
727 openTag(os, name, attribute);
// Closes the tag for a paragraph: the tag name is the latexname() of the
// paragraph's layout, handed to the name-based closeTag().
731 void xml::closeTag(odocstream & os, Paragraph const & par)
733 Layout const & style = par.layout();
734 closeTag(os, style.latexname());