3 * This file is part of LyX, the document processor.
4 * License details can be found in the file COPYING.
9 * Full author contact details are available in file CREDITS.
17 #include "BufferParams.h"
20 #include "OutputParams.h"
21 #include "Paragraph.h"
23 #include "TextClass.h"
25 #include "support/convert.h"
26 #include "support/docstream.h"
27 #include "support/lassert.h"
28 #include "support/lstrings.h"
29 #include "support/textutils.h"
34 #include <QThreadStorage>
37 using namespace lyx::support;
// Escapes a single character according to the escaping policy e.
// ESCAPE_NONE and ESCAPE_COMMENTS share one case label; ESCAPE_ALL inspects
// the character (the '>' test is one of its branches) and is followed by the
// ESCAPE_AND case.
43 docstring escapeChar(char_type c, XMLStream::EscapeSettings e)
46 switch (e) { // For HTML: always ESCAPE_NONE. For XML: it depends, hence the parameter.
47 case XMLStream::ESCAPE_NONE:
48 case XMLStream::ESCAPE_COMMENTS:
51 case XMLStream::ESCAPE_ALL:
55 } else if (c == '>') {
60 case XMLStream::ESCAPE_AND:
// Narrow-character convenience overload: casts c to char_type and forwards
// to the char_type overload with the same policy.
71 docstring escapeChar(char c, XMLStream::EscapeSettings e)
// Only 7-bit values (below 0x80) are expected here.
73 LATTEST(static_cast<unsigned char>(c) < 0x80);
74 return escapeChar(static_cast<char_type>(c), e);
// Escapes raw character by character under policy e, accumulating the result
// in bin.
78 docstring xml::escapeString(docstring const & raw, XMLStream::EscapeSettings e)
81 bin.reserve(raw.size() * 2); // crude approximation is sufficient
82 for (size_t i = 0; i != raw.size(); ++i) {
// Under ESCAPE_COMMENTS a '-' that directly follows another '-' is handled
// separately: the sequence "--" is not permitted inside an XML comment.
84 if (e == XMLStream::ESCAPE_COMMENTS && c == '-' && i > 0 && raw[i - 1] == '-')
// Every other character goes through escapeChar().
87 bin += xml::escapeChar(c, e);
94 // escape what needs escaping
// Thin wrapper: forwards str and the policy e to xml::escapeString().
95 docstring xmlize(docstring const &str, XMLStream::EscapeSettings e)
97 return xml::escapeString(str, e);
// Builds a sanitised copy of str in newname: ASCII alphanumeric characters
// are kept, every other character is replaced by '_'.
101 docstring cleanAttr(docstring const & str)
104 docstring::const_iterator it = str.begin();
105 docstring::const_iterator en = str.end();
106 for (; it != en; ++it) {
107 char_type const c = *it;
108 newname += isAlnumASCII(c) ? c : char_type('_');
// Builds the text of the opening tag: '<' followed by tag_ and, when attr_ is
// not empty, a single space and the attributes.
114 docstring StartTag::writeTag() const
116 docstring output = '<' + tag_;
117 if (!attr_.empty()) {
// The attributes are passed through xmlize() with the ESCAPE_NONE policy.
118 docstring attributes = xml::xmlize(attr_, XMLStream::ESCAPE_NONE);
// Strip leading whitespace from the attribute string.
// NOTE(review): std::isspace(int) is only defined for values representable as
// unsigned char (or EOF). The elements of a docstring are char_type; if that
// type is wider than char, a non-ASCII first character makes this call
// undefined behaviour -- consider a char_type-aware predicate. TODO confirm.
119 attributes.erase(attributes.begin(), std::find_if(attributes.begin(), attributes.end(),
120 [](int c) {return !std::isspace(c);}));
// Attributes consisting only of whitespace add nothing to the tag.
121 if (!attributes.empty()) {
122 output += ' ' + attributes;
// Returns the closing tag matching this start tag: "</" + tag_ + ">".
130 docstring StartTag::writeEndTag() const
132 return from_utf8("</") + tag_ + from_utf8(">");
// Equality between a StartTag and a FontTag; counterpart of
// FontTag::operator==(StartTag const &).
136 bool StartTag::operator==(FontTag const &rhs) const
// Returns the text of this closing tag: "</" + tag_ + ">".
142 docstring EndTag::writeEndTag() const
144 return from_utf8("</") + tag_ + from_utf8(">");
// Builds the text of a complete (self-contained) tag: '<' followed by tag_
// and, when attr_ is not empty, a single space and the attributes.
// tag_ and attr_ are converted with from_utf8() before use.
148 docstring CompTag::writeTag() const
150 docstring output = '<' + from_utf8(tag_);
151 if (!attr_.empty()) {
152 // Erase the beginning of the attributes if it contains space characters: this function deals with that
154 docstring attributes = xmlize(from_utf8(attr_), XMLStream::ESCAPE_NONE);
// NOTE(review): std::isspace(int) is only defined for values representable as
// unsigned char (or EOF). The elements of a docstring are char_type; if that
// type is wider than char, a non-ASCII first character makes this call
// undefined behaviour -- consider a char_type-aware predicate. TODO confirm.
155 attributes.erase(attributes.begin(), std::find_if(attributes.begin(), attributes.end(),
156 [](int c) {return !std::isspace(c);}));
// Attributes consisting only of whitespace add nothing to the tag.
157 if (!attributes.empty()) {
158 output += ' ' + attributes;
// Compares this font tag with an arbitrary start tag: tag is viewed as a
// FontTag through asFontTag() and the two font_type_ values are compared.
166 bool FontTag::operator==(StartTag const & tag) const
168 FontTag const * const ftag = tag.asFontTag();
171 return (font_type_ == ftag->font_type_);
// Reports a problem inside the generated document: s is written to os_
// wrapped in an XML comment, "<!-- Output Error: " ... " -->", followed by a
// newline.
// NOTE(review): s is inserted verbatim; a message containing "--" would make
// the emitted comment ill-formed XML. Verify that callers never pass such text.
177 void XMLStream::writeError(std::string const &s) const
180 os_ << from_utf8("<!-- Output Error: " + s + " -->\n");
// docstring overload: writes s to os_ wrapped in an XML comment,
// "<!-- Output Error: " ... " -->", followed by a newline.
// NOTE(review): s is inserted verbatim; a message containing "--" would make
// the emitted comment ill-formed XML. Verify that callers never pass such text.
184 void XMLStream::writeError(docstring const &s) const
187 os_ << from_utf8("<!-- Output Error: ") << s << from_utf8(" -->\n");
// Closes the font tags sitting on top of tag_stack_, writing their end tags
// to os_. If the first non-font tag reached is not the paragraph separator
// (parsep_tag), the tags still open are reported through writeError().
191 bool XMLStream::closeFontTags()
193 if (isTagPending(xml::parsep_tag))
194 // we haven't had any content
197 // this may be a useless check, since we ought at least to have
198 // the parsep_tag. but it can't hurt too much to be careful.
199 if (tag_stack_.empty())
202 // first, we close any open font tags we can close
203 TagPtr *curtag = &tag_stack_.back();
204 while ((*curtag)->asFontTag()) {
// The paragraph separator itself has no textual end tag to write.
205 if (**curtag != xml::parsep_tag)
206 os_ << (*curtag)->writeEndTag();
207 tag_stack_.pop_back();
208 // this shouldn't happen, since then the font tags
209 // weren't in any other tag.
210 // LASSERT(!tag_stack_.empty(), return true);
211 if (tag_stack_.empty())
// Re-take the address: the previous top element has just been popped.
213 curtag = &tag_stack_.back();
216 if (**curtag == xml::parsep_tag)
219 // so we've hit a non-font tag.
220 writeError("Tags still open in closeFontTags(). Probably not a problem,\n"
221 "but you might want to check these tags:");
// List the open tags from the top of the stack downwards, looking for the
// paragraph separator.
222 TagDeque::const_reverse_iterator it = tag_stack_.rbegin();
223 TagDeque::const_reverse_iterator const en = tag_stack_.rend();
224 for (; it != en; ++it) {
225 if (**it == xml::parsep_tag)
227 writeError((*it)->tag_);
// Marks the beginning of a division by queueing a parsep_tag start tag on
// pending_tags_; endDivision() later looks for this marker.
233 void XMLStream::startDivision(bool keep_empty)
235 pending_tags_.push_back(makeTagPtr(xml::StartTag(xml::parsep_tag)));
// Ends the current division. Two situations are handled:
//  - the parsep_tag marker is still pending (no content was written): the
//    pending tags are discarded from the back, looking for the marker;
//  - otherwise the marker must be open on tag_stack_: the stack is unwound
//    towards it, and any other tag met on the way is reported and closed.
241 void XMLStream::endDivision()
243 if (isTagPending(xml::parsep_tag)) {
244 // this case is normal. it just means we didn't have content,
245 // so the parsep_tag never got moved onto the tag stack.
246 while (!pending_tags_.empty()) {
247 // clear all pending tags up to and including the parsep tag.
248 // note that we work from the back, because we want to get rid
249 // of everything that hasn't been used.
// A copy of the pointer is taken because the element is popped right after.
250 TagPtr const cur_tag = pending_tags_.back();
251 pending_tags_.pop_back();
252 if (*cur_tag == xml::parsep_tag)
257 dumpTagStack("EndDivision");
263 if (!isTagOpen(xml::parsep_tag)) {
264 writeError("No division separation tag found in endDivision().");
268 // this case is also normal, if the parsep tag is the last one
269 // on the stack. otherwise, it's an error.
270 while (!tag_stack_.empty()) {
271 TagPtr const cur_tag = tag_stack_.back();
272 tag_stack_.pop_back();
273 if (*cur_tag == xml::parsep_tag)
// Any other tag is reported and then closed in the output.
275 writeError("Tag `" + cur_tag->tag_ + "' still open at end of paragraph. Closing.");
276 os_ << cur_tag->writeEndTag();
280 dumpTagStack("EndDivision");
// Flushes pending_tags_ in first-in, first-out order: each tag is written to
// os_ (except the parsep_tag marker, which has no textual form) and moved
// onto tag_stack_.
285 void XMLStream::clearTagDeque()
287 while (!pending_tags_.empty()) {
// tag refers to the front element; it stays valid until pop_front() below.
288 TagPtr const & tag = pending_tags_.front();
289 if (*tag != xml::parsep_tag)
291 os_ << tag->writeTag();
292 tag_stack_.push_back(tag);
293 pending_tags_.pop_front();
// Writes the text d to os_, escaped according to the current escape_ policy.
298 XMLStream &XMLStream::operator<<(docstring const &d)
301 os_ << xml::xmlize(d, escape_);
// An escape setting applies to one write only: fall back to ESCAPE_ALL.
302 escape_ = ESCAPE_ALL;
// Writes the C string s to os_, escaped according to the current escape_
// policy. s is converted with from_ascii().
307 XMLStream &XMLStream::operator<<(const char *s)
310 docstring const d = from_ascii(s);
311 os_ << xml::xmlize(d, escape_);
// An escape setting applies to one write only: fall back to ESCAPE_ALL.
312 escape_ = ESCAPE_ALL;
// Writes the single character c to os_, escaped according to the current
// escape_ policy.
317 XMLStream &XMLStream::operator<<(char_type c)
320 os_ << xml::escapeChar(c, escape_);
// An escape setting applies to one write only: fall back to ESCAPE_ALL.
321 escape_ = ESCAPE_ALL;
// Writes the narrow character c to os_, escaped according to the current
// escape_ policy.
326 XMLStream &XMLStream::operator<<(char c)
329 os_ << xml::escapeChar(c, escape_);
// An escape setting applies to one write only: fall back to ESCAPE_ALL.
330 escape_ = ESCAPE_ALL;
// Integer overload of the stream operator.
335 XMLStream &XMLStream::operator<<(int i)
// As in the other content overloads, the escape policy is reset to
// ESCAPE_ALL afterwards.
339 escape_ = ESCAPE_ALL;
// Overload taking an escape policy.
// NOTE(review): presumably stores e in escape_ for the next write, since the
// content overloads reset escape_ to ESCAPE_ALL after each write -- confirm.
344 XMLStream &XMLStream::operator<<(EscapeSettings e)
// Opens a tag. The tag is not written immediately: it is queued on
// pending_tags_. A tag with an empty name is special-cased first.
351 XMLStream &XMLStream::operator<<(xml::StartTag const &tag)
353 if (tag.tag_.empty())
355 pending_tags_.push_back(makeTagPtr(tag));
// Opens a paragraph tag. The tag is not written immediately: it is queued on
// pending_tags_. A tag with an empty name is special-cased first.
362 XMLStream &XMLStream::operator<<(xml::ParTag const &tag)
364 if (tag.tag_.empty())
366 pending_tags_.push_back(makeTagPtr(tag));
// Emits a complete tag. Unlike start tags, it is written to os_ directly
// rather than queued. A tag with an empty name is special-cased first.
371 XMLStream &XMLStream::operator<<(xml::CompTag const &tag)
373 if (tag.tag_.empty())
376 os_ << tag.writeTag();
// Opens a font tag. The tag is not written immediately: it is queued on
// pending_tags_. A tag with an empty name is special-cased first.
381 XMLStream &XMLStream::operator<<(xml::FontTag const &tag)
383 if (tag.tag_.empty())
385 pending_tags_.push_back(makeTagPtr(tag));
// Line-break manipulator: writes a newline character to os_.
390 XMLStream &XMLStream::operator<<(xml::CR const &)
393 os_ << from_ascii("\n");
// Looks for stag among the open tags. tag_stack_ is scanned from its front;
// the scan ends at the end of the stack or as soon as maxdepth equals 0.
398 bool XMLStream::isTagOpen(xml::StartTag const &stag, int maxdepth) const
400 auto sit = tag_stack_.begin();
401 auto sen = tag_stack_.cend();
402 for (; sit != sen && maxdepth != 0; ++sit) {
// Looks for the tag closed by etag among the open tags. tag_stack_ is scanned
// from its front; the scan ends at the end of the stack or as soon as
// maxdepth equals 0.
411 bool XMLStream::isTagOpen(xml::EndTag const &etag, int maxdepth) const
413 auto sit = tag_stack_.begin();
414 auto sen = tag_stack_.cend();
415 for (; sit != sen && maxdepth != 0; ++sit) {
// Looks for stag among the tags that are queued but not yet written.
// pending_tags_ is scanned from its front; the scan ends at the end of the
// queue or as soon as maxdepth equals 0.
424 bool XMLStream::isTagPending(xml::StartTag const &stag, int maxdepth) const
426 auto sit = pending_tags_.begin();
427 auto sen = pending_tags_.cend();
428 for (; sit != sen && maxdepth != 0; ++sit) {
// Closes a tag. Overview of the cases handled below, in order:
//  1. tags are pending: an immediately preceding matching start tag is
//     dropped together with etag; a matching tag deeper in the queue is
//     erased with an error message; otherwise the pending tags are discarded;
//  2. etag matches the top of tag_stack_: it is written and popped;
//  3. etag is a font tag hidden only by other font tags: those are closed,
//     etag is closed, and the others are queued again for reopening;
//  4. any other tag: everything opened after it is closed with an error
//     message, then the tag itself is closed.
437 // this is complicated, because we want to make sure that
438 // everything is properly nested. the code ought to make
439 // sure of that, but we won't assert (yet) if we run into
440 // a problem. we'll just output error messages and try our
441 // best to make things work.
442 XMLStream &XMLStream::operator<<(xml::EndTag const &etag)
444 if (etag.tag_.empty())
447 // if this tag is pending, we can simply discard it.
448 if (!pending_tags_.empty()) {
449 if (etag == *pending_tags_.back()) {
450 // we have <tag></tag>, so we discard it and remove it
451 // from the pending_tags_.
452 pending_tags_.pop_back();
456 // there is a pending tag that isn't the one we are trying
459 // is this tag itself pending?
460 // non-const iterators because we may call erase().
461 TagDeque::iterator dit = pending_tags_.begin();
462 TagDeque::iterator const den = pending_tags_.end();
463 for (; dit != den; ++dit) {
465 // it was pending, so we just erase it
466 writeError("Tried to close pending tag `" + to_utf8(etag.tag_)
467 + "' when other tags were pending. Last pending tag is `"
468 + to_utf8(pending_tags_.back()->writeTag())
469 + "'. Tag discarded.");
// NOTE(review): erase() invalidates dit and den; the loop must not continue
// with them after this point -- confirm that control leaves the loop here.
470 pending_tags_.erase(dit);
474 // so etag isn't itself pending. is it even open?
475 if (!isTagOpen(etag)) {
476 writeError("Tried to close `" + to_utf8(etag.tag_)
477 + "' when tag was not open. Tag discarded.");
480 // ok, so etag is open.
481 // our strategy will be as below: we will do what we need to
482 // do to close this tag.
483 string estr = "Closing tag `" + to_utf8(etag.tag_)
484 + "' when other tags are pending. Discarded pending tags:\n";
// The discarded tags are escaped with ESCAPE_ALL before being added to the
// error text.
485 for (dit = pending_tags_.begin(); dit != den; ++dit)
486 estr += to_utf8(xml::xmlize((*dit)->writeTag(), XMLStream::ESCAPE_ALL)) + "\n";
488 // clear the pending tags...
489 pending_tags_.clear();
490 // ...and then just fall through.
493 // make sure there are tags to be closed
494 if (tag_stack_.empty()) {
495 writeError("Tried to close `" + etag.tag_
496 + "' when no tags were open!");
500 // is the tag we are closing the last one we opened?
501 if (etag == *tag_stack_.back()) {
503 os_ << etag.writeEndTag();
504 // ...and forget about it
505 tag_stack_.pop_back();
509 // we are trying to close a tag other than the one last opened.
510 // let's first see if this particular tag is still open somehow.
511 if (!isTagOpen(etag)) {
512 writeError("Tried to close `" + etag.tag_
513 + "' when tag was not open. Tag discarded.");
517 // so the tag was opened, but other tags have been opened since
518 // and not yet closed.
519 // if it's a font tag, though...
520 if (etag.asFontTag()) {
521 // it won't be a problem if the other tags open since this one
522 // are also font tags.
523 TagDeque::const_reverse_iterator rit = tag_stack_.rbegin();
524 TagDeque::const_reverse_iterator ren = tag_stack_.rend();
525 for (; rit != ren; ++rit) {
528 if (!(*rit)->asFontTag()) {
529 // we'll just leave it and, presumably, have to close it later.
530 writeError("Unable to close font tag `" + etag.tag_
531 + "' due to open non-font tag `" + (*rit)->tag_ + "'.");
537 // <em>this is <strong>bold
538 // and are being asked to close em. we want:
539 // <em>this is <strong>bold</strong></em><strong>
540 // first, we close the intervening tags...
541 TagPtr *curtag = &tag_stack_.back();
542 // ...remembering them in a stack.
// NOTE(review): this loop calls back() after each pop_back() without an
// emptiness test; it relies on the isTagOpen(etag) check above guaranteeing
// that a matching tag is found before the stack runs out.
544 while (etag != **curtag) {
545 os_ << (*curtag)->writeEndTag();
546 fontstack.push_back(*curtag);
547 tag_stack_.pop_back();
548 curtag = &tag_stack_.back();
550 os_ << etag.writeEndTag();
551 tag_stack_.pop_back();
553 // ...and restore the other tags.
// Iterating fontstack in reverse queues the tags in their original opening
// order.
554 rit = fontstack.rbegin();
555 ren = fontstack.rend();
556 for (; rit != ren; ++rit)
557 pending_tags_.push_back(*rit);
561 // it wasn't a font tag.
562 // so other tags were opened before this one and not properly closed.
563 // so we'll close them, too. that may cause other issues later, but it
564 // at least guarantees proper nesting.
565 writeError("Closing tag `" + etag.tag_
566 + "' when other tags are open, namely:");
567 TagPtr *curtag = &tag_stack_.back();
// NOTE(review): same reliance on the isTagOpen(etag) check as in the font
// branch: back() is called after pop_back() without an emptiness test.
568 while (etag != **curtag) {
569 writeError((*curtag)->tag_);
570 if (**curtag != xml::parsep_tag)
571 os_ << (*curtag)->writeEndTag();
572 tag_stack_.pop_back();
573 curtag = &tag_stack_.back();
575 // curtag is now the one we actually want.
576 os_ << (*curtag)->writeEndTag();
577 tag_stack_.pop_back();
// Returns label followed by a number taken from a counter shared by all
// callers. The counter is atomic, starts at 1000 and is incremented before
// use, so the first suffix handed out is 1001.
583 docstring xml::uniqueID(docstring const & label)
586 static atomic_uint seed(1000);
587 return label + convert<docstring>(++seed);
// Turns an arbitrary label into a string usable as an xml:id. Results are
// cached per thread, so the same orig always maps to the same ID within a
// thread.
591 docstring xml::cleanID(docstring const & orig)
593 // The standard xml:id only allows letters,
594 // digits, '-' and '.' in a name.
595 // This routine replaces illegal characters by '-' or '.'
596 // and adds a number for uniqueness if need be.
// NOTE(review): the set below also accepts '_', which the comment above does
// not mention -- align one with the other.
597 docstring const allowed = from_ascii(".-_");
599 // Use a cache of already mangled names: the alterations may merge several IDs as one. This ensures that the IDs
600 // are not mixed up in the document.
601 typedef map<docstring, docstring> MangledMap;
// Both the cache and the uniqueness counter live in thread-local storage.
602 static QThreadStorage<MangledMap> tMangledNames;
603 static QThreadStorage<int> tMangleID;
605 MangledMap & mangledNames = tMangledNames.localData();
607 // If the name is already known, just return it.
608 MangledMap::const_iterator const known = mangledNames.find(orig);
609 if (known != mangledNames.end())
610 return known->second;
612 // Start creating the mangled name by iterating over the characters.
614 docstring::const_iterator it = orig.begin();
615 docstring::const_iterator end = orig.end();
617 // Make sure it starts with a letter.
// NOTE(review): *it is read without a visible check that orig is non-empty;
// for an empty orig this dereferences end(). Also, a leading '.', '-' or '_'
// passes this test although it is not a letter. TODO confirm intent.
618 if (!isAlphaASCII(*it) && allowed.find(*it) >= allowed.size())
622 bool mangle = false; // Indicates whether the ID had to be changed, i.e. whether it is no longer guaranteed to be unique.
623 for (; it != end; ++it) {
625 if (isAlphaASCII(c) || isDigitASCII(c) || c == '-' || c == '.'
626 || allowed.find(c) < allowed.size())
// NOTE(review): '_' is part of `allowed`, so it is already caught by the
// condition above; the c == '_' test below can never be true.
628 else if (c == '_' || c == ' ') {
632 else if (c == ':' || c == ',' || c == ';' || c == '!') {
// A per-thread running number is appended to the ID.
643 int & mangleID = tMangleID.localData();
644 content += "-" + convert<docstring>(mangleID++);
// Remember the result so that later calls with the same orig agree.
647 mangledNames[orig] = content;
// Writes the opening tag <name attribute...> to os. Nothing is written when
// name is empty, is the comment pseudo-name "!-- --", or is "dummy".
653 void xml::openTag(odocstream & os, string const & name, string const & attribute)
656 // This should be fixed in layout files later.
// Every '<' and '>' in the attribute text is replaced by a double quote.
657 string param = subst(attribute, "<", "\"");
658 param = subst(param, ">", "\"");
660 // Note: we ignore the name if it is empty or if it is a comment "<!-- -->" or
661 // if the name is *dummy*.
662 // We ignore dummy because dummy is not a valid docbook element and it is
663 // the internal name given to single paragraphs in the latex output.
664 // This allows us to simplify the code a lot and is a reasonable compromise.
665 if (!name.empty() && name != "!-- --" && name != "dummy") {
666 os << '<' << from_ascii(name);
668 os << ' ' << from_ascii(param);
// Writes the closing tag </name> to os, under the same conditions as
// openTag(): nothing is written when name is empty, "!-- --" or "dummy".
674 void xml::closeTag(odocstream & os, string const & name)
676 if (!name.empty() && name != "!-- --" && name != "dummy")
677 os << "</" << from_ascii(name) << '>';
// Writes the opening tag for paragraph par. The tag name and the attribute
// template come from the paragraph's layout (latexname() / latexparam()); the
// attribute string is built from the paragraph ID and/or a counter value,
// then passed to openTag(os, name, attribute).
681 void xml::openTag(Buffer const & buf, odocstream & os,
682 OutputParams const & runparams, Paragraph const & par)
684 Layout const & style = par.layout();
685 string const & name = style.latexname();
// param is a copy because it may be edited below.
686 string param = style.latexparam();
687 Counters & counters = buf.params().documentClass().counters();
689 string id = par.getID(buf, runparams);
693 if (param.find('#') != string::npos) {
// Remove an "id=<...>" placeholder from the template.
// NOTE(review): `end` is the first '>' anywhere in param, not the first one
// after `pos`. If a '>' occurs before "id=<", end - pos + 1 wraps around and
// erase() removes everything from pos to the end of the string. Searching
// with param.find(">", pos) would avoid that -- confirm intent.
694 string::size_type pos = param.find("id=<");
695 string::size_type end = param.find(">");
696 if( pos != string::npos && end != string::npos)
697 param.erase(pos, end-pos + 1);
699 attribute = id + ' ' + param;
701 if (param.find('#') != string::npos) {
703 if (!style.counter.empty())
704 // This uses InternalUpdate at the moment because xml output
705 // does not do anything with tracked counters, and it would need
706 // to track layouts if it did want to use them.
707 counters.step(style.counter, InternalUpdate);
709 counters.step(from_ascii(name), InternalUpdate);
// Every '#' in the template is replaced by the value of the counter named
// after the tag.
// NOTE(review): the value is always read from the counter `name`, even when
// style.counter was the one stepped above -- confirm this is intended.
710 int i = counters.value(from_ascii(name));
711 attribute = subst(param, "#", convert<string>(i));
716 openTag(os, name, attribute);
// Writes the closing tag for paragraph par, using the tag name given by its
// layout (latexname()).
720 void xml::closeTag(odocstream & os, Paragraph const & par)
722 Layout const & style = par.layout();
723 closeTag(os, style.latexname());