3 * This file is part of LyX, the document processor.
4 * License details can be found in the file COPYING.
9 * Full author contact details are available in file CREDITS.
17 #include "BufferParams.h"
20 #include "OutputParams.h"
21 #include "Paragraph.h"
23 #include "TextClass.h"
25 #include "support/convert.h"
26 #include "support/docstream.h"
27 #include "support/lstrings.h"
28 #include "support/textutils.h"
33 #include <QThreadStorage>
34 #include <support/lassert.h>
37 using namespace lyx::support;
// Produces the escaped representation of the single character `c`,
// dispatching on the escape setting `e`.
// NOTE(review): only fragments of the switch body are visible in this
// excerpt; what each case emits must be confirmed against the full file.
43 docstring escapeChar(char_type c, XMLStream::EscapeSettings e)
46 switch (e) { // For HTML: always ESCAPE_NONE. For XML: it depends, hence the parameter.
47 case XMLStream::ESCAPE_NONE:
50 case XMLStream::ESCAPE_ALL:
// One of the characters given special treatment under ESCAPE_ALL is '>'.
54 } else if (c == '>') {
// presumably escapes a narrower set of characters than ESCAPE_ALL — TODO confirm
59 case XMLStream::ESCAPE_AND:
70 // escape what needs escaping
// Walks `str` character by character and streams escapeChar(ch, e) for each
// one into `d`.
// NOTE(review): the declaration of `d` and the return statement are not
// visible in this excerpt.
71 docstring xmlize(docstring const &str, XMLStream::EscapeSettings e) {
73 docstring::const_iterator it = str.begin();
74 docstring::const_iterator en = str.end();
75 for (; it != en; ++it)
76 d << escapeChar(*it, e);
// Narrow-character overload: checks (through LATTEST) that `c` is below 0x80,
// then widens it to char_type and forwards to the char_type overload.
81 docstring escapeChar(char c, XMLStream::EscapeSettings e) {
82 LATTEST(static_cast<unsigned char>(c) < 0x80);
83 return escapeChar(static_cast<char_type>(c), e);
// Appends to `newname` a copy of `str` in which every character that is not
// ASCII alphanumeric (per isAlnumASCII) is replaced by '_'.
// NOTE(review): the declaration of `newname` and the return statement are not
// visible in this excerpt.
87 docstring cleanAttr(docstring const & str)
90 docstring::const_iterator it = str.begin();
91 docstring::const_iterator en = str.end();
92 for (; it != en; ++it) {
93 char_type const c = *it;
94 newname += isAlnumASCII(c) ? c : char_type('_');
// Builds the textual form of this start tag: '<' followed by tag_, then,
// when attr_ is non-empty, a space and the attribute text with its leading
// whitespace removed.
// NOTE(review): the closing of the tag and the return statement are not
// visible in this excerpt.
100 docstring StartTag::writeTag() const {
101 docstring output = '<' + tag_;
102 if (!attr_.empty()) {
// Attributes are passed through xmlize() with ESCAPE_NONE.
103 docstring attributes = xml::xmlize(attr_, XMLStream::ESCAPE_NONE);
// Strip leading whitespace from the attribute text.
// NOTE(review): std::isspace(int) is only defined for values representable as
// unsigned char (or EOF); docstring characters are presumably wider than
// char, so non-ASCII input may be undefined behaviour here — confirm.
104 attributes.erase(attributes.begin(), std::find_if(attributes.begin(), attributes.end(),
105 [](int c) {return !std::isspace(c);}));
// Nothing is appended if the attributes were whitespace only.
106 if (!attributes.empty()) {
107 output += ' ' + attributes;
// Returns the closing-tag text for this tag: "</" + tag_ + ">".
115 docstring StartTag::writeEndTag() const {
116 return from_utf8("</") + tag_ + from_utf8(">");
// Equality comparison between this start tag and a FontTag.
// NOTE(review): the body of this operator is not visible in this excerpt.
120 bool StartTag::operator==(FontTag const &rhs) const {
// Returns the closing-tag text for this end tag: "</" + tag_ + ">".
125 docstring EndTag::writeEndTag() const {
126 return from_utf8("</") + tag_ + from_utf8(">");
// Builds the textual form of this complete tag: '<' followed by tag_
// (converted from UTF-8), then, when attr_ is non-empty, a space and the
// attribute text with its leading whitespace removed.
// NOTE(review): the closing of the tag and the return statement are not
// visible in this excerpt.
130 docstring CompTag::writeTag() const {
131 docstring output = '<' + from_utf8(tag_);
132 if (!attr_.empty()) {
133 // Erase the beginning of the attributes if it contains space characters: this function deals with that
135 docstring attributes = xmlize(from_utf8(attr_), XMLStream::ESCAPE_NONE);
// NOTE(review): std::isspace(int) is only defined for values representable as
// unsigned char (or EOF); docstring characters are presumably wider than
// char, so non-ASCII input may be undefined behaviour here — confirm.
136 attributes.erase(attributes.begin(), std::find_if(attributes.begin(), attributes.end(),
137 [](int c) {return !std::isspace(c);}));
// Nothing is appended if the attributes were whitespace only.
138 if (!attributes.empty()) {
139 output += ' ' + attributes;
// Equality against a generic StartTag: obtains the other tag's FontTag view
// through asFontTag() and compares the two font types.
// NOTE(review): the lines between obtaining `ftag` and the final comparison
// are not visible; presumably they handle a null `ftag` — confirm.
147 bool FontTag::operator==(StartTag const & tag) const
149 FontTag const * const ftag = tag.asFontTag();
152 return (font_type_ == ftag->font_type_);
// Writes the message `s` to os_ wrapped in an XML comment of the form
// "<!-- Output Error: ... -->" followed by a newline.
// The message text is concatenated as-is, without escaping.
158 void XMLStream::writeError(std::string const &s) const {
160 os_ << from_utf8("<!-- Output Error: " + s + " -->\n");
// docstring overload: writes the message `s` to os_ wrapped in an XML comment
// of the form "<!-- Output Error: ... -->" followed by a newline.
// The message text is streamed as-is, without escaping.
164 void XMLStream::writeError(docstring const &s) const {
166 os_ << from_utf8("<!-- Output Error: ") << s << from_utf8(" -->\n");
// Closes the font tags sitting on top of tag_stack_, writing their end tags
// to os_. If a non-font tag other than parsep_tag is then on top, an error
// comment listing the still-open tags is written.
// NOTE(review): most return statements fall outside this excerpt; the only
// visible one is the `return true` fallback inside the LASSERT.
170 bool XMLStream::closeFontTags() {
171 if (isTagPending(xml::parsep_tag))
172 // we haven't had any content
175 // this may be a useless check, since we ought at least to have
176 // the parsep_tag. but it can't hurt too much to be careful.
177 if (tag_stack_.empty())
180 // first, we close any open font tags we can close
181 TagPtr *curtag = &tag_stack_.back();
182 while ((*curtag)->asFontTag()) {
// No end tag is written for the parsep_tag marker itself.
183 if (**curtag != xml::parsep_tag)
184 os_ << (*curtag)->writeEndTag();
185 tag_stack_.pop_back();
186 // this shouldn't happen, since then the font tags
187 // weren't in any other tag.
188 LASSERT(!tag_stack_.empty(), return true);
// Re-read the top of the stack after the pop.
189 curtag = &tag_stack_.back();
// The loop stopped on a non-font tag; check whether it is the parsep_tag.
192 if (**curtag == xml::parsep_tag)
195 // so we've hit a non-font tag.
196 writeError("Tags still open in closeFontTags(). Probably not a problem,\n"
197 "but you might want to check these tags:");
// Walk the stack from the most recently opened tag downwards, reporting each
// tag name; the parsep_tag check presumably ends the walk — confirm.
198 TagDeque::const_reverse_iterator it = tag_stack_.rbegin();
199 TagDeque::const_reverse_iterator const en = tag_stack_.rend();
200 for (; it != en; ++it) {
201 if (**it == xml::parsep_tag)
203 writeError((*it)->tag_);
// Marks the start of a division by queueing a StartTag built from parsep_tag
// on pending_tags_.
// NOTE(review): how `keep_empty` is used is not visible in this excerpt.
209 void XMLStream::startDivision(bool keep_empty) {
210 pending_tags_.push_back(makeTagPtr(xml::StartTag(xml::parsep_tag)));
// Ends the current division. If the parsep_tag is still pending, pending
// tags are discarded from the back until it is reached. Otherwise the
// parsep_tag is expected on tag_stack_: tags above it are reported as errors
// and closed, and an error is written if it is not open at all.
// NOTE(review): the break/return statements of the loops below are not
// visible in this excerpt.
216 void XMLStream::endDivision() {
217 if (isTagPending(xml::parsep_tag)) {
218 // this case is normal. it just means we didn't have content,
219 // so the parsep_tag never got moved onto the tag stack.
220 while (!pending_tags_.empty()) {
221 // clear all pending tags up to and including the parsep tag.
222 // note that we work from the back, because we want to get rid
223 // of everything that hasn't been used.
// Copy the pointer before popping so it can still be compared afterwards.
224 TagPtr const cur_tag = pending_tags_.back();
225 pending_tags_.pop_back();
226 if (*cur_tag == xml::parsep_tag)
231 dumpTagStack("EndDivision");
237 if (!isTagOpen(xml::parsep_tag)) {
238 writeError("No division separation tag found in endDivision().");
242 // this case is also normal, if the parsep tag is the last one
243 // on the stack. otherwise, it's an error.
244 while (!tag_stack_.empty()) {
// Copy the pointer before popping so it can still be used afterwards.
245 TagPtr const cur_tag = tag_stack_.back();
246 tag_stack_.pop_back();
247 if (*cur_tag == xml::parsep_tag)
// Any other tag found above the parsep_tag is reported and force-closed.
249 writeError("Tag `" + cur_tag->tag_ + "' still open at end of paragraph. Closing.");
250 os_ << cur_tag->writeEndTag();
254 dumpTagStack("EndDivision");
// Flushes pending_tags_ from front to back: each tag's start text is written
// to os_, the tag is pushed onto tag_stack_ and then removed from
// pending_tags_.
// The parsep_tag comparison appears to guard the write, so that the marker
// itself produces no output — a line between the two is not visible; confirm.
259 void XMLStream::clearTagDeque() {
260 while (!pending_tags_.empty()) {
261 TagPtr const & tag = pending_tags_.front();
262 if (*tag != xml::parsep_tag)
264 os_ << tag->writeTag();
// Push before popping: `tag` refers to the front element of pending_tags_.
265 tag_stack_.push_back(tag);
266 pending_tags_.pop_front();
// Writes the string `d` to os_, escaped according to the current escape_
// setting, then resets escape_ to ESCAPE_ALL.
// NOTE(review): any preceding step and the return statement are not visible
// in this excerpt.
271 XMLStream &XMLStream::operator<<(docstring const &d) {
273 os_ << xml::xmlize(d, escape_);
274 escape_ = ESCAPE_ALL;
// Writes the C string `s` to os_: it is converted with from_ascii(), escaped
// according to the current escape_ setting, and escape_ is then reset to
// ESCAPE_ALL.
// NOTE(review): any preceding step and the return statement are not visible
// in this excerpt.
279 XMLStream &XMLStream::operator<<(const char *s) {
281 docstring const d = from_ascii(s);
282 os_ << xml::xmlize(d, escape_);
283 escape_ = ESCAPE_ALL;
// Writes the single character `c` to os_, escaped according to the current
// escape_ setting, then resets escape_ to ESCAPE_ALL.
// NOTE(review): any preceding step and the return statement are not visible
// in this excerpt.
288 XMLStream &XMLStream::operator<<(char_type c) {
290 os_ << xml::escapeChar(c, escape_);
291 escape_ = ESCAPE_ALL;
// Writes the narrow character `c` to os_, escaped according to the current
// escape_ setting, then resets escape_ to ESCAPE_ALL.
// NOTE(review): any preceding step and the return statement are not visible
// in this excerpt.
296 XMLStream &XMLStream::operator<<(char c) {
298 os_ << xml::escapeChar(c, escape_);
299 escape_ = ESCAPE_ALL;
// Integer overload of the stream operator; like the other content overloads,
// it resets escape_ to ESCAPE_ALL afterwards.
// NOTE(review): the statement that writes `i` is not visible in this excerpt.
304 XMLStream &XMLStream::operator<<(int i) {
307 escape_ = ESCAPE_ALL;
// Stream operator taking an EscapeSettings value.
// NOTE(review): the body is not visible here; presumably it stores `e` in
// escape_ for the next write — confirm.
312 XMLStream &XMLStream::operator<<(EscapeSettings e) {
// Queues a start tag: a copy of `tag` is appended to pending_tags_ rather
// than written immediately.
// Tags with an empty name get special handling (presumably an early return —
// the statement is not visible in this excerpt).
318 XMLStream &XMLStream::operator<<(xml::StartTag const &tag) {
319 if (tag.tag_.empty())
321 pending_tags_.push_back(makeTagPtr(tag));
// Queues a paragraph tag: a copy of `tag` is appended to pending_tags_
// rather than written immediately.
// Tags with an empty name get special handling (presumably an early return —
// the statement is not visible in this excerpt).
328 XMLStream &XMLStream::operator<<(xml::ParTag const &tag) {
329 if (tag.tag_.empty())
331 pending_tags_.push_back(makeTagPtr(tag));
// Writes a complete (self-contained) tag: the text from tag.writeTag() goes
// straight to os_ instead of being queued on pending_tags_.
// Tags with an empty name get special handling (presumably an early return —
// the statement is not visible in this excerpt).
336 XMLStream &XMLStream::operator<<(xml::CompTag const &tag) {
337 if (tag.tag_.empty())
340 os_ << tag.writeTag();
// Queues a font tag: a copy of `tag` is appended to pending_tags_ rather
// than written immediately.
// Tags with an empty name get special handling (presumably an early return —
// the statement is not visible in this excerpt).
345 XMLStream &XMLStream::operator<<(xml::FontTag const &tag) {
346 if (tag.tag_.empty())
348 pending_tags_.push_back(makeTagPtr(tag));
// Writes a single newline character to os_.
353 XMLStream &XMLStream::operator<<(xml::CR const &) {
354 os_ << from_ascii("\n");
// Scans tag_stack_ from its first to its last element looking for `stag`.
// NOTE(review): the comparison and the return statements are not visible in
// this excerpt.
359 bool XMLStream::isTagOpen(xml::StartTag const &stag) const {
360 auto sit = tag_stack_.begin();
361 auto sen = tag_stack_.cend();
362 for (; sit != sen; ++sit)
// EndTag overload: scans tag_stack_ from its first to its last element
// looking for a tag matching `etag`.
// NOTE(review): the comparison and the return statements are not visible in
// this excerpt.
369 bool XMLStream::isTagOpen(xml::EndTag const &etag) const {
370 auto sit = tag_stack_.begin();
371 auto sen = tag_stack_.cend();
372 for (; sit != sen; ++sit)
// Scans pending_tags_ from its first to its last element looking for `stag`.
// NOTE(review): the comparison and the return statements are not visible in
// this excerpt.
379 bool XMLStream::isTagPending(xml::StartTag const &stag) const {
380 auto sit = pending_tags_.begin();
381 auto sen = pending_tags_.cend();
382 for (; sit != sen; ++sit)
389 // this is complicated, because we want to make sure that
390 // everything is properly nested. the code ought to make
391 // sure of that, but we won't assert (yet) if we run into
392 // a problem. we'll just output error messages and try our
393 // best to make things work.
//
// Overview of the visible logic:
//  1. If tags are pending: a matching last pending tag is dropped (empty
//     element); a matching tag deeper in the pending queue is erased with an
//     error; otherwise the pending tags are reported and cleared.
//  2. If the tag being closed is the top of tag_stack_, its end tag is
//     written and it is popped.
//  3. Otherwise, for font tags, intervening font tags are closed, the
//     requested tag is closed, and the intervening ones are re-queued on
//     pending_tags_; for other tags, everything above is force-closed.
// NOTE(review): the return statements that separate these cases are not
// visible in this excerpt.
394 XMLStream &XMLStream::operator<<(xml::EndTag const &etag) {
395 if (etag.tag_.empty())
398 // if this tag is pending, we can simply discard it.
399 if (!pending_tags_.empty()) {
400 if (etag == *pending_tags_.back()) {
401 // we have <tag></tag>, so we discard it and remove it
402 // from the pending_tags_.
403 pending_tags_.pop_back();
407 // there is a pending tag that isn't the one we are trying
410 // is this tag itself pending?
411 // non-const iterators because we may call erase().
412 TagDeque::iterator dit = pending_tags_.begin();
413 TagDeque::iterator const den = pending_tags_.end();
414 for (; dit != den; ++dit) {
416 // it was pending, so we just erase it
417 writeError("Tried to close pending tag `" + to_utf8(etag.tag_)
418 + "' when other tags were pending. Last pending tag is `"
419 + to_utf8(pending_tags_.back()->writeTag())
420 + "'. Tag discarded.");
421 pending_tags_.erase(dit);
425 // so etag isn't itself pending. is it even open?
426 if (!isTagOpen(etag)) {
427 writeError("Tried to close `" + to_utf8(etag.tag_)
428 + "' when tag was not open. Tag discarded.");
431 // ok, so etag is open.
432 // our strategy will be as below: we will do what we need to
433 // do to close this tag.
434 string estr = "Closing tag `" + to_utf8(etag.tag_)
435 + "' when other tags are pending. Discarded pending tags:\n";
// NOTE(review): `den` was captured before the erase() above; it is only
// still valid here if the erase branch leaves the function before reaching
// this loop — confirm against the lines not shown.
436 for (dit = pending_tags_.begin(); dit != den; ++dit)
437 estr += to_utf8(xml::xmlize((*dit)->writeTag(), XMLStream::ESCAPE_ALL)) + "\n";
439 // clear the pending tags...
440 pending_tags_.clear();
441 // ...and then just fall through.
444 // make sure there are tags to be closed
445 if (tag_stack_.empty()) {
446 writeError("Tried to close `" + etag.tag_
447 + "' when no tags were open!");
451 // is the tag we are closing the last one we opened?
452 if (etag == *tag_stack_.back()) {
454 os_ << etag.writeEndTag();
455 // ...and forget about it
456 tag_stack_.pop_back();
460 // we are trying to close a tag other than the one last opened.
461 // let's first see if this particular tag is still open somehow.
462 if (!isTagOpen(etag)) {
463 writeError("Tried to close `" + etag.tag_
464 + "' when tag was not open. Tag discarded.");
468 // so the tag was opened, but other tags have been opened since
469 // and not yet closed.
470 // if it's a font tag, though...
471 if (etag.asFontTag()) {
472 // it won't be a problem if the other tags open since this one
473 // are also font tags.
474 TagDeque::const_reverse_iterator rit = tag_stack_.rbegin();
475 TagDeque::const_reverse_iterator ren = tag_stack_.rend();
476 for (; rit != ren; ++rit) {
479 if (!(*rit)->asFontTag()) {
480 // we'll just leave it and, presumably, have to close it later.
481 writeError("Unable to close font tag `" + etag.tag_
482 + "' due to open non-font tag `" + (*rit)->tag_ + "'.");
488 // <em>this is <strong>bold
489 // and are being asked to closed em. we want:
490 // <em>this is <strong>bold</strong></em><strong>
491 // first, we close the intervening tags...
492 TagPtr *curtag = &tag_stack_.back();
493 // ...remembering them in a stack.
// The loop relies on etag being present in tag_stack_ (checked with
// isTagOpen() above), so the stack cannot run empty before the match.
495 while (etag != **curtag) {
496 os_ << (*curtag)->writeEndTag();
497 fontstack.push_back(*curtag);
498 tag_stack_.pop_back();
499 curtag = &tag_stack_.back();
// Close the requested tag itself.
501 os_ << etag.writeEndTag();
502 tag_stack_.pop_back();
504 // ...and restore the other tags.
// Reverse iteration re-queues the tags in their original opening order.
505 rit = fontstack.rbegin();
506 ren = fontstack.rend();
507 for (; rit != ren; ++rit)
508 pending_tags_.push_back(*rit);
512 // it wasn't a font tag.
513 // so other tags were opened before this one and not properly closed.
514 // so we'll close them, too. that may cause other issues later, but it
515 // at least guarantees proper nesting.
516 writeError("Closing tag `" + etag.tag_
517 + "' when other tags are open, namely:");
518 TagPtr *curtag = &tag_stack_.back();
// As above, etag is known to be in tag_stack_, so this terminates on a match.
519 while (etag != **curtag) {
520 writeError((*curtag)->tag_);
// No end tag is written for the parsep_tag marker.
521 if (**curtag != xml::parsep_tag)
522 os_ << (*curtag)->writeEndTag();
523 tag_stack_.pop_back();
524 curtag = &tag_stack_.back();
526 // curtag is now the one we actually want.
527 os_ << (*curtag)->writeEndTag();
528 tag_stack_.pop_back();
// Appends to `bin` the escaped form (per escapeChar and setting `e`) of every
// character of `raw`, in order.
// NOTE(review): the declaration of `bin` and the return statement are not
// visible in this excerpt.
534 docstring xml::escapeString(docstring const & raw, XMLStream::EscapeSettings e)
// Pre-size the buffer on the guess that escaping about doubles the length.
537 bin.reserve(raw.size() * 2); // crude approximation is sufficient
538 for (size_t i = 0; i != raw.size(); ++i)
539 bin += xml::escapeChar(raw[i], e);
// Returns `label` with a counter value appended. The counter is a
// function-local static atomic_uint starting at 1000 and is pre-incremented
// on every call, so the first suffix produced is 1001.
545 docstring const xml::uniqueID(docstring const & label)
548 static atomic_uint seed(1000);
549 return label + convert<docstring>(++seed);
// Derives an identifier from `orig` by filtering its characters, caching the
// result per thread so that the same input always maps to the same output.
// NOTE(review): the declaration of `content`, the bodies of the character
// branches and the return statement are not visible in this excerpt.
553 docstring xml::cleanID(docstring const &orig)
555 // The standard xml:id only allows letters,
556 // digits, '-' and '.' in a name.
557 // This routine replaces illegal characters by '-' or '.'
558 // and adds a number for uniqueness if need be.
// NOTE(review): `allowed` also contains '_', which the comment above does
// not mention — confirm which of the two is intended.
559 docstring const allowed = from_ascii(".-_");
561 // Use a cache of already mangled names: the alterations may merge several IDs as one. This ensures that the IDs
562 // are not mixed up in the document.
563 typedef map<docstring, docstring> MangledMap;
564 static QThreadStorage<MangledMap> tMangledNames;
565 static QThreadStorage<int> tMangleID;
567 MangledMap & mangledNames = tMangledNames.localData();
569 // If the name is already known, just return it.
570 MangledMap::const_iterator const known = mangledNames.find(orig);
571 if (known != mangledNames.end())
572 return known->second;
574 // Start creating the mangled name by iterating over the characters.
576 docstring::const_iterator it = orig.begin();
577 docstring::const_iterator end = orig.end();
579 // Make sure it starts with a letter.
// NOTE(review): `*it` is dereferenced with no visible emptiness check; if
// `orig` can be empty this reads past the end — confirm a guard exists.
// The find() comparison is true when the character is not in `allowed`.
580 if (!isAlphaASCII(*it) && allowed.find(*it) >= allowed.size())
584 bool mangle = false; // Indicates whether the ID had to be changed, i.e. if ID no more ensured to be unique.
585 for (; it != end; ++it) {
// Characters accepted as-is: ASCII letters, ASCII digits, and those in `allowed`.
587 if (isAlphaASCII(c) || isDigitASCII(c) || c == '-' || c == '.'
588 || allowed.find(c) < allowed.size())
// NOTE(review): '_' is already in `allowed`, so only ' ' can reach this branch.
590 else if (c == '_' || c == ' ') {
594 else if (c == ':' || c == ',' || c == ';' || c == '!') {
// Append a per-thread running number, presumably when `mangle` is set — confirm.
605 int & mangleID = tMangleID.localData();
606 content += "-" + convert<docstring>(mangleID++);
// NOTE(review): indexes content[size() - 1]; confirm `content` cannot be
// empty on this path.
607 } else if (isDigitASCII(content[content.size() - 1]))
// Remember the result so later calls with the same `orig` hit the cache.
610 mangledNames[orig] = content;
// Writes the opening of element `name` to `os`, with `attribute` as its
// attribute text, after replacing every '<' and '>' in the attribute text
// by a double quote.
// NOTE(review): the condition guarding the attribute output and the closing
// '>' of the tag are not visible in this excerpt.
616 void xml::openTag(odocstream & os, string const & name, string const & attribute)
619 // This should be fixed in layout files later.
620 string param = subst(attribute, "<", "\"");
621 param = subst(param, ">", "\"");
623 // Note: we ignore the name if it is empty or if it is a comment "<!-- -->" or
624 // if the name is *dummy*.
625 // We ignore dummy because dummy is not a valid docbook element and it is
626 // the internal name given to single paragraphs in the latex output.
627 // This allows us to simplify the code a lot and is a reasonable compromise.
628 if (!name.empty() && name != "!-- --" && name != "dummy") {
629 os << '<' << from_ascii(name);
631 os << ' ' << from_ascii(param);
// Writes the closing tag "</name>" to `os`. Nothing is written when `name`
// is empty, is the comment marker "!-- --", or is "dummy".
637 void xml::closeTag(odocstream & os, string const & name)
639 if (!name.empty() && name != "!-- --" && name != "dummy")
640 os << "</" << from_ascii(name) << '>';
// Opens the element for paragraph `par`: the element name and the parameter
// template come from the paragraph's layout (latexname() / latexparam()),
// an attribute string is built from them, and the work is delegated to
// openTag(os, name, attribute).
// NOTE(review): the declaration of `attribute` and the if/else structure
// that selects between the branches below are not visible in this excerpt.
644 void xml::openTag(Buffer const & buf, odocstream & os,
645 OutputParams const & runparams, Paragraph const & par)
647 Layout const & style = par.layout();
648 string const & name = style.latexname();
649 string param = style.latexparam();
650 Counters & counters = buf.params().documentClass().counters();
652 string id = par.getID(buf, runparams);
// When the template contains '#', cut the "id=<...>" part out of it.
656 if (param.find('#') != string::npos) {
657 string::size_type pos = param.find("id=<");
// NOTE(review): this finds the first '>' anywhere in `param`, not the first
// one after `pos`; if a '>' precedes "id=<", `end-pos + 1` wraps around and
// erase() removes everything from `pos` onwards — confirm intended.
658 string::size_type end = param.find(">");
659 if( pos != string::npos && end != string::npos)
660 param.erase(pos, end-pos + 1);
// The paragraph's own id is put in front of the remaining parameters.
662 attribute = id + ' ' + param;
664 if (param.find('#') != string::npos) {
666 if (!style.counter.empty())
667 // This uses InternalUpdate at the moment because xml output
668 // does not do anything with tracked counters, and it would need
669 // to track layouts if it did want to use them.
670 counters.step(style.counter, InternalUpdate);
672 counters.step(from_ascii(name), InternalUpdate);
// NOTE(review): the value is always read from the counter named after
// `name`, even when style.counter was the one stepped — confirm intended.
673 int i = counters.value(from_ascii(name));
// Every '#' in the template is replaced by the counter value.
674 attribute = subst(param, "#", convert<string>(i));
679 openTag(os, name, attribute);
// Closes the element for paragraph `par`, using the element name returned by
// the paragraph layout's latexname(), via closeTag(os, name).
683 void xml::closeTag(odocstream & os, Paragraph const & par)
685 Layout const & style = par.layout();
686 closeTag(os, style.latexname());