2 * \file output_xhtml.cpp
3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
8 * This code is based upon output_docbook.cpp
10 * Full author contact details are available in file CREDITS.
15 #include "output_xhtml.h"
18 #include "buffer_funcs.h"
19 #include "BufferParams.h"
22 #include "OutputParams.h"
23 #include "Paragraph.h"
24 #include "ParagraphList.h"
25 #include "ParagraphParameters.h"
28 #include "TextClass.h"
30 #include "support/lassert.h"
31 #include "support/debug.h"
32 #include "support/lstrings.h"
37 using namespace lyx::support;
// Takes a single character c (char_type) and returns a docstring for it;
// by its name and its use from XHTMLStream::operator<<(char_type) this is
// the escaped form of c.
// NOTE(review): the function body is not visible in this view, so which
// characters are actually rewritten cannot be confirmed from here.
43 docstring escapeChar(char_type c)
67 // escape what needs escaping
// docstring overload: walks str from begin() to end(), one character at a
// time.
// NOTE(review): the loop body and the return statement are not visible in
// this view; presumably each character goes through escapeChar(), as in the
// std::string overload -- confirm.
68 docstring htmlize(docstring const & str) {
70 docstring::const_iterator it = str.begin();
71 docstring::const_iterator en = str.end();
72 for (; it != en; ++it)
// std::string counterpart of escapeChar(char_type): takes one char and
// returns a string for it. It is called once per character by
// htmlize(string const &).
// NOTE(review): the function body is not visible in this view, so the set
// of characters that are rewritten cannot be confirmed from here.
78 string escapeChar(char c)
102 // escape what needs escaping
// std::string overload: every character of str is passed through
// escapeChar() and the result is streamed into d, in input order.
// NOTE(review): the declaration of d and the return statement are not
// visible in this view.
103 string htmlize(string const & str) {
105 string::const_iterator it = str.begin();
106 string::const_iterator en = str.end();
107 for (; it != en; ++it)
108 d << escapeChar(*it);
// Builds newname from str character by character: a character for which
// isalnum() is true is appended unchanged, every other character is
// replaced by '_'.
// NOTE(review): isalnum() is handed a plain char. Where char is signed, a
// byte >= 0x80 becomes a negative argument, which the C library does not
// define behavior for; consider casting to unsigned char first.
// NOTE(review): the declaration of newname and the return statement are not
// visible in this view.
113 string cleanAttr(string const & str)
116 string::const_iterator it = str.begin();
117 string::const_iterator en = str.end();
118 for (; it != en; ++it)
119 newname += isalnum(*it) ? *it : '_';
// docstring overload: iterates over str from begin() to end().
// NOTE(review): the loop body and the return statement are not visible in
// this view; presumably it mirrors the std::string overload (non-alphanumeric
// characters replaced by '_') -- confirm.
124 docstring cleanAttr(docstring const & str)
127 docstring::const_iterator it = str.begin();
128 docstring::const_iterator en = str.end();
129 for (; it != en; ++it)
// Returns true when s is exactly "em" or "strong"; any other tag name is
// treated as a non-font tag. The comparison is an exact, case-sensitive
// string match.
138 bool isFontTag(string const & s)
140 return s == "em" || s == "strong"; // others?
// Renders this start tag as text: the output begins with "<" followed by
// tag_, and attr_ (run through html::htmlize()) is appended after a single
// space. The assembled std::string is converted with from_utf8() before
// being returned.
// NOTE(review): the condition guarding the attribute append and the code
// that terminates the tag are not visible in this view.
145 docstring StartTag::asTag() const
147 string output = "<" + tag_;
149 output += " " + html::htmlize(attr_);
151 return from_utf8(output);
// Returns the closing tag that matches this start tag, i.e. "</" + tag_ +
// ">", converted with from_utf8(). Attributes play no part in the result.
155 docstring StartTag::asEndTag() const
157 string output = "</" + tag_ + ">";
158 return from_utf8(output);
// Returns this end tag as text, i.e. "</" + tag_ + ">", converted with
// from_utf8().
162 docstring EndTag::asEndTag() const
164 string output = "</" + tag_ + ">";
165 return from_utf8(output);
// Renders this tag as text: the output begins with "<" followed by tag_,
// and attr_ (run through html::htmlize()) is appended after a single space.
// The assembled std::string is converted with from_utf8() before being
// returned.
// NOTE(review): the condition guarding the attribute append and the way the
// tag is terminated are not visible in this view.
169 docstring CompTag::asTag() const
171 string output = "<" + tag_;
173 output += " " + html::htmlize(attr_);
175 return from_utf8(output);
179 ////////////////////////////////////////////////////////////////
183 ////////////////////////////////////////////////////////////////
// Binds the stream to the output stream os (stored in os_) and starts with
// nextraw_ set to false.
185 XHTMLStream::XHTMLStream(odocstream & os)
186 : os_(os), nextraw_(false)
// Writes a single "\n" straight to os_.
// NOTE(review): one line of the body preceding the write is not visible in
// this view.
190 void XHTMLStream::cr()
193 os_ << from_ascii("\n");
// Reports an output problem inside the generated document itself: s is
// written to os_ wrapped as "<!-- Output Error: " + s + " -->".
// NOTE(review): s is inserted verbatim; a message containing "--" or "-->"
// would end or corrupt the HTML comment. Callers in this file pass tag names
// and fixed text.
// NOTE(review): one line of the body preceding the write is not visible in
// this view.
197 void XHTMLStream::writeError(std::string const & s)
200 os_ << from_utf8("<!-- Output Error: " + s + " -->");
// Closes the run of font tags (see html::isFontTag) sitting on top of
// tag_stack_, writing their end tags to os_, and then checks that no font
// tag remains deeper in the stack.
// NOTE(review): the return statements are not visible in this view;
// presumably the result is noFontTags, i.e. true when no font tag is left
// open -- confirm.
204 bool XHTMLStream::closeFontTags()
// Nothing is open, so there is nothing to close.
206 if (tag_stack_.empty())
208 // first, we close any open font tags we can close
// curtag is a copy, so it stays usable after the pop_back() below.
209 StartTag curtag = tag_stack_.back();
210 while (html::isFontTag(curtag.tag_)) {
211 os_ << curtag.asEndTag();
212 tag_stack_.pop_back();
// Must test for emptiness before calling back() again.
213 if (tag_stack_.empty())
214 // this probably shouldn't happen, since then the
215 // font tags weren't in any other tag. but that
216 // problem will likely be caught elsewhere.
218 curtag = tag_stack_.back();
220 // so we've hit a non-font tag. let's see if any of the
221 // remaining tags are font tags.
222 TagStack::const_iterator it = tag_stack_.begin();
223 TagStack::const_iterator en = tag_stack_.end();
224 bool noFontTags = true;
225 for (; it != en; ++it) {
226 if (html::isFontTag(it->tag_)) {
// A font tag buried under a non-font tag cannot be closed here without
// breaking nesting, so it is only reported.
227 writeError("Font tag `" + it->tag_ + "' still open in closeFontTags().");
// Drains pending_tags_ from the front: each pending start tag is pushed
// onto tag_stack_ and then removed from the queue, so the relative order
// of the tags is preserved.
// NOTE(review): the lines between fetching the tag and pushing it are not
// visible in this view; presumably the tag is written to os_ there.
235 void XHTMLStream::clearTagDeque()
237 while (!pending_tags_.empty()) {
// tag refers to the element inside pending_tags_, so it has to be copied
// into tag_stack_ before pop_front() destroys it.
238 StartTag const & tag = pending_tags_.front();
242 tag_stack_.push_back(tag);
243 pending_tags_.pop_front();
// Writes the text d to os_ after passing it through html::htmlize().
// NOTE(review): the surrounding lines are not visible in this view;
// presumably pending tags are flushed first and nextraw_ selects an
// unescaped path -- confirm.
248 XHTMLStream & XHTMLStream::operator<<(docstring const & d)
255 os_ << html::htmlize(d);
// C-string overload: s is converted with from_ascii() and the resulting
// docstring is written to os_ after html::htmlize().
// NOTE(review): the lines between the conversion and the write are not
// visible in this view.
260 XHTMLStream & XHTMLStream::operator<<(const char * s)
263 docstring const d = from_ascii(s);
268 os_ << html::htmlize(d);
// Single-character overload: writes html::escapeChar(c) to os_.
// NOTE(review): the lines preceding the write are not visible in this view.
273 XHTMLStream & XHTMLStream::operator<<(char_type c)
280 os_ << html::escapeChar(c);
// Accepts an unnamed NextRaw marker object; elsewhere in this file it is
// streamed immediately before text that was already rendered
// (xs << XHTMLStream::NextRaw() << deferred).
// NOTE(review): the body is not visible in this view; presumably it sets
// nextraw_ so that the next write skips escaping -- confirm.
285 XHTMLStream & XHTMLStream::operator<<(NextRaw const &)
// Start tags are not written immediately: tag is appended to pending_tags_.
// A tag with an empty name is special-cased first.
// NOTE(review): the action taken for an empty tag name and the lines after
// the push are not visible in this view.
292 XHTMLStream & XHTMLStream::operator<<(StartTag const & tag)
294 if (tag.tag_.empty())
296 pending_tags_.push_back(tag);
// Overload for CompTag. A tag with an empty name is special-cased first.
// NOTE(review): everything after the empty-name test is not visible in this
// view, so how the tag is emitted cannot be confirmed from here.
303 XHTMLStream & XHTMLStream::operator<<(CompTag const & tag)
305 if (tag.tag_.empty())
// Linear search of tag_stack_ for an entry whose tag_ equals stag. Only the
// tag name is compared. pending_tags_ is not consulted in the visible code.
// NOTE(review): the return statements are not visible in this view;
// presumably true on a match and false otherwise.
315 bool XHTMLStream::isTagOpen(string const & stag)
317 TagStack::const_iterator sit = tag_stack_.begin();
318 TagStack::const_iterator const sen = tag_stack_.end();
319 for (; sit != sen; ++sit)
320 // we could check for the
321 if (sit->tag_ == stag)
327 // this is complicated, because we want to make sure that
328 // everything is properly nested. the code ought to make
329 // sure of that, but we won't assert (yet) if we run into
330 // a problem. we'll just output error messages and try our
331 // best to make things work.
//
// Overview of the visible logic:
//  1. If start tags are still pending, either cancel the matching pending
//     tag, or report the problem and drop pending tags.
//  2. If etag matches the top of tag_stack_, write its end tag and pop it.
//  3. Otherwise, if etag is a font tag separated from the top only by other
//     font tags, close those, close etag, and re-queue the others on
//     pending_tags_.
//  4. Otherwise close every tag above etag, reporting each one, then close
//     etag itself.
// NOTE(review): several statements (early returns, closing braces, the
// declaration of fontstack) are not visible in this view.
332 XHTMLStream & XHTMLStream::operator<<(EndTag const & etag)
334 if (etag.tag_.empty())
336 // first make sure we're not closing an empty tag
337 if (!pending_tags_.empty()) {
338 StartTag const & stag = pending_tags_.back();
339 if (etag.tag_ == stag.tag_) {
340 // we have <tag></tag>, so we discard it and remove it
341 // from the pending_tags_.
342 pending_tags_.pop_back();
345 // there is a pending tag that isn't the one we are trying
347 // is this tag itself pending?
348 // non-const iterators because we may call erase().
349 TagDeque::iterator dit = pending_tags_.begin();
// den is cached once, so it is only valid while pending_tags_ is unmodified.
350 TagDeque::iterator const den = pending_tags_.end();
351 for (; dit != den; ++dit) {
352 if (dit->tag_ == etag.tag_) {
353 // it was pending, so we just erase it
354 writeError("Tried to close pending tag `" + etag.tag_
355 + "' when other tags were pending. Last pending tag is `"
356 + pending_tags_.back().tag_ + "'. Tag discarded.");
// NOTE(review): erase() invalidates dit and den, so the loop must not
// continue after this point; the statement that follows is not visible
// here -- confirm it leaves the function.
357 pending_tags_.erase(dit);
361 // so etag isn't itself pending. is it even open?
362 if (!isTagOpen(etag.tag_)) {
363 writeError("Tried to close `" + etag.tag_
364 + "' when tag was not open. Tag discarded.");
367 // ok, so etag is open.
368 // our strategy will be as below: we will do what we need to
369 // do to close this tag.
370 string estr = "Closing tag `" + etag.tag_
371 + "' when other tags are pending. Discarded pending tags:\n";
// Reusing den is fine on this path only because pending_tags_ has not been
// modified since den was taken.
372 for (dit = pending_tags_.begin(); dit != den; ++dit)
373 estr += dit->tag_ + "\n";
375 // clear the pending tags...
376 pending_tags_.clear();
377 // ...and then just fall through.
380 // is the tag we are closing the last one we opened?
// NOTE(review): no check that tag_stack_ is non-empty is visible before this
// back() call when pending_tags_ was empty on entry; back() on an empty
// container is undefined behavior -- confirm a guard exists.
381 if (etag.tag_ == tag_stack_.back().tag_) {
383 os_ << etag.asEndTag();
384 // ...and forget about it
385 tag_stack_.pop_back();
389 // we are trying to close a tag other than the one last opened.
390 // let's first see if this particular tag is still open somehow.
391 if (!isTagOpen(etag.tag_)) {
392 writeError("Tried to close `" + etag.tag_
393 + "' when tag was not open. Tag discarded.");
397 // so the tag was opened, but other tags have been opened since
398 // and not yet closed.
399 // if it's a font tag, though...
400 if (html::isFontTag(etag.tag_)) {
401 // it won't be a problem if the other tags open since this one
402 // are also font tags.
// Walk from the most recently opened tag downwards until etag is reached.
403 TagStack::const_reverse_iterator rit = tag_stack_.rbegin();
404 TagStack::const_reverse_iterator ren = tag_stack_.rend();
405 for (; rit != ren; ++rit) {
406 if (rit->tag_ == etag.tag_)
408 if (!html::isFontTag(rit->tag_)) {
409 // we'll just leave it and, presumably, have to close it later.
410 writeError("Unable to close font tag `" + etag.tag_
411 + "' due to open non-font tag `" + rit->tag_ + "'.");
417 // <em>this is <strong>bold
418 // and are being asked to close em. we want:
419 // <em>this is <strong>bold</strong></em><strong>
420 // first, we close the intervening tags...
421 StartTag curtag = tag_stack_.back();
422 // ...remembering them in a stack.
// This loop relies on etag being present in tag_stack_ (checked with
// isTagOpen above); otherwise back() would eventually be called on an empty
// stack.
424 while (curtag.tag_ != etag.tag_) {
425 os_ << curtag.asEndTag();
426 fontstack.push_back(curtag);
427 tag_stack_.pop_back();
428 curtag = tag_stack_.back();
430 // now close our tag...
431 os_ << etag.asEndTag();
432 tag_stack_.pop_back();
434 // ...and restore the other tags.
// fontstack holds the tags innermost-first, so iterating it in reverse
// re-queues them in their original opening order.
435 rit = fontstack.rbegin();
436 ren = fontstack.rend();
437 for (; rit != ren; ++rit)
438 pending_tags_.push_back(*rit);
442 // it wasn't a font tag.
443 // so other tags were opened before this one and not properly closed.
444 // so we'll close them, too. that may cause other issues later, but it
445 // at least guarantees proper nesting.
446 writeError("Closing tag `" + etag.tag_
447 + "' when other tags are open, namely:");
448 StartTag curtag = tag_stack_.back();
// As above, termination relies on etag being present in tag_stack_.
449 while (curtag.tag_ != etag.tag_) {
450 writeError(curtag.tag_);
451 os_ << curtag.asEndTag();
452 tag_stack_.pop_back();
453 curtag = tag_stack_.back();
455 // curtag is now the one we actually want.
456 os_ << curtag.asEndTag();
457 tag_stack_.pop_back();
462 // End code for XHTMLStream
466 // convenience functions
// Streams to xs a StartTag built from the layout's htmltag() and
// htmlattr().
468 inline void openTag(XHTMLStream & xs, Layout const & lay)
470 xs << StartTag(lay.htmltag(), lay.htmlattr());
// Streams to xs an EndTag for the layout's htmltag(); the counterpart of
// openTag().
474 inline void closeTag(XHTMLStream & xs, Layout const & lay)
476 xs << EndTag(lay.htmltag());
// Streams to xs a StartTag built from the layout's htmllabeltag() and
// htmllabelattr().
480 inline void openLabelTag(XHTMLStream & xs, Layout const & lay)
482 xs << StartTag(lay.htmllabeltag(), lay.htmllabelattr());
// Streams to xs an EndTag for the layout's htmllabeltag(); the counterpart
// of openLabelTag().
486 inline void closeLabelTag(XHTMLStream & xs, Layout const & lay)
488 xs << EndTag(lay.htmllabeltag());
// Streams to xs a StartTag built from the layout's htmlitemtag() and
// htmlitemattr().
// NOTE(review): unlike openTag()/openLabelTag(), a third argument `true` is
// passed to StartTag; its meaning is defined by StartTag's constructor,
// which is not visible in this view.
492 inline void openItemTag(XHTMLStream & xs, Layout const & lay)
494 xs << StartTag(lay.htmlitemtag(), lay.htmlitemattr(), true);
// Streams to xs an EndTag for the layout's htmlitemtag(); the counterpart
// of openItemTag().
498 inline void closeItemTag(XHTMLStream & xs, Layout const & lay)
500 xs << EndTag(lay.htmlitemtag());
503 // end of convenience functions
// Starting from the paragraph after p, advances over every consecutive
// paragraph whose layout has latextype LATEX_PARAGRAPH, stopping at pend or
// at the first paragraph of another type.
// p is incremented before it is compared with pend, so the caller must pass
// p != pend.
// NOTE(review): the loop body and the return statement are not visible in
// this view; presumably the final p is returned.
505 ParagraphList::const_iterator searchParagraphHtml(
506 ParagraphList::const_iterator p,
507 ParagraphList::const_iterator const & pend)
509 for (++p; p != pend && p->layout().latextype == LATEX_PARAGRAPH; ++p)
// Scans forward from the paragraph after pstart, comparing each paragraph's
// latextype, depth and latexname() against those of pstart (bstyle, depth).
// pstart is dereferenced and incremented unconditionally, so the caller
// must pass pstart != pend.
// NOTE(review): the statements executed under each `if` and the return are
// not visible in this view; going by the comments, the scan ends at a
// command, at a shallower paragraph, or at a same-depth paragraph that is
// plain or has a different latexname(), while deeper paragraphs are
// skipped -- confirm.
516 ParagraphList::const_iterator searchEnvironmentHtml(
517 ParagraphList::const_iterator const pstart,
518 ParagraphList::const_iterator const & pend)
520 ParagraphList::const_iterator p = pstart;
521 Layout const & bstyle = p->layout();
522 size_t const depth = p->params().depth();
523 for (++p; p != pend; ++p) {
524 Layout const & style = p->layout();
525 // It shouldn't happen that e.g. a section command occurs inside
526 // a quotation environment, at a higher depth, but as of 6/2009,
527 // it can happen. We pretend that it's just at lowest depth.
528 if (style.latextype == LATEX_COMMAND)
530 // If depth is down, we're done
531 if (p->params().depth() < depth)
533 // If depth is up, we're not done
534 if (p->params().depth() > depth)
536 // Now we know we are at the same depth
537 if (style.latextype == LATEX_PARAGRAPH
538 || style.latexname() != bstyle.latexname())
// Outputs the paragraphs in [pbegin, pend). For each paragraph the visible
// code:
//   - steps the layout's counter, if the layout names one;
//   - decides whether a paragraph tag should be opened (opened);
//   - renders the paragraph with simpleLyXHTMLOnePar(), which returns
//     "deferred" text;
//   - decides whether a closing tag is needed (needclose);
//   - streams the deferred text, if any, after a NextRaw marker.
// NOTE(review): two parameters (used below as xs and text), the statements
// that actually open and close the tag, and the return are not visible in
// this view.
545 ParagraphList::const_iterator makeParagraphs(Buffer const & buf,
547 OutputParams const & runparams,
549 ParagraphList::const_iterator const & pbegin,
550 ParagraphList::const_iterator const & pend)
// begin is the first paragraph of the whole text; distance(begin, par)
// below turns the iterator into the paragraph index outerFont() expects.
552 ParagraphList::const_iterator const begin = text.paragraphs().begin();
553 ParagraphList::const_iterator par = pbegin;
554 for (; par != pend; ++par) {
555 Layout const & lay = par->layout();
556 if (!lay.counter.empty())
557 buf.params().documentClass().counters().step(lay.counter);
558 // FIXME We should see if there's a label to be output and
559 // do something with it.
563 // If we are already in a paragraph, and this is the first one, then we
564 // do not want to open the paragraph tag.
565 // we also do not want to open it if the current layout does not permit
566 // multiple paragraphs.
567 bool const opened = runparams.html_make_pars &&
568 (par != pbegin || !runparams.html_in_par);
571 docstring const deferred =
572 par->simpleLyXHTMLOnePar(buf, xs, runparams, text.outerFont(distance(begin, par)));
574 // We want to issue the closing tag if either:
575 // (i) We opened it, and either html_in_par is false,
576 // or we're not in the last paragraph, anyway.
577 // (ii) We didn't open it and html_in_par is true,
578 // but we are in the first par, and there is a next par.
// NOTE(review): nextpar is compared with pend below as "the paragraph after
// par"; the increment that makes it so is not visible in this view.
579 ParagraphList::const_iterator nextpar = par;
581 bool const needclose =
582 (opened && (!runparams.html_in_par || nextpar != pend))
583 || (!opened && runparams.html_in_par && par == pbegin && nextpar != pend);
588 if (!deferred.empty()) {
// The deferred text is preceded by a NextRaw marker; presumably this makes
// the stream write it without escaping, since it is already rendered.
589 xs << XHTMLStream::NextRaw() << deferred;
// Outputs a bibliography: an <h2 class='bibliography'> heading whose text
// is the label string of the first paragraph's layout, then a
// <div class='bibliography'> whose content is produced by makeParagraphs()
// over [pbegin, pend).
// pbegin is dereferenced unconditionally, so it must not equal pend.
// NOTE(review): two parameters (used below as xs and text), the closing of
// the h2 and div tags, and the return are not visible in this view.
597 ParagraphList::const_iterator makeBibliography(Buffer const & buf,
599 OutputParams const & runparams,
601 ParagraphList::const_iterator const & pbegin,
602 ParagraphList::const_iterator const & pend)
604 xs << StartTag("h2", "class='bibliography'");
605 xs << pbegin->layout().labelstring(false);
608 xs << StartTag("div", "class='bibliography'");
610 makeParagraphs(buf, xs, runparams, text, pbegin, pend);
// Returns true when the layout's latextype is LATEX_ENVIRONMENT or
// LATEX_BIB_ENVIRONMENT. makeEnvironmentHtml() uses this to tell such
// environments apart from list-like ones.
616 bool isNormalEnv(Layout const & lay)
618 return lay.latextype == LATEX_ENVIRONMENT
619 || lay.latextype == LATEX_BIB_ENVIRONMENT;
// Outputs the environment that starts at pbegin and ends before pend.
// Paragraphs at the starting depth (origdepth) are emitted as items of this
// environment, with their labels where the layout asks for one; deeper
// environment paragraphs are handled by a recursive call; plain paragraphs
// and bibliography paragraphs are delegated to makeParagraphs().
// pbegin is dereferenced unconditionally, so it must not equal pend.
// NOTE(review): two parameters (used below as xs and text), the declaration
// of sep, several conditions, the else/break/closing-brace lines and the
// return are not visible in this view.
623 ParagraphList::const_iterator makeEnvironmentHtml(Buffer const & buf,
625 OutputParams const & runparams,
627 ParagraphList::const_iterator const & pbegin,
628 ParagraphList::const_iterator const & pend)
// begin is the first paragraph of the whole text; distance(begin, par)
// below turns the iterator into the paragraph index outerFont() expects.
630 ParagraphList::const_iterator const begin = text.paragraphs().begin();
631 ParagraphList::const_iterator par = pbegin;
// bstyle is the layout of the environment being produced.
632 Layout const & bstyle = par->layout();
633 depth_type const origdepth = pbegin->params().depth();
635 // open tag for this environment
639 // we will on occasion need to remember a layout from before.
// NOTE(review): lastlay starts out null and is dereferenced in the
// closeItemTag(xs, *lastlay) calls below; the guards around those calls are
// not visible in this view -- confirm they test for null.
640 Layout const * lastlay = 0;
642 while (par != pend) {
643 Layout const & style = par->layout();
644 // the counter only gets stepped if we're in some kind of list,
645 // or if it's the first time through.
646 // note that enum, etc, are handled automatically.
647 // FIXME There may be a bug here about user defined enumeration
648 // types. If so, then we'll need to take the counter and add "i",
649 // "ii", etc, as with enum.
650 Counters & cnts = buf.params().documentClass().counters();
651 docstring const & cntr = style.counter;
652 if (!style.counter.empty()
653 && (par == pbegin || !isNormalEnv(style))
654 && cnts.hasCounter(cntr)
657 ParagraphList::const_iterator send;
658 // this will be positive, if we want to skip the initial word
659 // (if it's been taken for the label).
662 switch (style.latextype) {
663 case LATEX_ENVIRONMENT:
664 case LATEX_LIST_ENVIRONMENT:
665 case LATEX_ITEM_ENVIRONMENT: {
666 // There are two possibilities in this case.
667 // One is that we are still in the environment in which we
668 // started---which we will be if the depth is the same.
669 if (par->params().depth() == origdepth) {
670 LASSERT(bstyle == style, /* */);
672 closeItemTag(xs, *lastlay);
675 bool const labelfirst = style.htmllabelfirst();
676 if (isNormalEnv(style)) {
677 // in this case, we print the label only for the first
678 // paragraph (as in a theorem).
679 openItemTag(xs, style);
// A label tag of "NONE" means no label markup is emitted.
680 if (par == pbegin && style.htmllabeltag() != "NONE") {
681 docstring const lbl =
682 pbegin->expandLabel(style, buf.params(), false);
684 openLabelTag(xs, style);
686 closeLabelTag(xs, style);
690 } else { // some kind of list
692 openItemTag(xs, style);
693 if (style.labeltype == LABEL_MANUAL
694 && style.htmllabeltag() != "NONE") {
// For manual labels the first word of the paragraph is output as the label;
// sep records what firstWordLyXHTML() returns and is passed on to
// simpleLyXHTMLOnePar() below.
695 openLabelTag(xs, style);
696 sep = par->firstWordLyXHTML(xs, runparams);
697 closeLabelTag(xs, style);
700 else if (style.labeltype != LABEL_NO_LABEL
701 && style.htmllabeltag() != "NONE") {
702 openLabelTag(xs, style);
703 xs << par->expandLabel(style, buf.params(), false);
704 closeLabelTag(xs, style);
708 openItemTag(xs, style);
710 par->simpleLyXHTMLOnePar(buf, xs, runparams,
711 text.outerFont(distance(begin, par)), false, sep);
713 // We may not want to close the tag yet, in particular,
714 // if we're not at the end...
716 // and are doing items...
717 && !isNormalEnv(style)
718 // and if the depth has changed...
719 && par->params().depth() != origdepth) {
720 // then we'll save this layout for later, and close it when
721 // we get another item.
724 closeItemTag(xs, style);
727 // The other possibility is that the depth has increased, in which
728 // case we need to recurse.
// The nested environment spans [par, send); the recursive call returns the
// paragraph at which this loop resumes.
730 send = searchEnvironmentHtml(par, pend);
731 par = makeEnvironmentHtml(buf, xs, runparams, text, par, send);
735 case LATEX_PARAGRAPH:
736 send = searchParagraphHtml(par, pend);
737 par = makeParagraphs(buf, xs, runparams, text, par, send);
740 case LATEX_BIB_ENVIRONMENT:
// NOTE(review): the assignment to send for this case is not visible in this
// view.
743 par = makeParagraphs(buf, xs, runparams, text, par, send);
753 closeItemTag(xs, *lastlay);
754 closeTag(xs, bstyle);
// Outputs the single command paragraph at pbegin (e.g. a sectioning
// command): steps the layout's counter if it names one, emits the expanded
// label inside the layout's label tag unless the label type is
// LABEL_NO_LABEL, then renders the paragraph text with
// simpleLyXHTMLOnePar().
// pbegin is dereferenced unconditionally, so it must be a valid paragraph.
// NOTE(review): two parameters (used below as xs and text) and the
// statements that open and close the command's own tag are not visible in
// this view.
760 void makeCommand(Buffer const & buf,
762 OutputParams const & runparams,
764 ParagraphList::const_iterator const & pbegin)
766 Layout const & style = pbegin->layout();
767 if (!style.counter.empty())
768 buf.params().documentClass().counters().step(style.counter);
772 // Label around sectioning number:
773 // FIXME Probably need to account for LABEL_MANUAL
774 if (style.labeltype != LABEL_NO_LABEL) {
775 openLabelTag(xs, style);
776 xs << pbegin->expandLabel(style, buf.params(), false);
777 closeLabelTag(xs, style);
778 // Otherwise the label might run together with the text
779 xs << from_ascii(" ");
// begin is the first paragraph of the whole text; distance(begin, pbegin)
// turns the iterator into the paragraph index outerFont() expects.
782 ParagraphList::const_iterator const begin = text.paragraphs().begin();
783 pbegin->simpleLyXHTMLOnePar(buf, xs, runparams,
784 text.outerFont(distance(begin, pbegin)));
789 } // end anonymous namespace
792 void xhtmlParagraphs(Text const & text,
795 OutputParams const & runparams)
797 ParagraphList const & paragraphs = text.paragraphs();
798 ParagraphList::const_iterator par = paragraphs.begin();
799 ParagraphList::const_iterator pend = paragraphs.end();
801 OutputParams ourparams = runparams;
802 while (par != pend) {
803 Layout const & style = par->layout();
804 ParagraphList::const_iterator lastpar = par;
805 ParagraphList::const_iterator send;
807 switch (style.latextype) {
808 case LATEX_COMMAND: {
809 // The files with which we are working never have more than
810 // one paragraph in a command structure.
812 // if (ourparams.html_in_par)
813 // fix it so we don't get sections inside standard, e.g.
814 // note that we may then need to make runparams not const, so we
815 // can communicate that back.
816 // FIXME Maybe this fix should be in the routines themselves, in case
817 // they are called from elsewhere.
818 makeCommand(buf, xs, ourparams, text, par);
822 case LATEX_ENVIRONMENT:
823 case LATEX_LIST_ENVIRONMENT:
824 case LATEX_ITEM_ENVIRONMENT: {
825 // FIXME Same fix here.
826 send = searchEnvironmentHtml(par, pend);
827 par = makeEnvironmentHtml(buf, xs, ourparams, text, par, send);
830 case LATEX_BIB_ENVIRONMENT: {
831 // FIXME Same fix here.
832 send = searchEnvironmentHtml(par, pend);
833 par = makeBibliography(buf, xs, ourparams, text, par, send);
836 case LATEX_PARAGRAPH:
837 send = searchParagraphHtml(par, pend);
838 par = makeParagraphs(buf, xs, ourparams, text, par, send);
842 // makeEnvironment may process more than one paragraph and bypass pend
843 if (distance(lastpar, par) >= distance(lastpar, pend))