2 * \file output_xhtml.cpp
3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
8 * This code is based upon output_docbook.cpp
10 * Full author contact details are available in file CREDITS.
15 #include "output_xhtml.h"
18 #include "buffer_funcs.h"
19 #include "BufferParams.h"
23 #include "OutputParams.h"
24 #include "Paragraph.h"
25 #include "ParagraphList.h"
26 #include "ParagraphParameters.h"
29 #include "TextClass.h"
31 #include "support/convert.h"
32 #include "support/debug.h"
33 #include "support/lassert.h"
34 #include "support/lstrings.h"
39 using namespace lyx::support;
45 docstring escapeChar(char_type c)
69 // escape what needs escaping
// docstring overload: walks str from begin() to end(), one character at a
// time, to build its escaped form.
// NOTE(review): the per-character statement and the return are not shown
// here; presumably each character goes through escapeChar() as in the
// std::string overload -- confirm against the full definition.
70 docstring htmlize(docstring const & str) {
72 docstring::const_iterator it = str.begin();
73 docstring::const_iterator en = str.end();
74 for (; it != en; ++it)
80 string escapeChar(char c)
104 // escape what needs escaping
105 string htmlize(string const & str) {
107 string::const_iterator it = str.begin();
108 string::const_iterator en = str.end();
109 for (; it != en; ++it)
110 d << escapeChar(*it);
// Returns a copy of str in which every character that isalnum() does not
// classify as alphanumeric is replaced by '_'. Alphanumeric characters are
// copied through unchanged, so the result has the same length as str.
std::string cleanAttr(std::string const & str)
{
	std::string newname;
	newname.reserve(str.size());
	std::string::const_iterator it = str.begin();
	std::string::const_iterator const en = str.end();
	for (; it != en; ++it) {
		// isalnum() is only defined for EOF and values representable as
		// unsigned char. Plain char may be signed, so bytes >= 0x80 (for
		// example UTF-8 multi-byte sequences) must be converted first.
		unsigned char const uc = static_cast<unsigned char>(*it);
		newname += isalnum(uc) ? *it : '_';
	}
	return newname;
}
// docstring overload of cleanAttr(): iterates over str from begin() to end().
// NOTE(review): the loop body and return are not shown here; presumably it
// mirrors the std::string overload (non-alphanumerics become '_') -- confirm.
126 docstring cleanAttr(docstring const & str)
129 docstring::const_iterator it = str.begin();
130 docstring::const_iterator en = str.end();
131 for (; it != en; ++it)
// Tells whether s names one of the font tags handled specially by the
// stream, currently "em" and "strong". Others may need adding.
bool isFontTag(std::string const & s)
{
	if (s == "em")
		return true;
	return s == "strong";
}
// Renders this tag as opening markup. The text starts as "<" + tag_; attr_
// is run through html::htmlize() and appended after a single space; the
// UTF-8 result is converted to a docstring with from_utf8().
// NOTE(review): the condition guarding the attribute line and the statement
// appending the closing ">" are not shown here -- confirm.
146 docstring StartTag::asTag() const
148 string output = "<" + tag_;
150 output += " " + html::htmlize(attr_);
152 return from_utf8(output);
156 docstring StartTag::asEndTag() const
158 string output = "</" + tag_ + ">";
159 return from_utf8(output);
163 docstring EndTag::asEndTag() const
165 string output = "</" + tag_ + ">";
166 return from_utf8(output);
// Renders this complete tag as markup. The text starts as "<" + tag_; attr_
// is run through html::htmlize() and appended after a single space; the
// UTF-8 result is converted to a docstring with from_utf8().
// NOTE(review): the condition guarding the attribute line and the statement
// that terminates the tag are not shown here -- confirm.
170 docstring CompTag::asTag() const
172 string output = "<" + tag_;
174 output += " " + html::htmlize(attr_);
176 return from_utf8(output);
183 ////////////////////////////////////////////////////////////////
187 ////////////////////////////////////////////////////////////////
// Binds the stream to the output stream os and initialises nextraw_ to false.
189 XHTMLStream::XHTMLStream(odocstream & os)
190 : os_(os), nextraw_(false)
// Writes a single newline to the underlying stream os_.
194 void XHTMLStream::cr()
197 os_ << from_ascii("\n");
// Reports a problem by writing s into the output inside an HTML comment of
// the form "<!-- Output Error: ... -->".
// NOTE(review): s is concatenated verbatim, so a message containing "--"
// would produce a malformed comment -- confirm that callers never pass one.
201 void XHTMLStream::writeError(std::string const & s)
204 os_ << from_utf8("<!-- Output Error: " + s + " -->");
// Closes the font tags (see html::isFontTag) sitting on top of tag_stack_:
// each one has its end tag written to os_ and is popped, until a non-font
// tag is on top or the stack runs out. The rest of the stack is then scanned
// and any font tag still open underneath is reported through writeError().
// NOTE(review): the return statements are not shown here; presumably the
// result reflects noFontTags -- confirm.
208 bool XHTMLStream::closeFontTags()
210 if (tag_stack_.empty())
212 // first, we close any open font tags we can close
// curtag is a copy, so it stays valid after the pop_back() below.
213 html::StartTag curtag = tag_stack_.back();
214 while (html::isFontTag(curtag.tag_)) {
215 os_ << curtag.asEndTag();
216 tag_stack_.pop_back();
217 if (tag_stack_.empty())
218 // this probably shouldn't happen, since then the
219 // font tags weren't in any other tag. but that
220 // problem will likely be caught elsewhere.
222 curtag = tag_stack_.back();
224 // so we've hit a non-font tag. let's see if any of the
225 // remaining tags are font tags.
226 TagStack::const_iterator it = tag_stack_.begin();
227 TagStack::const_iterator en = tag_stack_.end();
228 bool noFontTags = true;
229 for (; it != en; ++it) {
230 if (html::isFontTag(it->tag_)) {
231 writeError("Font tag `" + it->tag_ + "' still open in closeFontTags().\n"
232 "This is likely not a problem, but you might want to check.");
// Drains pending_tags_ from the front, moving each tag onto tag_stack_ in
// the order it was queued.
// NOTE(review): the lines between taking the reference and the push_back are
// not shown here; presumably the tag's markup is written to os_ at that
// point -- confirm.
240 void XHTMLStream::clearTagDeque()
242 while (!pending_tags_.empty()) {
// The reference is only used before pop_front() removes the element.
243 html::StartTag const & tag = pending_tags_.front();
246 tag_stack_.push_back(tag);
247 pending_tags_.pop_front();
// Writes d to os_ after passing it through html::htmlize().
// NOTE(review): the statements preceding this write are not shown here;
// presumably they deal with pending tags and nextraw_ -- confirm.
252 XHTMLStream & XHTMLStream::operator<<(docstring const & d)
259 os_ << html::htmlize(d);
// C-string overload: converts s with from_ascii() and writes the result to
// os_ after passing it through html::htmlize().
// NOTE(review): the statements between the conversion and the write are not
// shown here -- confirm how nextraw_ is handled.
264 XHTMLStream & XHTMLStream::operator<<(const char * s)
267 docstring const d = from_ascii(s);
272 os_ << html::htmlize(d);
// Wide-character overload: writes html::escapeChar(c) to os_.
// NOTE(review): the statements preceding this write are not shown here.
277 XHTMLStream & XHTMLStream::operator<<(char_type c)
284 os_ << html::escapeChar(c);
// Narrow-character overload: escapes c with html::escapeChar(char), then
// converts the resulting std::string with from_ascii() before writing to os_.
// NOTE(review): the statements preceding the escape are not shown here.
289 XHTMLStream & XHTMLStream::operator<<(char c)
296 string const d = html::escapeChar(c);
297 os_ << from_ascii(d);
303 XHTMLStream & XHTMLStream::operator<<(int i)
312 XHTMLStream & XHTMLStream::operator<<(NextRaw const &)
// Start-tag overload: the tag is not written immediately but queued on
// pending_tags_. Tags with an empty name are special-cased first.
// NOTE(review): the action taken for an empty tag name is not shown here;
// presumably the call returns without queueing -- confirm.
319 XHTMLStream & XHTMLStream::operator<<(html::StartTag const & tag)
321 if (tag.tag_.empty())
323 pending_tags_.push_back(tag);
// Complete-tag overload. Tags with an empty name are special-cased first.
// NOTE(review): neither the empty-name action nor the statements that emit
// the tag are shown here -- confirm against the full definition.
330 XHTMLStream & XHTMLStream::operator<<(html::CompTag const & tag)
332 if (tag.tag_.empty())
// Scans tag_stack_ from bottom to top for an entry whose tag_ equals stag.
// NOTE(review): the return statements are not shown here; presumably true on
// the first match and false otherwise -- confirm.
342 bool XHTMLStream::isTagOpen(string const & stag)
344 TagStack::const_iterator sit = tag_stack_.begin();
345 TagStack::const_iterator const sen = tag_stack_.end();
346 for (; sit != sen; ++sit)
347 if (sit->tag_ == stag)
353 // this is complicated, because we want to make sure that
354 // everything is properly nested. the code ought to make
355 // sure of that, but we won't assert (yet) if we run into
356 // a problem. we'll just output error messages and try our
357 // best to make things work.
// End-tag overload. The visible cases, in order:
//  - tag_stack_ is empty: report an error.
//  - pending_tags_ is not empty:
//      * the last pending tag matches: drop it, since <tag></tag> is empty;
//      * the tag is pending further down: erase it and report;
//      * the tag is not open at all: report;
//      * otherwise list the pending tags in an error string and clear them.
//  - the tag is on top of tag_stack_: write its end tag and pop it.
//  - the tag is not open: report.
//  - the tag is a font tag: close the font tags above it, close it, then
//    queue the closed ones on pending_tags_ again.
//  - otherwise: close everything above it, then the tag itself.
// NOTE(review): most return statements are not shown here, so the exact
// fall-through between these cases needs confirming.
// NOTE(review): the empty-stack check runs before the pending-tag check, so
// closing a tag that is still only pending while tag_stack_ is empty appears
// to be reported as an error rather than discarded as <tag></tag> -- confirm
// whether that ordering is intended.
358 XHTMLStream & XHTMLStream::operator<<(html::EndTag const & etag)
360 if (etag.tag_.empty())
363 // make sure there are tags to be closed
364 if (tag_stack_.empty()) {
365 writeError("Tried to close `" + etag.tag_
366 + "' when no tags were open!");
370 // first make sure we're not closing an empty tag
371 if (!pending_tags_.empty()) {
372 html::StartTag const & stag = pending_tags_.back();
373 if (etag.tag_ == stag.tag_) {
374 // we have <tag></tag>, so we discard it and remove it
375 // from the pending_tags_.
376 pending_tags_.pop_back();
379 // there is a pending tag that isn't the one we are trying
381 // is this tag itself pending?
382 // non-const iterators because we may call erase().
383 TagDeque::iterator dit = pending_tags_.begin();
384 TagDeque::iterator const den = pending_tags_.end();
385 for (; dit != den; ++dit) {
386 if (dit->tag_ == etag.tag_) {
387 // it was pending, so we just erase it
388 writeError("Tried to close pending tag `" + etag.tag_
389 + "' when other tags were pending. Last pending tag is `"
390 + pending_tags_.back().tag_ + "'. Tag discarded.");
391 pending_tags_.erase(dit);
395 // so etag isn't itself pending. is it even open?
396 if (!isTagOpen(etag.tag_)) {
397 writeError("Tried to close `" + etag.tag_
398 + "' when tag was not open. Tag discarded.");
401 // ok, so etag is open.
402 // our strategy will be as below: we will do what we need to
403 // do to close this tag.
404 string estr = "Closing tag `" + etag.tag_
405 + "' when other tags are pending. Discarded pending tags:\n";
406 for (dit = pending_tags_.begin(); dit != den; ++dit)
407 estr += dit->tag_ + "\n";
409 // clear the pending tags...
410 pending_tags_.clear();
411 // ...and then just fall through.
414 // is the tag we are closing the last one we opened?
415 if (etag.tag_ == tag_stack_.back().tag_) {
417 os_ << etag.asEndTag();
418 // ...and forget about it
419 tag_stack_.pop_back();
423 // we are trying to close a tag other than the one last opened.
424 // let's first see if this particular tag is still open somehow.
425 if (!isTagOpen(etag.tag_)) {
426 writeError("Tried to close `" + etag.tag_
427 + "' when tag was not open. Tag discarded.");
431 // so the tag was opened, but other tags have been opened since
432 // and not yet closed.
433 // if it's a font tag, though...
434 if (html::isFontTag(etag.tag_)) {
435 // it won't be a problem if the other tags open since this one
436 // are also font tags.
437 TagStack::const_reverse_iterator rit = tag_stack_.rbegin();
438 TagStack::const_reverse_iterator ren = tag_stack_.rend();
439 for (; rit != ren; ++rit) {
440 if (rit->tag_ == etag.tag_)
442 if (!html::isFontTag(rit->tag_)) {
443 // we'll just leave it and, presumably, have to close it later.
444 writeError("Unable to close font tag `" + etag.tag_
445 + "' due to open non-font tag `" + rit->tag_ + "'.");
451 // <em>this is <strong>bold
452 // and are being asked to close em. we want:
453 // <em>this is <strong>bold</strong></em><strong>
454 // first, we close the intervening tags...
455 html::StartTag curtag = tag_stack_.back();
456 // ...remembering them in a stack.
// The isTagOpen() check above means etag.tag_ is somewhere on tag_stack_,
// so this loop stops before the stack is exhausted.
458 while (curtag.tag_ != etag.tag_) {
459 os_ << curtag.asEndTag();
460 fontstack.push_back(curtag);
461 tag_stack_.pop_back();
462 curtag = tag_stack_.back();
464 // now close our tag...
465 os_ << etag.asEndTag();
466 tag_stack_.pop_back();
468 // ...and restore the other tags.
// Reverse iteration queues the tags in their original opening order.
469 rit = fontstack.rbegin();
470 ren = fontstack.rend();
471 for (; rit != ren; ++rit)
472 pending_tags_.push_back(*rit);
476 // it wasn't a font tag.
477 // so other tags were opened before this one and not properly closed.
478 // so we'll close them, too. that may cause other issues later, but it
479 // at least guarantees proper nesting.
480 writeError("Closing tag `" + etag.tag_
481 + "' when other tags are open, namely:");
482 html::StartTag curtag = tag_stack_.back();
483 while (curtag.tag_ != etag.tag_) {
484 writeError(curtag.tag_);
485 os_ << curtag.asEndTag();
486 tag_stack_.pop_back();
487 curtag = tag_stack_.back();
489 // curtag is now the one we actually want.
490 os_ << curtag.asEndTag();
491 tag_stack_.pop_back();
496 // End code for XHTMLStream
500 // convenience functions
502 inline void openTag(XHTMLStream & xs, Layout const & lay)
504 xs << html::StartTag(lay.htmltag(), lay.htmlattr());
// Opens the layout's HTML tag with an inline text-align style derived from
// the paragraph's alignment, appended to the layout's own attributes.
// NOTE(review): the lines between computing align and building attrs are not
// shown here; presumably an empty align falls back to openTag(xs, lay), as
// the item-tag variant appears to do -- confirm.
508 void openTag(XHTMLStream & xs, Layout const & lay,
509 ParagraphParameters const & params)
511 // FIXME Are there other things we should handle here?
512 string const align = alignmentToCSS(params.align());
517 string attrs = lay.htmlattr() + " style='text-align: " + align + ";'";
518 xs << html::StartTag(lay.htmltag(), attrs);
522 inline void closeTag(XHTMLStream & xs, Layout const & lay)
524 xs << html::EndTag(lay.htmltag());
528 inline void openLabelTag(XHTMLStream & xs, Layout const & lay)
530 xs << html::StartTag(lay.htmllabeltag(), lay.htmllabelattr());
534 inline void closeLabelTag(XHTMLStream & xs, Layout const & lay)
536 xs << html::EndTag(lay.htmllabeltag());
540 inline void openItemTag(XHTMLStream & xs, Layout const & lay)
542 xs << html::StartTag(lay.htmlitemtag(), lay.htmlitemattr(), true);
// Opens the layout's item tag, adding an inline text-align style derived from
// the paragraph's alignment. The two-argument openItemTag(xs, lay) is also
// called on one path.
// NOTE(review): the condition selecting that path is not shown here;
// presumably it is taken when align is empty -- confirm.
// NOTE(review): the styled path builds its attributes from lay.htmlattr() and
// passes no third StartTag argument, whereas the two-argument overload uses
// lay.htmlitemattr() and passes true. Check whether the difference is
// intentional.
546 void openItemTag(XHTMLStream & xs, Layout const & lay,
547 ParagraphParameters const & params)
549 // FIXME Are there other things we should handle here?
550 string const align = alignmentToCSS(params.align());
552 openItemTag(xs, lay);
555 string attrs = lay.htmlattr() + " style='text-align: " + align + ";'";
556 xs << html::StartTag(lay.htmlitemtag(), attrs);
560 inline void closeItemTag(XHTMLStream & xs, Layout const & lay)
562 xs << html::EndTag(lay.htmlitemtag());
565 // end of convenience functions
// Starting from the paragraph after p, skips forward over consecutive
// paragraphs whose layout has latextype LATEX_PARAGRAPH, stopping at pend.
// p is incremented before any comparison with pend, so callers must pass a
// dereferenceable p (p != pend).
// NOTE(review): the return statement is not shown here; presumably the
// advanced iterator is returned -- confirm.
567 ParagraphList::const_iterator searchParagraphHtml(
568 ParagraphList::const_iterator p,
569 ParagraphList::const_iterator const & pend)
571 for (++p; p != pend && p->layout().latextype == LATEX_PARAGRAPH; ++p)
// Looks for the end of the environment that starts at pstart. Paragraphs
// after pstart are examined in order, comparing each one's latextype, depth
// and latexname() with those of pstart (bstyle, depth).
// pstart is dereferenced immediately, so it must not equal pend.
// NOTE(review): the statements executed when each test succeeds (break,
// continue or return) are not shown here; the existing comments suggest the
// scan stops at a command, at a shallower depth, or at a same-depth paragraph
// that is LATEX_PARAGRAPH or has a different latexname -- confirm.
578 ParagraphList::const_iterator searchEnvironmentHtml(
579 ParagraphList::const_iterator const pstart,
580 ParagraphList::const_iterator const & pend)
582 ParagraphList::const_iterator p = pstart;
583 Layout const & bstyle = p->layout();
584 size_t const depth = p->params().depth();
585 for (++p; p != pend; ++p) {
586 Layout const & style = p->layout();
587 // It shouldn't happen that e.g. a section command occurs inside
588 // a quotation environment, at a higher depth, but as of 6/2009,
589 // it can happen. We pretend that it's just at lowest depth.
590 if (style.latextype == LATEX_COMMAND)
592 // If depth is down, we're done
593 if (p->params().depth() < depth)
595 // If depth is up, we're not done
596 if (p->params().depth() > depth)
598 // Now we know we are at the same depth
599 if (style.latextype == LATEX_PARAGRAPH
600 || style.latexname() != bstyle.latexname())
// Writes the paragraphs in [pbegin, pend) to xs. For each paragraph the
// layout's counter, if it has one, is stepped; the paragraph tag is opened
// when `opened` allows; the content is produced by simpleLyXHTMLOnePar();
// and any deferred text it returns is written raw via XHTMLStream::NextRaw.
// NOTE(review): some parameters (the stream xs and the Text text used below)
// and the return statement are not shown here -- confirm the full signature
// and what is returned.
607 ParagraphList::const_iterator makeParagraphs(Buffer const & buf,
609 OutputParams const & runparams,
611 ParagraphList::const_iterator const & pbegin,
612 ParagraphList::const_iterator const & pend)
// begin is needed to turn par into the index expected by text.outerFont().
614 ParagraphList::const_iterator const begin = text.paragraphs().begin();
615 ParagraphList::const_iterator par = pbegin;
616 for (; par != pend; ++par) {
617 Layout const & lay = par->layout();
618 if (!lay.counter.empty())
619 buf.params().documentClass().counters().step(lay.counter, OutputUpdate);
620 // FIXME We should see if there's a label to be output and
621 // do something with it.
625 // If we are already in a paragraph, and this is the first one, then we
626 // do not want to open the paragraph tag.
627 // we also do not want to open it if the current layout does not permit
628 // multiple paragraphs.
629 bool const opened = runparams.html_make_pars &&
630 (par != pbegin || !runparams.html_in_par);
// NOTE(review): presumably guarded by `opened` on a line not shown here.
632 openTag(xs, lay, par->params());
633 docstring const deferred =
634 par->simpleLyXHTMLOnePar(buf, xs, runparams, text.outerFont(distance(begin, par)));
636 // We want to issue the closing tag if either:
637 // (i) We opened it, and either html_in_par is false,
638 // or we're not in the last paragraph, anyway.
639 // (ii) We didn't open it and html_in_par is true,
640 // but we are in the first par, and there is a next par.
// NOTE(review): nextpar is presumably advanced past par on a line not shown.
641 ParagraphList::const_iterator nextpar = par;
643 bool const needclose =
644 (opened && (!runparams.html_in_par || nextpar != pend))
645 || (!opened && runparams.html_in_par && par == pbegin && nextpar != pend);
650 if (!deferred.empty()) {
651 xs << XHTMLStream::NextRaw() << deferred;
// Writes a bibliography: an <h2 class='bibliography'> heading holding the
// layout's label string, then a <div class='bibliography'> wrapping the
// paragraphs in [pbegin, pend), which are produced by makeParagraphs().
// pbegin is dereferenced, so it must not equal pend.
// NOTE(review): some parameters (xs, text) and the return statement are not
// shown here -- confirm the full signature and what is returned.
659 ParagraphList::const_iterator makeBibliography(Buffer const & buf,
661 OutputParams const & runparams,
663 ParagraphList::const_iterator const & pbegin,
664 ParagraphList::const_iterator const & pend)
667 // Use TextClass::htmlTOCLayout() to figure out how we should look.
668 xs << html::StartTag("h2", "class='bibliography'");
669 xs << pbegin->layout().labelstring(false);
670 xs << html::EndTag("h2");
672 xs << html::StartTag("div", "class='bibliography'");
674 makeParagraphs(buf, xs, runparams, text, pbegin, pend);
675 xs << html::EndTag("div");
680 bool isNormalEnv(Layout const & lay)
682 return lay.latextype == LATEX_ENVIRONMENT
683 || lay.latextype == LATEX_BIB_ENVIRONMENT;
// Writes the environment spanning [pbegin, pend) to xs. The loop dispatches
// on each paragraph's latextype:
//  - environment, list and item types at the original depth are written as
//    items, with an optional label; deeper ones recurse through
//    searchEnvironmentHtml() and makeEnvironmentHtml();
//  - LATEX_PARAGRAPH runs and LATEX_BIB_ENVIRONMENT go to makeParagraphs().
// The environment's own tag (bstyle) is closed at the end.
// pbegin is dereferenced immediately, so it must not equal pend.
// NOTE(review): some parameters (xs, text), the declaration of sep, several
// conditions and the return statement are not shown here -- confirm.
687 ParagraphList::const_iterator makeEnvironmentHtml(Buffer const & buf,
689 OutputParams const & runparams,
691 ParagraphList::const_iterator const & pbegin,
692 ParagraphList::const_iterator const & pend)
694 ParagraphList::const_iterator const begin = text.paragraphs().begin();
695 ParagraphList::const_iterator par = pbegin;
696 Layout const & bstyle = par->layout();
697 depth_type const origdepth = pbegin->params().depth();
699 // open tag for this environment
703 // we will on occasion need to remember a layout from before.
// NOTE(review): lastlay starts null and is dereferenced below; presumably
// each dereference is guarded by a null check on a line not shown -- confirm.
704 Layout const * lastlay = 0;
706 while (par != pend) {
707 Layout const & style = par->layout();
708 // the counter only gets stepped if we're in some kind of list,
709 // or if it's the first time through.
710 // note that enum, etc, are handled automatically.
711 // FIXME There may be a bug here about user defined enumeration
712 // types. If so, then we'll need to take the counter and add "i",
713 // "ii", etc, as with enum.
714 Counters & cnts = buf.params().documentClass().counters();
715 docstring const & cntr = style.counter;
716 if (!style.counter.empty()
717 && (par == pbegin || !isNormalEnv(style))
718 && cnts.hasCounter(cntr)
720 cnts.step(cntr, OutputUpdate);
721 ParagraphList::const_iterator send;
722 // this will be positive, if we want to skip the initial word
723 // (if it's been taken for the label).
726 switch (style.latextype) {
727 case LATEX_ENVIRONMENT:
728 case LATEX_LIST_ENVIRONMENT:
729 case LATEX_ITEM_ENVIRONMENT: {
730 // There are two possibilities in this case.
731 // One is that we are still in the environment in which we
732 // started---which we will be if the depth is the same.
733 if (par->params().depth() == origdepth) {
734 LASSERT(bstyle == style, /* */);
736 closeItemTag(xs, *lastlay);
740 bool const labelfirst = style.htmllabelfirst();
742 openItemTag(xs, style, par->params());
745 if (style.labeltype != LABEL_NO_LABEL &&
746 style.htmllabeltag() != "NONE") {
747 if (isNormalEnv(style)) {
748 // in this case, we print the label only for the first
749 // paragraph (as in a theorem).
751 docstring const lbl =
752 pbegin->params().labelString();
754 openLabelTag(xs, style);
756 closeLabelTag(xs, style);
760 } else { // some kind of list
761 if (style.labeltype == LABEL_MANUAL) {
762 openLabelTag(xs, style);
763 sep = par->firstWordLyXHTML(xs, runparams);
764 closeLabelTag(xs, style);
768 openLabelTag(xs, style);
769 xs << par->params().labelString();
770 closeLabelTag(xs, style);
774 } // end label output
777 openItemTag(xs, style, par->params());
779 par->simpleLyXHTMLOnePar(buf, xs, runparams,
780 text.outerFont(distance(begin, par)), sep);
783 // We may not want to close the tag yet, in particular:
784 // If we're not at the end...
786 // and are doing items...
787 && !isNormalEnv(style)
788 // and if the depth has changed...
789 && par->params().depth() != origdepth) {
790 // then we'll save this layout for later, and close it when
791 // we get another item.
794 closeItemTag(xs, style);
797 // The other possibility is that the depth has increased, in which
798 // case we need to recurse.
800 send = searchEnvironmentHtml(par, pend);
801 par = makeEnvironmentHtml(buf, xs, runparams, text, par, send);
805 case LATEX_PARAGRAPH:
806 send = searchParagraphHtml(par, pend);
807 par = makeParagraphs(buf, xs, runparams, text, par, send);
810 case LATEX_BIB_ENVIRONMENT:
813 par = makeParagraphs(buf, xs, runparams, text, par, send);
823 closeItemTag(xs, *lastlay);
824 closeTag(xs, bstyle);
// Writes one command-type paragraph (pbegin) to xs: steps the layout's
// counter if it has one, opens the layout's tag, writes the label (followed
// by a space) unless the layout's labeltype is LABEL_NO_LABEL, then writes
// the paragraph content with simpleLyXHTMLOnePar().
// NOTE(review): some parameters (xs, text) and the closing of the tag are
// not shown here -- confirm against the full definition.
830 void makeCommand(Buffer const & buf,
832 OutputParams const & runparams,
834 ParagraphList::const_iterator const & pbegin)
836 Layout const & style = pbegin->layout();
837 if (!style.counter.empty())
838 buf.params().documentClass().counters().step(style.counter, OutputUpdate);
840 openTag(xs, style, pbegin->params());
842 // Label around sectioning number:
843 // FIXME Probably need to account for LABEL_MANUAL
844 if (style.labeltype != LABEL_NO_LABEL) {
845 openLabelTag(xs, style);
846 xs << pbegin->params().labelString();
847 closeLabelTag(xs, style);
848 // Otherwise the label might run together with the text
849 xs << from_ascii(" ");
// begin is needed to turn pbegin into the index expected by text.outerFont().
852 ParagraphList::const_iterator const begin = text.paragraphs().begin();
853 pbegin->simpleLyXHTMLOnePar(buf, xs, runparams,
854 text.outerFont(distance(begin, pbegin)));
859 } // end anonymous namespace
// Entry point: writes every paragraph of text as XHTML. The loop dispatches
// on each paragraph's latextype to makeCommand(), makeEnvironmentHtml(),
// makeBibliography() or makeParagraphs(). The helpers that take a range get
// its end from searchEnvironmentHtml() or searchParagraphHtml().
// runparams is copied into ourparams, and the copy is what the helpers get.
// NOTE(review): some parameters (buf, xs), the appendix counter handling, the
// advance of par after makeCommand() and the break statements are not shown
// here -- confirm against the full definition.
862 void xhtmlParagraphs(Text const & text,
865 OutputParams const & runparams)
867 ParagraphList const & paragraphs = text.paragraphs();
868 ParagraphList::const_iterator par = paragraphs.begin();
869 ParagraphList::const_iterator pend = paragraphs.end();
871 OutputParams ourparams = runparams;
872 while (par != pend) {
873 if (par->params().startOfAppendix()) {
874 // FIXME: only the counter corresponding to toplevel
875 // sectioning should be reset
876 Counters & cnts = buf.masterBuffer()->params().documentClass().counters();
880 Layout const & style = par->layout();
// lastpar remembers where this iteration started, for the overrun check
// at the bottom of the loop.
881 ParagraphList::const_iterator lastpar = par;
882 ParagraphList::const_iterator send;
884 switch (style.latextype) {
885 case LATEX_COMMAND: {
886 // The files with which we are working never have more than
887 // one paragraph in a command structure.
889 // if (ourparams.html_in_par)
890 // fix it so we don't get sections inside standard, e.g.
891 // note that we may then need to make runparams not const, so we
892 // can communicate that back.
893 // FIXME Maybe this fix should be in the routines themselves, in case
894 // they are called from elsewhere.
895 makeCommand(buf, xs, ourparams, text, par);
899 case LATEX_ENVIRONMENT:
900 case LATEX_LIST_ENVIRONMENT:
901 case LATEX_ITEM_ENVIRONMENT: {
902 // FIXME Same fix here.
903 send = searchEnvironmentHtml(par, pend);
904 par = makeEnvironmentHtml(buf, xs, ourparams, text, par, send);
907 case LATEX_BIB_ENVIRONMENT: {
908 // FIXME Same fix here.
909 send = searchEnvironmentHtml(par, pend);
910 par = makeBibliography(buf, xs, ourparams, text, par, send);
913 case LATEX_PARAGRAPH:
914 send = searchParagraphHtml(par, pend);
915 par = makeParagraphs(buf, xs, ourparams, text, par, send);
919 // makeEnvironment may process more than one paragraph and bypass pend
920 if (distance(lastpar, par) >= distance(lastpar, pend))
926 string alignmentToCSS(LyXAlignment align) {
928 case LYX_ALIGN_BLOCK:
929 // we are NOT going to use text-align: justify!!
932 case LYX_ALIGN_RIGHT:
934 case LYX_ALIGN_CENTER: