2 * \file output_xhtml.cpp
3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
8 * This code is based upon output_docbook.cpp
10 * Full author contact details are available in file CREDITS.
15 #include "output_xhtml.h"
18 #include "buffer_funcs.h"
19 #include "BufferParams.h"
23 #include "OutputParams.h"
24 #include "Paragraph.h"
25 #include "ParagraphList.h"
26 #include "ParagraphParameters.h"
29 #include "TextClass.h"
31 #include "support/convert.h"
32 #include "support/debug.h"
33 #include "support/lassert.h"
34 #include "support/lstrings.h"
35 #include "support/textutils.h"
40 using namespace lyx::support;
// Translate one wide character `c` into its output form according to the
// escape setting `e`. The function dispatches on `e`; the settings handled
// are ESCAPE_NONE, ESCAPE_ALL and ESCAPE_AND.
46 docstring escapeChar(char_type c, XHTMLStream::EscapeSettings e)
50 case XHTMLStream::ESCAPE_NONE:
53 case XHTMLStream::ESCAPE_ALL:
// under ESCAPE_ALL, '>' is one of the characters tested for explicitly
57 } else if (c == '>') {
62 case XHTMLStream::ESCAPE_AND:
73 // escape what needs escaping: every character of str is passed through
// escapeChar() with setting e and the result is streamed into d.
// presumably the accumulated contents of d are what gets returned -- TODO confirm
74 docstring htmlize(docstring const & str, XHTMLStream::EscapeSettings e) {
76 docstring::const_iterator it = str.begin();
77 docstring::const_iterator en = str.end();
// walk the input one character at a time
78 for (; it != en; ++it)
79 d << escapeChar(*it, e);
// Narrow-character counterpart of escapeChar(char_type, ...): translate one
// char `c` into its output form according to the escape setting `e`.
// The settings handled are ESCAPE_NONE, ESCAPE_ALL and ESCAPE_AND.
84 string escapeChar(char c, XHTMLStream::EscapeSettings e)
88 case XHTMLStream::ESCAPE_NONE:
91 case XHTMLStream::ESCAPE_ALL:
// under ESCAPE_ALL, '>' is one of the characters tested for explicitly
95 } else if (c == '>') {
100 case XHTMLStream::ESCAPE_AND:
111 // escape what needs escaping: every char of str is passed through
// escapeChar() with setting e and the result is streamed into d.
// presumably the accumulated contents of d are what gets returned -- TODO confirm
112 string htmlize(string const & str, XHTMLStream::EscapeSettings e) {
114 string::const_iterator it = str.begin();
115 string::const_iterator en = str.end();
// walk the input one char at a time
116 for (; it != en; ++it)
117 d << escapeChar(*it, e);
// Build a sanitized copy of str in which every character that is not
// alphanumeric is replaced by '_'; alphanumeric characters are kept as is.
122 string cleanAttr(string const & str)
125 string::const_iterator it = str.begin();
126 string::const_iterator en = str.end();
127 for (; it != en; ++it)
// NOTE(review): isalnum() is called on a plain char here, whereas the
// docstring overload uses isAlnumASCII(). If this is the <cctype> isalnum,
// a char with a negative value (e.g. a byte of a multi-byte UTF-8 sequence)
// is not a valid argument and the result is locale dependent -- confirm and
// consider using the same ASCII-only test as the docstring overload.
128 newname += isalnum(*it) ? *it : '_';
// docstring overload of cleanAttr(): build a sanitized copy of str in which
// every character that fails isAlnumASCII() is replaced by '_'.
133 docstring cleanAttr(docstring const & str)
136 docstring::const_iterator it = str.begin();
137 docstring::const_iterator en = str.end();
138 for (; it != en; ++it) {
139 char_type const c = *it;
// the explicit char_type('_') keeps both branches of ?: the same type
140 newname += isAlnumASCII(c) ? c : char_type('_');
// Tell whether s names one of the tags treated as font tags by the stream
// code: "em", "strong", "i" or "b". The comparison is exact (case-sensitive).
146 bool isFontTag(string const & s)
149 return s == "em" || s == "strong" || s == "i" || s == "b";
// Render this start tag as text: "<" followed by tag_, then the attribute
// string attr_ separated by a space, converted from UTF-8 to a docstring.
153 docstring StartTag::asTag() const
155 string output = "<" + tag_;
// attr_ goes through htmlize() with ESCAPE_NONE, i.e. it is expected to be
// emitted without entity escaping.
// presumably only added when attr_ is non-empty -- TODO confirm
157 output += " " + html::htmlize(attr_, XHTMLStream::ESCAPE_NONE);
159 return from_utf8(output);
// Render the closing tag that matches this start tag: "</" + tag_ + ">",
// converted from UTF-8 to a docstring. Attributes play no role here.
163 docstring StartTag::asEndTag() const
165 string output = "</" + tag_ + ">";
166 return from_utf8(output);
// Render this end tag as text: "</" + tag_ + ">", converted from UTF-8 to
// a docstring.
170 docstring EndTag::asEndTag() const
172 string output = "</" + tag_ + ">";
173 return from_utf8(output);
// Render this complete (self-contained) tag as text: "<" followed by tag_,
// then the attribute string attr_ separated by a space, converted from
// UTF-8 to a docstring.
177 docstring CompTag::asTag() const
179 string output = "<" + tag_;
// attr_ goes through htmlize() with ESCAPE_NONE, i.e. it is expected to be
// emitted without entity escaping.
// presumably only added when attr_ is non-empty -- TODO confirm
181 output += " " + html::htmlize(attr_, XHTMLStream::ESCAPE_NONE);
183 return from_utf8(output);
190 ////////////////////////////////////////////////////////////////
194 ////////////////////////////////////////////////////////////////
// Construct a stream that writes to os. The escape setting starts out as
// ESCAPE_ALL, which is also the value the output operators reset it to.
196 XHTMLStream::XHTMLStream(odocstream & os)
197 : os_(os), escape_(ESCAPE_ALL)
// Report a problem by writing it into the output itself, as a comment of
// the form "<!-- Output Error: ... -->".
201 void XHTMLStream::writeError(std::string const & s)
// s is interpreted as UTF-8 and inserted verbatim (no escaping is applied),
// and the text goes straight to os_, bypassing the tag bookkeeping.
204 os_ << from_utf8("<!-- Output Error: " + s + " -->");
// Close the font tags (see html::isFontTag()) sitting on top of tag_stack_:
// their end tags are written to os_ and they are popped from the stack.
// Afterwards the rest of the stack is scanned, and a font tag that is still
// open underneath a non-font tag is reported through writeError().
// presumably the bool result says whether no font tag remains open
// (cf. noFontTags below) -- TODO confirm
208 bool XHTMLStream::closeFontTags()
// nothing is open at all
210 if (tag_stack_.empty())
212 // first, we close any open font tags we can close
// curtag is a copy of the top element, so it stays usable after pop_back()
213 html::StartTag curtag = tag_stack_.back();
214 while (html::isFontTag(curtag.tag_)) {
215 os_ << curtag.asEndTag();
216 tag_stack_.pop_back();
// the emptiness check must come before the next back() call
217 if (tag_stack_.empty())
218 // this probably shouldn't happen, since then the
219 // font tags weren't in any other tag. but that
220 // problem will likely be caught elsewhere.
222 curtag = tag_stack_.back();
224 // so we've hit a non-font tag. let's see if any of the
225 // remaining tags are font tags.
226 TagStack::const_iterator it = tag_stack_.begin();
227 TagStack::const_iterator en = tag_stack_.end();
228 bool noFontTags = true;
229 for (; it != en; ++it) {
230 if (html::isFontTag(it->tag_)) {
// such a tag is only reported; it is left on the stack
231 writeError("Font tag `" + it->tag_ + "' still open in closeFontTags().\n"
232 "This is likely not a problem, but you might want to check.");
// Drain pending_tags_ from the front (oldest first): every pending start tag
// is moved onto tag_stack_, so that it counts as open from now on.
// presumably the tag is also written to the output at this point -- TODO confirm
240 void XHTMLStream::clearTagDeque()
242 while (!pending_tags_.empty()) {
// tag refers to the front element; push_back() below copies it before
// pop_front() destroys the original
243 html::StartTag const & tag = pending_tags_.front();
246 tag_stack_.push_back(tag);
247 pending_tags_.pop_front();
// Write the text d, escaped according to the current escape setting.
252 XHTMLStream & XHTMLStream::operator<<(docstring const & d)
255 os_ << html::htmlize(d, escape_);
// an escape setting is good for one output operation only: go back to
// the default afterwards
256 escape_ = ESCAPE_ALL;
// Write the C string s, escaped according to the current escape setting.
// s is converted with from_ascii(), so it is expected to be plain ASCII.
261 XHTMLStream & XHTMLStream::operator<<(const char * s)
264 docstring const d = from_ascii(s);
265 os_ << html::htmlize(d, escape_);
// an escape setting is good for one output operation only: go back to
// the default afterwards
266 escape_ = ESCAPE_ALL;
// Write the single wide character c, escaped according to the current
// escape setting.
271 XHTMLStream & XHTMLStream::operator<<(char_type c)
274 os_ << html::escapeChar(c, escape_);
// an escape setting is good for one output operation only: go back to
// the default afterwards
275 escape_ = ESCAPE_ALL;
// Overload for a single narrow char: c is escaped according to the current
// escape setting using the string version of html::escapeChar().
280 XHTMLStream & XHTMLStream::operator<<(char c)
283 string const d = html::escapeChar(c, escape_);
// an escape setting is good for one output operation only: go back to
// the default afterwards
284 escape_ = ESCAPE_ALL;
// Overload for an int value.
289 XHTMLStream & XHTMLStream::operator<<(int i)
// as in the other output overloads, the escape setting goes back to the
// default once the operation is done
293 escape_ = ESCAPE_ALL;
// Overload taking an EscapeSettings value e.
// presumably stores e as the setting used by the next output operation,
// which then resets it to ESCAPE_ALL -- TODO confirm
298 XHTMLStream & XHTMLStream::operator<<(EscapeSettings e)
// Register a start tag. The tag is appended to pending_tags_; see
// clearTagDeque() for how pending tags later become open tags.
305 XHTMLStream & XHTMLStream::operator<<(html::StartTag const & tag)
// a tag with an empty name gets special treatment
// (presumably it is ignored -- TODO confirm)
307 if (tag.tag_.empty())
309 pending_tags_.push_back(tag);
// Overload for a complete tag (html::CompTag).
316 XHTMLStream & XHTMLStream::operator<<(html::CompTag const & tag)
// a tag with an empty name gets special treatment
// (presumably it is ignored -- TODO confirm)
318 if (tag.tag_.empty())
// Overload for html::CR: emits a line break in the output.
328 XHTMLStream & XHTMLStream::operator<<(html::CR const &)
// the newline is written to os_ directly, so no escaping is involved
331 os_ << from_ascii("\n");
// Look for a tag named stag on tag_stack_ by scanning the whole stack from
// the bottom. Only tag_stack_ is consulted, not pending_tags_.
// presumably returns true on the first match and false otherwise -- TODO confirm
336 bool XHTMLStream::isTagOpen(string const & stag)
338 TagStack::const_iterator sit = tag_stack_.begin();
339 TagStack::const_iterator const sen = tag_stack_.end();
340 for (; sit != sen; ++sit)
// tag names are compared exactly
341 if (sit->tag_ == stag)
// Handle a request to close the tag named by etag.
347 // this is complicated, because we want to make sure that
348 // everything is properly nested. the code ought to make
349 // sure of that, but we won't assert (yet) if we run into
350 // a problem. we'll just output error messages and try our
351 // best to make things work.
// The cases dealt with below are, in order:
//  - nothing is open: report an error;
//  - tags are pending: the tag being closed may itself be pending (then it
//    is dropped), or it is not open at all, or the pending tags are
//    discarded and processing continues;
//  - the tag is on top of tag_stack_: write its end tag and pop it;
//  - the tag is open further down: for font tags the intervening font tags
//    are closed and queued for reopening; for other tags everything above
//    is closed too.
// All diagnostics go through writeError(), i.e. into the output.
352 XHTMLStream & XHTMLStream::operator<<(html::EndTag const & etag)
354 if (etag.tag_.empty())
357 // make sure there are tags to be closed
358 if (tag_stack_.empty()) {
359 writeError("Tried to close `" + etag.tag_
360 + "' when no tags were open!");
364 // first make sure we're not closing an empty tag
365 if (!pending_tags_.empty()) {
366 html::StartTag const & stag = pending_tags_.back();
367 if (etag.tag_ == stag.tag_) {
368 // we have <tag></tag>, so we discard it and remove it
369 // from the pending_tags_.
370 pending_tags_.pop_back();
373 // there is a pending tag that isn't the one we are trying
375 // is this tag itself pending?
376 // non-const iterators because we may call erase().
377 TagStack::iterator dit = pending_tags_.begin();
// NOTE(review): den is captured once, before any erase(); the later loop
// that reuses it relies on nothing having been erased by then -- confirm
// that the erase() branch leaves the function.
378 TagStack::iterator const den = pending_tags_.end();
379 for (; dit != den; ++dit) {
380 if (dit->tag_ == etag.tag_) {
381 // it was pending, so we just erase it
382 writeError("Tried to close pending tag `" + etag.tag_
383 + "' when other tags were pending. Last pending tag is `"
384 + pending_tags_.back().tag_ + "'. Tag discarded.");
385 pending_tags_.erase(dit);
389 // so etag isn't itself pending. is it even open?
390 if (!isTagOpen(etag.tag_)) {
391 writeError("Tried to close `" + etag.tag_
392 + "' when tag was not open. Tag discarded.");
395 // ok, so etag is open.
396 // our strategy will be as below: we will do what we need to
397 // do to close this tag.
// collect the names of the pending tags for the error message
398 string estr = "Closing tag `" + etag.tag_
399 + "' when other tags are pending. Discarded pending tags:\n";
400 for (dit = pending_tags_.begin(); dit != den; ++dit)
401 estr += dit->tag_ + "\n";
403 // clear the pending tags...
404 pending_tags_.clear();
405 // ...and then just fall through.
408 // is the tag we are closing the last one we opened?
409 if (etag.tag_ == tag_stack_.back().tag_) {
411 os_ << etag.asEndTag();
412 // ...and forget about it
413 tag_stack_.pop_back();
417 // we are trying to close a tag other than the one last opened.
418 // let's first see if this particular tag is still open somehow.
419 if (!isTagOpen(etag.tag_)) {
420 writeError("Tried to close `" + etag.tag_
421 + "' when tag was not open. Tag discarded.");
425 // so the tag was opened, but other tags have been opened since
426 // and not yet closed.
427 // if it's a font tag, though...
428 if (html::isFontTag(etag.tag_)) {
429 // it won't be a problem if the other tags open since this one
430 // are also font tags.
// scan from the most recently opened tag downwards
431 TagStack::const_reverse_iterator rit = tag_stack_.rbegin();
432 TagStack::const_reverse_iterator ren = tag_stack_.rend();
433 for (; rit != ren; ++rit) {
434 if (rit->tag_ == etag.tag_)
436 if (!html::isFontTag(rit->tag_)) {
437 // we'll just leave it and, presumably, have to close it later.
438 writeError("Unable to close font tag `" + etag.tag_
439 + "' due to open non-font tag `" + rit->tag_ + "'.");
445 // <em>this is <strong>bold
446 // and are being asked to close em. we want:
447 // <em>this is <strong>bold</strong></em><strong>
448 // first, we close the intervening tags...
449 html::StartTag curtag = tag_stack_.back();
450 // ...remembering them in a stack.
// the loop ends on the stack because etag is known to be open at this point
452 while (curtag.tag_ != etag.tag_) {
453 os_ << curtag.asEndTag();
454 fontstack.push_back(curtag);
455 tag_stack_.pop_back();
456 curtag = tag_stack_.back();
458 // now close our tag...
459 os_ << etag.asEndTag();
460 tag_stack_.pop_back();
462 // ...and restore the other tags.
// they are walked in reverse, i.e. in their original opening order, and
// become pending tags again rather than being written right away
463 rit = fontstack.rbegin();
464 ren = fontstack.rend();
465 for (; rit != ren; ++rit)
466 pending_tags_.push_back(*rit);
470 // it wasn't a font tag.
471 // so other tags were opened before this one and not properly closed.
472 // so we'll close them, too. that may cause other issues later, but it
473 // at least guarantees proper nesting.
474 writeError("Closing tag `" + etag.tag_
475 + "' when other tags are open, namely:");
476 html::StartTag curtag = tag_stack_.back();
// each tag closed on the way is reported by name
477 while (curtag.tag_ != etag.tag_) {
478 writeError(curtag.tag_);
479 os_ << curtag.asEndTag();
480 tag_stack_.pop_back();
481 curtag = tag_stack_.back();
483 // curtag is now the one we actually want.
484 os_ << curtag.asEndTag();
485 tag_stack_.pop_back();
490 // End code for XHTMLStream
494 // convenience functions
// Send the start tag for layout lay to xs, built from the layout's
// htmltag() and htmlattr().
496 inline void openTag(XHTMLStream & xs, Layout const & lay)
498 xs << html::StartTag(lay.htmltag(), lay.htmlattr());
// Variant of openTag() that also takes the paragraph parameters into
// account: the paragraph alignment is turned into a CSS value and added to
// the layout's attributes as an inline style.
502 void openTag(XHTMLStream & xs, Layout const & lay,
503 ParagraphParameters const & params)
505 // FIXME Are there other things we should handle here?
506 string const align = alignmentToCSS(params.align());
// the style attribute is appended after whatever htmlattr() provides
511 string attrs = lay.htmlattr() + " style='text-align: " + align + ";'";
512 xs << html::StartTag(lay.htmltag(), attrs);
// Send the end tag for layout lay (its htmltag()) to xs.
516 inline void closeTag(XHTMLStream & xs, Layout const & lay)
518 xs << html::EndTag(lay.htmltag());
// Send the start tag for the label of layout lay to xs, built from
// htmllabeltag() and htmllabelattr().
522 inline void openLabelTag(XHTMLStream & xs, Layout const & lay)
524 xs << html::StartTag(lay.htmllabeltag(), lay.htmllabelattr());
// Send the end tag for the label of layout lay (its htmllabeltag()) to xs.
528 inline void closeLabelTag(XHTMLStream & xs, Layout const & lay)
530 xs << html::EndTag(lay.htmllabeltag());
// Send the start tag for an item of layout lay to xs, built from
// htmlitemtag() and htmlitemattr().
534 inline void openItemTag(XHTMLStream & xs, Layout const & lay)
// NOTE(review): the third StartTag argument (true) is only passed for item
// tags; its meaning is defined by html::StartTag -- confirm there.
536 xs << html::StartTag(lay.htmlitemtag(), lay.htmlitemattr(), true);
// Variant of openItemTag() that also takes the paragraph parameters into
// account: the paragraph alignment is turned into a CSS value and added to
// the attributes as an inline style. The plain openItemTag(xs, lay) is used
// as a fallback (presumably when there is no alignment to express --
// TODO confirm).
540 void openItemTag(XHTMLStream & xs, Layout const & lay,
541 ParagraphParameters const & params)
543 // FIXME Are there other things we should handle here?
544 string const align = alignmentToCSS(params.align());
546 openItemTag(xs, lay);
// NOTE(review): this path starts from lay.htmlattr(), while the two-argument
// openItemTag() uses lay.htmlitemattr() and passes an extra `true` to
// StartTag. Confirm whether the difference is intended.
549 string attrs = lay.htmlattr() + " style='text-align: " + align + ";'";
550 xs << html::StartTag(lay.htmlitemtag(), attrs);
// Send the end tag for an item of layout lay (its htmlitemtag()) to xs.
554 inline void closeItemTag(XHTMLStream & xs, Layout const & lay)
556 xs << html::EndTag(lay.htmlitemtag());
559 // end of convenience functions
// Starting from paragraph p, find the end of the run of paragraphs whose
// layout is of type LATEX_PARAGRAPH. The search never goes past pend.
// presumably the iterator reached by the loop is returned -- TODO confirm
561 ParagraphList::const_iterator searchParagraphHtml(
562 ParagraphList::const_iterator p,
563 ParagraphList::const_iterator const & pend)
// p itself is skipped without being tested; the scan starts at its successor
565 for (++p; p != pend && p->layout().latextype == LATEX_PARAGRAPH; ++p)
// Starting from pstart, scan forward (never past pend) for the end of the
// environment that pstart opens. The decision is based on each following
// paragraph's layout type, its depth relative to pstart's depth, and its
// layout's latexname() compared with that of pstart.
// presumably the iterator at which the scan stops is returned -- TODO confirm
572 ParagraphList::const_iterator searchEnvironmentHtml(
573 ParagraphList::const_iterator const pstart,
574 ParagraphList::const_iterator const & pend)
576 ParagraphList::const_iterator p = pstart;
// layout and depth of the first paragraph are the reference for the scan
577 Layout const & bstyle = p->layout();
578 size_t const depth = p->params().depth();
579 for (++p; p != pend; ++p) {
580 Layout const & style = p->layout();
581 // It shouldn't happen that e.g. a section command occurs inside
582 // a quotation environment, at a higher depth, but as of 6/2009,
583 // it can happen. We pretend that it's just at lowest depth.
584 if (style.latextype == LATEX_COMMAND)
586 // If depth is down, we're done
587 if (p->params().depth() < depth)
589 // If depth is up, we're not done
590 if (p->params().depth() > depth)
592 // Now we know we are at the same depth
// a plain paragraph, or a layout with a different latexname(), at the
// reference depth is tested for here
593 if (style.latextype == LATEX_PARAGRAPH
594 || style.latexname() != bstyle.latexname())
// Output the paragraphs in [pbegin, pend) to xs. For each paragraph the
// layout's counter is stepped (if it has one), the paragraph tag is opened
// and closed as required by runparams.html_make_pars and
// runparams.html_in_par, and the paragraph contents are written by
// simpleLyXHTMLOnePar(). Material that simpleLyXHTMLOnePar() hands back as
// deferred is written unescaped after the paragraph.
601 ParagraphList::const_iterator makeParagraphs(Buffer const & buf,
603 OutputParams const & runparams,
605 ParagraphList::const_iterator const & pbegin,
606 ParagraphList::const_iterator const & pend)
// begin is needed to compute the paragraph's position for outerFont()
608 ParagraphList::const_iterator const begin = text.paragraphs().begin();
609 ParagraphList::const_iterator par = pbegin;
610 for (; par != pend; ++par) {
611 Layout const & lay = par->layout();
612 if (!lay.counter.empty())
613 buf.params().documentClass().counters().step(lay.counter, OutputUpdate);
614 // FIXME We should see if there's a label to be output and
615 // do something with it.
619 // If we are already in a paragraph, and this is the first one, then we
620 // do not want to open the paragraph tag.
621 // we also do not want to open it if the current layout does not permit
622 // multiple paragraphs.
623 bool const opened = runparams.html_make_pars &&
624 (par != pbegin || !runparams.html_in_par);
// presumably only executed when opened is true -- TODO confirm
626 openTag(xs, lay, par->params());
627 docstring const deferred =
628 par->simpleLyXHTMLOnePar(buf, xs, runparams, text.outerFont(distance(begin, par)));
630 // We want to issue the closing tag if either:
631 // (i) We opened it, and either html_in_par is false,
632 // or we're not in the last paragraph, anyway.
633 // (ii) We didn't open it and html_in_par is true,
634 // but we are in the first par, and there is a next par.
// presumably nextpar is advanced by one before it is used -- TODO confirm
635 ParagraphList::const_iterator nextpar = par;
637 bool const needclose =
638 (opened && (!runparams.html_in_par || nextpar != pend))
639 || (!opened && runparams.html_in_par && par == pbegin && nextpar != pend);
644 if (!deferred.empty()) {
// deferred is written with ESCAPE_NONE, i.e. as is
645 xs << XHTMLStream::ESCAPE_NONE << deferred << html::CR();
// Output a bibliography: an <h2 class='bibliography'> heading holding the
// label string of the first paragraph's layout, followed by a
// <div class='bibliography'> that wraps the paragraphs in [pbegin, pend),
// which are written by makeParagraphs().
652 ParagraphList::const_iterator makeBibliography(Buffer const & buf,
654 OutputParams const & runparams,
656 ParagraphList::const_iterator const & pbegin,
657 ParagraphList::const_iterator const & pend)
660 // Use TextClass::htmlTOCLayout() to figure out how we should look.
661 xs << html::StartTag("h2", "class='bibliography'")
662 << pbegin->layout().labelstring(false)
663 << html::EndTag("h2")
665 << html::StartTag("div", "class='bibliography'")
// the iterator returned by makeParagraphs() is not used here
667 makeParagraphs(buf, xs, runparams, text, pbegin, pend);
668 xs << html::EndTag("div");
// Tell whether lay is a "normal" environment, as opposed to some kind of
// list: its latextype is LATEX_ENVIRONMENT or LATEX_BIB_ENVIRONMENT.
673 bool isNormalEnv(Layout const & lay)
675 return lay.latextype == LATEX_ENVIRONMENT
676 || lay.latextype == LATEX_BIB_ENVIRONMENT;
// Output the environment made up of the paragraphs in [pbegin, pend) to xs.
// Paragraphs at the depth of pbegin are written as items of this
// environment, with their label where the layout asks for one; deeper
// paragraphs are handled by recursing into this function; plain paragraphs
// and bibliography entries are handed to makeParagraphs(). Counters are
// stepped along the way. At the end the environment tag of the first
// paragraph's layout is closed.
680 ParagraphList::const_iterator makeEnvironmentHtml(Buffer const & buf,
682 OutputParams const & runparams,
684 ParagraphList::const_iterator const & pbegin,
685 ParagraphList::const_iterator const & pend)
// begin is needed to compute the paragraph's position for outerFont()
687 ParagraphList::const_iterator const begin = text.paragraphs().begin();
688 ParagraphList::const_iterator par = pbegin;
// layout and depth of the first paragraph characterize this environment
689 Layout const & bstyle = par->layout();
690 depth_type const origdepth = pbegin->params().depth();
692 // open tag for this environment
696 // we will on occasion need to remember a layout from before.
// NOTE(review): lastlay starts out null and is dereferenced by the
// closeItemTag(xs, *lastlay) calls below -- confirm each of them is guarded
// by a null check.
697 Layout const * lastlay = 0;
699 while (par != pend) {
700 Layout const & style = par->layout();
701 // the counter only gets stepped if we're in some kind of list,
702 // or if it's the first time through.
703 // note that enum, etc, are handled automatically.
704 // FIXME There may be a bug here about user defined enumeration
705 // types. If so, then we'll need to take the counter and add "i",
706 // "ii", etc, as with enum.
707 Counters & cnts = buf.params().documentClass().counters();
708 docstring const & cntr = style.counter;
709 if (!style.counter.empty()
710 && (par == pbegin || !isNormalEnv(style))
711 && cnts.hasCounter(cntr)
713 cnts.step(cntr, OutputUpdate);
// end of the nested range handed to the recursive call or makeParagraphs()
714 ParagraphList::const_iterator send;
715 // this will be positive, if we want to skip the initial word
716 // (if it's been taken for the label).
719 switch (style.latextype) {
720 case LATEX_ENVIRONMENT:
721 case LATEX_LIST_ENVIRONMENT:
722 case LATEX_ITEM_ENVIRONMENT: {
723 // There are two possibilities in this case.
724 // One is that we are still in the environment in which we
725 // started---which we will be if the depth is the same.
726 if (par->params().depth() == origdepth) {
727 LASSERT(bstyle == style, /* */);
// close the item that was left open when a nested environment started
729 closeItemTag(xs, *lastlay);
// whether the label comes before the opening item tag
// (presumably; see the two openItemTag() calls below -- TODO confirm)
733 bool const labelfirst = style.htmllabelfirst();
735 openItemTag(xs, style, par->params());
// a label is only written if the layout has one and its label tag is
// not the special value "NONE"
738 if (style.labeltype != LABEL_NO_LABEL &&
739 style.htmllabeltag() != "NONE") {
740 if (isNormalEnv(style)) {
741 // in this case, we print the label only for the first
742 // paragraph (as in a theorem).
744 docstring const lbl =
745 pbegin->params().labelString();
747 openLabelTag(xs, style);
749 closeLabelTag(xs, style);
753 } else { // some kind of list
754 if (style.labeltype == LABEL_MANUAL) {
755 openLabelTag(xs, style);
// the first word of the paragraph serves as the label; sep is later
// passed on to simpleLyXHTMLOnePar()
756 sep = par->firstWordLyXHTML(xs, runparams);
757 closeLabelTag(xs, style);
761 openLabelTag(xs, style);
762 xs << par->params().labelString();
763 closeLabelTag(xs, style);
767 } // end label output
770 openItemTag(xs, style, par->params());
// the paragraph contents proper
772 par->simpleLyXHTMLOnePar(buf, xs, runparams,
773 text.outerFont(distance(begin, par)), sep);
776 // We may not want to close the tag yet, in particular:
777 // If we're not at the end...
779 // and are doing items...
780 && !isNormalEnv(style)
781 // and if the depth has changed...
782 && par->params().depth() != origdepth) {
783 // then we'll save this layout for later, and close it when
784 // we get another item.
787 closeItemTag(xs, style);
790 // The other possibility is that the depth has increased, in which
791 // case we need to recurse.
793 send = searchEnvironmentHtml(par, pend);
794 par = makeEnvironmentHtml(buf, xs, runparams, text, par, send);
798 case LATEX_PARAGRAPH:
799 send = searchParagraphHtml(par, pend);
800 par = makeParagraphs(buf, xs, runparams, text, par, send);
803 case LATEX_BIB_ENVIRONMENT:
806 par = makeParagraphs(buf, xs, runparams, text, par, send);
// close an item that is still open, then the environment itself
816 closeItemTag(xs, *lastlay);
817 closeTag(xs, bstyle);
// Output the single command paragraph pbegin (e.g. a sectioning command)
// to xs: step the layout's counter if it has one, open the layout's tag,
// write the label if the layout has one, then write the paragraph contents.
823 void makeCommand(Buffer const & buf,
825 OutputParams const & runparams,
827 ParagraphList::const_iterator const & pbegin)
829 Layout const & style = pbegin->layout();
830 if (!style.counter.empty())
831 buf.params().documentClass().counters().step(style.counter, OutputUpdate);
833 openTag(xs, style, pbegin->params());
835 // Label around sectioning number:
836 // FIXME Probably need to account for LABEL_MANUAL
837 if (style.labeltype != LABEL_NO_LABEL) {
838 openLabelTag(xs, style);
839 xs << pbegin->params().labelString();
840 closeLabelTag(xs, style);
841 // Otherwise the label might run together with the text
842 xs << from_ascii(" ");
// begin is needed to compute the paragraph's position for outerFont()
845 ParagraphList::const_iterator const begin = text.paragraphs().begin();
846 pbegin->simpleLyXHTMLOnePar(buf, xs, runparams,
847 text.outerFont(distance(begin, pbegin)));
852 } // end anonymous namespace
// Entry point: output all paragraphs of text. The paragraph list is walked
// from start to end and each paragraph is dispatched on the latextype of
// its layout: commands go to makeCommand(), environments to
// makeEnvironmentHtml(), bibliography entries to makeBibliography() and
// plain paragraphs to makeParagraphs(). Except for commands, the helper
// returns the position at which the walk continues.
855 void xhtmlParagraphs(Text const & text,
858 OutputParams const & runparams)
860 ParagraphList const & paragraphs = text.paragraphs();
861 ParagraphList::const_iterator par = paragraphs.begin();
862 ParagraphList::const_iterator pend = paragraphs.end();
// work on a private copy of the output parameters
864 OutputParams ourparams = runparams;
865 while (par != pend) {
866 if (par->params().startOfAppendix()) {
867 // FIXME: only the counter corresponding to toplevel
868 // sectioning should be reset
// note that the counters are taken from the master buffer here
869 Counters & cnts = buf.masterBuffer()->params().documentClass().counters();
873 Layout const & style = par->layout();
// lastpar remembers where this iteration started, for the check at the end
874 ParagraphList::const_iterator lastpar = par;
875 ParagraphList::const_iterator send;
877 switch (style.latextype) {
878 case LATEX_COMMAND: {
879 // The files with which we are working never have more than
880 // one paragraph in a command structure.
882 // if (ourparams.html_in_par)
883 // fix it so we don't get sections inside standard, e.g.
884 // note that we may then need to make runparams not const, so we
885 // can communicate that back.
886 // FIXME Maybe this fix should be in the routines themselves, in case
887 // they are called from elsewhere.
888 makeCommand(buf, xs, ourparams, text, par);
892 case LATEX_ENVIRONMENT:
893 case LATEX_LIST_ENVIRONMENT:
894 case LATEX_ITEM_ENVIRONMENT: {
895 // FIXME Same fix here.
896 send = searchEnvironmentHtml(par, pend);
897 par = makeEnvironmentHtml(buf, xs, ourparams, text, par, send);
900 case LATEX_BIB_ENVIRONMENT: {
901 // FIXME Same fix here.
902 send = searchEnvironmentHtml(par, pend);
903 par = makeBibliography(buf, xs, ourparams, text, par, send);
906 case LATEX_PARAGRAPH:
907 send = searchParagraphHtml(par, pend);
908 par = makeParagraphs(buf, xs, ourparams, text, par, send);
912 // makeEnvironment may process more than one paragraph and bypass pend
// so the end is detected by comparing distances from lastpar rather than
// by testing par against pend directly
913 if (distance(lastpar, par) >= distance(lastpar, pend))
// Map a LyXAlignment value to the CSS value that callers put into a
// " style='text-align: ...;'" attribute. LYX_ALIGN_BLOCK, LYX_ALIGN_RIGHT
// and LYX_ALIGN_CENTER are among the values handled.
919 string alignmentToCSS(LyXAlignment align) {
921 case LYX_ALIGN_BLOCK:
922 // we are NOT going to use text-align: justify!!
925 case LYX_ALIGN_RIGHT:
927 case LYX_ALIGN_CENTER: