2 * \file output_xhtml.cpp
3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
8 * This code is based upon output_docbook.cpp
10 * Full author contact details are available in file CREDITS.
15 #include "output_xhtml.h"
18 #include "buffer_funcs.h"
19 #include "BufferParams.h"
22 #include "OutputParams.h"
23 #include "Paragraph.h"
24 #include "ParagraphList.h"
25 #include "ParagraphParameters.h"
28 #include "TextClass.h"
30 #include "support/convert.h"
31 #include "support/debug.h"
32 #include "support/lassert.h"
33 #include "support/lstrings.h"
38 using namespace lyx::support;
44 docstring escapeChar(char_type c)
// docstring overload of htmlize. The visible code sets up an iteration over
// str from begin() to end().
// NOTE(review): the loop body is not visible here; the std::string overload
// of htmlize streams escapeChar(*it) for each character -- presumably this
// overload does the same, confirm.
68 // escape what needs escaping
69 docstring htmlize(docstring const & str) {
71 docstring::const_iterator it = str.begin();
72 docstring::const_iterator en = str.end();
73 for (; it != en; ++it)
79 string escapeChar(char c)
// std::string overload of htmlize: walks str front to back, passes each
// character through escapeChar and streams the result into d.
103 // escape what needs escaping
104 string htmlize(string const & str) {
106 string::const_iterator it = str.begin();
107 string::const_iterator en = str.end();
108 for (; it != en; ++it)
109 d << escapeChar(*it);
// std::string overload of cleanAttr: for each character of str, appends the
// character itself to newname when isalnum() accepts it, and '_' otherwise.
114 string cleanAttr(string const & str)
117 string::const_iterator it = str.begin();
118 string::const_iterator en = str.end();
119 for (; it != en; ++it)
// NOTE(review): *it is a plain char; the C library requires the argument of
// isalnum() to be representable as unsigned char (or EOF), so bytes >= 0x80
// may need a cast where char is signed -- confirm.
120 newname += isalnum(*it) ? *it : '_';
// docstring overload of cleanAttr. The visible code iterates over str from
// begin() to end().
// NOTE(review): the loop body is not visible here; presumably it mirrors the
// std::string overload (non-alphanumeric characters replaced) -- confirm.
125 docstring cleanAttr(docstring const & str)
128 docstring::const_iterator it = str.begin();
129 docstring::const_iterator en = str.end();
130 for (; it != en; ++it)
// Returns true when s is one of the tag names this file treats as a font
// tag. Only "em" and "strong" are recognized at present.
139 bool isFontTag(string const & s)
141 return s == "em" || s == "strong"; // others?
// Renders this start tag as text: "<" followed by tag_, then a space and the
// htmlize()d attr_, and returns the result converted with from_utf8.
// NOTE(review): the condition guarding the attribute line and the code that
// terminates the tag are not visible here.
145 docstring StartTag::asTag() const
147 string output = "<" + tag_;
149 output += " " + html::htmlize(attr_);
151 return from_utf8(output);
// Returns the closing tag matching this start tag, i.e. "</" + tag_ + ">",
// converted to a docstring with from_utf8.
155 docstring StartTag::asEndTag() const
157 string output = "</" + tag_ + ">";
158 return from_utf8(output);
// Returns this end tag as text, i.e. "</" + tag_ + ">", converted to a
// docstring with from_utf8.
162 docstring EndTag::asEndTag() const
164 string output = "</" + tag_ + ">";
165 return from_utf8(output);
// Renders this tag as text: "<" followed by tag_, then a space and the
// htmlize()d attr_, and returns the result converted with from_utf8.
// NOTE(review): the condition guarding the attribute line and the code that
// terminates the tag are not visible here.
169 docstring CompTag::asTag() const
171 string output = "<" + tag_;
173 output += " " + html::htmlize(attr_);
175 return from_utf8(output);
182 ////////////////////////////////////////////////////////////////
186 ////////////////////////////////////////////////////////////////
// Constructs a stream that writes to os; nextraw_ starts out false.
188 XHTMLStream::XHTMLStream(odocstream & os)
189 : os_(os), nextraw_(false)
// Writes a newline character to the underlying stream os_.
193 void XHTMLStream::cr()
196 os_ << from_ascii("\n");
// Reports a problem inside the generated document itself: s is written to
// os_ wrapped as "<!-- Output Error: ... -->".
// Note that s is concatenated verbatim; it is not passed through htmlize().
200 void XHTMLStream::writeError(std::string const & s)
203 os_ << from_utf8("<!-- Output Error: " + s + " -->");
// Closes the font tags sitting on top of tag_stack_, then reports (through
// writeError) any font tag that is still open further down the stack.
// NOTE(review): the return statements are not visible here; noFontTags is
// presumably what gets returned at the end -- confirm.
207 bool XHTMLStream::closeFontTags()
// With nothing open there is nothing to close.
209 if (tag_stack_.empty())
211 // first, we close any open font tags we can close
// curtag is a copy, so it stays usable after the pop_back() below.
212 html::StartTag curtag = tag_stack_.back();
213 while (html::isFontTag(curtag.tag_)) {
// Emit the closing tag and drop the entry from the stack.
214 os_ << curtag.asEndTag();
215 tag_stack_.pop_back();
// back() on an empty container is not allowed, hence the check before
// fetching the next top-of-stack entry.
216 if (tag_stack_.empty())
217 // this probably shouldn't happen, since then the
218 // font tags weren't in any other tag. but that
219 // problem will likely be caught elsewhere.
221 curtag = tag_stack_.back();
223 // so we've hit a non-font tag. let's see if any of the
224 // remaining tags are font tags.
225 TagStack::const_iterator it = tag_stack_.begin();
226 TagStack::const_iterator en = tag_stack_.end();
227 bool noFontTags = true;
228 for (; it != en; ++it) {
229 if (html::isFontTag(it->tag_)) {
230 writeError("Font tag `" + it->tag_ + "' still open in closeFontTags().\n"
231 "This is likely not a problem, but you might want to check.");
// Drains pending_tags_ from the front, pushing each tag onto tag_stack_ in
// the order in which it was queued.
// NOTE(review): the lines between fetching the tag and pushing it are not
// visible here; presumably the tag is written to os_ there -- confirm.
239 void XHTMLStream::clearTagDeque()
241 while (!pending_tags_.empty()) {
242 html::StartTag const & tag = pending_tags_.front();
// tag refers to the front element, so it has to be copied onto tag_stack_
// before pop_front() removes that element.
245 tag_stack_.push_back(tag);
246 pending_tags_.pop_front();
// Writes the docstring d to os_ after passing it through html::htmlize().
// NOTE(review): the lines preceding the write are not visible here.
251 XHTMLStream & XHTMLStream::operator<<(docstring const & d)
258 os_ << html::htmlize(d);
// Writes a C string: s is converted with from_ascii() and the result is
// written to os_ after passing through html::htmlize().
// NOTE(review): the lines between the conversion and the write are not
// visible here.
263 XHTMLStream & XHTMLStream::operator<<(const char * s)
266 docstring const d = from_ascii(s);
271 os_ << html::htmlize(d);
// Writes a single character to os_ after passing it through
// html::escapeChar().
// NOTE(review): the lines preceding the write are not visible here.
276 XHTMLStream & XHTMLStream::operator<<(char_type c)
283 os_ << html::escapeChar(c);
288 XHTMLStream & XHTMLStream::operator<<(int i)
297 XHTMLStream & XHTMLStream::operator<<(NextRaw const &)
// Opens a tag. The tag is not written immediately: it is appended to
// pending_tags_. A tag with an empty name is special-cased first (the action
// taken is not visible here).
304 XHTMLStream & XHTMLStream::operator<<(html::StartTag const & tag)
306 if (tag.tag_.empty())
308 pending_tags_.push_back(tag);
// Handles a html::CompTag. A tag with an empty name is special-cased first.
// NOTE(review): neither the action taken for an empty name nor the code that
// writes the tag is visible here.
315 XHTMLStream & XHTMLStream::operator<<(html::CompTag const & tag)
317 if (tag.tag_.empty())
// Linear search of tag_stack_ for an entry whose tag_ equals stag. Only
// tag_stack_ is consulted; pending_tags_ is not examined here.
// NOTE(review): the return statements are not visible here.
327 bool XHTMLStream::isTagOpen(string const & stag)
329 TagStack::const_iterator sit = tag_stack_.begin();
330 TagStack::const_iterator const sen = tag_stack_.end();
331 for (; sit != sen; ++sit)
332 if (sit->tag_ == stag)
// Handles a request to close etag.tag_. The visible logic checks, in order:
// an empty tag name, an empty tag_stack_, a tag that is still pending, a tag
// that is the most recently opened one, a tag that is not open at all, a font
// tag buried under other tags, and finally a non-font tag buried under other
// open tags. Problems are reported in the output through writeError().
338 // this is complicated, because we want to make sure that
339 // everything is properly nested. the code ought to make
340 // sure of that, but we won't assert (yet) if we run into
341 // a problem. we'll just output error messages and try our
342 // best to make things work.
343 XHTMLStream & XHTMLStream::operator<<(html::EndTag const & etag)
345 if (etag.tag_.empty())
348 // make sure there are tags to be closed
// NOTE(review): this test runs before pending_tags_ is consulted, so a tag
// that is only pending while tag_stack_ is empty appears to be reported
// here -- confirm that this is intended.
349 if (tag_stack_.empty()) {
350 writeError("Tried to close `" + etag.tag_
351 + "' when no tags were open!");
355 // first make sure we're not closing an empty tag
356 if (!pending_tags_.empty()) {
357 html::StartTag const & stag = pending_tags_.back();
358 if (etag.tag_ == stag.tag_) {
359 // we have <tag></tag>, so we discard it and remove it
360 // from the pending_tags_.
361 pending_tags_.pop_back();
364 // there is a pending tag that isn't the one we are trying
366 // is this tag itself pending?
367 // non-const iterators because we may call erase().
368 TagDeque::iterator dit = pending_tags_.begin();
369 TagDeque::iterator const den = pending_tags_.end();
370 for (; dit != den; ++dit) {
371 if (dit->tag_ == etag.tag_) {
372 // it was pending, so we just erase it
373 writeError("Tried to close pending tag `" + etag.tag_
374 + "' when other tags were pending. Last pending tag is `"
375 + pending_tags_.back().tag_ + "'. Tag discarded.");
// NOTE(review): erase() invalidates dit and den, so the loop must not
// continue after this point; the statement that leaves the loop is not
// visible here -- confirm.
376 pending_tags_.erase(dit);
380 // so etag isn't itself pending. is it even open?
381 if (!isTagOpen(etag.tag_)) {
382 writeError("Tried to close `" + etag.tag_
383 + "' when tag was not open. Tag discarded.");
386 // ok, so etag is open.
387 // our strategy will be as below: we will do what we need to
388 // do to close this tag.
389 string estr = "Closing tag `" + etag.tag_
390 + "' when other tags are pending. Discarded pending tags:\n";
// den is reused from above; it is still valid here only because no erase()
// has happened on the path that reaches this loop (assuming the erase branch
// above does not fall through -- confirm).
391 for (dit = pending_tags_.begin(); dit != den; ++dit)
392 estr += dit->tag_ + "\n";
394 // clear the pending tags...
395 pending_tags_.clear();
396 // ...and then just fall through.
399 // is the tag we are closing the last one we opened?
400 if (etag.tag_ == tag_stack_.back().tag_) {
402 os_ << etag.asEndTag();
403 // ...and forget about it
404 tag_stack_.pop_back();
408 // we are trying to close a tag other than the one last opened.
409 // let's first see if this particular tag is still open somehow.
410 if (!isTagOpen(etag.tag_)) {
411 writeError("Tried to close `" + etag.tag_
412 + "' when tag was not open. Tag discarded.");
416 // so the tag was opened, but other tags have been opened since
417 // and not yet closed.
418 // if it's a font tag, though...
419 if (html::isFontTag(etag.tag_)) {
420 // it won't be a problem if the other tags open since this one
421 // are also font tags.
// Walk from the most recently opened tag downwards until etag's own entry
// is found, checking every tag opened after it.
422 TagStack::const_reverse_iterator rit = tag_stack_.rbegin();
423 TagStack::const_reverse_iterator ren = tag_stack_.rend();
424 for (; rit != ren; ++rit) {
425 if (rit->tag_ == etag.tag_)
427 if (!html::isFontTag(rit->tag_)) {
428 // we'll just leave it and, presumably, have to close it later.
429 writeError("Unable to close font tag `" + etag.tag_
430 + "' due to open non-font tag `" + rit->tag_ + "'.");
436 // <em>this is <strong>bold
437 // and are being asked to close em. we want:
438 // <em>this is <strong>bold</strong></em><strong>
439 // first, we close the intervening tags...
440 html::StartTag curtag = tag_stack_.back();
441 // ...remembering them in a stack.
// The isTagOpen() check above established that etag.tag_ is somewhere on
// tag_stack_, so this loop reaches it before the stack runs empty.
443 while (curtag.tag_ != etag.tag_) {
444 os_ << curtag.asEndTag();
445 fontstack.push_back(curtag);
446 tag_stack_.pop_back();
447 curtag = tag_stack_.back();
449 // now close our tag...
450 os_ << etag.asEndTag();
451 tag_stack_.pop_back();
453 // ...and restore the other tags.
// fontstack holds the tags in the order they were closed (innermost first);
// iterating in reverse re-queues them in their original opening order.
454 rit = fontstack.rbegin();
455 ren = fontstack.rend();
456 for (; rit != ren; ++rit)
457 pending_tags_.push_back(*rit);
461 // it wasn't a font tag.
462 // so other tags were opened before this one and not properly closed.
463 // so we'll close them, too. that may cause other issues later, but it
464 // at least guarantees proper nesting.
465 writeError("Closing tag `" + etag.tag_
466 + "' when other tags are open, namely:");
467 html::StartTag curtag = tag_stack_.back();
// Each intervening tag is reported and closed; as above, etag.tag_ is known
// to be on the stack, so the loop stops at its entry.
468 while (curtag.tag_ != etag.tag_) {
469 writeError(curtag.tag_);
470 os_ << curtag.asEndTag();
471 tag_stack_.pop_back();
472 curtag = tag_stack_.back();
474 // curtag is now the one we actually want.
475 os_ << curtag.asEndTag();
476 tag_stack_.pop_back();
481 // End code for XHTMLStream
485 // convenience functions
// Sends a StartTag built from the layout's htmltag() and htmlattr() to xs.
487 inline void openTag(XHTMLStream & xs, Layout const & lay)
489 xs << html::StartTag(lay.htmltag(), lay.htmlattr());
// Sends an EndTag for the layout's htmltag() to xs.
493 inline void closeTag(XHTMLStream & xs, Layout const & lay)
495 xs << html::EndTag(lay.htmltag());
// Sends a StartTag built from the layout's htmllabeltag() and
// htmllabelattr() to xs.
499 inline void openLabelTag(XHTMLStream & xs, Layout const & lay)
501 xs << html::StartTag(lay.htmllabeltag(), lay.htmllabelattr());
// Sends an EndTag for the layout's htmllabeltag() to xs.
505 inline void closeLabelTag(XHTMLStream & xs, Layout const & lay)
507 xs << html::EndTag(lay.htmllabeltag());
// Sends a StartTag built from the layout's htmlitemtag() and htmlitemattr()
// to xs.
// NOTE(review): unlike openTag/openLabelTag, a third argument `true` is
// passed to StartTag; its meaning is not visible in this file -- confirm
// against the StartTag declaration.
511 inline void openItemTag(XHTMLStream & xs, Layout const & lay)
513 xs << html::StartTag(lay.htmlitemtag(), lay.htmlitemattr(), true);
// Sends an EndTag for the layout's htmlitemtag() to xs.
517 inline void closeItemTag(XHTMLStream & xs, Layout const & lay)
519 xs << html::EndTag(lay.htmlitemtag());
522 // end of convenience functions
// Starting with the paragraph after p, advances for as long as the paragraph
// is before pend and its layout has latextype LATEX_PARAGRAPH.
// NOTE(review): the loop body and the return statement are not visible here;
// presumably the iterator reached is returned -- confirm.
524 ParagraphList::const_iterator searchParagraphHtml(
525 ParagraphList::const_iterator p,
526 ParagraphList::const_iterator const & pend)
528 for (++p; p != pend && p->layout().latextype == LATEX_PARAGRAPH; ++p)
// Scans forward from the paragraph after pstart, comparing each paragraph's
// layout and depth with those of pstart (bstyle and depth below).
// NOTE(review): the statements executed under each test and the return
// statement are not visible here; the existing comments describe the intent.
535 ParagraphList::const_iterator searchEnvironmentHtml(
536 ParagraphList::const_iterator const pstart,
537 ParagraphList::const_iterator const & pend)
539 ParagraphList::const_iterator p = pstart;
// Layout and depth of the starting paragraph, used as the reference below.
540 Layout const & bstyle = p->layout();
541 size_t const depth = p->params().depth();
542 for (++p; p != pend; ++p) {
543 Layout const & style = p->layout();
544 // It shouldn't happen that e.g. a section command occurs inside
545 // a quotation environment, at a higher depth, but as of 6/2009,
546 // it can happen. We pretend that it's just at lowest depth.
547 if (style.latextype == LATEX_COMMAND)
549 // If depth is down, we're done
550 if (p->params().depth() < depth)
552 // If depth is up, we're not done
553 if (p->params().depth() > depth)
555 // Now we know we are at the same depth
// At equal depth, a plain paragraph or a layout with a different latexname()
// than the starting one is singled out.
556 if (style.latextype == LATEX_PARAGRAPH
557 || style.latexname() != bstyle.latexname())
// Outputs the paragraphs in [pbegin, pend). For each paragraph the layout's
// counter (if any) is stepped, the paragraph body is produced by
// simpleLyXHTMLOnePar(), and any text that call returns is written afterwards.
// NOTE(review): the parameter lines for xs and text, the code that opens and
// closes the paragraph tag, and the return statement are not visible here.
564 ParagraphList::const_iterator makeParagraphs(Buffer const & buf,
566 OutputParams const & runparams,
568 ParagraphList::const_iterator const & pbegin,
569 ParagraphList::const_iterator const & pend)
// First paragraph of the whole text; used to turn par into an index.
571 ParagraphList::const_iterator const begin = text.paragraphs().begin();
572 ParagraphList::const_iterator par = pbegin;
573 for (; par != pend; ++par) {
574 Layout const & lay = par->layout();
575 if (!lay.counter.empty())
576 buf.params().documentClass().counters().step(lay.counter);
577 // FIXME We should see if there's a label to be output and
578 // do something with it.
582 // If we are already in a paragraph, and this is the first one, then we
583 // do not want to open the paragraph tag.
584 // we also do not want to open it if the current layout does not permit
585 // multiple paragraphs.
586 bool const opened = runparams.html_make_pars &&
587 (par != pbegin || !runparams.html_in_par);
// outerFont() is given the position of par within text.paragraphs().
590 docstring const deferred =
591 par->simpleLyXHTMLOnePar(buf, xs, runparams, text.outerFont(distance(begin, par)));
593 // We want to issue the closing tag if either:
594 // (i) We opened it, and either html_in_par is false,
595 // or we're not in the last paragraph, anyway.
596 // (ii) We didn't open it and html_in_par is true,
597 // but we are in the first par, and there is a next par.
// NOTE(review): nextpar is compared against pend below; the line that
// advances it past par is not visible here.
598 ParagraphList::const_iterator nextpar = par;
600 bool const needclose =
601 (opened && (!runparams.html_in_par || nextpar != pend))
602 || (!opened && runparams.html_in_par && par == pbegin && nextpar != pend);
// The deferred text is sent preceded by a NextRaw marker (presumably so that
// it is written without escaping -- confirm against XHTMLStream).
607 if (!deferred.empty()) {
608 xs << XHTMLStream::NextRaw() << deferred;
// Outputs a bibliography: an <h2 class='bibliography'> heading containing
// labelstring(false) of the first paragraph's layout, followed by a
// <div class='bibliography'> that wraps the paragraphs [pbegin, pend) as
// produced by makeParagraphs().
// NOTE(review): the parameter lines for xs and text and the return statement
// are not visible here.
616 ParagraphList::const_iterator makeBibliography(Buffer const & buf,
618 OutputParams const & runparams,
620 ParagraphList::const_iterator const & pbegin,
621 ParagraphList::const_iterator const & pend)
624 // Use TextClass::htmlTOCLayout() to figure out how we should look.
625 xs << html::StartTag("h2", "class='bibliography'");
626 xs << pbegin->layout().labelstring(false);
627 xs << html::EndTag("h2");
629 xs << html::StartTag("div", "class='bibliography'");
631 makeParagraphs(buf, xs, runparams, text, pbegin, pend);
632 xs << html::EndTag("div");
// Returns true when the layout's latextype is LATEX_ENVIRONMENT or
// LATEX_BIB_ENVIRONMENT; the list and item environment types do not count.
637 bool isNormalEnv(Layout const & lay)
639 return lay.latextype == LATEX_ENVIRONMENT
640 || lay.latextype == LATEX_BIB_ENVIRONMENT;
// Outputs the environment made up of the paragraphs [pbegin, pend). The
// layout and depth of the first paragraph (bstyle, origdepth) serve as the
// reference: paragraphs at that depth are emitted as items of this
// environment, deeper environment paragraphs are handled by a recursive call,
// and plain/bibliography paragraphs are handed to makeParagraphs().
// NOTE(review): the parameter lines for xs and text, several declarations
// (e.g. sep), the break statements and the return statement are not visible
// here.
644 ParagraphList::const_iterator makeEnvironmentHtml(Buffer const & buf,
646 OutputParams const & runparams,
648 ParagraphList::const_iterator const & pbegin,
649 ParagraphList::const_iterator const & pend)
// First paragraph of the whole text; used to turn par into an index.
651 ParagraphList::const_iterator const begin = text.paragraphs().begin();
652 ParagraphList::const_iterator par = pbegin;
653 Layout const & bstyle = par->layout();
654 depth_type const origdepth = pbegin->params().depth();
656 // open tag for this environment
660 // we will on occasion need to remember a layout from before.
661 Layout const * lastlay = 0;
663 while (par != pend) {
664 Layout const & style = par->layout();
665 // the counter only gets stepped if we're in some kind of list,
666 // or if it's the first time through.
667 // note that enum, etc, are handled automatically.
668 // FIXME There may be a bug here about user defined enumeration
669 // types. If so, then we'll need to take the counter and add "i",
670 // "ii", etc, as with enum.
671 Counters & cnts = buf.params().documentClass().counters();
672 docstring const & cntr = style.counter;
673 if (!style.counter.empty()
674 && (par == pbegin || !isNormalEnv(style))
675 && cnts.hasCounter(cntr)
678 ParagraphList::const_iterator send;
679 // this will be positive, if we want to skip the initial word
680 // (if it's been taken for the label).
683 switch (style.latextype) {
684 case LATEX_ENVIRONMENT:
685 case LATEX_LIST_ENVIRONMENT:
686 case LATEX_ITEM_ENVIRONMENT: {
687 // There are two possibilities in this case.
688 // One is that we are still in the environment in which we
689 // started---which we will be if the depth is the same.
690 if (par->params().depth() == origdepth) {
691 LASSERT(bstyle == style, /* */);
// NOTE(review): lastlay is dereferenced here; the test that guards against
// it being null is not visible -- confirm.
693 closeItemTag(xs, *lastlay);
696 if (isNormalEnv(style)) {
697 // in this case, we print the label only for the first
698 // paragraph (as in a theorem).
699 openItemTag(xs, style);
// A label tag of "NONE" suppresses the label.
700 if (par == pbegin && style.htmllabeltag() != "NONE") {
701 docstring const lbl =
702 pbegin->expandLabel(style, buf.params(), false);
704 openLabelTag(xs, style);
706 closeLabelTag(xs, style);
710 } else { // some kind of list
711 bool const labelfirst = style.htmllabelfirst();
713 openItemTag(xs, style);
// Manual labels take the paragraph's first word as the label; the value
// returned by firstWordLyXHTML() is kept in sep and passed on below.
714 if (style.labeltype == LABEL_MANUAL
715 && style.htmllabeltag() != "NONE") {
716 openLabelTag(xs, style);
717 sep = par->firstWordLyXHTML(xs, runparams);
718 closeLabelTag(xs, style);
// Any other labelled list gets the expanded label text instead.
721 else if (style.labeltype != LABEL_NO_LABEL
722 && style.htmllabeltag() != "NONE") {
723 openLabelTag(xs, style);
724 xs << par->expandLabel(style, buf.params(), false);
725 closeLabelTag(xs, style);
729 openItemTag(xs, style);
731 par->simpleLyXHTMLOnePar(buf, xs, runparams,
732 text.outerFont(distance(begin, par)), sep);
734 // We may not want to close the tag yet, in particular,
735 // if we're not at the end...
737 // and are doing items...
738 && !isNormalEnv(style)
739 // and if the depth has changed...
740 && par->params().depth() != origdepth) {
741 // then we'll save this layout for later, and close it when
742 // we get another item.
745 closeItemTag(xs, style);
748 // The other possibility is that the depth has increased, in which
749 // case we need to recurse.
// The recursive call returns the paragraph at which to continue.
751 send = searchEnvironmentHtml(par, pend);
752 par = makeEnvironmentHtml(buf, xs, runparams, text, par, send);
756 case LATEX_PARAGRAPH:
757 send = searchParagraphHtml(par, pend);
758 par = makeParagraphs(buf, xs, runparams, text, par, send);
761 case LATEX_BIB_ENVIRONMENT:
764 par = makeParagraphs(buf, xs, runparams, text, par, send);
// NOTE(review): as above, the guard around this dereference of lastlay is
// not visible -- confirm.
774 closeItemTag(xs, *lastlay);
775 closeTag(xs, bstyle);
// Outputs a single command paragraph (pbegin): steps the layout's counter if
// it has one, writes the expanded label inside a label tag when the layout
// has a label, and then writes the paragraph body via simpleLyXHTMLOnePar().
// NOTE(review): the parameter lines for xs and text and the code that opens
// and closes the enclosing tag are not visible here.
781 void makeCommand(Buffer const & buf,
783 OutputParams const & runparams,
785 ParagraphList::const_iterator const & pbegin)
787 Layout const & style = pbegin->layout();
788 if (!style.counter.empty())
789 buf.params().documentClass().counters().step(style.counter);
793 // Label around sectioning number:
794 // FIXME Probably need to account for LABEL_MANUAL
795 if (style.labeltype != LABEL_NO_LABEL) {
796 openLabelTag(xs, style);
797 xs << pbegin->expandLabel(style, buf.params(), false);
798 closeLabelTag(xs, style);
799 // Otherwise the label might run together with the text
800 xs << from_ascii(" ");
// outerFont() is given the position of pbegin within text.paragraphs().
803 ParagraphList::const_iterator const begin = text.paragraphs().begin();
804 pbegin->simpleLyXHTMLOnePar(buf, xs, runparams,
805 text.outerFont(distance(begin, pbegin)));
810 } // end anonymous namespace
// Entry point: walks all paragraphs of text and dispatches on the latextype
// of each paragraph's layout to makeCommand(), makeEnvironmentHtml(),
// makeBibliography() or makeParagraphs(). For the last three, the end of the
// run is located first (searchEnvironmentHtml / searchParagraphHtml) and the
// helper's return value becomes the paragraph at which to continue.
// NOTE(review): the parameter lines for buf and xs, the break statements and
// the end of the function are not visible here.
813 void xhtmlParagraphs(Text const & text,
816 OutputParams const & runparams)
818 ParagraphList const & paragraphs = text.paragraphs();
819 ParagraphList::const_iterator par = paragraphs.begin();
820 ParagraphList::const_iterator pend = paragraphs.end();
// Work on a local copy of the output parameters.
822 OutputParams ourparams = runparams;
823 while (par != pend) {
824 Layout const & style = par->layout();
// Remember where this iteration started; used for the overrun check below.
825 ParagraphList::const_iterator lastpar = par;
826 ParagraphList::const_iterator send;
828 switch (style.latextype) {
829 case LATEX_COMMAND: {
830 // The files with which we are working never have more than
831 // one paragraph in a command structure.
833 // if (ourparams.html_in_par)
834 // fix it so we don't get sections inside standard, e.g.
835 // note that we may then need to make runparams not const, so we
836 // can communicate that back.
837 // FIXME Maybe this fix should be in the routines themselves, in case
838 // they are called from elsewhere.
839 makeCommand(buf, xs, ourparams, text, par);
843 case LATEX_ENVIRONMENT:
844 case LATEX_LIST_ENVIRONMENT:
845 case LATEX_ITEM_ENVIRONMENT: {
846 // FIXME Same fix here.
847 send = searchEnvironmentHtml(par, pend);
848 par = makeEnvironmentHtml(buf, xs, ourparams, text, par, send);
851 case LATEX_BIB_ENVIRONMENT: {
852 // FIXME Same fix here.
853 send = searchEnvironmentHtml(par, pend);
854 par = makeBibliography(buf, xs, ourparams, text, par, send);
857 case LATEX_PARAGRAPH:
858 send = searchParagraphHtml(par, pend);
859 par = makeParagraphs(buf, xs, ourparams, text, par, send);
863 // makeEnvironment may process more than one paragraph and bypass pend
// Compare how far par advanced from lastpar with how far pend is from it.
864 if (distance(lastpar, par) >= distance(lastpar, pend))