2 * \file output_xhtml.cpp
3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
8 * This code is based upon output_docbook.cpp
10 * Full author contact details are available in file CREDITS.
15 #include "output_xhtml.h"
18 #include "buffer_funcs.h"
19 #include "BufferParams.h"
22 #include "OutputParams.h"
23 #include "Paragraph.h"
24 #include "ParagraphList.h"
25 #include "ParagraphParameters.h"
28 #include "TextClass.h"
30 #include "support/convert.h"
31 #include "support/debug.h"
32 #include "support/lassert.h"
33 #include "support/lstrings.h"
38 using namespace lyx::support;
// Per-character escaping helper for char_type input; returns a docstring.
// It is what XHTMLStream::operator<<(char_type) writes to the output stream.
// NOTE(review): the body is not visible in this view, so which characters
// are escaped (and to what) cannot be stated here.
44 docstring escapeChar(char_type c)
68 // escape what needs escaping
// docstring overload: walks str from begin() to end(), one character per
// iteration.
// NOTE(review): the loop body and the return statement are not visible in
// this view; presumably each character goes through escapeChar() as in the
// std::string overload -- confirm.
69 docstring htmlize(docstring const & str) {
71 docstring::const_iterator it = str.begin();
72 docstring::const_iterator en = str.end();
73 for (; it != en; ++it)
// Narrow-character counterpart of the char_type overload; returns a string.
// Called once per character by htmlize(string const &).
// NOTE(review): the body is not visible in this view, so the exact escaping
// rules cannot be stated here.
79 string escapeChar(char c)
103 // escape what needs escaping
// std::string overload: feeds every character of str, in order, through
// escapeChar() and streams the result into the accumulator d.
// NOTE(review): the declaration of d and the return statement are not
// visible in this view.
104 string htmlize(string const & str) {
106 string::const_iterator it = str.begin();
107 string::const_iterator en = str.end();
108 for (; it != en; ++it)
109 d << escapeChar(*it);
// Returns a copy of str in which every character that isalnum() does not
// classify as alphanumeric is replaced by '_'. Alphanumeric characters are
// copied unchanged; an empty input yields an empty string.
//
// NOTE(review): the declaration of newname, the return statement and the
// braces were not visible in the reviewed text and are restored here from
// the visible use of newname and the function's return type.
std::string cleanAttr(std::string const & str)
{
	std::string newname;
	newname.reserve(str.size());
	std::string::const_iterator it = str.begin();
	std::string::const_iterator en = str.end();
	for (; it != en; ++it) {
		// isalnum() has undefined behaviour when given a negative value
		// other than EOF, which is what a plain (signed) char holding a
		// non-ASCII byte converts to. Go through unsigned char first.
		unsigned char const uc = static_cast<unsigned char>(*it);
		newname += isalnum(uc) ? *it : '_';
	}
	return newname;
}
// docstring overload: iterates over str from begin() to end().
// NOTE(review): the loop body and return are not visible in this view;
// presumably it mirrors the std::string overload (non-alphanumerics become
// '_') -- confirm.
125 docstring cleanAttr(docstring const & str)
128 docstring::const_iterator it = str.begin();
129 docstring::const_iterator en = str.end();
130 for (; it != en; ++it)
// Returns true when s is exactly "em" or "strong"; these are the only tag
// names currently treated as font tags. The comparison is case-sensitive.
139 bool isFontTag(string const & s)
141 return s == "em" || s == "strong"; // others?
// Renders this start tag as text: "<" followed by tag_, then a space and the
// attribute string after it has been passed through html::htmlize(). The
// UTF-8 result is converted to a docstring with from_utf8().
// NOTE(review): any condition guarding the attribute append, and the code
// that terminates the tag, are not visible in this view.
146 docstring StartTag::asTag() const
148 string output = "<" + tag_;
150 output += " " + html::htmlize(attr_);
152 return from_utf8(output);
// Returns the closing form of this tag, "</" + tag_ + ">", converted to a
// docstring with from_utf8(). Attributes play no part in the end tag.
156 docstring StartTag::asEndTag() const
158 string output = "</" + tag_ + ">";
159 return from_utf8(output);
// Returns "</" + tag_ + ">" as a docstring (converted with from_utf8()).
163 docstring EndTag::asEndTag() const
165 string output = "</" + tag_ + ">";
166 return from_utf8(output);
// Renders this tag as text: "<" followed by tag_, then a space and the
// html::htmlize()d attribute string, converted to a docstring with
// from_utf8().
// NOTE(review): any condition guarding the attribute append, and the code
// that terminates the tag, are not visible in this view.
170 docstring CompTag::asTag() const
172 string output = "<" + tag_;
174 output += " " + html::htmlize(attr_);
176 return from_utf8(output);
180 ////////////////////////////////////////////////////////////////
184 ////////////////////////////////////////////////////////////////
// Binds the stream wrapper to the output stream os (kept in os_) and starts
// with the nextraw_ flag cleared.
186 XHTMLStream::XHTMLStream(odocstream & os)
187 : os_(os), nextraw_(false)
// Writes a single newline character to the underlying stream os_.
191 void XHTMLStream::cr()
194 os_ << from_ascii("\n");
// Reports a problem by embedding it in the output as an HTML comment of the
// form "<!-- Output Error: s -->". The text is sent straight to os_, so s is
// written as given (no escaping is applied on this line).
198 void XHTMLStream::writeError(std::string const & s)
201 os_ << from_utf8("<!-- Output Error: " + s + " -->");
// Closes the run of font tags on top of tag_stack_, writing each end tag to
// os_ and popping it. It then scans what is left of the stack and reports,
// through writeError(), any font tag still open beneath a non-font tag.
// NOTE(review): the return statements are not visible in this view;
// presumably the result reflects noFontTags -- confirm.
205 bool XHTMLStream::closeFontTags()
207 if (tag_stack_.empty())
209 // first, we close any open font tags we can close
// curtag is a copy, so popping the stack below does not invalidate it.
210 StartTag curtag = tag_stack_.back();
211 while (html::isFontTag(curtag.tag_)) {
212 os_ << curtag.asEndTag();
213 tag_stack_.pop_back();
// back() must not be called on an empty stack, hence this check before
// curtag is refreshed.
214 if (tag_stack_.empty())
215 // this probably shouldn't happen, since then the
216 // font tags weren't in any other tag. but that
217 // problem will likely be caught elsewhere.
219 curtag = tag_stack_.back();
221 // so we've hit a non-font tag. let's see if any of the
222 // remaining tags are font tags.
223 TagStack::const_iterator it = tag_stack_.begin();
224 TagStack::const_iterator en = tag_stack_.end();
225 bool noFontTags = true;
226 for (; it != en; ++it) {
227 if (html::isFontTag(it->tag_)) {
228 writeError("Font tag `" + it->tag_ + "' still open in closeFontTags().\n"
229 "This is likely not a problem, but you might want to check.");
// Drains pending_tags_ from the front: each pending start tag is pushed onto
// tag_stack_ (so it now counts as open) and removed from the deque.
// NOTE(review): two lines between the reference being taken and the
// push_back are not visible here; presumably the tag is written to the
// output at that point -- confirm.
237 void XHTMLStream::clearTagDeque()
239 while (!pending_tags_.empty()) {
// The reference stays valid until pop_front() below; it is copied into
// tag_stack_ before then.
240 StartTag const & tag = pending_tags_.front();
243 tag_stack_.push_back(tag);
244 pending_tags_.pop_front();
// Writes text content: d is passed through html::htmlize() before being sent
// to os_.
// NOTE(review): the lines before this write and the return are not visible
// in this view (they may handle pending tags and the nextraw_ flag).
249 XHTMLStream & XHTMLStream::operator<<(docstring const & d)
256 os_ << html::htmlize(d);
// Writes a C string: s is converted with from_ascii(), then the result is
// passed through html::htmlize() and sent to os_.
// NOTE(review): the lines between the conversion and the write, and the
// return, are not visible in this view.
261 XHTMLStream & XHTMLStream::operator<<(const char * s)
264 docstring const d = from_ascii(s);
269 os_ << html::htmlize(d);
// Writes a single character, passed through html::escapeChar(), to os_.
// NOTE(review): the lines before this write and the return are not visible
// in this view.
274 XHTMLStream & XHTMLStream::operator<<(char_type c)
281 os_ << html::escapeChar(c);
// Stream-insertion overload taking an int and returning the stream.
// NOTE(review): the body is not visible in this view.
286 XHTMLStream & XHTMLStream::operator<<(int i)
// Overload taking a NextRaw marker object; the argument is unnamed, so only
// its type matters. Callers insert it immediately before content, as in
// `xs << XHTMLStream::NextRaw() << deferred`.
// NOTE(review): the body is not visible in this view; presumably it sets
// nextraw_ so the next item is written without escaping -- confirm.
295 XHTMLStream & XHTMLStream::operator<<(NextRaw const &)
// Handles a start tag by appending it to pending_tags_ instead of writing it
// immediately. A tag with an empty name gets special treatment.
// NOTE(review): the statement under the empty-name check and the return are
// not visible here; presumably an empty tag is ignored -- confirm.
302 XHTMLStream & XHTMLStream::operator<<(StartTag const & tag)
304 if (tag.tag_.empty())
306 pending_tags_.push_back(tag);
// Overload for CompTag objects. A tag with an empty name gets special
// treatment.
// NOTE(review): everything after the empty-name check is not visible in
// this view; presumably an empty tag is ignored and otherwise tag.asTag()
// is written -- confirm.
313 XHTMLStream & XHTMLStream::operator<<(CompTag const & tag)
315 if (tag.tag_.empty())
// Linear search of tag_stack_ for an entry whose tag name equals stag.
// Only tag_stack_ is consulted here; pending_tags_ is not.
// NOTE(review): the return statements are not visible in this view;
// presumably true on the first match and false otherwise -- confirm.
325 bool XHTMLStream::isTagOpen(string const & stag)
327 TagStack::const_iterator sit = tag_stack_.begin();
328 TagStack::const_iterator const sen = tag_stack_.end();
329 for (; sit != sen; ++sit)
330 // we could check for the
331 if (sit->tag_ == stag)
// Closes a tag. The visible cases, in order:
//   1. etag matches the last pending (not yet written) start tag: the pair
//      is empty, so the pending tag is simply dropped.
//   2. etag matches some other pending tag: that entry is erased and an
//      error comment is written.
//   3. etag is not pending but other tags are: the pending tags are
//      discarded and processing continues.
//   4. etag matches the top of tag_stack_: the end tag is written and the
//      stack popped.
//   5. etag is a font tag open further down the stack under other font
//      tags: the intervening font tags are closed, etag is closed, and the
//      intervening tags are queued again on pending_tags_.
//   6. etag is a non-font tag further down the stack: everything above it
//      is closed (with error comments), then etag itself.
// NOTE(review): the early `return` statements that presumably end each case
// are not visible in this view -- confirm before relying on the above.
337 // this is complicated, because we want to make sure that
338 // everything is properly nested. the code ought to make
339 // sure of that, but we won't assert (yet) if we run into
340 // a problem. we'll just output error messages and try our
341 // best to make things work.
342 XHTMLStream & XHTMLStream::operator<<(EndTag const & etag)
344 if (etag.tag_.empty())
347 // make sure there are tags to be closed
348 if (tag_stack_.empty()) {
349 writeError("Tried to close `" + etag.tag_
350 + "' when no tags were open!");
354 // first make sure we're not closing an empty tag
355 if (!pending_tags_.empty()) {
356 StartTag const & stag = pending_tags_.back();
357 if (etag.tag_ == stag.tag_) {
358 // we have <tag></tag>, so we discard it and remove it
359 // from the pending_tags_.
360 pending_tags_.pop_back();
363 // there is a pending tag that isn't the one we are trying
365 // is this tag itself pending?
366 // non-const iterators because we may call erase().
367 TagDeque::iterator dit = pending_tags_.begin();
368 TagDeque::iterator const den = pending_tags_.end();
369 for (; dit != den; ++dit) {
370 if (dit->tag_ == etag.tag_) {
371 // it was pending, so we just erase it
372 writeError("Tried to close pending tag `" + etag.tag_
373 + "' when other tags were pending. Last pending tag is `"
374 + pending_tags_.back().tag_ + "'. Tag discarded.");
// NOTE(review): erase() invalidates dit and den; this is only safe if the
// function leaves the loop right after (not visible here) -- confirm.
375 pending_tags_.erase(dit);
379 // so etag isn't itself pending. is it even open?
380 if (!isTagOpen(etag.tag_)) {
381 writeError("Tried to close `" + etag.tag_
382 + "' when tag was not open. Tag discarded.");
385 // ok, so etag is open.
386 // our strategy will be as below: we will do what we need to
387 // do to close this tag.
388 string estr = "Closing tag `" + etag.tag_
389 + "' when other tags are pending. Discarded pending tags:\n";
// den was captured before the search loop above; it is still valid here
// because pending_tags_ has not been modified on this path.
390 for (dit = pending_tags_.begin(); dit != den; ++dit)
391 estr += dit->tag_ + "\n";
393 // clear the pending tags...
394 pending_tags_.clear();
395 // ...and then just fall through.
398 // is the tag we are closing the last one we opened?
399 if (etag.tag_ == tag_stack_.back().tag_) {
401 os_ << etag.asEndTag();
402 // ...and forget about it
403 tag_stack_.pop_back();
407 // we are trying to close a tag other than the one last opened.
408 // let's first see if this particular tag is still open somehow.
409 if (!isTagOpen(etag.tag_)) {
410 writeError("Tried to close `" + etag.tag_
411 + "' when tag was not open. Tag discarded.");
415 // so the tag was opened, but other tags have been opened since
416 // and not yet closed.
417 // if it's a font tag, though...
418 if (html::isFontTag(etag.tag_)) {
419 // it won't be a problem if the other tags open since this one
420 // are also font tags.
// Walk from the most recently opened tag downwards until etag is found.
421 TagStack::const_reverse_iterator rit = tag_stack_.rbegin();
422 TagStack::const_reverse_iterator ren = tag_stack_.rend();
423 for (; rit != ren; ++rit) {
424 if (rit->tag_ == etag.tag_)
426 if (!html::isFontTag(rit->tag_)) {
427 // we'll just leave it and, presumably, have to close it later.
428 writeError("Unable to close font tag `" + etag.tag_
429 + "' due to open non-font tag `" + rit->tag_ + "'.");
435 // <em>this is <strong>bold
436 // and are being asked to close em. we want:
437 // <em>this is <strong>bold</strong></em><strong>
438 // first, we close the intervening tags...
439 StartTag curtag = tag_stack_.back();
440 // ...remembering them in a stack.
// NOTE(review): this loop relies on the isTagOpen() check above having
// bailed out when etag is not on the stack; otherwise back() would be
// called on an empty stack.
442 while (curtag.tag_ != etag.tag_) {
443 os_ << curtag.asEndTag();
444 fontstack.push_back(curtag);
445 tag_stack_.pop_back();
446 curtag = tag_stack_.back();
448 // now close our tag...
449 os_ << etag.asEndTag();
450 tag_stack_.pop_back();
452 // ...and restore the other tags.
// fontstack holds the closed tags innermost-first, so iterating it in
// reverse re-queues them in their original opening order.
453 rit = fontstack.rbegin();
454 ren = fontstack.rend();
455 for (; rit != ren; ++rit)
456 pending_tags_.push_back(*rit);
460 // it wasn't a font tag.
461 // so other tags were opened before this one and not properly closed.
462 // so we'll close them, too. that may cause other issues later, but it
463 // at least guarantees proper nesting.
464 writeError("Closing tag `" + etag.tag_
465 + "' when other tags are open, namely:");
466 StartTag curtag = tag_stack_.back();
467 while (curtag.tag_ != etag.tag_) {
468 writeError(curtag.tag_);
469 os_ << curtag.asEndTag();
470 tag_stack_.pop_back();
471 curtag = tag_stack_.back();
473 // curtag is now the one we actually want.
474 os_ << curtag.asEndTag();
475 tag_stack_.pop_back();
480 // End code for XHTMLStream
484 // convenience functions
// Sends the layout's main HTML start tag (lay.htmltag() with attributes
// lay.htmlattr()) to xs.
486 inline void openTag(XHTMLStream & xs, Layout const & lay)
488 xs << StartTag(lay.htmltag(), lay.htmlattr());
// Sends the end tag matching the layout's main HTML tag (lay.htmltag()) to xs.
492 inline void closeTag(XHTMLStream & xs, Layout const & lay)
494 xs << EndTag(lay.htmltag());
// Sends the layout's label start tag (lay.htmllabeltag() with attributes
// lay.htmllabelattr()) to xs.
498 inline void openLabelTag(XHTMLStream & xs, Layout const & lay)
500 xs << StartTag(lay.htmllabeltag(), lay.htmllabelattr());
// Sends the end tag matching the layout's label tag (lay.htmllabeltag()) to xs.
504 inline void closeLabelTag(XHTMLStream & xs, Layout const & lay)
506 xs << EndTag(lay.htmllabeltag());
// Sends the layout's item start tag (lay.htmlitemtag() with attributes
// lay.htmlitemattr()) to xs. Unlike the other open*Tag helpers, this one
// passes `true` as a third StartTag constructor argument.
// NOTE(review): the meaning of that flag is defined by StartTag, which is
// not visible in this view.
510 inline void openItemTag(XHTMLStream & xs, Layout const & lay)
512 xs << StartTag(lay.htmlitemtag(), lay.htmlitemattr(), true);
// Sends the end tag matching the layout's item tag (lay.htmlitemtag()) to xs.
516 inline void closeItemTag(XHTMLStream & xs, Layout const & lay)
518 xs << EndTag(lay.htmlitemtag());
521 // end of convenience functions
// Finds the end of a run of ordinary paragraphs. Starting from the paragraph
// after p, it advances while pend has not been reached and the paragraph's
// layout has latextype LATEX_PARAGRAPH. p is taken by value, so the caller's
// iterator is untouched.
// NOTE(review): the return statement is not visible in this view;
// presumably the advanced p is returned -- confirm.
523 ParagraphList::const_iterator searchParagraphHtml(
524 ParagraphList::const_iterator p,
525 ParagraphList::const_iterator const & pend)
527 for (++p; p != pend && p->layout().latextype == LATEX_PARAGRAPH; ++p)
// Finds the end of the environment that begins at pstart. Paragraphs after
// pstart are examined in turn against the starting paragraph's layout
// (bstyle) and depth; the checks below decide whether the environment ends
// at p or continues.
// NOTE(review): the statement under each `if` (presumably return/continue)
// and the final return are not visible in this view -- confirm.
534 ParagraphList::const_iterator searchEnvironmentHtml(
535 ParagraphList::const_iterator const pstart,
536 ParagraphList::const_iterator const & pend)
538 ParagraphList::const_iterator p = pstart;
539 Layout const & bstyle = p->layout();
540 size_t const depth = p->params().depth();
541 for (++p; p != pend; ++p) {
542 Layout const & style = p->layout();
543 // It shouldn't happen that e.g. a section command occurs inside
544 // a quotation environment, at a higher depth, but as of 6/2009,
545 // it can happen. We pretend that it's just at lowest depth.
546 if (style.latextype == LATEX_COMMAND)
548 // If depth is down, we're done
549 if (p->params().depth() < depth)
551 // If depth is up, we're not done
552 if (p->params().depth() > depth)
554 // Now we know we are at the same depth
// Same depth: a plain paragraph, or a layout with a different LaTeX name
// from the one we started in, is tested for here.
555 if (style.latextype == LATEX_PARAGRAPH
556 || style.latexname() != bstyle.latexname())
// Outputs the paragraphs in [pbegin, pend). For each one it steps the
// layout's counter (if it has one), decides whether a paragraph tag should
// be opened and closed, emits the paragraph body through
// Paragraph::simpleLyXHTMLOnePar(), and finally writes any deferred material
// that call returned.
// NOTE(review): two parameters (xs and text, both used below), the actual
// open/close tag calls and the return statement are not visible in this
// view.
563 ParagraphList::const_iterator makeParagraphs(Buffer const & buf,
565 OutputParams const & runparams,
567 ParagraphList::const_iterator const & pbegin,
568 ParagraphList::const_iterator const & pend)
// begin is the start of the whole paragraph list; it is used below to turn
// the iterator par into an index via distance().
570 ParagraphList::const_iterator const begin = text.paragraphs().begin();
571 ParagraphList::const_iterator par = pbegin;
572 for (; par != pend; ++par) {
573 Layout const & lay = par->layout();
574 if (!lay.counter.empty())
575 buf.params().documentClass().counters().step(lay.counter);
576 // FIXME We should see if there's a label to be output and
577 // do something with it.
581 // If we are already in a paragraph, and this is the first one, then we
582 // do not want to open the paragraph tag.
583 // we also do not want to open it if the current layout does not permit
584 // multiple paragraphs.
585 bool const opened = runparams.html_make_pars &&
586 (par != pbegin || !runparams.html_in_par);
589 docstring const deferred =
590 par->simpleLyXHTMLOnePar(buf, xs, runparams, text.outerFont(distance(begin, par)));
592 // We want to issue the closing tag if either:
593 // (i) We opened it, and either html_in_par is false,
594 // or we're not in the last paragraph, anyway.
595 // (ii) We didn't open it and html_in_par is true,
596 // but we are in the first par, and there is a next par.
// NOTE(review): nextpar is compared with pend below, so it is presumably
// advanced by one on a line not visible here -- confirm.
597 ParagraphList::const_iterator nextpar = par;
599 bool const needclose =
600 (opened && (!runparams.html_in_par || nextpar != pend))
601 || (!opened && runparams.html_in_par && par == pbegin && nextpar != pend);
// Deferred material is written with NextRaw in front of it.
606 if (!deferred.empty()) {
607 xs << XHTMLStream::NextRaw() << deferred;
// Outputs a bibliography: an <h2 class='bibliography'> heading containing the
// first paragraph's layout label string, then a <div class='bibliography'>
// whose content is produced by makeParagraphs() over [pbegin, pend).
// NOTE(review): two parameters (xs and text, both used below), the matching
// end tags and the return statement are not visible in this view.
615 ParagraphList::const_iterator makeBibliography(Buffer const & buf,
617 OutputParams const & runparams,
619 ParagraphList::const_iterator const & pbegin,
620 ParagraphList::const_iterator const & pend)
622 xs << StartTag("h2", "class='bibliography'");
623 xs << pbegin->layout().labelstring(false);
626 xs << StartTag("div", "class='bibliography'");
628 makeParagraphs(buf, xs, runparams, text, pbegin, pend);
// Returns true when the layout's latextype is LATEX_ENVIRONMENT or
// LATEX_BIB_ENVIRONMENT. Callers treat every other environment type as
// "some kind of list".
634 bool isNormalEnv(Layout const & lay)
636 return lay.latextype == LATEX_ENVIRONMENT
637 || lay.latextype == LATEX_BIB_ENVIRONMENT;
// Outputs the environment spanning [pbegin, pend). Paragraphs at the
// environment's own depth are emitted as items, with labels where the layout
// asks for them; deeper environments are handled by recursing; plain
// paragraphs and bibliography entries are delegated to makeParagraphs().
// The environment's own tag is closed at the end with closeTag(xs, bstyle).
// NOTE(review): many lines of this function are not visible in this view,
// including two parameters (xs and text), the declaration of `sep`, several
// conditions, the `break` statements and the return. The comments added
// below describe only what the visible lines do.
641 ParagraphList::const_iterator makeEnvironmentHtml(Buffer const & buf,
643 OutputParams const & runparams,
645 ParagraphList::const_iterator const & pbegin,
646 ParagraphList::const_iterator const & pend)
648 ParagraphList::const_iterator const begin = text.paragraphs().begin();
649 ParagraphList::const_iterator par = pbegin;
650 Layout const & bstyle = par->layout();
651 depth_type const origdepth = pbegin->params().depth();
653 // open tag for this environment
657 // we will on occasion need to remember a layout from before.
658 Layout const * lastlay = 0;
660 while (par != pend) {
661 Layout const & style = par->layout();
662 // the counter only gets stepped if we're in some kind of list,
663 // or if it's the first time through.
664 // note that enum, etc, are handled automatically.
665 // FIXME There may be a bug here about user defined enumeration
666 // types. If so, then we'll need to take the counter and add "i",
667 // "ii", etc, as with enum.
668 Counters & cnts = buf.params().documentClass().counters();
669 docstring const & cntr = style.counter;
670 if (!style.counter.empty()
671 && (par == pbegin || !isNormalEnv(style))
672 && cnts.hasCounter(cntr)
675 ParagraphList::const_iterator send;
676 // this will be positive, if we want to skip the initial word
677 // (if it's been taken for the label).
680 switch (style.latextype) {
681 case LATEX_ENVIRONMENT:
682 case LATEX_LIST_ENVIRONMENT:
683 case LATEX_ITEM_ENVIRONMENT: {
684 // There are two possibilities in this case.
685 // One is that we are still in the environment in which we
686 // started---which we will be if the depth is the same.
687 if (par->params().depth() == origdepth) {
688 LASSERT(bstyle == style, /* */);
// NOTE(review): lastlay starts out null, so this dereference needs a
// non-null guard; the guarding condition is presumably on a line not
// visible here -- confirm.
690 closeItemTag(xs, *lastlay);
693 if (isNormalEnv(style)) {
694 // in this case, we print the label only for the first
695 // paragraph (as in a theorem).
696 openItemTag(xs, style);
697 if (par == pbegin && style.htmllabeltag() != "NONE") {
698 docstring const lbl =
699 pbegin->expandLabel(style, buf.params(), false);
701 openLabelTag(xs, style);
703 closeLabelTag(xs, style);
707 } else { // some kind of list
708 bool const labelfirst = style.htmllabelfirst();
710 openItemTag(xs, style);
// Manual labels come from the paragraph's first word; firstWordLyXHTML()
// returns the value stored in sep, which is later passed on to
// simpleLyXHTMLOnePar().
711 if (style.labeltype == LABEL_MANUAL
712 && style.htmllabeltag() != "NONE") {
713 openLabelTag(xs, style);
714 sep = par->firstWordLyXHTML(xs, runparams);
715 closeLabelTag(xs, style);
718 else if (style.labeltype != LABEL_NO_LABEL
719 && style.htmllabeltag() != "NONE") {
720 openLabelTag(xs, style);
721 xs << par->expandLabel(style, buf.params(), false);
722 closeLabelTag(xs, style);
726 openItemTag(xs, style);
728 par->simpleLyXHTMLOnePar(buf, xs, runparams,
729 text.outerFont(distance(begin, par)), sep);
731 // We may not want to close the tag yet, in particular,
732 // if we're not at the end...
734 // and are doing items...
735 && !isNormalEnv(style)
736 // and if the depth has changed...
737 && par->params().depth() != origdepth) {
738 // then we'll save this layout for later, and close it when
739 // we get another item.
742 closeItemTag(xs, style);
745 // The other possibility is that the depth has increased, in which
746 // case we need to recurse.
// The recursive call returns the iterator stored back into par.
748 send = searchEnvironmentHtml(par, pend);
749 par = makeEnvironmentHtml(buf, xs, runparams, text, par, send);
753 case LATEX_PARAGRAPH:
754 send = searchParagraphHtml(par, pend);
755 par = makeParagraphs(buf, xs, runparams, text, par, send);
758 case LATEX_BIB_ENVIRONMENT:
// NOTE(review): send is used below but its assignment for this case is not
// visible in this view.
761 par = makeParagraphs(buf, xs, runparams, text, par, send);
// NOTE(review): as above, this dereference of lastlay presumably sits under
// a non-null check that is not visible here -- confirm.
771 closeItemTag(xs, *lastlay);
772 closeTag(xs, bstyle);
// Outputs a single command-type paragraph (the one at pbegin). The layout's
// counter is stepped if it has one; unless the layout has no label, the
// expanded label is written inside the label tag and followed by a space;
// the paragraph body is then emitted with simpleLyXHTMLOnePar().
// NOTE(review): two parameters (xs and text, both used below) and the
// opening/closing of the layout's main tag are not visible in this view.
778 void makeCommand(Buffer const & buf,
780 OutputParams const & runparams,
782 ParagraphList::const_iterator const & pbegin)
784 Layout const & style = pbegin->layout();
785 if (!style.counter.empty())
786 buf.params().documentClass().counters().step(style.counter);
790 // Label around sectioning number:
791 // FIXME Probably need to account for LABEL_MANUAL
792 if (style.labeltype != LABEL_NO_LABEL) {
793 openLabelTag(xs, style);
794 xs << pbegin->expandLabel(style, buf.params(), false);
795 closeLabelTag(xs, style);
796 // Otherwise the label might run together with the text
797 xs << from_ascii(" ");
// distance(begin, pbegin) is the paragraph's index in the list, which is
// what outerFont() is given.
800 ParagraphList::const_iterator const begin = text.paragraphs().begin();
801 pbegin->simpleLyXHTMLOnePar(buf, xs, runparams,
802 text.outerFont(distance(begin, pbegin)));
807 } // end anonymous namespace
// Entry point: outputs every paragraph of text. The paragraph list is walked
// from start to end, and each paragraph is dispatched on its layout's
// latextype to makeCommand(), makeEnvironmentHtml(), makeBibliography() or
// makeParagraphs(). The multi-paragraph handlers return the iterator from
// which the walk continues.
// NOTE(review): two parameters (buf and xs, both used below), the `break`
// statements, the advance of par after makeCommand() and the statement under
// the final `if` are not visible in this view.
810 void xhtmlParagraphs(Text const & text,
813 OutputParams const & runparams)
815 ParagraphList const & paragraphs = text.paragraphs();
816 ParagraphList::const_iterator par = paragraphs.begin();
817 ParagraphList::const_iterator pend = paragraphs.end();
// A mutable copy of the caller's parameters; this is what is passed to the
// handlers below.
819 OutputParams ourparams = runparams;
820 while (par != pend) {
821 Layout const & style = par->layout();
822 ParagraphList::const_iterator lastpar = par;
823 ParagraphList::const_iterator send;
825 switch (style.latextype) {
826 case LATEX_COMMAND: {
827 // The files with which we are working never have more than
828 // one paragraph in a command structure.
830 // if (ourparams.html_in_par)
831 // fix it so we don't get sections inside standard, e.g.
832 // note that we may then need to make runparams not const, so we
833 // can communicate that back.
834 // FIXME Maybe this fix should be in the routines themselves, in case
835 // they are called from elsewhere.
836 makeCommand(buf, xs, ourparams, text, par);
840 case LATEX_ENVIRONMENT:
841 case LATEX_LIST_ENVIRONMENT:
842 case LATEX_ITEM_ENVIRONMENT: {
843 // FIXME Same fix here.
844 send = searchEnvironmentHtml(par, pend);
845 par = makeEnvironmentHtml(buf, xs, ourparams, text, par, send);
848 case LATEX_BIB_ENVIRONMENT: {
849 // FIXME Same fix here.
850 send = searchEnvironmentHtml(par, pend);
851 par = makeBibliography(buf, xs, ourparams, text, par, send);
854 case LATEX_PARAGRAPH:
855 send = searchParagraphHtml(par, pend);
856 par = makeParagraphs(buf, xs, ourparams, text, par, send);
860 // makeEnvironment may process more than one paragraph and bypass pend
// lastpar is where this iteration started; the test asks whether par has
// moved at least as far from it as pend is.
861 if (distance(lastpar, par) >= distance(lastpar, pend))