2 * \file output_xhtml.cpp
3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
8 * This code is based upon output_docbook.cpp
10 * Full author contact details are available in file CREDITS.
15 #include "output_xhtml.h"
18 #include "buffer_funcs.h"
19 #include "BufferParams.h"
22 #include "OutputParams.h"
23 #include "Paragraph.h"
24 #include "ParagraphList.h"
25 #include "ParagraphParameters.h"
28 #include "TextClass.h"
30 #include "support/convert.h"
31 #include "support/debug.h"
32 #include "support/lassert.h"
33 #include "support/lstrings.h"
38 using namespace lyx::support;
// Takes a single char_type and returns a docstring.
// NOTE(review): only the signature is visible in this view; which characters
// are escaped, and to what, must be confirmed against the body.
44 docstring escapeChar(char_type c)
68 // escape what needs escaping
// Iterates over every character of str, from begin() to end().
// NOTE(review): the loop body and the return statement are not visible in this
// view; presumably each character is passed through escapeChar() -- confirm.
69 docstring htmlize(docstring const & str) {
71 docstring::const_iterator it = str.begin();
72 docstring::const_iterator en = str.end();
73 for (; it != en; ++it)
// Narrow-character overload: takes a single char and returns a string.
// NOTE(review): only the signature is visible in this view; which characters
// are escaped, and to what, must be confirmed against the body.
79 string escapeChar(char c)
103 // escape what needs escaping
// Passes every character of str, in order, through escapeChar() and streams
// the result into d.
// NOTE(review): the declaration of d and the return statement are on lines not
// visible in this view -- confirm what is returned.
104 string htmlize(string const & str) {
106 string::const_iterator it = str.begin();
107 string::const_iterator en = str.end();
108 for (; it != en; ++it)
109 d << escapeChar(*it);
// Builds newname from str one character at a time: characters for which
// isalnum() is true are copied, every other character becomes '_'.
// NOTE(review): the declaration of newname and the return statement are on
// lines not visible in this view.
// NOTE(review): isalnum() is called with a plain char; its behavior is
// undefined for values not representable as unsigned char (other than EOF),
// e.g. negative chars from non-ASCII bytes. Consider casting to unsigned char.
114 string cleanAttr(string const & str)
117 string::const_iterator it = str.begin();
118 string::const_iterator en = str.end();
119 for (; it != en; ++it)
120 newname += isalnum(*it) ? *it : '_';
// docstring overload: iterates over every character of str.
// NOTE(review): the loop body and the return statement are not visible in this
// view; presumably it mirrors the std::string overload -- confirm.
125 docstring cleanAttr(docstring const & str)
128 docstring::const_iterator it = str.begin();
129 docstring::const_iterator en = str.end();
130 for (; it != en; ++it)
// Returns true exactly when s is "em" or "strong"; these are the only tag
// names this file treats as font tags.
139 bool isFontTag(string const & s)
141 return s == "em" || s == "strong"; // others?
// Renders this start tag as a docstring: "<" followed by tag_, with
// html::htmlize(attr_) appended after a space, converted via from_utf8().
// NOTE(review): the condition guarding the attribute and the closing ">" are
// on lines not visible in this view -- confirm.
146 docstring StartTag::asTag() const
148 string output = "<" + tag_;
150 output += " " + html::htmlize(attr_);
152 return from_utf8(output);
// Returns the closing form of this tag, "</" + tag_ + ">", converted to a
// docstring via from_utf8(). Attributes are not part of the result.
156 docstring StartTag::asEndTag() const
158 string output = "</" + tag_ + ">";
159 return from_utf8(output);
// Returns "</" + tag_ + ">" converted to a docstring via from_utf8().
163 docstring EndTag::asEndTag() const
165 string output = "</" + tag_ + ">";
166 return from_utf8(output);
// Renders this tag as a docstring: "<" followed by tag_, with
// html::htmlize(attr_) appended after a space, converted via from_utf8().
// NOTE(review): the condition guarding the attribute and the text that closes
// the tag are on lines not visible in this view -- confirm.
170 docstring CompTag::asTag() const
172 string output = "<" + tag_;
174 output += " " + html::htmlize(attr_);
176 return from_utf8(output);
180 ////////////////////////////////////////////////////////////////
184 ////////////////////////////////////////////////////////////////
// Constructs a stream that writes to os; nextraw_ starts out false.
186 XHTMLStream::XHTMLStream(odocstream & os)
187 : os_(os), nextraw_(false)
// Writes a newline to the underlying stream os_.
// NOTE(review): a line preceding the write is not visible in this view.
191 void XHTMLStream::cr()
194 os_ << from_ascii("\n");
// Writes s to os_ wrapped as "<!-- Output Error: ... -->".
// NOTE(review): s is inserted verbatim; an s containing "--" would produce a
// malformed XML comment -- confirm callers never pass such text.
// NOTE(review): a line preceding the write is not visible in this view.
198 void XHTMLStream::writeError(std::string const & s)
201 os_ << from_utf8("<!-- Output Error: " + s + " -->");
// Closes the font tags sitting at the back of tag_stack_.
// Each font tag (per html::isFontTag) at the back is written to os_ as an end
// tag and popped; once a non-font tag is reached, the remaining stack is
// scanned and any font tag still open is reported through writeError().
// NOTE(review): the return statements are not visible in this view; the
// noFontTags flag suggests the result says whether any font tag was left
// open -- confirm.
205 bool XHTMLStream::closeFontTags()
207 if (tag_stack_.empty())
209 // first, we close any open font tags we can close
210 StartTag curtag = tag_stack_.back();
211 while (html::isFontTag(curtag.tag_)) {
212 os_ << curtag.asEndTag();
213 tag_stack_.pop_back();
214 if (tag_stack_.empty())
215 // this probably shouldn't happen, since then the
216 // font tags weren't in any other tag. but that
217 // problem will likely be caught elsewhere.
219 curtag = tag_stack_.back();
221 // so we've hit a non-font tag. let's see if any of the
222 // remaining tags are font tags.
223 TagStack::const_iterator it = tag_stack_.begin();
224 TagStack::const_iterator en = tag_stack_.end();
225 bool noFontTags = true;
226 for (; it != en; ++it) {
227 if (html::isFontTag(it->tag_)) {
228 writeError("Font tag `" + it->tag_ + "' still open in closeFontTags().");
// Drains pending_tags_ from front to back, moving each pending start tag onto
// tag_stack_.
// NOTE(review): the lines between fetching the tag and pushing it are not
// visible in this view; presumably the tag is written to os_ there -- confirm.
236 void XHTMLStream::clearTagDeque()
238 while (!pending_tags_.empty()) {
239 StartTag const & tag = pending_tags_.front();
// tag refers to the front element, so it is copied into tag_stack_ before
// pop_front() removes that element.
242 tag_stack_.push_back(tag);
243 pending_tags_.pop_front();
// Writes d to os_ after passing it through html::htmlize().
// NOTE(review): the lines before this write are not visible in this view;
// there may be another path that bypasses htmlize() -- confirm.
248 XHTMLStream & XHTMLStream::operator<<(docstring const & d)
255 os_ << html::htmlize(d);
// Converts the C string s to a docstring with from_ascii() and writes it to
// os_ after passing it through html::htmlize().
// NOTE(review): the lines between the conversion and the write are not visible
// in this view.
260 XHTMLStream & XHTMLStream::operator<<(const char * s)
263 docstring const d = from_ascii(s);
268 os_ << html::htmlize(d);
// Writes the single character c to os_ after passing it through
// html::escapeChar().
// NOTE(review): the lines before this write are not visible in this view.
273 XHTMLStream & XHTMLStream::operator<<(char_type c)
280 os_ << html::escapeChar(c);
// Stream-insertion overload for int.
// NOTE(review): only the signature is visible in this view.
285 XHTMLStream & XHTMLStream::operator<<(int i)
// Stream-insertion overload for the NextRaw marker type; the argument is
// unnamed, so only its type matters.
// NOTE(review): only the signature is visible in this view; presumably it sets
// nextraw_ so the next item is written unescaped -- confirm.
294 XHTMLStream & XHTMLStream::operator<<(NextRaw const &)
// Queues a start tag: the tag is appended to pending_tags_ rather than being
// written to os_ here.
// NOTE(review): what happens for an empty tag_ and the return statement are on
// lines not visible in this view.
301 XHTMLStream & XHTMLStream::operator<<(StartTag const & tag)
303 if (tag.tag_.empty())
305 pending_tags_.push_back(tag);
// Stream-insertion overload for CompTag; an empty tag_ is tested first.
// NOTE(review): the rest of the body is not visible in this view.
312 XHTMLStream & XHTMLStream::operator<<(CompTag const & tag)
314 if (tag.tag_.empty())
// Scans tag_stack_ from begin() to end() for an entry whose tag_ equals stag.
// Only the tag name is compared.
// NOTE(review): the return statements are not visible in this view.
324 bool XHTMLStream::isTagOpen(string const & stag)
326 TagStack::const_iterator sit = tag_stack_.begin();
327 TagStack::const_iterator const sen = tag_stack_.end();
328 for (; sit != sen; ++sit)
329 // we could check for the
330 if (sit->tag_ == stag)
336 // this is complicated, because we want to make sure that
337 // everything is properly nested. the code ought to make
338 // sure of that, but we won't assert (yet) if we run into
339 // a problem. we'll just output error messages and try our
340 // best to make things work.
//
// Handles a request to close etag. The visible steps, in order:
//  - report an error if tag_stack_ is empty;
//  - if tags are pending, either drop the matching pending tag or discard
//    the pending tags;
//  - if etag matches the back of tag_stack_, write its end tag and pop it;
//  - otherwise, for a font tag, close the intervening font tags, close etag,
//    and re-queue the intervening tags on pending_tags_;
//  - otherwise close every tag above etag and then etag itself.
// NOTE(review): return statements and some closing braces are on lines not
// visible in this view, so where each branch exits must be confirmed.
341 XHTMLStream & XHTMLStream::operator<<(EndTag const & etag)
343 if (etag.tag_.empty())
346 // make sure there are tags to be closed
347 if (tag_stack_.empty()) {
348 writeError("Tried to close `" + etag.tag_
349 + "' when no tags were open!");
353 // first make sure we're not closing an empty tag
354 if (!pending_tags_.empty()) {
355 StartTag const & stag = pending_tags_.back();
356 if (etag.tag_ == stag.tag_) {
357 // we have <tag></tag>, so we discard it and remove it
358 // from the pending_tags_.
359 pending_tags_.pop_back();
362 // there is a pending tag that isn't the one we are trying
364 // is this tag itself pending?
365 // non-const iterators because we may call erase().
366 TagDeque::iterator dit = pending_tags_.begin();
367 TagDeque::iterator const den = pending_tags_.end();
368 for (; dit != den; ++dit) {
369 if (dit->tag_ == etag.tag_) {
370 // it was pending, so we just erase it
371 writeError("Tried to close pending tag `" + etag.tag_
372 + "' when other tags were pending. Last pending tag is `"
373 + pending_tags_.back().tag_ + "'. Tag discarded.");
374 pending_tags_.erase(dit);
378 // so etag isn't itself pending. is it even open?
379 if (!isTagOpen(etag.tag_)) {
380 writeError("Tried to close `" + etag.tag_
381 + "' when tag was not open. Tag discarded.");
384 // ok, so etag is open.
385 // our strategy will be as below: we will do what we need to
386 // do to close this tag.
387 string estr = "Closing tag `" + etag.tag_
388 + "' when other tags are pending. Discarded pending tags:\n";
389 for (dit = pending_tags_.begin(); dit != den; ++dit)
390 estr += dit->tag_ + "\n";
392 // clear the pending tags...
393 pending_tags_.clear();
394 // ...and then just fall through.
397 // is the tag we are closing the last one we opened?
398 if (etag.tag_ == tag_stack_.back().tag_) {
400 os_ << etag.asEndTag();
401 // ...and forget about it
402 tag_stack_.pop_back();
406 // we are trying to close a tag other than the one last opened.
407 // let's first see if this particular tag is still open somehow.
408 if (!isTagOpen(etag.tag_)) {
409 writeError("Tried to close `" + etag.tag_
410 + "' when tag was not open. Tag discarded.");
414 // so the tag was opened, but other tags have been opened since
415 // and not yet closed.
416 // if it's a font tag, though...
417 if (html::isFontTag(etag.tag_)) {
418 // it won't be a problem if the other tags open since this one
419 // are also font tags.
420 TagStack::const_reverse_iterator rit = tag_stack_.rbegin();
421 TagStack::const_reverse_iterator ren = tag_stack_.rend();
422 for (; rit != ren; ++rit) {
423 if (rit->tag_ == etag.tag_)
425 if (!html::isFontTag(rit->tag_)) {
426 // we'll just leave it and, presumably, have to close it later.
427 writeError("Unable to close font tag `" + etag.tag_
428 + "' due to open non-font tag `" + rit->tag_ + "'.");
434 // <em>this is <strong>bold
435 // and are being asked to close em. we want:
436 // <em>this is <strong>bold</strong></em><strong>
437 // first, we close the intervening tags...
438 StartTag curtag = tag_stack_.back();
439 // ...remembering them in a stack.
441 while (curtag.tag_ != etag.tag_) {
442 os_ << curtag.asEndTag();
443 fontstack.push_back(curtag);
444 tag_stack_.pop_back();
445 curtag = tag_stack_.back();
447 // now close our tag...
448 os_ << etag.asEndTag();
449 tag_stack_.pop_back();
451 // ...and restore the other tags.
// fontstack holds the tags in the order they were closed (innermost first),
// so walking it in reverse re-queues them in their original opening order.
452 rit = fontstack.rbegin();
453 ren = fontstack.rend();
454 for (; rit != ren; ++rit)
455 pending_tags_.push_back(*rit);
459 // it wasn't a font tag.
460 // so other tags were opened before this one and not properly closed.
461 // so we'll close them, too. that may cause other issues later, but it
462 // at least guarantees proper nesting.
463 writeError("Closing tag `" + etag.tag_
464 + "' when other tags are open, namely:");
465 StartTag curtag = tag_stack_.back();
466 while (curtag.tag_ != etag.tag_) {
467 writeError(curtag.tag_);
468 os_ << curtag.asEndTag();
469 tag_stack_.pop_back();
470 curtag = tag_stack_.back();
472 // curtag is now the one we actually want.
473 os_ << curtag.asEndTag();
474 tag_stack_.pop_back();
479 // End code for XHTMLStream
483 // convenience functions
// Sends a StartTag built from the layout's htmltag() and htmlattr() to xs.
485 inline void openTag(XHTMLStream & xs, Layout const & lay)
487 xs << StartTag(lay.htmltag(), lay.htmlattr());
// Sends an EndTag for the layout's htmltag() to xs.
491 inline void closeTag(XHTMLStream & xs, Layout const & lay)
493 xs << EndTag(lay.htmltag());
// Sends a StartTag built from the layout's htmllabeltag() and htmllabelattr()
// to xs.
497 inline void openLabelTag(XHTMLStream & xs, Layout const & lay)
499 xs << StartTag(lay.htmllabeltag(), lay.htmllabelattr());
// Sends an EndTag for the layout's htmllabeltag() to xs.
503 inline void closeLabelTag(XHTMLStream & xs, Layout const & lay)
505 xs << EndTag(lay.htmllabeltag());
// Sends a StartTag built from the layout's htmlitemtag() and htmlitemattr()
// to xs.
// NOTE(review): the meaning of the third StartTag argument (true) is defined
// by the StartTag constructor, which is not visible here -- confirm.
509 inline void openItemTag(XHTMLStream & xs, Layout const & lay)
511 xs << StartTag(lay.htmlitemtag(), lay.htmlitemattr(), true);
// Sends an EndTag for the layout's htmlitemtag() to xs.
515 inline void closeItemTag(XHTMLStream & xs, Layout const & lay)
517 xs << EndTag(lay.htmlitemtag());
520 // end of convenience functions
// Advances p past its starting paragraph and then over every following
// paragraph whose layout has latextype LATEX_PARAGRAPH, stopping at pend or at
// the first paragraph of another type. The starting paragraph itself is not
// tested.
// NOTE(review): the loop body and return statement are not visible in this
// view; presumably the final p is returned -- confirm.
522 ParagraphList::const_iterator searchParagraphHtml(
523 ParagraphList::const_iterator p,
524 ParagraphList::const_iterator const & pend)
526 for (++p; p != pend && p->layout().latextype == LATEX_PARAGRAPH; ++p)
// Walks the paragraphs after pstart, comparing each one's layout and depth
// with those of pstart (bstyle, depth). The visible tests are, in order: the
// paragraph is a LATEX_COMMAND; its depth is lower; its depth is higher; and,
// at equal depth, it is a LATEX_PARAGRAPH or has a different latexname().
// NOTE(review): the statement executed by each test (return / continue) and
// the final return are on lines not visible in this view -- confirm.
533 ParagraphList::const_iterator searchEnvironmentHtml(
534 ParagraphList::const_iterator const pstart,
535 ParagraphList::const_iterator const & pend)
537 ParagraphList::const_iterator p = pstart;
538 Layout const & bstyle = p->layout();
539 size_t const depth = p->params().depth();
540 for (++p; p != pend; ++p) {
541 Layout const & style = p->layout();
542 // It shouldn't happen that e.g. a section command occurs inside
543 // a quotation environment, at a higher depth, but as of 6/2009,
544 // it can happen. We pretend that it's just at lowest depth.
545 if (style.latextype == LATEX_COMMAND)
547 // If depth is down, we're done
548 if (p->params().depth() < depth)
550 // If depth is up, we're not done
551 if (p->params().depth() > depth)
553 // Now we know we are at the same depth
554 if (style.latextype == LATEX_PARAGRAPH
555 || style.latexname() != bstyle.latexname())
// Outputs the paragraphs in [pbegin, pend). For each paragraph it steps the
// layout's counter (when the layout names one), renders the paragraph with
// simpleLyXHTMLOnePar(), and writes any returned deferred material to xs
// unescaped (via XHTMLStream::NextRaw).
// NOTE(review): two parameters (used below as xs and text), the statements
// that act on opened / needclose, and the return statement are on lines not
// visible in this view.
562 ParagraphList::const_iterator makeParagraphs(Buffer const & buf,
564 OutputParams const & runparams,
566 ParagraphList::const_iterator const & pbegin,
567 ParagraphList::const_iterator const & pend)
569 ParagraphList::const_iterator const begin = text.paragraphs().begin();
570 ParagraphList::const_iterator par = pbegin;
571 for (; par != pend; ++par) {
572 Layout const & lay = par->layout();
573 if (!lay.counter.empty())
574 buf.params().documentClass().counters().step(lay.counter);
575 // FIXME We should see if there's a label to be output and
576 // do something with it.
580 // If we are already in a paragraph, and this is the first one, then we
581 // do not want to open the paragraph tag.
582 // we also do not want to open it if the current layout does not permit
583 // multiple paragraphs.
584 bool const opened = runparams.html_make_pars &&
585 (par != pbegin || !runparams.html_in_par);
// distance(begin, par) is the paragraph's index within text, which is what
// outerFont() is given here.
588 docstring const deferred =
589 par->simpleLyXHTMLOnePar(buf, xs, runparams, text.outerFont(distance(begin, par)));
591 // We want to issue the closing tag if either:
592 // (i) We opened it, and either html_in_par is false,
593 // or we're not in the last paragraph, anyway.
594 // (ii) We didn't open it and html_in_par is true,
595 // but we are in the first par, and there is a next par.
596 ParagraphList::const_iterator nextpar = par;
598 bool const needclose =
599 (opened && (!runparams.html_in_par || nextpar != pend))
600 || (!opened && runparams.html_in_par && par == pbegin && nextpar != pend);
605 if (!deferred.empty()) {
606 xs << XHTMLStream::NextRaw() << deferred;
// Outputs a bibliography: an <h2 class='bibliography'> start tag, the label
// string of pbegin's layout, a <div class='bibliography'> start tag, and then
// the paragraphs in [pbegin, pend) via makeParagraphs().
// NOTE(review): two parameters (used below as xs and text), the matching end
// tags, and the return statement are on lines not visible in this view.
614 ParagraphList::const_iterator makeBibliography(Buffer const & buf,
616 OutputParams const & runparams,
618 ParagraphList::const_iterator const & pbegin,
619 ParagraphList::const_iterator const & pend)
621 xs << StartTag("h2", "class='bibliography'");
622 xs << pbegin->layout().labelstring(false);
625 xs << StartTag("div", "class='bibliography'");
627 makeParagraphs(buf, xs, runparams, text, pbegin, pend);
// Returns true when the layout's latextype is LATEX_ENVIRONMENT or
// LATEX_BIB_ENVIRONMENT. List and item environments are not included.
633 bool isNormalEnv(Layout const & lay)
635 return lay.latextype == LATEX_ENVIRONMENT
636 || lay.latextype == LATEX_BIB_ENVIRONMENT;
// Outputs the environment made up of the paragraphs in [pbegin, pend).
// Paragraphs at the starting depth are written as items of this environment,
// with labels where the layout asks for them; the function calls itself for
// nested environments and hands plain paragraphs to makeParagraphs(). On the
// way out it closes the environment's own tag.
// NOTE(review): two parameters (used below as xs and text), several
// conditions, break statements, and the return statement are on lines not
// visible in this view, so the exact control flow must be confirmed.
640 ParagraphList::const_iterator makeEnvironmentHtml(Buffer const & buf,
642 OutputParams const & runparams,
644 ParagraphList::const_iterator const & pbegin,
645 ParagraphList::const_iterator const & pend)
647 ParagraphList::const_iterator const begin = text.paragraphs().begin();
648 ParagraphList::const_iterator par = pbegin;
649 Layout const & bstyle = par->layout();
650 depth_type const origdepth = pbegin->params().depth();
652 // open tag for this environment
656 // we will on occasion need to remember a layout from before.
657 Layout const * lastlay = 0;
659 while (par != pend) {
660 Layout const & style = par->layout();
661 // the counter only gets stepped if we're in some kind of list,
662 // or if it's the first time through.
663 // note that enum, etc, are handled automatically.
664 // FIXME There may be a bug here about user defined enumeration
665 // types. If so, then we'll need to take the counter and add "i",
666 // "ii", etc, as with enum.
667 Counters & cnts = buf.params().documentClass().counters();
668 docstring const & cntr = style.counter;
669 if (!style.counter.empty()
670 && (par == pbegin || !isNormalEnv(style))
671 && cnts.hasCounter(cntr)
674 ParagraphList::const_iterator send;
675 // this will be positive, if we want to skip the initial word
676 // (if it's been taken for the label).
679 switch (style.latextype) {
680 case LATEX_ENVIRONMENT:
681 case LATEX_LIST_ENVIRONMENT:
682 case LATEX_ITEM_ENVIRONMENT: {
683 // There are two possibilities in this case.
684 // One is that we are still in the environment in which we
685 // started---which we will be if the depth is the same.
686 if (par->params().depth() == origdepth) {
687 LASSERT(bstyle == style, /* */);
689 closeItemTag(xs, *lastlay);
692 if (isNormalEnv(style)) {
693 // in this case, we print the label only for the first
694 // paragraph (as in a theorem).
695 openItemTag(xs, style);
696 if (par == pbegin && style.htmllabeltag() != "NONE") {
697 docstring const lbl =
698 pbegin->expandLabel(style, buf.params(), false);
700 openLabelTag(xs, style);
702 closeLabelTag(xs, style);
706 } else { // some kind of list
707 bool const labelfirst = style.htmllabelfirst();
709 openItemTag(xs, style);
710 if (style.labeltype == LABEL_MANUAL
711 && style.htmllabeltag() != "NONE") {
712 openLabelTag(xs, style);
713 sep = par->firstWordLyXHTML(xs, runparams);
714 closeLabelTag(xs, style);
717 else if (style.labeltype != LABEL_NO_LABEL
718 && style.htmllabeltag() != "NONE") {
719 openLabelTag(xs, style);
720 xs << par->expandLabel(style, buf.params(), false);
721 closeLabelTag(xs, style);
725 openItemTag(xs, style);
727 par->simpleLyXHTMLOnePar(buf, xs, runparams,
728 text.outerFont(distance(begin, par)), false, sep);
730 // We may not want to close the tag yet, in particular,
731 // if we're not at the end...
733 // and are doing items...
734 && !isNormalEnv(style)
735 // and if the depth has changed...
736 && par->params().depth() != origdepth) {
737 // then we'll save this layout for later, and close it when
738 // we get another item.
741 closeItemTag(xs, style);
744 // The other possibility is that the depth has increased, in which
745 // case we need to recurse.
747 send = searchEnvironmentHtml(par, pend);
748 par = makeEnvironmentHtml(buf, xs, runparams, text, par, send);
752 case LATEX_PARAGRAPH:
753 send = searchParagraphHtml(par, pend);
754 par = makeParagraphs(buf, xs, runparams, text, par, send);
757 case LATEX_BIB_ENVIRONMENT:
760 par = makeParagraphs(buf, xs, runparams, text, par, send);
770 closeItemTag(xs, *lastlay);
771 closeTag(xs, bstyle);
// Outputs a single command-type paragraph (pbegin): steps the layout's counter
// when it names one, writes the expanded label inside a label tag followed by
// a space when the layout has a label type, and then renders the paragraph's
// contents with simpleLyXHTMLOnePar().
// NOTE(review): two parameters (used below as xs and text) and the statements
// that open and close the enclosing tag are on lines not visible in this view.
777 void makeCommand(Buffer const & buf,
779 OutputParams const & runparams,
781 ParagraphList::const_iterator const & pbegin)
783 Layout const & style = pbegin->layout();
784 if (!style.counter.empty())
785 buf.params().documentClass().counters().step(style.counter);
789 // Label around sectioning number:
790 // FIXME Probably need to account for LABEL_MANUAL
791 if (style.labeltype != LABEL_NO_LABEL) {
792 openLabelTag(xs, style);
793 xs << pbegin->expandLabel(style, buf.params(), false);
794 closeLabelTag(xs, style);
795 // Otherwise the label might run together with the text
796 xs << from_ascii(" ");
// distance(begin, pbegin) is the paragraph's index within text, which is what
// outerFont() is given here.
799 ParagraphList::const_iterator const begin = text.paragraphs().begin();
800 pbegin->simpleLyXHTMLOnePar(buf, xs, runparams,
801 text.outerFont(distance(begin, pbegin)));
806 } // end anonymous namespace
809 void xhtmlParagraphs(Text const & text,
812 OutputParams const & runparams)
814 ParagraphList const & paragraphs = text.paragraphs();
815 ParagraphList::const_iterator par = paragraphs.begin();
816 ParagraphList::const_iterator pend = paragraphs.end();
818 OutputParams ourparams = runparams;
819 while (par != pend) {
820 Layout const & style = par->layout();
821 ParagraphList::const_iterator lastpar = par;
822 ParagraphList::const_iterator send;
824 switch (style.latextype) {
825 case LATEX_COMMAND: {
826 // The files with which we are working never have more than
827 // one paragraph in a command structure.
829 // if (ourparams.html_in_par)
830 // fix it so we don't get sections inside standard, e.g.
831 // note that we may then need to make runparams not const, so we
832 // can communicate that back.
833 // FIXME Maybe this fix should be in the routines themselves, in case
834 // they are called from elsewhere.
835 makeCommand(buf, xs, ourparams, text, par);
839 case LATEX_ENVIRONMENT:
840 case LATEX_LIST_ENVIRONMENT:
841 case LATEX_ITEM_ENVIRONMENT: {
842 // FIXME Same fix here.
843 send = searchEnvironmentHtml(par, pend);
844 par = makeEnvironmentHtml(buf, xs, ourparams, text, par, send);
847 case LATEX_BIB_ENVIRONMENT: {
848 // FIXME Same fix here.
849 send = searchEnvironmentHtml(par, pend);
850 par = makeBibliography(buf, xs, ourparams, text, par, send);
853 case LATEX_PARAGRAPH:
854 send = searchParagraphHtml(par, pend);
855 par = makeParagraphs(buf, xs, ourparams, text, par, send);
859 // makeEnvironment may process more than one paragraph and bypass pend
860 if (distance(lastpar, par) >= distance(lastpar, pend))