2 * \file output_xhtml.cpp
3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
8 * This code is based upon output_docbook.cpp
10 * Full author contact details are available in file CREDITS.
15 #include "output_xhtml.h"
18 #include "buffer_funcs.h"
19 #include "BufferParams.h"
23 #include "OutputParams.h"
24 #include "Paragraph.h"
25 #include "ParagraphList.h"
26 #include "ParagraphParameters.h"
29 #include "TextClass.h"
31 #include "support/convert.h"
32 #include "support/debug.h"
33 #include "support/lassert.h"
34 #include "support/lstrings.h"
35 #include "support/textutils.h"
40 using namespace lyx::support;
// Escape a single char_type according to the escape setting `e` and
// return the result as a docstring. Cases exist for ESCAPE_NONE,
// ESCAPE_ALL and ESCAPE_AND; under ESCAPE_ALL, '>' is one of the
// characters that gets special treatment.
// NOTE(review): the replacement text produced in each case is not
// visible in this excerpt -- presumably HTML entities; confirm.
46 docstring escapeChar(char_type c, XHTMLStream::EscapeSettings e)
50 case XHTMLStream::ESCAPE_NONE:
53 case XHTMLStream::ESCAPE_ALL:
57 } else if (c == '>') {
62 case XHTMLStream::ESCAPE_AND:
73 // escape what needs escaping
// Walks `str` from begin to end and streams escapeChar(c, e) for every
// character into `d`.
// NOTE(review): the declaration of `d` and the return statement are not
// visible in this excerpt; presumably `d` is a docstring output stream
// whose contents are returned -- confirm.
74 docstring htmlize(docstring const & str, XHTMLStream::EscapeSettings e) {
76 docstring::const_iterator it = str.begin();
77 docstring::const_iterator en = str.end();
78 for (; it != en; ++it)
79 d << escapeChar(*it, e);
// Narrow-character counterpart of the char_type overload: escapes one
// `char` according to `e` and returns the result as a string. The same
// three settings (ESCAPE_NONE, ESCAPE_ALL, ESCAPE_AND) are handled, and
// '>' is again special-cased under ESCAPE_ALL.
// NOTE(review): the replacement text for each case is not visible in
// this excerpt; confirm it matches the char_type overload.
84 string escapeChar(char c, XHTMLStream::EscapeSettings e)
88 case XHTMLStream::ESCAPE_NONE:
91 case XHTMLStream::ESCAPE_ALL:
95 } else if (c == '>') {
100 case XHTMLStream::ESCAPE_AND:
111 // escape what needs escaping
// std::string overload: streams escapeChar(c, e) for every char of `str`
// into `d`, in order.
// NOTE(review): the declaration of `d` and the return statement are not
// visible in this excerpt; presumably the accumulated text is returned.
112 string htmlize(string const & str, XHTMLStream::EscapeSettings e) {
114 string::const_iterator it = str.begin();
115 string::const_iterator en = str.end();
116 for (; it != en; ++it)
117 d << escapeChar(*it, e);
// Builds `newname` from `str` by keeping every character for which
// isalnum() is true and replacing every other character with '_'.
// NOTE(review): `*it` is a plain char. If isalnum resolves to the
// <cctype> function, passing a negative char value (any byte >= 0x80
// where char is signed) is undefined behavior, and the result is
// locale dependent. The docstring overload uses isAlnumASCII instead;
// consider casting to unsigned char or using an ASCII-only test here.
// NOTE(review): the declaration and return of `newname` are not visible
// in this excerpt.
122 string cleanAttr(string const & str)
125 string::const_iterator it = str.begin();
126 string::const_iterator en = str.end();
127 for (; it != en; ++it)
128 newname += isalnum(*it) ? *it : '_';
// docstring overload: appends each character of `str` to `newname`,
// keeping it when isAlnumASCII() accepts it and substituting '_'
// otherwise.
// NOTE(review): the declaration and return of `newname` are not visible
// in this excerpt.
133 docstring cleanAttr(docstring const & str)
136 docstring::const_iterator it = str.begin();
137 docstring::const_iterator en = str.end();
138 for (; it != en; ++it) {
139 char_type const c = *it;
140 newname += isAlnumASCII(c) ? c : char_type('_');
// Returns true when `s` names one of the tags this file treats as a
// font tag: "em", "strong", "i" or "b". The comparison is exact
// (case-sensitive, no surrounding whitespace allowed).
146 bool isFontTag(string const & s)
149 return s == "em" || s == "strong" || s == "i" || s == "b";
// Renders this start tag as text: "<" followed by tag_, then a space and
// attr_ (passed through htmlize with ESCAPE_NONE), converted to a
// docstring with from_utf8.
// NOTE(review): the condition guarding the attribute append and the
// code that closes the tag are not visible in this excerpt.
153 docstring StartTag::asTag() const
155 string output = "<" + tag_;
157 output += " " + html::htmlize(attr_, XHTMLStream::ESCAPE_NONE);
159 return from_utf8(output);
// Returns the closing tag that matches this start tag, i.e. "</" +
// tag_ + ">", as a docstring. Attributes play no part.
163 docstring StartTag::asEndTag() const
165 string output = "</" + tag_ + ">";
166 return from_utf8(output);
// Returns the textual closing tag "</" + tag_ + ">" as a docstring.
170 docstring EndTag::asEndTag() const
172 string output = "</" + tag_ + ">";
173 return from_utf8(output);
// Renders this tag as text: "<" followed by tag_, then a space and
// attr_ (passed through htmlize with ESCAPE_NONE), converted to a
// docstring with from_utf8.
// NOTE(review): the condition guarding the attribute append and the
// tag terminator are not visible in this excerpt.
177 docstring CompTag::asTag() const
179 string output = "<" + tag_;
181 output += " " + html::htmlize(attr_, XHTMLStream::ESCAPE_NONE);
183 return from_utf8(output);
190 ////////////////////////////////////////////////////////////////
194 ////////////////////////////////////////////////////////////////
// Binds the stream to the output stream `os` (stored by reference in
// os_) and starts with the escape setting at ESCAPE_ALL.
196 XHTMLStream::XHTMLStream(odocstream & os)
197 : os_(os), escape_(ESCAPE_ALL)
// Writes the diagnostic `s` straight to os_ wrapped in an HTML comment
// ("<!-- Output Error: ... -->") followed by a newline.
// NOTE(review): `s` is inserted verbatim, so a message containing "-->"
// would end the comment early; callers in this file pass tag names and
// fixed text.
201 void XHTMLStream::writeError(std::string const & s)
204 os_ << from_utf8("<!-- Output Error: " + s + " -->\n");
209 // an illegal tag for internal use
// It serves as a paragraph-boundary marker: startParagraph() queues a
// StartTag with this name, and endParagraph(), closeFontTags() and
// clearTagDeque() compare tag names against it.
210 static string const parsep_tag = "&LyX_parsep_tag&";
// Closes every font tag (see isFontTag()) on top of tag_stack_: each
// one has its end tag written to os_ and is popped, until a non-font
// tag is on top or the stack runs empty. Reaching the paragraph
// separator is the expected outcome. Any other non-font tag triggers an
// error comment listing the open tags from the top of the stack
// downwards, up to the paragraph separator.
// NOTE(review): the return statements are not visible in this excerpt,
// so the exact meaning of the bool result should be confirmed.
214 bool XHTMLStream::closeFontTags()
216 if (tag_stack_.empty())
218 // first, we close any open font tags we can close
219 html::StartTag curtag = tag_stack_.back();
220 while (html::isFontTag(curtag.tag_)) {
221 os_ << curtag.asEndTag();
222 tag_stack_.pop_back();
223 if (tag_stack_.empty())
224 // this probably shouldn't happen, since then the
225 // font tags weren't in any other tag. but that
226 // problem will likely be caught elsewhere.
228 curtag = tag_stack_.back();
231 if (curtag.tag_ == parsep_tag)
234 // so we've hit a non-font tag.
235 writeError("Tags still open in closeFontTags(). Probably not a problem,\n"
236 "but you might want to check these tags:");
// report the open tags, most recently opened first
237 TagStack::const_reverse_iterator it = tag_stack_.rbegin();
238 TagStack::const_reverse_iterator const en = tag_stack_.rend();
239 for (; it != en; ++it) {
240 string const tagname = it->tag_;
241 if (tagname == parsep_tag)
243 writeError(it->tag_);
// Marks the start of a paragraph by queueing a StartTag named
// parsep_tag on pending_tags_.
// NOTE(review): how `keep_empty` is used is not visible in this
// excerpt.
249 void XHTMLStream::startParagraph(bool keep_empty)
251 pending_tags_.push_back(html::StartTag(parsep_tag));
// Ends the current paragraph by removing the paragraph separator tag
// (parsep_tag) and whatever was opened after it.
//  - If the separator is not on tag_stack_, pending_tags_ is searched
//    for it; when it is missing there too an error is reported,
//    otherwise pending tags are meant to be dropped from the back up to
//    and including the separator.
//  - If the separator is on tag_stack_, tags are popped from the top
//    until it is reached; every other tag met on the way is reported
//    and has its end tag written to os_.
// NOTE(review): returns/breaks between these steps are not visible in
// this excerpt.
257 void XHTMLStream::endParagraph()
259 if (!isTagOpen(parsep_tag)) {
261 TagStack::const_iterator dit = pending_tags_.begin();
262 TagStack::const_iterator const den = pending_tags_.end();
264 for (; dit != den; ++dit) {
265 if (dit->tag_ == parsep_tag) {
272 writeError("No paragraph separation tag found in endParagraph().");
276 // this case is normal.
277 while (!pending_tags_.empty()) {
278 // clear all pending tags up to and including the parsep tag.
279 // note that we work from the back, because we want to get rid
280 // of everything that hasn't been used.
281 html::StartTag const cur_tag = pending_tags_.back();
282 string const & tag = cur_tag.tag_;
// NOTE(review): this pops tag_stack_, although the loop reads from and
// is bounded by pending_tags_. pending_tags_ is never shortened here,
// so unless its last entry is the separator the loop cannot make
// progress, and tag_stack_ may be popped while empty. This looks like
// it should be pending_tags_.pop_back() -- confirm and fix.
283 tag_stack_.pop_back();
284 if (tag == parsep_tag)
290 // this case is also normal, if the parsep tag is the last one
291 // on the stack. otherwise, it's an error.
292 while (!tag_stack_.empty()) {
293 html::StartTag const cur_tag = tag_stack_.back();
294 string const & tag = cur_tag.tag_;
295 tag_stack_.pop_back();
296 if (tag == parsep_tag)
298 writeError("Tag `" + tag + "' still open at end of paragraph. Closing.");
299 os_ << cur_tag.asEndTag();
// Drains pending_tags_ from the front: each tag is pushed onto
// tag_stack_ and then removed from the pending queue, so the opening
// order is preserved. Tags other than the paragraph separator receive
// additional handling first.
// NOTE(review): that handling is not visible in this excerpt --
// presumably the start tag is written to os_; confirm.
304 void XHTMLStream::clearTagDeque()
306 while (!pending_tags_.empty()) {
307 html::StartTag const & tag = pending_tags_.front();
308 if (tag.tag_ != parsep_tag)
311 tag_stack_.push_back(tag);
312 pending_tags_.pop_front();
// Writes `d` to os_ after passing it through htmlize() with the current
// escape setting, then resets the setting to ESCAPE_ALL, so a
// non-default setting applies to a single insertion only.
// NOTE(review): any statement preceding the write, and the return, are
// not visible in this excerpt.
317 XHTMLStream & XHTMLStream::operator<<(docstring const & d)
320 os_ << html::htmlize(d, escape_);
321 escape_ = ESCAPE_ALL;
// C-string overload: converts `s` with from_ascii, writes the htmlize()d
// result to os_ using the current escape setting, and resets the
// setting to ESCAPE_ALL.
// NOTE(review): from_ascii presumably expects pure ASCII input --
// confirm before passing arbitrary text.
326 XHTMLStream & XHTMLStream::operator<<(const char * s)
329 docstring const d = from_ascii(s);
330 os_ << html::htmlize(d, escape_);
331 escape_ = ESCAPE_ALL;
// Single char_type overload: writes escapeChar(c, escape_) to os_ and
// resets the escape setting to ESCAPE_ALL.
336 XHTMLStream & XHTMLStream::operator<<(char_type c)
339 os_ << html::escapeChar(c, escape_);
340 escape_ = ESCAPE_ALL;
// Single char overload: escapes `c` with the current escape setting
// into `d` and resets the setting to ESCAPE_ALL.
// NOTE(review): the statement that writes `d` to os_ is not visible in
// this excerpt.
345 XHTMLStream & XHTMLStream::operator<<(char c)
348 string const d = html::escapeChar(c, escape_);
349 escape_ = ESCAPE_ALL;
// Integer overload. Like the other insertion operators it leaves the
// escape setting at ESCAPE_ALL afterwards.
// NOTE(review): the statement that writes `i` is not visible in this
// excerpt.
354 XHTMLStream & XHTMLStream::operator<<(int i)
358 escape_ = ESCAPE_ALL;
// Manipulator-style overload taking an escape setting.
// NOTE(review): the body is not visible in this excerpt -- presumably
// it stores `e` in escape_ for the next insertion, which the text
// overloads then reset to ESCAPE_ALL; confirm.
363 XHTMLStream & XHTMLStream::operator<<(EscapeSettings e)
// Start-tag overload: the tag is not written immediately but queued on
// pending_tags_. Tags with an empty name are special-cased first.
// NOTE(review): what happens for an empty name is not visible in this
// excerpt -- presumably the tag is ignored; confirm.
370 XHTMLStream & XHTMLStream::operator<<(html::StartTag const & tag)
372 if (tag.tag_.empty())
374 pending_tags_.push_back(tag);
// CompTag overload. Tags with an empty name are special-cased first.
// NOTE(review): the rest of the body is not visible in this excerpt.
381 XHTMLStream & XHTMLStream::operator<<(html::CompTag const & tag)
383 if (tag.tag_.empty())
// Line-break manipulator: writes a newline to os_ directly, without
// going through htmlize().
393 XHTMLStream & XHTMLStream::operator<<(html::CR const &)
396 os_ << from_ascii("\n");
// Linear search of tag_stack_ for an entry whose tag_ equals `stag`.
// Only tag_stack_ is consulted; tags still waiting in pending_tags_ are
// not considered.
// NOTE(review): the return statements are not visible in this excerpt;
// presumably true on a match and false otherwise.
401 bool XHTMLStream::isTagOpen(string const & stag)
403 TagStack::const_iterator sit = tag_stack_.begin();
404 TagStack::const_iterator const sen = tag_stack_.end();
405 for (; sit != sen; ++sit)
406 if (sit->tag_ == stag)
412 // this is complicated, because we want to make sure that
413 // everything is properly nested. the code ought to make
414 // sure of that, but we won't assert (yet) if we run into
415 // a problem. we'll just output error messages and try our
416 // best to make things work.
//
// Outline of the steps below, for an end tag with a non-empty name:
//  1. report an error if tag_stack_ is empty;
//  2. if tags are pending: drop the last pending tag when it matches
//     (an empty element); else erase a matching pending tag with an
//     error; else, when etag is open on tag_stack_, report and discard
//     all pending tags;
//  3. if etag matches the top of tag_stack_, write its end tag and pop;
//  4. if etag is not open at all, report and discard it;
//  5. if etag is a font tag with only font tags opened after it, close
//     those, close etag, and queue them again on pending_tags_;
//  6. otherwise close every tag opened after etag (the paragraph
//     separator is popped without output), then etag itself.
// NOTE(review): the returns between these steps are not visible in
// this excerpt.
417 XHTMLStream & XHTMLStream::operator<<(html::EndTag const & etag)
419 if (etag.tag_.empty())
422 // make sure there are tags to be closed
423 if (tag_stack_.empty()) {
424 writeError("Tried to close `" + etag.tag_
425 + "' when no tags were open!");
429 // first make sure we're not closing an empty tag
430 if (!pending_tags_.empty()) {
431 html::StartTag const & stag = pending_tags_.back();
432 if (etag.tag_ == stag.tag_) {
433 // we have <tag></tag>, so we discard it and remove it
434 // from the pending_tags_.
435 pending_tags_.pop_back();
438 // there is a pending tag that isn't the one we are trying
440 // is this tag itself pending?
441 // non-const iterators because we may call erase().
442 TagStack::iterator dit = pending_tags_.begin();
443 TagStack::iterator const den = pending_tags_.end();
444 for (; dit != den; ++dit) {
445 if (dit->tag_ == etag.tag_) {
446 // it was pending, so we just erase it
447 writeError("Tried to close pending tag `" + etag.tag_
448 + "' when other tags were pending. Last pending tag is `"
449 + pending_tags_.back().tag_ + "'. Tag discarded.");
// erase() invalidates dit and den, so the loop must not continue
// after this point
450 pending_tags_.erase(dit);
454 // so etag isn't itself pending. is it even open?
455 if (!isTagOpen(etag.tag_)) {
456 writeError("Tried to close `" + etag.tag_
457 + "' when tag was not open. Tag discarded.");
460 // ok, so etag is open.
461 // our strategy will be as below: we will do what we need to
462 // do to close this tag.
463 string estr = "Closing tag `" + etag.tag_
464 + "' when other tags are pending. Discarded pending tags:\n";
465 for (dit = pending_tags_.begin(); dit != den; ++dit)
466 estr += dit->tag_ + "\n";
468 // clear the pending tags...
469 pending_tags_.clear();
470 // ...and then just fall through.
473 // is the tag we are closing the last one we opened?
474 if (etag.tag_ == tag_stack_.back().tag_) {
476 os_ << etag.asEndTag();
477 // ...and forget about it
478 tag_stack_.pop_back();
482 // we are trying to close a tag other than the one last opened.
483 // let's first see if this particular tag is still open somehow.
484 if (!isTagOpen(etag.tag_)) {
485 writeError("Tried to close `" + etag.tag_
486 + "' when tag was not open. Tag discarded.");
490 // so the tag was opened, but other tags have been opened since
491 // and not yet closed.
492 // if it's a font tag, though...
493 if (html::isFontTag(etag.tag_)) {
494 // it won't be a problem if the other tags open since this one
495 // are also font tags.
496 TagStack::const_reverse_iterator rit = tag_stack_.rbegin();
497 TagStack::const_reverse_iterator ren = tag_stack_.rend();
498 for (; rit != ren; ++rit) {
499 if (rit->tag_ == etag.tag_)
501 if (!html::isFontTag(rit->tag_)) {
502 // we'll just leave it and, presumably, have to close it later.
503 writeError("Unable to close font tag `" + etag.tag_
504 + "' due to open non-font tag `" + rit->tag_ + "'.");
510 // <em>this is <strong>bold
511 // and are being asked to close em. we want:
512 // <em>this is <strong>bold</strong></em><strong>
513 // first, we close the intervening tags...
514 html::StartTag curtag = tag_stack_.back();
515 // ...remembering them in a stack.
// etag is known to be on tag_stack_ (checked above), so this loop
// reaches it before the stack runs empty
517 while (curtag.tag_ != etag.tag_) {
518 os_ << curtag.asEndTag();
519 fontstack.push_back(curtag);
520 tag_stack_.pop_back();
521 curtag = tag_stack_.back();
523 // now close our tag...
524 os_ << etag.asEndTag();
525 tag_stack_.pop_back();
527 // ...and restore the other tags.
// fontstack is walked in reverse so the tags are queued in the order
// in which they were originally opened
528 rit = fontstack.rbegin();
529 ren = fontstack.rend();
530 for (; rit != ren; ++rit)
531 pending_tags_.push_back(*rit);
535 // it wasn't a font tag.
536 // so other tags were opened before this one and not properly closed.
537 // so we'll close them, too. that may cause other issues later, but it
538 // at least guarantees proper nesting.
539 writeError("Closing tag `" + etag.tag_
540 + "' when other tags are open, namely:");
541 html::StartTag curtag = tag_stack_.back();
542 while (curtag.tag_ != etag.tag_) {
543 writeError(curtag.tag_);
// the paragraph separator is internal only, so no end tag is written
// for it
544 if (curtag.tag_ != parsep_tag)
545 os_ << curtag.asEndTag();
546 tag_stack_.pop_back();
547 curtag = tag_stack_.back();
549 // curtag is now the one we actually want.
550 os_ << curtag.asEndTag();
551 tag_stack_.pop_back();
556 // End code for XHTMLStream
560 // convenience functions
// Sends the layout's main HTML start tag (htmltag() with htmlattr()) to
// the stream.
562 inline void openTag(XHTMLStream & xs, Layout const & lay)
564 xs << html::StartTag(lay.htmltag(), lay.htmlattr());
// Like openTag(xs, lay), but also conveys the paragraph alignment: the
// CSS value from alignmentToCSS() is appended to the layout attributes
// as an inline style='text-align: ...;' attribute.
// NOTE(review): lines between the alignment lookup and the attribute
// construction are not visible in this excerpt -- presumably an early
// exit to the plain overload when `align` is empty; confirm.
568 void openTag(XHTMLStream & xs, Layout const & lay,
569 ParagraphParameters const & params)
571 // FIXME Are there other things we should handle here?
572 string const align = alignmentToCSS(params.align());
577 string attrs = lay.htmlattr() + " style='text-align: " + align + ";'";
578 xs << html::StartTag(lay.htmltag(), attrs);
// Sends the end tag matching the layout's main HTML tag to the stream.
582 inline void closeTag(XHTMLStream & xs, Layout const & lay)
584 xs << html::EndTag(lay.htmltag());
// Sends the layout's label start tag (htmllabeltag() with
// htmllabelattr()) to the stream.
588 inline void openLabelTag(XHTMLStream & xs, Layout const & lay)
590 xs << html::StartTag(lay.htmllabeltag(), lay.htmllabelattr());
// Sends the end tag matching the layout's label tag to the stream.
594 inline void closeLabelTag(XHTMLStream & xs, Layout const & lay)
596 xs << html::EndTag(lay.htmllabeltag());
// Sends the layout's item start tag (htmlitemtag() with
// htmlitemattr()) to the stream.
// NOTE(review): the third StartTag argument `true` is presumably a
// "keep even if empty" flag -- confirm against the StartTag constructor.
600 inline void openItemTag(XHTMLStream & xs, Layout const & lay)
602 xs << html::StartTag(lay.htmlitemtag(), lay.htmlitemattr(), true);
// Item-tag variant that also conveys the paragraph alignment as an
// inline style='text-align: ...;' attribute. The plain
// openItemTag(xs, lay) overload is also called from here.
// NOTE(review): the condition selecting between the two paths is not
// visible in this excerpt -- presumably the plain overload is used when
// `align` is empty; confirm.
606 void openItemTag(XHTMLStream & xs, Layout const & lay,
607 ParagraphParameters const & params)
609 // FIXME Are there other things we should handle here?
610 string const align = alignmentToCSS(params.align());
612 openItemTag(xs, lay);
// NOTE(review): this starts from lay.htmlattr(), whereas the plain item
// overload uses lay.htmlitemattr() and passes `true` as a third
// StartTag argument. Aligned and unaligned items may therefore get
// different attributes -- check whether htmlitemattr() was intended.
615 string attrs = lay.htmlattr() + " style='text-align: " + align + ";'";
616 xs << html::StartTag(lay.htmlitemtag(), attrs);
// Sends the end tag matching the layout's item tag to the stream.
620 inline void closeItemTag(XHTMLStream & xs, Layout const & lay)
622 xs << html::EndTag(lay.htmlitemtag());
625 // end of convenience functions
// Starting from the paragraph after `p`, advances while the end `pend`
// has not been reached and the paragraph's layout has latextype
// LATEX_PARAGRAPH. `p` itself is skipped without being tested.
// NOTE(review): the loop body and return are not visible in this
// excerpt; presumably the stopping position is returned.
627 ParagraphList::const_iterator searchParagraphHtml(
628 ParagraphList::const_iterator p,
629 ParagraphList::const_iterator const & pend)
631 for (++p; p != pend && p->layout().latextype == LATEX_PARAGRAPH; ++p)
// Scans forward from the paragraph after `pstart`, comparing each
// paragraph with the starting one. The tests, in order: the layout is a
// LATEX_COMMAND; the depth is below the starting depth; the depth is
// above it; at equal depth, the layout is a LATEX_PARAGRAPH or has a
// different latexname() than the starting layout.
// NOTE(review): the action taken after each test (continue / return)
// and the final return are not visible in this excerpt; presumably the
// function returns the first paragraph that no longer belongs to the
// environment begun at `pstart`.
638 ParagraphList::const_iterator searchEnvironmentHtml(
639 ParagraphList::const_iterator const pstart,
640 ParagraphList::const_iterator const & pend)
642 ParagraphList::const_iterator p = pstart;
643 Layout const & bstyle = p->layout();
644 size_t const depth = p->params().depth();
645 for (++p; p != pend; ++p) {
646 Layout const & style = p->layout();
647 // It shouldn't happen that e.g. a section command occurs inside
648 // a quotation environment, at a higher depth, but as of 6/2009,
649 // it can happen. We pretend that it's just at lowest depth.
650 if (style.latextype == LATEX_COMMAND)
652 // If depth is down, we're done
653 if (p->params().depth() < depth)
655 // If depth is up, we're not done
656 if (p->params().depth() > depth)
658 // Now we know we are at the same depth
659 if (style.latextype == LATEX_PARAGRAPH
660 || style.latexname() != bstyle.latexname())
// Outputs the paragraphs in [pbegin, pend). For each one: steps the
// layout's counter if it has one, decides whether a paragraph tag has
// to be opened (`opened`), renders the paragraph content with
// simpleLyXHTMLOnePar(), decides whether a closing tag is due
// (`needclose`), and finally writes any deferred material returned by
// the paragraph unescaped, followed by a line break.
// NOTE(review): the `xs` and `text` parameters, the statements guarded
// by `opened` / `needclose`, and the return are not visible in this
// excerpt.
667 ParagraphList::const_iterator makeParagraphs(Buffer const & buf,
669 OutputParams const & runparams,
671 ParagraphList::const_iterator const & pbegin,
672 ParagraphList::const_iterator const & pend)
674 ParagraphList::const_iterator const begin = text.paragraphs().begin();
675 ParagraphList::const_iterator par = pbegin;
676 for (; par != pend; ++par) {
677 Layout const & lay = par->layout();
678 if (!lay.counter.empty())
679 buf.params().documentClass().counters().step(lay.counter, OutputUpdate);
680 // FIXME We should see if there's a label to be output and
681 // do something with it.
685 // If we are already in a paragraph, and this is the first one, then we
686 // do not want to open the paragraph tag.
687 // we also do not want to open it if the current layout does not permit
688 // multiple paragraphs.
689 bool const opened = runparams.html_make_pars &&
690 (par != pbegin || !runparams.html_in_par);
692 openTag(xs, lay, par->params());
// the paragraph index (distance from the start of the text) selects
// the outer font
693 docstring const deferred =
694 par->simpleLyXHTMLOnePar(buf, xs, runparams, text.outerFont(distance(begin, par)));
696 // We want to issue the closing tag if either:
697 // (i) We opened it, and either html_in_par is false,
698 // or we're not in the last paragraph, anyway.
699 // (ii) We didn't open it and html_in_par is true,
700 // but we are in the first par, and there is a next par.
701 ParagraphList::const_iterator nextpar = par;
703 bool const needclose =
704 (opened && (!runparams.html_in_par || nextpar != pend))
705 || (!opened && runparams.html_in_par && par == pbegin && nextpar != pend);
710 if (!deferred.empty()) {
// deferred is written with ESCAPE_NONE, i.e. as-is
711 xs << XHTMLStream::ESCAPE_NONE << deferred << html::CR();
// Outputs a bibliography: an h2 heading of class 'bibliography' whose
// text is the label string of the first paragraph's layout, followed by
// a div of class 'bibliography' that wraps the paragraphs in
// [pbegin, pend) as rendered by makeParagraphs().
// NOTE(review): the `xs` and `text` parameters and the return value are
// not visible in this excerpt.
718 ParagraphList::const_iterator makeBibliography(Buffer const & buf,
720 OutputParams const & runparams,
722 ParagraphList::const_iterator const & pbegin,
723 ParagraphList::const_iterator const & pend)
726 // Use TextClass::htmlTOCLayout() to figure out how we should look.
727 xs << html::StartTag("h2", "class='bibliography'")
728 << pbegin->layout().labelstring(false)
729 << html::EndTag("h2")
731 << html::StartTag("div", "class='bibliography'")
733 makeParagraphs(buf, xs, runparams, text, pbegin, pend);
734 xs << html::EndTag("div");
// True for layouts whose latextype is LATEX_ENVIRONMENT or
// LATEX_BIB_ENVIRONMENT. List and item environments are therefore not
// "normal" in this sense.
739 bool isNormalEnv(Layout const & lay)
741 return lay.latextype == LATEX_ENVIRONMENT
742 || lay.latextype == LATEX_BIB_ENVIRONMENT;
// Outputs the environment that starts at `pbegin` and extends to
// `pend`. Paragraphs at the starting depth are written as items of this
// environment (with optional label); deeper environment paragraphs are
// handled by recursing, and plain or bibliography paragraphs are handed
// to makeParagraphs(). The environment's own tag is closed at the end
// via closeTag(xs, bstyle).
// NOTE(review): the `xs` and `text` parameters, several conditions, the
// declaration of `sep`, and the return are not visible in this excerpt.
746 ParagraphList::const_iterator makeEnvironmentHtml(Buffer const & buf,
748 OutputParams const & runparams,
750 ParagraphList::const_iterator const & pbegin,
751 ParagraphList::const_iterator const & pend)
753 ParagraphList::const_iterator const begin = text.paragraphs().begin();
754 ParagraphList::const_iterator par = pbegin;
755 Layout const & bstyle = par->layout();
756 depth_type const origdepth = pbegin->params().depth();
758 // open tag for this environment
762 // we will on occasion need to remember a layout from before.
763 Layout const * lastlay = 0;
765 while (par != pend) {
766 Layout const & style = par->layout();
767 // the counter only gets stepped if we're in some kind of list,
768 // or if it's the first time through.
769 // note that enum, etc, are handled automatically.
770 // FIXME There may be a bug here about user defined enumeration
771 // types. If so, then we'll need to take the counter and add "i",
772 // "ii", etc, as with enum.
773 Counters & cnts = buf.params().documentClass().counters();
774 docstring const & cntr = style.counter;
775 if (!style.counter.empty()
776 && (par == pbegin || !isNormalEnv(style))
777 && cnts.hasCounter(cntr)
779 cnts.step(cntr, OutputUpdate);
780 ParagraphList::const_iterator send;
781 // this will be positive, if we want to skip the initial word
782 // (if it's been taken for the label).
785 switch (style.latextype) {
786 case LATEX_ENVIRONMENT:
787 case LATEX_LIST_ENVIRONMENT:
788 case LATEX_ITEM_ENVIRONMENT: {
789 // There are two possibilities in this case.
790 // One is that we are still in the environment in which we
791 // started---which we will be if the depth is the same.
792 if (par->params().depth() == origdepth) {
793 LASSERT(bstyle == style, /* */);
// close an item left open by an earlier iteration; the guard on
// lastlay is not visible in this excerpt
795 closeItemTag(xs, *lastlay);
799 bool const labelfirst = style.htmllabelfirst();
801 openItemTag(xs, style, par->params());
804 if (style.labeltype != LABEL_NO_LABEL &&
805 style.htmllabeltag() != "NONE") {
806 if (isNormalEnv(style)) {
807 // in this case, we print the label only for the first
808 // paragraph (as in a theorem).
810 docstring const lbl =
811 pbegin->params().labelString();
813 openLabelTag(xs, style);
815 closeLabelTag(xs, style);
819 } else { // some kind of list
820 if (style.labeltype == LABEL_MANUAL) {
821 openLabelTag(xs, style);
822 sep = par->firstWordLyXHTML(xs, runparams);
823 closeLabelTag(xs, style);
827 openLabelTag(xs, style);
828 xs << par->params().labelString();
829 closeLabelTag(xs, style);
833 } // end label output
836 openItemTag(xs, style, par->params());
// `sep` lets the paragraph output skip the word already used as label
838 par->simpleLyXHTMLOnePar(buf, xs, runparams,
839 text.outerFont(distance(begin, par)), sep);
842 // We may not want to close the tag yet, in particular:
843 // If we're not at the end...
845 // and are doing items...
846 && !isNormalEnv(style)
847 // and if the depth has changed...
848 && par->params().depth() != origdepth) {
849 // then we'll save this layout for later, and close it when
850 // we get another item.
853 closeItemTag(xs, style);
856 // The other possibility is that the depth has increased, in which
857 // case we need to recurse.
859 send = searchEnvironmentHtml(par, pend);
860 par = makeEnvironmentHtml(buf, xs, runparams, text, par, send);
864 case LATEX_PARAGRAPH:
865 send = searchParagraphHtml(par, pend);
866 par = makeParagraphs(buf, xs, runparams, text, par, send);
869 case LATEX_BIB_ENVIRONMENT:
872 par = makeParagraphs(buf, xs, runparams, text, par, send);
// after the loop: close a still-open item (guard not visible in this
// excerpt), then the environment itself
882 closeItemTag(xs, *lastlay);
883 closeTag(xs, bstyle);
// Outputs a single command-style paragraph (e.g. a sectioning command):
// steps the layout's counter if it has one, opens the layout's tag with
// the paragraph's alignment, writes the label (if the layout has one)
// inside the label tag followed by a space, and then renders the
// paragraph content with simpleLyXHTMLOnePar().
// NOTE(review): the `xs` and `text` parameters and the closing of the
// layout tag are not visible in this excerpt.
889 void makeCommand(Buffer const & buf,
891 OutputParams const & runparams,
893 ParagraphList::const_iterator const & pbegin)
895 Layout const & style = pbegin->layout();
896 if (!style.counter.empty())
897 buf.params().documentClass().counters().step(style.counter, OutputUpdate);
899 openTag(xs, style, pbegin->params());
901 // Label around sectioning number:
902 // FIXME Probably need to account for LABEL_MANUAL
903 if (style.labeltype != LABEL_NO_LABEL) {
904 openLabelTag(xs, style);
905 xs << pbegin->params().labelString();
906 closeLabelTag(xs, style);
907 // Otherwise the label might run together with the text
908 xs << from_ascii(" ");
911 ParagraphList::const_iterator const begin = text.paragraphs().begin();
912 pbegin->simpleLyXHTMLOnePar(buf, xs, runparams,
913 text.outerFont(distance(begin, pbegin)));
918 } // end anonymous namespace
// Top-level driver: outputs the paragraphs of `text` with indices in
// [runparams.par_begin, runparams.par_end). An empty range
// (par_begin == par_end) is widened to the whole paragraph list. Each
// iteration dispatches on the layout's latextype to makeCommand(),
// makeEnvironmentHtml(), makeBibliography() or makeParagraphs(), and
// then advances the paragraph index by the number of paragraphs the
// callee consumed.
// NOTE(review): par_begin / par_end are assigned through a const
// reference, so they are presumably declared mutable -- confirm.
// NOTE(review): the `buf` and `xs` parameters, the handling of the
// appendix counters, and the iterator advance after makeCommand() are
// not visible in this excerpt.
921 void xhtmlParagraphs(Text const & text,
924 OutputParams const & runparams)
926 ParagraphList const & paragraphs = text.paragraphs();
927 if (runparams.par_begin == runparams.par_end) {
928 runparams.par_begin = 0;
929 runparams.par_end = paragraphs.size();
931 pit_type bpit = runparams.par_begin;
932 pit_type const epit = runparams.par_end;
933 LASSERT(bpit < epit, /* */);
// local copy of the output parameters that is handed to the helpers
935 OutputParams ourparams = runparams;
936 ParagraphList::const_iterator const pend =
937 (epit == (int) paragraphs.size()) ?
938 paragraphs.end() : paragraphs.constIterator(epit);
939 while (bpit < epit) {
940 ParagraphList::const_iterator par = paragraphs.constIterator(bpit);
941 if (par->params().startOfAppendix()) {
942 // FIXME: only the counter corresponding to toplevel
943 // sectioning should be reset
944 Counters & cnts = buf.masterBuffer()->params().documentClass().counters();
948 Layout const & style = par->layout();
// remember where this round started, to measure how far `par` moved
949 ParagraphList::const_iterator const lastpar = par;
950 ParagraphList::const_iterator send;
952 switch (style.latextype) {
953 case LATEX_COMMAND: {
954 // The files with which we are working never have more than
955 // one paragraph in a command structure.
957 // if (ourparams.html_in_par)
958 // fix it so we don't get sections inside standard, e.g.
959 // note that we may then need to make runparams not const, so we
960 // can communicate that back.
961 // FIXME Maybe this fix should be in the routines themselves, in case
962 // they are called from elsewhere.
963 makeCommand(buf, xs, ourparams, text, par);
967 case LATEX_ENVIRONMENT:
968 case LATEX_LIST_ENVIRONMENT:
969 case LATEX_ITEM_ENVIRONMENT: {
970 // FIXME Same fix here.
971 send = searchEnvironmentHtml(par, pend);
972 par = makeEnvironmentHtml(buf, xs, ourparams, text, par, send);
975 case LATEX_BIB_ENVIRONMENT: {
976 // FIXME Same fix here.
977 send = searchEnvironmentHtml(par, pend);
978 par = makeBibliography(buf, xs, ourparams, text, par, send);
981 case LATEX_PARAGRAPH:
982 send = searchParagraphHtml(par, pend);
983 par = makeParagraphs(buf, xs, ourparams, text, par, send);
986 bpit += distance(lastpar, par);
991 string alignmentToCSS(LyXAlignment align) {
993 case LYX_ALIGN_BLOCK:
994 // we are NOT going to use text-align: justify!!
997 case LYX_ALIGN_RIGHT:
999 case LYX_ALIGN_CENTER: