2 * \file output_xhtml.cpp
3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
6 * \author Richard Kimberly Heck
8 * This code is based upon output_docbook.cpp
10 * Full author contact details are available in file CREDITS.
15 #include "output_xhtml.h"
18 #include "BufferParams.h"
22 #include "LayoutEnums.h"
23 #include "Paragraph.h"
24 #include "ParagraphList.h"
25 #include "ParagraphParameters.h"
28 #include "TextClass.h"
30 #include "support/lassert.h"
35 // Uncomment to activate debugging code.
36 // #define XHTML_DEBUG
39 using namespace lyx::support;
// Maps a font change to the name of the HTML element that renders it.
// Emphasis, bold, noun and italic get their own elements ("em", "b", "dfn",
// "i"); the three underline variants share "u"; both strike-out variants
// share "del"; shape, family and size changes all use a generic "span".
// NOTE(review): the switch header and whatever follows the last case are not
// visible in this listing.
44 docstring fontToHtmlTag(xml::FontTypes type)
47 case xml::FontTypes::FT_EMPH:
48 return from_utf8("em");
49 case xml::FontTypes::FT_BOLD:
50 return from_utf8("b");
51 case xml::FontTypes::FT_NOUN:
52 return from_utf8("dfn");
// All underline flavours (single, wavy, double) map to the same element.
53 case xml::FontTypes::FT_UBAR:
54 case xml::FontTypes::FT_WAVE:
55 case xml::FontTypes::FT_DBAR:
56 return from_utf8("u");
// Both strike-out flavours map to the same element.
57 case xml::FontTypes::FT_SOUT:
58 case xml::FontTypes::FT_XOUT:
59 return from_utf8("del");
60 case xml::FontTypes::FT_ITALIC:
61 return from_utf8("i");
// Shape, family and size changes have no dedicated element.
62 case xml::FontTypes::FT_UPRIGHT:
63 case xml::FontTypes::FT_SLANTED:
64 case xml::FontTypes::FT_SMALLCAPS:
65 case xml::FontTypes::FT_ROMAN:
66 case xml::FontTypes::FT_SANS:
67 case xml::FontTypes::FT_TYPE:
68 case xml::FontTypes::FT_SIZE_TINY:
69 case xml::FontTypes::FT_SIZE_SCRIPT:
70 case xml::FontTypes::FT_SIZE_FOOTNOTE:
71 case xml::FontTypes::FT_SIZE_SMALL:
72 case xml::FontTypes::FT_SIZE_NORMAL:
73 case xml::FontTypes::FT_SIZE_LARGE:
74 case xml::FontTypes::FT_SIZE_LARGER:
75 case xml::FontTypes::FT_SIZE_LARGEST:
76 case xml::FontTypes::FT_SIZE_HUGE:
77 case xml::FontTypes::FT_SIZE_HUGER:
78 case xml::FontTypes::FT_SIZE_INCREASE:
79 case xml::FontTypes::FT_SIZE_DECREASE:
80 return from_utf8("span");
87 docstring fontToHtmlAttribute(xml::FontTypes type)
90 case xml::FontTypes::FT_EMPH:
91 case xml::FontTypes::FT_BOLD:
92 return from_ascii("");
93 case xml::FontTypes::FT_NOUN:
94 return from_ascii("class='lyxnoun'");
95 case xml::FontTypes::FT_UBAR:
96 return from_ascii("");
97 case xml::FontTypes::FT_DBAR:
98 return from_ascii("class='dline'");
99 case xml::FontTypes::FT_XOUT:
100 case xml::FontTypes::FT_SOUT:
101 return from_ascii("class='strikeout'");
102 case xml::FontTypes::FT_WAVE:
103 return from_ascii("class='wline'");
104 case xml::FontTypes::FT_ITALIC:
105 return from_ascii("");
106 case xml::FontTypes::FT_UPRIGHT:
107 return from_ascii("style='font-style:normal;'");
108 case xml::FontTypes::FT_SLANTED:
109 return from_ascii("style='font-style:oblique;'");
110 case xml::FontTypes::FT_SMALLCAPS:
111 return from_ascii("style='font-variant:small-caps;'");
112 case xml::FontTypes::FT_ROMAN:
113 return from_ascii("style='font-family:serif;'");
114 case xml::FontTypes::FT_SANS:
115 return from_ascii("style='font-family:sans-serif;'");
116 case xml::FontTypes::FT_TYPE:
117 return from_ascii("style='font-family:monospace;'");
118 case xml::FontTypes::FT_SIZE_TINY:
119 case xml::FontTypes::FT_SIZE_SCRIPT:
120 case xml::FontTypes::FT_SIZE_FOOTNOTE:
121 return from_ascii("style='font-size:x-small;'");
122 case xml::FontTypes::FT_SIZE_SMALL:
123 return from_ascii("style='font-size:small;'");
124 case xml::FontTypes::FT_SIZE_NORMAL:
125 return from_ascii("style='font-size:normal;'");
126 case xml::FontTypes::FT_SIZE_LARGE:
127 return from_ascii("style='font-size:large;'");
128 case xml::FontTypes::FT_SIZE_LARGER:
129 case xml::FontTypes::FT_SIZE_LARGEST:
130 return from_ascii("style='font-size:x-large;'");
131 case xml::FontTypes::FT_SIZE_HUGE:
132 case xml::FontTypes::FT_SIZE_HUGER:
133 return from_ascii("style='font-size:xx-large;'");
134 case xml::FontTypes::FT_SIZE_INCREASE:
135 return from_ascii("style='font-size:larger;'");
136 case xml::FontTypes::FT_SIZE_DECREASE:
137 return from_ascii("style='font-size:smaller;'");
140 return from_ascii("");
// Builds the opening font tag for `type` by pairing the element name from
// fontToHtmlTag() with the attribute string from fontToHtmlAttribute().
144 xml::FontTag xhtmlStartFontTag(xml::FontTypes type)
146 return xml::FontTag(fontToHtmlTag(type), fontToHtmlAttribute(type), type);
// Builds the closing font tag for `type`; the element name comes from
// fontToHtmlTag(), so it matches the tag opened for the same `type`.
150 xml::EndFontTag xhtmlEndFontTag(xml::FontTypes type)
152 return xml::EndFontTag(fontToHtmlTag(type), type);
157 // convenience functions
// Writes the opening paragraph tag for layout `lay` to `xs`. The element is
// lay.htmltag() and the attributes are lay.htmlGetAttrString(), followed by
// id='<parlabel>' when `parlabel` is not empty.
159 inline void openParTag(XMLStream & xs, Layout const & lay,
160 std::string const & parlabel)
162 string attrs = lay.htmlGetAttrString();
163 if (!parlabel.empty())
164 attrs += " id='" + parlabel + "'";
165 xs << xml::ParTag(lay.htmltag(), attrs);
// Variant of openParTag() for callers that supply the CSS class themselves:
// the attribute string is class='<cssclass>' (the layout's own attribute
// string is not used), followed by id='<parlabel>' when `parlabel` is not
// empty. The element is still lay.htmltag().
169 void openParTag(XMLStream & xs, Layout const & lay,
170 std::string const & cssclass,
171 std::string const & parlabel) {
172 string attrs = "class='" + cssclass + "'";
173 if (!parlabel.empty())
174 attrs += " id='" + parlabel + "'";
175 xs << xml::ParTag(lay.htmltag(), attrs);
// Variant of openParTag() that also looks at the paragraph's parameters.
// The alignment is converted to CSS with alignmentToCSS(). Two outcomes are
// visible below: delegating to the (xs, lay, parlabel) overload, or writing
// the layout's attribute string extended with style='text-align: <align>;'
// and, when `parlabel` is not empty, id='<parlabel>'.
// NOTE(review): the test that selects between the two outcomes is not visible
// in this listing; presumably the delegation happens when `align` is empty.
// Confirm against the full file.
178 void openParTag(XMLStream & xs, Layout const & lay,
179 ParagraphParameters const & params,
180 std::string const & parlabel)
182 // FIXME Are there other things we should handle here?
183 string const align = alignmentToCSS(params.align());
185 openParTag(xs, lay, parlabel);
188 string attrs = lay.htmlGetAttrString() + " style='text-align: " + align + ";'";
189 if (!parlabel.empty())
190 attrs += " id='" + parlabel + "'";
191 xs << xml::ParTag(lay.htmltag(), attrs);
// Writes the end tag for the layout's main element, lay.htmltag().
195 inline void closeTag(XMLStream & xs, Layout const & lay)
197 xs << xml::EndTag(lay.htmltag());
// Writes the start tag for the layout's label element: name from
// lay.htmllabeltag(), attributes from lay.htmllabelattr().
201 inline void openLabelTag(XMLStream & xs, Layout const & lay)
203 xs << xml::StartTag(lay.htmllabeltag(), lay.htmllabelattr());
// Writes the end tag for the layout's label element, lay.htmllabeltag().
207 inline void closeLabelTag(XMLStream & xs, Layout const & lay)
209 xs << xml::EndTag(lay.htmllabeltag());
// Writes the start tag for the layout's item element, unless the layout's
// item tag is the literal "NONE", in which case nothing is written. The
// attributes are lay.htmlitemattr(), followed by id='<parlabel>' when
// `parlabel` is not empty.
213 inline void openItemTag(XMLStream & xs, Layout const & lay,
214 std::string const & parlabel)
216 if (lay.htmlitemtag() != "NONE") {
217 string attrs = lay.htmlitemattr();
218 if (!parlabel.empty())
219 attrs += " id='" + parlabel + "'";
// NOTE(review): the meaning of the third argument (true) is defined by
// xml::StartTag, which is not visible here.
220 xs << xml::StartTag(lay.htmlitemtag(), attrs, true);
// Variant of openItemTag() that also looks at the paragraph's parameters.
// Nothing is written when the layout's item tag is the literal "NONE".
// Otherwise the alignment is converted with alignmentToCSS(), and two
// outcomes are visible below: delegating to the (xs, lay, parlabel)
// overload, or writing the item start tag with a
// style='text-align: <align>;' attribute and, when `parlabel` is not empty,
// id='<parlabel>'.
// NOTE(review): the test that selects between the two outcomes is not visible
// in this listing; presumably the delegation happens when `align` is empty.
225 void openItemTag(XMLStream & xs, Layout const & lay,
226 ParagraphParameters const & params,
227 std::string const & parlabel)
229 if (lay.htmlitemtag() != "NONE") {
230 // FIXME Are there other things we should handle here?
231 string const align = alignmentToCSS(params.align());
233 openItemTag(xs, lay, parlabel);
// NOTE(review): this path starts from lay.htmlGetAttrString(), whereas the
// (xs, lay, parlabel) overload starts from lay.htmlitemattr() and passes a
// third argument to xml::StartTag. Confirm that the difference is intended.
236 string attrs = lay.htmlGetAttrString() + " style='text-align: " + align + ";'";
237 if (!parlabel.empty())
238 attrs += " id='" + parlabel + "'";
239 xs << xml::StartTag(lay.htmlitemtag(), attrs);
// Writes the end tag for the layout's item element. As in openItemTag(),
// nothing is written when the item tag is the literal "NONE".
244 inline void closeItemTag(XMLStream & xs, Layout const & lay)
246 if (lay.htmlitemtag() != "NONE") {
247 xs << xml::EndTag(lay.htmlitemtag());
251 // end of convenience functions
// Scans forward from the paragraph after `p`, continuing while the end
// (`pend`) has not been reached and the current paragraph's layout has
// latextype LATEX_PARAGRAPH.
// NOTE(review): the loop body and the return statement are not visible in
// this listing; presumably the iterator where the scan stopped is returned.
253 ParagraphList::const_iterator findLastParagraph(
254 ParagraphList::const_iterator p,
255 ParagraphList::const_iterator const & pend)
257 for (++p; p != pend && p->layout().latextype == LATEX_PARAGRAPH; ++p)
// Scans forward from the paragraph after `pstart` to locate where the
// environment that starts at `pstart` ends, never going past `pend`.
// The layout and depth of the first paragraph are remembered, and each later
// paragraph is tested, in order, for: being a LATEX_COMMAND; having a smaller
// depth; having a larger depth; being a LATEX_PARAGRAPH or having a layout
// different from the first paragraph's.
// NOTE(review): the statement executed after each test (and the final return)
// is not visible in this listing; only the conditions and their original
// comments are. Confirm the actions against the full file.
264 ParagraphList::const_iterator findEndOfEnvironment(
265 ParagraphList::const_iterator const & pstart,
266 ParagraphList::const_iterator const & pend)
268 ParagraphList::const_iterator p = pstart;
269 Layout const & bstyle = p->layout();
270 size_t const depth = p->params().depth();
271 for (++p; p != pend; ++p) {
272 Layout const & style = p->layout();
273 // It shouldn't happen that e.g. a section command occurs inside
274 // a quotation environment, at a higher depth, but as of 6/2009,
275 // it can happen. We pretend that it's just at lowest depth.
276 if (style.latextype == LATEX_COMMAND)
279 // If depth is down, we're done
280 if (p->params().depth() < depth)
283 // If depth is up, we're not done
284 if (p->params().depth() > depth)
287 // FIXME I am not sure about the first check.
288 // Surely we *could* have different layouts that count as
289 // LATEX_PARAGRAPH, right?
290 if (style.latextype == LATEX_PARAGRAPH || style != bstyle)
// Renders the run of paragraphs [pbegin, pend) as XHTML.
// For each paragraph it steps the layout's counter (when the layout names
// one), works out whether a paragraph tag has to be opened and/or closed
// around the content, and writes the content with simpleLyXHTMLOnePar().
// Whatever that call returns as "deferred" is written afterwards without
// escaping, followed by a line break.
// NOTE(review): this listing omits several lines of the function, including
// two parameter lines (`xs` and `text` are used below but not declared in
// the visible signature), the statements that set `special_case` and advance
// `nextpar`, the guard around the openParTag() call, and the return.
297 ParagraphList::const_iterator makeParagraphs(Buffer const & buf,
299 OutputParams const & runparams,
301 ParagraphList::const_iterator const & pbegin,
302 ParagraphList::const_iterator const & pend)
// First paragraph of the whole text; used with distance() below to turn an
// iterator into a paragraph index for outerFont().
304 ParagraphList::const_iterator const begin = text.paragraphs().begin();
305 ParagraphList::const_iterator par = pbegin;
306 for (; par != pend; ++par) {
307 Layout const & lay = par->layout();
308 if (!lay.counter.empty())
309 buf.masterBuffer()->params().
310 documentClass().counters().step(lay.counter, OutputUpdate);
312 // FIXME We should see if there's a label to be output and
313 // do something with it.
317 // We want to open the paragraph tag if:
318 // (i) the current layout permits multiple paragraphs
319 // (ii) we are either not already inside a paragraph (HTMLIsBlock) OR
320 // we are, but this is not the first paragraph
322 // But there is also a special case, and we first see whether we are in it.
323 // We do not want to open the paragraph tag if this paragraph contains
324 // only one item, and that item is "inline", i.e., not HTMLIsBlock (such
325 // as a branch). On the other hand, if that single item has a font change
326 // applied to it, then we still do need to open the paragraph.
328 // Obviously, this is very fragile. The main reason we need to do this is
329 // because of branches, e.g., a branch that contains an entire new section.
330 // We do not really want to wrap that whole thing in a <div>...</div>.
331 bool special_case = false;
// Only a paragraph consisting of exactly one element is a candidate; getInset(0)
// yields that element if it is an inset.
332 Inset const * specinset = par->size() == 1 ? par->getInset(0) : nullptr;
333 if (specinset && !specinset->getLayout().htmlisblock()) {
334 Layout const & style = par->layout();
// The font the layout would give by default (label font for manual labels)...
335 FontInfo const first_font = style.labeltype == LABEL_MANUAL ?
336 style.labelfont : style.font;
// ...compared with the font actually in effect at position 0.
337 FontInfo const our_font =
338 par->getFont(buf.masterBuffer()->params(), 0,
339 text.outerFont(distance(begin, par))).fontInfo();
// NOTE(review): the statement controlled by this test is not visible here;
// presumably it sets special_case when no font change is applied.
340 if (first_font == our_font)
// NOTE(review): the end of this condition is not visible in this listing.
344 bool const open_par = runparams.html_make_pars
345 && (!runparams.html_in_par || par != pbegin)
348 // We want to issue the closing tag if either:
349 // (i) We opened it, and either html_in_par is false,
350 // or we're not in the last paragraph, anyway.
351 // (ii) We didn't open it and html_in_par is true,
352 // but we are in the first par, and there is a next par.
353 ParagraphList::const_iterator nextpar = par;
355 bool const close_par =
356 (open_par && (!runparams.html_in_par || nextpar != pend))
357 || (!open_par && runparams.html_in_par && par == pbegin && nextpar != pend);
360 // We do not issue the paragraph id if we are doing
361 // this for the TOC (or some similar purpose)
362 openParTag(xs, lay, par->params(),
363 runparams.for_toc ? "" : par->magicLabel());
366 docstring const deferred = par->simpleLyXHTMLOnePar(buf, xs,
367 runparams, text.outerFont(distance(begin, par)),
368 open_par, close_par);
// Deferred material is emitted verbatim (ESCAPE_NONE).
375 if (!deferred.empty()) {
376 xs << XMLStream::ESCAPE_NONE << deferred << xml::CR();
// Renders a bibliography: an <h2 class='bibliography'> heading whose text is
// the label string of the first paragraph's layout, then a
// <div class='bibliography'> that wraps the paragraphs [pbegin, pend) as
// rendered by makeParagraphs().
// NOTE(review): this listing omits some lines of the function (two parameter
// lines, the end of the heading output, and the return statement).
383 ParagraphList::const_iterator makeBibliography(Buffer const & buf,
385 OutputParams const & runparams,
387 ParagraphList::const_iterator const & pbegin,
388 ParagraphList::const_iterator const & pend)
391 // Use TextClass::htmlTOCLayout() to figure out how we should look.
392 xs << xml::StartTag("h2", "class='bibliography'")
393 << pbegin->layout().labelstring(false)
396 << xml::StartTag("div", "class='bibliography'")
398 makeParagraphs(buf, xs, runparams, text, pbegin, pend);
399 xs << xml::EndTag("div");
// True when the layout is a plain environment or a bibliography environment,
// i.e. its latextype is LATEX_ENVIRONMENT or LATEX_BIB_ENVIRONMENT. List and
// item environments are deliberately not included.
404 bool isNormalEnv(Layout const & lay)
406 return lay.latextype == LATEX_ENVIRONMENT
407 || lay.latextype == LATEX_BIB_ENVIRONMENT;
// Renders the environment formed by the paragraphs [pbegin, pend) as XHTML.
// Outline of what is visible below:
//  - the layout and depth of the first paragraph are remembered;
//  - the environment's opening tag is written, with a computed CSS class
//    ("lyxenum" or "lyxitem" plus a counter name) for enumerate/itemize
//    layouts that declare no HTML class of their own;
//  - each paragraph is then handled according to its layout's latextype:
//    environment-type paragraphs at the starting depth are written as items
//    (with their label, where the layout has one); environment-type
//    paragraphs at another depth are handled by a recursive call; plain
//    paragraphs go through makeParagraphs();
//  - finally any item left open is closed and the environment's tag is closed.
// NOTE(review): this listing omits many lines of the function (two parameter
// lines, the cases of the itemdepth switch, the declaration of `sep`, several
// conditions, `break`s, and the return). Comments below describe only what
// the visible lines show.
411 ParagraphList::const_iterator makeEnvironment(Buffer const & buf,
413 OutputParams const & runparams,
415 ParagraphList::const_iterator const & pbegin,
416 ParagraphList::const_iterator const & pend)
418 ParagraphList::const_iterator const begin = text.paragraphs().begin();
419 ParagraphList::const_iterator par = pbegin;
420 Layout const & bstyle = par->layout();
421 depth_type const origdepth = par->params().depth();
// The environment tag carries the paragraph id only when the layout's HTML
// item tag is empty.
422 string const parId = bstyle.htmlitemtag().empty() ? par->magicLabel() : "";
424 // open tag for this environment
425 if ((bstyle.labeltype == LABEL_ENUMERATE || bstyle.labeltype == LABEL_ITEMIZE)
426 && bstyle.htmlclass().empty()) {
427 // In this case, we have to calculate the CSS class ourselves, each time
429 // FIXME We assume in these cases that the standard counters are being used.
430 // (We also do not deal with 'resume' counters, though I'm not sure that can
431 // be done at all in HTML.)
433 // Code adapted from Buffer::Impl::setLabel
434 bool const isenum = bstyle.labeltype == LABEL_ENUMERATE;
// Default counter base name is "enum" or "lyxitem" when the layout names none.
// NOTE(review): the alternative branch of this expression and the cases of
// the switch that follows are not visible in this listing.
435 docstring enumcounter = bstyle.counter.empty() ?
436 ( isenum ? from_ascii("enum") : from_ascii("lyxitem") ) :
438 switch (par->itemdepth) {
452 // not a valid enumdepth...
455 const string cssClass = string(isenum ? "lyxenum" : "lyxitem") + " "
456 + to_utf8(enumcounter);
457 openParTag(xs, bstyle, cssClass, parId);
460 openParTag(xs, bstyle, parId);
463 // we will on occasion need to remember a layout from before.
464 Layout const * lastlay = nullptr;
466 while (par != pend) {
467 Layout const & style = par->layout();
468 // the counter only gets stepped if we're in some kind of list,
469 // or if it's the first time through.
470 // note that enum, etc, are handled automatically.
471 // FIXME There may be a bug here about user defined enumeration
472 // types. If so, then we'll need to take the counter and add "i",
473 // "ii", etc, as with enum.
474 Counters & cnts = buf.masterBuffer()->params().documentClass().counters();
475 docstring const & cntr = style.counter;
476 if (!style.counter.empty()
477 && (par == pbegin || !isNormalEnv(style))
478 && cnts.hasCounter(cntr)
480 cnts.step(cntr, OutputUpdate);
481 ParagraphList::const_iterator send;
483 switch (style.latextype) {
484 case LATEX_ENVIRONMENT:
485 case LATEX_LIST_ENVIRONMENT:
486 case LATEX_ITEM_ENVIRONMENT: {
487 // There are two possibilities in this case.
488 // One is that we are still in the environment in which we
489 // started---which we will be if the depth is the same.
490 if (par->params().depth() == origdepth) {
491 LATTEST(bstyle == style);
// An item kept open from an earlier iteration is closed before this one starts.
492 if (lastlay != nullptr) {
493 closeItemTag(xs, *lastlay);
497 // this will be positive, if we want to skip the
498 // initial word (if it's been taken for the label).
500 bool const labelfirst = style.htmllabelfirst();
502 openItemTag(xs, style, par->params(), par->magicLabel());
505 if (style.labeltype != LABEL_NO_LABEL &&
506 style.htmllabeltag() != "NONE") {
507 if (isNormalEnv(style)) {
508 // in this case, we print the label only for the first
509 // paragraph (as in a theorem).
511 docstring const lbl =
512 pbegin->params().labelString();
514 openLabelTag(xs, style);
516 closeLabelTag(xs, style);
520 } else { // some kind of list
521 if (style.labeltype == LABEL_MANUAL) {
522 openLabelTag(xs, style);
// For manual labels the first word of the paragraph is written as the label;
// the returned position is passed on below so the body can skip it.
523 sep = par->firstWordLyXHTML(xs, runparams);
524 closeLabelTag(xs, style);
528 docstring const & ls = par->params().labelString();
530 openLabelTag(xs, style);
532 closeLabelTag(xs, style);
537 } // end label output
540 openItemTag(xs, style, par->params(), par->magicLabel());
542 docstring deferred = par->simpleLyXHTMLOnePar(buf, xs, runparams,
543 text.outerFont(distance(begin, par)), true, true, sep);
544 xs << XMLStream::ESCAPE_NONE << deferred;
547 // We may not want to close the tag yet, in particular:
548 // If we're not at the end...
550 // and are doing items...
551 && !isNormalEnv(style)
552 // and if the depth has changed...
553 && par->params().depth() != origdepth) {
554 // then we'll save this layout for later, and close it when
555 // we get another item.
558 closeItemTag(xs, style);
561 // The other possibility is that the depth has increased, in which
562 // case we need to recurse.
564 send = findEndOfEnvironment(par, pend);
565 par = makeEnvironment(buf, xs, runparams, text, par, send);
569 case LATEX_PARAGRAPH:
570 send = findLastParagraph(par, pend);
571 par = makeParagraphs(buf, xs, runparams, text, par, send);
574 case LATEX_BIB_ENVIRONMENT:
577 par = makeParagraphs(buf, xs, runparams, text, par, send);
// After the loop: close an item that was deliberately left open, then the
// environment itself.
586 if (lastlay != nullptr)
587 closeItemTag(xs, *lastlay);
588 closeTag(xs, bstyle);
// Renders a single command-type paragraph (`pbegin`), e.g. a heading.
// Steps the layout's counter when the layout names one, opens the paragraph
// tag, writes the label (wrapped in the layout's label element and followed
// by a space) unless the layout has no label, then writes the paragraph's
// content with simpleLyXHTMLOnePar().
// NOTE(review): this listing omits some lines of the function (two parameter
// lines and whatever follows the content, presumably the closing tag).
594 void makeCommand(Buffer const & buf,
596 OutputParams const & runparams,
598 ParagraphList::const_iterator const & pbegin)
600 Layout const & style = pbegin->layout();
601 if (!style.counter.empty())
602 buf.masterBuffer()->params().
603 documentClass().counters().step(style.counter, OutputUpdate);
// The id attribute is omitted for TOC output, when paragraphs are not being
// made, or when the layout has a non-empty item tag.
// NOTE(review): this tests style.itemtag(), whereas makeEnvironment() tests
// htmlitemtag() for the analogous decision. Confirm which one is intended.
605 bool const make_parid = !runparams.for_toc && runparams.html_make_pars && style.itemtag().empty();
606 openParTag(xs, style, pbegin->params(),
607 make_parid ? pbegin->magicLabel() : "");
609 // Label around sectioning number:
610 // FIXME Probably need to account for LABEL_MANUAL
611 // FIXME Probably also need now to account for labels ABOVE and CENTERED.
612 if (style.labeltype != LABEL_NO_LABEL) {
613 openLabelTag(xs, style);
614 xs << pbegin->params().labelString();
615 closeLabelTag(xs, style);
616 // Otherwise the label might run together with the text
617 xs << from_ascii(" ");
620 ParagraphList::const_iterator const begin = text.paragraphs().begin();
621 pbegin->simpleLyXHTMLOnePar(buf, xs, runparams,
622 text.outerFont(distance(begin, pbegin)));
627 } // end anonymous namespace
// Entry point: writes the paragraphs of `text` selected by
// runparams.par_begin / runparams.par_end as XHTML. An empty range
// (par_begin == par_end) is replaced by the whole text.
// Each paragraph is dispatched on its layout's latextype to makeCommand(),
// makeEnvironment(), makeBibliography() or makeParagraphs(). Paragraphs whose
// layout is in the "Sectioning" category additionally open a <section>,
// after closing any open sections of the same or a deeper TOC level; levels
// equal to Layout::NOT_IN_TOC are tracked on the stack but produce no tag.
// Sections still open when the loop finishes are closed at the end.
// NOTE(review): this listing omits several lines of the function (two
// parameter lines declaring `buf` and `xs`, the head of the assertion whose
// failure branch is visible below, the stack pops, the counter reset in the
// appendix branch, and the `break`s of the switch).
630 void xhtmlParagraphs(Text const & text,
633 OutputParams const & runparams)
635 ParagraphList const & paragraphs = text.paragraphs();
636 if (runparams.par_begin == runparams.par_end) {
637 runparams.par_begin = 0;
638 runparams.par_end = paragraphs.size();
640 pit_type bpit = runparams.par_begin;
641 pit_type const epit = runparams.par_end;
// Failure branch of a check whose condition is not visible here: an HTML
// comment is written to the output and the function returns.
643 { xs << XMLStream::ESCAPE_NONE << "<!-- XHTML output error! -->\n"; return; });
645 OutputParams ourparams = runparams;
646 ParagraphList::const_iterator const pend =
647 (epit == (int) paragraphs.size()) ?
648 paragraphs.end() : paragraphs.iterator_at(epit);
// TOC levels of the sectioning paragraphs seen so far that have not been
// closed yet, innermost on top.
649 std::stack<int> headerLevels;
651 while (bpit < epit) {
652 ParagraphList::const_iterator par = paragraphs.iterator_at(bpit);
653 if (par->params().startOfAppendix()) {
654 // We want to reset the counter corresponding to toplevel sectioning
656 buf.masterBuffer()->params().documentClass().getTOCLayout();
657 docstring const cnt = lay.counter;
660 buf.masterBuffer()->params().documentClass().counters();
664 Layout const & style = par->layout();
// Remember where this iteration started so bpit can be advanced by the number
// of paragraphs consumed.
665 ParagraphList::const_iterator const lastpar = par;
666 ParagraphList::const_iterator send;
668 // Think about adding <section> and/or </section>s.
669 // Document title is not in Sectioning, but rather in FrontMatter, so that it does not need to be taken
671 if (style.category() == from_utf8("Sectioning")) {
672 int level = style.toclevel;
674 // Need to close a previous section if it has the same level or a higher one (close <section> if opening a
675 // <h2> after a <h2>, <h3>, <h4>, <h5> or <h6>). More examples:
676 // - current: h2; back: h1; do not close any <section>
677 // - current: h1; back: h2; close two <section> (first the <h2>, then the <h1>, so a new <h1> can come)
678 while (!headerLevels.empty() && level <= headerLevels.top()) {
679 // Output the tag only if it corresponds to a legit section.
680 int stackLevel = headerLevels.top();
681 if (stackLevel != Layout::NOT_IN_TOC) {
682 xs << xml::EndTag("section");
688 // Open the new section: first push it onto the stack, then output it in XHTML.
689 headerLevels.push(level);
690 // Some sectioning-like elements should not be output (such as FrontMatter).
691 if (level != Layout::NOT_IN_TOC ) {
692 xs << xml::StartTag("section");
697 switch (style.latextype) {
698 case LATEX_COMMAND: {
699 // The files with which we are working never have more than
700 // one paragraph in a command structure.
702 // if (ourparams.html_in_par)
703 // fix it so we don't get sections inside standard, e.g.
704 // note that we may then need to make runparams not const, so we
705 // can communicate that back.
706 // FIXME Maybe this fix should be in the routines themselves, in case
707 // they are called from elsewhere.
708 makeCommand(buf, xs, ourparams, text, par);
712 case LATEX_ENVIRONMENT:
713 case LATEX_LIST_ENVIRONMENT:
714 case LATEX_ITEM_ENVIRONMENT: {
715 // FIXME Same fix here.
716 send = findEndOfEnvironment(par, pend);
717 par = makeEnvironment(buf, xs, ourparams, text, par, send);
720 case LATEX_BIB_ENVIRONMENT: {
721 // FIXME Same fix here.
722 send = findEndOfEnvironment(par, pend);
723 par = makeBibliography(buf, xs, ourparams, text, par, send);
726 case LATEX_PARAGRAPH:
727 send = findLastParagraph(par, pend);
728 par = makeParagraphs(buf, xs, ourparams, text, par, send);
// Advance the paragraph index by however many paragraphs were consumed above.
731 bpit += distance(lastpar, par);
734 // If need be, close <section>s, but only at the end of the document (otherwise, dealt with at the beginning
736 while (!headerLevels.empty() && headerLevels.top() != Layout::NOT_IN_TOC) {
738 xs << xml::EndTag("section");
744 string alignmentToCSS(LyXAlignment align)
747 case LYX_ALIGN_BLOCK:
748 // we are NOT going to use text-align: justify!!
751 case LYX_ALIGN_RIGHT:
753 case LYX_ALIGN_CENTER: