2 * \file output_xhtml.cpp
3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
8 * This code is based upon output_docbook.cpp
10 * Full author contact details are available in file CREDITS.
15 #include "output_xhtml.h"
18 #include "buffer_funcs.h"
19 #include "BufferParams.h"
23 #include "OutputParams.h"
24 #include "Paragraph.h"
25 #include "ParagraphList.h"
26 #include "ParagraphParameters.h"
29 #include "TextClass.h"
31 #include "support/convert.h"
32 #include "support/debug.h"
33 #include "support/lassert.h"
34 #include "support/lstrings.h"
35 #include "support/textutils.h"
40 // Uncomment to activate debugging code.
41 // #define XHTML_DEBUG
44 using namespace lyx::support;
// Map a font change to the name of the HTML element used to render it.
// Emphasis, bold, noun, the underline variants, the strike-out variants and
// italic each get a dedicated element; every shape, family and size change
// listed afterwards is rendered with a generic "span" (its styling comes
// from the attribute produced by fontToHtmlAttribute()).
// NOTE(review): the switch header and whatever follows the last case are
// not visible in this excerpt.
49 docstring fontToHtmlTag(xml::FontTypes type)
52 case xml::FontTypes::FT_EMPH:
53 return from_utf8("em");
54 case xml::FontTypes::FT_BOLD:
55 return from_utf8("b");
56 case xml::FontTypes::FT_NOUN:
57 return from_utf8("dfn");
// All three underline styles share the same element.
58 case xml::FontTypes::FT_UBAR:
59 case xml::FontTypes::FT_WAVE:
60 case xml::FontTypes::FT_DBAR:
61 return from_utf8("u");
// Both strike-out styles share the same element.
62 case xml::FontTypes::FT_SOUT:
63 case xml::FontTypes::FT_XOUT:
64 return from_utf8("del");
65 case xml::FontTypes::FT_ITALIC:
66 return from_utf8("i");
// Everything below has no dedicated element and falls back to "span".
67 case xml::FontTypes::FT_UPRIGHT:
68 case xml::FontTypes::FT_SLANTED:
69 case xml::FontTypes::FT_SMALLCAPS:
70 case xml::FontTypes::FT_ROMAN:
71 case xml::FontTypes::FT_SANS:
72 case xml::FontTypes::FT_TYPE:
73 case xml::FontTypes::FT_SIZE_TINY:
74 case xml::FontTypes::FT_SIZE_SCRIPT:
75 case xml::FontTypes::FT_SIZE_FOOTNOTE:
76 case xml::FontTypes::FT_SIZE_SMALL:
77 case xml::FontTypes::FT_SIZE_NORMAL:
78 case xml::FontTypes::FT_SIZE_LARGE:
79 case xml::FontTypes::FT_SIZE_LARGER:
80 case xml::FontTypes::FT_SIZE_LARGEST:
81 case xml::FontTypes::FT_SIZE_HUGE:
82 case xml::FontTypes::FT_SIZE_HUGER:
83 case xml::FontTypes::FT_SIZE_INCREASE:
84 case xml::FontTypes::FT_SIZE_DECREASE:
85 return from_utf8("span");
92 docstring fontToHtmlAttribute(xml::FontTypes type)
95 case xml::FontTypes::FT_EMPH:
96 case xml::FontTypes::FT_BOLD:
97 return from_ascii("");
98 case xml::FontTypes::FT_NOUN:
99 return from_ascii("class='lyxnoun'");
100 case xml::FontTypes::FT_UBAR:
101 return from_ascii("");
102 case xml::FontTypes::FT_DBAR:
103 return from_ascii("class='dline'");
104 case xml::FontTypes::FT_XOUT:
105 case xml::FontTypes::FT_SOUT:
106 return from_ascii("class='strikeout'");
107 case xml::FontTypes::FT_WAVE:
108 return from_ascii("class='wline'");
109 case xml::FontTypes::FT_ITALIC:
110 return from_ascii("");
111 case xml::FontTypes::FT_UPRIGHT:
112 return from_ascii("style='font-style:normal;'");
113 case xml::FontTypes::FT_SLANTED:
114 return from_ascii("style='font-style:oblique;'");
115 case xml::FontTypes::FT_SMALLCAPS:
116 return from_ascii("style='font-variant:small-caps;'");
117 case xml::FontTypes::FT_ROMAN:
118 return from_ascii("style='font-family:serif;'");
119 case xml::FontTypes::FT_SANS:
120 return from_ascii("style='font-family:sans-serif;'");
121 case xml::FontTypes::FT_TYPE:
122 return from_ascii("style='font-family:monospace;'");
123 case xml::FontTypes::FT_SIZE_TINY:
124 case xml::FontTypes::FT_SIZE_SCRIPT:
125 case xml::FontTypes::FT_SIZE_FOOTNOTE:
126 return from_ascii("style='font-size:x-small;'");
127 case xml::FontTypes::FT_SIZE_SMALL:
128 return from_ascii("style='font-size:small;'");
129 case xml::FontTypes::FT_SIZE_NORMAL:
130 return from_ascii("style='font-size:normal;'");
131 case xml::FontTypes::FT_SIZE_LARGE:
132 return from_ascii("style='font-size:large;'");
133 case xml::FontTypes::FT_SIZE_LARGER:
134 case xml::FontTypes::FT_SIZE_LARGEST:
135 return from_ascii("style='font-size:x-large;'");
136 case xml::FontTypes::FT_SIZE_HUGE:
137 case xml::FontTypes::FT_SIZE_HUGER:
138 return from_ascii("style='font-size:xx-large;'");
139 case xml::FontTypes::FT_SIZE_INCREASE:
140 return from_ascii("style='font-size:larger;'");
141 case xml::FontTypes::FT_SIZE_DECREASE:
142 return from_ascii("style='font-size:smaller;'");
145 return from_ascii("");
149 xml::FontTag xhtmlStartFontTag(xml::FontTypes type)
151 return xml::FontTag(fontToHtmlTag(type), fontToHtmlAttribute(type), type);
155 xml::EndFontTag xhtmlEndFontTag(xml::FontTypes type)
157 return xml::EndFontTag(fontToHtmlTag(type), type);
162 // convenience functions
164 inline void openParTag(XMLStream & xs, Layout const & lay,
165 const std::string & parlabel)
167 string attrs = lay.htmlattr();
168 if (!parlabel.empty())
169 attrs += " id='" + parlabel + "'";
170 xs << xml::ParTag(lay.htmltag(), attrs);
// Emit the opening paragraph tag for layout `lay`, taking the paragraph's
// alignment (params.align(), converted by alignmentToCSS()) into account.
// Two paths are visible: a delegation to the three-argument overload, and
// an explicit ParTag whose attributes are lay.htmlattr() plus a
// "text-align" style and, when parlabel is non-empty, an id.
// NOTE(review): the condition choosing between the two paths is not
// visible in this excerpt -- presumably the plain overload is used when no
// alignment CSS is produced; confirm.
174 void openParTag(XMLStream & xs, Layout const & lay,
175 ParagraphParameters const & params,
176 const std::string & parlabel)
178 // FIXME Are there other things we should handle here?
179 string const align = alignmentToCSS(params.align());
181 openParTag(xs, lay, parlabel);
// Layout attributes, then the alignment style, then the optional id.
184 string attrs = lay.htmlattr() + " style='text-align: " + align + ";'";
185 if (!parlabel.empty())
186 attrs += " id='" + parlabel + "'";
187 xs << xml::ParTag(lay.htmltag(), attrs);
191 inline void closeTag(XMLStream & xs, Layout const & lay)
193 xs << xml::EndTag(lay.htmltag());
197 inline void openLabelTag(XMLStream & xs, Layout const & lay)
199 xs << xml::StartTag(lay.htmllabeltag(), lay.htmllabelattr());
203 inline void closeLabelTag(XMLStream & xs, Layout const & lay)
205 xs << xml::EndTag(lay.htmllabeltag());
209 inline void openItemTag(XMLStream & xs, Layout const & lay)
211 xs << xml::StartTag(lay.htmlitemtag(), lay.htmlitemattr(), true);
// Emit the start tag for the layout's item element, taking the paragraph's
// alignment (params.align(), converted by alignmentToCSS()) into account.
// Two paths are visible: a delegation to the two-argument overload, and an
// explicit StartTag carrying a "text-align" style.
// NOTE(review): the condition choosing between the two paths is not
// visible in this excerpt -- presumably the plain overload is used when no
// alignment CSS is produced; confirm.
// NOTE(review): the styled path builds its attributes from lay.htmlattr(),
// whereas the two-argument overload uses lay.htmlitemattr() and passes an
// extra `true` to StartTag -- confirm the difference is intended.
215 void openItemTag(XMLStream & xs, Layout const & lay,
216 ParagraphParameters const & params)
218 // FIXME Are there other things we should handle here?
219 string const align = alignmentToCSS(params.align());
221 openItemTag(xs, lay);
224 string attrs = lay.htmlattr() + " style='text-align: " + align + ";'";
225 xs << xml::StartTag(lay.htmlitemtag(), attrs);
229 inline void closeItemTag(XMLStream & xs, Layout const & lay)
231 xs << xml::EndTag(lay.htmlitemtag());
234 // end of convenience functions
// Scan forward from the paragraph after `p` while the end `pend` has not
// been reached and the paragraph's layout has latextype LATEX_PARAGRAPH.
// Note that `p` itself is not tested: the loop starts by advancing past it.
// NOTE(review): the loop body and the return statement are not visible in
// this excerpt -- presumably the iterator where the scan stopped is
// returned; confirm.
236 ParagraphList::const_iterator findLastParagraph(
237 ParagraphList::const_iterator p,
238 ParagraphList::const_iterator const & pend)
240 for (++p; p != pend && p->layout().latextype == LATEX_PARAGRAPH; ++p)
// Scan forward from the paragraph after `pstart` (never past `pend`) to
// find where the environment begun at `pstart` ends. The layout and depth
// of `pstart` are remembered and each later paragraph is tested against
// them: command layouts, a decrease in depth, an increase in depth, and
// finally a plain paragraph or a change of layout.
// NOTE(review): the statements executed when each test succeeds, and the
// return statement, are not visible in this excerpt.
247 ParagraphList::const_iterator findEndOfEnvironment(
248 ParagraphList::const_iterator const & pstart,
249 ParagraphList::const_iterator const & pend)
251 ParagraphList::const_iterator p = pstart;
// Reference layout and depth: those of the environment's first paragraph.
252 Layout const & bstyle = p->layout();
253 size_t const depth = p->params().depth();
254 for (++p; p != pend; ++p) {
255 Layout const & style = p->layout();
256 // It shouldn't happen that e.g. a section command occurs inside
257 // a quotation environment, at a higher depth, but as of 6/2009,
258 // it can happen. We pretend that it's just at lowest depth.
259 if (style.latextype == LATEX_COMMAND)
262 // If depth is down, we're done
263 if (p->params().depth() < depth)
266 // If depth is up, we're not done
267 if (p->params().depth() > depth)
270 // FIXME I am not sure about the first check.
271 // Surely we *could* have different layouts that count as
272 // LATEX_PARAGRAPH, right?
273 if (style.latextype == LATEX_PARAGRAPH || style != bstyle)
// Output the run of paragraphs [pbegin, pend) as XHTML.
// For each paragraph this steps the layout's counter (if it has one),
// decides whether a paragraph tag has to be opened and/or closed (see the
// comments below), opens the tag with openParTag() and writes the contents
// with Paragraph::simpleLyXHTMLOnePar(). Any non-empty "deferred" string
// returned by that call is written unescaped, followed by xml::CR().
// NOTE(review): the `xs` and `text` parameters used below are declared on
// lines not visible in this excerpt, as are several statements (the tail
// of the open_par condition, the advance of nextpar, the guard around
// openParTag, and the return).
280 ParagraphList::const_iterator makeParagraphs(Buffer const & buf,
282 OutputParams const & runparams,
284 ParagraphList::const_iterator const & pbegin,
285 ParagraphList::const_iterator const & pend)
// First paragraph of the whole text; used to turn `par` into an index
// for text.outerFont().
287 ParagraphList::const_iterator const begin = text.paragraphs().begin();
288 ParagraphList::const_iterator par = pbegin;
289 for (; par != pend; ++par) {
290 Layout const & lay = par->layout();
// Counters live in the master buffer's document class.
291 if (!lay.counter.empty())
292 buf.masterBuffer()->params().
293 documentClass().counters().step(lay.counter, OutputUpdate);
295 // FIXME We should see if there's a label to be output and
296 // do something with it.
300 // We want to open the paragraph tag if:
301 // (i) the current layout permits multiple paragraphs
302 // (ii) we are either not already inside a paragraph (HTMLIsBlock) OR
303 // we are, but this is not the first paragraph
305 // But there is also a special case, and we first see whether we are in it.
306 // We do not want to open the paragraph tag if this paragraph contains
307 // only one item, and that item is "inline", i.e., not HTMLIsBlock (such
308 // as a branch). On the other hand, if that single item has a font change
309 // applied to it, then we still do need to open the paragraph.
311 // Obviously, this is very fragile. The main reason we need to do this is
312 // because of branches, e.g., a branch that contains an entire new section.
313 // We do not really want to wrap that whole thing in a <div>...</div>.
314 bool special_case = false;
// Only a paragraph consisting of exactly one element can be the special
// case; getInset(0) is then that element if it is an inset.
315 Inset const * specinset = par->size() == 1 ? par->getInset(0) : nullptr;
316 if (specinset && !specinset->getLayout().htmlisblock()) {
317 Layout const & style = par->layout();
// Font the layout prescribes at the start of the paragraph ...
318 FontInfo const first_font = style.labeltype == LABEL_MANUAL ?
319 style.labelfont : style.font;
// ... versus the font actually in effect at position 0.
320 FontInfo const our_font =
321 par->getFont(buf.masterBuffer()->params(), 0,
322 text.outerFont(distance(begin, par))).fontInfo();
323 if (first_font == our_font)
327 bool const open_par = runparams.html_make_pars
328 && (!runparams.html_in_par || par != pbegin)
331 // We want to issue the closing tag if either:
332 // (i) We opened it, and either html_in_par is false,
333 // or we're not in the last paragraph, anyway.
334 // (ii) We didn't open it and html_in_par is true,
335 // but we are in the first par, and there is a next par.
336 ParagraphList::const_iterator nextpar = par;
338 bool const close_par =
339 (open_par && (!runparams.html_in_par || nextpar != pend))
340 || (!open_par && runparams.html_in_par && par == pbegin && nextpar != pend);
343 // We do not issue the paragraph id if we are doing
344 // this for the TOC (or some similar purpose)
345 openParTag(xs, lay, par->params(),
346 runparams.for_toc ? "" : par->magicLabel());
349 docstring const deferred = par->simpleLyXHTMLOnePar(buf, xs,
350 runparams, text.outerFont(distance(begin, par)),
351 open_par, close_par);
// The deferred string is written as-is (no escaping).
358 if (!deferred.empty()) {
359 xs << XMLStream::ESCAPE_NONE << deferred << xml::CR();
// Output the bibliography paragraphs [pbegin, pend) as XHTML: an <h2>
// heading of class 'bibliography' containing the label string of the first
// paragraph's layout, then a <div> of class 'bibliography' wrapping the
// paragraphs themselves, which are written by makeParagraphs().
// NOTE(review): the `xs` and `text` parameters, the end of the heading and
// the return statement are on lines not visible in this excerpt.
366 ParagraphList::const_iterator makeBibliography(Buffer const & buf,
368 OutputParams const & runparams,
370 ParagraphList::const_iterator const & pbegin,
371 ParagraphList::const_iterator const & pend)
374 // Use TextClass::htmlTOCLayout() to figure out how we should look.
375 xs << xml::StartTag("h2", "class='bibliography'")
376 << pbegin->layout().labelstring(false)
379 << xml::StartTag("div", "class='bibliography'")
381 makeParagraphs(buf, xs, runparams, text, pbegin, pend);
382 xs << xml::EndTag("div");
387 bool isNormalEnv(Layout const & lay)
389 return lay.latextype == LATEX_ENVIRONMENT
390 || lay.latextype == LATEX_BIB_ENVIRONMENT;
// Output the environment made up of the paragraphs [pbegin, pend) as XHTML.
// The environment's tag is opened with openParTag() using the layout of
// the first paragraph; the paragraphs are then walked and dispatched on
// their latextype:
//  - environment / list / item paragraphs at the original depth are output
//    as items, optionally preceded by a label;
//  - such paragraphs at another depth are handled by a recursive call over
//    the range found by findEndOfEnvironment();
//  - plain paragraphs are handed to makeParagraphs().
// At the end any item tag still pending (lastlay) and the environment tag
// itself are closed.
// NOTE(review): the `xs` and `text` parameters, the declaration of `sep`,
// several branch conditions, the `break`s and the return statement are on
// lines not visible in this excerpt.
394 ParagraphList::const_iterator makeEnvironment(Buffer const & buf,
396 OutputParams const & runparams,
398 ParagraphList::const_iterator const & pbegin,
399 ParagraphList::const_iterator const & pend)
// First paragraph of the whole text; used to turn `par` into an index
// for text.outerFont().
401 ParagraphList::const_iterator const begin = text.paragraphs().begin();
402 ParagraphList::const_iterator par = pbegin;
// Layout and depth of the environment being output.
403 Layout const & bstyle = par->layout();
404 depth_type const origdepth = pbegin->params().depth();
406 // open tag for this environment
407 openParTag(xs, bstyle, pbegin->magicLabel());
410 // we will on occasion need to remember a layout from before.
411 Layout const * lastlay = nullptr;
413 while (par != pend) {
414 Layout const & style = par->layout();
415 // the counter only gets stepped if we're in some kind of list,
416 // or if it's the first time through.
417 // note that enum, etc, are handled automatically.
418 // FIXME There may be a bug here about user defined enumeration
419 // types. If so, then we'll need to take the counter and add "i",
420 // "ii", etc, as with enum.
421 Counters & cnts = buf.masterBuffer()->params().documentClass().counters();
422 docstring const & cntr = style.counter;
423 if (!style.counter.empty()
424 && (par == pbegin || !isNormalEnv(style))
425 && cnts.hasCounter(cntr)
427 cnts.step(cntr, OutputUpdate);
// End of the sub-range handed to a helper or to the recursive call.
428 ParagraphList::const_iterator send;
430 switch (style.latextype) {
431 case LATEX_ENVIRONMENT:
432 case LATEX_LIST_ENVIRONMENT:
433 case LATEX_ITEM_ENVIRONMENT: {
434 // There are two possibilities in this case.
435 // One is that we are still in the environment in which we
436 // started---which we will be if the depth is the same.
437 if (par->params().depth() == origdepth) {
438 LATTEST(bstyle == style);
// An item left open by an earlier iteration is closed before the next
// one starts.
439 if (lastlay != nullptr) {
440 closeItemTag(xs, *lastlay);
444 // this will be positive, if we want to skip the
445 // initial word (if it's been taken for the label).
447 bool const labelfirst = style.htmllabelfirst();
449 openItemTag(xs, style, par->params());
// A label is output only if the layout has one and its label tag is not
// the special value "NONE".
452 if (style.labeltype != LABEL_NO_LABEL &&
453 style.htmllabeltag() != "NONE") {
454 if (isNormalEnv(style)) {
455 // in this case, we print the label only for the first
456 // paragraph (as in a theorem).
458 docstring const lbl =
459 pbegin->params().labelString();
461 openLabelTag(xs, style);
463 closeLabelTag(xs, style);
467 } else { // some kind of list
// Manual labels: the paragraph's first word is output as the label and
// `sep` receives the value returned by firstWordLyXHTML().
468 if (style.labeltype == LABEL_MANUAL) {
469 openLabelTag(xs, style);
470 sep = par->firstWordLyXHTML(xs, runparams);
471 closeLabelTag(xs, style);
475 openLabelTag(xs, style);
476 xs << par->params().labelString();
477 closeLabelTag(xs, style);
481 } // end label output
484 openItemTag(xs, style, par->params());
486 docstring deferred = par->simpleLyXHTMLOnePar(buf, xs, runparams,
487 text.outerFont(distance(begin, par)), true, true, sep);
// The deferred string is written as-is (no escaping).
488 xs << XMLStream::ESCAPE_NONE << deferred;
491 // We may not want to close the tag yet, in particular:
492 // If we're not at the end...
494 // and are doing items...
495 && !isNormalEnv(style)
496 // and if the depth has changed...
497 && par->params().depth() != origdepth) {
498 // then we'll save this layout for later, and close it when
499 // we get another item.
502 closeItemTag(xs, style);
505 // The other possibility is that the depth has increased, in which
506 // case we need to recurse.
508 send = findEndOfEnvironment(par, pend);
509 par = makeEnvironment(buf, xs, runparams, text, par, send);
513 case LATEX_PARAGRAPH:
514 send = findLastParagraph(par, pend);
515 par = makeParagraphs(buf, xs, runparams, text, par, send);
518 case LATEX_BIB_ENVIRONMENT:
521 par = makeParagraphs(buf, xs, runparams, text, par, send);
// Close an item that is still open, then the environment itself.
530 if (lastlay != nullptr)
531 closeItemTag(xs, *lastlay);
532 closeTag(xs, bstyle);
// Output the single command paragraph at `pbegin` (e.g. a sectioning
// heading) as XHTML: step the layout's counter if it has one, open the
// paragraph tag, write the label followed by a space when the layout has a
// label, then write the paragraph contents with simpleLyXHTMLOnePar().
// NOTE(review): the `xs` and `text` parameters and whatever follows the
// contents (presumably the closing tag) are on lines not visible in this
// excerpt.
538 void makeCommand(Buffer const & buf,
540 OutputParams const & runparams,
542 ParagraphList::const_iterator const & pbegin)
544 Layout const & style = pbegin->layout();
// Counters live in the master buffer's document class.
545 if (!style.counter.empty())
546 buf.masterBuffer()->params().
547 documentClass().counters().step(style.counter, OutputUpdate);
// An id is attached only when not producing TOC output and when
// paragraphs are being made at all.
549 bool const make_parid = !runparams.for_toc && runparams.html_make_pars;
551 openParTag(xs, style, pbegin->params(),
552 make_parid ? pbegin->magicLabel() : "");
554 // Label around sectioning number:
555 // FIXME Probably need to account for LABEL_MANUAL
556 // FIXME Probably also need now to account for labels ABOVE and CENTERED.
557 if (style.labeltype != LABEL_NO_LABEL) {
558 openLabelTag(xs, style);
559 xs << pbegin->params().labelString();
560 closeLabelTag(xs, style);
561 // Otherwise the label might run together with the text
562 xs << from_ascii(" ");
565 ParagraphList::const_iterator const begin = text.paragraphs().begin();
566 pbegin->simpleLyXHTMLOnePar(buf, xs, runparams,
567 text.outerFont(distance(begin, pbegin)));
572 } // end anonymous namespace
// Output the paragraphs of `text` in the range runparams.par_begin ..
// runparams.par_end as XHTML. When the two bounds are equal, the whole
// paragraph list is used instead.
// Paragraphs whose layout is in the "Sectioning" category are wrapped in
// <section> elements: a stack of TOC levels records the open sections so
// that sections of the same or a deeper level are closed before a new one
// opens, and those still open are closed at the end. Each paragraph (or
// run of paragraphs) is then dispatched on its latextype to makeCommand(),
// makeEnvironment(), makeBibliography() or makeParagraphs().
// NOTE(review): the `buf` and `xs` parameters, the stack pops, the
// `break`s and several other statements are on lines not visible in this
// excerpt.
575 void xhtmlParagraphs(Text const & text,
578 OutputParams const & runparams)
580 ParagraphList const & paragraphs = text.paragraphs();
// An empty range means "everything".
581 if (runparams.par_begin == runparams.par_end) {
582 runparams.par_begin = 0;
583 runparams.par_end = paragraphs.size();
585 pit_type bpit = runparams.par_begin;
586 pit_type const epit = runparams.par_end;
// Tail of a check whose condition is on a line not visible here: this
// handler writes an error comment into the output and returns.
588 { xs << XMLStream::ESCAPE_NONE << "<!-- XHTML output error! -->\n"; return; });
// Local copy of the output parameters, passed to the helpers below.
590 OutputParams ourparams = runparams;
591 ParagraphList::const_iterator const pend =
592 (epit == (int) paragraphs.size()) ?
593 paragraphs.end() : paragraphs.iterator_at(epit);
// TOC levels of the sectioning paragraphs seen so far and not yet popped.
594 std::stack<int> headerLevels;
596 while (bpit < epit) {
597 ParagraphList::const_iterator par = paragraphs.iterator_at(bpit);
598 if (par->params().startOfAppendix()) {
599 // We want to reset the counter corresponding to toplevel sectioning
601 buf.masterBuffer()->params().documentClass().getTOCLayout();
602 docstring const cnt = lay.counter;
605 buf.masterBuffer()->params().documentClass().counters();
609 Layout const & style = par->layout();
// Remember where this iteration started so bpit can be advanced by the
// number of paragraphs consumed.
610 ParagraphList::const_iterator const lastpar = par;
611 ParagraphList::const_iterator send;
613 // Think about adding <section> and/or </section>s.
614 // Document title is not in Sectioning, but rather in FrontMatter, so that it does not need to be taken
616 if (style.category() == from_utf8("Sectioning")) {
617 int level = style.toclevel;
619 // Need to close a previous section if it has the same level or a higher one (close <section> if opening a
620 // <h2> after a <h2>, <h3>, <h4>, <h5> or <h6>). More examples:
621 // - current: h2; back: h1; do not close any <section>
622 // - current: h1; back: h2; close two <section> (first the <h2>, then the <h1>, so a new <h1> can come)
623 while (!headerLevels.empty() && level <= headerLevels.top()) {
624 // Output the tag only if it corresponds to a legit section.
625 int stackLevel = headerLevels.top();
626 if (stackLevel != Layout::NOT_IN_TOC) {
627 xs << xml::EndTag("section");
633 // Open the new section: first push it onto the stack, then output it in XHTML.
634 headerLevels.push(level);
635 // Some sectioning-like elements should not be output (such as FrontMatter).
636 if (level != Layout::NOT_IN_TOC ) {
637 xs << xml::StartTag("section");
642 switch (style.latextype) {
643 case LATEX_COMMAND: {
644 // The files with which we are working never have more than
645 // one paragraph in a command structure.
647 // if (ourparams.html_in_par)
648 // fix it so we don't get sections inside standard, e.g.
649 // note that we may then need to make runparams not const, so we
650 // can communicate that back.
651 // FIXME Maybe this fix should be in the routines themselves, in case
652 // they are called from elsewhere.
653 makeCommand(buf, xs, ourparams, text, par);
657 case LATEX_ENVIRONMENT:
658 case LATEX_LIST_ENVIRONMENT:
659 case LATEX_ITEM_ENVIRONMENT: {
660 // FIXME Same fix here.
661 send = findEndOfEnvironment(par, pend);
662 par = makeEnvironment(buf, xs, ourparams, text, par, send);
665 case LATEX_BIB_ENVIRONMENT: {
666 // FIXME Same fix here.
667 send = findEndOfEnvironment(par, pend);
668 par = makeBibliography(buf, xs, ourparams, text, par, send);
671 case LATEX_PARAGRAPH:
672 send = findLastParagraph(par, pend);
673 par = makeParagraphs(buf, xs, ourparams, text, par, send);
// Advance the paragraph index by however many paragraphs were consumed.
676 bpit += distance(lastpar, par);
679 // If need be, close <section>s, but only at the end of the document (otherwise, dealt with at the beginning
681 while (!headerLevels.empty() && headerLevels.top() != Layout::NOT_IN_TOC) {
683 xs << xml::EndTag("section");
689 string alignmentToCSS(LyXAlignment align)
692 case LYX_ALIGN_BLOCK:
693 // we are NOT going to use text-align: justify!!
696 case LYX_ALIGN_RIGHT:
698 case LYX_ALIGN_CENTER: