2 * \file output_docbook.cpp
3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
6 * \author Lars Gullik Bjønnes
9 * Full author contact details are available in file CREDITS.
15 #include "buffer_funcs.h"
16 #include "BufferParams.h"
18 #include "InsetList.h"
19 #include "Paragraph.h"
20 #include "ParagraphList.h"
21 #include "ParagraphParameters.h"
24 #include "TextClass.h"
26 #include "insets/InsetBibtex.h"
27 #include "insets/InsetBibitem.h"
28 #include "insets/InsetLabel.h"
29 #include "insets/InsetNote.h"
31 #include "support/lassert.h"
39 using namespace lyx::support;
// Maps a font type to the name of the DocBook tag used for it; the result is a std::string.
// NOTE(review): this listing omits its short lines (braces, most `return` statements), so the
// tag returned for each group of case labels cannot be read from here -- check the full file.
45 std::string fontToDocBookTag(xml::FontTypes type)
48 case xml::FontTypes::FT_EMPH:
49 case xml::FontTypes::FT_BOLD:
51 case xml::FontTypes::FT_NOUN:
53 case xml::FontTypes::FT_UBAR:
54 case xml::FontTypes::FT_WAVE:
55 case xml::FontTypes::FT_DBAR:
56 case xml::FontTypes::FT_SOUT:
57 case xml::FontTypes::FT_XOUT:
58 case xml::FontTypes::FT_ITALIC:
59 case xml::FontTypes::FT_UPRIGHT:
60 case xml::FontTypes::FT_SLANTED:
61 case xml::FontTypes::FT_SMALLCAPS:
62 case xml::FontTypes::FT_ROMAN:
63 case xml::FontTypes::FT_SANS:
65 case xml::FontTypes::FT_TYPE:
67 case xml::FontTypes::FT_SIZE_TINY:
68 case xml::FontTypes::FT_SIZE_SCRIPT:
69 case xml::FontTypes::FT_SIZE_FOOTNOTE:
70 case xml::FontTypes::FT_SIZE_SMALL:
71 case xml::FontTypes::FT_SIZE_NORMAL:
72 case xml::FontTypes::FT_SIZE_LARGE:
73 case xml::FontTypes::FT_SIZE_LARGER:
74 case xml::FontTypes::FT_SIZE_LARGEST:
75 case xml::FontTypes::FT_SIZE_HUGE:
76 case xml::FontTypes::FT_SIZE_HUGER:
77 case xml::FontTypes::FT_SIZE_INCREASE:
78 case xml::FontTypes::FT_SIZE_DECREASE:
// Returns the role string associated with a font type; fontToAttribute() wraps it as role='...'.
// NOTE(review): only the "size_*" returns are visible in this listing; the returns for the
// other case labels fall on lines the listing omits.
86 string fontToRole(xml::FontTypes type)
88 // Specific fonts are achieved with roles. The only common ones are "" for basic emphasis,
89 // and "bold"/"strong" for bold. With some specific options, other roles are copied into
90 // HTML output (via the DocBook XSLT sheets); otherwise, if not recognised, they are just ignored.
91 // Hence, it is not a problem to have many roles by default here.
92 // See https://www.sourceware.org/ml/docbook/2003-05/msg00269.html
94 case xml::FontTypes::FT_ITALIC:
95 case xml::FontTypes::FT_EMPH:
97 case xml::FontTypes::FT_BOLD:
99 case xml::FontTypes::FT_NOUN: // Outputs a <person>
100 case xml::FontTypes::FT_TYPE: // Outputs a <code>
102 case xml::FontTypes::FT_UBAR:
105 // All other roles are non-standard for DocBook.
107 case xml::FontTypes::FT_WAVE:
109 case xml::FontTypes::FT_DBAR:
111 case xml::FontTypes::FT_SOUT:
113 case xml::FontTypes::FT_XOUT:
115 case xml::FontTypes::FT_UPRIGHT:
117 case xml::FontTypes::FT_SLANTED:
119 case xml::FontTypes::FT_SMALLCAPS:
121 case xml::FontTypes::FT_ROMAN:
123 case xml::FontTypes::FT_SANS:
125 case xml::FontTypes::FT_SIZE_TINY:
127 case xml::FontTypes::FT_SIZE_SCRIPT:
128 return "size_script";
129 case xml::FontTypes::FT_SIZE_FOOTNOTE:
130 return "size_footnote";
131 case xml::FontTypes::FT_SIZE_SMALL:
133 case xml::FontTypes::FT_SIZE_NORMAL:
134 return "size_normal";
135 case xml::FontTypes::FT_SIZE_LARGE:
137 case xml::FontTypes::FT_SIZE_LARGER:
138 return "size_larger";
139 case xml::FontTypes::FT_SIZE_LARGEST:
140 return "size_largest";
141 case xml::FontTypes::FT_SIZE_HUGE:
143 case xml::FontTypes::FT_SIZE_HUGER:
145 case xml::FontTypes::FT_SIZE_INCREASE:
146 return "size_increase";
147 case xml::FontTypes::FT_SIZE_DECREASE:
148 return "size_decrease";
// Builds the attribute text for a font tag: role='<role>', where <role> comes from fontToRole().
// NOTE(review): per the comment below, the attribute should only be produced when a role
// exists; the corresponding check is not visible in this listing.
155 string fontToAttribute(xml::FontTypes type) {
156 // If there is a role (i.e. nonstandard use of a tag), output the attribute. Otherwise, the sheer tag is sufficient
158 string role = fontToRole(type);
160 return "role='" + role + "'";
167 // Convenience functions to open and close tags. First, very low-level ones to ensure a consistent new-line behaviour.
171 // Contents of the block.
176 // <paratag>Contents of the paragraph.</paratag>
179 // Content before<inlinetag>Contents of the paragraph.</inlinetag>Content after
// Inline style: writes the start tag `tag` with attributes `attr` to the stream.
181 void openInlineTag(XMLStream & xs, const std::string & tag, const std::string & attr)
183 xs << xml::StartTag(tag, attr);
// Inline style: writes the end tag for `tag` to the stream.
187 void closeInlineTag(XMLStream & xs, const std::string & tag)
189 xs << xml::EndTag(tag);
// Paragraph style: writes the start tag `tag` with attributes `attr`, after checking whether
// the last thing written to the stream was a line break (isLastTagCR()).
// NOTE(review): the statement guarded by that check is not visible in this listing; presumably
// it emits a line break so that the tag starts on its own line.
193 void openParTag(XMLStream & xs, const std::string & tag, const std::string & attr)
195 if (!xs.isLastTagCR())
197 xs << xml::StartTag(tag, attr);
// Paragraph style: writes the end tag for `tag` to the stream.
201 void closeParTag(XMLStream & xs, const std::string & tag)
203 xs << xml::EndTag(tag);
// Block style: writes the start tag `tag` with attributes `attr`, after checking whether the
// last thing written to the stream was a line break (isLastTagCR()).
// NOTE(review): the statement guarded by that check is not visible in this listing.
208 void openBlockTag(XMLStream & xs, const std::string & tag, const std::string & attr)
210 if (!xs.isLastTagCR())
212 xs << xml::StartTag(tag, attr);
// Block style: writes the end tag for `tag`, after checking whether the last thing written to
// the stream was a line break (isLastTagCR()).
// NOTE(review): the statement guarded by that check is not visible in this listing.
217 void closeBlockTag(XMLStream & xs, const std::string & tag)
219 if (!xs.isLastTagCR())
221 xs << xml::EndTag(tag);
// Opens `tag` (with attributes `attr`) through the low-level helper selected by `tagtype`:
// "paragraph" (or any tag named "para"), "block" or "inline". Any other tag type is reported
// with xs.writeError(). An empty tag and the placeholder "NONE" are special-cased up front.
// NOTE(review): the statement guarded by the first check is not visible in this listing
// (presumably an early return).
226 void openTag(XMLStream & xs, const std::string & tag, const std::string & attr, const std::string & tagtype)
228 if (tag.empty() || tag == "NONE")
231 if (tag == "para" || tagtype == "paragraph") // Special case for <para>: always considered as a paragraph.
232 openParTag(xs, tag, attr);
233 else if (tagtype == "block")
234 openBlockTag(xs, tag, attr);
235 else if (tagtype == "inline")
236 openInlineTag(xs, tag, attr);
238 xs.writeError("Unrecognised tag type '" + tagtype + "' for '" + tag + " " + attr + "'");
// Closes `tag` through the low-level helper selected by `tagtype`: "paragraph" (or any tag
// named "para"), "block" or "inline". Any other tag type is reported with xs.writeError().
// An empty tag and the placeholder "NONE" are special-cased up front.
// NOTE(review): the statement guarded by the first check is not visible in this listing
// (presumably an early return).
242 void closeTag(XMLStream & xs, const std::string & tag, const std::string & tagtype)
244 if (tag.empty() || tag == "NONE")
247 if (tag == "para" || tagtype == "paragraph") // Special case for <para>: always considered as a paragraph.
248 closeParTag(xs, tag);
249 else if (tagtype == "block")
250 closeBlockTag(xs, tag);
251 else if (tagtype == "inline")
252 closeInlineTag(xs, tag);
254 xs.writeError("Unrecognised tag type '" + tagtype + "' for '" + tag + "'");
258 // Higher-level convenience functions.
// Opens the tags required by the layout of `par`: the wrapper tag (subject to openWrapper),
// then the paragraph tag, the item tag and the inner item tag.
// `prevpar` is the preceding paragraph and may be null; it is only consulted to decide whether
// the wrapper tag has to be opened.
260 void openParTag(XMLStream & xs, const Paragraph * par, const Paragraph * prevpar)
262 Layout const & lay = par->layout();
267 // When should the wrapper be opened here? Only if the previous paragraph has the SAME wrapper tag
268 // (usually, they won't have the same layout) and the CURRENT one allows merging.
269 // The main use case is author information in several paragraphs: if the name of the author is the
270 // first paragraph of an author, then merging with the previous tag does not make sense. Say the
271 // next paragraph is the affiliation, then it should be output in the same <author> tag (different
272 // layout, same wrapper tag).
// Default: open the wrapper whenever the layout defines one.
273 bool openWrapper = lay.docbookwrappertag() != "NONE";
274 if (prevpar != nullptr) {
275 Layout const & prevlay = prevpar->layout();
// The previous paragraph is wrapped too: the decision is recomputed from both layouts.
276 if (prevlay.docbookwrappertag() != "NONE") {
277 openWrapper = prevlay.docbookwrappertag() == lay.docbookwrappertag()
278 && !lay.docbookwrappermergewithprevious();
// NOTE(review): the line that makes the next call conditional is not visible in this listing;
// presumably it tests openWrapper.
284 openTag(xs, lay.docbookwrappertag(), lay.docbookwrapperattr(), lay.docbookwrappertagtype());
286 const string & tag = lay.docbooktag();
288 auto xmltag = xml::ParTag(tag, lay.docbookattr());
289 if (!xs.isTagOpen(xmltag, 1)) // Don't nest a paragraph directly in a paragraph.
290 // TODO: required or not?
291 // TODO: avoid creating a ParTag object just for this query...
292 openTag(xs, lay.docbooktag(), lay.docbookattr(), lay.docbooktagtype());
295 openTag(xs, lay.docbookitemtag(), lay.docbookitemattr(), lay.docbookitemtagtype());
296 openTag(xs, lay.docbookiteminnertag(), lay.docbookiteminnerattr(), lay.docbookiteminnertagtype());
// Closes the tags opened for `par`, in reverse order: inner item tag, item tag, paragraph tag
// and the wrapper tag (subject to closeWrapper).
// `nextpar` is the following paragraph and may be null; its wrapper tag and its depth relative
// to `par` drive the closeWrapper decision.
// NOTE(review): closeWrapper is a bool, yet two branches assign it an integer depth difference
// (1 + depth(par) - depth(nextpar)); any non-zero value converts to true, so the count is lost.
300 void closeParTag(XMLStream & xs, Paragraph const * par, Paragraph const * nextpar)
305 // See comment in openParTag.
306 Layout const & lay = par->layout();
// Default: close the wrapper whenever the layout defines one.
307 bool closeWrapper = lay.docbookwrappertag() != "NONE";
308 if (nextpar != nullptr) {
309 Layout const & nextlay = nextpar->layout();
310 if (nextlay.docbookwrappertag() != "NONE") {
311 if (nextpar->getDepth() == par->getDepth()) {
312 // Same depth: the basic condition applies.
313 closeWrapper = nextlay.docbookwrappertag() == lay.docbookwrappertag()
314 && !nextlay.docbookwrappermergewithprevious();
315 } else if (nextpar->getDepth() > par->getDepth()) {
316 // The next paragraph is deeper: no need to close the wrapper, only to open it (cf. openParTag).
319 // This paragraph is deeper than the next one: close the wrapper,
320 // disregarding docbookwrappermergewithprevious.
321 // Hypothesis: nextlay.docbookwrappertag() == lay.docbookwrappertag(). TODO: THIS IS WRONG! Loop back until a layout with the right depth is found?
322 closeWrapper = 1L + (long long) par->getDepth() - (long long) nextpar->getDepth(); // > 0, as nextpar->getDepth() < par->getDepth()
// From here on: the case where the next paragraph has no wrapper tag (see the comment below).
325 if (nextpar->getDepth() == par->getDepth()) {
326 // This is not wrapped: this must be the rest of the item, still within the wrapper.
328 } else if (nextpar->getDepth() > par->getDepth()) {
329 // The next paragraph is deeper: no need to close the wrapper, only to open it (cf. openParTag).
332 // This paragraph is deeper than the next one: close the wrapper,
333 // disregarding docbookwrappermergewithprevious.
334 // Hypothesis: nextlay.docbookwrappertag() == lay.docbookwrappertag(). TODO: THIS IS WRONG! Loop back until a layout with the right depth is found?
335 closeWrapper = 1L + (long long) par->getDepth() - (long long) nextpar->getDepth(); // > 0, as nextpar->getDepth() < par->getDepth()
341 closeTag(xs, lay.docbookiteminnertag(), lay.docbookiteminnertagtype());
342 closeTag(xs, lay.docbookitemtag(), lay.docbookitemtagtype());
343 closeTag(xs, lay.docbooktag(), lay.docbooktagtype());
// NOTE(review): the line that makes the next call conditional is not visible in this listing;
// presumably it tests closeWrapper.
345 closeTag(xs, lay.docbookwrappertag(), lay.docbookwrappertagtype());
// Opens the item label tag defined by the layout (tag, attributes and tag type are all read
// from `lay`); openTag() handles the empty/"NONE" tag case.
349 void openLabelTag(XMLStream & xs, Layout const & lay) // Mostly for definition lists.
351 openTag(xs, lay.docbookitemlabeltag(), lay.docbookitemlabelattr(), lay.docbookitemlabeltagtype());
// Closes the item label tag defined by the layout; counterpart of openLabelTag().
355 void closeLabelTag(XMLStream & xs, Layout const & lay)
357 closeTag(xs, lay.docbookitemlabeltag(), lay.docbookitemlabeltagtype());
// Opens the item tag defined by the layout (tag, attributes and tag type are read from `lay`).
361 void openItemTag(XMLStream & xs, Layout const & lay)
363 openTag(xs, lay.docbookitemtag(), lay.docbookitemattr(), lay.docbookitemtagtype());
// Closes the item tag defined by the layout; counterpart of openItemTag().
367 void closeItemTag(XMLStream & xs, Layout const & lay)
369 closeTag(xs, lay.docbookitemtag(), lay.docbookitemtagtype());
// Forward declaration of makeAny(), which is defined further down in this file; the functions
// in between call it before its definition.
// NOTE(review): some parameter lines of this declaration are missing from this listing.
373 ParagraphList::const_iterator makeAny(Text const &,
376 OutputParams const &,
377 ParagraphList::const_iterator);
// Outputs one bibliography paragraph as a <bibliomixed> entry. <bibliography> is opened when
// the paragraph before `par` is not a bibliography entry, and closed again once the end of the
// bibliography is detected.
// NOTE(review): several parameter lines of the signature are missing from this listing; the
// body uses `text`, `buf` and `xs` in addition to the two parameters shown.
380 void makeBibliography(
384 OutputParams const & runparams,
385 ParagraphList::const_iterator const & par)
387 // If this is the first paragraph in a bibliography, open the bibliography tag.
388 auto pbegin_before = text.paragraphs().getParagraphBefore(par);
389 if (pbegin_before->layout().latextype != LATEX_BIB_ENVIRONMENT) {
390 xs << xml::StartTag("bibliography");
394 // Start the precooked bibliography entry. This is very much like opening a paragraph tag.
395 // Don't forget the citation ID!
// Look through the paragraph for an InsetBibitem: its "key" parameter becomes the xml:id.
397 for (auto i = 0; i < par->size(); ++i) {
398 Inset const *ip = par->getInset(i);
401 if (const auto * bibitem = dynamic_cast<const InsetBibitem*>(ip)) {
402 attr = from_utf8("xml:id='") + bibitem->getParam("key") + from_utf8("'");
406 xs << xml::StartTag(from_utf8("bibliomixed"), attr);
408 // Generate the entry. Concatenate the different parts of the paragraph if any.
409 auto const begin = text.paragraphs().begin();
410 auto pars = par->simpleDocBookOnePar(buf, runparams, text.outerFont(std::distance(begin, par)), 0);
// The generated fragments are already XML, hence ESCAPE_NONE.
411 for (auto & parXML : pars)
412 xs << XMLStream::ESCAPE_NONE << parXML;
414 // End the precooked bibliography entry.
415 xs << xml::EndTag("bibliomixed");
418 // If this is the last paragraph in a bibliography, close the bibliography tag.
419 auto const end = text.paragraphs().end();
// NOTE(review): both checks below are written against `par` itself. Lines between them are
// missing from this listing, so confirm in the full file that it is the paragraph FOLLOWING
// `par` that gets inspected; testing `par` again would contradict the comment above.
420 bool endBibliography = par == end;
421 if (!endBibliography) {
424 endBibliography = par->layout().latextype != LATEX_BIB_ENVIRONMENT;
427 if (endBibliography) {
428 xs << xml::EndTag("bibliography");
// NOTE(review): the head of this definition (name and first parameters) is missing from this
// listing. Judging from the calls in makeAny(), this is presumably makeParagraph(); the body
// uses `text`, `buf` and `xs` in addition to the two parameters shown.
// The visible code decides whether the paragraph needs its own paragraph tags (several
// "special cases" suppress them), generates the paragraph contents with simpleDocBookOnePar(),
// and writes every generated fragment that is not made of whitespace only.
438 OutputParams const & runparams,
439 ParagraphList::const_iterator const & par)
441 auto const begin = text.paragraphs().begin();
442 auto const end = text.paragraphs().end();
443 auto prevpar = text.paragraphs().getParagraphBefore(par);
445 // We want to open the paragraph tag if:
446 // (i) the current layout permits multiple paragraphs
447 // (ii) we are either not already inside a paragraph (HTMLIsBlock) OR
448 // we are, but this is not the first paragraph
450 // But there is also a special case, and we first see whether we are in it.
451 // We do not want to open the paragraph tag if this paragraph contains
452 // only one item, and that item is "inline", i.e., not HTMLIsBlock (such
453 // as a branch). On the other hand, if that single item has a font change
454 // applied to it, then we still do need to open the paragraph.
456 // Obviously, this is very fragile. The main reason we need to do this is
457 // because of branches, e.g., a branch that contains an entire new section.
458 // We do not really want to wrap that whole thing in a <div>...</div>.
459 bool special_case = false;
// Only a paragraph consisting of exactly one inset is a candidate for this special case.
460 Inset const *specinset = par->size() == 1 ? par->getInset(0) : nullptr;
461 if (specinset && !specinset->getLayout().htmlisblock()) { // TODO: Convert htmlisblock to a DocBook parameter?
462 Layout const &style = par->layout();
463 FontInfo const first_font = style.labeltype == LABEL_MANUAL ?
464 style.labelfont : style.font;
465 FontInfo const our_font =
466 par->getFont(buf.masterBuffer()->params(), 0,
467 text.outerFont(std::distance(begin, par))).fontInfo();
// No font change on the single inset (see the comment block above).
// NOTE(review): the guarded statement is not visible in this listing.
469 if (first_font == our_font)
473 // Plain layouts must be ignored.
474 if (!special_case && buf.params().documentClass().isPlainLayout(par->layout()) && !runparams.docbook_force_pars)
476 // TODO: Could get rid of this with a DocBook equivalent to htmlisblock?
477 if (!special_case && par->size() == 1 && par->getInset(0)) {
478 Inset const * firstInset = par->getInset(0);
480 // Floats cannot be in paragraphs.
481 special_case = to_utf8(firstInset->layoutName()).substr(0, 6) == "Float:";
483 // Bibliographies cannot be in paragraphs.
484 if (!special_case && firstInset->asInsetCommand())
485 special_case = firstInset->asInsetCommand()->params().getCmdName() == "bibtex";
487 // Equations do not deserve their own paragraph (DocBook allows them outside paragraphs).
488 if (!special_case && firstInset->asInsetMath())
491 // ERTs are in comments, not paragraphs.
492 if (!special_case && firstInset->lyxCode() == lyx::ERT_CODE)
495 // Listings should not get into their own paragraph.
496 if (!special_case && firstInset->lyxCode() == lyx::LISTINGS_CODE)
// NOTE(review): the end of this expression is not visible in this listing.
500 bool const open_par = runparams.docbook_make_pars
501 && !runparams.docbook_in_par
504 // We want to issue the closing tag if either:
505 // (i) We opened it, and either docbook_in_par is false,
506 // or we're not in the last paragraph, anyway.
507 // (ii) We didn't open it and docbook_in_par is true,
508 // but we are in the first par, and there is a next par.
511 bool const close_par = open_par && (!runparams.docbook_in_par);
513 // Determine if this paragraph has some real content. Things like new pages are not caught
514 // by Paragraph::empty(), even though they do not generate anything useful in DocBook.
515 // Thus, remove all spaces (including new lines: \r, \n) before checking for emptiness.
516 // std::all_of allows doing this check without having to copy the string.
517 // Open and close tags around each contained paragraph.
518 auto pars = par->simpleDocBookOnePar(buf, runparams, text.outerFont(distance(begin, par)), 0);
519 for (auto & parXML : pars) {
// Skip fragments made of whitespace only.
520 if (!std::all_of(parXML.begin(), parXML.end(), ::isspace)) {
522 openParTag(xs, &*par, prevpar);
524 xs << XMLStream::ESCAPE_NONE << parXML;
// NOTE(review): `nextpar` is declared on a line this listing omits.
527 closeParTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr);
// Outputs a paragraph whose layout is an environment: opens the layout's tags with
// openParTag(), optionally an item wrapper and a label, writes the paragraph contents, and
// closes the tags with closeParTag().
// NOTE(review): some parameter lines of the signature are missing from this listing (the body
// uses `buf` and `xs`). The visible body is line-for-line the same as the visible body of
// makeListEnvironment() -- a candidate for a shared helper.
533 void makeEnvironment(Text const &text,
536 OutputParams const &runparams,
537 ParagraphList::const_iterator const & par)
539 // TODO: simplify me!
540 auto const end = text.paragraphs().end();
542 // Output the opening tag for this environment, but only if it has not been previously opened (condition
543 // implemented in openParTag).
544 auto prevpar = text.paragraphs().getParagraphBefore(par);
545 openParTag(xs, &*par, prevpar); // TODO: switch in layout for par/block?
547 // Generate the contents of this environment. There is a special case if this is like some environment.
548 Layout const & style = par->layout();
549 if (style.latextype == LATEX_COMMAND) {
550 // Nothing to do (otherwise, infinite loops).
551 } else if (style.latextype == LATEX_ENVIRONMENT) {
552 // Open a wrapper tag if needed.
553 if (style.docbookitemwrappertag() != "NONE")
554 openTag(xs, style.docbookitemwrappertag(), style.docbookitemwrapperattr(), style.docbookitemwrappertagtype());
556 // Generate the label, if need be. If it is taken from the text, sep != 0 and corresponds to the first
557 // character after the label.
// NOTE(review): the declaration of `sep` is on a line this listing omits.
559 if (style.labeltype != LABEL_NO_LABEL && style.docbookitemlabeltag() != "NONE") {
560 // At least one condition must be met:
561 // - this environment is not a list
562 // - if this is a list, the label must not be manual (i.e. it must be taken from the layout)
563 if (style.latextype != LATEX_LIST_ENVIRONMENT || style.labeltype != LABEL_MANUAL) {
564 // Usual cases: maybe there is something specified at the layout level. Highly unlikely, though.
565 docstring const lbl = par->params().labelString();
570 openLabelTag(xs, style);
572 closeLabelTag(xs, style);
575 // Only variablelist gets here (or similar items defined as an extension in the layout).
576 openLabelTag(xs, style);
577 sep = par->firstWordDocBook(xs, runparams);
578 closeLabelTag(xs, style);
582 // Maybe the item is completely empty, i.e. if the first word ends at the end of the current paragraph
583 // AND if the next paragraph doesn't have the same depth (if there is such a paragraph).
584 // Common case: there is only the first word on the line, but there is a nested list instead
586 bool emptyItem = false;
587 if (sep == par->size()) { // If the separator is already at the end of this paragraph...
590 if (next_par == text.paragraphs().end()) // There is no next paragraph.
592 else // There is a next paragraph: check depth.
593 emptyItem = par->params().depth() >= next_par->params().depth();
597 // Avoid having an empty item, this is not valid DocBook. A single character is enough to force
598 // generation of a full <para>.
599 // TODO: this always worked only by magic...
602 // Generate the rest of the paragraph, if need be. Open as many inner tags as necessary.
603 auto pars = par->simpleDocBookOnePar(buf, runparams, text.outerFont(std::distance(text.paragraphs().begin(), par)), sep);
604 auto p = pars.begin();
606 xs << XMLStream::ESCAPE_NONE << *p;
// More fragments to come: close the current inner item tag and open a fresh one.
608 if (p != pars.end()) {
609 closeTag(xs, par->layout().docbookiteminnertag(), par->layout().docbookiteminnertagtype());
610 openTag(xs, par->layout().docbookiteminnertag(), par->layout().docbookiteminnerattr(), par->layout().docbookiteminnertagtype());
616 makeAny(text, buf, xs, runparams, par);
619 // Close the environment.
// NOTE(review): `nextpar` is declared on a line this listing omits.
622 closeParTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr); // TODO: switch in layout for par/block?
// Scans the paragraphs after `pstart`, up to `pend`, looking for the end of the environment
// that starts at `pstart`. The layout and depth of `pstart` are the reference: each following
// paragraph is compared with them (command layouts, lower depth, higher depth, layout change).
// NOTE(review): the statement taken on each condition and the final return are on lines this
// listing omits, so the exact iterator returned cannot be read from here.
626 ParagraphList::const_iterator findEndOfEnvironment(
627 ParagraphList::const_iterator const & pstart,
628 ParagraphList::const_iterator const & pend)
630 // Copy-paste from XHTML. Should be factored out at some point...
632 ParagraphList::const_iterator p = pstart;
633 Layout const & bstyle = p->layout();
634 size_t const depth = p->params().depth();
635 for (++p; p != pend; ++p) {
636 Layout const & style = p->layout();
637 // It shouldn't happen that e.g. a section command occurs inside
638 // a quotation environment, at a higher depth, but as of 6/2009,
639 // it can happen. We pretend that it's just at lowest depth.
640 if (style.latextype == LATEX_COMMAND)
643 // If depth is down, we're done
644 if (p->params().depth() < depth)
647 // If depth is up, we're not done
648 if (p->params().depth() > depth)
651 // FIXME I am not sure about the first check.
652 // Surely we *could* have different layouts that count as
653 // LATEX_PARAGRAPH, right?
654 if (style.latextype == LATEX_PARAGRAPH || style != bstyle)
// Outputs a paragraph that belongs to a list-like environment (environment, list environment
// or item environment layouts): opens the layout's tags with openParTag(), optionally an item
// wrapper and a label, writes the paragraph contents, and closes the tags with closeParTag().
// Declared to return a paragraph iterator.
// NOTE(review): some parameter lines of the signature and the return statement are missing
// from this listing. The visible body is line-for-line the same as the visible body of
// makeEnvironment() -- a candidate for a shared helper.
661 ParagraphList::const_iterator makeListEnvironment(Text const &text,
664 OutputParams const &runparams,
665 ParagraphList::const_iterator const & par)
667 auto const end = text.paragraphs().end();
669 // Output the opening tag for this environment, but only if it has not been previously opened (condition
670 // implemented in openParTag).
671 auto prevpar = text.paragraphs().getParagraphBefore(par);
672 openParTag(xs, &*par, prevpar); // TODO: switch in layout for par/block?
674 // Generate the contents of this environment. There is a special case if this is like some environment.
675 Layout const & style = par->layout();
676 if (style.latextype == LATEX_COMMAND) {
677 // Nothing to do (otherwise, infinite loops).
678 } else if (style.latextype == LATEX_ENVIRONMENT ||
679 style.latextype == LATEX_LIST_ENVIRONMENT ||
680 style.latextype == LATEX_ITEM_ENVIRONMENT) {
681 // Open a wrapper tag if needed.
682 if (style.docbookitemwrappertag() != "NONE")
683 openTag(xs, style.docbookitemwrappertag(), style.docbookitemwrapperattr(), style.docbookitemwrappertagtype());
685 // Generate the label, if need be. If it is taken from the text, sep != 0 and corresponds to the first
686 // character after the label.
// NOTE(review): the declaration of `sep` is on a line this listing omits.
688 if (style.labeltype != LABEL_NO_LABEL && style.docbookitemlabeltag() != "NONE") {
689 // At least one condition must be met:
690 // - this environment is not a list
691 // - if this is a list, the label must not be manual (i.e. it must be taken from the layout)
692 if (style.latextype != LATEX_LIST_ENVIRONMENT || style.labeltype != LABEL_MANUAL) {
693 // Usual cases: maybe there is something specified at the layout level. Highly unlikely, though.
694 docstring const lbl = par->params().labelString();
699 openLabelTag(xs, style);
701 closeLabelTag(xs, style);
704 // Only variablelist gets here (or similar items defined as an extension in the layout).
705 openLabelTag(xs, style);
706 sep = par->firstWordDocBook(xs, runparams);
707 closeLabelTag(xs, style);
711 // Maybe the item is completely empty, i.e. if the first word ends at the end of the current paragraph
712 // AND if the next paragraph doesn't have the same depth (if there is such a paragraph).
713 // Common case: there is only the first word on the line, but there is a nested list instead
715 bool emptyItem = false;
716 if (sep == par->size()) { // If the separator is already at the end of this paragraph...
719 if (next_par == text.paragraphs().end()) // There is no next paragraph.
721 else // There is a next paragraph: check depth.
722 emptyItem = par->params().depth() >= next_par->params().depth();
726 // Avoid having an empty item, this is not valid DocBook. A single character is enough to force
727 // generation of a full <para>.
728 // TODO: this always worked only by magic...
731 // Generate the rest of the paragraph, if need be. Open as many inner tags as necessary.
732 auto pars = par->simpleDocBookOnePar(buf, runparams, text.outerFont(std::distance(text.paragraphs().begin(), par)), sep);
733 auto p = pars.begin();
735 xs << XMLStream::ESCAPE_NONE << *p;
// More fragments to come: close the current inner item tag and open a fresh one.
737 if (p != pars.end()) {
738 closeTag(xs, par->layout().docbookiteminnertag(), par->layout().docbookiteminnertagtype());
739 openTag(xs, par->layout().docbookiteminnertag(), par->layout().docbookiteminnerattr(), par->layout().docbookiteminnertagtype());
745 makeAny(text, buf, xs, runparams, par);
748 // Close the environment.
// NOTE(review): `nextpar` is declared on a line this listing omits.
751 closeParTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr); // TODO: switch in layout for par/block?
// NOTE(review): the head of this definition (name and first parameters) is missing from this
// listing. Judging from the calls in makeAny() and the "Generate this command" comment, this is
// presumably makeCommand(); the body uses `text`, `buf` and `xs` in addition to the two
// parameters shown.
// The visible code opens the layout's tags, writes the fragments generated by
// simpleDocBookOnePar() without escaping, and closes the tags.
761 OutputParams const & runparams,
762 ParagraphList::const_iterator const & par)
764 // Unlike XHTML, no need for labels, as they are handled by DocBook tags.
765 auto const begin = text.paragraphs().begin();
766 auto const end = text.paragraphs().end();
770 // Generate this command.
771 auto prevpar = text.paragraphs().getParagraphBefore(par);
772 openParTag(xs, &*par, prevpar);
774 auto pars = par->simpleDocBookOnePar(buf, runparams,text.outerFont(distance(begin, par)));
775 for (auto & parXML : pars)
776 // TODO: decide what to do with openParTag/closeParTag in new lines.
777 xs << XMLStream::ESCAPE_NONE << parXML;
// NOTE(review): `nextpar` is declared on a line this listing omits.
779 closeParTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr);
// Dispatches on the LaTeX type of the paragraph's layout to the matching make*() function.
// Per the comment below, list and item environments are the only case that may consume more
// than one paragraph, which is why that branch returns the iterator from makeListEnvironment().
// NOTE(review): some parameter lines, one case label and the statements following the other
// calls (including the final return) are on lines this listing omits.
783 ParagraphList::const_iterator makeAny(Text const &text,
786 OutputParams const &runparams,
787 ParagraphList::const_iterator par)
789 switch (par->layout().latextype) {
791 makeCommand(text, buf, xs, runparams, par);
793 case LATEX_ENVIRONMENT:
794 makeEnvironment(text, buf, xs, runparams, par);
796 case LATEX_LIST_ENVIRONMENT:
797 case LATEX_ITEM_ENVIRONMENT:
798 // Only case when makeAny() might consume more than one paragraph.
799 return makeListEnvironment(text, buf, xs, runparams, par);
800 case LATEX_PARAGRAPH:
801 makeParagraph(text, buf, xs, runparams, par);
803 case LATEX_BIB_ENVIRONMENT:
804 makeBibliography(text, buf, xs, runparams, par);
// Result type of hasDocumentSectioning(): (does the document have sections?, paragraph index
// at which the scan stopped).
812 using DocBookDocumentSectioning = tuple<bool, pit_type>;
// Describes the paragraphs that feed the DocBook <info> tag, as computed by
// getParagraphsWithInfo(): three sets of paragraph indices plus the range bpit/epit.
// NOTE(review): the declarations of the `bpit` and `epit` members are on lines this listing
// omits; the constructor below initialises them.
815 struct DocBookInfoTag
// Paragraphs whose layout reports docbookininfo() == "maybe".
817 const set<pit_type> shouldBeInInfo;
// Paragraphs whose layout reports docbookininfo() == "always".
818 const set<pit_type> mustBeInInfo;
// Paragraphs that make up the abstract.
819 const set<pit_type> abstract;
823 DocBookInfoTag(const set<pit_type> & shouldBeInInfo, const set<pit_type> & mustBeInInfo,
824 const set<pit_type> & abstract, pit_type bpit, pit_type epit) :
825 shouldBeInInfo(shouldBeInInfo), mustBeInInfo(mustBeInInfo), abstract(abstract),
826 bpit(bpit), epit(epit) {}
// Walks the paragraphs from bpit towards epit looking for a layout whose category is
// "Sectioning". Returns whether one was found together with the value of bpit when the scan
// ended.
// NOTE(review): `¶graphs` in the signature looks like `&paragraphs` damaged by an HTML-entity
// mix-up (the body refers to `paragraphs`). The loop's increment/exit statements are on lines
// this listing omits.
830 DocBookDocumentSectioning hasDocumentSectioning(ParagraphList const ¶graphs, pit_type bpit, pit_type const epit) {
831 bool documentHasSections = false;
833 while (bpit < epit) {
834 Layout const &style = paragraphs[bpit].layout();
835 documentHasSections |= style.category() == from_utf8("Sectioning");
837 if (documentHasSections)
841 // Paragraphs before the first section: [ runparams.par_begin ; eppit )
843 return make_tuple(documentHasSections, bpit);
// Tells whether every position of the paragraph holds a note inset: each position is tested
// for being an inset and for that inset being an InsetNote.
// NOTE(review): the return statements are on lines this listing omits.
847 bool hasOnlyNotes(Paragraph const & par)
849 // Precondition: the paragraph is not empty. Otherwise, the function will always return true...
850 for (int i = 0; i < par.size(); ++i)
851 // If you find something that is not an inset (like actual text) or an inset that is not a note,
853 if (!par.isInset(i) || !dynamic_cast<InsetNote *>(par.insetList().get(i)))
// Scans the paragraphs from bpit towards epit to decide what belongs in the DocBook <info>
// tag. Leading paragraphs that are empty or hold only notes are skipped first. Each following
// paragraph is then classified from its layout: docbookininfo() == "always" goes to
// mustBeInInfo, "maybe" goes to shouldBeInInfo. Finally the abstract paragraphs are collected:
// from layouts flagged docbookabstract() when there is one, otherwise with a heuristic based on
// insets for which getText(0) is non-null.
// Returns a DocBookInfoTag built from the three sets, bpit and cpit.
// NOTE(review): `¶graphs` in the signature looks like `&paragraphs` damaged by an HTML-entity
// mix-up (the body refers to `paragraphs`). Loop increments, `else`/`break` lines and the
// `done:` label are on lines this listing omits.
859 DocBookInfoTag getParagraphsWithInfo(ParagraphList const ¶graphs, pit_type bpit, pit_type const epit) {
860 set<pit_type> shouldBeInInfo;
861 set<pit_type> mustBeInInfo;
862 set<pit_type> abstract;
864 // Find the first non empty paragraph by mutating bpit.
865 while (bpit < epit) {
866 Paragraph const &par = paragraphs[bpit];
867 if (par.empty() || hasOnlyNotes(par))
873 // Find the last info-like paragraph.
874 pit_type cpit = bpit;
875 bool hasAbstractLayout = false;
876 while (cpit < epit) {
877 // Skip paragraphs only containing one note.
878 Paragraph const & par = paragraphs[cpit];
879 if (hasOnlyNotes(par)) {
884 if (par.layout().docbookabstract())
885 hasAbstractLayout = true;
887 // Based on layout information, store this paragraph in one set: should be in <info>, must be.
888 Layout const &style = par.layout();
890 if (style.docbookininfo() == "always") {
891 mustBeInInfo.emplace(cpit);
892 } else if (style.docbookininfo() == "maybe") {
893 shouldBeInInfo.emplace(cpit);
895 // Hypothesis: the <info> parts should be grouped together near the beginning bpit.
896 // There may be notes in between, but nothing else.
901 // Now, cpit points to the last paragraph that has things that could go in <info>.
902 // bpit is the beginning of the <info> part.
904 // Go once again through the list of paragraphs to find the abstract. If there is an abstract
905 // layout, only consider it. Otherwise, an abstract is just a sequence of paragraphs with text.
906 if (hasAbstractLayout) {
908 while (pit < cpit) { // Don't overshoot the <info> part.
909 if (paragraphs[pit].layout().docbookabstract())
910 abstract.emplace(pit);
// epit + 1 serves as a "no abstract paragraph seen yet" marker in the comparisons below.
914 pit_type lastAbstract = epit + 1; // A nonsensical value.
915 docstring lastAbstractLayout;
918 while (pit < cpit) { // Don't overshoot the <info> part.
919 const Paragraph & par = paragraphs.at(pit);
920 if (!par.insetList().empty()) {
921 for (const auto &i : par.insetList()) {
922 if (i.inset->getText(0) != nullptr) {
923 if (lastAbstract == epit + 1) {
924 // First paragraph that matches the heuristic definition of abstract.
926 lastAbstractLayout = par.layout().name();
927 } else if (pit > lastAbstract + 1 || par.layout().name() != lastAbstractLayout) {
928 // This is either too far from the last abstract paragraph or doesn't
929 // have the right layout name, BUT there has already been an abstract
930 // in this document: done with detecting the abstract.
931 goto done; // Easier to get out of two nested loops.
934 abstract.emplace(pit);
944 return DocBookInfoTag(shouldBeInInfo, mustBeInInfo, abstract, bpit, cpit);
947 } // end anonymous namespace
// Builds the opening font tag for `type`: the tag name comes from fontToDocBookTag() and the
// attribute text from fontToAttribute(), both converted with from_utf8().
950 xml::FontTag docbookStartFontTag(xml::FontTypes type)
952 return xml::FontTag(from_utf8(fontToDocBookTag(type)), from_utf8(fontToAttribute(type)), type);
// Builds the closing font tag for `type`; the tag name comes from fontToDocBookTag().
956 xml::EndFontTag docbookEndFontTag(xml::FontTypes type)
958 return xml::EndFontTag(from_utf8(fontToDocBookTag(type)), type);
// Writes the <info> part described by `info`. The abstract paragraphs are first rendered into
// a temporary stream so that an abstract made of whitespace only can be detected. <info> is
// needed when mustBeInInfo is non-empty or an abstract exists. Output order: the
// shouldBeInInfo paragraphs, then the mustBeInInfo paragraphs that are not part of the
// abstract, then the pre-rendered abstract.
// NOTE(review): several parameter lines, the `if` guards and the declarations of `xs2` and
// `abstract` are on lines this listing omits.
962 void outputDocBookInfo(
966 OutputParams const & runparams,
967 ParagraphList const & paragraphs,
968 DocBookInfoTag const & info)
970 // Perform an additional check on the abstract. Sometimes, there are many paragraphs that should go
971 // into the abstract, but none generates actual content. Thus, first generate to a temporary stream,
972 // then only create the <abstract> tag if these paragraphs generate some content.
973 // This check must be performed *before* a decision on whether or not to output <info> is made.
974 bool hasAbstract = !info.abstract.empty();
977 // Generate the abstract XML into a string before further checks.
978 odocstringstream os2;
// NOTE(review): info.abstract is a std::set, which keeps its elements ordered, so the smallest
// and largest indices are *begin() and *rbegin(); the linear min_element/max_element scans are
// not needed. Both dereferences require a non-empty set (presumably ensured by a guard on
// hasAbstract that this listing omits).
981 auto bpit = *std::min_element(info.abstract.begin(), info.abstract.end());
982 auto epit = 1 + *std::max_element(info.abstract.begin(), info.abstract.end());
983 // info.abstract is inclusive, epit is exclusive, hence +1 for looping.
985 while (bpit < epit) {
986 makeAny(text, buf, xs2, runparams, paragraphs.iterator_at(bpit));
991 // Actually output the abstract if there is something to do. Don't count line feeds or spaces in this,
992 // even though they must be properly output if there is some abstract.
993 abstract = os2.str();
994 docstring cleaned = abstract;
995 cleaned.erase(std::remove_if(cleaned.begin(), cleaned.end(), ::isspace), cleaned.end());
997 // Nothing? Then there is no abstract!
1002 // The abstract must go in <info>. Otherwise, decide whether to open <info> based on the layouts.
1003 bool needInfo = !info.mustBeInInfo.empty() || hasAbstract;
1005 // Start the <info> tag if required.
1007 xs.startDivision(false);
1008 xs << xml::StartTag("info");
1012 // Output the elements that should go in <info>, before and after the abstract.
1013 for (auto pit : info.shouldBeInInfo) { // Typically, the title: these elements are so important and ubiquitous
1014 // that mandating a wrapper like <info> would repel users. Thus, generate them first.
1015 makeAny(text, buf, xs, runparams, paragraphs.iterator_at(pit));
1017 for (auto pit : info.mustBeInInfo) {
1018 if (info.abstract.find(pit) == info.abstract.end()) // The abstract must be in info, but is dealt with after.
1019 makeAny(text, buf, xs, runparams, paragraphs.iterator_at(pit));
1022 // Always output the abstract as the last item of the <info>, as it requires special treatment (especially if
1023 // it contains several paragraphs that are empty).
1025 // string tag = paragraphs[*info.abstract.begin()].layout().docbookforceabstracttag();
1026 // if (tag == "NONE")
1027 // tag = "abstract";
1029 // xs << xml::StartTag(tag);
// The abstract was rendered to XML above, hence ESCAPE_NONE.
1031 xs << XMLStream::ESCAPE_NONE << abstract;
1032 // xs << xml::EndTag(tag);
1036 // End the <info> tag if it was started.
1038 xs << xml::EndTag("info");
// Computes the <info> description for the paragraphs from runparams.par_begin up to `epit`
// with getParagraphsWithInfo() and writes it with outputDocBookInfo().
// NOTE(review): most parameter lines of the signature are missing from this listing (the body
// uses `text`, `buf`, `xs` and `epit`). `¶graphs` below looks like `&paragraphs` damaged by an
// HTML-entity mix-up.
1045 void docbookFirstParagraphs(
1049 OutputParams const &runparams,
1052 // Handle the beginning of the document, supposing it has sections.
1053 // Major role: output the first <info> tag.
1055 ParagraphList const ¶graphs = text.paragraphs();
1056 pit_type bpit = runparams.par_begin;
1057 DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit);
1058 outputDocBookInfo(text, buf, xs, runparams, paragraphs, info);
// Outputs a text without sections in two steps: first the <info> part (getParagraphsWithInfo()
// followed by outputDocBookInfo()), then the paragraphs from info.epit up to runparams.par_end,
// each one generated by makeAny().
1062 void docbookSimpleAllParagraphs(
1066 OutputParams const & runparams)
1068 // Handle the given text, supposing it has no sections (i.e. a "simple" text). The input may vary in length
1069 // from a single paragraph to a whole document.
1071 // First, the <info> tag.
// NOTE(review): "¶graphs" looks like a mis-decoded "&paragraphs" (reference declarator) -- confirm.
1072 ParagraphList const ¶graphs = text.paragraphs();
// The range to output is the one selected in runparams.
1073 pit_type bpit = runparams.par_begin;
1074 pit_type const epit = runparams.par_end;
1075 DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit);
1076 outputDocBookInfo(text, buf, xs, runparams, paragraphs, info);
1078 // Then, the content. It starts where the <info> ends.
// info.epit is used as the position of the first content paragraph, epit as the end of the range.
1079 auto par = text.paragraphs().iterator_at(info.epit);
1080 auto end = text.paragraphs().iterator_at(epit);
1081 while (par != end) {
// Only paragraphs for which hasOnlyNotes() is false are handed to makeAny(); its return value
// becomes the position from which the loop continues.
1082 if (!hasOnlyNotes(*par))
1083 par = makeAny(text, buf, xs, runparams, par);
// Writes the paragraphs of text selected by runparams as DocBook into xs.
// An empty selection (par_begin == par_end) is widened to the whole paragraph list. The <info>
// part is delegated: docbookFirstParagraphs() is the handler for documents with sections,
// docbookSimpleAllParagraphs() the one for texts without sections. The main loop walks the
// paragraphs and opens a section tag for every paragraph whose layout category is "Sectioning".
// Sections recorded in headerLevels are closed when a section of the same or a shallower level
// starts, when a paragraph's first inset is a bibliography (InsetBibtex), and at the end of the
// document.
1090 void docbookParagraphs(Text const &text,
1093 OutputParams const &runparams) {
// NOTE(review): "¶graphs" looks like a mis-decoded "&paragraphs" (reference declarator) -- confirm.
1094 ParagraphList const ¶graphs = text.paragraphs();
// No explicit range requested: output every paragraph of the text.
1095 if (runparams.par_begin == runparams.par_end) {
1096 runparams.par_begin = 0;
1097 runparams.par_end = paragraphs.size();
1099 pit_type bpit = runparams.par_begin;
1100 pit_type const epit = runparams.par_end;
// The range must contain at least one paragraph; the assertion below guards against an empty or
// inverted range.
1101 LASSERT(bpit < epit,
1103 xs << XMLStream::ESCAPE_NONE << "<!-- DocBook output error! -->\n";
1107 std::stack<std::pair<int, string>> headerLevels; // Used to determine when to open/close sections: store the depth
1108 // of the section and the tag that was used to open it.
1110 // Detect whether the document contains sections. If there are no sections, there can be no automatically
1111 // discovered abstract.
1112 bool documentHasSections;
// hasDocumentSectioning() yields two values: the flag and a paragraph position (eppit) that is
// forwarded to docbookFirstParagraphs() below.
1114 tie(documentHasSections, eppit) = hasDocumentSectioning(paragraphs, bpit, epit);
1116 if (documentHasSections) {
1117 docbookFirstParagraphs(text, buf, xs, runparams, eppit);
1120 docbookSimpleAllParagraphs(text, buf, xs, runparams);
// Becomes true at the first paragraph flagged as start of appendix and stays true afterwards.
1124 bool currentlyInAppendix = false;
1126 auto par = text.paragraphs().iterator_at(bpit);
1127 auto end = text.paragraphs().iterator_at(epit);
1128 while (par != end) {
// Fresh copy for each paragraph: the anchors to ignore that are registered below only apply to
// the makeAny() call of this iteration.
1129 OutputParams ourparams = runparams;
1131 if (par->params().startOfAppendix())
1132 currentlyInAppendix = true;
1133 if (hasOnlyNotes(*par)) {
1138 Layout const &style = par->layout();
1140 // Think about adding <section> and/or </section>s.
1141 const bool isLayoutSectioning = style.category() == from_utf8("Sectioning");
1142 if (isLayoutSectioning) {
1143 int level = style.toclevel;
1145 // Need to close a previous section if it has the same level or a deeper one, i.e. a toclevel that is not smaller
1146 // than the new one (close <section> if opening a <h2> after a <h2>, <h3>, <h4>, <h5> or <h6>). More examples:
1147 // - current: h2; back: h1; do not close any <section>
1148 // - current: h1; back: h2; close two <section> (first the <h2>, then the <h1>, so a new <h1> can come)
1149 while (!headerLevels.empty() && level <= headerLevels.top().first) {
1150 int stackLevel = headerLevels.top().first;
1151 docstring stackTag = from_utf8("</" + headerLevels.top().second + ">");
1154 // Output the tag only if it corresponds to a legit section.
1155 if (stackLevel != Layout::NOT_IN_TOC)
1156 xs << XMLStream::ESCAPE_NONE << stackTag << xml::CR();
1159 // Open the new section: first push it onto the stack, then output it in DocBook.
// Once the appendix has started, a layout whose section tag is "chapter" is output as "appendix".
1160 string sectionTag = (currentlyInAppendix && style.docbooksectiontag() == "chapter") ?
1161 "appendix" : style.docbooksectiontag();
1162 headerLevels.push(std::make_pair(level, sectionTag));
1164 // Some sectioning-like elements should not be output (such as FrontMatter).
1165 if (level != Layout::NOT_IN_TOC) {
1166 // Look for a label in the title, i.e. an InsetLabel as a child.
1167 docstring id = docstring();
1168 for (pos_type i = 0; i < par->size(); ++i) {
1169 Inset const *inset = par->getInset(i);
1171 if (auto label = dynamic_cast<InsetLabel const *>(inset)) {
1172 // Generate the attributes for the section if need be.
1173 id += "xml:id=\"" + xml::cleanID(label->screenLabel()) + "\"";
1175 // Don't output the ID as a DocBook <anchor>.
1176 ourparams.docbook_anchors_to_ignore.emplace(label->screenLabel());
1178 // Cannot have multiple IDs per tag.
1184 // Write the open tag for this section.
1185 docstring tag = from_utf8("<" + sectionTag);
// id holds a complete xml:id="..." attribute (built above); it is separated from the tag name by a space.
1187 tag += from_utf8(" ") + id;
1188 tag += from_utf8(">");
1189 xs << XMLStream::ESCAPE_NONE << tag;
1194 // Close all sections before the bibliography.
1195 // TODO: Only close all when the bibliography is at the end of the document? Or force to output the bibliography at the end of the document? Or don't care (as allowed by DocBook)?
// A paragraph is treated as the bibliography when it has at least one inset and the inset found at
// position 0 is an InsetBibtex.
1196 auto insetsLength = distance(par->insetList().begin(), par->insetList().end());
1197 if (insetsLength > 0) {
1198 Inset const *firstInset = par->getInset(0);
1199 if (firstInset && dynamic_cast<InsetBibtex const *>(firstInset)) {
1200 while (!headerLevels.empty()) {
1201 int level = headerLevels.top().first;
1202 docstring tag = from_utf8("</" + headerLevels.top().second + ">");
1205 // Output the tag only if it corresponds to a legit section.
1206 if (level != Layout::NOT_IN_TOC) {
1207 xs << XMLStream::ESCAPE_NONE << tag;
1214 // Generate this paragraph.
// makeAny() receives the per-paragraph parameters and returns the position to continue from.
1215 par = makeAny(text, buf, xs, ourparams, par);
1218 // If need be, close <section>s, but only at the end of the document (before that, sections are closed when a new section starts).
1220 while (!headerLevels.empty() && headerLevels.top().first > Layout::NOT_IN_TOC) {
1221 docstring tag = from_utf8("</" + headerLevels.top().second + ">");
1223 xs << XMLStream::ESCAPE_NONE << tag;