2 * \file output_docbook.cpp
3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
6 * \author Lars Gullik Bjønnes
9 * Full author contact details are available in file CREDITS.
14 #include "output_docbook.h"
17 #include "buffer_funcs.h"
18 #include "BufferParams.h"
20 #include "InsetList.h"
21 #include "Paragraph.h"
22 #include "ParagraphList.h"
23 #include "ParagraphParameters.h"
26 #include "TextClass.h"
28 #include "insets/InsetBibtex.h"
29 #include "insets/InsetBibitem.h"
30 #include "insets/InsetLabel.h"
31 #include "mathed/InsetMath.h"
32 #include "insets/InsetNote.h"
34 #include "support/debug.h"
35 #include "support/lassert.h"
36 #include "support/textutils.h"
44 using namespace lyx::support;
// Maps a font type to the name of the tag used for it in the DocBook output.
// The returned string is what docbookStartFontTag() and docbookEndFontTag()
// pass (through from_utf8) as the tag name of the font tags they build.
// The switch lists every font type handled, from FT_EMPH down to the
// FT_SIZE_* values.
50 std::string fontToDocBookTag(xml::FontTypes type)
53 case xml::FontTypes::FT_EMPH:
54 case xml::FontTypes::FT_BOLD:
56 case xml::FontTypes::FT_NOUN:
58 case xml::FontTypes::FT_UBAR:
59 case xml::FontTypes::FT_WAVE:
60 case xml::FontTypes::FT_DBAR:
61 case xml::FontTypes::FT_SOUT:
62 case xml::FontTypes::FT_XOUT:
63 case xml::FontTypes::FT_ITALIC:
64 case xml::FontTypes::FT_UPRIGHT:
65 case xml::FontTypes::FT_SLANTED:
66 case xml::FontTypes::FT_SMALLCAPS:
67 case xml::FontTypes::FT_ROMAN:
68 case xml::FontTypes::FT_SANS:
70 case xml::FontTypes::FT_TYPE:
72 case xml::FontTypes::FT_SIZE_TINY:
73 case xml::FontTypes::FT_SIZE_SCRIPT:
74 case xml::FontTypes::FT_SIZE_FOOTNOTE:
75 case xml::FontTypes::FT_SIZE_SMALL:
76 case xml::FontTypes::FT_SIZE_NORMAL:
77 case xml::FontTypes::FT_SIZE_LARGE:
78 case xml::FontTypes::FT_SIZE_LARGER:
79 case xml::FontTypes::FT_SIZE_LARGEST:
80 case xml::FontTypes::FT_SIZE_HUGE:
81 case xml::FontTypes::FT_SIZE_HUGER:
82 case xml::FontTypes::FT_SIZE_INCREASE:
83 case xml::FontTypes::FT_SIZE_DECREASE:
// Returns the role associated with a font type. fontToAttribute() formats
// this value as a role='...' attribute.
// For the FT_SIZE_* types, the role is "size_" followed by the lower-case
// size name (for instance "size_script" or "size_normal").
91 string fontToRole(xml::FontTypes type)
93 // Specific fonts are achieved with roles. The only common ones are "" for basic emphasis,
94 // and "bold"/"strong" for bold. With some specific options, other roles are copied into
95 // HTML output (via the DocBook XSLT sheets); otherwise, if not recognised, they are just ignored.
96 // Hence, it is not a problem to have many roles by default here.
97 // See https://www.sourceware.org/ml/docbook/2003-05/msg00269.html
99 case xml::FontTypes::FT_ITALIC:
100 case xml::FontTypes::FT_EMPH:
102 case xml::FontTypes::FT_BOLD:
104 case xml::FontTypes::FT_NOUN: // Outputs a <person>
105 case xml::FontTypes::FT_TYPE: // Outputs a <code>
107 case xml::FontTypes::FT_UBAR:
110 // All other roles are non-standard for DocBook.
112 case xml::FontTypes::FT_WAVE:
114 case xml::FontTypes::FT_DBAR:
116 case xml::FontTypes::FT_SOUT:
118 case xml::FontTypes::FT_XOUT:
120 case xml::FontTypes::FT_UPRIGHT:
122 case xml::FontTypes::FT_SLANTED:
124 case xml::FontTypes::FT_SMALLCAPS:
126 case xml::FontTypes::FT_ROMAN:
128 case xml::FontTypes::FT_SANS:
130 case xml::FontTypes::FT_SIZE_TINY:
132 case xml::FontTypes::FT_SIZE_SCRIPT:
133 return "size_script";
134 case xml::FontTypes::FT_SIZE_FOOTNOTE:
135 return "size_footnote";
136 case xml::FontTypes::FT_SIZE_SMALL:
138 case xml::FontTypes::FT_SIZE_NORMAL:
139 return "size_normal";
140 case xml::FontTypes::FT_SIZE_LARGE:
142 case xml::FontTypes::FT_SIZE_LARGER:
143 return "size_larger";
144 case xml::FontTypes::FT_SIZE_LARGEST:
145 return "size_largest";
146 case xml::FontTypes::FT_SIZE_HUGE:
148 case xml::FontTypes::FT_SIZE_HUGER:
150 case xml::FontTypes::FT_SIZE_INCREASE:
151 return "size_increase";
152 case xml::FontTypes::FT_SIZE_DECREASE:
153 return "size_decrease";
// Builds the attribute string of a font tag: the role given by
// fontToRole(type), formatted as role='<role>' (single quotes included).
// docbookStartFontTag() passes the result as the attribute of the font tag.
160 string fontToAttribute(xml::FontTypes type) {
161 // If there is a role (i.e. nonstandard use of a tag), output the attribute. Otherwise, the sheer tag is sufficient
163 string role = fontToRole(type);
165 return "role='" + role + "'";
171 // Higher-level convenience functions.
// Opens on xs the DocBook tags requested by the layout of paragraph par.
//   xs        - XML stream receiving the tags.
//   par       - paragraph being output; its layout provides every tag name, attribute and tag type used here.
//   prevpar   - paragraph preceding par, or null; consulted to decide whether the wrapper tag has to be
//               opened again.
//   runparams - docbook_ignore_wrapper switches the wrapper logic off.
// The openTag calls come in this order: wrapper, tag, inner tag, item wrapper, item, item inner.
// closeParTag() issues the corresponding closeTag calls in the reverse order.
173 void openParTag(XMLStream & xs, const Paragraph * par, const Paragraph * prevpar, const OutputParams & runparams)
178 // If the previous paragraph is empty, don't consider it when opening wrappers.
179 if (prevpar && prevpar->empty() && !prevpar->allowEmpty())
182 // When should the wrapper be opened here? Only if the previous paragraph has the SAME wrapper tag
183 // (usually, they won't have the same layout) and the CURRENT one allows merging.
184 // The main use case is author information in several paragraphs: if the name of the author is the
185 // first paragraph of an author, then merging with the previous tag does not make sense. Say the
186 // next paragraph is the affiliation, then it should be output in the same <author> tag (different
187 // layout, same wrapper tag).
188 Layout const & lay = par->layout();
// Default: open the wrapper whenever the layout defines one ("NONE" means no wrapper) and wrappers are not ignored.
189 bool openWrapper = lay.docbookwrappertag() != "NONE" && !runparams.docbook_ignore_wrapper;
191 if (prevpar != nullptr && !runparams.docbook_ignore_wrapper) {
192 Layout const & prevlay = prevpar->layout();
193 if (prevlay.docbookwrappertag() != "NONE") {
// Same wrapper tag and same wrapper attributes as the previous paragraph: the wrapper is opened
// again only if the current layout does not merge with the previous one.
194 if (prevlay.docbookwrappertag() == lay.docbookwrappertag() &&
195 prevlay.docbookwrapperattr() == lay.docbookwrapperattr())
196 openWrapper = !lay.docbookwrappermergewithprevious();
204 xml::openTag(xs, lay.docbookwrappertag(), lay.docbookwrapperattr(), lay.docbookwrappertagtype());
206 const string & tag = lay.docbooktag();
208 auto xmltag = xml::ParTag(tag, lay.docbookattr());
209 if (!xs.isTagOpen(xmltag, 1)) { // Don't nest a paragraph directly in a paragraph.
210 // TODO: required or not?
211 // TODO: avoid creating a ParTag object just for this query...
212 xml::openTag(xs, lay.docbooktag(), lay.docbookattr(), lay.docbooktagtype());
213 xml::openTag(xs, lay.docbookinnertag(), lay.docbookinnerattr(), lay.docbookinnertagtype());
217 xml::openTag(xs, lay.docbookitemwrappertag(), lay.docbookitemwrapperattr(), lay.docbookitemwrappertagtype());
218 xml::openTag(xs, lay.docbookitemtag(), lay.docbookitemattr(), lay.docbookitemtagtype());
219 xml::openTag(xs, lay.docbookiteminnertag(), lay.docbookiteminnerattr(), lay.docbookiteminnertagtype());
// Closes on xs the DocBook tags requested by the layout of paragraph par; counterpart of openParTag().
//   xs        - XML stream receiving the closing tags.
//   par       - paragraph being output; its layout provides the tag names and tag types.
//   nextpar   - paragraph following par, or null; consulted to decide whether the wrapper tag must be
//               closed now or kept open for the next paragraph.
//   runparams - docbook_ignore_wrapper switches the wrapper logic off.
// The closeTag calls come in this order: item inner, item, item wrapper, inner tag, tag, wrapper,
// i.e. the reverse of the order used by openParTag().
223 void closeParTag(XMLStream & xs, Paragraph const * par, Paragraph const * nextpar, const OutputParams & runparams)
228 // If the next paragraph is empty, don't consider it when closing wrappers.
229 if (nextpar && nextpar->empty() && !nextpar->allowEmpty())
232 // See comment in openParTag.
233 Layout const & lay = par->layout();
// Default: close the wrapper whenever the layout defines one ("NONE" means no wrapper) and wrappers are not ignored.
234 bool closeWrapper = lay.docbookwrappertag() != "NONE" && !runparams.docbook_ignore_wrapper;
236 if (nextpar != nullptr && !runparams.docbook_ignore_wrapper) {
237 Layout const & nextlay = nextpar->layout();
238 if (nextlay.docbookwrappertag() != "NONE") {
// Same wrapper tag and attributes as the next paragraph: the wrapper is closed now only if the
// NEXT layout does not merge with its previous paragraph (i.e. with this one).
239 if (nextlay.docbookwrappertag() == lay.docbookwrappertag() &&
240 nextlay.docbookwrapperattr() == lay.docbookwrapperattr())
241 closeWrapper = !nextlay.docbookwrappermergewithprevious();
248 xml::closeTag(xs, lay.docbookiteminnertag(), lay.docbookiteminnertagtype());
249 xml::closeTag(xs, lay.docbookitemtag(), lay.docbookitemtagtype());
250 xml::closeTag(xs, lay.docbookitemwrappertag(), lay.docbookitemwrappertagtype());
251 xml::closeTag(xs, lay.docbookinnertag(), lay.docbookinnertagtype());
252 xml::closeTag(xs, lay.docbooktag(), lay.docbooktagtype());
254 xml::closeTag(xs, lay.docbookwrappertag(), lay.docbookwrappertagtype());
// Outputs one paragraph of a bibliography environment as a <bibliomixed> entry.
// The enclosing <bibliography> tag is opened when there is no paragraph before par or when that paragraph
// is not a LATEX_BIB_ENVIRONMENT one; it is closed when nextpar is the end of the paragraph list or is not
// a LATEX_BIB_ENVIRONMENT paragraph.
// The entry receives an xml:id attribute built from the "key" parameter of the InsetBibitem found in par
// (after xml::cleanID).
258 void makeBibliography(
262 OutputParams const & runparams,
263 ParagraphList::const_iterator const & par)
265 // If this is the first paragraph in a bibliography, open the bibliography tag.
266 auto const * pbegin_before = text.paragraphs().getParagraphBefore(par);
// NOTE(review): the inner "pbegin_before &&" is always true once the first disjunct is false; it is
// redundant but harmless.
267 if (pbegin_before == nullptr || (pbegin_before && pbegin_before->layout().latextype != LATEX_BIB_ENVIRONMENT)) {
268 xs << xml::StartTag("bibliography");
272 // Start the precooked bibliography entry. This is very much like opening a paragraph tag.
273 // Don't forget the citation ID!
// Scan the positions of the paragraph, looking for the bibitem inset that carries the citation key.
275 for (auto i = 0; i < par->size(); ++i) {
276 Inset const *ip = par->getInset(i);
279 if (const auto * bibitem = dynamic_cast<const InsetBibitem*>(ip)) {
280 auto id = xml::cleanID(bibitem->getParam("key"));
281 attr = from_utf8("xml:id='") + id + from_utf8("'");
285 xs << xml::StartTag(from_utf8("bibliomixed"), attr);
287 // Generate the entry. Concatenate the different parts of the paragraph if any.
288 auto const begin = text.paragraphs().begin();
289 auto pars = par->simpleDocBookOnePar(buf, runparams, text.outerFont(std::distance(begin, par)), 0);
// The generated pieces are already XML: write them without escaping.
290 for (auto & parXML : pars)
291 xs << XMLStream::ESCAPE_NONE << parXML;
293 // End the precooked bibliography entry.
294 xs << xml::EndTag("bibliomixed");
297 // If this is the last paragraph in a bibliography, close the bibliography tag.
298 auto const end = text.paragraphs().end();
301 bool endBibliography = nextpar == end || nextpar->layout().latextype != LATEX_BIB_ENVIRONMENT;
303 if (endBibliography) {
304 xs << xml::EndTag("bibliography");
314 OutputParams const & runparams,
315 ParagraphList::const_iterator const & par)
// Paragraph handler (makeAny() calls makeParagraph() for LATEX_PARAGRAPH layouts).
// Outline of the visible logic:
//  1. compute special_case, i.e. whether this paragraph must NOT get its own paragraph tag
//     (single inline inset with unchanged font, plain layout, equations only, insets listed in
//     lyxCodeSpecialCases only, flex insets that ask for it);
//  2. derive open_par / close_par from runparams and special_case;
//  3. generate the content with simpleDocBookOnePar() and, for each generated piece, call
//     openParTag(), write the piece unescaped, and call closeParTag().
318 auto const begin = text.paragraphs().begin();
319 auto const end = text.paragraphs().end();
320 auto prevpar = text.paragraphs().getParagraphBefore(par);
322 // We want to open the paragraph tag if:
323 // (i) the current layout permits multiple paragraphs
324 // (ii) we are either not already inside a paragraph (HTMLIsBlock) OR
325 // we are, but this is not the first paragraph
327 // But there is also a special case, and we first see whether we are in it.
328 // We do not want to open the paragraph tag if this paragraph contains
329 // only one item, and that item is "inline", i.e., not HTMLIsBlock (such
330 // as a branch). On the other hand, if that single item has a font change
331 // applied to it, then we still do need to open the paragraph.
333 // Obviously, this is very fragile. The main reason we need to do this is
334 // because of branches, e.g., a branch that contains an entire new section.
335 // We do not really want to wrap that whole thing in a <div>...</div>.
336 bool special_case = false;
// Only a paragraph of size 1 can provide the "single inline inset" candidate.
337 Inset const *specinset = par->size() == 1 ? par->getInset(0) : nullptr;
338 if (specinset && !specinset->getLayout().htmlisblock()) { // TODO: Convert htmlisblock to a DocBook parameter?
339 Layout const &style = par->layout();
// Font the layout would give to the first position (label font for manual labels) versus the
// font actually in use at position 0.
340 FontInfo const first_font = style.labeltype == LABEL_MANUAL ?
341 style.labelfont : style.font;
342 FontInfo const our_font =
343 par->getFont(buf.masterBuffer()->params(), 0,
344 text.outerFont(std::distance(begin, par))).fontInfo();
346 if (first_font == our_font)
// nInsets == parSize is used below as the test "every position of the paragraph is an inset".
350 size_t nInsets = std::distance(par->insetList().begin(), par->insetList().end());
351 auto parSize = (size_t) par->size();
353 // Plain layouts must be ignored.
354 special_case |= buf.params().documentClass().isPlainLayout(par->layout()) && !runparams.docbook_force_pars;
355 // Equations do not deserve their own paragraph (DocBook allows them outside paragraphs).
356 // Exception: any case that generates an <inlineequation> must still get a paragraph to be valid.
357 special_case |= nInsets == parSize && std::all_of(par->insetList().begin(), par->insetList().end(), [](InsetList::Element inset) {
358 return inset.inset && inset.inset->asInsetMath() && inset.inset->asInsetMath()->getType() != hullSimple;
361 // Things that should not get into their own paragraph. (Only valid for DocBook.)
362 static std::set<InsetCode> lyxCodeSpecialCases = {
365 BIBTEX_CODE, // Bibliographies cannot be in paragraphs. Bibitems should still be handled as paragraphs,
366 // though (see makeParagraphBibliography).
367 ERT_CODE, // ERTs are in comments, not paragraphs.
372 TOC_CODE, // To be ignored in DocBook, the processor afterwards should deal with ToCs.
373 NOTE_CODE // Notes do not produce any output.
// Predicate: the inset's code belongs to lyxCodeSpecialCases.
375 auto isLyxCodeSpecialCase = [](InsetList::Element inset) {
376 return lyxCodeSpecialCases.find(inset.inset->lyxCode()) != lyxCodeSpecialCases.end();
378 special_case |= nInsets == parSize && std::all_of(par->insetList().begin(), par->insetList().end(), isLyxCodeSpecialCase);
380 // Flex elements (InsetLayout) have their own parameter to control the special case.
381 auto isFlexSpecialCase = [](InsetList::Element inset) {
382 if (inset.inset->lyxCode() != FLEX_CODE)
385 // Standard condition: check the parameter.
386 if (inset.inset->getLayout().docbooknotinpara())
389 // If the parameter is not set, maybe the flex inset only contains things that should match the standard
390 // condition. In this case, isLyxCodeSpecialCase must also check for bibitems...
// This local predicate shadows the outer isLyxCodeSpecialCase and additionally accepts BIBITEM_CODE.
391 auto isLyxCodeSpecialCase = [](InsetList::Element inset) {
392 return lyxCodeSpecialCases.find(inset.inset->lyxCode()) != lyxCodeSpecialCases.end() ||
393 inset.inset->lyxCode() == BIBITEM_CODE;
// Inspect each paragraph contained in the flex inset (the names text, par, nInsets and parSize
// shadow the outer ones inside this lambda).
395 if (InsetText * text = inset.inset->asInsetText()) {
396 for (auto const & par : text->paragraphs()) {
397 size_t nInsets = std::distance(par.insetList().begin(), par.insetList().end());
398 auto parSize = (size_t) par.size();
400 if (nInsets == 1 && par.insetList().begin()->inset->lyxCode() == BIBITEM_CODE)
402 if (nInsets != parSize)
404 if (!std::all_of(par.insetList().begin(), par.insetList().end(), isLyxCodeSpecialCase))
410 // No case matched: give up.
413 special_case |= nInsets == parSize && std::all_of(par->insetList().begin(), par->insetList().end(), isFlexSpecialCase);
415 // Open a paragraph if it is allowed, we are not already within a paragraph, and the insets in the paragraph do
416 // not forbid paragraphs (aka special cases).
417 bool const open_par = runparams.docbook_make_pars
418 && !runparams.docbook_in_par
421 // We want to issue the closing tag if either:
422 // (i) We opened it, and either docbook_in_par is false,
423 // or we're not in the last paragraph, anyway.
424 // (ii) We didn't open it and docbook_in_par is true,
425 // but we are in the first par, and there is a next par.
426 bool const close_par = open_par && !runparams.docbook_in_par;
428 // Determine if this paragraph has some real content. Things like new pages are not caught
429 // by Paragraph::empty(), even though they do not generate anything useful in DocBook.
430 // Thus, remove all spaces (including new lines: \r, \n) before checking for emptiness.
431 // std::all_of allows doing this check without having to copy the string.
432 // Open and close tags around each contained paragraph.
435 auto pars = par->simpleDocBookOnePar(buf, runparams, text.outerFont(distance(begin, par)), 0, nextpar == end, special_case);
436 for (docstring const & parXML : pars) {
// Pieces made of white space only are tested for here (see the comment above about real content).
437 if (!xml::isNotOnlySpace(parXML))
441 openParTag(xs, &*par, prevpar, runparams);
// The piece is already XML: write it without escaping.
443 xs << XMLStream::ESCAPE_NONE << parXML;
// closeParTag() takes a pointer: pass null when there is no next paragraph.
446 closeParTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr, runparams);
// Outputs a paragraph whose layout is an environment (makeAny() calls this for LATEX_ENVIRONMENT).
// The environment tags are opened with openParTag() and closed with closeParTag(); in between, the
// content generated by simpleDocBookOnePar() is written piece by piece, each piece being enclosed in the
// layout's item inner tag.
// Layouts whose DocBook tag is "programlisting" are handled as a listing-like special case
// (see mimicListing / ignoreFonts).
451 void makeEnvironment(Text const &text,
454 OutputParams const &runparams,
455 ParagraphList::const_iterator const & par)
458 auto const end = text.paragraphs().end();
462 // Special cases for listing-like environments provided in layouts. This is quite ad-hoc, but provides a useful
463 // default. This should not be used by too many environments (only LyX-Code right now).
464 // This would be much simpler if LyX-Code was implemented as InsetListings...
465 bool mimicListing = false;
466 bool ignoreFonts = false;
467 if (par->layout().docbooktag() == "programlisting") {
472 // Output the opening tag for this environment, but only if it has not been previously opened (condition
473 // implemented in openParTag).
474 auto prevpar = text.paragraphs().getParagraphBefore(par);
475 openParTag(xs, &*par, prevpar, runparams);
477 // Generate the contents of this environment. There is a special case if this is like some environment.
478 Layout const & style = par->layout();
479 if (style.latextype == LATEX_COMMAND) {
480 // Nothing to do (otherwise, infinite loops).
481 } else if (style.latextype == LATEX_ENVIRONMENT) {
482 // Generate the paragraph, if need be.
483 auto pars = par->simpleDocBookOnePar(buf, runparams, text.outerFont(std::distance(text.paragraphs().begin(), par)), 0, false, ignoreFonts);
// Explicit-iterator variant of the output loop (the comment below explains that a new line is
// inserted after each piece except the last one).
486 auto p = pars.begin();
487 while (p != pars.end()) {
488 xml::openTag(xs, par->layout().docbookiteminnertag(), par->layout().docbookiteminnerattr(),
489 par->layout().docbookiteminnertagtype());
490 xs << XMLStream::ESCAPE_NONE << *p;
491 xml::closeTag(xs, par->layout().docbookiteminnertag(), par->layout().docbookiteminnertagtype());
494 // Insert a new line after each "paragraph" (i.e. line in the listing), except for the last one.
495 // Otherwise, there would be one more new line in the output than in the LyX document.
// Plain variant of the output loop: each piece is enclosed in the item inner tag.
500 for (auto const & p : pars) {
501 xml::openTag(xs, par->layout().docbookiteminnertag(), par->layout().docbookiteminnerattr(),
502 par->layout().docbookiteminnertagtype());
503 xs << XMLStream::ESCAPE_NONE << p;
504 xml::closeTag(xs, par->layout().docbookiteminnertag(), par->layout().docbookiteminnertagtype());
508 makeAny(text, buf, xs, runparams, par);
511 // Close the environment.
// closeParTag() takes a pointer: pass null when there is no next paragraph.
512 closeParTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr, runparams);
// Walks forward from pstart, never past pend, to locate the end of the environment that starts at pstart.
// makeListEnvironment() uses the result as the past-the-end iterator of the list it outputs.
// Each following paragraph is compared with the first one on three criteria: its LaTeX type, its depth
// (relative to the depth of pstart) and its layout (relative to the layout of pstart).
516 ParagraphList::const_iterator findEndOfEnvironment(
517 ParagraphList::const_iterator const & pstart,
518 ParagraphList::const_iterator const & pend)
520 // Copy-paste from XHTML. Should be factored out at some point...
521 ParagraphList::const_iterator p = pstart;
// Layout and depth of the first paragraph: the references used by every test in the loop.
522 Layout const & bstyle = p->layout();
523 size_t const depth = p->params().depth();
524 for (++p; p != pend; ++p) {
525 Layout const & style = p->layout();
526 // It shouldn't happen that e.g. a section command occurs inside
527 // a quotation environment, at a higher depth, but as of 6/2009,
528 // it can happen. We pretend that it's just at lowest depth.
529 if (style.latextype == LATEX_COMMAND)
532 // If depth is down, we're done
533 if (p->params().depth() < depth)
536 // If depth is up, we're not done
537 if (p->params().depth() > depth)
540 // FIXME I am not sure about the first check.
541 // Surely we *could* have different layouts that count as
542 // LATEX_PARAGRAPH, right?
543 if (style.latextype == LATEX_PARAGRAPH || style != bstyle)
// Outputs a whole list environment, starting at paragraph begin, and returns an iterator so that the
// caller knows where to resume (makeAny() returns this value directly for list layouts).
// Structure of the output, as driven by the layout of the paragraphs:
//   wrapper tag > tag                         (once for the environment, from the first paragraph's layout)
//     item wrapper > [item label] > item > item inner   (for each item)
// The extent of the list is computed with findEndOfEnvironment(). Paragraphs whose depth differs from the
// current item's depth are output through makeAny() before the item is closed, so that they end up
// inside the item.
550 ParagraphList::const_iterator makeListEnvironment(Text const &text,
553 OutputParams const &runparams,
554 ParagraphList::const_iterator const & begin)
558 auto const end = text.paragraphs().end();
559 auto const envend = findEndOfEnvironment(par, end);
561 // Output the opening tag for this environment.
562 Layout const & envstyle = par->layout();
563 xml::openTag(xs, envstyle.docbookwrappertag(), envstyle.docbookwrapperattr(), envstyle.docbookwrappertagtype());
564 xml::openTag(xs, envstyle.docbooktag(), envstyle.docbookattr(), envstyle.docbooktagtype());
566 // Handle the content of the list environment, item by item.
567 while (par != envend) {
568 // Skip this paragraph if it is both empty and the last one (otherwise, there may be deeper paragraphs after).
571 if (par->empty() && nextpar == envend)
574 // Open the item wrapper.
575 Layout const & style = par->layout();
576 xml::openTag(xs, style.docbookitemwrappertag(), style.docbookitemwrapperattr(),
577 style.docbookitemwrappertagtype());
579 // Generate the label, if need be. If it is taken from the text, sep != 0 and corresponds to the first
580 // character after the label.
582 if (style.labeltype != LABEL_NO_LABEL && style.docbookitemlabeltag() != "NONE") {
583 if (style.labeltype == LABEL_MANUAL) {
584 // Only variablelist gets here (or similar items defined as an extension in the layout).
585 xml::openTag(xs, style.docbookitemlabeltag(), style.docbookitemlabelattr(),
586 style.docbookitemlabeltagtype());
// firstWordDocBook() writes the label to xs; sep is set to one past the value it returns.
587 sep = 1 + par->firstWordDocBook(xs, runparams);
588 xml::closeTag(xs, style.docbookitemlabeltag(), style.docbookitemlabeltagtype());
590 // Usual cases: maybe there is something specified at the layout level. Highly unlikely, though.
591 docstring const lbl = par->params().labelString();
594 xml::openTag(xs, style.docbookitemlabeltag(), style.docbookitemlabelattr(),
595 style.docbookitemlabeltagtype());
597 xml::closeTag(xs, style.docbookitemlabeltag(), style.docbookitemlabeltagtype());
602 // Open the item (after the wrapper and the label).
603 xml::openTag(xs, style.docbookitemtag(), style.docbookitemattr(), style.docbookitemtagtype());
605 // Generate the content of the item.
// Only when something remains in the paragraph after the label.
606 if (sep < par->size()) {
607 auto pars = par->simpleDocBookOnePar(buf, runparams,
608 text.outerFont(std::distance(text.paragraphs().begin(), par)), sep);
609 for (auto &p : pars) {
610 xml::openTag(xs, par->layout().docbookiteminnertag(), par->layout().docbookiteminnerattr(),
611 par->layout().docbookiteminnertagtype());
612 xs << XMLStream::ESCAPE_NONE << p;
613 xml::closeTag(xs, par->layout().docbookiteminnertag(), par->layout().docbookiteminnertagtype());
616 // DocBook doesn't like emptiness.
617 xml::compTag(xs, par->layout().docbookiteminnertag(), par->layout().docbookiteminnerattr(),
618 par->layout().docbookiteminnertagtype());
621 // If the next item is deeper, it must go entirely within this item (do it recursively).
622 // By construction, with findEndOfEnvironment, depth can only stay constant or increase, never decrease.
623 depth_type currentDepth = par->getDepth();
625 while (par != envend && par->getDepth() != currentDepth)
626 par = makeAny(text, buf, xs, runparams, par);
627 // Usually, this loop only makes one iteration, except in complex scenarios, like an item with a paragraph,
628 // a list, and another paragraph; or an item with two types of list (itemise then enumerate, for instance).
631 xml::closeTag(xs, style.docbookitemtag(), style.docbookitemtagtype());
632 xml::closeTag(xs, style.docbookitemwrappertag(), style.docbookitemwrappertagtype());
635 // Close this environment in exactly the same way as it was opened.
636 xml::closeTag(xs, envstyle.docbooktag(), envstyle.docbooktagtype());
637 xml::closeTag(xs, envstyle.docbookwrappertag(), envstyle.docbookwrappertagtype());
647 OutputParams const & runparams,
648 ParagraphList::const_iterator const & par)
// Command handler (makeAny() calls makeCommand() from its switch on the LaTeX type of the layout).
// The paragraph's tags are opened with openParTag(), the content generated by simpleDocBookOnePar() is
// written unescaped, and the tags are closed with closeParTag().
651 // Unlike XHTML, no need for labels, as they are handled by DocBook tags.
652 auto const begin = text.paragraphs().begin();
653 auto const end = text.paragraphs().end();
657 // Generate this command.
658 auto prevpar = text.paragraphs().getParagraphBefore(par);
659 openParTag(xs, &*par, prevpar, runparams);
661 auto pars = par->simpleDocBookOnePar(buf, runparams,text.outerFont(distance(begin, par)));
662 for (auto & parXML : pars)
663 // TODO: decide what to do with openParTag/closeParTag in new lines.
664 xs << XMLStream::ESCAPE_NONE << parXML;
// closeParTag() takes a pointer: pass null when there is no next paragraph.
666 closeParTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr, runparams);
// Tells whether a layout must be handled as a section in DocBook.
// Two ways to qualify are tested: the layout's docbooksection() flag, or a layout in the "Sectioning"
// category / with the DocBook tag "section"; in the latter case the answer is whether the layout
// appears in the table of contents (toclevel != Layout::NOT_IN_TOC).
670 bool isLayoutSectioning(Layout const & lay)
672 if (lay.docbooksection()) // Special case: some DocBook styles must be handled as sections.
674 else if (lay.category() == from_utf8("Sectioning") || lay.docbooktag() == "section") // Generic case.
675 return lay.toclevel != Layout::NOT_IN_TOC;
// Same as isLayoutSectioning(), but also accepts layouts whose DocBook tag is "bridgehead".
680 bool isLayoutSectioningOrSimilar(Layout const & lay)
682 return isLayoutSectioning(lay) || lay.docbooktag() == "bridgehead";
// Result type of hasDocumentSectioning(): (whether a sectioning-like layout was found, value of the
// paragraph index bpit when the function returns).
686 using DocBookDocumentSectioning = tuple<bool, pit_type>;
// Result of getParagraphsWithInfo(): the paragraphs that are candidates for the <info> tag, sorted into
// sets of paragraph indices.
689 struct DocBookInfoTag
691 const set<pit_type> shouldBeInInfo; // Filled with paragraphs whose layout has docbookininfo() == "maybe".
692 const set<pit_type> mustBeInInfo; // With the notable exception of the abstract!
693 const set<pit_type> abstract; // Paragraphs of the abstract (detected by layout or, failing that, by position).
694 const bool abstractLayout; // Whether the abstract was found through a layout marked as abstract.
// Plain member-wise constructor; bpit and epit are stored next to the sets (getParagraphsWithInfo()
// passes the beginning of the <info> part and the first paragraph after it).
698 DocBookInfoTag(const set<pit_type> & shouldBeInInfo, const set<pit_type> & mustBeInInfo,
699 const set<pit_type> & abstract, bool abstractLayout, pit_type bpit, pit_type epit) :
700 shouldBeInInfo(shouldBeInInfo), mustBeInInfo(mustBeInInfo), abstract(abstract),
701 abstractLayout(abstractLayout), bpit(bpit), epit(epit) {}
// Scans the paragraphs in [bpit, epit) for a layout accepted by isLayoutSectioningOrSimilar().
// Returns a tuple (documentHasSections, bpit): whether such a layout was found, and the value of bpit
// at the time of return.
// NOTE(review): the first parameter is spelled "¶graphs" below while the body reads "paragraphs"; this
// looks like a mis-decoded "&paragraphs" -- confirm against the repository copy.
705 DocBookDocumentSectioning hasDocumentSectioning(ParagraphList const ¶graphs, pit_type bpit, pit_type const epit) {
706 bool documentHasSections = false;
708 while (bpit < epit) {
709 Layout const &style = paragraphs[bpit].layout();
710 documentHasSections |= isLayoutSectioningOrSimilar(style);
712 if (documentHasSections)
716 // Paragraphs before the first section: [ runparams.par_begin ; eppit )
718 return make_tuple(documentHasSections, bpit);
// Tells whether a paragraph holds nothing but note insets (NOTE_CODE), which produce no DocBook output.
// Every position is inspected: a position that is not an inset, or an inset that is not a note, is what
// the loop looks for. Paragraphs whose layout has docbooksection() set are tested separately afterwards.
722 bool hasOnlyNotes(Paragraph const & par)
724 // Precondition: the paragraph is not empty. Otherwise, the function will always return true...
725 for (int i = 0; i < par.size(); ++i)
726 // If you find something that is not an inset (like actual text) or an inset that is not a note,
728 if (!par.isInset(i) || par.getInset(i)->lyxCode() != NOTE_CODE)
731 // An empty paragraph may still require some output.
732 if (par.layout().docbooksection())
735 // There should be really no content here.
// Sorts the paragraphs of [bpit, epit) into the categories stored in a DocBookInfoTag:
//   - mustBeInInfo:   layouts with docbookininfo() == "always";
//   - shouldBeInInfo: layouts with docbookininfo() == "maybe";
//   - abstract:       paragraphs whose layout is marked docbookabstract() (plus deeper paragraphs that
//                     follow them), or, when no such layout is met, paragraphs that merely look like an
//                     abstract (no wrapper, tag "NONE" or "para") if detectUnlayoutedAbstract allows it.
// The returned object also records whether the abstract was found by layout, the index of the first
// paragraph considered (bpit, after the leading skip loop) and the index where the traversal stopped
// (cpit).
// NOTE(review): the first parameter is spelled "¶graphs" below while the body reads "paragraphs"; this
// looks like a mis-decoded "&paragraphs" -- confirm against the repository copy.
740 DocBookInfoTag getParagraphsWithInfo(ParagraphList const ¶graphs,
741 pit_type bpit, pit_type const epit,
742 // Typically, bpit is the beginning of the document and epit the end of the
743 // document *or* the first section.
744 bool documentHasSections,
745 bool detectUnlayoutedAbstract
746 // Whether paragraphs with no specific layout should be detected as abstracts.
747 // For inner sections, an abstract should only be detected if it has a specific
748 // layout. For others, anything that might look like an abstract should be sought.
750 set<pit_type> shouldBeInInfo;
751 set<pit_type> mustBeInInfo;
752 set<pit_type> abstractWithLayout;
753 set<pit_type> abstractNoLayout;
755 // Find the first non empty paragraph by mutating bpit.
756 while (bpit < epit) {
757 Paragraph const &par = paragraphs[bpit];
758 if (par.empty() || hasOnlyNotes(par))
764 // Traverse everything that might belong to <info>.
765 bool hasAbstractLayout = false;
// INVALID_DEPTH is the sentinel meaning "not currently inside an abstract".
766 static depth_type INVALID_DEPTH = 100000;
767 depth_type abstractDepth = INVALID_DEPTH;
768 pit_type cpit = bpit;
769 for (; cpit < epit; ++cpit) {
770 // Skip paragraphs that don't generate anything in DocBook.
771 Paragraph const & par = paragraphs[cpit];
772 Layout const &style = par.layout();
773 if (hasOnlyNotes(par))
776 // There should never be any section here, except for the first paragraph (a title can be part of <info>).
777 // (Just a sanity check: if this fails, this function could end up processing the whole document.)
778 if (cpit != bpit && isLayoutSectioningOrSimilar(par.layout())) {
779 LYXERR0("Assertion failed: section found in potential <info> paragraphs.");
783 // If this is marked as an abstract by the layout, put it in the right set.
784 if (style.docbookabstract()) {
785 hasAbstractLayout = true;
// Remember the depth of the abstract to recognise the deeper paragraphs that belong to it.
786 abstractDepth = par.getDepth();
787 abstractWithLayout.emplace(cpit);
791 // Deeper paragraphs following the abstract must still be considered as part of the abstract.
792 // For instance, this includes lists. There should not be any other kind of paragraph in between.
793 if (abstractDepth != INVALID_DEPTH && style.docbookininfo() == "never") {
794 if (par.getDepth() > abstractDepth) {
795 abstractWithLayout.emplace(cpit);
798 if (par.getDepth() == abstractDepth) {
799 // This is not an abstract paragraph and it should not either be considered as part
800 // of it. It breaks the rule that abstract paragraphs must follow each other.
801 abstractDepth = INVALID_DEPTH;
806 // Based on layout information, store this paragraph in one set: should be in <info>, must be,
807 // or abstract (either because of layout or of position).
808 if (style.docbookininfo() == "always")
809 mustBeInInfo.emplace(cpit);
810 else if (style.docbookininfo() == "maybe")
811 shouldBeInInfo.emplace(cpit);
812 else if (documentHasSections && !hasAbstractLayout && detectUnlayoutedAbstract &&
813 (style.docbooktag() == "NONE" || style.docbooktag() == "para") &&
814 style.docbookwrappertag() == "NONE")
815 // In this case, it is very likely that style.docbookininfo() == "never"! Be extra careful
816 // about anything that gets caught here. For instance, don't take into account
817 abstractNoLayout.emplace(cpit);
818 else // This should definitely not be in <info>.
821 // Now, cpit points to the first paragraph that no longer has things that could go in <info>.
822 // bpit is the beginning of the <info> part.
// The abstract set reported is the layout-based one when a layout-marked abstract was met, and the
// position-based one otherwise.
824 return DocBookInfoTag(shouldBeInInfo, mustBeInInfo,
825 hasAbstractLayout ? abstractWithLayout : abstractNoLayout,
826 hasAbstractLayout, bpit, cpit);
829 } // end anonymous namespace
// Collects, for one paragraph, the text insets whose layout has docbookininfo() != "never", descending
// recursively into the paragraphs of the text insets it meets and merging what the recursive calls
// return. Paragraphs whose layout has the DocBook tag "IGNORE" are tested for first.
// outputDocBookInfo() calls this on the paragraphs of the abstract.
832 std::set<const Inset *> gatherInfo(ParagraphList::const_iterator par)
834 // This function has a structure highly similar to makeAny and its friends. It's only made to be called on what
835 // should become the document's <abstract>.
836 std::set<const Inset *> values;
838 // If this kind of layout should be ignored, already leave.
839 if (par->layout().docbooktag() == "IGNORE")
842 // If this should go in info, mark it as such. Dive deep into the abstract, as it may hide many things that
843 // DocBook doesn't want to be inside the abstract.
844 for (pos_type i = 0; i < par->size(); ++i) {
// Only insets that are text insets (asInsetText() non-null) are of interest.
845 if (par->getInset(i) && par->getInset(i)->asInsetText()) {
846 InsetText const *inset = par->getInset(i)->asInsetText();
848 if (inset->getLayout().docbookininfo() != "never") {
849 values.insert(inset);
// Recurse into each paragraph of the inset and merge the results into values.
851 auto subpar = inset->paragraphs().begin();
852 while (subpar != inset->paragraphs().end()) {
853 auto subinfos = gatherInfo(subpar);
854 for (auto & subinfo: subinfos)
855 values.insert(subinfo);
// Dispatcher: outputs the paragraph par with the handler matching the LaTeX type of its layout and
// returns the iterator of the next paragraph to handle. Only list environments may consume more than
// one paragraph, in which case the value returned by makeListEnvironment() is returned as is.
// A paragraph is ignored when its layout has the DocBook tag "IGNORE", or when it is not an abstract,
// its layout has docbookininfo() != "never" and runparams.docbook_generate_info is false.
866 ParagraphList::const_iterator makeAny(Text const &text,
869 OutputParams const &runparams,
870 ParagraphList::const_iterator par)
872 bool ignoreParagraph = false;
874 // If this kind of layout should be ignored, already leave.
875 ignoreParagraph |= par->layout().docbooktag() == "IGNORE";
877 // For things that should go into <info>, check the variable rp.docbook_generate_info. This does not apply to the
// An abstract is recognised either by the layout flag or by the DocBook tag "abstract".
879 bool isAbstract = par->layout().docbookabstract() || par->layout().docbooktag() == "abstract";
880 ignoreParagraph |= !isAbstract && par->layout().docbookininfo() != "never" && !runparams.docbook_generate_info;
882 // Switch on the type of paragraph to call the right handler.
883 if (!ignoreParagraph) {
884 switch (par->layout().latextype) {
886 makeCommand(text, buf, xs, runparams, par);
888 case LATEX_ENVIRONMENT:
889 makeEnvironment(text, buf, xs, runparams, par);
891 case LATEX_LIST_ENVIRONMENT:
892 case LATEX_ITEM_ENVIRONMENT:
893 // Only case when makeAny() might consume more than one paragraph.
894 return makeListEnvironment(text, buf, xs, runparams, par);
895 case LATEX_PARAGRAPH:
896 makeParagraph(text, buf, xs, runparams, par);
898 case LATEX_BIB_ENVIRONMENT:
899 makeBibliography(text, buf, xs, runparams, par);
904 // For cases that are not lists, the next paragraph to handle is the next one.
// Builds the opening font tag for a font type: tag name from fontToDocBookTag(), attribute from
// fontToAttribute(), both converted with from_utf8.
910 xml::FontTag docbookStartFontTag(xml::FontTypes type)
912 return xml::FontTag(from_utf8(fontToDocBookTag(type)), from_utf8(fontToAttribute(type)), type);
// Builds the closing font tag for a font type, with the same tag name as docbookStartFontTag()
// (fontToDocBookTag(), converted with from_utf8).
916 xml::EndFontTag docbookEndFontTag(xml::FontTypes type)
918 return xml::EndFontTag(from_utf8(fontToDocBookTag(type)), type);
// Outputs the metadata described by `info`: the elements of info.shouldBeInInfo (typically, the title), then
// those of info.mustBeInInfo, then the insets gathered from the abstract paragraphs, and finally the abstract.
// An <info> tag is opened around them when info.mustBeInInfo is not empty or when there is an abstract that
// generates actual content.
//  - runparams: output parameters used when generating the paragraphs.
//  - paragraphs: the paragraph list that the indices stored in `info` refer to.
//  - info: the paragraph indices to output (shouldBeInInfo, mustBeInInfo, abstract).
// NOTE(review): `text`, `buf` and `xs` are used below, but their declarations are not on the visible lines of the
// signature -- confirm against the complete parameter list.
922 void outputDocBookInfo(
926 OutputParams const & runparams,
927 ParagraphList const & paragraphs,
928 DocBookInfoTag const & info)
930 // Perform an additional check on the abstract. Sometimes, there are many paragraphs that should go
931 // into the abstract, but none generates actual content. Thus, first generate to a temporary stream,
932 // then only create the <abstract> tag if these paragraphs generate some content.
933 // This check must be performed *before* a decision on whether or not to output <info> is made.
934 bool hasAbstract = !info.abstract.empty();
936 set<const Inset *> infoInsets; // Insets that should go into <info>, but are hidden in an <abstract>
937 // paragraph. (This happens for quite a few layouts, unfortunately.)
940 // Generate the abstract XML into a string before further checks.
941 // Usually, makeAny only generates one paragraph at a time. However, for the specific case of lists, it might
942 // generate more than one paragraph, as indicated in the return value.
943 odocstringstream os2;
// NOTE(review): the declarations of `rp` and `xs2` are not on the visible lines; presumably `rp` is a copy of
// runparams and `xs2` a stream writing into os2 -- confirm.
947 rp.docbook_generate_info = false;
948 rp.docbook_ignore_wrapper = true;
950 set<pit_type> doneParas; // Paragraphs that have already been converted (mostly to deal with lists).
951 for (auto const & p : info.abstract) {
952 if (doneParas.find(p) == doneParas.end()) {
953 auto oldPar = paragraphs.iterator_at(p);
954 auto newPar = makeAny(text, buf, xs2, rp, oldPar);
956 // Find insets that should go outside the abstract.
957 auto subinfos = gatherInfo(oldPar);
958 for (auto & subinfo: subinfos)
959 infoInsets.insert(subinfo);
961 // Insert the indices of all the paragraphs that were just generated (typically, one).
962 // **Make the hypothesis that, when an abstract has a list, all its items are consecutive.**
963 // Otherwise, makeAny and makeListEnvironment would have to be adapted too.
// NOTE(review): the declaration of `id` and the statements advancing `oldPar` and `id` are not on the visible
// lines -- confirm that both are incremented in this loop.
965 while (oldPar != newPar) {
966 doneParas.emplace(id);
973 // Actually output the abstract if there is something to do. Don't count line feeds or spaces in this,
974 // even though they must be properly output if there is some abstract.
975 abstract = os2.str();
// Work on a copy from which all the characters matched by lyx::isSpace are removed: `abstract` itself must be
// kept as generated.
976 docstring cleaned = abstract;
977 cleaned.erase(std::remove_if(cleaned.begin(), cleaned.end(), lyx::isSpace), cleaned.end());
979 // Nothing? Then there is no abstract!
984 // The abstract must go in <info>. Otherwise, decide whether to open <info> based on the layouts.
985 bool needInfo = !info.mustBeInInfo.empty() || hasAbstract;
987 // Start the <info> tag if required.
989 xs.startDivision(false);
990 xs << xml::StartTag("info");
994 // Output the elements that should go in <info>.
995 // - First, the title.
996 for (auto pit : info.shouldBeInInfo) // Typically, the title: these elements are so important and ubiquitous
997 // that mandating a wrapper like <info> would repel users. Thus, generate them first.
998 makeAny(text, buf, xs, runparams, paragraphs.iterator_at(pit));
999 // If there is no title, generate one (required for the document to be valid).
1000 // This code is called for the main document, for table cells, etc., so be precise in this condition.
1001 if (text.isMainText() && info.shouldBeInInfo.empty() && !runparams.inInclude) {
1002 xs << xml::StartTag("title");
1003 xs << "Untitled Document";
1004 xs << xml::EndTag("title");
1008 // - Then, other metadata.
1009 for (auto pit : info.mustBeInInfo)
1010 makeAny(text, buf, xs, runparams, paragraphs.iterator_at(pit));
1011 for (auto const * inset : infoInsets)
1012 inset->docbook(xs, runparams);
1014 // - Finally, always output the abstract as the last item of the <info>, as it requires special treatment
1015 // (especially if it contains several paragraphs that are empty).
// The tag wrapping the abstract is taken from the layout of the first paragraph of the abstract.
1017 string tag = paragraphs[*info.abstract.begin()].layout().docbookforceabstracttag();
1021 if (!xs.isLastTagCR())
1024 xs << xml::StartTag(tag);
// The abstract has already been generated as XML (see above): output it without escaping it again.
1026 xs << XMLStream::ESCAPE_NONE << abstract;
1027 xs << xml::EndTag(tag);
1031 // End the <info> tag if it was started.
1033 if (!xs.isLastTagCR())
1036 xs << xml::EndTag("info");
1043 void docbookSimpleAllParagraphs(
1047 OutputParams const & runparams)
1049 // Handle the given text, supposing it has no sections (i.e. a "simple" text). The input may vary in length
1050 // between a single paragraph to a whole document.
1051 pit_type const bpit = runparams.par_begin;
1052 pit_type const epit = runparams.par_end;
1053 ParagraphList const ¶graphs = text.paragraphs();
1055 // First, the <info> tag.
1056 DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit, false, true);
1057 outputDocBookInfo(text, buf, xs, runparams, paragraphs, info);
1059 // Then, the content. It starts where the <info> ends.
1060 auto par = paragraphs.iterator_at(info.epit);
1061 auto end = paragraphs.iterator_at(epit);
1062 while (par != end) {
1063 if (!hasOnlyNotes(*par))
1064 par = makeAny(text, buf, xs, runparams, par);
// Outputs the paragraphs of `text` as DocBook. Texts without sections are delegated to
// docbookSimpleAllParagraphs(). Otherwise, a first <info> (or just the title) is output, then the paragraphs are
// iterated over: section tags are opened and closed according to the sectioning layouts that are met, and the
// sections that are currently open are tracked in a stack (headerLevels).
//  - text: the text whose paragraphs are output.
//  - runparams: output parameters; par_begin and par_end delimit the paragraphs to handle.
// NOTE(review): `buf` and `xs` are used below, but their declarations are not on the visible lines of the
// signature -- confirm against the complete parameter list.
1071 void docbookParagraphs(Text const &text,
1074 OutputParams const &runparams) {
// NOTE(review): the declarator below reads `¶graphs`, whereas every later use is spelled `paragraphs`. This looks
// like a mis-decoded `&paragraphs` (reference declarator) -- confirm against the upstream file and fix.
1075 ParagraphList const ¶graphs = text.paragraphs();
// An empty range (par_begin == par_end) stands for the whole text.
1076 if (runparams.par_begin == runparams.par_end) {
1077 runparams.par_begin = 0;
1078 runparams.par_end = paragraphs.size();
1080 pit_type bpit = runparams.par_begin;
1081 pit_type const epit = runparams.par_end;
// NOTE(review): only the first line of this LASSERT is visible; presumably, the statement that follows is part
// of what is done when the assertion fails -- confirm.
1082 LASSERT(bpit < epit,
1084 xs << XMLStream::ESCAPE_NONE << "<!-- DocBook output error! -->\n";
1088 // Detect whether the document contains sections. If there are no sections, treatment is largely simplified.
1089 // In particular, there can't be an abstract, unless it is manually marked.
1090 bool documentHasSections;
// hasDocumentSectioning returns two values: whether there are sections, and a paragraph index (eppit) that is
// used below as the end of the range in which the first <info> is looked for.
1092 tie(documentHasSections, eppit) = hasDocumentSectioning(paragraphs, bpit, epit);
1094 // Deal with "simple" documents, i.e. those without sections.
1095 if (!documentHasSections) {
1096 docbookSimpleAllParagraphs(text, buf, xs, runparams);
1100 // Output the first <info> tag (or just the title).
1101 DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, eppit, true, true);
1102 outputDocBookInfo(text, buf, xs, runparams, paragraphs, info);
1105 std::stack<std::pair<int, string>> headerLevels; // Used to determine when to open/close sections: store the depth
1106 // of the section and the tag that was used to open it.
1108 // Then, iterate through the paragraphs of this document.
1109 auto par = text.paragraphs().iterator_at(bpit);
1110 auto end = text.paragraphs().iterator_at(epit);
1111 while (par != end) {
1112 // Skip paragraphs not producing any output.
1113 if (hasOnlyNotes(*par)) {
// Work on a copy of the parameters: the anchors to ignore are added to it for the current paragraph (see below).
1118 OutputParams ourparams = runparams;
1119 Layout const &style = par->layout();
1121 // Think about adding <section> and/or </section>s.
1122 if (isLayoutSectioning(style) || par->params().startOfAppendix()) {
1123 int level = style.toclevel;
1125 // Need to close a previous section if it has the same level or a higher one (close <section> if opening a
1126 // <h2> after a <h2>, <h3>, <h4>, <h5> or <h6>). More examples:
1127 // - current: h2; back: h1; do not close any <section>
1128 // - current: h1; back: h2; close two <section> (first the <h2>, then the <h1>, so a new <h1> can come)
1129 // Some layouts require that Layout::NOT_IN_TOC sections still cause closing of previous sections. This is
1130 // mostly to ensure that the section is positioned at a DocBook-compatible level (acknowledgements: cannot
1131 // be under a section!).
1132 while (!headerLevels.empty() && level <= headerLevels.top().first) {
1133 // Output the tag only if it corresponds to a legit section.
1134 int stackLevel = headerLevels.top().first;
1135 if (stackLevel != Layout::NOT_IN_TOC) {
1136 xs << xml::EndTag(headerLevels.top().second);
1142 // Open the new section: first push it onto the stack, then output it in DocBook.
// The start of the appendix takes precedence over the section tag defined by the layout.
1143 string sectionTag = (par->params().startOfAppendix()) ? "appendix" : style.docbooksectiontag();
1144 headerLevels.push(std::make_pair(level, sectionTag));
1146 // Some sectioning-like elements should not be output (such as FrontMatter).
1147 if (level != Layout::NOT_IN_TOC) {
1148 // Look for a label in the title, i.e. a InsetLabel as a child.
1149 docstring id = docstring();
1150 for (pos_type i = 0; i < par->size(); ++i) {
1151 Inset const *inset = par->getInset(i);
1153 if (auto label = dynamic_cast<InsetLabel const *>(inset)) {
1154 // Generate the attributes for the section if need be.
1155 id += "xml:id=\"" + xml::cleanID(label->screenLabel()) + "\"";
1157 // Don't output the ID as a DocBook <anchor>.
1158 ourparams.docbook_anchors_to_ignore.emplace(label->screenLabel());
1160 // Cannot have multiple IDs per tag. If there is another ID inset in the document, it will
1161 // be output as a DocBook anchor.
1167 // Write the open tag for this section.
1171 xs << xml::StartTag(sectionTag, attrs);
1176 // Close all sections before the bibliography.
1177 // TODO: Only close all when the bibliography is at the end of the document? Or force to output the bibliography at the end of the document? Or don't care (as allowed by DocBook)?
1178 if (!par->insetList().empty()) {
// A bibliography is recognised by the code of the first inset of the paragraph.
1179 Inset const *firstInset = par->getInset(0);
1180 if (firstInset && (firstInset->lyxCode() == BIBITEM_CODE || firstInset->lyxCode() == BIBTEX_CODE)) {
1181 while (!headerLevels.empty()) {
1182 // Don't close appendices before bibliographies.
1183 if (headerLevels.top().second == "appendix")
1186 // Pop the section from the stack.
1187 int level = headerLevels.top().first;
1188 docstring tag = from_utf8("</" + headerLevels.top().second + ">");
1191 // Output the tag only if it corresponds to a legit section, as the rest of the code.
1192 if (level != Layout::NOT_IN_TOC) {
// The closing tag has been built by hand, as a string: output it without escaping.
1193 xs << XMLStream::ESCAPE_NONE << tag;
1200 // Generate the <info> tag if a section was just opened.
1201 // Some sections may require abstracts (mostly parts, in books: DocBookForceAbstractTag will not be NONE),
1202 // others can still have an abstract (it must be detected so that it can be output at the right place).
1203 // TODO: docbookforceabstracttag is a bit contrived here, but it does the job. Having another field just for this would be cleaner, but that's just for <part> and <partintro>, so it's probably not worth the effort.
1204 if (isLayoutSectioning(style)) {
1205 // This abstract may be found between the next paragraph and the next title.
// cpit is the index of the current paragraph; ppit is the index returned by the search for sectioning that
// starts right after it.
1206 pit_type cpit = std::distance(text.paragraphs().begin(), par);
1207 pit_type ppit = std::get<1>(hasDocumentSectioning(paragraphs, cpit + 1L, epit));
1209 // Generate this abstract (this code corresponds to parts of outputDocBookInfo).
1210 DocBookInfoTag secInfo = getParagraphsWithInfo(paragraphs, cpit, ppit, true,
1211 style.docbookforceabstracttag() != "NONE");
1213 if (!secInfo.mustBeInInfo.empty() || !secInfo.shouldBeInInfo.empty() || !secInfo.abstract.empty()) {
1214 // Generate the <info>, if required. If DocBookForceAbstractTag != NONE, this abstract will not be in
1215 // <info>, unlike other ("standard") abstracts.
1216 bool hasStandardAbstract = !secInfo.abstract.empty() && style.docbookforceabstracttag() == "NONE";
1217 bool needInfo = !secInfo.mustBeInInfo.empty() || hasStandardAbstract;
1220 xs.startDivision(false);
1221 xs << xml::StartTag("info");
1225 // Output the elements that should go in <info>, before and after the abstract.
1226 for (auto pit : secInfo.shouldBeInInfo) // Typically, the title: these elements are so important and ubiquitous
1227 // that mandating a wrapper like <info> would repel users. Thus, generate them first.
1228 makeAny(text, buf, xs, ourparams, paragraphs.iterator_at(pit));
1229 for (auto pit : secInfo.mustBeInInfo)
1230 makeAny(text, buf, xs, ourparams, paragraphs.iterator_at(pit));
1232 // Deal with the abstract in <info> if it is standard (i.e. its tag is <abstract>).
// (hasStandardAbstract already implies that secInfo.abstract is not empty.)
1233 if (!secInfo.abstract.empty() && hasStandardAbstract) {
// The <abstract> wrapper is only written here when secInfo.abstractLayout is false.
1234 if (!secInfo.abstractLayout) {
1235 xs << xml::StartTag("abstract");
1239 for (auto const &p : secInfo.abstract)
1240 makeAny(text, buf, xs, ourparams, paragraphs.iterator_at(p));
1242 if (!secInfo.abstractLayout) {
1243 xs << xml::EndTag("abstract");
1248 // End the <info> tag if it was started.
1250 if (!xs.isLastTagCR())
1253 xs << xml::EndTag("info");
1258 // Deal with the abstract outside <info> if it is not standard (i.e. its tag is layout-defined).
1259 if (!secInfo.abstract.empty() && !hasStandardAbstract) {
1260 // Assert: style.docbookforceabstracttag() != NONE.
1261 xs << xml::StartTag(style.docbookforceabstracttag());
1263 for (auto const &p : secInfo.abstract)
1264 makeAny(text, buf, xs, ourparams, paragraphs.iterator_at(p));
1265 xs << xml::EndTag(style.docbookforceabstracttag());
1269 // Skip all the text that has just been generated.
1270 par = paragraphs.iterator_at(secInfo.epit);
1272 // No <info> tag to generate, proceed as for normal paragraphs.
1273 par = makeAny(text, buf, xs, ourparams, par);
1276 // Generate this paragraph, as it has nothing special.
1277 par = makeAny(text, buf, xs, ourparams, par);
1281 // If need be, close <section>s, but only at the end of the document (otherwise, dealt with at the beginning
// of the loop, when a new section is opened). The loop stops at the first entry of the stack whose level is not
// above Layout::NOT_IN_TOC.
1283 while (!headerLevels.empty() && headerLevels.top().first > Layout::NOT_IN_TOC) {
1284 docstring tag = from_utf8("</" + headerLevels.top().second + ">");
1286 xs << XMLStream::ESCAPE_NONE << tag;