2 * \file output_docbook.cpp
3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
6 * \author Lars Gullik Bjønnes
9 * Full author contact details are available in file CREDITS.
15 #include "buffer_funcs.h"
16 #include "BufferParams.h"
18 #include "InsetList.h"
20 #include "OutputParams.h"
21 #include "Paragraph.h"
22 #include "ParagraphList.h"
23 #include "ParagraphParameters.h"
26 #include "TextClass.h"
28 #include "insets/InsetBibtex.h"
29 #include "insets/InsetBibitem.h"
30 #include "insets/InsetLabel.h"
31 #include "insets/InsetNote.h"
33 #include "support/convert.h"
34 #include "support/debug.h"
35 #include "support/lassert.h"
36 #include "support/lstrings.h"
37 #include "support/textutils.h"
39 #include "support/regex.h"
47 using namespace lyx::support;
// Maps a font type to the name of the DocBook element used to render it (the result is
// passed as the tag name when font tags are built further down in this file).
// NOTE(review): the switch header and every return statement are absent from this listing,
// so the tag returned for each group of cases cannot be confirmed from here.
53 std::string fontToDocBookTag(xml::FontTypes type)
56 case xml::FontTypes::FT_EMPH:
57 case xml::FontTypes::FT_BOLD:
59 case xml::FontTypes::FT_NOUN:
61 case xml::FontTypes::FT_UBAR:
62 case xml::FontTypes::FT_WAVE:
63 case xml::FontTypes::FT_DBAR:
64 case xml::FontTypes::FT_SOUT:
65 case xml::FontTypes::FT_XOUT:
66 case xml::FontTypes::FT_ITALIC:
67 case xml::FontTypes::FT_UPRIGHT:
68 case xml::FontTypes::FT_SLANTED:
69 case xml::FontTypes::FT_SMALLCAPS:
70 case xml::FontTypes::FT_ROMAN:
71 case xml::FontTypes::FT_SANS:
73 case xml::FontTypes::FT_TYPE:
// Font-size variants.
75 case xml::FontTypes::FT_SIZE_TINY:
76 case xml::FontTypes::FT_SIZE_SCRIPT:
77 case xml::FontTypes::FT_SIZE_FOOTNOTE:
78 case xml::FontTypes::FT_SIZE_SMALL:
79 case xml::FontTypes::FT_SIZE_NORMAL:
80 case xml::FontTypes::FT_SIZE_LARGE:
81 case xml::FontTypes::FT_SIZE_LARGER:
82 case xml::FontTypes::FT_SIZE_LARGEST:
83 case xml::FontTypes::FT_SIZE_HUGE:
84 case xml::FontTypes::FT_SIZE_HUGER:
85 case xml::FontTypes::FT_SIZE_INCREASE:
86 case xml::FontTypes::FT_SIZE_DECREASE:
// Returns the role string associated with a font type; fontToAttribute() wraps it into a
// role='...' attribute.
// NOTE(review): several return statements are missing from this listing; only the visible
// ones (the "size_*" roles) can be confirmed here.
94 string fontToRole(xml::FontTypes type)
96 // Specific fonts are achieved with roles. The only common ones are "" for basic emphasis,
97 // and "bold"/"strong" for bold. With some specific options, other roles are copied into
98 // HTML output (via the DocBook XSLT sheets); otherwise, if not recognised, they are just ignored.
99 // Hence, it is not a problem to have many roles by default here.
100 // See https://www.sourceware.org/ml/docbook/2003-05/msg00269.html
102 case xml::FontTypes::FT_ITALIC:
103 case xml::FontTypes::FT_EMPH:
105 case xml::FontTypes::FT_BOLD:
107 case xml::FontTypes::FT_NOUN: // Outputs a <person>
108 case xml::FontTypes::FT_TYPE: // Outputs a <code>
110 case xml::FontTypes::FT_UBAR:
113 // All other roles are non-standard for DocBook.
115 case xml::FontTypes::FT_WAVE:
117 case xml::FontTypes::FT_DBAR:
119 case xml::FontTypes::FT_SOUT:
121 case xml::FontTypes::FT_XOUT:
123 case xml::FontTypes::FT_UPRIGHT:
125 case xml::FontTypes::FT_SLANTED:
127 case xml::FontTypes::FT_SMALLCAPS:
129 case xml::FontTypes::FT_ROMAN:
131 case xml::FontTypes::FT_SANS:
// Font sizes: each size maps to its own "size_*" role.
133 case xml::FontTypes::FT_SIZE_TINY:
135 case xml::FontTypes::FT_SIZE_SCRIPT:
136 return "size_script";
137 case xml::FontTypes::FT_SIZE_FOOTNOTE:
138 return "size_footnote";
139 case xml::FontTypes::FT_SIZE_SMALL:
141 case xml::FontTypes::FT_SIZE_NORMAL:
142 return "size_normal";
143 case xml::FontTypes::FT_SIZE_LARGE:
145 case xml::FontTypes::FT_SIZE_LARGER:
146 return "size_larger";
147 case xml::FontTypes::FT_SIZE_LARGEST:
148 return "size_largest";
149 case xml::FontTypes::FT_SIZE_HUGE:
151 case xml::FontTypes::FT_SIZE_HUGER:
153 case xml::FontTypes::FT_SIZE_INCREASE:
154 return "size_increase";
155 case xml::FontTypes::FT_SIZE_DECREASE:
156 return "size_decrease";
// Builds the XML attribute string for a font type from the role given by fontToRole().
162 string fontToAttribute(xml::FontTypes type) {
163 // If there is a role (i.e. nonstandard use of a tag), output the attribute. Otherwise, the sheer tag is sufficient
165 string role = fontToRole(type);
// NOTE(review): the test deciding whether a role exists is not visible in this listing.
167 return "role='" + role + "'";
173 } // end anonymous namespace
// Builds the opening font tag for `type`: the element name comes from fontToDocBookTag(),
// the attribute string from fontToAttribute().
176 xml::FontTag docbookStartFontTag(xml::FontTypes type)
178 return xml::FontTag(from_utf8(fontToDocBookTag(type)), from_utf8(fontToAttribute(type)), type);
// Builds the closing font tag for `type`, using the same element name as docbookStartFontTag().
182 xml::EndFontTag docbookEndFontTag(xml::FontTypes type)
184 return xml::EndFontTag(from_utf8(fontToDocBookTag(type)), type);
190 // Convenience functions to open and close tags. First, very low-level ones to ensure a consistent new-line behaviour.
194 // Contents of the block.
199 // <paratag>Contents of the paragraph.</paratag>
202 // Content before<inlinetag>Contents of the paragraph.</inlinetag>Content after
// Writes the start tag <tag attr> of an inline element to xs, with no line-break handling.
204 void openInlineTag(XMLStream & xs, const std::string & tag, const std::string & attr)
206 xs << xml::StartTag(tag, attr);
// Writes the end tag </tag> of an inline element to xs.
210 void closeInlineTag(XMLStream & xs, const std::string & tag)
212 xs << xml::EndTag(tag);
// Writes the start tag of a paragraph-like element to xs.
216 void openParTag(XMLStream & xs, const std::string & tag, const std::string & attr)
218 if (!xs.isLastTagCR())
// NOTE(review): the statement controlled by this check is missing from the listing;
// presumably it emits a line break so that the tag starts on its own line — confirm.
220 xs << xml::StartTag(tag, attr);
// Writes the end tag of a paragraph-like element to xs.
// NOTE(review): any line-break output following the tag is not visible in this listing.
224 void closeParTag(XMLStream & xs, const std::string & tag)
226 xs << xml::EndTag(tag);
// Writes the start tag of a block-level element to xs.
231 void openBlockTag(XMLStream & xs, const std::string & tag, const std::string & attr)
233 if (!xs.isLastTagCR())
// NOTE(review): the statement controlled by this check is missing from the listing;
// presumably it emits a line break before the tag — confirm.
235 xs << xml::StartTag(tag, attr);
// Writes the end tag of a block-level element to xs.
240 void closeBlockTag(XMLStream & xs, const std::string & tag)
242 if (!xs.isLastTagCR())
// NOTE(review): the statement controlled by this check is missing from the listing;
// presumably it emits a line break before the closing tag — confirm.
244 xs << xml::EndTag(tag);
// Opens `tag` with the helper matching `tagtype` ("paragraph", "block" or "inline").
// An empty tag or the placeholder "NONE" is tested first (the action taken is not visible in
// this listing; presumably nothing is output). Any other tag type is reported through
// xs.writeError().
249 void openTag(XMLStream & xs, const std::string & tag, const std::string & attr, const std::string & tagtype)
251 if (tag.empty() || tag == "NONE")
254 if (tag == "para" || tagtype == "paragraph") // Special case for <para>: always considered as a paragraph.
255 openParTag(xs, tag, attr);
256 else if (tagtype == "block")
257 openBlockTag(xs, tag, attr);
258 else if (tagtype == "inline")
259 openInlineTag(xs, tag, attr);
// Fallback for an unknown tag type.
261 xs.writeError("Unrecognised tag type '" + tagtype + "' for '" + tag + " " + attr + "'");
// Closes `tag` with the helper matching `tagtype`; mirror image of openTag().
// An empty tag or the placeholder "NONE" is tested first (the action taken is not visible in
// this listing; presumably nothing is output). Any other tag type is reported through
// xs.writeError().
265 void closeTag(XMLStream & xs, const std::string & tag, const std::string & tagtype)
267 if (tag.empty() || tag == "NONE")
270 if (tag == "para" || tagtype == "paragraph") // Special case for <para>: always considered as a paragraph.
271 closeParTag(xs, tag);
272 else if (tagtype == "block")
273 closeBlockTag(xs, tag);
274 else if (tagtype == "inline")
275 closeInlineTag(xs, tag);
// Fallback for an unknown tag type.
277 xs.writeError("Unrecognised tag type '" + tagtype + "' for '" + tag + "'");
281 // Higher-level convenience functions.
// Opens every tag needed before the contents of paragraph `par`, all taken from its layout:
// the optional wrapper tag, the paragraph tag itself, then the item and inner-item tags.
// `prevpar` is the paragraph preceding `par` (nullptr when there is none); it is only used to
// decide whether the wrapper is already open.
283 void openParTag(XMLStream & xs, const Paragraph * par, const Paragraph * prevpar)
285 Layout const & lay = par->layout();
290 // When should the wrapper be opened here? Whenever this layout has a wrapper, EXCEPT when the previous
291 // paragraph has the SAME wrapper tag and the CURRENT layout asks to be merged with the previous one:
292 // in that case the wrapper is still open. A previous paragraph with a DIFFERENT wrapper tag never
293 // prevents opening this one. Main use case: author information in several paragraphs. The name of the
294 // author starts a new <author>; if the next paragraph is the affiliation, it is output in the same
295 // <author> tag (different layout, same wrapper tag).
296 bool openWrapper = lay.docbookwrappertag() != "NONE";
297 if (prevpar != nullptr) {
298 Layout const & prevlay = prevpar->layout();
299 if (prevlay.docbookwrappertag() != "NONE") {
300 openWrapper = openWrapper && (prevlay.docbookwrappertag() != lay.docbookwrappertag()
301 || !lay.docbookwrappermergewithprevious());
307 openTag(xs, lay.docbookwrappertag(), lay.docbookwrapperattr(), lay.docbookwrappertagtype());
309 const string & tag = lay.docbooktag();
311 auto xmltag = xml::ParTag(tag, lay.docbookattr());
312 if (!xs.isTagOpen(xmltag, 1)) // Don't nest a paragraph directly in a paragraph.
313 // TODO: required or not?
314 // TODO: avoid creating a ParTag object just for this query...
315 openTag(xs, lay.docbooktag(), lay.docbookattr(), lay.docbooktagtype());
// Item tags are opened after the paragraph tag; closeParTag() closes them in reverse order.
318 openTag(xs, lay.docbookitemtag(), lay.docbookitemattr(), lay.docbookitemtagtype());
319 openTag(xs, lay.docbookiteminnertag(), lay.docbookiteminnerattr(), lay.docbookiteminnertagtype());
// Closes, in reverse order, the tags opened for paragraph `par`: inner-item, item and paragraph
// tags, then the wrapper tag. `nextpar` is the paragraph following `par` (nullptr when there is
// none); it is only used to decide whether the wrapper must stay open.
323 void closeParTag(XMLStream & xs, Paragraph const * par, Paragraph const * nextpar)
328 // The wrapper is closed whenever this layout has one, EXCEPT when the next paragraph has the SAME
// wrapper tag and asks to be merged with the previous one (it then reuses the open wrapper).
// A next paragraph with a DIFFERENT wrapper tag never keeps this wrapper open.
329 Layout const & lay = par->layout();
330 bool closeWrapper = lay.docbookwrappertag() != "NONE";
331 if (nextpar != nullptr) {
332 Layout const & nextlay = nextpar->layout();
333 if (nextlay.docbookwrappertag() != "NONE") {
334 closeWrapper = closeWrapper && (nextlay.docbookwrappertag() != lay.docbookwrappertag()
335 || !nextlay.docbookwrappermergewithprevious());
// Innermost tags first.
340 closeTag(xs, lay.docbookiteminnertag(), lay.docbookiteminnertagtype());
341 closeTag(xs, lay.docbookitemtag(), lay.docbookitemtagtype());
342 closeTag(xs, lay.docbooktag(), lay.docbooktagtype());
344 closeTag(xs, lay.docbookwrappertag(), lay.docbookwrappertagtype());
// Opens the item-label tag defined by layout `lay` (tag, attributes and tag type all come from the layout).
348 void openLabelTag(XMLStream & xs, Layout const & lay) // Mostly for definition lists.
350 openTag(xs, lay.docbookitemlabeltag(), lay.docbookitemlabelattr(), lay.docbookitemlabeltagtype());
// Closes the item-label tag defined by layout `lay`; counterpart of openLabelTag().
354 void closeLabelTag(XMLStream & xs, Layout const & lay)
356 closeTag(xs, lay.docbookitemlabeltag(), lay.docbookitemlabeltagtype());
// Opens the item tag defined by layout `lay`.
360 void openItemTag(XMLStream & xs, Layout const & lay)
362 openTag(xs, lay.docbookitemtag(), lay.docbookitemattr(), lay.docbookitemtagtype());
// Closes the item tag defined by layout `lay`; counterpart of openItemTag().
366 void closeItemTag(XMLStream & xs, Layout const & lay)
368 closeTag(xs, lay.docbookitemtag(), lay.docbookitemtagtype());
// Outputs bibliography paragraphs as <bibliomixed> entries, opening <bibliography> before the
// first entry of a bibliography and closing it after the last one.
// NOTE(review): some parameter and local declarations (e.g. pend, attr) are not visible in this listing.
372 void makeParagraphBibliography(
375 OutputParams const & runparams,
377 ParagraphList::const_iterator const & pbegin)
379 auto const begin = text.paragraphs().begin();
380 auto const end = text.paragraphs().end();
384 // Find the paragraph *before* pbegin.
385 ParagraphList::const_iterator pbegin_before = begin;
386 if (pbegin != begin) {
387 ParagraphList::const_iterator pbegin_before_next = begin;
388 ++pbegin_before_next;
390 while (pbegin_before_next != pbegin) {
392 ++pbegin_before_next;
396 ParagraphList::const_iterator par = pbegin;
398 // If this is the first paragraph in a bibliography, open the bibliography tag. This is the case when
// there is no previous paragraph at all, or when the previous paragraph is not a bibliography entry.
399 if (pbegin == begin || pbegin_before->layout().latextype != LATEX_BIB_ENVIRONMENT) {
400 xs << xml::StartTag("bibliography");
404 // Generate the required paragraphs, one <bibliomixed> entry per paragraph.
405 for (; par != pend; ++par) {
406 // Start the precooked bibliography entry. This is very much like opening a paragraph tag.
407 // Don't forget the citation ID!
// Look at every position of the paragraph, not just the first one, to find the bibitem inset.
409 for (auto i = 0; i < par->size(); ++i) {
410 Inset const *ip = par->getInset(i);
411 if (ip != nullptr && ip->lyxCode() == BIBITEM_CODE) {
412 const auto * bibitem = dynamic_cast<const InsetBibitem*>(par->getInset(i));
413 attr = from_utf8("xml:id='") + bibitem->getParam("key") + from_utf8("'");
417 xs << xml::StartTag(from_utf8("bibliomixed"), attr);
419 // Generate the entry.
420 par->simpleDocBookOnePar(buf, xs, runparams, text.outerFont(distance(begin, par)), true, true, 0);
422 // End the precooked bibliography entry.
423 xs << xml::EndTag("bibliomixed");
427 // If this is the last paragraph in a bibliography, close the bibliography tag.
428 if (par == end || par->layout().latextype != LATEX_BIB_ENVIRONMENT) {
429 xs << xml::EndTag("bibliography");
// Generator for one standard paragraph. The paragraph is first rendered into a temporary
// stream; its output is copied to xs only when it contains something other than spaces and
// line breaks.
// NOTE(review): the line carrying this function's name is missing from this listing; the
// LATEX_PARAGRAPH dispatch further down suggests it is makeParagraph — confirm. The conditions
// guarding the openParTag()/closeParTag() calls near the end are not visible here either.
438 OutputParams const & runparams,
440 ParagraphList::const_iterator const & par)
442 auto const begin = text.paragraphs().begin();
443 auto const end = text.paragraphs().end();
444 auto prevpar = text.paragraphs().getParagraphBefore(par);
446 // We want to open the paragraph tag if:
447 // (i) the current layout permits multiple paragraphs
448 // (ii) we are either not already inside a paragraph (HTMLIsBlock) OR
449 // we are, but this is not the first paragraph
451 // But there is also a special case, and we first see whether we are in it.
452 // We do not want to open the paragraph tag if this paragraph contains
453 // only one item, and that item is "inline", i.e., not HTMLIsBlock (such
454 // as a branch). On the other hand, if that single item has a font change
455 // applied to it, then we still do need to open the paragraph.
457 // Obviously, this is very fragile. The main reason we need to do this is
458 // because of branches, e.g., a branch that contains an entire new section.
459 // We do not really want to wrap that whole thing in a <div>...</div>.
460 bool special_case = false;
// Only a paragraph made of exactly one inset is a candidate for the special case.
461 Inset const *specinset = par->size() == 1 ? par->getInset(0) : nullptr;
462 if (specinset && !specinset->getLayout().htmlisblock()) { // TODO: Convert htmlisblock to a DocBook parameter?
463 Layout const &style = par->layout();
464 FontInfo const first_font = style.labeltype == LABEL_MANUAL ?
465 style.labelfont : style.font;
466 FontInfo const our_font =
467 par->getFont(buf.masterBuffer()->params(), 0,
468 text.outerFont(std::distance(begin, par))).fontInfo();
// No font change on the single inset: the comparison below detects that situation.
470 if (first_font == our_font)
474 // Plain layouts must be ignored.
475 if (!special_case && buf.params().documentClass().isPlainLayout(par->layout()) && !runparams.docbook_force_pars)
477 // TODO: Could get rid of this with a DocBook equivalent to htmlisblock?
478 if (!special_case && par->size() == 1 && par->getInset(0)) {
479 Inset const * firstInset = par->getInset(0);
481 // Floats cannot be in paragraphs.
482 special_case = to_utf8(firstInset->layoutName()).substr(0, 6) == "Float:";
484 // Bibliographies cannot be in paragraphs.
485 if (!special_case && firstInset->asInsetCommand())
486 special_case = firstInset->asInsetCommand()->params().getCmdName() == "bibtex";
488 // Equations do not deserve their own paragraph (DocBook allows them outside paragraphs).
489 if (!special_case && firstInset->asInsetMath())
492 // ERTs are in comments, not paragraphs.
493 if (!special_case && firstInset->lyxCode() == lyx::ERT_CODE)
496 // Listings should not get into their own paragraph.
497 if (!special_case && firstInset->lyxCode() == lyx::LISTINGS_CODE)
// NOTE(review): the last operand(s) of this condition are missing from the listing.
501 bool const open_par = runparams.docbook_make_pars
502 && !runparams.docbook_in_par
505 // We want to issue the closing tag if either:
506 // (i) We opened it, and either docbook_in_par is false,
507 // or we're not in the last paragraph, anyway.
508 // (ii) We didn't open it and docbook_in_par is true,
509 // but we are in the first par, and there is a next par.
512 bool const close_par = open_par && (!runparams.docbook_in_par);
514 // Determine if this paragraph has some real content. Things like new pages are not caught
515 // by Paragraph::empty(), even though they do not generate anything useful in DocBook.
516 odocstringstream os2;
518 par->simpleDocBookOnePar(buf, xs2, runparams, text.outerFont(distance(begin, par)), open_par, close_par, 0);
// Strip spaces, carriage returns and line feeds; whatever remains counts as real content.
520 docstring cleaned = os2.str();
521 static const lyx::regex reg("[ \\r\\n]*");
522 cleaned = from_utf8(lyx::regex_replace(to_utf8(cleaned), reg, string("")));
524 if (!cleaned.empty()) {
526 openParTag(xs, &*par, prevpar);
// The temporary output is already XML, hence no escaping when copying it.
528 xs << XMLStream::ESCAPE_NONE << os2.str();
531 closeParTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr);
// Tail of a forward declaration; its first lines are missing from this listing.
// NOTE(review): the parameter list matches the dispatcher defined further down, which
// makeEnvironment() calls as makeAny — confirm.
540 OutputParams const &ourparams,
541 ParagraphList::const_iterator par);
// Outputs one paragraph whose layout is an environment (plain, list or item environment):
// opening tags, optional item wrapper and label, the paragraph contents, then closing tags.
// NOTE(review): several declarations (e.g. sep, next_par, nextpar) and branch bodies are not
// visible in this listing.
544 void makeEnvironment(
547 OutputParams const &runparams,
549 ParagraphList::const_iterator const & par)
551 auto const end = text.paragraphs().end();
553 // Output the opening tag for this environment, but only if it has not been previously opened (condition
554 // implemented in openParTag).
555 auto prevpar = text.paragraphs().getParagraphBefore(par);
556 openParTag(xs, &*par, prevpar); // TODO: switch in layout for par/block?
558 // Generate the contents of this environment. There is a special case if this is like some environment.
559 Layout const & style = par->layout();
560 if (style.latextype == LATEX_COMMAND) {
561 // Nothing to do (otherwise, infinite loops).
562 } else if (style.latextype == LATEX_ENVIRONMENT ||
563 style.latextype == LATEX_LIST_ENVIRONMENT ||
564 style.latextype == LATEX_ITEM_ENVIRONMENT) {
565 // Open a wrapper tag if needed.
566 if (style.docbookitemwrappertag() != "NONE") {
567 xs << xml::StartTag(style.docbookitemwrappertag(), style.docbookitemwrapperattr());
571 // Generate the label, if need be. If it is taken from the text, sep != 0 and corresponds to the first
572 // character after the label.
574 if (style.labeltype != LABEL_NO_LABEL && style.docbookitemlabeltag() != "NONE") {
575 // At least one condition must be met:
576 // - this environment is not a list
577 // - if this is a list, the label must not be manual (i.e. it must be taken from the layout)
578 if (style.latextype != LATEX_LIST_ENVIRONMENT || style.labeltype != LABEL_MANUAL) {
579 // Usual cases: maybe there is something specified at the layout level. Highly unlikely, though.
580 docstring const lbl = par->params().labelString();
585 openLabelTag(xs, style);
587 closeLabelTag(xs, style);
590 // Only variablelist gets here (or similar items defined as an extension in the layout).
591 openLabelTag(xs, style);
// firstWordDocBook() writes the label text and returns the position where the rest starts.
592 sep = par->firstWordDocBook(xs, runparams);
593 closeLabelTag(xs, style);
597 // Maybe the item is completely empty, i.e. if the first word ends at the end of the current paragraph
598 // AND if the next paragraph doesn't have the same depth (if there is such a paragraph).
599 // Common case: there is only the first word on the line, but there is a nested list instead
601 bool emptyItem = false;
602 if (sep == par->size()) { // If the separator is already at the end of this paragraph...
605 if (next_par == text.paragraphs().end()) // There is no next paragraph.
607 else // There is a next paragraph: check depth.
608 emptyItem = par->params().depth() >= next_par->params().depth();
612 // Avoid having an empty item, this is not valid DocBook. A single character is enough to force
613 // generation of a full <para>.
614 // TODO: this always worked only by magic...
617 // Generate the rest of the paragraph, if need be.
618 par->simpleDocBookOnePar(buf, xs, runparams, text.outerFont(std::distance(text.paragraphs().begin(), par)),
// NOTE(review): the remaining arguments of the call above and the branch leading to the
// makeAny() call below are missing from the listing.
622 makeAny(text, buf, xs, runparams, par);
625 // Close the environment.
628 closeParTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr); // TODO: switch in layout for par/block?
// Generator for one command-like paragraph: opening tags, the paragraph contents, closing tags.
// NOTE(review): the line carrying this function's name is missing from this listing; the
// dispatch further down suggests it is makeCommand — confirm. The declaration of nextpar is
// not visible either.
635 OutputParams const & runparams,
637 ParagraphList::const_iterator const & par)
639 // Unlike XHTML, no need for labels, as they are handled by DocBook tags.
640 auto const begin = text.paragraphs().begin();
641 auto const end = text.paragraphs().end();
645 // Generate this command.
646 auto prevpar = text.paragraphs().getParagraphBefore(par);
647 openParTag(xs, &*par, prevpar);
649 par->simpleDocBookOnePar(buf, xs, runparams,
650 text.outerFont(distance(begin, par)));
// A null next paragraph tells closeParTag() that this is the last paragraph.
652 closeParTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr);
// Dispatcher: selects the generator matching the LaTeX type of the paragraph's layout.
// NOTE(review): the line carrying this function's name is missing from this listing; callers
// in this file invoke it as makeAny — confirm. One case label (before the makeCommand() call)
// and the statements ending each case are not visible here.
660 OutputParams const &ourparams,
661 ParagraphList::const_iterator par)
663 switch (par->layout().latextype) {
665 makeCommand(buf, xs, ourparams, text, par);
// All three environment types share one generator.
667 case LATEX_ENVIRONMENT:
668 case LATEX_LIST_ENVIRONMENT:
669 case LATEX_ITEM_ENVIRONMENT:
670 makeEnvironment(buf, xs, ourparams, text, par);
672 case LATEX_PARAGRAPH:
673 makeParagraph(buf, xs, ourparams, text, par);
675 case LATEX_BIB_ENVIRONMENT:
676 makeParagraphBibliography(buf, xs, ourparams, text, par);
681 } // end anonymous namespace
// Result type of hasDocumentSectioning(): a flag telling whether a sectioning paragraph was
// found, and a paragraph index.
684 using DocBookDocumentSectioning = tuple<bool, pit_type>;
// Paragraph indices collected by getParagraphsWithInfo() for the <info> part of the document.
687 struct DocBookInfoTag
// Paragraphs whose layout declares docbookininfo() == "maybe".
689 const set<pit_type> shouldBeInInfo;
// Paragraphs whose layout declares docbookininfo() == "always".
690 const set<pit_type> mustBeInInfo;
// Paragraphs making up the abstract.
691 const set<pit_type> abstract;
// NOTE(review): the declarations of the bpit and epit members are missing from this listing.
695 DocBookInfoTag(const set<pit_type> & shouldBeInInfo, const set<pit_type> & mustBeInInfo,
696 const set<pit_type> & abstract, pit_type bpit, pit_type epit) :
697 shouldBeInInfo(shouldBeInInfo), mustBeInInfo(mustBeInInfo), abstract(abstract),
698 bpit(bpit), epit(epit) {}
// Scans paragraphs from bpit up to (but excluding) epit for a layout of category "Sectioning".
// Returns whether one was found, together with the value of bpit when the scan stopped.
// NOTE(review): the statements ending the loop early and advancing bpit are not visible in this listing.
702 DocBookDocumentSectioning hasDocumentSectioning(ParagraphList const &paragraphs, pit_type bpit, pit_type const epit) {
703 bool documentHasSections = false;
705 while (bpit < epit) {
706 Layout const &style = paragraphs[bpit].layout();
707 documentHasSections |= style.category() == from_utf8("Sectioning");
709 if (documentHasSections)
713 // Paragraphs before the first section: [ runparams.par_begin ; eppit )
715 return make_tuple(documentHasSections, bpit);
// Checks every position of `par` for something other than a note inset, i.e. a position that
// is not an inset, or an inset that is not an InsetNote.
// NOTE(review): both return statements are missing from this listing.
719 bool hasOnlyNotes(Paragraph const & par)
721 // Precondition: the paragraph is not empty. Otherwise, the function will always return true...
722 for (int i = 0; i < par.size(); ++i)
723 // If you find something that is not an inset (like actual text) or an inset that is not a note,
725 if (!par.isInset(i) || !dynamic_cast<InsetNote *>(par.insetList().get(i)))
// Classifies the paragraphs in [bpit, epit) that belong to the <info> part of the document:
// those that must be in <info>, those that may be, and those forming the abstract.
// The returned DocBookInfoTag also carries the start (bpit) and end (cpit) of the <info> part.
// NOTE(review): loop increments, some declarations (pit) and the `done` label are not visible
// in this listing.
731 DocBookInfoTag getParagraphsWithInfo(ParagraphList const &paragraphs, pit_type bpit, pit_type const epit) {
732 set<pit_type> shouldBeInInfo;
733 set<pit_type> mustBeInInfo;
734 set<pit_type> abstract;
736 // Find the first non empty paragraph by mutating bpit.
737 while (bpit < epit) {
738 Paragraph const &par = paragraphs[bpit];
739 if (par.empty() || hasOnlyNotes(par))
745 // Find the last info-like paragraph.
746 pit_type cpit = bpit;
747 bool hasAbstractLayout = false;
748 while (cpit < epit) {
749 // Skip paragraphs only containing one note.
750 Paragraph const & par = paragraphs[cpit];
751 if (hasOnlyNotes(par)) {
// Remember whether any layout explicitly marks an abstract: it changes the detection below.
756 if (par.layout().docbookabstract())
757 hasAbstractLayout = true;
759 // Based on layout information, store this paragraph in one set: should be in <info>, must be.
760 Layout const &style = par.layout();
762 if (style.docbookininfo() == "always") {
763 mustBeInInfo.emplace(cpit);
764 } else if (style.docbookininfo() == "maybe") {
765 shouldBeInInfo.emplace(cpit);
767 // Hypothesis: the <info> parts should be grouped together near the beginning bpit.
768 // There may be notes in between, but nothing else.
773 // Now, cpit points to the last paragraph that has things that could go in <info>.
774 // bpit is the beginning of the <info> part.
776 // Go once again through the list of paragraphs to find the abstract. If there is an abstract
777 // layout, only consider it. Otherwise, an abstract is just a sequence of paragraphs with text.
778 if (hasAbstractLayout) {
780 while (pit < cpit) { // Don't overshoot the <info> part.
781 if (paragraphs[pit].layout().docbookabstract())
782 abstract.emplace(pit);
786 pit_type lastAbstract = epit + 1; // A nonsensical value.
787 docstring lastAbstractLayout;
790 while (pit < cpit) { // Don't overshoot the <info> part.
791 const Paragraph & par = paragraphs.at(pit);
792 if (!par.insetList().empty()) {
793 for (const auto &i : par.insetList()) {
// Heuristic: an inset that holds text makes its paragraph an abstract candidate.
794 if (i.inset->getText(0) != nullptr) {
795 if (lastAbstract == epit + 1) {
796 // First paragraph that matches the heuristic definition of abstract.
798 lastAbstractLayout = par.layout().name();
799 } else if (pit > lastAbstract + 1 || par.layout().name() != lastAbstractLayout) {
800 // This is either too far from the last abstract paragraph or doesn't
801 // have the right layout name, BUT there has already been an abstract
802 // in this document: done with detecting the abstract.
803 goto done; // Easier to get out of two nested loops.
806 abstract.emplace(pit);
816 return DocBookInfoTag(shouldBeInInfo, mustBeInInfo, abstract, bpit, cpit);
// Outputs the <info> part described by `info`: the paragraphs that should or must be in <info>,
// followed by the abstract, which is rendered to a temporary stream first so that an abstract
// without actual content can be dropped.
// NOTE(review): several conditions and declarations (e.g. xs2, abstract, the tests guarding the
// <info> start/end tags) are not visible in this listing.
820 void outputDocBookInfo(
824 OutputParams const & runparams,
825 ParagraphList const & paragraphs,
826 DocBookInfoTag const & info)
828 // Perform an additional check on the abstract. Sometimes, there are many paragraphs that should go
829 // into the abstract, but none generates actual content. Thus, first generate to a temporary stream,
830 // then only create the <abstract> tag if these paragraphs generate some content.
831 // This check must be performed *before* a decision on whether or not to output <info> is made.
832 bool hasAbstract = !info.abstract.empty();
835 // Generate the abstract XML into a string before further checks.
836 odocstringstream os2;
839 auto bpit = *std::min_element(info.abstract.begin(), info.abstract.end());
840 auto epit = 1 + *std::max_element(info.abstract.begin(), info.abstract.end());
841 // info.abstract is inclusive, epit is exclusive, hence +1 for looping.
843 while (bpit < epit) {
844 makeAny(text, buf, xs2, runparams, paragraphs.iterator_at(bpit));
849 // Actually output the abstract if there is something to do. Don't count line feeds or spaces in this,
850 // even though they must be properly output if there is some abstract.
851 abstract = os2.str();
852 static const lyx::regex reg("[ \\r\\n]*");
853 docstring abstractContent = from_utf8(lyx::regex_replace(to_utf8(abstract), reg, string("")));
855 // Nothing? Then there is no abstract!
856 if (abstractContent.empty())
860 // The abstract must go in <info>. Otherwise, decide whether to open <info> based on the layouts.
861 bool needInfo = !info.mustBeInInfo.empty() || hasAbstract;
863 // Start the <info> tag if required.
865 xs.startDivision(false);
866 xs << xml::StartTag("info");
870 // Output the elements that should go in <info>, before and after the abstract.
871 for (auto pit : info.shouldBeInInfo) { // Typically, the title: these elements are so important and ubiquitous
872 // that mandating a wrapper like <info> would repel users. Thus, generate them first.
873 makeAny(text, buf, xs, runparams, paragraphs.iterator_at(pit));
875 for (auto pit : info.mustBeInInfo) {
876 if (info.abstract.find(pit) == info.abstract.end()) // The abstract must be in info, but is dealt with after.
877 makeAny(text, buf, xs, runparams, paragraphs.iterator_at(pit));
880 // Always output the abstract as the last item of the <info>, as it requires special treatment (especially if
881 // it contains several paragraphs that are empty).
883 // string tag = paragraphs[*info.abstract.begin()].layout().docbookforceabstracttag();
884 // if (tag == "NONE")
887 // xs << xml::StartTag(tag);
// The abstract was generated as XML above, hence no escaping when copying it.
889 xs << XMLStream::ESCAPE_NONE << abstract;
890 // xs << xml::EndTag(tag);
894 // End the <info> tag if it was started.
896 xs << xml::EndTag("info");
// Outputs the <info> part for a document that has sections: collects the info-like paragraphs
// from runparams.par_begin up to epit, then outputs them through outputDocBookInfo().
// NOTE(review): part of the parameter list (including epit) is not visible in this listing.
903 void docbookFirstParagraphs(
907 OutputParams const &runparams,
910 // Handle the beginning of the document, supposing it has sections.
911 // Major role: output the first <info> tag.
913 ParagraphList const &paragraphs = text.paragraphs();
914 pit_type bpit = runparams.par_begin;
915 DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit);
916 outputDocBookInfo(text, buf, xs, runparams, paragraphs, info);
// Outputs a text without sections: first the <info> part, then every remaining paragraph
// that contains something other than notes.
// NOTE(review): part of the parameter list, the statement repositioning bpit after the <info>
// part and the loop increment are not visible in this listing.
920 void docbookSimpleAllParagraphs(
924 OutputParams const & runparams)
926 // Handle the given text, supposing it has no sections (i.e. a "simple" text). The input may vary in length
927 // between a single paragraph to a whole document.
929 // First, the <info> tag.
930 ParagraphList const &paragraphs = text.paragraphs();
931 pit_type bpit = runparams.par_begin;
932 pit_type const epit = runparams.par_end;
933 DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit);
934 outputDocBookInfo(text, buf, xs, runparams, paragraphs, info);
936 // Then, the content. It starts where the <info> ends.
938 while (bpit < epit) {
939 auto par = paragraphs.iterator_at(bpit);
// Paragraphs made only of notes produce nothing and are skipped.
940 if (!hasOnlyNotes(*par))
941 makeAny(text, buf, xs, runparams, par);
// Entry point: outputs the paragraphs [runparams.par_begin, runparams.par_end) of `text` as
// DocBook. When the two bounds are equal, the whole paragraph list is output. Section elements
// are opened and closed as paragraphs with a "Sectioning" layout are met; a stack of
// (level, tag) pairs records the sections currently open.
// NOTE(review): part of the parameter list, several declarations (e.g. eppit), loop increments
// and the stack pop() calls are not visible in this listing.
947 void docbookParagraphs(Text const &text,
950 OutputParams const &runparams) {
951 ParagraphList const &paragraphs = text.paragraphs();
952 if (runparams.par_begin == runparams.par_end) {
953 runparams.par_begin = 0;
954 runparams.par_end = paragraphs.size();
956 pit_type bpit = runparams.par_begin;
957 pit_type const epit = runparams.par_end;
960 xs << XMLStream::ESCAPE_NONE << "<!-- DocBook output error! -->\n";
964 std::stack<std::pair<int, string>> headerLevels; // Used to determine when to open/close sections: store the depth
965 // of the section and the tag that was used to open it.
967 // Detect whether the document contains sections. If there are no sections, there can be no automatically
968 // discovered abstract.
969 bool documentHasSections;
971 tie(documentHasSections, eppit) = hasDocumentSectioning(paragraphs, bpit, epit);
973 if (documentHasSections) {
974 docbookFirstParagraphs(text, buf, xs, runparams, eppit);
977 docbookSimpleAllParagraphs(text, buf, xs, runparams);
981 bool currentlyInAppendix = false;
983 while (bpit < epit) {
// Per-paragraph copy of the parameters: the anchors to ignore are recorded in it below.
984 OutputParams ourparams = runparams;
986 auto par = paragraphs.iterator_at(bpit);
987 if (par->params().startOfAppendix())
988 currentlyInAppendix = true;
989 Layout const &style = par->layout();
990 ParagraphList::const_iterator const lastStartedPar = par;
991 ParagraphList::const_iterator send;
993 if (hasOnlyNotes(*par)) {
998 // Think about adding <section> and/or </section>s.
999 const bool isLayoutSectioning = style.category() == from_utf8("Sectioning");
1000 if (isLayoutSectioning) {
1001 int level = style.toclevel;
1003 // Need to close a previous section if it has the same level or a higher one (close <section> if opening a <h2>
1004 // after a <h2>, <h3>, <h4>, <h5> or <h6>). More examples:
1005 // - current: h2; back: h1; do not close any <section>
1006 // - current: h1; back: h2; close two <section> (first the <h2>, then the <h1>, so a new <h1> can come)
1007 while (!headerLevels.empty() && level <= headerLevels.top().first) {
1008 int stackLevel = headerLevels.top().first;
1009 docstring stackTag = from_utf8("</" + headerLevels.top().second + ">");
1012 // Output the tag only if it corresponds to a legit section.
1013 if (stackLevel != Layout::NOT_IN_TOC)
1014 xs << XMLStream::ESCAPE_NONE << stackTag << xml::CR();
1017 // Open the new section: first push it onto the stack, then output it in DocBook.
// Inside the appendix, a "chapter" section tag is replaced by "appendix".
1018 string sectionTag = (currentlyInAppendix && style.docbooksectiontag() == "chapter") ?
1019 "appendix" : style.docbooksectiontag();
1020 headerLevels.push(std::make_pair(level, sectionTag));
1022 // Some sectioning-like elements should not be output (such as FrontMatter).
1023 if (level != Layout::NOT_IN_TOC) {
1024 // Look for a label in the title, i.e. a InsetLabel as a child.
1025 docstring id = docstring();
1026 for (pos_type i = 0; i < par->size(); ++i) {
1027 Inset const *inset = par->getInset(i);
1029 if (auto label = dynamic_cast<InsetLabel const *>(inset)) {
1030 // Generate the attributes for the section if need be.
1031 id += "xml:id=\"" + xml::cleanID(label->screenLabel()) + "\"";
1033 // Don't output the ID as a DocBook <anchor>.
1034 ourparams.docbook_anchors_to_ignore.emplace(label->screenLabel());
1036 // Cannot have multiple IDs per tag.
1042 // Write the open tag for this section.
1043 docstring tag = from_utf8("<" + sectionTag);
1045 tag += from_utf8(" ") + id;
1046 tag += from_utf8(">");
1047 xs << XMLStream::ESCAPE_NONE << tag;
1052 // Close all sections before the bibliography.
1053 // TODO: Only close all when the bibliography is at the end of the document? Or force to output the bibliography at the end of the document? Or don't care (as allowed by DocBook)?
1054 auto insetsLength = distance(par->insetList().begin(), par->insetList().end());
1055 if (insetsLength > 0) {
1056 Inset const *firstInset = par->getInset(0);
1057 if (firstInset && dynamic_cast<InsetBibtex const *>(firstInset)) {
1058 while (!headerLevels.empty()) {
1059 int level = headerLevels.top().first;
1060 docstring tag = from_utf8("</" + headerLevels.top().second + ">");
1063 // Output the tag only if it corresponds to a legit section.
1064 if (level != Layout::NOT_IN_TOC) {
1065 xs << XMLStream::ESCAPE_NONE << tag;
1072 // Generate this paragraph.
1073 makeAny(text, buf, xs, ourparams, par);
1077 // If need be, close <section>s, but only at the end of the document (otherwise, dealt with at the beginning
// NOTE(review): this loop stops at the first stack entry whose level is NOT_IN_TOC, so sections
// pushed below such an entry would stay open — confirm this is intended.
1079 while (!headerLevels.empty() && headerLevels.top().first > Layout::NOT_IN_TOC) {
1080 docstring tag = from_utf8("</" + headerLevels.top().second + ">");
1082 xs << XMLStream::ESCAPE_NONE << tag;