2 * \file output_docbook.cpp
3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
6 * \author Lars Gullik Bjønnes
9 * Full author contact details are available in file CREDITS.
15 #include "buffer_funcs.h"
16 #include "BufferParams.h"
18 #include "InsetList.h"
19 #include "Paragraph.h"
20 #include "ParagraphList.h"
21 #include "ParagraphParameters.h"
24 #include "TextClass.h"
26 #include "insets/InsetBibtex.h"
27 #include "insets/InsetBibitem.h"
28 #include "insets/InsetLabel.h"
29 #include "insets/InsetNote.h"
31 #include "support/lassert.h"
33 #include "support/regex.h"
41 using namespace lyx::support;
// Maps a font type to the name of the DocBook tag used to render it. The result
// is converted with from_utf8() and used as the tag name of the xml::FontTag and
// xml::EndFontTag objects built by docbookStartFontTag() / docbookEndFontTag().
47 std::string fontToDocBookTag(xml::FontTypes type)
50 case xml::FontTypes::FT_EMPH:
51 case xml::FontTypes::FT_BOLD:
53 case xml::FontTypes::FT_NOUN:
55 case xml::FontTypes::FT_UBAR:
56 case xml::FontTypes::FT_WAVE:
57 case xml::FontTypes::FT_DBAR:
58 case xml::FontTypes::FT_SOUT:
59 case xml::FontTypes::FT_XOUT:
60 case xml::FontTypes::FT_ITALIC:
61 case xml::FontTypes::FT_UPRIGHT:
62 case xml::FontTypes::FT_SLANTED:
63 case xml::FontTypes::FT_SMALLCAPS:
64 case xml::FontTypes::FT_ROMAN:
65 case xml::FontTypes::FT_SANS:
67 case xml::FontTypes::FT_TYPE:
// Font sizes.
69 case xml::FontTypes::FT_SIZE_TINY:
70 case xml::FontTypes::FT_SIZE_SCRIPT:
71 case xml::FontTypes::FT_SIZE_FOOTNOTE:
72 case xml::FontTypes::FT_SIZE_SMALL:
73 case xml::FontTypes::FT_SIZE_NORMAL:
74 case xml::FontTypes::FT_SIZE_LARGE:
75 case xml::FontTypes::FT_SIZE_LARGER:
76 case xml::FontTypes::FT_SIZE_LARGEST:
77 case xml::FontTypes::FT_SIZE_HUGE:
78 case xml::FontTypes::FT_SIZE_HUGER:
79 case xml::FontTypes::FT_SIZE_INCREASE:
80 case xml::FontTypes::FT_SIZE_DECREASE:
// Returns the role associated with a font type. fontToAttribute() turns this
// string into a role='...' attribute for the font tag.
88 string fontToRole(xml::FontTypes type)
90 // Specific fonts are achieved with roles. The only common ones are "" for basic emphasis,
91 // and "bold"/"strong" for bold. With some specific options, other roles are copied into
92 // HTML output (via the DocBook XSLT sheets); otherwise, if not recognised, they are just ignored.
93 // Hence, it is not a problem to have many roles by default here.
94 // See https://www.sourceware.org/ml/docbook/2003-05/msg00269.html
96 case xml::FontTypes::FT_ITALIC:
97 case xml::FontTypes::FT_EMPH:
99 case xml::FontTypes::FT_BOLD:
101 case xml::FontTypes::FT_NOUN: // Outputs a <person>
102 case xml::FontTypes::FT_TYPE: // Outputs a <code>
104 case xml::FontTypes::FT_UBAR:
107 // All other roles are non-standard for DocBook.
109 case xml::FontTypes::FT_WAVE:
111 case xml::FontTypes::FT_DBAR:
113 case xml::FontTypes::FT_SOUT:
115 case xml::FontTypes::FT_XOUT:
117 case xml::FontTypes::FT_UPRIGHT:
119 case xml::FontTypes::FT_SLANTED:
121 case xml::FontTypes::FT_SMALLCAPS:
123 case xml::FontTypes::FT_ROMAN:
125 case xml::FontTypes::FT_SANS:
// Font sizes: each size maps to a role of the form "size_<name>".
127 case xml::FontTypes::FT_SIZE_TINY:
129 case xml::FontTypes::FT_SIZE_SCRIPT:
130 return "size_script";
131 case xml::FontTypes::FT_SIZE_FOOTNOTE:
132 return "size_footnote";
133 case xml::FontTypes::FT_SIZE_SMALL:
135 case xml::FontTypes::FT_SIZE_NORMAL:
136 return "size_normal";
137 case xml::FontTypes::FT_SIZE_LARGE:
139 case xml::FontTypes::FT_SIZE_LARGER:
140 return "size_larger";
141 case xml::FontTypes::FT_SIZE_LARGEST:
142 return "size_largest";
143 case xml::FontTypes::FT_SIZE_HUGE:
145 case xml::FontTypes::FT_SIZE_HUGER:
147 case xml::FontTypes::FT_SIZE_INCREASE:
148 return "size_increase";
149 case xml::FontTypes::FT_SIZE_DECREASE:
150 return "size_decrease";
// Builds the attribute string of a font tag from the role given by fontToRole(),
// in the form role='<role>'.
157 string fontToAttribute(xml::FontTypes type) {
158 // If there is a role (i.e. nonstandard use of a tag), output the attribute. Otherwise, the sheer tag is sufficient
160 string role = fontToRole(type);
162 return "role='" + role + "'";
// Creates the opening font tag for a font type: tag name from fontToDocBookTag(),
// attributes from fontToAttribute().
169 xml::FontTag docbookStartFontTag(xml::FontTypes type)
171 return xml::FontTag(from_utf8(fontToDocBookTag(type)), from_utf8(fontToAttribute(type)), type);
// Creates the closing font tag for a font type, named by fontToDocBookTag().
175 xml::EndFontTag docbookEndFontTag(xml::FontTypes type)
177 return xml::EndFontTag(from_utf8(fontToDocBookTag(type)), type);
181 // Convenience functions to open and close tags. First, very low-level ones to ensure a consistent new-line behaviour.
185 // Contents of the block.
190 // <paratag>Contents of the paragraph.</paratag>
193 // Content before<inlinetag>Contents of the paragraph.</inlinetag>Content after
// Writes the start tag of an inline element to the stream.
195 void openInlineTag(XMLStream & xs, const std::string & tag, const std::string & attr)
197 xs << xml::StartTag(tag, attr);
// Writes the end tag of an inline element to the stream.
201 void closeInlineTag(XMLStream & xs, const std::string & tag)
203 xs << xml::EndTag(tag);
// Writes the start tag of a paragraph-like element. The stream is first asked
// whether the last thing written was a line break (isLastTagCR()).
207 void openParTag(XMLStream & xs, const std::string & tag, const std::string & attr)
209 if (!xs.isLastTagCR())
211 xs << xml::StartTag(tag, attr);
// Writes the end tag of a paragraph-like element.
215 void closeParTag(XMLStream & xs, const std::string & tag)
217 xs << xml::EndTag(tag);
// Writes the start tag of a block element. The stream is first asked whether
// the last thing written was a line break (isLastTagCR()).
222 void openBlockTag(XMLStream & xs, const std::string & tag, const std::string & attr)
224 if (!xs.isLastTagCR())
226 xs << xml::StartTag(tag, attr);
// Writes the end tag of a block element. The stream is first asked whether
// the last thing written was a line break (isLastTagCR()).
231 void closeBlockTag(XMLStream & xs, const std::string & tag)
233 if (!xs.isLastTagCR())
235 xs << xml::EndTag(tag);
// Opens `tag` with attributes `attr`, dispatching on `tagtype` ("paragraph",
// "block" or "inline") to the matching low-level opener. Any other tag type is
// reported through xs.writeError().
240 void openTag(XMLStream & xs, const std::string & tag, const std::string & attr, const std::string & tagtype)
// Empty tags and the placeholder "NONE" are special-cased (the action taken is
// not visible in this listing; presumably nothing is output).
242 if (tag.empty() || tag == "NONE")
245 if (tag == "para" || tagtype == "paragraph") // Special case for <para>: always considered as a paragraph.
246 openParTag(xs, tag, attr);
247 else if (tagtype == "block")
248 openBlockTag(xs, tag, attr);
249 else if (tagtype == "inline")
250 openInlineTag(xs, tag, attr);
252 xs.writeError("Unrecognised tag type '" + tagtype + "' for '" + tag + " " + attr + "'");
// Closes `tag`, dispatching on `tagtype` ("paragraph", "block" or "inline") to
// the matching low-level closer. Any other tag type is reported through
// xs.writeError().
256 void closeTag(XMLStream & xs, const std::string & tag, const std::string & tagtype)
// Empty tags and the placeholder "NONE" are special-cased (the action taken is
// not visible in this listing; presumably nothing is output).
258 if (tag.empty() || tag == "NONE")
261 if (tag == "para" || tagtype == "paragraph") // Special case for <para>: always considered as a paragraph.
262 closeParTag(xs, tag);
263 else if (tagtype == "block")
264 closeBlockTag(xs, tag);
265 else if (tagtype == "inline")
266 closeInlineTag(xs, tag);
268 xs.writeError("Unrecognised tag type '" + tagtype + "' for '" + tag + "'");
272 // Higher-level convenience functions.
// Opens the tags requested by the layout of `par`, in this order: wrapper tag,
// main tag, item tag, inner item tag. `prevpar` is the preceding paragraph (may
// be null) and is only used to decide whether the wrapper tag must be opened.
274 void openParTag(XMLStream & xs, const Paragraph * par, const Paragraph * prevpar)
276 Layout const & lay = par->layout();
281 // When should the wrapper be opened here? Only if the previous paragraph has the SAME wrapper tag
282 // (usually, they won't have the same layout) and the CURRENT one allows merging.
283 // The main use case is author information in several paragraphs: if the name of the author is the
284 // first paragraph of an author, then merging with the previous tag does not make sense. Say the
285 // next paragraph is the affiliation, then it should be output in the same <author> tag (different
286 // layout, same wrapper tag).
287 bool openWrapper = lay.docbookwrappertag() != "NONE";
288 if (prevpar != nullptr) {
289 Layout const & prevlay = prevpar->layout();
290 if (prevlay.docbookwrappertag() != "NONE") {
// NOTE(review): as written, a previous paragraph with a DIFFERENT (non-"NONE")
// wrapper tag makes this expression false, so the current wrapper would not be
// opened; it is also true when the tags match and merging is NOT allowed, which
// reads opposite to the comment above. Confirm the intended condition.
291 openWrapper = prevlay.docbookwrappertag() == lay.docbookwrappertag()
292 && !lay.docbookwrappermergewithprevious();
298 openTag(xs, lay.docbookwrappertag(), lay.docbookwrapperattr(), lay.docbookwrappertagtype());
300 const string & tag = lay.docbooktag();
// The ParTag object is only built to query the stream about already-open tags.
302 auto xmltag = xml::ParTag(tag, lay.docbookattr());
303 if (!xs.isTagOpen(xmltag, 1)) // Don't nest a paragraph directly in a paragraph.
304 // TODO: required or not?
305 // TODO: avoid creating a ParTag object just for this query...
306 openTag(xs, lay.docbooktag(), lay.docbookattr(), lay.docbooktagtype());
309 openTag(xs, lay.docbookitemtag(), lay.docbookitemattr(), lay.docbookitemtagtype());
310 openTag(xs, lay.docbookiteminnertag(), lay.docbookiteminnerattr(), lay.docbookiteminnertagtype());
// Closes the tags requested by the layout of `par`, in the reverse order of
// opening: inner item tag, item tag, main tag, wrapper tag. `nextpar` is the
// following paragraph (may be null) and is only used to decide whether the
// wrapper tag must be closed.
314 void closeParTag(XMLStream & xs, Paragraph const * par, Paragraph const * nextpar)
319 // See comment in openParTag.
320 Layout const & lay = par->layout();
321 bool closeWrapper = lay.docbookwrappertag() != "NONE";
322 if (nextpar != nullptr) {
323 Layout const & nextlay = nextpar->layout();
324 if (nextlay.docbookwrappertag() != "NONE") {
// NOTE(review): as written, a next paragraph with a DIFFERENT (non-"NONE")
// wrapper tag makes this expression false, so the current wrapper would stay
// open. Confirm the intended condition.
325 closeWrapper = nextlay.docbookwrappertag() == lay.docbookwrappertag()
326 && !nextlay.docbookwrappermergewithprevious();
331 closeTag(xs, lay.docbookiteminnertag(), lay.docbookiteminnertagtype());
332 closeTag(xs, lay.docbookitemtag(), lay.docbookitemtagtype());
333 closeTag(xs, lay.docbooktag(), lay.docbooktagtype());
335 closeTag(xs, lay.docbookwrappertag(), lay.docbookwrappertagtype());
// Opens the item-label tag defined by the layout.
339 void openLabelTag(XMLStream & xs, Layout const & lay) // Mostly for definition lists.
341 openTag(xs, lay.docbookitemlabeltag(), lay.docbookitemlabelattr(), lay.docbookitemlabeltagtype());
// Closes the item-label tag defined by the layout.
345 void closeLabelTag(XMLStream & xs, Layout const & lay)
347 closeTag(xs, lay.docbookitemlabeltag(), lay.docbookitemlabeltagtype());
// Opens the item tag defined by the layout.
351 void openItemTag(XMLStream & xs, Layout const & lay)
353 openTag(xs, lay.docbookitemtag(), lay.docbookitemattr(), lay.docbookitemtagtype());
// Closes the item tag defined by the layout.
357 void closeItemTag(XMLStream & xs, Layout const & lay)
359 closeTag(xs, lay.docbookitemtag(), lay.docbookitemtagtype());
// Outputs bibliography-environment paragraphs starting at pbegin. Each paragraph
// becomes one <bibliomixed> entry whose xml:id is the key of the paragraph's
// bibitem inset. The enclosing <bibliography> tag is opened when pbegin has no
// predecessor in a bibliography environment, and closed when the paragraph
// reached after the loop is not a bibliography paragraph (or is the end).
363 void makeParagraphBibliography(
366 OutputParams const & runparams,
368 ParagraphList::const_iterator const & pbegin)
370 auto const begin = text.paragraphs().begin();
371 auto const end = text.paragraphs().end();
375 // Find the paragraph *before* pbegin.
376 ParagraphList::const_iterator pbegin_before = begin;
377 if (pbegin != begin) {
378 ParagraphList::const_iterator pbegin_before_next = begin;
379 ++pbegin_before_next;
381 while (pbegin_before_next != pbegin) {
383 ++pbegin_before_next;
387 ParagraphList::const_iterator par = pbegin;
389 // If this is the first paragraph in a bibliography, open the bibliography tag.
// A bibliography paragraph with no predecessor at all (pbegin == begin) is also
// the first one: without this case the tag would be closed below without ever
// having been opened.
390 if (pbegin == begin || pbegin_before->layout().latextype != LATEX_BIB_ENVIRONMENT) {
391 xs << xml::StartTag("bibliography");
395 // Generate one bibliography entry per paragraph, up to pend.
396 for (; par != pend; ++par) {
397 // Start the precooked bibliography entry. This is very much like opening a paragraph tag.
398 // Don't forget the citation ID!
// Look at the inset at each position i, and cast that same inset: testing
// position 0 while casting position i could dereference a null cast result.
400 for (pos_type i = 0; i < par->size(); ++i) {
401 Inset const *ip = par->getInset(i);
402 if (ip != nullptr && ip->lyxCode() == BIBITEM_CODE) {
403 const auto * bibitem = dynamic_cast<const InsetBibitem*>(ip);
404 attr = from_utf8("xml:id='") + bibitem->getParam("key") + from_utf8("'");
408 xs << xml::StartTag(from_utf8("bibliomixed"), attr);
410 // Generate the entry.
411 par->simpleDocBookOnePar(buf, xs, runparams, text.outerFont(distance(begin, par)), true, true, 0);
413 // End the precooked bibliography entry.
414 xs << xml::EndTag("bibliomixed");
418 // If this is the last paragraph in a bibliography, close the bibliography tag.
419 if (par == end || par->layout().latextype != LATEX_BIB_ENVIRONMENT) {
420 xs << xml::EndTag("bibliography");
// Outputs one standard paragraph (the head of the signature is not visible in
// this listing; presumably this is makeParagraph, which makeAny() calls for
// LATEX_PARAGRAPH layouts). The paragraph is first rendered into a temporary
// stream; it is written to `xs` only if that rendering contains something other
// than spaces and line breaks.
429 OutputParams const & runparams,
431 ParagraphList::const_iterator const & par)
433 auto const begin = text.paragraphs().begin();
434 auto const end = text.paragraphs().end();
435 auto prevpar = text.paragraphs().getParagraphBefore(par);
437 // We want to open the paragraph tag if:
438 // (i) the current layout permits multiple paragraphs
439 // (ii) we are either not already inside a paragraph (HTMLIsBlock) OR
440 // we are, but this is not the first paragraph
442 // But there is also a special case, and we first see whether we are in it.
443 // We do not want to open the paragraph tag if this paragraph contains
444 // only one item, and that item is "inline", i.e., not HTMLIsBlock (such
445 // as a branch). On the other hand, if that single item has a font change
446 // applied to it, then we still do need to open the paragraph.
448 // Obviously, this is very fragile. The main reason we need to do this is
449 // because of branches, e.g., a branch that contains an entire new section.
450 // We do not really want to wrap that whole thing in a <div>...</div>.
451 bool special_case = false;
// Only a paragraph made of exactly one position can hold the single inset
// considered here.
452 Inset const *specinset = par->size() == 1 ? par->getInset(0) : nullptr;
453 if (specinset && !specinset->getLayout().htmlisblock()) { // TODO: Convert htmlisblock to a DocBook parameter?
454 Layout const &style = par->layout();
455 FontInfo const first_font = style.labeltype == LABEL_MANUAL ?
456 style.labelfont : style.font;
457 FontInfo const our_font =
458 par->getFont(buf.masterBuffer()->params(), 0,
459 text.outerFont(std::distance(begin, par))).fontInfo();
// No font change on the single inset.
461 if (first_font == our_font)
465 // Plain layouts must be ignored.
466 if (!special_case && buf.params().documentClass().isPlainLayout(par->layout()) && !runparams.docbook_force_pars)
468 // TODO: Could get rid of this with a DocBook equivalent to htmlisblock?
469 if (!special_case && par->size() == 1 && par->getInset(0)) {
470 Inset const * firstInset = par->getInset(0);
472 // Floats cannot be in paragraphs.
473 special_case = to_utf8(firstInset->layoutName()).substr(0, 6) == "Float:";
475 // Bibliographies cannot be in paragraphs.
476 if (!special_case && firstInset->asInsetCommand())
477 special_case = firstInset->asInsetCommand()->params().getCmdName() == "bibtex";
479 // Equations do not deserve their own paragraph (DocBook allows them outside paragraphs).
480 if (!special_case && firstInset->asInsetMath())
483 // ERTs are in comments, not paragraphs.
484 if (!special_case && firstInset->lyxCode() == lyx::ERT_CODE)
487 // Listings should not get into their own paragraph.
488 if (!special_case && firstInset->lyxCode() == lyx::LISTINGS_CODE)
492 bool const open_par = runparams.docbook_make_pars
493 && !runparams.docbook_in_par
496 // We want to issue the closing tag if either:
497 // (i) We opened it, and either docbook_in_par is false,
498 // or we're not in the last paragraph, anyway.
499 // (ii) We didn't open it and docbook_in_par is true,
500 // but we are in the first par, and there is a next par.
503 bool const close_par = open_par && (!runparams.docbook_in_par);
505 // Determine if this paragraph has some real content. Things like new pages are not caught
506 // by Paragraph::empty(), even though they do not generate anything useful in DocBook.
507 odocstringstream os2;
509 par->simpleDocBookOnePar(buf, xs2, runparams, text.outerFont(distance(begin, par)), open_par, close_par, 0);
// Strip spaces, carriage returns and line feeds: what is left tells whether the
// paragraph produced any real output.
511 docstring cleaned = os2.str();
512 static const lyx::regex reg("[ \\r\\n]*");
513 cleaned = from_utf8(lyx::regex_replace(to_utf8(cleaned), reg, string("")));
515 if (!cleaned.empty()) {
517 openParTag(xs, &*par, prevpar);
// The temporary output is already XML: write it without escaping.
519 xs << XMLStream::ESCAPE_NONE << os2.str();
522 closeParTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr);
531 OutputParams const &ourparams,
532 ParagraphList::const_iterator par);
// Outputs one paragraph whose layout is an environment (plain, list or item
// environment): opens the layout's tags, outputs an optional item wrapper and
// label, then the paragraph contents, and finally closes the layout's tags.
535 void makeEnvironment(
538 OutputParams const &runparams,
540 ParagraphList::const_iterator const & par)
542 auto const end = text.paragraphs().end();
544 // Output the opening tag for this environment, but only if it has not been previously opened (condition
545 // implemented in openParTag).
546 auto prevpar = text.paragraphs().getParagraphBefore(par);
547 openParTag(xs, &*par, prevpar); // TODO: switch in layout for par/block?
549 // Generate the contents of this environment. There is a special case if this is like some environment.
550 Layout const & style = par->layout();
551 if (style.latextype == LATEX_COMMAND) {
552 // Nothing to do (otherwise, infinite loops).
553 } else if (style.latextype == LATEX_ENVIRONMENT ||
554 style.latextype == LATEX_LIST_ENVIRONMENT ||
555 style.latextype == LATEX_ITEM_ENVIRONMENT) {
556 // Open a wrapper tag if needed.
557 if (style.docbookitemwrappertag() != "NONE") {
558 xs << xml::StartTag(style.docbookitemwrappertag(), style.docbookitemwrapperattr());
562 // Generate the label, if need be. If it is taken from the text, sep != 0 and corresponds to the first
563 // character after the label.
565 if (style.labeltype != LABEL_NO_LABEL && style.docbookitemlabeltag() != "NONE") {
566 // At least one condition must be met:
567 // - this environment is not a list
568 // - if this is a list, the label must not be manual (i.e. it must be taken from the layout)
569 if (style.latextype != LATEX_LIST_ENVIRONMENT || style.labeltype != LABEL_MANUAL) {
570 // Usual cases: maybe there is something specified at the layout level. Highly unlikely, though.
571 docstring const lbl = par->params().labelString();
576 openLabelTag(xs, style);
578 closeLabelTag(xs, style);
581 // Only variablelist gets here (or similar items defined as an extension in the layout).
582 openLabelTag(xs, style);
// The label is the first word of the paragraph; sep receives the value returned
// by firstWordDocBook().
583 sep = par->firstWordDocBook(xs, runparams);
584 closeLabelTag(xs, style);
588 // Maybe the item is completely empty, i.e. if the first word ends at the end of the current paragraph
589 // AND if the next paragraph doesn't have the same depth (if there is such a paragraph).
590 // Common case: there is only the first word on the line, but there is a nested list instead
592 bool emptyItem = false;
593 if (sep == par->size()) { // If the separator is already at the end of this paragraph...
596 if (next_par == text.paragraphs().end()) // There is no next paragraph.
598 else // There is a next paragraph: check depth.
599 emptyItem = par->params().depth() >= next_par->params().depth();
603 // Avoid having an empty item, this is not valid DocBook. A single character is enough to force
604 // generation of a full <para>.
605 // TODO: this always worked only by magic...
608 // Generate the rest of the paragraph, if need be.
609 par->simpleDocBookOnePar(buf, xs, runparams, text.outerFont(std::distance(text.paragraphs().begin(), par)),
613 makeAny(text, buf, xs, runparams, par);
616 // Close the environment.
619 closeParTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr); // TODO: switch in layout for par/block?
// Outputs one command-like paragraph between the tags of its layout (the head
// of the signature is not visible in this listing; presumably this is
// makeCommand, which makeAny() calls).
626 OutputParams const & runparams,
628 ParagraphList::const_iterator const & par)
630 // Unlike XHTML, no need for labels, as they are handled by DocBook tags.
631 auto const begin = text.paragraphs().begin();
632 auto const end = text.paragraphs().end();
636 // Generate this command.
637 auto prevpar = text.paragraphs().getParagraphBefore(par);
638 openParTag(xs, &*par, prevpar);
640 par->simpleDocBookOnePar(buf, xs, runparams,
641 text.outerFont(distance(begin, par)));
// Pass the following paragraph (or null at the end of the text) so that the
// wrapper tag can be kept open across paragraphs.
643 closeParTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr);
// Dispatches the output of one paragraph on the LaTeX type of its layout (the
// head of the signature is not visible in this listing; presumably this is
// makeAny, called as makeAny(text, buf, xs, runparams, par) elsewhere in this file).
651 OutputParams const &ourparams,
652 ParagraphList::const_iterator par)
654 switch (par->layout().latextype) {
656 makeCommand(buf, xs, ourparams, text, par);
658 case LATEX_ENVIRONMENT:
659 case LATEX_LIST_ENVIRONMENT:
660 case LATEX_ITEM_ENVIRONMENT:
661 makeEnvironment(buf, xs, ourparams, text, par);
663 case LATEX_PARAGRAPH:
664 makeParagraph(buf, xs, ourparams, text, par);
666 case LATEX_BIB_ENVIRONMENT:
667 makeParagraphBibliography(buf, xs, ourparams, text, par);
672 } // end anonymous namespace
// Result of hasDocumentSectioning(): whether a sectioning layout was found, and
// the paragraph index at which the scan stopped.
675 using DocBookDocumentSectioning = tuple<bool, pit_type>;
// Paragraph indices gathered by getParagraphsWithInfo() to build the <info> tag.
678 struct DocBookInfoTag
// Paragraphs whose layout has docbookininfo() == "maybe".
680 const set<pit_type> shouldBeInInfo;
// Paragraphs whose layout has docbookininfo() == "always".
681 const set<pit_type> mustBeInInfo;
// Paragraphs that make up the abstract.
682 const set<pit_type> abstract;
// Stores the three sets and the paragraph range [bpit, epit) that was analysed.
686 DocBookInfoTag(const set<pit_type> & shouldBeInInfo, const set<pit_type> & mustBeInInfo,
687 const set<pit_type> & abstract, pit_type bpit, pit_type epit) :
688 shouldBeInInfo(shouldBeInInfo), mustBeInInfo(mustBeInInfo), abstract(abstract),
689 bpit(bpit), epit(epit) {}
// Scans the paragraphs from bpit up to epit (exclusive) for a layout of the
// "Sectioning" category. Returns a tuple made of the flag telling whether such
// a layout was seen and the value of bpit when the scan stopped.
693 DocBookDocumentSectioning hasDocumentSectioning(ParagraphList const &paragraphs, pit_type bpit, pit_type const epit) {
694 bool documentHasSections = false;
696 while (bpit < epit) {
697 Layout const &style = paragraphs[bpit].layout();
698 documentHasSections |= style.category() == from_utf8("Sectioning");
700 if (documentHasSections)
704 // Paragraphs before the first section: [ runparams.par_begin ; eppit )
706 return make_tuple(documentHasSections, bpit);
// Tells whether every position of the paragraph holds a note inset, i.e. the
// paragraph has neither text nor any other kind of inset.
710 bool hasOnlyNotes(Paragraph const & par)
712 // Precondition: the paragraph is not empty. Otherwise, the function will always return true...
713 for (int i = 0; i < par.size(); ++i)
714 // If you find something that is not an inset (like actual text) or an inset that is not a note,
716 if (!par.isInset(i) || !dynamic_cast<InsetNote *>(par.insetList().get(i)))
// Classifies the paragraphs of [bpit, epit) for the <info> tag: which ones must
// or should go into <info> (from the layout's docbookininfo() value) and which
// ones form the abstract. The abstract is made of the paragraphs whose layout
// is flagged docbookabstract() when at least one such layout exists; otherwise
// it is detected heuristically from paragraphs containing insets with text.
722 DocBookInfoTag getParagraphsWithInfo(ParagraphList const &paragraphs, pit_type bpit, pit_type const epit) {
723 set<pit_type> shouldBeInInfo;
724 set<pit_type> mustBeInInfo;
725 set<pit_type> abstract;
727 // Find the first non empty paragraph by mutating bpit.
728 while (bpit < epit) {
729 Paragraph const &par = paragraphs[bpit];
730 if (par.empty() || hasOnlyNotes(par))
736 // Find the last info-like paragraph.
737 pit_type cpit = bpit;
738 bool hasAbstractLayout = false;
739 while (cpit < epit) {
740 // Skip paragraphs only containing one note.
741 Paragraph const & par = paragraphs[cpit];
742 if (hasOnlyNotes(par)) {
747 if (par.layout().docbookabstract())
748 hasAbstractLayout = true;
750 // Based on layout information, store this paragraph in one set: should be in <info>, must be.
751 Layout const &style = par.layout();
753 if (style.docbookininfo() == "always") {
754 mustBeInInfo.emplace(cpit);
755 } else if (style.docbookininfo() == "maybe") {
756 shouldBeInInfo.emplace(cpit);
758 // Hypothesis: the <info> parts should be grouped together near the beginning bpit.
759 // There may be notes in between, but nothing else.
764 // Now, cpit points to the last paragraph that has things that could go in <info>.
765 // bpit is the beginning of the <info> part.
767 // Go once again through the list of paragraphs to find the abstract. If there is an abstract
768 // layout, only consider it. Otherwise, an abstract is just a sequence of paragraphs with text.
769 if (hasAbstractLayout) {
771 while (pit < cpit) { // Don't overshoot the <info> part.
772 if (paragraphs[pit].layout().docbookabstract())
773 abstract.emplace(pit);
777 pit_type lastAbstract = epit + 1; // A nonsensical value.
778 docstring lastAbstractLayout;
781 while (pit < cpit) { // Don't overshoot the <info> part.
782 const Paragraph & par = paragraphs.at(pit);
783 if (!par.insetList().empty()) {
784 for (const auto &i : par.insetList()) {
785 if (i.inset->getText(0) != nullptr) {
786 if (lastAbstract == epit + 1) {
787 // First paragraph that matches the heuristic definition of abstract.
789 lastAbstractLayout = par.layout().name();
790 } else if (pit > lastAbstract + 1 || par.layout().name() != lastAbstractLayout) {
791 // This is either too far from the last abstract paragraph or doesn't
792 // have the right layout name, BUT there has already been an abstract
793 // in this document: done with detecting the abstract.
794 goto done; // Easier to get out of two nested loops.
797 abstract.emplace(pit);
807 return DocBookInfoTag(shouldBeInInfo, mustBeInInfo, abstract, bpit, cpit);
// Outputs the <info> tag described by `info`: first the paragraphs that should
// be in <info>, then those that must be (except abstract paragraphs), and
// finally the abstract. The abstract is rendered into a temporary stream first,
// so that it is dropped when it contains nothing but spaces and line breaks.
811 void outputDocBookInfo(
815 OutputParams const & runparams,
816 ParagraphList const & paragraphs,
817 DocBookInfoTag const & info)
819 // Perform an additional check on the abstract. Sometimes, there are many paragraphs that should go
820 // into the abstract, but none generates actual content. Thus, first generate to a temporary stream,
821 // then only create the <abstract> tag if these paragraphs generate some content.
822 // This check must be performed *before* a decision on whether or not to output <info> is made.
823 bool hasAbstract = !info.abstract.empty();
826 // Generate the abstract XML into a string before further checks.
827 odocstringstream os2;
// info.abstract is an ordered set: its first element is the smallest paragraph
// index and its last element the largest, no search is needed.
830 auto bpit = *info.abstract.begin();
831 auto epit = 1 + *info.abstract.rbegin();
832 // info.abstract is inclusive, epit is exclusive, hence +1 for looping.
834 while (bpit < epit) {
835 makeAny(text, buf, xs2, runparams, paragraphs.iterator_at(bpit));
840 // Actually output the abstract if there is something to do. Don't count line feeds or spaces in this,
841 // even though they must be properly output if there is some abstract.
842 abstract = os2.str();
843 static const lyx::regex reg("[ \\r\\n]*");
844 docstring abstractContent = from_utf8(lyx::regex_replace(to_utf8(abstract), reg, string("")));
846 // Nothing? Then there is no abstract!
847 if (abstractContent.empty())
851 // The abstract must go in <info>. Otherwise, decide whether to open <info> based on the layouts.
852 bool needInfo = !info.mustBeInInfo.empty() || hasAbstract;
854 // Start the <info> tag if required.
856 xs.startDivision(false);
857 xs << xml::StartTag("info");
861 // Output the elements that should go in <info>, before and after the abstract.
862 for (auto pit : info.shouldBeInInfo) { // Typically, the title: these elements are so important and ubiquitous
863 // that mandating a wrapper like <info> would repel users. Thus, generate them first.
864 makeAny(text, buf, xs, runparams, paragraphs.iterator_at(pit));
866 for (auto pit : info.mustBeInInfo) {
867 if (info.abstract.find(pit) == info.abstract.end()) // The abstract must be in info, but is dealt with after.
868 makeAny(text, buf, xs, runparams, paragraphs.iterator_at(pit));
871 // Always output the abstract as the last item of the <info>, as it requires special treatment (especially if
872 // it contains several paragraphs that are empty).
874 // string tag = paragraphs[*info.abstract.begin()].layout().docbookforceabstracttag();
875 // if (tag == "NONE")
878 // xs << xml::StartTag(tag);
// The abstract was already rendered as XML above: write it without escaping.
880 xs << XMLStream::ESCAPE_NONE << abstract;
881 // xs << xml::EndTag(tag);
885 // End the <info> tag if it was started.
887 xs << xml::EndTag("info");
// Outputs the <info> tag built from the paragraphs between runparams.par_begin
// and epit.
894 void docbookFirstParagraphs(
898 OutputParams const &runparams,
901 // Handle the beginning of the document, supposing it has sections.
902 // Major role: output the first <info> tag.
904 ParagraphList const &paragraphs = text.paragraphs();
905 pit_type bpit = runparams.par_begin;
906 DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit);
907 outputDocBookInfo(text, buf, xs, runparams, paragraphs, info);
// Outputs a text without sections: first the <info> tag built from the
// paragraphs of [runparams.par_begin, runparams.par_end), then the remaining
// paragraphs one by one, skipping those that only contain notes.
911 void docbookSimpleAllParagraphs(
915 OutputParams const & runparams)
917 // Handle the given text, supposing it has no sections (i.e. a "simple" text). The input may vary in length
918 // between a single paragraph to a whole document.
920 // First, the <info> tag.
921 ParagraphList const &paragraphs = text.paragraphs();
922 pit_type bpit = runparams.par_begin;
923 pit_type const epit = runparams.par_end;
924 DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit);
925 outputDocBookInfo(text, buf, xs, runparams, paragraphs, info);
927 // Then, the content. It starts where the <info> ends.
929 while (bpit < epit) {
930 auto par = paragraphs.iterator_at(bpit);
931 if (!hasOnlyNotes(*par))
932 makeAny(text, buf, xs, runparams, par);
// Entry point for the DocBook output of a text. An empty paragraph range in
// runparams means the whole text. A document without sectioning layouts is
// handed to docbookSimpleAllParagraphs(); otherwise the <info> tag is output
// first, then each paragraph in turn, opening and closing section tags
// according to the TOC level of the sectioning layouts met on the way.
938 void docbookParagraphs(Text const &text,
941 OutputParams const &runparams) {
942 ParagraphList const &paragraphs = text.paragraphs();
943 if (runparams.par_begin == runparams.par_end) {
944 runparams.par_begin = 0;
945 runparams.par_end = paragraphs.size();
947 pit_type bpit = runparams.par_begin;
948 pit_type const epit = runparams.par_end;
951 xs << XMLStream::ESCAPE_NONE << "<!-- DocBook output error! -->\n";
955 std::stack<std::pair<int, string>> headerLevels; // Used to determine when to open/close sections: store the depth
956 // of the section and the tag that was used to open it.
958 // Detect whether the document contains sections. If there are no sections, there can be no automatically
959 // discovered abstract.
960 bool documentHasSections;
962 tie(documentHasSections, eppit) = hasDocumentSectioning(paragraphs, bpit, epit);
964 if (documentHasSections) {
965 docbookFirstParagraphs(text, buf, xs, runparams, eppit);
968 docbookSimpleAllParagraphs(text, buf, xs, runparams);
972 bool currentlyInAppendix = false;
974 while (bpit < epit) {
// Work on a copy of the parameters: the anchors to ignore are added per paragraph.
975 OutputParams ourparams = runparams;
977 auto par = paragraphs.iterator_at(bpit);
978 if (par->params().startOfAppendix())
979 currentlyInAppendix = true;
980 Layout const &style = par->layout();
981 ParagraphList::const_iterator const lastStartedPar = par;
982 ParagraphList::const_iterator send;
984 if (hasOnlyNotes(*par)) {
989 // Think about adding <section> and/or </section>s.
990 const bool isLayoutSectioning = style.category() == from_utf8("Sectioning");
991 if (isLayoutSectioning) {
992 int level = style.toclevel;
994 // Need to close a previous section if it has the same level or a higher one (close <section> if opening a <h2>
995 // after a <h2>, <h3>, <h4>, <h5> or <h6>). More examples:
996 // - current: h2; back: h1; do not close any <section>
997 // - current: h1; back: h2; close two <section> (first the <h2>, then the <h1>, so a new <h1> can come)
998 while (!headerLevels.empty() && level <= headerLevels.top().first) {
999 int stackLevel = headerLevels.top().first;
1000 docstring stackTag = from_utf8("</" + headerLevels.top().second + ">");
1003 // Output the tag only if it corresponds to a legit section.
1004 if (stackLevel != Layout::NOT_IN_TOC)
1005 xs << XMLStream::ESCAPE_NONE << stackTag << xml::CR();
1008 // Open the new section: first push it onto the stack, then output it in DocBook.
// Within an appendix, a chapter is output as <appendix>.
1009 string sectionTag = (currentlyInAppendix && style.docbooksectiontag() == "chapter") ?
1010 "appendix" : style.docbooksectiontag();
1011 headerLevels.push(std::make_pair(level, sectionTag));
1013 // Some sectioning-like elements should not be output (such as FrontMatter).
1014 if (level != Layout::NOT_IN_TOC) {
1015 // Look for a label in the title, i.e. a InsetLabel as a child.
1016 docstring id = docstring();
1017 for (pos_type i = 0; i < par->size(); ++i) {
1018 Inset const *inset = par->getInset(i);
1020 if (auto label = dynamic_cast<InsetLabel const *>(inset)) {
1021 // Generate the attributes for the section if need be.
1022 id += "xml:id=\"" + xml::cleanID(label->screenLabel()) + "\"";
1024 // Don't output the ID as a DocBook <anchor>.
1025 ourparams.docbook_anchors_to_ignore.emplace(label->screenLabel());
1027 // Cannot have multiple IDs per tag.
1033 // Write the open tag for this section.
1034 docstring tag = from_utf8("<" + sectionTag);
1036 tag += from_utf8(" ") + id;
1037 tag += from_utf8(">");
1038 xs << XMLStream::ESCAPE_NONE << tag;
1043 // Close all sections before the bibliography.
1044 // TODO: Only close all when the bibliography is at the end of the document? Or force to output the bibliography at the end of the document? Or don't care (as allowed by DocBook)?
1045 auto insetsLength = distance(par->insetList().begin(), par->insetList().end());
1046 if (insetsLength > 0) {
1047 Inset const *firstInset = par->getInset(0);
1048 if (firstInset && dynamic_cast<InsetBibtex const *>(firstInset)) {
1049 while (!headerLevels.empty()) {
1050 int level = headerLevels.top().first;
1051 docstring tag = from_utf8("</" + headerLevels.top().second + ">");
1054 // Output the tag only if it corresponds to a legit section.
1055 if (level != Layout::NOT_IN_TOC) {
1056 xs << XMLStream::ESCAPE_NONE << tag;
1063 // Generate this paragraph.
1064 makeAny(text, buf, xs, ourparams, par);
1068 // If need be, close <section>s, but only at the end of the document (otherwise, dealt with at the beginning
1070 while (!headerLevels.empty() && headerLevels.top().first > Layout::NOT_IN_TOC) {
1071 docstring tag = from_utf8("</" + headerLevels.top().second + ">");
1073 xs << XMLStream::ESCAPE_NONE << tag;