2 * \file output_docbook.cpp
3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
6 * \author Lars Gullik Bjønnes
9 * Full author contact details are available in file CREDITS.
15 #include "buffer_funcs.h"
16 #include "BufferParams.h"
18 #include "InsetList.h"
20 #include "OutputParams.h"
21 #include "Paragraph.h"
22 #include "ParagraphList.h"
23 #include "ParagraphParameters.h"
26 #include "TextClass.h"
28 #include "insets/InsetBibtex.h"
29 #include "insets/InsetBibitem.h"
30 #include "insets/InsetLabel.h"
31 #include "insets/InsetNote.h"
33 #include "support/convert.h"
34 #include "support/debug.h"
35 #include "support/lassert.h"
36 #include "support/lstrings.h"
37 #include "support/textutils.h"
39 #include "support/regex.h"
47 using namespace lyx::support;
// Returns the tag name for a font type; docbookStartFontTag and docbookEndFontTag pass the
// result as the tag of xml::FontTag / xml::EndFontTag.
// NOTE(review): the switch header and the return statements are not visible in this excerpt,
// so the tag selected for each group of case labels cannot be confirmed from here.
53 std::string fontToDocBookTag(xml::FontTypes type)
56 case xml::FontTypes::FT_EMPH:
57 case xml::FontTypes::FT_BOLD:
59 case xml::FontTypes::FT_NOUN:
61 case xml::FontTypes::FT_UBAR:
62 case xml::FontTypes::FT_WAVE:
63 case xml::FontTypes::FT_DBAR:
64 case xml::FontTypes::FT_SOUT:
65 case xml::FontTypes::FT_XOUT:
66 case xml::FontTypes::FT_ITALIC:
67 case xml::FontTypes::FT_UPRIGHT:
68 case xml::FontTypes::FT_SLANTED:
69 case xml::FontTypes::FT_SMALLCAPS:
70 case xml::FontTypes::FT_ROMAN:
71 case xml::FontTypes::FT_SANS:
73 case xml::FontTypes::FT_TYPE:
75 case xml::FontTypes::FT_SIZE_TINY:
76 case xml::FontTypes::FT_SIZE_SCRIPT:
77 case xml::FontTypes::FT_SIZE_FOOTNOTE:
78 case xml::FontTypes::FT_SIZE_SMALL:
79 case xml::FontTypes::FT_SIZE_NORMAL:
80 case xml::FontTypes::FT_SIZE_LARGE:
81 case xml::FontTypes::FT_SIZE_LARGER:
82 case xml::FontTypes::FT_SIZE_LARGEST:
83 case xml::FontTypes::FT_SIZE_HUGE:
84 case xml::FontTypes::FT_SIZE_HUGER:
85 case xml::FontTypes::FT_SIZE_INCREASE:
86 case xml::FontTypes::FT_SIZE_DECREASE:
// Returns the role string associated with a font type. fontToAttribute() wraps the result
// in a role='...' attribute.
// NOTE(review): only the returns for some size-related types are visible in this excerpt
// ("size_script", "size_footnote", "size_normal", "size_larger", "size_largest",
// "size_increase", "size_decrease"); the values returned for the other labels are not shown.
94 string fontToRole(xml::FontTypes type)
96 // Specific fonts are achieved with roles. The only common ones are "" for basic emphasis,
97 // and "bold"/"strong" for bold. With some specific options, other roles are copied into
98 // HTML output (via the DocBook XSLT sheets); otherwise, if not recognised, they are just ignored.
99 // Hence, it is not a problem to have many roles by default here.
100 // See https://www.sourceware.org/ml/docbook/2003-05/msg00269.html
102 case xml::FontTypes::FT_ITALIC:
103 case xml::FontTypes::FT_EMPH:
105 case xml::FontTypes::FT_BOLD:
107 case xml::FontTypes::FT_NOUN: // Outputs a <person>
108 case xml::FontTypes::FT_TYPE: // Outputs a <code>
110 case xml::FontTypes::FT_UBAR:
113 // All other roles are non-standard for DocBook.
115 case xml::FontTypes::FT_WAVE:
117 case xml::FontTypes::FT_DBAR:
119 case xml::FontTypes::FT_SOUT:
121 case xml::FontTypes::FT_XOUT:
123 case xml::FontTypes::FT_UPRIGHT:
125 case xml::FontTypes::FT_SLANTED:
127 case xml::FontTypes::FT_SMALLCAPS:
129 case xml::FontTypes::FT_ROMAN:
131 case xml::FontTypes::FT_SANS:
133 case xml::FontTypes::FT_SIZE_TINY:
135 case xml::FontTypes::FT_SIZE_SCRIPT:
136 return "size_script";
137 case xml::FontTypes::FT_SIZE_FOOTNOTE:
138 return "size_footnote";
139 case xml::FontTypes::FT_SIZE_SMALL:
141 case xml::FontTypes::FT_SIZE_NORMAL:
142 return "size_normal";
143 case xml::FontTypes::FT_SIZE_LARGE:
145 case xml::FontTypes::FT_SIZE_LARGER:
146 return "size_larger";
147 case xml::FontTypes::FT_SIZE_LARGEST:
148 return "size_largest";
149 case xml::FontTypes::FT_SIZE_HUGE:
151 case xml::FontTypes::FT_SIZE_HUGER:
153 case xml::FontTypes::FT_SIZE_INCREASE:
154 return "size_increase";
155 case xml::FontTypes::FT_SIZE_DECREASE:
156 return "size_decrease";
// Builds the attribute text for a font tag from fontToRole(type), in the form role='<role>'.
// NOTE(review): the condition guarding the return below is not visible in this excerpt;
// per the existing comment, the attribute is only meant to be produced when a role exists.
162 string fontToAttribute(xml::FontTypes type) {
163 // If there is a role (i.e. nonstandard use of a tag), output the attribute. Otherwise, the sheer tag is sufficient
165 string role = fontToRole(type);
// The role is inserted between single quotes without any escaping.
167 return "role='" + role + "'";
173 } // end anonymous namespace
// Creates the opening font tag for a font type: tag name from fontToDocBookTag(),
// attribute string from fontToAttribute(), both converted with from_utf8().
176 xml::FontTag docbookStartFontTag(xml::FontTypes type)
178 return xml::FontTag(from_utf8(fontToDocBookTag(type)), from_utf8(fontToAttribute(type)), type);
// Creates the closing font tag for a font type, using the same tag name as
// docbookStartFontTag() (fontToDocBookTag()).
182 xml::EndFontTag docbookEndFontTag(xml::FontTypes type)
184 return xml::EndFontTag(from_utf8(fontToDocBookTag(type)), type);
190 // Convenience functions to open and close tags. First, very low-level ones to ensure a consistent new-line behaviour.
194 // Contents of the block.
199 // <paratag>Contents of the paragraph.</paratag>
202 // Content before<inlinetag>Contents of the paragraph.</inlinetag>Content after
// Writes an opening tag with the given attributes to the stream; unlike the par/block
// variants, no isLastTagCR() check is visible here.
204 void openInlineTag(XMLStream & xs, const std::string & tag, const std::string & attr)
206 xs << xml::StartTag(tag, attr);
// Writes the closing tag matching openInlineTag().
210 void closeInlineTag(XMLStream & xs, const std::string & tag)
212 xs << xml::EndTag(tag);
// Low-level opening of a paragraph-type tag given as a string.
216 void openParTag(XMLStream & xs, const std::string & tag, const std::string & attr)
// NOTE(review): the statement controlled by this test is not visible in this excerpt;
// presumably a line feed is emitted so the tag starts on its own line — confirm.
218 if (!xs.isLastTagCR())
220 xs << xml::StartTag(tag, attr);
// Low-level closing of a paragraph-type tag given as a string.
// NOTE(review): any statement following the end tag is not visible in this excerpt.
224 void closeParTag(XMLStream & xs, const std::string & tag)
226 xs << xml::EndTag(tag);
// Low-level opening of a block-type tag given as a string.
231 void openBlockTag(XMLStream & xs, const std::string & tag, const std::string & attr)
// NOTE(review): the statement controlled by this test is not visible in this excerpt;
// presumably a line feed is emitted before the tag — confirm.
233 if (!xs.isLastTagCR())
235 xs << xml::StartTag(tag, attr);
// Low-level closing of a block-type tag given as a string.
// NOTE(review): statements before/after the end tag are not visible in this excerpt.
240 void closeBlockTag(XMLStream & xs, const std::string & tag)
243 xs << xml::EndTag(tag);
// Opens a tag by dispatching on tagtype: "paragraph" (or a tag literally named "para"),
// "block" or "inline". Any other tagtype is reported through xs.writeError().
248 void openTag(XMLStream & xs, const std::string & tag, const std::string & attr, const std::string & tagtype)
// An empty tag or the placeholder "NONE" is special-cased first.
// NOTE(review): the statement taken in that case is not visible here (presumably an early return).
250 if (tag.empty() || tag == "NONE")
253 if (tag == "para" || tagtype == "paragraph") // Special case for <para>: always considered as a paragraph.
254 openParTag(xs, tag, attr);
255 else if (tagtype == "block")
256 openBlockTag(xs, tag, attr);
257 else if (tagtype == "inline")
258 openInlineTag(xs, tag, attr);
// Fallback for unknown tag types: the error message includes the tag type, the tag and its attributes.
260 xs.writeError("Unrecognised tag type '" + tagtype + "' for '" + tag + " " + attr + "'");
// Closes a tag using the same dispatch on tagtype as openTag(): "paragraph" (or a tag
// named "para"), "block" or "inline"; any other tagtype is reported through xs.writeError().
264 void closeTag(XMLStream & xs, const std::string & tag, const std::string & tagtype)
// An empty tag or the placeholder "NONE" is special-cased first.
// NOTE(review): the statement taken in that case is not visible here (presumably an early return).
266 if (tag.empty() || tag == "NONE")
269 if (tag == "para" || tagtype == "paragraph") // Special case for <para>: always considered as a paragraph.
270 closeParTag(xs, tag);
271 else if (tagtype == "block")
272 closeBlockTag(xs, tag);
273 else if (tagtype == "inline")
274 closeInlineTag(xs, tag);
276 xs.writeError("Unrecognised tag type '" + tagtype + "' for '" + tag + "'");
280 // Higher-level convenience functions.
// Opens the tags that surround a paragraph according to its layout, in this order:
// wrapper tag, main tag, item tag, item inner tag.
// par is the paragraph being output (dereferenced unconditionally); prevpar is the paragraph
// before it and may be nullptr.
282 void openParTag(XMLStream & xs, const Paragraph * par, const Paragraph * prevpar)
284 Layout const & lay = par->layout();
289 // When should the wrapper be opened here? Only if the previous paragraph has the SAME wrapper tag
290 // (usually, they won't have the same layout) and the CURRENT one allows merging.
291 // The main use case is author information in several paragraphs: if the name of the author is the
292 // first paragraph of an author, then merging with the previous tag does not make sense. Say the
293 // next paragraph is the affiliation, then it should be output in the same <author> tag (different
294 // layout, same wrapper tag).
// Default: open the wrapper whenever this layout defines one.
295 bool openWrapper = lay.docbookwrappertag() != "NONE";
296 if (prevpar != nullptr) {
297 Layout const & prevlay = prevpar->layout();
298 if (prevlay.docbookwrappertag() != "NONE") {
// NOTE(review): when the previous paragraph has a wrapper tag that DIFFERS from this one,
// this expression evaluates to false, so this paragraph's wrapper is not opened even though
// the layout defines one. Confirm whether a differing previous wrapper should instead keep
// openWrapper true.
299 openWrapper = prevlay.docbookwrappertag() == lay.docbookwrappertag()
300 && !lay.docbookwrappermergewithprevious();
// NOTE(review): the test on openWrapper that presumably guards the next call is not visible in this excerpt.
306 openTag(xs, lay.docbookwrappertag(), lay.docbookwrapperattr(), lay.docbookwrappertagtype());
308 const string & tag = lay.docbooktag();
310 auto xmltag = xml::ParTag(tag, lay.docbookattr());
311 if (!xs.isTagOpen(xmltag, 1)) // Don't nest a paragraph directly in a paragraph.
312 // TODO: required or not?
313 // TODO: avoid creating a ParTag object just for this query...
314 openTag(xs, lay.docbooktag(), lay.docbookattr(), lay.docbooktagtype());
// Item and item-inner tags; openTag() special-cases empty or "NONE" tags.
317 openTag(xs, lay.docbookitemtag(), lay.docbookitemattr(), lay.docbookitemtagtype());
318 openTag(xs, lay.docbookiteminnertag(), lay.docbookiteminnerattr(), lay.docbookiteminnertagtype());
// Closes the tags opened by openParTag(XMLStream &, const Paragraph *, const Paragraph *),
// in reverse order: item inner tag, item tag, main tag, then the wrapper tag.
// par is dereferenced unconditionally; nextpar is the following paragraph and may be nullptr.
322 void closeParTag(XMLStream & xs, Paragraph const * par, Paragraph const * nextpar)
327 // See comment in openParTag.
328 Layout const & lay = par->layout();
// Default: close the wrapper whenever this layout defines one.
329 bool closeWrapper = lay.docbookwrappertag() != "NONE";
330 if (nextpar != nullptr) {
331 Layout const & nextlay = nextpar->layout();
332 if (nextlay.docbookwrappertag() != "NONE") {
// NOTE(review): when the next paragraph has a wrapper tag that DIFFERS from this one, this
// expression evaluates to false, so this paragraph's wrapper would stay open. Confirm intent.
333 closeWrapper = nextlay.docbookwrappertag() == lay.docbookwrappertag()
334 && !nextlay.docbookwrappermergewithprevious();
339 closeTag(xs, lay.docbookiteminnertag(), lay.docbookiteminnertagtype());
340 closeTag(xs, lay.docbookitemtag(), lay.docbookitemtagtype());
341 closeTag(xs, lay.docbooktag(), lay.docbooktagtype());
// NOTE(review): the test on closeWrapper that presumably guards the next call is not visible in this excerpt.
343 closeTag(xs, lay.docbookwrappertag(), lay.docbookwrappertagtype());
// Opens the item label tag defined by the layout (tag, attributes and tag type all come from lay).
347 void openLabelTag(XMLStream & xs, Layout const & lay) // Mostly for definition lists.
349 openTag(xs, lay.docbookitemlabeltag(), lay.docbookitemlabelattr(), lay.docbookitemlabeltagtype());
// Closes the item label tag opened by openLabelTag().
353 void closeLabelTag(XMLStream & xs, Layout const & lay)
355 closeTag(xs, lay.docbookitemlabeltag(), lay.docbookitemlabeltagtype());
// Opens the item tag defined by the layout (tag, attributes and tag type all come from lay).
359 void openItemTag(XMLStream & xs, Layout const & lay)
361 openTag(xs, lay.docbookitemtag(), lay.docbookitemattr(), lay.docbookitemtagtype());
// Closes the item tag opened by openItemTag().
365 void closeItemTag(XMLStream & xs, Layout const & lay)
367 closeTag(xs, lay.docbookitemtag(), lay.docbookitemtagtype());
// Outputs bibliography paragraphs: each paragraph becomes a <bibliomixed> element, and the
// enclosing <bibliography> element is opened/closed depending on the neighbouring paragraphs.
// NOTE(review): several parameter lines and the definition of pend are not visible in this excerpt.
371 void makeParagraphBibliography(
374 OutputParams const & runparams,
376 ParagraphList::const_iterator const & pbegin)
378 auto const begin = text.paragraphs().begin();
379 auto const end = text.paragraphs().end();
383 // Find the paragraph *before* pbegin.
// Walks forward from the start of the list with a look-ahead iterator until the look-ahead reaches pbegin.
384 ParagraphList::const_iterator pbegin_before = begin;
385 if (pbegin != begin) {
386 ParagraphList::const_iterator pbegin_before_next = begin;
387 ++pbegin_before_next;
389 while (pbegin_before_next != pbegin) {
391 ++pbegin_before_next;
395 ParagraphList::const_iterator par = pbegin;
397 // If this is the first paragraph in a bibliography, open the bibliography tag.
// NOTE(review): because of the pbegin != begin test, the tag is not opened here when the
// bibliography starts at the very first paragraph of the text — confirm this is intended.
398 if (pbegin != begin && pbegin_before->layout().latextype != LATEX_BIB_ENVIRONMENT) {
399 xs << xml::StartTag("bibliography");
403 // Generate the required paragraphs, but only if they are .
404 for (; par != pend; ++par) {
405 // Start the precooked bibliography entry. This is very much like opening a paragraph tag.
406 // Don't forget the citation ID!
408 for (auto i = 0; i < par->size(); ++i) {
// NOTE(review): the inset tested here is always the one at position 0, while the inset cast
// below is the one at position i; these two indices look like they should agree.
409 Inset const *ip = par->getInset(0);
410 if (ip != nullptr && ip->lyxCode() == BIBITEM_CODE) {
// NOTE(review): the dynamic_cast result is dereferenced on the next line without a null
// check; if getInset(i) is not an InsetBibitem (or is null) this dereferences a null pointer.
411 const auto * bibitem = dynamic_cast<const InsetBibitem*>(par->getInset(i));
412 attr = from_utf8("xml:id='") + bibitem->getParam("key") + from_utf8("'");
416 xs << xml::StartTag(from_utf8("bibliomixed"), attr);
418 // Generate the entry.
419 par->simpleDocBookOnePar(buf, xs, runparams, text.outerFont(distance(begin, par)), true, true, 0);
421 // End the precooked bibliography entry.
422 xs << xml::EndTag("bibliomixed");
426 // If this is the last paragraph in a bibliography, close the bibliography tag.
// par == end is tested first, so par is only dereferenced when it is not the end iterator.
427 if (par == end || par->layout().latextype != LATEX_BIB_ENVIRONMENT) {
428 xs << xml::EndTag("bibliography");
// NOTE(review): the first line of this definition is not visible in this excerpt; judging
// from the call in the LATEX_PARAGRAPH case of makeAny this is presumably makeParagraph — confirm.
// Renders one paragraph into a temporary stream first, and only writes it (surrounded by the
// layout's tags) to xs when the rendered text contains something other than spaces, CR and LF.
437 OutputParams const & runparams,
439 ParagraphList::const_iterator const & par)
441 auto const begin = text.paragraphs().begin();
442 auto const end = text.paragraphs().end();
443 auto prevpar = text.paragraphs().getParagraphBefore(par);
445 // We want to open the paragraph tag if:
446 // (i) the current layout permits multiple paragraphs
447 // (ii) we are either not already inside a paragraph (HTMLIsBlock) OR
448 // we are, but this is not the first paragraph
450 // But there is also a special case, and we first see whether we are in it.
451 // We do not want to open the paragraph tag if this paragraph contains
452 // only one item, and that item is "inline", i.e., not HTMLIsBlock (such
453 // as a branch). On the other hand, if that single item has a font change
454 // applied to it, then we still do need to open the paragraph.
456 // Obviously, this is very fragile. The main reason we need to do this is
457 // because of branches, e.g., a branch that contains an entire new section.
458 // We do not really want to wrap that whole thing in a <div>...</div>.
459 bool special_case = false;
// Only a paragraph of size 1 whose single position holds an inset is a candidate.
460 Inset const *specinset = par->size() == 1 ? par->getInset(0) : nullptr;
461 if (specinset && !specinset->getLayout().htmlisblock()) { // TODO: Convert htmlisblock to a DocBook parameter?
462 Layout const &style = par->layout();
// Compare the layout's default font (label font for manual labels) with the font actually used at position 0.
463 FontInfo const first_font = style.labeltype == LABEL_MANUAL ?
464 style.labelfont : style.font;
465 FontInfo const our_font =
466 par->getFont(buf.masterBuffer()->params(), 0,
467 text.outerFont(std::distance(begin, par))).fontInfo();
// NOTE(review): the statement executed when the fonts match is not visible in this excerpt.
469 if (first_font == our_font)
473 // Plain layouts must be ignored.
474 if (!special_case && buf.params().documentClass().isPlainLayout(par->layout()) && !runparams.docbook_force_pars)
476 // TODO: Could get rid of this with a DocBook equivalent to htmlisblock?
477 if (!special_case && par->size() == 1 && par->getInset(0)) {
478 Inset const * firstInset = par->getInset(0);
480 // Floats cannot be in paragraphs.
// Detected by the layout name starting with the six characters "Float:".
481 special_case = to_utf8(firstInset->layoutName()).substr(0, 6) == "Float:";
483 // Bibliographies cannot be in paragraphs.
484 if (!special_case && firstInset->asInsetCommand())
485 special_case = firstInset->asInsetCommand()->params().getCmdName() == "bibtex";
487 // Equations do not deserve their own paragraph (DocBook allows them outside paragraphs).
488 if (!special_case && firstInset->asInsetMath())
491 // ERTs are in comments, not paragraphs.
492 if (!special_case && firstInset->lyxCode() == lyx::ERT_CODE)
495 // Listings should not get into their own paragraph.
496 if (!special_case && firstInset->lyxCode() == lyx::LISTINGS_CODE)
// NOTE(review): the last operand(s) of this condition are not visible in this excerpt.
500 bool const open_par = runparams.docbook_make_pars
501 && !runparams.docbook_in_par
504 // We want to issue the closing tag if either:
505 // (i) We opened it, and either docbook_in_par is false,
506 // or we're not in the last paragraph, anyway.
507 // (ii) We didn't open it and docbook_in_par is true,
508 // but we are in the first par, and there is a next par.
// NOTE(review): open_par already requires !runparams.docbook_in_par, so the second operand
// looks redundant and close_par always equals open_par; case (ii) above is not implemented
// by this expression.
511 bool const close_par = open_par && (!runparams.docbook_in_par);
513 // Determine if this paragraph has some real content. Things like new pages are not caught
514 // by Paragraph::empty(), even though they do not generate anything useful in DocBook.
515 odocstringstream os2;
517 par->simpleDocBookOnePar(buf, xs2, runparams, text.outerFont(distance(begin, par)), open_par, close_par, 0);
// Strip every run of spaces, carriage returns and line feeds to test for real content.
519 docstring cleaned = os2.str();
520 static const lyx::regex reg("[ \\r\\n]*");
521 cleaned = from_utf8(lyx::regex_replace(to_utf8(cleaned), reg, string("")));
523 if (!cleaned.empty()) {
525 openParTag(xs, &*par, prevpar);
// The text generated into the temporary stream is written with escaping disabled.
527 xs << XMLStream::ESCAPE_NONE << os2.str();
// NOTE(review): the definition of nextpar is not visible in this excerpt.
530 closeParTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr);
// NOTE(review): tail of a declaration whose first lines are not visible in this excerpt;
// presumably the forward declaration of makeAny, which makeEnvironment calls — confirm.
539 OutputParams const &ourparams,
540 ParagraphList::const_iterator par);
// Outputs a paragraph whose layout is an environment: opens the layout's tags, optionally
// emits an item wrapper and a label, then the paragraph contents, and finally closes the tags.
// NOTE(review): several parameter lines and local declarations (sep, next_par, nextpar) are
// not visible in this excerpt.
543 void makeEnvironment(
546 OutputParams const &runparams,
548 ParagraphList::const_iterator const & par)
550 auto const end = text.paragraphs().end();
552 // Output the opening tag for this environment, but only if it has not been previously opened (condition
553 // implemented in openParTag).
554 auto prevpar = text.paragraphs().getParagraphBefore(par);
555 openParTag(xs, &*par, prevpar); // TODO: switch in layout for par/block?
557 // Generate the contents of this environment. There is a special case if this is like some environment.
558 Layout const & style = par->layout();
559 if (style.latextype == LATEX_COMMAND) {
560 // Nothing to do (otherwise, infinite loops).
561 } else if (style.latextype == LATEX_ENVIRONMENT ||
562 style.latextype == LATEX_LIST_ENVIRONMENT ||
563 style.latextype == LATEX_ITEM_ENVIRONMENT) {
564 // Open a wrapper tag if needed.
565 if (style.docbookitemwrappertag() != "NONE") {
// Written directly as a StartTag rather than through openTag().
566 xs << xml::StartTag(style.docbookitemwrappertag(), style.docbookitemwrapperattr());
570 // Generate the label, if need be. If it is taken from the text, sep != 0 and corresponds to the first
571 // character after the label.
573 if (style.labeltype != LABEL_NO_LABEL && style.docbookitemlabeltag() != "NONE") {
574 // At least one condition must be met:
575 // - this environment is not a list
576 // - if this is a list, the label must not be manual (i.e. it must be taken from the layout)
577 if (style.latextype != LATEX_LIST_ENVIRONMENT || style.labeltype != LABEL_MANUAL) {
578 // Usual cases: maybe there is something specified at the layout level. Highly unlikely, though.
579 docstring const lbl = par->params().labelString();
// NOTE(review): the lines that test and output lbl between the label tags are not visible in this excerpt.
584 openLabelTag(xs, style);
586 closeLabelTag(xs, style);
589 // Only variablelist gets here (or similar items defined as an extension in the layout).
590 openLabelTag(xs, style);
// firstWordDocBook() writes to xs and its return value is stored in sep.
591 sep = par->firstWordDocBook(xs, runparams);
592 closeLabelTag(xs, style);
596 // Maybe the item is completely empty, i.e. if the first word ends at the end of the current paragraph
597 // AND if the next paragraph doesn't have the same depth (if there is such a paragraph).
598 // Common case: there is only the first word on the line, but there is a nested list instead
600 bool emptyItem = false;
601 if (sep == par->size()) { // If the separator is already at the end of this paragraph...
604 if (next_par == text.paragraphs().end()) // There is no next paragraph.
606 else // There is a next paragraph: check depth.
// The item counts as empty when the next paragraph is not nested deeper than this one.
607 emptyItem = par->params().depth() >= next_par->params().depth();
611 // Avoid having an empty item, this is not valid DocBook. A single character is enough to force
612 // generation of a full <para>.
613 // TODO: this always worked only by magic...
616 // Generate the rest of the paragraph, if need be.
617 par->simpleDocBookOnePar(buf, xs, runparams, text.outerFont(std::distance(text.paragraphs().begin(), par)),
// NOTE(review): the branch condition leading to this call is not visible in this excerpt.
621 makeAny(text, buf, xs, runparams, par);
624 // Close the environment.
627 closeParTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr); // TODO: switch in layout for par/block?
// NOTE(review): the first line of this definition is not visible in this excerpt; judging
// from the call in makeAny this is presumably makeCommand — confirm.
// Outputs one paragraph between the tags given by its layout (openParTag / closeParTag).
634 OutputParams const & runparams,
636 ParagraphList::const_iterator const & par)
638 // Unlike XHTML, no need for labels, as they are handled by DocBook tags.
639 auto const begin = text.paragraphs().begin();
640 auto const end = text.paragraphs().end();
644 // Generate this command.
645 auto prevpar = text.paragraphs().getParagraphBefore(par);
646 openParTag(xs, &*par, prevpar);
648 par->simpleDocBookOnePar(buf, xs, runparams,
649 text.outerFont(distance(begin, par)));
// NOTE(review): the definition of nextpar is not visible in this excerpt.
651 closeParTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr);
// NOTE(review): the first line of this definition is not visible in this excerpt; judging
// from the calls elsewhere in the file this is presumably the definition of makeAny — confirm.
// Dispatches on the paragraph layout's latextype to makeCommand, makeEnvironment,
// makeParagraph or makeParagraphBibliography.
659 OutputParams const &ourparams,
660 ParagraphList::const_iterator par)
662 switch (par->layout().latextype) {
// NOTE(review): the case label for the next call is not visible in this excerpt.
664 makeCommand(buf, xs, ourparams, text, par);
666 case LATEX_ENVIRONMENT:
667 case LATEX_LIST_ENVIRONMENT:
668 case LATEX_ITEM_ENVIRONMENT:
669 makeEnvironment(buf, xs, ourparams, text, par);
671 case LATEX_PARAGRAPH:
672 makeParagraph(buf, xs, ourparams, text, par);
674 case LATEX_BIB_ENVIRONMENT:
675 makeParagraphBibliography(buf, xs, ourparams, text, par);
680 } // end anonymous namespace
// Result type of hasDocumentSectioning(): (whether a sectioning layout was found, paragraph index).
683 using DocBookDocumentSectioning = tuple<bool, pit_type>;
// Immutable description of the paragraphs relevant to the <info> element, as computed by
// getParagraphsWithInfo(). The three sets hold paragraph indices.
// NOTE(review): the declarations of the bpit and epit members are not visible in this excerpt.
686 struct DocBookInfoTag
688 const set<pit_type> shouldBeInInfo;
689 const set<pit_type> mustBeInInfo;
690 const set<pit_type> abstract;
// Copies the three sets and stores the two paragraph indices.
694 DocBookInfoTag(const set<pit_type> & shouldBeInInfo, const set<pit_type> & mustBeInInfo,
695 const set<pit_type> & abstract, pit_type bpit, pit_type epit) :
696 shouldBeInInfo(shouldBeInInfo), mustBeInInfo(mustBeInInfo), abstract(abstract),
697 bpit(bpit), epit(epit) {}
// Scans paragraphs from bpit (taken by value and advanced locally) towards epit, looking for
// a layout whose category is "Sectioning". Returns the flag together with the value of bpit
// when the scan stopped.
// NOTE(review): the loop increment and the statement taken once a section is found are not
// visible in this excerpt.
701 DocBookDocumentSectioning hasDocumentSectioning(ParagraphList const ¶graphs, pit_type bpit, pit_type const epit) {
702 bool documentHasSections = false;
704 while (bpit < epit) {
705 Layout const &style = paragraphs[bpit].layout();
706 documentHasSections |= style.category() == from_utf8("Sectioning");
708 if (documentHasSections)
712 // Paragraphs before the first section: [ runparams.par_begin ; eppit )
714 return make_tuple(documentHasSections, bpit);
// Checks every position of the paragraph: a position that is not an inset, or whose inset
// is not an InsetNote, is what the loop looks for.
// NOTE(review): the return statements are not visible in this excerpt.
718 bool hasOnlyNotes(Paragraph const & par)
720 // Precondition: the paragraph is not empty. Otherwise, the function will always return true...
721 for (int i = 0; i < par.size(); ++i)
722 // If you find something that is not an inset (like actual text) or an inset that is not a note,
// NOTE(review): insetList().get(i) is assumed here to return the inset at text position i — confirm.
724 if (!par.isInset(i) || !dynamic_cast<InsetNote *>(par.insetList().get(i)))
// Classifies the paragraphs starting at bpit (bounded by epit) for the <info> element:
//  - layouts with docbookininfo() == "always" go to mustBeInInfo, "maybe" to shouldBeInInfo;
//  - abstract paragraphs are those whose layout has docbookabstract(), or, when no such
//    layout was seen, those selected by a heuristic on insets that contain text.
// Returns DocBookInfoTag(shouldBeInInfo, mustBeInInfo, abstract, bpit, cpit).
// NOTE(review): loop increments, break statements and some declarations (pit, the "done"
// label) are not visible in this excerpt.
730 DocBookInfoTag getParagraphsWithInfo(ParagraphList const ¶graphs, pit_type bpit, pit_type const epit) {
731 set<pit_type> shouldBeInInfo;
732 set<pit_type> mustBeInInfo;
733 set<pit_type> abstract;
735 // Find the first non empty paragraph by mutating bpit.
736 while (bpit < epit) {
737 Paragraph const &par = paragraphs[bpit];
738 if (par.empty() || hasOnlyNotes(par))
744 // Find the last info-like paragraph.
745 pit_type cpit = bpit;
746 bool hasAbstractLayout = false;
747 while (cpit < epit) {
748 // Skip paragraphs only containing one note.
749 Paragraph const & par = paragraphs[cpit];
750 if (hasOnlyNotes(par)) {
755 if (par.layout().docbookabstract())
756 hasAbstractLayout = true;
758 // Based on layout information, store this paragraph in one set: should be in <info>, must be.
759 Layout const &style = par.layout();
761 if (style.docbookininfo() == "always") {
762 mustBeInInfo.emplace(cpit);
763 } else if (style.docbookininfo() == "maybe") {
764 shouldBeInInfo.emplace(cpit);
766 // Hypothesis: the <info> parts should be grouped together near the beginning bpit.
767 // There may be notes in between, but nothing else.
772 // Now, cpit points to the last paragraph that has things that could go in <info>.
773 // bpit is the beginning of the <info> part.
775 // Go once again through the list of paragraphs to find the abstract. If there is an abstract
776 // layout, only consider it. Otherwise, an abstract is just a sequence of paragraphs with text.
777 if (hasAbstractLayout) {
779 while (pit < cpit) { // Don't overshoot the <info> part.
780 if (paragraphs[pit].layout().docbookabstract())
781 abstract.emplace(pit);
// Heuristic branch: epit + 1 serves as the "no abstract paragraph seen yet" marker.
785 pit_type lastAbstract = epit + 1; // A nonsensical value.
786 docstring lastAbstractLayout;
789 while (pit < cpit) { // Don't overshoot the <info> part.
790 const Paragraph & par = paragraphs.at(pit);
791 if (!par.insetList().empty()) {
792 for (const auto &i : par.insetList()) {
// Only insets for which getText(0) is non-null are considered.
793 if (i.inset->getText(0) != nullptr) {
794 if (lastAbstract == epit + 1) {
795 // First paragraph that matches the heuristic definition of abstract.
797 lastAbstractLayout = par.layout().name();
798 } else if (pit > lastAbstract + 1 || par.layout().name() != lastAbstractLayout) {
799 // This is either too far from the last abstract paragraph or doesn't
800 // have the right layout name, BUT there has already been an abstract
801 // in this document: done with detecting the abstract.
802 goto done; // Easier to get out of two nested loops.
805 abstract.emplace(pit);
815 return DocBookInfoTag(shouldBeInInfo, mustBeInInfo, abstract, bpit, cpit);
// Writes the <info> element described by info: the shouldBeInInfo paragraphs, the
// mustBeInInfo paragraphs that are not part of the abstract, then the abstract itself.
// The abstract is first rendered into a temporary stream so that an abstract producing only
// spaces, CR and LF is treated as absent.
// NOTE(review): several parameter lines and most if-conditions / closing braces are not
// visible in this excerpt.
819 void outputDocBookInfo(
823 OutputParams const & runparams,
824 ParagraphList const & paragraphs,
825 DocBookInfoTag const & info)
827 // Perform an additional check on the abstract. Sometimes, there are many paragraphs that should go
828 // into the abstract, but none generates actual content. Thus, first generate to a temporary stream,
829 // then only create the <abstract> tag if these paragraphs generate some content.
830 // This check must be performed *before* a decision on whether or not to output <info> is made.
831 bool hasAbstract = !info.abstract.empty();
834 // Generate the abstract XML into a string before further checks.
835 odocstringstream os2;
// NOTE(review): info.abstract is a std::set, so its smallest and largest elements are also
// available as *begin() and *rbegin(); dereferencing these results requires a non-empty set
// (the guard, presumably on hasAbstract, is not visible in this excerpt).
838 auto bpit = *std::min_element(info.abstract.begin(), info.abstract.end());
839 auto epit = 1 + *std::max_element(info.abstract.begin(), info.abstract.end());
840 // info.abstract is inclusive, epit is exclusive, hence +1 for looping.
// Every paragraph in [bpit, epit) is rendered, not only the indices stored in info.abstract.
842 while (bpit < epit) {
843 makeAny(text, buf, xs2, runparams, paragraphs.iterator_at(bpit));
848 // Actually output the abstract if there is something to do. Don't count line feeds or spaces in this,
849 // even though they must be properly output if there is some abstract.
850 abstract = os2.str();
851 static const lyx::regex reg("[ \\r\\n]*");
852 docstring abstractContent = from_utf8(lyx::regex_replace(to_utf8(abstract), reg, string("")));
854 // Nothing? Then there is no abstract!
855 if (abstractContent.empty())
859 // The abstract must go in <info>. Otherwise, decide whether to open <info> based on the layouts.
860 bool needInfo = !info.mustBeInInfo.empty() || hasAbstract;
862 // Start the <info> tag if required.
864 xs.startDivision(false);
865 xs << xml::StartTag("info");
869 // Output the elements that should go in <info>, before and after the abstract.
870 for (auto pit : info.shouldBeInInfo) { // Typically, the title: these elements are so important and ubiquitous
871 // that mandating a wrapper like <info> would repel users. Thus, generate them first.
872 makeAny(text, buf, xs, runparams, paragraphs.iterator_at(pit));
874 for (auto pit : info.mustBeInInfo) {
875 if (info.abstract.find(pit) == info.abstract.end()) // The abstract must be in info, but is dealt with after.
876 makeAny(text, buf, xs, runparams, paragraphs.iterator_at(pit));
879 // Always output the abstract as the last item of the <info>, as it requires special treatment (especially if
880 // it contains several paragraphs that are empty).
882 // string tag = paragraphs[*info.abstract.begin()].layout().docbookforceabstracttag();
883 // if (tag == "NONE")
886 // xs << xml::StartTag(tag);
// The pre-rendered abstract is written with escaping disabled.
888 xs << XMLStream::ESCAPE_NONE << abstract;
889 // xs << xml::EndTag(tag);
893 // End the <info> tag if it was started.
895 xs << xml::EndTag("info");
// Outputs the <info> part for a document that has sections: computes the info description
// for the paragraphs from runparams.par_begin up to epit and passes it to outputDocBookInfo().
// NOTE(review): the parameter lines declaring text, buf, xs and epit, and anything after the
// outputDocBookInfo() call, are not visible in this excerpt.
902 void docbookFirstParagraphs(
906 OutputParams const &runparams,
909 // Handle the beginning of the document, supposing it has sections.
910 // Major role: output the first <info> tag.
912 ParagraphList const ¶graphs = text.paragraphs();
913 pit_type bpit = runparams.par_begin;
914 DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit);
915 outputDocBookInfo(text, buf, xs, runparams, paragraphs, info);
// Outputs a text without sections: first the <info> part for
// [runparams.par_begin, runparams.par_end), then the remaining paragraphs through makeAny(),
// skipping paragraphs for which hasOnlyNotes() is true.
// NOTE(review): some parameter lines, the update of bpit after the <info> part and the loop
// increment are not visible in this excerpt.
919 void docbookSimpleAllParagraphs(
923 OutputParams const & runparams)
925 // Handle the given text, supposing it has no sections (i.e. a "simple" text). The input may vary in length
926 // between a single paragraph to a whole document.
928 // First, the <info> tag.
929 ParagraphList const ¶graphs = text.paragraphs();
930 pit_type bpit = runparams.par_begin;
931 pit_type const epit = runparams.par_end;
932 DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit);
933 outputDocBookInfo(text, buf, xs, runparams, paragraphs, info);
935 // Then, the content. It starts where the <info> ends.
937 while (bpit < epit) {
938 auto par = paragraphs.iterator_at(bpit);
939 if (!hasOnlyNotes(*par))
940 makeAny(text, buf, xs, runparams, par);
// Entry point for DocBook output of a text: outputs the paragraphs in
// [runparams.par_begin, runparams.par_end), opening and closing section elements according
// to the sectioning layouts encountered (tracked on the headerLevels stack).
// NOTE(review): several parameter lines, conditions, loop increments and stack pop() calls
// are not visible in this excerpt.
946 void docbookParagraphs(Text const &text,
949 OutputParams const &runparams) {
950 ParagraphList const ¶graphs = text.paragraphs();
// An empty range is widened to all paragraphs of the text.
// NOTE(review): these assignments go through a const reference, which only compiles if
// par_begin / par_end are declared mutable — confirm against OutputParams.
951 if (runparams.par_begin == runparams.par_end) {
952 runparams.par_begin = 0;
953 runparams.par_end = paragraphs.size();
955 pit_type bpit = runparams.par_begin;
956 pit_type const epit = runparams.par_end;
// NOTE(review): the condition under which this error comment is emitted is not visible in this excerpt.
959 xs << XMLStream::ESCAPE_NONE << "<!-- DocBook output error! -->\n";
963 std::stack<std::pair<int, string>> headerLevels; // Used to determine when to open/close sections: store the depth
964 // of the section and the tag that was used to open it.
966 // Detect whether the document contains sections. If there are no sections, there can be no automatically
967 // discovered abstract.
968 bool documentHasSections;
970 tie(documentHasSections, eppit) = hasDocumentSectioning(paragraphs, bpit, epit);
972 if (documentHasSections) {
973 docbookFirstParagraphs(text, buf, xs, runparams, eppit);
976 docbookSimpleAllParagraphs(text, buf, xs, runparams);
980 bool currentlyInAppendix = false;
982 while (bpit < epit) {
// Per-paragraph copy of the parameters; it is modified below (docbook_anchors_to_ignore).
983 OutputParams ourparams = runparams;
985 auto par = paragraphs.iterator_at(bpit);
// Once a paragraph starts the appendix, the flag stays set for all following paragraphs.
986 if (par->params().startOfAppendix())
987 currentlyInAppendix = true;
988 Layout const &style = par->layout();
989 ParagraphList::const_iterator const lastStartedPar = par;
990 ParagraphList::const_iterator send;
992 if (hasOnlyNotes(*par)) {
997 // Think about adding <section> and/or </section>s.
998 const bool isLayoutSectioning = style.category() == from_utf8("Sectioning");
999 if (isLayoutSectioning) {
1000 int level = style.toclevel;
1002 // Need to close a previous section if it has the same level or a higher one (close <section> if opening a <h2>
1003 // after a <h2>, <h3>, <h4>, <h5> or <h6>). More examples:
1004 // - current: h2; back: h1; do not close any <section>
1005 // - current: h1; back: h2; close two <section> (first the <h2>, then the <h1>, so a new <h1> can come)
1006 while (!headerLevels.empty() && level <= headerLevels.top().first) {
1007 int stackLevel = headerLevels.top().first;
1008 docstring stackTag = from_utf8("</" + headerLevels.top().second + ">");
1011 // Output the tag only if it corresponds to a legit section.
1012 if (stackLevel != Layout::NOT_IN_TOC)
1013 xs << XMLStream::ESCAPE_NONE << stackTag << xml::CR();
1016 // Open the new section: first push it onto the stack, then output it in DocBook.
// Inside the appendix, a section tag of "chapter" is replaced by "appendix".
1017 string sectionTag = (currentlyInAppendix && style.docbooksectiontag() == "chapter") ?
1018 "appendix" : style.docbooksectiontag();
1019 headerLevels.push(std::make_pair(level, sectionTag));
1021 // Some sectioning-like elements should not be output (such as FrontMatter).
1022 if (level != Layout::NOT_IN_TOC) {
1023 // Look for a label in the title, i.e. a InsetLabel as a child.
1024 docstring id = docstring();
1025 for (pos_type i = 0; i < par->size(); ++i) {
1026 Inset const *inset = par->getInset(i);
1028 if (auto label = dynamic_cast<InsetLabel const *>(inset)) {
1029 // Generate the attributes for the section if need be.
1030 id += "xml:id=\"" + xml::cleanID(label->screenLabel()) + "\"";
1032 // Don't output the ID as a DocBook <anchor>.
1033 ourparams.docbook_anchors_to_ignore.emplace(label->screenLabel());
1035 // Cannot have multiple IDs per tag.
1041 // Write the open tag for this section.
// The tag is assembled as raw text and written with escaping disabled.
1042 docstring tag = from_utf8("<" + sectionTag);
1044 tag += from_utf8(" ") + id;
1045 tag += from_utf8(">");
1046 xs << XMLStream::ESCAPE_NONE << tag;
1051 // Close all sections before the bibliography.
1052 // TODO: Only close all when the bibliography is at the end of the document? Or force to output the bibliography at the end of the document? Or don't care (as allowed by DocBook)?
1053 auto insetsLength = distance(par->insetList().begin(), par->insetList().end());
1054 if (insetsLength > 0) {
// Only the inset at position 0 of the paragraph is examined.
1055 Inset const *firstInset = par->getInset(0);
1056 if (firstInset && dynamic_cast<InsetBibtex const *>(firstInset)) {
1057 while (!headerLevels.empty()) {
1058 int level = headerLevels.top().first;
1059 docstring tag = from_utf8("</" + headerLevels.top().second + ">");
1062 // Output the tag only if it corresponds to a legit section.
1063 if (level != Layout::NOT_IN_TOC) {
1064 xs << XMLStream::ESCAPE_NONE << tag;
1071 // Generate this paragraph.
1072 makeAny(text, buf, xs, ourparams, par);
1076 // If need be, close <section>s, but only at the end of the document (otherwise, dealt with at the beginning
// The loop stops at the first stack entry whose level is not greater than Layout::NOT_IN_TOC.
1078 while (!headerLevels.empty() && headerLevels.top().first > Layout::NOT_IN_TOC) {
1079 docstring tag = from_utf8("</" + headerLevels.top().second + ">");
1081 xs << XMLStream::ESCAPE_NONE << tag;