2 * \file output_docbook.cpp
3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
6 * \author Lars Gullik Bjønnes
9 * Full author contact details are available in file CREDITS.
15 #include "buffer_funcs.h"
16 #include "BufferParams.h"
18 #include "InsetList.h"
20 #include "OutputParams.h"
21 #include "Paragraph.h"
22 #include "ParagraphList.h"
23 #include "ParagraphParameters.h"
26 #include "TextClass.h"
28 #include "insets/InsetBibtex.h"
29 #include "insets/InsetBibitem.h"
30 #include "insets/InsetLabel.h"
31 #include "insets/InsetNote.h"
33 #include "support/convert.h"
34 #include "support/debug.h"
35 #include "support/lassert.h"
36 #include "support/lstrings.h"
37 #include "support/textutils.h"
39 #include "support/regex.h"
47 using namespace lyx::support;
// Maps a font type to the name of the DocBook element used to render it.
// The returned string is used as the tag name of the font tags built by
// docbookStartFontTag() and docbookEndFontTag().
// NOTE(review): the return statements attached to each group of case labels are
// not visible in this excerpt, so the tag chosen for each font type is not
// documented here.
53 std::string fontToDocBookTag(xml::FontTypes type)
// Font shapes, series, families and decorations.
56 case xml::FontTypes::FT_EMPH:
57 case xml::FontTypes::FT_BOLD:
59 case xml::FontTypes::FT_NOUN:
61 case xml::FontTypes::FT_UBAR:
62 case xml::FontTypes::FT_WAVE:
63 case xml::FontTypes::FT_DBAR:
64 case xml::FontTypes::FT_SOUT:
65 case xml::FontTypes::FT_XOUT:
66 case xml::FontTypes::FT_ITALIC:
67 case xml::FontTypes::FT_UPRIGHT:
68 case xml::FontTypes::FT_SLANTED:
69 case xml::FontTypes::FT_SMALLCAPS:
70 case xml::FontTypes::FT_ROMAN:
71 case xml::FontTypes::FT_SANS:
73 case xml::FontTypes::FT_TYPE:
// Font sizes.
75 case xml::FontTypes::FT_SIZE_TINY:
76 case xml::FontTypes::FT_SIZE_SCRIPT:
77 case xml::FontTypes::FT_SIZE_FOOTNOTE:
78 case xml::FontTypes::FT_SIZE_SMALL:
79 case xml::FontTypes::FT_SIZE_NORMAL:
80 case xml::FontTypes::FT_SIZE_LARGE:
81 case xml::FontTypes::FT_SIZE_LARGER:
82 case xml::FontTypes::FT_SIZE_LARGEST:
83 case xml::FontTypes::FT_SIZE_HUGE:
84 case xml::FontTypes::FT_SIZE_HUGER:
85 case xml::FontTypes::FT_SIZE_INCREASE:
86 case xml::FontTypes::FT_SIZE_DECREASE:
// Maps a font type to the value of the DocBook "role" attribute that refines
// the element returned by fontToDocBookTag(). The result is consumed by
// fontToAttribute().
// NOTE(review): several return statements are missing from this excerpt; only
// the size_* roles shown below are certain.
94 string fontToRole(xml::FontTypes type)
96 // Specific fonts are achieved with roles. The only common ones are "" for basic emphasis,
97 // and "bold"/"strong" for bold. With some specific options, other roles are copied into
98 // HTML output (via the DocBook XSLT sheets); otherwise, if not recognised, they are just ignored.
99 // Hence, it is not a problem to have many roles by default here.
100 // See https://www.sourceware.org/ml/docbook/2003-05/msg00269.html
102 case xml::FontTypes::FT_ITALIC:
103 case xml::FontTypes::FT_EMPH:
105 case xml::FontTypes::FT_BOLD:
107 case xml::FontTypes::FT_NOUN: // Outputs a <person>
108 case xml::FontTypes::FT_TYPE: // Outputs a <code>
110 case xml::FontTypes::FT_UBAR:
113 // All other roles are non-standard for DocBook.
115 case xml::FontTypes::FT_WAVE:
117 case xml::FontTypes::FT_DBAR:
119 case xml::FontTypes::FT_SOUT:
121 case xml::FontTypes::FT_XOUT:
123 case xml::FontTypes::FT_UPRIGHT:
125 case xml::FontTypes::FT_SLANTED:
127 case xml::FontTypes::FT_SMALLCAPS:
129 case xml::FontTypes::FT_ROMAN:
131 case xml::FontTypes::FT_SANS:
// Font sizes: each size has its own case and return (those not shown here are
// on lines missing from this excerpt).
133 case xml::FontTypes::FT_SIZE_TINY:
135 case xml::FontTypes::FT_SIZE_SCRIPT:
136 return "size_script";
137 case xml::FontTypes::FT_SIZE_FOOTNOTE:
138 return "size_footnote";
139 case xml::FontTypes::FT_SIZE_SMALL:
141 case xml::FontTypes::FT_SIZE_NORMAL:
142 return "size_normal";
143 case xml::FontTypes::FT_SIZE_LARGE:
145 case xml::FontTypes::FT_SIZE_LARGER:
146 return "size_larger";
147 case xml::FontTypes::FT_SIZE_LARGEST:
148 return "size_largest";
149 case xml::FontTypes::FT_SIZE_HUGE:
151 case xml::FontTypes::FT_SIZE_HUGER:
153 case xml::FontTypes::FT_SIZE_INCREASE:
154 return "size_increase";
155 case xml::FontTypes::FT_SIZE_DECREASE:
156 return "size_decrease";
// Builds the XML attribute string for a font type from its role.
// The visible return yields role='<role>', with <role> taken from fontToRole().
// NOTE(review): the comment below implies that an empty role produces no
// attribute, but that branch is not visible in this excerpt — confirm.
162 string fontToAttribute(xml::FontTypes type) {
163 // If there is a role (i.e. nonstandard use of a tag), output the attribute. Otherwise, the sheer tag is sufficient
165 string role = fontToRole(type);
167 return "role='" + role + "'";
// Creates the opening font tag for a font type: the tag name comes from
// fontToDocBookTag() and the attribute string from fontToAttribute(), both
// converted from UTF-8.
174 xml::FontTag docbookStartFontTag(xml::FontTypes type)
176 return xml::FontTag(from_utf8(fontToDocBookTag(type)), from_utf8(fontToAttribute(type)), type);
// Creates the closing font tag matching docbookStartFontTag(): same tag name
// (from fontToDocBookTag()), no attributes.
180 xml::EndFontTag docbookEndFontTag(xml::FontTypes type)
182 return xml::EndFontTag(from_utf8(fontToDocBookTag(type)), type);
186 // Convenience functions to open and close tags. First, very low-level ones to ensure a consistent new-line behaviour.
190 // Contents of the block.
195 // <paratag>Contents of the paragraph.</paratag>
198 // Content before<inlinetag>Contents of the paragraph.</inlinetag>Content after
// Writes the start tag of an inline element to the stream. No new-line
// handling is visible for inline tags in this excerpt.
200 void openInlineTag(XMLStream & xs, const std::string & tag, const std::string & attr)
202 xs << xml::StartTag(tag, attr);
// Writes the end tag of an inline element to the stream.
206 void closeInlineTag(XMLStream & xs, const std::string & tag)
208 xs << xml::EndTag(tag);
// Low-level helper: writes the start tag of a paragraph-like element.
// The stream is first asked whether the last thing written was a line break;
// NOTE(review): the statement guarded by that check is not visible in this
// excerpt — presumably it emits a line break, confirm.
212 void openParTag(XMLStream & xs, const std::string & tag, const std::string & attr)
214 if (!xs.isLastTagCR())
216 xs << xml::StartTag(tag, attr);
// Low-level helper: writes the end tag of a paragraph-like element.
// NOTE(review): any line break written after the tag is on lines not visible
// in this excerpt.
220 void closeParTag(XMLStream & xs, const std::string & tag)
222 xs << xml::EndTag(tag);
// Low-level helper: writes the start tag of a block element.
// Like the paragraph variant, it checks xs.isLastTagCR() before writing;
// NOTE(review): the guarded statement is not visible here — presumably a line
// break, confirm.
227 void openBlockTag(XMLStream & xs, const std::string & tag, const std::string & attr)
229 if (!xs.isLastTagCR())
231 xs << xml::StartTag(tag, attr);
// Low-level helper: writes the end tag of a block element, after checking
// xs.isLastTagCR().
// NOTE(review): the statement guarded by the check is not visible in this
// excerpt — presumably a line break before the end tag, confirm.
236 void closeBlockTag(XMLStream & xs, const std::string & tag)
238 if (!xs.isLastTagCR())
240 xs << xml::EndTag(tag);
// Opens a tag using the new-line convention selected by tagtype:
// "paragraph" (or any tag named "para"), "block" or "inline".
// An unrecognised tagtype is reported through xs.writeError().
245 void openTag(XMLStream & xs, const std::string & tag, const std::string & attr, const std::string & tagtype)
// An empty tag or the placeholder "NONE" is special-cased.
// NOTE(review): the guarded statement is not visible — presumably an early return, confirm.
247 if (tag.empty() || tag == "NONE")
250 if (tag == "para" || tagtype == "paragraph") // Special case for <para>: always considered as a paragraph.
251 openParTag(xs, tag, attr);
252 else if (tagtype == "block")
253 openBlockTag(xs, tag, attr);
254 else if (tagtype == "inline")
255 openInlineTag(xs, tag, attr);
// NOTE(review): the "else" introducing this error report is on a line missing from this excerpt.
257 xs.writeError("Unrecognised tag type '" + tagtype + "' for '" + tag + " " + attr + "'");
// Closes a tag using the new-line convention selected by tagtype; mirrors
// openTag() with the same dispatch on "paragraph"/"para", "block" and "inline".
// An unrecognised tagtype is reported through xs.writeError().
261 void closeTag(XMLStream & xs, const std::string & tag, const std::string & tagtype)
// An empty tag or the placeholder "NONE" is special-cased.
// NOTE(review): the guarded statement is not visible — presumably an early return, confirm.
263 if (tag.empty() || tag == "NONE")
266 if (tag == "para" || tagtype == "paragraph") // Special case for <para>: always considered as a paragraph.
267 closeParTag(xs, tag);
268 else if (tagtype == "block")
269 closeBlockTag(xs, tag);
270 else if (tagtype == "inline")
271 closeInlineTag(xs, tag);
// NOTE(review): the "else" introducing this error report is on a line missing from this excerpt.
273 xs.writeError("Unrecognised tag type '" + tagtype + "' for '" + tag + "'");
277 // Higher-level convenience functions.
// Opens every tag that the layout of a paragraph requires, from the outside in:
// wrapper tag, main tag, item tag, inner item tag. Each is opened via openTag(),
// which does nothing visible for tags set to "NONE".
// par: paragraph being output (dereferenced unconditionally).
// prevpar: paragraph before it, or nullptr; used only to decide on the wrapper.
279 void openParTag(XMLStream & xs, const Paragraph * par, const Paragraph * prevpar)
281 Layout const & lay = par->layout();
286 // When should the wrapper be opened here? Only if the previous paragraph has the SAME wrapper tag
287 // (usually, they won't have the same layout) and the CURRENT one allows merging.
288 // The main use case is author information in several paragraphs: if the name of the author is the
289 // first paragraph of an author, then merging with the previous tag does not make sense. Say the
290 // next paragraph is the affiliation, then it should be output in the same <author> tag (different
291 // layout, same wrapper tag).
// Default: open the wrapper whenever this layout defines one.
292 bool openWrapper = lay.docbookwrappertag() != "NONE";
293 if (prevpar != nullptr) {
294 Layout const & prevlay = prevpar->layout();
295 if (prevlay.docbookwrappertag() != "NONE") {
// NOTE(review): as written, a previous paragraph with a DIFFERENT (non-"NONE")
// wrapper tag makes this expression false, so this paragraph's own wrapper is
// not opened — confirm this is intended.
296 openWrapper = prevlay.docbookwrappertag() == lay.docbookwrappertag()
297 && !lay.docbookwrappermergewithprevious();
// NOTE(review): the test of openWrapper guarding this call is on a line missing from this excerpt.
303 openTag(xs, lay.docbookwrappertag(), lay.docbookwrapperattr(), lay.docbookwrappertagtype());
305 const string & tag = lay.docbooktag();
307 auto xmltag = xml::ParTag(tag, lay.docbookattr());
308 if (!xs.isTagOpen(xmltag, 1)) // Don't nest a paragraph directly in a paragraph.
309 // TODO: required or not?
310 // TODO: avoid creating a ParTag object just for this query...
311 openTag(xs, lay.docbooktag(), lay.docbookattr(), lay.docbooktagtype());
// Item-level tags.
314 openTag(xs, lay.docbookitemtag(), lay.docbookitemattr(), lay.docbookitemtagtype());
315 openTag(xs, lay.docbookiteminnertag(), lay.docbookiteminnerattr(), lay.docbookiteminnertagtype());
// Closes the tags opened by the Paragraph overload of openParTag(), in reverse
// order: inner item tag, item tag, main tag, then the wrapper tag.
// par: paragraph being output (dereferenced unconditionally).
// nextpar: paragraph after it, or nullptr; used only to decide on the wrapper.
319 void closeParTag(XMLStream & xs, Paragraph const * par, Paragraph const * nextpar)
324 // See comment in openParTag.
325 Layout const & lay = par->layout();
// Default: close the wrapper whenever this layout defines one.
326 bool closeWrapper = lay.docbookwrappertag() != "NONE";
327 if (nextpar != nullptr) {
328 Layout const & nextlay = nextpar->layout();
329 if (nextlay.docbookwrappertag() != "NONE") {
// NOTE(review): as written, a next paragraph with a DIFFERENT (non-"NONE")
// wrapper tag makes this expression false, so this paragraph's wrapper is left
// open — confirm this is intended.
330 closeWrapper = nextlay.docbookwrappertag() == lay.docbookwrappertag()
331 && !nextlay.docbookwrappermergewithprevious();
336 closeTag(xs, lay.docbookiteminnertag(), lay.docbookiteminnertagtype());
337 closeTag(xs, lay.docbookitemtag(), lay.docbookitemtagtype());
338 closeTag(xs, lay.docbooktag(), lay.docbooktagtype());
// NOTE(review): the test of closeWrapper guarding this call is on a line missing from this excerpt.
340 closeTag(xs, lay.docbookwrappertag(), lay.docbookwrappertagtype());
// Opens the item-label tag defined by the layout (tag, attributes and tag type
// all come from the layout's docbookitemlabel* settings).
344 void openLabelTag(XMLStream & xs, Layout const & lay) // Mostly for definition lists.
346 openTag(xs, lay.docbookitemlabeltag(), lay.docbookitemlabelattr(), lay.docbookitemlabeltagtype());
// Closes the item-label tag opened by openLabelTag().
350 void closeLabelTag(XMLStream & xs, Layout const & lay)
352 closeTag(xs, lay.docbookitemlabeltag(), lay.docbookitemlabeltagtype());
// Opens the item tag defined by the layout (docbookitemtag, with its
// attributes and tag type).
356 void openItemTag(XMLStream & xs, Layout const & lay)
358 openTag(xs, lay.docbookitemtag(), lay.docbookitemattr(), lay.docbookitemtagtype());
// Closes the item tag opened by openItemTag().
362 void closeItemTag(XMLStream & xs, Layout const & lay)
364 closeTag(xs, lay.docbookitemtag(), lay.docbookitemtagtype());
// Outputs the paragraphs of a precooked bibliography (layout type
// LATEX_BIB_ENVIRONMENT): a <bibliography> element is opened before the first
// bibliography paragraph and closed after the last one, and each paragraph
// becomes one <bibliomixed> entry whose xml:id is the key of its bibitem inset.
// pbegin: first paragraph to output.
// NOTE(review): pend, the end of the range, is defined on a line not visible
// in this excerpt.
368 void makeParagraphBibliography(
371 OutputParams const & runparams,
373 ParagraphList::const_iterator const & pbegin)
375 auto const begin = text.paragraphs().begin();
376 auto const end = text.paragraphs().end();
380 // Find the paragraph *before* pbegin.
381 ParagraphList::const_iterator pbegin_before = begin;
382 if (pbegin != begin) {
383 ParagraphList::const_iterator pbegin_before_next = begin;
384 ++pbegin_before_next;
386 while (pbegin_before_next != pbegin) {
388 ++pbegin_before_next;
392 ParagraphList::const_iterator par = pbegin;
394 // If this is the first paragraph in a bibliography, open the bibliography tag.
// A bibliography starting at the very first paragraph has no predecessor
// (pbegin_before is then pbegin itself), so it must also open the tag here:
// otherwise the closing tag below would be emitted without a matching opening one.
395 if (pbegin == begin || pbegin_before->layout().latextype != LATEX_BIB_ENVIRONMENT) {
396 xs << xml::StartTag("bibliography");
400 // Generate one <bibliomixed> entry per paragraph of the range.
401 for (; par != pend; ++par) {
402 // Start the precooked bibliography entry. This is very much like opening a paragraph tag.
403 // Don't forget the citation ID!
// Scan every position of the paragraph for the bibitem inset: the inset that
// is tested must be the one at position i, i.e. the same one that is cast below.
405 for (auto i = 0; i < par->size(); ++i) {
406 Inset const *ip = par->getInset(i);
407 if (ip != nullptr && ip->lyxCode() == BIBITEM_CODE) {
408 const auto * bibitem = dynamic_cast<const InsetBibitem*>(par->getInset(i));
409 attr = from_utf8("xml:id='") + bibitem->getParam("key") + from_utf8("'");
413 xs << xml::StartTag(from_utf8("bibliomixed"), attr);
415 // Generate the entry.
416 par->simpleDocBookOnePar(buf, xs, runparams, text.outerFont(distance(begin, par)), true, true, 0);
418 // End the precooked bibliography entry.
419 xs << xml::EndTag("bibliomixed");
423 // If this is the last paragraph in a bibliography, close the bibliography tag.
424 if (par == end || par->layout().latextype != LATEX_BIB_ENVIRONMENT) {
425 xs << xml::EndTag("bibliography");
434 OutputParams const & runparams,
436 ParagraphList::const_iterator const & par)
// NOTE(review): the beginning of this signature is missing from this excerpt;
// judging by the call in makeAny(), this is makeParagraph().
// Outputs one standard paragraph: decides whether paragraph tags are needed,
// renders the content into a temporary stream, and writes it to xs only if it
// contains something besides spaces and line breaks.
438 auto const begin = text.paragraphs().begin();
439 auto const end = text.paragraphs().end();
440 auto prevpar = text.paragraphs().getParagraphBefore(par);
442 // We want to open the paragraph tag if:
443 // (i) the current layout permits multiple paragraphs
444 // (ii) we are either not already inside a paragraph (HTMLIsBlock) OR
445 // we are, but this is not the first paragraph
447 // But there is also a special case, and we first see whether we are in it.
448 // We do not want to open the paragraph tag if this paragraph contains
449 // only one item, and that item is "inline", i.e., not HTMLIsBlock (such
450 // as a branch). On the other hand, if that single item has a font change
451 // applied to it, then we still do need to open the paragraph.
453 // Obviously, this is very fragile. The main reason we need to do this is
454 // because of branches, e.g., a branch that contains an entire new section.
455 // We do not really want to wrap that whole thing in a <div>...</div>.
456 bool special_case = false;
// Only a paragraph made of exactly one inset is a candidate.
457 Inset const *specinset = par->size() == 1 ? par->getInset(0) : nullptr;
458 if (specinset && !specinset->getLayout().htmlisblock()) { // TODO: Convert htmlisblock to a DocBook parameter?
459 Layout const &style = par->layout();
// Compare the font the layout prescribes with the font actually used at position 0.
460 FontInfo const first_font = style.labeltype == LABEL_MANUAL ?
461 style.labelfont : style.font;
462 FontInfo const our_font =
463 par->getFont(buf.masterBuffer()->params(), 0,
464 text.outerFont(std::distance(begin, par))).fontInfo();
// NOTE(review): the statement guarded by this test is not visible — presumably sets special_case, confirm.
466 if (first_font == our_font)
470 // Plain layouts must be ignored.
471 if (!special_case && buf.params().documentClass().isPlainLayout(par->layout()) && !runparams.docbook_force_pars)
473 // TODO: Could get rid of this with a DocBook equivalent to htmlisblock?
474 if (!special_case && par->size() == 1 && par->getInset(0)) {
475 Inset const * firstInset = par->getInset(0);
477 // Floats cannot be in paragraphs.
478 special_case = to_utf8(firstInset->layoutName()).substr(0, 6) == "Float:";
480 // Bibliographies cannot be in paragraphs.
481 if (!special_case && firstInset->asInsetCommand())
482 special_case = firstInset->asInsetCommand()->params().getCmdName() == "bibtex";
484 // Equations do not deserve their own paragraph (DocBook allows them outside paragraphs).
485 if (!special_case && firstInset->asInsetMath())
488 // ERTs are in comments, not paragraphs.
489 if (!special_case && firstInset->lyxCode() == lyx::ERT_CODE)
492 // Listings should not get into their own paragraph.
493 if (!special_case && firstInset->lyxCode() == lyx::LISTINGS_CODE)
// NOTE(review): the remaining operands of this condition are on lines missing from this excerpt.
497 bool const open_par = runparams.docbook_make_pars
498 && !runparams.docbook_in_par
501 // We want to issue the closing tag if either:
502 // (i) We opened it, and either docbook_in_par is false,
503 // or we're not in the last paragraph, anyway.
504 // (ii) We didn't open it and docbook_in_par is true,
505 // but we are in the first par, and there is a next par.
508 bool const close_par = open_par && (!runparams.docbook_in_par);
510 // Determine if this paragraph has some real content. Things like new pages are not caught
511 // by Paragraph::empty(), even though they do not generate anything useful in DocBook.
512 odocstringstream os2;
// Render into the temporary stream xs2 (declared on a line not visible here).
514 par->simpleDocBookOnePar(buf, xs2, runparams, text.outerFont(distance(begin, par)), open_par, close_par, 0);
// Strip spaces, carriage returns and line feeds to test for real content.
516 docstring cleaned = os2.str();
517 static const lyx::regex reg("[ \\r\\n]*");
518 cleaned = from_utf8(lyx::regex_replace(to_utf8(cleaned), reg, string("")));
520 if (!cleaned.empty()) {
522 openParTag(xs, &*par, prevpar);
// The temporary output is already XML, hence written without escaping.
524 xs << XMLStream::ESCAPE_NONE << os2.str();
// nextpar is defined on a line not visible in this excerpt.
527 closeParTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr);
536 OutputParams const &ourparams,
537 ParagraphList::const_iterator par);
// Outputs one paragraph whose layout is an environment (plain, list or item
// environment): opens the layout tags, optionally outputs an item wrapper and
// a label, then the rest of the paragraph, and finally closes the layout tags.
540 void makeEnvironment(
543 OutputParams const &runparams,
545 ParagraphList::const_iterator const & par)
547 auto const end = text.paragraphs().end();
549 // Output the opening tag for this environment, but only if it has not been previously opened (condition
550 // implemented in openParTag).
551 auto prevpar = text.paragraphs().getParagraphBefore(par);
552 openParTag(xs, &*par, prevpar); // TODO: switch in layout for par/block?
554 // Generate the contents of this environment. There is a special case if this is like some environment.
555 Layout const & style = par->layout();
556 if (style.latextype == LATEX_COMMAND) {
557 // Nothing to do (otherwise, infinite loops).
558 } else if (style.latextype == LATEX_ENVIRONMENT ||
559 style.latextype == LATEX_LIST_ENVIRONMENT ||
560 style.latextype == LATEX_ITEM_ENVIRONMENT) {
561 // Open a wrapper tag if needed.
562 if (style.docbookitemwrappertag() != "NONE") {
563 xs << xml::StartTag(style.docbookitemwrappertag(), style.docbookitemwrapperattr());
567 // Generate the label, if need be. If it is taken from the text, sep != 0 and corresponds to the first
568 // character after the label.
// (sep itself is declared on a line not visible in this excerpt.)
570 if (style.labeltype != LABEL_NO_LABEL && style.docbookitemlabeltag() != "NONE") {
571 // At least one condition must be met:
572 // - this environment is not a list
573 // - if this is a list, the label must not be manual (i.e. it must be taken from the layout)
574 if (style.latextype != LATEX_LIST_ENVIRONMENT || style.labeltype != LABEL_MANUAL) {
575 // Usual cases: maybe there is something specified at the layout level. Highly unlikely, though.
576 docstring const lbl = par->params().labelString();
// NOTE(review): the code between the two calls below (presumably writing lbl) is not visible — confirm.
581 openLabelTag(xs, style);
583 closeLabelTag(xs, style);
586 // Only variablelist gets here (or similar items defined as an extension in the layout).
587 openLabelTag(xs, style);
// The label is the first word of the paragraph; sep receives the position returned by firstWordDocBook().
588 sep = par->firstWordDocBook(xs, runparams);
589 closeLabelTag(xs, style);
593 // Maybe the item is completely empty, i.e. if the first word ends at the end of the current paragraph
594 // AND if the next paragraph doesn't have the same depth (if there is such a paragraph).
595 // Common case: there is only the first word on the line, but there is a nested list instead
597 bool emptyItem = false;
598 if (sep == par->size()) { // If the separator is already at the end of this paragraph...
// next_par is defined on a line not visible in this excerpt.
601 if (next_par == text.paragraphs().end()) // There is no next paragraph.
603 else // There is a next paragraph: check depth.
604 emptyItem = par->params().depth() >= next_par->params().depth();
608 // Avoid having an empty item, this is not valid DocBook. A single character is enough to force
609 // generation of a full <para>.
610 // TODO: this always worked only by magic...
613 // Generate the rest of the paragraph, if need be.
614 par->simpleDocBookOnePar(buf, xs, runparams, text.outerFont(std::distance(text.paragraphs().begin(), par)),
// NOTE(review): the branch leading to this fallback on makeAny() is on lines missing from this excerpt.
618 makeAny(text, buf, xs, runparams, par);
621 // Close the environment.
// nextpar is defined on a line not visible in this excerpt.
624 closeParTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr); // TODO: switch in layout for par/block?
631 OutputParams const & runparams,
633 ParagraphList::const_iterator const & par)
// NOTE(review): the beginning of this signature is missing from this excerpt;
// judging by the call in makeAny(), this is makeCommand().
// Outputs one paragraph whose layout is a command: the layout tags are opened,
// the paragraph content is written, and the layout tags are closed.
635 // Unlike XHTML, no need for labels, as they are handled by DocBook tags.
636 auto const begin = text.paragraphs().begin();
637 auto const end = text.paragraphs().end();
641 // Generate this command.
642 auto prevpar = text.paragraphs().getParagraphBefore(par);
643 openParTag(xs, &*par, prevpar);
645 par->simpleDocBookOnePar(buf, xs, runparams,
646 text.outerFont(distance(begin, par)));
// nextpar is defined on a line not visible in this excerpt.
648 closeParTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr);
656 OutputParams const &ourparams,
657 ParagraphList::const_iterator par)
// NOTE(review): the beginning of this signature is missing from this excerpt;
// the calls elsewhere in the file show that this is makeAny().
// Dispatches the output of one paragraph to the generator matching the LaTeX
// type of its layout.
659 switch (par->layout().latextype) {
// NOTE(review): the case label preceding this call is not visible — presumably LATEX_COMMAND, confirm.
661 makeCommand(buf, xs, ourparams, text, par);
663 case LATEX_ENVIRONMENT:
664 case LATEX_LIST_ENVIRONMENT:
665 case LATEX_ITEM_ENVIRONMENT:
666 makeEnvironment(buf, xs, ourparams, text, par);
668 case LATEX_PARAGRAPH:
669 makeParagraph(buf, xs, ourparams, text, par);
671 case LATEX_BIB_ENVIRONMENT:
672 makeParagraphBibliography(buf, xs, ourparams, text, par);
677 } // end anonymous namespace
680 using DocBookDocumentSectioning = tuple<bool, pit_type>;
// Result of the analysis of the paragraphs that may go into the DocBook <info>
// element; built by getParagraphsWithInfo() and consumed by outputDocBookInfo().
683 struct DocBookInfoTag
// Indices of paragraphs whose layout reports docbookininfo() == "maybe".
685 const set<pit_type> shouldBeInInfo;
// Indices of paragraphs whose layout reports docbookininfo() == "always".
686 const set<pit_type> mustBeInInfo;
// Indices of paragraphs that make up the abstract.
687 const set<pit_type> abstract;
// The bpit and epit members initialised below are declared on lines not
// visible in this excerpt; getParagraphsWithInfo() fills them with the first
// and last paragraph indices of the <info> part.
691 DocBookInfoTag(const set<pit_type> & shouldBeInInfo, const set<pit_type> & mustBeInInfo,
692 const set<pit_type> & abstract, pit_type bpit, pit_type epit) :
693 shouldBeInInfo(shouldBeInInfo), mustBeInInfo(mustBeInInfo), abstract(abstract),
694 bpit(bpit), epit(epit) {}
// Scans the paragraphs in [bpit, epit) for a layout of category "Sectioning".
// Returns a tuple (found, index): whether such a paragraph exists, and the
// value of bpit when the scan stopped.
// NOTE(review): the statements that advance bpit and leave the loop are on
// lines not visible in this excerpt.
698 DocBookDocumentSectioning hasDocumentSectioning(ParagraphList const &paragraphs, pit_type bpit, pit_type const epit) {
699 bool documentHasSections = false;
701 while (bpit < epit) {
702 Layout const &style = paragraphs[bpit].layout();
703 documentHasSections |= style.category() == from_utf8("Sectioning");
// NOTE(review): the statement guarded by this test is not visible — presumably a break, confirm.
705 if (documentHasSections)
709 // Paragraphs before the first section: [ runparams.par_begin ; eppit )
711 return make_tuple(documentHasSections, bpit);
// Tells whether a paragraph contains nothing but note insets: every position
// is checked for being an inset that is an InsetNote.
// NOTE(review): the return statements are on lines not visible in this excerpt.
715 bool hasOnlyNotes(Paragraph const & par)
717 // Precondition: the paragraph is not empty. Otherwise, the function will always return true...
718 for (int i = 0; i < par.size(); ++i)
719 // If you find something that is not an inset (like actual text) or an inset that is not a note,
721 if (!par.isInset(i) || !dynamic_cast<InsetNote *>(par.insetList().get(i)))
// Analyses the paragraphs in [bpit, epit) to find those that belong in the
// DocBook <info> element, and detects the abstract among them.
// Returns a DocBookInfoTag holding the three sets of paragraph indices
// ("maybe" in info, "always" in info, abstract) and the bounds of the <info>
// part (the adjusted bpit, and cpit).
// NOTE(review): several statements (loop increments, breaks, the "done" label)
// are on lines not visible in this excerpt.
727 DocBookInfoTag getParagraphsWithInfo(ParagraphList const &paragraphs, pit_type bpit, pit_type const epit) {
728 set<pit_type> shouldBeInInfo;
729 set<pit_type> mustBeInInfo;
730 set<pit_type> abstract;
732 // Find the first non empty paragraph by mutating bpit.
733 while (bpit < epit) {
734 Paragraph const &par = paragraphs[bpit];
735 if (par.empty() || hasOnlyNotes(par))
741 // Find the last info-like paragraph.
742 pit_type cpit = bpit;
743 bool hasAbstractLayout = false;
744 while (cpit < epit) {
745 // Skip paragraphs only containing one note.
746 Paragraph const & par = paragraphs[cpit];
747 if (hasOnlyNotes(par)) {
// Remember whether any layout explicitly declares itself as an abstract.
752 if (par.layout().docbookabstract())
753 hasAbstractLayout = true;
755 // Based on layout information, store this paragraph in one set: should be in <info>, must be.
756 Layout const &style = par.layout();
758 if (style.docbookininfo() == "always") {
759 mustBeInInfo.emplace(cpit);
760 } else if (style.docbookininfo() == "maybe") {
761 shouldBeInInfo.emplace(cpit);
763 // Hypothesis: the <info> parts should be grouped together near the beginning bpit.
764 // There may be notes in between, but nothing else.
769 // Now, cpit points to the last paragraph that has things that could go in <info>.
770 // bpit is the beginning of the <info> part.
772 // Go once again through the list of paragraphs to find the abstract. If there is an abstract
773 // layout, only consider it. Otherwise, an abstract is just a sequence of paragraphs with text.
774 if (hasAbstractLayout) {
// pit, the loop index of both scans below, is declared on lines not visible in this excerpt.
776 while (pit < cpit) { // Don't overshoot the <info> part.
777 if (paragraphs[pit].layout().docbookabstract())
778 abstract.emplace(pit);
782 pit_type lastAbstract = epit + 1; // A nonsensical value.
783 docstring lastAbstractLayout;
786 while (pit < cpit) { // Don't overshoot the <info> part.
787 const Paragraph & par = paragraphs.at(pit);
788 if (!par.insetList().empty()) {
789 for (const auto &i : par.insetList()) {
// Heuristic: a paragraph containing an inset that holds text is taken as abstract material.
790 if (i.inset->getText(0) != nullptr) {
791 if (lastAbstract == epit + 1) {
792 // First paragraph that matches the heuristic definition of abstract.
794 lastAbstractLayout = par.layout().name();
795 } else if (pit > lastAbstract + 1 || par.layout().name() != lastAbstractLayout) {
796 // This is either too far from the last abstract paragraph or doesn't
797 // have the right layout name, BUT there has already been an abstract
798 // in this document: done with detecting the abstract.
799 goto done; // Easier to get out of two nested loops.
802 abstract.emplace(pit);
812 return DocBookInfoTag(shouldBeInInfo, mustBeInInfo, abstract, bpit, cpit);
// Writes the DocBook <info> element described by a DocBookInfoTag: the
// paragraphs that should or must be in <info>, followed by the abstract.
// The abstract is first rendered into a temporary stream so that it is dropped
// when it contains nothing but spaces and line breaks.
816 void outputDocBookInfo(
820 OutputParams const & runparams,
821 ParagraphList const & paragraphs,
822 DocBookInfoTag const & info)
824 // Perform an additional check on the abstract. Sometimes, there are many paragraphs that should go
825 // into the abstract, but none generates actual content. Thus, first generate to a temporary stream,
826 // then only create the <abstract> tag if these paragraphs generate some content.
827 // This check must be performed *before* a decision on whether or not to output <info> is made.
828 bool hasAbstract = !info.abstract.empty();
831 // Generate the abstract XML into a string before further checks.
832 odocstringstream os2;
// Range of paragraph indices covered by the abstract.
835 auto bpit = *std::min_element(info.abstract.begin(), info.abstract.end());
836 auto epit = 1 + *std::max_element(info.abstract.begin(), info.abstract.end());
837 // info.abstract is inclusive, epit is exclusive, hence +1 for looping.
839 while (bpit < epit) {
// xs2, the temporary stream, is declared on a line not visible in this excerpt.
840 makeAny(text, buf, xs2, runparams, paragraphs.iterator_at(bpit));
845 // Actually output the abstract if there is something to do. Don't count line feeds or spaces in this,
846 // even though they must be properly output if there is some abstract.
847 abstract = os2.str();
848 static const lyx::regex reg("[ \\r\\n]*");
849 docstring abstractContent = from_utf8(lyx::regex_replace(to_utf8(abstract), reg, string("")));
851 // Nothing? Then there is no abstract!
// NOTE(review): the guarded statement is not visible — presumably resets hasAbstract, confirm.
852 if (abstractContent.empty())
856 // The abstract must go in <info>. Otherwise, decide whether to open <info> based on the layouts.
857 bool needInfo = !info.mustBeInInfo.empty() || hasAbstract;
859 // Start the <info> tag if required.
861 xs.startDivision(false);
862 xs << xml::StartTag("info");
866 // Output the elements that should go in <info>, before and after the abstract.
867 for (auto pit : info.shouldBeInInfo) { // Typically, the title: these elements are so important and ubiquitous
868 // that mandating a wrapper like <info> would repel users. Thus, generate them first.
869 makeAny(text, buf, xs, runparams, paragraphs.iterator_at(pit));
871 for (auto pit : info.mustBeInInfo) {
872 if (info.abstract.find(pit) == info.abstract.end()) // The abstract must be in info, but is dealt with after.
873 makeAny(text, buf, xs, runparams, paragraphs.iterator_at(pit));
876 // Always output the abstract as the last item of the <info>, as it requires special treatment (especially if
877 // it contains several paragraphs that are empty).
879 // string tag = paragraphs[*info.abstract.begin()].layout().docbookforceabstracttag();
880 // if (tag == "NONE")
883 // xs << xml::StartTag(tag);
// The abstract was rendered as XML above, hence written without escaping.
885 xs << XMLStream::ESCAPE_NONE << abstract;
886 // xs << xml::EndTag(tag);
890 // End the <info> tag if it was started.
892 xs << xml::EndTag("info");
// Outputs the <info> element for a document that has sections: the paragraphs
// from runparams.par_begin up to epit are analysed with getParagraphsWithInfo()
// and written with outputDocBookInfo().
// NOTE(review): epit is a parameter declared on a line not visible in this excerpt.
899 void docbookFirstParagraphs(
903 OutputParams const &runparams,
906 // Handle the beginning of the document, supposing it has sections.
907 // Major role: output the first <info> tag.
909 ParagraphList const &paragraphs = text.paragraphs();
910 pit_type bpit = runparams.par_begin;
911 DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit);
912 outputDocBookInfo(text, buf, xs, runparams, paragraphs, info);
// Outputs a text that has no sections: first the <info> element, then every
// remaining paragraph of [runparams.par_begin, runparams.par_end) that does
// not consist only of notes.
916 void docbookSimpleAllParagraphs(
920 OutputParams const & runparams)
922 // Handle the given text, supposing it has no sections (i.e. a "simple" text). The input may vary in length
923 // between a single paragraph to a whole document.
925 // First, the <info> tag.
926 ParagraphList const &paragraphs = text.paragraphs();
927 pit_type bpit = runparams.par_begin;
928 pit_type const epit = runparams.par_end;
929 DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit);
930 outputDocBookInfo(text, buf, xs, runparams, paragraphs, info);
932 // Then, the content. It starts where the <info> ends.
// NOTE(review): the statement repositioning bpit after the <info> part, and the
// loop increment, are on lines not visible in this excerpt.
934 while (bpit < epit) {
935 auto par = paragraphs.iterator_at(bpit);
936 if (!hasOnlyNotes(*par))
937 makeAny(text, buf, xs, runparams, par);
// Entry point of the DocBook output of a text: writes the paragraphs of
// [runparams.par_begin, runparams.par_end), or of the whole text when both
// bounds are equal. Documents without sections are delegated to
// docbookSimpleAllParagraphs(); otherwise the <info> element is written first
// and sections are opened and closed around the paragraphs according to the
// table-of-contents level of the sectioning layouts.
// NOTE(review): several statements (stack pops, loop increments, else branches)
// are on lines not visible in this excerpt.
943 void docbookParagraphs(Text const &text,
946 OutputParams const &runparams) {
947 ParagraphList const &paragraphs = text.paragraphs();
// An empty range means "the whole text".
948 if (runparams.par_begin == runparams.par_end) {
949 runparams.par_begin = 0;
950 runparams.par_end = paragraphs.size();
952 pit_type bpit = runparams.par_begin;
953 pit_type const epit = runparams.par_end;
// NOTE(review): the condition guarding this error marker is not visible in this excerpt.
956 xs << XMLStream::ESCAPE_NONE << "<!-- DocBook output error! -->\n";
960 std::stack<std::pair<int, string>> headerLevels; // Used to determine when to open/close sections: store the depth
961 // of the section and the tag that was used to open it.
963 // Detect whether the document contains sections. If there are no sections, there can be no automatically
964 // discovered abstract.
965 bool documentHasSections;
// eppit is declared on a line not visible in this excerpt; it receives the
// index at which hasDocumentSectioning() stopped scanning.
967 tie(documentHasSections, eppit) = hasDocumentSectioning(paragraphs, bpit, epit);
969 if (documentHasSections) {
970 docbookFirstParagraphs(text, buf, xs, runparams, eppit);
973 docbookSimpleAllParagraphs(text, buf, xs, runparams);
977 bool currentlyInAppendix = false;
979 while (bpit < epit) {
// Per-paragraph copy of the parameters, so that anchors to ignore can be added locally.
980 OutputParams ourparams = runparams;
982 auto par = paragraphs.iterator_at(bpit);
983 if (par->params().startOfAppendix())
984 currentlyInAppendix = true;
985 Layout const &style = par->layout();
986 ParagraphList::const_iterator const lastStartedPar = par;
987 ParagraphList::const_iterator send;
989 if (hasOnlyNotes(*par)) {
994 // Think about adding <section> and/or </section>s.
995 const bool isLayoutSectioning = style.category() == from_utf8("Sectioning");
996 if (isLayoutSectioning) {
997 int level = style.toclevel;
999 // Need to close a previous section if it has the same level or a higher one (close <section> if opening a <h2>
1000 // after a <h2>, <h3>, <h4>, <h5> or <h6>). More examples:
1001 // - current: h2; back: h1; do not close any <section>
1002 // - current: h1; back: h2; close two <section> (first the <h2>, then the <h1>, so a new <h1> can come)
1003 while (!headerLevels.empty() && level <= headerLevels.top().first) {
1004 int stackLevel = headerLevels.top().first;
1005 docstring stackTag = from_utf8("</" + headerLevels.top().second + ">");
1008 // Output the tag only if it corresponds to a legit section.
1009 if (stackLevel != Layout::NOT_IN_TOC)
1010 xs << XMLStream::ESCAPE_NONE << stackTag << xml::CR();
1013 // Open the new section: first push it onto the stack, then output it in DocBook.
// Inside an appendix, a "chapter" is output as an "appendix" instead.
1014 string sectionTag = (currentlyInAppendix && style.docbooksectiontag() == "chapter") ?
1015 "appendix" : style.docbooksectiontag();
1016 headerLevels.push(std::make_pair(level, sectionTag));
1018 // Some sectioning-like elements should not be output (such as FrontMatter).
1019 if (level != Layout::NOT_IN_TOC) {
1020 // Look for a label in the title, i.e. a InsetLabel as a child.
1021 docstring id = docstring();
1022 for (pos_type i = 0; i < par->size(); ++i) {
1023 Inset const *inset = par->getInset(i);
1025 if (auto label = dynamic_cast<InsetLabel const *>(inset)) {
1026 // Generate the attributes for the section if need be.
1027 id += "xml:id=\"" + xml::cleanID(label->screenLabel()) + "\"";
1029 // Don't output the ID as a DocBook <anchor>.
1030 ourparams.docbook_anchors_to_ignore.emplace(label->screenLabel());
1032 // Cannot have multiple IDs per tag.
1038 // Write the open tag for this section.
1039 docstring tag = from_utf8("<" + sectionTag);
// NOTE(review): the test guarding the addition of the ID is not visible — presumably "id is not empty", confirm.
1041 tag += from_utf8(" ") + id;
1042 tag += from_utf8(">");
1043 xs << XMLStream::ESCAPE_NONE << tag;
1048 // Close all sections before the bibliography.
1049 // TODO: Only close all when the bibliography is at the end of the document? Or force to output the bibliography at the end of the document? Or don't care (as allowed by DocBook)?
1050 auto insetsLength = distance(par->insetList().begin(), par->insetList().end());
1051 if (insetsLength > 0) {
1052 Inset const *firstInset = par->getInset(0);
1053 if (firstInset && dynamic_cast<InsetBibtex const *>(firstInset)) {
1054 while (!headerLevels.empty()) {
1055 int level = headerLevels.top().first;
1056 docstring tag = from_utf8("</" + headerLevels.top().second + ">");
1059 // Output the tag only if it corresponds to a legit section.
1060 if (level != Layout::NOT_IN_TOC) {
1061 xs << XMLStream::ESCAPE_NONE << tag;
1068 // Generate this paragraph.
1069 makeAny(text, buf, xs, ourparams, par);
1073 // If need be, close <section>s, but only at the end of the document (otherwise, dealt with at the beginning
1075 while (!headerLevels.empty() && headerLevels.top().first > Layout::NOT_IN_TOC) {
1076 docstring tag = from_utf8("</" + headerLevels.top().second + ">");
1078 xs << XMLStream::ESCAPE_NONE << tag;