2 * \file output_docbook.cpp
3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
6 * \author Lars Gullik Bjønnes
9 * Full author contact details are available in file CREDITS.
15 #include "buffer_funcs.h"
16 #include "BufferParams.h"
18 #include "InsetList.h"
20 #include "OutputParams.h"
21 #include "Paragraph.h"
22 #include "ParagraphList.h"
23 #include "ParagraphParameters.h"
26 #include "TextClass.h"
28 #include "insets/InsetBibtex.h"
29 #include "insets/InsetBibitem.h"
30 #include "insets/InsetLabel.h"
31 #include "insets/InsetNote.h"
33 #include "support/convert.h"
34 #include "support/debug.h"
35 #include "support/lassert.h"
36 #include "support/lstrings.h"
37 #include "support/textutils.h"
39 #include "support/regex.h"
46 // #define DOCBOOK_DEBUG_NEWLINES
49 using namespace lyx::support;
55 std::string fontToDocBookTag(xml::FontTypes type)
58 case xml::FontTypes::FT_EMPH:
59 case xml::FontTypes::FT_BOLD:
61 case xml::FontTypes::FT_NOUN:
63 case xml::FontTypes::FT_UBAR:
64 case xml::FontTypes::FT_WAVE:
65 case xml::FontTypes::FT_DBAR:
66 case xml::FontTypes::FT_SOUT:
67 case xml::FontTypes::FT_XOUT:
68 case xml::FontTypes::FT_ITALIC:
69 case xml::FontTypes::FT_UPRIGHT:
70 case xml::FontTypes::FT_SLANTED:
71 case xml::FontTypes::FT_SMALLCAPS:
72 case xml::FontTypes::FT_ROMAN:
73 case xml::FontTypes::FT_SANS:
75 case xml::FontTypes::FT_TYPE:
77 case xml::FontTypes::FT_SIZE_TINY:
78 case xml::FontTypes::FT_SIZE_SCRIPT:
79 case xml::FontTypes::FT_SIZE_FOOTNOTE:
80 case xml::FontTypes::FT_SIZE_SMALL:
81 case xml::FontTypes::FT_SIZE_NORMAL:
82 case xml::FontTypes::FT_SIZE_LARGE:
83 case xml::FontTypes::FT_SIZE_LARGER:
84 case xml::FontTypes::FT_SIZE_LARGEST:
85 case xml::FontTypes::FT_SIZE_HUGE:
86 case xml::FontTypes::FT_SIZE_HUGER:
87 case xml::FontTypes::FT_SIZE_INCREASE:
88 case xml::FontTypes::FT_SIZE_DECREASE:
96 string fontToRole(xml::FontTypes type)
98 // Specific fonts are achieved with roles. The only common ones are "" for basic emphasis,
99 // and "bold"/"strong" for bold. With some specific options, other roles are copied into
100 // HTML output (via the DocBook XSLT sheets); otherwise, if not recognised, they are just ignored.
101 // Hence, it is not a problem to have many roles by default here.
102 // See https://www.sourceware.org/ml/docbook/2003-05/msg00269.html
104 case xml::FontTypes::FT_ITALIC:
105 case xml::FontTypes::FT_EMPH:
107 case xml::FontTypes::FT_BOLD:
109 case xml::FontTypes::FT_NOUN: // Outputs a <person>
110 case xml::FontTypes::FT_TYPE: // Outputs a <code>
112 case xml::FontTypes::FT_UBAR:
115 // All other roles are non-standard for DocBook.
117 case xml::FontTypes::FT_WAVE:
119 case xml::FontTypes::FT_DBAR:
121 case xml::FontTypes::FT_SOUT:
123 case xml::FontTypes::FT_XOUT:
125 case xml::FontTypes::FT_UPRIGHT:
127 case xml::FontTypes::FT_SLANTED:
129 case xml::FontTypes::FT_SMALLCAPS:
131 case xml::FontTypes::FT_ROMAN:
133 case xml::FontTypes::FT_SANS:
135 case xml::FontTypes::FT_SIZE_TINY:
137 case xml::FontTypes::FT_SIZE_SCRIPT:
138 return "size_script";
139 case xml::FontTypes::FT_SIZE_FOOTNOTE:
140 return "size_footnote";
141 case xml::FontTypes::FT_SIZE_SMALL:
143 case xml::FontTypes::FT_SIZE_NORMAL:
144 return "size_normal";
145 case xml::FontTypes::FT_SIZE_LARGE:
147 case xml::FontTypes::FT_SIZE_LARGER:
148 return "size_larger";
149 case xml::FontTypes::FT_SIZE_LARGEST:
150 return "size_largest";
151 case xml::FontTypes::FT_SIZE_HUGE:
153 case xml::FontTypes::FT_SIZE_HUGER:
155 case xml::FontTypes::FT_SIZE_INCREASE:
156 return "size_increase";
157 case xml::FontTypes::FT_SIZE_DECREASE:
158 return "size_decrease";
164 string fontToAttribute(xml::FontTypes type) {
165 // If there is a role (i.e. nonstandard use of a tag), output the attribute. Otherwise, the sheer tag is sufficient
167 string role = fontToRole(type);
169 return "role='" + role + "'";
175 } // end anonymous namespace
178 xml::FontTag docbookStartFontTag(xml::FontTypes type)
180 return xml::FontTag(from_utf8(fontToDocBookTag(type)), from_utf8(fontToAttribute(type)), type);
184 xml::EndFontTag docbookEndFontTag(xml::FontTypes type)
186 return xml::EndFontTag(from_utf8(fontToDocBookTag(type)), type);
192 // convenience functions
194 void openParTag(XMLStream & xs, const Paragraph * par, const Paragraph * prevpar)
196 #ifdef DOCBOOK_DEBUG_NEWLINES
197 xs << XMLStream::ESCAPE_NONE << "<!-- openParTag -->";
200 Layout const & lay = par->layout();
205 // When should the wrapper be opened here? Only if the previous paragraph has the SAME wrapper tag
206 // (usually, they won't have the same layout) and the CURRENT one allows merging.
207 // The main use case is author information in several paragraphs: if the name of the author is the
208 // first paragraph of an author, then merging with the previous tag does not make sense. Say the
209 // next paragraph is the affiliation, then it should be output in the same <author> tag (different
210 // layout, same wrapper tag).
211 bool openWrapper = lay.docbookwrappertag() != "NONE";
212 if (prevpar != nullptr) {
213 Layout const & prevlay = prevpar->layout();
214 if (prevlay.docbookwrappertag() != "NONE") {
215 openWrapper = prevlay.docbookwrappertag() == lay.docbookwrappertag()
216 && !lay.docbookwrappermergewithprevious();
222 xs << xml::StartTag(lay.docbookwrappertag(), lay.docbookwrapperattr());
226 string tag = lay.docbooktag();
228 auto xmltag = xml::ParTag(tag, lay.docbookattr());
229 if (!xs.isTagOpen(xmltag, 1)) // Don't nest a paragraph directly in a paragraph. TODO: required or not?
233 if (lay.docbookitemtag() != "NONE") {
234 xs << xml::StartTag(lay.docbookitemtag(), lay.docbookitemattr());
238 if (lay.docbookiteminnertag() != "NONE")
239 xs << xml::StartTag(lay.docbookiteminnertag(), lay.docbookiteminnerattr());
241 #ifdef DOCBOOK_DEBUG_NEWLINES
242 xs << XMLStream::ESCAPE_NONE << "<!-- /openParTag -->";
247 void closeParTag(XMLStream & xs, Paragraph const * par, Paragraph const * nextpar)
249 #ifdef DOCBOOK_DEBUG_NEWLINES
250 xs << XMLStream::ESCAPE_NONE << "<!-- closeParTag -->";
256 // See comment in openParTag.
257 Layout const & lay = par->layout();
258 bool closeWrapper = lay.docbookwrappertag() != "NONE";
259 if (nextpar != nullptr) {
260 Layout const & nextlay = nextpar->layout();
261 if (nextlay.docbookwrappertag() != "NONE") {
262 closeWrapper = nextlay.docbookwrappertag() == lay.docbookwrappertag()
263 && !nextlay.docbookwrappermergewithprevious();
268 if (lay.docbookiteminnertag() != "NONE") {
269 xs << xml::EndTag(lay.docbookiteminnertag());
273 if (lay.docbookitemtag() != "NONE") {
274 xs << xml::EndTag(lay.docbookitemtag());
278 if (lay.docbooktag() != "NONE") {
279 xs << xml::EndTag(lay.docbooktag());
284 xs << xml::EndTag(lay.docbookwrappertag());
288 #ifdef DOCBOOK_DEBUG_NEWLINES
289 xs << XMLStream::ESCAPE_NONE << "<!-- /closeParTag -->";
294 void openBlockTag(XMLStream & xs, const Paragraph * par, const Paragraph * prevpar)
296 #ifdef DOCBOOK_DEBUG_NEWLINES
297 xs << XMLStream::ESCAPE_NONE << "<!-- openBlockTag -->";
300 // Similar to openParTag, but with a line feed after.
301 openParTag(xs, par, prevpar);
304 #ifdef DOCBOOK_DEBUG_NEWLINES
305 xs << XMLStream::ESCAPE_NONE << "<!-- /openBlockTag -->";
310 void closeBlockTag(XMLStream & xs, const Paragraph * par, const Paragraph * prevpar)
312 #ifdef DOCBOOK_DEBUG_NEWLINES
313 xs << XMLStream::ESCAPE_NONE << "<!-- closeBlockTag -->";
316 // Similar to closeParTag, but with a line feed before.
318 closeParTag(xs, par, prevpar);
320 #ifdef DOCBOOK_DEBUG_NEWLINES
321 xs << XMLStream::ESCAPE_NONE << "<!-- /closeBlockTag -->";
326 void openLabelTag(XMLStream & xs, Layout const & lay) // Mostly for definition lists.
328 #ifdef DOCBOOK_DEBUG_NEWLINES
329 xs << XMLStream::ESCAPE_NONE << "<!-- openLabelTag -->";
332 xs << xml::StartTag(lay.docbookitemlabeltag(), lay.docbookitemlabelattr());
334 #ifdef DOCBOOK_DEBUG_NEWLINES
335 xs << XMLStream::ESCAPE_NONE << "<!-- /openLabelTag -->";
340 void closeLabelTag(XMLStream & xs, Layout const & lay)
342 #ifdef DOCBOOK_DEBUG_NEWLINES
343 xs << XMLStream::ESCAPE_NONE << "<!-- closeLabelTag -->";
346 xs << xml::EndTag(lay.docbookitemlabeltag());
349 #ifdef DOCBOOK_DEBUG_NEWLINES
350 xs << XMLStream::ESCAPE_NONE << "<!-- closeLabelTag -->";
355 void openItemTag(XMLStream & xs, Layout const & lay)
357 #ifdef DOCBOOK_DEBUG_NEWLINES
358 xs << XMLStream::ESCAPE_NONE << "<!-- openItemTag -->";
361 xs << xml::StartTag(lay.docbookitemtag(), lay.docbookitemattr());
363 #ifdef DOCBOOK_DEBUG_NEWLINES
364 xs << XMLStream::ESCAPE_NONE << "<!-- /openItemTag -->";
369 void closeItemTag(XMLStream & xs, Layout const & lay)
371 #ifdef DOCBOOK_DEBUG_NEWLINES
372 xs << XMLStream::ESCAPE_NONE << "<!-- closeItemTag -->";
375 xs << xml::EndTag(lay.docbookitemtag());
378 #ifdef DOCBOOK_DEBUG_NEWLINES
379 xs << XMLStream::ESCAPE_NONE << "<!-- /closeItemTag -->";
384 void makeParagraphBibliography(
387 OutputParams const & runparams,
389 ParagraphList::const_iterator const & pbegin)
391 auto const begin = text.paragraphs().begin();
392 auto const end = text.paragraphs().end();
396 // Find the paragraph *before* pbegin.
397 ParagraphList::const_iterator pbegin_before = begin;
398 if (pbegin != begin) {
399 ParagraphList::const_iterator pbegin_before_next = begin;
400 ++pbegin_before_next;
402 while (pbegin_before_next != pbegin) {
404 ++pbegin_before_next;
408 ParagraphList::const_iterator par = pbegin;
410 // If this is the first paragraph in a bibliography, open the bibliography tag.
411 if (pbegin != begin && pbegin_before->layout().latextype != LATEX_BIB_ENVIRONMENT) {
412 xs << xml::StartTag("bibliography");
416 // Generate one bibliography entry for each paragraph in [pbegin, pend).
417 for (; par != pend; ++par) {
418 // Start the precooked bibliography entry. This is very much like opening a paragraph tag.
419 // Don't forget the citation ID!
421 for (auto i = 0; i < par->size(); ++i) {
422 Inset const *ip = par->getInset(0);
423 if (ip != nullptr && ip->lyxCode() == BIBITEM_CODE) {
424 const auto * bibitem = dynamic_cast<const InsetBibitem*>(par->getInset(i));
425 attr = from_utf8("xml:id='") + bibitem->getParam("key") + from_utf8("'");
429 xs << xml::StartTag(from_utf8("bibliomixed"), attr);
431 // Generate the entry.
432 par->simpleDocBookOnePar(buf, xs, runparams, text.outerFont(distance(begin, par)), true, true, 0);
434 // End the precooked bibliography entry.
435 xs << xml::EndTag("bibliomixed");
439 // If this is the last paragraph in a bibliography, close the bibliography tag.
440 if (par == end || par->layout().latextype != LATEX_BIB_ENVIRONMENT) {
441 xs << xml::EndTag("bibliography");
450 OutputParams const & runparams,
452 ParagraphList::const_iterator const & par)
454 auto const begin = text.paragraphs().begin();
455 auto const end = text.paragraphs().end();
456 auto prevpar = text.paragraphs().getParagraphBefore(par);
458 // We want to open the paragraph tag if:
459 // (i) the current layout permits multiple paragraphs
460 // (ii) we are either not already inside a paragraph (HTMLIsBlock) OR
461 // we are, but this is not the first paragraph
463 // But there is also a special case, and we first see whether we are in it.
464 // We do not want to open the paragraph tag if this paragraph contains
465 // only one item, and that item is "inline", i.e., not HTMLIsBlock (such
466 // as a branch). On the other hand, if that single item has a font change
467 // applied to it, then we still do need to open the paragraph.
469 // Obviously, this is very fragile. The main reason we need to do this is
470 // because of branches, e.g., a branch that contains an entire new section.
471 // We do not really want to wrap that whole thing in a <div>...</div>.
472 bool special_case = false;
473 Inset const *specinset = par->size() == 1 ? par->getInset(0) : nullptr;
474 if (specinset && !specinset->getLayout().htmlisblock()) { // TODO: Convert htmlisblock to a DocBook parameter?
475 Layout const &style = par->layout();
476 FontInfo const first_font = style.labeltype == LABEL_MANUAL ?
477 style.labelfont : style.font;
478 FontInfo const our_font =
479 par->getFont(buf.masterBuffer()->params(), 0,
480 text.outerFont(std::distance(begin, par))).fontInfo();
482 if (first_font == our_font)
486 // Plain layouts must be ignored.
487 if (!special_case && buf.params().documentClass().isPlainLayout(par->layout()) && !runparams.docbook_force_pars)
489 // TODO: Could get rid of this with a DocBook equivalent to htmlisblock?
490 if (!special_case && par->size() == 1 && par->getInset(0)) {
491 Inset const * firstInset = par->getInset(0);
493 // Floats cannot be in paragraphs.
494 special_case = to_utf8(firstInset->layoutName()).substr(0, 6) == "Float:";
496 // Bibliographies cannot be in paragraphs.
497 if (!special_case && firstInset->asInsetCommand())
498 special_case = firstInset->asInsetCommand()->params().getCmdName() == "bibtex";
500 // Equations do not deserve their own paragraph (DocBook allows them outside paragraphs).
501 if (!special_case && firstInset->asInsetMath())
504 // ERTs are in comments, not paragraphs.
505 if (!special_case && firstInset->lyxCode() == lyx::ERT_CODE)
508 // Listings should not get into their own paragraph.
509 if (!special_case && firstInset->lyxCode() == lyx::LISTINGS_CODE)
513 bool const open_par = runparams.docbook_make_pars
514 && !runparams.docbook_in_par
517 // We want to issue the closing tag if either:
518 // (i) We opened it, and either docbook_in_par is false,
519 // or we're not in the last paragraph, anyway.
520 // (ii) We didn't open it and docbook_in_par is true,
521 // but we are in the first par, and there is a next par.
524 bool const close_par = open_par && (!runparams.docbook_in_par);
526 // Determine if this paragraph has some real content. Things like new pages are not caught
527 // by Paragraph::empty(), even though they do not generate anything useful in DocBook.
528 odocstringstream os2;
530 par->simpleDocBookOnePar(buf, xs2, runparams, text.outerFont(distance(begin, par)), open_par, close_par, 0);
532 docstring cleaned = os2.str();
533 static const lyx::regex reg("[ \\r\\n]*");
534 cleaned = from_utf8(lyx::regex_replace(to_utf8(cleaned), reg, string("")));
536 if (!cleaned.empty()) {
538 openParTag(xs, &*par, prevpar);
540 xs << XMLStream::ESCAPE_NONE << os2.str();
543 closeParTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr);
552 OutputParams const &ourparams,
553 ParagraphList::const_iterator par);
556 void makeEnvironment(
559 OutputParams const &runparams,
561 ParagraphList::const_iterator const & par)
563 auto const end = text.paragraphs().end();
565 // Output the opening tag for this environment, but only if it has not been previously opened (condition
566 // implemented in openParTag).
567 auto prevpar = text.paragraphs().getParagraphBefore(par);
568 openParTag(xs, &*par, prevpar); // TODO: switch in layout for par/block?
570 // Generate the contents of this environment. There is a special case if this is like some environment.
571 Layout const & style = par->layout();
572 if (style.latextype == LATEX_COMMAND) {
573 // Nothing to do (otherwise, infinite loops).
574 } else if (style.latextype == LATEX_ENVIRONMENT ||
575 style.latextype == LATEX_LIST_ENVIRONMENT ||
576 style.latextype == LATEX_ITEM_ENVIRONMENT) {
577 // Open a wrapper tag if needed.
578 if (style.docbookitemwrappertag() != "NONE") {
579 xs << xml::StartTag(style.docbookitemwrappertag(), style.docbookitemwrapperattr());
583 // Generate the label, if need be. If it is taken from the text, sep != 0 and corresponds to the first
584 // character after the label.
586 if (style.labeltype != LABEL_NO_LABEL && style.docbookitemlabeltag() != "NONE") {
587 // At least one condition must be met:
588 // - this environment is not a list
589 // - if this is a list, the label must not be manual (i.e. it must be taken from the layout)
590 if (style.latextype != LATEX_LIST_ENVIRONMENT || style.labeltype != LABEL_MANUAL) {
591 // Usual cases: maybe there is something specified at the layout level. Highly unlikely, though.
592 docstring const lbl = par->params().labelString();
597 openLabelTag(xs, style);
599 closeLabelTag(xs, style);
602 // Only variablelist gets here (or similar items defined as an extension in the layout).
603 openLabelTag(xs, style);
604 sep = par->firstWordDocBook(xs, runparams);
605 closeLabelTag(xs, style);
609 // Maybe the item is completely empty, i.e. if the first word ends at the end of the current paragraph
610 // AND if the next paragraph doesn't have the same depth (if there is such a paragraph).
611 // Common case: there is only the first word on the line, but there is a nested list instead
613 bool emptyItem = false;
614 if (sep == par->size()) { // If the separator is already at the end of this paragraph...
617 if (next_par == text.paragraphs().end()) // There is no next paragraph.
619 else // There is a next paragraph: check depth.
620 emptyItem = par->params().depth() >= next_par->params().depth();
624 // Avoid having an empty item, this is not valid DocBook. A single character is enough to force
625 // generation of a full <para>.
626 // TODO: this always worked only by magic...
629 // Generate the rest of the paragraph, if need be.
630 par->simpleDocBookOnePar(buf, xs, runparams, text.outerFont(std::distance(text.paragraphs().begin(), par)),
634 makeAny(text, buf, xs, runparams, par);
637 // Close the environment.
640 closeParTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr); // TODO: switch in layout for par/block?
647 OutputParams const & runparams,
649 ParagraphList::const_iterator const & par)
651 // Unlike XHTML, no need for labels, as they are handled by DocBook tags.
652 auto const begin = text.paragraphs().begin();
653 auto const end = text.paragraphs().end();
657 // Generate this command.
658 auto prevpar = text.paragraphs().getParagraphBefore(par);
659 openParTag(xs, &*par, prevpar);
661 par->simpleDocBookOnePar(buf, xs, runparams,
662 text.outerFont(distance(begin, par)));
664 closeParTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr);
672 OutputParams const &ourparams,
673 ParagraphList::const_iterator par)
675 switch (par->layout().latextype) {
677 makeCommand(buf, xs, ourparams, text, par);
679 case LATEX_ENVIRONMENT:
680 case LATEX_LIST_ENVIRONMENT:
681 case LATEX_ITEM_ENVIRONMENT:
682 makeEnvironment(buf, xs, ourparams, text, par);
684 case LATEX_PARAGRAPH:
685 makeParagraph(buf, xs, ourparams, text, par);
687 case LATEX_BIB_ENVIRONMENT:
688 makeParagraphBibliography(buf, xs, ourparams, text, par);
693 } // end anonymous namespace
696 using DocBookDocumentSectioning = tuple<bool, pit_type>;
699 struct DocBookInfoTag
701 const set<pit_type> shouldBeInInfo;
702 const set<pit_type> mustBeInInfo;
703 const set<pit_type> abstract;
707 DocBookInfoTag(const set<pit_type> & shouldBeInInfo, const set<pit_type> & mustBeInInfo,
708 const set<pit_type> & abstract, pit_type bpit, pit_type epit) :
709 shouldBeInInfo(shouldBeInInfo), mustBeInInfo(mustBeInInfo), abstract(abstract),
710 bpit(bpit), epit(epit) {}
714 DocBookDocumentSectioning hasDocumentSectioning(ParagraphList const ¶graphs, pit_type bpit, pit_type const epit) {
715 bool documentHasSections = false;
717 while (bpit < epit) {
718 Layout const &style = paragraphs[bpit].layout();
719 documentHasSections |= style.category() == from_utf8("Sectioning");
721 if (documentHasSections)
725 // Paragraphs before the first section: [ runparams.par_begin ; eppit )
727 return make_tuple(documentHasSections, bpit);
731 bool hasOnlyNotes(Paragraph const & par)
733 // Precondition: the paragraph is not empty. Otherwise, the function will always return true...
734 for (int i = 0; i < par.size(); ++i)
735 // If you find something that is not an inset (like actual text) or an inset that is not a note,
737 if (!par.isInset(i) || !dynamic_cast<InsetNote *>(par.insetList().get(i)))
743 DocBookInfoTag getParagraphsWithInfo(ParagraphList const ¶graphs, pit_type bpit, pit_type const epit) {
744 set<pit_type> shouldBeInInfo;
745 set<pit_type> mustBeInInfo;
746 set<pit_type> abstract;
748 // Find the first non empty paragraph by mutating bpit.
749 while (bpit < epit) {
750 Paragraph const &par = paragraphs[bpit];
751 if (par.empty() || hasOnlyNotes(par))
757 // Find the last info-like paragraph.
758 pit_type cpit = bpit;
759 bool hasAbstractLayout = false;
760 while (cpit < epit) {
761 // Skip paragraphs containing only notes.
762 Paragraph const & par = paragraphs[cpit];
763 if (hasOnlyNotes(par)) {
768 if (par.layout().docbookabstract())
769 hasAbstractLayout = true;
771 // Based on layout information, store this paragraph in one of two sets: should be in <info>, or must be in <info>.
772 Layout const &style = par.layout();
774 if (style.docbookininfo() == "always") {
775 mustBeInInfo.emplace(cpit);
776 } else if (style.docbookininfo() == "maybe") {
777 shouldBeInInfo.emplace(cpit);
779 // Hypothesis: the <info> parts should be grouped together near the beginning bpit.
780 // There may be notes in between, but nothing else.
785 // Now, cpit points to the last paragraph that has things that could go in <info>.
786 // bpit is the beginning of the <info> part.
788 // Go once again through the list of paragraphs to find the abstract. If there is an abstract
789 // layout, only consider it. Otherwise, an abstract is just a sequence of paragraphs with text.
790 if (hasAbstractLayout) {
792 while (pit < cpit) { // Don't overshoot the <info> part.
793 if (paragraphs[pit].layout().docbookabstract())
794 abstract.emplace(pit);
798 pit_type lastAbstract = epit + 1; // A nonsensical value.
799 docstring lastAbstractLayout;
802 while (pit < cpit) { // Don't overshoot the <info> part.
803 const Paragraph & par = paragraphs.at(pit);
804 if (!par.insetList().empty()) {
805 for (const auto &i : par.insetList()) {
806 if (i.inset->getText(0) != nullptr) {
807 if (lastAbstract == epit + 1) {
808 // First paragraph that matches the heuristic definition of abstract.
810 lastAbstractLayout = par.layout().name();
811 } else if (pit > lastAbstract + 1 || par.layout().name() != lastAbstractLayout) {
812 // This is either too far from the last abstract paragraph or doesn't
813 // have the right layout name, BUT there has already been an abstract
814 // in this document: done with detecting the abstract.
815 goto done; // Easier to get out of two nested loops.
818 abstract.emplace(pit);
828 return DocBookInfoTag(shouldBeInInfo, mustBeInInfo, abstract, bpit, cpit);
832 void outputDocBookInfo(
836 OutputParams const & runparams,
837 ParagraphList const & paragraphs,
838 DocBookInfoTag const & info)
840 // Perform an additional check on the abstract. Sometimes, there are many paragraphs that should go
841 // into the abstract, but none generates actual content. Thus, first generate to a temporary stream,
842 // then only create the <abstract> tag if these paragraphs generate some content.
843 // This check must be performed *before* a decision on whether or not to output <info> is made.
844 bool hasAbstract = !info.abstract.empty();
847 // Generate the abstract XML into a string before further checks.
848 odocstringstream os2;
851 auto bpit = *std::min_element(info.abstract.begin(), info.abstract.end());
852 auto epit = 1 + *std::max_element(info.abstract.begin(), info.abstract.end());
853 // info.abstract is inclusive, epit is exclusive, hence +1 for looping.
855 while (bpit < epit) {
856 makeAny(text, buf, xs2, runparams, paragraphs.iterator_at(bpit));
861 // Actually output the abstract if there is something to do. Don't count line feeds or spaces in this,
862 // even though they must be properly output if there is some abstract.
863 abstract = os2.str();
864 static const lyx::regex reg("[ \\r\\n]*");
865 docstring abstractContent = from_utf8(lyx::regex_replace(to_utf8(abstract), reg, string("")));
867 // Nothing? Then there is no abstract!
868 if (abstractContent.empty())
872 // The abstract must go in <info>. Otherwise, decide whether to open <info> based on the layouts.
873 bool needInfo = !info.mustBeInInfo.empty() || hasAbstract;
875 // Start the <info> tag if required.
877 xs.startDivision(false);
878 xs << xml::StartTag("info");
882 // Output the elements that should go in <info>, before and after the abstract.
883 for (auto pit : info.shouldBeInInfo) { // Typically, the title: these elements are so important and ubiquitous
884 // that mandating a wrapper like <info> would repel users. Thus, generate them first.
885 makeAny(text, buf, xs, runparams, paragraphs.iterator_at(pit));
887 for (auto pit : info.mustBeInInfo) {
888 if (info.abstract.find(pit) == info.abstract.end()) // The abstract must be in info, but is dealt with after.
889 makeAny(text, buf, xs, runparams, paragraphs.iterator_at(pit));
892 // Always output the abstract as the last item of the <info>, as it requires special treatment (especially if
893 // it contains several paragraphs that are empty).
895 // string tag = paragraphs[*info.abstract.begin()].layout().docbookforceabstracttag();
896 // if (tag == "NONE")
899 // xs << xml::StartTag(tag);
901 xs << XMLStream::ESCAPE_NONE << abstract;
902 // xs << xml::EndTag(tag);
906 // End the <info> tag if it was started.
908 xs << xml::EndTag("info");
915 void docbookFirstParagraphs(
919 OutputParams const &runparams,
922 // Handle the beginning of the document, supposing it has sections.
923 // Major role: output the first <info> tag.
925 ParagraphList const ¶graphs = text.paragraphs();
926 pit_type bpit = runparams.par_begin;
927 DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit);
928 outputDocBookInfo(text, buf, xs, runparams, paragraphs, info);
932 void docbookSimpleAllParagraphs(
936 OutputParams const & runparams)
938 // Handle the given text, supposing it has no sections (i.e. a "simple" text). The input may vary in length
939 // from a single paragraph to a whole document.
941 // First, the <info> tag.
942 ParagraphList const ¶graphs = text.paragraphs();
943 pit_type bpit = runparams.par_begin;
944 pit_type const epit = runparams.par_end;
945 DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit);
946 outputDocBookInfo(text, buf, xs, runparams, paragraphs, info);
948 // Then, the content. It starts where the <info> ends.
950 while (bpit < epit) {
951 auto par = paragraphs.iterator_at(bpit);
952 if (!hasOnlyNotes(*par))
953 makeAny(text, buf, xs, runparams, par);
959 void docbookParagraphs(Text const &text,
962 OutputParams const &runparams) {
963 ParagraphList const ¶graphs = text.paragraphs();
964 if (runparams.par_begin == runparams.par_end) {
965 runparams.par_begin = 0;
966 runparams.par_end = paragraphs.size();
968 pit_type bpit = runparams.par_begin;
969 pit_type const epit = runparams.par_end;
972 xs << XMLStream::ESCAPE_NONE << "<!-- DocBook output error! -->\n";
976 std::stack<std::pair<int, string>> headerLevels; // Used to determine when to open/close sections: store the depth
977 // of the section and the tag that was used to open it.
979 // Detect whether the document contains sections. If there are no sections, there can be no automatically
980 // discovered abstract.
981 bool documentHasSections;
983 tie(documentHasSections, eppit) = hasDocumentSectioning(paragraphs, bpit, epit);
985 if (documentHasSections) {
986 docbookFirstParagraphs(text, buf, xs, runparams, eppit);
989 docbookSimpleAllParagraphs(text, buf, xs, runparams);
993 bool currentlyInAppendix = false;
995 while (bpit < epit) {
996 OutputParams ourparams = runparams;
998 auto par = paragraphs.iterator_at(bpit);
999 if (par->params().startOfAppendix())
1000 currentlyInAppendix = true;
1001 Layout const &style = par->layout();
1002 ParagraphList::const_iterator const lastStartedPar = par;
1003 ParagraphList::const_iterator send;
1005 if (hasOnlyNotes(*par)) {
1010 // Think about adding <section> and/or </section>s.
1011 const bool isLayoutSectioning = style.category() == from_utf8("Sectioning");
1012 if (isLayoutSectioning) {
1013 int level = style.toclevel;
1015 // Need to close a previous section if it has the same level or a higher one (close <section> if opening a <h2>
1016 // after a <h2>, <h3>, <h4>, <h5> or <h6>). More examples:
1017 // - current: h2; back: h1; do not close any <section>
1018 // - current: h1; back: h2; close two <section> (first the <h2>, then the <h1>, so a new <h1> can come)
1019 while (!headerLevels.empty() && level <= headerLevels.top().first) {
1020 int stackLevel = headerLevels.top().first;
1021 docstring stackTag = from_utf8("</" + headerLevels.top().second + ">");
1024 // Output the tag only if it corresponds to a legit section.
1025 if (stackLevel != Layout::NOT_IN_TOC)
1026 xs << XMLStream::ESCAPE_NONE << stackTag << xml::CR();
1029 // Open the new section: first push it onto the stack, then output it in DocBook.
1030 string sectionTag = (currentlyInAppendix && style.docbooksectiontag() == "chapter") ?
1031 "appendix" : style.docbooksectiontag();
1032 headerLevels.push(std::make_pair(level, sectionTag));
1034 // Some sectioning-like elements should not be output (such as FrontMatter).
1035 if (level != Layout::NOT_IN_TOC) {
1036 // Look for a label in the title, i.e. an InsetLabel as a child.
1037 docstring id = docstring();
1038 for (pos_type i = 0; i < par->size(); ++i) {
1039 Inset const *inset = par->getInset(i);
1041 if (auto label = dynamic_cast<InsetLabel const *>(inset)) {
1042 // Generate the attributes for the section if need be.
1043 id += "xml:id=\"" + xml::cleanID(label->screenLabel()) + "\"";
1045 // Don't output the ID as a DocBook <anchor>.
1046 ourparams.docbook_anchors_to_ignore.emplace(label->screenLabel());
1048 // Cannot have multiple IDs per tag.
1054 // Write the open tag for this section.
1055 docstring tag = from_utf8("<" + sectionTag);
1057 tag += from_utf8(" ") + id;
1058 tag += from_utf8(">");
1059 xs << XMLStream::ESCAPE_NONE << tag;
1064 // Close all sections before the bibliography.
1065 // TODO: Only close all when the bibliography is at the end of the document? Or force to output the bibliography at the end of the document? Or don't care (as allowed by DocBook)?
1066 auto insetsLength = distance(par->insetList().begin(), par->insetList().end());
1067 if (insetsLength > 0) {
1068 Inset const *firstInset = par->getInset(0);
1069 if (firstInset && dynamic_cast<InsetBibtex const *>(firstInset)) {
1070 while (!headerLevels.empty()) {
1071 int level = headerLevels.top().first;
1072 docstring tag = from_utf8("</" + headerLevels.top().second + ">");
1075 // Output the tag only if it corresponds to a legit section.
1076 if (level != Layout::NOT_IN_TOC) {
1077 xs << XMLStream::ESCAPE_NONE << tag;
1084 // Generate this paragraph.
1085 makeAny(text, buf, xs, ourparams, par);
1089 // If need be, close <section>s, but only at the end of the document (otherwise, dealt with at the beginning
1091 while (!headerLevels.empty() && headerLevels.top().first > Layout::NOT_IN_TOC) {
1092 docstring tag = from_utf8("</" + headerLevels.top().second + ">");
1094 xs << XMLStream::ESCAPE_NONE << tag;