2 * \file output_docbook.cpp
3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
6 * \author Lars Gullik Bjønnes
9 * Full author contact details are available in file CREDITS.
15 #include "buffer_funcs.h"
16 #include "BufferParams.h"
18 #include "InsetList.h"
20 #include "OutputParams.h"
21 #include "Paragraph.h"
22 #include "ParagraphList.h"
23 #include "ParagraphParameters.h"
26 #include "TextClass.h"
28 #include "insets/InsetBibtex.h"
29 #include "insets/InsetBibitem.h"
30 #include "insets/InsetLabel.h"
31 #include "insets/InsetNote.h"
33 #include "support/convert.h"
34 #include "support/debug.h"
35 #include "support/lassert.h"
36 #include "support/lstrings.h"
37 #include "support/textutils.h"
39 #include "support/regex.h"
47 using namespace lyx::support;
// Returns the DocBook tag name (as a std::string) used to render the given
// font type; the result is passed through from_utf8() by the callers
// docbookStartFontTag()/docbookEndFontTag().
// NOTE(review): this listing only shows the case labels; the return statement
// of each case group is not visible here, so the exact tag chosen for each
// font type cannot be confirmed from this excerpt.
53 std::string const fontToDocBookTag(xml::FontTypes type)
56 case xml::FontTypes::FT_EMPH:
57 case xml::FontTypes::FT_BOLD:
59 case xml::FontTypes::FT_NOUN:
61 case xml::FontTypes::FT_UBAR:
62 case xml::FontTypes::FT_WAVE:
63 case xml::FontTypes::FT_DBAR:
64 case xml::FontTypes::FT_SOUT:
65 case xml::FontTypes::FT_XOUT:
66 case xml::FontTypes::FT_ITALIC:
67 case xml::FontTypes::FT_UPRIGHT:
68 case xml::FontTypes::FT_SLANTED:
69 case xml::FontTypes::FT_SMALLCAPS:
70 case xml::FontTypes::FT_ROMAN:
71 case xml::FontTypes::FT_SANS:
73 case xml::FontTypes::FT_TYPE:
// All size-related font types follow.
75 case xml::FontTypes::FT_SIZE_TINY:
76 case xml::FontTypes::FT_SIZE_SCRIPT:
77 case xml::FontTypes::FT_SIZE_FOOTNOTE:
78 case xml::FontTypes::FT_SIZE_SMALL:
79 case xml::FontTypes::FT_SIZE_NORMAL:
80 case xml::FontTypes::FT_SIZE_LARGE:
81 case xml::FontTypes::FT_SIZE_LARGER:
82 case xml::FontTypes::FT_SIZE_LARGEST:
83 case xml::FontTypes::FT_SIZE_HUGE:
84 case xml::FontTypes::FT_SIZE_HUGER:
85 case xml::FontTypes::FT_SIZE_INCREASE:
86 case xml::FontTypes::FT_SIZE_DECREASE:
// Returns the value of the DocBook "role" attribute for the given font type.
// An empty string means that no role is needed (see FT_NOUN and FT_TYPE
// below). The visible size-related cases return roles of the form "size_...".
// NOTE(review): several return statements are not visible in this listing, so
// the role of the corresponding cases cannot be confirmed from this excerpt.
93 string fontToRole(xml::FontTypes type)
95 // Specific fonts are achieved with roles. The only common ones are "" for basic emphasis,
96 // and "bold"/"strong" for bold. With some specific options, other roles are copied into
97 // HTML output (via the DocBook XSLT sheets); otherwise, if not recognised, they are just ignored.
98 // Hence, it is not a problem to have many roles by default here.
99 // See https://www.sourceware.org/ml/docbook/2003-05/msg00269.html
101 case xml::FontTypes::FT_ITALIC:
102 case xml::FontTypes::FT_EMPH:
104 case xml::FontTypes::FT_BOLD:
106 case xml::FontTypes::FT_NOUN:
107 return ""; // Outputs a <person>
108 case xml::FontTypes::FT_TYPE:
109 return ""; // Outputs a <code>
110 case xml::FontTypes::FT_UBAR:
113 // All other roles are non-standard for DocBook.
115 case xml::FontTypes::FT_WAVE:
117 case xml::FontTypes::FT_DBAR:
119 case xml::FontTypes::FT_SOUT:
121 case xml::FontTypes::FT_XOUT:
123 case xml::FontTypes::FT_UPRIGHT:
125 case xml::FontTypes::FT_SLANTED:
127 case xml::FontTypes::FT_SMALLCAPS:
129 case xml::FontTypes::FT_ROMAN:
131 case xml::FontTypes::FT_SANS:
133 case xml::FontTypes::FT_SIZE_TINY:
135 case xml::FontTypes::FT_SIZE_SCRIPT:
136 return "size_script";
137 case xml::FontTypes::FT_SIZE_FOOTNOTE:
138 return "size_footnote";
139 case xml::FontTypes::FT_SIZE_SMALL:
141 case xml::FontTypes::FT_SIZE_NORMAL:
142 return "size_normal";
143 case xml::FontTypes::FT_SIZE_LARGE:
145 case xml::FontTypes::FT_SIZE_LARGER:
146 return "size_larger";
147 case xml::FontTypes::FT_SIZE_LARGEST:
148 return "size_largest";
149 case xml::FontTypes::FT_SIZE_HUGE:
151 case xml::FontTypes::FT_SIZE_HUGER:
153 case xml::FontTypes::FT_SIZE_INCREASE:
154 return "size_increase";
155 case xml::FontTypes::FT_SIZE_DECREASE:
156 return "size_decrease";
// Builds the XML attribute string for a font type, in the form
// "role='<role>'", where <role> is the result of fontToRole(type).
// NOTE(review): per the comment below, the attribute is presumably only
// emitted when the role is non-empty; the guarding condition and the
// alternative return are not visible in this listing.
162 string fontToAttribute(xml::FontTypes type) {
163 // If there is a role (i.e. nonstandard use of a tag), output the attribute. Otherwise, the sheer tag is sufficient
165 string role = fontToRole(type);
167 return "role='" + role + "'";
173 } // end anonymous namespace
// Creates the opening font tag for `type`: the tag name comes from
// fontToDocBookTag() and the attribute string from fontToAttribute(); both are
// converted with from_utf8() before being handed to xml::FontTag.
176 xml::FontTag docbookStartFontTag(xml::FontTypes type)
178 return xml::FontTag(from_utf8(fontToDocBookTag(type)), from_utf8(fontToAttribute(type)), type);
// Creates the closing font tag for `type`, using the same tag name as
// docbookStartFontTag() (fontToDocBookTag() converted with from_utf8()).
182 xml::EndFontTag docbookEndFontTag(xml::FontTypes type)
184 return xml::EndFontTag(from_utf8(fontToDocBookTag(type)), type);
190 // convenience functions
// Writes the opening tags of a paragraph to xs, based on the layout of *par:
// an optional wrapper tag (docbookwrappertag), the paragraph tag itself
// (docbooktag) and an optional item tag (docbookitemtag). The string "NONE"
// is the marker for "no such tag".
// prevpar is the paragraph preceding par, or nullptr when there is none; it is
// only used to decide whether the wrapper tag must be opened.
192 void openParTag(XMLStream & xs, const Paragraph * par, const Paragraph * prevpar)
194 Layout const & lay = par->layout();
199 // When should the wrapper be opened here? Only if the previous paragraph has the SAME wrapper tag
200 // (usually, they won't have the same layout) and the CURRENT one allows merging.
201 // The main use case is author information in several paragraphs: if the name of the author is the
202 // first paragraph of an author, then merging with the previous tag does not make sense. Say the
203 // next paragraph is the affiliation, then it should be output in the same <author> tag (different
204 // layout, same wrapper tag).
205 bool openWrapper = false;
206 if (prevpar == nullptr) {
// No previous paragraph: open the wrapper whenever this layout has one.
207 openWrapper = lay.docbookwrappertag() != "NONE";
209 Layout const & prevlay = prevpar->layout();
210 if (prevlay.docbookwrappertag() == "NONE") {
// The previous paragraph had no wrapper, so nothing can be merged with.
211 openWrapper = lay.docbookwrappertag() != "NONE";
// NOTE(review): as written, this expression is false when both paragraphs
// have different wrapper tags (neither being "NONE"), so no wrapper would be
// opened for the current paragraph in that case -- confirm this is intended.
213 openWrapper = prevlay.docbookwrappertag() == lay.docbookwrappertag()
214 && !lay.docbookwrappermergewithprevious();
// NOTE(review): presumably guarded by openWrapper; the condition is not
// visible in this listing.
220 xs << xml::StartTag(lay.docbookwrappertag(), lay.docbookwrapperattr());
222 string tag = lay.docbooktag();
// NOTE(review): the replacement tag used for "Plain Layout" is not visible in
// this listing.
223 if (tag == "Plain Layout")
226 xs << xml::ParTag(tag, lay.docbookattr());
228 if (lay.docbookitemtag() != "NONE")
229 xs << xml::StartTag(lay.docbookitemtag(), lay.docbookitemattr());
// Writes the closing tags of a paragraph to xs, mirroring openParTag(): the
// optional item tag, the paragraph tag, then the wrapper tag. The string
// "NONE" is the marker for "no such tag".
// nextpar is the paragraph following par, or nullptr when there is none; it is
// only used to decide whether the wrapper tag must be closed.
233 void closeTag(XMLStream & xs, Paragraph const * par, Paragraph const * nextpar)
235 Layout const & lay = par->layout();
240 // See comment in openParTag.
241 bool closeWrapper = false;
242 if (nextpar == nullptr) {
// No next paragraph: close the wrapper whenever this layout has one.
243 closeWrapper = lay.docbookwrappertag() != "NONE";
245 Layout const & nextlay = nextpar->layout();
246 if (nextlay.docbookwrappertag() == "NONE") {
247 closeWrapper = lay.docbookwrappertag() != "NONE";
// NOTE(review): as written, this expression is false when the next paragraph
// has a different wrapper tag (neither being "NONE"), so the current wrapper
// would not be closed in that case -- confirm this is intended.
249 closeWrapper = nextlay.docbookwrappertag() == lay.docbookwrappertag()
250 && !nextlay.docbookwrappermergewithprevious();
255 if (lay.docbookitemtag() != "NONE")
256 xs << xml::EndTag(lay.docbookitemtag());
258 string tag = lay.docbooktag();
// NOTE(review): the replacement tag used for "Plain Layout" is not visible in
// this listing.
259 if (tag == "Plain Layout")
262 xs << xml::EndTag(tag);
// NOTE(review): presumably guarded by closeWrapper; the condition is not
// visible in this listing.
264 xs << xml::EndTag(lay.docbookwrappertag());
// Opens the item label tag of the layout (docbookitemlabeltag, with the
// attributes from docbookitemlabelattr) on xs.
268 void openLabelTag(XMLStream & xs, Layout const & lay) // Mostly for definition lists.
270 xs << xml::StartTag(lay.docbookitemlabeltag(), lay.docbookitemlabelattr());
// Closes the item label tag opened by openLabelTag().
274 void closeLabelTag(XMLStream & xs, Layout const & lay)
276 xs << xml::EndTag(lay.docbookitemlabeltag());
// Opens the item tag of the layout (docbookitemtag, with the attributes from
// docbookitemattr) on xs.
281 void openItemTag(XMLStream & xs, Layout const & lay)
283 xs << xml::StartTag(lay.docbookitemtag(), lay.docbookitemattr());
// Opens the inner item tag of the layout (docbookiteminnertag), if the layout
// defines one (i.e. it is not "NONE").
287 // Return true when new elements are output in a paragraph, false otherwise.
288 bool openInnerItemTag(XMLStream & xs, Layout const & lay)
290 if (lay.docbookiteminnertag() != "NONE") {
292 xs << xml::ParTag(lay.docbookiteminnertag(), lay.docbookiteminnerattr());
// NOTE(review): the return statements are not visible in this listing;
// presumably true is returned only when the inner tag is "para".
294 if (lay.docbookiteminnertag() == "para") {
// Closes the inner item tag of the layout, if the layout defines one (i.e. it
// is not "NONE"). Counterpart of openInnerItemTag().
302 void closeInnerItemTag(XMLStream & xs, Layout const & lay)
304 if (lay.docbookiteminnertag()!= "NONE") {
305 xs << xml::EndTag(lay.docbookiteminnertag());
// Closes the item tag opened by openItemTag().
311 inline void closeItemTag(XMLStream & xs, Layout const & lay)
313 xs << xml::EndTag(lay.docbookitemtag());
317 // end of convenience functions
// Starting just after p, skips the consecutive paragraphs whose layout has the
// LaTeX type `type`, never going past pend. The paragraph p itself is not
// tested.
// NOTE(review): the declaration of `type` and the return statement are not
// visible in this listing; presumably the advanced iterator is returned.
319 ParagraphList::const_iterator findLast(
320 ParagraphList::const_iterator p,
321 ParagraphList::const_iterator const & pend,
// The loop body is intentionally empty: all the work is done in the header.
323 for (++p; p != pend && p->layout().latextype == type; ++p);
// Starting just after p, skips the consecutive paragraphs whose layout is of
// type LATEX_BIB_ENVIRONMENT, never going past pend. The paragraph p itself is
// not tested. Same loop as findLast() with a fixed type.
// NOTE(review): the return statement is not visible in this listing.
328 ParagraphList::const_iterator findLastBibliographyParagraph(
329 ParagraphList::const_iterator p,
330 ParagraphList::const_iterator const & pend) {
// The loop body is intentionally empty: all the work is done in the header.
331 for (++p; p != pend && p->layout().latextype == LATEX_BIB_ENVIRONMENT; ++p);
// Scans the paragraphs following pstart (up to pend) to find where the
// environment started at pstart ends, using the depth of pstart as reference.
// NOTE(review): the statements executed by each test below (return/continue)
// are not visible in this listing.
337 ParagraphList::const_iterator findEndOfEnvironment(
338 ParagraphList::const_iterator const & pstart,
339 ParagraphList::const_iterator const & pend)
341 ParagraphList::const_iterator p = pstart;
// Depth of the paragraph that starts the environment.
342 size_t const depth = p->params().depth();
344 for (++p; p != pend; ++p) {
345 Layout const &style = p->layout();
346 // It shouldn't happen that e.g. a section command occurs inside
347 // a quotation environment, at a higher depth, but as of 6/2009,
348 // it can happen. We pretend that it's just at lowest depth.
349 if (style.latextype == LATEX_COMMAND)
352 // If depth is down, we're done
353 if (p->params().depth() < depth)
356 // If depth is up, we're not done
357 if (p->params().depth() > depth)
360 // FIXME I am not sure about the first check.
361 // Surely we *could* have different layouts that count as
362 // LATEX_PARAGRAPH, right?
// NOTE(review): `style` is bound to p->layout() above, so the second operand
// compares a layout with itself; it looks like it was meant to compare with
// the layout of pstart -- confirm.
363 if (style.latextype == LATEX_PARAGRAPH || style != p->layout())
// Outputs the paragraphs in [pbegin, pend) as entries of a DocBook
// <bibliography>: each paragraph becomes one <bibliomixed> element.
// NOTE(review): some parameter declarations (buf, xs, text are used below) and
// the return statement are not visible in this listing.
371 ParagraphList::const_iterator makeParagraphBibliography(
374 OutputParams const &runparams,
376 ParagraphList::const_iterator const & pbegin,
377 ParagraphList::const_iterator const & pend)
379 auto const begin = text.paragraphs().begin();
380 auto const end = text.paragraphs().end();
382 // Find the paragraph *before* pbegin.
383 ParagraphList::const_iterator pbegin_before = begin;
384 if (pbegin != begin) {
385 ParagraphList::const_iterator pbegin_before_next = begin;
386 ++pbegin_before_next;
// Walk a pair of consecutive iterators until the leading one reaches pbegin.
// NOTE(review): the statement advancing pbegin_before is not visible in this
// listing.
388 while (pbegin_before_next != pbegin) {
390 ++pbegin_before_next;
394 ParagraphList::const_iterator par = pbegin;
396 // If this is the first paragraph in a bibliography, open the bibliography tag.
// NOTE(review): when pbegin == begin, the tag is not opened here, whereas the
// closing test at the end of this function does not have that restriction --
// confirm that a bibliography in the very first paragraph is handled.
397 if (pbegin != begin && pbegin_before->layout().latextype != LATEX_BIB_ENVIRONMENT) {
398 xs << xml::StartTag("bibliography");
402 // Generate one <bibliomixed> entry per paragraph in [pbegin, pend).
403 for (; par != pend; ++par) {
404 // Start the precooked bibliography entry. This is very much like opening a paragraph tag.
405 // Don't forget the citation ID!
// NOTE(review): the declaration of `attr` is not visible in this listing.
407 for (auto i = 0; i < par->size(); ++i) {
// NOTE(review): the inset tested here is always the one at position 0, while
// the cast below uses position i; this looks like it should be getInset(i).
// As written, only an inset at position 0 can ever match.
408 Inset const *ip = par->getInset(0);
409 if (ip != nullptr && ip->lyxCode() == BIBITEM_CODE) {
// NOTE(review): the result of dynamic_cast is dereferenced without a null
// check.
410 const auto * bibitem = dynamic_cast<const InsetBibitem*>(par->getInset(i));
// The bibliography key is used as the xml:id of the entry.
411 attr = from_utf8("xml:id='") + bibitem->getParam("key") + from_utf8("'");
415 xs << xml::StartTag(from_utf8("bibliomixed"), attr);
417 // Generate the entry.
418 par->simpleDocBookOnePar(buf, xs, runparams, text.outerFont(distance(begin, par)), true, true, 0);
420 // End the precooked bibliography entry.
421 xs << xml::EndTag("bibliomixed");
425 // If this is the last paragraph in a bibliography, close the bibliography tag.
// At this point, par == pend (the loop above has finished).
426 if (par == end || par->layout().latextype != LATEX_BIB_ENVIRONMENT) {
427 xs << xml::EndTag("bibliography");
// Outputs the paragraphs in [pbegin, pend) as standard DocBook paragraphs.
// Each paragraph is first rendered into a temporary stream; it is only copied
// to xs (surrounded by its opening/closing tags) if it produced something
// other than spaces and line breaks.
// NOTE(review): some parameter declarations (buf, xs, text are used below),
// the declarations of nextpar and xs2, and the return statement are not
// visible in this listing.
435 ParagraphList::const_iterator makeParagraphs(
438 OutputParams const &runparams,
440 ParagraphList::const_iterator const & pbegin,
441 ParagraphList::const_iterator const & pend)
443 auto const begin = text.paragraphs().begin();
444 auto const end = text.paragraphs().end();
445 ParagraphList::const_iterator par = pbegin;
// NOTE(review): prevpar starts equal to pbegin, so for the first paragraph,
// openParTag() below receives the paragraph itself as "previous paragraph"
// rather than the paragraph before pbegin (or nullptr) -- confirm.
446 ParagraphList::const_iterator prevpar = pbegin;
448 for (; par != pend; prevpar = par, ++par) {
449 // We want to open the paragraph tag if:
450 // (i) the current layout permits multiple paragraphs
451 // (ii) we are either not already inside a paragraph (HTMLIsBlock) OR
452 // we are, but this is not the first paragraph
454 // But there is also a special case, and we first see whether we are in it.
455 // We do not want to open the paragraph tag if this paragraph contains
456 // only one item, and that item is "inline", i.e., not HTMLIsBlock (such
457 // as a branch). On the other hand, if that single item has a font change
458 // applied to it, then we still do need to open the paragraph.
460 // Obviously, this is very fragile. The main reason we need to do this is
461 // because of branches, e.g., a branch that contains an entire new section.
462 // We do not really want to wrap that whole thing in a <div>...</div>.
463 bool special_case = false;
// Only paragraphs made of exactly one element, which is an inset, qualify.
464 Inset const *specinset = par->size() == 1 ? par->getInset(0) : nullptr;
465 if (specinset && !specinset->getLayout().htmlisblock()) { // TODO: Convert htmlisblock to a DocBook parameter?
466 Layout const &style = par->layout();
// Compare the font the layout prescribes with the font actually used at
// position 0, to detect a font change applied to the single inset.
467 FontInfo const first_font = style.labeltype == LABEL_MANUAL ?
468 style.labelfont : style.font;
469 FontInfo const our_font =
470 par->getFont(buf.masterBuffer()->params(), 0,
471 text.outerFont(distance(begin, par))).fontInfo();
473 if (first_font == our_font)
477 // Plain layouts must be ignored.
478 if (!special_case && buf.params().documentClass().isPlainLayout(par->layout()) && !runparams.docbook_force_pars)
480 // TODO: Could get rid of this with a DocBook equivalent to htmlisblock?
481 if (!special_case && par->size() == 1 && par->getInset(0)) {
482 Inset const * firstInset = par->getInset(0);
484 // Floats cannot be in paragraphs.
485 special_case = to_utf8(firstInset->layoutName()).substr(0, 6) == "Float:";
487 // Bibliographies cannot be in paragraphs.
488 if (!special_case && firstInset->asInsetCommand())
489 special_case = firstInset->asInsetCommand()->params().getCmdName() == "bibtex";
491 // Equations do not deserve their own paragraph (DocBook allows them outside paragraphs).
492 if (!special_case && firstInset->asInsetMath())
495 // ERTs are in comments, not paragraphs.
496 if (!special_case && firstInset->lyxCode() == lyx::ERT_CODE)
499 // Listings should not get into their own paragraph.
500 if (!special_case && firstInset->lyxCode() == lyx::LISTINGS_CODE)
// NOTE(review): the last operand of this condition is not visible in this
// listing.
504 bool const open_par = runparams.docbook_make_pars
505 && (!runparams.docbook_in_par || par != pbegin)
508 // We want to issue the closing tag if either:
509 // (i) We opened it, and either docbook_in_par is false,
510 // or we're not in the last paragraph, anyway.
511 // (ii) We didn't open it and docbook_in_par is true,
512 // but we are in the first par, and there is a next par.
515 bool const close_par =
516 ((open_par && (!runparams.docbook_in_par || nextpar != pend))
517 || (!open_par && runparams.docbook_in_par && par == pbegin && nextpar != pend));
519 // Determine if this paragraph has some real content. Things like new pages are not caught
520 // by Paragraph::empty(), even though they do not generate anything useful in DocBook.
521 odocstringstream os2;
// Render into the temporary stream xs2 (presumably built on top of os2).
523 par->simpleDocBookOnePar(buf, xs2, runparams, text.outerFont(distance(begin, par)), open_par, close_par, 0);
// Remove spaces, carriage returns and line feeds before testing for content.
525 docstring cleaned = os2.str();
526 static const lyx::regex reg("[ \\r\\n]*");
527 cleaned = from_utf8(lyx::regex_replace(to_utf8(cleaned), reg, string("")));
529 if (!cleaned.empty()) {
531 openParTag(xs, &*par, &*prevpar);
// The temporary output is already XML: copy it without escaping.
533 xs << XMLStream::ESCAPE_NONE << os2.str();
// No next paragraph is passed when this is the last one of the text.
536 closeTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr);
// Returns true when the layout is a LATEX_ENVIRONMENT or a
// LATEX_BIB_ENVIRONMENT; makeEnvironment() uses this to tell these
// environments apart from lists.
545 bool isNormalEnv(Layout const &lay)
547 return lay.latextype == LATEX_ENVIRONMENT
548 || lay.latextype == LATEX_BIB_ENVIRONMENT;
// Outputs the environment made of the paragraphs in [pbegin, pend): opens the
// tags of the environment, then handles each paragraph according to its LaTeX
// type (items of this environment, nested environments, standard paragraphs,
// bibliography entries), and finally closes the tags.
// NOTE(review): some parameter declarations (buf, xs, text are used below),
// the declarations of sep, next_par and nextpar, several conditions and the
// return statement are not visible in this listing.
552 ParagraphList::const_iterator makeEnvironment(
555 OutputParams const &runparams,
557 ParagraphList::const_iterator const & pbegin,
558 ParagraphList::const_iterator const & pend)
560 auto const begin = text.paragraphs().begin();
561 auto const end = text.paragraphs().end();
562 ParagraphList::const_iterator par = pbegin;
// Depth of the paragraph that starts the environment; deeper paragraphs
// belong to nested environments.
563 depth_type const origdepth = pbegin->params().depth();
565 // Find the previous paragraph.
566 auto prevpar = begin;
567 if (prevpar != par) {
568 auto prevpar_next = prevpar;
571 while (prevpar_next != par) {
577 // open tag for this environment
578 openParTag(xs, &*par, &*prevpar);
581 // we will on occasion need to remember a layout from before.
582 Layout const *lastlay = nullptr;
584 while (par != pend) {
585 Layout const & style = par->layout();
586 ParagraphList::const_iterator send;
588 // Actual content of this paragraph.
589 switch (style.latextype) {
590 case LATEX_ENVIRONMENT:
591 case LATEX_LIST_ENVIRONMENT:
592 case LATEX_ITEM_ENVIRONMENT: {
593 // There are two possibilities in this case.
594 // One is that we are still in the environment in which we
595 // started---which we will be if the depth is the same.
596 if (par->params().depth() == origdepth) {
597 LATTEST(par->layout() == style);
// Close the item that was left open by a previous iteration, if any.
598 if (lastlay != nullptr) {
599 closeItemTag(xs, *lastlay);
600 if (lastlay->docbookitemwrappertag() != "NONE") {
601 xs << xml::EndTag(lastlay->docbookitemwrappertag());
607 // this will be positive if we want to skip the
608 // initial word (if it's been taken for the label).
611 // Open a wrapper tag if needed.
612 if (style.docbookitemwrappertag() != "NONE") {
613 xs << xml::StartTag(style.docbookitemwrappertag(), style.docbookitemwrapperattr());
// Label output: only for layouts that have a label and a label tag.
618 if (style.labeltype != LABEL_NO_LABEL &&
619 style.docbookitemlabeltag() != "NONE") {
621 if (isNormalEnv(style)) {
622 // in this case, we print the label only for the first
623 // paragraph (as in a theorem or an abstract).
625 docstring const lbl = pbegin->params().labelString();
627 openLabelTag(xs, style);
629 closeLabelTag(xs, style);
631 // No new line after closeLabelTag.
635 } else { // some kind of list
636 if (style.labeltype == LABEL_MANUAL) {
637 // Only variablelist gets here.
639 openLabelTag(xs, style);
// The first word of the paragraph is the label; sep receives the value
// returned by firstWordDocBook() and is later used as the start position.
640 sep = par->firstWordDocBook(xs, runparams);
641 closeLabelTag(xs, style);
643 openLabelTag(xs, style);
644 xs << par->params().labelString();
645 closeLabelTag(xs, style);
648 } // end label output
650 // Start generating the item.
651 bool wasInParagraph = runparams.docbook_in_par;
652 openItemTag(xs, style);
653 bool getsIntoParagraph = openInnerItemTag(xs, style);
// Work on a copy of the parameters, so that the caller's docbook_in_par is
// left untouched.
654 OutputParams rp = runparams;
// Bitwise OR on two bools: equivalent to a logical OR without short-circuit.
655 rp.docbook_in_par = wasInParagraph | getsIntoParagraph;
657 // Maybe the item is completely empty, i.e. if the first word ends at the end of the current paragraph
658 // AND if the next paragraph doesn't have the same depth (if there is such a paragraph).
659 // Common case: there is only the first word on the line, but there is a nested list instead
661 bool emptyItem = false;
662 if (sep == par->size()) {
665 if (next_par == text.paragraphs().end()) // There is no next paragraph.
667 else // There is a next paragraph: check depth.
668 emptyItem = par->params().depth() >= next_par->params().depth();
672 // Avoid having an empty item, this is not valid DocBook. A single character is enough to force
673 // generation of a full <para>.
676 // Generate the rest of the paragraph, if need be.
677 par->simpleDocBookOnePar(buf, xs, rp, text.outerFont(distance(begin, par)), true, true, sep);
681 if (getsIntoParagraph)
682 closeInnerItemTag(xs, style);
684 // We may not want to close the tag yet, in particular:
685 // If we're not at the end of the item...
687 // and are doing items...
688 && !isNormalEnv(style)
689 // and if the depth has changed...
690 && par->params().depth() != origdepth) {
691 // then we'll save this layout for later, and close it when
692 // we get another item.
695 closeItemTag(xs, style);
697 // Eventually, close the item wrapper.
698 if (style.docbookitemwrappertag() != "NONE") {
699 xs << xml::EndTag(style.docbookitemwrappertag());
704 // The other possibility is that the depth has increased.
// The nested environment is output by a recursive call.
706 send = findEndOfEnvironment(par, pend);
707 par = makeEnvironment(buf, xs, runparams, text, par, send);
711 case LATEX_PARAGRAPH:
712 send = findLast(par, pend, LATEX_PARAGRAPH);
713 par = makeParagraphs(buf, xs, runparams, text, par, send);
715 case LATEX_BIB_ENVIRONMENT:
716 send = findLast(par, pend, LATEX_BIB_ENVIRONMENT);
717 par = makeParagraphBibliography(buf, xs, runparams, text, par, send);
// After the loop: close the item that is still open, if any.
725 if (lastlay != nullptr) {
726 closeItemTag(xs, *lastlay);
727 if (lastlay->docbookitemwrappertag() != "NONE") {
728 xs << xml::EndTag(lastlay->docbookitemwrappertag());
// NOTE(review): the visible loop condition is `par != pend`, so unless the
// loop is left early, par == pend here and &*par dereferences pend, which may
// be the end iterator of the paragraph list -- confirm this is safe.
734 closeTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr);
// Outputs the single paragraph pbegin, surrounded by its opening and closing
// tags.
// NOTE(review): the line with the name and the first parameters of this
// function is not visible in this listing. Presumably this is makeCommand(),
// which makeAny() calls with the arguments (buf, xs, ourparams, text, par).
743 OutputParams const & runparams,
745 ParagraphList::const_iterator const & pbegin)
747 // No need for labels, as they are handled by DocBook tags.
748 auto const begin = text.paragraphs().begin();
749 auto const end = text.paragraphs().end();
// NOTE(review): presumably advanced by one in a line that is not visible in
// this listing, so that it designates the paragraph after pbegin.
750 auto nextpar = pbegin;
753 // Find the previous paragraph.
754 auto prevpar = begin;
755 if (prevpar != pbegin) {
756 auto prevpar_next = prevpar;
759 while (prevpar_next != pbegin) {
765 // Generate this command.
766 openParTag(xs, &*pbegin, &*prevpar);
768 pbegin->simpleDocBookOnePar(buf, xs, runparams,
769 text.outerFont(distance(begin, pbegin)));
// No next paragraph is passed when pbegin is the last one of the text.
771 closeTag(xs, &*pbegin, (nextpar != end) ? &*nextpar : nullptr);
// Dispatches the output of the paragraph par to the right generator, based on
// the LaTeX type of its layout: makeCommand(), makeEnvironment(),
// makeParagraphs() or makeParagraphBibliography().
// Returns the pair (par, send) as updated by the selected branch.
// NOTE(review): some parameter declarations (text, buf, xs are used below) and
// the statements ending each case are not visible in this listing.
775 pair<ParagraphList::const_iterator, ParagraphList::const_iterator> makeAny(
779 OutputParams const &ourparams,
780 ParagraphList::const_iterator par,
781 ParagraphList::const_iterator send,
782 ParagraphList::const_iterator pend)
784 Layout const & style = par->layout();
786 switch (style.latextype) {
787 case LATEX_COMMAND: {
788 // The files with which we are working never have more than
789 // one paragraph in a command structure.
791 // if (ourparams.docbook_in_par)
792 // fix it so we don't get sections inside standard, e.g.
793 // note that we may then need to make runparams not const, so we
794 // can communicate that back.
795 // FIXME Maybe this fix should be in the routines themselves, in case
796 // they are called from elsewhere.
797 makeCommand(buf, xs, ourparams, text, par);
801 case LATEX_ENVIRONMENT:
802 case LATEX_LIST_ENVIRONMENT:
803 case LATEX_ITEM_ENVIRONMENT:
804 // FIXME Same fix here.
805 send = findEndOfEnvironment(par, pend);
806 par = makeEnvironment(buf, xs, ourparams, text, par, send);
808 case LATEX_PARAGRAPH:
809 send = findLast(par, pend, LATEX_PARAGRAPH);
810 par = makeParagraphs(buf, xs, ourparams, text, par, send);
812 case LATEX_BIB_ENVIRONMENT:
813 send = findLast(par, pend, LATEX_BIB_ENVIRONMENT);
814 par = makeParagraphBibliography(buf, xs, ourparams, text, par, send);
818 return make_pair(par, send);
821 } // end anonymous namespace
// Result of hasDocumentSectioning(): (true if a paragraph of category
// "Sectioning" was found, paragraph index reached by the search).
824 using DocBookDocumentSectioning = tuple<bool, pit_type>;
// Result of getParagraphsWithInfo(): (paragraphs that should be in <info>,
// paragraphs that must be in <info>, first paragraph of the <info> part,
// paragraph index at which the scan for info-like paragraphs stopped).
825 using DocBookInfoTag = tuple<set<pit_type>, set<pit_type>, pit_type, pit_type>;
// Looks in the paragraphs [bpit, epit) for a paragraph whose layout belongs to
// the "Sectioning" category. Returns whether one was found, together with the
// final value of bpit.
// NOTE(review): `¶graphs` in the signature looks like a mangled
// `&paragraphs` (the body uses `paragraphs`). The statements advancing bpit
// and leaving the loop are not visible in this listing.
828 DocBookDocumentSectioning hasDocumentSectioning(ParagraphList const ¶graphs, pit_type bpit, pit_type const epit) {
829 bool documentHasSections = false;
831 while (bpit < epit) {
832 Layout const &style = paragraphs[bpit].layout();
833 documentHasSections |= style.category() == from_utf8("Sectioning");
835 if (documentHasSections) {
840 // Paragraphs before the first section: [ runparams.par_begin ; eppit )
842 return make_tuple(documentHasSections, bpit);
// Checks every position of the paragraph: a position that is not an inset, or
// whose inset is not an InsetNote, triggers the test below.
// NOTE(review): the return statements are not visible in this listing;
// presumably false is returned as soon as the test succeeds, and true
// otherwise (including for an empty paragraph).
846 bool hasOnlyNotes(Paragraph const & par)
848 for (int i = 0; i < par.size(); ++i)
849 if (!par.isInset(i) || !dynamic_cast<InsetNote *>(par.insetList().get(i)))
// Determines which paragraphs of [bpit, epit) belong to the <info> part of the
// document, based on the docbookininfo() value of their layout ("always" or
// "maybe"). Returns the two sets of paragraph indices (should be in <info>,
// must be in <info>) and the indices delimiting the <info> part.
// NOTE(review): `¶graphs` in the signature looks like a mangled
// `&paragraphs` (the body uses `paragraphs`). The statements advancing bpit
// and cpit and leaving the loops are not visible in this listing.
855 DocBookInfoTag getParagraphsWithInfo(ParagraphList const ¶graphs, pit_type bpit, pit_type const epit) {
856 set<pit_type> shouldBeInInfo;
857 set<pit_type> mustBeInInfo;
859 // Find the first non empty paragraph by mutating bpit.
860 while (bpit < epit) {
861 Paragraph const &par = paragraphs[bpit];
862 if (par.empty() || hasOnlyNotes(par))
868 // Find the last info-like paragraph.
869 pit_type cpit = bpit;
870 while (cpit < epit) {
871 // Skip paragraphs only containing one note.
872 Paragraph const &par = paragraphs[cpit];
873 if (hasOnlyNotes(par)) {
878 // Based on layout information, store this paragraph in one set: should be in <info>, must be.
879 Layout const &style = par.layout();
881 if (style.docbookininfo() == "always") {
882 mustBeInInfo.emplace(cpit);
883 } else if (style.docbookininfo() == "maybe") {
884 shouldBeInInfo.emplace(cpit);
886 // Hypothesis: the <info> parts should be grouped together near the beginning bpit.
891 // Now, cpit points to the last paragraph that has things that could go in <info>.
892 // bpit is still the beginning of the <info> part.
894 return make_tuple(shouldBeInInfo, mustBeInInfo, bpit, cpit);
// Examines the paragraphs [bpitAbstract, epitAbstract) to decide whether they
// can be considered an abstract: it looks for a layout named "Abstract" and
// for insets that contain text.
// NOTE(review): `¶graphs` in the signature looks like a mangled
// `&paragraphs` (the body uses `paragraphs`). The return statements and the
// statement advancing bpit are not visible in this listing, so the exact
// result of each test cannot be confirmed from this excerpt.
898 bool hasAbstractBetween(ParagraphList const ¶graphs, pit_type const bpitAbstract, pit_type const epitAbstract)
900 // Hypothesis: the paragraphs between bpitAbstract and epitAbstract can be considered an abstract because they
901 // are just after a document or part title.
// Empty (or inverted) range: nothing to examine.
902 if (epitAbstract - bpitAbstract <= 0)
905 // If there is something between these paragraphs, check if it's compatible with an abstract (i.e. some text).
906 pit_type bpit = bpitAbstract;
907 while (bpit < epitAbstract) {
908 const Paragraph &p = paragraphs.at(bpit);
910 if (p.layout().name() == from_ascii("Abstract"))
913 if (!p.insetList().empty()) {
914 for (const auto &i : p.insetList()) {
// Tests whether the inset has a text cell at index 0.
915 if (i.inset->getText(0) != nullptr) {
// Outputs the paragraphs [bpit, epit) with makeAny(), i.e. without any
// handling of sections.
// NOTE(review): some parameter declarations (text, buf, xs, bpit, epit are
// used below), the declaration of pend and the return statement are not
// visible in this listing.
926 pit_type generateDocBookParagraphWithoutSectioning(
930 OutputParams const & runparams,
931 ParagraphList const & paragraphs,
935 auto par = paragraphs.iterator_at(bpit);
936 auto lastStartedPar = par;
937 ParagraphList::const_iterator send;
// The iterator corresponding to epit; the end iterator is used when epit is
// the size of the list.
939 (epit == (int) paragraphs.size()) ?
940 paragraphs.end() : paragraphs.iterator_at(epit);
942 while (bpit < epit) {
943 tie(par, send) = makeAny(text, buf, xs, runparams, par, send, pend);
// makeAny() may consume several paragraphs at once: advance the index by the
// number of paragraphs that were actually handled.
944 bpit += distance(lastStartedPar, par);
945 lastStartedPar = par;
// Outputs the <info> tag of the document: the paragraphs designated by `info`
// and, if there is one, the abstract made of the paragraphs
// [bpitAbstract, epitAbstract). The <info> tag is needed when some paragraphs
// must be in <info> or when there is an abstract.
// NOTE(review): some parameter declarations (text, buf, xs are used below),
// the declarations of bpitInfo, epitInfo, xs2 and abstract, and several
// conditions are not visible in this listing.
952 void outputDocBookInfo(
956 OutputParams const & runparams,
957 ParagraphList const & paragraphs,
958 DocBookInfoTag const & info,
959 pit_type bpitAbstract,
960 pit_type const epitAbstract)
962 // Consider everything between bpitAbstract and epitAbstract (excluded) as paragraphs for the abstract.
963 // Use bpitAbstract >= epitAbstract to indicate there is no abstract.
965 set<pit_type> shouldBeInInfo;
966 set<pit_type> mustBeInInfo;
// Unpack the tuple computed by getParagraphsWithInfo().
969 tie(shouldBeInInfo, mustBeInInfo, bpitInfo, epitInfo) = info;
971 // Perform an additional check on the abstract. Sometimes, there are many paragraphs that should go
972 // into the abstract, but none generates actual content. Thus, first generate to a temporary stream,
973 // then only create the <abstract> tag if these paragraphs generate some content.
974 // This check must be performed *before* a decision on whether or not to output <info> is made.
975 bool hasAbstract = hasAbstractBetween(paragraphs, bpitAbstract, epitAbstract);
978 odocstringstream os2;
// Render the abstract into the temporary stream xs2 (presumably built on top
// of os2).
980 generateDocBookParagraphWithoutSectioning(text, buf, xs2, runparams, paragraphs, bpitAbstract, epitAbstract);
982 // Actually output the abstract if there is something to do. Don't count line feeds or spaces in this,
983 // even though they must be properly output if there is some abstract.
984 docstring abstractContent = os2.str();
985 static const lyx::regex reg("[ \\r\\n]*");
986 abstractContent = from_utf8(lyx::regex_replace(to_utf8(abstractContent), reg, string("")));
988 // Nothing? Then there is no abstract!
989 if (abstractContent.empty())
993 // The abstract must go in <info>.
994 bool needInfo = !mustBeInInfo.empty() || hasAbstract;
996 // Start the <info> tag if required.
998 xs.startDivision(false);
999 xs << xml::StartTag("info");
1003 // Output the elements that should go in <info>.
1004 generateDocBookParagraphWithoutSectioning(text, buf, xs, runparams, paragraphs, bpitInfo, epitInfo);
1006 if (hasAbstract && !abstract.empty()) { // The second test is probably superfluous.
// The tag of the abstract comes from the layout of its first paragraph.
1007 string tag = paragraphs[bpitAbstract].layout().docbookforceabstracttag();
1011 xs << xml::StartTag(tag);
// The abstract is already XML: copy it without escaping.
1013 xs << XMLStream::ESCAPE_NONE << abstract;
1014 xs << xml::EndTag(tag);
1018 // End the <info> tag if it was started.
1020 xs << xml::EndTag("info");
// Outputs the <info> tag for a document that has sections. The paragraphs
// between the end of the info-like paragraphs (fourth element of the tuple
// returned by getParagraphsWithInfo()) and epit are passed to
// outputDocBookInfo() as the abstract.
// NOTE(review): `¶graphs` below looks like a mangled `&paragraphs`. Some
// parameter declarations (text, buf, xs, epit are used below) are not visible
// in this listing.
1027 void docbookFirstParagraphs(
1031 OutputParams const &runparams,
1034 // Handle the beginning of the document, supposing it has sections.
1035 // Major role: output the first <info> tag.
1037 ParagraphList const ¶graphs = text.paragraphs();
1038 pit_type bpit = runparams.par_begin;
1039 DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit);
1040 outputDocBookInfo(text, buf, xs, runparams, paragraphs, info, get<3>(info), epit);
// Returns true when the inset list of the paragraph contains exactly one
// inset and that inset is a collapsible InsetNote.
// NOTE(review): despite its name, this function does not test whether the
// paragraph has no content at all: a paragraph without any inset yields false.
// Text next to the single note is not examined either.
1044 bool isParagraphEmpty(const Paragraph &par)
1046 InsetList const &insets = par.insetList();
1047 size_t insetsLength = distance(insets.begin(), insets.end());
1048 bool hasParagraphOnlyNote = insetsLength == 1 && insets.get(0) && insets.get(0)->asInsetCollapsible() &&
1049 dynamic_cast<InsetNote *>(insets.get(0));
1050 return hasParagraphOnlyNote;
// Outputs a whole document that has no sections: first the <info> tag
// (without abstract), then every remaining paragraph with makeAny().
// Paragraphs for which isParagraphEmpty() is true get a separate treatment.
// NOTE(review): `¶graphs` below looks like a mangled `&paragraphs`. Some
// parameter declarations (text, buf, xs are used below) and the statements of
// the isParagraphEmpty() branch are only partially visible in this listing.
1054 void docbookSimpleAllParagraphs(
1058 OutputParams const & runparams)
1060 // Handle the document, supposing it has no sections (i.e. a "simple" document).
1062 // First, the <info> tag.
1063 ParagraphList const ¶graphs = text.paragraphs();
1064 pit_type bpit = runparams.par_begin;
1065 pit_type const epit = runparams.par_end;
1066 DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit);
// The range [0, 0) for the abstract means that there is no abstract.
1067 outputDocBookInfo(text, buf, xs, runparams, paragraphs, info, 0, 0);
1068 bpit = get<3>(info); // Generate the content starting from the end of the <info> part.
1070 // Then, the content.
1071 ParagraphList::const_iterator const pend =
1072 (epit == (int) paragraphs.size()) ?
1073 paragraphs.end() : paragraphs.iterator_at(epit);
1075 while (bpit < epit) {
1076 auto par = paragraphs.iterator_at(bpit);
1077 ParagraphList::const_iterator const lastStartedPar = par;
1078 ParagraphList::const_iterator send;
1080 if (isParagraphEmpty(*par)) {
1082 bpit += distance(lastStartedPar, par);
1086 // Generate this paragraph.
1087 tie(par, send) = makeAny(text, buf, xs, runparams, par, send, pend);
// makeAny() may consume several paragraphs at once: advance the index by the
// number of paragraphs that were actually handled.
1088 bpit += distance(lastStartedPar, par);
// Entry point of the DocBook output for the paragraphs of `text`, in the range
// [runparams.par_begin, runparams.par_end). Documents without sections are
// delegated to docbookSimpleAllParagraphs(); otherwise, the <info> part is
// output first, then each paragraph is generated with makeAny(), while the
// section tags are opened and closed according to the sectioning layouts that
// are met.
// NOTE(review): `¶graphs` below looks like a mangled `&paragraphs`. Some
// parameter declarations (buf, xs are used below), the declaration of eppit
// and several statements (stack pops, loop exits, returns) are not visible in
// this listing.
1093 void docbookParagraphs(Text const &text,
1096 OutputParams const &runparams) {
1097 ParagraphList const ¶graphs = text.paragraphs();
// An empty range means "the whole text".
// NOTE(review): runparams is a const reference; these assignments presumably
// rely on par_begin/par_end being declared mutable -- confirm.
1098 if (runparams.par_begin == runparams.par_end) {
1099 runparams.par_begin = 0;
1100 runparams.par_end = paragraphs.size();
1102 pit_type bpit = runparams.par_begin;
1103 pit_type const epit = runparams.par_end;
1104 LASSERT(bpit < epit,
1106 xs << XMLStream::ESCAPE_NONE << "<!-- DocBook output error! -->\n";
1110 ParagraphList::const_iterator const pend =
1111 (epit == (int) paragraphs.size()) ?
1112 paragraphs.end() : paragraphs.iterator_at(epit);
1113 std::stack<std::pair<int, string>> headerLevels; // Used to determine when to open/close sections: store the depth
1114 // of the section and the tag that was used to open it.
1116 // Detect whether the document contains sections. If there are no sections, there can be no automatically
1117 // discovered abstract.
1118 bool documentHasSections;
1120 tie(documentHasSections, eppit) = hasDocumentSectioning(paragraphs, bpit, epit);
1122 if (documentHasSections) {
1123 docbookFirstParagraphs(text, buf, xs, runparams, eppit);
1126 docbookSimpleAllParagraphs(text, buf, xs, runparams);
1130 bool currentlyInAppendix = false;
1132 while (bpit < epit) {
// Per-paragraph copy of the parameters: anchors to ignore are added to it
// below without modifying runparams.
1133 OutputParams ourparams = runparams;
1135 auto par = paragraphs.iterator_at(bpit);
// Once set, the flag is never reset in the visible code: every following
// paragraph is considered to be in the appendix.
1136 if (par->params().startOfAppendix())
1137 currentlyInAppendix = true;
1138 Layout const &style = par->layout();
1139 ParagraphList::const_iterator const lastStartedPar = par;
1140 ParagraphList::const_iterator send;
1142 if (isParagraphEmpty(*par)) {
1144 bpit += distance(lastStartedPar, par);
1148 // Think about adding <section> and/or </section>s.
1149 const bool isLayoutSectioning = style.category() == from_utf8("Sectioning");
1150 if (isLayoutSectioning) {
1151 int level = style.toclevel;
1153 // Need to close a previous section if it has the same level or a higher one (close <section> if opening a <h2>
1154 // after a <h2>, <h3>, <h4>, <h5> or <h6>). More examples:
1155 // - current: h2; back: h1; do not close any <section>
1156 // - current: h1; back: h2; close two <section> (first the <h2>, then the <h1>, so a new <h1> can come)
1157 while (!headerLevels.empty() && level <= headerLevels.top().first) {
1158 int stackLevel = headerLevels.top().first;
1159 docstring stackTag = from_utf8("</" + headerLevels.top().second + ">");
1162 // Output the tag only if it corresponds to a legit section.
1163 if (stackLevel != Layout::NOT_IN_TOC)
1164 xs << XMLStream::ESCAPE_NONE << stackTag << xml::CR();
1167 // Open the new section: first push it onto the stack, then output it in DocBook.
// In the appendix, a "chapter" is output as an "appendix".
1168 string sectionTag = (currentlyInAppendix && style.docbooksectiontag() == "chapter") ?
1169 "appendix" : style.docbooksectiontag();
1170 headerLevels.push(std::make_pair(level, sectionTag));
1172 // Some sectioning-like elements should not be output (such as FrontMatter).
1173 if (level != Layout::NOT_IN_TOC) {
1174 // Look for a label in the title, i.e. a InsetLabel as a child.
1175 docstring id = docstring();
1176 for (pos_type i = 0; i < par->size(); ++i) {
1177 Inset const *inset = par->getInset(i);
1179 if (auto label = dynamic_cast<InsetLabel const *>(inset)) {
1180 // Generate the attributes for the section if need be.
1181 id += "xml:id=\"" + xml::cleanID(label->screenLabel()) + "\"";
1183 // Don't output the ID as a DocBook <anchor>.
1184 ourparams.docbook_anchors_to_ignore.emplace(label->screenLabel());
1186 // Cannot have multiple IDs per tag.
1192 // Write the open tag for this section.
1193 docstring tag = from_utf8("<" + sectionTag);
1195 tag += from_utf8(" ") + id;
1196 tag += from_utf8(">");
1197 xs << XMLStream::ESCAPE_NONE << tag;
1202 // Close all sections before the bibliography.
1203 // TODO: Only close all when the bibliography is at the end of the document? Or force to output the bibliography at the end of the document? Or don't care (as allowed by DocBook)?
1204 auto insetsLength = distance(par->insetList().begin(), par->insetList().end());
1205 if (insetsLength > 0) {
1206 Inset const *firstInset = par->getInset(0);
1207 if (firstInset && dynamic_cast<InsetBibtex const *>(firstInset)) {
1208 while (!headerLevels.empty()) {
1209 int level = headerLevels.top().first;
1210 docstring tag = from_utf8("</" + headerLevels.top().second + ">");
1213 // Output the tag only if it corresponds to a legit section.
1214 if (level != Layout::NOT_IN_TOC) {
1215 xs << XMLStream::ESCAPE_NONE << tag;
1222 // Generate this paragraph.
1223 tie(par, send) = makeAny(text, buf, xs, ourparams, par, send, pend);
// makeAny() may consume several paragraphs at once: advance the index by the
// number of paragraphs that were actually handled.
1224 bpit += distance(lastStartedPar, par);
1227 // If need be, close <section>s, but only at the end of the document (otherwise, they are closed
// inside the loop above, when a new section of the same or a higher level is opened).
1229 while (!headerLevels.empty() && headerLevels.top().first > Layout::NOT_IN_TOC) {
1230 docstring tag = from_utf8("</" + headerLevels.top().second + ">");
1232 xs << XMLStream::ESCAPE_NONE << tag;