2 * \file output_docbook.cpp
3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
6 * \author Lars Gullik Bjønnes
9 * Full author contact details are available in file CREDITS.
15 #include "buffer_funcs.h"
16 #include "BufferParams.h"
18 #include "InsetList.h"
20 #include "OutputParams.h"
21 #include "Paragraph.h"
22 #include "ParagraphList.h"
23 #include "ParagraphParameters.h"
26 #include "TextClass.h"
28 #include "insets/InsetBibtex.h"
29 #include "insets/InsetBibitem.h"
30 #include "insets/InsetLabel.h"
31 #include "insets/InsetNote.h"
33 #include "support/convert.h"
34 #include "support/debug.h"
35 #include "support/lassert.h"
36 #include "support/lstrings.h"
37 #include "support/textutils.h"
39 #include "support/regex.h"
47 using namespace lyx::support;
// Maps an xml::FontTypes value to a std::string. Its result is passed as the tag
// name to xml::FontTag / xml::EndFontTag by docbookStartFontTag and docbookEndFontTag.
// NOTE(review): only the case labels are visible here, so the string returned for
// each group of cases could not be checked -- confirm against the complete switch.
53 std::string fontToDocBookTag(xml::FontTypes type)
56 case xml::FontTypes::FT_EMPH:
57 case xml::FontTypes::FT_BOLD:
59 case xml::FontTypes::FT_NOUN:
61 case xml::FontTypes::FT_UBAR:
62 case xml::FontTypes::FT_WAVE:
63 case xml::FontTypes::FT_DBAR:
64 case xml::FontTypes::FT_SOUT:
65 case xml::FontTypes::FT_XOUT:
66 case xml::FontTypes::FT_ITALIC:
67 case xml::FontTypes::FT_UPRIGHT:
68 case xml::FontTypes::FT_SLANTED:
69 case xml::FontTypes::FT_SMALLCAPS:
70 case xml::FontTypes::FT_ROMAN:
71 case xml::FontTypes::FT_SANS:
73 case xml::FontTypes::FT_TYPE:
// Font-size values (FT_SIZE_*), including the relative increase/decrease ones.
75 case xml::FontTypes::FT_SIZE_TINY:
76 case xml::FontTypes::FT_SIZE_SCRIPT:
77 case xml::FontTypes::FT_SIZE_FOOTNOTE:
78 case xml::FontTypes::FT_SIZE_SMALL:
79 case xml::FontTypes::FT_SIZE_NORMAL:
80 case xml::FontTypes::FT_SIZE_LARGE:
81 case xml::FontTypes::FT_SIZE_LARGER:
82 case xml::FontTypes::FT_SIZE_LARGEST:
83 case xml::FontTypes::FT_SIZE_HUGE:
84 case xml::FontTypes::FT_SIZE_HUGER:
85 case xml::FontTypes::FT_SIZE_INCREASE:
86 case xml::FontTypes::FT_SIZE_DECREASE:
// Maps an xml::FontTypes value to the role string that fontToAttribute() wraps
// into a role='...' attribute.
// NOTE(review): several return statements are not visible here; the visible ones
// all follow the pattern "size_<name>" for the FT_SIZE_* values.
94 string fontToRole(xml::FontTypes type)
96 // Specific fonts are achieved with roles. The only common ones are "" for basic emphasis,
97 // and "bold"/"strong" for bold. With some specific options, other roles are copied into
98 // HTML output (via the DocBook XSLT sheets); otherwise, if not recognised, they are just ignored.
99 // Hence, it is not a problem to have many roles by default here.
100 // See https://www.sourceware.org/ml/docbook/2003-05/msg00269.html
102 case xml::FontTypes::FT_ITALIC:
103 case xml::FontTypes::FT_EMPH:
105 case xml::FontTypes::FT_BOLD:
107 case xml::FontTypes::FT_NOUN: // Outputs a <person>
108 case xml::FontTypes::FT_TYPE: // Outputs a <code>
110 case xml::FontTypes::FT_UBAR:
113 // All other roles are non-standard for DocBook.
115 case xml::FontTypes::FT_WAVE:
117 case xml::FontTypes::FT_DBAR:
119 case xml::FontTypes::FT_SOUT:
121 case xml::FontTypes::FT_XOUT:
123 case xml::FontTypes::FT_UPRIGHT:
125 case xml::FontTypes::FT_SLANTED:
127 case xml::FontTypes::FT_SMALLCAPS:
129 case xml::FontTypes::FT_ROMAN:
131 case xml::FontTypes::FT_SANS:
// Font sizes: one role per size value.
133 case xml::FontTypes::FT_SIZE_TINY:
135 case xml::FontTypes::FT_SIZE_SCRIPT:
136 return "size_script";
137 case xml::FontTypes::FT_SIZE_FOOTNOTE:
138 return "size_footnote";
139 case xml::FontTypes::FT_SIZE_SMALL:
141 case xml::FontTypes::FT_SIZE_NORMAL:
142 return "size_normal";
143 case xml::FontTypes::FT_SIZE_LARGE:
145 case xml::FontTypes::FT_SIZE_LARGER:
146 return "size_larger";
147 case xml::FontTypes::FT_SIZE_LARGEST:
148 return "size_largest";
149 case xml::FontTypes::FT_SIZE_HUGE:
151 case xml::FontTypes::FT_SIZE_HUGER:
// Relative size changes.
153 case xml::FontTypes::FT_SIZE_INCREASE:
154 return "size_increase";
155 case xml::FontTypes::FT_SIZE_DECREASE:
156 return "size_decrease";
// Builds the XML attribute text for a font: the role obtained from fontToRole(type),
// formatted as role='<role>'.
// NOTE(review): the comment below says the attribute is only produced when a role
// exists; the condition guarding the return is not visible here -- confirm.
162 string fontToAttribute(xml::FontTypes type) {
163 // If there is a role (i.e. nonstandard use of a tag), output the attribute. Otherwise, the sheer tag is sufficient
165 string role = fontToRole(type);
// The role is embedded between single quotes without any escaping.
167 return "role='" + role + "'";
173 } // end anonymous namespace
// Returns the opening font tag for `type`: the tag name comes from fontToDocBookTag(),
// the attribute string from fontToAttribute(); both are converted with from_utf8().
176 xml::FontTag docbookStartFontTag(xml::FontTypes type)
178 return xml::FontTag(from_utf8(fontToDocBookTag(type)), from_utf8(fontToAttribute(type)), type);
// Returns the closing font tag for `type`, using the same tag name as
// docbookStartFontTag() (fontToDocBookTag(), converted with from_utf8()).
182 xml::EndFontTag docbookEndFontTag(xml::FontTypes type)
184 return xml::EndFontTag(from_utf8(fontToDocBookTag(type)), type);
190 // convenience functions
// Writes to `xs` the opening tags for paragraph `par`, based on its layout:
// an optional wrapper tag, the paragraph tag itself, and an optional item tag.
// `prevpar` is the paragraph preceding `par`, or nullptr; it is only used to decide
// whether the wrapper tag has to be opened.
192 void openParTag(XMLStream & xs, const Paragraph * par, const Paragraph * prevpar)
194 Layout const & lay = par->layout();
199 // When should the wrapper be opened here? Only if the previous paragraph has the SAME wrapper tag
200 // (usually, they won't have the same layout) and the CURRENT one allows merging.
201 // The main use case is author information in several paragraphs: if the name of the author is the
202 // first paragraph of an author, then merging with the previous tag does not make sense. Say the
203 // next paragraph is the affiliation, then it should be output in the same <author> tag (different
204 // layout, same wrapper tag).
// Default: open a wrapper whenever the layout defines one ("NONE" means no wrapper).
205 bool openWrapper = lay.docbookwrappertag() != "NONE";
206 if (prevpar != nullptr) {
207 Layout const & prevlay = prevpar->layout();
208 if (prevlay.docbookwrappertag() != "NONE") {
// NOTE(review): as written, when the previous paragraph has a wrapper that DIFFERS
// from this one, openWrapper becomes false and this paragraph's wrapper is not
// opened -- confirm this is intended.
209 openWrapper = prevlay.docbookwrappertag() == lay.docbookwrappertag()
210 && !lay.docbookwrappermergewithprevious();
// Wrapper start tag (presumably only emitted when openWrapper is true; the guard is not visible here).
216 xs << xml::StartTag(lay.docbookwrappertag(), lay.docbookwrapperattr());
218 string tag = lay.docbooktag();
// "Plain Layout" gets special treatment (the replacement tag is not visible here).
219 if (tag == "Plain Layout")
222 xs << xml::ParTag(tag, lay.docbookattr());
// Optional item tag, opened inside the paragraph tag.
224 if (lay.docbookitemtag() != "NONE")
225 xs << xml::StartTag(lay.docbookitemtag(), lay.docbookitemattr());
// Counterpart of openParTag(): writes to `xs` the closing tags for paragraph `par`
// in reverse order (item tag, paragraph tag, wrapper tag).
// `nextpar` is the paragraph following `par`, or nullptr; it is only used to decide
// whether the wrapper tag has to be closed.
229 void closeTag(XMLStream & xs, Paragraph const * par, Paragraph const * nextpar)
231 Layout const & lay = par->layout();
236 // See comment in openParTag.
237 bool closeWrapper = lay.docbookwrappertag() != "NONE";
238 if (nextpar != nullptr) {
239 Layout const & nextlay = nextpar->layout();
240 if (nextlay.docbookwrappertag() != "NONE") {
// NOTE(review): mirrors openParTag -- when the next paragraph has a DIFFERENT
// wrapper, closeWrapper becomes false here; check that the wrapper opened for
// this paragraph still gets closed somewhere.
241 closeWrapper = nextlay.docbookwrappertag() == lay.docbookwrappertag()
242 && !nextlay.docbookwrappermergewithprevious();
247 if (lay.docbookitemtag() != "NONE")
248 xs << xml::EndTag(lay.docbookitemtag());
250 string tag = lay.docbooktag();
// Same "Plain Layout" special case as in openParTag.
251 if (tag == "Plain Layout")
254 xs << xml::EndTag(tag);
// Wrapper end tag (presumably only emitted when closeWrapper is true; the guard is not visible here).
256 xs << xml::EndTag(lay.docbookwrappertag());
// Writes the start tag for an item label, using the layout's item-label tag and attributes.
260 void openLabelTag(XMLStream & xs, Layout const & lay) // Mostly for definition lists.
262 xs << xml::StartTag(lay.docbookitemlabeltag(), lay.docbookitemlabelattr());
// Writes the end tag matching openLabelTag() for the same layout.
266 void closeLabelTag(XMLStream & xs, Layout const & lay)
268 xs << xml::EndTag(lay.docbookitemlabeltag());
// Writes the start tag for an item, using the layout's item tag and attributes.
273 void openItemTag(XMLStream & xs, Layout const & lay)
275 xs << xml::StartTag(lay.docbookitemtag(), lay.docbookitemattr());
// Opens the layout's inner item tag, if it defines one ("NONE" means none).
279 // Return true when new elements are output in a paragraph, false otherwise.
280 bool openInnerItemTag(XMLStream & xs, Layout const & lay)
282 if (lay.docbookiteminnertag() != "NONE") {
284 xs << xml::ParTag(lay.docbookiteminnertag(), lay.docbookiteminnerattr());
// An inner tag named "para" is singled out (the statement it controls is not
// visible here; presumably this is the case that yields true).
286 if (lay.docbookiteminnertag() == "para") {
// Closes the inner item tag opened by openInnerItemTag(); does nothing visible here
// when the layout's inner item tag is "NONE".
294 void closeInnerItemTag(XMLStream & xs, Layout const & lay)
296 if (lay.docbookiteminnertag()!= "NONE") {
297 xs << xml::EndTag(lay.docbookiteminnertag());
// Writes the end tag matching openItemTag() for the same layout.
303 inline void closeItemTag(XMLStream & xs, Layout const & lay)
305 xs << xml::EndTag(lay.docbookitemtag());
309 // end of convenience functions
// Starting just after `p`, skips the consecutive paragraphs whose layout latextype
// equals `type`, stopping at `pend` at the latest.
// NOTE(review): the declaration of `type` and the return statement are not visible
// here; presumably the advanced iterator is returned.
311 ParagraphList::const_iterator findLast(
312 ParagraphList::const_iterator p,
313 ParagraphList::const_iterator const & pend,
// Empty-bodied loop: all the work happens in the condition and the increment.
315 for (++p; p != pend && p->layout().latextype == type; ++p);
// Same scan as findLast(), specialised for LATEX_BIB_ENVIRONMENT: starting just
// after `p`, skips consecutive bibliography paragraphs, stopping at `pend` at the latest.
// NOTE(review): the return statement is not visible here.
320 ParagraphList::const_iterator findLastBibliographyParagraph(
321 ParagraphList::const_iterator p,
322 ParagraphList::const_iterator const & pend) {
323 for (++p; p != pend && p->layout().latextype == LATEX_BIB_ENVIRONMENT; ++p);
// Scans forward from the paragraph after `pstart` (up to `pend`) to find where the
// environment started at `pstart` ends, using the paragraph depth of `pstart` as
// the reference depth.
// NOTE(review): the statements controlled by each `if` (return/continue) are not
// visible here; the existing comments describe their intent.
329 ParagraphList::const_iterator findEndOfEnvironment(
330 ParagraphList::const_iterator const & pstart,
331 ParagraphList::const_iterator const & pend)
333 ParagraphList::const_iterator p = pstart;
// Depth of the paragraph that starts the environment.
334 size_t const depth = p->params().depth();
336 for (++p; p != pend; ++p) {
337 Layout const &style = p->layout();
338 // It shouldn't happen that e.g. a section command occurs inside
339 // a quotation environment, at a higher depth, but as of 6/2009,
340 // it can happen. We pretend that it's just at lowest depth.
341 if (style.latextype == LATEX_COMMAND)
344 // If depth is down, we're done
345 if (p->params().depth() < depth)
348 // If depth is up, we're not done
349 if (p->params().depth() > depth)
352 // FIXME I am not sure about the first check.
353 // Surely we *could* have different layouts that count as
354 // LATEX_PARAGRAPH, right?
// NOTE(review): `style` is bound to p->layout() above, so `style != p->layout()`
// compares the layout with itself; the intended operand was probably the layout
// of `pstart` -- confirm.
355 if (style.latextype == LATEX_PARAGRAPH || style != p->layout())
// Outputs the paragraphs in [pbegin, pend) as DocBook bibliography entries: each
// paragraph becomes a <bibliomixed> element, and the surrounding <bibliography>
// element is opened/closed depending on the neighbouring paragraphs.
// NOTE(review): some parameters (`buf`, `xs`, `text`) and the return statement are
// not visible here.
363 ParagraphList::const_iterator makeParagraphBibliography(
366 OutputParams const &runparams,
368 ParagraphList::const_iterator const & pbegin,
369 ParagraphList::const_iterator const & pend)
371 auto const begin = text.paragraphs().begin();
372 auto const end = text.paragraphs().end();
374 // Find the paragraph *before* pbegin.
// Done by walking forward from `begin` with a second iterator that runs one step ahead.
375 ParagraphList::const_iterator pbegin_before = begin;
376 if (pbegin != begin) {
377 ParagraphList::const_iterator pbegin_before_next = begin;
378 ++pbegin_before_next;
380 while (pbegin_before_next != pbegin) {
382 ++pbegin_before_next;
386 ParagraphList::const_iterator par = pbegin;
388 // If this is the first paragraph in a bibliography, open the bibliography tag.
// NOTE(review): because of `pbegin != begin`, no opening tag is written when the
// bibliography starts at the very first paragraph, whereas the closing condition
// further down has no such restriction -- confirm the tags stay balanced.
389 if (pbegin != begin && pbegin_before->layout().latextype != LATEX_BIB_ENVIRONMENT) {
390 xs << xml::StartTag("bibliography");
394 // Generate one <bibliomixed> entry per paragraph in [pbegin, pend).
395 for (; par != pend; ++par) {
396 // Start the precooked bibliography entry. This is very much like opening a paragraph tag.
397 // Don't forget the citation ID!
399 for (auto i = 0; i < par->size(); ++i) {
// NOTE(review): the test looks at position 0 on every iteration, while the cast
// below uses position i; `getInset(i)` was probably intended here. As written, a
// bibitem is only detected when it sits at position 0.
400 Inset const *ip = par->getInset(0);
401 if (ip != nullptr && ip->lyxCode() == BIBITEM_CODE) {
// NOTE(review): the dynamic_cast result is dereferenced without a null check.
402 const auto * bibitem = dynamic_cast<const InsetBibitem*>(par->getInset(i));
// The bibitem key becomes the xml:id attribute of the entry.
403 attr = from_utf8("xml:id='") + bibitem->getParam("key") + from_utf8("'");
407 xs << xml::StartTag(from_utf8("bibliomixed"), attr);
409 // Generate the entry.
410 par->simpleDocBookOnePar(buf, xs, runparams, text.outerFont(distance(begin, par)), true, true, 0);
412 // End the precooked bibliography entry.
413 xs << xml::EndTag("bibliomixed");
417 // If this is the last paragraph in a bibliography, close the bibliography tag.
// At this point `par` has been advanced by the loop above (par == pend).
418 if (par == end || par->layout().latextype != LATEX_BIB_ENVIRONMENT) {
419 xs << xml::EndTag("bibliography");
// Outputs the paragraphs in [pbegin, pend) as DocBook paragraphs. Each paragraph is
// first rendered into a temporary stream; its tags and content are only written to
// `xs` when that rendering contains something other than spaces and line breaks.
// NOTE(review): some parameters (`buf`, `xs`, `text`), the declarations of `xs2` and
// `nextpar`, and the return statement are not visible here.
427 ParagraphList::const_iterator makeParagraphs(
430 OutputParams const &runparams,
432 ParagraphList::const_iterator const & pbegin,
433 ParagraphList::const_iterator const & pend)
435 auto const begin = text.paragraphs().begin();
436 auto const end = text.paragraphs().end();
437 ParagraphList::const_iterator par = pbegin;
// prevpar starts equal to par, so on the first iteration openParTag() receives the
// same paragraph as both current and previous.
438 ParagraphList::const_iterator prevpar = pbegin;
440 for (; par != pend; prevpar = par, ++par) {
441 // We want to open the paragraph tag if:
442 // (i) the current layout permits multiple paragraphs
443 // (ii) we are either not already inside a paragraph (HTMLIsBlock) OR
444 // we are, but this is not the first paragraph
446 // But there is also a special case, and we first see whether we are in it.
447 // We do not want to open the paragraph tag if this paragraph contains
448 // only one item, and that item is "inline", i.e., not HTMLIsBlock (such
449 // as a branch). On the other hand, if that single item has a font change
450 // applied to it, then we still do need to open the paragraph.
452 // Obviously, this is very fragile. The main reason we need to do this is
453 // because of branches, e.g., a branch that contains an entire new section.
454 // We do not really want to wrap that whole thing in a <div>...</div>.
455 bool special_case = false;
// Only a paragraph consisting of exactly one element that is an inset is a candidate.
456 Inset const *specinset = par->size() == 1 ? par->getInset(0) : nullptr;
457 if (specinset && !specinset->getLayout().htmlisblock()) { // TODO: Convert htmlisblock to a DocBook parameter?
458 Layout const &style = par->layout();
// Compare the layout's default font with the actual font at position 0 to detect
// a font change on the single inset.
459 FontInfo const first_font = style.labeltype == LABEL_MANUAL ?
460 style.labelfont : style.font;
461 FontInfo const our_font =
462 par->getFont(buf.masterBuffer()->params(), 0,
463 text.outerFont(distance(begin, par))).fontInfo();
465 if (first_font == our_font)
469 // Plain layouts must be ignored.
470 if (!special_case && buf.params().documentClass().isPlainLayout(par->layout()) && !runparams.docbook_force_pars)
472 // TODO: Could get rid of this with a DocBook equivalent to htmlisblock?
// Further single-inset paragraphs that must not be wrapped in a paragraph tag.
473 if (!special_case && par->size() == 1 && par->getInset(0)) {
474 Inset const * firstInset = par->getInset(0);
476 // Floats cannot be in paragraphs.
477 special_case = to_utf8(firstInset->layoutName()).substr(0, 6) == "Float:";
479 // Bibliographies cannot be in paragraphs.
480 if (!special_case && firstInset->asInsetCommand())
481 special_case = firstInset->asInsetCommand()->params().getCmdName() == "bibtex";
483 // Equations do not deserve their own paragraph (DocBook allows them outside paragraphs).
484 if (!special_case && firstInset->asInsetMath())
487 // ERTs are in comments, not paragraphs.
488 if (!special_case && firstInset->lyxCode() == lyx::ERT_CODE)
491 // Listings should not get into their own paragraph.
492 if (!special_case && firstInset->lyxCode() == lyx::LISTINGS_CODE)
// NOTE(review): the remaining operand(s) of this condition are not visible here.
496 bool const open_par = runparams.docbook_make_pars
497 && (!runparams.docbook_in_par || par != pbegin)
500 // We want to issue the closing tag if either:
501 // (i) We opened it, and either docbook_in_par is false,
502 // or we're not in the last paragraph, anyway.
503 // (ii) We didn't open it and docbook_in_par is true,
504 // but we are in the first par, and there is a next par.
507 bool const close_par =
508 ((open_par && (!runparams.docbook_in_par || nextpar != pend))
509 || (!open_par && runparams.docbook_in_par && par == pbegin && nextpar != pend));
511 // Determine if this paragraph has some real content. Things like new pages are not caught
512 // by Paragraph::empty(), even though they do not generate anything useful in DocBook.
513 odocstringstream os2;
// Render into the temporary stream first (xs2 presumably wraps os2).
515 par->simpleDocBookOnePar(buf, xs2, runparams, text.outerFont(distance(begin, par)), open_par, close_par, 0);
// Strip spaces, carriage returns and line feeds; whatever remains counts as content.
517 docstring cleaned = os2.str();
518 static const lyx::regex reg("[ \\r\\n]*");
519 cleaned = from_utf8(lyx::regex_replace(to_utf8(cleaned), reg, string("")));
521 if (!cleaned.empty()) {
523 openParTag(xs, &*par, &*prevpar);
// The pre-rendered text is already XML, so it is written without escaping.
525 xs << XMLStream::ESCAPE_NONE << os2.str();
// `end` (the end of the whole text) is not dereferenceable, hence the nullptr.
528 closeTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr);
// True when the layout is a plain environment or a bibliography environment
// (LATEX_ENVIRONMENT or LATEX_BIB_ENVIRONMENT); the list/item environment types
// are not included.
537 bool isNormalEnv(Layout const &lay)
539 return lay.latextype == LATEX_ENVIRONMENT
540 || lay.latextype == LATEX_BIB_ENVIRONMENT;
// Outputs the environment spanning [pbegin, pend): opens the environment's tags,
// then walks its paragraphs, emitting labels and items for paragraphs at the
// environment's own depth and recursing (or delegating to makeParagraphs /
// makeParagraphBibliography) for nested content, and finally closes the tags.
// NOTE(review): some parameters (`buf`, `xs`, `text`), several declarations (`sep`,
// `next_par`, `nextpar`), loop increments and the return statement are not visible here.
544 ParagraphList::const_iterator makeEnvironment(
547 OutputParams const &runparams,
549 ParagraphList::const_iterator const & pbegin,
550 ParagraphList::const_iterator const & pend)
552 auto const begin = text.paragraphs().begin();
553 auto const end = text.paragraphs().end();
554 ParagraphList::const_iterator par = pbegin;
// Depth of the paragraph that starts this environment; used to tell its own
// paragraphs apart from nested ones.
555 depth_type const origdepth = pbegin->params().depth();
557 // Find the previous paragraph.
558 auto prevpar = begin;
559 if (prevpar != par) {
560 auto prevpar_next = prevpar;
563 while (prevpar_next != par) {
569 // open tag for this environment
570 openParTag(xs, &*par, &*prevpar);
573 // we will on occasion need to remember a layout from before.
574 Layout const *lastlay = nullptr;
576 while (par != pend) {
577 Layout const & style = par->layout();
578 ParagraphList::const_iterator send;
580 // Actual content of this paragraph.
581 switch (style.latextype) {
582 case LATEX_ENVIRONMENT:
583 case LATEX_LIST_ENVIRONMENT:
584 case LATEX_ITEM_ENVIRONMENT: {
585 // There are two possibilities in this case.
586 // One is that we are still in the environment in which we
587 // started---which we will be if the depth is the same.
588 if (par->params().depth() == origdepth) {
589 LATTEST(par->layout() == style);
// Close the item that was left open by an earlier iteration (see lastlay below).
590 if (lastlay != nullptr) {
591 closeItemTag(xs, *lastlay);
592 if (lastlay->docbookitemwrappertag() != "NONE") {
593 xs << xml::EndTag(lastlay->docbookitemwrappertag());
599 // this will be positive if we want to skip the
600 // initial word (if it's been taken for the label).
603 // Open a wrapper tag if needed.
604 if (style.docbookitemwrappertag() != "NONE") {
605 xs << xml::StartTag(style.docbookitemwrappertag(), style.docbookitemwrapperattr());
// Label output: only for layouts that have a label and define a label tag.
610 if (style.labeltype != LABEL_NO_LABEL &&
611 style.docbookitemlabeltag() != "NONE") {
613 if (isNormalEnv(style)) {
614 // in this case, we print the label only for the first
615 // paragraph (as in a theorem or an abstract).
617 docstring const lbl = pbegin->params().labelString();
619 openLabelTag(xs, style);
621 closeLabelTag(xs, style);
623 // No new line after closeLabelTag.
627 } else { // some kind of list
628 if (style.labeltype == LABEL_MANUAL) {
629 // Only variablelist gets here.
631 openLabelTag(xs, style);
// The first word of the paragraph is used as the label; `sep` receives the value
// returned by firstWordDocBook and is later passed on as the start offset.
632 sep = par->firstWordDocBook(xs, runparams);
633 closeLabelTag(xs, style);
635 openLabelTag(xs, style);
636 xs << par->params().labelString();
637 closeLabelTag(xs, style);
640 } // end label output
642 // Start generating the item.
643 bool wasInParagraph = runparams.docbook_in_par;
644 openItemTag(xs, style);
645 bool getsIntoParagraph = openInnerItemTag(xs, style);
// Work on a copy of the output parameters so the caller's are left untouched.
646 OutputParams rp = runparams;
647 rp.docbook_in_par = wasInParagraph | getsIntoParagraph;
649 // Maybe the item is completely empty, i.e. if the first word ends at the end of the current paragraph
650 // AND if the next paragraph doesn't have the same depth (if there is such a paragraph).
651 // Common case: there is only the first word on the line, but there is a nested list instead
653 bool emptyItem = false;
654 if (sep == par->size()) {
657 if (next_par == text.paragraphs().end()) // There is no next paragraph.
659 else // There is a next paragraph: check depth.
660 emptyItem = par->params().depth() >= next_par->params().depth();
664 // Avoid having an empty item, this is not valid DocBook. A single character is enough to force
665 // generation of a full <para>.
668 // Generate the rest of the paragraph, if need be.
669 par->simpleDocBookOnePar(buf, xs, rp, text.outerFont(distance(begin, par)), true, true, sep);
673 if (getsIntoParagraph)
674 closeInnerItemTag(xs, style);
676 // We may not want to close the tag yet, in particular:
677 // If we're not at the end of the item...
679 // and are doing items...
680 && !isNormalEnv(style)
681 // and if the depth has changed...
682 && par->params().depth() != origdepth) {
683 // then we'll save this layout for later, and close it when
684 // we get another item.
687 closeItemTag(xs, style);
689 // Eventually, close the item wrapper.
690 if (style.docbookitemwrappertag() != "NONE") {
691 xs << xml::EndTag(style.docbookitemwrappertag());
696 // The other possibility is that the depth has increased.
// Nested environment: find its extent and recurse.
698 send = findEndOfEnvironment(par, pend);
699 par = makeEnvironment(buf, xs, runparams, text, par, send);
// Runs of plain paragraphs and of bibliography paragraphs are delegated.
703 case LATEX_PARAGRAPH:
704 send = findLast(par, pend, LATEX_PARAGRAPH);
705 par = makeParagraphs(buf, xs, runparams, text, par, send);
707 case LATEX_BIB_ENVIRONMENT:
708 send = findLast(par, pend, LATEX_BIB_ENVIRONMENT);
709 par = makeParagraphBibliography(buf, xs, runparams, text, par, send);
// After the loop: close an item that is still pending, if any.
717 if (lastlay != nullptr) {
718 closeItemTag(xs, *lastlay);
719 if (lastlay->docbookitemwrappertag() != "NONE") {
720 xs << xml::EndTag(lastlay->docbookitemwrappertag());
// NOTE(review): which paragraph `par` designates here depends on statements that
// are not visible (after the loop above it would equal pend) -- confirm.
726 closeTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr);
// Outputs the single paragraph at `pbegin` between its opening and closing tags.
// NOTE(review): the line carrying the function name and some parameters is not
// visible here; judging from the call in makeAny() this is makeCommand(buf, xs,
// runparams, text, pbegin) -- confirm.
735 OutputParams const & runparams,
737 ParagraphList::const_iterator const & pbegin)
739 // No need for labels, as they are handled by DocBook tags.
740 auto const begin = text.paragraphs().begin();
741 auto const end = text.paragraphs().end();
// nextpar starts at pbegin (presumably advanced by one in a statement not visible here).
742 auto nextpar = pbegin;
745 // Find the previous paragraph.
746 auto prevpar = begin;
747 if (prevpar != pbegin) {
748 auto prevpar_next = prevpar;
751 while (prevpar_next != pbegin) {
757 // Generate this command.
758 openParTag(xs, &*pbegin, &*prevpar);
760 pbegin->simpleDocBookOnePar(buf, xs, runparams,
761 text.outerFont(distance(begin, pbegin)));
// `end` is not dereferenceable, hence the nullptr when there is no next paragraph.
763 closeTag(xs, &*pbegin, (nextpar != end) ? &*nextpar : nullptr);
// Dispatches the output of the paragraph at `par` according to its layout's
// latextype: commands go to makeCommand(), environments to makeEnvironment(),
// plain paragraphs to makeParagraphs() and bibliography paragraphs to
// makeParagraphBibliography(). Returns the pair (par, send) after the call.
// NOTE(review): some parameters (`text`, `buf`, `xs`) and the statements ending
// each case are not visible here.
767 pair<ParagraphList::const_iterator, ParagraphList::const_iterator> makeAny(
771 OutputParams const &ourparams,
772 ParagraphList::const_iterator par,
773 ParagraphList::const_iterator send,
774 ParagraphList::const_iterator pend)
776 Layout const & style = par->layout();
778 switch (style.latextype) {
779 case LATEX_COMMAND: {
780 // The files with which we are working never have more than
781 // one paragraph in a command structure.
783 // if (ourparams.docbook_in_par)
784 // fix it so we don't get sections inside standard, e.g.
785 // note that we may then need to make runparams not const, so we
786 // can communicate that back.
787 // FIXME Maybe this fix should be in the routines themselves, in case
788 // they are called from elsewhere.
// Unlike the other cases, the result of makeCommand() is not assigned to `par` here.
789 makeCommand(buf, xs, ourparams, text, par);
793 case LATEX_ENVIRONMENT:
794 case LATEX_LIST_ENVIRONMENT:
795 case LATEX_ITEM_ENVIRONMENT:
796 // FIXME Same fix here.
797 send = findEndOfEnvironment(par, pend);
798 par = makeEnvironment(buf, xs, ourparams, text, par, send);
800 case LATEX_PARAGRAPH:
801 send = findLast(par, pend, LATEX_PARAGRAPH);
802 par = makeParagraphs(buf, xs, ourparams, text, par, send);
804 case LATEX_BIB_ENVIRONMENT:
805 send = findLast(par, pend, LATEX_BIB_ENVIRONMENT);
806 par = makeParagraphBibliography(buf, xs, ourparams, text, par, send);
810 return make_pair(par, send);
813 } // end anonymous namespace
// Result type of hasDocumentSectioning(): (whether a sectioning layout was found,
// paragraph index reached by the scan).
816 using DocBookDocumentSectioning = tuple<bool, pit_type>;
// Immutable description of the paragraphs related to the <info> element, as
// computed by getParagraphsWithInfo(). All sets hold paragraph indices.
819 struct DocBookInfoTag
// Paragraphs whose layout reports docbookininfo() == "maybe".
821 const set<pit_type> shouldBeInInfo;
// Paragraphs whose layout reports docbookininfo() == "always".
822 const set<pit_type> mustBeInInfo;
// Paragraphs identified as belonging to the abstract.
823 const set<pit_type> abstract;
// Stores copies of the three sets plus the paragraph range [bpit, epit) they were
// computed for (the declarations of the bpit/epit members are not visible here).
827 DocBookInfoTag(const set<pit_type> & shouldBeInInfo, const set<pit_type> & mustBeInInfo,
828 const set<pit_type> & abstract, pit_type bpit, pit_type epit) :
829 shouldBeInInfo(shouldBeInInfo), mustBeInInfo(mustBeInInfo), abstract(abstract),
830 bpit(bpit), epit(epit) {}
// Scans the paragraphs from index `bpit` (while bpit < epit) looking for a layout
// whose category is "Sectioning". Returns a tuple (found, bpit), where bpit is the
// index the scan stopped at.
// NOTE(review): "¶graphs" in the signature looks like a mis-encoded "&paragraphs"
// (the body refers to `paragraphs`) -- confirm. The loop's increment/exit
// statements are not visible here.
834 DocBookDocumentSectioning hasDocumentSectioning(ParagraphList const ¶graphs, pit_type bpit, pit_type const epit) {
835 bool documentHasSections = false;
837 while (bpit < epit) {
838 Layout const &style = paragraphs[bpit].layout();
839 documentHasSections |= style.category() == from_utf8("Sectioning");
841 if (documentHasSections) {
846 // Paragraphs before the first section: [ runparams.par_begin ; eppit )
848 return make_tuple(documentHasSections, bpit);
// Checks every position of `par`: a position that is not an inset, or whose inset
// is not an InsetNote, disqualifies the paragraph.
// NOTE(review): the return statements are not visible here; presumably false on the
// first disqualifying position and true otherwise (which would include an empty
// paragraph) -- confirm.
852 bool hasOnlyNotes(Paragraph const & par)
854 for (int i = 0; i < par.size(); ++i)
855 if (!par.isInset(i) || !dynamic_cast<InsetNote *>(par.insetList().get(i)))
// Classifies the paragraphs in [bpit, epit) for the <info> element: which ones must
// or may go into <info> (based on the layout's docbookininfo() value) and which ones
// form the abstract. Returns a DocBookInfoTag built from these sets together with the
// (possibly advanced) bpit and the index cpit at which the info-like part ends.
// NOTE(review): "¶graphs" in the signature looks like a mis-encoded "&paragraphs"
// (the body refers to `paragraphs`) -- confirm. Several increments, `break`s, the
// declarations of `pit` and the `done` label are not visible here.
861 DocBookInfoTag getParagraphsWithInfo(ParagraphList const ¶graphs, pit_type bpit, pit_type const epit) {
862 set<pit_type> shouldBeInInfo;
863 set<pit_type> mustBeInInfo;
864 set<pit_type> abstract;
866 // Find the first non empty paragraph by mutating bpit.
867 while (bpit < epit) {
868 Paragraph const &par = paragraphs[bpit];
869 if (par.empty() || hasOnlyNotes(par))
875 // Find the last info-like paragraph.
876 pit_type cpit = bpit;
877 bool hasAbstractLayout = false;
878 while (cpit < epit) {
879 // Skip paragraphs only containing one note.
880 Paragraph const & par = paragraphs[cpit];
881 if (hasOnlyNotes(par)) {
// Remember whether an explicit "Abstract" layout is used; it takes precedence
// over the heuristic abstract detection below.
886 if (par.layout().name() == from_ascii("Abstract"))
887 hasAbstractLayout = true;
889 // Based on layout information, store this paragraph in one set: should be in <info>, must be.
890 Layout const &style = par.layout();
892 if (style.docbookininfo() == "always") {
893 mustBeInInfo.emplace(cpit);
894 } else if (style.docbookininfo() == "maybe") {
895 shouldBeInInfo.emplace(cpit);
897 // Hypothesis: the <info> parts should be grouped together near the beginning bpit.
898 // There may be notes in between, but nothing else.
903 // Now, cpit points to the last paragraph that has things that could go in <info>.
904 // bpit is the beginning of the <info> part.
906 // Go once again through the list of paragraphs to find the abstract. If there is an abstract
907 // layout, only consider it. Otherwise, an abstract is just a sequence of paragraphs with text.
908 if (hasAbstractLayout) {
910 while (pit < cpit) { // Don't overshoot the <info> part.
911 if (paragraphs[pit].layout().name() == from_ascii("Abstract"))
912 abstract.emplace(pit);
// Heuristic branch: epit + 1 serves as the "no abstract paragraph seen yet" marker.
916 pit_type lastAbstract = epit + 1; // A nonsensical value.
917 docstring lastAbstractLayout;
920 while (pit < cpit) { // Don't overshoot the <info> part.
921 const Paragraph & par = paragraphs.at(pit);
922 if (!par.insetList().empty()) {
923 for (const auto &i : par.insetList()) {
// Only insets for which getText(0) is non-null are considered.
924 if (i.inset->getText(0) != nullptr) {
925 if (lastAbstract == epit + 1) {
926 // First paragraph that matches the heuristic definition of abstract.
928 lastAbstractLayout = par.layout().name();
929 } else if (pit > lastAbstract + 1 || par.layout().name() != lastAbstractLayout) {
930 // This is either too far from the last abstract paragraph or doesn't
931 // have the right layout name, BUT there has already been an abstract
932 // in this document: done with detecting the abstract.
933 goto done; // Easier to get out of two nested loops.
936 abstract.emplace(pit);
946 return DocBookInfoTag(shouldBeInInfo, mustBeInInfo, abstract, bpit, cpit);
// Outputs the paragraphs with indices in [bpit, epit) through makeAny(), without
// any section handling. The end index is exclusive (loop runs while bpit < epit).
// NOTE(review): some parameters (`text`, `buf`, `xs`, `bpit`, `epit`), the
// declaration receiving the `pend` expression and the return statement are not
// visible here.
950 pit_type generateDocBookParagraphWithoutSectioning(
954 OutputParams const & runparams,
955 ParagraphList const & paragraphs,
959 auto par = paragraphs.iterator_at(bpit);
960 auto lastStartedPar = par;
961 ParagraphList::const_iterator send;
// End iterator: the list's end() when epit equals the number of paragraphs,
// otherwise the iterator at index epit.
963 (epit == (int) paragraphs.size()) ?
964 paragraphs.end() : paragraphs.iterator_at(epit);
966 while (bpit < epit) {
967 tie(par, send) = makeAny(text, buf, xs, runparams, par, send, pend);
// makeAny() may consume several paragraphs at once; advance the index by the
// distance the iterator moved.
968 bpit += distance(lastStartedPar, par);
969 lastStartedPar = par;
// Outputs the <info> element described by `info`: the paragraphs of the info range
// and, if it produces real content, the abstract.
// NOTE(review): some parameters (`text`, `buf`, `xs`), the declaration of `xs2`, the
// guards around the <info> start/end tags and the declaration of `abstract` are not
// visible here.
976 void outputDocBookInfo(
980 OutputParams const & runparams,
981 ParagraphList const & paragraphs,
982 DocBookInfoTag const & info)
984 // Perform an additional check on the abstract. Sometimes, there are many paragraphs that should go
985 // into the abstract, but none generates actual content. Thus, first generate to a temporary stream,
986 // then only create the <abstract> tag if these paragraphs generate some content.
987 // This check must be performed *before* a decision on whether or not to output <info> is made.
988 bool hasAbstract = !info.abstract.empty();
// First and last paragraph index of the abstract.
991 pit_type bpitAbstract = *std::min_element(info.abstract.begin(), info.abstract.end());
992 pit_type epitAbstract = *std::max_element(info.abstract.begin(), info.abstract.end());
994 odocstringstream os2;
// NOTE(review): epitAbstract is the index of the LAST abstract paragraph, while the
// callee loops `while (bpit < epit)`, i.e. treats its end index as exclusive --
// check that the last abstract paragraph is not skipped.
996 generateDocBookParagraphWithoutSectioning(text, buf, xs2, runparams, paragraphs, bpitAbstract, epitAbstract);
998 // Actually output the abstract if there is something to do. Don't count line feeds or spaces in this,
999 // even though they must be properly output if there is some abstract.
1000 docstring abstractContent = os2.str();
1001 static const lyx::regex reg("[ \\r\\n]*");
1002 abstractContent = from_utf8(lyx::regex_replace(to_utf8(abstractContent), reg, string("")));
1004 // Nothing? Then there is no abstract!
1005 if (abstractContent.empty())
1006 hasAbstract = false;
1009 // The abstract must go in <info>.
1010 bool needInfo = !info.mustBeInInfo.empty() || hasAbstract;
1012 // Start the <info> tag if required.
1014 xs.startDivision(false);
1015 xs << xml::StartTag("info");
1019 // Output the elements that should go in <info>.
1020 generateDocBookParagraphWithoutSectioning(text, buf, xs, runparams, paragraphs, info.bpit, info.epit);
// NOTE(review): `abstract` is used here and below, but its declaration is not
// visible -- presumably the pre-rendered abstract text from os2; confirm.
1022 if (hasAbstract && !abstract.empty()) { // The second test is probably superfluous.
// The tag name comes from the layout of the first abstract paragraph.
1023 string tag = paragraphs[*info.abstract.begin()].layout().docbookforceabstracttag();
1027 xs << xml::StartTag(tag);
// Already-generated XML: written without escaping.
1029 xs << XMLStream::ESCAPE_NONE << abstract;
1030 xs << xml::EndTag(tag);
1034 // End the <info> tag if it was started.
1036 xs << xml::EndTag("info");
// Computes the <info>-related paragraphs between runparams.par_begin and `epit`
// with getParagraphsWithInfo() and outputs them with outputDocBookInfo().
// NOTE(review): "¶graphs" below looks like a mis-encoded "&paragraphs" (the name
// `paragraphs` is used right after) -- confirm. Some parameters (`text`, `buf`,
// `xs`, `epit`) are not visible here.
1043 void docbookFirstParagraphs(
1047 OutputParams const &runparams,
1050 // Handle the beginning of the document, supposing it has sections.
1051 // Major role: output the first <info> tag.
1053 ParagraphList const ¶graphs = text.paragraphs();
1054 pit_type bpit = runparams.par_begin;
1055 DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit);
1056 outputDocBookInfo(text, buf, xs, runparams, paragraphs, info);
// Despite its name, this does not test for emptiness in general: it returns true
// only when the paragraph's inset list holds exactly one inset and the inset
// obtained with insets.get(0) is a collapsible InsetNote.
1060 bool isParagraphEmpty(const Paragraph &par)
1062 InsetList const &insets = par.insetList();
// Number of insets in the list, counted by iterating over it.
1063 size_t insetsLength = distance(insets.begin(), insets.end());
1064 bool hasParagraphOnlyNote = insetsLength == 1 && insets.get(0) && insets.get(0)->asInsetCollapsible() &&
1065 dynamic_cast<InsetNote *>(insets.get(0));
1066 return hasParagraphOnlyNote;
// Outputs a whole document that has no sections: first the <info> element, then
// every paragraph in [runparams.par_begin, runparams.par_end) through makeAny(),
// with special handling of paragraphs for which isParagraphEmpty() is true.
// NOTE(review): "¶graphs" below looks like a mis-encoded "&paragraphs" (the name
// `paragraphs` is used right after) -- confirm. Some parameters (`text`, `buf`,
// `xs`) and a few statements of the loop are not visible here.
1070 void docbookSimpleAllParagraphs(
1074 OutputParams const & runparams)
1076 // Handle the document, supposing it has no sections (i.e. a "simple" document).
1078 // First, the <info> tag.
1079 ParagraphList const ¶graphs = text.paragraphs();
1080 pit_type bpit = runparams.par_begin;
1081 pit_type const epit = runparams.par_end;
1082 DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit);
1083 outputDocBookInfo(text, buf, xs, runparams, paragraphs, info);
1086 // Then, the content.
// End iterator: end() when epit equals the number of paragraphs, else the iterator at epit.
1087 ParagraphList::const_iterator const pend =
1088 (epit == (int) paragraphs.size()) ?
1089 paragraphs.end() : paragraphs.iterator_at(epit);
1091 while (bpit < epit) {
1092 auto par = paragraphs.iterator_at(bpit);
1093 ParagraphList::const_iterator const lastStartedPar = par;
1094 ParagraphList::const_iterator send;
// Note-only paragraphs (presumably skipped: `par` is advanced in a statement not visible here).
1096 if (isParagraphEmpty(*par)) {
1098 bpit += distance(lastStartedPar, par);
1102 // Generate this paragraph.
1103 tie(par, send) = makeAny(text, buf, xs, runparams, par, send, pend);
// Advance the index by the number of paragraphs makeAny() consumed.
1104 bpit += distance(lastStartedPar, par);
// Entry point for DocBook output of the paragraphs of `text` in the range
// [runparams.par_begin, runparams.par_end). Documents without sectioning layouts
// are handed to docbookSimpleAllParagraphs(); otherwise the <info> part is output
// first, then each paragraph, with section elements opened and closed according to
// a stack of (toc level, tag) pairs.
// NOTE(review): "¶graphs" below looks like a mis-encoded "&paragraphs" (the name
// `paragraphs` is used right after) -- confirm. Some parameters (`buf`, `xs`), the
// declaration of `eppit`, stack pops and a few other statements are not visible here.
1109 void docbookParagraphs(Text const &text,
1112 OutputParams const &runparams) {
1113 ParagraphList const ¶graphs = text.paragraphs();
// An empty range means "the whole text". The assignments go through a const
// reference, so par_begin/par_end are presumably declared mutable.
1114 if (runparams.par_begin == runparams.par_end) {
1115 runparams.par_begin = 0;
1116 runparams.par_end = paragraphs.size();
1118 pit_type bpit = runparams.par_begin;
1119 pit_type const epit = runparams.par_end;
// On an invalid range, an XML comment is written to the output as the fallback action.
1120 LASSERT(bpit < epit,
1122 xs << XMLStream::ESCAPE_NONE << "<!-- DocBook output error! -->\n";
1126 ParagraphList::const_iterator const pend =
1127 (epit == (int) paragraphs.size()) ?
1128 paragraphs.end() : paragraphs.iterator_at(epit);
1129 std::stack<std::pair<int, string>> headerLevels; // Used to determine when to open/close sections: store the depth
1130 // of the section and the tag that was used to open it.
1132 // Detect whether the document contains sections. If there are no sections, there can be no automatically
1133 // discovered abstract.
1134 bool documentHasSections;
1136 tie(documentHasSections, eppit) = hasDocumentSectioning(paragraphs, bpit, epit);
1138 if (documentHasSections) {
// Output the <info> part found before index eppit.
1139 docbookFirstParagraphs(text, buf, xs, runparams, eppit);
// No sections: the simple path handles the document (presumably followed by a
// return that is not visible here).
1142 docbookSimpleAllParagraphs(text, buf, xs, runparams);
// Becomes true at the first paragraph flagged as start of appendix and stays true.
1146 bool currentlyInAppendix = false;
1148 while (bpit < epit) {
// Per-paragraph copy of the output parameters; it is modified below
// (anchors to ignore) without affecting `runparams`.
1149 OutputParams ourparams = runparams;
1151 auto par = paragraphs.iterator_at(bpit);
1152 if (par->params().startOfAppendix())
1153 currentlyInAppendix = true;
1154 Layout const &style = par->layout();
1155 ParagraphList::const_iterator const lastStartedPar = par;
1156 ParagraphList::const_iterator send;
// Note-only paragraphs (presumably skipped: `par` is advanced in a statement not visible here).
1158 if (isParagraphEmpty(*par)) {
1160 bpit += distance(lastStartedPar, par);
1164 // Think about adding <section> and/or </section>s.
1165 const bool isLayoutSectioning = style.category() == from_utf8("Sectioning");
1166 if (isLayoutSectioning) {
1167 int level = style.toclevel;
1169 // Need to close a previous section if it has the same level or a higher one (close <section> if opening a <h2>
1170 // after a <h2>, <h3>, <h4>, <h5> or <h6>). More examples:
1171 // - current: h2; back: h1; do not close any <section>
1172 // - current: h1; back: h2; close two <section> (first the <h2>, then the <h1>, so a new <h1> can come)
1173 while (!headerLevels.empty() && level <= headerLevels.top().first) {
1174 int stackLevel = headerLevels.top().first;
1175 docstring stackTag = from_utf8("</" + headerLevels.top().second + ">");
1178 // Output the tag only if it corresponds to a legit section.
1179 if (stackLevel != Layout::NOT_IN_TOC)
1180 xs << XMLStream::ESCAPE_NONE << stackTag << xml::CR();
1183 // Open the new section: first push it onto the stack, then output it in DocBook.
// Inside the appendix, a "chapter" section tag is replaced by "appendix".
1184 string sectionTag = (currentlyInAppendix && style.docbooksectiontag() == "chapter") ?
1185 "appendix" : style.docbooksectiontag();
1186 headerLevels.push(std::make_pair(level, sectionTag));
1188 // Some sectioning-like elements should not be output (such as FrontMatter).
1189 if (level != Layout::NOT_IN_TOC) {
1190 // Look for a label in the title, i.e. a InsetLabel as a child.
1191 docstring id = docstring();
1192 for (pos_type i = 0; i < par->size(); ++i) {
1193 Inset const *inset = par->getInset(i);
1195 if (auto label = dynamic_cast<InsetLabel const *>(inset)) {
1196 // Generate the attributes for the section if need be.
1197 id += "xml:id=\"" + xml::cleanID(label->screenLabel()) + "\"";
1199 // Don't output the ID as a DocBook <anchor>.
1200 ourparams.docbook_anchors_to_ignore.emplace(label->screenLabel());
1202 // Cannot have multiple IDs per tag.
1208 // Write the open tag for this section.
// The tag is assembled by hand and written unescaped.
1209 docstring tag = from_utf8("<" + sectionTag);
1211 tag += from_utf8(" ") + id;
1212 tag += from_utf8(">");
1213 xs << XMLStream::ESCAPE_NONE << tag;
1218 // Close all sections before the bibliography.
1219 // TODO: Only close all when the bibliography is at the end of the document? Or force to output the bibliography at the end of the document? Or don't care (as allowed by DocBook)?
1220 auto insetsLength = distance(par->insetList().begin(), par->insetList().end());
1221 if (insetsLength > 0) {
1222 Inset const *firstInset = par->getInset(0);
1223 if (firstInset && dynamic_cast<InsetBibtex const *>(firstInset)) {
1224 while (!headerLevels.empty()) {
1225 int level = headerLevels.top().first;
1226 docstring tag = from_utf8("</" + headerLevels.top().second + ">");
1229 // Output the tag only if it corresponds to a legit section.
1230 if (level != Layout::NOT_IN_TOC) {
1231 xs << XMLStream::ESCAPE_NONE << tag;
1238 // Generate this paragraph.
1239 tie(par, send) = makeAny(text, buf, xs, ourparams, par, send, pend);
// Advance the index by the number of paragraphs makeAny() consumed.
1240 bpit += distance(lastStartedPar, par);
1243 // If need be, close <section>s, but only at the end of the document (otherwise, dealt with at the beginning
// The loop stops at the first stack entry whose level is not above Layout::NOT_IN_TOC.
1245 while (!headerLevels.empty() && headerLevels.top().first > Layout::NOT_IN_TOC) {
1246 docstring tag = from_utf8("</" + headerLevels.top().second + ">");
1248 xs << XMLStream::ESCAPE_NONE << tag;