2 * \file output_docbook.cpp
3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
6 * \author Lars Gullik Bjønnes
9 * Full author contact details are available in file CREDITS.
15 #include "buffer_funcs.h"
16 #include "BufferParams.h"
18 #include "InsetList.h"
20 #include "OutputParams.h"
21 #include "Paragraph.h"
22 #include "ParagraphList.h"
23 #include "ParagraphParameters.h"
26 #include "TextClass.h"
28 #include "insets/InsetBibtex.h"
29 #include "insets/InsetBibitem.h"
30 #include "insets/InsetLabel.h"
31 #include "insets/InsetNote.h"
33 #include "support/convert.h"
34 #include "support/debug.h"
35 #include "support/lassert.h"
36 #include "support/lstrings.h"
37 #include "support/textutils.h"
39 #include "support/regex.h"
47 using namespace lyx::support;
// Maps a font type to a string that docbookStartFontTag() and
// docbookEndFontTag() pass as the first argument of xml::FontTag and
// xml::EndFontTag respectively.
// NOTE(review): only the case labels survive in this extract; the value
// returned for each group of cases cannot be confirmed from here.
53 std::string fontToDocBookTag(xml::FontTypes type)
56 case xml::FontTypes::FT_EMPH:
57 case xml::FontTypes::FT_BOLD:
59 case xml::FontTypes::FT_NOUN:
61 case xml::FontTypes::FT_UBAR:
62 case xml::FontTypes::FT_WAVE:
63 case xml::FontTypes::FT_DBAR:
64 case xml::FontTypes::FT_SOUT:
65 case xml::FontTypes::FT_XOUT:
66 case xml::FontTypes::FT_ITALIC:
67 case xml::FontTypes::FT_UPRIGHT:
68 case xml::FontTypes::FT_SLANTED:
69 case xml::FontTypes::FT_SMALLCAPS:
70 case xml::FontTypes::FT_ROMAN:
71 case xml::FontTypes::FT_SANS:
73 case xml::FontTypes::FT_TYPE:
75 case xml::FontTypes::FT_SIZE_TINY:
76 case xml::FontTypes::FT_SIZE_SCRIPT:
77 case xml::FontTypes::FT_SIZE_FOOTNOTE:
78 case xml::FontTypes::FT_SIZE_SMALL:
79 case xml::FontTypes::FT_SIZE_NORMAL:
80 case xml::FontTypes::FT_SIZE_LARGE:
81 case xml::FontTypes::FT_SIZE_LARGER:
82 case xml::FontTypes::FT_SIZE_LARGEST:
83 case xml::FontTypes::FT_SIZE_HUGE:
84 case xml::FontTypes::FT_SIZE_HUGER:
85 case xml::FontTypes::FT_SIZE_INCREASE:
86 case xml::FontTypes::FT_SIZE_DECREASE:
// Returns the role string associated with a font type. fontToAttribute()
// embeds the result in a role='...' attribute.
// NOTE(review): many return statements are missing from this extract; the
// visible ones return "size_<name>" strings for the matching FT_SIZE_* types.
94 string fontToRole(xml::FontTypes type)
96 // Specific fonts are achieved with roles. The only common ones are "" for basic emphasis,
97 // and "bold"/"strong" for bold. With some specific options, other roles are copied into
98 // HTML output (via the DocBook XSLT sheets); otherwise, if not recognised, they are just ignored.
99 // Hence, it is not a problem to have many roles by default here.
100 // See https://www.sourceware.org/ml/docbook/2003-05/msg00269.html
102 case xml::FontTypes::FT_ITALIC:
103 case xml::FontTypes::FT_EMPH:
105 case xml::FontTypes::FT_BOLD:
107 case xml::FontTypes::FT_NOUN: // Outputs a <person>
108 case xml::FontTypes::FT_TYPE: // Outputs a <code>
110 case xml::FontTypes::FT_UBAR:
113 // All other roles are non-standard for DocBook.
115 case xml::FontTypes::FT_WAVE:
117 case xml::FontTypes::FT_DBAR:
119 case xml::FontTypes::FT_SOUT:
121 case xml::FontTypes::FT_XOUT:
123 case xml::FontTypes::FT_UPRIGHT:
125 case xml::FontTypes::FT_SLANTED:
127 case xml::FontTypes::FT_SMALLCAPS:
129 case xml::FontTypes::FT_ROMAN:
131 case xml::FontTypes::FT_SANS:
133 case xml::FontTypes::FT_SIZE_TINY:
135 case xml::FontTypes::FT_SIZE_SCRIPT:
136 return "size_script";
137 case xml::FontTypes::FT_SIZE_FOOTNOTE:
138 return "size_footnote";
139 case xml::FontTypes::FT_SIZE_SMALL:
141 case xml::FontTypes::FT_SIZE_NORMAL:
142 return "size_normal";
143 case xml::FontTypes::FT_SIZE_LARGE:
145 case xml::FontTypes::FT_SIZE_LARGER:
146 return "size_larger";
147 case xml::FontTypes::FT_SIZE_LARGEST:
148 return "size_largest";
149 case xml::FontTypes::FT_SIZE_HUGE:
151 case xml::FontTypes::FT_SIZE_HUGER:
153 case xml::FontTypes::FT_SIZE_INCREASE:
154 return "size_increase";
155 case xml::FontTypes::FT_SIZE_DECREASE:
156 return "size_decrease";
// Builds the attribute string for a font tag from the role returned by
// fontToRole(): the visible return produces role='<role>'.
// NOTE(review): the condition guarding that return is missing from this
// extract; per the comment below, presumably no attribute is produced when
// there is no role — confirm against the full file.
162 string fontToAttribute(xml::FontTypes type) {
163 // If there is a role (i.e. nonstandard use of a tag), output the attribute. Otherwise, the sheer tag is sufficient
165 string role = fontToRole(type);
167 return "role='" + role + "'";
173 } // end anonymous namespace
// Builds the opening font tag for `type`: an xml::FontTag constructed from
// fontToDocBookTag(type), fontToAttribute(type) (both converted with
// from_utf8) and the font type itself.
176 xml::FontTag docbookStartFontTag(xml::FontTypes type)
178 return xml::FontTag(from_utf8(fontToDocBookTag(type)), from_utf8(fontToAttribute(type)), type);
// Builds the closing font tag for `type`: an xml::EndFontTag constructed from
// fontToDocBookTag(type) (converted with from_utf8) and the font type itself.
// No attribute is involved on the closing side.
182 xml::EndFontTag docbookEndFontTag(xml::FontTypes type)
184 return xml::EndFontTag(from_utf8(fontToDocBookTag(type)), type);
190 // convenience functions
// Writes to `xs` the opening tags for paragraph `par`, all taken from the
// paragraph's layout: an optional wrapper tag, the paragraph tag itself and an
// optional item tag. `prevpar` may be null; when it is not, its layout is
// consulted to decide whether the wrapper has to be opened.
192 void openParTag(XMLStream & xs, const Paragraph * par, const Paragraph * prevpar)
194 Layout const & lay = par->layout();
199 // When should the wrapper be opened here? Only if the previous paragraph has the SAME wrapper tag
200 // (usually, they won't have the same layout) and the CURRENT one allows merging.
201 // The main use case is author information in several paragraphs: if the name of the author is the
202 // first paragraph of an author, then merging with the previous tag does not make sense. Say the
203 // next paragraph is the affiliation, then it should be output in the same <author> tag (different
204 // layout, same wrapper tag).
// NOTE(review): as written, when the previous paragraph has a wrapper tag
// other than "NONE", openWrapper is true only if both wrapper tags are equal
// AND the current layout does NOT merge with the previous one. A previous
// paragraph with a different wrapper tag therefore yields false. This does not
// obviously match the explanation above — confirm the intended condition.
205 bool openWrapper = lay.docbookwrappertag() != "NONE";
206 if (prevpar != nullptr) {
207 Layout const & prevlay = prevpar->layout();
208 if (prevlay.docbookwrappertag() != "NONE") {
209 openWrapper = prevlay.docbookwrappertag() == lay.docbookwrappertag()
210 && !lay.docbookwrappermergewithprevious();
// Wrapper start tag. The statement guarding this line is missing from this
// extract (presumably a test on openWrapper).
216 xs << xml::StartTag(lay.docbookwrappertag(), lay.docbookwrapperattr());
218 string tag = lay.docbooktag();
// "Plain Layout" is special-cased; the statement executed for it is missing
// from this extract.
219 if (tag == "Plain Layout")
222 if (!xs.isTagOpen(xml::ParTag(tag, lay.docbookattr()), 1)) // Don't nest a paragraph directly in a paragraph.
223 xs << xml::ParTag(tag, lay.docbookattr());
// The item tag is only emitted when the layout defines one.
225 if (lay.docbookitemtag() != "NONE")
226 xs << xml::StartTag(lay.docbookitemtag(), lay.docbookitemattr());
// Counterpart of openParTag(): writes to `xs` the closing tags for paragraph
// `par` in reverse order (item tag, paragraph tag, wrapper tag). `nextpar` may
// be null; when it is not, its layout is consulted to decide whether the
// wrapper has to be closed now.
230 void closeTag(XMLStream & xs, Paragraph const * par, Paragraph const * nextpar)
232 Layout const & lay = par->layout();
237 // See comment in openParTag.
// NOTE(review): when the next paragraph has a wrapper tag other than "NONE",
// closeWrapper is true only if both wrapper tags are equal AND the next layout
// does NOT merge with the previous one. A next paragraph with a different
// wrapper tag therefore yields false — confirm this is intended.
238 bool closeWrapper = lay.docbookwrappertag() != "NONE";
239 if (nextpar != nullptr) {
240 Layout const & nextlay = nextpar->layout();
241 if (nextlay.docbookwrappertag() != "NONE") {
242 closeWrapper = nextlay.docbookwrappertag() == lay.docbookwrappertag()
243 && !nextlay.docbookwrappermergewithprevious();
248 if (lay.docbookitemtag() != "NONE")
249 xs << xml::EndTag(lay.docbookitemtag());
251 string tag = lay.docbooktag();
// "Plain Layout" is special-cased; the statement executed for it is missing
// from this extract.
252 if (tag == "Plain Layout")
255 xs << xml::EndTag(tag);
// Wrapper end tag. The statement guarding this line is missing from this
// extract (presumably a test on closeWrapper).
257 xs << xml::EndTag(lay.docbookwrappertag());
// Writes the start tag for an item label, using the layout's
// docbookitemlabeltag() and docbookitemlabelattr().
261 void openLabelTag(XMLStream & xs, Layout const & lay) // Mostly for definition lists.
263 xs << xml::StartTag(lay.docbookitemlabeltag(), lay.docbookitemlabelattr());
// Writes the end tag matching openLabelTag(), i.e. the layout's
// docbookitemlabeltag().
267 void closeLabelTag(XMLStream & xs, Layout const & lay)
269 xs << xml::EndTag(lay.docbookitemlabeltag());
// Writes the start tag for an item, using the layout's docbookitemtag() and
// docbookitemattr(). Unlike openParTag(), no check against "NONE" is visible
// here.
274 void openItemTag(XMLStream & xs, Layout const & lay)
276 xs << xml::StartTag(lay.docbookitemtag(), lay.docbookitemattr());
// Opens the layout's inner item tag (as an xml::ParTag) when the layout
// defines one, i.e. when docbookiteminnertag() is not "NONE".
// NOTE(review): the return statements are missing from this extract; the
// inner tag being "para" is tested separately, presumably to decide the
// returned value — confirm against the full file.
280 // Return true when new elements are output in a paragraph, false otherwise.
281 bool openInnerItemTag(XMLStream & xs, Layout const & lay)
283 if (lay.docbookiteminnertag() != "NONE") {
285 xs << xml::ParTag(lay.docbookiteminnertag(), lay.docbookiteminnerattr());
287 if (lay.docbookiteminnertag() == "para") {
// Writes the end tag for the layout's inner item tag, but only when the layout
// defines one (docbookiteminnertag() is not "NONE").
295 void closeInnerItemTag(XMLStream & xs, Layout const & lay)
297 if (lay.docbookiteminnertag()!= "NONE") {
298 xs << xml::EndTag(lay.docbookiteminnertag());
// Writes the end tag matching openItemTag(), i.e. the layout's
// docbookitemtag().
304 inline void closeItemTag(XMLStream & xs, Layout const & lay)
306 xs << xml::EndTag(lay.docbookitemtag());
310 // end of convenience functions
// Starting from the paragraph after `p`, advances while the paragraph's
// layout().latextype equals `type`, stopping at `pend` at the latest. The
// paragraph `p` itself is skipped without being tested (++p comes first).
// NOTE(review): the declaration of `type` and the return statement are missing
// from this extract; presumably the final position of `p` is returned.
312 ParagraphList::const_iterator findLast(
313 ParagraphList::const_iterator p,
314 ParagraphList::const_iterator const & pend,
316 for (++p; p != pend && p->layout().latextype == type; ++p);
// Same scan as findLast() with the type fixed to LATEX_BIB_ENVIRONMENT:
// starting after `p`, advances while paragraphs are bibliography-environment
// paragraphs, stopping at `pend` at the latest.
// NOTE(review): the return statement is missing from this extract.
321 ParagraphList::const_iterator findLastBibliographyParagraph(
322 ParagraphList::const_iterator p,
323 ParagraphList::const_iterator const & pend) {
324 for (++p; p != pend && p->layout().latextype == LATEX_BIB_ENVIRONMENT; ++p);
// Scans forward from the paragraph after `pstart`, comparing each paragraph's
// depth and layout with those of `pstart`, to locate where the environment
// started at `pstart` ends (never going beyond `pend`).
// NOTE(review): the statements executed by each `if` below (return / continue)
// are missing from this extract.
330 ParagraphList::const_iterator findEndOfEnvironment(
331 ParagraphList::const_iterator const & pstart,
332 ParagraphList::const_iterator const & pend)
334 ParagraphList::const_iterator p = pstart;
// Depth of the starting paragraph; later paragraphs are compared against it.
335 size_t const depth = p->params().depth();
337 for (++p; p != pend; ++p) {
338 Layout const &style = p->layout();
339 // It shouldn't happen that e.g. a section command occurs inside
340 // a quotation environment, at a higher depth, but as of 6/2009,
341 // it can happen. We pretend that it's just at lowest depth.
342 if (style.latextype == LATEX_COMMAND)
345 // If depth is down, we're done
346 if (p->params().depth() < depth)
349 // If depth is up, we're not done
350 if (p->params().depth() > depth)
353 // FIXME I am not sure about the first check.
354 // Surely we *could* have different layouts that count as
355 // LATEX_PARAGRAPH, right?
// NOTE(review): `style` is bound to p->layout() above, so the second operand
// compares a layout with itself and looks like it can never be true. A
// comparison against the layout of `pstart` may have been intended — confirm.
356 if (style.latextype == LATEX_PARAGRAPH || style != p->layout())
// Outputs the paragraphs in [pbegin, pend) as bibliography entries: each
// paragraph becomes a <bibliomixed> element, and a surrounding <bibliography>
// element is opened/closed depending on the neighbouring paragraphs.
// NOTE(review): some parameter declarations, local declarations (e.g. `attr`)
// and the return statement are missing from this extract.
364 ParagraphList::const_iterator makeParagraphBibliography(
367 OutputParams const &runparams,
369 ParagraphList::const_iterator const & pbegin,
370 ParagraphList::const_iterator const & pend)
372 auto const begin = text.paragraphs().begin();
373 auto const end = text.paragraphs().end();
375 // Find the paragraph *before* pbegin.
// Done by walking forward from the start of the list with a look-ahead
// iterator until the look-ahead reaches pbegin.
376 ParagraphList::const_iterator pbegin_before = begin;
377 if (pbegin != begin) {
378 ParagraphList::const_iterator pbegin_before_next = begin;
379 ++pbegin_before_next;
381 while (pbegin_before_next != pbegin) {
383 ++pbegin_before_next;
387 ParagraphList::const_iterator par = pbegin;
389 // If this is the first paragraph in a bibliography, open the bibliography tag.
// NOTE(review): this condition is false when pbegin is the very first
// paragraph of the text, so no <bibliography> start tag is emitted by this
// statement in that case — confirm whether that is intended.
390 if (pbegin != begin && pbegin_before->layout().latextype != LATEX_BIB_ENVIRONMENT) {
391 xs << xml::StartTag("bibliography");
395 // Generate one <bibliomixed> entry for each paragraph in [pbegin, pend).
396 for (; par != pend; ++par) {
397 // Start the precooked bibliography entry. This is very much like opening a paragraph tag.
398 // Don't forget the citation ID!
// NOTE(review): the test below always inspects position 0 (getInset(0)) while
// the cast uses position i. As written, a bibitem inset is only ever detected
// when it sits at position 0; getInset(i) was probably intended in the test —
// confirm. The dynamic_cast result is dereferenced without a null check.
400 for (auto i = 0; i < par->size(); ++i) {
401 Inset const *ip = par->getInset(0);
402 if (ip != nullptr && ip->lyxCode() == BIBITEM_CODE) {
403 const auto * bibitem = dynamic_cast<const InsetBibitem*>(par->getInset(i));
// The bibitem's "key" parameter becomes the xml:id of the entry.
404 attr = from_utf8("xml:id='") + bibitem->getParam("key") + from_utf8("'");
408 xs << xml::StartTag(from_utf8("bibliomixed"), attr);
410 // Generate the entry.
411 par->simpleDocBookOnePar(buf, xs, runparams, text.outerFont(distance(begin, par)), true, true, 0);
413 // End the precooked bibliography entry.
414 xs << xml::EndTag("bibliomixed");
418 // If this is the last paragraph in a bibliography, close the bibliography tag.
// At this point `par` is the position reached by the loop above.
419 if (par == end || par->layout().latextype != LATEX_BIB_ENVIRONMENT) {
420 xs << xml::EndTag("bibliography");
// Outputs the paragraphs in [pbegin, pend). For each paragraph it decides
// whether paragraph tags must be opened and closed (open_par / close_par),
// renders the paragraph through a temporary stream, and only writes it to `xs`
// when the rendered text is non-empty once spaces and line breaks are removed.
// NOTE(review): several statements (assignments to special_case, the
// declarations of nextpar and xs2, parts of the open_par expression, the
// return) are missing from this extract.
428 ParagraphList::const_iterator makeParagraphs(
431 OutputParams const &runparams,
433 ParagraphList::const_iterator const & pbegin,
434 ParagraphList::const_iterator const & pend)
436 auto const begin = text.paragraphs().begin();
437 auto const end = text.paragraphs().end();
438 ParagraphList::const_iterator par = pbegin;
// prevpar trails par by one iteration; on the first iteration both are pbegin.
439 ParagraphList::const_iterator prevpar = pbegin;
441 for (; par != pend; prevpar = par, ++par) {
442 // We want to open the paragraph tag if:
443 // (i) the current layout permits multiple paragraphs
444 // (ii) we are either not already inside a paragraph (HTMLIsBlock) OR
445 // we are, but this is not the first paragraph
447 // But there is also a special case, and we first see whether we are in it.
448 // We do not want to open the paragraph tag if this paragraph contains
449 // only one item, and that item is "inline", i.e., not HTMLIsBlock (such
450 // as a branch). On the other hand, if that single item has a font change
451 // applied to it, then we still do need to open the paragraph.
453 // Obviously, this is very fragile. The main reason we need to do this is
454 // because of branches, e.g., a branch that contains an entire new section.
455 // We do not really want to wrap that whole thing in a <div>...</div>.
456 bool special_case = false;
// Only a paragraph of size 1 whose single position holds an inset qualifies.
457 Inset const *specinset = par->size() == 1 ? par->getInset(0) : nullptr;
458 if (specinset && !specinset->getLayout().htmlisblock()) { // TODO: Convert htmlisblock to a DocBook parameter?
459 Layout const &style = par->layout();
460 FontInfo const first_font = style.labeltype == LABEL_MANUAL ?
461 style.labelfont : style.font;
462 FontInfo const our_font =
463 par->getFont(buf.masterBuffer()->params(), 0,
464 text.outerFont(distance(begin, par))).fontInfo();
// The layout's font is compared with the font actually used at position 0;
// the statement executed when they match is missing from this extract.
466 if (first_font == our_font)
470 // Plain layouts must be ignored.
471 if (!special_case && buf.params().documentClass().isPlainLayout(par->layout()) && !runparams.docbook_force_pars)
473 // TODO: Could get rid of this with a DocBook equivalent to htmlisblock?
474 if (!special_case && par->size() == 1 && par->getInset(0)) {
475 Inset const * firstInset = par->getInset(0);
477 // Floats cannot be in paragraphs.
478 special_case = to_utf8(firstInset->layoutName()).substr(0, 6) == "Float:";
480 // Bibliographies cannot be in paragraphs.
481 if (!special_case && firstInset->asInsetCommand())
482 special_case = firstInset->asInsetCommand()->params().getCmdName() == "bibtex";
484 // Equations do not deserve their own paragraph (DocBook allows them outside paragraphs).
485 if (!special_case && firstInset->asInsetMath())
488 // ERTs are in comments, not paragraphs.
489 if (!special_case && firstInset->lyxCode() == lyx::ERT_CODE)
492 // Listings should not get into their own paragraph.
493 if (!special_case && firstInset->lyxCode() == lyx::LISTINGS_CODE)
497 bool const open_par = runparams.docbook_make_pars
498 && (!runparams.docbook_in_par || par != pbegin)
501 // We want to issue the closing tag if either:
502 // (i) We opened it, and either docbook_in_par is false,
503 // or we're not in the last paragraph, anyway.
504 // (ii) We didn't open it and docbook_in_par is true,
505 // but we are in the first par, and there is a next par.
508 bool const close_par =
509 ((open_par && (!runparams.docbook_in_par || nextpar != pend))
510 || (!open_par && runparams.docbook_in_par && par == pbegin && nextpar != pend));
512 // Determine if this paragraph has some real content. Things like new pages are not caught
513 // by Paragraph::empty(), even though they do not generate anything useful in DocBook.
// The paragraph is rendered into xs2; presumably xs2 writes into os2 (its
// declaration is missing from this extract).
514 odocstringstream os2;
516 par->simpleDocBookOnePar(buf, xs2, runparams, text.outerFont(distance(begin, par)), open_par, close_par, 0);
// Remove every space, carriage return and line feed before testing emptiness.
518 docstring cleaned = os2.str();
519 static const lyx::regex reg("[ \\r\\n]*");
520 cleaned = from_utf8(lyx::regex_replace(to_utf8(cleaned), reg, string("")));
522 if (!cleaned.empty()) {
524 openParTag(xs, &*par, &*prevpar);
// The unmodified rendered text (not `cleaned`) is what gets written out.
526 xs << XMLStream::ESCAPE_NONE << os2.str();
// closeTag() receives a null next paragraph when nextpar is the end of the text.
529 closeTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr);
// Returns true when the layout's latextype is LATEX_ENVIRONMENT or
// LATEX_BIB_ENVIRONMENT. makeEnvironment() uses this to tell these
// environments apart from list-like ones.
538 bool isNormalEnv(Layout const &lay)
540 return lay.latextype == LATEX_ENVIRONMENT
541 || lay.latextype == LATEX_BIB_ENVIRONMENT;
// Outputs the environment made of the paragraphs in [pbegin, pend): opens the
// environment's tags, then handles each paragraph according to its layout's
// latextype (items of the environment itself, nested environments at another
// depth, plain paragraphs, bibliography paragraphs), and finally closes any
// pending item and the environment's tags.
// NOTE(review): many statements are missing from this extract (several
// declarations such as `sep`, `parnext` and `next_par`, most `else` branches,
// `break`s and the return); comments below only describe what is visible.
545 ParagraphList::const_iterator makeEnvironment(
548 OutputParams const &runparams,
550 ParagraphList::const_iterator const & pbegin,
551 ParagraphList::const_iterator const & pend)
553 auto const begin = text.paragraphs().begin();
554 auto const end = text.paragraphs().end();
555 ParagraphList::const_iterator par = pbegin;
// Depth of the first paragraph: paragraphs at this depth belong to this
// environment, others to nested ones.
556 depth_type const origdepth = pbegin->params().depth();
558 // Output the opening tag for this environment.
560 // Find the previous paragraph.
561 auto prevpar = begin;
562 if (prevpar != par) {
563 auto prevpar_next = prevpar;
566 while (prevpar_next != par) {
572 // Open tag for this environment.
573 openParTag(xs, &*par, &*prevpar);
577 // we will on occasion need to remember a layout from before.
578 Layout const *lastlay = nullptr;
581 while (par != pend) {
582 Layout const & style = par->layout();
583 ParagraphList::const_iterator send;
588 // Actual content of this paragraph.
590 switch (style.latextype) {
591 case LATEX_ENVIRONMENT:
592 case LATEX_LIST_ENVIRONMENT:
593 case LATEX_ITEM_ENVIRONMENT: {
594 // There are two possibilities in this case.
595 // One is that we are still in the environment in which we
596 // started---which we will be if the depth is the same.
597 if (par->params().depth() == origdepth) {
598 LATTEST(par->layout() == style);
// An item left open by a previous iteration is closed first, together with
// its item wrapper when the remembered layout defines one.
599 if (lastlay != nullptr) {
600 closeItemTag(xs, *lastlay);
601 if (lastlay->docbookitemwrappertag() != "NONE") {
602 xs << xml::EndTag(lastlay->docbookitemwrappertag());
608 // this will be positive if we want to skip the
609 // initial word (if it's been taken for the label).
612 // Open a wrapper tag if needed.
613 if (style.docbookitemwrappertag() != "NONE") {
614 xs << xml::StartTag(style.docbookitemwrappertag(), style.docbookitemwrapperattr());
// A label is only output when the layout has a label type and a label tag.
619 if (style.labeltype != LABEL_NO_LABEL &&
620 style.docbookitemlabeltag() != "NONE") {
622 if (isNormalEnv(style)) {
623 // in this case, we print the label only for the first
624 // paragraph (as in a theorem or an abstract).
626 docstring const lbl = pbegin->params().labelString();
628 openLabelTag(xs, style);
630 closeLabelTag(xs, style);
632 // No new line after closeLabelTag.
636 } else { // some kind of list
637 if (style.labeltype == LABEL_MANUAL) {
638 // Only variablelist gets here.
640 openLabelTag(xs, style);
// The first word of the paragraph is output as the label; the returned
// value is kept in `sep` and later passed to simpleDocBookOnePar().
641 sep = par->firstWordDocBook(xs, runparams);
642 closeLabelTag(xs, style);
644 openLabelTag(xs, style);
645 xs << par->params().labelString();
646 closeLabelTag(xs, style);
649 } // end label output
651 // Start generating the item.
652 bool wasInParagraph = runparams.docbook_in_par;
653 openItemTag(xs, style);
654 bool getsIntoParagraph = openInnerItemTag(xs, style);
// The paragraph content is generated with a copy of the parameters in which
// docbook_in_par reflects whether we are now inside a paragraph.
655 OutputParams rp = runparams;
656 rp.docbook_in_par = wasInParagraph | getsIntoParagraph;
658 // Maybe the item is completely empty, i.e. if the first word ends at the end of the current paragraph
659 // AND if the next paragraph doesn't have the same depth (if there is such a paragraph).
660 // Common case: there is only the first word on the line, but there is a nested list instead
662 bool emptyItem = false;
663 if (sep == par->size()) {
666 if (next_par == text.paragraphs().end()) // There is no next paragraph.
668 else // There is a next paragraph: check depth.
669 emptyItem = par->params().depth() >= next_par->params().depth();
673 // Avoid having an empty item, this is not valid DocBook. A single character is enough to force
674 // generation of a full <para>.
677 // Generate the rest of the paragraph, if need be.
678 par->simpleDocBookOnePar(buf, xs, rp, text.outerFont(distance(begin, par)), true, true, sep);
682 if (getsIntoParagraph)
683 closeInnerItemTag(xs, style);
685 // We may not want to close the tag yet, in particular:
686 // If we're not at the end of the item...
688 // and are doing items...
689 && !isNormalEnv(style)
690 // and if the depth has changed...
691 && par->params().depth() != origdepth) {
692 // then we'll save this layout for later, and close it when
693 // we get another item.
696 closeItemTag(xs, style);
698 // Eventually, close the item wrapper.
699 if (style.docbookitemwrappertag() != "NONE") {
700 xs << xml::EndTag(style.docbookitemwrappertag());
705 // The other possibility is that the depth has increased.
// The nested environment is delimited with findEndOfEnvironment() and output
// by a recursive call.
707 send = findEndOfEnvironment(par, pend);
708 par = makeEnvironment(buf, xs, runparams, text, par, send);
712 case LATEX_PARAGRAPH:
713 // send = findLast(par, pend, LATEX_PARAGRAPH);
714 par = makeParagraphs(buf, xs, runparams, text, par, parnext);
716 case LATEX_BIB_ENVIRONMENT:
717 // send = findLast(par, pend, LATEX_BIB_ENVIRONMENT);
// NOTE(review): unlike the LATEX_PARAGRAPH case, the returned iterator is not
// assigned to `par` on this line; whether `par` is advanced elsewhere cannot
// be seen in this extract.
718 makeParagraphBibliography(buf, xs, runparams, text, par, parnext);
// After the loop: close the item that was kept open, if any.
726 if (lastlay != nullptr) {
727 closeItemTag(xs, *lastlay);
728 if (lastlay->docbookitemwrappertag() != "NONE") {
729 xs << xml::EndTag(lastlay->docbookitemwrappertag());
733 // auto nextpar = par;
// Closes the tags opened by openParTag() at the top of the function.
735 closeTag(xs, &*prevpar, &*par);
736 // closeTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr);
// Remainder of a function whose name line is missing from this extract —
// presumably makeCommand, which makeAny() and makeAnySimple() call with
// (buf, xs, params, text, par). It outputs the single paragraph `pbegin`
// between openParTag() and closeTag().
// NOTE(review): the statements that advance nextpar and prevpar are missing
// from this extract.
745 OutputParams const & runparams,
747 ParagraphList::const_iterator const & pbegin)
749 // No need for labels, as they are handled by DocBook tags.
750 auto const begin = text.paragraphs().begin();
751 auto const end = text.paragraphs().end();
752 auto nextpar = pbegin;
755 // Find the previous paragraph.
756 auto prevpar = begin;
757 if (prevpar != pbegin) {
758 auto prevpar_next = prevpar;
761 while (prevpar_next != pbegin) {
767 // Generate this command.
768 openParTag(xs, &*pbegin, &*prevpar);
770 pbegin->simpleDocBookOnePar(buf, xs, runparams,
771 text.outerFont(distance(begin, pbegin)));
// closeTag() receives a null next paragraph when nextpar is the end of the text.
773 closeTag(xs, &*pbegin, (nextpar != end) ? &*nextpar : nullptr);
// Dispatches on the latextype of the layout of `par` and calls the matching
// generator: makeCommand, makeEnvironment, makeParagraphs or
// makeParagraphBibliography. For every type except LATEX_COMMAND, the end of
// the run is first computed into `send` (findEndOfEnvironment or findLast).
// Returns the pair (par, send) after the call.
// NOTE(review): the first parameters and the `break` statements are missing
// from this extract, as is whatever advances `par` in the LATEX_COMMAND case.
777 pair<ParagraphList::const_iterator, ParagraphList::const_iterator> makeAny(
781 OutputParams const &ourparams,
782 ParagraphList::const_iterator par,
783 ParagraphList::const_iterator send,
784 ParagraphList::const_iterator pend)
786 switch (par->layout().latextype) {
787 case LATEX_COMMAND: {
788 // The files with which we are working never have more than
789 // one paragraph in a command structure.
791 // if (ourparams.docbook_in_par)
792 // fix it so we don't get sections inside standard, e.g.
793 // note that we may then need to make runparams not const, so we
794 // can communicate that back.
795 // FIXME Maybe this fix should be in the routines themselves, in case
796 // they are called from elsewhere.
797 makeCommand(buf, xs, ourparams, text, par);
801 case LATEX_ENVIRONMENT:
802 case LATEX_LIST_ENVIRONMENT:
803 case LATEX_ITEM_ENVIRONMENT:
804 // FIXME Same fix here.
805 send = findEndOfEnvironment(par, pend);
806 par = makeEnvironment(buf, xs, ourparams, text, par, send);
808 case LATEX_PARAGRAPH:
809 send = findLast(par, pend, LATEX_PARAGRAPH);
810 par = makeParagraphs(buf, xs, ourparams, text, par, send);
812 case LATEX_BIB_ENVIRONMENT:
813 send = findLast(par, pend, LATEX_BIB_ENVIRONMENT);
814 par = makeParagraphBibliography(buf, xs, ourparams, text, par, send);
818 return make_pair(par, send);
// Variant of makeAny() that takes no end iterators: it dispatches on the
// latextype of the layout of `par` and calls the matching generator with the
// range [par, parnext).
// NOTE(review): the first parameters, the declaration of `parnext` and the
// return statements of the LATEX_COMMAND case and of the fallback are missing
// from this extract.
821 ParagraphList::const_iterator makeAnySimple(
825 OutputParams const &ourparams,
826 ParagraphList::const_iterator par)
831 switch (par->layout().latextype) {
832 case LATEX_COMMAND: {
833 // The files with which we are working never have more than
834 // one paragraph in a command structure.
836 // if (ourparams.docbook_in_par)
837 // fix it so we don't get sections inside standard, e.g.
838 // note that we may then need to make runparams not const, so we
839 // can communicate that back.
840 // FIXME Maybe this fix should be in the routines themselves, in case
841 // they are called from elsewhere.
842 makeCommand(buf, xs, ourparams, text, par);
845 case LATEX_ENVIRONMENT:
846 case LATEX_LIST_ENVIRONMENT:
847 case LATEX_ITEM_ENVIRONMENT:
848 // FIXME Same fix here.
849 return makeEnvironment(buf, xs, ourparams, text, par, parnext);
850 case LATEX_PARAGRAPH:
851 return makeParagraphs(buf, xs, ourparams, text, par, parnext);
852 case LATEX_BIB_ENVIRONMENT:
853 return makeParagraphBibliography(buf, xs, ourparams, text, par, parnext);
856 // This should never happen. Return the next paragraph to avoid an infinite loop.
860 } // end anonymous namespace
// Result type of hasDocumentSectioning(): a flag telling whether a sectioning
// layout was found, and the paragraph index reached by its scan.
863 using DocBookDocumentSectioning = tuple<bool, pit_type>;
// Immutable description of the paragraphs related to the <info> part of a
// document, as computed by getParagraphsWithInfo(). All members are set once
// by the constructor.
// NOTE(review): the declarations of the bpit and epit members are missing from
// this extract; they are initialised in the constructor below.
866 struct DocBookInfoTag
// Paragraph indices whose layout has docbookininfo() == "maybe".
868 const set<pit_type> shouldBeInInfo;
// Paragraph indices whose layout has docbookininfo() == "always".
869 const set<pit_type> mustBeInInfo;
// Paragraph indices identified as belonging to the abstract.
870 const set<pit_type> abstract;
874 DocBookInfoTag(const set<pit_type> & shouldBeInInfo, const set<pit_type> & mustBeInInfo,
875 const set<pit_type> & abstract, pit_type bpit, pit_type epit) :
876 shouldBeInInfo(shouldBeInInfo), mustBeInInfo(mustBeInInfo), abstract(abstract),
877 bpit(bpit), epit(epit) {}
// Scans the paragraphs from bpit up to (not including) epit, looking for a
// layout whose category is "Sectioning". Returns a tuple holding the flag and
// the value of bpit at the end of the scan.
// NOTE(review): the parameter spelled "¶graphs" looks like a mis-encoded
// "&paragraphs" (the body indexes `paragraphs`). The statements executed once
// a section is found and the increment of bpit are missing from this extract.
881 DocBookDocumentSectioning hasDocumentSectioning(ParagraphList const ¶graphs, pit_type bpit, pit_type const epit) {
882 bool documentHasSections = false;
884 while (bpit < epit) {
885 Layout const &style = paragraphs[bpit].layout();
886 documentHasSections |= style.category() == from_utf8("Sectioning");
888 if (documentHasSections) {
893 // Paragraphs before the first section: [ runparams.par_begin ; eppit )
895 return make_tuple(documentHasSections, bpit);
// Checks every position of the paragraph: a position that is not an inset, or
// whose inset is not an InsetNote, is a counter-example.
// NOTE(review): the return statements are missing from this extract;
// presumably false is returned on the first counter-example and true
// otherwise (which would also make an empty paragraph return true).
899 bool hasOnlyNotes(Paragraph const & par)
901 for (int i = 0; i < par.size(); ++i)
902 if (!par.isInset(i) || !dynamic_cast<InsetNote *>(par.insetList().get(i)))
// Classifies the paragraphs at the beginning of [bpit, epit) for the <info>
// part of the document: which ones must or should go in <info> (based on the
// layout's docbookininfo()), and which ones form the abstract. Returns a
// DocBookInfoTag holding the three sets plus the range [bpit, cpit) that was
// examined.
// NOTE(review): the parameter spelled "¶graphs" looks like a mis-encoded
// "&paragraphs". Index increments, `break`s, `else` branches, the declaration
// of `pit` and the `done` label are missing from this extract.
908 DocBookInfoTag getParagraphsWithInfo(ParagraphList const ¶graphs, pit_type bpit, pit_type const epit) {
909 set<pit_type> shouldBeInInfo;
910 set<pit_type> mustBeInInfo;
911 set<pit_type> abstract;
913 // Find the first non empty paragraph by mutating bpit.
914 while (bpit < epit) {
915 Paragraph const &par = paragraphs[bpit];
916 if (par.empty() || hasOnlyNotes(par))
922 // Find the last info-like paragraph.
923 pit_type cpit = bpit;
924 bool hasAbstractLayout = false;
925 while (cpit < epit) {
926 // Skip paragraphs only containing one note.
927 Paragraph const & par = paragraphs[cpit];
928 if (hasOnlyNotes(par)) {
// Remember whether any layout explicitly declares itself as an abstract.
933 if (par.layout().docbookabstract())
934 hasAbstractLayout = true;
936 // Based on layout information, store this paragraph in one set: should be in <info>, must be.
937 Layout const &style = par.layout();
939 if (style.docbookininfo() == "always") {
940 mustBeInInfo.emplace(cpit);
941 } else if (style.docbookininfo() == "maybe") {
942 shouldBeInInfo.emplace(cpit);
944 // Hypothesis: the <info> parts should be grouped together near the beginning bpit.
945 // There may be notes in between, but nothing else.
950 // Now, cpit points to the last paragraph that has things that could go in <info>.
951 // bpit is the beginning of the <info> part.
953 // Go once again through the list of paragraphs to find the abstract. If there is an abstract
954 // layout, only consider it. Otherwise, an abstract is just a sequence of paragraphs with text.
955 if (hasAbstractLayout) {
957 while (pit < cpit) { // Don't overshoot the <info> part.
958 if (paragraphs[pit].layout().docbookabstract())
959 abstract.emplace(pit);
// Heuristic path: lastAbstract == epit + 1 means "no abstract paragraph seen
// yet".
963 pit_type lastAbstract = epit + 1; // A nonsensical value.
964 docstring lastAbstractLayout;
967 while (pit < cpit) { // Don't overshoot the <info> part.
968 const Paragraph & par = paragraphs.at(pit);
969 if (!par.insetList().empty()) {
970 for (const auto &i : par.insetList()) {
// Only insets for which getText(0) is non-null are considered.
971 if (i.inset->getText(0) != nullptr) {
972 if (lastAbstract == epit + 1) {
973 // First paragraph that matches the heuristic definition of abstract.
975 lastAbstractLayout = par.layout().name();
976 } else if (pit > lastAbstract + 1 || par.layout().name() != lastAbstractLayout) {
977 // This is either too far from the last abstract paragraph or doesn't
978 // have the right layout name, BUT there has already been an abstract
979 // in this document: done with detecting the abstract.
980 goto done; // Easier to get out of two nested loops.
983 abstract.emplace(pit);
993 return DocBookInfoTag(shouldBeInInfo, mustBeInInfo, abstract, bpit, cpit);
// Walks the paragraphs from info.bpit up to (not including) info.epit and
// outputs them through makeAny(). Paragraphs listed in info.abstract are
// tested for first.
// NOTE(review): the first parameters, the declaration of `pend`, the statements
// executed for abstract paragraphs and the return are missing from this
// extract.
997 pit_type generateDocBookParagraphWithoutSectioning(
1001 OutputParams const & runparams,
1002 ParagraphList const & paragraphs,
1003 DocBookInfoTag const & info)
1005 auto bpit = info.bpit;
1006 auto par = paragraphs.iterator_at(bpit);
1007 auto lastStartedPar = par;
1008 ParagraphList::const_iterator send;
// The end iterator is paragraphs.end() when info.epit equals the list size,
// otherwise the iterator at index info.epit.
1010 (info.epit == (int) paragraphs.size()) ?
1011 paragraphs.end() : paragraphs.iterator_at(info.epit);
1013 while (bpit < info.epit) {
1014 if (info.abstract.find(bpit) != info.abstract.end()) {
// makeAny() returns the iterator it stopped at; the index bpit is advanced by
// the number of paragraphs it moved over.
1019 tie(par, send) = makeAny(text, buf, xs, runparams, par, send, pend);
1020 bpit += distance(lastStartedPar, par);
1021 lastStartedPar = par;
// Outputs the <info> part of the document described by `info`: the abstract is
// first generated into a temporary stream to check that it has real content,
// then <info> is opened if needed and the "should"/"must" paragraphs and the
// abstract are written to `xs`.
// NOTE(review): the first parameters, the declarations of `abstract` and xs2,
// several `if` guards and the increment of bpit are missing from this extract.
1028 void outputDocBookInfo(
1032 OutputParams const & runparams,
1033 ParagraphList const & paragraphs,
1034 DocBookInfoTag const & info)
1036 // Perform an additional check on the abstract. Sometimes, there are many paragraphs that should go
1037 // into the abstract, but none generates actual content. Thus, first generate to a temporary stream,
1038 // then only create the <abstract> tag if these paragraphs generate some content.
1039 // This check must be performed *before* a decision on whether or not to output <info> is made.
1040 bool hasAbstract = !info.abstract.empty();
1043 odocstringstream os2;
// The abstract is generated for every index between the smallest and the
// largest element of info.abstract, inclusive.
1046 auto bpit = *std::min_element(info.abstract.begin(), info.abstract.end());
1047 auto epit = 1 + *std::max_element(info.abstract.begin(), info.abstract.end());
1048 // info.abstract is inclusive, epit is exclusive, hence +1 for looping.
1050 while (bpit < epit) {
1051 makeAnySimple(text, buf, xs2, runparams, paragraphs.iterator_at(bpit));
// NOTE(review): this writes an XML comment holding the paragraph index after
// each paragraph; it looks like debugging output. If xs2 feeds os2 (its
// declaration is not visible here), the emptiness check below could never
// succeed once the loop has run — confirm.
1052 xs2 << XMLStream::ESCAPE_NONE << from_ascii("<!-- " + to_string(bpit) + " -->");
1057 // Actually output the abstract if there is something to do. Don't count line feeds or spaces in this,
1058 // even though they must be properly output if there is some abstract.
1059 abstract = os2.str();
1060 static const lyx::regex reg("[ \\r\\n]*");
1061 docstring abstractContent = from_utf8(lyx::regex_replace(to_utf8(abstract), reg, string("")));
1063 // Nothing? Then there is no abstract!
1064 if (abstractContent.empty())
1065 hasAbstract = false;
1068 // The abstract must go in <info>.
1069 bool needInfo = !info.mustBeInInfo.empty() || hasAbstract;
1071 // Start the <info> tag if required.
1073 xs.startDivision(false);
1074 xs << xml::StartTag("info");
1078 // Output the elements that should go in <info>, before and after the abstract.
// NOTE(review): the literal XML comments written below ("shouldBeInInfo",
// "mustBeInInfo", "/info", "abs", "/abs") end up in the output document; they
// look like debugging markers — confirm they are wanted.
1079 xs << XMLStream::ESCAPE_NONE << "<!-- shouldBeInInfo -->";
1080 for (auto pit : info.shouldBeInInfo) // Typically, the title: these elements are so important and ubiquitous
1081 // that mandating a wrapper like <info> would repel users.
1082 makeAnySimple(text, buf, xs, runparams, paragraphs.iterator_at(pit));
1083 xs << XMLStream::ESCAPE_NONE << "<!-- mustBeInInfo -->";
1084 for (auto pit : info.mustBeInInfo)
1085 if (info.abstract.find(pit) == info.abstract.end()) // The abstract must be in info, but is dealt with after.
1086 makeAnySimple(text, buf, xs, runparams, paragraphs.iterator_at(pit));
1087 xs << XMLStream::ESCAPE_NONE << "<!-- /info -->";
1090 // string tag = paragraphs[*info.abstract.begin()].layout().docbookforceabstracttag();
1091 // if (tag == "NONE")
1092 // tag = "abstract";
1094 // xs << xml::StartTag(tag);
// The pre-generated abstract text is written without escaping.
1096 xs << XMLStream::ESCAPE_NONE << "<!-- abs -->";
1097 xs << XMLStream::ESCAPE_NONE << abstract;
1098 xs << XMLStream::ESCAPE_NONE << "<!-- /abs -->";
1099 // xs << xml::EndTag(tag);
1103 // End the <info> tag if it was started.
1105 xs << xml::EndTag("info");
// Handles the paragraphs from runparams.par_begin up to `epit` for a document
// with sections: computes the <info> description with getParagraphsWithInfo()
// and outputs it with outputDocBookInfo().
// NOTE(review): the first parameters and the declaration of `epit` are missing
// from this extract (docbookParagraphs() passes a fifth argument, eppit). The
// declaration spelled "¶graphs" looks like a mis-encoded "&paragraphs".
1112 void docbookFirstParagraphs(
1116 OutputParams const &runparams,
1119 // Handle the beginning of the document, supposing it has sections.
1120 // Major role: output the first <info> tag.
1122 ParagraphList const ¶graphs = text.paragraphs();
1123 pit_type bpit = runparams.par_begin;
1124 DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit);
1125 outputDocBookInfo(text, buf, xs, runparams, paragraphs, info);
// Outputs the paragraphs in [runparams.par_begin, runparams.par_end) for a
// text without sections: first the <info> part (getParagraphsWithInfo() +
// outputDocBookInfo()), then every remaining paragraph that does not consist
// only of notes, through makeAnySimple().
// NOTE(review): the first parameters, the statement that moves bpit to where
// <info> ends, and the increment of bpit are missing from this extract. The
// declaration spelled "¶graphs" looks like a mis-encoded "&paragraphs".
1129 void docbookSimpleAllParagraphs(
1133 OutputParams const & runparams)
1135 // Handle the given text, supposing it has no sections (i.e. a "simple" text). The input may vary in length
1136 // between a single paragraph to a whole document.
1138 // First, the <info> tag.
1139 ParagraphList const ¶graphs = text.paragraphs();
1140 pit_type bpit = runparams.par_begin;
1141 pit_type const epit = runparams.par_end;
1142 DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit);
1143 outputDocBookInfo(text, buf, xs, runparams, paragraphs, info);
1145 // Then, the content. It starts where the <info> ends.
1147 while (bpit < epit) {
1148 auto par = paragraphs.iterator_at(bpit);
1149 if (!hasOnlyNotes(*par))
1150 makeAnySimple(text, buf, xs, runparams, par);
// docbookParagraphs: entry point that writes the paragraphs of `text` selected
// by runparams.par_begin / runparams.par_end to the XML stream `xs`, opening and
// closing DocBook section elements around sectioning paragraphs.
// NOTE(review): the embedded line numbers jump throughout this function (e.g.
// 1156 -> 1159, 1167 -> 1169, 1222 -> 1225, 1249 -> 1255), so two parameter
// lines, most braces and several statements (pops, breaks, else branches) are
// not visible in this excerpt. Comments below describe only the visible lines.
1156 void docbookParagraphs(Text const &text,
1159 OutputParams const &runparams) {
// NOTE(review): `¶graphs` looks like a mis-decoded `&paragraphs` ("&para"
// read as an HTML entity) -- verify against the upstream file.
1160 ParagraphList const ¶graphs = text.paragraphs();
// An empty range (begin == end) is widened to the whole paragraph list.
// NOTE(review): this assigns through a const reference, which only compiles if
// par_begin / par_end are declared `mutable` -- confirm in OutputParams.
1161 if (runparams.par_begin == runparams.par_end) {
1162 runparams.par_begin = 0;
1163 runparams.par_end = paragraphs.size();
1165 pit_type bpit = runparams.par_begin;
1166 pit_type const epit = runparams.par_end;
// The second LASSERT argument is the recovery code; the visible part of it
// writes an unescaped XML comment into the output. The rest of the macro call
// (lines 1168, 1170-1172) is elided.
1167 LASSERT(bpit < epit,
1169 xs << XMLStream::ESCAPE_NONE << "<!-- DocBook output error! -->\n";
// End iterator of the processed range: the list end when epit equals the list
// size, otherwise the iterator at index epit.
// NOTE(review): C-style cast to int; a static_cast to pit_type would state the
// intent more clearly -- behaviour left untouched here.
1173 ParagraphList::const_iterator const pend =
1174 (epit == (int) paragraphs.size()) ?
1175 paragraphs.end() : paragraphs.iterator_at(epit);
1176 std::stack<std::pair<int, string>> headerLevels; // Used to determine when to open/close sections: store the depth
1177 // of the section and the tag that was used to open it.
1179 // Detect whether the document contains sections. If there are no sections, there can be no automatically
1180 // discovered abstract.
1181 bool documentHasSections;
// `eppit` is declared on an elided line (1182); it is filled here and only
// forwarded to docbookFirstParagraphs below.
1183 tie(documentHasSections, eppit) = hasDocumentSectioning(paragraphs, bpit, epit);
// With sections: only the leading <info> part is handled by the helper.
1185 if (documentHasSections) {
1186 docbookFirstParagraphs(text, buf, xs, runparams, eppit);
// NOTE(review): lines 1187-1188 and 1190-1192 are elided; presumably this call
// sits in the `else` branch (possibly followed by a return) -- confirm.
1189 docbookSimpleAllParagraphs(text, buf, xs, runparams);
// Set once a paragraph flagged startOfAppendix() is met; no visible line
// resets it afterwards.
1193 bool currentlyInAppendix = false;
1195 while (bpit < epit) {
// Fresh copy per iteration: the label IDs added to docbook_anchors_to_ignore
// below go into this copy, not into the caller's runparams.
1196 OutputParams ourparams = runparams;
1198 auto par = paragraphs.iterator_at(bpit);
1199 if (par->params().startOfAppendix())
1200 currentlyInAppendix = true;
1201 Layout const &style = par->layout();
// Remembered so that bpit can be advanced by however far `par` moves.
1202 ParagraphList::const_iterator const lastStartedPar = par;
1203 ParagraphList::const_iterator send;
// Note-only paragraphs take a separate path.
// NOTE(review): line 1206 is elided (presumably it advances `par`); as shown,
// distance(lastStartedPar, par) would be 0 -- confirm.
1205 if (hasOnlyNotes(*par)) {
1207 bpit += distance(lastStartedPar, par);
1211 // Think about adding <section> and/or </section>s.
// A paragraph is a section title when its layout category is "Sectioning".
1212 const bool isLayoutSectioning = style.category() == from_utf8("Sectioning");
1213 if (isLayoutSectioning) {
1214 int level = style.toclevel;
1216 // Need to close a previous section if it has the same level or a higher one (close <section> if opening a <h2>
1217 // after a <h2>, <h3>, <h4>, <h5> or <h6>). More examples:
1218 // - current: h2; back: h1; do not close any <section>
1219 // - current: h1; back: h2; close two <section> (first the <h2>, then the <h1>, so a new <h1> can come)
1220 while (!headerLevels.empty() && level <= headerLevels.top().first) {
// Read level and closing tag from the top entry; the matching pop() is on an
// elided line (1223-1224) -- the loop cannot terminate without it.
1221 int stackLevel = headerLevels.top().first;
1222 docstring stackTag = from_utf8("</" + headerLevels.top().second + ">");
1225 // Output the tag only if it corresponds to a legit section.
1226 if (stackLevel != Layout::NOT_IN_TOC)
1227 xs << XMLStream::ESCAPE_NONE << stackTag << xml::CR();
1230 // Open the new section: first push it onto the stack, then output it in DocBook.
// Once in the appendix, a layout whose section tag is "chapter" is emitted as
// "appendix"; every other tag is used unchanged.
1231 string sectionTag = (currentlyInAppendix && style.docbooksectiontag() == "chapter") ?
1232 "appendix" : style.docbooksectiontag();
// Pushed even when level == NOT_IN_TOC; such entries are skipped when closing.
1233 headerLevels.push(std::make_pair(level, sectionTag));
1235 // Some sectioning-like elements should not be output (such as FrontMatter).
1236 if (level != Layout::NOT_IN_TOC) {
1237 // Look for a label in the title, i.e. a InsetLabel as a child.
1238 docstring id = docstring();
// Scan every position of the paragraph; getInset(i) is then tested with
// dynamic_cast, which yields a null pointer for a null or non-label inset.
1239 for (pos_type i = 0; i < par->size(); ++i) {
1240 Inset const *inset = par->getInset(i);
1242 if (auto label = dynamic_cast<InsetLabel const *>(inset)) {
1243 // Generate the attributes for the section if need be.
// NOTE(review): `+=` would concatenate two xml:id attributes if the loop kept
// going; presumably a `break` on an elided line (1250) prevents that -- confirm.
1244 id += "xml:id=\"" + xml::cleanID(label->screenLabel()) + "\"";
1246 // Don't output the ID as a DocBook <anchor>.
1247 ourparams.docbook_anchors_to_ignore.emplace(label->screenLabel());
1249 // Cannot have multiple IDs per tag.
1255 // Write the open tag for this section.
1256 docstring tag = from_utf8("<" + sectionTag);
// NOTE(review): line 1257 is elided; presumably it guards the next line with
// a non-empty check on `id` -- confirm.
1258 tag += from_utf8(" ") + id;
1259 tag += from_utf8(">");
// Written unescaped because `tag` already holds literal markup.
1260 xs << XMLStream::ESCAPE_NONE << tag;
1265 // Close all sections before the bibliography.
1266 // TODO: Only close all when the bibliography is at the end of the document? Or force to output the bibliography at the end of the document? Or don't care (as allowed by DocBook)?
// Number of insets in this paragraph, used as a cheap pre-check.
1267 auto insetsLength = distance(par->insetList().begin(), par->insetList().end());
1268 if (insetsLength > 0) {
// Only position 0 is inspected; getInset(0) may be null when the paragraph's
// insets sit at other positions, hence the null check below.
1269 Inset const *firstInset = par->getInset(0);
1270 if (firstInset && dynamic_cast<InsetBibtex const *>(firstInset)) {
// Unwind the whole section stack; the pop() is on an elided line (1274).
1271 while (!headerLevels.empty()) {
1272 int level = headerLevels.top().first;
1273 docstring tag = from_utf8("</" + headerLevels.top().second + ">");
1276 // Output the tag only if it corresponds to a legit section.
1277 if (level != Layout::NOT_IN_TOC) {
1278 xs << XMLStream::ESCAPE_NONE << tag;
1285 // Generate this paragraph.
// makeAny returns a pair that replaces both `par` and `send`; bpit is then
// advanced by the number of paragraphs `par` moved forward.
1286 tie(par, send) = makeAny(text, buf, xs, ourparams, par, send, pend);
1287 bpit += distance(lastStartedPar, par);
1290 // If need be, close <section>s, but only at the end of the document (otherwise, dealt with at the beginning
// Stops at the first entry whose level is not greater than Layout::NOT_IN_TOC
// (or when the stack is empty). The pop() and the closing braces are on elided
// lines after 1295.
1292 while (!headerLevels.empty() && headerLevels.top().first > Layout::NOT_IN_TOC) {
1293 docstring tag = from_utf8("</" + headerLevels.top().second + ">");
1295 xs << XMLStream::ESCAPE_NONE << tag;