]> git.lyx.org Git - features.git/blob - src/output_docbook.cpp
Revert "Fix #11827"
[features.git] / src / output_docbook.cpp
1 /**
2  * \file output_docbook.cpp
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author Lars Gullik Bjønnes
7  * \author José Matos
8  *
9  * Full author contact details are available in file CREDITS.
10  */
11
12 #include <config.h>
13
14 #include "Buffer.h"
15 #include "buffer_funcs.h"
16 #include "BufferParams.h"
17 #include "Font.h"
18 #include "InsetList.h"
19 #include "Layout.h"
20 #include "OutputParams.h"
21 #include "Paragraph.h"
22 #include "ParagraphList.h"
23 #include "ParagraphParameters.h"
24 #include "xml.h"
25 #include "Text.h"
26 #include "TextClass.h"
27
28 #include "insets/InsetBibtex.h"
29 #include "insets/InsetBibitem.h"
30 #include "insets/InsetLabel.h"
31 #include "insets/InsetNote.h"
32
33 #include "support/convert.h"
34 #include "support/debug.h"
35 #include "support/lassert.h"
36 #include "support/lstrings.h"
37 #include "support/textutils.h"
38
39 #include "support/regex.h"
40
41 #include <stack>
42 #include <iostream>
43 #include <algorithm>
44 #include <sstream>
45
46 using namespace std;
47 using namespace lyx::support;
48
49 namespace lyx {
50
51 namespace {
52
53 std::string const fontToDocBookTag(xml::FontTypes type)
54 {
55         switch (type) {
56         case xml::FontTypes::FT_EMPH:
57         case xml::FontTypes::FT_BOLD:
58                 return "emphasis";
59         case xml::FontTypes::FT_NOUN:
60                 return "person";
61         case xml::FontTypes::FT_UBAR:
62         case xml::FontTypes::FT_WAVE:
63         case xml::FontTypes::FT_DBAR:
64         case xml::FontTypes::FT_SOUT:
65         case xml::FontTypes::FT_XOUT:
66         case xml::FontTypes::FT_ITALIC:
67         case xml::FontTypes::FT_UPRIGHT:
68         case xml::FontTypes::FT_SLANTED:
69         case xml::FontTypes::FT_SMALLCAPS:
70         case xml::FontTypes::FT_ROMAN:
71         case xml::FontTypes::FT_SANS:
72                 return "emphasis";
73         case xml::FontTypes::FT_TYPE:
74                 return "code";
75         case xml::FontTypes::FT_SIZE_TINY:
76         case xml::FontTypes::FT_SIZE_SCRIPT:
77         case xml::FontTypes::FT_SIZE_FOOTNOTE:
78         case xml::FontTypes::FT_SIZE_SMALL:
79         case xml::FontTypes::FT_SIZE_NORMAL:
80         case xml::FontTypes::FT_SIZE_LARGE:
81         case xml::FontTypes::FT_SIZE_LARGER:
82         case xml::FontTypes::FT_SIZE_LARGEST:
83         case xml::FontTypes::FT_SIZE_HUGE:
84         case xml::FontTypes::FT_SIZE_HUGER:
85         case xml::FontTypes::FT_SIZE_INCREASE:
86         case xml::FontTypes::FT_SIZE_DECREASE:
87                 return "emphasis";
88         default:
89                 return "";
90         }
91 }
92
93 string fontToRole(xml::FontTypes type)
94 {
95         // Specific fonts are achieved with roles. The only common ones are "" for basic emphasis,
96         // and "bold"/"strong" for bold. With some specific options, other roles are copied into
97         // HTML output (via the DocBook XSLT sheets); otherwise, if not recognised, they are just ignored.
98         // Hence, it is not a problem to have many roles by default here.
99         // See https://www.sourceware.org/ml/docbook/2003-05/msg00269.html
100         switch (type) {
101         case xml::FontTypes::FT_ITALIC:
102         case xml::FontTypes::FT_EMPH:
103                 return "";
104         case xml::FontTypes::FT_BOLD:
105                 return "bold";
106         case xml::FontTypes::FT_NOUN:
107                 return ""; // Outputs a <person>
108         case xml::FontTypes::FT_TYPE:
109                 return ""; // Outputs a <code>
110         case xml::FontTypes::FT_UBAR:
111                 return "underline";
112
113                 // All other roles are non-standard for DocBook.
114
115         case xml::FontTypes::FT_WAVE:
116                 return "wave";
117         case xml::FontTypes::FT_DBAR:
118                 return "dbar";
119         case xml::FontTypes::FT_SOUT:
120                 return "sout";
121         case xml::FontTypes::FT_XOUT:
122                 return "xout";
123         case xml::FontTypes::FT_UPRIGHT:
124                 return "upright";
125         case xml::FontTypes::FT_SLANTED:
126                 return "slanted";
127         case xml::FontTypes::FT_SMALLCAPS:
128                 return "smallcaps";
129         case xml::FontTypes::FT_ROMAN:
130                 return "roman";
131         case xml::FontTypes::FT_SANS:
132                 return "sans";
133         case xml::FontTypes::FT_SIZE_TINY:
134                 return "tiny";
135         case xml::FontTypes::FT_SIZE_SCRIPT:
136                 return "size_script";
137         case xml::FontTypes::FT_SIZE_FOOTNOTE:
138                 return "size_footnote";
139         case xml::FontTypes::FT_SIZE_SMALL:
140                 return "size_small";
141         case xml::FontTypes::FT_SIZE_NORMAL:
142                 return "size_normal";
143         case xml::FontTypes::FT_SIZE_LARGE:
144                 return "size_large";
145         case xml::FontTypes::FT_SIZE_LARGER:
146                 return "size_larger";
147         case xml::FontTypes::FT_SIZE_LARGEST:
148                 return "size_largest";
149         case xml::FontTypes::FT_SIZE_HUGE:
150                 return "size_huge";
151         case xml::FontTypes::FT_SIZE_HUGER:
152                 return "size_huger";
153         case xml::FontTypes::FT_SIZE_INCREASE:
154                 return "size_increase";
155         case xml::FontTypes::FT_SIZE_DECREASE:
156                 return "size_decrease";
157         default:
158                 return "";
159         }
160 }
161
162 string fontToAttribute(xml::FontTypes type) {
163         // If there is a role (i.e. nonstandard use of a tag), output the attribute. Otherwise, the sheer tag is sufficient
164         // for the font.
165         string role = fontToRole(type);
166         if (!role.empty()) {
167                 return "role='" + role + "'";
168         } else {
169                 return "";
170         }
171 }
172
173 } // end anonymous namespace
174
175
176 xml::FontTag docbookStartFontTag(xml::FontTypes type)
177 {
178         return xml::FontTag(from_utf8(fontToDocBookTag(type)), from_utf8(fontToAttribute(type)), type);
179 }
180
181
182 xml::EndFontTag docbookEndFontTag(xml::FontTypes type)
183 {
184         return xml::EndFontTag(from_utf8(fontToDocBookTag(type)), type);
185 }
186
187
188 namespace {
189
190 // convenience functions
191
192 void openParTag(XMLStream &xs, Layout const &lay)
193 {
194         if (lay.docbookwrappertag() != "NONE") {
195                 xs << xml::StartTag(lay.docbookwrappertag(), lay.docbookwrapperattr());
196         }
197
198         string tag = lay.docbooktag();
199         if (tag == "Plain Layout")
200                 tag = "para";
201
202         xs << xml::ParTag(tag, lay.docbookattr());
203 }
204
205
206 void closeTag(XMLStream &xs, Layout const &lay)
207 {
208         string tag = lay.docbooktag();
209         if (tag == "Plain Layout")
210                 tag = "para";
211
212         xs << xml::EndTag(tag);
213         if (lay.docbookwrappertag() != "NONE")
214                 xs << xml::EndTag(lay.docbookwrappertag());
215 }
216
217
218 void openLabelTag(XMLStream & xs, Layout const & lay) // Mostly for definition lists.
219 {
220         xs << xml::StartTag(lay.docbookitemlabeltag(), lay.docbookitemlabelattr());
221 }
222
223
224 void closeLabelTag(XMLStream & xs, Layout const & lay)
225 {
226         xs << xml::EndTag(lay.docbookitemlabeltag());
227         xs << xml::CR();
228 }
229
230
231 void openItemTag(XMLStream &xs, Layout const &lay)
232 {
233         xs << xml::StartTag(lay.docbookitemtag(), lay.docbookitemattr());
234 }
235
236
237 // Return true when new elements are output in a paragraph, false otherwise.
238 bool openInnerItemTag(XMLStream &xs, Layout const &lay)
239 {
240         if (lay.docbookiteminnertag() != "NONE") {
241                 xs << xml::CR();
242                 xs << xml::ParTag(lay.docbookiteminnertag(), lay.docbookiteminnerattr());
243
244                 if (lay.docbookiteminnertag() == "para") {
245                         return true;
246                 }
247         }
248         return false;
249 }
250
251
252 void closeInnerItemTag(XMLStream &xs, Layout const &lay)
253 {
254         if (lay.docbookiteminnertag()!= "NONE") {
255                 xs << xml::EndTag(lay.docbookiteminnertag());
256                 xs << xml::CR();
257         }
258 }
259
260
261 inline void closeItemTag(XMLStream &xs, Layout const &lay)
262 {
263         xs << xml::EndTag(lay.docbookitemtag());
264         xs << xml::CR();
265 }
266
267 // end of convenience functions
268
269 ParagraphList::const_iterator findLastParagraph(
270                 ParagraphList::const_iterator p,
271                 ParagraphList::const_iterator const & pend) {
272         for (++p; p != pend && p->layout().latextype == LATEX_PARAGRAPH; ++p);
273
274         return p;
275 }
276
277
278 ParagraphList::const_iterator findEndOfEnvironment(
279                 ParagraphList::const_iterator const & pstart,
280                 ParagraphList::const_iterator const & pend)
281 {
282         ParagraphList::const_iterator p = pstart;
283         Layout const &bstyle = p->layout();
284         size_t const depth = p->params().depth();
285         for (++p; p != pend; ++p) {
286                 Layout const &style = p->layout();
287                 // It shouldn't happen that e.g. a section command occurs inside
288                 // a quotation environment, at a higher depth, but as of 6/2009,
289                 // it can happen. We pretend that it's just at lowest depth.
290                 if (style.latextype == LATEX_COMMAND)
291                         return p;
292
293                 // If depth is down, we're done
294                 if (p->params().depth() < depth)
295                         return p;
296
297                 // If depth is up, we're not done
298                 if (p->params().depth() > depth)
299                         continue;
300
301                 // FIXME I am not sure about the first check.
302                 // Surely we *could* have different layouts that count as
303                 // LATEX_PARAGRAPH, right?
304                 if (style.latextype == LATEX_PARAGRAPH || style != bstyle)
305                         return p;
306         }
307         return pend;
308 }
309
310
311 ParagraphList::const_iterator makeParagraphBibliography(
312                 Buffer const &buf,
313                 XMLStream &xs,
314                 OutputParams const &runparams,
315                 Text const &text,
316                 ParagraphList::const_iterator const & pbegin,
317                 ParagraphList::const_iterator const & pend)
318 {
319         auto const begin = text.paragraphs().begin();
320         auto const end = text.paragraphs().end();
321
322         // Find the paragraph *before* pbegin.
323         ParagraphList::const_iterator pbegin_before = begin;
324         if (pbegin != begin) {
325                 ParagraphList::const_iterator pbegin_before_next = begin;
326                 ++pbegin_before_next;
327
328                 while (pbegin_before_next != pbegin) {
329                         ++pbegin_before;
330                         ++pbegin_before_next;
331                 }
332         }
333
334         ParagraphList::const_iterator par = pbegin;
335
336         // If this is the first paragraph in a bibliography, open the bibliography tag.
337         if (pbegin != begin && pbegin_before->layout().latextype != LATEX_BIB_ENVIRONMENT) {
338                 xs << xml::StartTag("bibliography");
339                 xs << xml::CR();
340         }
341
342         // Generate the required paragraphs.
343         for (; par != pend; ++par) {
344                 // Start the precooked bibliography entry. This is very much like opening a paragraph tag.
345                 // Don't forget the citation ID!
346                 docstring attr;
347                 for (auto i = 0; i < par->size(); ++i) {
348                         Inset const *ip = par->getInset(0);
349                         if (ip != nullptr && ip->lyxCode() == BIBITEM_CODE) {
350                                 const auto * bibitem = dynamic_cast<const InsetBibitem*>(par->getInset(i));
351                                 attr = from_utf8("xml:id='") + bibitem->bibLabel() + from_utf8("'");
352                                 break;
353                         }
354                 }
355                 xs << xml::StartTag(from_utf8("bibliomixed"), attr);
356
357                 // Generate the entry.
358                 par->simpleDocBookOnePar(buf, xs, runparams, text.outerFont(distance(begin, par)), true, true, 0);
359
360                 // End the precooked bibliography entry.
361                 xs << xml::EndTag("bibliomixed");
362                 xs << xml::CR();
363         }
364
365         // If this is the last paragraph in a bibliography, close the bibliography tag.
366         if (par == end || par->layout().latextype != LATEX_BIB_ENVIRONMENT) {
367                 xs << xml::EndTag("bibliography");
368                 xs << xml::CR();
369         }
370
371         return pend;
372 }
373
374
375 ParagraphList::const_iterator makeParagraphs(
376                 Buffer const &buf,
377                 XMLStream &xs,
378                 OutputParams const &runparams,
379                 Text const &text,
380                 ParagraphList::const_iterator const & pbegin,
381                 ParagraphList::const_iterator const & pend)
382 {
383         ParagraphList::const_iterator const begin = text.paragraphs().begin();
384         ParagraphList::const_iterator par = pbegin;
385         for (; par != pend; ++par) {
386                 Layout const &lay = par->layout();
387
388                 // We want to open the paragraph tag if:
389                 //   (i) the current layout permits multiple paragraphs
390                 //  (ii) we are either not already inside a paragraph (HTMLIsBlock) OR
391                 //         we are, but this is not the first paragraph
392                 //
393                 // But there is also a special case, and we first see whether we are in it.
394                 // We do not want to open the paragraph tag if this paragraph contains
395                 // only one item, and that item is "inline", i.e., not HTMLIsBlock (such
396                 // as a branch). On the other hand, if that single item has a font change
397                 // applied to it, then we still do need to open the paragraph.
398                 //
399                 // Obviously, this is very fragile. The main reason we need to do this is
400                 // because of branches, e.g., a branch that contains an entire new section.
401                 // We do not really want to wrap that whole thing in a <div>...</div>.
402                 bool special_case = false;
403                 Inset const *specinset = par->size() == 1 ? par->getInset(0) : 0;
404                 if (specinset && !specinset->getLayout().htmlisblock()) { // TODO: Convert htmlisblock to a DocBook parameter?
405                         Layout const &style = par->layout();
406                         FontInfo const first_font = style.labeltype == LABEL_MANUAL ?
407                                                                                 style.labelfont : style.font;
408                         FontInfo const our_font =
409                                         par->getFont(buf.masterBuffer()->params(), 0,
410                                                                  text.outerFont(distance(begin, par))).fontInfo();
411
412                         if (first_font == our_font)
413                                 special_case = true;
414                 }
415
416                 // Plain layouts must be ignored.
417                 if (!special_case && buf.params().documentClass().isPlainLayout(lay) && !runparams.docbook_force_pars)
418                         special_case = true;
419                 // TODO: Could get rid of this with a DocBook equivalent to htmlisblock?
420                 if (!special_case && par->size() == 1 && par->getInset(0)) {
421                         Inset const * firstInset = par->getInset(0);
422
423                         // Floats cannot be in paragraphs.
424                         special_case = to_utf8(firstInset->layoutName()).substr(0, 6) == "Float:";
425
426                         // Bibliographies cannot be in paragraphs.
427                         if (!special_case && firstInset->asInsetCommand())
428                                 special_case = firstInset->asInsetCommand()->params().getCmdName() == "bibtex";
429
430                         // Equations do not deserve their own paragraph (DocBook allows them outside paragraphs).
431                         if (!special_case && firstInset->asInsetMath())
432                                 special_case = true;
433
434                         // ERTs are in comments, not paragraphs.
435                         if (!special_case && firstInset->lyxCode() == lyx::ERT_CODE)
436                                 special_case = true;
437
438                         // Listings should not get into their own paragraph.
439                         if (!special_case && firstInset->lyxCode() == lyx::LISTINGS_CODE)
440                                 special_case = true;
441                 }
442
443                 bool const open_par = runparams.docbook_make_pars
444                                                           && (!runparams.docbook_in_par || par != pbegin)
445                                                           && !special_case;
446
447                 // We want to issue the closing tag if either:
448                 //   (i)  We opened it, and either docbook_in_par is false,
449                 //              or we're not in the last paragraph, anyway.
450                 //   (ii) We didn't open it and docbook_in_par is true,
451                 //              but we are in the first par, and there is a next par.
452                 ParagraphList::const_iterator nextpar = par;
453                 ++nextpar;
454                 bool const close_par =
455                                 ((open_par && (!runparams.docbook_in_par || nextpar != pend))
456                                 || (!open_par && runparams.docbook_in_par && par == pbegin && nextpar != pend));
457
458                 if (open_par) {
459                         openParTag(xs, lay);
460                 }
461
462                 par->simpleDocBookOnePar(buf, xs, runparams, text.outerFont(distance(begin, par)), open_par, close_par, 0);
463
464                 if (close_par) {
465                         closeTag(xs, lay);
466                         xs << xml::CR();
467                 }
468         }
469         return pend;
470 }
471
472
473 bool isNormalEnv(Layout const &lay)
474 {
475         return lay.latextype == LATEX_ENVIRONMENT
476                    || lay.latextype == LATEX_BIB_ENVIRONMENT;
477 }
478
479
480 ParagraphList::const_iterator makeEnvironment(
481                 Buffer const &buf,
482                 XMLStream &xs,
483                 OutputParams const &runparams,
484                 Text const &text,
485                 ParagraphList::const_iterator const & pbegin,
486                 ParagraphList::const_iterator const & pend)
487 {
488         ParagraphList::const_iterator const begin = text.paragraphs().begin();
489         ParagraphList::const_iterator par = pbegin;
490         Layout const &bstyle = par->layout();
491         depth_type const origdepth = pbegin->params().depth();
492
493         // open tag for this environment
494         openParTag(xs, bstyle);
495         xs << xml::CR();
496
497         // we will on occasion need to remember a layout from before.
498         Layout const *lastlay = nullptr;
499
500         while (par != pend) {
501                 Layout const & style = par->layout();
502                 ParagraphList::const_iterator send;
503
504                 // Actual content of this paragraph.
505                 switch (style.latextype) {
506                 case LATEX_ENVIRONMENT:
507                 case LATEX_LIST_ENVIRONMENT:
508                 case LATEX_ITEM_ENVIRONMENT: {
509                         // There are two possibilities in this case.
510                         // One is that we are still in the environment in which we
511                         // started---which we will be if the depth is the same.
512                         if (par->params().depth() == origdepth) {
513                                 LATTEST(bstyle == style);
514                                 if (lastlay != nullptr) {
515                                         closeItemTag(xs, *lastlay);
516                                         if (lastlay->docbookitemwrappertag() != "NONE") {
517                                                 xs << xml::EndTag(lastlay->docbookitemwrappertag());
518                                                 xs << xml::CR();
519                                         }
520                                         lastlay = nullptr;
521                                 }
522
523                                 // this will be positive if we want to skip the
524                                 // initial word (if it's been taken for the label).
525                                 pos_type sep = 0;
526
527                                 // Open a wrapper tag if needed.
528                                 if (style.docbookitemwrappertag() != "NONE") {
529                                         xs << xml::StartTag(style.docbookitemwrappertag(), style.docbookitemwrapperattr());
530                                         xs << xml::CR();
531                                 }
532
533                                 // label output
534                                 if (style.labeltype != LABEL_NO_LABEL &&
535                                                 style.docbookitemlabeltag() != "NONE") {
536
537                                         if (isNormalEnv(style)) {
538                                                 // in this case, we print the label only for the first
539                                                 // paragraph (as in a theorem or an abstract).
540                                                 if (par == pbegin) {
541                                                         docstring const lbl = pbegin->params().labelString();
542                                                         if (!lbl.empty()) {
543                                                                 openLabelTag(xs, style);
544                                                                 xs << lbl;
545                                                                 closeLabelTag(xs, style);
546                                                         } else {
547                                                                 // No new line after closeLabelTag.
548                                                                 xs << xml::CR();
549                                                         }
550                                                 }
551                                         } else { // some kind of list
552                                                 if (style.labeltype == LABEL_MANUAL) {
553                                                         // Only variablelist gets here.
554
555                                                         openLabelTag(xs, style);
556                                                         sep = par->firstWordDocBook(xs, runparams);
557                                                         closeLabelTag(xs, style);
558                                                 } else {
559                                                         openLabelTag(xs, style);
560                                                         xs << par->params().labelString();
561                                                         closeLabelTag(xs, style);
562                                                 }
563                                         }
564                                 } // end label output
565
566                                 // Start generating the item.
567                                 bool wasInParagraph = runparams.docbook_in_par;
568                                 openItemTag(xs, style);
569                                 bool getsIntoParagraph = openInnerItemTag(xs, style);
570                                 OutputParams rp = runparams;
571                                 rp.docbook_in_par = wasInParagraph | getsIntoParagraph;
572
573                                 // Maybe the item is completely empty, i.e. if the first word ends at the end of the current paragraph
574                                 // AND if the next paragraph doesn't have the same depth (if there is such a paragraph).
575                                 // Common case: there is only the first word on the line, but there is a nested list instead
576                                 // of more text.
577                                 bool emptyItem = false;
578                                 if (sep == par->size()) {
579                                         auto next_par = par;
580                                         ++next_par;
581                                         if (next_par == text.paragraphs().end()) // There is no next paragraph.
582                                                 emptyItem = true;
583                                         else // There is a next paragraph: check depth.
584                                                 emptyItem = par->params().depth() >= next_par->params().depth();
585                                 }
586
587                                 if (emptyItem) {
588                                         // Avoid having an empty item, this is not valid DocBook. A single character is enough to force
589                                         // generation of a full <para>.
590                                         xs << ' ';
591                                 } else {
592                                         // Generate the rest of the paragraph, if need be.
593                                         par->simpleDocBookOnePar(buf, xs, rp, text.outerFont(distance(begin, par)), true, true, sep);
594                                 }
595
596                                 ++par;
597                                 if (getsIntoParagraph)
598                                         closeInnerItemTag(xs, style);
599
600                                 // We may not want to close the tag yet, in particular:
601                                 // If we're not at the end of the item...
602                                 if (par != pend
603                                         //  and are doing items...
604                                         && !isNormalEnv(style)
605                                         // and if the depth has changed...
606                                         && par->params().depth() != origdepth) {
607                                         // then we'll save this layout for later, and close it when
608                                         // we get another item.
609                                         lastlay = &style;
610                                 } else {
611                                         closeItemTag(xs, style);
612
613                                         // Eventually, close the item wrapper.
614                                         if (style.docbookitemwrappertag() != "NONE") {
615                                                 xs << xml::EndTag(style.docbookitemwrappertag());
616                                                 xs << xml::CR();
617                                         }
618                                 }
619                         }
620                         // The other possibility is that the depth has increased.
621                         else {
622                                 send = findEndOfEnvironment(par, pend);
623                                 par = makeEnvironment(buf, xs, runparams, text, par, send);
624                         }
625                         break;
626                 }
627                 case LATEX_PARAGRAPH:
628                         send = findLastParagraph(par, pend);
629                         par = makeParagraphs(buf, xs, runparams, text, par, send);
630                         break;
631                 case LATEX_BIB_ENVIRONMENT:
632                         send = findLastParagraph(par, pend);
633                         par = makeParagraphBibliography(buf, xs, runparams, text, par, send);
634                         break;
635                 case LATEX_COMMAND:
636                         ++par;
637                         break;
638                 }
639         }
640
641         if (lastlay != nullptr) {
642                 closeItemTag(xs, *lastlay);
643                 if (lastlay->docbookitemwrappertag() != "NONE") {
644                         xs << xml::EndTag(lastlay->docbookitemwrappertag());
645                         xs << xml::CR();
646                 }
647         }
648         closeTag(xs, bstyle);
649         xs << xml::CR();
650         return pend;
651 }
652
653
654 void makeCommand(
655                 Buffer const & buf,
656                 XMLStream & xs,
657                 OutputParams const & runparams,
658                 Text const & text,
659                 ParagraphList::const_iterator const & pbegin)
660 {
661         Layout const &style = pbegin->layout();
662
663         // No need for labels, as they are handled by DocBook tags.
664
665         openParTag(xs, style);
666
667         ParagraphList::const_iterator const begin = text.paragraphs().begin();
668         pbegin->simpleDocBookOnePar(buf, xs, runparams,
669                                                                 text.outerFont(distance(begin, pbegin)));
670         closeTag(xs, style);
671         xs << xml::CR();
672 }
673
674 pair<ParagraphList::const_iterator, ParagraphList::const_iterator> makeAny(
675                 Text const &text,
676                 Buffer const &buf,
677                 XMLStream &xs,
678                 OutputParams const &ourparams,
679                 ParagraphList::const_iterator par,
680                 ParagraphList::const_iterator send,
681                 ParagraphList::const_iterator pend)
682 {
683         Layout const & style = par->layout();
684
685         switch (style.latextype) {
686                 case LATEX_COMMAND: {
687                         // The files with which we are working never have more than
688                         // one paragraph in a command structure.
689                         // FIXME
690                         // if (ourparams.docbook_in_par)
691                         //   fix it so we don't get sections inside standard, e.g.
692                         // note that we may then need to make runparams not const, so we
693                         // can communicate that back.
694                         // FIXME Maybe this fix should be in the routines themselves, in case
695                         // they are called from elsewhere.
696                         makeCommand(buf, xs, ourparams, text, par);
697                         ++par;
698                         break;
699                 }
700                 case LATEX_ENVIRONMENT:
701                 case LATEX_LIST_ENVIRONMENT:
702                 case LATEX_ITEM_ENVIRONMENT: {
703                         // FIXME Same fix here.
704                         send = findEndOfEnvironment(par, pend);
705                         par = makeEnvironment(buf, xs, ourparams, text, par, send);
706                         break;
707                 }
708                 case LATEX_BIB_ENVIRONMENT: {
709                         send = findLastParagraph(par, pend);
710                         par = makeParagraphBibliography(buf, xs, ourparams, text, par, send);
711                         break;
712                 }
713                 case LATEX_PARAGRAPH: {
714                         send = findLastParagraph(par, pend);
715                         par = makeParagraphs(buf, xs, ourparams, text, par, send);
716                         break;
717                 }
718         }
719
720         return make_pair(par, send);
721 }
722
723 } // end anonymous namespace
724
725
726 using DocBookDocumentSectioning = tuple<bool, pit_type>;
727 using DocBookInfoTag = tuple<set<pit_type>, set<pit_type>, pit_type, pit_type>;
728
729
730 DocBookDocumentSectioning hasDocumentSectioning(ParagraphList const &paragraphs, pit_type bpit, pit_type const epit) {
731         bool documentHasSections = false;
732
733         while (bpit < epit) {
734                 Layout const &style = paragraphs[bpit].layout();
735                 documentHasSections |= style.category() == from_utf8("Sectioning");
736
737                 if (documentHasSections) {
738                         break;
739                 }
740                 bpit += 1;
741         }
742         // Paragraphs before the first section: [ runparams.par_begin ; eppit )
743
744         return make_tuple(documentHasSections, bpit);
745 }
746
747
748 DocBookInfoTag getParagraphsWithInfo(ParagraphList const &paragraphs, pit_type const bpit, pit_type const epit) {
749         set<pit_type> shouldBeInInfo;
750         set<pit_type> mustBeInInfo;
751
752         pit_type cpit = bpit;
753         while (cpit < epit) {
754                 // Skip paragraphs only containing one note.
755                 Paragraph const &par = paragraphs[cpit];
756                 if (par.size() == 1 && dynamic_cast<InsetNote*>(paragraphs[cpit].insetList().get(0))) {
757                         cpit += 1;
758                         continue;
759                 }
760
761                 // Based on layout information, store this paragraph in one set: should be in <info>, must be.
762                 Layout const &style = par.layout();
763
764                 if (style.docbookininfo() == "always") {
765                         mustBeInInfo.emplace(cpit);
766                 } else if (style.docbookininfo() == "maybe") {
767                         shouldBeInInfo.emplace(cpit);
768                 } else {
769                         // Hypothesis: the <info> parts should be grouped together near the beginning bpit.
770                         break;
771                 }
772                 cpit += 1;
773         }
774         // Now, cpit points to the last paragraph that has things that could go in <info>.
775         // bpit is still the beginning of the <info> part.
776
777         return make_tuple(shouldBeInInfo, mustBeInInfo, bpit, cpit);
778 }
779
780
781 bool hasAbstractBetween(ParagraphList const &paragraphs, pit_type const bpitAbstract, pit_type const epitAbstract)
782 {
783         // Hypothesis: the paragraphs between bpitAbstract and epitAbstract can be considered an abstract because they
784         // are just after a document or part title.
785         if (epitAbstract - bpitAbstract <= 0)
786                 return false;
787
788         // If there is something between these paragraphs, check if it's compatible with an abstract (i.e. some text).
789         pit_type bpit = bpitAbstract;
790         while (bpit < epitAbstract) {
791                 const Paragraph &p = paragraphs.at(bpit);
792
793                 if (p.layout().name() == from_ascii("Abstract"))
794                         return true;
795
796                 if (!p.insetList().empty()) {
797                         for (const auto &i : p.insetList()) {
798                                 if (i.inset->getText(0) != nullptr) {
799                                         return true;
800                                 }
801                         }
802                 }
803                 bpit++;
804         }
805         return false;
806 }
807
808
809 pit_type generateDocBookParagraphWithoutSectioning(
810                 Text const & text,
811                 Buffer const & buf,
812                 XMLStream & xs,
813                 OutputParams const & runparams,
814                 ParagraphList const & paragraphs,
815                 pit_type bpit,
816                 pit_type epit)
817 {
818         auto par = paragraphs.iterator_at(bpit);
819         auto lastStartedPar = par;
820         ParagraphList::const_iterator send;
821         auto const pend =
822                         (epit == (int) paragraphs.size()) ?
823                         paragraphs.end() : paragraphs.iterator_at(epit);
824
825         while (bpit < epit) {
826                 tie(par, send) = makeAny(text, buf, xs, runparams, par, send, pend);
827                 bpit += distance(lastStartedPar, par);
828                 lastStartedPar = par;
829         }
830
831         return bpit;
832 }
833
834
835 void outputDocBookInfo(
836                 Text const & text,
837                 Buffer const & buf,
838                 XMLStream & xs,
839                 OutputParams const & runparams,
840                 ParagraphList const & paragraphs,
841                 DocBookInfoTag const & info,
842                 pit_type bpitAbstract,
843                 pit_type const epitAbstract)
844 {
845         // Consider everything between bpitAbstract and epitAbstract (excluded) as paragraphs for the abstract.
846         // Use bpitAbstract >= epitAbstract to indicate there is no abstract.
847
848         set<pit_type> shouldBeInInfo;
849         set<pit_type> mustBeInInfo;
850         pit_type bpitInfo;
851         pit_type epitInfo;
852         tie(shouldBeInInfo, mustBeInInfo, bpitInfo, epitInfo) = info;
853
854         // Perform an additional check on the abstract. Sometimes, there are many paragraphs that should go
855         // into the abstract, but none generates actual content. Thus, first generate to a temporary stream,
856         // then only create the <abstract> tag if these paragraphs generate some content.
857         // This check must be performed *before* a decision on whether or not to output <info> is made.
858         bool hasAbstract = hasAbstractBetween(paragraphs, bpitAbstract, epitAbstract);
859         docstring abstract;
860         if (hasAbstract) {
861                 odocstringstream os2;
862                 XMLStream xs2(os2);
863                 generateDocBookParagraphWithoutSectioning(text, buf, xs2, runparams, paragraphs, bpitAbstract, epitAbstract);
864
865                 // Actually output the abstract if there is something to do. Don't count line feeds or spaces in this,
866                 // even though they must be properly output if there is some abstract.
867                 docstring abstractContent = os2.str();
868                 static const lyx::regex reg("[ \\r\\n]*");
869                 abstractContent = from_utf8(lyx::regex_replace(to_utf8(abstractContent), reg, string("")));
870
871                 // Nothing? Then there is no abstract!
872                 if (abstractContent.empty())
873                         hasAbstract = false;
874         }
875
876         // The abstract must go in <info>.
877         bool needInfo = !mustBeInInfo.empty() || hasAbstract;
878
879         // Start the <info> tag if required.
880         if (needInfo) {
881                 xs.startDivision(false);
882                 xs << xml::StartTag("info");
883                 xs << xml::CR();
884         }
885
886         // Output the elements that should go in <info>.
887         generateDocBookParagraphWithoutSectioning(text, buf, xs, runparams, paragraphs, bpitInfo, epitInfo);
888
889         if (hasAbstract && !abstract.empty()) { // The second test is probably superfluous.
890                 string tag = paragraphs[bpitAbstract].layout().docbookforceabstracttag();
891                 if (tag == "NONE")
892                         tag = "abstract";
893
894                 xs << xml::StartTag(tag);
895                 xs << xml::CR();
896                 xs << XMLStream::ESCAPE_NONE << abstract;
897                 xs << xml::EndTag(tag);
898                 xs << xml::CR();
899         }
900
901         // End the <info> tag if it was started.
902         if (needInfo) {
903                 xs << xml::EndTag("info");
904                 xs << xml::CR();
905                 xs.endDivision();
906         }
907 }
908
909
910 void docbookFirstParagraphs(
911                 Text const &text,
912                 Buffer const &buf,
913                 XMLStream &xs,
914                 OutputParams const &runparams,
915                 pit_type epit)
916 {
917         // Handle the beginning of the document, supposing it has sections.
918         // Major role: output the first <info> tag.
919
920         ParagraphList const &paragraphs = text.paragraphs();
921         pit_type bpit = runparams.par_begin;
922         DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit);
923         outputDocBookInfo(text, buf, xs, runparams, paragraphs, info, get<3>(info), epit);
924 }
925
926
927 bool isParagraphEmpty(const Paragraph &par)
928 {
929         InsetList const &insets = par.insetList();
930         size_t insetsLength = distance(insets.begin(), insets.end());
931         bool hasParagraphOnlyNote = insetsLength == 1 && insets.get(0) && insets.get(0)->asInsetCollapsible() &&
932                                                                 dynamic_cast<InsetNote *>(insets.get(0));
933         return hasParagraphOnlyNote;
934 }
935
936
937 void docbookSimpleAllParagraphs(
938                 Text const & text,
939                 Buffer const & buf,
940                 XMLStream & xs,
941                 OutputParams const & runparams)
942 {
943         // Handle the document, supposing it has no sections (i.e. a "simple" document).
944
945         // First, the <info> tag.
946         ParagraphList const &paragraphs = text.paragraphs();
947         pit_type bpit = runparams.par_begin;
948         pit_type const epit = runparams.par_end;
949         DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit);
950         outputDocBookInfo(text, buf, xs, runparams, paragraphs, info, 0, 0);
951         bpit = get<3>(info); // Generate the content starting from the end of the <info> part.
952
953         // Then, the content.
954         ParagraphList::const_iterator const pend =
955                         (epit == (int) paragraphs.size()) ?
956                         paragraphs.end() : paragraphs.iterator_at(epit);
957
958         while (bpit < epit) {
959                 auto par = paragraphs.iterator_at(bpit);
960                 ParagraphList::const_iterator const lastStartedPar = par;
961                 ParagraphList::const_iterator send;
962
963                 if (isParagraphEmpty(*par)) {
964                         ++par;
965                         bpit += distance(lastStartedPar, par);
966                         continue;
967                 }
968
969                 // Generate this paragraph.
970                 tie(par, send) = makeAny(text, buf, xs, runparams, par, send, pend);
971                 bpit += distance(lastStartedPar, par);
972         }
973 }
974
975
976 void docbookParagraphs(Text const &text,
977                                            Buffer const &buf,
978                                            XMLStream &xs,
979                                            OutputParams const &runparams) {
980         ParagraphList const &paragraphs = text.paragraphs();
981         if (runparams.par_begin == runparams.par_end) {
982                 runparams.par_begin = 0;
983                 runparams.par_end = paragraphs.size();
984         }
985         pit_type bpit = runparams.par_begin;
986         pit_type const epit = runparams.par_end;
987         LASSERT(bpit < epit,
988                         {
989                                 xs << XMLStream::ESCAPE_NONE << "<!-- DocBook output error! -->\n";
990                                 return;
991                         });
992
993         ParagraphList::const_iterator const pend =
994                         (epit == (int) paragraphs.size()) ?
995                         paragraphs.end() : paragraphs.iterator_at(epit);
996         std::stack<std::pair<int, string>> headerLevels; // Used to determine when to open/close sections: store the depth
997         // of the section and the tag that was used to open it.
998
999         // Detect whether the document contains sections. If there are no sections, there can be no automatically
1000         // discovered abstract.
1001         bool documentHasSections;
1002         pit_type eppit;
1003         tie(documentHasSections, eppit) = hasDocumentSectioning(paragraphs, bpit, epit);
1004
1005         if (documentHasSections) {
1006                 docbookFirstParagraphs(text, buf, xs, runparams, eppit);
1007                 bpit = eppit;
1008         } else {
1009                 docbookSimpleAllParagraphs(text, buf, xs, runparams);
1010                 return;
1011         }
1012
1013         bool currentlyInAppendix = false;
1014
1015         while (bpit < epit) {
1016                 OutputParams ourparams = runparams;
1017
1018                 auto par = paragraphs.iterator_at(bpit);
1019                 if (par->params().startOfAppendix())
1020                         currentlyInAppendix = true;
1021                 Layout const &style = par->layout();
1022                 ParagraphList::const_iterator const lastStartedPar = par;
1023                 ParagraphList::const_iterator send;
1024
1025                 if (isParagraphEmpty(*par)) {
1026                         ++par;
1027                         bpit += distance(lastStartedPar, par);
1028                         continue;
1029                 }
1030
1031                 // Think about adding <section> and/or </section>s.
1032                 const bool isLayoutSectioning = style.category() == from_utf8("Sectioning");
1033                 if (isLayoutSectioning) {
1034                         int level = style.toclevel;
1035
1036                         // Need to close a previous section if it has the same level or a higher one (close <section> if opening a <h2>
1037                         // after a <h2>, <h3>, <h4>, <h5> or <h6>). More examples:
1038                         //   - current: h2; back: h1; do not close any <section>
1039                         //   - current: h1; back: h2; close two <section> (first the <h2>, then the <h1>, so a new <h1> can come)
1040                         while (!headerLevels.empty() && level <= headerLevels.top().first) {
1041                                 int stackLevel = headerLevels.top().first;
1042                                 docstring stackTag = from_utf8("</" + headerLevels.top().second + ">");
1043                                 headerLevels.pop();
1044
1045                                 // Output the tag only if it corresponds to a legit section.
1046                                 if (stackLevel != Layout::NOT_IN_TOC)
1047                                         xs << XMLStream::ESCAPE_NONE << stackTag << xml::CR();
1048                         }
1049
1050                         // Open the new section: first push it onto the stack, then output it in DocBook.
1051                         string sectionTag = (currentlyInAppendix && style.docbooksectiontag() == "chapter") ?
1052                                                                 "appendix" : style.docbooksectiontag();
1053                         headerLevels.push(std::make_pair(level, sectionTag));
1054
1055                         // Some sectioning-like elements should not be output (such as FrontMatter).
1056                         if (level != Layout::NOT_IN_TOC) {
1057                                 // Look for a label in the title, i.e. a InsetLabel as a child.
1058                                 docstring id = docstring();
1059                                 for (pos_type i = 0; i < par->size(); ++i) {
1060                                         Inset const *inset = par->getInset(i);
1061                                         if (inset) {
1062                                                 if (auto label = dynamic_cast<InsetLabel const *>(inset)) {
1063                                                         // Generate the attributes for the section if need be.
1064                                                         id += "xml:id=\"" + xml::cleanID(label->screenLabel()) + "\"";
1065
1066                                                         // Don't output the ID as a DocBook <anchor>.
1067                                                         ourparams.docbook_anchors_to_ignore.emplace(label->screenLabel());
1068
1069                                                         // Cannot have multiple IDs per tag.
1070                                                         break;
1071                                                 }
1072                                         }
1073                                 }
1074
1075                                 // Write the open tag for this section.
1076                                 docstring tag = from_utf8("<" + sectionTag);
1077                                 if (!id.empty())
1078                                         tag += from_utf8(" ") + id;
1079                                 tag += from_utf8(">");
1080                                 xs << XMLStream::ESCAPE_NONE << tag;
1081                                 xs << xml::CR();
1082                         }
1083                 }
1084
1085                 // Close all sections before the bibliography.
1086                 // TODO: Only close all when the bibliography is at the end of the document? Or force to output the bibliography at the end of the document? Or don't care (as allowed by DocBook)?
1087                 auto insetsLength = distance(par->insetList().begin(), par->insetList().end());
1088                 if (insetsLength > 0) {
1089                         Inset const *firstInset = par->getInset(0);
1090                         if (firstInset && dynamic_cast<InsetBibtex const *>(firstInset)) {
1091                                 while (!headerLevels.empty()) {
1092                                         int level = headerLevels.top().first;
1093                                         docstring tag = from_utf8("</" + headerLevels.top().second + ">");
1094                                         headerLevels.pop();
1095
1096                                         // Output the tag only if it corresponds to a legit section.
1097                                         if (level != Layout::NOT_IN_TOC) {
1098                                                 xs << XMLStream::ESCAPE_NONE << tag;
1099                                                 xs << xml::CR();
1100                                         }
1101                                 }
1102                         }
1103                 }
1104
1105                 // Generate this paragraph.
1106                 tie(par, send) = makeAny(text, buf, xs, ourparams, par, send, pend);
1107                 bpit += distance(lastStartedPar, par);
1108         }
1109
1110         // If need be, close <section>s, but only at the end of the document (otherwise, dealt with at the beginning
1111         // of the loop).
1112         while (!headerLevels.empty() && headerLevels.top().first > Layout::NOT_IN_TOC) {
1113                 docstring tag = from_utf8("</" + headerLevels.top().second + ">");
1114                 headerLevels.pop();
1115                 xs << XMLStream::ESCAPE_NONE << tag;
1116                 xs << xml::CR();
1117         }
1118 }
1119
1120 } // namespace lyx