]> git.lyx.org Git - lyx.git/blob - src/output_docbook.cpp
DocBook: make openParTag/closeTag use paragraphs instead of layouts.
[lyx.git] / src / output_docbook.cpp
1 /**
2  * \file output_docbook.cpp
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author Lars Gullik Bjønnes
7  * \author José Matos
8  *
9  * Full author contact details are available in file CREDITS.
10  */
11
12 #include <config.h>
13
14 #include "Buffer.h"
15 #include "buffer_funcs.h"
16 #include "BufferParams.h"
17 #include "Font.h"
18 #include "InsetList.h"
19 #include "Layout.h"
20 #include "OutputParams.h"
21 #include "Paragraph.h"
22 #include "ParagraphList.h"
23 #include "ParagraphParameters.h"
24 #include "xml.h"
25 #include "Text.h"
26 #include "TextClass.h"
27
28 #include "insets/InsetBibtex.h"
29 #include "insets/InsetBibitem.h"
30 #include "insets/InsetLabel.h"
31 #include "insets/InsetNote.h"
32
33 #include "support/convert.h"
34 #include "support/debug.h"
35 #include "support/lassert.h"
36 #include "support/lstrings.h"
37 #include "support/textutils.h"
38
39 #include "support/regex.h"
40
41 #include <stack>
42 #include <iostream>
43 #include <algorithm>
44 #include <sstream>
45
46 using namespace std;
47 using namespace lyx::support;
48
49 namespace lyx {
50
51 namespace {
52
53 std::string const fontToDocBookTag(xml::FontTypes type)
54 {
55         switch (type) {
56         case xml::FontTypes::FT_EMPH:
57         case xml::FontTypes::FT_BOLD:
58                 return "emphasis";
59         case xml::FontTypes::FT_NOUN:
60                 return "person";
61         case xml::FontTypes::FT_UBAR:
62         case xml::FontTypes::FT_WAVE:
63         case xml::FontTypes::FT_DBAR:
64         case xml::FontTypes::FT_SOUT:
65         case xml::FontTypes::FT_XOUT:
66         case xml::FontTypes::FT_ITALIC:
67         case xml::FontTypes::FT_UPRIGHT:
68         case xml::FontTypes::FT_SLANTED:
69         case xml::FontTypes::FT_SMALLCAPS:
70         case xml::FontTypes::FT_ROMAN:
71         case xml::FontTypes::FT_SANS:
72                 return "emphasis";
73         case xml::FontTypes::FT_TYPE:
74                 return "code";
75         case xml::FontTypes::FT_SIZE_TINY:
76         case xml::FontTypes::FT_SIZE_SCRIPT:
77         case xml::FontTypes::FT_SIZE_FOOTNOTE:
78         case xml::FontTypes::FT_SIZE_SMALL:
79         case xml::FontTypes::FT_SIZE_NORMAL:
80         case xml::FontTypes::FT_SIZE_LARGE:
81         case xml::FontTypes::FT_SIZE_LARGER:
82         case xml::FontTypes::FT_SIZE_LARGEST:
83         case xml::FontTypes::FT_SIZE_HUGE:
84         case xml::FontTypes::FT_SIZE_HUGER:
85         case xml::FontTypes::FT_SIZE_INCREASE:
86         case xml::FontTypes::FT_SIZE_DECREASE:
87                 return "emphasis";
88         default:
89                 return "";
90         }
91 }
92
93 string fontToRole(xml::FontTypes type)
94 {
95         // Specific fonts are achieved with roles. The only common ones are "" for basic emphasis,
96         // and "bold"/"strong" for bold. With some specific options, other roles are copied into
97         // HTML output (via the DocBook XSLT sheets); otherwise, if not recognised, they are just ignored.
98         // Hence, it is not a problem to have many roles by default here.
99         // See https://www.sourceware.org/ml/docbook/2003-05/msg00269.html
100         switch (type) {
101         case xml::FontTypes::FT_ITALIC:
102         case xml::FontTypes::FT_EMPH:
103                 return "";
104         case xml::FontTypes::FT_BOLD:
105                 return "bold";
106         case xml::FontTypes::FT_NOUN:
107                 return ""; // Outputs a <person>
108         case xml::FontTypes::FT_TYPE:
109                 return ""; // Outputs a <code>
110         case xml::FontTypes::FT_UBAR:
111                 return "underline";
112
113                 // All other roles are non-standard for DocBook.
114
115         case xml::FontTypes::FT_WAVE:
116                 return "wave";
117         case xml::FontTypes::FT_DBAR:
118                 return "dbar";
119         case xml::FontTypes::FT_SOUT:
120                 return "sout";
121         case xml::FontTypes::FT_XOUT:
122                 return "xout";
123         case xml::FontTypes::FT_UPRIGHT:
124                 return "upright";
125         case xml::FontTypes::FT_SLANTED:
126                 return "slanted";
127         case xml::FontTypes::FT_SMALLCAPS:
128                 return "smallcaps";
129         case xml::FontTypes::FT_ROMAN:
130                 return "roman";
131         case xml::FontTypes::FT_SANS:
132                 return "sans";
133         case xml::FontTypes::FT_SIZE_TINY:
134                 return "tiny";
135         case xml::FontTypes::FT_SIZE_SCRIPT:
136                 return "size_script";
137         case xml::FontTypes::FT_SIZE_FOOTNOTE:
138                 return "size_footnote";
139         case xml::FontTypes::FT_SIZE_SMALL:
140                 return "size_small";
141         case xml::FontTypes::FT_SIZE_NORMAL:
142                 return "size_normal";
143         case xml::FontTypes::FT_SIZE_LARGE:
144                 return "size_large";
145         case xml::FontTypes::FT_SIZE_LARGER:
146                 return "size_larger";
147         case xml::FontTypes::FT_SIZE_LARGEST:
148                 return "size_largest";
149         case xml::FontTypes::FT_SIZE_HUGE:
150                 return "size_huge";
151         case xml::FontTypes::FT_SIZE_HUGER:
152                 return "size_huger";
153         case xml::FontTypes::FT_SIZE_INCREASE:
154                 return "size_increase";
155         case xml::FontTypes::FT_SIZE_DECREASE:
156                 return "size_decrease";
157         default:
158                 return "";
159         }
160 }
161
162 string fontToAttribute(xml::FontTypes type) {
163         // If there is a role (i.e. nonstandard use of a tag), output the attribute. Otherwise, the sheer tag is sufficient
164         // for the font.
165         string role = fontToRole(type);
166         if (!role.empty()) {
167                 return "role='" + role + "'";
168         } else {
169                 return "";
170         }
171 }
172
173 } // end anonymous namespace
174
175
176 xml::FontTag docbookStartFontTag(xml::FontTypes type)
177 {
178         return xml::FontTag(from_utf8(fontToDocBookTag(type)), from_utf8(fontToAttribute(type)), type);
179 }
180
181
182 xml::EndFontTag docbookEndFontTag(xml::FontTypes type)
183 {
184         return xml::EndFontTag(from_utf8(fontToDocBookTag(type)), type);
185 }
186
187
188 namespace {
189
190 // convenience functions
191
192 void openParTag(XMLStream & xs, Paragraph const & par)
193 {
194         Layout const & lay = par.layout();
195
196         if (lay.docbookwrappertag() != "NONE")
197                 xs << xml::StartTag(lay.docbookwrappertag(), lay.docbookwrapperattr());
198
199         string tag = lay.docbooktag();
200         if (tag == "Plain Layout")
201                 tag = "para";
202
203         xs << xml::ParTag(tag, lay.docbookattr());
204
205         if (lay.docbookitemtag() != "NONE")
206                 xs << xml::StartTag(lay.docbookitemtag(), lay.docbookitemattr());
207 }
208
209
210 void closeTag(XMLStream & xs, Paragraph const & par)
211 {
212         Layout const & lay = par.layout();
213
214         if (lay.docbookitemtag() != "NONE")
215                 xs << xml::EndTag(lay.docbookitemtag());
216
217         string tag = lay.docbooktag();
218         if (tag == "Plain Layout")
219                 tag = "para";
220
221         xs << xml::EndTag(tag);
222         if (lay.docbookwrappertag() != "NONE")
223                 xs << xml::EndTag(lay.docbookwrappertag());
224 }
225
226
227 void openLabelTag(XMLStream & xs, Layout const & lay) // Mostly for definition lists.
228 {
229         xs << xml::StartTag(lay.docbookitemlabeltag(), lay.docbookitemlabelattr());
230 }
231
232
233 void closeLabelTag(XMLStream & xs, Layout const & lay)
234 {
235         xs << xml::EndTag(lay.docbookitemlabeltag());
236         xs << xml::CR();
237 }
238
239
240 void openItemTag(XMLStream & xs, Layout const & lay)
241 {
242         xs << xml::StartTag(lay.docbookitemtag(), lay.docbookitemattr());
243 }
244
245
246 // Return true when new elements are output in a paragraph, false otherwise.
247 bool openInnerItemTag(XMLStream & xs, Layout const & lay)
248 {
249         if (lay.docbookiteminnertag() != "NONE") {
250                 xs << xml::CR();
251                 xs << xml::ParTag(lay.docbookiteminnertag(), lay.docbookiteminnerattr());
252
253                 if (lay.docbookiteminnertag() == "para") {
254                         return true;
255                 }
256         }
257         return false;
258 }
259
260
261 void closeInnerItemTag(XMLStream & xs, Layout const & lay)
262 {
263         if (lay.docbookiteminnertag()!= "NONE") {
264                 xs << xml::EndTag(lay.docbookiteminnertag());
265                 xs << xml::CR();
266         }
267 }
268
269
270 inline void closeItemTag(XMLStream & xs, Layout const & lay)
271 {
272         xs << xml::EndTag(lay.docbookitemtag());
273         xs << xml::CR();
274 }
275
276 // end of convenience functions
277
278 ParagraphList::const_iterator findLast(
279                 ParagraphList::const_iterator p,
280                 ParagraphList::const_iterator const & pend,
281                 LatexType type) {
282         for (++p; p != pend && p->layout().latextype == type; ++p);
283
284         return p;
285 }
286
287 ParagraphList::const_iterator findLastBibliographyParagraph(
288                 ParagraphList::const_iterator p,
289                 ParagraphList::const_iterator const & pend) {
290         for (++p; p != pend && p->layout().latextype == LATEX_BIB_ENVIRONMENT; ++p);
291
292         return p;
293 }
294
295
296 ParagraphList::const_iterator findEndOfEnvironment(
297                 ParagraphList::const_iterator const & pstart,
298                 ParagraphList::const_iterator const & pend)
299 {
300         ParagraphList::const_iterator p = pstart;
301         size_t const depth = p->params().depth();
302
303         for (++p; p != pend; ++p) {
304                 Layout const &style = p->layout();
305                 // It shouldn't happen that e.g. a section command occurs inside
306                 // a quotation environment, at a higher depth, but as of 6/2009,
307                 // it can happen. We pretend that it's just at lowest depth.
308                 if (style.latextype == LATEX_COMMAND)
309                         return p;
310
311                 // If depth is down, we're done
312                 if (p->params().depth() < depth)
313                         return p;
314
315                 // If depth is up, we're not done
316                 if (p->params().depth() > depth)
317                         continue;
318
319                 // FIXME I am not sure about the first check.
320                 // Surely we *could* have different layouts that count as
321                 // LATEX_PARAGRAPH, right?
322                 if (style.latextype == LATEX_PARAGRAPH || style != p->layout())
323                         return p;
324         }
325
326         return pend;
327 }
328
329
330 ParagraphList::const_iterator makeParagraphBibliography(
331                 Buffer const &buf,
332                 XMLStream &xs,
333                 OutputParams const &runparams,
334                 Text const &text,
335                 ParagraphList::const_iterator const & pbegin,
336                 ParagraphList::const_iterator const & pend)
337 {
338         auto const begin = text.paragraphs().begin();
339         auto const end = text.paragraphs().end();
340
341         // Find the paragraph *before* pbegin.
342         ParagraphList::const_iterator pbegin_before = begin;
343         if (pbegin != begin) {
344                 ParagraphList::const_iterator pbegin_before_next = begin;
345                 ++pbegin_before_next;
346
347                 while (pbegin_before_next != pbegin) {
348                         ++pbegin_before;
349                         ++pbegin_before_next;
350                 }
351         }
352
353         ParagraphList::const_iterator par = pbegin;
354
355         // If this is the first paragraph in a bibliography, open the bibliography tag.
356         if (pbegin != begin && pbegin_before->layout().latextype != LATEX_BIB_ENVIRONMENT) {
357                 xs << xml::StartTag("bibliography");
358                 xs << xml::CR();
359         }
360
361         // Generate the required paragraphs, but only if they are .
362         for (; par != pend; ++par) {
363                 // Start the precooked bibliography entry. This is very much like opening a paragraph tag.
364                 // Don't forget the citation ID!
365                 docstring attr;
366                 for (auto i = 0; i < par->size(); ++i) {
367                         Inset const *ip = par->getInset(0);
368                         if (ip != nullptr && ip->lyxCode() == BIBITEM_CODE) {
369                                 const auto * bibitem = dynamic_cast<const InsetBibitem*>(par->getInset(i));
370                                 attr = from_utf8("xml:id='") + bibitem->getParam("key") + from_utf8("'");
371                                 break;
372                         }
373                 }
374                 xs << xml::StartTag(from_utf8("bibliomixed"), attr);
375
376                 // Generate the entry.
377                 par->simpleDocBookOnePar(buf, xs, runparams, text.outerFont(distance(begin, par)), true, true, 0);
378
379                 // End the precooked bibliography entry.
380                 xs << xml::EndTag("bibliomixed");
381                 xs << xml::CR();
382         }
383
384         // If this is the last paragraph in a bibliography, close the bibliography tag.
385         if (par == end || par->layout().latextype != LATEX_BIB_ENVIRONMENT) {
386                 xs << xml::EndTag("bibliography");
387                 xs << xml::CR();
388         }
389
390         return pend;
391 }
392
393
394 ParagraphList::const_iterator makeParagraphs(
395                 Buffer const &buf,
396                 XMLStream &xs,
397                 OutputParams const &runparams,
398                 Text const &text,
399                 ParagraphList::const_iterator const & pbegin,
400                 ParagraphList::const_iterator const & pend)
401 {
402         ParagraphList::const_iterator const begin = text.paragraphs().begin();
403         ParagraphList::const_iterator par = pbegin;
404         for (; par != pend; ++par) {
405                 // We want to open the paragraph tag if:
406                 //   (i) the current layout permits multiple paragraphs
407                 //  (ii) we are either not already inside a paragraph (HTMLIsBlock) OR
408                 //         we are, but this is not the first paragraph
409                 //
410                 // But there is also a special case, and we first see whether we are in it.
411                 // We do not want to open the paragraph tag if this paragraph contains
412                 // only one item, and that item is "inline", i.e., not HTMLIsBlock (such
413                 // as a branch). On the other hand, if that single item has a font change
414                 // applied to it, then we still do need to open the paragraph.
415                 //
416                 // Obviously, this is very fragile. The main reason we need to do this is
417                 // because of branches, e.g., a branch that contains an entire new section.
418                 // We do not really want to wrap that whole thing in a <div>...</div>.
419                 bool special_case = false;
420                 Inset const *specinset = par->size() == 1 ? par->getInset(0) : 0;
421                 if (specinset && !specinset->getLayout().htmlisblock()) { // TODO: Convert htmlisblock to a DocBook parameter?
422                         Layout const &style = par->layout();
423                         FontInfo const first_font = style.labeltype == LABEL_MANUAL ?
424                                                                                 style.labelfont : style.font;
425                         FontInfo const our_font =
426                                         par->getFont(buf.masterBuffer()->params(), 0,
427                                                                  text.outerFont(distance(begin, par))).fontInfo();
428
429                         if (first_font == our_font)
430                                 special_case = true;
431                 }
432
433                 // Plain layouts must be ignored.
434                 if (!special_case && buf.params().documentClass().isPlainLayout(par->layout()) && !runparams.docbook_force_pars)
435                         special_case = true;
436                 // TODO: Could get rid of this with a DocBook equivalent to htmlisblock?
437                 if (!special_case && par->size() == 1 && par->getInset(0)) {
438                         Inset const * firstInset = par->getInset(0);
439
440                         // Floats cannot be in paragraphs.
441                         special_case = to_utf8(firstInset->layoutName()).substr(0, 6) == "Float:";
442
443                         // Bibliographies cannot be in paragraphs.
444                         if (!special_case && firstInset->asInsetCommand())
445                                 special_case = firstInset->asInsetCommand()->params().getCmdName() == "bibtex";
446
447                         // Equations do not deserve their own paragraph (DocBook allows them outside paragraphs).
448                         if (!special_case && firstInset->asInsetMath())
449                                 special_case = true;
450
451                         // ERTs are in comments, not paragraphs.
452                         if (!special_case && firstInset->lyxCode() == lyx::ERT_CODE)
453                                 special_case = true;
454
455                         // Listings should not get into their own paragraph.
456                         if (!special_case && firstInset->lyxCode() == lyx::LISTINGS_CODE)
457                                 special_case = true;
458                 }
459
460                 bool const open_par = runparams.docbook_make_pars
461                                                           && (!runparams.docbook_in_par || par != pbegin)
462                                                           && !special_case;
463
464                 // We want to issue the closing tag if either:
465                 //   (i)  We opened it, and either docbook_in_par is false,
466                 //              or we're not in the last paragraph, anyway.
467                 //   (ii) We didn't open it and docbook_in_par is true,
468                 //              but we are in the first par, and there is a next par.
469                 ParagraphList::const_iterator nextpar = par;
470                 ++nextpar;
471                 bool const close_par =
472                                 ((open_par && (!runparams.docbook_in_par || nextpar != pend))
473                                 || (!open_par && runparams.docbook_in_par && par == pbegin && nextpar != pend));
474
475                 // Determine if this paragraph has some real content. Things like new pages are not caught
476                 // by Paragraph::empty(), even though they do not generate anything useful in DocBook.
477                 odocstringstream os2;
478                 XMLStream xs2(os2);
479                 par->simpleDocBookOnePar(buf, xs2, runparams, text.outerFont(distance(begin, par)), open_par, close_par, 0);
480
481                 docstring cleaned = os2.str();
482                 static const lyx::regex reg("[ \\r\\n]*");
483                 cleaned = from_utf8(lyx::regex_replace(to_utf8(cleaned), reg, string("")));
484
485                 if (!cleaned.empty()) {
486                         if (open_par)
487                                 openParTag(xs, *par);
488
489                         xs << XMLStream::ESCAPE_NONE << os2.str();
490
491                         if (close_par) {
492                                 closeTag(xs, *par);
493                                 xs << xml::CR();
494                         }
495                 }
496         }
497         return pend;
498 }
499
500
501 bool isNormalEnv(Layout const &lay)
502 {
503         return lay.latextype == LATEX_ENVIRONMENT
504                    || lay.latextype == LATEX_BIB_ENVIRONMENT;
505 }
506
507
508 ParagraphList::const_iterator makeEnvironment(
509                 Buffer const &buf,
510                 XMLStream &xs,
511                 OutputParams const &runparams,
512                 Text const &text,
513                 ParagraphList::const_iterator const & pbegin,
514                 ParagraphList::const_iterator const & pend)
515 {
516         auto const begin = text.paragraphs().begin();
517         ParagraphList::const_iterator par = pbegin;
518         depth_type const origdepth = pbegin->params().depth();
519
520         // open tag for this environment
521         openParTag(xs, *par);
522         xs << xml::CR();
523
524         // we will on occasion need to remember a layout from before.
525         Layout const *lastlay = nullptr;
526
527         while (par != pend) {
528                 Layout const & style = par->layout();
529                 ParagraphList::const_iterator send;
530
531                 // Actual content of this paragraph.
532                 switch (style.latextype) {
533                 case LATEX_ENVIRONMENT:
534                 case LATEX_LIST_ENVIRONMENT:
535                 case LATEX_ITEM_ENVIRONMENT: {
536                         // There are two possibilities in this case.
537                         // One is that we are still in the environment in which we
538                         // started---which we will be if the depth is the same.
539                         if (par->params().depth() == origdepth) {
540                                 LATTEST(par->layout() == style);
541                                 if (lastlay != nullptr) {
542                                         closeItemTag(xs, *lastlay);
543                                         if (lastlay->docbookitemwrappertag() != "NONE") {
544                                                 xs << xml::EndTag(lastlay->docbookitemwrappertag());
545                                                 xs << xml::CR();
546                                         }
547                                         lastlay = nullptr;
548                                 }
549
550                                 // this will be positive if we want to skip the
551                                 // initial word (if it's been taken for the label).
552                                 pos_type sep = 0;
553
554                                 // Open a wrapper tag if needed.
555                                 if (style.docbookitemwrappertag() != "NONE") {
556                                         xs << xml::StartTag(style.docbookitemwrappertag(), style.docbookitemwrapperattr());
557                                         xs << xml::CR();
558                                 }
559
560                                 // label output
561                                 if (style.labeltype != LABEL_NO_LABEL &&
562                                                 style.docbookitemlabeltag() != "NONE") {
563
564                                         if (isNormalEnv(style)) {
565                                                 // in this case, we print the label only for the first
566                                                 // paragraph (as in a theorem or an abstract).
567                                                 if (par == pbegin) {
568                                                         docstring const lbl = pbegin->params().labelString();
569                                                         if (!lbl.empty()) {
570                                                                 openLabelTag(xs, style);
571                                                                 xs << lbl;
572                                                                 closeLabelTag(xs, style);
573                                                         } else {
574                                                                 // No new line after closeLabelTag.
575                                                                 xs << xml::CR();
576                                                         }
577                                                 }
578                                         } else { // some kind of list
579                                                 if (style.labeltype == LABEL_MANUAL) {
580                                                         // Only variablelist gets here.
581
582                                                         openLabelTag(xs, style);
583                                                         sep = par->firstWordDocBook(xs, runparams);
584                                                         closeLabelTag(xs, style);
585                                                 } else {
586                                                         openLabelTag(xs, style);
587                                                         xs << par->params().labelString();
588                                                         closeLabelTag(xs, style);
589                                                 }
590                                         }
591                                 } // end label output
592
593                                 // Start generating the item.
594                                 bool wasInParagraph = runparams.docbook_in_par;
595                                 openItemTag(xs, style);
596                                 bool getsIntoParagraph = openInnerItemTag(xs, style);
597                                 OutputParams rp = runparams;
598                                 rp.docbook_in_par = wasInParagraph | getsIntoParagraph;
599
600                                 // Maybe the item is completely empty, i.e. if the first word ends at the end of the current paragraph
601                                 // AND if the next paragraph doesn't have the same depth (if there is such a paragraph).
602                                 // Common case: there is only the first word on the line, but there is a nested list instead
603                                 // of more text.
604                                 bool emptyItem = false;
605                                 if (sep == par->size()) {
606                                         auto next_par = par;
607                                         ++next_par;
608                                         if (next_par == text.paragraphs().end()) // There is no next paragraph.
609                                                 emptyItem = true;
610                                         else // There is a next paragraph: check depth.
611                                                 emptyItem = par->params().depth() >= next_par->params().depth();
612                                 }
613
614                                 if (emptyItem) {
615                                         // Avoid having an empty item, this is not valid DocBook. A single character is enough to force
616                                         // generation of a full <para>.
617                                         xs << ' ';
618                                 } else {
619                                         // Generate the rest of the paragraph, if need be.
620                                         par->simpleDocBookOnePar(buf, xs, rp, text.outerFont(distance(begin, par)), true, true, sep);
621                                 }
622
623                                 ++par;
624                                 if (getsIntoParagraph)
625                                         closeInnerItemTag(xs, style);
626
627                                 // We may not want to close the tag yet, in particular:
628                                 // If we're not at the end of the item...
629                                 if (par != pend
630                                         //  and are doing items...
631                                         && !isNormalEnv(style)
632                                         // and if the depth has changed...
633                                         && par->params().depth() != origdepth) {
634                                         // then we'll save this layout for later, and close it when
635                                         // we get another item.
636                                         lastlay = &style;
637                                 } else {
638                                         closeItemTag(xs, style);
639
640                                         // Eventually, close the item wrapper.
641                                         if (style.docbookitemwrappertag() != "NONE") {
642                                                 xs << xml::EndTag(style.docbookitemwrappertag());
643                                                 xs << xml::CR();
644                                         }
645                                 }
646                         }
647                         // The other possibility is that the depth has increased.
648                         else {
649                                 send = findEndOfEnvironment(par, pend);
650                                 par = makeEnvironment(buf, xs, runparams, text, par, send);
651                         }
652                         break;
653                 }
654                 case LATEX_PARAGRAPH:
655                         send = findLast(par, pend, LATEX_PARAGRAPH);
656                         par = makeParagraphs(buf, xs, runparams, text, par, send);
657                         break;
658                 case LATEX_BIB_ENVIRONMENT:
659                         send = findLast(par, pend, LATEX_BIB_ENVIRONMENT);
660                         par = makeParagraphBibliography(buf, xs, runparams, text, par, send);
661                         break;
662                 case LATEX_COMMAND:
663                         ++par;
664                         break;
665                 }
666         }
667
668         if (lastlay != nullptr) {
669                 closeItemTag(xs, *lastlay);
670                 if (lastlay->docbookitemwrappertag() != "NONE") {
671                         xs << xml::EndTag(lastlay->docbookitemwrappertag());
672                         xs << xml::CR();
673                 }
674         }
675         closeTag(xs, *par);
676         xs << xml::CR();
677         return pend;
678 }
679
680
681 void makeCommand(
682                 Buffer const & buf,
683                 XMLStream & xs,
684                 OutputParams const & runparams,
685                 Text const & text,
686                 ParagraphList::const_iterator const & pbegin)
687 {
688         // No need for labels, as they are handled by DocBook tags.
689
690         openParTag(xs, *pbegin);
691
692         auto const begin = text.paragraphs().begin();
693         pbegin->simpleDocBookOnePar(buf, xs, runparams,
694                                                                 text.outerFont(distance(begin, pbegin)));
695
696         closeTag(xs, *pbegin);
697         xs << xml::CR();
698 }
699
700 pair<ParagraphList::const_iterator, ParagraphList::const_iterator> makeAny(
701                 Text const &text,
702                 Buffer const &buf,
703                 XMLStream &xs,
704                 OutputParams const &ourparams,
705                 ParagraphList::const_iterator par,
706                 ParagraphList::const_iterator send,
707                 ParagraphList::const_iterator pend)
708 {
709         Layout const & style = par->layout();
710
711         switch (style.latextype) {
712                 case LATEX_COMMAND: {
713                         // The files with which we are working never have more than
714                         // one paragraph in a command structure.
715                         // FIXME
716                         // if (ourparams.docbook_in_par)
717                         //   fix it so we don't get sections inside standard, e.g.
718                         // note that we may then need to make runparams not const, so we
719                         // can communicate that back.
720                         // FIXME Maybe this fix should be in the routines themselves, in case
721                         // they are called from elsewhere.
722                         makeCommand(buf, xs, ourparams, text, par);
723                         ++par;
724                         break;
725                 }
726                 case LATEX_ENVIRONMENT:
727                 case LATEX_LIST_ENVIRONMENT:
728                 case LATEX_ITEM_ENVIRONMENT:
729                         // FIXME Same fix here.
730                         send = findEndOfEnvironment(par, pend);
731                         par = makeEnvironment(buf, xs, ourparams, text, par, send);
732                         break;
733                 case LATEX_PARAGRAPH:
734                         send = findLast(par, pend, LATEX_PARAGRAPH);
735                         par = makeParagraphs(buf, xs, ourparams, text, par, send);
736                         break;
737                 case LATEX_BIB_ENVIRONMENT:
738                         send = findLast(par, pend, LATEX_BIB_ENVIRONMENT);
739                         par = makeParagraphBibliography(buf, xs, ourparams, text, par, send);
740                         break;
741         }
742
743         return make_pair(par, send);
744 }
745
746 } // end anonymous namespace
747
748
749 using DocBookDocumentSectioning = tuple<bool, pit_type>;
750 using DocBookInfoTag = tuple<set<pit_type>, set<pit_type>, pit_type, pit_type>;
751
752
753 DocBookDocumentSectioning hasDocumentSectioning(ParagraphList const &paragraphs, pit_type bpit, pit_type const epit) {
754         bool documentHasSections = false;
755
756         while (bpit < epit) {
757                 Layout const &style = paragraphs[bpit].layout();
758                 documentHasSections |= style.category() == from_utf8("Sectioning");
759
760                 if (documentHasSections) {
761                         break;
762                 }
763                 bpit += 1;
764         }
765         // Paragraphs before the first section: [ runparams.par_begin ; eppit )
766
767         return make_tuple(documentHasSections, bpit);
768 }
769
770
771 bool hasOnlyNotes(Paragraph const & par)
772 {
773         for (int i = 0; i < par.size(); ++i)
774                 if (!par.isInset(i) || !dynamic_cast<InsetNote *>(par.insetList().get(i)))
775                         return false;
776         return true;
777 }
778
779
780 DocBookInfoTag getParagraphsWithInfo(ParagraphList const &paragraphs, pit_type bpit, pit_type const epit) {
781         set<pit_type> shouldBeInInfo;
782         set<pit_type> mustBeInInfo;
783
784         // Find the first non empty paragraph by mutating bpit.
785         while (bpit < epit) {
786                 Paragraph const &par = paragraphs[bpit];
787                 if (par.empty() || hasOnlyNotes(par))
788                         bpit += 1;
789                 else
790                         break;
791         }
792
793         // Find the last info-like paragraph.
794         pit_type cpit = bpit;
795         while (cpit < epit) {
796                 // Skip paragraphs only containing one note.
797                 Paragraph const &par = paragraphs[cpit];
798                 if (hasOnlyNotes(par)) {
799                         cpit += 1;
800                         continue;
801                 }
802
803                 // Based on layout information, store this paragraph in one set: should be in <info>, must be.
804                 Layout const &style = par.layout();
805
806                 if (style.docbookininfo() == "always") {
807                         mustBeInInfo.emplace(cpit);
808                 } else if (style.docbookininfo() == "maybe") {
809                         shouldBeInInfo.emplace(cpit);
810                 } else {
811                         // Hypothesis: the <info> parts should be grouped together near the beginning bpit.
812                         break;
813                 }
814                 cpit += 1;
815         }
816         // Now, cpit points to the last paragraph that has things that could go in <info>.
817         // bpit is still the beginning of the <info> part.
818
819         return make_tuple(shouldBeInInfo, mustBeInInfo, bpit, cpit);
820 }
821
822
823 bool hasAbstractBetween(ParagraphList const &paragraphs, pit_type const bpitAbstract, pit_type const epitAbstract)
824 {
825         // Hypothesis: the paragraphs between bpitAbstract and epitAbstract can be considered an abstract because they
826         // are just after a document or part title.
827         if (epitAbstract - bpitAbstract <= 0)
828                 return false;
829
830         // If there is something between these paragraphs, check if it's compatible with an abstract (i.e. some text).
831         pit_type bpit = bpitAbstract;
832         while (bpit < epitAbstract) {
833                 const Paragraph &p = paragraphs.at(bpit);
834
835                 if (p.layout().name() == from_ascii("Abstract"))
836                         return true;
837
838                 if (!p.insetList().empty()) {
839                         for (const auto &i : p.insetList()) {
840                                 if (i.inset->getText(0) != nullptr) {
841                                         return true;
842                                 }
843                         }
844                 }
845                 bpit++;
846         }
847         return false;
848 }
849
850
851 pit_type generateDocBookParagraphWithoutSectioning(
852                 Text const & text,
853                 Buffer const & buf,
854                 XMLStream & xs,
855                 OutputParams const & runparams,
856                 ParagraphList const & paragraphs,
857                 pit_type bpit,
858                 pit_type epit)
859 {
860         auto par = paragraphs.iterator_at(bpit);
861         auto lastStartedPar = par;
862         ParagraphList::const_iterator send;
863         auto const pend =
864                         (epit == (int) paragraphs.size()) ?
865                         paragraphs.end() : paragraphs.iterator_at(epit);
866
867         while (bpit < epit) {
868                 tie(par, send) = makeAny(text, buf, xs, runparams, par, send, pend);
869                 bpit += distance(lastStartedPar, par);
870                 lastStartedPar = par;
871         }
872
873         return bpit;
874 }
875
876
877 void outputDocBookInfo(
878                 Text const & text,
879                 Buffer const & buf,
880                 XMLStream & xs,
881                 OutputParams const & runparams,
882                 ParagraphList const & paragraphs,
883                 DocBookInfoTag const & info,
884                 pit_type bpitAbstract,
885                 pit_type const epitAbstract)
886 {
887         // Consider everything between bpitAbstract and epitAbstract (excluded) as paragraphs for the abstract.
888         // Use bpitAbstract >= epitAbstract to indicate there is no abstract.
889
890         set<pit_type> shouldBeInInfo;
891         set<pit_type> mustBeInInfo;
892         pit_type bpitInfo;
893         pit_type epitInfo;
894         tie(shouldBeInInfo, mustBeInInfo, bpitInfo, epitInfo) = info;
895
896         // Perform an additional check on the abstract. Sometimes, there are many paragraphs that should go
897         // into the abstract, but none generates actual content. Thus, first generate to a temporary stream,
898         // then only create the <abstract> tag if these paragraphs generate some content.
899         // This check must be performed *before* a decision on whether or not to output <info> is made.
900         bool hasAbstract = hasAbstractBetween(paragraphs, bpitAbstract, epitAbstract);
901         docstring abstract;
902         if (hasAbstract) {
903                 odocstringstream os2;
904                 XMLStream xs2(os2);
905                 generateDocBookParagraphWithoutSectioning(text, buf, xs2, runparams, paragraphs, bpitAbstract, epitAbstract);
906
907                 // Actually output the abstract if there is something to do. Don't count line feeds or spaces in this,
908                 // even though they must be properly output if there is some abstract.
909                 docstring abstractContent = os2.str();
910                 static const lyx::regex reg("[ \\r\\n]*");
911                 abstractContent = from_utf8(lyx::regex_replace(to_utf8(abstractContent), reg, string("")));
912
913                 // Nothing? Then there is no abstract!
914                 if (abstractContent.empty())
915                         hasAbstract = false;
916         }
917
918         // The abstract must go in <info>.
919         bool needInfo = !mustBeInInfo.empty() || hasAbstract;
920
921         // Start the <info> tag if required.
922         if (needInfo) {
923                 xs.startDivision(false);
924                 xs << xml::StartTag("info");
925                 xs << xml::CR();
926         }
927
928         // Output the elements that should go in <info>.
929         generateDocBookParagraphWithoutSectioning(text, buf, xs, runparams, paragraphs, bpitInfo, epitInfo);
930
931         if (hasAbstract && !abstract.empty()) { // The second test is probably superfluous.
932                 string tag = paragraphs[bpitAbstract].layout().docbookforceabstracttag();
933                 if (tag == "NONE")
934                         tag = "abstract";
935
936                 xs << xml::StartTag(tag);
937                 xs << xml::CR();
938                 xs << XMLStream::ESCAPE_NONE << abstract;
939                 xs << xml::EndTag(tag);
940                 xs << xml::CR();
941         }
942
943         // End the <info> tag if it was started.
944         if (needInfo) {
945                 xs << xml::EndTag("info");
946                 xs << xml::CR();
947                 xs.endDivision();
948         }
949 }
950
951
952 void docbookFirstParagraphs(
953                 Text const &text,
954                 Buffer const &buf,
955                 XMLStream &xs,
956                 OutputParams const &runparams,
957                 pit_type epit)
958 {
959         // Handle the beginning of the document, supposing it has sections.
960         // Major role: output the first <info> tag.
961
962         ParagraphList const &paragraphs = text.paragraphs();
963         pit_type bpit = runparams.par_begin;
964         DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit);
965         outputDocBookInfo(text, buf, xs, runparams, paragraphs, info, get<3>(info), epit);
966 }
967
968
969 bool isParagraphEmpty(const Paragraph &par)
970 {
971         InsetList const &insets = par.insetList();
972         size_t insetsLength = distance(insets.begin(), insets.end());
973         bool hasParagraphOnlyNote = insetsLength == 1 && insets.get(0) && insets.get(0)->asInsetCollapsible() &&
974                                                                 dynamic_cast<InsetNote *>(insets.get(0));
975         return hasParagraphOnlyNote;
976 }
977
978
979 void docbookSimpleAllParagraphs(
980                 Text const & text,
981                 Buffer const & buf,
982                 XMLStream & xs,
983                 OutputParams const & runparams)
984 {
985         // Handle the document, supposing it has no sections (i.e. a "simple" document).
986
987         // First, the <info> tag.
988         ParagraphList const &paragraphs = text.paragraphs();
989         pit_type bpit = runparams.par_begin;
990         pit_type const epit = runparams.par_end;
991         DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit);
992         outputDocBookInfo(text, buf, xs, runparams, paragraphs, info, 0, 0);
993         bpit = get<3>(info); // Generate the content starting from the end of the <info> part.
994
995         // Then, the content.
996         ParagraphList::const_iterator const pend =
997                         (epit == (int) paragraphs.size()) ?
998                         paragraphs.end() : paragraphs.iterator_at(epit);
999
1000         while (bpit < epit) {
1001                 auto par = paragraphs.iterator_at(bpit);
1002                 ParagraphList::const_iterator const lastStartedPar = par;
1003                 ParagraphList::const_iterator send;
1004
1005                 if (isParagraphEmpty(*par)) {
1006                         ++par;
1007                         bpit += distance(lastStartedPar, par);
1008                         continue;
1009                 }
1010
1011                 // Generate this paragraph.
1012                 tie(par, send) = makeAny(text, buf, xs, runparams, par, send, pend);
1013                 bpit += distance(lastStartedPar, par);
1014         }
1015 }
1016
1017
1018 void docbookParagraphs(Text const &text,
1019                                            Buffer const &buf,
1020                                            XMLStream &xs,
1021                                            OutputParams const &runparams) {
1022         ParagraphList const &paragraphs = text.paragraphs();
1023         if (runparams.par_begin == runparams.par_end) {
1024                 runparams.par_begin = 0;
1025                 runparams.par_end = paragraphs.size();
1026         }
1027         pit_type bpit = runparams.par_begin;
1028         pit_type const epit = runparams.par_end;
1029         LASSERT(bpit < epit,
1030                         {
1031                                 xs << XMLStream::ESCAPE_NONE << "<!-- DocBook output error! -->\n";
1032                                 return;
1033                         });
1034
1035         ParagraphList::const_iterator const pend =
1036                         (epit == (int) paragraphs.size()) ?
1037                         paragraphs.end() : paragraphs.iterator_at(epit);
1038         std::stack<std::pair<int, string>> headerLevels; // Used to determine when to open/close sections: store the depth
1039         // of the section and the tag that was used to open it.
1040
1041         // Detect whether the document contains sections. If there are no sections, there can be no automatically
1042         // discovered abstract.
1043         bool documentHasSections;
1044         pit_type eppit;
1045         tie(documentHasSections, eppit) = hasDocumentSectioning(paragraphs, bpit, epit);
1046
1047         if (documentHasSections) {
1048                 docbookFirstParagraphs(text, buf, xs, runparams, eppit);
1049                 bpit = eppit;
1050         } else {
1051                 docbookSimpleAllParagraphs(text, buf, xs, runparams);
1052                 return;
1053         }
1054
1055         bool currentlyInAppendix = false;
1056
1057         while (bpit < epit) {
1058                 OutputParams ourparams = runparams;
1059
1060                 auto par = paragraphs.iterator_at(bpit);
1061                 if (par->params().startOfAppendix())
1062                         currentlyInAppendix = true;
1063                 Layout const &style = par->layout();
1064                 ParagraphList::const_iterator const lastStartedPar = par;
1065                 ParagraphList::const_iterator send;
1066
1067                 if (isParagraphEmpty(*par)) {
1068                         ++par;
1069                         bpit += distance(lastStartedPar, par);
1070                         continue;
1071                 }
1072
1073                 // Think about adding <section> and/or </section>s.
1074                 const bool isLayoutSectioning = style.category() == from_utf8("Sectioning");
1075                 if (isLayoutSectioning) {
1076                         int level = style.toclevel;
1077
1078                         // Need to close a previous section if it has the same level or a higher one (close <section> if opening a <h2>
1079                         // after a <h2>, <h3>, <h4>, <h5> or <h6>). More examples:
1080                         //   - current: h2; back: h1; do not close any <section>
1081                         //   - current: h1; back: h2; close two <section> (first the <h2>, then the <h1>, so a new <h1> can come)
1082                         while (!headerLevels.empty() && level <= headerLevels.top().first) {
1083                                 int stackLevel = headerLevels.top().first;
1084                                 docstring stackTag = from_utf8("</" + headerLevels.top().second + ">");
1085                                 headerLevels.pop();
1086
1087                                 // Output the tag only if it corresponds to a legit section.
1088                                 if (stackLevel != Layout::NOT_IN_TOC)
1089                                         xs << XMLStream::ESCAPE_NONE << stackTag << xml::CR();
1090                         }
1091
1092                         // Open the new section: first push it onto the stack, then output it in DocBook.
1093                         string sectionTag = (currentlyInAppendix && style.docbooksectiontag() == "chapter") ?
1094                                                                 "appendix" : style.docbooksectiontag();
1095                         headerLevels.push(std::make_pair(level, sectionTag));
1096
1097                         // Some sectioning-like elements should not be output (such as FrontMatter).
1098                         if (level != Layout::NOT_IN_TOC) {
1099                                 // Look for a label in the title, i.e. a InsetLabel as a child.
1100                                 docstring id = docstring();
1101                                 for (pos_type i = 0; i < par->size(); ++i) {
1102                                         Inset const *inset = par->getInset(i);
1103                                         if (inset) {
1104                                                 if (auto label = dynamic_cast<InsetLabel const *>(inset)) {
1105                                                         // Generate the attributes for the section if need be.
1106                                                         id += "xml:id=\"" + xml::cleanID(label->screenLabel()) + "\"";
1107
1108                                                         // Don't output the ID as a DocBook <anchor>.
1109                                                         ourparams.docbook_anchors_to_ignore.emplace(label->screenLabel());
1110
1111                                                         // Cannot have multiple IDs per tag.
1112                                                         break;
1113                                                 }
1114                                         }
1115                                 }
1116
1117                                 // Write the open tag for this section.
1118                                 docstring tag = from_utf8("<" + sectionTag);
1119                                 if (!id.empty())
1120                                         tag += from_utf8(" ") + id;
1121                                 tag += from_utf8(">");
1122                                 xs << XMLStream::ESCAPE_NONE << tag;
1123                                 xs << xml::CR();
1124                         }
1125                 }
1126
1127                 // Close all sections before the bibliography.
1128                 // TODO: Only close all when the bibliography is at the end of the document? Or force to output the bibliography at the end of the document? Or don't care (as allowed by DocBook)?
1129                 auto insetsLength = distance(par->insetList().begin(), par->insetList().end());
1130                 if (insetsLength > 0) {
1131                         Inset const *firstInset = par->getInset(0);
1132                         if (firstInset && dynamic_cast<InsetBibtex const *>(firstInset)) {
1133                                 while (!headerLevels.empty()) {
1134                                         int level = headerLevels.top().first;
1135                                         docstring tag = from_utf8("</" + headerLevels.top().second + ">");
1136                                         headerLevels.pop();
1137
1138                                         // Output the tag only if it corresponds to a legit section.
1139                                         if (level != Layout::NOT_IN_TOC) {
1140                                                 xs << XMLStream::ESCAPE_NONE << tag;
1141                                                 xs << xml::CR();
1142                                         }
1143                                 }
1144                         }
1145                 }
1146
1147                 // Generate this paragraph.
1148                 tie(par, send) = makeAny(text, buf, xs, ourparams, par, send, pend);
1149                 bpit += distance(lastStartedPar, par);
1150         }
1151
1152         // If need be, close <section>s, but only at the end of the document (otherwise, dealt with at the beginning
1153         // of the loop).
1154         while (!headerLevels.empty() && headerLevels.top().first > Layout::NOT_IN_TOC) {
1155                 docstring tag = from_utf8("</" + headerLevels.top().second + ">");
1156                 headerLevels.pop();
1157                 xs << XMLStream::ESCAPE_NONE << tag;
1158                 xs << xml::CR();
1159         }
1160 }
1161
1162 } // namespace lyx