]> git.lyx.org Git - lyx.git/blob - src/output_docbook.cpp
Merge findLastParagraph and findLastBibliographyParagraph to lower code duplication.
[lyx.git] / src / output_docbook.cpp
1 /**
2  * \file output_docbook.cpp
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author Lars Gullik Bjønnes
7  * \author José Matos
8  *
9  * Full author contact details are available in file CREDITS.
10  */
11
12 #include <config.h>
13
14 #include "Buffer.h"
15 #include "buffer_funcs.h"
16 #include "BufferParams.h"
17 #include "Font.h"
18 #include "InsetList.h"
19 #include "Layout.h"
20 #include "OutputParams.h"
21 #include "Paragraph.h"
22 #include "ParagraphList.h"
23 #include "ParagraphParameters.h"
24 #include "xml.h"
25 #include "Text.h"
26 #include "TextClass.h"
27
28 #include "insets/InsetBibtex.h"
29 #include "insets/InsetBibitem.h"
30 #include "insets/InsetLabel.h"
31 #include "insets/InsetNote.h"
32
33 #include "support/convert.h"
34 #include "support/debug.h"
35 #include "support/lassert.h"
36 #include "support/lstrings.h"
37 #include "support/textutils.h"
38
39 #include "support/regex.h"
40
41 #include <stack>
42 #include <iostream>
43 #include <algorithm>
44 #include <sstream>
45
46 using namespace std;
47 using namespace lyx::support;
48
49 namespace lyx {
50
51 namespace {
52
53 std::string const fontToDocBookTag(xml::FontTypes type)
54 {
55         switch (type) {
56         case xml::FontTypes::FT_EMPH:
57         case xml::FontTypes::FT_BOLD:
58                 return "emphasis";
59         case xml::FontTypes::FT_NOUN:
60                 return "person";
61         case xml::FontTypes::FT_UBAR:
62         case xml::FontTypes::FT_WAVE:
63         case xml::FontTypes::FT_DBAR:
64         case xml::FontTypes::FT_SOUT:
65         case xml::FontTypes::FT_XOUT:
66         case xml::FontTypes::FT_ITALIC:
67         case xml::FontTypes::FT_UPRIGHT:
68         case xml::FontTypes::FT_SLANTED:
69         case xml::FontTypes::FT_SMALLCAPS:
70         case xml::FontTypes::FT_ROMAN:
71         case xml::FontTypes::FT_SANS:
72                 return "emphasis";
73         case xml::FontTypes::FT_TYPE:
74                 return "code";
75         case xml::FontTypes::FT_SIZE_TINY:
76         case xml::FontTypes::FT_SIZE_SCRIPT:
77         case xml::FontTypes::FT_SIZE_FOOTNOTE:
78         case xml::FontTypes::FT_SIZE_SMALL:
79         case xml::FontTypes::FT_SIZE_NORMAL:
80         case xml::FontTypes::FT_SIZE_LARGE:
81         case xml::FontTypes::FT_SIZE_LARGER:
82         case xml::FontTypes::FT_SIZE_LARGEST:
83         case xml::FontTypes::FT_SIZE_HUGE:
84         case xml::FontTypes::FT_SIZE_HUGER:
85         case xml::FontTypes::FT_SIZE_INCREASE:
86         case xml::FontTypes::FT_SIZE_DECREASE:
87                 return "emphasis";
88         default:
89                 return "";
90         }
91 }
92
93 string fontToRole(xml::FontTypes type)
94 {
95         // Specific fonts are achieved with roles. The only common ones are "" for basic emphasis,
96         // and "bold"/"strong" for bold. With some specific options, other roles are copied into
97         // HTML output (via the DocBook XSLT sheets); otherwise, if not recognised, they are just ignored.
98         // Hence, it is not a problem to have many roles by default here.
99         // See https://www.sourceware.org/ml/docbook/2003-05/msg00269.html
100         switch (type) {
101         case xml::FontTypes::FT_ITALIC:
102         case xml::FontTypes::FT_EMPH:
103                 return "";
104         case xml::FontTypes::FT_BOLD:
105                 return "bold";
106         case xml::FontTypes::FT_NOUN:
107                 return ""; // Outputs a <person>
108         case xml::FontTypes::FT_TYPE:
109                 return ""; // Outputs a <code>
110         case xml::FontTypes::FT_UBAR:
111                 return "underline";
112
113                 // All other roles are non-standard for DocBook.
114
115         case xml::FontTypes::FT_WAVE:
116                 return "wave";
117         case xml::FontTypes::FT_DBAR:
118                 return "dbar";
119         case xml::FontTypes::FT_SOUT:
120                 return "sout";
121         case xml::FontTypes::FT_XOUT:
122                 return "xout";
123         case xml::FontTypes::FT_UPRIGHT:
124                 return "upright";
125         case xml::FontTypes::FT_SLANTED:
126                 return "slanted";
127         case xml::FontTypes::FT_SMALLCAPS:
128                 return "smallcaps";
129         case xml::FontTypes::FT_ROMAN:
130                 return "roman";
131         case xml::FontTypes::FT_SANS:
132                 return "sans";
133         case xml::FontTypes::FT_SIZE_TINY:
134                 return "tiny";
135         case xml::FontTypes::FT_SIZE_SCRIPT:
136                 return "size_script";
137         case xml::FontTypes::FT_SIZE_FOOTNOTE:
138                 return "size_footnote";
139         case xml::FontTypes::FT_SIZE_SMALL:
140                 return "size_small";
141         case xml::FontTypes::FT_SIZE_NORMAL:
142                 return "size_normal";
143         case xml::FontTypes::FT_SIZE_LARGE:
144                 return "size_large";
145         case xml::FontTypes::FT_SIZE_LARGER:
146                 return "size_larger";
147         case xml::FontTypes::FT_SIZE_LARGEST:
148                 return "size_largest";
149         case xml::FontTypes::FT_SIZE_HUGE:
150                 return "size_huge";
151         case xml::FontTypes::FT_SIZE_HUGER:
152                 return "size_huger";
153         case xml::FontTypes::FT_SIZE_INCREASE:
154                 return "size_increase";
155         case xml::FontTypes::FT_SIZE_DECREASE:
156                 return "size_decrease";
157         default:
158                 return "";
159         }
160 }
161
162 string fontToAttribute(xml::FontTypes type) {
163         // If there is a role (i.e. nonstandard use of a tag), output the attribute. Otherwise, the sheer tag is sufficient
164         // for the font.
165         string role = fontToRole(type);
166         if (!role.empty()) {
167                 return "role='" + role + "'";
168         } else {
169                 return "";
170         }
171 }
172
173 } // end anonymous namespace
174
175
176 xml::FontTag docbookStartFontTag(xml::FontTypes type)
177 {
178         return xml::FontTag(from_utf8(fontToDocBookTag(type)), from_utf8(fontToAttribute(type)), type);
179 }
180
181
182 xml::EndFontTag docbookEndFontTag(xml::FontTypes type)
183 {
184         return xml::EndFontTag(from_utf8(fontToDocBookTag(type)), type);
185 }
186
187
188 namespace {
189
190 // convenience functions
191
192 void openParTag(XMLStream & xs, Layout const & lay)
193 {
194         if (lay.docbookwrappertag() != "NONE") {
195                 xs << xml::StartTag(lay.docbookwrappertag(), lay.docbookwrapperattr());
196         }
197
198         string tag = lay.docbooktag();
199         if (tag == "Plain Layout")
200                 tag = "para";
201
202         xs << xml::ParTag(tag, lay.docbookattr());
203 }
204
205
206 void closeTag(XMLStream & xs, Layout const & lay)
207 {
208         string tag = lay.docbooktag();
209         if (tag == "Plain Layout")
210                 tag = "para";
211
212         xs << xml::EndTag(tag);
213         if (lay.docbookwrappertag() != "NONE")
214                 xs << xml::EndTag(lay.docbookwrappertag());
215 }
216
217
218 void openLabelTag(XMLStream & xs, Layout const & lay) // Mostly for definition lists.
219 {
220         xs << xml::StartTag(lay.docbookitemlabeltag(), lay.docbookitemlabelattr());
221 }
222
223
224 void closeLabelTag(XMLStream & xs, Layout const & lay)
225 {
226         xs << xml::EndTag(lay.docbookitemlabeltag());
227         xs << xml::CR();
228 }
229
230
231 void openItemTag(XMLStream & xs, Layout const & lay)
232 {
233         xs << xml::StartTag(lay.docbookitemtag(), lay.docbookitemattr());
234 }
235
236
237 // Return true when new elements are output in a paragraph, false otherwise.
238 bool openInnerItemTag(XMLStream & xs, Layout const & lay)
239 {
240         if (lay.docbookiteminnertag() != "NONE") {
241                 xs << xml::CR();
242                 xs << xml::ParTag(lay.docbookiteminnertag(), lay.docbookiteminnerattr());
243
244                 if (lay.docbookiteminnertag() == "para") {
245                         return true;
246                 }
247         }
248         return false;
249 }
250
251
252 void closeInnerItemTag(XMLStream & xs, Layout const & lay)
253 {
254         if (lay.docbookiteminnertag()!= "NONE") {
255                 xs << xml::EndTag(lay.docbookiteminnertag());
256                 xs << xml::CR();
257         }
258 }
259
260
261 inline void closeItemTag(XMLStream & xs, Layout const & lay)
262 {
263         xs << xml::EndTag(lay.docbookitemtag());
264         xs << xml::CR();
265 }
266
267 // end of convenience functions
268
269 ParagraphList::const_iterator findLast(
270                 ParagraphList::const_iterator p,
271                 ParagraphList::const_iterator const & pend,
272                 LatexType type) {
273         for (++p; p != pend && p->layout().latextype == type; ++p);
274
275         return p;
276 }
277
278 ParagraphList::const_iterator findLastBibliographyParagraph(
279                 ParagraphList::const_iterator p,
280                 ParagraphList::const_iterator const & pend) {
281         for (++p; p != pend && p->layout().latextype == LATEX_BIB_ENVIRONMENT; ++p);
282
283         return p;
284 }
285
286
287 ParagraphList::const_iterator findEndOfEnvironment(
288                 ParagraphList::const_iterator const & pstart,
289                 ParagraphList::const_iterator const & pend)
290 {
291         ParagraphList::const_iterator p = pstart;
292         Layout const &bstyle = p->layout();
293         size_t const depth = p->params().depth();
294         for (++p; p != pend; ++p) {
295                 Layout const &style = p->layout();
296                 // It shouldn't happen that e.g. a section command occurs inside
297                 // a quotation environment, at a higher depth, but as of 6/2009,
298                 // it can happen. We pretend that it's just at lowest depth.
299                 if (style.latextype == LATEX_COMMAND)
300                         return p;
301
302                 // If depth is down, we're done
303                 if (p->params().depth() < depth)
304                         return p;
305
306                 // If depth is up, we're not done
307                 if (p->params().depth() > depth)
308                         continue;
309
310                 // FIXME I am not sure about the first check.
311                 // Surely we *could* have different layouts that count as
312                 // LATEX_PARAGRAPH, right?
313                 if (style.latextype == LATEX_PARAGRAPH || style != bstyle)
314                         return p;
315         }
316         return pend;
317 }
318
319
320 ParagraphList::const_iterator makeParagraphBibliography(
321                 Buffer const &buf,
322                 XMLStream &xs,
323                 OutputParams const &runparams,
324                 Text const &text,
325                 ParagraphList::const_iterator const & pbegin,
326                 ParagraphList::const_iterator const & pend)
327 {
328         auto const begin = text.paragraphs().begin();
329         auto const end = text.paragraphs().end();
330
331         // Find the paragraph *before* pbegin.
332         ParagraphList::const_iterator pbegin_before = begin;
333         if (pbegin != begin) {
334                 ParagraphList::const_iterator pbegin_before_next = begin;
335                 ++pbegin_before_next;
336
337                 while (pbegin_before_next != pbegin) {
338                         ++pbegin_before;
339                         ++pbegin_before_next;
340                 }
341         }
342
343         ParagraphList::const_iterator par = pbegin;
344
345         // If this is the first paragraph in a bibliography, open the bibliography tag.
346         if (pbegin != begin && pbegin_before->layout().latextype != LATEX_BIB_ENVIRONMENT) {
347                 xs << xml::StartTag("bibliography");
348                 xs << xml::CR();
349         }
350
351         // Generate the required paragraphs, but only if they are .
352         for (; par != pend; ++par) {
353                 // Start the precooked bibliography entry. This is very much like opening a paragraph tag.
354                 // Don't forget the citation ID!
355                 docstring attr;
356                 for (auto i = 0; i < par->size(); ++i) {
357                         Inset const *ip = par->getInset(0);
358                         if (ip != nullptr && ip->lyxCode() == BIBITEM_CODE) {
359                                 const auto * bibitem = dynamic_cast<const InsetBibitem*>(par->getInset(i));
360                                 attr = from_utf8("xml:id='") + bibitem->getParam("key") + from_utf8("'");
361                                 break;
362                         }
363                 }
364                 xs << xml::StartTag(from_utf8("bibliomixed"), attr);
365
366                 // Generate the entry.
367                 par->simpleDocBookOnePar(buf, xs, runparams, text.outerFont(distance(begin, par)), true, true, 0);
368
369                 // End the precooked bibliography entry.
370                 xs << xml::EndTag("bibliomixed");
371                 xs << xml::CR();
372         }
373
374         // If this is the last paragraph in a bibliography, close the bibliography tag.
375         if (par == end || par->layout().latextype != LATEX_BIB_ENVIRONMENT) {
376                 xs << xml::EndTag("bibliography");
377                 xs << xml::CR();
378         }
379
380         return pend;
381 }
382
383
384 ParagraphList::const_iterator makeParagraphs(
385                 Buffer const &buf,
386                 XMLStream &xs,
387                 OutputParams const &runparams,
388                 Text const &text,
389                 ParagraphList::const_iterator const & pbegin,
390                 ParagraphList::const_iterator const & pend)
391 {
392         ParagraphList::const_iterator const begin = text.paragraphs().begin();
393         ParagraphList::const_iterator par = pbegin;
394         for (; par != pend; ++par) {
395                 Layout const &lay = par->layout();
396
397                 // We want to open the paragraph tag if:
398                 //   (i) the current layout permits multiple paragraphs
399                 //  (ii) we are either not already inside a paragraph (HTMLIsBlock) OR
400                 //         we are, but this is not the first paragraph
401                 //
402                 // But there is also a special case, and we first see whether we are in it.
403                 // We do not want to open the paragraph tag if this paragraph contains
404                 // only one item, and that item is "inline", i.e., not HTMLIsBlock (such
405                 // as a branch). On the other hand, if that single item has a font change
406                 // applied to it, then we still do need to open the paragraph.
407                 //
408                 // Obviously, this is very fragile. The main reason we need to do this is
409                 // because of branches, e.g., a branch that contains an entire new section.
410                 // We do not really want to wrap that whole thing in a <div>...</div>.
411                 bool special_case = false;
412                 Inset const *specinset = par->size() == 1 ? par->getInset(0) : 0;
413                 if (specinset && !specinset->getLayout().htmlisblock()) { // TODO: Convert htmlisblock to a DocBook parameter?
414                         Layout const &style = par->layout();
415                         FontInfo const first_font = style.labeltype == LABEL_MANUAL ?
416                                                                                 style.labelfont : style.font;
417                         FontInfo const our_font =
418                                         par->getFont(buf.masterBuffer()->params(), 0,
419                                                                  text.outerFont(distance(begin, par))).fontInfo();
420
421                         if (first_font == our_font)
422                                 special_case = true;
423                 }
424
425                 // Plain layouts must be ignored.
426                 if (!special_case && buf.params().documentClass().isPlainLayout(lay) && !runparams.docbook_force_pars)
427                         special_case = true;
428                 // TODO: Could get rid of this with a DocBook equivalent to htmlisblock?
429                 if (!special_case && par->size() == 1 && par->getInset(0)) {
430                         Inset const * firstInset = par->getInset(0);
431
432                         // Floats cannot be in paragraphs.
433                         special_case = to_utf8(firstInset->layoutName()).substr(0, 6) == "Float:";
434
435                         // Bibliographies cannot be in paragraphs.
436                         if (!special_case && firstInset->asInsetCommand())
437                                 special_case = firstInset->asInsetCommand()->params().getCmdName() == "bibtex";
438
439                         // Equations do not deserve their own paragraph (DocBook allows them outside paragraphs).
440                         if (!special_case && firstInset->asInsetMath())
441                                 special_case = true;
442
443                         // ERTs are in comments, not paragraphs.
444                         if (!special_case && firstInset->lyxCode() == lyx::ERT_CODE)
445                                 special_case = true;
446
447                         // Listings should not get into their own paragraph.
448                         if (!special_case && firstInset->lyxCode() == lyx::LISTINGS_CODE)
449                                 special_case = true;
450                 }
451
452                 bool const open_par = runparams.docbook_make_pars
453                                                           && (!runparams.docbook_in_par || par != pbegin)
454                                                           && !special_case;
455
456                 // We want to issue the closing tag if either:
457                 //   (i)  We opened it, and either docbook_in_par is false,
458                 //              or we're not in the last paragraph, anyway.
459                 //   (ii) We didn't open it and docbook_in_par is true,
460                 //              but we are in the first par, and there is a next par.
461                 ParagraphList::const_iterator nextpar = par;
462                 ++nextpar;
463                 bool const close_par =
464                                 ((open_par && (!runparams.docbook_in_par || nextpar != pend))
465                                 || (!open_par && runparams.docbook_in_par && par == pbegin && nextpar != pend));
466
467                 // Determine if this paragraph has some real content. Things like new pages are not caught
468                 // by Paragraph::empty(), even though they do not generate anything useful in DocBook.
469                 odocstringstream os2;
470                 XMLStream xs2(os2);
471                 par->simpleDocBookOnePar(buf, xs2, runparams, text.outerFont(distance(begin, par)), open_par, close_par, 0);
472
473                 docstring cleaned = os2.str();
474                 static const lyx::regex reg("[ \\r\\n]*");
475                 cleaned = from_utf8(lyx::regex_replace(to_utf8(cleaned), reg, string("")));
476
477                 if (!cleaned.empty()) {
478                         if (open_par)
479                                 openParTag(xs, lay);
480
481                         xs << XMLStream::ESCAPE_NONE << os2.str();
482
483                         if (close_par) {
484                                 closeTag(xs, lay);
485                                 xs << xml::CR();
486                         }
487                 }
488         }
489         return pend;
490 }
491
492
493 bool isNormalEnv(Layout const &lay)
494 {
495         return lay.latextype == LATEX_ENVIRONMENT
496                    || lay.latextype == LATEX_BIB_ENVIRONMENT;
497 }
498
499
500 ParagraphList::const_iterator makeEnvironment(
501                 Buffer const &buf,
502                 XMLStream &xs,
503                 OutputParams const &runparams,
504                 Text const &text,
505                 ParagraphList::const_iterator const & pbegin,
506                 ParagraphList::const_iterator const & pend)
507 {
508         ParagraphList::const_iterator const begin = text.paragraphs().begin();
509         ParagraphList::const_iterator par = pbegin;
510         Layout const &bstyle = par->layout();
511         depth_type const origdepth = pbegin->params().depth();
512
513         // open tag for this environment
514         openParTag(xs, bstyle);
515         xs << xml::CR();
516
517         // we will on occasion need to remember a layout from before.
518         Layout const *lastlay = nullptr;
519
520         while (par != pend) {
521                 Layout const & style = par->layout();
522                 ParagraphList::const_iterator send;
523
524                 // Actual content of this paragraph.
525                 switch (style.latextype) {
526                 case LATEX_ENVIRONMENT:
527                 case LATEX_LIST_ENVIRONMENT:
528                 case LATEX_ITEM_ENVIRONMENT: {
529                         // There are two possibilities in this case.
530                         // One is that we are still in the environment in which we
531                         // started---which we will be if the depth is the same.
532                         if (par->params().depth() == origdepth) {
533                                 LATTEST(bstyle == style);
534                                 if (lastlay != nullptr) {
535                                         closeItemTag(xs, *lastlay);
536                                         if (lastlay->docbookitemwrappertag() != "NONE") {
537                                                 xs << xml::EndTag(lastlay->docbookitemwrappertag());
538                                                 xs << xml::CR();
539                                         }
540                                         lastlay = nullptr;
541                                 }
542
543                                 // this will be positive if we want to skip the
544                                 // initial word (if it's been taken for the label).
545                                 pos_type sep = 0;
546
547                                 // Open a wrapper tag if needed.
548                                 if (style.docbookitemwrappertag() != "NONE") {
549                                         xs << xml::StartTag(style.docbookitemwrappertag(), style.docbookitemwrapperattr());
550                                         xs << xml::CR();
551                                 }
552
553                                 // label output
554                                 if (style.labeltype != LABEL_NO_LABEL &&
555                                                 style.docbookitemlabeltag() != "NONE") {
556
557                                         if (isNormalEnv(style)) {
558                                                 // in this case, we print the label only for the first
559                                                 // paragraph (as in a theorem or an abstract).
560                                                 if (par == pbegin) {
561                                                         docstring const lbl = pbegin->params().labelString();
562                                                         if (!lbl.empty()) {
563                                                                 openLabelTag(xs, style);
564                                                                 xs << lbl;
565                                                                 closeLabelTag(xs, style);
566                                                         } else {
567                                                                 // No new line after closeLabelTag.
568                                                                 xs << xml::CR();
569                                                         }
570                                                 }
571                                         } else { // some kind of list
572                                                 if (style.labeltype == LABEL_MANUAL) {
573                                                         // Only variablelist gets here.
574
575                                                         openLabelTag(xs, style);
576                                                         sep = par->firstWordDocBook(xs, runparams);
577                                                         closeLabelTag(xs, style);
578                                                 } else {
579                                                         openLabelTag(xs, style);
580                                                         xs << par->params().labelString();
581                                                         closeLabelTag(xs, style);
582                                                 }
583                                         }
584                                 } // end label output
585
586                                 // Start generating the item.
587                                 bool wasInParagraph = runparams.docbook_in_par;
588                                 openItemTag(xs, style);
589                                 bool getsIntoParagraph = openInnerItemTag(xs, style);
590                                 OutputParams rp = runparams;
591                                 rp.docbook_in_par = wasInParagraph | getsIntoParagraph;
592
593                                 // Maybe the item is completely empty, i.e. if the first word ends at the end of the current paragraph
594                                 // AND if the next paragraph doesn't have the same depth (if there is such a paragraph).
595                                 // Common case: there is only the first word on the line, but there is a nested list instead
596                                 // of more text.
597                                 bool emptyItem = false;
598                                 if (sep == par->size()) {
599                                         auto next_par = par;
600                                         ++next_par;
601                                         if (next_par == text.paragraphs().end()) // There is no next paragraph.
602                                                 emptyItem = true;
603                                         else // There is a next paragraph: check depth.
604                                                 emptyItem = par->params().depth() >= next_par->params().depth();
605                                 }
606
607                                 if (emptyItem) {
608                                         // Avoid having an empty item, this is not valid DocBook. A single character is enough to force
609                                         // generation of a full <para>.
610                                         xs << ' ';
611                                 } else {
612                                         // Generate the rest of the paragraph, if need be.
613                                         par->simpleDocBookOnePar(buf, xs, rp, text.outerFont(distance(begin, par)), true, true, sep);
614                                 }
615
616                                 ++par;
617                                 if (getsIntoParagraph)
618                                         closeInnerItemTag(xs, style);
619
620                                 // We may not want to close the tag yet, in particular:
621                                 // If we're not at the end of the item...
622                                 if (par != pend
623                                         //  and are doing items...
624                                         && !isNormalEnv(style)
625                                         // and if the depth has changed...
626                                         && par->params().depth() != origdepth) {
627                                         // then we'll save this layout for later, and close it when
628                                         // we get another item.
629                                         lastlay = &style;
630                                 } else {
631                                         closeItemTag(xs, style);
632
633                                         // Eventually, close the item wrapper.
634                                         if (style.docbookitemwrappertag() != "NONE") {
635                                                 xs << xml::EndTag(style.docbookitemwrappertag());
636                                                 xs << xml::CR();
637                                         }
638                                 }
639                         }
640                         // The other possibility is that the depth has increased.
641                         else {
642                                 send = findEndOfEnvironment(par, pend);
643                                 par = makeEnvironment(buf, xs, runparams, text, par, send);
644                         }
645                         break;
646                 }
647                 case LATEX_PARAGRAPH:
648                         send = findLast(par, pend, LATEX_PARAGRAPH);
649                         par = makeParagraphs(buf, xs, runparams, text, par, send);
650                         break;
651                 case LATEX_BIB_ENVIRONMENT:
652                         send = findLast(par, pend, LATEX_BIB_ENVIRONMENT);
653                         par = makeParagraphBibliography(buf, xs, runparams, text, par, send);
654                         break;
655                 case LATEX_COMMAND:
656                         ++par;
657                         break;
658                 }
659         }
660
661         if (lastlay != nullptr) {
662                 closeItemTag(xs, *lastlay);
663                 if (lastlay->docbookitemwrappertag() != "NONE") {
664                         xs << xml::EndTag(lastlay->docbookitemwrappertag());
665                         xs << xml::CR();
666                 }
667         }
668         closeTag(xs, bstyle);
669         xs << xml::CR();
670         return pend;
671 }
672
673
674 void makeCommand(
675                 Buffer const & buf,
676                 XMLStream & xs,
677                 OutputParams const & runparams,
678                 Text const & text,
679                 ParagraphList::const_iterator const & pbegin)
680 {
681         Layout const &style = pbegin->layout();
682
683         // No need for labels, as they are handled by DocBook tags.
684
685         openParTag(xs, style);
686
687         ParagraphList::const_iterator const begin = text.paragraphs().begin();
688         pbegin->simpleDocBookOnePar(buf, xs, runparams,
689                                                                 text.outerFont(distance(begin, pbegin)));
690         closeTag(xs, style);
691         xs << xml::CR();
692 }
693
694 pair<ParagraphList::const_iterator, ParagraphList::const_iterator> makeAny(
695                 Text const &text,
696                 Buffer const &buf,
697                 XMLStream &xs,
698                 OutputParams const &ourparams,
699                 ParagraphList::const_iterator par,
700                 ParagraphList::const_iterator send,
701                 ParagraphList::const_iterator pend)
702 {
703         Layout const & style = par->layout();
704
705         switch (style.latextype) {
706                 case LATEX_COMMAND: {
707                         // The files with which we are working never have more than
708                         // one paragraph in a command structure.
709                         // FIXME
710                         // if (ourparams.docbook_in_par)
711                         //   fix it so we don't get sections inside standard, e.g.
712                         // note that we may then need to make runparams not const, so we
713                         // can communicate that back.
714                         // FIXME Maybe this fix should be in the routines themselves, in case
715                         // they are called from elsewhere.
716                         makeCommand(buf, xs, ourparams, text, par);
717                         ++par;
718                         break;
719                 }
720                 case LATEX_ENVIRONMENT:
721                 case LATEX_LIST_ENVIRONMENT:
722                 case LATEX_ITEM_ENVIRONMENT:
723                         // FIXME Same fix here.
724                         send = findEndOfEnvironment(par, pend);
725                         par = makeEnvironment(buf, xs, ourparams, text, par, send);
726                         break;
727                 case LATEX_PARAGRAPH:
728                         send = findLast(par, pend, LATEX_PARAGRAPH);
729                         par = makeParagraphs(buf, xs, ourparams, text, par, send);
730                         break;
731                 case LATEX_BIB_ENVIRONMENT:
732                         send = findLast(par, pend, LATEX_BIB_ENVIRONMENT);
733                         par = makeParagraphBibliography(buf, xs, ourparams, text, par, send);
734                         break;
735         }
736
737         return make_pair(par, send);
738 }
739
740 } // end anonymous namespace
741
742
// Result of hasDocumentSectioning(): (a sectioning paragraph was found, index
// at which the scan stopped: that paragraph, or the end of the range if none).
using DocBookDocumentSectioning = tuple<bool, pit_type>;
// Result of getParagraphsWithInfo(): (paragraphs whose layout says "maybe",
// paragraphs whose layout says "always", first non-empty paragraph of the
// range, index at which the scan for <info> paragraphs stopped).
using DocBookInfoTag = tuple<set<pit_type>, set<pit_type>, pit_type, pit_type>;
745
746
747 DocBookDocumentSectioning hasDocumentSectioning(ParagraphList const &paragraphs, pit_type bpit, pit_type const epit) {
748         bool documentHasSections = false;
749
750         while (bpit < epit) {
751                 Layout const &style = paragraphs[bpit].layout();
752                 documentHasSections |= style.category() == from_utf8("Sectioning");
753
754                 if (documentHasSections) {
755                         break;
756                 }
757                 bpit += 1;
758         }
759         // Paragraphs before the first section: [ runparams.par_begin ; eppit )
760
761         return make_tuple(documentHasSections, bpit);
762 }
763
764
765 bool hasOnlyNotes(Paragraph const & par)
766 {
767         for (int i = 0; i < par.size(); ++i)
768                 if (!par.isInset(i) || !dynamic_cast<InsetNote *>(par.insetList().get(i)))
769                         return false;
770         return true;
771 }
772
773
774 DocBookInfoTag getParagraphsWithInfo(ParagraphList const &paragraphs, pit_type bpit, pit_type const epit) {
775         set<pit_type> shouldBeInInfo;
776         set<pit_type> mustBeInInfo;
777
778         // Find the first non empty paragraph by mutating bpit.
779         while (bpit < epit) {
780                 Paragraph const &par = paragraphs[bpit];
781                 if (par.empty() || hasOnlyNotes(par))
782                         bpit += 1;
783                 else
784                         break;
785         }
786
787         // Find the last info-like paragraph.
788         pit_type cpit = bpit;
789         while (cpit < epit) {
790                 // Skip paragraphs only containing one note.
791                 Paragraph const &par = paragraphs[cpit];
792                 if (hasOnlyNotes(par)) {
793                         cpit += 1;
794                         continue;
795                 }
796
797                 // Based on layout information, store this paragraph in one set: should be in <info>, must be.
798                 Layout const &style = par.layout();
799
800                 if (style.docbookininfo() == "always") {
801                         mustBeInInfo.emplace(cpit);
802                 } else if (style.docbookininfo() == "maybe") {
803                         shouldBeInInfo.emplace(cpit);
804                 } else {
805                         // Hypothesis: the <info> parts should be grouped together near the beginning bpit.
806                         break;
807                 }
808                 cpit += 1;
809         }
810         // Now, cpit points to the last paragraph that has things that could go in <info>.
811         // bpit is still the beginning of the <info> part.
812
813         return make_tuple(shouldBeInInfo, mustBeInInfo, bpit, cpit);
814 }
815
816
817 bool hasAbstractBetween(ParagraphList const &paragraphs, pit_type const bpitAbstract, pit_type const epitAbstract)
818 {
819         // Hypothesis: the paragraphs between bpitAbstract and epitAbstract can be considered an abstract because they
820         // are just after a document or part title.
821         if (epitAbstract - bpitAbstract <= 0)
822                 return false;
823
824         // If there is something between these paragraphs, check if it's compatible with an abstract (i.e. some text).
825         pit_type bpit = bpitAbstract;
826         while (bpit < epitAbstract) {
827                 const Paragraph &p = paragraphs.at(bpit);
828
829                 if (p.layout().name() == from_ascii("Abstract"))
830                         return true;
831
832                 if (!p.insetList().empty()) {
833                         for (const auto &i : p.insetList()) {
834                                 if (i.inset->getText(0) != nullptr) {
835                                         return true;
836                                 }
837                         }
838                 }
839                 bpit++;
840         }
841         return false;
842 }
843
844
845 pit_type generateDocBookParagraphWithoutSectioning(
846                 Text const & text,
847                 Buffer const & buf,
848                 XMLStream & xs,
849                 OutputParams const & runparams,
850                 ParagraphList const & paragraphs,
851                 pit_type bpit,
852                 pit_type epit)
853 {
854         auto par = paragraphs.iterator_at(bpit);
855         auto lastStartedPar = par;
856         ParagraphList::const_iterator send;
857         auto const pend =
858                         (epit == (int) paragraphs.size()) ?
859                         paragraphs.end() : paragraphs.iterator_at(epit);
860
861         while (bpit < epit) {
862                 tie(par, send) = makeAny(text, buf, xs, runparams, par, send, pend);
863                 bpit += distance(lastStartedPar, par);
864                 lastStartedPar = par;
865         }
866
867         return bpit;
868 }
869
870
871 void outputDocBookInfo(
872                 Text const & text,
873                 Buffer const & buf,
874                 XMLStream & xs,
875                 OutputParams const & runparams,
876                 ParagraphList const & paragraphs,
877                 DocBookInfoTag const & info,
878                 pit_type bpitAbstract,
879                 pit_type const epitAbstract)
880 {
881         // Consider everything between bpitAbstract and epitAbstract (excluded) as paragraphs for the abstract.
882         // Use bpitAbstract >= epitAbstract to indicate there is no abstract.
883
884         set<pit_type> shouldBeInInfo;
885         set<pit_type> mustBeInInfo;
886         pit_type bpitInfo;
887         pit_type epitInfo;
888         tie(shouldBeInInfo, mustBeInInfo, bpitInfo, epitInfo) = info;
889
890         // Perform an additional check on the abstract. Sometimes, there are many paragraphs that should go
891         // into the abstract, but none generates actual content. Thus, first generate to a temporary stream,
892         // then only create the <abstract> tag if these paragraphs generate some content.
893         // This check must be performed *before* a decision on whether or not to output <info> is made.
894         bool hasAbstract = hasAbstractBetween(paragraphs, bpitAbstract, epitAbstract);
895         docstring abstract;
896         if (hasAbstract) {
897                 odocstringstream os2;
898                 XMLStream xs2(os2);
899                 generateDocBookParagraphWithoutSectioning(text, buf, xs2, runparams, paragraphs, bpitAbstract, epitAbstract);
900
901                 // Actually output the abstract if there is something to do. Don't count line feeds or spaces in this,
902                 // even though they must be properly output if there is some abstract.
903                 docstring abstractContent = os2.str();
904                 static const lyx::regex reg("[ \\r\\n]*");
905                 abstractContent = from_utf8(lyx::regex_replace(to_utf8(abstractContent), reg, string("")));
906
907                 // Nothing? Then there is no abstract!
908                 if (abstractContent.empty())
909                         hasAbstract = false;
910         }
911
912         // The abstract must go in <info>.
913         bool needInfo = !mustBeInInfo.empty() || hasAbstract;
914
915         // Start the <info> tag if required.
916         if (needInfo) {
917                 xs.startDivision(false);
918                 xs << xml::StartTag("info");
919                 xs << xml::CR();
920         }
921
922         // Output the elements that should go in <info>.
923         generateDocBookParagraphWithoutSectioning(text, buf, xs, runparams, paragraphs, bpitInfo, epitInfo);
924
925         if (hasAbstract && !abstract.empty()) { // The second test is probably superfluous.
926                 string tag = paragraphs[bpitAbstract].layout().docbookforceabstracttag();
927                 if (tag == "NONE")
928                         tag = "abstract";
929
930                 xs << xml::StartTag(tag);
931                 xs << xml::CR();
932                 xs << XMLStream::ESCAPE_NONE << abstract;
933                 xs << xml::EndTag(tag);
934                 xs << xml::CR();
935         }
936
937         // End the <info> tag if it was started.
938         if (needInfo) {
939                 xs << xml::EndTag("info");
940                 xs << xml::CR();
941                 xs.endDivision();
942         }
943 }
944
945
946 void docbookFirstParagraphs(
947                 Text const &text,
948                 Buffer const &buf,
949                 XMLStream &xs,
950                 OutputParams const &runparams,
951                 pit_type epit)
952 {
953         // Handle the beginning of the document, supposing it has sections.
954         // Major role: output the first <info> tag.
955
956         ParagraphList const &paragraphs = text.paragraphs();
957         pit_type bpit = runparams.par_begin;
958         DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit);
959         outputDocBookInfo(text, buf, xs, runparams, paragraphs, info, get<3>(info), epit);
960 }
961
962
963 bool isParagraphEmpty(const Paragraph &par)
964 {
965         InsetList const &insets = par.insetList();
966         size_t insetsLength = distance(insets.begin(), insets.end());
967         bool hasParagraphOnlyNote = insetsLength == 1 && insets.get(0) && insets.get(0)->asInsetCollapsible() &&
968                                                                 dynamic_cast<InsetNote *>(insets.get(0));
969         return hasParagraphOnlyNote;
970 }
971
972
973 void docbookSimpleAllParagraphs(
974                 Text const & text,
975                 Buffer const & buf,
976                 XMLStream & xs,
977                 OutputParams const & runparams)
978 {
979         // Handle the document, supposing it has no sections (i.e. a "simple" document).
980
981         // First, the <info> tag.
982         ParagraphList const &paragraphs = text.paragraphs();
983         pit_type bpit = runparams.par_begin;
984         pit_type const epit = runparams.par_end;
985         DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit);
986         outputDocBookInfo(text, buf, xs, runparams, paragraphs, info, 0, 0);
987         bpit = get<3>(info); // Generate the content starting from the end of the <info> part.
988
989         // Then, the content.
990         ParagraphList::const_iterator const pend =
991                         (epit == (int) paragraphs.size()) ?
992                         paragraphs.end() : paragraphs.iterator_at(epit);
993
994         while (bpit < epit) {
995                 auto par = paragraphs.iterator_at(bpit);
996                 ParagraphList::const_iterator const lastStartedPar = par;
997                 ParagraphList::const_iterator send;
998
999                 if (isParagraphEmpty(*par)) {
1000                         ++par;
1001                         bpit += distance(lastStartedPar, par);
1002                         continue;
1003                 }
1004
1005                 // Generate this paragraph.
1006                 tie(par, send) = makeAny(text, buf, xs, runparams, par, send, pend);
1007                 bpit += distance(lastStartedPar, par);
1008         }
1009 }
1010
1011
1012 void docbookParagraphs(Text const &text,
1013                                            Buffer const &buf,
1014                                            XMLStream &xs,
1015                                            OutputParams const &runparams) {
1016         ParagraphList const &paragraphs = text.paragraphs();
1017         if (runparams.par_begin == runparams.par_end) {
1018                 runparams.par_begin = 0;
1019                 runparams.par_end = paragraphs.size();
1020         }
1021         pit_type bpit = runparams.par_begin;
1022         pit_type const epit = runparams.par_end;
1023         LASSERT(bpit < epit,
1024                         {
1025                                 xs << XMLStream::ESCAPE_NONE << "<!-- DocBook output error! -->\n";
1026                                 return;
1027                         });
1028
1029         ParagraphList::const_iterator const pend =
1030                         (epit == (int) paragraphs.size()) ?
1031                         paragraphs.end() : paragraphs.iterator_at(epit);
1032         std::stack<std::pair<int, string>> headerLevels; // Used to determine when to open/close sections: store the depth
1033         // of the section and the tag that was used to open it.
1034
1035         // Detect whether the document contains sections. If there are no sections, there can be no automatically
1036         // discovered abstract.
1037         bool documentHasSections;
1038         pit_type eppit;
1039         tie(documentHasSections, eppit) = hasDocumentSectioning(paragraphs, bpit, epit);
1040
1041         if (documentHasSections) {
1042                 docbookFirstParagraphs(text, buf, xs, runparams, eppit);
1043                 bpit = eppit;
1044         } else {
1045                 docbookSimpleAllParagraphs(text, buf, xs, runparams);
1046                 return;
1047         }
1048
1049         bool currentlyInAppendix = false;
1050
1051         while (bpit < epit) {
1052                 OutputParams ourparams = runparams;
1053
1054                 auto par = paragraphs.iterator_at(bpit);
1055                 if (par->params().startOfAppendix())
1056                         currentlyInAppendix = true;
1057                 Layout const &style = par->layout();
1058                 ParagraphList::const_iterator const lastStartedPar = par;
1059                 ParagraphList::const_iterator send;
1060
1061                 if (isParagraphEmpty(*par)) {
1062                         ++par;
1063                         bpit += distance(lastStartedPar, par);
1064                         continue;
1065                 }
1066
1067                 // Think about adding <section> and/or </section>s.
1068                 const bool isLayoutSectioning = style.category() == from_utf8("Sectioning");
1069                 if (isLayoutSectioning) {
1070                         int level = style.toclevel;
1071
1072                         // Need to close a previous section if it has the same level or a higher one (close <section> if opening a <h2>
1073                         // after a <h2>, <h3>, <h4>, <h5> or <h6>). More examples:
1074                         //   - current: h2; back: h1; do not close any <section>
1075                         //   - current: h1; back: h2; close two <section> (first the <h2>, then the <h1>, so a new <h1> can come)
1076                         while (!headerLevels.empty() && level <= headerLevels.top().first) {
1077                                 int stackLevel = headerLevels.top().first;
1078                                 docstring stackTag = from_utf8("</" + headerLevels.top().second + ">");
1079                                 headerLevels.pop();
1080
1081                                 // Output the tag only if it corresponds to a legit section.
1082                                 if (stackLevel != Layout::NOT_IN_TOC)
1083                                         xs << XMLStream::ESCAPE_NONE << stackTag << xml::CR();
1084                         }
1085
1086                         // Open the new section: first push it onto the stack, then output it in DocBook.
1087                         string sectionTag = (currentlyInAppendix && style.docbooksectiontag() == "chapter") ?
1088                                                                 "appendix" : style.docbooksectiontag();
1089                         headerLevels.push(std::make_pair(level, sectionTag));
1090
1091                         // Some sectioning-like elements should not be output (such as FrontMatter).
1092                         if (level != Layout::NOT_IN_TOC) {
1093                                 // Look for a label in the title, i.e. a InsetLabel as a child.
1094                                 docstring id = docstring();
1095                                 for (pos_type i = 0; i < par->size(); ++i) {
1096                                         Inset const *inset = par->getInset(i);
1097                                         if (inset) {
1098                                                 if (auto label = dynamic_cast<InsetLabel const *>(inset)) {
1099                                                         // Generate the attributes for the section if need be.
1100                                                         id += "xml:id=\"" + xml::cleanID(label->screenLabel()) + "\"";
1101
1102                                                         // Don't output the ID as a DocBook <anchor>.
1103                                                         ourparams.docbook_anchors_to_ignore.emplace(label->screenLabel());
1104
1105                                                         // Cannot have multiple IDs per tag.
1106                                                         break;
1107                                                 }
1108                                         }
1109                                 }
1110
1111                                 // Write the open tag for this section.
1112                                 docstring tag = from_utf8("<" + sectionTag);
1113                                 if (!id.empty())
1114                                         tag += from_utf8(" ") + id;
1115                                 tag += from_utf8(">");
1116                                 xs << XMLStream::ESCAPE_NONE << tag;
1117                                 xs << xml::CR();
1118                         }
1119                 }
1120
1121                 // Close all sections before the bibliography.
1122                 // TODO: Only close all when the bibliography is at the end of the document? Or force to output the bibliography at the end of the document? Or don't care (as allowed by DocBook)?
1123                 auto insetsLength = distance(par->insetList().begin(), par->insetList().end());
1124                 if (insetsLength > 0) {
1125                         Inset const *firstInset = par->getInset(0);
1126                         if (firstInset && dynamic_cast<InsetBibtex const *>(firstInset)) {
1127                                 while (!headerLevels.empty()) {
1128                                         int level = headerLevels.top().first;
1129                                         docstring tag = from_utf8("</" + headerLevels.top().second + ">");
1130                                         headerLevels.pop();
1131
1132                                         // Output the tag only if it corresponds to a legit section.
1133                                         if (level != Layout::NOT_IN_TOC) {
1134                                                 xs << XMLStream::ESCAPE_NONE << tag;
1135                                                 xs << xml::CR();
1136                                         }
1137                                 }
1138                         }
1139                 }
1140
1141                 // Generate this paragraph.
1142                 tie(par, send) = makeAny(text, buf, xs, ourparams, par, send, pend);
1143                 bpit += distance(lastStartedPar, par);
1144         }
1145
1146         // If need be, close <section>s, but only at the end of the document (otherwise, dealt with at the beginning
1147         // of the loop).
1148         while (!headerLevels.empty() && headerLevels.top().first > Layout::NOT_IN_TOC) {
1149                 docstring tag = from_utf8("</" + headerLevels.top().second + ">");
1150                 headerLevels.pop();
1151                 xs << XMLStream::ESCAPE_NONE << tag;
1152                 xs << xml::CR();
1153         }
1154 }
1155
1156 } // namespace lyx