]> git.lyx.org Git - lyx.git/blob - src/output_docbook.cpp
Revert "DocBook: add a layout tag to tell whether an item is the abstract or not."
[lyx.git] / src / output_docbook.cpp
1 /**
2  * \file output_docbook.cpp
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author Lars Gullik Bjønnes
7  * \author José Matos
8  *
9  * Full author contact details are available in file CREDITS.
10  */
11
12 #include <config.h>
13
14 #include "Buffer.h"
15 #include "buffer_funcs.h"
16 #include "BufferParams.h"
17 #include "Font.h"
18 #include "InsetList.h"
19 #include "Layout.h"
20 #include "OutputParams.h"
21 #include "Paragraph.h"
22 #include "ParagraphList.h"
23 #include "ParagraphParameters.h"
24 #include "xml.h"
25 #include "Text.h"
26 #include "TextClass.h"
27
28 #include "insets/InsetBibtex.h"
29 #include "insets/InsetBibitem.h"
30 #include "insets/InsetLabel.h"
31 #include "insets/InsetNote.h"
32
33 #include "support/convert.h"
34 #include "support/debug.h"
35 #include "support/lassert.h"
36 #include "support/lstrings.h"
37 #include "support/textutils.h"
38
39 #include "support/regex.h"
40
41 #include <stack>
42 #include <iostream>
43 #include <algorithm>
44 #include <sstream>
45
46 using namespace std;
47 using namespace lyx::support;
48
49 namespace lyx {
50
51 namespace {
52
53 std::string fontToDocBookTag(xml::FontTypes type)
54 {
55         switch (type) {
56         case xml::FontTypes::FT_EMPH:
57         case xml::FontTypes::FT_BOLD:
58                 return "emphasis";
59         case xml::FontTypes::FT_NOUN:
60                 return "person";
61         case xml::FontTypes::FT_UBAR:
62         case xml::FontTypes::FT_WAVE:
63         case xml::FontTypes::FT_DBAR:
64         case xml::FontTypes::FT_SOUT:
65         case xml::FontTypes::FT_XOUT:
66         case xml::FontTypes::FT_ITALIC:
67         case xml::FontTypes::FT_UPRIGHT:
68         case xml::FontTypes::FT_SLANTED:
69         case xml::FontTypes::FT_SMALLCAPS:
70         case xml::FontTypes::FT_ROMAN:
71         case xml::FontTypes::FT_SANS:
72                 return "emphasis";
73         case xml::FontTypes::FT_TYPE:
74                 return "code";
75         case xml::FontTypes::FT_SIZE_TINY:
76         case xml::FontTypes::FT_SIZE_SCRIPT:
77         case xml::FontTypes::FT_SIZE_FOOTNOTE:
78         case xml::FontTypes::FT_SIZE_SMALL:
79         case xml::FontTypes::FT_SIZE_NORMAL:
80         case xml::FontTypes::FT_SIZE_LARGE:
81         case xml::FontTypes::FT_SIZE_LARGER:
82         case xml::FontTypes::FT_SIZE_LARGEST:
83         case xml::FontTypes::FT_SIZE_HUGE:
84         case xml::FontTypes::FT_SIZE_HUGER:
85         case xml::FontTypes::FT_SIZE_INCREASE:
86         case xml::FontTypes::FT_SIZE_DECREASE:
87                 return "emphasis";
88         default:
89                 return "";
90         }
91 }
92
93
94 string fontToRole(xml::FontTypes type)
95 {
96         // Specific fonts are achieved with roles. The only common ones are "" for basic emphasis,
97         // and "bold"/"strong" for bold. With some specific options, other roles are copied into
98         // HTML output (via the DocBook XSLT sheets); otherwise, if not recognised, they are just ignored.
99         // Hence, it is not a problem to have many roles by default here.
100         // See https://www.sourceware.org/ml/docbook/2003-05/msg00269.html
101         switch (type) {
102         case xml::FontTypes::FT_ITALIC:
103         case xml::FontTypes::FT_EMPH:
104                 return "";
105         case xml::FontTypes::FT_BOLD:
106                 return "bold";
107         case xml::FontTypes::FT_NOUN: // Outputs a <person>
108         case xml::FontTypes::FT_TYPE: // Outputs a <code>
109                 return "";
110         case xml::FontTypes::FT_UBAR:
111                 return "underline";
112
113         // All other roles are non-standard for DocBook.
114
115         case xml::FontTypes::FT_WAVE:
116                 return "wave";
117         case xml::FontTypes::FT_DBAR:
118                 return "dbar";
119         case xml::FontTypes::FT_SOUT:
120                 return "sout";
121         case xml::FontTypes::FT_XOUT:
122                 return "xout";
123         case xml::FontTypes::FT_UPRIGHT:
124                 return "upright";
125         case xml::FontTypes::FT_SLANTED:
126                 return "slanted";
127         case xml::FontTypes::FT_SMALLCAPS:
128                 return "smallcaps";
129         case xml::FontTypes::FT_ROMAN:
130                 return "roman";
131         case xml::FontTypes::FT_SANS:
132                 return "sans";
133         case xml::FontTypes::FT_SIZE_TINY:
134                 return "tiny";
135         case xml::FontTypes::FT_SIZE_SCRIPT:
136                 return "size_script";
137         case xml::FontTypes::FT_SIZE_FOOTNOTE:
138                 return "size_footnote";
139         case xml::FontTypes::FT_SIZE_SMALL:
140                 return "size_small";
141         case xml::FontTypes::FT_SIZE_NORMAL:
142                 return "size_normal";
143         case xml::FontTypes::FT_SIZE_LARGE:
144                 return "size_large";
145         case xml::FontTypes::FT_SIZE_LARGER:
146                 return "size_larger";
147         case xml::FontTypes::FT_SIZE_LARGEST:
148                 return "size_largest";
149         case xml::FontTypes::FT_SIZE_HUGE:
150                 return "size_huge";
151         case xml::FontTypes::FT_SIZE_HUGER:
152                 return "size_huger";
153         case xml::FontTypes::FT_SIZE_INCREASE:
154                 return "size_increase";
155         case xml::FontTypes::FT_SIZE_DECREASE:
156                 return "size_decrease";
157         default:
158                 return "";
159         }
160 }
161
162 string fontToAttribute(xml::FontTypes type) {
163         // If there is a role (i.e. nonstandard use of a tag), output the attribute. Otherwise, the sheer tag is sufficient
164         // for the font.
165         string role = fontToRole(type);
166         if (!role.empty()) {
167                 return "role='" + role + "'";
168         } else {
169                 return "";
170         }
171 }
172
173 } // end anonymous namespace
174
175
176 xml::FontTag docbookStartFontTag(xml::FontTypes type)
177 {
178         return xml::FontTag(from_utf8(fontToDocBookTag(type)), from_utf8(fontToAttribute(type)), type);
179 }
180
181
182 xml::EndFontTag docbookEndFontTag(xml::FontTypes type)
183 {
184         return xml::EndFontTag(from_utf8(fontToDocBookTag(type)), type);
185 }
186
187
188 namespace {
189
190 // convenience functions
191
192 void openParTag(XMLStream & xs, const Paragraph * par, const Paragraph * prevpar)
193 {
194         Layout const & lay = par->layout();
195
196         if (par == prevpar)
197                 prevpar = nullptr;
198
199         // When should the wrapper be opened here? Only if the previous paragraph has the SAME wrapper tag
200         // (usually, they won't have the same layout) and the CURRENT one allows merging.
201         // The main use case is author information in several paragraphs: if the name of the author is the
202         // first paragraph of an author, then merging with the previous tag does not make sense. Say the
203         // next paragraph is the affiliation, then it should be output in the same <author> tag (different
204         // layout, same wrapper tag).
205         bool openWrapper = lay.docbookwrappertag() != "NONE";
206         if (prevpar != nullptr) {
207                 Layout const & prevlay = prevpar->layout();
208                 if (prevlay.docbookwrappertag() != "NONE") {
209                         openWrapper = prevlay.docbookwrappertag() == lay.docbookwrappertag()
210                                         && !lay.docbookwrappermergewithprevious();
211                 }
212         }
213
214         // Main logic.
215         if (openWrapper)
216                 xs << xml::StartTag(lay.docbookwrappertag(), lay.docbookwrapperattr());
217
218         string tag = lay.docbooktag();
219         if (tag == "Plain Layout")
220                 tag = "para";
221
222         xs << xml::ParTag(tag, lay.docbookattr());
223
224         if (lay.docbookitemtag() != "NONE")
225                 xs << xml::StartTag(lay.docbookitemtag(), lay.docbookitemattr());
226 }
227
228
229 void closeTag(XMLStream & xs, Paragraph const * par, Paragraph const * nextpar)
230 {
231         Layout const & lay = par->layout();
232
233         if (par == nextpar)
234                 nextpar = nullptr;
235
236         // See comment in openParTag.
237         bool closeWrapper = lay.docbookwrappertag() != "NONE";
238         if (nextpar != nullptr) {
239                 Layout const & nextlay = nextpar->layout();
240                 if (nextlay.docbookwrappertag() != "NONE") {
241                         closeWrapper = nextlay.docbookwrappertag() == lay.docbookwrappertag()
242                                         && !nextlay.docbookwrappermergewithprevious();
243                 }
244         }
245
246         // Main logic.
247         if (lay.docbookitemtag() != "NONE")
248                 xs << xml::EndTag(lay.docbookitemtag());
249
250         string tag = lay.docbooktag();
251         if (tag == "Plain Layout")
252                 tag = "para";
253
254         xs << xml::EndTag(tag);
255         if (closeWrapper)
256                 xs << xml::EndTag(lay.docbookwrappertag());
257 }
258
259
260 void openLabelTag(XMLStream & xs, Layout const & lay) // Mostly for definition lists.
261 {
262         xs << xml::StartTag(lay.docbookitemlabeltag(), lay.docbookitemlabelattr());
263 }
264
265
266 void closeLabelTag(XMLStream & xs, Layout const & lay)
267 {
268         xs << xml::EndTag(lay.docbookitemlabeltag());
269         xs << xml::CR();
270 }
271
272
273 void openItemTag(XMLStream & xs, Layout const & lay)
274 {
275         xs << xml::StartTag(lay.docbookitemtag(), lay.docbookitemattr());
276 }
277
278
279 // Return true when new elements are output in a paragraph, false otherwise.
280 bool openInnerItemTag(XMLStream & xs, Layout const & lay)
281 {
282         if (lay.docbookiteminnertag() != "NONE") {
283                 xs << xml::CR();
284                 xs << xml::ParTag(lay.docbookiteminnertag(), lay.docbookiteminnerattr());
285
286                 if (lay.docbookiteminnertag() == "para") {
287                         return true;
288                 }
289         }
290         return false;
291 }
292
293
294 void closeInnerItemTag(XMLStream & xs, Layout const & lay)
295 {
296         if (lay.docbookiteminnertag()!= "NONE") {
297                 xs << xml::EndTag(lay.docbookiteminnertag());
298                 xs << xml::CR();
299         }
300 }
301
302
303 inline void closeItemTag(XMLStream & xs, Layout const & lay)
304 {
305         xs << xml::EndTag(lay.docbookitemtag());
306         xs << xml::CR();
307 }
308
309 // end of convenience functions
310
311 ParagraphList::const_iterator findLast(
312                 ParagraphList::const_iterator p,
313                 ParagraphList::const_iterator const & pend,
314                 LatexType type) {
315         for (++p; p != pend && p->layout().latextype == type; ++p);
316
317         return p;
318 }
319
320 ParagraphList::const_iterator findLastBibliographyParagraph(
321                 ParagraphList::const_iterator p,
322                 ParagraphList::const_iterator const & pend) {
323         for (++p; p != pend && p->layout().latextype == LATEX_BIB_ENVIRONMENT; ++p);
324
325         return p;
326 }
327
328
329 ParagraphList::const_iterator findEndOfEnvironment(
330                 ParagraphList::const_iterator const & pstart,
331                 ParagraphList::const_iterator const & pend)
332 {
333         ParagraphList::const_iterator p = pstart;
334         size_t const depth = p->params().depth();
335
336         for (++p; p != pend; ++p) {
337                 Layout const &style = p->layout();
338                 // It shouldn't happen that e.g. a section command occurs inside
339                 // a quotation environment, at a higher depth, but as of 6/2009,
340                 // it can happen. We pretend that it's just at lowest depth.
341                 if (style.latextype == LATEX_COMMAND)
342                         return p;
343
344                 // If depth is down, we're done
345                 if (p->params().depth() < depth)
346                         return p;
347
348                 // If depth is up, we're not done
349                 if (p->params().depth() > depth)
350                         continue;
351
352                 // FIXME I am not sure about the first check.
353                 // Surely we *could* have different layouts that count as
354                 // LATEX_PARAGRAPH, right?
355                 if (style.latextype == LATEX_PARAGRAPH || style != p->layout())
356                         return p;
357         }
358
359         return pend;
360 }
361
362
363 ParagraphList::const_iterator makeParagraphBibliography(
364                 Buffer const &buf,
365                 XMLStream &xs,
366                 OutputParams const &runparams,
367                 Text const &text,
368                 ParagraphList::const_iterator const & pbegin,
369                 ParagraphList::const_iterator const & pend)
370 {
371         auto const begin = text.paragraphs().begin();
372         auto const end = text.paragraphs().end();
373
374         // Find the paragraph *before* pbegin.
375         ParagraphList::const_iterator pbegin_before = begin;
376         if (pbegin != begin) {
377                 ParagraphList::const_iterator pbegin_before_next = begin;
378                 ++pbegin_before_next;
379
380                 while (pbegin_before_next != pbegin) {
381                         ++pbegin_before;
382                         ++pbegin_before_next;
383                 }
384         }
385
386         ParagraphList::const_iterator par = pbegin;
387
388         // If this is the first paragraph in a bibliography, open the bibliography tag.
389         if (pbegin != begin && pbegin_before->layout().latextype != LATEX_BIB_ENVIRONMENT) {
390                 xs << xml::StartTag("bibliography");
391                 xs << xml::CR();
392         }
393
394         // Generate the required paragraphs, but only if they are .
395         for (; par != pend; ++par) {
396                 // Start the precooked bibliography entry. This is very much like opening a paragraph tag.
397                 // Don't forget the citation ID!
398                 docstring attr;
399                 for (auto i = 0; i < par->size(); ++i) {
400                         Inset const *ip = par->getInset(0);
401                         if (ip != nullptr && ip->lyxCode() == BIBITEM_CODE) {
402                                 const auto * bibitem = dynamic_cast<const InsetBibitem*>(par->getInset(i));
403                                 attr = from_utf8("xml:id='") + bibitem->getParam("key") + from_utf8("'");
404                                 break;
405                         }
406                 }
407                 xs << xml::StartTag(from_utf8("bibliomixed"), attr);
408
409                 // Generate the entry.
410                 par->simpleDocBookOnePar(buf, xs, runparams, text.outerFont(distance(begin, par)), true, true, 0);
411
412                 // End the precooked bibliography entry.
413                 xs << xml::EndTag("bibliomixed");
414                 xs << xml::CR();
415         }
416
417         // If this is the last paragraph in a bibliography, close the bibliography tag.
418         if (par == end || par->layout().latextype != LATEX_BIB_ENVIRONMENT) {
419                 xs << xml::EndTag("bibliography");
420                 xs << xml::CR();
421         }
422
423         return pend;
424 }
425
426
// Outputs the paragraphs in [pbegin, pend) as standard DocBook paragraphs.
//
// Each paragraph is first rendered into a temporary stream; it is copied to xs
// (with its opening and closing tags, when needed) only if that rendering
// contains something else than spaces and line breaks.
//
// buf, runparams: passed on to Paragraph::simpleDocBookOnePar; runparams also
//                 provides the docbook_make_pars, docbook_in_par and
//                 docbook_force_pars flags used below.
// xs:             stream the output is written to.
// text:           text owning the paragraphs (used for the outer font).
// Returns pend.
ParagraphList::const_iterator makeParagraphs(
                Buffer const &buf,
                XMLStream &xs,
                OutputParams const &runparams,
                Text const &text,
                ParagraphList::const_iterator const & pbegin,
                ParagraphList::const_iterator const & pend)
{
        auto const begin = text.paragraphs().begin();
        auto const end = text.paragraphs().end();
        ParagraphList::const_iterator par = pbegin;
        // For the first iteration, prevpar == par: openParTag treats this as "no previous paragraph".
        ParagraphList::const_iterator prevpar = pbegin;

        for (; par != pend; prevpar = par, ++par) {
                // We want to open the paragraph tag if:
                //   (i) the current layout permits multiple paragraphs
                //  (ii) we are either not already inside a paragraph (HTMLIsBlock) OR
                //         we are, but this is not the first paragraph
                //
                // But there is also a special case, and we first see whether we are in it.
                // We do not want to open the paragraph tag if this paragraph contains
                // only one item, and that item is "inline", i.e., not HTMLIsBlock (such
                // as a branch). On the other hand, if that single item has a font change
                // applied to it, then we still do need to open the paragraph.
                //
                // Obviously, this is very fragile. The main reason we need to do this is
                // because of branches, e.g., a branch that contains an entire new section.
                // We do not really want to wrap that whole thing in a <div>...</div>.
                bool special_case = false;
                Inset const *specinset = par->size() == 1 ? par->getInset(0) : nullptr;
                if (specinset && !specinset->getLayout().htmlisblock()) { // TODO: Convert htmlisblock to a DocBook parameter?
                        Layout const &style = par->layout();
                        FontInfo const first_font = style.labeltype == LABEL_MANUAL ?
                                                                                style.labelfont : style.font;
                        FontInfo const our_font =
                                        par->getFont(buf.masterBuffer()->params(), 0,
                                                                 text.outerFont(distance(begin, par))).fontInfo();

                        // No font change on the single inset: no paragraph tag.
                        if (first_font == our_font)
                                special_case = true;
                }

                // Plain layouts must be ignored (unless paragraphs are forced).
                if (!special_case && buf.params().documentClass().isPlainLayout(par->layout()) && !runparams.docbook_force_pars)
                        special_case = true;
                // Paragraphs made of a single inset that must not be wrapped in a paragraph tag.
                // TODO: Could get rid of this with a DocBook equivalent to htmlisblock?
                if (!special_case && par->size() == 1 && par->getInset(0)) {
                        Inset const * firstInset = par->getInset(0);

                        // Floats cannot be in paragraphs.
                        special_case = to_utf8(firstInset->layoutName()).substr(0, 6) == "Float:";

                        // Bibliographies cannot be in paragraphs.
                        if (!special_case && firstInset->asInsetCommand())
                                special_case = firstInset->asInsetCommand()->params().getCmdName() == "bibtex";

                        // Equations do not deserve their own paragraph (DocBook allows them outside paragraphs).
                        if (!special_case && firstInset->asInsetMath())
                                special_case = true;

                        // ERTs are in comments, not paragraphs.
                        if (!special_case && firstInset->lyxCode() == lyx::ERT_CODE)
                                special_case = true;

                        // Listings should not get into their own paragraph.
                        if (!special_case && firstInset->lyxCode() == lyx::LISTINGS_CODE)
                                special_case = true;
                }

                bool const open_par = runparams.docbook_make_pars
                                                          && (!runparams.docbook_in_par || par != pbegin)
                                                          && !special_case;

                // We want to issue the closing tag if either:
                //   (i)  We opened it, and either docbook_in_par is false,
                //              or we're not in the last paragraph, anyway.
                //   (ii) We didn't open it and docbook_in_par is true,
                //              but we are in the first par, and there is a next par.
                auto nextpar = par;
                ++nextpar;
                bool const close_par =
                                ((open_par && (!runparams.docbook_in_par || nextpar != pend))
                                || (!open_par && runparams.docbook_in_par && par == pbegin && nextpar != pend));

                // Determine if this paragraph has some real content. Things like new pages are not caught
                // by Paragraph::empty(), even though they do not generate anything useful in DocBook.
                // To do so, the paragraph is rendered into a temporary stream.
                odocstringstream os2;
                XMLStream xs2(os2);
                par->simpleDocBookOnePar(buf, xs2, runparams, text.outerFont(distance(begin, par)), open_par, close_par, 0);

                // Strip spaces, carriage returns and line feeds: what remains is the real content.
                docstring cleaned = os2.str();
                static const lyx::regex reg("[ \\r\\n]*");
                cleaned = from_utf8(lyx::regex_replace(to_utf8(cleaned), reg, string("")));

                // Paragraphs without real content are skipped entirely (tags included).
                if (!cleaned.empty()) {
                        if (open_par)
                                openParTag(xs, &*par, &*prevpar);

                        // The temporary rendering is copied as is (not the cleaned version), without escaping.
                        xs << XMLStream::ESCAPE_NONE << os2.str();

                        if (close_par) {
                                // nextpar is only dereferenced when it is a real paragraph of the text.
                                closeTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr);
                                xs << xml::CR();
                        }
                }
        }
        return pend;
}
535
536
537 bool isNormalEnv(Layout const &lay)
538 {
539         return lay.latextype == LATEX_ENVIRONMENT
540                    || lay.latextype == LATEX_BIB_ENVIRONMENT;
541 }
542
543
544 ParagraphList::const_iterator makeEnvironment(
545                 Buffer const &buf,
546                 XMLStream &xs,
547                 OutputParams const &runparams,
548                 Text const &text,
549                 ParagraphList::const_iterator const & pbegin,
550                 ParagraphList::const_iterator const & pend)
551 {
552         auto const begin = text.paragraphs().begin();
553         auto const end = text.paragraphs().end();
554         ParagraphList::const_iterator par = pbegin;
555         depth_type const origdepth = pbegin->params().depth();
556
557         // Find the previous paragraph.
558         auto prevpar = begin;
559         if (prevpar != par) {
560                 auto prevpar_next = prevpar;
561                 ++prevpar_next;
562
563                 while (prevpar_next != par) {
564                         ++prevpar_next;
565                         ++prevpar;
566                 }
567         }
568
569         // open tag for this environment
570         openParTag(xs, &*par, &*prevpar);
571         xs << xml::CR();
572
573         // we will on occasion need to remember a layout from before.
574         Layout const *lastlay = nullptr;
575
576         while (par != pend) {
577                 Layout const & style = par->layout();
578                 ParagraphList::const_iterator send;
579
580                 // Actual content of this paragraph.
581                 switch (style.latextype) {
582                 case LATEX_ENVIRONMENT:
583                 case LATEX_LIST_ENVIRONMENT:
584                 case LATEX_ITEM_ENVIRONMENT: {
585                         // There are two possibilities in this case.
586                         // One is that we are still in the environment in which we
587                         // started---which we will be if the depth is the same.
588                         if (par->params().depth() == origdepth) {
589                                 LATTEST(par->layout() == style);
590                                 if (lastlay != nullptr) {
591                                         closeItemTag(xs, *lastlay);
592                                         if (lastlay->docbookitemwrappertag() != "NONE") {
593                                                 xs << xml::EndTag(lastlay->docbookitemwrappertag());
594                                                 xs << xml::CR();
595                                         }
596                                         lastlay = nullptr;
597                                 }
598
599                                 // this will be positive if we want to skip the
600                                 // initial word (if it's been taken for the label).
601                                 pos_type sep = 0;
602
603                                 // Open a wrapper tag if needed.
604                                 if (style.docbookitemwrappertag() != "NONE") {
605                                         xs << xml::StartTag(style.docbookitemwrappertag(), style.docbookitemwrapperattr());
606                                         xs << xml::CR();
607                                 }
608
609                                 // label output
610                                 if (style.labeltype != LABEL_NO_LABEL &&
611                                                 style.docbookitemlabeltag() != "NONE") {
612
613                                         if (isNormalEnv(style)) {
614                                                 // in this case, we print the label only for the first
615                                                 // paragraph (as in a theorem or an abstract).
616                                                 if (par == pbegin) {
617                                                         docstring const lbl = pbegin->params().labelString();
618                                                         if (!lbl.empty()) {
619                                                                 openLabelTag(xs, style);
620                                                                 xs << lbl;
621                                                                 closeLabelTag(xs, style);
622                                                         } else {
623                                                                 // No new line after closeLabelTag.
624                                                                 xs << xml::CR();
625                                                         }
626                                                 }
627                                         } else { // some kind of list
628                                                 if (style.labeltype == LABEL_MANUAL) {
629                                                         // Only variablelist gets here.
630
631                                                         openLabelTag(xs, style);
632                                                         sep = par->firstWordDocBook(xs, runparams);
633                                                         closeLabelTag(xs, style);
634                                                 } else {
635                                                         openLabelTag(xs, style);
636                                                         xs << par->params().labelString();
637                                                         closeLabelTag(xs, style);
638                                                 }
639                                         }
640                                 } // end label output
641
642                                 // Start generating the item.
643                                 bool wasInParagraph = runparams.docbook_in_par;
644                                 openItemTag(xs, style);
645                                 bool getsIntoParagraph = openInnerItemTag(xs, style);
646                                 OutputParams rp = runparams;
647                                 rp.docbook_in_par = wasInParagraph | getsIntoParagraph;
648
649                                 // Maybe the item is completely empty, i.e. if the first word ends at the end of the current paragraph
650                                 // AND if the next paragraph doesn't have the same depth (if there is such a paragraph).
651                                 // Common case: there is only the first word on the line, but there is a nested list instead
652                                 // of more text.
653                                 bool emptyItem = false;
654                                 if (sep == par->size()) {
655                                         auto next_par = par;
656                                         ++next_par;
657                                         if (next_par == text.paragraphs().end()) // There is no next paragraph.
658                                                 emptyItem = true;
659                                         else // There is a next paragraph: check depth.
660                                                 emptyItem = par->params().depth() >= next_par->params().depth();
661                                 }
662
663                                 if (emptyItem) {
664                                         // Avoid having an empty item, this is not valid DocBook. A single character is enough to force
665                                         // generation of a full <para>.
666                                         xs << ' ';
667                                 } else {
668                                         // Generate the rest of the paragraph, if need be.
669                                         par->simpleDocBookOnePar(buf, xs, rp, text.outerFont(distance(begin, par)), true, true, sep);
670                                 }
671
672                                 ++par;
673                                 if (getsIntoParagraph)
674                                         closeInnerItemTag(xs, style);
675
676                                 // We may not want to close the tag yet, in particular:
677                                 // If we're not at the end of the item...
678                                 if (par != pend
679                                         //  and are doing items...
680                                         && !isNormalEnv(style)
681                                         // and if the depth has changed...
682                                         && par->params().depth() != origdepth) {
683                                         // then we'll save this layout for later, and close it when
684                                         // we get another item.
685                                         lastlay = &style;
686                                 } else {
687                                         closeItemTag(xs, style);
688
689                                         // Eventually, close the item wrapper.
690                                         if (style.docbookitemwrappertag() != "NONE") {
691                                                 xs << xml::EndTag(style.docbookitemwrappertag());
692                                                 xs << xml::CR();
693                                         }
694                                 }
695                         }
696                         // The other possibility is that the depth has increased.
697                         else {
698                                 send = findEndOfEnvironment(par, pend);
699                                 par = makeEnvironment(buf, xs, runparams, text, par, send);
700                         }
701                         break;
702                 }
703                 case LATEX_PARAGRAPH:
704                         send = findLast(par, pend, LATEX_PARAGRAPH);
705                         par = makeParagraphs(buf, xs, runparams, text, par, send);
706                         break;
707                 case LATEX_BIB_ENVIRONMENT:
708                         send = findLast(par, pend, LATEX_BIB_ENVIRONMENT);
709                         par = makeParagraphBibliography(buf, xs, runparams, text, par, send);
710                         break;
711                 case LATEX_COMMAND:
712                         ++par;
713                         break;
714                 }
715         }
716
717         if (lastlay != nullptr) {
718                 closeItemTag(xs, *lastlay);
719                 if (lastlay->docbookitemwrappertag() != "NONE") {
720                         xs << xml::EndTag(lastlay->docbookitemwrappertag());
721                         xs << xml::CR();
722                 }
723         }
724         auto nextpar = par;
725         ++nextpar;
726         closeTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr);
727         xs << xml::CR();
728         return pend;
729 }
730
731
732 void makeCommand(
733                 Buffer const & buf,
734                 XMLStream & xs,
735                 OutputParams const & runparams,
736                 Text const & text,
737                 ParagraphList::const_iterator const & pbegin)
738 {
739         // No need for labels, as they are handled by DocBook tags.
740         auto const begin = text.paragraphs().begin();
741         auto const end = text.paragraphs().end();
742         auto nextpar = pbegin;
743         ++nextpar;
744
745         // Find the previous paragraph.
746         auto prevpar = begin;
747         if (prevpar != pbegin) {
748                 auto prevpar_next = prevpar;
749                 ++prevpar_next;
750
751                 while (prevpar_next != pbegin) {
752                         ++prevpar_next;
753                         ++prevpar;
754                 }
755         }
756
757         // Generate this command.
758         openParTag(xs, &*pbegin, &*prevpar);
759
760         pbegin->simpleDocBookOnePar(buf, xs, runparams,
761                                                                 text.outerFont(distance(begin, pbegin)));
762
763         closeTag(xs, &*pbegin, (nextpar != end) ? &*nextpar : nullptr);
764         xs << xml::CR();
765 }
766
767 pair<ParagraphList::const_iterator, ParagraphList::const_iterator> makeAny(
768                 Text const &text,
769                 Buffer const &buf,
770                 XMLStream &xs,
771                 OutputParams const &ourparams,
772                 ParagraphList::const_iterator par,
773                 ParagraphList::const_iterator send,
774                 ParagraphList::const_iterator pend)
775 {
776         Layout const & style = par->layout();
777
778         switch (style.latextype) {
779                 case LATEX_COMMAND: {
780                         // The files with which we are working never have more than
781                         // one paragraph in a command structure.
782                         // FIXME
783                         // if (ourparams.docbook_in_par)
784                         //   fix it so we don't get sections inside standard, e.g.
785                         // note that we may then need to make runparams not const, so we
786                         // can communicate that back.
787                         // FIXME Maybe this fix should be in the routines themselves, in case
788                         // they are called from elsewhere.
789                         makeCommand(buf, xs, ourparams, text, par);
790                         ++par;
791                         break;
792                 }
793                 case LATEX_ENVIRONMENT:
794                 case LATEX_LIST_ENVIRONMENT:
795                 case LATEX_ITEM_ENVIRONMENT:
796                         // FIXME Same fix here.
797                         send = findEndOfEnvironment(par, pend);
798                         par = makeEnvironment(buf, xs, ourparams, text, par, send);
799                         break;
800                 case LATEX_PARAGRAPH:
801                         send = findLast(par, pend, LATEX_PARAGRAPH);
802                         par = makeParagraphs(buf, xs, ourparams, text, par, send);
803                         break;
804                 case LATEX_BIB_ENVIRONMENT:
805                         send = findLast(par, pend, LATEX_BIB_ENVIRONMENT);
806                         par = makeParagraphBibliography(buf, xs, ourparams, text, par, send);
807                         break;
808         }
809
810         return make_pair(par, send);
811 }
812
813 } // end anonymous namespace
814
815
816 using DocBookDocumentSectioning = tuple<bool, pit_type>;
817
818
819 struct DocBookInfoTag
820 {
821         const set<pit_type> shouldBeInInfo;
822         const set<pit_type> mustBeInInfo;
823         const set<pit_type> abstract;
824         pit_type bpit;
825         pit_type epit;
826
827         DocBookInfoTag(const set<pit_type> & shouldBeInInfo, const set<pit_type> & mustBeInInfo,
828                                    const set<pit_type> & abstract, pit_type bpit, pit_type epit) :
829                                    shouldBeInInfo(shouldBeInInfo), mustBeInInfo(mustBeInInfo), abstract(abstract),
830                                    bpit(bpit), epit(epit) {}
831 };
832
833
834 DocBookDocumentSectioning hasDocumentSectioning(ParagraphList const &paragraphs, pit_type bpit, pit_type const epit) {
835         bool documentHasSections = false;
836
837         while (bpit < epit) {
838                 Layout const &style = paragraphs[bpit].layout();
839                 documentHasSections |= style.category() == from_utf8("Sectioning");
840
841                 if (documentHasSections) {
842                         break;
843                 }
844                 bpit += 1;
845         }
846         // Paragraphs before the first section: [ runparams.par_begin ; eppit )
847
848         return make_tuple(documentHasSections, bpit);
849 }
850
851
852 bool hasOnlyNotes(Paragraph const & par)
853 {
854         for (int i = 0; i < par.size(); ++i)
855                 if (!par.isInset(i) || !dynamic_cast<InsetNote *>(par.insetList().get(i)))
856                         return false;
857         return true;
858 }
859
860
861 DocBookInfoTag getParagraphsWithInfo(ParagraphList const &paragraphs, pit_type bpit, pit_type const epit) {
862         set<pit_type> shouldBeInInfo;
863         set<pit_type> mustBeInInfo;
864         set<pit_type> abstract;
865
866         // Find the first non empty paragraph by mutating bpit.
867         while (bpit < epit) {
868                 Paragraph const &par = paragraphs[bpit];
869                 if (par.empty() || hasOnlyNotes(par))
870                         bpit += 1;
871                 else
872                         break;
873         }
874
875         // Find the last info-like paragraph.
876         pit_type cpit = bpit;
877         bool hasAbstractLayout = false;
878         while (cpit < epit) {
879                 // Skip paragraphs only containing one note.
880                 Paragraph const & par = paragraphs[cpit];
881                 if (hasOnlyNotes(par)) {
882                         cpit += 1;
883                         continue;
884                 }
885
886                 if (par.layout().name() == from_ascii("Abstract"))
887                         hasAbstractLayout = true;
888
889                 // Based on layout information, store this paragraph in one set: should be in <info>, must be.
890                 Layout const &style = par.layout();
891
892                 if (style.docbookininfo() == "always") {
893                         mustBeInInfo.emplace(cpit);
894                 } else if (style.docbookininfo() == "maybe") {
895                         shouldBeInInfo.emplace(cpit);
896                 } else {
897                         // Hypothesis: the <info> parts should be grouped together near the beginning bpit.
898                         // There may be notes in between, but nothing else.
899                         break;
900                 }
901                 cpit += 1;
902         }
903         // Now, cpit points to the last paragraph that has things that could go in <info>.
904         // bpit is the beginning of the <info> part.
905
906         // Go once again through the list of paragraphs to find the abstract. If there is an abstract
907         // layout, only consider it. Otherwise, an abstract is just a sequence of paragraphs with text.
908         if (hasAbstractLayout) {
909                 pit_type pit = bpit;
910                 while (pit < cpit) { // Don't overshoot the <info> part.
911                         if (paragraphs[pit].layout().name() == from_ascii("Abstract"))
912                                 abstract.emplace(pit);
913                         pit++;
914                 }
915         } else {
916                 pit_type lastAbstract = epit + 1; // A nonsensical value.
917                 docstring lastAbstractLayout;
918
919                 pit_type pit = bpit;
920                 while (pit < cpit) { // Don't overshoot the <info> part.
921                         const Paragraph & par = paragraphs.at(pit);
922                         if (!par.insetList().empty()) {
923                                 for (const auto &i : par.insetList()) {
924                                         if (i.inset->getText(0) != nullptr) {
925                                                 if (lastAbstract == epit + 1) {
926                                                         // First paragraph that matches the heuristic definition of abstract.
927                                                         lastAbstract = pit;
928                                                         lastAbstractLayout = par.layout().name();
929                                                 } else if (pit > lastAbstract + 1 || par.layout().name() != lastAbstractLayout) {
930                                                         // This is either too far from the last abstract paragraph or doesn't
931                                                         // have the right layout name, BUT there has already been an abstract
932                                                         // in this document: done with detecting the abstract.
933                                                         goto done; // Easier to get out of two nested loops.
934                                                 }
935
936                                                 abstract.emplace(pit);
937                                                 break;
938                                         }
939                                 }
940                         }
941                         pit++;
942                 }
943         }
944
945         done:
946         return DocBookInfoTag(shouldBeInInfo, mustBeInInfo, abstract, bpit, cpit);
947 }
948
949
950 pit_type generateDocBookParagraphWithoutSectioning(
951                 Text const & text,
952                 Buffer const & buf,
953                 XMLStream & xs,
954                 OutputParams const & runparams,
955                 ParagraphList const & paragraphs,
956                 pit_type bpit,
957                 pit_type epit)
958 {
959         auto par = paragraphs.iterator_at(bpit);
960         auto lastStartedPar = par;
961         ParagraphList::const_iterator send;
962         auto const pend =
963                         (epit == (int) paragraphs.size()) ?
964                         paragraphs.end() : paragraphs.iterator_at(epit);
965
966         while (bpit < epit) {
967                 tie(par, send) = makeAny(text, buf, xs, runparams, par, send, pend);
968                 bpit += distance(lastStartedPar, par);
969                 lastStartedPar = par;
970         }
971
972         return bpit;
973 }
974
975
976 void outputDocBookInfo(
977                 Text const & text,
978                 Buffer const & buf,
979                 XMLStream & xs,
980                 OutputParams const & runparams,
981                 ParagraphList const & paragraphs,
982                 DocBookInfoTag const & info)
983 {
984         // Perform an additional check on the abstract. Sometimes, there are many paragraphs that should go
985         // into the abstract, but none generates actual content. Thus, first generate to a temporary stream,
986         // then only create the <abstract> tag if these paragraphs generate some content.
987         // This check must be performed *before* a decision on whether or not to output <info> is made.
988         bool hasAbstract = !info.abstract.empty();
989         docstring abstract;
990         if (hasAbstract) {
991                 pit_type bpitAbstract = *std::min_element(info.abstract.begin(), info.abstract.end());
992                 pit_type epitAbstract = *std::max_element(info.abstract.begin(), info.abstract.end());
993
994                 odocstringstream os2;
995                 XMLStream xs2(os2);
996                 generateDocBookParagraphWithoutSectioning(text, buf, xs2, runparams, paragraphs, bpitAbstract, epitAbstract);
997
998                 // Actually output the abstract if there is something to do. Don't count line feeds or spaces in this,
999                 // even though they must be properly output if there is some abstract.
1000                 docstring abstractContent = os2.str();
1001                 static const lyx::regex reg("[ \\r\\n]*");
1002                 abstractContent = from_utf8(lyx::regex_replace(to_utf8(abstractContent), reg, string("")));
1003
1004                 // Nothing? Then there is no abstract!
1005                 if (abstractContent.empty())
1006                         hasAbstract = false;
1007         }
1008
1009         // The abstract must go in <info>.
1010         bool needInfo = !info.mustBeInInfo.empty() || hasAbstract;
1011
1012         // Start the <info> tag if required.
1013         if (needInfo) {
1014                 xs.startDivision(false);
1015                 xs << xml::StartTag("info");
1016                 xs << xml::CR();
1017         }
1018
1019         // Output the elements that should go in <info>.
1020         generateDocBookParagraphWithoutSectioning(text, buf, xs, runparams, paragraphs, info.bpit, info.epit);
1021
1022         if (hasAbstract && !abstract.empty()) { // The second test is probably superfluous.
1023                 string tag = paragraphs[*info.abstract.begin()].layout().docbookforceabstracttag();
1024                 if (tag == "NONE")
1025                         tag = "abstract";
1026
1027                 xs << xml::StartTag(tag);
1028                 xs << xml::CR();
1029                 xs << XMLStream::ESCAPE_NONE << abstract;
1030                 xs << xml::EndTag(tag);
1031                 xs << xml::CR();
1032         }
1033
1034         // End the <info> tag if it was started.
1035         if (needInfo) {
1036                 xs << xml::EndTag("info");
1037                 xs << xml::CR();
1038                 xs.endDivision();
1039         }
1040 }
1041
1042
1043 void docbookFirstParagraphs(
1044                 Text const &text,
1045                 Buffer const &buf,
1046                 XMLStream &xs,
1047                 OutputParams const &runparams,
1048                 pit_type epit)
1049 {
1050         // Handle the beginning of the document, supposing it has sections.
1051         // Major role: output the first <info> tag.
1052
1053         ParagraphList const &paragraphs = text.paragraphs();
1054         pit_type bpit = runparams.par_begin;
1055         DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit);
1056         outputDocBookInfo(text, buf, xs, runparams, paragraphs, info);
1057 }
1058
1059
1060 bool isParagraphEmpty(const Paragraph &par)
1061 {
1062         InsetList const &insets = par.insetList();
1063         size_t insetsLength = distance(insets.begin(), insets.end());
1064         bool hasParagraphOnlyNote = insetsLength == 1 && insets.get(0) && insets.get(0)->asInsetCollapsible() &&
1065                                                                 dynamic_cast<InsetNote *>(insets.get(0));
1066         return hasParagraphOnlyNote;
1067 }
1068
1069
1070 void docbookSimpleAllParagraphs(
1071                 Text const & text,
1072                 Buffer const & buf,
1073                 XMLStream & xs,
1074                 OutputParams const & runparams)
1075 {
1076         // Handle the document, supposing it has no sections (i.e. a "simple" document).
1077
1078         // First, the <info> tag.
1079         ParagraphList const &paragraphs = text.paragraphs();
1080         pit_type bpit = runparams.par_begin;
1081         pit_type const epit = runparams.par_end;
1082         DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit);
1083         outputDocBookInfo(text, buf, xs, runparams, paragraphs, info);
1084         bpit = info.bpit;
1085
1086         // Then, the content.
1087         ParagraphList::const_iterator const pend =
1088                         (epit == (int) paragraphs.size()) ?
1089                         paragraphs.end() : paragraphs.iterator_at(epit);
1090
1091         while (bpit < epit) {
1092                 auto par = paragraphs.iterator_at(bpit);
1093                 ParagraphList::const_iterator const lastStartedPar = par;
1094                 ParagraphList::const_iterator send;
1095
1096                 if (isParagraphEmpty(*par)) {
1097                         ++par;
1098                         bpit += distance(lastStartedPar, par);
1099                         continue;
1100                 }
1101
1102                 // Generate this paragraph.
1103                 tie(par, send) = makeAny(text, buf, xs, runparams, par, send, pend);
1104                 bpit += distance(lastStartedPar, par);
1105         }
1106 }
1107
1108
1109 void docbookParagraphs(Text const &text,
1110                                            Buffer const &buf,
1111                                            XMLStream &xs,
1112                                            OutputParams const &runparams) {
1113         ParagraphList const &paragraphs = text.paragraphs();
1114         if (runparams.par_begin == runparams.par_end) {
1115                 runparams.par_begin = 0;
1116                 runparams.par_end = paragraphs.size();
1117         }
1118         pit_type bpit = runparams.par_begin;
1119         pit_type const epit = runparams.par_end;
1120         LASSERT(bpit < epit,
1121                         {
1122                                 xs << XMLStream::ESCAPE_NONE << "<!-- DocBook output error! -->\n";
1123                                 return;
1124                         });
1125
1126         ParagraphList::const_iterator const pend =
1127                         (epit == (int) paragraphs.size()) ?
1128                         paragraphs.end() : paragraphs.iterator_at(epit);
1129         std::stack<std::pair<int, string>> headerLevels; // Used to determine when to open/close sections: store the depth
1130         // of the section and the tag that was used to open it.
1131
1132         // Detect whether the document contains sections. If there are no sections, there can be no automatically
1133         // discovered abstract.
1134         bool documentHasSections;
1135         pit_type eppit;
1136         tie(documentHasSections, eppit) = hasDocumentSectioning(paragraphs, bpit, epit);
1137
1138         if (documentHasSections) {
1139                 docbookFirstParagraphs(text, buf, xs, runparams, eppit);
1140                 bpit = eppit;
1141         } else {
1142                 docbookSimpleAllParagraphs(text, buf, xs, runparams);
1143                 return;
1144         }
1145
1146         bool currentlyInAppendix = false;
1147
1148         while (bpit < epit) {
1149                 OutputParams ourparams = runparams;
1150
1151                 auto par = paragraphs.iterator_at(bpit);
1152                 if (par->params().startOfAppendix())
1153                         currentlyInAppendix = true;
1154                 Layout const &style = par->layout();
1155                 ParagraphList::const_iterator const lastStartedPar = par;
1156                 ParagraphList::const_iterator send;
1157
1158                 if (isParagraphEmpty(*par)) {
1159                         ++par;
1160                         bpit += distance(lastStartedPar, par);
1161                         continue;
1162                 }
1163
1164                 // Think about adding <section> and/or </section>s.
1165                 const bool isLayoutSectioning = style.category() == from_utf8("Sectioning");
1166                 if (isLayoutSectioning) {
1167                         int level = style.toclevel;
1168
1169                         // Need to close a previous section if it has the same level or a higher one (close <section> if opening a <h2>
1170                         // after a <h2>, <h3>, <h4>, <h5> or <h6>). More examples:
1171                         //   - current: h2; back: h1; do not close any <section>
1172                         //   - current: h1; back: h2; close two <section> (first the <h2>, then the <h1>, so a new <h1> can come)
1173                         while (!headerLevels.empty() && level <= headerLevels.top().first) {
1174                                 int stackLevel = headerLevels.top().first;
1175                                 docstring stackTag = from_utf8("</" + headerLevels.top().second + ">");
1176                                 headerLevels.pop();
1177
1178                                 // Output the tag only if it corresponds to a legit section.
1179                                 if (stackLevel != Layout::NOT_IN_TOC)
1180                                         xs << XMLStream::ESCAPE_NONE << stackTag << xml::CR();
1181                         }
1182
1183                         // Open the new section: first push it onto the stack, then output it in DocBook.
1184                         string sectionTag = (currentlyInAppendix && style.docbooksectiontag() == "chapter") ?
1185                                                                 "appendix" : style.docbooksectiontag();
1186                         headerLevels.push(std::make_pair(level, sectionTag));
1187
1188                         // Some sectioning-like elements should not be output (such as FrontMatter).
1189                         if (level != Layout::NOT_IN_TOC) {
1190                                 // Look for a label in the title, i.e. a InsetLabel as a child.
1191                                 docstring id = docstring();
1192                                 for (pos_type i = 0; i < par->size(); ++i) {
1193                                         Inset const *inset = par->getInset(i);
1194                                         if (inset) {
1195                                                 if (auto label = dynamic_cast<InsetLabel const *>(inset)) {
1196                                                         // Generate the attributes for the section if need be.
1197                                                         id += "xml:id=\"" + xml::cleanID(label->screenLabel()) + "\"";
1198
1199                                                         // Don't output the ID as a DocBook <anchor>.
1200                                                         ourparams.docbook_anchors_to_ignore.emplace(label->screenLabel());
1201
1202                                                         // Cannot have multiple IDs per tag.
1203                                                         break;
1204                                                 }
1205                                         }
1206                                 }
1207
1208                                 // Write the open tag for this section.
1209                                 docstring tag = from_utf8("<" + sectionTag);
1210                                 if (!id.empty())
1211                                         tag += from_utf8(" ") + id;
1212                                 tag += from_utf8(">");
1213                                 xs << XMLStream::ESCAPE_NONE << tag;
1214                                 xs << xml::CR();
1215                         }
1216                 }
1217
1218                 // Close all sections before the bibliography.
1219                 // TODO: Only close all when the bibliography is at the end of the document? Or force to output the bibliography at the end of the document? Or don't care (as allowed by DocBook)?
1220                 auto insetsLength = distance(par->insetList().begin(), par->insetList().end());
1221                 if (insetsLength > 0) {
1222                         Inset const *firstInset = par->getInset(0);
1223                         if (firstInset && dynamic_cast<InsetBibtex const *>(firstInset)) {
1224                                 while (!headerLevels.empty()) {
1225                                         int level = headerLevels.top().first;
1226                                         docstring tag = from_utf8("</" + headerLevels.top().second + ">");
1227                                         headerLevels.pop();
1228
1229                                         // Output the tag only if it corresponds to a legit section.
1230                                         if (level != Layout::NOT_IN_TOC) {
1231                                                 xs << XMLStream::ESCAPE_NONE << tag;
1232                                                 xs << xml::CR();
1233                                         }
1234                                 }
1235                         }
1236                 }
1237
1238                 // Generate this paragraph.
1239                 tie(par, send) = makeAny(text, buf, xs, ourparams, par, send, pend);
1240                 bpit += distance(lastStartedPar, par);
1241         }
1242
1243         // If need be, close <section>s, but only at the end of the document (otherwise, dealt with at the beginning
1244         // of the loop).
1245         while (!headerLevels.empty() && headerLevels.top().first > Layout::NOT_IN_TOC) {
1246                 docstring tag = from_utf8("</" + headerLevels.top().second + ">");
1247                 headerLevels.pop();
1248                 xs << XMLStream::ESCAPE_NONE << tag;
1249                 xs << xml::CR();
1250         }
1251 }
1252
1253 } // namespace lyx