]> git.lyx.org Git - lyx.git/blob - src/output_docbook.cpp
Revert "DocBook: make openParTag/closeTag use paragraphs instead of layouts."
[lyx.git] / src / output_docbook.cpp
1 /**
2  * \file output_docbook.cpp
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author Lars Gullik Bjønnes
7  * \author José Matos
8  *
9  * Full author contact details are available in file CREDITS.
10  */
11
12 #include <config.h>
13
14 #include "Buffer.h"
15 #include "buffer_funcs.h"
16 #include "BufferParams.h"
17 #include "Font.h"
18 #include "InsetList.h"
19 #include "Layout.h"
20 #include "OutputParams.h"
21 #include "Paragraph.h"
22 #include "ParagraphList.h"
23 #include "ParagraphParameters.h"
24 #include "xml.h"
25 #include "Text.h"
26 #include "TextClass.h"
27
28 #include "insets/InsetBibtex.h"
29 #include "insets/InsetBibitem.h"
30 #include "insets/InsetLabel.h"
31 #include "insets/InsetNote.h"
32
33 #include "support/convert.h"
34 #include "support/debug.h"
35 #include "support/lassert.h"
36 #include "support/lstrings.h"
37 #include "support/textutils.h"
38
39 #include "support/regex.h"
40
41 #include <stack>
42 #include <iostream>
43 #include <algorithm>
44 #include <sstream>
45
46 using namespace std;
47 using namespace lyx::support;
48
49 namespace lyx {
50
51 namespace {
52
53 std::string const fontToDocBookTag(xml::FontTypes type)
54 {
55         switch (type) {
56         case xml::FontTypes::FT_EMPH:
57         case xml::FontTypes::FT_BOLD:
58                 return "emphasis";
59         case xml::FontTypes::FT_NOUN:
60                 return "person";
61         case xml::FontTypes::FT_UBAR:
62         case xml::FontTypes::FT_WAVE:
63         case xml::FontTypes::FT_DBAR:
64         case xml::FontTypes::FT_SOUT:
65         case xml::FontTypes::FT_XOUT:
66         case xml::FontTypes::FT_ITALIC:
67         case xml::FontTypes::FT_UPRIGHT:
68         case xml::FontTypes::FT_SLANTED:
69         case xml::FontTypes::FT_SMALLCAPS:
70         case xml::FontTypes::FT_ROMAN:
71         case xml::FontTypes::FT_SANS:
72                 return "emphasis";
73         case xml::FontTypes::FT_TYPE:
74                 return "code";
75         case xml::FontTypes::FT_SIZE_TINY:
76         case xml::FontTypes::FT_SIZE_SCRIPT:
77         case xml::FontTypes::FT_SIZE_FOOTNOTE:
78         case xml::FontTypes::FT_SIZE_SMALL:
79         case xml::FontTypes::FT_SIZE_NORMAL:
80         case xml::FontTypes::FT_SIZE_LARGE:
81         case xml::FontTypes::FT_SIZE_LARGER:
82         case xml::FontTypes::FT_SIZE_LARGEST:
83         case xml::FontTypes::FT_SIZE_HUGE:
84         case xml::FontTypes::FT_SIZE_HUGER:
85         case xml::FontTypes::FT_SIZE_INCREASE:
86         case xml::FontTypes::FT_SIZE_DECREASE:
87                 return "emphasis";
88         default:
89                 return "";
90         }
91 }
92
93 string fontToRole(xml::FontTypes type)
94 {
95         // Specific fonts are achieved with roles. The only common ones are "" for basic emphasis,
96         // and "bold"/"strong" for bold. With some specific options, other roles are copied into
97         // HTML output (via the DocBook XSLT sheets); otherwise, if not recognised, they are just ignored.
98         // Hence, it is not a problem to have many roles by default here.
99         // See https://www.sourceware.org/ml/docbook/2003-05/msg00269.html
100         switch (type) {
101         case xml::FontTypes::FT_ITALIC:
102         case xml::FontTypes::FT_EMPH:
103                 return "";
104         case xml::FontTypes::FT_BOLD:
105                 return "bold";
106         case xml::FontTypes::FT_NOUN:
107                 return ""; // Outputs a <person>
108         case xml::FontTypes::FT_TYPE:
109                 return ""; // Outputs a <code>
110         case xml::FontTypes::FT_UBAR:
111                 return "underline";
112
113                 // All other roles are non-standard for DocBook.
114
115         case xml::FontTypes::FT_WAVE:
116                 return "wave";
117         case xml::FontTypes::FT_DBAR:
118                 return "dbar";
119         case xml::FontTypes::FT_SOUT:
120                 return "sout";
121         case xml::FontTypes::FT_XOUT:
122                 return "xout";
123         case xml::FontTypes::FT_UPRIGHT:
124                 return "upright";
125         case xml::FontTypes::FT_SLANTED:
126                 return "slanted";
127         case xml::FontTypes::FT_SMALLCAPS:
128                 return "smallcaps";
129         case xml::FontTypes::FT_ROMAN:
130                 return "roman";
131         case xml::FontTypes::FT_SANS:
132                 return "sans";
133         case xml::FontTypes::FT_SIZE_TINY:
134                 return "tiny";
135         case xml::FontTypes::FT_SIZE_SCRIPT:
136                 return "size_script";
137         case xml::FontTypes::FT_SIZE_FOOTNOTE:
138                 return "size_footnote";
139         case xml::FontTypes::FT_SIZE_SMALL:
140                 return "size_small";
141         case xml::FontTypes::FT_SIZE_NORMAL:
142                 return "size_normal";
143         case xml::FontTypes::FT_SIZE_LARGE:
144                 return "size_large";
145         case xml::FontTypes::FT_SIZE_LARGER:
146                 return "size_larger";
147         case xml::FontTypes::FT_SIZE_LARGEST:
148                 return "size_largest";
149         case xml::FontTypes::FT_SIZE_HUGE:
150                 return "size_huge";
151         case xml::FontTypes::FT_SIZE_HUGER:
152                 return "size_huger";
153         case xml::FontTypes::FT_SIZE_INCREASE:
154                 return "size_increase";
155         case xml::FontTypes::FT_SIZE_DECREASE:
156                 return "size_decrease";
157         default:
158                 return "";
159         }
160 }
161
162 string fontToAttribute(xml::FontTypes type) {
163         // If there is a role (i.e. nonstandard use of a tag), output the attribute. Otherwise, the sheer tag is sufficient
164         // for the font.
165         string role = fontToRole(type);
166         if (!role.empty()) {
167                 return "role='" + role + "'";
168         } else {
169                 return "";
170         }
171 }
172
173 } // end anonymous namespace
174
175
176 xml::FontTag docbookStartFontTag(xml::FontTypes type)
177 {
178         return xml::FontTag(from_utf8(fontToDocBookTag(type)), from_utf8(fontToAttribute(type)), type);
179 }
180
181
182 xml::EndFontTag docbookEndFontTag(xml::FontTypes type)
183 {
184         return xml::EndFontTag(from_utf8(fontToDocBookTag(type)), type);
185 }
186
187
188 namespace {
189
190 // convenience functions
191
192 void openParTag(XMLStream & xs, Layout const & lay)
193 {
194         if (lay.docbookwrappertag() != "NONE")
195                 xs << xml::StartTag(lay.docbookwrappertag(), lay.docbookwrapperattr());
196
197         string tag = lay.docbooktag();
198         if (tag == "Plain Layout")
199                 tag = "para";
200
201         xs << xml::ParTag(tag, lay.docbookattr());
202
203         if (lay.docbookitemtag() != "NONE")
204                 xs << xml::StartTag(lay.docbookitemtag(), lay.docbookitemattr());
205 }
206
207
208 void closeTag(XMLStream & xs, Layout const & lay)
209 {
210         if (lay.docbookitemtag() != "NONE")
211                 xs << xml::EndTag(lay.docbookitemtag());
212
213         string tag = lay.docbooktag();
214         if (tag == "Plain Layout")
215                 tag = "para";
216
217         xs << xml::EndTag(tag);
218         if (lay.docbookwrappertag() != "NONE")
219                 xs << xml::EndTag(lay.docbookwrappertag());
220 }
221
222
223 void openLabelTag(XMLStream & xs, Layout const & lay) // Mostly for definition lists.
224 {
225         xs << xml::StartTag(lay.docbookitemlabeltag(), lay.docbookitemlabelattr());
226 }
227
228
229 void closeLabelTag(XMLStream & xs, Layout const & lay)
230 {
231         xs << xml::EndTag(lay.docbookitemlabeltag());
232         xs << xml::CR();
233 }
234
235
236 void openItemTag(XMLStream & xs, Layout const & lay)
237 {
238         xs << xml::StartTag(lay.docbookitemtag(), lay.docbookitemattr());
239 }
240
241
242 // Return true when new elements are output in a paragraph, false otherwise.
243 bool openInnerItemTag(XMLStream & xs, Layout const & lay)
244 {
245         if (lay.docbookiteminnertag() != "NONE") {
246                 xs << xml::CR();
247                 xs << xml::ParTag(lay.docbookiteminnertag(), lay.docbookiteminnerattr());
248
249                 if (lay.docbookiteminnertag() == "para") {
250                         return true;
251                 }
252         }
253         return false;
254 }
255
256
257 void closeInnerItemTag(XMLStream & xs, Layout const & lay)
258 {
259         if (lay.docbookiteminnertag()!= "NONE") {
260                 xs << xml::EndTag(lay.docbookiteminnertag());
261                 xs << xml::CR();
262         }
263 }
264
265
266 inline void closeItemTag(XMLStream & xs, Layout const & lay)
267 {
268         xs << xml::EndTag(lay.docbookitemtag());
269         xs << xml::CR();
270 }
271
272 // end of convenience functions
273
274 ParagraphList::const_iterator findLast(
275                 ParagraphList::const_iterator p,
276                 ParagraphList::const_iterator const & pend,
277                 LatexType type) {
278         for (++p; p != pend && p->layout().latextype == type; ++p);
279
280         return p;
281 }
282
283 ParagraphList::const_iterator findLastBibliographyParagraph(
284                 ParagraphList::const_iterator p,
285                 ParagraphList::const_iterator const & pend) {
286         for (++p; p != pend && p->layout().latextype == LATEX_BIB_ENVIRONMENT; ++p);
287
288         return p;
289 }
290
291
292 ParagraphList::const_iterator findEndOfEnvironment(
293                 ParagraphList::const_iterator const & pstart,
294                 ParagraphList::const_iterator const & pend)
295 {
296         ParagraphList::const_iterator p = pstart;
297         Layout const &bstyle = p->layout();
298         size_t const depth = p->params().depth();
299         for (++p; p != pend; ++p) {
300                 Layout const &style = p->layout();
301                 // It shouldn't happen that e.g. a section command occurs inside
302                 // a quotation environment, at a higher depth, but as of 6/2009,
303                 // it can happen. We pretend that it's just at lowest depth.
304                 if (style.latextype == LATEX_COMMAND)
305                         return p;
306
307                 // If depth is down, we're done
308                 if (p->params().depth() < depth)
309                         return p;
310
311                 // If depth is up, we're not done
312                 if (p->params().depth() > depth)
313                         continue;
314
315                 // FIXME I am not sure about the first check.
316                 // Surely we *could* have different layouts that count as
317                 // LATEX_PARAGRAPH, right?
318                 if (style.latextype == LATEX_PARAGRAPH || style != bstyle)
319                         return p;
320         }
321         return pend;
322 }
323
324
325 ParagraphList::const_iterator makeParagraphBibliography(
326                 Buffer const &buf,
327                 XMLStream &xs,
328                 OutputParams const &runparams,
329                 Text const &text,
330                 ParagraphList::const_iterator const & pbegin,
331                 ParagraphList::const_iterator const & pend)
332 {
333         auto const begin = text.paragraphs().begin();
334         auto const end = text.paragraphs().end();
335
336         // Find the paragraph *before* pbegin.
337         ParagraphList::const_iterator pbegin_before = begin;
338         if (pbegin != begin) {
339                 ParagraphList::const_iterator pbegin_before_next = begin;
340                 ++pbegin_before_next;
341
342                 while (pbegin_before_next != pbegin) {
343                         ++pbegin_before;
344                         ++pbegin_before_next;
345                 }
346         }
347
348         ParagraphList::const_iterator par = pbegin;
349
350         // If this is the first paragraph in a bibliography, open the bibliography tag.
351         if (pbegin != begin && pbegin_before->layout().latextype != LATEX_BIB_ENVIRONMENT) {
352                 xs << xml::StartTag("bibliography");
353                 xs << xml::CR();
354         }
355
356         // Generate the required paragraphs, but only if they are .
357         for (; par != pend; ++par) {
358                 // Start the precooked bibliography entry. This is very much like opening a paragraph tag.
359                 // Don't forget the citation ID!
360                 docstring attr;
361                 for (auto i = 0; i < par->size(); ++i) {
362                         Inset const *ip = par->getInset(0);
363                         if (ip != nullptr && ip->lyxCode() == BIBITEM_CODE) {
364                                 const auto * bibitem = dynamic_cast<const InsetBibitem*>(par->getInset(i));
365                                 attr = from_utf8("xml:id='") + bibitem->getParam("key") + from_utf8("'");
366                                 break;
367                         }
368                 }
369                 xs << xml::StartTag(from_utf8("bibliomixed"), attr);
370
371                 // Generate the entry.
372                 par->simpleDocBookOnePar(buf, xs, runparams, text.outerFont(distance(begin, par)), true, true, 0);
373
374                 // End the precooked bibliography entry.
375                 xs << xml::EndTag("bibliomixed");
376                 xs << xml::CR();
377         }
378
379         // If this is the last paragraph in a bibliography, close the bibliography tag.
380         if (par == end || par->layout().latextype != LATEX_BIB_ENVIRONMENT) {
381                 xs << xml::EndTag("bibliography");
382                 xs << xml::CR();
383         }
384
385         return pend;
386 }
387
388
389 ParagraphList::const_iterator makeParagraphs(
390                 Buffer const &buf,
391                 XMLStream &xs,
392                 OutputParams const &runparams,
393                 Text const &text,
394                 ParagraphList::const_iterator const & pbegin,
395                 ParagraphList::const_iterator const & pend)
396 {
397         ParagraphList::const_iterator const begin = text.paragraphs().begin();
398         ParagraphList::const_iterator par = pbegin;
399         for (; par != pend; ++par) {
400                 Layout const &lay = par->layout();
401
402                 // We want to open the paragraph tag if:
403                 //   (i) the current layout permits multiple paragraphs
404                 //  (ii) we are either not already inside a paragraph (HTMLIsBlock) OR
405                 //         we are, but this is not the first paragraph
406                 //
407                 // But there is also a special case, and we first see whether we are in it.
408                 // We do not want to open the paragraph tag if this paragraph contains
409                 // only one item, and that item is "inline", i.e., not HTMLIsBlock (such
410                 // as a branch). On the other hand, if that single item has a font change
411                 // applied to it, then we still do need to open the paragraph.
412                 //
413                 // Obviously, this is very fragile. The main reason we need to do this is
414                 // because of branches, e.g., a branch that contains an entire new section.
415                 // We do not really want to wrap that whole thing in a <div>...</div>.
416                 bool special_case = false;
417                 Inset const *specinset = par->size() == 1 ? par->getInset(0) : 0;
418                 if (specinset && !specinset->getLayout().htmlisblock()) { // TODO: Convert htmlisblock to a DocBook parameter?
419                         Layout const &style = par->layout();
420                         FontInfo const first_font = style.labeltype == LABEL_MANUAL ?
421                                                                                 style.labelfont : style.font;
422                         FontInfo const our_font =
423                                         par->getFont(buf.masterBuffer()->params(), 0,
424                                                                  text.outerFont(distance(begin, par))).fontInfo();
425
426                         if (first_font == our_font)
427                                 special_case = true;
428                 }
429
430                 // Plain layouts must be ignored.
431                 if (!special_case && buf.params().documentClass().isPlainLayout(lay) && !runparams.docbook_force_pars)
432                         special_case = true;
433                 // TODO: Could get rid of this with a DocBook equivalent to htmlisblock?
434                 if (!special_case && par->size() == 1 && par->getInset(0)) {
435                         Inset const * firstInset = par->getInset(0);
436
437                         // Floats cannot be in paragraphs.
438                         special_case = to_utf8(firstInset->layoutName()).substr(0, 6) == "Float:";
439
440                         // Bibliographies cannot be in paragraphs.
441                         if (!special_case && firstInset->asInsetCommand())
442                                 special_case = firstInset->asInsetCommand()->params().getCmdName() == "bibtex";
443
444                         // Equations do not deserve their own paragraph (DocBook allows them outside paragraphs).
445                         if (!special_case && firstInset->asInsetMath())
446                                 special_case = true;
447
448                         // ERTs are in comments, not paragraphs.
449                         if (!special_case && firstInset->lyxCode() == lyx::ERT_CODE)
450                                 special_case = true;
451
452                         // Listings should not get into their own paragraph.
453                         if (!special_case && firstInset->lyxCode() == lyx::LISTINGS_CODE)
454                                 special_case = true;
455                 }
456
457                 bool const open_par = runparams.docbook_make_pars
458                                                           && (!runparams.docbook_in_par || par != pbegin)
459                                                           && !special_case;
460
461                 // We want to issue the closing tag if either:
462                 //   (i)  We opened it, and either docbook_in_par is false,
463                 //              or we're not in the last paragraph, anyway.
464                 //   (ii) We didn't open it and docbook_in_par is true,
465                 //              but we are in the first par, and there is a next par.
466                 ParagraphList::const_iterator nextpar = par;
467                 ++nextpar;
468                 bool const close_par =
469                                 ((open_par && (!runparams.docbook_in_par || nextpar != pend))
470                                 || (!open_par && runparams.docbook_in_par && par == pbegin && nextpar != pend));
471
472                 // Determine if this paragraph has some real content. Things like new pages are not caught
473                 // by Paragraph::empty(), even though they do not generate anything useful in DocBook.
474                 odocstringstream os2;
475                 XMLStream xs2(os2);
476                 par->simpleDocBookOnePar(buf, xs2, runparams, text.outerFont(distance(begin, par)), open_par, close_par, 0);
477
478                 docstring cleaned = os2.str();
479                 static const lyx::regex reg("[ \\r\\n]*");
480                 cleaned = from_utf8(lyx::regex_replace(to_utf8(cleaned), reg, string("")));
481
482                 if (!cleaned.empty()) {
483                         if (open_par)
484                                 openParTag(xs, lay);
485
486                         xs << XMLStream::ESCAPE_NONE << os2.str();
487
488                         if (close_par) {
489                                 closeTag(xs, lay);
490                                 xs << xml::CR();
491                         }
492                 }
493         }
494         return pend;
495 }
496
497
498 bool isNormalEnv(Layout const &lay)
499 {
500         return lay.latextype == LATEX_ENVIRONMENT
501                    || lay.latextype == LATEX_BIB_ENVIRONMENT;
502 }
503
504
505 ParagraphList::const_iterator makeEnvironment(
506                 Buffer const &buf,
507                 XMLStream &xs,
508                 OutputParams const &runparams,
509                 Text const &text,
510                 ParagraphList::const_iterator const & pbegin,
511                 ParagraphList::const_iterator const & pend)
512 {
513         ParagraphList::const_iterator const begin = text.paragraphs().begin();
514         ParagraphList::const_iterator par = pbegin;
515         Layout const &bstyle = par->layout();
516         depth_type const origdepth = pbegin->params().depth();
517
518         // open tag for this environment
519         openParTag(xs, bstyle);
520         xs << xml::CR();
521
522         // we will on occasion need to remember a layout from before.
523         Layout const *lastlay = nullptr;
524
525         while (par != pend) {
526                 Layout const & style = par->layout();
527                 ParagraphList::const_iterator send;
528
529                 // Actual content of this paragraph.
530                 switch (style.latextype) {
531                 case LATEX_ENVIRONMENT:
532                 case LATEX_LIST_ENVIRONMENT:
533                 case LATEX_ITEM_ENVIRONMENT: {
534                         // There are two possibilities in this case.
535                         // One is that we are still in the environment in which we
536                         // started---which we will be if the depth is the same.
537                         if (par->params().depth() == origdepth) {
538                                 LATTEST(bstyle == style);
539                                 if (lastlay != nullptr) {
540                                         closeItemTag(xs, *lastlay);
541                                         if (lastlay->docbookitemwrappertag() != "NONE") {
542                                                 xs << xml::EndTag(lastlay->docbookitemwrappertag());
543                                                 xs << xml::CR();
544                                         }
545                                         lastlay = nullptr;
546                                 }
547
548                                 // this will be positive if we want to skip the
549                                 // initial word (if it's been taken for the label).
550                                 pos_type sep = 0;
551
552                                 // Open a wrapper tag if needed.
553                                 if (style.docbookitemwrappertag() != "NONE") {
554                                         xs << xml::StartTag(style.docbookitemwrappertag(), style.docbookitemwrapperattr());
555                                         xs << xml::CR();
556                                 }
557
558                                 // label output
559                                 if (style.labeltype != LABEL_NO_LABEL &&
560                                                 style.docbookitemlabeltag() != "NONE") {
561
562                                         if (isNormalEnv(style)) {
563                                                 // in this case, we print the label only for the first
564                                                 // paragraph (as in a theorem or an abstract).
565                                                 if (par == pbegin) {
566                                                         docstring const lbl = pbegin->params().labelString();
567                                                         if (!lbl.empty()) {
568                                                                 openLabelTag(xs, style);
569                                                                 xs << lbl;
570                                                                 closeLabelTag(xs, style);
571                                                         } else {
572                                                                 // No new line after closeLabelTag.
573                                                                 xs << xml::CR();
574                                                         }
575                                                 }
576                                         } else { // some kind of list
577                                                 if (style.labeltype == LABEL_MANUAL) {
578                                                         // Only variablelist gets here.
579
580                                                         openLabelTag(xs, style);
581                                                         sep = par->firstWordDocBook(xs, runparams);
582                                                         closeLabelTag(xs, style);
583                                                 } else {
584                                                         openLabelTag(xs, style);
585                                                         xs << par->params().labelString();
586                                                         closeLabelTag(xs, style);
587                                                 }
588                                         }
589                                 } // end label output
590
591                                 // Start generating the item.
592                                 bool wasInParagraph = runparams.docbook_in_par;
593                                 openItemTag(xs, style);
594                                 bool getsIntoParagraph = openInnerItemTag(xs, style);
595                                 OutputParams rp = runparams;
596                                 rp.docbook_in_par = wasInParagraph | getsIntoParagraph;
597
598                                 // Maybe the item is completely empty, i.e. if the first word ends at the end of the current paragraph
599                                 // AND if the next paragraph doesn't have the same depth (if there is such a paragraph).
600                                 // Common case: there is only the first word on the line, but there is a nested list instead
601                                 // of more text.
602                                 bool emptyItem = false;
603                                 if (sep == par->size()) {
604                                         auto next_par = par;
605                                         ++next_par;
606                                         if (next_par == text.paragraphs().end()) // There is no next paragraph.
607                                                 emptyItem = true;
608                                         else // There is a next paragraph: check depth.
609                                                 emptyItem = par->params().depth() >= next_par->params().depth();
610                                 }
611
612                                 if (emptyItem) {
613                                         // Avoid having an empty item, this is not valid DocBook. A single character is enough to force
614                                         // generation of a full <para>.
615                                         xs << ' ';
616                                 } else {
617                                         // Generate the rest of the paragraph, if need be.
618                                         par->simpleDocBookOnePar(buf, xs, rp, text.outerFont(distance(begin, par)), true, true, sep);
619                                 }
620
621                                 ++par;
622                                 if (getsIntoParagraph)
623                                         closeInnerItemTag(xs, style);
624
625                                 // We may not want to close the tag yet, in particular:
626                                 // If we're not at the end of the item...
627                                 if (par != pend
628                                         //  and are doing items...
629                                         && !isNormalEnv(style)
630                                         // and if the depth has changed...
631                                         && par->params().depth() != origdepth) {
632                                         // then we'll save this layout for later, and close it when
633                                         // we get another item.
634                                         lastlay = &style;
635                                 } else {
636                                         closeItemTag(xs, style);
637
638                                         // Eventually, close the item wrapper.
639                                         if (style.docbookitemwrappertag() != "NONE") {
640                                                 xs << xml::EndTag(style.docbookitemwrappertag());
641                                                 xs << xml::CR();
642                                         }
643                                 }
644                         }
645                         // The other possibility is that the depth has increased.
646                         else {
647                                 send = findEndOfEnvironment(par, pend);
648                                 par = makeEnvironment(buf, xs, runparams, text, par, send);
649                         }
650                         break;
651                 }
652                 case LATEX_PARAGRAPH:
653                         send = findLast(par, pend, LATEX_PARAGRAPH);
654                         par = makeParagraphs(buf, xs, runparams, text, par, send);
655                         break;
656                 case LATEX_BIB_ENVIRONMENT:
657                         send = findLast(par, pend, LATEX_BIB_ENVIRONMENT);
658                         par = makeParagraphBibliography(buf, xs, runparams, text, par, send);
659                         break;
660                 case LATEX_COMMAND:
661                         ++par;
662                         break;
663                 }
664         }
665
666         if (lastlay != nullptr) {
667                 closeItemTag(xs, *lastlay);
668                 if (lastlay->docbookitemwrappertag() != "NONE") {
669                         xs << xml::EndTag(lastlay->docbookitemwrappertag());
670                         xs << xml::CR();
671                 }
672         }
673         closeTag(xs, bstyle);
674         xs << xml::CR();
675         return pend;
676 }
677
678
679 void makeCommand(
680                 Buffer const & buf,
681                 XMLStream & xs,
682                 OutputParams const & runparams,
683                 Text const & text,
684                 ParagraphList::const_iterator const & pbegin)
685 {
686         Layout const &style = pbegin->layout();
687
688         // No need for labels, as they are handled by DocBook tags.
689
690         openParTag(xs, style);
691
692         ParagraphList::const_iterator const begin = text.paragraphs().begin();
693         pbegin->simpleDocBookOnePar(buf, xs, runparams,
694                                                                 text.outerFont(distance(begin, pbegin)));
695         closeTag(xs, style);
696         xs << xml::CR();
697 }
698
699 pair<ParagraphList::const_iterator, ParagraphList::const_iterator> makeAny(
700                 Text const &text,
701                 Buffer const &buf,
702                 XMLStream &xs,
703                 OutputParams const &ourparams,
704                 ParagraphList::const_iterator par,
705                 ParagraphList::const_iterator send,
706                 ParagraphList::const_iterator pend)
707 {
708         Layout const & style = par->layout();
709
710         switch (style.latextype) {
711                 case LATEX_COMMAND: {
712                         // The files with which we are working never have more than
713                         // one paragraph in a command structure.
714                         // FIXME
715                         // if (ourparams.docbook_in_par)
716                         //   fix it so we don't get sections inside standard, e.g.
717                         // note that we may then need to make runparams not const, so we
718                         // can communicate that back.
719                         // FIXME Maybe this fix should be in the routines themselves, in case
720                         // they are called from elsewhere.
721                         makeCommand(buf, xs, ourparams, text, par);
722                         ++par;
723                         break;
724                 }
725                 case LATEX_ENVIRONMENT:
726                 case LATEX_LIST_ENVIRONMENT:
727                 case LATEX_ITEM_ENVIRONMENT:
728                         // FIXME Same fix here.
729                         send = findEndOfEnvironment(par, pend);
730                         par = makeEnvironment(buf, xs, ourparams, text, par, send);
731                         break;
732                 case LATEX_PARAGRAPH:
733                         send = findLast(par, pend, LATEX_PARAGRAPH);
734                         par = makeParagraphs(buf, xs, ourparams, text, par, send);
735                         break;
736                 case LATEX_BIB_ENVIRONMENT:
737                         send = findLast(par, pend, LATEX_BIB_ENVIRONMENT);
738                         par = makeParagraphBibliography(buf, xs, ourparams, text, par, send);
739                         break;
740         }
741
742         return make_pair(par, send);
743 }
744
745 } // end anonymous namespace
746
747
748 using DocBookDocumentSectioning = tuple<bool, pit_type>;
749 using DocBookInfoTag = tuple<set<pit_type>, set<pit_type>, pit_type, pit_type>;
750
751
752 DocBookDocumentSectioning hasDocumentSectioning(ParagraphList const &paragraphs, pit_type bpit, pit_type const epit) {
753         bool documentHasSections = false;
754
755         while (bpit < epit) {
756                 Layout const &style = paragraphs[bpit].layout();
757                 documentHasSections |= style.category() == from_utf8("Sectioning");
758
759                 if (documentHasSections) {
760                         break;
761                 }
762                 bpit += 1;
763         }
764         // Paragraphs before the first section: [ runparams.par_begin ; eppit )
765
766         return make_tuple(documentHasSections, bpit);
767 }
768
769
770 bool hasOnlyNotes(Paragraph const & par)
771 {
772         for (int i = 0; i < par.size(); ++i)
773                 if (!par.isInset(i) || !dynamic_cast<InsetNote *>(par.insetList().get(i)))
774                         return false;
775         return true;
776 }
777
778
779 DocBookInfoTag getParagraphsWithInfo(ParagraphList const &paragraphs, pit_type bpit, pit_type const epit) {
780         set<pit_type> shouldBeInInfo;
781         set<pit_type> mustBeInInfo;
782
783         // Find the first non empty paragraph by mutating bpit.
784         while (bpit < epit) {
785                 Paragraph const &par = paragraphs[bpit];
786                 if (par.empty() || hasOnlyNotes(par))
787                         bpit += 1;
788                 else
789                         break;
790         }
791
792         // Find the last info-like paragraph.
793         pit_type cpit = bpit;
794         while (cpit < epit) {
795                 // Skip paragraphs only containing one note.
796                 Paragraph const &par = paragraphs[cpit];
797                 if (hasOnlyNotes(par)) {
798                         cpit += 1;
799                         continue;
800                 }
801
802                 // Based on layout information, store this paragraph in one set: should be in <info>, must be.
803                 Layout const &style = par.layout();
804
805                 if (style.docbookininfo() == "always") {
806                         mustBeInInfo.emplace(cpit);
807                 } else if (style.docbookininfo() == "maybe") {
808                         shouldBeInInfo.emplace(cpit);
809                 } else {
810                         // Hypothesis: the <info> parts should be grouped together near the beginning bpit.
811                         break;
812                 }
813                 cpit += 1;
814         }
815         // Now, cpit points to the last paragraph that has things that could go in <info>.
816         // bpit is still the beginning of the <info> part.
817
818         return make_tuple(shouldBeInInfo, mustBeInInfo, bpit, cpit);
819 }
820
821
822 bool hasAbstractBetween(ParagraphList const &paragraphs, pit_type const bpitAbstract, pit_type const epitAbstract)
823 {
824         // Hypothesis: the paragraphs between bpitAbstract and epitAbstract can be considered an abstract because they
825         // are just after a document or part title.
826         if (epitAbstract - bpitAbstract <= 0)
827                 return false;
828
829         // If there is something between these paragraphs, check if it's compatible with an abstract (i.e. some text).
830         pit_type bpit = bpitAbstract;
831         while (bpit < epitAbstract) {
832                 const Paragraph &p = paragraphs.at(bpit);
833
834                 if (p.layout().name() == from_ascii("Abstract"))
835                         return true;
836
837                 if (!p.insetList().empty()) {
838                         for (const auto &i : p.insetList()) {
839                                 if (i.inset->getText(0) != nullptr) {
840                                         return true;
841                                 }
842                         }
843                 }
844                 bpit++;
845         }
846         return false;
847 }
848
849
850 pit_type generateDocBookParagraphWithoutSectioning(
851                 Text const & text,
852                 Buffer const & buf,
853                 XMLStream & xs,
854                 OutputParams const & runparams,
855                 ParagraphList const & paragraphs,
856                 pit_type bpit,
857                 pit_type epit)
858 {
859         auto par = paragraphs.iterator_at(bpit);
860         auto lastStartedPar = par;
861         ParagraphList::const_iterator send;
862         auto const pend =
863                         (epit == (int) paragraphs.size()) ?
864                         paragraphs.end() : paragraphs.iterator_at(epit);
865
866         while (bpit < epit) {
867                 tie(par, send) = makeAny(text, buf, xs, runparams, par, send, pend);
868                 bpit += distance(lastStartedPar, par);
869                 lastStartedPar = par;
870         }
871
872         return bpit;
873 }
874
875
876 void outputDocBookInfo(
877                 Text const & text,
878                 Buffer const & buf,
879                 XMLStream & xs,
880                 OutputParams const & runparams,
881                 ParagraphList const & paragraphs,
882                 DocBookInfoTag const & info,
883                 pit_type bpitAbstract,
884                 pit_type const epitAbstract)
885 {
886         // Consider everything between bpitAbstract and epitAbstract (excluded) as paragraphs for the abstract.
887         // Use bpitAbstract >= epitAbstract to indicate there is no abstract.
888
889         set<pit_type> shouldBeInInfo;
890         set<pit_type> mustBeInInfo;
891         pit_type bpitInfo;
892         pit_type epitInfo;
893         tie(shouldBeInInfo, mustBeInInfo, bpitInfo, epitInfo) = info;
894
895         // Perform an additional check on the abstract. Sometimes, there are many paragraphs that should go
896         // into the abstract, but none generates actual content. Thus, first generate to a temporary stream,
897         // then only create the <abstract> tag if these paragraphs generate some content.
898         // This check must be performed *before* a decision on whether or not to output <info> is made.
899         bool hasAbstract = hasAbstractBetween(paragraphs, bpitAbstract, epitAbstract);
900         docstring abstract;
901         if (hasAbstract) {
902                 odocstringstream os2;
903                 XMLStream xs2(os2);
904                 generateDocBookParagraphWithoutSectioning(text, buf, xs2, runparams, paragraphs, bpitAbstract, epitAbstract);
905
906                 // Actually output the abstract if there is something to do. Don't count line feeds or spaces in this,
907                 // even though they must be properly output if there is some abstract.
908                 docstring abstractContent = os2.str();
909                 static const lyx::regex reg("[ \\r\\n]*");
910                 abstractContent = from_utf8(lyx::regex_replace(to_utf8(abstractContent), reg, string("")));
911
912                 // Nothing? Then there is no abstract!
913                 if (abstractContent.empty())
914                         hasAbstract = false;
915         }
916
917         // The abstract must go in <info>.
918         bool needInfo = !mustBeInInfo.empty() || hasAbstract;
919
920         // Start the <info> tag if required.
921         if (needInfo) {
922                 xs.startDivision(false);
923                 xs << xml::StartTag("info");
924                 xs << xml::CR();
925         }
926
927         // Output the elements that should go in <info>.
928         generateDocBookParagraphWithoutSectioning(text, buf, xs, runparams, paragraphs, bpitInfo, epitInfo);
929
930         if (hasAbstract && !abstract.empty()) { // The second test is probably superfluous.
931                 string tag = paragraphs[bpitAbstract].layout().docbookforceabstracttag();
932                 if (tag == "NONE")
933                         tag = "abstract";
934
935                 xs << xml::StartTag(tag);
936                 xs << xml::CR();
937                 xs << XMLStream::ESCAPE_NONE << abstract;
938                 xs << xml::EndTag(tag);
939                 xs << xml::CR();
940         }
941
942         // End the <info> tag if it was started.
943         if (needInfo) {
944                 xs << xml::EndTag("info");
945                 xs << xml::CR();
946                 xs.endDivision();
947         }
948 }
949
950
951 void docbookFirstParagraphs(
952                 Text const &text,
953                 Buffer const &buf,
954                 XMLStream &xs,
955                 OutputParams const &runparams,
956                 pit_type epit)
957 {
958         // Handle the beginning of the document, supposing it has sections.
959         // Major role: output the first <info> tag.
960
961         ParagraphList const &paragraphs = text.paragraphs();
962         pit_type bpit = runparams.par_begin;
963         DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit);
964         outputDocBookInfo(text, buf, xs, runparams, paragraphs, info, get<3>(info), epit);
965 }
966
967
968 bool isParagraphEmpty(const Paragraph &par)
969 {
970         InsetList const &insets = par.insetList();
971         size_t insetsLength = distance(insets.begin(), insets.end());
972         bool hasParagraphOnlyNote = insetsLength == 1 && insets.get(0) && insets.get(0)->asInsetCollapsible() &&
973                                                                 dynamic_cast<InsetNote *>(insets.get(0));
974         return hasParagraphOnlyNote;
975 }
976
977
978 void docbookSimpleAllParagraphs(
979                 Text const & text,
980                 Buffer const & buf,
981                 XMLStream & xs,
982                 OutputParams const & runparams)
983 {
984         // Handle the document, supposing it has no sections (i.e. a "simple" document).
985
986         // First, the <info> tag.
987         ParagraphList const &paragraphs = text.paragraphs();
988         pit_type bpit = runparams.par_begin;
989         pit_type const epit = runparams.par_end;
990         DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit);
991         outputDocBookInfo(text, buf, xs, runparams, paragraphs, info, 0, 0);
992         bpit = get<3>(info); // Generate the content starting from the end of the <info> part.
993
994         // Then, the content.
995         ParagraphList::const_iterator const pend =
996                         (epit == (int) paragraphs.size()) ?
997                         paragraphs.end() : paragraphs.iterator_at(epit);
998
999         while (bpit < epit) {
1000                 auto par = paragraphs.iterator_at(bpit);
1001                 ParagraphList::const_iterator const lastStartedPar = par;
1002                 ParagraphList::const_iterator send;
1003
1004                 if (isParagraphEmpty(*par)) {
1005                         ++par;
1006                         bpit += distance(lastStartedPar, par);
1007                         continue;
1008                 }
1009
1010                 // Generate this paragraph.
1011                 tie(par, send) = makeAny(text, buf, xs, runparams, par, send, pend);
1012                 bpit += distance(lastStartedPar, par);
1013         }
1014 }
1015
1016
1017 void docbookParagraphs(Text const &text,
1018                                            Buffer const &buf,
1019                                            XMLStream &xs,
1020                                            OutputParams const &runparams) {
1021         ParagraphList const &paragraphs = text.paragraphs();
1022         if (runparams.par_begin == runparams.par_end) {
1023                 runparams.par_begin = 0;
1024                 runparams.par_end = paragraphs.size();
1025         }
1026         pit_type bpit = runparams.par_begin;
1027         pit_type const epit = runparams.par_end;
1028         LASSERT(bpit < epit,
1029                         {
1030                                 xs << XMLStream::ESCAPE_NONE << "<!-- DocBook output error! -->\n";
1031                                 return;
1032                         });
1033
1034         ParagraphList::const_iterator const pend =
1035                         (epit == (int) paragraphs.size()) ?
1036                         paragraphs.end() : paragraphs.iterator_at(epit);
1037         std::stack<std::pair<int, string>> headerLevels; // Used to determine when to open/close sections: store the depth
1038         // of the section and the tag that was used to open it.
1039
1040         // Detect whether the document contains sections. If there are no sections, there can be no automatically
1041         // discovered abstract.
1042         bool documentHasSections;
1043         pit_type eppit;
1044         tie(documentHasSections, eppit) = hasDocumentSectioning(paragraphs, bpit, epit);
1045
1046         if (documentHasSections) {
1047                 docbookFirstParagraphs(text, buf, xs, runparams, eppit);
1048                 bpit = eppit;
1049         } else {
1050                 docbookSimpleAllParagraphs(text, buf, xs, runparams);
1051                 return;
1052         }
1053
1054         bool currentlyInAppendix = false;
1055
1056         while (bpit < epit) {
1057                 OutputParams ourparams = runparams;
1058
1059                 auto par = paragraphs.iterator_at(bpit);
1060                 if (par->params().startOfAppendix())
1061                         currentlyInAppendix = true;
1062                 Layout const &style = par->layout();
1063                 ParagraphList::const_iterator const lastStartedPar = par;
1064                 ParagraphList::const_iterator send;
1065
1066                 if (isParagraphEmpty(*par)) {
1067                         ++par;
1068                         bpit += distance(lastStartedPar, par);
1069                         continue;
1070                 }
1071
1072                 // Think about adding <section> and/or </section>s.
1073                 const bool isLayoutSectioning = style.category() == from_utf8("Sectioning");
1074                 if (isLayoutSectioning) {
1075                         int level = style.toclevel;
1076
1077                         // Need to close a previous section if it has the same level or a higher one (close <section> if opening a <h2>
1078                         // after a <h2>, <h3>, <h4>, <h5> or <h6>). More examples:
1079                         //   - current: h2; back: h1; do not close any <section>
1080                         //   - current: h1; back: h2; close two <section> (first the <h2>, then the <h1>, so a new <h1> can come)
1081                         while (!headerLevels.empty() && level <= headerLevels.top().first) {
1082                                 int stackLevel = headerLevels.top().first;
1083                                 docstring stackTag = from_utf8("</" + headerLevels.top().second + ">");
1084                                 headerLevels.pop();
1085
1086                                 // Output the tag only if it corresponds to a legit section.
1087                                 if (stackLevel != Layout::NOT_IN_TOC)
1088                                         xs << XMLStream::ESCAPE_NONE << stackTag << xml::CR();
1089                         }
1090
1091                         // Open the new section: first push it onto the stack, then output it in DocBook.
1092                         string sectionTag = (currentlyInAppendix && style.docbooksectiontag() == "chapter") ?
1093                                                                 "appendix" : style.docbooksectiontag();
1094                         headerLevels.push(std::make_pair(level, sectionTag));
1095
1096                         // Some sectioning-like elements should not be output (such as FrontMatter).
1097                         if (level != Layout::NOT_IN_TOC) {
1098                                 // Look for a label in the title, i.e. a InsetLabel as a child.
1099                                 docstring id = docstring();
1100                                 for (pos_type i = 0; i < par->size(); ++i) {
1101                                         Inset const *inset = par->getInset(i);
1102                                         if (inset) {
1103                                                 if (auto label = dynamic_cast<InsetLabel const *>(inset)) {
1104                                                         // Generate the attributes for the section if need be.
1105                                                         id += "xml:id=\"" + xml::cleanID(label->screenLabel()) + "\"";
1106
1107                                                         // Don't output the ID as a DocBook <anchor>.
1108                                                         ourparams.docbook_anchors_to_ignore.emplace(label->screenLabel());
1109
1110                                                         // Cannot have multiple IDs per tag.
1111                                                         break;
1112                                                 }
1113                                         }
1114                                 }
1115
1116                                 // Write the open tag for this section.
1117                                 docstring tag = from_utf8("<" + sectionTag);
1118                                 if (!id.empty())
1119                                         tag += from_utf8(" ") + id;
1120                                 tag += from_utf8(">");
1121                                 xs << XMLStream::ESCAPE_NONE << tag;
1122                                 xs << xml::CR();
1123                         }
1124                 }
1125
1126                 // Close all sections before the bibliography.
1127                 // TODO: Only close all when the bibliography is at the end of the document? Or force to output the bibliography at the end of the document? Or don't care (as allowed by DocBook)?
1128                 auto insetsLength = distance(par->insetList().begin(), par->insetList().end());
1129                 if (insetsLength > 0) {
1130                         Inset const *firstInset = par->getInset(0);
1131                         if (firstInset && dynamic_cast<InsetBibtex const *>(firstInset)) {
1132                                 while (!headerLevels.empty()) {
1133                                         int level = headerLevels.top().first;
1134                                         docstring tag = from_utf8("</" + headerLevels.top().second + ">");
1135                                         headerLevels.pop();
1136
1137                                         // Output the tag only if it corresponds to a legit section.
1138                                         if (level != Layout::NOT_IN_TOC) {
1139                                                 xs << XMLStream::ESCAPE_NONE << tag;
1140                                                 xs << xml::CR();
1141                                         }
1142                                 }
1143                         }
1144                 }
1145
1146                 // Generate this paragraph.
1147                 tie(par, send) = makeAny(text, buf, xs, ourparams, par, send, pend);
1148                 bpit += distance(lastStartedPar, par);
1149         }
1150
1151         // If need be, close <section>s, but only at the end of the document (otherwise, dealt with at the beginning
1152         // of the loop).
1153         while (!headerLevels.empty() && headerLevels.top().first > Layout::NOT_IN_TOC) {
1154                 docstring tag = from_utf8("</" + headerLevels.top().second + ">");
1155                 headerLevels.pop();
1156                 xs << XMLStream::ESCAPE_NONE << tag;
1157                 xs << xml::CR();
1158         }
1159 }
1160
1161 } // namespace lyx