]> git.lyx.org Git - lyx.git/blob - src/output_docbook.cpp
DocBook: allow empty paragraphs before the <info> section.
[lyx.git] / src / output_docbook.cpp
1 /**
2  * \file output_docbook.cpp
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author Lars Gullik Bjønnes
7  * \author José Matos
8  *
9  * Full author contact details are available in file CREDITS.
10  */
11
12 #include <config.h>
13
14 #include "Buffer.h"
15 #include "buffer_funcs.h"
16 #include "BufferParams.h"
17 #include "Font.h"
18 #include "InsetList.h"
19 #include "Layout.h"
20 #include "OutputParams.h"
21 #include "Paragraph.h"
22 #include "ParagraphList.h"
23 #include "ParagraphParameters.h"
24 #include "xml.h"
25 #include "Text.h"
26 #include "TextClass.h"
27
28 #include "insets/InsetBibtex.h"
29 #include "insets/InsetBibitem.h"
30 #include "insets/InsetLabel.h"
31 #include "insets/InsetNote.h"
32
33 #include "support/convert.h"
34 #include "support/debug.h"
35 #include "support/lassert.h"
36 #include "support/lstrings.h"
37 #include "support/textutils.h"
38
39 #include "support/regex.h"
40
41 #include <stack>
42 #include <iostream>
43 #include <algorithm>
44 #include <sstream>
45
46 using namespace std;
47 using namespace lyx::support;
48
49 namespace lyx {
50
51 namespace {
52
53 std::string const fontToDocBookTag(xml::FontTypes type)
54 {
55         switch (type) {
56         case xml::FontTypes::FT_EMPH:
57         case xml::FontTypes::FT_BOLD:
58                 return "emphasis";
59         case xml::FontTypes::FT_NOUN:
60                 return "person";
61         case xml::FontTypes::FT_UBAR:
62         case xml::FontTypes::FT_WAVE:
63         case xml::FontTypes::FT_DBAR:
64         case xml::FontTypes::FT_SOUT:
65         case xml::FontTypes::FT_XOUT:
66         case xml::FontTypes::FT_ITALIC:
67         case xml::FontTypes::FT_UPRIGHT:
68         case xml::FontTypes::FT_SLANTED:
69         case xml::FontTypes::FT_SMALLCAPS:
70         case xml::FontTypes::FT_ROMAN:
71         case xml::FontTypes::FT_SANS:
72                 return "emphasis";
73         case xml::FontTypes::FT_TYPE:
74                 return "code";
75         case xml::FontTypes::FT_SIZE_TINY:
76         case xml::FontTypes::FT_SIZE_SCRIPT:
77         case xml::FontTypes::FT_SIZE_FOOTNOTE:
78         case xml::FontTypes::FT_SIZE_SMALL:
79         case xml::FontTypes::FT_SIZE_NORMAL:
80         case xml::FontTypes::FT_SIZE_LARGE:
81         case xml::FontTypes::FT_SIZE_LARGER:
82         case xml::FontTypes::FT_SIZE_LARGEST:
83         case xml::FontTypes::FT_SIZE_HUGE:
84         case xml::FontTypes::FT_SIZE_HUGER:
85         case xml::FontTypes::FT_SIZE_INCREASE:
86         case xml::FontTypes::FT_SIZE_DECREASE:
87                 return "emphasis";
88         default:
89                 return "";
90         }
91 }
92
93 string fontToRole(xml::FontTypes type)
94 {
95         // Specific fonts are achieved with roles. The only common ones are "" for basic emphasis,
96         // and "bold"/"strong" for bold. With some specific options, other roles are copied into
97         // HTML output (via the DocBook XSLT sheets); otherwise, if not recognised, they are just ignored.
98         // Hence, it is not a problem to have many roles by default here.
99         // See https://www.sourceware.org/ml/docbook/2003-05/msg00269.html
100         switch (type) {
101         case xml::FontTypes::FT_ITALIC:
102         case xml::FontTypes::FT_EMPH:
103                 return "";
104         case xml::FontTypes::FT_BOLD:
105                 return "bold";
106         case xml::FontTypes::FT_NOUN:
107                 return ""; // Outputs a <person>
108         case xml::FontTypes::FT_TYPE:
109                 return ""; // Outputs a <code>
110         case xml::FontTypes::FT_UBAR:
111                 return "underline";
112
113                 // All other roles are non-standard for DocBook.
114
115         case xml::FontTypes::FT_WAVE:
116                 return "wave";
117         case xml::FontTypes::FT_DBAR:
118                 return "dbar";
119         case xml::FontTypes::FT_SOUT:
120                 return "sout";
121         case xml::FontTypes::FT_XOUT:
122                 return "xout";
123         case xml::FontTypes::FT_UPRIGHT:
124                 return "upright";
125         case xml::FontTypes::FT_SLANTED:
126                 return "slanted";
127         case xml::FontTypes::FT_SMALLCAPS:
128                 return "smallcaps";
129         case xml::FontTypes::FT_ROMAN:
130                 return "roman";
131         case xml::FontTypes::FT_SANS:
132                 return "sans";
133         case xml::FontTypes::FT_SIZE_TINY:
134                 return "tiny";
135         case xml::FontTypes::FT_SIZE_SCRIPT:
136                 return "size_script";
137         case xml::FontTypes::FT_SIZE_FOOTNOTE:
138                 return "size_footnote";
139         case xml::FontTypes::FT_SIZE_SMALL:
140                 return "size_small";
141         case xml::FontTypes::FT_SIZE_NORMAL:
142                 return "size_normal";
143         case xml::FontTypes::FT_SIZE_LARGE:
144                 return "size_large";
145         case xml::FontTypes::FT_SIZE_LARGER:
146                 return "size_larger";
147         case xml::FontTypes::FT_SIZE_LARGEST:
148                 return "size_largest";
149         case xml::FontTypes::FT_SIZE_HUGE:
150                 return "size_huge";
151         case xml::FontTypes::FT_SIZE_HUGER:
152                 return "size_huger";
153         case xml::FontTypes::FT_SIZE_INCREASE:
154                 return "size_increase";
155         case xml::FontTypes::FT_SIZE_DECREASE:
156                 return "size_decrease";
157         default:
158                 return "";
159         }
160 }
161
162 string fontToAttribute(xml::FontTypes type) {
163         // If there is a role (i.e. nonstandard use of a tag), output the attribute. Otherwise, the sheer tag is sufficient
164         // for the font.
165         string role = fontToRole(type);
166         if (!role.empty()) {
167                 return "role='" + role + "'";
168         } else {
169                 return "";
170         }
171 }
172
173 } // end anonymous namespace
174
175
176 xml::FontTag docbookStartFontTag(xml::FontTypes type)
177 {
178         return xml::FontTag(from_utf8(fontToDocBookTag(type)), from_utf8(fontToAttribute(type)), type);
179 }
180
181
182 xml::EndFontTag docbookEndFontTag(xml::FontTypes type)
183 {
184         return xml::EndFontTag(from_utf8(fontToDocBookTag(type)), type);
185 }
186
187
188 namespace {
189
190 // convenience functions
191
192 void openParTag(XMLStream &xs, Layout const &lay)
193 {
194         if (lay.docbookwrappertag() != "NONE") {
195                 xs << xml::StartTag(lay.docbookwrappertag(), lay.docbookwrapperattr());
196         }
197
198         string tag = lay.docbooktag();
199         if (tag == "Plain Layout")
200                 tag = "para";
201
202         xs << xml::ParTag(tag, lay.docbookattr());
203 }
204
205
206 void closeTag(XMLStream &xs, Layout const &lay)
207 {
208         string tag = lay.docbooktag();
209         if (tag == "Plain Layout")
210                 tag = "para";
211
212         xs << xml::EndTag(tag);
213         if (lay.docbookwrappertag() != "NONE")
214                 xs << xml::EndTag(lay.docbookwrappertag());
215 }
216
217
218 void openLabelTag(XMLStream & xs, Layout const & lay) // Mostly for definition lists.
219 {
220         xs << xml::StartTag(lay.docbookitemlabeltag(), lay.docbookitemlabelattr());
221 }
222
223
224 void closeLabelTag(XMLStream & xs, Layout const & lay)
225 {
226         xs << xml::EndTag(lay.docbookitemlabeltag());
227         xs << xml::CR();
228 }
229
230
231 void openItemTag(XMLStream &xs, Layout const &lay)
232 {
233         xs << xml::StartTag(lay.docbookitemtag(), lay.docbookitemattr());
234 }
235
236
237 // Return true when new elements are output in a paragraph, false otherwise.
238 bool openInnerItemTag(XMLStream &xs, Layout const &lay)
239 {
240         if (lay.docbookiteminnertag() != "NONE") {
241                 xs << xml::CR();
242                 xs << xml::ParTag(lay.docbookiteminnertag(), lay.docbookiteminnerattr());
243
244                 if (lay.docbookiteminnertag() == "para") {
245                         return true;
246                 }
247         }
248         return false;
249 }
250
251
252 void closeInnerItemTag(XMLStream &xs, Layout const &lay)
253 {
254         if (lay.docbookiteminnertag()!= "NONE") {
255                 xs << xml::EndTag(lay.docbookiteminnertag());
256                 xs << xml::CR();
257         }
258 }
259
260
261 inline void closeItemTag(XMLStream &xs, Layout const &lay)
262 {
263         xs << xml::EndTag(lay.docbookitemtag());
264         xs << xml::CR();
265 }
266
267 // end of convenience functions
268
269 ParagraphList::const_iterator findLastParagraph(
270                 ParagraphList::const_iterator p,
271                 ParagraphList::const_iterator const & pend) {
272         for (++p; p != pend && p->layout().latextype == LATEX_PARAGRAPH; ++p);
273
274         return p;
275 }
276
277 ParagraphList::const_iterator findLastBibliographyParagraph(
278                 ParagraphList::const_iterator p,
279                 ParagraphList::const_iterator const & pend) {
280         for (++p; p != pend && p->layout().latextype == LATEX_BIB_ENVIRONMENT; ++p);
281
282         return p;
283 }
284
285
286 ParagraphList::const_iterator findEndOfEnvironment(
287                 ParagraphList::const_iterator const & pstart,
288                 ParagraphList::const_iterator const & pend)
289 {
290         ParagraphList::const_iterator p = pstart;
291         Layout const &bstyle = p->layout();
292         size_t const depth = p->params().depth();
293         for (++p; p != pend; ++p) {
294                 Layout const &style = p->layout();
295                 // It shouldn't happen that e.g. a section command occurs inside
296                 // a quotation environment, at a higher depth, but as of 6/2009,
297                 // it can happen. We pretend that it's just at lowest depth.
298                 if (style.latextype == LATEX_COMMAND)
299                         return p;
300
301                 // If depth is down, we're done
302                 if (p->params().depth() < depth)
303                         return p;
304
305                 // If depth is up, we're not done
306                 if (p->params().depth() > depth)
307                         continue;
308
309                 // FIXME I am not sure about the first check.
310                 // Surely we *could* have different layouts that count as
311                 // LATEX_PARAGRAPH, right?
312                 if (style.latextype == LATEX_PARAGRAPH || style != bstyle)
313                         return p;
314         }
315         return pend;
316 }
317
318
319 ParagraphList::const_iterator makeParagraphBibliography(
320                 Buffer const &buf,
321                 XMLStream &xs,
322                 OutputParams const &runparams,
323                 Text const &text,
324                 ParagraphList::const_iterator const & pbegin,
325                 ParagraphList::const_iterator const & pend)
326 {
327         auto const begin = text.paragraphs().begin();
328         auto const end = text.paragraphs().end();
329
330         // Find the paragraph *before* pbegin.
331         ParagraphList::const_iterator pbegin_before = begin;
332         if (pbegin != begin) {
333                 ParagraphList::const_iterator pbegin_before_next = begin;
334                 ++pbegin_before_next;
335
336                 while (pbegin_before_next != pbegin) {
337                         ++pbegin_before;
338                         ++pbegin_before_next;
339                 }
340         }
341
342         ParagraphList::const_iterator par = pbegin;
343
344         // If this is the first paragraph in a bibliography, open the bibliography tag.
345         if (pbegin != begin && pbegin_before->layout().latextype != LATEX_BIB_ENVIRONMENT) {
346                 xs << xml::StartTag("bibliography");
347                 xs << xml::CR();
348         }
349
350         // Generate the required paragraphs, but only if they are .
351         for (; par != pend; ++par) {
352                 // Start the precooked bibliography entry. This is very much like opening a paragraph tag.
353                 // Don't forget the citation ID!
354                 docstring attr;
355                 for (auto i = 0; i < par->size(); ++i) {
356                         Inset const *ip = par->getInset(0);
357                         if (ip != nullptr && ip->lyxCode() == BIBITEM_CODE) {
358                                 const auto * bibitem = dynamic_cast<const InsetBibitem*>(par->getInset(i));
359                                 attr = from_utf8("xml:id='") + bibitem->getParam("key") + from_utf8("'");
360                                 break;
361                         }
362                 }
363                 xs << xml::StartTag(from_utf8("bibliomixed"), attr);
364
365                 // Generate the entry.
366                 par->simpleDocBookOnePar(buf, xs, runparams, text.outerFont(distance(begin, par)), true, true, 0);
367
368                 // End the precooked bibliography entry.
369                 xs << xml::EndTag("bibliomixed");
370                 xs << xml::CR();
371         }
372
373         // If this is the last paragraph in a bibliography, close the bibliography tag.
374         if (par == end || par->layout().latextype != LATEX_BIB_ENVIRONMENT) {
375                 xs << xml::EndTag("bibliography");
376                 xs << xml::CR();
377         }
378
379         return pend;
380 }
381
382
383 ParagraphList::const_iterator makeParagraphs(
384                 Buffer const &buf,
385                 XMLStream &xs,
386                 OutputParams const &runparams,
387                 Text const &text,
388                 ParagraphList::const_iterator const & pbegin,
389                 ParagraphList::const_iterator const & pend)
390 {
391         ParagraphList::const_iterator const begin = text.paragraphs().begin();
392         ParagraphList::const_iterator par = pbegin;
393         for (; par != pend; ++par) {
394                 Layout const &lay = par->layout();
395
396                 // We want to open the paragraph tag if:
397                 //   (i) the current layout permits multiple paragraphs
398                 //  (ii) we are either not already inside a paragraph (HTMLIsBlock) OR
399                 //         we are, but this is not the first paragraph
400                 //
401                 // But there is also a special case, and we first see whether we are in it.
402                 // We do not want to open the paragraph tag if this paragraph contains
403                 // only one item, and that item is "inline", i.e., not HTMLIsBlock (such
404                 // as a branch). On the other hand, if that single item has a font change
405                 // applied to it, then we still do need to open the paragraph.
406                 //
407                 // Obviously, this is very fragile. The main reason we need to do this is
408                 // because of branches, e.g., a branch that contains an entire new section.
409                 // We do not really want to wrap that whole thing in a <div>...</div>.
410                 bool special_case = false;
411                 Inset const *specinset = par->size() == 1 ? par->getInset(0) : 0;
412                 if (specinset && !specinset->getLayout().htmlisblock()) { // TODO: Convert htmlisblock to a DocBook parameter?
413                         Layout const &style = par->layout();
414                         FontInfo const first_font = style.labeltype == LABEL_MANUAL ?
415                                                                                 style.labelfont : style.font;
416                         FontInfo const our_font =
417                                         par->getFont(buf.masterBuffer()->params(), 0,
418                                                                  text.outerFont(distance(begin, par))).fontInfo();
419
420                         if (first_font == our_font)
421                                 special_case = true;
422                 }
423
424                 // Plain layouts must be ignored.
425                 if (!special_case && buf.params().documentClass().isPlainLayout(lay) && !runparams.docbook_force_pars)
426                         special_case = true;
427                 // TODO: Could get rid of this with a DocBook equivalent to htmlisblock?
428                 if (!special_case && par->size() == 1 && par->getInset(0)) {
429                         Inset const * firstInset = par->getInset(0);
430
431                         // Floats cannot be in paragraphs.
432                         special_case = to_utf8(firstInset->layoutName()).substr(0, 6) == "Float:";
433
434                         // Bibliographies cannot be in paragraphs.
435                         if (!special_case && firstInset->asInsetCommand())
436                                 special_case = firstInset->asInsetCommand()->params().getCmdName() == "bibtex";
437
438                         // Equations do not deserve their own paragraph (DocBook allows them outside paragraphs).
439                         if (!special_case && firstInset->asInsetMath())
440                                 special_case = true;
441
442                         // ERTs are in comments, not paragraphs.
443                         if (!special_case && firstInset->lyxCode() == lyx::ERT_CODE)
444                                 special_case = true;
445
446                         // Listings should not get into their own paragraph.
447                         if (!special_case && firstInset->lyxCode() == lyx::LISTINGS_CODE)
448                                 special_case = true;
449                 }
450
451                 bool const open_par = runparams.docbook_make_pars
452                                                           && (!runparams.docbook_in_par || par != pbegin)
453                                                           && !special_case;
454
455                 // We want to issue the closing tag if either:
456                 //   (i)  We opened it, and either docbook_in_par is false,
457                 //              or we're not in the last paragraph, anyway.
458                 //   (ii) We didn't open it and docbook_in_par is true,
459                 //              but we are in the first par, and there is a next par.
460                 ParagraphList::const_iterator nextpar = par;
461                 ++nextpar;
462                 bool const close_par =
463                                 ((open_par && (!runparams.docbook_in_par || nextpar != pend))
464                                 || (!open_par && runparams.docbook_in_par && par == pbegin && nextpar != pend));
465
466                 // Determine if this paragraph has some real content. Things like new pages are not caught
467                 // by Paragraph::empty(), even though they do not generate anything useful in DocBook.
468                 odocstringstream os2;
469                 XMLStream xs2(os2);
470                 par->simpleDocBookOnePar(buf, xs2, runparams, text.outerFont(distance(begin, par)), open_par, close_par, 0);
471
472                 docstring cleaned = os2.str();
473                 static const lyx::regex reg("[ \\r\\n]*");
474                 cleaned = from_utf8(lyx::regex_replace(to_utf8(cleaned), reg, string("")));
475
476                 if (!cleaned.empty()) {
477                         if (open_par)
478                                 openParTag(xs, lay);
479
480                         xs << XMLStream::ESCAPE_NONE << os2.str();
481
482                         if (close_par) {
483                                 closeTag(xs, lay);
484                                 xs << xml::CR();
485                         }
486                 }
487         }
488         return pend;
489 }
490
491
492 bool isNormalEnv(Layout const &lay)
493 {
494         return lay.latextype == LATEX_ENVIRONMENT
495                    || lay.latextype == LATEX_BIB_ENVIRONMENT;
496 }
497
498
// Outputs the environment made of the paragraphs in [pbegin, pend).
//
// The tags of the layout of pbegin are opened first (openParTag) and closed
// last (closeTag). In between, each paragraph is handled according to the
// LaTeX type of its layout:
//   - environment-like paragraphs at the depth of pbegin are output as items:
//     optional item wrapper, optional label, item tag, optional inner tag,
//     then the content of the paragraph;
//   - environment-like paragraphs at another depth are output by a recursive
//     call, for the range computed by findEndOfEnvironment;
//   - runs of plain paragraphs and of bibliography paragraphs are delegated
//     to makeParagraphs and makeParagraphBibliography;
//   - command paragraphs are skipped without generating any output.
//
// Returns pend.
ParagraphList::const_iterator makeEnvironment(
                Buffer const &buf,
                XMLStream &xs,
                OutputParams const &runparams,
                Text const &text,
                ParagraphList::const_iterator const & pbegin,
                ParagraphList::const_iterator const & pend)
{
        ParagraphList::const_iterator const begin = text.paragraphs().begin();
        ParagraphList::const_iterator par = pbegin;
        Layout const &bstyle = par->layout();
        depth_type const origdepth = pbegin->params().depth();

        // open tag for this environment
        openParTag(xs, bstyle);
        xs << xml::CR();

        // we will on occasion need to remember a layout from before.
        // When not null, this is the layout of an item whose item tag (and item
        // wrapper) is still open, because deeper paragraphs followed it.
        Layout const *lastlay = nullptr;

        while (par != pend) {
                Layout const & style = par->layout();
                ParagraphList::const_iterator send;

                // Actual content of this paragraph.
                switch (style.latextype) {
                case LATEX_ENVIRONMENT:
                case LATEX_LIST_ENVIRONMENT:
                case LATEX_ITEM_ENVIRONMENT: {
                        // There are two possibilities in this case.
                        // One is that we are still in the environment in which we
                        // started---which we will be if the depth is the same.
                        if (par->params().depth() == origdepth) {
                                LATTEST(bstyle == style);
                                // A new item starts: close the previous one if it was left open.
                                if (lastlay != nullptr) {
                                        closeItemTag(xs, *lastlay);
                                        if (lastlay->docbookitemwrappertag() != "NONE") {
                                                xs << xml::EndTag(lastlay->docbookitemwrappertag());
                                                xs << xml::CR();
                                        }
                                        lastlay = nullptr;
                                }

                                // this will be positive if we want to skip the
                                // initial word (if it's been taken for the label).
                                pos_type sep = 0;

                                // Open a wrapper tag if needed.
                                if (style.docbookitemwrappertag() != "NONE") {
                                        xs << xml::StartTag(style.docbookitemwrappertag(), style.docbookitemwrapperattr());
                                        xs << xml::CR();
                                }

                                // label output
                                if (style.labeltype != LABEL_NO_LABEL &&
                                                style.docbookitemlabeltag() != "NONE") {

                                        if (isNormalEnv(style)) {
                                                // in this case, we print the label only for the first
                                                // paragraph (as in a theorem or an abstract).
                                                if (par == pbegin) {
                                                        docstring const lbl = pbegin->params().labelString();
                                                        if (!lbl.empty()) {
                                                                openLabelTag(xs, style);
                                                                xs << lbl;
                                                                closeLabelTag(xs, style);
                                                        } else {
                                                                // No new line after closeLabelTag.
                                                                xs << xml::CR();
                                                        }
                                                }
                                        } else { // some kind of list
                                                if (style.labeltype == LABEL_MANUAL) {
                                                        // Only variablelist gets here.

                                                        // The first word of the paragraph is the label; sep is where it ends.
                                                        openLabelTag(xs, style);
                                                        sep = par->firstWordDocBook(xs, runparams);
                                                        closeLabelTag(xs, style);
                                                } else {
                                                        openLabelTag(xs, style);
                                                        xs << par->params().labelString();
                                                        closeLabelTag(xs, style);
                                                }
                                        }
                                } // end label output

                                // Start generating the item.
                                bool wasInParagraph = runparams.docbook_in_par;
                                openItemTag(xs, style);
                                bool getsIntoParagraph = openInnerItemTag(xs, style);
                                // The content is generated with a copy of the parameters that tells
                                // whether a paragraph is already open.
                                OutputParams rp = runparams;
                                rp.docbook_in_par = wasInParagraph | getsIntoParagraph;

                                // Maybe the item is completely empty, i.e. if the first word ends at the end of the current paragraph
                                // AND if the next paragraph doesn't have the same depth (if there is such a paragraph).
                                // Common case: there is only the first word on the line, but there is a nested list instead
                                // of more text.
                                bool emptyItem = false;
                                if (sep == par->size()) {
                                        auto next_par = par;
                                        ++next_par;
                                        if (next_par == text.paragraphs().end()) // There is no next paragraph.
                                                emptyItem = true;
                                        else // There is a next paragraph: check depth.
                                                emptyItem = par->params().depth() >= next_par->params().depth();
                                }

                                if (emptyItem) {
                                        // Avoid having an empty item, this is not valid DocBook. A single character is enough to force
                                        // generation of a full <para>.
                                        xs << ' ';
                                } else {
                                        // Generate the rest of the paragraph, if need be.
                                        par->simpleDocBookOnePar(buf, xs, rp, text.outerFont(distance(begin, par)), true, true, sep);
                                }

                                // From here on, par is the paragraph *after* the item; style still
                                // refers to the layout of the item.
                                ++par;
                                // NOTE(review): openInnerItemTag opens the inner tag whenever it is not
                                // "NONE", but only returns true for "para"; any other inner tag is thus
                                // not explicitly closed here. Confirm that this is intended.
                                if (getsIntoParagraph)
                                        closeInnerItemTag(xs, style);

                                // We may not want to close the tag yet, in particular:
                                // If we're not at the end of the item...
                                if (par != pend
                                        //  and are doing items...
                                        && !isNormalEnv(style)
                                        // and if the depth has changed...
                                        && par->params().depth() != origdepth) {
                                        // then we'll save this layout for later, and close it when
                                        // we get another item.
                                        lastlay = &style;
                                } else {
                                        closeItemTag(xs, style);

                                        // Eventually, close the item wrapper.
                                        if (style.docbookitemwrappertag() != "NONE") {
                                                xs << xml::EndTag(style.docbookitemwrappertag());
                                                xs << xml::CR();
                                        }
                                }
                        }
                        // The other possibility is that the depth has increased.
                        else {
                                // Output the nested environment as a whole, and go on after it.
                                send = findEndOfEnvironment(par, pend);
                                par = makeEnvironment(buf, xs, runparams, text, par, send);
                        }
                        break;
                }
                case LATEX_PARAGRAPH:
                        send = findLastParagraph(par, pend);
                        par = makeParagraphs(buf, xs, runparams, text, par, send);
                        break;
                case LATEX_BIB_ENVIRONMENT:
                        send = findLastBibliographyParagraph(par, pend);
                        par = makeParagraphBibliography(buf, xs, runparams, text, par, send);
                        break;
                case LATEX_COMMAND:
                        // Commands are skipped: nothing is output for them here.
                        ++par;
                        break;
                }
        }

        // Close the last item if it is still open.
        if (lastlay != nullptr) {
                closeItemTag(xs, *lastlay);
                if (lastlay->docbookitemwrappertag() != "NONE") {
                        xs << xml::EndTag(lastlay->docbookitemwrappertag());
                        xs << xml::CR();
                }
        }
        closeTag(xs, bstyle);
        xs << xml::CR();
        return pend;
}
671
672
673 void makeCommand(
674                 Buffer const & buf,
675                 XMLStream & xs,
676                 OutputParams const & runparams,
677                 Text const & text,
678                 ParagraphList::const_iterator const & pbegin)
679 {
680         Layout const &style = pbegin->layout();
681
682         // No need for labels, as they are handled by DocBook tags.
683
684         openParTag(xs, style);
685
686         ParagraphList::const_iterator const begin = text.paragraphs().begin();
687         pbegin->simpleDocBookOnePar(buf, xs, runparams,
688                                                                 text.outerFont(distance(begin, pbegin)));
689         closeTag(xs, style);
690         xs << xml::CR();
691 }
692
693 pair<ParagraphList::const_iterator, ParagraphList::const_iterator> makeAny(
694                 Text const &text,
695                 Buffer const &buf,
696                 XMLStream &xs,
697                 OutputParams const &ourparams,
698                 ParagraphList::const_iterator par,
699                 ParagraphList::const_iterator send,
700                 ParagraphList::const_iterator pend)
701 {
702         Layout const & style = par->layout();
703
704         switch (style.latextype) {
705                 case LATEX_COMMAND: {
706                         // The files with which we are working never have more than
707                         // one paragraph in a command structure.
708                         // FIXME
709                         // if (ourparams.docbook_in_par)
710                         //   fix it so we don't get sections inside standard, e.g.
711                         // note that we may then need to make runparams not const, so we
712                         // can communicate that back.
713                         // FIXME Maybe this fix should be in the routines themselves, in case
714                         // they are called from elsewhere.
715                         makeCommand(buf, xs, ourparams, text, par);
716                         ++par;
717                         break;
718                 }
719                 case LATEX_ENVIRONMENT:
720                 case LATEX_LIST_ENVIRONMENT:
721                 case LATEX_ITEM_ENVIRONMENT: {
722                         // FIXME Same fix here.
723                         send = findEndOfEnvironment(par, pend);
724                         par = makeEnvironment(buf, xs, ourparams, text, par, send);
725                         break;
726                 }
727                 case LATEX_BIB_ENVIRONMENT: {
728                         send = findLastBibliographyParagraph(par, pend);
729                         par = makeParagraphBibliography(buf, xs, ourparams, text, par, send);
730                         break;
731                 }
732                 case LATEX_PARAGRAPH: {
733                         send = findLastParagraph(par, pend);
734                         par = makeParagraphs(buf, xs, ourparams, text, par, send);
735                         break;
736                 }
737         }
738
739         return make_pair(par, send);
740 }
741
742 } // end anonymous namespace
743
744
// Result of hasDocumentSectioning():
//   - whether a paragraph of category "Sectioning" was found in the scanned range;
//   - the index of that paragraph, or the index at which the scan stopped if none was found.
using DocBookDocumentSectioning = tuple<bool, pit_type>;
// Result of getParagraphsWithInfo():
//   - indices of the paragraphs whose layout says they may go into <info> ("maybe");
//   - indices of the paragraphs whose layout says they must go into <info> ("always");
//   - index of the first paragraph of the <info> range;
//   - index just past the last paragraph of the <info> range.
using DocBookInfoTag = tuple<set<pit_type>, set<pit_type>, pit_type, pit_type>;
747
748
749 DocBookDocumentSectioning hasDocumentSectioning(ParagraphList const &paragraphs, pit_type bpit, pit_type const epit) {
750         bool documentHasSections = false;
751
752         while (bpit < epit) {
753                 Layout const &style = paragraphs[bpit].layout();
754                 documentHasSections |= style.category() == from_utf8("Sectioning");
755
756                 if (documentHasSections) {
757                         break;
758                 }
759                 bpit += 1;
760         }
761         // Paragraphs before the first section: [ runparams.par_begin ; eppit )
762
763         return make_tuple(documentHasSections, bpit);
764 }
765
766
767 bool hasOnlyNotes(Paragraph const & par)
768 {
769         for (int i = 0; i < par.size(); ++i)
770                 if (!par.isInset(i) || !dynamic_cast<InsetNote *>(par.insetList().get(i)))
771                         return false;
772         return true;
773 }
774
775
776 DocBookInfoTag getParagraphsWithInfo(ParagraphList const &paragraphs, pit_type bpit, pit_type const epit) {
777         set<pit_type> shouldBeInInfo;
778         set<pit_type> mustBeInInfo;
779
780         // Find the first non empty paragraph by mutating bpit.
781         while (bpit < epit) {
782                 Paragraph const &par = paragraphs[bpit];
783                 if (par.empty() || hasOnlyNotes(par))
784                         bpit += 1;
785                 else
786                         break;
787         }
788
789         // Find the last info-like paragraph.
790         pit_type cpit = bpit;
791         while (cpit < epit) {
792                 // Skip paragraphs only containing one note.
793                 Paragraph const &par = paragraphs[cpit];
794                 if (hasOnlyNotes(par)) {
795                         cpit += 1;
796                         continue;
797                 }
798
799                 // Based on layout information, store this paragraph in one set: should be in <info>, must be.
800                 Layout const &style = par.layout();
801
802                 if (style.docbookininfo() == "always") {
803                         mustBeInInfo.emplace(cpit);
804                 } else if (style.docbookininfo() == "maybe") {
805                         shouldBeInInfo.emplace(cpit);
806                 } else {
807                         // Hypothesis: the <info> parts should be grouped together near the beginning bpit.
808                         break;
809                 }
810                 cpit += 1;
811         }
812         // Now, cpit points to the last paragraph that has things that could go in <info>.
813         // bpit is still the beginning of the <info> part.
814
815         return make_tuple(shouldBeInInfo, mustBeInInfo, bpit, cpit);
816 }
817
818
819 bool hasAbstractBetween(ParagraphList const &paragraphs, pit_type const bpitAbstract, pit_type const epitAbstract)
820 {
821         // Hypothesis: the paragraphs between bpitAbstract and epitAbstract can be considered an abstract because they
822         // are just after a document or part title.
823         if (epitAbstract - bpitAbstract <= 0)
824                 return false;
825
826         // If there is something between these paragraphs, check if it's compatible with an abstract (i.e. some text).
827         pit_type bpit = bpitAbstract;
828         while (bpit < epitAbstract) {
829                 const Paragraph &p = paragraphs.at(bpit);
830
831                 if (p.layout().name() == from_ascii("Abstract"))
832                         return true;
833
834                 if (!p.insetList().empty()) {
835                         for (const auto &i : p.insetList()) {
836                                 if (i.inset->getText(0) != nullptr) {
837                                         return true;
838                                 }
839                         }
840                 }
841                 bpit++;
842         }
843         return false;
844 }
845
846
847 pit_type generateDocBookParagraphWithoutSectioning(
848                 Text const & text,
849                 Buffer const & buf,
850                 XMLStream & xs,
851                 OutputParams const & runparams,
852                 ParagraphList const & paragraphs,
853                 pit_type bpit,
854                 pit_type epit)
855 {
856         auto par = paragraphs.iterator_at(bpit);
857         auto lastStartedPar = par;
858         ParagraphList::const_iterator send;
859         auto const pend =
860                         (epit == (int) paragraphs.size()) ?
861                         paragraphs.end() : paragraphs.iterator_at(epit);
862
863         while (bpit < epit) {
864                 tie(par, send) = makeAny(text, buf, xs, runparams, par, send, pend);
865                 bpit += distance(lastStartedPar, par);
866                 lastStartedPar = par;
867         }
868
869         return bpit;
870 }
871
872
873 void outputDocBookInfo(
874                 Text const & text,
875                 Buffer const & buf,
876                 XMLStream & xs,
877                 OutputParams const & runparams,
878                 ParagraphList const & paragraphs,
879                 DocBookInfoTag const & info,
880                 pit_type bpitAbstract,
881                 pit_type const epitAbstract)
882 {
883         // Consider everything between bpitAbstract and epitAbstract (excluded) as paragraphs for the abstract.
884         // Use bpitAbstract >= epitAbstract to indicate there is no abstract.
885
886         set<pit_type> shouldBeInInfo;
887         set<pit_type> mustBeInInfo;
888         pit_type bpitInfo;
889         pit_type epitInfo;
890         tie(shouldBeInInfo, mustBeInInfo, bpitInfo, epitInfo) = info;
891
892         // Perform an additional check on the abstract. Sometimes, there are many paragraphs that should go
893         // into the abstract, but none generates actual content. Thus, first generate to a temporary stream,
894         // then only create the <abstract> tag if these paragraphs generate some content.
895         // This check must be performed *before* a decision on whether or not to output <info> is made.
896         bool hasAbstract = hasAbstractBetween(paragraphs, bpitAbstract, epitAbstract);
897         docstring abstract;
898         if (hasAbstract) {
899                 odocstringstream os2;
900                 XMLStream xs2(os2);
901                 generateDocBookParagraphWithoutSectioning(text, buf, xs2, runparams, paragraphs, bpitAbstract, epitAbstract);
902
903                 // Actually output the abstract if there is something to do. Don't count line feeds or spaces in this,
904                 // even though they must be properly output if there is some abstract.
905                 docstring abstractContent = os2.str();
906                 static const lyx::regex reg("[ \\r\\n]*");
907                 abstractContent = from_utf8(lyx::regex_replace(to_utf8(abstractContent), reg, string("")));
908
909                 // Nothing? Then there is no abstract!
910                 if (abstractContent.empty())
911                         hasAbstract = false;
912         }
913
914         // The abstract must go in <info>.
915         bool needInfo = !mustBeInInfo.empty() || hasAbstract;
916
917         // Start the <info> tag if required.
918         if (needInfo) {
919                 xs.startDivision(false);
920                 xs << xml::StartTag("info");
921                 xs << xml::CR();
922         }
923
924         // Output the elements that should go in <info>.
925         generateDocBookParagraphWithoutSectioning(text, buf, xs, runparams, paragraphs, bpitInfo, epitInfo);
926
927         if (hasAbstract && !abstract.empty()) { // The second test is probably superfluous.
928                 string tag = paragraphs[bpitAbstract].layout().docbookforceabstracttag();
929                 if (tag == "NONE")
930                         tag = "abstract";
931
932                 xs << xml::StartTag(tag);
933                 xs << xml::CR();
934                 xs << XMLStream::ESCAPE_NONE << abstract;
935                 xs << xml::EndTag(tag);
936                 xs << xml::CR();
937         }
938
939         // End the <info> tag if it was started.
940         if (needInfo) {
941                 xs << xml::EndTag("info");
942                 xs << xml::CR();
943                 xs.endDivision();
944         }
945 }
946
947
948 void docbookFirstParagraphs(
949                 Text const &text,
950                 Buffer const &buf,
951                 XMLStream &xs,
952                 OutputParams const &runparams,
953                 pit_type epit)
954 {
955         // Handle the beginning of the document, supposing it has sections.
956         // Major role: output the first <info> tag.
957
958         ParagraphList const &paragraphs = text.paragraphs();
959         pit_type bpit = runparams.par_begin;
960         DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit);
961         outputDocBookInfo(text, buf, xs, runparams, paragraphs, info, get<3>(info), epit);
962 }
963
964
965 bool isParagraphEmpty(const Paragraph &par)
966 {
967         InsetList const &insets = par.insetList();
968         size_t insetsLength = distance(insets.begin(), insets.end());
969         bool hasParagraphOnlyNote = insetsLength == 1 && insets.get(0) && insets.get(0)->asInsetCollapsible() &&
970                                                                 dynamic_cast<InsetNote *>(insets.get(0));
971         return hasParagraphOnlyNote;
972 }
973
974
975 void docbookSimpleAllParagraphs(
976                 Text const & text,
977                 Buffer const & buf,
978                 XMLStream & xs,
979                 OutputParams const & runparams)
980 {
981         // Handle the document, supposing it has no sections (i.e. a "simple" document).
982
983         // First, the <info> tag.
984         ParagraphList const &paragraphs = text.paragraphs();
985         pit_type bpit = runparams.par_begin;
986         pit_type const epit = runparams.par_end;
987         DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit);
988         outputDocBookInfo(text, buf, xs, runparams, paragraphs, info, 0, 0);
989         bpit = get<3>(info); // Generate the content starting from the end of the <info> part.
990
991         // Then, the content.
992         ParagraphList::const_iterator const pend =
993                         (epit == (int) paragraphs.size()) ?
994                         paragraphs.end() : paragraphs.iterator_at(epit);
995
996         while (bpit < epit) {
997                 auto par = paragraphs.iterator_at(bpit);
998                 ParagraphList::const_iterator const lastStartedPar = par;
999                 ParagraphList::const_iterator send;
1000
1001                 if (isParagraphEmpty(*par)) {
1002                         ++par;
1003                         bpit += distance(lastStartedPar, par);
1004                         continue;
1005                 }
1006
1007                 // Generate this paragraph.
1008                 tie(par, send) = makeAny(text, buf, xs, runparams, par, send, pend);
1009                 bpit += distance(lastStartedPar, par);
1010         }
1011 }
1012
1013
1014 void docbookParagraphs(Text const &text,
1015                                            Buffer const &buf,
1016                                            XMLStream &xs,
1017                                            OutputParams const &runparams) {
1018         ParagraphList const &paragraphs = text.paragraphs();
1019         if (runparams.par_begin == runparams.par_end) {
1020                 runparams.par_begin = 0;
1021                 runparams.par_end = paragraphs.size();
1022         }
1023         pit_type bpit = runparams.par_begin;
1024         pit_type const epit = runparams.par_end;
1025         LASSERT(bpit < epit,
1026                         {
1027                                 xs << XMLStream::ESCAPE_NONE << "<!-- DocBook output error! -->\n";
1028                                 return;
1029                         });
1030
1031         ParagraphList::const_iterator const pend =
1032                         (epit == (int) paragraphs.size()) ?
1033                         paragraphs.end() : paragraphs.iterator_at(epit);
1034         std::stack<std::pair<int, string>> headerLevels; // Used to determine when to open/close sections: store the depth
1035         // of the section and the tag that was used to open it.
1036
1037         // Detect whether the document contains sections. If there are no sections, there can be no automatically
1038         // discovered abstract.
1039         bool documentHasSections;
1040         pit_type eppit;
1041         tie(documentHasSections, eppit) = hasDocumentSectioning(paragraphs, bpit, epit);
1042
1043         if (documentHasSections) {
1044                 docbookFirstParagraphs(text, buf, xs, runparams, eppit);
1045                 bpit = eppit;
1046         } else {
1047                 docbookSimpleAllParagraphs(text, buf, xs, runparams);
1048                 return;
1049         }
1050
1051         bool currentlyInAppendix = false;
1052
1053         while (bpit < epit) {
1054                 OutputParams ourparams = runparams;
1055
1056                 auto par = paragraphs.iterator_at(bpit);
1057                 if (par->params().startOfAppendix())
1058                         currentlyInAppendix = true;
1059                 Layout const &style = par->layout();
1060                 ParagraphList::const_iterator const lastStartedPar = par;
1061                 ParagraphList::const_iterator send;
1062
1063                 if (isParagraphEmpty(*par)) {
1064                         ++par;
1065                         bpit += distance(lastStartedPar, par);
1066                         continue;
1067                 }
1068
1069                 // Think about adding <section> and/or </section>s.
1070                 const bool isLayoutSectioning = style.category() == from_utf8("Sectioning");
1071                 if (isLayoutSectioning) {
1072                         int level = style.toclevel;
1073
1074                         // Need to close a previous section if it has the same level or a higher one (close <section> if opening a <h2>
1075                         // after a <h2>, <h3>, <h4>, <h5> or <h6>). More examples:
1076                         //   - current: h2; back: h1; do not close any <section>
1077                         //   - current: h1; back: h2; close two <section> (first the <h2>, then the <h1>, so a new <h1> can come)
1078                         while (!headerLevels.empty() && level <= headerLevels.top().first) {
1079                                 int stackLevel = headerLevels.top().first;
1080                                 docstring stackTag = from_utf8("</" + headerLevels.top().second + ">");
1081                                 headerLevels.pop();
1082
1083                                 // Output the tag only if it corresponds to a legit section.
1084                                 if (stackLevel != Layout::NOT_IN_TOC)
1085                                         xs << XMLStream::ESCAPE_NONE << stackTag << xml::CR();
1086                         }
1087
1088                         // Open the new section: first push it onto the stack, then output it in DocBook.
1089                         string sectionTag = (currentlyInAppendix && style.docbooksectiontag() == "chapter") ?
1090                                                                 "appendix" : style.docbooksectiontag();
1091                         headerLevels.push(std::make_pair(level, sectionTag));
1092
1093                         // Some sectioning-like elements should not be output (such as FrontMatter).
1094                         if (level != Layout::NOT_IN_TOC) {
1095                                 // Look for a label in the title, i.e. a InsetLabel as a child.
1096                                 docstring id = docstring();
1097                                 for (pos_type i = 0; i < par->size(); ++i) {
1098                                         Inset const *inset = par->getInset(i);
1099                                         if (inset) {
1100                                                 if (auto label = dynamic_cast<InsetLabel const *>(inset)) {
1101                                                         // Generate the attributes for the section if need be.
1102                                                         id += "xml:id=\"" + xml::cleanID(label->screenLabel()) + "\"";
1103
1104                                                         // Don't output the ID as a DocBook <anchor>.
1105                                                         ourparams.docbook_anchors_to_ignore.emplace(label->screenLabel());
1106
1107                                                         // Cannot have multiple IDs per tag.
1108                                                         break;
1109                                                 }
1110                                         }
1111                                 }
1112
1113                                 // Write the open tag for this section.
1114                                 docstring tag = from_utf8("<" + sectionTag);
1115                                 if (!id.empty())
1116                                         tag += from_utf8(" ") + id;
1117                                 tag += from_utf8(">");
1118                                 xs << XMLStream::ESCAPE_NONE << tag;
1119                                 xs << xml::CR();
1120                         }
1121                 }
1122
1123                 // Close all sections before the bibliography.
1124                 // TODO: Only close all when the bibliography is at the end of the document? Or force to output the bibliography at the end of the document? Or don't care (as allowed by DocBook)?
1125                 auto insetsLength = distance(par->insetList().begin(), par->insetList().end());
1126                 if (insetsLength > 0) {
1127                         Inset const *firstInset = par->getInset(0);
1128                         if (firstInset && dynamic_cast<InsetBibtex const *>(firstInset)) {
1129                                 while (!headerLevels.empty()) {
1130                                         int level = headerLevels.top().first;
1131                                         docstring tag = from_utf8("</" + headerLevels.top().second + ">");
1132                                         headerLevels.pop();
1133
1134                                         // Output the tag only if it corresponds to a legit section.
1135                                         if (level != Layout::NOT_IN_TOC) {
1136                                                 xs << XMLStream::ESCAPE_NONE << tag;
1137                                                 xs << xml::CR();
1138                                         }
1139                                 }
1140                         }
1141                 }
1142
1143                 // Generate this paragraph.
1144                 tie(par, send) = makeAny(text, buf, xs, ourparams, par, send, pend);
1145                 bpit += distance(lastStartedPar, par);
1146         }
1147
1148         // If need be, close <section>s, but only at the end of the document (otherwise, dealt with at the beginning
1149         // of the loop).
1150         while (!headerLevels.empty() && headerLevels.top().first > Layout::NOT_IN_TOC) {
1151                 docstring tag = from_utf8("</" + headerLevels.top().second + ">");
1152                 headerLevels.pop();
1153                 xs << XMLStream::ESCAPE_NONE << tag;
1154                 xs << xml::CR();
1155         }
1156 }
1157
1158 } // namespace lyx