]> git.lyx.org Git - lyx.git/blob - src/output_docbook.cpp
DocBook: fix delimitation of bibliographies.
[lyx.git] / src / output_docbook.cpp
1 /**
2  * \file output_docbook.cpp
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author Lars Gullik Bjønnes
7  * \author José Matos
8  *
9  * Full author contact details are available in file CREDITS.
10  */
11
12 #include <config.h>
13
14 #include "Buffer.h"
15 #include "buffer_funcs.h"
16 #include "BufferParams.h"
17 #include "Font.h"
18 #include "InsetList.h"
19 #include "Layout.h"
20 #include "OutputParams.h"
21 #include "Paragraph.h"
22 #include "ParagraphList.h"
23 #include "ParagraphParameters.h"
24 #include "xml.h"
25 #include "Text.h"
26 #include "TextClass.h"
27
28 #include "insets/InsetBibtex.h"
29 #include "insets/InsetBibitem.h"
30 #include "insets/InsetLabel.h"
31 #include "insets/InsetNote.h"
32
33 #include "support/convert.h"
34 #include "support/debug.h"
35 #include "support/lassert.h"
36 #include "support/lstrings.h"
37 #include "support/textutils.h"
38
39 #include "support/regex.h"
40
41 #include <stack>
42 #include <iostream>
43 #include <algorithm>
44 #include <sstream>
45
46 using namespace std;
47 using namespace lyx::support;
48
49 namespace lyx {
50
51 namespace {
52
53 std::string const fontToDocBookTag(xml::FontTypes type)
54 {
55         switch (type) {
56         case xml::FontTypes::FT_EMPH:
57         case xml::FontTypes::FT_BOLD:
58                 return "emphasis";
59         case xml::FontTypes::FT_NOUN:
60                 return "person";
61         case xml::FontTypes::FT_UBAR:
62         case xml::FontTypes::FT_WAVE:
63         case xml::FontTypes::FT_DBAR:
64         case xml::FontTypes::FT_SOUT:
65         case xml::FontTypes::FT_XOUT:
66         case xml::FontTypes::FT_ITALIC:
67         case xml::FontTypes::FT_UPRIGHT:
68         case xml::FontTypes::FT_SLANTED:
69         case xml::FontTypes::FT_SMALLCAPS:
70         case xml::FontTypes::FT_ROMAN:
71         case xml::FontTypes::FT_SANS:
72                 return "emphasis";
73         case xml::FontTypes::FT_TYPE:
74                 return "code";
75         case xml::FontTypes::FT_SIZE_TINY:
76         case xml::FontTypes::FT_SIZE_SCRIPT:
77         case xml::FontTypes::FT_SIZE_FOOTNOTE:
78         case xml::FontTypes::FT_SIZE_SMALL:
79         case xml::FontTypes::FT_SIZE_NORMAL:
80         case xml::FontTypes::FT_SIZE_LARGE:
81         case xml::FontTypes::FT_SIZE_LARGER:
82         case xml::FontTypes::FT_SIZE_LARGEST:
83         case xml::FontTypes::FT_SIZE_HUGE:
84         case xml::FontTypes::FT_SIZE_HUGER:
85         case xml::FontTypes::FT_SIZE_INCREASE:
86         case xml::FontTypes::FT_SIZE_DECREASE:
87                 return "emphasis";
88         default:
89                 return "";
90         }
91 }
92
93 string fontToRole(xml::FontTypes type)
94 {
95         // Specific fonts are achieved with roles. The only common ones are "" for basic emphasis,
96         // and "bold"/"strong" for bold. With some specific options, other roles are copied into
97         // HTML output (via the DocBook XSLT sheets); otherwise, if not recognised, they are just ignored.
98         // Hence, it is not a problem to have many roles by default here.
99         // See https://www.sourceware.org/ml/docbook/2003-05/msg00269.html
100         switch (type) {
101         case xml::FontTypes::FT_ITALIC:
102         case xml::FontTypes::FT_EMPH:
103                 return "";
104         case xml::FontTypes::FT_BOLD:
105                 return "bold";
106         case xml::FontTypes::FT_NOUN:
107                 return ""; // Outputs a <person>
108         case xml::FontTypes::FT_TYPE:
109                 return ""; // Outputs a <code>
110         case xml::FontTypes::FT_UBAR:
111                 return "underline";
112
113                 // All other roles are non-standard for DocBook.
114
115         case xml::FontTypes::FT_WAVE:
116                 return "wave";
117         case xml::FontTypes::FT_DBAR:
118                 return "dbar";
119         case xml::FontTypes::FT_SOUT:
120                 return "sout";
121         case xml::FontTypes::FT_XOUT:
122                 return "xout";
123         case xml::FontTypes::FT_UPRIGHT:
124                 return "upright";
125         case xml::FontTypes::FT_SLANTED:
126                 return "slanted";
127         case xml::FontTypes::FT_SMALLCAPS:
128                 return "smallcaps";
129         case xml::FontTypes::FT_ROMAN:
130                 return "roman";
131         case xml::FontTypes::FT_SANS:
132                 return "sans";
133         case xml::FontTypes::FT_SIZE_TINY:
134                 return "tiny";
135         case xml::FontTypes::FT_SIZE_SCRIPT:
136                 return "size_script";
137         case xml::FontTypes::FT_SIZE_FOOTNOTE:
138                 return "size_footnote";
139         case xml::FontTypes::FT_SIZE_SMALL:
140                 return "size_small";
141         case xml::FontTypes::FT_SIZE_NORMAL:
142                 return "size_normal";
143         case xml::FontTypes::FT_SIZE_LARGE:
144                 return "size_large";
145         case xml::FontTypes::FT_SIZE_LARGER:
146                 return "size_larger";
147         case xml::FontTypes::FT_SIZE_LARGEST:
148                 return "size_largest";
149         case xml::FontTypes::FT_SIZE_HUGE:
150                 return "size_huge";
151         case xml::FontTypes::FT_SIZE_HUGER:
152                 return "size_huger";
153         case xml::FontTypes::FT_SIZE_INCREASE:
154                 return "size_increase";
155         case xml::FontTypes::FT_SIZE_DECREASE:
156                 return "size_decrease";
157         default:
158                 return "";
159         }
160 }
161
162 string fontToAttribute(xml::FontTypes type) {
163         // If there is a role (i.e. nonstandard use of a tag), output the attribute. Otherwise, the sheer tag is sufficient
164         // for the font.
165         string role = fontToRole(type);
166         if (!role.empty()) {
167                 return "role='" + role + "'";
168         } else {
169                 return "";
170         }
171 }
172
173 } // end anonymous namespace
174
175
176 xml::FontTag docbookStartFontTag(xml::FontTypes type)
177 {
178         return xml::FontTag(from_utf8(fontToDocBookTag(type)), from_utf8(fontToAttribute(type)), type);
179 }
180
181
182 xml::EndFontTag docbookEndFontTag(xml::FontTypes type)
183 {
184         return xml::EndFontTag(from_utf8(fontToDocBookTag(type)), type);
185 }
186
187
188 namespace {
189
190 // convenience functions
191
192 void openParTag(XMLStream &xs, Layout const &lay)
193 {
194         if (lay.docbookwrappertag() != "NONE") {
195                 xs << xml::StartTag(lay.docbookwrappertag(), lay.docbookwrapperattr());
196         }
197
198         string tag = lay.docbooktag();
199         if (tag == "Plain Layout")
200                 tag = "para";
201
202         xs << xml::ParTag(tag, lay.docbookattr());
203 }
204
205
206 void closeTag(XMLStream &xs, Layout const &lay)
207 {
208         string tag = lay.docbooktag();
209         if (tag == "Plain Layout")
210                 tag = "para";
211
212         xs << xml::EndTag(tag);
213         if (lay.docbookwrappertag() != "NONE")
214                 xs << xml::EndTag(lay.docbookwrappertag());
215 }
216
217
218 void openLabelTag(XMLStream & xs, Layout const & lay) // Mostly for definition lists.
219 {
220         xs << xml::StartTag(lay.docbookitemlabeltag(), lay.docbookitemlabelattr());
221 }
222
223
224 void closeLabelTag(XMLStream & xs, Layout const & lay)
225 {
226         xs << xml::EndTag(lay.docbookitemlabeltag());
227         xs << xml::CR();
228 }
229
230
231 void openItemTag(XMLStream &xs, Layout const &lay)
232 {
233         xs << xml::StartTag(lay.docbookitemtag(), lay.docbookitemattr());
234 }
235
236
237 // Return true when new elements are output in a paragraph, false otherwise.
238 bool openInnerItemTag(XMLStream &xs, Layout const &lay)
239 {
240         if (lay.docbookiteminnertag() != "NONE") {
241                 xs << xml::CR();
242                 xs << xml::ParTag(lay.docbookiteminnertag(), lay.docbookiteminnerattr());
243
244                 if (lay.docbookiteminnertag() == "para") {
245                         return true;
246                 }
247         }
248         return false;
249 }
250
251
252 void closeInnerItemTag(XMLStream &xs, Layout const &lay)
253 {
254         if (lay.docbookiteminnertag()!= "NONE") {
255                 xs << xml::EndTag(lay.docbookiteminnertag());
256                 xs << xml::CR();
257         }
258 }
259
260
261 inline void closeItemTag(XMLStream &xs, Layout const &lay)
262 {
263         xs << xml::EndTag(lay.docbookitemtag());
264         xs << xml::CR();
265 }
266
267 // end of convenience functions
268
269 ParagraphList::const_iterator findLastParagraph(
270                 ParagraphList::const_iterator p,
271                 ParagraphList::const_iterator const & pend) {
272         for (++p; p != pend && p->layout().latextype == LATEX_PARAGRAPH; ++p);
273
274         return p;
275 }
276
277 ParagraphList::const_iterator findLastBibliographyParagraph(
278                 ParagraphList::const_iterator p,
279                 ParagraphList::const_iterator const & pend) {
280         for (++p; p != pend && p->layout().latextype == LATEX_BIB_ENVIRONMENT; ++p);
281
282         return p;
283 }
284
285
286 ParagraphList::const_iterator findEndOfEnvironment(
287                 ParagraphList::const_iterator const & pstart,
288                 ParagraphList::const_iterator const & pend)
289 {
290         ParagraphList::const_iterator p = pstart;
291         Layout const &bstyle = p->layout();
292         size_t const depth = p->params().depth();
293         for (++p; p != pend; ++p) {
294                 Layout const &style = p->layout();
295                 // It shouldn't happen that e.g. a section command occurs inside
296                 // a quotation environment, at a higher depth, but as of 6/2009,
297                 // it can happen. We pretend that it's just at lowest depth.
298                 if (style.latextype == LATEX_COMMAND)
299                         return p;
300
301                 // If depth is down, we're done
302                 if (p->params().depth() < depth)
303                         return p;
304
305                 // If depth is up, we're not done
306                 if (p->params().depth() > depth)
307                         continue;
308
309                 // FIXME I am not sure about the first check.
310                 // Surely we *could* have different layouts that count as
311                 // LATEX_PARAGRAPH, right?
312                 if (style.latextype == LATEX_PARAGRAPH || style != bstyle)
313                         return p;
314         }
315         return pend;
316 }
317
318
319 ParagraphList::const_iterator makeParagraphBibliography(
320                 Buffer const &buf,
321                 XMLStream &xs,
322                 OutputParams const &runparams,
323                 Text const &text,
324                 ParagraphList::const_iterator const & pbegin,
325                 ParagraphList::const_iterator const & pend)
326 {
327         auto const begin = text.paragraphs().begin();
328         auto const end = text.paragraphs().end();
329
330         // Find the paragraph *before* pbegin.
331         ParagraphList::const_iterator pbegin_before = begin;
332         if (pbegin != begin) {
333                 ParagraphList::const_iterator pbegin_before_next = begin;
334                 ++pbegin_before_next;
335
336                 while (pbegin_before_next != pbegin) {
337                         ++pbegin_before;
338                         ++pbegin_before_next;
339                 }
340         }
341
342         ParagraphList::const_iterator par = pbegin;
343
344         // If this is the first paragraph in a bibliography, open the bibliography tag.
345         if (pbegin != begin && pbegin_before->layout().latextype != LATEX_BIB_ENVIRONMENT) {
346                 xs << xml::StartTag("bibliography");
347                 xs << xml::CR();
348         }
349
350         // Generate the required paragraphs, but only if they are .
351         for (; par != pend; ++par) {
352                 // Start the precooked bibliography entry. This is very much like opening a paragraph tag.
353                 // Don't forget the citation ID!
354                 docstring attr;
355                 for (auto i = 0; i < par->size(); ++i) {
356                         Inset const *ip = par->getInset(0);
357                         if (ip != nullptr && ip->lyxCode() == BIBITEM_CODE) {
358                                 const auto * bibitem = dynamic_cast<const InsetBibitem*>(par->getInset(i));
359                                 attr = from_utf8("xml:id='") + bibitem->getParam("key") + from_utf8("'");
360                                 break;
361                         }
362                 }
363                 xs << xml::StartTag(from_utf8("bibliomixed"), attr);
364
365                 // Generate the entry.
366                 par->simpleDocBookOnePar(buf, xs, runparams, text.outerFont(distance(begin, par)), true, true, 0);
367
368                 // End the precooked bibliography entry.
369                 xs << xml::EndTag("bibliomixed");
370                 xs << xml::CR();
371         }
372
373         // If this is the last paragraph in a bibliography, close the bibliography tag.
374         if (par == end || par->layout().latextype != LATEX_BIB_ENVIRONMENT) {
375                 xs << xml::EndTag("bibliography");
376                 xs << xml::CR();
377         }
378
379         return pend;
380 }
381
382
383 ParagraphList::const_iterator makeParagraphs(
384                 Buffer const &buf,
385                 XMLStream &xs,
386                 OutputParams const &runparams,
387                 Text const &text,
388                 ParagraphList::const_iterator const & pbegin,
389                 ParagraphList::const_iterator const & pend)
390 {
391         ParagraphList::const_iterator const begin = text.paragraphs().begin();
392         ParagraphList::const_iterator par = pbegin;
393         for (; par != pend; ++par) {
394                 Layout const &lay = par->layout();
395
396                 // We want to open the paragraph tag if:
397                 //   (i) the current layout permits multiple paragraphs
398                 //  (ii) we are either not already inside a paragraph (HTMLIsBlock) OR
399                 //         we are, but this is not the first paragraph
400                 //
401                 // But there is also a special case, and we first see whether we are in it.
402                 // We do not want to open the paragraph tag if this paragraph contains
403                 // only one item, and that item is "inline", i.e., not HTMLIsBlock (such
404                 // as a branch). On the other hand, if that single item has a font change
405                 // applied to it, then we still do need to open the paragraph.
406                 //
407                 // Obviously, this is very fragile. The main reason we need to do this is
408                 // because of branches, e.g., a branch that contains an entire new section.
409                 // We do not really want to wrap that whole thing in a <div>...</div>.
410                 bool special_case = false;
411                 Inset const *specinset = par->size() == 1 ? par->getInset(0) : 0;
412                 if (specinset && !specinset->getLayout().htmlisblock()) { // TODO: Convert htmlisblock to a DocBook parameter?
413                         Layout const &style = par->layout();
414                         FontInfo const first_font = style.labeltype == LABEL_MANUAL ?
415                                                                                 style.labelfont : style.font;
416                         FontInfo const our_font =
417                                         par->getFont(buf.masterBuffer()->params(), 0,
418                                                                  text.outerFont(distance(begin, par))).fontInfo();
419
420                         if (first_font == our_font)
421                                 special_case = true;
422                 }
423
424                 // Plain layouts must be ignored.
425                 if (!special_case && buf.params().documentClass().isPlainLayout(lay) && !runparams.docbook_force_pars)
426                         special_case = true;
427                 // TODO: Could get rid of this with a DocBook equivalent to htmlisblock?
428                 if (!special_case && par->size() == 1 && par->getInset(0)) {
429                         Inset const * firstInset = par->getInset(0);
430
431                         // Floats cannot be in paragraphs.
432                         special_case = to_utf8(firstInset->layoutName()).substr(0, 6) == "Float:";
433
434                         // Bibliographies cannot be in paragraphs.
435                         if (!special_case && firstInset->asInsetCommand())
436                                 special_case = firstInset->asInsetCommand()->params().getCmdName() == "bibtex";
437
438                         // Equations do not deserve their own paragraph (DocBook allows them outside paragraphs).
439                         if (!special_case && firstInset->asInsetMath())
440                                 special_case = true;
441
442                         // ERTs are in comments, not paragraphs.
443                         if (!special_case && firstInset->lyxCode() == lyx::ERT_CODE)
444                                 special_case = true;
445
446                         // Listings should not get into their own paragraph.
447                         if (!special_case && firstInset->lyxCode() == lyx::LISTINGS_CODE)
448                                 special_case = true;
449                 }
450
451                 bool const open_par = runparams.docbook_make_pars
452                                                           && (!runparams.docbook_in_par || par != pbegin)
453                                                           && !special_case;
454
455                 // We want to issue the closing tag if either:
456                 //   (i)  We opened it, and either docbook_in_par is false,
457                 //              or we're not in the last paragraph, anyway.
458                 //   (ii) We didn't open it and docbook_in_par is true,
459                 //              but we are in the first par, and there is a next par.
460                 ParagraphList::const_iterator nextpar = par;
461                 ++nextpar;
462                 bool const close_par =
463                                 ((open_par && (!runparams.docbook_in_par || nextpar != pend))
464                                 || (!open_par && runparams.docbook_in_par && par == pbegin && nextpar != pend));
465
466                 if (open_par)
467                         openParTag(xs, lay);
468
469                 par->simpleDocBookOnePar(buf, xs, runparams, text.outerFont(distance(begin, par)), open_par, close_par, 0);
470
471                 if (close_par) {
472                         closeTag(xs, lay);
473                         xs << xml::CR();
474                 }
475         }
476         return pend;
477 }
478
479
480 bool isNormalEnv(Layout const &lay)
481 {
482         return lay.latextype == LATEX_ENVIRONMENT
483                    || lay.latextype == LATEX_BIB_ENVIRONMENT;
484 }
485
486
487 ParagraphList::const_iterator makeEnvironment(
488                 Buffer const &buf,
489                 XMLStream &xs,
490                 OutputParams const &runparams,
491                 Text const &text,
492                 ParagraphList::const_iterator const & pbegin,
493                 ParagraphList::const_iterator const & pend)
494 {
495         ParagraphList::const_iterator const begin = text.paragraphs().begin();
496         ParagraphList::const_iterator par = pbegin;
497         Layout const &bstyle = par->layout();
498         depth_type const origdepth = pbegin->params().depth();
499
500         // open tag for this environment
501         openParTag(xs, bstyle);
502         xs << xml::CR();
503
504         // we will on occasion need to remember a layout from before.
505         Layout const *lastlay = nullptr;
506
507         while (par != pend) {
508                 Layout const & style = par->layout();
509                 ParagraphList::const_iterator send;
510
511                 // Actual content of this paragraph.
512                 switch (style.latextype) {
513                 case LATEX_ENVIRONMENT:
514                 case LATEX_LIST_ENVIRONMENT:
515                 case LATEX_ITEM_ENVIRONMENT: {
516                         // There are two possibilities in this case.
517                         // One is that we are still in the environment in which we
518                         // started---which we will be if the depth is the same.
519                         if (par->params().depth() == origdepth) {
520                                 LATTEST(bstyle == style);
521                                 if (lastlay != nullptr) {
522                                         closeItemTag(xs, *lastlay);
523                                         if (lastlay->docbookitemwrappertag() != "NONE") {
524                                                 xs << xml::EndTag(lastlay->docbookitemwrappertag());
525                                                 xs << xml::CR();
526                                         }
527                                         lastlay = nullptr;
528                                 }
529
530                                 // this will be positive if we want to skip the
531                                 // initial word (if it's been taken for the label).
532                                 pos_type sep = 0;
533
534                                 // Open a wrapper tag if needed.
535                                 if (style.docbookitemwrappertag() != "NONE") {
536                                         xs << xml::StartTag(style.docbookitemwrappertag(), style.docbookitemwrapperattr());
537                                         xs << xml::CR();
538                                 }
539
540                                 // label output
541                                 if (style.labeltype != LABEL_NO_LABEL &&
542                                                 style.docbookitemlabeltag() != "NONE") {
543
544                                         if (isNormalEnv(style)) {
545                                                 // in this case, we print the label only for the first
546                                                 // paragraph (as in a theorem or an abstract).
547                                                 if (par == pbegin) {
548                                                         docstring const lbl = pbegin->params().labelString();
549                                                         if (!lbl.empty()) {
550                                                                 openLabelTag(xs, style);
551                                                                 xs << lbl;
552                                                                 closeLabelTag(xs, style);
553                                                         } else {
554                                                                 // No new line after closeLabelTag.
555                                                                 xs << xml::CR();
556                                                         }
557                                                 }
558                                         } else { // some kind of list
559                                                 if (style.labeltype == LABEL_MANUAL) {
560                                                         // Only variablelist gets here.
561
562                                                         openLabelTag(xs, style);
563                                                         sep = par->firstWordDocBook(xs, runparams);
564                                                         closeLabelTag(xs, style);
565                                                 } else {
566                                                         openLabelTag(xs, style);
567                                                         xs << par->params().labelString();
568                                                         closeLabelTag(xs, style);
569                                                 }
570                                         }
571                                 } // end label output
572
573                                 // Start generating the item.
574                                 bool wasInParagraph = runparams.docbook_in_par;
575                                 openItemTag(xs, style);
576                                 bool getsIntoParagraph = openInnerItemTag(xs, style);
577                                 OutputParams rp = runparams;
578                                 rp.docbook_in_par = wasInParagraph | getsIntoParagraph;
579
580                                 // Maybe the item is completely empty, i.e. if the first word ends at the end of the current paragraph
581                                 // AND if the next paragraph doesn't have the same depth (if there is such a paragraph).
582                                 // Common case: there is only the first word on the line, but there is a nested list instead
583                                 // of more text.
584                                 bool emptyItem = false;
585                                 if (sep == par->size()) {
586                                         auto next_par = par;
587                                         ++next_par;
588                                         if (next_par == text.paragraphs().end()) // There is no next paragraph.
589                                                 emptyItem = true;
590                                         else // There is a next paragraph: check depth.
591                                                 emptyItem = par->params().depth() >= next_par->params().depth();
592                                 }
593
594                                 if (emptyItem) {
595                                         // Avoid having an empty item, this is not valid DocBook. A single character is enough to force
596                                         // generation of a full <para>.
597                                         xs << ' ';
598                                 } else {
599                                         // Generate the rest of the paragraph, if need be.
600                                         par->simpleDocBookOnePar(buf, xs, rp, text.outerFont(distance(begin, par)), true, true, sep);
601                                 }
602
603                                 ++par;
604                                 if (getsIntoParagraph)
605                                         closeInnerItemTag(xs, style);
606
607                                 // We may not want to close the tag yet, in particular:
608                                 // If we're not at the end of the item...
609                                 if (par != pend
610                                         //  and are doing items...
611                                         && !isNormalEnv(style)
612                                         // and if the depth has changed...
613                                         && par->params().depth() != origdepth) {
614                                         // then we'll save this layout for later, and close it when
615                                         // we get another item.
616                                         lastlay = &style;
617                                 } else {
618                                         closeItemTag(xs, style);
619
620                                         // Eventually, close the item wrapper.
621                                         if (style.docbookitemwrappertag() != "NONE") {
622                                                 xs << xml::EndTag(style.docbookitemwrappertag());
623                                                 xs << xml::CR();
624                                         }
625                                 }
626                         }
627                         // The other possibility is that the depth has increased.
628                         else {
629                                 send = findEndOfEnvironment(par, pend);
630                                 par = makeEnvironment(buf, xs, runparams, text, par, send);
631                         }
632                         break;
633                 }
634                 case LATEX_PARAGRAPH:
635                         send = findLastParagraph(par, pend);
636                         par = makeParagraphs(buf, xs, runparams, text, par, send);
637                         break;
638                 case LATEX_BIB_ENVIRONMENT:
639                         send = findLastBibliographyParagraph(par, pend);
640                         par = makeParagraphBibliography(buf, xs, runparams, text, par, send);
641                         break;
642                 case LATEX_COMMAND:
643                         ++par;
644                         break;
645                 }
646         }
647
648         if (lastlay != nullptr) {
649                 closeItemTag(xs, *lastlay);
650                 if (lastlay->docbookitemwrappertag() != "NONE") {
651                         xs << xml::EndTag(lastlay->docbookitemwrappertag());
652                         xs << xml::CR();
653                 }
654         }
655         closeTag(xs, bstyle);
656         xs << xml::CR();
657         return pend;
658 }
659
660
661 void makeCommand(
662                 Buffer const & buf,
663                 XMLStream & xs,
664                 OutputParams const & runparams,
665                 Text const & text,
666                 ParagraphList::const_iterator const & pbegin)
667 {
668         Layout const &style = pbegin->layout();
669
670         // No need for labels, as they are handled by DocBook tags.
671
672         openParTag(xs, style);
673
674         ParagraphList::const_iterator const begin = text.paragraphs().begin();
675         pbegin->simpleDocBookOnePar(buf, xs, runparams,
676                                                                 text.outerFont(distance(begin, pbegin)));
677         closeTag(xs, style);
678         xs << xml::CR();
679 }
680
681 pair<ParagraphList::const_iterator, ParagraphList::const_iterator> makeAny(
682                 Text const &text,
683                 Buffer const &buf,
684                 XMLStream &xs,
685                 OutputParams const &ourparams,
686                 ParagraphList::const_iterator par,
687                 ParagraphList::const_iterator send,
688                 ParagraphList::const_iterator pend)
689 {
690         Layout const & style = par->layout();
691
692         switch (style.latextype) {
693                 case LATEX_COMMAND: {
694                         // The files with which we are working never have more than
695                         // one paragraph in a command structure.
696                         // FIXME
697                         // if (ourparams.docbook_in_par)
698                         //   fix it so we don't get sections inside standard, e.g.
699                         // note that we may then need to make runparams not const, so we
700                         // can communicate that back.
701                         // FIXME Maybe this fix should be in the routines themselves, in case
702                         // they are called from elsewhere.
703                         makeCommand(buf, xs, ourparams, text, par);
704                         ++par;
705                         break;
706                 }
707                 case LATEX_ENVIRONMENT:
708                 case LATEX_LIST_ENVIRONMENT:
709                 case LATEX_ITEM_ENVIRONMENT: {
710                         // FIXME Same fix here.
711                         send = findEndOfEnvironment(par, pend);
712                         par = makeEnvironment(buf, xs, ourparams, text, par, send);
713                         break;
714                 }
715                 case LATEX_BIB_ENVIRONMENT: {
716                         send = findLastBibliographyParagraph(par, pend);
717                         par = makeParagraphBibliography(buf, xs, ourparams, text, par, send);
718                         break;
719                 }
720                 case LATEX_PARAGRAPH: {
721                         send = findLastParagraph(par, pend);
722                         par = makeParagraphs(buf, xs, ourparams, text, par, send);
723                         break;
724                 }
725         }
726
727         return make_pair(par, send);
728 }
729
730 } // end anonymous namespace
731
732
// Result of hasDocumentSectioning(): whether a paragraph of category "Sectioning" was found, and the index
// at which the scan stopped (that paragraph if one was found, the end of the scanned range otherwise).
using DocBookDocumentSectioning = tuple<bool, pit_type>;
// Result of getParagraphsWithInfo(), in this order: paragraphs whose layout says "maybe" for <info>, paragraphs
// whose layout says "always", first paragraph that was scanned, paragraph at which the scan stopped.
using DocBookInfoTag = tuple<set<pit_type>, set<pit_type>, pit_type, pit_type>;
735
736
737 DocBookDocumentSectioning hasDocumentSectioning(ParagraphList const &paragraphs, pit_type bpit, pit_type const epit) {
738         bool documentHasSections = false;
739
740         while (bpit < epit) {
741                 Layout const &style = paragraphs[bpit].layout();
742                 documentHasSections |= style.category() == from_utf8("Sectioning");
743
744                 if (documentHasSections) {
745                         break;
746                 }
747                 bpit += 1;
748         }
749         // Paragraphs before the first section: [ runparams.par_begin ; eppit )
750
751         return make_tuple(documentHasSections, bpit);
752 }
753
754
755 DocBookInfoTag getParagraphsWithInfo(ParagraphList const &paragraphs, pit_type const bpit, pit_type const epit) {
756         set<pit_type> shouldBeInInfo;
757         set<pit_type> mustBeInInfo;
758
759         pit_type cpit = bpit;
760         while (cpit < epit) {
761                 // Skip paragraphs only containing one note.
762                 Paragraph const &par = paragraphs[cpit];
763                 if (par.size() == 1 && dynamic_cast<InsetNote*>(paragraphs[cpit].insetList().get(0))) {
764                         cpit += 1;
765                         continue;
766                 }
767
768                 // Based on layout information, store this paragraph in one set: should be in <info>, must be.
769                 Layout const &style = par.layout();
770
771                 if (style.docbookininfo() == "always") {
772                         mustBeInInfo.emplace(cpit);
773                 } else if (style.docbookininfo() == "maybe") {
774                         shouldBeInInfo.emplace(cpit);
775                 } else {
776                         // Hypothesis: the <info> parts should be grouped together near the beginning bpit.
777                         break;
778                 }
779                 cpit += 1;
780         }
781         // Now, cpit points to the last paragraph that has things that could go in <info>.
782         // bpit is still the beginning of the <info> part.
783
784         return make_tuple(shouldBeInInfo, mustBeInInfo, bpit, cpit);
785 }
786
787
788 bool hasAbstractBetween(ParagraphList const &paragraphs, pit_type const bpitAbstract, pit_type const epitAbstract)
789 {
790         // Hypothesis: the paragraphs between bpitAbstract and epitAbstract can be considered an abstract because they
791         // are just after a document or part title.
792         if (epitAbstract - bpitAbstract <= 0)
793                 return false;
794
795         // If there is something between these paragraphs, check if it's compatible with an abstract (i.e. some text).
796         pit_type bpit = bpitAbstract;
797         while (bpit < epitAbstract) {
798                 const Paragraph &p = paragraphs.at(bpit);
799
800                 if (p.layout().name() == from_ascii("Abstract"))
801                         return true;
802
803                 if (!p.insetList().empty()) {
804                         for (const auto &i : p.insetList()) {
805                                 if (i.inset->getText(0) != nullptr) {
806                                         return true;
807                                 }
808                         }
809                 }
810                 bpit++;
811         }
812         return false;
813 }
814
815
816 pit_type generateDocBookParagraphWithoutSectioning(
817                 Text const & text,
818                 Buffer const & buf,
819                 XMLStream & xs,
820                 OutputParams const & runparams,
821                 ParagraphList const & paragraphs,
822                 pit_type bpit,
823                 pit_type epit)
824 {
825         auto par = paragraphs.iterator_at(bpit);
826         auto lastStartedPar = par;
827         ParagraphList::const_iterator send;
828         auto const pend =
829                         (epit == (int) paragraphs.size()) ?
830                         paragraphs.end() : paragraphs.iterator_at(epit);
831
832         while (bpit < epit) {
833                 tie(par, send) = makeAny(text, buf, xs, runparams, par, send, pend);
834                 bpit += distance(lastStartedPar, par);
835                 lastStartedPar = par;
836         }
837
838         return bpit;
839 }
840
841
842 void outputDocBookInfo(
843                 Text const & text,
844                 Buffer const & buf,
845                 XMLStream & xs,
846                 OutputParams const & runparams,
847                 ParagraphList const & paragraphs,
848                 DocBookInfoTag const & info,
849                 pit_type bpitAbstract,
850                 pit_type const epitAbstract)
851 {
852         // Consider everything between bpitAbstract and epitAbstract (excluded) as paragraphs for the abstract.
853         // Use bpitAbstract >= epitAbstract to indicate there is no abstract.
854
855         set<pit_type> shouldBeInInfo;
856         set<pit_type> mustBeInInfo;
857         pit_type bpitInfo;
858         pit_type epitInfo;
859         tie(shouldBeInInfo, mustBeInInfo, bpitInfo, epitInfo) = info;
860
861         // Perform an additional check on the abstract. Sometimes, there are many paragraphs that should go
862         // into the abstract, but none generates actual content. Thus, first generate to a temporary stream,
863         // then only create the <abstract> tag if these paragraphs generate some content.
864         // This check must be performed *before* a decision on whether or not to output <info> is made.
865         bool hasAbstract = hasAbstractBetween(paragraphs, bpitAbstract, epitAbstract);
866         docstring abstract;
867         if (hasAbstract) {
868                 odocstringstream os2;
869                 XMLStream xs2(os2);
870                 generateDocBookParagraphWithoutSectioning(text, buf, xs2, runparams, paragraphs, bpitAbstract, epitAbstract);
871
872                 // Actually output the abstract if there is something to do. Don't count line feeds or spaces in this,
873                 // even though they must be properly output if there is some abstract.
874                 docstring abstractContent = os2.str();
875                 static const lyx::regex reg("[ \\r\\n]*");
876                 abstractContent = from_utf8(lyx::regex_replace(to_utf8(abstractContent), reg, string("")));
877
878                 // Nothing? Then there is no abstract!
879                 if (abstractContent.empty())
880                         hasAbstract = false;
881         }
882
883         // The abstract must go in <info>.
884         bool needInfo = !mustBeInInfo.empty() || hasAbstract;
885
886         // Start the <info> tag if required.
887         if (needInfo) {
888                 xs.startDivision(false);
889                 xs << xml::StartTag("info");
890                 xs << xml::CR();
891         }
892
893         // Output the elements that should go in <info>.
894         generateDocBookParagraphWithoutSectioning(text, buf, xs, runparams, paragraphs, bpitInfo, epitInfo);
895
896         if (hasAbstract && !abstract.empty()) { // The second test is probably superfluous.
897                 string tag = paragraphs[bpitAbstract].layout().docbookforceabstracttag();
898                 if (tag == "NONE")
899                         tag = "abstract";
900
901                 xs << xml::StartTag(tag);
902                 xs << xml::CR();
903                 xs << XMLStream::ESCAPE_NONE << abstract;
904                 xs << xml::EndTag(tag);
905                 xs << xml::CR();
906         }
907
908         // End the <info> tag if it was started.
909         if (needInfo) {
910                 xs << xml::EndTag("info");
911                 xs << xml::CR();
912                 xs.endDivision();
913         }
914 }
915
916
917 void docbookFirstParagraphs(
918                 Text const &text,
919                 Buffer const &buf,
920                 XMLStream &xs,
921                 OutputParams const &runparams,
922                 pit_type epit)
923 {
924         // Handle the beginning of the document, supposing it has sections.
925         // Major role: output the first <info> tag.
926
927         ParagraphList const &paragraphs = text.paragraphs();
928         pit_type bpit = runparams.par_begin;
929         DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit);
930         outputDocBookInfo(text, buf, xs, runparams, paragraphs, info, get<3>(info), epit);
931 }
932
933
934 bool isParagraphEmpty(const Paragraph &par)
935 {
936         InsetList const &insets = par.insetList();
937         size_t insetsLength = distance(insets.begin(), insets.end());
938         bool hasParagraphOnlyNote = insetsLength == 1 && insets.get(0) && insets.get(0)->asInsetCollapsible() &&
939                                                                 dynamic_cast<InsetNote *>(insets.get(0));
940         return hasParagraphOnlyNote;
941 }
942
943
944 void docbookSimpleAllParagraphs(
945                 Text const & text,
946                 Buffer const & buf,
947                 XMLStream & xs,
948                 OutputParams const & runparams)
949 {
950         // Handle the document, supposing it has no sections (i.e. a "simple" document).
951
952         // First, the <info> tag.
953         ParagraphList const &paragraphs = text.paragraphs();
954         pit_type bpit = runparams.par_begin;
955         pit_type const epit = runparams.par_end;
956         DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit);
957         outputDocBookInfo(text, buf, xs, runparams, paragraphs, info, 0, 0);
958         bpit = get<3>(info); // Generate the content starting from the end of the <info> part.
959
960         // Then, the content.
961         ParagraphList::const_iterator const pend =
962                         (epit == (int) paragraphs.size()) ?
963                         paragraphs.end() : paragraphs.iterator_at(epit);
964
965         while (bpit < epit) {
966                 auto par = paragraphs.iterator_at(bpit);
967                 ParagraphList::const_iterator const lastStartedPar = par;
968                 ParagraphList::const_iterator send;
969
970                 if (isParagraphEmpty(*par)) {
971                         ++par;
972                         bpit += distance(lastStartedPar, par);
973                         continue;
974                 }
975
976                 // Generate this paragraph.
977                 tie(par, send) = makeAny(text, buf, xs, runparams, par, send, pend);
978                 bpit += distance(lastStartedPar, par);
979         }
980 }
981
982
983 void docbookParagraphs(Text const &text,
984                                            Buffer const &buf,
985                                            XMLStream &xs,
986                                            OutputParams const &runparams) {
987         ParagraphList const &paragraphs = text.paragraphs();
988         if (runparams.par_begin == runparams.par_end) {
989                 runparams.par_begin = 0;
990                 runparams.par_end = paragraphs.size();
991         }
992         pit_type bpit = runparams.par_begin;
993         pit_type const epit = runparams.par_end;
994         LASSERT(bpit < epit,
995                         {
996                                 xs << XMLStream::ESCAPE_NONE << "<!-- DocBook output error! -->\n";
997                                 return;
998                         });
999
1000         ParagraphList::const_iterator const pend =
1001                         (epit == (int) paragraphs.size()) ?
1002                         paragraphs.end() : paragraphs.iterator_at(epit);
1003         std::stack<std::pair<int, string>> headerLevels; // Used to determine when to open/close sections: store the depth
1004         // of the section and the tag that was used to open it.
1005
1006         // Detect whether the document contains sections. If there are no sections, there can be no automatically
1007         // discovered abstract.
1008         bool documentHasSections;
1009         pit_type eppit;
1010         tie(documentHasSections, eppit) = hasDocumentSectioning(paragraphs, bpit, epit);
1011
1012         if (documentHasSections) {
1013                 docbookFirstParagraphs(text, buf, xs, runparams, eppit);
1014                 bpit = eppit;
1015         } else {
1016                 docbookSimpleAllParagraphs(text, buf, xs, runparams);
1017                 return;
1018         }
1019
1020         bool currentlyInAppendix = false;
1021
1022         while (bpit < epit) {
1023                 OutputParams ourparams = runparams;
1024
1025                 auto par = paragraphs.iterator_at(bpit);
1026                 if (par->params().startOfAppendix())
1027                         currentlyInAppendix = true;
1028                 Layout const &style = par->layout();
1029                 ParagraphList::const_iterator const lastStartedPar = par;
1030                 ParagraphList::const_iterator send;
1031
1032                 if (isParagraphEmpty(*par)) {
1033                         ++par;
1034                         bpit += distance(lastStartedPar, par);
1035                         continue;
1036                 }
1037
1038                 // Think about adding <section> and/or </section>s.
1039                 const bool isLayoutSectioning = style.category() == from_utf8("Sectioning");
1040                 if (isLayoutSectioning) {
1041                         int level = style.toclevel;
1042
1043                         // Need to close a previous section if it has the same level or a higher one (close <section> if opening a <h2>
1044                         // after a <h2>, <h3>, <h4>, <h5> or <h6>). More examples:
1045                         //   - current: h2; back: h1; do not close any <section>
1046                         //   - current: h1; back: h2; close two <section> (first the <h2>, then the <h1>, so a new <h1> can come)
1047                         while (!headerLevels.empty() && level <= headerLevels.top().first) {
1048                                 int stackLevel = headerLevels.top().first;
1049                                 docstring stackTag = from_utf8("</" + headerLevels.top().second + ">");
1050                                 headerLevels.pop();
1051
1052                                 // Output the tag only if it corresponds to a legit section.
1053                                 if (stackLevel != Layout::NOT_IN_TOC)
1054                                         xs << XMLStream::ESCAPE_NONE << stackTag << xml::CR();
1055                         }
1056
1057                         // Open the new section: first push it onto the stack, then output it in DocBook.
1058                         string sectionTag = (currentlyInAppendix && style.docbooksectiontag() == "chapter") ?
1059                                                                 "appendix" : style.docbooksectiontag();
1060                         headerLevels.push(std::make_pair(level, sectionTag));
1061
1062                         // Some sectioning-like elements should not be output (such as FrontMatter).
1063                         if (level != Layout::NOT_IN_TOC) {
1064                                 // Look for a label in the title, i.e. a InsetLabel as a child.
1065                                 docstring id = docstring();
1066                                 for (pos_type i = 0; i < par->size(); ++i) {
1067                                         Inset const *inset = par->getInset(i);
1068                                         if (inset) {
1069                                                 if (auto label = dynamic_cast<InsetLabel const *>(inset)) {
1070                                                         // Generate the attributes for the section if need be.
1071                                                         id += "xml:id=\"" + xml::cleanID(label->screenLabel()) + "\"";
1072
1073                                                         // Don't output the ID as a DocBook <anchor>.
1074                                                         ourparams.docbook_anchors_to_ignore.emplace(label->screenLabel());
1075
1076                                                         // Cannot have multiple IDs per tag.
1077                                                         break;
1078                                                 }
1079                                         }
1080                                 }
1081
1082                                 // Write the open tag for this section.
1083                                 docstring tag = from_utf8("<" + sectionTag);
1084                                 if (!id.empty())
1085                                         tag += from_utf8(" ") + id;
1086                                 tag += from_utf8(">");
1087                                 xs << XMLStream::ESCAPE_NONE << tag;
1088                                 xs << xml::CR();
1089                         }
1090                 }
1091
1092                 // Close all sections before the bibliography.
1093                 // TODO: Only close all when the bibliography is at the end of the document? Or force to output the bibliography at the end of the document? Or don't care (as allowed by DocBook)?
1094                 auto insetsLength = distance(par->insetList().begin(), par->insetList().end());
1095                 if (insetsLength > 0) {
1096                         Inset const *firstInset = par->getInset(0);
1097                         if (firstInset && dynamic_cast<InsetBibtex const *>(firstInset)) {
1098                                 while (!headerLevels.empty()) {
1099                                         int level = headerLevels.top().first;
1100                                         docstring tag = from_utf8("</" + headerLevels.top().second + ">");
1101                                         headerLevels.pop();
1102
1103                                         // Output the tag only if it corresponds to a legit section.
1104                                         if (level != Layout::NOT_IN_TOC) {
1105                                                 xs << XMLStream::ESCAPE_NONE << tag;
1106                                                 xs << xml::CR();
1107                                         }
1108                                 }
1109                         }
1110                 }
1111
1112                 // Generate this paragraph.
1113                 tie(par, send) = makeAny(text, buf, xs, ourparams, par, send, pend);
1114                 bpit += distance(lastStartedPar, par);
1115         }
1116
1117         // If need be, close <section>s, but only at the end of the document (otherwise, dealt with at the beginning
1118         // of the loop).
1119         while (!headerLevels.empty() && headerLevels.top().first > Layout::NOT_IN_TOC) {
1120                 docstring tag = from_utf8("</" + headerLevels.top().second + ">");
1121                 headerLevels.pop();
1122                 xs << XMLStream::ESCAPE_NONE << tag;
1123                 xs << xml::CR();
1124         }
1125 }
1126
1127 } // namespace lyx