]> git.lyx.org Git - features.git/blob - src/output_docbook.cpp
xHTML export: change filenames of exported images.
[features.git] / src / output_docbook.cpp
1 /**
2  * \file output_docbook.cpp
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author Lars Gullik Bjønnes
7  * \author José Matos
8  *
9  * Full author contact details are available in file CREDITS.
10  */
11
12 #include <config.h>
13
14 #include "Buffer.h"
15 #include "buffer_funcs.h"
16 #include "BufferParams.h"
17 #include "Font.h"
18 #include "InsetList.h"
19 #include "Layout.h"
20 #include "OutputParams.h"
21 #include "Paragraph.h"
22 #include "ParagraphList.h"
23 #include "ParagraphParameters.h"
24 #include "xml.h"
25 #include "Text.h"
26 #include "TextClass.h"
27
28 #include "insets/InsetBibtex.h"
29 #include "insets/InsetBibitem.h"
30 #include "insets/InsetLabel.h"
31 #include "insets/InsetNote.h"
32
33 #include "support/convert.h"
34 #include "support/debug.h"
35 #include "support/lassert.h"
36 #include "support/lstrings.h"
37 #include "support/textutils.h"
38
39 #include "support/regex.h"
40
41 #include <stack>
42 #include <iostream>
43 #include <algorithm>
44 #include <sstream>
45
46 using namespace std;
47 using namespace lyx::support;
48
49 namespace lyx {
50
51 namespace {
52
53 std::string const fontToDocBookTag(xml::FontTypes type)
54 {
55         switch (type) {
56         case xml::FontTypes::FT_EMPH:
57         case xml::FontTypes::FT_BOLD:
58                 return "emphasis";
59         case xml::FontTypes::FT_NOUN:
60                 return "person";
61         case xml::FontTypes::FT_UBAR:
62         case xml::FontTypes::FT_WAVE:
63         case xml::FontTypes::FT_DBAR:
64         case xml::FontTypes::FT_SOUT:
65         case xml::FontTypes::FT_XOUT:
66         case xml::FontTypes::FT_ITALIC:
67         case xml::FontTypes::FT_UPRIGHT:
68         case xml::FontTypes::FT_SLANTED:
69         case xml::FontTypes::FT_SMALLCAPS:
70         case xml::FontTypes::FT_ROMAN:
71         case xml::FontTypes::FT_SANS:
72                 return "emphasis";
73         case xml::FontTypes::FT_TYPE:
74                 return "code";
75         case xml::FontTypes::FT_SIZE_TINY:
76         case xml::FontTypes::FT_SIZE_SCRIPT:
77         case xml::FontTypes::FT_SIZE_FOOTNOTE:
78         case xml::FontTypes::FT_SIZE_SMALL:
79         case xml::FontTypes::FT_SIZE_NORMAL:
80         case xml::FontTypes::FT_SIZE_LARGE:
81         case xml::FontTypes::FT_SIZE_LARGER:
82         case xml::FontTypes::FT_SIZE_LARGEST:
83         case xml::FontTypes::FT_SIZE_HUGE:
84         case xml::FontTypes::FT_SIZE_HUGER:
85         case xml::FontTypes::FT_SIZE_INCREASE:
86         case xml::FontTypes::FT_SIZE_DECREASE:
87                 return "emphasis";
88         default:
89                 return "";
90         }
91 }
92
93 string fontToRole(xml::FontTypes type)
94 {
95         // Specific fonts are achieved with roles. The only common ones are "" for basic emphasis,
96         // and "bold"/"strong" for bold. With some specific options, other roles are copied into
97         // HTML output (via the DocBook XSLT sheets); otherwise, if not recognised, they are just ignored.
98         // Hence, it is not a problem to have many roles by default here.
99         // See https://www.sourceware.org/ml/docbook/2003-05/msg00269.html
100         switch (type) {
101         case xml::FontTypes::FT_ITALIC:
102         case xml::FontTypes::FT_EMPH:
103                 return "";
104         case xml::FontTypes::FT_BOLD:
105                 return "bold";
106         case xml::FontTypes::FT_NOUN:
107                 return ""; // Outputs a <person>
108         case xml::FontTypes::FT_TYPE:
109                 return ""; // Outputs a <code>
110         case xml::FontTypes::FT_UBAR:
111                 return "underline";
112
113                 // All other roles are non-standard for DocBook.
114
115         case xml::FontTypes::FT_WAVE:
116                 return "wave";
117         case xml::FontTypes::FT_DBAR:
118                 return "dbar";
119         case xml::FontTypes::FT_SOUT:
120                 return "sout";
121         case xml::FontTypes::FT_XOUT:
122                 return "xout";
123         case xml::FontTypes::FT_UPRIGHT:
124                 return "upright";
125         case xml::FontTypes::FT_SLANTED:
126                 return "slanted";
127         case xml::FontTypes::FT_SMALLCAPS:
128                 return "smallcaps";
129         case xml::FontTypes::FT_ROMAN:
130                 return "roman";
131         case xml::FontTypes::FT_SANS:
132                 return "sans";
133         case xml::FontTypes::FT_SIZE_TINY:
134                 return "tiny";
135         case xml::FontTypes::FT_SIZE_SCRIPT:
136                 return "size_script";
137         case xml::FontTypes::FT_SIZE_FOOTNOTE:
138                 return "size_footnote";
139         case xml::FontTypes::FT_SIZE_SMALL:
140                 return "size_small";
141         case xml::FontTypes::FT_SIZE_NORMAL:
142                 return "size_normal";
143         case xml::FontTypes::FT_SIZE_LARGE:
144                 return "size_large";
145         case xml::FontTypes::FT_SIZE_LARGER:
146                 return "size_larger";
147         case xml::FontTypes::FT_SIZE_LARGEST:
148                 return "size_largest";
149         case xml::FontTypes::FT_SIZE_HUGE:
150                 return "size_huge";
151         case xml::FontTypes::FT_SIZE_HUGER:
152                 return "size_huger";
153         case xml::FontTypes::FT_SIZE_INCREASE:
154                 return "size_increase";
155         case xml::FontTypes::FT_SIZE_DECREASE:
156                 return "size_decrease";
157         default:
158                 return "";
159         }
160 }
161
162 string fontToAttribute(xml::FontTypes type) {
163         // If there is a role (i.e. nonstandard use of a tag), output the attribute. Otherwise, the sheer tag is sufficient
164         // for the font.
165         string role = fontToRole(type);
166         if (!role.empty()) {
167                 return "role='" + role + "'";
168         } else {
169                 return "";
170         }
171 }
172
173 } // end anonymous namespace
174
175
176 xml::FontTag docbookStartFontTag(xml::FontTypes type)
177 {
178         return xml::FontTag(from_utf8(fontToDocBookTag(type)), from_utf8(fontToAttribute(type)), type);
179 }
180
181
182 xml::EndFontTag docbookEndFontTag(xml::FontTypes type)
183 {
184         return xml::EndFontTag(from_utf8(fontToDocBookTag(type)), type);
185 }
186
187
188 namespace {
189
190 // convenience functions
191
192 void openParTag(XMLStream &xs, Layout const &lay)
193 {
194         if (lay.docbookwrappertag() != "NONE") {
195                 xs << xml::StartTag(lay.docbookwrappertag(), lay.docbookwrapperattr());
196         }
197
198         string tag = lay.docbooktag();
199         if (tag == "Plain Layout")
200                 tag = "para";
201
202         xs << xml::ParTag(tag, lay.docbookattr());
203 }
204
205
206 void closeTag(XMLStream &xs, Layout const &lay)
207 {
208         string tag = lay.docbooktag();
209         if (tag == "Plain Layout")
210                 tag = "para";
211
212         xs << xml::EndTag(tag);
213         if (lay.docbookwrappertag() != "NONE")
214                 xs << xml::EndTag(lay.docbookwrappertag());
215 }
216
217
218 void openLabelTag(XMLStream & xs, Layout const & lay) // Mostly for definition lists.
219 {
220         xs << xml::StartTag(lay.docbookitemlabeltag(), lay.docbookitemlabelattr());
221 }
222
223
224 void closeLabelTag(XMLStream & xs, Layout const & lay)
225 {
226         xs << xml::EndTag(lay.docbookitemlabeltag());
227         xs << xml::CR();
228 }
229
230
231 void openItemTag(XMLStream &xs, Layout const &lay)
232 {
233         xs << xml::StartTag(lay.docbookitemtag(), lay.docbookitemattr());
234 }
235
236
237 // Return true when new elements are output in a paragraph, false otherwise.
238 bool openInnerItemTag(XMLStream &xs, Layout const &lay)
239 {
240         if (lay.docbookiteminnertag() != "NONE") {
241                 xs << xml::CR();
242                 xs << xml::ParTag(lay.docbookiteminnertag(), lay.docbookiteminnerattr());
243
244                 if (lay.docbookiteminnertag() == "para") {
245                         return true;
246                 }
247         }
248         return false;
249 }
250
251
252 void closeInnerItemTag(XMLStream &xs, Layout const &lay)
253 {
254         if (lay.docbookiteminnertag()!= "NONE") {
255                 xs << xml::EndTag(lay.docbookiteminnertag());
256                 xs << xml::CR();
257         }
258 }
259
260
261 inline void closeItemTag(XMLStream &xs, Layout const &lay)
262 {
263         xs << xml::EndTag(lay.docbookitemtag());
264         xs << xml::CR();
265 }
266
267 // end of convenience functions
268
269 ParagraphList::const_iterator findLastParagraph(
270                 ParagraphList::const_iterator p,
271                 ParagraphList::const_iterator const & pend) {
272         for (++p; p != pend && p->layout().latextype == LATEX_PARAGRAPH; ++p);
273
274         return p;
275 }
276
277
278 ParagraphList::const_iterator findEndOfEnvironment(
279                 ParagraphList::const_iterator const & pstart,
280                 ParagraphList::const_iterator const & pend)
281 {
282         ParagraphList::const_iterator p = pstart;
283         Layout const &bstyle = p->layout();
284         size_t const depth = p->params().depth();
285         for (++p; p != pend; ++p) {
286                 Layout const &style = p->layout();
287                 // It shouldn't happen that e.g. a section command occurs inside
288                 // a quotation environment, at a higher depth, but as of 6/2009,
289                 // it can happen. We pretend that it's just at lowest depth.
290                 if (style.latextype == LATEX_COMMAND)
291                         return p;
292
293                 // If depth is down, we're done
294                 if (p->params().depth() < depth)
295                         return p;
296
297                 // If depth is up, we're not done
298                 if (p->params().depth() > depth)
299                         continue;
300
301                 // FIXME I am not sure about the first check.
302                 // Surely we *could* have different layouts that count as
303                 // LATEX_PARAGRAPH, right?
304                 if (style.latextype == LATEX_PARAGRAPH || style != bstyle)
305                         return p;
306         }
307         return pend;
308 }
309
310
311 ParagraphList::const_iterator makeParagraphBibliography(
312                 Buffer const &buf,
313                 XMLStream &xs,
314                 OutputParams const &runparams,
315                 Text const &text,
316                 ParagraphList::const_iterator const & pbegin,
317                 ParagraphList::const_iterator const & pend)
318 {
319         auto const begin = text.paragraphs().begin();
320         auto const end = text.paragraphs().end();
321
322         // Find the paragraph *before* pbegin.
323         ParagraphList::const_iterator pbegin_before = begin;
324         if (pbegin != begin) {
325                 ParagraphList::const_iterator pbegin_before_next = begin;
326                 ++pbegin_before_next;
327
328                 while (pbegin_before_next != pbegin) {
329                         ++pbegin_before;
330                         ++pbegin_before_next;
331                 }
332         }
333
334         ParagraphList::const_iterator par = pbegin;
335
336         // If this is the first paragraph in a bibliography, open the bibliography tag.
337         if (pbegin != begin && pbegin_before->layout().latextype != LATEX_BIB_ENVIRONMENT) {
338                 xs << xml::StartTag("bibliography");
339                 xs << xml::CR();
340         }
341
342         // Generate the required paragraphs.
343         for (; par != pend; ++par) {
344                 // Start the precooked bibliography entry. This is very much like opening a paragraph tag.
345                 // Don't forget the citation ID!
346                 docstring attr;
347                 for (auto i = 0; i < par->size(); ++i) {
348                         Inset const *ip = par->getInset(0);
349                         if (ip != nullptr && ip->lyxCode() == BIBITEM_CODE) {
350                                 const auto * bibitem = dynamic_cast<const InsetBibitem*>(par->getInset(i));
351                                 attr = from_utf8("xml:id='") + bibitem->getParam("key") + from_utf8("'");
352                                 break;
353                         }
354                 }
355                 xs << xml::StartTag(from_utf8("bibliomixed"), attr);
356
357                 // Generate the entry.
358                 par->simpleDocBookOnePar(buf, xs, runparams, text.outerFont(distance(begin, par)), true, true, 0);
359
360                 // End the precooked bibliography entry.
361                 xs << xml::EndTag("bibliomixed");
362                 xs << xml::CR();
363         }
364
365         // If this is the last paragraph in a bibliography, close the bibliography tag.
366         if (par == end || par->layout().latextype != LATEX_BIB_ENVIRONMENT) {
367                 xs << xml::EndTag("bibliography");
368                 xs << xml::CR();
369         }
370
371         return pend;
372 }
373
374
375 ParagraphList::const_iterator makeParagraphs(
376                 Buffer const &buf,
377                 XMLStream &xs,
378                 OutputParams const &runparams,
379                 Text const &text,
380                 ParagraphList::const_iterator const & pbegin,
381                 ParagraphList::const_iterator const & pend)
382 {
383         ParagraphList::const_iterator const begin = text.paragraphs().begin();
384         ParagraphList::const_iterator par = pbegin;
385         for (; par != pend; ++par) {
386                 Layout const &lay = par->layout();
387
388                 // We want to open the paragraph tag if:
389                 //   (i) the current layout permits multiple paragraphs
390                 //  (ii) we are either not already inside a paragraph (HTMLIsBlock) OR
391                 //         we are, but this is not the first paragraph
392                 //
393                 // But there is also a special case, and we first see whether we are in it.
394                 // We do not want to open the paragraph tag if this paragraph contains
395                 // only one item, and that item is "inline", i.e., not HTMLIsBlock (such
396                 // as a branch). On the other hand, if that single item has a font change
397                 // applied to it, then we still do need to open the paragraph.
398                 //
399                 // Obviously, this is very fragile. The main reason we need to do this is
400                 // because of branches, e.g., a branch that contains an entire new section.
401                 // We do not really want to wrap that whole thing in a <div>...</div>.
402                 bool special_case = false;
403                 Inset const *specinset = par->size() == 1 ? par->getInset(0) : 0;
404                 if (specinset && !specinset->getLayout().htmlisblock()) { // TODO: Convert htmlisblock to a DocBook parameter?
405                         Layout const &style = par->layout();
406                         FontInfo const first_font = style.labeltype == LABEL_MANUAL ?
407                                                                                 style.labelfont : style.font;
408                         FontInfo const our_font =
409                                         par->getFont(buf.masterBuffer()->params(), 0,
410                                                                  text.outerFont(distance(begin, par))).fontInfo();
411
412                         if (first_font == our_font)
413                                 special_case = true;
414                 }
415
416                 // Plain layouts must be ignored.
417                 if (!special_case && buf.params().documentClass().isPlainLayout(lay) && !runparams.docbook_force_pars)
418                         special_case = true;
419                 // TODO: Could get rid of this with a DocBook equivalent to htmlisblock?
420                 if (!special_case && par->size() == 1 && par->getInset(0)) {
421                         Inset const * firstInset = par->getInset(0);
422
423                         // Floats cannot be in paragraphs.
424                         special_case = to_utf8(firstInset->layoutName()).substr(0, 6) == "Float:";
425
426                         // Bibliographies cannot be in paragraphs.
427                         if (!special_case && firstInset->asInsetCommand())
428                                 special_case = firstInset->asInsetCommand()->params().getCmdName() == "bibtex";
429
430                         // Equations do not deserve their own paragraph (DocBook allows them outside paragraphs).
431                         if (!special_case && firstInset->asInsetMath())
432                                 special_case = true;
433
434                         // ERTs are in comments, not paragraphs.
435                         if (!special_case && firstInset->lyxCode() == lyx::ERT_CODE)
436                                 special_case = true;
437
438                         // Listings should not get into their own paragraph.
439                         if (!special_case && firstInset->lyxCode() == lyx::LISTINGS_CODE)
440                                 special_case = true;
441                 }
442
443                 bool const open_par = runparams.docbook_make_pars
444                                                           && (!runparams.docbook_in_par || par != pbegin)
445                                                           && !special_case;
446
447                 // We want to issue the closing tag if either:
448                 //   (i)  We opened it, and either docbook_in_par is false,
449                 //              or we're not in the last paragraph, anyway.
450                 //   (ii) We didn't open it and docbook_in_par is true,
451                 //              but we are in the first par, and there is a next par.
452                 ParagraphList::const_iterator nextpar = par;
453                 ++nextpar;
454                 bool const close_par =
455                                 ((open_par && (!runparams.docbook_in_par || nextpar != pend))
456                                 || (!open_par && runparams.docbook_in_par && par == pbegin && nextpar != pend));
457
458                 if (open_par)
459                         openParTag(xs, lay);
460
461                 par->simpleDocBookOnePar(buf, xs, runparams, text.outerFont(distance(begin, par)), open_par, close_par, 0);
462
463                 if (close_par) {
464                         closeTag(xs, lay);
465                         xs << xml::CR();
466                 }
467         }
468         return pend;
469 }
470
471
472 bool isNormalEnv(Layout const &lay)
473 {
474         return lay.latextype == LATEX_ENVIRONMENT
475                    || lay.latextype == LATEX_BIB_ENVIRONMENT;
476 }
477
478
479 ParagraphList::const_iterator makeEnvironment(
480                 Buffer const &buf,
481                 XMLStream &xs,
482                 OutputParams const &runparams,
483                 Text const &text,
484                 ParagraphList::const_iterator const & pbegin,
485                 ParagraphList::const_iterator const & pend)
486 {
487         ParagraphList::const_iterator const begin = text.paragraphs().begin();
488         ParagraphList::const_iterator par = pbegin;
489         Layout const &bstyle = par->layout();
490         depth_type const origdepth = pbegin->params().depth();
491
492         // open tag for this environment
493         openParTag(xs, bstyle);
494         xs << xml::CR();
495
496         // we will on occasion need to remember a layout from before.
497         Layout const *lastlay = nullptr;
498
499         while (par != pend) {
500                 Layout const & style = par->layout();
501                 ParagraphList::const_iterator send;
502
503                 // Actual content of this paragraph.
504                 switch (style.latextype) {
505                 case LATEX_ENVIRONMENT:
506                 case LATEX_LIST_ENVIRONMENT:
507                 case LATEX_ITEM_ENVIRONMENT: {
508                         // There are two possibilities in this case.
509                         // One is that we are still in the environment in which we
510                         // started---which we will be if the depth is the same.
511                         if (par->params().depth() == origdepth) {
512                                 LATTEST(bstyle == style);
513                                 if (lastlay != nullptr) {
514                                         closeItemTag(xs, *lastlay);
515                                         if (lastlay->docbookitemwrappertag() != "NONE") {
516                                                 xs << xml::EndTag(lastlay->docbookitemwrappertag());
517                                                 xs << xml::CR();
518                                         }
519                                         lastlay = nullptr;
520                                 }
521
522                                 // this will be positive if we want to skip the
523                                 // initial word (if it's been taken for the label).
524                                 pos_type sep = 0;
525
526                                 // Open a wrapper tag if needed.
527                                 if (style.docbookitemwrappertag() != "NONE") {
528                                         xs << xml::StartTag(style.docbookitemwrappertag(), style.docbookitemwrapperattr());
529                                         xs << xml::CR();
530                                 }
531
532                                 // label output
533                                 if (style.labeltype != LABEL_NO_LABEL &&
534                                                 style.docbookitemlabeltag() != "NONE") {
535
536                                         if (isNormalEnv(style)) {
537                                                 // in this case, we print the label only for the first
538                                                 // paragraph (as in a theorem or an abstract).
539                                                 if (par == pbegin) {
540                                                         docstring const lbl = pbegin->params().labelString();
541                                                         if (!lbl.empty()) {
542                                                                 openLabelTag(xs, style);
543                                                                 xs << lbl;
544                                                                 closeLabelTag(xs, style);
545                                                         } else {
546                                                                 // No new line after closeLabelTag.
547                                                                 xs << xml::CR();
548                                                         }
549                                                 }
550                                         } else { // some kind of list
551                                                 if (style.labeltype == LABEL_MANUAL) {
552                                                         // Only variablelist gets here.
553
554                                                         openLabelTag(xs, style);
555                                                         sep = par->firstWordDocBook(xs, runparams);
556                                                         closeLabelTag(xs, style);
557                                                 } else {
558                                                         openLabelTag(xs, style);
559                                                         xs << par->params().labelString();
560                                                         closeLabelTag(xs, style);
561                                                 }
562                                         }
563                                 } // end label output
564
565                                 // Start generating the item.
566                                 bool wasInParagraph = runparams.docbook_in_par;
567                                 openItemTag(xs, style);
568                                 bool getsIntoParagraph = openInnerItemTag(xs, style);
569                                 OutputParams rp = runparams;
570                                 rp.docbook_in_par = wasInParagraph | getsIntoParagraph;
571
572                                 // Maybe the item is completely empty, i.e. if the first word ends at the end of the current paragraph
573                                 // AND if the next paragraph doesn't have the same depth (if there is such a paragraph).
574                                 // Common case: there is only the first word on the line, but there is a nested list instead
575                                 // of more text.
576                                 bool emptyItem = false;
577                                 if (sep == par->size()) {
578                                         auto next_par = par;
579                                         ++next_par;
580                                         if (next_par == text.paragraphs().end()) // There is no next paragraph.
581                                                 emptyItem = true;
582                                         else // There is a next paragraph: check depth.
583                                                 emptyItem = par->params().depth() >= next_par->params().depth();
584                                 }
585
586                                 if (emptyItem) {
587                                         // Avoid having an empty item, this is not valid DocBook. A single character is enough to force
588                                         // generation of a full <para>.
589                                         xs << ' ';
590                                 } else {
591                                         // Generate the rest of the paragraph, if need be.
592                                         par->simpleDocBookOnePar(buf, xs, rp, text.outerFont(distance(begin, par)), true, true, sep);
593                                 }
594
595                                 ++par;
596                                 if (getsIntoParagraph)
597                                         closeInnerItemTag(xs, style);
598
599                                 // We may not want to close the tag yet, in particular:
600                                 // If we're not at the end of the item...
601                                 if (par != pend
602                                         //  and are doing items...
603                                         && !isNormalEnv(style)
604                                         // and if the depth has changed...
605                                         && par->params().depth() != origdepth) {
606                                         // then we'll save this layout for later, and close it when
607                                         // we get another item.
608                                         lastlay = &style;
609                                 } else {
610                                         closeItemTag(xs, style);
611
612                                         // Eventually, close the item wrapper.
613                                         if (style.docbookitemwrappertag() != "NONE") {
614                                                 xs << xml::EndTag(style.docbookitemwrappertag());
615                                                 xs << xml::CR();
616                                         }
617                                 }
618                         }
619                         // The other possibility is that the depth has increased.
620                         else {
621                                 send = findEndOfEnvironment(par, pend);
622                                 par = makeEnvironment(buf, xs, runparams, text, par, send);
623                         }
624                         break;
625                 }
626                 case LATEX_PARAGRAPH:
627                         send = findLastParagraph(par, pend);
628                         par = makeParagraphs(buf, xs, runparams, text, par, send);
629                         break;
630                 case LATEX_BIB_ENVIRONMENT:
631                         send = findLastParagraph(par, pend);
632                         par = makeParagraphBibliography(buf, xs, runparams, text, par, send);
633                         break;
634                 case LATEX_COMMAND:
635                         ++par;
636                         break;
637                 }
638         }
639
640         if (lastlay != nullptr) {
641                 closeItemTag(xs, *lastlay);
642                 if (lastlay->docbookitemwrappertag() != "NONE") {
643                         xs << xml::EndTag(lastlay->docbookitemwrappertag());
644                         xs << xml::CR();
645                 }
646         }
647         closeTag(xs, bstyle);
648         xs << xml::CR();
649         return pend;
650 }
651
652
653 void makeCommand(
654                 Buffer const & buf,
655                 XMLStream & xs,
656                 OutputParams const & runparams,
657                 Text const & text,
658                 ParagraphList::const_iterator const & pbegin)
659 {
660         Layout const &style = pbegin->layout();
661
662         // No need for labels, as they are handled by DocBook tags.
663
664         openParTag(xs, style);
665
666         ParagraphList::const_iterator const begin = text.paragraphs().begin();
667         pbegin->simpleDocBookOnePar(buf, xs, runparams,
668                                                                 text.outerFont(distance(begin, pbegin)));
669         closeTag(xs, style);
670         xs << xml::CR();
671 }
672
673 pair<ParagraphList::const_iterator, ParagraphList::const_iterator> makeAny(
674                 Text const &text,
675                 Buffer const &buf,
676                 XMLStream &xs,
677                 OutputParams const &ourparams,
678                 ParagraphList::const_iterator par,
679                 ParagraphList::const_iterator send,
680                 ParagraphList::const_iterator pend)
681 {
682         Layout const & style = par->layout();
683
684         switch (style.latextype) {
685                 case LATEX_COMMAND: {
686                         // The files with which we are working never have more than
687                         // one paragraph in a command structure.
688                         // FIXME
689                         // if (ourparams.docbook_in_par)
690                         //   fix it so we don't get sections inside standard, e.g.
691                         // note that we may then need to make runparams not const, so we
692                         // can communicate that back.
693                         // FIXME Maybe this fix should be in the routines themselves, in case
694                         // they are called from elsewhere.
695                         makeCommand(buf, xs, ourparams, text, par);
696                         ++par;
697                         break;
698                 }
699                 case LATEX_ENVIRONMENT:
700                 case LATEX_LIST_ENVIRONMENT:
701                 case LATEX_ITEM_ENVIRONMENT: {
702                         // FIXME Same fix here.
703                         send = findEndOfEnvironment(par, pend);
704                         par = makeEnvironment(buf, xs, ourparams, text, par, send);
705                         break;
706                 }
707                 case LATEX_BIB_ENVIRONMENT: {
708                         send = findLastParagraph(par, pend);
709                         par = makeParagraphBibliography(buf, xs, ourparams, text, par, send);
710                         break;
711                 }
712                 case LATEX_PARAGRAPH: {
713                         send = findLastParagraph(par, pend);
714                         par = makeParagraphs(buf, xs, ourparams, text, par, send);
715                         break;
716                 }
717         }
718
719         return make_pair(par, send);
720 }
721
722 } // end anonymous namespace
723
724
725 using DocBookDocumentSectioning = tuple<bool, pit_type>;
726 using DocBookInfoTag = tuple<set<pit_type>, set<pit_type>, pit_type, pit_type>;
727
728
729 DocBookDocumentSectioning hasDocumentSectioning(ParagraphList const &paragraphs, pit_type bpit, pit_type const epit) {
730         bool documentHasSections = false;
731
732         while (bpit < epit) {
733                 Layout const &style = paragraphs[bpit].layout();
734                 documentHasSections |= style.category() == from_utf8("Sectioning");
735
736                 if (documentHasSections) {
737                         break;
738                 }
739                 bpit += 1;
740         }
741         // Paragraphs before the first section: [ runparams.par_begin ; eppit )
742
743         return make_tuple(documentHasSections, bpit);
744 }
745
746
747 DocBookInfoTag getParagraphsWithInfo(ParagraphList const &paragraphs, pit_type const bpit, pit_type const epit) {
748         set<pit_type> shouldBeInInfo;
749         set<pit_type> mustBeInInfo;
750
751         pit_type cpit = bpit;
752         while (cpit < epit) {
753                 // Skip paragraphs only containing one note.
754                 Paragraph const &par = paragraphs[cpit];
755                 if (par.size() == 1 && dynamic_cast<InsetNote*>(paragraphs[cpit].insetList().get(0))) {
756                         cpit += 1;
757                         continue;
758                 }
759
760                 // Based on layout information, store this paragraph in one set: should be in <info>, must be.
761                 Layout const &style = par.layout();
762
763                 if (style.docbookininfo() == "always") {
764                         mustBeInInfo.emplace(cpit);
765                 } else if (style.docbookininfo() == "maybe") {
766                         shouldBeInInfo.emplace(cpit);
767                 } else {
768                         // Hypothesis: the <info> parts should be grouped together near the beginning bpit.
769                         break;
770                 }
771                 cpit += 1;
772         }
773         // Now, cpit points to the last paragraph that has things that could go in <info>.
774         // bpit is still the beginning of the <info> part.
775
776         return make_tuple(shouldBeInInfo, mustBeInInfo, bpit, cpit);
777 }
778
779
780 bool hasAbstractBetween(ParagraphList const &paragraphs, pit_type const bpitAbstract, pit_type const epitAbstract)
781 {
782         // Hypothesis: the paragraphs between bpitAbstract and epitAbstract can be considered an abstract because they
783         // are just after a document or part title.
784         if (epitAbstract - bpitAbstract <= 0)
785                 return false;
786
787         // If there is something between these paragraphs, check if it's compatible with an abstract (i.e. some text).
788         pit_type bpit = bpitAbstract;
789         while (bpit < epitAbstract) {
790                 const Paragraph &p = paragraphs.at(bpit);
791
792                 if (p.layout().name() == from_ascii("Abstract"))
793                         return true;
794
795                 if (!p.insetList().empty()) {
796                         for (const auto &i : p.insetList()) {
797                                 if (i.inset->getText(0) != nullptr) {
798                                         return true;
799                                 }
800                         }
801                 }
802                 bpit++;
803         }
804         return false;
805 }
806
807
808 pit_type generateDocBookParagraphWithoutSectioning(
809                 Text const & text,
810                 Buffer const & buf,
811                 XMLStream & xs,
812                 OutputParams const & runparams,
813                 ParagraphList const & paragraphs,
814                 pit_type bpit,
815                 pit_type epit)
816 {
817         auto par = paragraphs.iterator_at(bpit);
818         auto lastStartedPar = par;
819         ParagraphList::const_iterator send;
820         auto const pend =
821                         (epit == (int) paragraphs.size()) ?
822                         paragraphs.end() : paragraphs.iterator_at(epit);
823
824         while (bpit < epit) {
825                 tie(par, send) = makeAny(text, buf, xs, runparams, par, send, pend);
826                 bpit += distance(lastStartedPar, par);
827                 lastStartedPar = par;
828         }
829
830         return bpit;
831 }
832
833
834 void outputDocBookInfo(
835                 Text const & text,
836                 Buffer const & buf,
837                 XMLStream & xs,
838                 OutputParams const & runparams,
839                 ParagraphList const & paragraphs,
840                 DocBookInfoTag const & info,
841                 pit_type bpitAbstract,
842                 pit_type const epitAbstract)
843 {
844         // Consider everything between bpitAbstract and epitAbstract (excluded) as paragraphs for the abstract.
845         // Use bpitAbstract >= epitAbstract to indicate there is no abstract.
846
847         set<pit_type> shouldBeInInfo;
848         set<pit_type> mustBeInInfo;
849         pit_type bpitInfo;
850         pit_type epitInfo;
851         tie(shouldBeInInfo, mustBeInInfo, bpitInfo, epitInfo) = info;
852
853         // Perform an additional check on the abstract. Sometimes, there are many paragraphs that should go
854         // into the abstract, but none generates actual content. Thus, first generate to a temporary stream,
855         // then only create the <abstract> tag if these paragraphs generate some content.
856         // This check must be performed *before* a decision on whether or not to output <info> is made.
857         bool hasAbstract = hasAbstractBetween(paragraphs, bpitAbstract, epitAbstract);
858         docstring abstract;
859         if (hasAbstract) {
860                 odocstringstream os2;
861                 XMLStream xs2(os2);
862                 generateDocBookParagraphWithoutSectioning(text, buf, xs2, runparams, paragraphs, bpitAbstract, epitAbstract);
863
864                 // Actually output the abstract if there is something to do. Don't count line feeds or spaces in this,
865                 // even though they must be properly output if there is some abstract.
866                 docstring abstractContent = os2.str();
867                 static const lyx::regex reg("[ \\r\\n]*");
868                 abstractContent = from_utf8(lyx::regex_replace(to_utf8(abstractContent), reg, string("")));
869
870                 // Nothing? Then there is no abstract!
871                 if (abstractContent.empty())
872                         hasAbstract = false;
873         }
874
875         // The abstract must go in <info>.
876         bool needInfo = !mustBeInInfo.empty() || hasAbstract;
877
878         // Start the <info> tag if required.
879         if (needInfo) {
880                 xs.startDivision(false);
881                 xs << xml::StartTag("info");
882                 xs << xml::CR();
883         }
884
885         // Output the elements that should go in <info>.
886         generateDocBookParagraphWithoutSectioning(text, buf, xs, runparams, paragraphs, bpitInfo, epitInfo);
887
888         if (hasAbstract && !abstract.empty()) { // The second test is probably superfluous.
889                 string tag = paragraphs[bpitAbstract].layout().docbookforceabstracttag();
890                 if (tag == "NONE")
891                         tag = "abstract";
892
893                 xs << xml::StartTag(tag);
894                 xs << xml::CR();
895                 xs << XMLStream::ESCAPE_NONE << abstract;
896                 xs << xml::EndTag(tag);
897                 xs << xml::CR();
898         }
899
900         // End the <info> tag if it was started.
901         if (needInfo) {
902                 xs << xml::EndTag("info");
903                 xs << xml::CR();
904                 xs.endDivision();
905         }
906 }
907
908
909 void docbookFirstParagraphs(
910                 Text const &text,
911                 Buffer const &buf,
912                 XMLStream &xs,
913                 OutputParams const &runparams,
914                 pit_type epit)
915 {
916         // Handle the beginning of the document, supposing it has sections.
917         // Major role: output the first <info> tag.
918
919         ParagraphList const &paragraphs = text.paragraphs();
920         pit_type bpit = runparams.par_begin;
921         DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit);
922         outputDocBookInfo(text, buf, xs, runparams, paragraphs, info, get<3>(info), epit);
923 }
924
925
926 bool isParagraphEmpty(const Paragraph &par)
927 {
928         InsetList const &insets = par.insetList();
929         size_t insetsLength = distance(insets.begin(), insets.end());
930         bool hasParagraphOnlyNote = insetsLength == 1 && insets.get(0) && insets.get(0)->asInsetCollapsible() &&
931                                                                 dynamic_cast<InsetNote *>(insets.get(0));
932         return hasParagraphOnlyNote;
933 }
934
935
936 void docbookSimpleAllParagraphs(
937                 Text const & text,
938                 Buffer const & buf,
939                 XMLStream & xs,
940                 OutputParams const & runparams)
941 {
942         // Handle the document, supposing it has no sections (i.e. a "simple" document).
943
944         // First, the <info> tag.
945         ParagraphList const &paragraphs = text.paragraphs();
946         pit_type bpit = runparams.par_begin;
947         pit_type const epit = runparams.par_end;
948         DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit);
949         outputDocBookInfo(text, buf, xs, runparams, paragraphs, info, 0, 0);
950         bpit = get<3>(info); // Generate the content starting from the end of the <info> part.
951
952         // Then, the content.
953         ParagraphList::const_iterator const pend =
954                         (epit == (int) paragraphs.size()) ?
955                         paragraphs.end() : paragraphs.iterator_at(epit);
956
957         while (bpit < epit) {
958                 auto par = paragraphs.iterator_at(bpit);
959                 ParagraphList::const_iterator const lastStartedPar = par;
960                 ParagraphList::const_iterator send;
961
962                 if (isParagraphEmpty(*par)) {
963                         ++par;
964                         bpit += distance(lastStartedPar, par);
965                         continue;
966                 }
967
968                 // Generate this paragraph.
969                 tie(par, send) = makeAny(text, buf, xs, runparams, par, send, pend);
970                 bpit += distance(lastStartedPar, par);
971         }
972 }
973
974
975 void docbookParagraphs(Text const &text,
976                                            Buffer const &buf,
977                                            XMLStream &xs,
978                                            OutputParams const &runparams) {
979         ParagraphList const &paragraphs = text.paragraphs();
980         if (runparams.par_begin == runparams.par_end) {
981                 runparams.par_begin = 0;
982                 runparams.par_end = paragraphs.size();
983         }
984         pit_type bpit = runparams.par_begin;
985         pit_type const epit = runparams.par_end;
986         LASSERT(bpit < epit,
987                         {
988                                 xs << XMLStream::ESCAPE_NONE << "<!-- DocBook output error! -->\n";
989                                 return;
990                         });
991
992         ParagraphList::const_iterator const pend =
993                         (epit == (int) paragraphs.size()) ?
994                         paragraphs.end() : paragraphs.iterator_at(epit);
995         std::stack<std::pair<int, string>> headerLevels; // Used to determine when to open/close sections: store the depth
996         // of the section and the tag that was used to open it.
997
998         // Detect whether the document contains sections. If there are no sections, there can be no automatically
999         // discovered abstract.
1000         bool documentHasSections;
1001         pit_type eppit;
1002         tie(documentHasSections, eppit) = hasDocumentSectioning(paragraphs, bpit, epit);
1003
1004         if (documentHasSections) {
1005                 docbookFirstParagraphs(text, buf, xs, runparams, eppit);
1006                 bpit = eppit;
1007         } else {
1008                 docbookSimpleAllParagraphs(text, buf, xs, runparams);
1009                 return;
1010         }
1011
1012         bool currentlyInAppendix = false;
1013
1014         while (bpit < epit) {
1015                 OutputParams ourparams = runparams;
1016
1017                 auto par = paragraphs.iterator_at(bpit);
1018                 if (par->params().startOfAppendix())
1019                         currentlyInAppendix = true;
1020                 Layout const &style = par->layout();
1021                 ParagraphList::const_iterator const lastStartedPar = par;
1022                 ParagraphList::const_iterator send;
1023
1024                 if (isParagraphEmpty(*par)) {
1025                         ++par;
1026                         bpit += distance(lastStartedPar, par);
1027                         continue;
1028                 }
1029
1030                 // Think about adding <section> and/or </section>s.
1031                 const bool isLayoutSectioning = style.category() == from_utf8("Sectioning");
1032                 if (isLayoutSectioning) {
1033                         int level = style.toclevel;
1034
1035                         // Need to close a previous section if it has the same level or a higher one (close <section> if opening a <h2>
1036                         // after a <h2>, <h3>, <h4>, <h5> or <h6>). More examples:
1037                         //   - current: h2; back: h1; do not close any <section>
1038                         //   - current: h1; back: h2; close two <section> (first the <h2>, then the <h1>, so a new <h1> can come)
1039                         while (!headerLevels.empty() && level <= headerLevels.top().first) {
1040                                 int stackLevel = headerLevels.top().first;
1041                                 docstring stackTag = from_utf8("</" + headerLevels.top().second + ">");
1042                                 headerLevels.pop();
1043
1044                                 // Output the tag only if it corresponds to a legit section.
1045                                 if (stackLevel != Layout::NOT_IN_TOC)
1046                                         xs << XMLStream::ESCAPE_NONE << stackTag << xml::CR();
1047                         }
1048
1049                         // Open the new section: first push it onto the stack, then output it in DocBook.
1050                         string sectionTag = (currentlyInAppendix && style.docbooksectiontag() == "chapter") ?
1051                                                                 "appendix" : style.docbooksectiontag();
1052                         headerLevels.push(std::make_pair(level, sectionTag));
1053
1054                         // Some sectioning-like elements should not be output (such as FrontMatter).
1055                         if (level != Layout::NOT_IN_TOC) {
1056                                 // Look for a label in the title, i.e. a InsetLabel as a child.
1057                                 docstring id = docstring();
1058                                 for (pos_type i = 0; i < par->size(); ++i) {
1059                                         Inset const *inset = par->getInset(i);
1060                                         if (inset) {
1061                                                 if (auto label = dynamic_cast<InsetLabel const *>(inset)) {
1062                                                         // Generate the attributes for the section if need be.
1063                                                         id += "xml:id=\"" + xml::cleanID(label->screenLabel()) + "\"";
1064
1065                                                         // Don't output the ID as a DocBook <anchor>.
1066                                                         ourparams.docbook_anchors_to_ignore.emplace(label->screenLabel());
1067
1068                                                         // Cannot have multiple IDs per tag.
1069                                                         break;
1070                                                 }
1071                                         }
1072                                 }
1073
1074                                 // Write the open tag for this section.
1075                                 docstring tag = from_utf8("<" + sectionTag);
1076                                 if (!id.empty())
1077                                         tag += from_utf8(" ") + id;
1078                                 tag += from_utf8(">");
1079                                 xs << XMLStream::ESCAPE_NONE << tag;
1080                                 xs << xml::CR();
1081                         }
1082                 }
1083
1084                 // Close all sections before the bibliography.
1085                 // TODO: Only close all when the bibliography is at the end of the document? Or force to output the bibliography at the end of the document? Or don't care (as allowed by DocBook)?
1086                 auto insetsLength = distance(par->insetList().begin(), par->insetList().end());
1087                 if (insetsLength > 0) {
1088                         Inset const *firstInset = par->getInset(0);
1089                         if (firstInset && dynamic_cast<InsetBibtex const *>(firstInset)) {
1090                                 while (!headerLevels.empty()) {
1091                                         int level = headerLevels.top().first;
1092                                         docstring tag = from_utf8("</" + headerLevels.top().second + ">");
1093                                         headerLevels.pop();
1094
1095                                         // Output the tag only if it corresponds to a legit section.
1096                                         if (level != Layout::NOT_IN_TOC) {
1097                                                 xs << XMLStream::ESCAPE_NONE << tag;
1098                                                 xs << xml::CR();
1099                                         }
1100                                 }
1101                         }
1102                 }
1103
1104                 // Generate this paragraph.
1105                 tie(par, send) = makeAny(text, buf, xs, ourparams, par, send, pend);
1106                 bpit += distance(lastStartedPar, par);
1107         }
1108
1109         // If need be, close <section>s, but only at the end of the document (otherwise, dealt with at the beginning
1110         // of the loop).
1111         while (!headerLevels.empty() && headerLevels.top().first > Layout::NOT_IN_TOC) {
1112                 docstring tag = from_utf8("</" + headerLevels.top().second + ">");
1113                 headerLevels.pop();
1114                 xs << XMLStream::ESCAPE_NONE << tag;
1115                 xs << xml::CR();
1116         }
1117 }
1118
1119 } // namespace lyx