]> git.lyx.org Git - lyx.git/blob - src/output_docbook.cpp
DocBook: use DocBookWrapperMergeWithPrevious in the code.
[lyx.git] / src / output_docbook.cpp
1 /**
2  * \file output_docbook.cpp
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author Lars Gullik Bjønnes
7  * \author José Matos
8  *
9  * Full author contact details are available in file CREDITS.
10  */
11
12 #include <config.h>
13
14 #include "Buffer.h"
15 #include "buffer_funcs.h"
16 #include "BufferParams.h"
17 #include "Font.h"
18 #include "InsetList.h"
19 #include "Layout.h"
20 #include "OutputParams.h"
21 #include "Paragraph.h"
22 #include "ParagraphList.h"
23 #include "ParagraphParameters.h"
24 #include "xml.h"
25 #include "Text.h"
26 #include "TextClass.h"
27
28 #include "insets/InsetBibtex.h"
29 #include "insets/InsetBibitem.h"
30 #include "insets/InsetLabel.h"
31 #include "insets/InsetNote.h"
32
33 #include "support/convert.h"
34 #include "support/debug.h"
35 #include "support/lassert.h"
36 #include "support/lstrings.h"
37 #include "support/textutils.h"
38
39 #include "support/regex.h"
40
41 #include <stack>
42 #include <iostream>
43 #include <algorithm>
44 #include <sstream>
45
46 using namespace std;
47 using namespace lyx::support;
48
49 namespace lyx {
50
51 namespace {
52
53 std::string const fontToDocBookTag(xml::FontTypes type)
54 {
55         switch (type) {
56         case xml::FontTypes::FT_EMPH:
57         case xml::FontTypes::FT_BOLD:
58                 return "emphasis";
59         case xml::FontTypes::FT_NOUN:
60                 return "person";
61         case xml::FontTypes::FT_UBAR:
62         case xml::FontTypes::FT_WAVE:
63         case xml::FontTypes::FT_DBAR:
64         case xml::FontTypes::FT_SOUT:
65         case xml::FontTypes::FT_XOUT:
66         case xml::FontTypes::FT_ITALIC:
67         case xml::FontTypes::FT_UPRIGHT:
68         case xml::FontTypes::FT_SLANTED:
69         case xml::FontTypes::FT_SMALLCAPS:
70         case xml::FontTypes::FT_ROMAN:
71         case xml::FontTypes::FT_SANS:
72                 return "emphasis";
73         case xml::FontTypes::FT_TYPE:
74                 return "code";
75         case xml::FontTypes::FT_SIZE_TINY:
76         case xml::FontTypes::FT_SIZE_SCRIPT:
77         case xml::FontTypes::FT_SIZE_FOOTNOTE:
78         case xml::FontTypes::FT_SIZE_SMALL:
79         case xml::FontTypes::FT_SIZE_NORMAL:
80         case xml::FontTypes::FT_SIZE_LARGE:
81         case xml::FontTypes::FT_SIZE_LARGER:
82         case xml::FontTypes::FT_SIZE_LARGEST:
83         case xml::FontTypes::FT_SIZE_HUGE:
84         case xml::FontTypes::FT_SIZE_HUGER:
85         case xml::FontTypes::FT_SIZE_INCREASE:
86         case xml::FontTypes::FT_SIZE_DECREASE:
87                 return "emphasis";
88         default:
89                 return "";
90         }
91 }
92
93 string fontToRole(xml::FontTypes type)
94 {
95         // Specific fonts are achieved with roles. The only common ones are "" for basic emphasis,
96         // and "bold"/"strong" for bold. With some specific options, other roles are copied into
97         // HTML output (via the DocBook XSLT sheets); otherwise, if not recognised, they are just ignored.
98         // Hence, it is not a problem to have many roles by default here.
99         // See https://www.sourceware.org/ml/docbook/2003-05/msg00269.html
100         switch (type) {
101         case xml::FontTypes::FT_ITALIC:
102         case xml::FontTypes::FT_EMPH:
103                 return "";
104         case xml::FontTypes::FT_BOLD:
105                 return "bold";
106         case xml::FontTypes::FT_NOUN:
107                 return ""; // Outputs a <person>
108         case xml::FontTypes::FT_TYPE:
109                 return ""; // Outputs a <code>
110         case xml::FontTypes::FT_UBAR:
111                 return "underline";
112
113                 // All other roles are non-standard for DocBook.
114
115         case xml::FontTypes::FT_WAVE:
116                 return "wave";
117         case xml::FontTypes::FT_DBAR:
118                 return "dbar";
119         case xml::FontTypes::FT_SOUT:
120                 return "sout";
121         case xml::FontTypes::FT_XOUT:
122                 return "xout";
123         case xml::FontTypes::FT_UPRIGHT:
124                 return "upright";
125         case xml::FontTypes::FT_SLANTED:
126                 return "slanted";
127         case xml::FontTypes::FT_SMALLCAPS:
128                 return "smallcaps";
129         case xml::FontTypes::FT_ROMAN:
130                 return "roman";
131         case xml::FontTypes::FT_SANS:
132                 return "sans";
133         case xml::FontTypes::FT_SIZE_TINY:
134                 return "tiny";
135         case xml::FontTypes::FT_SIZE_SCRIPT:
136                 return "size_script";
137         case xml::FontTypes::FT_SIZE_FOOTNOTE:
138                 return "size_footnote";
139         case xml::FontTypes::FT_SIZE_SMALL:
140                 return "size_small";
141         case xml::FontTypes::FT_SIZE_NORMAL:
142                 return "size_normal";
143         case xml::FontTypes::FT_SIZE_LARGE:
144                 return "size_large";
145         case xml::FontTypes::FT_SIZE_LARGER:
146                 return "size_larger";
147         case xml::FontTypes::FT_SIZE_LARGEST:
148                 return "size_largest";
149         case xml::FontTypes::FT_SIZE_HUGE:
150                 return "size_huge";
151         case xml::FontTypes::FT_SIZE_HUGER:
152                 return "size_huger";
153         case xml::FontTypes::FT_SIZE_INCREASE:
154                 return "size_increase";
155         case xml::FontTypes::FT_SIZE_DECREASE:
156                 return "size_decrease";
157         default:
158                 return "";
159         }
160 }
161
162 string fontToAttribute(xml::FontTypes type) {
163         // If there is a role (i.e. nonstandard use of a tag), output the attribute. Otherwise, the sheer tag is sufficient
164         // for the font.
165         string role = fontToRole(type);
166         if (!role.empty()) {
167                 return "role='" + role + "'";
168         } else {
169                 return "";
170         }
171 }
172
173 } // end anonymous namespace
174
175
176 xml::FontTag docbookStartFontTag(xml::FontTypes type)
177 {
178         return xml::FontTag(from_utf8(fontToDocBookTag(type)), from_utf8(fontToAttribute(type)), type);
179 }
180
181
182 xml::EndFontTag docbookEndFontTag(xml::FontTypes type)
183 {
184         return xml::EndFontTag(from_utf8(fontToDocBookTag(type)), type);
185 }
186
187
188 namespace {
189
190 // convenience functions
191
192 void openParTag(XMLStream & xs, const Paragraph * par, const Paragraph * prevpar)
193 {
194         Layout const & lay = par->layout();
195
196         if (par == prevpar)
197                 prevpar = nullptr;
198
199         // When should the wrapper be opened here? Only if the previous paragraph has the SAME wrapper tag
200         // (usually, they won't have the same layout) and the CURRENT one allows merging.
201         // The main use case is author information in several paragraphs: if the name of the author is the
202         // first paragraph of an author, then merging with the previous tag does not make sense. Say the
203         // next paragraph is the affiliation, then it should be output in the same <author> tag (different
204         // layout, same wrapper tag).
205         bool openWrapper = false;
206         if (prevpar == nullptr) {
207                 openWrapper = lay.docbookwrappertag() != "NONE";
208         } else {
209                 Layout const & prevlay = prevpar->layout();
210                 if (prevlay.docbookwrappertag() == "NONE") {
211                         openWrapper = lay.docbookwrappertag() != "NONE";
212                 } else {
213                         openWrapper = prevlay.docbookwrappertag() == lay.docbookwrappertag()
214                                         && !lay.docbookwrappermergewithprevious();
215                 }
216         }
217
218         // Main logic.
219         if (openWrapper)
220                 xs << xml::StartTag(lay.docbookwrappertag(), lay.docbookwrapperattr());
221
222         string tag = lay.docbooktag();
223         if (tag == "Plain Layout")
224                 tag = "para";
225
226         xs << xml::ParTag(tag, lay.docbookattr());
227
228         if (lay.docbookitemtag() != "NONE")
229                 xs << xml::StartTag(lay.docbookitemtag(), lay.docbookitemattr());
230 }
231
232
233 void closeTag(XMLStream & xs, Paragraph const * par, Paragraph const * nextpar)
234 {
235         Layout const & lay = par->layout();
236
237         if (par == nextpar)
238                 nextpar = nullptr;
239
240         // See comment in openParTag.
241         bool closeWrapper = false;
242         if (nextpar == nullptr) {
243                 closeWrapper = lay.docbookwrappertag() != "NONE";
244         } else {
245                 Layout const & nextlay = nextpar->layout();
246                 if (nextlay.docbookwrappertag() == "NONE") {
247                         closeWrapper = lay.docbookwrappertag() != "NONE";
248                 } else {
249                         closeWrapper = nextlay.docbookwrappertag() == lay.docbookwrappertag()
250                                         && !nextlay.docbookwrappermergewithprevious();
251                 }
252         }
253
254         // Main logic.
255         if (lay.docbookitemtag() != "NONE")
256                 xs << xml::EndTag(lay.docbookitemtag());
257
258         string tag = lay.docbooktag();
259         if (tag == "Plain Layout")
260                 tag = "para";
261
262         xs << xml::EndTag(tag);
263         if (closeWrapper)
264                 xs << xml::EndTag(lay.docbookwrappertag());
265 }
266
267
268 void openLabelTag(XMLStream & xs, Layout const & lay) // Mostly for definition lists.
269 {
270         xs << xml::StartTag(lay.docbookitemlabeltag(), lay.docbookitemlabelattr());
271 }
272
273
274 void closeLabelTag(XMLStream & xs, Layout const & lay)
275 {
276         xs << xml::EndTag(lay.docbookitemlabeltag());
277         xs << xml::CR();
278 }
279
280
281 void openItemTag(XMLStream & xs, Layout const & lay)
282 {
283         xs << xml::StartTag(lay.docbookitemtag(), lay.docbookitemattr());
284 }
285
286
287 // Return true when new elements are output in a paragraph, false otherwise.
288 bool openInnerItemTag(XMLStream & xs, Layout const & lay)
289 {
290         if (lay.docbookiteminnertag() != "NONE") {
291                 xs << xml::CR();
292                 xs << xml::ParTag(lay.docbookiteminnertag(), lay.docbookiteminnerattr());
293
294                 if (lay.docbookiteminnertag() == "para") {
295                         return true;
296                 }
297         }
298         return false;
299 }
300
301
302 void closeInnerItemTag(XMLStream & xs, Layout const & lay)
303 {
304         if (lay.docbookiteminnertag()!= "NONE") {
305                 xs << xml::EndTag(lay.docbookiteminnertag());
306                 xs << xml::CR();
307         }
308 }
309
310
311 inline void closeItemTag(XMLStream & xs, Layout const & lay)
312 {
313         xs << xml::EndTag(lay.docbookitemtag());
314         xs << xml::CR();
315 }
316
317 // end of convenience functions
318
319 ParagraphList::const_iterator findLast(
320                 ParagraphList::const_iterator p,
321                 ParagraphList::const_iterator const & pend,
322                 LatexType type) {
323         for (++p; p != pend && p->layout().latextype == type; ++p);
324
325         return p;
326 }
327
328 ParagraphList::const_iterator findLastBibliographyParagraph(
329                 ParagraphList::const_iterator p,
330                 ParagraphList::const_iterator const & pend) {
331         for (++p; p != pend && p->layout().latextype == LATEX_BIB_ENVIRONMENT; ++p);
332
333         return p;
334 }
335
336
337 ParagraphList::const_iterator findEndOfEnvironment(
338                 ParagraphList::const_iterator const & pstart,
339                 ParagraphList::const_iterator const & pend)
340 {
341         ParagraphList::const_iterator p = pstart;
342         size_t const depth = p->params().depth();
343
344         for (++p; p != pend; ++p) {
345                 Layout const &style = p->layout();
346                 // It shouldn't happen that e.g. a section command occurs inside
347                 // a quotation environment, at a higher depth, but as of 6/2009,
348                 // it can happen. We pretend that it's just at lowest depth.
349                 if (style.latextype == LATEX_COMMAND)
350                         return p;
351
352                 // If depth is down, we're done
353                 if (p->params().depth() < depth)
354                         return p;
355
356                 // If depth is up, we're not done
357                 if (p->params().depth() > depth)
358                         continue;
359
360                 // FIXME I am not sure about the first check.
361                 // Surely we *could* have different layouts that count as
362                 // LATEX_PARAGRAPH, right?
363                 if (style.latextype == LATEX_PARAGRAPH || style != p->layout())
364                         return p;
365         }
366
367         return pend;
368 }
369
370
371 ParagraphList::const_iterator makeParagraphBibliography(
372                 Buffer const &buf,
373                 XMLStream &xs,
374                 OutputParams const &runparams,
375                 Text const &text,
376                 ParagraphList::const_iterator const & pbegin,
377                 ParagraphList::const_iterator const & pend)
378 {
379         auto const begin = text.paragraphs().begin();
380         auto const end = text.paragraphs().end();
381
382         // Find the paragraph *before* pbegin.
383         ParagraphList::const_iterator pbegin_before = begin;
384         if (pbegin != begin) {
385                 ParagraphList::const_iterator pbegin_before_next = begin;
386                 ++pbegin_before_next;
387
388                 while (pbegin_before_next != pbegin) {
389                         ++pbegin_before;
390                         ++pbegin_before_next;
391                 }
392         }
393
394         ParagraphList::const_iterator par = pbegin;
395
396         // If this is the first paragraph in a bibliography, open the bibliography tag.
397         if (pbegin != begin && pbegin_before->layout().latextype != LATEX_BIB_ENVIRONMENT) {
398                 xs << xml::StartTag("bibliography");
399                 xs << xml::CR();
400         }
401
402         // Generate the required paragraphs, but only if they are .
403         for (; par != pend; ++par) {
404                 // Start the precooked bibliography entry. This is very much like opening a paragraph tag.
405                 // Don't forget the citation ID!
406                 docstring attr;
407                 for (auto i = 0; i < par->size(); ++i) {
408                         Inset const *ip = par->getInset(0);
409                         if (ip != nullptr && ip->lyxCode() == BIBITEM_CODE) {
410                                 const auto * bibitem = dynamic_cast<const InsetBibitem*>(par->getInset(i));
411                                 attr = from_utf8("xml:id='") + bibitem->getParam("key") + from_utf8("'");
412                                 break;
413                         }
414                 }
415                 xs << xml::StartTag(from_utf8("bibliomixed"), attr);
416
417                 // Generate the entry.
418                 par->simpleDocBookOnePar(buf, xs, runparams, text.outerFont(distance(begin, par)), true, true, 0);
419
420                 // End the precooked bibliography entry.
421                 xs << xml::EndTag("bibliomixed");
422                 xs << xml::CR();
423         }
424
425         // If this is the last paragraph in a bibliography, close the bibliography tag.
426         if (par == end || par->layout().latextype != LATEX_BIB_ENVIRONMENT) {
427                 xs << xml::EndTag("bibliography");
428                 xs << xml::CR();
429         }
430
431         return pend;
432 }
433
434
435 ParagraphList::const_iterator makeParagraphs(
436                 Buffer const &buf,
437                 XMLStream &xs,
438                 OutputParams const &runparams,
439                 Text const &text,
440                 ParagraphList::const_iterator const & pbegin,
441                 ParagraphList::const_iterator const & pend)
442 {
443         auto const begin = text.paragraphs().begin();
444         auto const end = text.paragraphs().end();
445         ParagraphList::const_iterator par = pbegin;
446         ParagraphList::const_iterator prevpar = pbegin;
447
448         for (; par != pend; prevpar = par, ++par) {
449                 // We want to open the paragraph tag if:
450                 //   (i) the current layout permits multiple paragraphs
451                 //  (ii) we are either not already inside a paragraph (HTMLIsBlock) OR
452                 //         we are, but this is not the first paragraph
453                 //
454                 // But there is also a special case, and we first see whether we are in it.
455                 // We do not want to open the paragraph tag if this paragraph contains
456                 // only one item, and that item is "inline", i.e., not HTMLIsBlock (such
457                 // as a branch). On the other hand, if that single item has a font change
458                 // applied to it, then we still do need to open the paragraph.
459                 //
460                 // Obviously, this is very fragile. The main reason we need to do this is
461                 // because of branches, e.g., a branch that contains an entire new section.
462                 // We do not really want to wrap that whole thing in a <div>...</div>.
463                 bool special_case = false;
464                 Inset const *specinset = par->size() == 1 ? par->getInset(0) : nullptr;
465                 if (specinset && !specinset->getLayout().htmlisblock()) { // TODO: Convert htmlisblock to a DocBook parameter?
466                         Layout const &style = par->layout();
467                         FontInfo const first_font = style.labeltype == LABEL_MANUAL ?
468                                                                                 style.labelfont : style.font;
469                         FontInfo const our_font =
470                                         par->getFont(buf.masterBuffer()->params(), 0,
471                                                                  text.outerFont(distance(begin, par))).fontInfo();
472
473                         if (first_font == our_font)
474                                 special_case = true;
475                 }
476
477                 // Plain layouts must be ignored.
478                 if (!special_case && buf.params().documentClass().isPlainLayout(par->layout()) && !runparams.docbook_force_pars)
479                         special_case = true;
480                 // TODO: Could get rid of this with a DocBook equivalent to htmlisblock?
481                 if (!special_case && par->size() == 1 && par->getInset(0)) {
482                         Inset const * firstInset = par->getInset(0);
483
484                         // Floats cannot be in paragraphs.
485                         special_case = to_utf8(firstInset->layoutName()).substr(0, 6) == "Float:";
486
487                         // Bibliographies cannot be in paragraphs.
488                         if (!special_case && firstInset->asInsetCommand())
489                                 special_case = firstInset->asInsetCommand()->params().getCmdName() == "bibtex";
490
491                         // Equations do not deserve their own paragraph (DocBook allows them outside paragraphs).
492                         if (!special_case && firstInset->asInsetMath())
493                                 special_case = true;
494
495                         // ERTs are in comments, not paragraphs.
496                         if (!special_case && firstInset->lyxCode() == lyx::ERT_CODE)
497                                 special_case = true;
498
499                         // Listings should not get into their own paragraph.
500                         if (!special_case && firstInset->lyxCode() == lyx::LISTINGS_CODE)
501                                 special_case = true;
502                 }
503
504                 bool const open_par = runparams.docbook_make_pars
505                                                           && (!runparams.docbook_in_par || par != pbegin)
506                                                           && !special_case;
507
508                 // We want to issue the closing tag if either:
509                 //   (i)  We opened it, and either docbook_in_par is false,
510                 //              or we're not in the last paragraph, anyway.
511                 //   (ii) We didn't open it and docbook_in_par is true,
512                 //              but we are in the first par, and there is a next par.
513                 auto nextpar = par;
514                 ++nextpar;
515                 bool const close_par =
516                                 ((open_par && (!runparams.docbook_in_par || nextpar != pend))
517                                 || (!open_par && runparams.docbook_in_par && par == pbegin && nextpar != pend));
518
519                 // Determine if this paragraph has some real content. Things like new pages are not caught
520                 // by Paragraph::empty(), even though they do not generate anything useful in DocBook.
521                 odocstringstream os2;
522                 XMLStream xs2(os2);
523                 par->simpleDocBookOnePar(buf, xs2, runparams, text.outerFont(distance(begin, par)), open_par, close_par, 0);
524
525                 docstring cleaned = os2.str();
526                 static const lyx::regex reg("[ \\r\\n]*");
527                 cleaned = from_utf8(lyx::regex_replace(to_utf8(cleaned), reg, string("")));
528
529                 if (!cleaned.empty()) {
530                         if (open_par)
531                                 openParTag(xs, &*par, &*prevpar);
532
533                         xs << XMLStream::ESCAPE_NONE << os2.str();
534
535                         if (close_par) {
536                                 closeTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr);
537                                 xs << xml::CR();
538                         }
539                 }
540         }
541         return pend;
542 }
543
544
545 bool isNormalEnv(Layout const &lay)
546 {
547         return lay.latextype == LATEX_ENVIRONMENT
548                    || lay.latextype == LATEX_BIB_ENVIRONMENT;
549 }
550
551
552 ParagraphList::const_iterator makeEnvironment(
553                 Buffer const &buf,
554                 XMLStream &xs,
555                 OutputParams const &runparams,
556                 Text const &text,
557                 ParagraphList::const_iterator const & pbegin,
558                 ParagraphList::const_iterator const & pend)
559 {
560         auto const begin = text.paragraphs().begin();
561         auto const end = text.paragraphs().end();
562         ParagraphList::const_iterator par = pbegin;
563         depth_type const origdepth = pbegin->params().depth();
564
565         // Find the previous paragraph.
566         auto prevpar = begin;
567         if (prevpar != par) {
568                 auto prevpar_next = prevpar;
569                 ++prevpar_next;
570
571                 while (prevpar_next != par) {
572                         ++prevpar_next;
573                         ++prevpar;
574                 }
575         }
576
577         // open tag for this environment
578         openParTag(xs, &*par, &*prevpar);
579         xs << xml::CR();
580
581         // we will on occasion need to remember a layout from before.
582         Layout const *lastlay = nullptr;
583
584         while (par != pend) {
585                 Layout const & style = par->layout();
586                 ParagraphList::const_iterator send;
587
588                 // Actual content of this paragraph.
589                 switch (style.latextype) {
590                 case LATEX_ENVIRONMENT:
591                 case LATEX_LIST_ENVIRONMENT:
592                 case LATEX_ITEM_ENVIRONMENT: {
593                         // There are two possibilities in this case.
594                         // One is that we are still in the environment in which we
595                         // started---which we will be if the depth is the same.
596                         if (par->params().depth() == origdepth) {
597                                 LATTEST(par->layout() == style);
598                                 if (lastlay != nullptr) {
599                                         closeItemTag(xs, *lastlay);
600                                         if (lastlay->docbookitemwrappertag() != "NONE") {
601                                                 xs << xml::EndTag(lastlay->docbookitemwrappertag());
602                                                 xs << xml::CR();
603                                         }
604                                         lastlay = nullptr;
605                                 }
606
607                                 // this will be positive if we want to skip the
608                                 // initial word (if it's been taken for the label).
609                                 pos_type sep = 0;
610
611                                 // Open a wrapper tag if needed.
612                                 if (style.docbookitemwrappertag() != "NONE") {
613                                         xs << xml::StartTag(style.docbookitemwrappertag(), style.docbookitemwrapperattr());
614                                         xs << xml::CR();
615                                 }
616
617                                 // label output
618                                 if (style.labeltype != LABEL_NO_LABEL &&
619                                                 style.docbookitemlabeltag() != "NONE") {
620
621                                         if (isNormalEnv(style)) {
622                                                 // in this case, we print the label only for the first
623                                                 // paragraph (as in a theorem or an abstract).
624                                                 if (par == pbegin) {
625                                                         docstring const lbl = pbegin->params().labelString();
626                                                         if (!lbl.empty()) {
627                                                                 openLabelTag(xs, style);
628                                                                 xs << lbl;
629                                                                 closeLabelTag(xs, style);
630                                                         } else {
631                                                                 // No new line after closeLabelTag.
632                                                                 xs << xml::CR();
633                                                         }
634                                                 }
635                                         } else { // some kind of list
636                                                 if (style.labeltype == LABEL_MANUAL) {
637                                                         // Only variablelist gets here.
638
639                                                         openLabelTag(xs, style);
640                                                         sep = par->firstWordDocBook(xs, runparams);
641                                                         closeLabelTag(xs, style);
642                                                 } else {
643                                                         openLabelTag(xs, style);
644                                                         xs << par->params().labelString();
645                                                         closeLabelTag(xs, style);
646                                                 }
647                                         }
648                                 } // end label output
649
650                                 // Start generating the item.
651                                 bool wasInParagraph = runparams.docbook_in_par;
652                                 openItemTag(xs, style);
653                                 bool getsIntoParagraph = openInnerItemTag(xs, style);
654                                 OutputParams rp = runparams;
655                                 rp.docbook_in_par = wasInParagraph | getsIntoParagraph;
656
657                                 // Maybe the item is completely empty, i.e. if the first word ends at the end of the current paragraph
658                                 // AND if the next paragraph doesn't have the same depth (if there is such a paragraph).
659                                 // Common case: there is only the first word on the line, but there is a nested list instead
660                                 // of more text.
661                                 bool emptyItem = false;
662                                 if (sep == par->size()) {
663                                         auto next_par = par;
664                                         ++next_par;
665                                         if (next_par == text.paragraphs().end()) // There is no next paragraph.
666                                                 emptyItem = true;
667                                         else // There is a next paragraph: check depth.
668                                                 emptyItem = par->params().depth() >= next_par->params().depth();
669                                 }
670
671                                 if (emptyItem) {
672                                         // Avoid having an empty item, this is not valid DocBook. A single character is enough to force
673                                         // generation of a full <para>.
674                                         xs << ' ';
675                                 } else {
676                                         // Generate the rest of the paragraph, if need be.
677                                         par->simpleDocBookOnePar(buf, xs, rp, text.outerFont(distance(begin, par)), true, true, sep);
678                                 }
679
680                                 ++par;
681                                 if (getsIntoParagraph)
682                                         closeInnerItemTag(xs, style);
683
684                                 // We may not want to close the tag yet, in particular:
685                                 // If we're not at the end of the item...
686                                 if (par != pend
687                                         //  and are doing items...
688                                         && !isNormalEnv(style)
689                                         // and if the depth has changed...
690                                         && par->params().depth() != origdepth) {
691                                         // then we'll save this layout for later, and close it when
692                                         // we get another item.
693                                         lastlay = &style;
694                                 } else {
695                                         closeItemTag(xs, style);
696
697                                         // Eventually, close the item wrapper.
698                                         if (style.docbookitemwrappertag() != "NONE") {
699                                                 xs << xml::EndTag(style.docbookitemwrappertag());
700                                                 xs << xml::CR();
701                                         }
702                                 }
703                         }
704                         // The other possibility is that the depth has increased.
705                         else {
706                                 send = findEndOfEnvironment(par, pend);
707                                 par = makeEnvironment(buf, xs, runparams, text, par, send);
708                         }
709                         break;
710                 }
711                 case LATEX_PARAGRAPH:
712                         send = findLast(par, pend, LATEX_PARAGRAPH);
713                         par = makeParagraphs(buf, xs, runparams, text, par, send);
714                         break;
715                 case LATEX_BIB_ENVIRONMENT:
716                         send = findLast(par, pend, LATEX_BIB_ENVIRONMENT);
717                         par = makeParagraphBibliography(buf, xs, runparams, text, par, send);
718                         break;
719                 case LATEX_COMMAND:
720                         ++par;
721                         break;
722                 }
723         }
724
725         if (lastlay != nullptr) {
726                 closeItemTag(xs, *lastlay);
727                 if (lastlay->docbookitemwrappertag() != "NONE") {
728                         xs << xml::EndTag(lastlay->docbookitemwrappertag());
729                         xs << xml::CR();
730                 }
731         }
732         auto nextpar = par;
733         ++nextpar;
734         closeTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr);
735         xs << xml::CR();
736         return pend;
737 }
738
739
740 void makeCommand(
741                 Buffer const & buf,
742                 XMLStream & xs,
743                 OutputParams const & runparams,
744                 Text const & text,
745                 ParagraphList::const_iterator const & pbegin)
746 {
747         // No need for labels, as they are handled by DocBook tags.
748         auto const begin = text.paragraphs().begin();
749         auto const end = text.paragraphs().end();
750         auto nextpar = pbegin;
751         ++nextpar;
752
753         // Find the previous paragraph.
754         auto prevpar = begin;
755         if (prevpar != pbegin) {
756                 auto prevpar_next = prevpar;
757                 ++prevpar_next;
758
759                 while (prevpar_next != pbegin) {
760                         ++prevpar_next;
761                         ++prevpar;
762                 }
763         }
764
765         // Generate this command.
766         openParTag(xs, &*pbegin, &*prevpar);
767
768         pbegin->simpleDocBookOnePar(buf, xs, runparams,
769                                                                 text.outerFont(distance(begin, pbegin)));
770
771         closeTag(xs, &*pbegin, (nextpar != end) ? &*nextpar : nullptr);
772         xs << xml::CR();
773 }
774
775 pair<ParagraphList::const_iterator, ParagraphList::const_iterator> makeAny(
776                 Text const &text,
777                 Buffer const &buf,
778                 XMLStream &xs,
779                 OutputParams const &ourparams,
780                 ParagraphList::const_iterator par,
781                 ParagraphList::const_iterator send,
782                 ParagraphList::const_iterator pend)
783 {
784         Layout const & style = par->layout();
785
786         switch (style.latextype) {
787                 case LATEX_COMMAND: {
788                         // The files with which we are working never have more than
789                         // one paragraph in a command structure.
790                         // FIXME
791                         // if (ourparams.docbook_in_par)
792                         //   fix it so we don't get sections inside standard, e.g.
793                         // note that we may then need to make runparams not const, so we
794                         // can communicate that back.
795                         // FIXME Maybe this fix should be in the routines themselves, in case
796                         // they are called from elsewhere.
797                         makeCommand(buf, xs, ourparams, text, par);
798                         ++par;
799                         break;
800                 }
801                 case LATEX_ENVIRONMENT:
802                 case LATEX_LIST_ENVIRONMENT:
803                 case LATEX_ITEM_ENVIRONMENT:
804                         // FIXME Same fix here.
805                         send = findEndOfEnvironment(par, pend);
806                         par = makeEnvironment(buf, xs, ourparams, text, par, send);
807                         break;
808                 case LATEX_PARAGRAPH:
809                         send = findLast(par, pend, LATEX_PARAGRAPH);
810                         par = makeParagraphs(buf, xs, ourparams, text, par, send);
811                         break;
812                 case LATEX_BIB_ENVIRONMENT:
813                         send = findLast(par, pend, LATEX_BIB_ENVIRONMENT);
814                         par = makeParagraphBibliography(buf, xs, ourparams, text, par, send);
815                         break;
816         }
817
818         return make_pair(par, send);
819 }
820
821 } // end anonymous namespace
822
823
// Result of hasDocumentSectioning(): whether a paragraph whose layout category is "Sectioning" was found, and the
// index at which the search stopped (that paragraph when one was found).
using DocBookDocumentSectioning = tuple<bool, pit_type>;
// Result of getParagraphsWithInfo(), in order: indices of paragraphs whose layout has docbookininfo() == "maybe",
// indices of those with docbookininfo() == "always", the index where the scan started (after skipping leading empty
// or note-only paragraphs), and the index where the scan stopped.
using DocBookInfoTag = tuple<set<pit_type>, set<pit_type>, pit_type, pit_type>;
826
827
828 DocBookDocumentSectioning hasDocumentSectioning(ParagraphList const &paragraphs, pit_type bpit, pit_type const epit) {
829         bool documentHasSections = false;
830
831         while (bpit < epit) {
832                 Layout const &style = paragraphs[bpit].layout();
833                 documentHasSections |= style.category() == from_utf8("Sectioning");
834
835                 if (documentHasSections) {
836                         break;
837                 }
838                 bpit += 1;
839         }
840         // Paragraphs before the first section: [ runparams.par_begin ; eppit )
841
842         return make_tuple(documentHasSections, bpit);
843 }
844
845
846 bool hasOnlyNotes(Paragraph const & par)
847 {
848         for (int i = 0; i < par.size(); ++i)
849                 if (!par.isInset(i) || !dynamic_cast<InsetNote *>(par.insetList().get(i)))
850                         return false;
851         return true;
852 }
853
854
855 DocBookInfoTag getParagraphsWithInfo(ParagraphList const &paragraphs, pit_type bpit, pit_type const epit) {
856         set<pit_type> shouldBeInInfo;
857         set<pit_type> mustBeInInfo;
858
859         // Find the first non empty paragraph by mutating bpit.
860         while (bpit < epit) {
861                 Paragraph const &par = paragraphs[bpit];
862                 if (par.empty() || hasOnlyNotes(par))
863                         bpit += 1;
864                 else
865                         break;
866         }
867
868         // Find the last info-like paragraph.
869         pit_type cpit = bpit;
870         while (cpit < epit) {
871                 // Skip paragraphs only containing one note.
872                 Paragraph const &par = paragraphs[cpit];
873                 if (hasOnlyNotes(par)) {
874                         cpit += 1;
875                         continue;
876                 }
877
878                 // Based on layout information, store this paragraph in one set: should be in <info>, must be.
879                 Layout const &style = par.layout();
880
881                 if (style.docbookininfo() == "always") {
882                         mustBeInInfo.emplace(cpit);
883                 } else if (style.docbookininfo() == "maybe") {
884                         shouldBeInInfo.emplace(cpit);
885                 } else {
886                         // Hypothesis: the <info> parts should be grouped together near the beginning bpit.
887                         break;
888                 }
889                 cpit += 1;
890         }
891         // Now, cpit points to the last paragraph that has things that could go in <info>.
892         // bpit is still the beginning of the <info> part.
893
894         return make_tuple(shouldBeInInfo, mustBeInInfo, bpit, cpit);
895 }
896
897
898 bool hasAbstractBetween(ParagraphList const &paragraphs, pit_type const bpitAbstract, pit_type const epitAbstract)
899 {
900         // Hypothesis: the paragraphs between bpitAbstract and epitAbstract can be considered an abstract because they
901         // are just after a document or part title.
902         if (epitAbstract - bpitAbstract <= 0)
903                 return false;
904
905         // If there is something between these paragraphs, check if it's compatible with an abstract (i.e. some text).
906         pit_type bpit = bpitAbstract;
907         while (bpit < epitAbstract) {
908                 const Paragraph &p = paragraphs.at(bpit);
909
910                 if (p.layout().name() == from_ascii("Abstract"))
911                         return true;
912
913                 if (!p.insetList().empty()) {
914                         for (const auto &i : p.insetList()) {
915                                 if (i.inset->getText(0) != nullptr) {
916                                         return true;
917                                 }
918                         }
919                 }
920                 bpit++;
921         }
922         return false;
923 }
924
925
926 pit_type generateDocBookParagraphWithoutSectioning(
927                 Text const & text,
928                 Buffer const & buf,
929                 XMLStream & xs,
930                 OutputParams const & runparams,
931                 ParagraphList const & paragraphs,
932                 pit_type bpit,
933                 pit_type epit)
934 {
935         auto par = paragraphs.iterator_at(bpit);
936         auto lastStartedPar = par;
937         ParagraphList::const_iterator send;
938         auto const pend =
939                         (epit == (int) paragraphs.size()) ?
940                         paragraphs.end() : paragraphs.iterator_at(epit);
941
942         while (bpit < epit) {
943                 tie(par, send) = makeAny(text, buf, xs, runparams, par, send, pend);
944                 bpit += distance(lastStartedPar, par);
945                 lastStartedPar = par;
946         }
947
948         return bpit;
949 }
950
951
952 void outputDocBookInfo(
953                 Text const & text,
954                 Buffer const & buf,
955                 XMLStream & xs,
956                 OutputParams const & runparams,
957                 ParagraphList const & paragraphs,
958                 DocBookInfoTag const & info,
959                 pit_type bpitAbstract,
960                 pit_type const epitAbstract)
961 {
962         // Consider everything between bpitAbstract and epitAbstract (excluded) as paragraphs for the abstract.
963         // Use bpitAbstract >= epitAbstract to indicate there is no abstract.
964
965         set<pit_type> shouldBeInInfo;
966         set<pit_type> mustBeInInfo;
967         pit_type bpitInfo;
968         pit_type epitInfo;
969         tie(shouldBeInInfo, mustBeInInfo, bpitInfo, epitInfo) = info;
970
971         // Perform an additional check on the abstract. Sometimes, there are many paragraphs that should go
972         // into the abstract, but none generates actual content. Thus, first generate to a temporary stream,
973         // then only create the <abstract> tag if these paragraphs generate some content.
974         // This check must be performed *before* a decision on whether or not to output <info> is made.
975         bool hasAbstract = hasAbstractBetween(paragraphs, bpitAbstract, epitAbstract);
976         docstring abstract;
977         if (hasAbstract) {
978                 odocstringstream os2;
979                 XMLStream xs2(os2);
980                 generateDocBookParagraphWithoutSectioning(text, buf, xs2, runparams, paragraphs, bpitAbstract, epitAbstract);
981
982                 // Actually output the abstract if there is something to do. Don't count line feeds or spaces in this,
983                 // even though they must be properly output if there is some abstract.
984                 docstring abstractContent = os2.str();
985                 static const lyx::regex reg("[ \\r\\n]*");
986                 abstractContent = from_utf8(lyx::regex_replace(to_utf8(abstractContent), reg, string("")));
987
988                 // Nothing? Then there is no abstract!
989                 if (abstractContent.empty())
990                         hasAbstract = false;
991         }
992
993         // The abstract must go in <info>.
994         bool needInfo = !mustBeInInfo.empty() || hasAbstract;
995
996         // Start the <info> tag if required.
997         if (needInfo) {
998                 xs.startDivision(false);
999                 xs << xml::StartTag("info");
1000                 xs << xml::CR();
1001         }
1002
1003         // Output the elements that should go in <info>.
1004         generateDocBookParagraphWithoutSectioning(text, buf, xs, runparams, paragraphs, bpitInfo, epitInfo);
1005
1006         if (hasAbstract && !abstract.empty()) { // The second test is probably superfluous.
1007                 string tag = paragraphs[bpitAbstract].layout().docbookforceabstracttag();
1008                 if (tag == "NONE")
1009                         tag = "abstract";
1010
1011                 xs << xml::StartTag(tag);
1012                 xs << xml::CR();
1013                 xs << XMLStream::ESCAPE_NONE << abstract;
1014                 xs << xml::EndTag(tag);
1015                 xs << xml::CR();
1016         }
1017
1018         // End the <info> tag if it was started.
1019         if (needInfo) {
1020                 xs << xml::EndTag("info");
1021                 xs << xml::CR();
1022                 xs.endDivision();
1023         }
1024 }
1025
1026
1027 void docbookFirstParagraphs(
1028                 Text const &text,
1029                 Buffer const &buf,
1030                 XMLStream &xs,
1031                 OutputParams const &runparams,
1032                 pit_type epit)
1033 {
1034         // Handle the beginning of the document, supposing it has sections.
1035         // Major role: output the first <info> tag.
1036
1037         ParagraphList const &paragraphs = text.paragraphs();
1038         pit_type bpit = runparams.par_begin;
1039         DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit);
1040         outputDocBookInfo(text, buf, xs, runparams, paragraphs, info, get<3>(info), epit);
1041 }
1042
1043
1044 bool isParagraphEmpty(const Paragraph &par)
1045 {
1046         InsetList const &insets = par.insetList();
1047         size_t insetsLength = distance(insets.begin(), insets.end());
1048         bool hasParagraphOnlyNote = insetsLength == 1 && insets.get(0) && insets.get(0)->asInsetCollapsible() &&
1049                                                                 dynamic_cast<InsetNote *>(insets.get(0));
1050         return hasParagraphOnlyNote;
1051 }
1052
1053
1054 void docbookSimpleAllParagraphs(
1055                 Text const & text,
1056                 Buffer const & buf,
1057                 XMLStream & xs,
1058                 OutputParams const & runparams)
1059 {
1060         // Handle the document, supposing it has no sections (i.e. a "simple" document).
1061
1062         // First, the <info> tag.
1063         ParagraphList const &paragraphs = text.paragraphs();
1064         pit_type bpit = runparams.par_begin;
1065         pit_type const epit = runparams.par_end;
1066         DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit);
1067         outputDocBookInfo(text, buf, xs, runparams, paragraphs, info, 0, 0);
1068         bpit = get<3>(info); // Generate the content starting from the end of the <info> part.
1069
1070         // Then, the content.
1071         ParagraphList::const_iterator const pend =
1072                         (epit == (int) paragraphs.size()) ?
1073                         paragraphs.end() : paragraphs.iterator_at(epit);
1074
1075         while (bpit < epit) {
1076                 auto par = paragraphs.iterator_at(bpit);
1077                 ParagraphList::const_iterator const lastStartedPar = par;
1078                 ParagraphList::const_iterator send;
1079
1080                 if (isParagraphEmpty(*par)) {
1081                         ++par;
1082                         bpit += distance(lastStartedPar, par);
1083                         continue;
1084                 }
1085
1086                 // Generate this paragraph.
1087                 tie(par, send) = makeAny(text, buf, xs, runparams, par, send, pend);
1088                 bpit += distance(lastStartedPar, par);
1089         }
1090 }
1091
1092
1093 void docbookParagraphs(Text const &text,
1094                                            Buffer const &buf,
1095                                            XMLStream &xs,
1096                                            OutputParams const &runparams) {
1097         ParagraphList const &paragraphs = text.paragraphs();
1098         if (runparams.par_begin == runparams.par_end) {
1099                 runparams.par_begin = 0;
1100                 runparams.par_end = paragraphs.size();
1101         }
1102         pit_type bpit = runparams.par_begin;
1103         pit_type const epit = runparams.par_end;
1104         LASSERT(bpit < epit,
1105                         {
1106                                 xs << XMLStream::ESCAPE_NONE << "<!-- DocBook output error! -->\n";
1107                                 return;
1108                         });
1109
1110         ParagraphList::const_iterator const pend =
1111                         (epit == (int) paragraphs.size()) ?
1112                         paragraphs.end() : paragraphs.iterator_at(epit);
1113         std::stack<std::pair<int, string>> headerLevels; // Used to determine when to open/close sections: store the depth
1114         // of the section and the tag that was used to open it.
1115
1116         // Detect whether the document contains sections. If there are no sections, there can be no automatically
1117         // discovered abstract.
1118         bool documentHasSections;
1119         pit_type eppit;
1120         tie(documentHasSections, eppit) = hasDocumentSectioning(paragraphs, bpit, epit);
1121
1122         if (documentHasSections) {
1123                 docbookFirstParagraphs(text, buf, xs, runparams, eppit);
1124                 bpit = eppit;
1125         } else {
1126                 docbookSimpleAllParagraphs(text, buf, xs, runparams);
1127                 return;
1128         }
1129
1130         bool currentlyInAppendix = false;
1131
1132         while (bpit < epit) {
1133                 OutputParams ourparams = runparams;
1134
1135                 auto par = paragraphs.iterator_at(bpit);
1136                 if (par->params().startOfAppendix())
1137                         currentlyInAppendix = true;
1138                 Layout const &style = par->layout();
1139                 ParagraphList::const_iterator const lastStartedPar = par;
1140                 ParagraphList::const_iterator send;
1141
1142                 if (isParagraphEmpty(*par)) {
1143                         ++par;
1144                         bpit += distance(lastStartedPar, par);
1145                         continue;
1146                 }
1147
1148                 // Think about adding <section> and/or </section>s.
1149                 const bool isLayoutSectioning = style.category() == from_utf8("Sectioning");
1150                 if (isLayoutSectioning) {
1151                         int level = style.toclevel;
1152
1153                         // Need to close a previous section if it has the same level or a higher one (close <section> if opening a <h2>
1154                         // after a <h2>, <h3>, <h4>, <h5> or <h6>). More examples:
1155                         //   - current: h2; back: h1; do not close any <section>
1156                         //   - current: h1; back: h2; close two <section> (first the <h2>, then the <h1>, so a new <h1> can come)
1157                         while (!headerLevels.empty() && level <= headerLevels.top().first) {
1158                                 int stackLevel = headerLevels.top().first;
1159                                 docstring stackTag = from_utf8("</" + headerLevels.top().second + ">");
1160                                 headerLevels.pop();
1161
1162                                 // Output the tag only if it corresponds to a legit section.
1163                                 if (stackLevel != Layout::NOT_IN_TOC)
1164                                         xs << XMLStream::ESCAPE_NONE << stackTag << xml::CR();
1165                         }
1166
1167                         // Open the new section: first push it onto the stack, then output it in DocBook.
1168                         string sectionTag = (currentlyInAppendix && style.docbooksectiontag() == "chapter") ?
1169                                                                 "appendix" : style.docbooksectiontag();
1170                         headerLevels.push(std::make_pair(level, sectionTag));
1171
1172                         // Some sectioning-like elements should not be output (such as FrontMatter).
1173                         if (level != Layout::NOT_IN_TOC) {
1174                                 // Look for a label in the title, i.e. a InsetLabel as a child.
1175                                 docstring id = docstring();
1176                                 for (pos_type i = 0; i < par->size(); ++i) {
1177                                         Inset const *inset = par->getInset(i);
1178                                         if (inset) {
1179                                                 if (auto label = dynamic_cast<InsetLabel const *>(inset)) {
1180                                                         // Generate the attributes for the section if need be.
1181                                                         id += "xml:id=\"" + xml::cleanID(label->screenLabel()) + "\"";
1182
1183                                                         // Don't output the ID as a DocBook <anchor>.
1184                                                         ourparams.docbook_anchors_to_ignore.emplace(label->screenLabel());
1185
1186                                                         // Cannot have multiple IDs per tag.
1187                                                         break;
1188                                                 }
1189                                         }
1190                                 }
1191
1192                                 // Write the open tag for this section.
1193                                 docstring tag = from_utf8("<" + sectionTag);
1194                                 if (!id.empty())
1195                                         tag += from_utf8(" ") + id;
1196                                 tag += from_utf8(">");
1197                                 xs << XMLStream::ESCAPE_NONE << tag;
1198                                 xs << xml::CR();
1199                         }
1200                 }
1201
1202                 // Close all sections before the bibliography.
1203                 // TODO: Only close all when the bibliography is at the end of the document? Or force to output the bibliography at the end of the document? Or don't care (as allowed by DocBook)?
1204                 auto insetsLength = distance(par->insetList().begin(), par->insetList().end());
1205                 if (insetsLength > 0) {
1206                         Inset const *firstInset = par->getInset(0);
1207                         if (firstInset && dynamic_cast<InsetBibtex const *>(firstInset)) {
1208                                 while (!headerLevels.empty()) {
1209                                         int level = headerLevels.top().first;
1210                                         docstring tag = from_utf8("</" + headerLevels.top().second + ">");
1211                                         headerLevels.pop();
1212
1213                                         // Output the tag only if it corresponds to a legit section.
1214                                         if (level != Layout::NOT_IN_TOC) {
1215                                                 xs << XMLStream::ESCAPE_NONE << tag;
1216                                                 xs << xml::CR();
1217                                         }
1218                                 }
1219                         }
1220                 }
1221
1222                 // Generate this paragraph.
1223                 tie(par, send) = makeAny(text, buf, xs, ourparams, par, send, pend);
1224                 bpit += distance(lastStartedPar, par);
1225         }
1226
1227         // If need be, close <section>s, but only at the end of the document (otherwise, dealt with at the beginning
1228         // of the loop).
1229         while (!headerLevels.empty() && headerLevels.top().first > Layout::NOT_IN_TOC) {
1230                 docstring tag = from_utf8("</" + headerLevels.top().second + ">");
1231                 headerLevels.pop();
1232                 xs << XMLStream::ESCAPE_NONE << tag;
1233                 xs << xml::CR();
1234         }
1235 }
1236
1237 } // namespace lyx