src/output_docbook.cpp

   1 /**
   2  * \file output_docbook.cpp
   3  * This file is part of LyX, the document processor.
   4  * Licence details can be found in the file COPYING.
   5  *
   6  * \author Lars Gullik Bjønnes
   7  * \author José Matos
   8  *
   9  * Full author contact details are available in file CREDITS.
  10  */
  11
  12 #include <config.h>
  13
  14 #include "output_docbook.h"
  15
  16 #include "Buffer.h"
  17 #include "buffer_funcs.h"
  18 #include "BufferParams.h"
  19 #include "Font.h"
  20 #include "InsetList.h"
  21 #include "Paragraph.h"
  22 #include "ParagraphList.h"
  23 #include "ParagraphParameters.h"
  24 #include "xml.h"
  25 #include "Text.h"
  26 #include "TextClass.h"
  27
  28 #include "insets/InsetBibtex.h"
  29 #include "insets/InsetBibitem.h"
  30 #include "insets/InsetLabel.h"
  31 #include "mathed/InsetMath.h"
  32 #include "insets/InsetNote.h"
  33
  34 #include "support/debug.h"
  35 #include "support/lassert.h"
  36 #include "support/textutils.h"
  37
  38 #include <stack>
  39 #include <iostream>
  40 #include <algorithm>
  41 #include <sstream>
  42
  43 using namespace std;
  44 using namespace lyx::support;
  45
  46 namespace lyx {
  47
  48 namespace {
  49
  50 std::string fontToDocBookTag(xml::FontTypes type)
  51 {
  52         switch (type) {
  53         case xml::FontTypes::FT_EMPH:
  54         case xml::FontTypes::FT_BOLD:
  55                 return "emphasis";
  56         case xml::FontTypes::FT_NOUN:
  57                 return "personname";
  58         case xml::FontTypes::FT_UBAR:
  59         case xml::FontTypes::FT_WAVE:
  60         case xml::FontTypes::FT_DBAR:
  61         case xml::FontTypes::FT_SOUT:
  62         case xml::FontTypes::FT_XOUT:
  63         case xml::FontTypes::FT_ITALIC:
  64         case xml::FontTypes::FT_UPRIGHT:
  65         case xml::FontTypes::FT_SLANTED:
  66         case xml::FontTypes::FT_SMALLCAPS:
  67         case xml::FontTypes::FT_ROMAN:
  68         case xml::FontTypes::FT_SANS:
  69                 return "emphasis";
  70         case xml::FontTypes::FT_TYPE:
  71                 return "code";
  72         case xml::FontTypes::FT_SIZE_TINY:
  73         case xml::FontTypes::FT_SIZE_SCRIPT:
  74         case xml::FontTypes::FT_SIZE_FOOTNOTE:
  75         case xml::FontTypes::FT_SIZE_SMALL:
  76         case xml::FontTypes::FT_SIZE_NORMAL:
  77         case xml::FontTypes::FT_SIZE_LARGE:
  78         case xml::FontTypes::FT_SIZE_LARGER:
  79         case xml::FontTypes::FT_SIZE_LARGEST:
  80         case xml::FontTypes::FT_SIZE_HUGE:
  81         case xml::FontTypes::FT_SIZE_HUGER:
  82         case xml::FontTypes::FT_SIZE_INCREASE:
  83         case xml::FontTypes::FT_SIZE_DECREASE:
  84                 return "emphasis";
  85         default:
  86                 return "";
  87         }
  88 }
  89
  90
  91 string fontToRole(xml::FontTypes type)
  92 {
  93         // Specific fonts are achieved with roles. The only common ones are "" for basic emphasis,
  94         // and "bold"/"strong" for bold. With some specific options, other roles are copied into
  95         // HTML output (via the DocBook XSLT sheets); otherwise, if not recognised, they are just ignored.
  96         // Hence, it is not a problem to have many roles by default here.
  97         // See https://www.sourceware.org/ml/docbook/2003-05/msg00269.html
  98         switch (type) {
  99         case xml::FontTypes::FT_ITALIC:
 100         case xml::FontTypes::FT_EMPH:
 101                 return "";
 102         case xml::FontTypes::FT_BOLD:
 103                 return "bold";
 104         case xml::FontTypes::FT_NOUN: // Outputs a <person>
 105         case xml::FontTypes::FT_TYPE: // Outputs a <code>
 106                 return "";
 107         case xml::FontTypes::FT_UBAR:
 108                 return "underline";
 109
 110         // All other roles are non-standard for DocBook.
 111
 112         case xml::FontTypes::FT_WAVE:
 113                 return "wave";
 114         case xml::FontTypes::FT_DBAR:
 115                 return "dbar";
 116         case xml::FontTypes::FT_SOUT:
 117                 return "sout";
 118         case xml::FontTypes::FT_XOUT:
 119                 return "xout";
 120         case xml::FontTypes::FT_UPRIGHT:
 121                 return "upright";
 122         case xml::FontTypes::FT_SLANTED:
 123                 return "slanted";
 124         case xml::FontTypes::FT_SMALLCAPS:
 125                 return "smallcaps";
 126         case xml::FontTypes::FT_ROMAN:
 127                 return "roman";
 128         case xml::FontTypes::FT_SANS:
 129                 return "sans";
 130         case xml::FontTypes::FT_SIZE_TINY:
 131                 return "tiny";
 132         case xml::FontTypes::FT_SIZE_SCRIPT:
 133                 return "size_script";
 134         case xml::FontTypes::FT_SIZE_FOOTNOTE:
 135                 return "size_footnote";
 136         case xml::FontTypes::FT_SIZE_SMALL:
 137                 return "size_small";
 138         case xml::FontTypes::FT_SIZE_NORMAL:
 139                 return "size_normal";
 140         case xml::FontTypes::FT_SIZE_LARGE:
 141                 return "size_large";
 142         case xml::FontTypes::FT_SIZE_LARGER:
 143                 return "size_larger";
 144         case xml::FontTypes::FT_SIZE_LARGEST:
 145                 return "size_largest";
 146         case xml::FontTypes::FT_SIZE_HUGE:
 147                 return "size_huge";
 148         case xml::FontTypes::FT_SIZE_HUGER:
 149                 return "size_huger";
 150         case xml::FontTypes::FT_SIZE_INCREASE:
 151                 return "size_increase";
 152         case xml::FontTypes::FT_SIZE_DECREASE:
 153                 return "size_decrease";
 154         default:
 155                 return "";
 156         }
 157 }
 158
 159
 160 string fontToAttribute(xml::FontTypes type) {
 161         // If there is a role (i.e. nonstandard use of a tag), output the attribute. Otherwise, the sheer tag is sufficient
 162         // for the font.
 163         string role = fontToRole(type);
 164         if (!role.empty())
 165                 return "role='" + role + "'";
 166         else
 167                 return "";
 168 }
 169
 170
 171 // Higher-level convenience functions.
 172
 173 void openParTag(XMLStream & xs, const Paragraph * par, const Paragraph * prevpar, const OutputParams & runparams)
 174 {
 175         if (par == prevpar)
 176                 prevpar = nullptr;
 177
 178         // If the previous paragraph is empty, don't consider it when opening wrappers.
 179         if (prevpar && prevpar->empty() && !prevpar->allowEmpty())
 180                 prevpar = nullptr;
 181
 182         // When should the wrapper be opened here? Only if the previous paragraph has the SAME wrapper tag
 183         // (usually, they won't have the same layout) and the CURRENT one allows merging.
 184         // The main use case is author information in several paragraphs: if the name of the author is the
 185         // first paragraph of an author, then merging with the previous tag does not make sense. Say the
 186         // next paragraph is the affiliation, then it should be output in the same <author> tag (different
 187         // layout, same wrapper tag).
 188         Layout const & lay = par->layout();
 189         bool openWrapper = lay.docbookwrappertag() != "NONE" && !runparams.docbook_ignore_wrapper;
 190
 191         if (prevpar != nullptr && !runparams.docbook_ignore_wrapper) {
 192                 Layout const & prevlay = prevpar->layout();
 193                 if (prevlay.docbookwrappertag() != "NONE") {
 194                         if (prevlay.docbookwrappertag() == lay.docbookwrappertag() &&
 195                                         prevlay.docbookwrapperattr() == lay.docbookwrapperattr())
 196                                 openWrapper = !lay.docbookwrappermergewithprevious();
 197                         else
 198                                 openWrapper = true;
 199                 }
 200         }
 201
 202         // Main logic.
 203         if (openWrapper)
 204                 xml::openTag(xs, lay.docbookwrappertag(), lay.docbookwrapperattr(), lay.docbookwrappertagtype());
 205
 206         const string & tag = lay.docbooktag();
 207         if (tag != "NONE") {
 208                 auto xmltag = xml::ParTag(tag, lay.docbookattr());
 209                 if (!xs.isTagOpen(xmltag, 1)) { // Don't nest a paragraph directly in a paragraph.
 210                         // TODO: required or not?
 211                         // TODO: avoid creating a ParTag object just for this query...
 212                         xml::openTag(xs, lay.docbooktag(), lay.docbookattr(), lay.docbooktagtype());
 213                         xml::openTag(xs, lay.docbookinnertag(), lay.docbookinnerattr(), lay.docbookinnertagtype());
 214                 }
 215         }
 216
 217         xml::openTag(xs, lay.docbookitemwrappertag(), lay.docbookitemwrapperattr(), lay.docbookitemwrappertagtype());
 218         xml::openTag(xs, lay.docbookitemtag(), lay.docbookitemattr(), lay.docbookitemtagtype());
 219         xml::openTag(xs, lay.docbookiteminnertag(), lay.docbookiteminnerattr(), lay.docbookiteminnertagtype());
 220 }
 221
 222
 223 void closeParTag(XMLStream & xs, Paragraph const * par, Paragraph const * nextpar, const OutputParams & runparams)
 224 {
 225         if (par == nextpar)
 226                 nextpar = nullptr;
 227
 228         // If the next paragraph is empty, don't consider it when closing wrappers.
 229         if (nextpar && nextpar->empty() && !nextpar->allowEmpty())
 230                 nextpar = nullptr;
 231
 232         // See comment in openParTag.
 233         Layout const & lay = par->layout();
 234         bool closeWrapper = lay.docbookwrappertag() != "NONE" && !runparams.docbook_ignore_wrapper;
 235
 236         if (nextpar != nullptr && !runparams.docbook_ignore_wrapper) {
 237                 Layout const & nextlay = nextpar->layout();
 238                 if (nextlay.docbookwrappertag() != "NONE") {
 239                         if (nextlay.docbookwrappertag() == lay.docbookwrappertag() &&
 240                                         nextlay.docbookwrapperattr() == lay.docbookwrapperattr())
 241                                 closeWrapper = !nextlay.docbookwrappermergewithprevious();
 242                         else
 243                                 closeWrapper = true;
 244                 }
 245         }
 246
 247         // Main logic.
 248         xml::closeTag(xs, lay.docbookiteminnertag(), lay.docbookiteminnertagtype());
 249         xml::closeTag(xs, lay.docbookitemtag(), lay.docbookitemtagtype());
 250         xml::closeTag(xs, lay.docbookitemwrappertag(), lay.docbookitemwrappertagtype());
 251         xml::closeTag(xs, lay.docbookinnertag(), lay.docbookinnertagtype());
 252         xml::closeTag(xs, lay.docbooktag(), lay.docbooktagtype());
 253         if (closeWrapper)
 254                 xml::closeTag(xs, lay.docbookwrappertag(), lay.docbookwrappertagtype());
 255 }
 256
 257
 258 void makeBibliography(
 259                 Text const & text,
 260                 Buffer const & buf,
 261                 XMLStream & xs,
 262                 OutputParams const & runparams,
 263                 ParagraphList::const_iterator const & par)
 264 {
 265         // If this is the first paragraph in a bibliography, open the bibliography tag.
 266         auto const * pbegin_before = text.paragraphs().getParagraphBefore(par);
 267         if (pbegin_before == nullptr || (pbegin_before && pbegin_before->layout().latextype != LATEX_BIB_ENVIRONMENT)) {
 268                 xs << xml::StartTag("bibliography");
 269                 xs << xml::CR();
 270         }
 271
 272         // Start the precooked bibliography entry. This is very much like opening a paragraph tag.
 273         // Don't forget the citation ID!
 274         docstring attr;
 275         for (auto i = 0; i < par->size(); ++i) {
 276                 Inset const *ip = par->getInset(i);
 277                 if (!ip)
 278                         continue;
 279                 if (const auto * bibitem = dynamic_cast<const InsetBibitem*>(ip)) {
 280                         auto id = xml::cleanID(bibitem->getParam("key"));
 281                         attr = from_utf8("xml:id='") + id + from_utf8("'");
 282                         break;
 283                 }
 284         }
 285         xs << xml::StartTag(from_utf8("bibliomixed"), attr);
 286
 287         // Generate the entry. Concatenate the different parts of the paragraph if any.
 288         auto const begin = text.paragraphs().begin();
 289         std::vector<docstring> pars_prepend;
 290         std::vector<docstring> pars;
 291         std::vector<docstring> pars_append;
 292         tie(pars_prepend, pars, pars_append) = par->simpleDocBookOnePar(buf, runparams, text.outerFont(std::distance(begin, par)), 0);
 293
 294         for (auto & parXML : pars_prepend)
 295                 xs << XMLStream::ESCAPE_NONE << parXML;
 296         for (auto & parXML : pars)
 297                 xs << XMLStream::ESCAPE_NONE << parXML;
 298         for (auto & parXML : pars_append)
 299                 xs << XMLStream::ESCAPE_NONE << parXML;
 300
 301         // End the precooked bibliography entry.
 302         xs << xml::EndTag("bibliomixed");
 303         xs << xml::CR();
 304
 305         // If this is the last paragraph in a bibliography, close the bibliography tag.
 306         auto const end = text.paragraphs().end();
 307         auto nextpar = par;
 308         ++nextpar;
 309         bool endBibliography = nextpar == end || nextpar->layout().latextype != LATEX_BIB_ENVIRONMENT;
 310
 311         if (endBibliography) {
 312                 xs << xml::EndTag("bibliography");
 313                 xs << xml::CR();
 314         }
 315 }
 316
 317
 318 void makeParagraph(
 319                 Text const & text,
 320                 Buffer const & buf,
 321                 XMLStream & xs,
 322                 OutputParams const & runparams,
 323                 ParagraphList::const_iterator const & par)
 324 {
 325         // Useful variables.
 326         auto const begin = text.paragraphs().begin();
 327         auto const end = text.paragraphs().end();
 328         auto prevpar = text.paragraphs().getParagraphBefore(par);
 329
 330         // We want to open the paragraph tag if:
 331         //   (i) the current layout permits multiple paragraphs
 332         //  (ii) we are either not already inside a paragraph (HTMLIsBlock) OR
 333         //         we are, but this is not the first paragraph
 334         //
 335         // But there is also a special case, and we first see whether we are in it.
 336         // We do not want to open the paragraph tag if this paragraph contains
 337         // only one item, and that item is "inline", i.e., not HTMLIsBlock (such
 338         // as a branch). On the other hand, if that single item has a font change
 339         // applied to it, then we still do need to open the paragraph.
 340         //
 341         // Obviously, this is very fragile. The main reason we need to do this is
 342         // because of branches, e.g., a branch that contains an entire new section.
 343         // We do not really want to wrap that whole thing in a <div>...</div>.
 344         bool special_case = false;
 345         Inset const *specinset = par->size() == 1 ? par->getInset(0) : nullptr;
 346         if (specinset && !specinset->getLayout().htmlisblock()) { // TODO: Convert htmlisblock to a DocBook parameter? docbooknotinpara should be enough in most cases.
 347                 Layout const &style = par->layout();
 348                 FontInfo const first_font = style.labeltype == LABEL_MANUAL ?
 349                                                                         style.labelfont : style.font;
 350                 FontInfo const our_font =
 351                                 par->getFont(buf.masterBuffer()->params(), 0,
 352                                                          text.outerFont(std::distance(begin, par))).fontInfo();
 353
 354                 if (first_font == our_font)
 355                         special_case = true;
 356         }
 357
 358         size_t nInsets = std::distance(par->insetList().begin(), par->insetList().end());
 359         auto parSize = (size_t) par->size();
 360
 361         // Plain layouts must be ignored.
 362         special_case |= buf.params().documentClass().isPlainLayout(par->layout()) && !runparams.docbook_force_pars;
 363
 364         // Equations do not deserve their own paragraph (DocBook allows them outside paragraphs).
 365         // Exception: any case that generates an <inlineequation> must still get a paragraph to be valid.
 366         auto isEquationSpecialCase = [](InsetList::Element inset) {
 367                 return inset.inset && inset.inset->asInsetMath() && inset.inset->asInsetMath()->getType() != hullSimple;
 368         };
 369         special_case |= nInsets == parSize && std::all_of(par->insetList().begin(), par->insetList().end(), isEquationSpecialCase);
 370
 371         // Things that should not get into their own paragraph. (Only valid for DocBook.)
 372         static std::set<InsetCode> lyxCodeSpecialCases = {
 373                         TABULAR_CODE,
 374                         FLOAT_CODE,
 375                         BIBTEX_CODE, // Bibliographies cannot be in paragraphs. Bibitems should still be handled as paragraphs,
 376                         // though (see makeParagraphBibliography).
 377                         ERT_CODE, // ERTs are in comments, not paragraphs.
 378                         LISTINGS_CODE,
 379                         BOX_CODE,
 380                         INCLUDE_CODE,
 381                         NOMENCL_PRINT_CODE,
 382                         TOC_CODE, // To be ignored in DocBook, the processor afterwards should deal with ToCs.
 383                         NOTE_CODE // Notes do not produce any output.
 384         };
 385         auto isLyxCodeSpecialCase = [](InsetList::Element inset) {
 386                 return lyxCodeSpecialCases.find(inset.inset->lyxCode()) != lyxCodeSpecialCases.end();
 387         };
 388         special_case |= nInsets == parSize && std::all_of(par->insetList().begin(), par->insetList().end(), isLyxCodeSpecialCase);
 389
 390         // Flex elements (InsetLayout) have their own parameter to control the special case.
 391         auto isFlexSpecialCase = [](InsetList::Element inset) {
 392                 if (inset.inset->lyxCode() != FLEX_CODE)
 393                         return false;
 394
 395                 // Standard condition: check the parameter.
 396                 if (inset.inset->getLayout().docbooknotinpara())
 397                         return true;
 398
 399                 // If the parameter is not set, maybe the flex inset only contains things that should match the standard
 400                 // condition. In this case, isLyxCodeSpecialCase must also check for bibitems...
 401                 auto isLyxCodeSpecialCase = [](InsetList::Element inset) {
 402                         return lyxCodeSpecialCases.find(inset.inset->lyxCode()) != lyxCodeSpecialCases.end() ||
 403                                         inset.inset->lyxCode() == BIBITEM_CODE;
 404                 };
 405                 if (InsetText * text = inset.inset->asInsetText()) {
 406                         for (auto const & par : text->paragraphs()) {
 407                                 size_t nInsets = std::distance(par.insetList().begin(), par.insetList().end());
 408                                 auto parSize = (size_t) par.size();
 409
 410                                 if (nInsets == 1 && par.insetList().begin()->inset->lyxCode() == BIBITEM_CODE)
 411                                         return true;
 412                                 if (nInsets != parSize)
 413                                         return false;
 414                                 if (!std::all_of(par.insetList().begin(), par.insetList().end(), isLyxCodeSpecialCase))
 415                                         return false;
 416                         }
 417                         return true;
 418                 }
 419
 420                 // No case matched: give up.
 421                 return false;
 422         };
 423         special_case |= nInsets == parSize && std::all_of(par->insetList().begin(), par->insetList().end(), isFlexSpecialCase);
 424
 425         // If the insets should be rendered as images, enter the special case.
 426         auto isRenderedAsImageSpecialCase = [](InsetList::Element inset) {
 427                 return inset.inset && inset.inset->getLayout().docbookrenderasimage();
 428         };
 429         special_case |= nInsets == parSize && std::all_of(par->insetList().begin(), par->insetList().end(), isRenderedAsImageSpecialCase);
 430
 431         // Open a paragraph if it is allowed, we are not already within a paragraph, and the insets in the paragraph do
 432         // not forbid paragraphs (aka special cases).
 433         bool const open_par = runparams.docbook_make_pars
 434                                                   && !runparams.docbook_in_par
 435                                                   && !special_case;
 436
 437         // We want to issue the closing tag if either:
 438         //   (i)  We opened it, and either docbook_in_par is false,
 439         //              or we're not in the last paragraph, anyway.
 440         //   (ii) We didn't open it and docbook_in_par is true,
 441         //              but we are in the first par, and there is a next par.
 442         bool const close_par = open_par && !runparams.docbook_in_par;
 443
 444         // Determine if this paragraph has some real content. Things like new pages are not caught
 445         // by Paragraph::empty(), even though they do not generate anything useful in DocBook.
 446         // Thus, remove all spaces (including new lines: \r, \n) before checking for emptiness.
 447         // std::all_of allows doing this check without having to copy the string.
 448         // Open and close tags around each contained paragraph.
 449         auto nextpar = par;
 450         ++nextpar;
 451
 452         std::vector<docstring> pars_prepend;
 453         std::vector<docstring> pars;
 454         std::vector<docstring> pars_append;
 455         tie(pars_prepend, pars, pars_append) = par->simpleDocBookOnePar(buf, runparams, text.outerFont(distance(begin, par)), 0, nextpar == end, special_case);
 456
 457         for (docstring const & parXML : pars_prepend)
 458             xs << XMLStream::ESCAPE_NONE << parXML;
 459         for (docstring const & parXML : pars) {
 460                 if (!xml::isNotOnlySpace(parXML))
 461                         continue;
 462
 463                 if (open_par)
 464                         openParTag(xs, &*par, prevpar, runparams);
 465
 466                 xs << XMLStream::ESCAPE_NONE << parXML;
 467
 468                 if (close_par)
 469                         closeParTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr, runparams);
 470         }
 471         for (docstring const & parXML : pars_append)
 472             xs << XMLStream::ESCAPE_NONE << parXML;
 473 }
 474
 475
 476 void makeEnvironment(Text const &text,
 477                                          Buffer const &buf,
 478                      XMLStream &xs,
 479                      OutputParams const &runparams,
 480                      ParagraphList::const_iterator const & par)
 481 {
 482         // Useful variables.
 483         auto const end = text.paragraphs().end();
 484         auto nextpar = par;
 485         ++nextpar;
 486
 487         // Special cases for listing-like environments provided in layouts. This is quite ad-hoc, but provides a useful
 488         // default. This should not be used by too many environments (only LyX-Code right now).
 489         // This would be much simpler if LyX-Code was implemented as InsetListings...
 490         bool mimicListing = false;
 491         bool ignoreFonts = false;
 492         if (par->layout().docbooktag() == "programlisting") {
 493                 mimicListing = true;
 494                 ignoreFonts = true;
 495         }
 496
 497         // Output the opening tag for this environment, but only if it has not been previously opened (condition
 498         // implemented in openParTag).
 499         auto prevpar = text.paragraphs().getParagraphBefore(par);
 500         openParTag(xs, &*par, prevpar, runparams);
 501
 502         // Generate the contents of this environment. There is a special case if this is like some environment.
 503         Layout const & style = par->layout();
 504         if (style.latextype == LATEX_COMMAND) {
 505                 // Nothing to do (otherwise, infinite loops).
 506         } else if (style.latextype == LATEX_ENVIRONMENT) {
 507                 // Generate the paragraph, if need be.
 508                 std::vector<docstring> pars_prepend;
 509         std::vector<docstring> pars;
 510         std::vector<docstring> pars_append;
 511         tie(pars_prepend, pars, pars_append) = par->simpleDocBookOnePar(buf, runparams, text.outerFont(std::distance(text.paragraphs().begin(), par)), 0, false, ignoreFonts);
 512
 513         for (docstring const & parXML : pars_prepend)
 514             xs << XMLStream::ESCAPE_NONE << parXML;
 515                 if (mimicListing) {
 516                         auto p = pars.begin();
 517                         while (p != pars.end()) {
 518                                 xml::openTag(xs, par->layout().docbookiteminnertag(), par->layout().docbookiteminnerattr(),
 519                                              par->layout().docbookiteminnertagtype());
 520                                 xs << XMLStream::ESCAPE_NONE << *p;
 521                                 xml::closeTag(xs, par->layout().docbookiteminnertag(), par->layout().docbookiteminnertagtype());
 522                                 ++p;
 523
 524                                 // Insert a new line after each "paragraph" (i.e. line in the listing), except for the last one.
 525                                 // Otherwise, there would one more new line in the output than in the LyX document.
 526                                 if (p != pars.end())
 527                                         xs << xml::CR();
 528                         }
 529                 } else {
 530                         for (auto const & p : pars) {
 531                                 xml::openTag(xs, par->layout().docbookiteminnertag(), par->layout().docbookiteminnerattr(),
 532                                              par->layout().docbookiteminnertagtype());
 533                                 xs << XMLStream::ESCAPE_NONE << p;
 534                                 xml::closeTag(xs, par->layout().docbookiteminnertag(), par->layout().docbookiteminnertagtype());
 535                         }
 536                 }
 537         for (docstring const & parXML : pars_append)
 538             xs << XMLStream::ESCAPE_NONE << parXML;
 539         } else {
 540                 makeAny(text, buf, xs, runparams, par);
 541         }
 542
 543         // Close the environment.
 544         closeParTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr, runparams);
 545 }
 546
 547
 548 ParagraphList::const_iterator findEndOfEnvironment(
 549                 ParagraphList::const_iterator const & pstart,
 550                 ParagraphList::const_iterator const & pend)
 551 {
 552         // Copy-paste from XHTML. Should be factored out at some point...
 553         ParagraphList::const_iterator p = pstart;
 554         Layout const & bstyle = p->layout();
 555         size_t const depth = p->params().depth();
 556         for (++p; p != pend; ++p) {
 557                 Layout const & style = p->layout();
 558                 // It shouldn't happen that e.g. a section command occurs inside
 559                 // a quotation environment, at a higher depth, but as of 6/2009,
 560                 // it can happen. We pretend that it's just at lowest depth.
 561                 if (style.latextype == LATEX_COMMAND)
 562                         return p;
 563
 564                 // If depth is down, we're done
 565                 if (p->params().depth() < depth)
 566                         return p;
 567
 568                 // If depth is up, we're not done
 569                 if (p->params().depth() > depth)
 570                         continue;
 571
 572                 // FIXME I am not sure about the first check.
 573                 // Surely we *could* have different layouts that count as
 574                 // LATEX_PARAGRAPH, right?
 575                 if (style.latextype == LATEX_PARAGRAPH || style != bstyle)
 576                         return p;
 577         }
 578         return pend;
 579 }
 580
 581
 582 ParagraphList::const_iterator makeListEnvironment(Text const &text,
 583                                                                                                   Buffer const &buf,
 584                                                           XMLStream &xs,
 585                                                           OutputParams const &runparams,
 586                                                           ParagraphList::const_iterator const & begin)
 587 {
 588         // Useful variables.
 589         auto par = begin;
 590         auto const end = text.paragraphs().end();
 591         auto const envend = findEndOfEnvironment(par, end);
 592
 593         // Output the opening tag for this environment.
 594         Layout const & envstyle = par->layout();
 595         xml::openTag(xs, envstyle.docbookwrappertag(), envstyle.docbookwrapperattr(), envstyle.docbookwrappertagtype());
 596         xml::openTag(xs, envstyle.docbooktag(), envstyle.docbookattr(), envstyle.docbooktagtype());
 597
 598         // Handle the content of the list environment, item by item.
 599         while (par != envend) {
 600                 // Skip this paragraph if it is both empty and the last one (otherwise, there may be deeper paragraphs after).
 601                 auto nextpar = par;
 602                 ++nextpar;
 603                 if (par->empty() && nextpar == envend)
 604                         break;
 605
 606                 // Open the item wrapper.
 607                 Layout const & style = par->layout();
 608                 xml::openTag(xs, style.docbookitemwrappertag(), style.docbookitemwrapperattr(),
 609                              style.docbookitemwrappertagtype());
 610
 611                 // Generate the label, if need be. If it is taken from the text, sep != 0 and corresponds to the first
 612                 // character after the label.
 613                 pos_type sep = 0;
 614                 if (style.labeltype != LABEL_NO_LABEL && style.docbookitemlabeltag() != "NONE") {
 615                         if (style.labeltype == LABEL_MANUAL) {
 616                                 // Only variablelist gets here (or similar items defined as an extension in the layout).
 617                                 xml::openTag(xs, style.docbookitemlabeltag(), style.docbookitemlabelattr(),
 618                                              style.docbookitemlabeltagtype());
 619                                 sep = 1 + par->firstWordDocBook(xs, runparams);
 620                                 xml::closeTag(xs, style.docbookitemlabeltag(), style.docbookitemlabeltagtype());
 621                         } else {
 622                                 // Usual cases: maybe there is something specified at the layout level. Highly unlikely, though.
 623                                 docstring const lbl = par->params().labelString();
 624
 625                                 if (!lbl.empty()) {
 626                                         xml::openTag(xs, style.docbookitemlabeltag(), style.docbookitemlabelattr(),
 627                                                      style.docbookitemlabeltagtype());
 628                                         xs << lbl;
 629                                         xml::closeTag(xs, style.docbookitemlabeltag(), style.docbookitemlabeltagtype());
 630                                 }
 631                         }
 632                 }
 633
 634                 // Open the item (after the wrapper and the label).
 635                 xml::openTag(xs, style.docbookitemtag(), style.docbookitemattr(), style.docbookitemtagtype());
 636
 637                 // Generate the content of the item.
 638                 if (sep < par->size()) {
 639             std::vector<docstring> pars_prepend;
 640             std::vector<docstring> pars;
 641             std::vector<docstring> pars_append;
 642             tie(pars_prepend, pars, pars_append) = par->simpleDocBookOnePar(buf, runparams,
 643                                                              text.outerFont(std::distance(text.paragraphs().begin(), par)), sep);
 644             for (docstring const & parXML : pars_prepend)
 645                 xs << XMLStream::ESCAPE_NONE << parXML;
 646                         for (auto &p : pars) {
 647                                 xml::openTag(xs, par->layout().docbookiteminnertag(), par->layout().docbookiteminnerattr(),
 648                                              par->layout().docbookiteminnertagtype());
 649                                 xs << XMLStream::ESCAPE_NONE << p;
 650                                 xml::closeTag(xs, par->layout().docbookiteminnertag(), par->layout().docbookiteminnertagtype());
 651                         }
 652             for (docstring const & parXML : pars_append)
 653                 xs << XMLStream::ESCAPE_NONE << parXML;
 654                 } else {
 655                         // DocBook doesn't like emptiness.
 656                         xml::compTag(xs, par->layout().docbookiteminnertag(), par->layout().docbookiteminnerattr(),
 657                                      par->layout().docbookiteminnertagtype());
 658                 }
 659
 660                 // If the next item is deeper, it must go entirely within this item (do it recursively).
 661                 // By construction, with findEndOfEnvironment, depth can only stay constant or increase, never decrease.
 662                 depth_type currentDepth = par->getDepth();
 663                 ++par;
 664                 while (par != envend && par->getDepth() != currentDepth)
 665                         par = makeAny(text, buf, xs, runparams, par);
 666                 // Usually, this loop only makes one iteration, except in complex scenarios, like an item with a paragraph,
 667                 // a list, and another paragraph; or an item with two types of list (itemise then enumerate, for instance).
 668
 669                 // Close the item.
 670                 xml::closeTag(xs, style.docbookitemtag(), style.docbookitemtagtype());
 671                 xml::closeTag(xs, style.docbookitemwrappertag(), style.docbookitemwrappertagtype());
 672         }
 673
 674         // Close this environment in exactly the same way as it was opened.
 675         xml::closeTag(xs, envstyle.docbooktag(), envstyle.docbooktagtype());
 676         xml::closeTag(xs, envstyle.docbookwrappertag(), envstyle.docbookwrappertagtype());
 677
 678         return envend;
 679 }
 680
 681
 682 void makeCommand(
 683                 Text const & text,
 684                 Buffer const & buf,
 685                 XMLStream & xs,
 686                 OutputParams const & runparams,
 687                 ParagraphList::const_iterator const & par)
 688 {
 689         // Useful variables.
 690         // Unlike XHTML, no need for labels, as they are handled by DocBook tags.
 691         auto const begin = text.paragraphs().begin();
 692         auto const end = text.paragraphs().end();
 693         auto nextpar = par;
 694         ++nextpar;
 695
 696         // Generate this command.
 697         auto prevpar = text.paragraphs().getParagraphBefore(par);
 698
 699     std::vector<docstring> pars_prepend;
 700     std::vector<docstring> pars;
 701     std::vector<docstring> pars_append;
 702     tie(pars_prepend, pars, pars_append) = par->simpleDocBookOnePar(buf, runparams,text.outerFont(distance(begin, par)));
 703
 704     for (docstring const & parXML : pars_prepend)
 705         xs << XMLStream::ESCAPE_NONE << parXML;
 706
 707     openParTag(xs, &*par, prevpar, runparams);
 708         for (auto & parXML : pars)
 709                 // TODO: decide what to do with openParTag/closeParTag in new lines.
 710                 xs << XMLStream::ESCAPE_NONE << parXML;
 711     closeParTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr, runparams);
 712
 713     for (docstring const & parXML : pars_append)
 714         xs << XMLStream::ESCAPE_NONE << parXML;
 715 }
 716
 717
 718 bool isLayoutSectioning(Layout const & lay)
 719 {
 720         if (lay.docbooksection()) // Special case: some DocBook styles must be handled as sections.
 721                 return true;
 722         else if (lay.category() == from_utf8("Sectioning") || lay.docbooktag() == "section") // Generic case.
 723                 return lay.toclevel != Layout::NOT_IN_TOC;
 724         return false;
 725 }
 726
 727
 728 bool isLayoutSectioningOrSimilar(Layout const & lay)
 729 {
 730         return isLayoutSectioning(lay) || lay.docbooktag() == "bridgehead";
 731 }
 732
 733
// Result of hasDocumentSectioning: whether a sectioning (or similar) paragraph was found, and the
// paragraph index at which the search stopped (the index of that paragraph when one was found).
using DocBookDocumentSectioning = tuple<bool, pit_type>;
 735
 736
 737 struct DocBookInfoTag
 738 {
 739         const set<pit_type> shouldBeInInfo;
 740         const set<pit_type> mustBeInInfo; // With the notable exception of the abstract!
 741         const set<pit_type> abstract;
 742         const bool abstractLayout;
 743         pit_type bpit;
 744         pit_type epit;
 745
 746         DocBookInfoTag(const set<pit_type> & shouldBeInInfo, const set<pit_type> & mustBeInInfo,
 747                                    const set<pit_type> & abstract, bool abstractLayout, pit_type bpit, pit_type epit) :
 748                                    shouldBeInInfo(shouldBeInInfo), mustBeInInfo(mustBeInInfo), abstract(abstract),
 749                                    abstractLayout(abstractLayout), bpit(bpit), epit(epit) {}
 750 };
 751
 752
 753 DocBookDocumentSectioning hasDocumentSectioning(ParagraphList const &paragraphs, pit_type bpit, pit_type const epit) {
 754         bool documentHasSections = false;
 755
 756         while (bpit < epit) {
 757                 LASSERT(static_cast<size_t>(bpit) < paragraphs.size(), return make_tuple(documentHasSections, bpit));
 758
 759                 Layout const &style = paragraphs[bpit].layout();
 760                 documentHasSections |= isLayoutSectioningOrSimilar(style);
 761
 762                 if (documentHasSections)
 763                         break;
 764                 bpit += 1;
 765         }
 766         // Paragraphs before the first section: [ runparams.par_begin ; eppit )
 767
 768         return make_tuple(documentHasSections, bpit);
 769 }
 770
 771
 772 bool hasOnlyNotes(Paragraph const & par)
 773 {
 774         // Precondition: the paragraph is not empty. Otherwise, the function will always return true...
 775         for (int i = 0; i < par.size(); ++i)
 776                 // If you find something that is not an inset (like actual text) or an inset that is not a note,
 777                 // return false.
 778                 if (!par.isInset(i) || par.getInset(i)->lyxCode() != NOTE_CODE)
 779                         return false;
 780
 781         // An empty paragraph may still require some output.
 782         if (par.layout().docbooksection())
 783                 return false;
 784
 785         // There should be really no content here.
 786         return true;
 787 }
 788
 789
 790 DocBookInfoTag getParagraphsWithInfo(ParagraphList const &paragraphs,
 791                                                                          pit_type bpit, pit_type const epit,
 792                                                                          // Typically, bpit is the beginning of the document and epit the end of the
 793                                                                          // document *or* the first section.
 794                                                                          bool documentHasSections,
 795                                                                          bool detectUnlayoutedAbstract
 796                                                                          // Whether paragraphs with no specific layout should be detected as abstracts.
 797                                                                          // For inner sections, an abstract should only be detected if it has a specific
 798                                                                          // layout. For others, anything that might look like an abstract should be sought.
 799                                                                          ) {
 800         set<pit_type> shouldBeInInfo;
 801         set<pit_type> mustBeInInfo;
 802         set<pit_type> abstractWithLayout;
 803         set<pit_type> abstractNoLayout;
 804
 805         // Find the first nonempty paragraph by mutating bpit.
 806         while (bpit < epit) {
 807                 Paragraph const &par = paragraphs[bpit];
 808                 if (par.empty() || hasOnlyNotes(par))
 809                         bpit += 1;
 810                 else
 811                         break;
 812         }
 813
 814         // Traverse everything that might belong to <info>.
 815         bool hasAbstractLayout = false;
 816         static depth_type INVALID_DEPTH = 100000;
 817         depth_type abstractDepth = INVALID_DEPTH;
 818         pit_type cpit = bpit;
 819         for (; cpit < epit; ++cpit) {
 820                 // Skip paragraphs that don't generate anything in DocBook.
 821                 Paragraph const & par = paragraphs[cpit];
 822                 Layout const &style = par.layout();
 823                 if (hasOnlyNotes(par))
 824                         continue;
 825
 826                 // There should never be any section here, except for the first paragraph (a title can be part of <info>).
 827                 // (Just a sanity check: if this fails, this function could end up processing the whole document.)
 828                 if (cpit != bpit && isLayoutSectioningOrSimilar(par.layout())) {
 829                         LYXERR(Debug::OUTFILE, "Assertion failed: section found in potential <info> paragraphs.");
 830                         break;
 831                 }
 832
 833                 // If this is marked as an abstract by the layout, put it in the right set.
 834                 if (style.docbookabstract()) {
 835                         hasAbstractLayout = true;
 836                         abstractDepth = par.getDepth();
 837                         abstractWithLayout.emplace(cpit);
 838                         continue;
 839                 }
 840
 841                 // Deeper paragraphs following the abstract must still be considered as part of the abstract.
 842                 // For instance, this includes lists. There should not be any other kind of paragraph in between.
 843                 if (abstractDepth != INVALID_DEPTH && style.docbookininfo() == "never") {
 844                         if (par.getDepth() > abstractDepth) {
 845                                 abstractWithLayout.emplace(cpit);
 846                                 continue;
 847                         }
 848                         if (par.getDepth() == abstractDepth) {
 849                                 // This is not an abstract paragraph and it should not either be considered as part
 850                                 // of it. It breaks the rule that abstract paragraphs must follow each other.
 851                                 abstractDepth = INVALID_DEPTH;
 852                                 break;
 853                         }
 854                 }
 855
 856                 // Based on layout information, store this paragraph in one set: should be in <info>, must be,
 857                 // or abstract (either because of layout or of position).
 858                 if (style.docbookininfo() == "always")
 859                         mustBeInInfo.emplace(cpit);
 860                 else if (style.docbookininfo() == "maybe")
 861                         shouldBeInInfo.emplace(cpit);
 862                 else if (documentHasSections && !hasAbstractLayout && detectUnlayoutedAbstract &&
 863                                 (style.docbooktag() == "NONE" || style.docbooktag() == "para") &&
 864                                 style.docbookwrappertag() == "NONE")
 865                         // In this case, it is very likely that style.docbookininfo() == "never"! Be extra careful
 866                         // about anything that gets caught here. For instance, don't ake into account
 867                         abstractNoLayout.emplace(cpit);
 868                 else // This should definitely not be in <info>.
 869                         break;
 870         }
 871         // Now, cpit points to the first paragraph that no more has things that could go in <info>.
 872         // bpit is the beginning of the <info> part.
 873
 874         return DocBookInfoTag(shouldBeInInfo, mustBeInInfo,
 875                                               hasAbstractLayout ? abstractWithLayout : abstractNoLayout,
 876                                               hasAbstractLayout, bpit, cpit);
 877 }
 878
 879 } // end anonymous namespace
 880
 881
 882 std::set<const Inset *> gatherInfo(ParagraphList::const_iterator par)
 883 {
 884         // This function has a structure highly similar to makeAny and its friends. It's only made to be called on what
 885         // should become the document's <abstract>.
 886         std::set<const Inset *> values;
 887
 888         // If this kind of layout should be ignored, already leave.
 889         if (par->layout().docbooktag() == "IGNORE")
 890                 return values;
 891
 892         // If this should go in info, mark it as such. Dive deep into the abstract, as it may hide many things that
 893         // DocBook doesn't want to be inside the abstract.
 894         for (pos_type i = 0; i < par->size(); ++i) {
 895                 if (par->getInset(i) && par->getInset(i)->asInsetText()) {
 896                         InsetText const *inset = par->getInset(i)->asInsetText();
 897
 898                         if (inset->getLayout().docbookininfo() != "never") {
 899                                 values.insert(inset);
 900                         } else {
 901                                 auto subpar = inset->paragraphs().begin();
 902                                 while (subpar != inset->paragraphs().end()) {
 903                                         auto subinfos = gatherInfo(subpar);
 904                                         for (auto & subinfo: subinfos)
 905                                                 values.insert(subinfo);
 906                                         ++subpar;
 907                                 }
 908                         }
 909                 }
 910         }
 911
 912         return values;
 913 }
 914
 915
 916 ParagraphList::const_iterator makeAny(Text const &text,
 917                                       Buffer const &buf,
 918                                       XMLStream &xs,
 919                                       OutputParams const &runparams,
 920                                       ParagraphList::const_iterator par)
 921 {
 922         bool ignoreParagraph = false;
 923
 924         // If this kind of layout should be ignored, already leave.
 925         ignoreParagraph |= par->layout().docbooktag() == "IGNORE";
 926
 927         // For things that should go into <info>, check the variable rp.docbook_generate_info. This does not apply to the
 928         // abstract itself.
 929         bool isAbstract = par->layout().docbookabstract() || par->layout().docbooktag() == "abstract";
 930         ignoreParagraph |= !isAbstract && par->layout().docbookininfo() != "never" && !runparams.docbook_generate_info;
 931
 932         // Switch on the type of paragraph to call the right handler.
 933         if (!ignoreParagraph) {
 934                 switch (par->layout().latextype) {
 935                 case LATEX_COMMAND:
 936                         makeCommand(text, buf, xs, runparams, par);
 937                         break;
 938                 case LATEX_ENVIRONMENT:
 939                         makeEnvironment(text, buf, xs, runparams, par);
 940                         break;
 941                 case LATEX_LIST_ENVIRONMENT:
 942                 case LATEX_ITEM_ENVIRONMENT:
 943                         // Only case when makeAny() might consume more than one paragraph.
 944                         return makeListEnvironment(text, buf, xs, runparams, par);
 945                 case LATEX_PARAGRAPH:
 946                         makeParagraph(text, buf, xs, runparams, par);
 947                         break;
 948                 case LATEX_BIB_ENVIRONMENT:
 949                         makeBibliography(text, buf, xs, runparams, par);
 950                         break;
 951                 }
 952         }
 953
 954         // For cases that are not lists, the next paragraph to handle is the next one.
 955         ++par;
 956         return par;
 957 }
 958
 959
 960 xml::FontTag docbookStartFontTag(xml::FontTypes type)
 961 {
 962         return xml::FontTag(from_utf8(fontToDocBookTag(type)), from_utf8(fontToAttribute(type)), type);
 963 }
 964
 965
 966 xml::EndFontTag docbookEndFontTag(xml::FontTypes type)
 967 {
 968         return xml::EndFontTag(from_utf8(fontToDocBookTag(type)), type);
 969 }
 970
 971
 972 void outputDocBookInfo(
 973                 Text const & text,
 974                 Buffer const & buf,
 975                 XMLStream & xs,
 976                 OutputParams const & runparams,
 977                 ParagraphList const & paragraphs,
 978                 DocBookInfoTag const & info)
 979 {
 980         // Perform an additional check on the abstract. Sometimes, there are many paragraphs that should go
 981         // into the abstract, but none generates actual content. Thus, first generate to a temporary stream,
 982         // then only create the <abstract> tag if these paragraphs generate some content.
 983         // This check must be performed *before* a decision on whether or not to output <info> is made.
 984         bool hasAbstract = !info.abstract.empty();
 985         docstring abstract;
 986         set<const Inset *> infoInsets; // Paragraphs that should go into <info>, but are hidden in an <abstract>
 987         // paragraph. (This happens for quite a few layouts, unfortunately.)
 988
 989         if (hasAbstract) {
 990                 // Generate the abstract XML into a string before further checks.
 991                 // Usually, makeAny only generates one paragraph at a time. However, for the specific case of lists, it might
 992                 // generate more than one paragraph, as indicated in the return value.
 993                 odocstringstream os2;
 994                 XMLStream xs2(os2);
 995
 996                 auto rp = runparams;
 997                 rp.docbook_generate_info = false;
 998                 rp.docbook_ignore_wrapper = true;
 999
1000                 set<pit_type> doneParas; // Paragraphs that have already been converted (mostly to deal with lists).
1001                 for (auto const & p : info.abstract) {
1002                         if (doneParas.find(p) == doneParas.end()) {
1003                                 auto oldPar = paragraphs.iterator_at(p);
1004                                 auto newPar = makeAny(text, buf, xs2, rp, oldPar);
1005
1006                                 // Find insets that should go outside the abstract.
1007                                 auto subinfos = gatherInfo(oldPar);
1008                                 for (auto & subinfo: subinfos)
1009                                         infoInsets.insert(subinfo);
1010
1011                                 // Insert the indices of all the paragraphs that were just generated (typically, one).
1012                                 // **Make the hypothesis that, when an abstract has a list, all its items are consecutive.**
1013                                 // Otherwise, makeAny and makeListEnvironment would have to be adapted too.
1014                                 pit_type id = p;
1015                                 while (oldPar != newPar) {
1016                                         doneParas.emplace(id);
1017                                         ++oldPar;
1018                                         ++id;
1019                                 }
1020                         }
1021                 }
1022
1023                 // Actually output the abstract if there is something to do. Don't count line feeds, spaces, or comments
1024                 // in this -- even though line feeds and spaces must be properly output if there is some abstract.
1025                 abstract = os2.str();
1026                 docstring cleaned = abstract;
1027                 cleaned.erase(std::remove_if(cleaned.begin(), cleaned.end(), lyx::isSpace), cleaned.end());
1028
1029                 size_t beginComment;
1030                 size_t endComment;
1031                 while ((beginComment = cleaned.find(from_ascii("<!--"))) != lyx::docstring::npos) {
1032                         if ((endComment = cleaned.find(from_ascii("-->"), beginComment)) != lyx::docstring::npos) {
1033                                 cleaned.erase(cleaned.begin() + beginComment, cleaned.begin() + endComment + 3);
1034                         }
1035                 }
1036
1037                 // Nothing? Then there is no abstract!
1038                 if (cleaned.empty())
1039                         hasAbstract = false;
1040         }
1041
1042         // The abstract must go in <info>. Otherwise, decide whether to open <info> based on the layouts.
1043         bool needInfo = !info.mustBeInInfo.empty() || hasAbstract;
1044
1045         // Start the <info> tag if required.
1046         if (needInfo) {
1047                 xs.startDivision(false);
1048                 xs << xml::StartTag("info");
1049                 xs << xml::CR();
1050         }
1051
1052         // Output the elements that should go in <info>.
1053         // - First, the title.
1054         for (auto pit : info.shouldBeInInfo) // Typically, the title: these elements are so important and ubiquitous
1055                 // that mandating a wrapper like <info> would repel users. Thus, generate them first.
1056                 makeAny(text, buf, xs, runparams, paragraphs.iterator_at(pit));
1057         // If there is no title, generate one (required for the document to be valid).
1058         // This code is called for the main document, for table cells, etc., so be precise in this condition.
1059         if (text.isMainText() && info.shouldBeInInfo.empty() && !runparams.inInclude) {
1060                 xs << xml::StartTag("title");
1061                 xs << "Untitled Document";
1062                 xs << xml::EndTag("title");
1063                 xs << xml::CR();
1064         }
1065
1066         // - Then, other metadata.
1067         for (auto pit : info.mustBeInInfo)
1068                 makeAny(text, buf, xs, runparams, paragraphs.iterator_at(pit));
1069         for (auto const * inset : infoInsets)
1070                 inset->docbook(xs, runparams);
1071
1072         // - Finally, always output the abstract as the last item of the <info>, as it requires special treatment
1073         // (especially if it contains several paragraphs that are empty).
1074         if (hasAbstract) {
1075                 string tag = paragraphs[*info.abstract.begin()].layout().docbookforceabstracttag();
1076                 if (tag == "NONE")
1077                         tag = "abstract";
1078
1079                 if (!xs.isLastTagCR())
1080                         xs << xml::CR();
1081
1082                 xs << xml::StartTag(tag);
1083                 xs << xml::CR();
1084                 xs << XMLStream::ESCAPE_NONE << abstract;
1085                 xs << xml::EndTag(tag);
1086                 xs << xml::CR();
1087         }
1088
1089         // End the <info> tag if it was started.
1090         if (needInfo) {
1091                 if (!xs.isLastTagCR())
1092                         xs << xml::CR();
1093
1094                 xs << xml::EndTag("info");
1095                 xs << xml::CR();
1096                 xs.endDivision();
1097         }
1098 }
1099
1100
1101 void docbookSimpleAllParagraphs(
1102                 Text const & text,
1103                 Buffer const & buf,
1104                 XMLStream & xs,
1105                 OutputParams const & runparams)
1106 {
1107         // Handle the given text, supposing it has no sections (i.e. a "simple" text). The input may vary in length
1108         // between a single paragraph to a whole document.
1109         pit_type const bpit = runparams.par_begin;
1110         pit_type const epit = runparams.par_end;
1111         ParagraphList const &paragraphs = text.paragraphs();
1112
1113         // First, the <info> tag.
1114         DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit, false, true);
1115         outputDocBookInfo(text, buf, xs, runparams, paragraphs, info);
1116
1117         // Then, the content. It starts where the <info> ends.
1118         auto par = paragraphs.iterator_at(info.epit);
1119         auto end = paragraphs.iterator_at(epit);
1120         while (par != end) {
1121                 if (!hasOnlyNotes(*par))
1122                         par = makeAny(text, buf, xs, runparams, par);
1123                 else
1124                         ++par;
1125         }
1126 }
1127
1128
1129 void docbookParagraphs(Text const &text,
1130                                            Buffer const &buf,
1131                                            XMLStream &xs,
1132                                            OutputParams const &runparams) {
1133         ParagraphList const &paragraphs = text.paragraphs();
1134         if (runparams.par_begin == runparams.par_end) {
1135                 runparams.par_begin = 0;
1136                 runparams.par_end = paragraphs.size();
1137         }
1138         pit_type bpit = runparams.par_begin;
1139         pit_type const epit = runparams.par_end;
1140         LASSERT(bpit < epit,
1141                         {
1142                                 xs << XMLStream::ESCAPE_NONE << "<!-- DocBook output error! -->\n";
1143                                 return;
1144                         });
1145
1146         // Detect whether the document contains sections. If there are no sections, treatment is largely simplified.
1147         // In particular, there can't be an abstract, unless it is manually marked.
1148         bool documentHasSections;
1149         pit_type eppit;
1150         tie(documentHasSections, eppit) = hasDocumentSectioning(paragraphs, bpit, epit);
1151
1152         // Deal with "simple" documents, i.e. those without sections.
1153         if (!documentHasSections) {
1154                 docbookSimpleAllParagraphs(text, buf, xs, runparams);
1155                 return;
1156         }
1157
1158         // Output the first <info> tag (or just the title).
1159         DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, eppit, true, true);
1160         outputDocBookInfo(text, buf, xs, runparams, paragraphs, info);
1161         bpit = info.epit;
1162
1163         // In the specific case of books, there must be parts or chapters. In some cases, star sections are used at the
1164         // beginning for many things like acknowledgements or licenses. DocBook has tags for many of these cases, but not
1165         // the LyX layouts... Gather everything in a <preface>, that's the closest in meaning.
1166         // This is only useful if the things after the <info> tag are not already parts or chapters!
1167         if (buf.params().documentClass().docbookroot() == "book") {
1168             // Check the condition on the first few elements.
1169             bool hasPreface = false;
1170             pit_type pref_bpit = bpit;
1171             pit_type pref_epit = bpit;
1172
1173             static const std::set<std::string> allowedElements = {
1174                     // List from https://tdg.docbook.org/tdg/5.2/book.html
1175                     "acknowledgements", "appendix", "article", "bibliography", "chapter", "colophon", "dedication",
1176                     "glossary", "index", "part", "preface", "reference", "toc"
1177             };
1178
1179             for (; pref_epit < epit; ++pref_epit) {
1180             auto par = text.paragraphs().iterator_at(pref_epit);
1181             if (allowedElements.find(par->layout().docbooktag()) != allowedElements.end() ||
1182                     allowedElements.find(par->layout().docbooksectiontag()) != allowedElements.end())
1183                 break;
1184
1185             hasPreface = true;
1186             }
1187
1188             // Output a preface if required. A title is needed for the document to be valid...
1189             if (hasPreface) {
1190                 xs << xml::StartTag("preface");
1191                 xs << xml::CR();
1192
1193                 xs << xml::StartTag("title");
1194                 xs << "Preface";
1195                 xs << xml::EndTag("title");
1196             xs << xml::CR();
1197
1198             auto pref_par = text.paragraphs().iterator_at(pref_bpit);
1199             auto pref_end = text.paragraphs().iterator_at(pref_epit);
1200             while (pref_par != pref_end) {
1201                 // Skip paragraphs not producing any output.
1202                 if (hasOnlyNotes(*pref_par)) {
1203                     ++pref_par;
1204                     continue;
1205                 }
1206
1207                 // TODO: must sections be handled here? If so, it might be useful to extract the corresponding loop
1208                 // in the rest of this function to use the same here (and avoid copy-paste mistakes).
1209                 pref_par = makeAny(text, buf, xs, runparams, pref_par);
1210             }
1211
1212                 xs << xml::EndTag("preface");
1213             xs << xml::CR();
1214
1215             // Skip what has just been generated in the preface.
1216             bpit = pref_epit;
1217             }
1218         }
1219
1220         std::stack<std::pair<int, string>> headerLevels; // Used to determine when to open/close sections: store the depth
1221         // of the section and the tag that was used to open it.
1222
1223         // Then, iterate through the paragraphs of this document.
1224         auto par = text.paragraphs().iterator_at(bpit);
1225         auto end = text.paragraphs().iterator_at(epit);
1226         while (par != end) {
1227                 // Skip paragraphs not producing any output.
1228                 if (hasOnlyNotes(*par)) {
1229                         ++par;
1230                         continue;
1231                 }
1232
1233                 OutputParams ourparams = runparams;
1234                 Layout const &style = par->layout();
1235
1236                 // Think about adding <section> and/or </section>s.
1237                 if (isLayoutSectioning(style) || par->params().startOfAppendix()) {
1238                         int level = style.toclevel;
1239
1240                         // Need to close a previous section if it has the same level or a higher one (close <section> if opening a
1241                         // <h2> after a <h2>, <h3>, <h4>, <h5> or <h6>). More examples:
1242                         //   - current: h2; back: h1; do not close any <section>
1243                         //   - current: h1; back: h2; close two <section> (first the <h2>, then the <h1>, so a new <h1> can come)
1244                         // Some layouts require that Layout::NOT_IN_TOC sections still cause closing of previous sections. This is
1245                         // mostly to ensure that the section is positioned at a DocBook-compatible level (acknowledgements: cannot
1246                         // be under a section!).
1247                         while (!headerLevels.empty() && level <= headerLevels.top().first) {
1248                                 // Output the tag only if it corresponds to a legit section.
1249                                 int stackLevel = headerLevels.top().first;
1250                                 if (stackLevel != Layout::NOT_IN_TOC) {
1251                                         xs << xml::EndTag(headerLevels.top().second);
1252                                         xs << xml::CR();
1253                                 }
1254                                 headerLevels.pop();
1255                         }
1256
1257                         // Open the new section: first push it onto the stack, then output it in DocBook.
1258                         string sectionTag = (par->params().startOfAppendix()) ? "appendix" : style.docbooksectiontag();
1259                         headerLevels.push(std::make_pair(level, sectionTag));
1260
1261                         // Some sectioning-like elements should not be output (such as FrontMatter).
1262                         if (level != Layout::NOT_IN_TOC) {
1263                                 // Look for a label in the title, i.e. an InsetLabel as a child.
1264                                 docstring id = docstring();
1265                                 for (pos_type i = 0; i < par->size(); ++i) {
1266                                         Inset const *inset = par->getInset(i);
1267                                         if (inset) {
1268                                                 if (auto label = dynamic_cast<InsetLabel const *>(inset)) {
1269                                                         // Generate the attributes for the section if need be.
1270                                                         id += "xml:id=\"" + xml::cleanID(label->screenLabel()) + "\"";
1271
1272                                                         // Don't output the ID as a DocBook <anchor>.
1273                                                         ourparams.docbook_anchors_to_ignore.emplace(label->screenLabel());
1274
1275                                                         // Cannot have multiple IDs per tag. If there is another ID inset in the document, it will
1276                                                         // be output as a DocBook anchor.
1277                                                         break;
1278                                                 }
1279                                         }
1280                                 }
1281
1282                                 // Write the open tag for this section.
1283                                 docstring attrs;
1284                                 if (!id.empty())
1285                                         attrs = id;
1286                                 xs << xml::StartTag(sectionTag, attrs);
1287                                 xs << xml::CR();
1288                         }
1289                 }
1290
1291                 // Close all sections before the bibliography.
1292                 // TODO: Only close all when the bibliography is at the end of the document? Or force to output the bibliography at the end of the document? Or don't care (as allowed by DocBook)?
1293                 if (!par->insetList().empty()) {
1294                         Inset const *firstInset = par->getInset(0);
1295                         if (firstInset && (firstInset->lyxCode() == BIBITEM_CODE || firstInset->lyxCode() == BIBTEX_CODE)) {
1296                                 while (!headerLevels.empty()) {
1297                                         // Don't close appendices before bibliographies.
1298                                         if (headerLevels.top().second == "appendix")
1299                                                 break;
1300
1301                                         // Pop the section from the stack.
1302                                         int level = headerLevels.top().first;
1303                                         docstring tag = from_utf8("</" + headerLevels.top().second + ">");
1304                                         headerLevels.pop();
1305
1306                                         // Output the tag only if it corresponds to a legit section, like the rest of the code.
1307                                         if (level != Layout::NOT_IN_TOC) {
1308                                                 xs << XMLStream::ESCAPE_NONE << tag;
1309                                                 xs << xml::CR();
1310                                         }
1311                                 }
1312                         }
1313                 }
1314
1315                 // Generate the <info> tag if a section was just opened.
1316                 // Some sections may require abstracts (mostly parts, in books: DocBookForceAbstractTag will not be NONE),
1317                 // others can still have an abstract (it must be detected so that it can be output at the right place).
1318                 // TODO: docbookforceabstracttag is a bit contrived here, but it does the job. Having another field just for this would be cleaner, but that's just for <part> and <partintro>, so it's probably not worth the effort.
1319                 if (isLayoutSectioning(style)) {
1320                         // This abstract may be found between the next paragraph and the next title.
1321                         pit_type cpit = std::distance(text.paragraphs().begin(), par);
1322                         pit_type ppit = std::get<1>(hasDocumentSectioning(paragraphs, cpit + 1L, epit));
1323
1324                         // Generate this abstract (this code corresponds to parts of outputDocBookInfo).
1325                         DocBookInfoTag secInfo = getParagraphsWithInfo(paragraphs, cpit, ppit, true,
1326                                                                                                   style.docbookforceabstracttag() != "NONE");
1327
1328                         if (!secInfo.mustBeInInfo.empty() || !secInfo.shouldBeInInfo.empty() || !secInfo.abstract.empty()) {
1329                                 // Generate the <info>, if required. If DocBookForceAbstractTag != NONE, this abstract will not be in
1330                                 // <info>, unlike other ("standard") abstracts.
1331                                 bool hasStandardAbstract = !secInfo.abstract.empty() && style.docbookforceabstracttag() == "NONE";
1332                                 bool needInfo = !secInfo.mustBeInInfo.empty() || hasStandardAbstract;
1333
1334                                 if (needInfo) {
1335                                         xs.startDivision(false);
1336                                         xs << xml::StartTag("info");
1337                                         xs << xml::CR();
1338                                 }
1339
1340                                 // Output the elements that should go in <info>, before and after the abstract.
1341                                 for (auto pit : secInfo.shouldBeInInfo) // Typically, the title: these elements are so important and ubiquitous
1342                                         // that mandating a wrapper like <info> would repel users. Thus, generate them first.
1343                                         makeAny(text, buf, xs, ourparams, paragraphs.iterator_at(pit));
1344                                 for (auto pit : secInfo.mustBeInInfo)
1345                                         makeAny(text, buf, xs, ourparams, paragraphs.iterator_at(pit));
1346
1347                                 // Deal with the abstract in <info> if it is standard (i.e. its tag is <abstract>).
1348                                 if (!secInfo.abstract.empty() && hasStandardAbstract) {
1349                                         if (!secInfo.abstractLayout) {
1350                                                 xs << xml::StartTag("abstract");
1351                                                 xs << xml::CR();
1352                                         }
1353
1354                                         for (auto const &p : secInfo.abstract)
1355                                                 makeAny(text, buf, xs, ourparams, paragraphs.iterator_at(p));
1356
1357                                         if (!secInfo.abstractLayout) {
1358                                                 xs << xml::EndTag("abstract");
1359                                                 xs << xml::CR();
1360                                         }
1361                                 }
1362
1363                                 // End the <info> tag if it was started.
1364                                 if (needInfo) {
1365                                         if (!xs.isLastTagCR())
1366                                                 xs << xml::CR();
1367
1368                                         xs << xml::EndTag("info");
1369                                         xs << xml::CR();
1370                                         xs.endDivision();
1371                                 }
1372
1373                                 // Deal with the abstract outside <info> if it is not standard (i.e. its tag is layout-defined).
1374                                 if (!secInfo.abstract.empty() && !hasStandardAbstract) {
1375                                         // Assert: style.docbookforceabstracttag() != NONE.
1376                                         xs << xml::StartTag(style.docbookforceabstracttag());
1377                                         xs << xml::CR();
1378                                         for (auto const &p : secInfo.abstract)
1379                                                 makeAny(text, buf, xs, ourparams, paragraphs.iterator_at(p));
1380                                         xs << xml::EndTag(style.docbookforceabstracttag());
1381                                         xs << xml::CR();
1382                                 }
1383
1384                                 // Skip all the text that has just been generated.
1385                                 par = paragraphs.iterator_at(secInfo.epit);
1386                         } else {
1387                                 // No <info> tag to generate, proceed as for normal paragraphs.
1388                                 par = makeAny(text, buf, xs, ourparams, par);
1389                         }
1390                 } else {
1391                         // Generate this paragraph, as it has nothing special.
1392                         par = makeAny(text, buf, xs, ourparams, par);
1393                 }
1394         }
1395
1396         // If need be, close <section>s, but only at the end of the document (otherwise, dealt with at the beginning
1397         // of the loop).
1398         while (!headerLevels.empty() && headerLevels.top().first > Layout::NOT_IN_TOC) {
1399                 docstring tag = from_utf8("</" + headerLevels.top().second + ">");
1400                 headerLevels.pop();
1401                 xs << XMLStream::ESCAPE_NONE << tag;
1402                 xs << xml::CR();
1403         }
1404 }
1405
1406 } // namespace lyx