src/output_docbook.cpp

   1 /**
   2  * \file output_docbook.cpp
   3  * This file is part of LyX, the document processor.
   4  * Licence details can be found in the file COPYING.
   5  *
   6  * \author Lars Gullik Bjønnes
   7  * \author José Matos
   8  *
   9  * Full author contact details are available in file CREDITS.
  10  */
  11
  12 #include <config.h>
  13
  14 #include "output_docbook.h"
  15
  16 #include "Buffer.h"
  17 #include "buffer_funcs.h"
  18 #include "BufferParams.h"
  19 #include "Font.h"
  20 #include "InsetList.h"
  21 #include "Paragraph.h"
  22 #include "ParagraphList.h"
  23 #include "ParagraphParameters.h"
  24 #include "xml.h"
  25 #include "Text.h"
  26 #include "TextClass.h"
  27
  28 #include "insets/InsetBibtex.h"
  29 #include "insets/InsetBibitem.h"
  30 #include "insets/InsetLabel.h"
  31 #include "mathed/InsetMath.h"
  32 #include "insets/InsetNote.h"
  33
  34 #include "support/debug.h"
  35 #include "support/lassert.h"
  36 #include "support/textutils.h"
  37
  38 #include <stack>
  39 #include <iostream>
  40 #include <algorithm>
  41 #include <sstream>
  42
  43 using namespace std;
  44 using namespace lyx::support;
  45
  46 namespace lyx {
  47
  48 namespace {
  49
  50 std::string fontToDocBookTag(xml::FontTypes type)
  51 {
  52         switch (type) {
  53         case xml::FontTypes::FT_EMPH:
  54         case xml::FontTypes::FT_BOLD:
  55                 return "emphasis";
  56         case xml::FontTypes::FT_NOUN:
  57                 return "personname";
  58         case xml::FontTypes::FT_UBAR:
  59         case xml::FontTypes::FT_WAVE:
  60         case xml::FontTypes::FT_DBAR:
  61         case xml::FontTypes::FT_SOUT:
  62         case xml::FontTypes::FT_XOUT:
  63         case xml::FontTypes::FT_ITALIC:
  64         case xml::FontTypes::FT_UPRIGHT:
  65         case xml::FontTypes::FT_SLANTED:
  66         case xml::FontTypes::FT_SMALLCAPS:
  67         case xml::FontTypes::FT_ROMAN:
  68         case xml::FontTypes::FT_SANS:
  69                 return "emphasis";
  70         case xml::FontTypes::FT_TYPE:
  71                 return "code";
  72         case xml::FontTypes::FT_SIZE_TINY:
  73         case xml::FontTypes::FT_SIZE_SCRIPT:
  74         case xml::FontTypes::FT_SIZE_FOOTNOTE:
  75         case xml::FontTypes::FT_SIZE_SMALL:
  76         case xml::FontTypes::FT_SIZE_NORMAL:
  77         case xml::FontTypes::FT_SIZE_LARGE:
  78         case xml::FontTypes::FT_SIZE_LARGER:
  79         case xml::FontTypes::FT_SIZE_LARGEST:
  80         case xml::FontTypes::FT_SIZE_HUGE:
  81         case xml::FontTypes::FT_SIZE_HUGER:
  82         case xml::FontTypes::FT_SIZE_INCREASE:
  83         case xml::FontTypes::FT_SIZE_DECREASE:
  84                 return "emphasis";
  85         default:
  86                 return "";
  87         }
  88 }
  89
  90
  91 string fontToRole(xml::FontTypes type)
  92 {
  93         // Specific fonts are achieved with roles. The only common ones are "" for basic emphasis,
  94         // and "bold"/"strong" for bold. With some specific options, other roles are copied into
  95         // HTML output (via the DocBook XSLT sheets); otherwise, if not recognised, they are just ignored.
  96         // Hence, it is not a problem to have many roles by default here.
  97         // See https://www.sourceware.org/ml/docbook/2003-05/msg00269.html
  98         switch (type) {
  99         case xml::FontTypes::FT_ITALIC:
 100         case xml::FontTypes::FT_EMPH:
 101                 return "";
 102         case xml::FontTypes::FT_BOLD:
 103                 return "bold";
 104         case xml::FontTypes::FT_NOUN: // Outputs a <person>
 105         case xml::FontTypes::FT_TYPE: // Outputs a <code>
 106                 return "";
 107         case xml::FontTypes::FT_UBAR:
 108                 return "underline";
 109
 110         // All other roles are non-standard for DocBook.
 111
 112         case xml::FontTypes::FT_WAVE:
 113                 return "wave";
 114         case xml::FontTypes::FT_DBAR:
 115                 return "dbar";
 116         case xml::FontTypes::FT_SOUT:
 117                 return "sout";
 118         case xml::FontTypes::FT_XOUT:
 119                 return "xout";
 120         case xml::FontTypes::FT_UPRIGHT:
 121                 return "upright";
 122         case xml::FontTypes::FT_SLANTED:
 123                 return "slanted";
 124         case xml::FontTypes::FT_SMALLCAPS:
 125                 return "smallcaps";
 126         case xml::FontTypes::FT_ROMAN:
 127                 return "roman";
 128         case xml::FontTypes::FT_SANS:
 129                 return "sans";
 130         case xml::FontTypes::FT_SIZE_TINY:
 131                 return "tiny";
 132         case xml::FontTypes::FT_SIZE_SCRIPT:
 133                 return "size_script";
 134         case xml::FontTypes::FT_SIZE_FOOTNOTE:
 135                 return "size_footnote";
 136         case xml::FontTypes::FT_SIZE_SMALL:
 137                 return "size_small";
 138         case xml::FontTypes::FT_SIZE_NORMAL:
 139                 return "size_normal";
 140         case xml::FontTypes::FT_SIZE_LARGE:
 141                 return "size_large";
 142         case xml::FontTypes::FT_SIZE_LARGER:
 143                 return "size_larger";
 144         case xml::FontTypes::FT_SIZE_LARGEST:
 145                 return "size_largest";
 146         case xml::FontTypes::FT_SIZE_HUGE:
 147                 return "size_huge";
 148         case xml::FontTypes::FT_SIZE_HUGER:
 149                 return "size_huger";
 150         case xml::FontTypes::FT_SIZE_INCREASE:
 151                 return "size_increase";
 152         case xml::FontTypes::FT_SIZE_DECREASE:
 153                 return "size_decrease";
 154         default:
 155                 return "";
 156         }
 157 }
 158
 159
 160 string fontToAttribute(xml::FontTypes type) {
 161         // If there is a role (i.e. nonstandard use of a tag), output the attribute. Otherwise, the sheer tag is sufficient
 162         // for the font.
 163         string role = fontToRole(type);
 164         if (!role.empty())
 165                 return "role='" + role + "'";
 166         else
 167                 return "";
 168 }
 169
 170
 171 // Higher-level convenience functions.
 172
 173 void openParTag(XMLStream & xs, const Paragraph * par, const Paragraph * prevpar, const OutputParams & runparams)
 174 {
 175         if (par == prevpar)
 176                 prevpar = nullptr;
 177
 178         // If the previous paragraph is empty, don't consider it when opening wrappers.
 179         if (prevpar && prevpar->empty() && !prevpar->allowEmpty())
 180                 prevpar = nullptr;
 181
 182         // When should the wrapper be opened here? Only if the previous paragraph has the SAME wrapper tag
 183         // (usually, they won't have the same layout) and the CURRENT one allows merging.
 184         // The main use case is author information in several paragraphs: if the name of the author is the
 185         // first paragraph of an author, then merging with the previous tag does not make sense. Say the
 186         // next paragraph is the affiliation, then it should be output in the same <author> tag (different
 187         // layout, same wrapper tag).
 188         Layout const & lay = par->layout();
 189         bool openWrapper = lay.docbookwrappertag() != "NONE" && !runparams.docbook_ignore_wrapper;
 190
 191         if (prevpar != nullptr && !runparams.docbook_ignore_wrapper) {
 192                 Layout const & prevlay = prevpar->layout();
 193                 if (prevlay.docbookwrappertag() != "NONE") {
 194                         if (prevlay.docbookwrappertag() == lay.docbookwrappertag() &&
 195                                         prevlay.docbookwrapperattr() == lay.docbookwrapperattr())
 196                                 openWrapper = !lay.docbookwrappermergewithprevious();
 197                         else
 198                                 openWrapper = true;
 199                 }
 200         }
 201
 202         // Main logic.
 203         if (openWrapper) {
 204                 xml::openTag(xs, lay.docbookwrappertag(), lay.docbookwrapperattr(), lay.docbookwrappertagtype());
 205
 206                 if (lay.docbookgeneratetitle()) {
 207                         docstring const label = par->params().labelString();
 208
 209                         xml::openTag(xs, "title", "", "paragraph");
 210                         xs << (!label.empty() ? label : from_ascii("No title"));
 211                         xml::closeTag(xs, "title", "paragraph");
 212                 }
 213         }
 214
 215         const string & tag = lay.docbooktag();
 216         if (tag != "NONE") {
 217                 auto xmltag = xml::ParTag(tag, lay.docbookattr());
 218                 if (!xs.isTagOpen(xmltag, 1)) { // Don't nest a paragraph directly in a paragraph.
 219                         // TODO: required or not?
 220                         // TODO: avoid creating a ParTag object just for this query...
 221                         xml::openTag(xs, lay.docbooktag(), lay.docbookattr(), lay.docbooktagtype());
 222                         xml::openTag(xs, lay.docbookinnertag(), lay.docbookinnerattr(), lay.docbookinnertagtype());
 223                 }
 224         }
 225
 226         xml::openTag(xs, lay.docbookitemwrappertag(), lay.docbookitemwrapperattr(), lay.docbookitemwrappertagtype());
 227         xml::openTag(xs, lay.docbookitemtag(), lay.docbookitemattr(), lay.docbookitemtagtype());
 228         xml::openTag(xs, lay.docbookiteminnertag(), lay.docbookiteminnerattr(), lay.docbookiteminnertagtype());
 229 }
 230
 231
 232 void closeParTag(XMLStream & xs, Paragraph const * par, Paragraph const * nextpar, const OutputParams & runparams)
 233 {
 234         if (par == nextpar)
 235                 nextpar = nullptr;
 236
 237         // If the next paragraph is empty, don't consider it when closing wrappers.
 238         if (nextpar && nextpar->empty() && !nextpar->allowEmpty())
 239                 nextpar = nullptr;
 240
 241         // See comment in openParTag.
 242         Layout const & lay = par->layout();
 243         bool closeWrapper = lay.docbookwrappertag() != "NONE" && !runparams.docbook_ignore_wrapper;
 244
 245         if (nextpar != nullptr && !runparams.docbook_ignore_wrapper) {
 246                 Layout const & nextlay = nextpar->layout();
 247                 if (nextlay.docbookwrappertag() != "NONE") {
 248                         if (nextlay.docbookwrappertag() == lay.docbookwrappertag() &&
 249                                         nextlay.docbookwrapperattr() == lay.docbookwrapperattr())
 250                                 closeWrapper = !nextlay.docbookwrappermergewithprevious();
 251                         else
 252                                 closeWrapper = true;
 253                 }
 254         }
 255
 256         // Main logic.
 257         xml::closeTag(xs, lay.docbookiteminnertag(), lay.docbookiteminnertagtype());
 258         xml::closeTag(xs, lay.docbookitemtag(), lay.docbookitemtagtype());
 259         xml::closeTag(xs, lay.docbookitemwrappertag(), lay.docbookitemwrappertagtype());
 260         xml::closeTag(xs, lay.docbookinnertag(), lay.docbookinnertagtype());
 261         xml::closeTag(xs, lay.docbooktag(), lay.docbooktagtype());
 262         if (closeWrapper)
 263                 xml::closeTag(xs, lay.docbookwrappertag(), lay.docbookwrappertagtype());
 264 }
 265
 266
 267 void makeBibliography(
 268                 Text const & text,
 269                 Buffer const & buf,
 270                 XMLStream & xs,
 271                 OutputParams const & runparams,
 272                 ParagraphList::const_iterator const & par)
 273 {
 274         // If this is the first paragraph in a bibliography, open the bibliography tag.
 275         auto const * pbegin_before = text.paragraphs().getParagraphBefore(par);
 276         if (pbegin_before == nullptr || (pbegin_before && pbegin_before->layout().latextype != LATEX_BIB_ENVIRONMENT)) {
 277                 xs << xml::StartTag("bibliography");
 278                 xs << xml::CR();
 279         }
 280
 281         // Start the precooked bibliography entry. This is very much like opening a paragraph tag.
 282         // Don't forget the citation ID!
 283         docstring attr;
 284         for (auto i = 0; i < par->size(); ++i) {
 285                 Inset const *ip = par->getInset(i);
 286                 if (!ip)
 287                         continue;
 288                 if (const auto * bibitem = dynamic_cast<const InsetBibitem*>(ip)) {
 289                         auto id = xml::cleanID(bibitem->getParam("key"));
 290                         attr = from_utf8("xml:id='") + id + from_utf8("'");
 291                         break;
 292                 }
 293         }
 294         xs << xml::StartTag(from_utf8("bibliomixed"), attr);
 295
 296         // Generate the entry. Concatenate the different parts of the paragraph if any.
 297         auto const begin = text.paragraphs().begin();
 298         std::vector<docstring> pars_prepend;
 299         std::vector<docstring> pars;
 300         std::vector<docstring> pars_append;
 301         tie(pars_prepend, pars, pars_append) = par->simpleDocBookOnePar(buf, runparams, text.outerFont(std::distance(begin, par)), 0);
 302
 303         for (auto & parXML : pars_prepend)
 304                 xs << XMLStream::ESCAPE_NONE << parXML;
 305         for (auto & parXML : pars)
 306                 xs << XMLStream::ESCAPE_NONE << parXML;
 307         for (auto & parXML : pars_append)
 308                 xs << XMLStream::ESCAPE_NONE << parXML;
 309
 310         // End the precooked bibliography entry.
 311         xs << xml::EndTag("bibliomixed");
 312         xs << xml::CR();
 313
 314         // If this is the last paragraph in a bibliography, close the bibliography tag.
 315         auto const end = text.paragraphs().end();
 316         auto nextpar = par;
 317         ++nextpar;
 318         bool endBibliography = nextpar == end || nextpar->layout().latextype != LATEX_BIB_ENVIRONMENT;
 319
 320         if (endBibliography) {
 321                 xs << xml::EndTag("bibliography");
 322                 xs << xml::CR();
 323         }
 324 }
 325
 326
 327 void makeParagraph(
 328                 Text const & text,
 329                 Buffer const & buf,
 330                 XMLStream & xs,
 331                 OutputParams const & runparams,
 332                 ParagraphList::const_iterator const & par)
 333 {
 334         // Useful variables.
 335         auto const begin = text.paragraphs().begin();
 336         auto const end = text.paragraphs().end();
 337         auto prevpar = text.paragraphs().getParagraphBefore(par);
 338
 339         // We want to open the paragraph tag if:
 340         //   (i) the current layout permits multiple paragraphs
 341         //  (ii) we are either not already inside a paragraph (HTMLIsBlock) OR
 342         //         we are, but this is not the first paragraph
 343         //
 344         // But there is also a special case, and we first see whether we are in it.
 345         // We do not want to open the paragraph tag if this paragraph contains
 346         // only one item, and that item is "inline", i.e., not HTMLIsBlock (such
 347         // as a branch). On the other hand, if that single item has a font change
 348         // applied to it, then we still do need to open the paragraph.
 349         //
 350         // Obviously, this is very fragile. The main reason we need to do this is
 351         // because of branches, e.g., a branch that contains an entire new section.
 352         // We do not really want to wrap that whole thing in a <div>...</div>.
 353         bool special_case = false;
 354         Inset const *specinset = par->size() == 1 ? par->getInset(0) : nullptr;
 355         if (specinset && !specinset->getLayout().htmlisblock()) { // TODO: Convert htmlisblock to a DocBook parameter? docbooknotinpara should be enough in most cases.
 356                 Layout const &style = par->layout();
 357                 FontInfo const first_font = style.labeltype == LABEL_MANUAL ?
 358                                                                         style.labelfont : style.font;
 359                 FontInfo const our_font =
 360                                 par->getFont(buf.masterBuffer()->params(), 0,
 361                                                          text.outerFont(std::distance(begin, par))).fontInfo();
 362
 363                 if (first_font == our_font)
 364                         special_case = true;
 365         }
 366
 367         size_t nInsets = std::distance(par->insetList().begin(), par->insetList().end());
 368         auto parSize = (size_t) par->size();
 369
 370         // Plain layouts must be ignored.
 371         special_case |= buf.params().documentClass().isPlainLayout(par->layout()) && !runparams.docbook_force_pars;
 372
 373         // Equations do not deserve their own paragraph (DocBook allows them outside paragraphs).
 374         // Exception: any case that generates an <inlineequation> must still get a paragraph to be valid.
 375         auto isEquationSpecialCase = [](InsetList::Element inset) {
 376                 return inset.inset && inset.inset->asInsetMath() && inset.inset->asInsetMath()->getType() != hullSimple;
 377         };
 378         special_case |= nInsets == parSize && std::all_of(par->insetList().begin(), par->insetList().end(), isEquationSpecialCase);
 379
 380         // Things that should not get into their own paragraph. (Only valid for DocBook.)
 381         static std::set<InsetCode> lyxCodeSpecialCases = {
 382                         TABULAR_CODE,
 383                         FLOAT_CODE,
 384                         BIBTEX_CODE, // Bibliographies cannot be in paragraphs. Bibitems should still be handled as paragraphs,
 385                         // though (see makeBibliography).
 386                         ERT_CODE, // ERTs are in comments, not paragraphs.
 387                         LISTINGS_CODE,
 388                         BOX_CODE,
 389                         INCLUDE_CODE,
 390                         NOMENCL_PRINT_CODE,
 391                         TOC_CODE, // To be ignored in DocBook, the processor afterwards should deal with ToCs.
 392                         NOTE_CODE // Notes do not produce any output.
 393         };
 394         auto isLyxCodeSpecialCase = [](InsetList::Element inset) {
 395                 return lyxCodeSpecialCases.find(inset.inset->lyxCode()) != lyxCodeSpecialCases.end();
 396         };
 397         special_case |= nInsets == parSize && std::all_of(par->insetList().begin(), par->insetList().end(), isLyxCodeSpecialCase);
 398
 399         // Flex elements (InsetLayout) have their own parameter to control the special case.
 400         auto isFlexSpecialCase = [](InsetList::Element inset) {
 401                 if (inset.inset->lyxCode() != FLEX_CODE)
 402                         return false;
 403
 404                 // Standard condition: check the parameter.
 405                 if (inset.inset->getLayout().docbooknotinpara())
 406                         return true;
 407
 408                 // If the parameter is not set, maybe the flex inset only contains things that should match the standard
 409                 // condition. In this case, isLyxCodeSpecialCase must also check for bibitems...
 410                 auto isLyxCodeSpecialCase = [](InsetList::Element inset) {
 411                         return lyxCodeSpecialCases.find(inset.inset->lyxCode()) != lyxCodeSpecialCases.end() ||
 412                                         inset.inset->lyxCode() == BIBITEM_CODE;
 413                 };
 414                 if (InsetText * text = inset.inset->asInsetText()) {
 415                         for (auto const & par : text->paragraphs()) {
 416                                 size_t nInsets = std::distance(par.insetList().begin(), par.insetList().end());
 417                                 auto parSize = (size_t) par.size();
 418
 419                                 if (nInsets == 1 && par.insetList().begin()->inset->lyxCode() == BIBITEM_CODE)
 420                                         return true;
 421                                 if (nInsets != parSize)
 422                                         return false;
 423                                 if (!std::all_of(par.insetList().begin(), par.insetList().end(), isLyxCodeSpecialCase))
 424                                         return false;
 425                         }
 426                         return true;
 427                 }
 428
 429                 // No case matched: give up.
 430                 return false;
 431         };
 432         special_case |= nInsets == parSize && std::all_of(par->insetList().begin(), par->insetList().end(), isFlexSpecialCase);
 433
 434         // If the insets should be rendered as images, enter the special case.
 435         auto isRenderedAsImageSpecialCase = [](InsetList::Element inset) {
 436                 return inset.inset && inset.inset->getLayout().docbookrenderasimage();
 437         };
 438         special_case |= nInsets == parSize && std::all_of(par->insetList().begin(), par->insetList().end(), isRenderedAsImageSpecialCase);
 439
 440         // Open a paragraph if it is allowed, we are not already within a paragraph, and the insets in the paragraph do
 441         // not forbid paragraphs (aka special cases).
 442         bool const open_par = runparams.docbook_make_pars
 443                                                   && !runparams.docbook_in_par
 444                                                   && !special_case;
 445
 446         // We want to issue the closing tag if either:
 447         //   (i)  We opened it, and either docbook_in_par is false,
 448         //              or we're not in the last paragraph, anyway.
 449         //   (ii) We didn't open it and docbook_in_par is true,
 450         //              but we are in the first par, and there is a next par.
 451         bool const close_par = open_par && !runparams.docbook_in_par;
 452
 453         // Determine if this paragraph has some real content. Things like new pages are not caught
 454         // by Paragraph::empty(), even though they do not generate anything useful in DocBook.
 455         // Thus, remove all spaces (including new lines: \r, \n) before checking for emptiness.
 456         // std::all_of allows doing this check without having to copy the string.
 457         // Open and close tags around each contained paragraph.
 458         auto nextpar = par;
 459         ++nextpar;
 460
 461         std::vector<docstring> pars_prepend;
 462         std::vector<docstring> pars;
 463         std::vector<docstring> pars_append;
 464         tie(pars_prepend, pars, pars_append) = par->simpleDocBookOnePar(buf, runparams, text.outerFont(distance(begin, par)), 0, nextpar == end, special_case);
 465
 466         for (docstring const & parXML : pars_prepend)
 467             xs << XMLStream::ESCAPE_NONE << parXML;
 468         for (docstring const & parXML : pars) {
 469                 if (!xml::isNotOnlySpace(parXML))
 470                         continue;
 471
 472                 if (open_par)
 473                         openParTag(xs, &*par, prevpar, runparams);
 474
 475                 xs << XMLStream::ESCAPE_NONE << parXML;
 476
 477                 if (close_par)
 478                         closeParTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr, runparams);
 479         }
 480         for (docstring const & parXML : pars_append)
 481             xs << XMLStream::ESCAPE_NONE << parXML;
 482 }
 483
 484
 485 void makeEnvironment(Text const &text,
 486                                          Buffer const &buf,
 487                      XMLStream &xs,
 488                      OutputParams const &runparams,
 489                      ParagraphList::const_iterator const & par)
 490 {
 491         // Useful variables.
 492         auto const end = text.paragraphs().end();
 493         auto nextpar = par;
 494         ++nextpar;
 495
 496         // Special cases for listing-like environments provided in layouts. This is quite ad-hoc, but provides a useful
 497         // default. This should not be used by too many environments (only LyX-Code right now).
 498         // This would be much simpler if LyX-Code was implemented as InsetListings...
 499         bool mimicListing = false;
 500         bool ignoreFonts = false;
 501         if (par->layout().docbooktag() == "programlisting") {
 502                 mimicListing = true;
 503                 ignoreFonts = true;
 504         }
 505
 506         // Output the opening tag for this environment, but only if it has not been previously opened (condition
 507         // implemented in openParTag).
 508         auto prevpar = text.paragraphs().getParagraphBefore(par);
 509         openParTag(xs, &*par, prevpar, runparams);
 510
 511         // Generate the contents of this environment. There is a special case if this is like some environment.
 512         Layout const & style = par->layout();
 513         if (style.latextype == LATEX_COMMAND) {
 514                 // Nothing to do (otherwise, infinite loops).
 515         } else if (style.latextype == LATEX_ENVIRONMENT) {
 516                 // Generate the paragraph, if need be.
 517                 std::vector<docstring> pars_prepend;
 518         std::vector<docstring> pars;
 519         std::vector<docstring> pars_append;
 520         tie(pars_prepend, pars, pars_append) =
 521                                 par->simpleDocBookOnePar(buf, runparams, text.outerFont(std::distance(text.paragraphs().begin(), par)),
 522                                                                                  0, false, ignoreFonts);
 523
 524         for (docstring const & parXML : pars_prepend)
 525             xs << XMLStream::ESCAPE_NONE << parXML;
 526                 if (mimicListing) {
 527                         auto p = pars.begin();
 528                         while (p != pars.end()) {
 529                                 xml::openTag(xs, par->layout().docbookiteminnertag(), par->layout().docbookiteminnerattr(),
 530                                              par->layout().docbookiteminnertagtype());
 531                                 xs << XMLStream::ESCAPE_NONE << *p;
 532                                 xml::closeTag(xs, par->layout().docbookiteminnertag(), par->layout().docbookiteminnertagtype());
 533                                 ++p;
 534
 535                                 // Insert a new line after each "paragraph" (i.e. line in the listing), except for the last one.
 536                                 // Otherwise, there would one more new line in the output than in the LyX document.
 537                                 if (p != pars.end())
 538                                         xs << xml::CR();
 539                         }
 540                 } else {
 541                         for (auto const & p : pars) {
 542                                 xml::openTag(xs, par->layout().docbookiteminnertag(), par->layout().docbookiteminnerattr(),
 543                                              par->layout().docbookiteminnertagtype());
 544                                 xs << XMLStream::ESCAPE_NONE << p;
 545                                 xml::closeTag(xs, par->layout().docbookiteminnertag(), par->layout().docbookiteminnertagtype());
 546                         }
 547                 }
 548         for (docstring const & parXML : pars_append)
 549             xs << XMLStream::ESCAPE_NONE << parXML;
 550         } else {
 551                 makeAny(text, buf, xs, runparams, par);
 552         }
 553
 554         // Close the environment.
 555         closeParTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr, runparams);
 556 }
 557
 558
 559 ParagraphList::const_iterator findEndOfEnvironment(
 560                 ParagraphList::const_iterator const & pstart,
 561                 ParagraphList::const_iterator const & pend)
 562 {
 563         // Copy-paste from XHTML. Should be factored out at some point...
 564         ParagraphList::const_iterator p = pstart;
 565         Layout const & bstyle = p->layout();
 566         size_t const depth = p->params().depth();
 567         for (++p; p != pend; ++p) {
 568                 Layout const & style = p->layout();
 569                 // It shouldn't happen that e.g. a section command occurs inside
 570                 // a quotation environment, at a higher depth, but as of 6/2009,
 571                 // it can happen. We pretend that it's just at lowest depth.
 572                 if (style.latextype == LATEX_COMMAND)
 573                         return p;
 574
 575                 // If depth is down, we're done
 576                 if (p->params().depth() < depth)
 577                         return p;
 578
 579                 // If depth is up, we're not done
 580                 if (p->params().depth() > depth)
 581                         continue;
 582
 583                 // FIXME I am not sure about the first check.
 584                 // Surely we *could* have different layouts that count as
 585                 // LATEX_PARAGRAPH, right?
 586                 if (style.latextype == LATEX_PARAGRAPH || style != bstyle)
 587                         return p;
 588         }
 589         return pend;
 590 }
 591
 592
 593 ParagraphList::const_iterator makeListEnvironment(Text const &text,
 594                                                                                                   Buffer const &buf,
 595                                                           XMLStream &xs,
 596                                                           OutputParams const &runparams,
 597                                                           ParagraphList::const_iterator const & begin)
 598 {
 599         // Useful variables.
 600         auto par = begin;
 601         auto const end = text.paragraphs().end();
 602         auto const envend = findEndOfEnvironment(par, end);
 603
 604         // Output the opening tag for this environment.
 605         Layout const & envstyle = par->layout();
 606         xml::openTag(xs, envstyle.docbookwrappertag(), envstyle.docbookwrapperattr(), envstyle.docbookwrappertagtype());
 607         xml::openTag(xs, envstyle.docbooktag(), envstyle.docbookattr(), envstyle.docbooktagtype());
 608
 609         // Handle the content of the list environment, item by item.
 610         while (par != envend) {
 611                 // Skip this paragraph if it is both empty and the last one (otherwise, there may be deeper paragraphs after).
 612                 auto nextpar = par;
 613                 ++nextpar;
 614                 if (par->empty() && nextpar == envend)
 615                         break;
 616
 617                 // Open the item wrapper.
 618                 Layout const & style = par->layout();
 619                 xml::openTag(xs, style.docbookitemwrappertag(), style.docbookitemwrapperattr(),
 620                              style.docbookitemwrappertagtype());
 621
 622                 // Generate the label, if need be. If it is taken from the text, sep != 0 and corresponds to the first
 623                 // character after the label.
 624                 pos_type sep = 0;
 625                 if (style.labeltype != LABEL_NO_LABEL && style.docbookitemlabeltag() != "NONE") {
 626                         if (style.labeltype == LABEL_MANUAL) {
 627                                 // Only variablelist gets here (or similar items defined as an extension in the layout).
 628                                 xml::openTag(xs, style.docbookitemlabeltag(), style.docbookitemlabelattr(),
 629                                              style.docbookitemlabeltagtype());
 630                                 sep = 1 + par->firstWordDocBook(xs, runparams);
 631                                 xml::closeTag(xs, style.docbookitemlabeltag(), style.docbookitemlabeltagtype());
 632                         } else {
 633                                 // Usual cases: maybe there is something specified at the layout level. Highly unlikely, though.
 634                                 docstring const lbl = par->params().labelString();
 635
 636                                 if (!lbl.empty()) {
 637                                         xml::openTag(xs, style.docbookitemlabeltag(), style.docbookitemlabelattr(),
 638                                                      style.docbookitemlabeltagtype());
 639                                         xs << lbl;
 640                                         xml::closeTag(xs, style.docbookitemlabeltag(), style.docbookitemlabeltagtype());
 641                                 }
 642                         }
 643                 }
 644
 645                 // Open the item (after the wrapper and the label).
 646                 xml::openTag(xs, style.docbookitemtag(), style.docbookitemattr(), style.docbookitemtagtype());
 647
 648                 // Generate the content of the item.
 649                 if (sep < par->size()) {
 650             std::vector<docstring> pars_prepend;
 651             std::vector<docstring> pars;
 652             std::vector<docstring> pars_append;
 653             tie(pars_prepend, pars, pars_append) = par->simpleDocBookOnePar(buf, runparams,
 654                                                              text.outerFont(std::distance(text.paragraphs().begin(), par)), sep);
 655             for (docstring const & parXML : pars_prepend)
 656                 xs << XMLStream::ESCAPE_NONE << parXML;
 657                         for (auto &p : pars) {
 658                                 xml::openTag(xs, par->layout().docbookiteminnertag(), par->layout().docbookiteminnerattr(),
 659                                              par->layout().docbookiteminnertagtype());
 660                                 xs << XMLStream::ESCAPE_NONE << p;
 661                                 xml::closeTag(xs, par->layout().docbookiteminnertag(), par->layout().docbookiteminnertagtype());
 662                         }
 663             for (docstring const & parXML : pars_append)
 664                 xs << XMLStream::ESCAPE_NONE << parXML;
 665                 } else {
 666                         // DocBook doesn't like emptiness.
 667                         xml::compTag(xs, par->layout().docbookiteminnertag(), par->layout().docbookiteminnerattr(),
 668                                      par->layout().docbookiteminnertagtype());
 669                 }
 670
 671                 // If the next item is deeper, it must go entirely within this item (do it recursively).
 672                 // By construction, with findEndOfEnvironment, depth can only stay constant or increase, never decrease.
 673                 depth_type currentDepth = par->getDepth();
 674                 ++par;
 675                 while (par != envend && par->getDepth() != currentDepth)
 676                         par = makeAny(text, buf, xs, runparams, par);
 677                 // Usually, this loop only makes one iteration, except in complex scenarios, like an item with a paragraph,
 678                 // a list, and another paragraph; or an item with two types of list (itemise then enumerate, for instance).
 679
 680                 // Close the item.
 681                 xml::closeTag(xs, style.docbookitemtag(), style.docbookitemtagtype());
 682                 xml::closeTag(xs, style.docbookitemwrappertag(), style.docbookitemwrappertagtype());
 683         }
 684
 685         // Close this environment in exactly the same way as it was opened.
 686         xml::closeTag(xs, envstyle.docbooktag(), envstyle.docbooktagtype());
 687         xml::closeTag(xs, envstyle.docbookwrappertag(), envstyle.docbookwrappertagtype());
 688
 689         return envend;
 690 }
 691
 692
 693 void makeCommand(
 694                 Text const & text,
 695                 Buffer const & buf,
 696                 XMLStream & xs,
 697                 OutputParams const & runparams,
 698                 ParagraphList::const_iterator const & par)
 699 {
 700         // Useful variables.
 701         // Unlike XHTML, no need for labels, as they are handled by DocBook tags.
 702         auto const begin = text.paragraphs().begin();
 703         auto const end = text.paragraphs().end();
 704         auto nextpar = par;
 705         ++nextpar;
 706
 707         // Generate this command.
 708         auto prevpar = text.paragraphs().getParagraphBefore(par);
 709
 710     std::vector<docstring> pars_prepend;
 711     std::vector<docstring> pars;
 712     std::vector<docstring> pars_append;
 713     tie(pars_prepend, pars, pars_append) = par->simpleDocBookOnePar(buf, runparams,text.outerFont(distance(begin, par)));
 714
 715     for (docstring const & parXML : pars_prepend)
 716         xs << XMLStream::ESCAPE_NONE << parXML;
 717
 718     openParTag(xs, &*par, prevpar, runparams);
 719         for (auto & parXML : pars)
 720                 // TODO: decide what to do with openParTag/closeParTag in new lines.
 721                 xs << XMLStream::ESCAPE_NONE << parXML;
 722     closeParTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr, runparams);
 723
 724     for (docstring const & parXML : pars_append)
 725         xs << XMLStream::ESCAPE_NONE << parXML;
 726 }
 727
 728
 729 bool isLayoutSectioning(Layout const & lay)
 730 {
 731         if (lay.docbooksection()) // Special case: some DocBook styles must be handled as sections.
 732                 return true;
 733         else if (lay.category() == from_utf8("Sectioning") || lay.docbooktag() == "section") // Generic case.
 734                 return lay.toclevel != Layout::NOT_IN_TOC;
 735         return false;
 736 }
 737
 738
 739 bool isLayoutSectioningOrSimilar(Layout const & lay)
 740 {
 741         return isLayoutSectioning(lay) || lay.docbooktag() == "bridgehead";
 742 }
 743
 744
 745 using DocBookDocumentSectioning = tuple<bool, pit_type>;
 746
 747
 748 struct DocBookInfoTag
 749 {
 750         const set<pit_type> shouldBeInInfo;
 751         const set<pit_type> mustBeInInfo; // With the notable exception of the abstract!
 752         const set<pit_type> abstract;
 753         const bool abstractLayout;
 754         pit_type bpit;
 755         pit_type epit;
 756
 757         DocBookInfoTag(const set<pit_type> & shouldBeInInfo, const set<pit_type> & mustBeInInfo,
 758                                    const set<pit_type> & abstract, bool abstractLayout, pit_type bpit, pit_type epit) :
 759                                    shouldBeInInfo(shouldBeInInfo), mustBeInInfo(mustBeInInfo), abstract(abstract),
 760                                    abstractLayout(abstractLayout), bpit(bpit), epit(epit) {}
 761 };
 762
 763
 764 DocBookDocumentSectioning hasDocumentSectioning(ParagraphList const &paragraphs, pit_type bpit, pit_type const epit) {
 765         bool documentHasSections = false;
 766
 767         while (bpit < epit) {
 768                 LASSERT(static_cast<size_t>(bpit) < paragraphs.size(), return make_tuple(documentHasSections, bpit));
 769
 770                 Layout const &style = paragraphs[bpit].layout();
 771                 documentHasSections |= isLayoutSectioningOrSimilar(style);
 772
 773                 if (documentHasSections)
 774                         break;
 775                 bpit += 1;
 776         }
 777         // Paragraphs before the first section: [ runparams.par_begin ; eppit )
 778
 779         return make_tuple(documentHasSections, bpit);
 780 }
 781
 782
 783 bool hasOnlyNotes(Paragraph const & par)
 784 {
 785         // Precondition: the paragraph is not empty. Otherwise, the function will always return true...
 786         for (int i = 0; i < par.size(); ++i)
 787                 // If you find something that is not an inset (like actual text) or an inset that is not a note,
 788                 // return false.
 789                 if (!par.isInset(i) || par.getInset(i)->lyxCode() != NOTE_CODE)
 790                         return false;
 791
 792         // An empty paragraph may still require some output.
 793         if (par.layout().docbooksection())
 794                 return false;
 795
 796         // There should be really no content here.
 797         return true;
 798 }
 799
 800
 801 DocBookInfoTag getParagraphsWithInfo(ParagraphList const &paragraphs,
 802                                                                          pit_type bpit, pit_type const epit,
 803                                                                          // Typically, bpit is the beginning of the document and epit the end of the
 804                                                                          // document *or* the first section.
 805                                                                          bool documentHasSections,
 806                                                                          bool detectUnlayoutedAbstract
 807                                                                          // Whether paragraphs with no specific layout should be detected as abstracts.
 808                                                                          // For inner sections, an abstract should only be detected if it has a specific
 809                                                                          // layout. For others, anything that might look like an abstract should be sought.
 810                                                                          ) {
 811         set<pit_type> shouldBeInInfo;
 812         set<pit_type> mustBeInInfo;
 813         set<pit_type> abstractWithLayout;
 814         set<pit_type> abstractNoLayout;
 815
 816         // Find the first nonempty paragraph by mutating bpit.
 817         while (bpit < epit) {
 818                 Paragraph const &par = paragraphs[bpit];
 819                 if (par.empty() || hasOnlyNotes(par))
 820                         bpit += 1;
 821                 else
 822                         break;
 823         }
 824
 825         // Traverse everything that might belong to <info>.
 826         bool hasAbstractLayout = false;
 827         static depth_type INVALID_DEPTH = 100000;
 828         depth_type abstractDepth = INVALID_DEPTH;
 829         pit_type cpit = bpit;
 830         for (; cpit < epit; ++cpit) {
 831                 // Skip paragraphs that don't generate anything in DocBook.
 832                 Paragraph const & par = paragraphs[cpit];
 833                 Layout const &style = par.layout();
 834                 if (hasOnlyNotes(par))
 835                         continue;
 836
 837                 // There should never be any section here, except for the first paragraph (a title can be part of <info>).
 838                 // (Just a sanity check: if this fails, this function could end up processing the whole document.)
 839                 if (cpit != bpit && isLayoutSectioningOrSimilar(par.layout())) {
 840                         LYXERR(Debug::OUTFILE, "Assertion failed: section found in potential <info> paragraphs.");
 841                         break;
 842                 }
 843
 844                 // If this is marked as an abstract by the layout, put it in the right set.
 845                 if (style.docbookabstract()) {
 846                         hasAbstractLayout = true;
 847                         abstractDepth = par.getDepth();
 848                         abstractWithLayout.emplace(cpit);
 849                         continue;
 850                 }
 851
 852                 // Deeper paragraphs following the abstract must still be considered as part of the abstract.
 853                 // For instance, this includes lists. There should not be any other kind of paragraph in between.
 854                 if (abstractDepth != INVALID_DEPTH && style.docbookininfo() == "never") {
 855                         if (par.getDepth() > abstractDepth) {
 856                                 abstractWithLayout.emplace(cpit);
 857                                 continue;
 858                         }
 859                         if (par.getDepth() == abstractDepth) {
 860                                 // This is not an abstract paragraph and it should not either be considered as part
 861                                 // of it. It breaks the rule that abstract paragraphs must follow each other.
 862                                 abstractDepth = INVALID_DEPTH;
 863                                 break;
 864                         }
 865                 }
 866
 867                 // Based on layout information, store this paragraph in one set: should be in <info>, must be,
 868                 // or abstract (either because of layout or of position).
 869                 if (style.docbookininfo() == "always")
 870                         mustBeInInfo.emplace(cpit);
 871                 else if (style.docbookininfo() == "maybe")
 872                         shouldBeInInfo.emplace(cpit);
 873                 else if (documentHasSections && !hasAbstractLayout && detectUnlayoutedAbstract &&
 874                                 (style.docbooktag() == "NONE" || style.docbooktag() == "para") &&
 875                                 style.docbookwrappertag() == "NONE")
 876                         // In this case, it is very likely that style.docbookininfo() == "never"! Be extra careful
 877                         // about anything that gets caught here. For instance, don't ake into account
 878                         abstractNoLayout.emplace(cpit);
 879                 else // This should definitely not be in <info>.
 880                         break;
 881         }
 882         // Now, cpit points to the first paragraph that no more has things that could go in <info>.
 883         // bpit is the beginning of the <info> part.
 884
 885         return DocBookInfoTag(shouldBeInInfo, mustBeInInfo,
 886                                               hasAbstractLayout ? abstractWithLayout : abstractNoLayout,
 887                                               hasAbstractLayout, bpit, cpit);
 888 }
 889
 890 } // end anonymous namespace
 891
 892
 893 std::set<const Inset *> gatherInfo(ParagraphList::const_iterator par)
 894 {
 895         // This function has a structure highly similar to makeAny and its friends. It's only made to be called on what
 896         // should become the document's <abstract>.
 897         std::set<const Inset *> values;
 898
 899         // If this kind of layout should be ignored, already leave.
 900         if (par->layout().docbooktag() == "IGNORE")
 901                 return values;
 902
 903         // If this should go in info, mark it as such. Dive deep into the abstract, as it may hide many things that
 904         // DocBook doesn't want to be inside the abstract.
 905         for (pos_type i = 0; i < par->size(); ++i) {
 906                 if (par->getInset(i) && par->getInset(i)->asInsetText()) {
 907                         InsetText const *inset = par->getInset(i)->asInsetText();
 908
 909                         if (inset->getLayout().docbookininfo() != "never") {
 910                                 values.insert(inset);
 911                         } else {
 912                                 auto subpar = inset->paragraphs().begin();
 913                                 while (subpar != inset->paragraphs().end()) {
 914                                         auto subinfos = gatherInfo(subpar);
 915                                         for (auto & subinfo: subinfos)
 916                                                 values.insert(subinfo);
 917                                         ++subpar;
 918                                 }
 919                         }
 920                 }
 921         }
 922
 923         return values;
 924 }
 925
 926
 927 ParagraphList::const_iterator makeAny(Text const &text,
 928                                       Buffer const &buf,
 929                                       XMLStream &xs,
 930                                       OutputParams const &runparams,
 931                                       ParagraphList::const_iterator par)
 932 {
 933         bool ignoreParagraph = false;
 934
 935         // If this kind of layout should be ignored, already leave.
 936         ignoreParagraph |= par->layout().docbooktag() == "IGNORE";
 937
 938         // For things that should go into <info>, check the variable rp.docbook_generate_info. This does not apply to the
 939         // abstract itself.
 940         bool isAbstract = par->layout().docbookabstract() || par->layout().docbooktag() == "abstract";
 941         ignoreParagraph |= !isAbstract && par->layout().docbookininfo() != "never" && !runparams.docbook_generate_info;
 942
 943         // Switch on the type of paragraph to call the right handler.
 944         if (!ignoreParagraph) {
 945                 switch (par->layout().latextype) {
 946                 case LATEX_COMMAND:
 947                         makeCommand(text, buf, xs, runparams, par);
 948                         break;
 949                 case LATEX_ENVIRONMENT:
 950                         makeEnvironment(text, buf, xs, runparams, par);
 951                         break;
 952                 case LATEX_LIST_ENVIRONMENT:
 953                 case LATEX_ITEM_ENVIRONMENT:
 954                         // Only case when makeAny() might consume more than one paragraph.
 955                         return makeListEnvironment(text, buf, xs, runparams, par);
 956                 case LATEX_PARAGRAPH:
 957                         makeParagraph(text, buf, xs, runparams, par);
 958                         break;
 959                 case LATEX_BIB_ENVIRONMENT:
 960                         makeBibliography(text, buf, xs, runparams, par);
 961                         break;
 962                 }
 963         }
 964
 965         // For cases that are not lists, the next paragraph to handle is the next one.
 966         ++par;
 967         return par;
 968 }
 969
 970
 971 xml::FontTag docbookStartFontTag(xml::FontTypes type)
 972 {
 973         return xml::FontTag(from_utf8(fontToDocBookTag(type)), from_utf8(fontToAttribute(type)), type);
 974 }
 975
 976
 977 xml::EndFontTag docbookEndFontTag(xml::FontTypes type)
 978 {
 979         return xml::EndFontTag(from_utf8(fontToDocBookTag(type)), type);
 980 }
 981
 982
 983 void outputDocBookInfo(
 984                 Text const & text,
 985                 Buffer const & buf,
 986                 XMLStream & xs,
 987                 OutputParams const & runparams,
 988                 ParagraphList const & paragraphs,
 989                 DocBookInfoTag const & info)
 990 {
 991         // Perform an additional check on the abstract. Sometimes, there are many paragraphs that should go
 992         // into the abstract, but none generates actual content. Thus, first generate to a temporary stream,
 993         // then only create the <abstract> tag if these paragraphs generate some content.
 994         // This check must be performed *before* a decision on whether or not to output <info> is made.
 995         bool hasAbstract = !info.abstract.empty();
 996         docstring abstract;
 997         set<const Inset *> infoInsets; // Paragraphs that should go into <info>, but are hidden in an <abstract>
 998         // paragraph. (This happens for quite a few layouts, unfortunately.)
 999
1000         if (hasAbstract) {
1001                 // Generate the abstract XML into a string before further checks.
1002                 // Usually, makeAny only generates one paragraph at a time. However, for the specific case of lists, it might
1003                 // generate more than one paragraph, as indicated in the return value.
1004                 odocstringstream os2;
1005                 XMLStream xs2(os2);
1006
1007                 auto rp = runparams;
1008                 rp.docbook_generate_info = false;
1009                 rp.docbook_ignore_wrapper = true;
1010
1011                 set<pit_type> doneParas; // Paragraphs that have already been converted (mostly to deal with lists).
1012                 for (auto const & p : info.abstract) {
1013                         if (doneParas.find(p) == doneParas.end()) {
1014                                 auto oldPar = paragraphs.iterator_at(p);
1015                                 auto newPar = makeAny(text, buf, xs2, rp, oldPar);
1016
1017                                 // Find insets that should go outside the abstract.
1018                                 auto subinfos = gatherInfo(oldPar);
1019                                 for (auto & subinfo: subinfos)
1020                                         infoInsets.insert(subinfo);
1021
1022                                 // Insert the indices of all the paragraphs that were just generated (typically, one).
1023                                 // **Make the hypothesis that, when an abstract has a list, all its items are consecutive.**
1024                                 // Otherwise, makeAny and makeListEnvironment would have to be adapted too.
1025                                 pit_type id = p;
1026                                 while (oldPar != newPar) {
1027                                         doneParas.emplace(id);
1028                                         ++oldPar;
1029                                         ++id;
1030                                 }
1031                         }
1032                 }
1033
1034                 // Actually output the abstract if there is something to do. Don't count line feeds, spaces, or comments
1035                 // in this -- even though line feeds and spaces must be properly output if there is some abstract.
1036                 abstract = os2.str();
1037                 docstring cleaned = abstract;
1038                 cleaned.erase(std::remove_if(cleaned.begin(), cleaned.end(), lyx::isSpace), cleaned.end());
1039
1040                 size_t beginComment;
1041                 size_t endComment;
1042                 while ((beginComment = cleaned.find(from_ascii("<!--"))) != lyx::docstring::npos) {
1043                         if ((endComment = cleaned.find(from_ascii("-->"), beginComment)) != lyx::docstring::npos) {
1044                                 cleaned.erase(cleaned.begin() + beginComment, cleaned.begin() + endComment + 3);
1045                         }
1046                 }
1047
1048                 // Nothing? Then there is no abstract!
1049                 if (cleaned.empty())
1050                         hasAbstract = false;
1051         }
1052
1053         // The abstract must go in <info>. Otherwise, decide whether to open <info> based on the layouts.
1054         bool needInfo = !info.mustBeInInfo.empty() || hasAbstract;
1055
1056         // Start the <info> tag if required.
1057         if (needInfo) {
1058                 xs.startDivision(false);
1059                 xs << xml::StartTag("info");
1060                 xs << xml::CR();
1061         }
1062
1063         // Output the elements that should go in <info>.
1064         // - First, the title.
1065         for (auto pit : info.shouldBeInInfo) // Typically, the title: these elements are so important and ubiquitous
1066                 // that mandating a wrapper like <info> would repel users. Thus, generate them first.
1067                 makeAny(text, buf, xs, runparams, paragraphs.iterator_at(pit));
1068         // If there is no title, generate one (required for the document to be valid).
1069         // This code is called for the main document, for table cells, etc., so be precise in this condition.
1070         if (text.isMainText() && info.shouldBeInInfo.empty() && !runparams.inInclude) {
1071                 xs << xml::StartTag("title");
1072                 xs << "Untitled Document";
1073                 xs << xml::EndTag("title");
1074                 xs << xml::CR();
1075         }
1076
1077         // - Then, other metadata.
1078         for (auto pit : info.mustBeInInfo)
1079                 makeAny(text, buf, xs, runparams, paragraphs.iterator_at(pit));
1080         for (auto const * inset : infoInsets)
1081                 inset->docbook(xs, runparams);
1082
1083         // - Finally, always output the abstract as the last item of the <info>, as it requires special treatment
1084         // (especially if it contains several paragraphs that are empty).
1085         if (hasAbstract) {
1086                 string tag = paragraphs[*info.abstract.begin()].layout().docbookforceabstracttag();
1087                 if (tag == "NONE")
1088                         tag = "abstract";
1089
1090                 if (!xs.isLastTagCR())
1091                         xs << xml::CR();
1092
1093                 xs << xml::StartTag(tag);
1094                 xs << xml::CR();
1095                 xs << XMLStream::ESCAPE_NONE << abstract;
1096                 xs << xml::EndTag(tag);
1097                 xs << xml::CR();
1098         }
1099
1100         // End the <info> tag if it was started.
1101         if (needInfo) {
1102                 if (!xs.isLastTagCR())
1103                         xs << xml::CR();
1104
1105                 xs << xml::EndTag("info");
1106                 xs << xml::CR();
1107                 xs.endDivision();
1108         }
1109 }
1110
1111
1112 void docbookSimpleAllParagraphs(
1113                 Text const & text,
1114                 Buffer const & buf,
1115                 XMLStream & xs,
1116                 OutputParams const & runparams)
1117 {
1118         // Handle the given text, supposing it has no sections (i.e. a "simple" text). The input may vary in length
1119         // between a single paragraph to a whole document.
1120         pit_type const bpit = runparams.par_begin;
1121         pit_type const epit = runparams.par_end;
1122         ParagraphList const &paragraphs = text.paragraphs();
1123
1124         // First, the <info> tag.
1125         DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit, false, true);
1126         outputDocBookInfo(text, buf, xs, runparams, paragraphs, info);
1127
1128         // Then, the content. It starts where the <info> ends.
1129         auto par = paragraphs.iterator_at(info.epit);
1130         auto par_epit = paragraphs.iterator_at(epit);
1131         auto par_end = paragraphs.end();
1132         while (par != par_epit && par != par_end) {
1133                 if (!hasOnlyNotes(*par))
1134                         par = makeAny(text, buf, xs, runparams, par);
1135                 else
1136                         ++par;
1137         }
1138 }
1139
1140
1141 void docbookParagraphs(Text const &text,
1142                                            Buffer const &buf,
1143                                            XMLStream &xs,
1144                                            OutputParams const &runparams) {
1145         ParagraphList const &paragraphs = text.paragraphs();
1146         if (runparams.par_begin == runparams.par_end) {
1147                 runparams.par_begin = 0;
1148                 runparams.par_end = paragraphs.size();
1149         }
1150         pit_type bpit = runparams.par_begin;
1151         pit_type const epit = runparams.par_end;
1152         LASSERT(bpit < epit,
1153                         {
1154                                 xs << XMLStream::ESCAPE_NONE << "<!-- DocBook output error! -->\n";
1155                                 return;
1156                         });
1157
1158         // Detect whether the document contains sections. If there are no sections, treatment is largely simplified.
1159         // In particular, there can't be an abstract, unless it is manually marked.
1160         bool documentHasSections;
1161         pit_type eppit;
1162         tie(documentHasSections, eppit) = hasDocumentSectioning(paragraphs, bpit, epit);
1163
1164         // Deal with "simple" documents, i.e. those without sections.
1165         if (!documentHasSections) {
1166                 docbookSimpleAllParagraphs(text, buf, xs, runparams);
1167                 return;
1168         }
1169
1170         // Output the first <info> tag (or just the title).
1171         DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, eppit, true, true);
1172         outputDocBookInfo(text, buf, xs, runparams, paragraphs, info);
1173         bpit = info.epit;
1174
1175         // In the specific case of books, there must be parts or chapters. In some cases, star sections are used at the
1176         // beginning for many things like acknowledgements or licenses. DocBook has tags for many of these cases, but not
1177         // the LyX layouts... Gather everything in a <preface>, that's the closest in meaning.
1178         // This is only useful if the things after the <info> tag are not already parts or chapters!
1179         if (buf.params().documentClass().docbookroot() == "book") {
1180             // Check the condition on the first few elements.
1181             bool hasPreface = false;
1182             pit_type pref_bpit = bpit;
1183             pit_type pref_epit = bpit;
1184
1185             static const std::set<std::string> allowedElements = {
1186                     // List from https://tdg.docbook.org/tdg/5.2/book.html
1187                     "acknowledgements", "appendix", "article", "bibliography", "chapter", "colophon", "dedication",
1188                     "glossary", "index", "part", "preface", "reference", "toc"
1189             };
1190
1191             for (; pref_epit < epit; ++pref_epit) {
1192             auto par = text.paragraphs().iterator_at(pref_epit);
1193             if (allowedElements.find(par->layout().docbooktag()) != allowedElements.end() ||
1194                     allowedElements.find(par->layout().docbooksectiontag()) != allowedElements.end())
1195                 break;
1196
1197             hasPreface = true;
1198             }
1199
1200             // Output a preface if required. A title is needed for the document to be valid...
1201             if (hasPreface) {
1202                 xs << xml::StartTag("preface");
1203                 xs << xml::CR();
1204
1205                 xs << xml::StartTag("title");
1206                 xs << "Preface";
1207                 xs << xml::EndTag("title");
1208             xs << xml::CR();
1209
1210             auto pref_par = text.paragraphs().iterator_at(pref_bpit);
1211             auto pref_end = text.paragraphs().iterator_at(pref_epit);
1212             while (pref_par != pref_end) {
1213                 // Skip paragraphs not producing any output.
1214                 if (hasOnlyNotes(*pref_par)) {
1215                     ++pref_par;
1216                     continue;
1217                 }
1218
1219                 // TODO: must sections be handled here? If so, it might be useful to extract the corresponding loop
1220                 // in the rest of this function to use the same here (and avoid copy-paste mistakes).
1221                 pref_par = makeAny(text, buf, xs, runparams, pref_par);
1222             }
1223
1224                 xs << xml::EndTag("preface");
1225             xs << xml::CR();
1226
1227             // Skip what has just been generated in the preface.
1228             bpit = pref_epit;
1229             }
1230         }
1231
1232         std::stack<std::pair<int, string>> headerLevels; // Used to determine when to open/close sections: store the depth
1233         // of the section and the tag that was used to open it.
1234
1235         // Then, iterate through the paragraphs of this document.
1236         auto par = text.paragraphs().iterator_at(bpit);
1237         auto end = text.paragraphs().iterator_at(epit);
1238         while (par != end) {
1239                 // Skip paragraphs not producing any output.
1240                 if (hasOnlyNotes(*par)) {
1241                         ++par;
1242                         continue;
1243                 }
1244
1245                 OutputParams ourparams = runparams;
1246                 Layout const &style = par->layout();
1247
1248                 // Think about adding <section> and/or </section>s.
1249                 if (isLayoutSectioning(style) || par->params().startOfAppendix()) {
1250                         int level = style.toclevel;
1251
1252                         // Need to close a previous section if it has the same level or a higher one (close <section> if opening a
1253                         // <h2> after a <h2>, <h3>, <h4>, <h5> or <h6>). More examples:
1254                         //   - current: h2; back: h1; do not close any <section>
1255                         //   - current: h1; back: h2; close two <section> (first the <h2>, then the <h1>, so a new <h1> can come)
1256                         // Some layouts require that Layout::NOT_IN_TOC sections still cause closing of previous sections. This is
1257                         // mostly to ensure that the section is positioned at a DocBook-compatible level (acknowledgements: cannot
1258                         // be under a section!).
1259                         while (!headerLevels.empty() && level <= headerLevels.top().first) {
1260                                 // Output the tag only if it corresponds to a legit section.
1261                                 int stackLevel = headerLevels.top().first;
1262                                 if (stackLevel != Layout::NOT_IN_TOC) {
1263                                         xs << xml::EndTag(headerLevels.top().second);
1264                                         xs << xml::CR();
1265                                 }
1266                                 headerLevels.pop();
1267                         }
1268
1269                         // Open the new section: first push it onto the stack, then output it in DocBook.
1270                         string sectionTag = (par->params().startOfAppendix()) ? "appendix" : style.docbooksectiontag();
1271                         headerLevels.push(std::make_pair(level, sectionTag));
1272
1273                         // Some sectioning-like elements should not be output (such as FrontMatter).
1274                         if (level != Layout::NOT_IN_TOC) {
1275                                 // Look for a label in the title, i.e. a InsetLabel as a child.
1276                                 docstring id = docstring();
1277                                 for (pos_type i = 0; i < par->size(); ++i) {
1278                                         Inset const *inset = par->getInset(i);
1279                                         if (inset) {
1280                                                 if (auto label = dynamic_cast<InsetLabel const *>(inset)) {
1281                                                         // Generate the attributes for the section if need be.
1282                                                         id += "xml:id=\"" + xml::cleanID(label->screenLabel()) + "\"";
1283
1284                                                         // Don't output the ID as a DocBook <anchor>.
1285                                                         ourparams.docbook_anchors_to_ignore.emplace(label->screenLabel());
1286
1287                                                         // Cannot have multiple IDs per tag. If there is another ID inset in the document, it will
1288                                                         // be output as a DocBook anchor.
1289                                                         break;
1290                                                 }
1291                                         }
1292                                 }
1293
1294                                 // Write the open tag for this section.
1295                                 docstring attrs;
1296                                 if (!id.empty())
1297                                         attrs = id;
1298                                 xs << xml::StartTag(sectionTag, attrs);
1299                                 xs << xml::CR();
1300                         }
1301                 }
1302
1303                 // Close all sections before the bibliography.
1304                 // TODO: Only close all when the bibliography is at the end of the document? Or force to output the bibliography
1305                 // at the end of the document? Or don't care (as allowed by DocBook)?
1306                 if (!par->insetList().empty()) {
1307                         Inset const *firstInset = par->getInset(0);
1308                         if (firstInset && (firstInset->lyxCode() == BIBITEM_CODE || firstInset->lyxCode() == BIBTEX_CODE)) {
1309                                 while (!headerLevels.empty()) {
1310                                         // Don't close appendices before bibliographies.
1311                                         if (headerLevels.top().second == "appendix")
1312                                                 break;
1313
1314                                         // Pop the section from the stack.
1315                                         int level = headerLevels.top().first;
1316                                         docstring tag = from_utf8("</" + headerLevels.top().second + ">");
1317                                         headerLevels.pop();
1318
1319                                         // Output the tag only if it corresponds to a legit section, as the rest of the code.
1320                                         if (level != Layout::NOT_IN_TOC) {
1321                                                 xs << XMLStream::ESCAPE_NONE << tag;
1322                                                 xs << xml::CR();
1323                                         }
1324                                 }
1325                         }
1326                 }
1327
1328                 // Generate the <info> tag if a section was just opened.
1329                 // Some sections may require abstracts (mostly parts, in books: DocBookForceAbstractTag will not be NONE),
1330                 // others can still have an abstract (it must be detected so that it can be output at the right place).
1331                 // TODO: docbookforceabstracttag is a bit contrived here, but it does the job. Having another field just for
1332                 // this would be cleaner, but that's just for <part> and <partintro>, so it's probably not worth the effort.
1333                 if (isLayoutSectioning(style)) {
1334                         // This abstract may be found between the next paragraph and the next title.
1335                         pit_type cpit = std::distance(text.paragraphs().begin(), par);
1336                         pit_type ppit = std::get<1>(hasDocumentSectioning(paragraphs, cpit + 1L, epit));
1337
1338                         // Generate this abstract (this code corresponds to parts of outputDocBookInfo).
1339                         DocBookInfoTag secInfo = getParagraphsWithInfo(paragraphs, cpit, ppit, true,
1340                                                                                                   style.docbookforceabstracttag() != "NONE");
1341
1342                         if (!secInfo.mustBeInInfo.empty() || !secInfo.shouldBeInInfo.empty() || !secInfo.abstract.empty()) {
1343                                 // Generate the <info>, if required. If DocBookForceAbstractTag != NONE, this abstract will not be in
1344                                 // <info>, unlike other ("standard") abstracts.
1345                                 bool hasStandardAbstract = !secInfo.abstract.empty() && style.docbookforceabstracttag() == "NONE";
1346                                 bool needInfo = !secInfo.mustBeInInfo.empty() || hasStandardAbstract;
1347
1348                                 if (needInfo) {
1349                                         xs.startDivision(false);
1350                                         xs << xml::StartTag("info");
1351                                         xs << xml::CR();
1352                                 }
1353
1354                                 // Output the elements that should go in <info>, before and after the abstract.
1355                                 for (auto pit : secInfo.shouldBeInInfo) // Typically, the title: these elements are so important and ubiquitous
1356                                         // that mandating a wrapper like <info> would repel users. Thus, generate them first.
1357                                         makeAny(text, buf, xs, ourparams, paragraphs.iterator_at(pit));
1358                                 for (auto pit : secInfo.mustBeInInfo)
1359                                         makeAny(text, buf, xs, ourparams, paragraphs.iterator_at(pit));
1360
1361                                 // Deal with the abstract in <info> if it is standard (i.e. its tag is <abstract>).
1362                                 if (!secInfo.abstract.empty() && hasStandardAbstract) {
1363                                         if (!secInfo.abstractLayout) {
1364                                                 xs << xml::StartTag("abstract");
1365                                                 xs << xml::CR();
1366                                         }
1367
1368                                         for (auto const &p : secInfo.abstract)
1369                                                 makeAny(text, buf, xs, ourparams, paragraphs.iterator_at(p));
1370
1371                                         if (!secInfo.abstractLayout) {
1372                                                 xs << xml::EndTag("abstract");
1373                                                 xs << xml::CR();
1374                                         }
1375                                 }
1376
1377                                 // End the <info> tag if it was started.
1378                                 if (needInfo) {
1379                                         if (!xs.isLastTagCR())
1380                                                 xs << xml::CR();
1381
1382                                         xs << xml::EndTag("info");
1383                                         xs << xml::CR();
1384                                         xs.endDivision();
1385                                 }
1386
1387                                 // Deal with the abstract outside <info> if it is not standard (i.e. its tag is layout-defined).
1388                                 if (!secInfo.abstract.empty() && !hasStandardAbstract) {
1389                                         // Assert: style.docbookforceabstracttag() != NONE.
1390                                         xs << xml::StartTag(style.docbookforceabstracttag());
1391                                         xs << xml::CR();
1392                                         for (auto const &p : secInfo.abstract)
1393                                                 makeAny(text, buf, xs, ourparams, paragraphs.iterator_at(p));
1394                                         xs << xml::EndTag(style.docbookforceabstracttag());
1395                                         xs << xml::CR();
1396                                 }
1397
1398                                 // Skip all the text that has just been generated.
1399                                 par = paragraphs.iterator_at(secInfo.epit);
1400                         } else {
1401                                 // No <info> tag to generate, proceed as for normal paragraphs.
1402                                 par = makeAny(text, buf, xs, ourparams, par);
1403                         }
1404                 } else {
1405                         // Generate this paragraph, as it has nothing special.
1406                         par = makeAny(text, buf, xs, ourparams, par);
1407                 }
1408         }
1409
1410         // If need be, close <section>s, but only at the end of the document (otherwise, dealt with at the beginning
1411         // of the loop).
1412         while (!headerLevels.empty() && headerLevels.top().first > Layout::NOT_IN_TOC) {
1413                 docstring tag = from_utf8("</" + headerLevels.top().second + ">");
1414                 headerLevels.pop();
1415                 xs << XMLStream::ESCAPE_NONE << tag;
1416                 xs << xml::CR();
1417         }
1418 }
1419
1420 } // namespace lyx