src/output_docbook.cpp

   1 /**
   2  * \file output_docbook.cpp
   3  * This file is part of LyX, the document processor.
   4  * Licence details can be found in the file COPYING.
   5  *
   6  * \author Lars Gullik Bjønnes
   7  * \author José Matos
   8  *
   9  * Full author contact details are available in file CREDITS.
  10  */
  11
  12 #include <config.h>
  13
  14 #include "Buffer.h"
  15 #include "buffer_funcs.h"
  16 #include "BufferParams.h"
  17 #include "Font.h"
  18 #include "InsetList.h"
  19 #include "Layout.h"
  20 #include "OutputParams.h"
  21 #include "Paragraph.h"
  22 #include "ParagraphList.h"
  23 #include "ParagraphParameters.h"
  24 #include "xml.h"
  25 #include "Text.h"
  26 #include "TextClass.h"
  27
  28 #include "insets/InsetBibtex.h"
  29 #include "insets/InsetBibitem.h"
  30 #include "insets/InsetLabel.h"
  31 #include "insets/InsetNote.h"
  32
  33 #include "support/convert.h"
  34 #include "support/debug.h"
  35 #include "support/lassert.h"
  36 #include "support/lstrings.h"
  37 #include "support/textutils.h"
  38
  39 #include "support/regex.h"
  40
  41 #include <stack>
  42 #include <iostream>
  43 #include <algorithm>
  44 #include <sstream>
  45
  46 using namespace std;
  47 using namespace lyx::support;
  48
  49 namespace lyx {
  50
  51 namespace {
  52
  53 std::string fontToDocBookTag(xml::FontTypes type)
  54 {
  55         switch (type) {
  56         case xml::FontTypes::FT_EMPH:
  57         case xml::FontTypes::FT_BOLD:
  58                 return "emphasis";
  59         case xml::FontTypes::FT_NOUN:
  60                 return "person";
  61         case xml::FontTypes::FT_UBAR:
  62         case xml::FontTypes::FT_WAVE:
  63         case xml::FontTypes::FT_DBAR:
  64         case xml::FontTypes::FT_SOUT:
  65         case xml::FontTypes::FT_XOUT:
  66         case xml::FontTypes::FT_ITALIC:
  67         case xml::FontTypes::FT_UPRIGHT:
  68         case xml::FontTypes::FT_SLANTED:
  69         case xml::FontTypes::FT_SMALLCAPS:
  70         case xml::FontTypes::FT_ROMAN:
  71         case xml::FontTypes::FT_SANS:
  72                 return "emphasis";
  73         case xml::FontTypes::FT_TYPE:
  74                 return "code";
  75         case xml::FontTypes::FT_SIZE_TINY:
  76         case xml::FontTypes::FT_SIZE_SCRIPT:
  77         case xml::FontTypes::FT_SIZE_FOOTNOTE:
  78         case xml::FontTypes::FT_SIZE_SMALL:
  79         case xml::FontTypes::FT_SIZE_NORMAL:
  80         case xml::FontTypes::FT_SIZE_LARGE:
  81         case xml::FontTypes::FT_SIZE_LARGER:
  82         case xml::FontTypes::FT_SIZE_LARGEST:
  83         case xml::FontTypes::FT_SIZE_HUGE:
  84         case xml::FontTypes::FT_SIZE_HUGER:
  85         case xml::FontTypes::FT_SIZE_INCREASE:
  86         case xml::FontTypes::FT_SIZE_DECREASE:
  87                 return "emphasis";
  88         default:
  89                 return "";
  90         }
  91 }
  92
  93
  94 string fontToRole(xml::FontTypes type)
  95 {
  96         // Specific fonts are achieved with roles. The only common ones are "" for basic emphasis,
  97         // and "bold"/"strong" for bold. With some specific options, other roles are copied into
  98         // HTML output (via the DocBook XSLT sheets); otherwise, if not recognised, they are just ignored.
  99         // Hence, it is not a problem to have many roles by default here.
 100         // See https://www.sourceware.org/ml/docbook/2003-05/msg00269.html
 101         switch (type) {
 102         case xml::FontTypes::FT_ITALIC:
 103         case xml::FontTypes::FT_EMPH:
 104                 return "";
 105         case xml::FontTypes::FT_BOLD:
 106                 return "bold";
 107         case xml::FontTypes::FT_NOUN: // Outputs a <person>
 108         case xml::FontTypes::FT_TYPE: // Outputs a <code>
 109                 return "";
 110         case xml::FontTypes::FT_UBAR:
 111                 return "underline";
 112
 113         // All other roles are non-standard for DocBook.
 114
 115         case xml::FontTypes::FT_WAVE:
 116                 return "wave";
 117         case xml::FontTypes::FT_DBAR:
 118                 return "dbar";
 119         case xml::FontTypes::FT_SOUT:
 120                 return "sout";
 121         case xml::FontTypes::FT_XOUT:
 122                 return "xout";
 123         case xml::FontTypes::FT_UPRIGHT:
 124                 return "upright";
 125         case xml::FontTypes::FT_SLANTED:
 126                 return "slanted";
 127         case xml::FontTypes::FT_SMALLCAPS:
 128                 return "smallcaps";
 129         case xml::FontTypes::FT_ROMAN:
 130                 return "roman";
 131         case xml::FontTypes::FT_SANS:
 132                 return "sans";
 133         case xml::FontTypes::FT_SIZE_TINY:
 134                 return "tiny";
 135         case xml::FontTypes::FT_SIZE_SCRIPT:
 136                 return "size_script";
 137         case xml::FontTypes::FT_SIZE_FOOTNOTE:
 138                 return "size_footnote";
 139         case xml::FontTypes::FT_SIZE_SMALL:
 140                 return "size_small";
 141         case xml::FontTypes::FT_SIZE_NORMAL:
 142                 return "size_normal";
 143         case xml::FontTypes::FT_SIZE_LARGE:
 144                 return "size_large";
 145         case xml::FontTypes::FT_SIZE_LARGER:
 146                 return "size_larger";
 147         case xml::FontTypes::FT_SIZE_LARGEST:
 148                 return "size_largest";
 149         case xml::FontTypes::FT_SIZE_HUGE:
 150                 return "size_huge";
 151         case xml::FontTypes::FT_SIZE_HUGER:
 152                 return "size_huger";
 153         case xml::FontTypes::FT_SIZE_INCREASE:
 154                 return "size_increase";
 155         case xml::FontTypes::FT_SIZE_DECREASE:
 156                 return "size_decrease";
 157         default:
 158                 return "";
 159         }
 160 }
 161
 162 string fontToAttribute(xml::FontTypes type) {
 163         // If there is a role (i.e. nonstandard use of a tag), output the attribute. Otherwise, the sheer tag is sufficient
 164         // for the font.
 165         string role = fontToRole(type);
 166         if (!role.empty()) {
 167                 return "role='" + role + "'";
 168         } else {
 169                 return "";
 170         }
 171 }
 172
 173
 174 xml::FontTag docbookStartFontTag(xml::FontTypes type)
 175 {
 176         return xml::FontTag(from_utf8(fontToDocBookTag(type)), from_utf8(fontToAttribute(type)), type);
 177 }
 178
 179
 180 xml::EndFontTag docbookEndFontTag(xml::FontTypes type)
 181 {
 182         return xml::EndFontTag(from_utf8(fontToDocBookTag(type)), type);
 183 }
 184
 185
 186 // Convenience functions to open and close tags. First, very low-level ones to ensure a consistent new-line behaviour.
 187 // Block style:
 188 //        Content before
 189 //        <blocktag>
 190 //          Contents of the block.
 191 //        </blocktag>
 192 //        Content after
 193 // Paragraph style:
 194 //        Content before
 195 //          <paratag>Contents of the paragraph.</paratag>
 196 //        Content after
 197 // Inline style:
//    Content before<inlinetag>Contents of the inline element.</inlinetag>Content after
 199
 200 void openInlineTag(XMLStream & xs, const std::string & tag, const std::string & attr)
 201 {
 202         xs << xml::StartTag(tag, attr);
 203 }
 204
 205
 206 void closeInlineTag(XMLStream & xs, const std::string & tag)
 207 {
 208         xs << xml::EndTag(tag);
 209 }
 210
 211
 212 void openParTag(XMLStream & xs, const std::string & tag, const std::string & attr)
 213 {
 214         if (!xs.isLastTagCR())
 215                 xs << xml::CR();
 216         xs << xml::StartTag(tag, attr);
 217 }
 218
 219
 220 void closeParTag(XMLStream & xs, const std::string & tag)
 221 {
 222         xs << xml::EndTag(tag);
 223         xs << xml::CR();
 224 }
 225
 226
 227 void openBlockTag(XMLStream & xs, const std::string & tag, const std::string & attr)
 228 {
 229         if (!xs.isLastTagCR())
 230                 xs << xml::CR();
 231         xs << xml::StartTag(tag, attr);
 232         xs << xml::CR();
 233 }
 234
 235
 236 void closeBlockTag(XMLStream & xs, const std::string & tag)
 237 {
 238         if (!xs.isLastTagCR())
 239                 xs << xml::CR();
 240         xs << xml::EndTag(tag);
 241         xs << xml::CR();
 242 }
 243
 244
 245 void openTag(XMLStream & xs, const std::string & tag, const std::string & attr, const std::string & tagtype)
 246 {
 247         if (tag.empty() || tag == "NONE")
 248                 return;
 249
 250         if (tag == "para" || tagtype == "paragraph") // Special case for <para>: always considered as a paragraph.
 251                 openParTag(xs, tag, attr);
 252         else if (tagtype == "block")
 253                 openBlockTag(xs, tag, attr);
 254         else if (tagtype == "inline")
 255                 openInlineTag(xs, tag, attr);
 256         else
 257                 xs.writeError("Unrecognised tag type '" + tagtype + "' for '" + tag + " " + attr + "'");
 258 }
 259
 260
 261 void closeTag(XMLStream & xs, const std::string & tag, const std::string & tagtype)
 262 {
 263         if (tag.empty() || tag == "NONE")
 264                 return;
 265
 266         if (tag == "para" || tagtype == "paragraph") // Special case for <para>: always considered as a paragraph.
 267                 closeParTag(xs, tag);
 268         else if (tagtype == "block")
 269                 closeBlockTag(xs, tag);
 270         else if (tagtype == "inline")
 271                 closeInlineTag(xs, tag);
 272         else
 273                 xs.writeError("Unrecognised tag type '" + tagtype + "' for '" + tag + "'");
 274 }
 275
 276
 277 // Higher-level convenience functions.
 278
 279 void openParTag(XMLStream & xs, const Paragraph * par, const Paragraph * prevpar)
 280 {
 281         Layout const & lay = par->layout();
 282
 283         if (par == prevpar)
 284                 prevpar = nullptr;
 285
 286         // When should the wrapper be opened here? Only if the previous paragraph has the SAME wrapper tag
 287         // (usually, they won't have the same layout) and the CURRENT one allows merging.
 288         // The main use case is author information in several paragraphs: if the name of the author is the
 289         // first paragraph of an author, then merging with the previous tag does not make sense. Say the
 290         // next paragraph is the affiliation, then it should be output in the same <author> tag (different
 291         // layout, same wrapper tag).
 292         bool openWrapper = lay.docbookwrappertag() != "NONE";
 293         if (prevpar != nullptr) {
 294                 Layout const & prevlay = prevpar->layout();
 295                 if (prevlay.docbookwrappertag() != "NONE") {
 296                         openWrapper = prevlay.docbookwrappertag() == lay.docbookwrappertag()
 297                                         && !lay.docbookwrappermergewithprevious();
 298                 }
 299         }
 300
 301         // Main logic.
 302         if (openWrapper)
 303                 openTag(xs, lay.docbookwrappertag(), lay.docbookwrapperattr(), lay.docbookwrappertagtype());
 304
 305         const string & tag = lay.docbooktag();
 306         if (tag != "NONE") {
 307                 auto xmltag = xml::ParTag(tag, lay.docbookattr());
 308                 if (!xs.isTagOpen(xmltag, 1)) // Don't nest a paragraph directly in a paragraph.
 309                         // TODO: required or not?
 310                         // TODO: avoid creating a ParTag object just for this query...
 311                         openTag(xs, lay.docbooktag(), lay.docbookattr(), lay.docbooktagtype());
 312         }
 313
 314         openTag(xs, lay.docbookitemtag(), lay.docbookitemattr(), lay.docbookitemtagtype());
 315         openTag(xs, lay.docbookiteminnertag(), lay.docbookiteminnerattr(), lay.docbookiteminnertagtype());
 316 }
 317
 318
 319 void closeParTag(XMLStream & xs, Paragraph const * par, Paragraph const * nextpar)
 320 {
 321         if (par == nextpar)
 322                 nextpar = nullptr;
 323
 324         // See comment in openParTag.
 325         Layout const & lay = par->layout();
 326         bool closeWrapper = lay.docbookwrappertag() != "NONE";
 327         if (nextpar != nullptr) {
 328                 Layout const & nextlay = nextpar->layout();
 329                 if (nextlay.docbookwrappertag() != "NONE") {
 330                         closeWrapper = nextlay.docbookwrappertag() == lay.docbookwrappertag()
 331                                         && !nextlay.docbookwrappermergewithprevious();
 332                 }
 333         }
 334
 335         // Main logic.
 336         closeTag(xs, lay.docbookiteminnertag(), lay.docbookiteminnertagtype());
 337         closeTag(xs, lay.docbookitemtag(), lay.docbookitemtagtype());
 338         closeTag(xs, lay.docbooktag(), lay.docbooktagtype());
 339         if (closeWrapper)
 340                 closeTag(xs, lay.docbookwrappertag(), lay.docbookwrappertagtype());
 341 }
 342
 343
 344 void openLabelTag(XMLStream & xs, Layout const & lay) // Mostly for definition lists.
 345 {
 346         openTag(xs, lay.docbookitemlabeltag(), lay.docbookitemlabelattr(), lay.docbookitemlabeltagtype());
 347 }
 348
 349
 350 void closeLabelTag(XMLStream & xs, Layout const & lay)
 351 {
 352         closeTag(xs, lay.docbookitemlabeltag(), lay.docbookitemlabeltagtype());
 353 }
 354
 355
 356 void openItemTag(XMLStream & xs, Layout const & lay)
 357 {
 358         openTag(xs, lay.docbookitemtag(), lay.docbookitemattr(), lay.docbookitemtagtype());
 359 }
 360
 361
 362 void closeItemTag(XMLStream & xs, Layout const & lay)
 363 {
 364         closeTag(xs, lay.docbookitemtag(), lay.docbookitemtagtype());
 365 }
 366
 367
 368 void makeParagraphBibliography(
 369                 Buffer const & buf,
 370                 XMLStream & xs,
 371                 OutputParams const & runparams,
 372                 Text const & text,
 373                 ParagraphList::const_iterator const & pbegin)
 374 {
 375         auto const begin = text.paragraphs().begin();
 376         auto const end = text.paragraphs().end();
 377         auto pend = pbegin;
 378         ++pend;
 379
 380         // Find the paragraph *before* pbegin.
 381         ParagraphList::const_iterator pbegin_before = begin;
 382         if (pbegin != begin) {
 383                 ParagraphList::const_iterator pbegin_before_next = begin;
 384                 ++pbegin_before_next;
 385
 386                 while (pbegin_before_next != pbegin) {
 387                         ++pbegin_before;
 388                         ++pbegin_before_next;
 389                 }
 390         }
 391
 392         ParagraphList::const_iterator par = pbegin;
 393
 394         // If this is the first paragraph in a bibliography, open the bibliography tag.
 395         if (pbegin != begin && pbegin_before->layout().latextype != LATEX_BIB_ENVIRONMENT) {
 396                 xs << xml::StartTag("bibliography");
 397                 xs << xml::CR();
 398         }
 399
 400         // Generate the required paragraphs, but only if they are .
 401         for (; par != pend; ++par) {
 402                 // Start the precooked bibliography entry. This is very much like opening a paragraph tag.
 403                 // Don't forget the citation ID!
 404                 docstring attr;
 405                 for (auto i = 0; i < par->size(); ++i) {
 406                         Inset const *ip = par->getInset(0);
 407                         if (ip != nullptr && ip->lyxCode() == BIBITEM_CODE) {
 408                                 const auto * bibitem = dynamic_cast<const InsetBibitem*>(par->getInset(i));
 409                                 attr = from_utf8("xml:id='") + bibitem->getParam("key") + from_utf8("'");
 410                                 break;
 411                         }
 412                 }
 413                 xs << xml::StartTag(from_utf8("bibliomixed"), attr);
 414
 415                 // Generate the entry.
 416                 par->simpleDocBookOnePar(buf, xs, runparams, text.outerFont(distance(begin, par)), true, true, 0);
 417
 418                 // End the precooked bibliography entry.
 419                 xs << xml::EndTag("bibliomixed");
 420                 xs << xml::CR();
 421         }
 422
 423         // If this is the last paragraph in a bibliography, close the bibliography tag.
 424         if (par == end || par->layout().latextype != LATEX_BIB_ENVIRONMENT) {
 425                 xs << xml::EndTag("bibliography");
 426                 xs << xml::CR();
 427         }
 428 }
 429
 430
 431 void makeParagraph(
 432                 Buffer const & buf,
 433                 XMLStream & xs,
 434                 OutputParams const & runparams,
 435                 Text const & text,
 436                 ParagraphList::const_iterator const & par)
 437 {
 438         auto const begin = text.paragraphs().begin();
 439         auto const end = text.paragraphs().end();
 440         auto prevpar = text.paragraphs().getParagraphBefore(par);
 441
 442         // We want to open the paragraph tag if:
 443         //   (i) the current layout permits multiple paragraphs
 444         //  (ii) we are either not already inside a paragraph (HTMLIsBlock) OR
 445         //         we are, but this is not the first paragraph
 446         //
 447         // But there is also a special case, and we first see whether we are in it.
 448         // We do not want to open the paragraph tag if this paragraph contains
 449         // only one item, and that item is "inline", i.e., not HTMLIsBlock (such
 450         // as a branch). On the other hand, if that single item has a font change
 451         // applied to it, then we still do need to open the paragraph.
 452         //
 453         // Obviously, this is very fragile. The main reason we need to do this is
 454         // because of branches, e.g., a branch that contains an entire new section.
 455         // We do not really want to wrap that whole thing in a <div>...</div>.
 456         bool special_case = false;
 457         Inset const *specinset = par->size() == 1 ? par->getInset(0) : nullptr;
 458         if (specinset && !specinset->getLayout().htmlisblock()) { // TODO: Convert htmlisblock to a DocBook parameter?
 459                 Layout const &style = par->layout();
 460                 FontInfo const first_font = style.labeltype == LABEL_MANUAL ?
 461                                                                         style.labelfont : style.font;
 462                 FontInfo const our_font =
 463                                 par->getFont(buf.masterBuffer()->params(), 0,
 464                                                          text.outerFont(std::distance(begin, par))).fontInfo();
 465
 466                 if (first_font == our_font)
 467                         special_case = true;
 468         }
 469
 470         // Plain layouts must be ignored.
 471         if (!special_case && buf.params().documentClass().isPlainLayout(par->layout()) && !runparams.docbook_force_pars)
 472                 special_case = true;
 473         // TODO: Could get rid of this with a DocBook equivalent to htmlisblock?
 474         if (!special_case && par->size() == 1 && par->getInset(0)) {
 475                 Inset const * firstInset = par->getInset(0);
 476
 477                 // Floats cannot be in paragraphs.
 478                 special_case = to_utf8(firstInset->layoutName()).substr(0, 6) == "Float:";
 479
 480                 // Bibliographies cannot be in paragraphs.
 481                 if (!special_case && firstInset->asInsetCommand())
 482                         special_case = firstInset->asInsetCommand()->params().getCmdName() == "bibtex";
 483
 484                 // Equations do not deserve their own paragraph (DocBook allows them outside paragraphs).
 485                 if (!special_case && firstInset->asInsetMath())
 486                         special_case = true;
 487
 488                 // ERTs are in comments, not paragraphs.
 489                 if (!special_case && firstInset->lyxCode() == lyx::ERT_CODE)
 490                         special_case = true;
 491
 492                 // Listings should not get into their own paragraph.
 493                 if (!special_case && firstInset->lyxCode() == lyx::LISTINGS_CODE)
 494                         special_case = true;
 495         }
 496
 497         bool const open_par = runparams.docbook_make_pars
 498                                                   && !runparams.docbook_in_par
 499                                                   && !special_case;
 500
 501         // We want to issue the closing tag if either:
 502         //   (i)  We opened it, and either docbook_in_par is false,
 503         //              or we're not in the last paragraph, anyway.
 504         //   (ii) We didn't open it and docbook_in_par is true,
 505         //              but we are in the first par, and there is a next par.
 506         auto nextpar = par;
 507         ++nextpar;
 508         bool const close_par = open_par && (!runparams.docbook_in_par);
 509
 510         // Determine if this paragraph has some real content. Things like new pages are not caught
 511         // by Paragraph::empty(), even though they do not generate anything useful in DocBook.
 512         odocstringstream os2;
 513         XMLStream xs2(os2);
 514         par->simpleDocBookOnePar(buf, xs2, runparams, text.outerFont(distance(begin, par)), open_par, close_par, 0);
 515
 516         docstring cleaned = os2.str();
 517         static const lyx::regex reg("[ \\r\\n]*");
 518         cleaned = from_utf8(lyx::regex_replace(to_utf8(cleaned), reg, string("")));
 519
 520         if (!cleaned.empty()) {
 521                 if (open_par)
 522                         openParTag(xs, &*par, prevpar);
 523
 524                 xs << XMLStream::ESCAPE_NONE << os2.str();
 525
 526                 if (close_par)
 527                         closeParTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr);
 528         }
 529 }
 530
 531
 532 void makeAny(
 533                 Text const &text,
 534                 Buffer const &buf,
 535                 XMLStream &xs,
 536                 OutputParams const &ourparams,
 537                 ParagraphList::const_iterator par);
 538
 539
 540 void makeEnvironment(
 541                 Buffer const &buf,
 542                 XMLStream &xs,
 543                 OutputParams const &runparams,
 544                 Text const &text,
 545                 ParagraphList::const_iterator const & par)
 546 {
 547         auto const end = text.paragraphs().end();
 548
 549         // Output the opening tag for this environment, but only if it has not been previously opened (condition
 550         // implemented in openParTag).
 551         auto prevpar = text.paragraphs().getParagraphBefore(par);
 552         openParTag(xs, &*par, prevpar); // TODO: switch in layout for par/block?
 553
 554         // Generate the contents of this environment. There is a special case if this is like some environment.
 555         Layout const & style = par->layout();
 556         if (style.latextype == LATEX_COMMAND) {
 557                 // Nothing to do (otherwise, infinite loops).
 558         } else if (style.latextype == LATEX_ENVIRONMENT ||
 559                         style.latextype == LATEX_LIST_ENVIRONMENT ||
 560                         style.latextype == LATEX_ITEM_ENVIRONMENT) {
 561                 // Open a wrapper tag if needed.
 562                 if (style.docbookitemwrappertag() != "NONE") {
 563                         xs << xml::StartTag(style.docbookitemwrappertag(), style.docbookitemwrapperattr());
 564                         xs << xml::CR();
 565                 }
 566
 567                 // Generate the label, if need be. If it is taken from the text, sep != 0 and corresponds to the first
 568                 // character after the label.
 569                 pos_type sep = 0;
 570                 if (style.labeltype != LABEL_NO_LABEL && style.docbookitemlabeltag() != "NONE") {
 571                         // At least one condition must be met:
 572                         //  - this environment is not a list
 573                         //  - if this is a list, the label must not be manual (i.e. it must be taken from the layout)
 574                         if (style.latextype != LATEX_LIST_ENVIRONMENT || style.labeltype != LABEL_MANUAL) {
 575                                 // Usual cases: maybe there is something specified at the layout level. Highly unlikely, though.
 576                                 docstring const lbl = par->params().labelString();
 577
 578                                 if (lbl.empty()) {
 579                                         xs << xml::CR();
 580                                 } else {
 581                                         openLabelTag(xs, style);
 582                                         xs << lbl;
 583                                         closeLabelTag(xs, style);
 584                                 }
 585                         } else {
 586                                 // Only variablelist gets here (or similar items defined as an extension in the layout).
 587                                 openLabelTag(xs, style);
 588                                 sep = par->firstWordDocBook(xs, runparams);
 589                                 closeLabelTag(xs, style);
 590                         }
 591                 }
 592
 593                 // Maybe the item is completely empty, i.e. if the first word ends at the end of the current paragraph
 594                 // AND if the next paragraph doesn't have the same depth (if there is such a paragraph).
 595                 // Common case: there is only the first word on the line, but there is a nested list instead
 596                 // of more text.
 597                 bool emptyItem = false;
 598                 if (sep == par->size()) { // If the separator is already at the end of this paragraph...
 599                         auto next_par = par;
 600                         ++next_par;
 601                         if (next_par == text.paragraphs().end()) // There is no next paragraph.
 602                                 emptyItem = true;
 603                         else // There is a next paragraph: check depth.
 604                                 emptyItem = par->params().depth() >= next_par->params().depth();
 605                 }
 606
 607                 if (emptyItem) {
 608                         // Avoid having an empty item, this is not valid DocBook. A single character is enough to force
 609                         // generation of a full <para>.
 610                         // TODO: this always worked only by magic...
 611                         xs << ' ';
 612                 } else {
 613                         // Generate the rest of the paragraph, if need be.
 614                         par->simpleDocBookOnePar(buf, xs, runparams, text.outerFont(std::distance(text.paragraphs().begin(), par)),
 615                                                                  true, true, sep);
 616                 }
 617         } else {
 618                 makeAny(text, buf, xs, runparams, par);
 619         }
 620
 621         // Close the environment.
 622         auto nextpar = par;
 623         ++nextpar;
 624         closeParTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr); // TODO: switch in layout for par/block?
 625 }
 626
 627
 628 void makeCommand(
 629                 Buffer const & buf,
 630                 XMLStream & xs,
 631                 OutputParams const & runparams,
 632                 Text const & text,
 633                 ParagraphList::const_iterator const & par)
 634 {
 635         // Unlike XHTML, no need for labels, as they are handled by DocBook tags.
 636         auto const begin = text.paragraphs().begin();
 637         auto const end = text.paragraphs().end();
 638         auto nextpar = par;
 639         ++nextpar;
 640
 641         // Generate this command.
 642         auto prevpar = text.paragraphs().getParagraphBefore(par);
 643         openParTag(xs, &*par, prevpar);
 644
 645         par->simpleDocBookOnePar(buf, xs, runparams,
 646                                  text.outerFont(distance(begin, par)));
 647
 648         closeParTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr);
 649 }
 650
 651
 652 void makeAny(
 653                 Text const &text,
 654                 Buffer const &buf,
 655                 XMLStream &xs,
 656                 OutputParams const &ourparams,
 657                 ParagraphList::const_iterator par)
 658 {
 659         switch (par->layout().latextype) {
 660         case LATEX_COMMAND:
 661                 makeCommand(buf, xs, ourparams, text, par);
 662                 break;
 663         case LATEX_ENVIRONMENT:
 664         case LATEX_LIST_ENVIRONMENT:
 665         case LATEX_ITEM_ENVIRONMENT:
 666                 makeEnvironment(buf, xs, ourparams, text, par);
 667                 break;
 668         case LATEX_PARAGRAPH:
 669                 makeParagraph(buf, xs, ourparams, text, par);
 670                 break;
 671         case LATEX_BIB_ENVIRONMENT:
 672                 makeParagraphBibliography(buf, xs, ourparams, text, par);
 673                 break;
 674         }
 675 }
 676
 677 } // end anonymous namespace
 678
 679
 680 using DocBookDocumentSectioning = tuple<bool, pit_type>;
 681
 682
 683 struct DocBookInfoTag
 684 {
 685         const set<pit_type> shouldBeInInfo;
 686         const set<pit_type> mustBeInInfo;
 687         const set<pit_type> abstract;
 688         pit_type bpit;
 689         pit_type epit;
 690
 691         DocBookInfoTag(const set<pit_type> & shouldBeInInfo, const set<pit_type> & mustBeInInfo,
 692                                    const set<pit_type> & abstract, pit_type bpit, pit_type epit) :
 693                                    shouldBeInInfo(shouldBeInInfo), mustBeInInfo(mustBeInInfo), abstract(abstract),
 694                                    bpit(bpit), epit(epit) {}
 695 };
 696
 697
 698 DocBookDocumentSectioning hasDocumentSectioning(ParagraphList const &paragraphs, pit_type bpit, pit_type const epit) {
 699         bool documentHasSections = false;
 700
 701         while (bpit < epit) {
 702                 Layout const &style = paragraphs[bpit].layout();
 703                 documentHasSections |= style.category() == from_utf8("Sectioning");
 704
 705                 if (documentHasSections)
 706                         break;
 707                 bpit += 1;
 708         }
 709         // Paragraphs before the first section: [ runparams.par_begin ; eppit )
 710
 711         return make_tuple(documentHasSections, bpit);
 712 }
 713
 714
 715 bool hasOnlyNotes(Paragraph const & par)
 716 {
 717         // Precondition: the paragraph is not empty. Otherwise, the function will always return true...
 718         for (int i = 0; i < par.size(); ++i)
 719                 // If you find something that is not an inset (like actual text) or an inset that is not a note,
 720                 // return false.
 721                 if (!par.isInset(i) || !dynamic_cast<InsetNote *>(par.insetList().get(i)))
 722                         return false;
 723         return true;
 724 }
 725
 726
 727 DocBookInfoTag getParagraphsWithInfo(ParagraphList const &paragraphs, pit_type bpit, pit_type const epit) {
 728         set<pit_type> shouldBeInInfo;
 729         set<pit_type> mustBeInInfo;
 730         set<pit_type> abstract;
 731
 732         // Find the first non empty paragraph by mutating bpit.
 733         while (bpit < epit) {
 734                 Paragraph const &par = paragraphs[bpit];
 735                 if (par.empty() || hasOnlyNotes(par))
 736                         bpit += 1;
 737                 else
 738                         break;
 739         }
 740
 741         // Find the last info-like paragraph.
 742         pit_type cpit = bpit;
 743         bool hasAbstractLayout = false;
 744         while (cpit < epit) {
 745                 // Skip paragraphs only containing one note.
 746                 Paragraph const & par = paragraphs[cpit];
 747                 if (hasOnlyNotes(par)) {
 748                         cpit += 1;
 749                         continue;
 750                 }
 751
 752                 if (par.layout().docbookabstract())
 753                         hasAbstractLayout = true;
 754
 755                 // Based on layout information, store this paragraph in one set: should be in <info>, must be.
 756                 Layout const &style = par.layout();
 757
 758                 if (style.docbookininfo() == "always") {
 759                         mustBeInInfo.emplace(cpit);
 760                 } else if (style.docbookininfo() == "maybe") {
 761                         shouldBeInInfo.emplace(cpit);
 762                 } else {
 763                         // Hypothesis: the <info> parts should be grouped together near the beginning bpit.
 764                         // There may be notes in between, but nothing else.
 765                         break;
 766                 }
 767                 cpit += 1;
 768         }
 769         // Now, cpit points to the last paragraph that has things that could go in <info>.
 770         // bpit is the beginning of the <info> part.
 771
 772         // Go once again through the list of paragraphs to find the abstract. If there is an abstract
 773         // layout, only consider it. Otherwise, an abstract is just a sequence of paragraphs with text.
 774         if (hasAbstractLayout) {
 775                 pit_type pit = bpit;
 776                 while (pit < cpit) { // Don't overshoot the <info> part.
 777                         if (paragraphs[pit].layout().docbookabstract())
 778                                 abstract.emplace(pit);
 779                         pit++;
 780                 }
 781         } else {
 782                 pit_type lastAbstract = epit + 1; // A nonsensical value.
 783                 docstring lastAbstractLayout;
 784
 785                 pit_type pit = bpit;
 786                 while (pit < cpit) { // Don't overshoot the <info> part.
 787                         const Paragraph & par = paragraphs.at(pit);
 788                         if (!par.insetList().empty()) {
 789                                 for (const auto &i : par.insetList()) {
 790                                         if (i.inset->getText(0) != nullptr) {
 791                                                 if (lastAbstract == epit + 1) {
 792                                                         // First paragraph that matches the heuristic definition of abstract.
 793                                                         lastAbstract = pit;
 794                                                         lastAbstractLayout = par.layout().name();
 795                                                 } else if (pit > lastAbstract + 1 || par.layout().name() != lastAbstractLayout) {
 796                                                         // This is either too far from the last abstract paragraph or doesn't
 797                                                         // have the right layout name, BUT there has already been an abstract
 798                                                         // in this document: done with detecting the abstract.
 799                                                         goto done; // Easier to get out of two nested loops.
 800                                                 }
 801
 802                                                 abstract.emplace(pit);
 803                                                 break;
 804                                         }
 805                                 }
 806                         }
 807                         pit++;
 808                 }
 809         }
 810
 811         done:
 812         return DocBookInfoTag(shouldBeInInfo, mustBeInInfo, abstract, bpit, cpit);
 813 }
 814
 815
 816 void outputDocBookInfo(
 817                 Text const & text,
 818                 Buffer const & buf,
 819                 XMLStream & xs,
 820                 OutputParams const & runparams,
 821                 ParagraphList const & paragraphs,
 822                 DocBookInfoTag const & info)
 823 {
 824         // Perform an additional check on the abstract. Sometimes, there are many paragraphs that should go
 825         // into the abstract, but none generates actual content. Thus, first generate to a temporary stream,
 826         // then only create the <abstract> tag if these paragraphs generate some content.
 827         // This check must be performed *before* a decision on whether or not to output <info> is made.
 828         bool hasAbstract = !info.abstract.empty();
 829         docstring abstract;
 830         if (hasAbstract) {
 831                 // Generate the abstract XML into a string before further checks.
 832                 odocstringstream os2;
 833                 {
 834                         XMLStream xs2(os2);
 835                         auto bpit = *std::min_element(info.abstract.begin(), info.abstract.end());
 836                         auto epit = 1 + *std::max_element(info.abstract.begin(), info.abstract.end());
 837                         // info.abstract is inclusive, epit is exclusive, hence +1 for looping.
 838
 839                         while (bpit < epit) {
 840                                 makeAny(text, buf, xs2, runparams, paragraphs.iterator_at(bpit));
 841                                 bpit += 1;
 842                         }
 843                 }
 844
 845                 // Actually output the abstract if there is something to do. Don't count line feeds or spaces in this,
 846                 // even though they must be properly output if there is some abstract.
 847                 abstract = os2.str();
 848                 static const lyx::regex reg("[ \\r\\n]*");
 849                 docstring abstractContent = from_utf8(lyx::regex_replace(to_utf8(abstract), reg, string("")));
 850
 851                 // Nothing? Then there is no abstract!
 852                 if (abstractContent.empty())
 853                         hasAbstract = false;
 854         }
 855
 856         // The abstract must go in <info>. Otherwise, decide whether to open <info> based on the layouts.
 857         bool needInfo = !info.mustBeInInfo.empty() || hasAbstract;
 858
 859         // Start the <info> tag if required.
 860         if (needInfo) {
 861                 xs.startDivision(false);
 862                 xs << xml::StartTag("info");
 863                 xs << xml::CR();
 864         }
 865
 866         // Output the elements that should go in <info>, before and after the abstract.
 867         for (auto pit : info.shouldBeInInfo) { // Typically, the title: these elements are so important and ubiquitous
 868                 // that mandating a wrapper like <info> would repel users. Thus, generate them first.
 869                 makeAny(text, buf, xs, runparams, paragraphs.iterator_at(pit));
 870         }
 871         for (auto pit : info.mustBeInInfo) {
 872                 if (info.abstract.find(pit) == info.abstract.end()) // The abstract must be in info, but is dealt with after.
 873                         makeAny(text, buf, xs, runparams, paragraphs.iterator_at(pit));
 874         }
 875
 876         // Always output the abstract as the last item of the <info>, as it requires special treatment (especially if
 877         // it contains several paragraphs that are empty).
 878         if (hasAbstract) {
 879 //              string tag = paragraphs[*info.abstract.begin()].layout().docbookforceabstracttag();
 880 //              if (tag == "NONE")
 881 //                      tag = "abstract";
 882 //
 883 //              xs << xml::StartTag(tag);
 884 //              xs << xml::CR();
 885                 xs << XMLStream::ESCAPE_NONE << abstract;
 886 //              xs << xml::EndTag(tag);
 887 //              xs << xml::CR();
 888         }
 889
 890         // End the <info> tag if it was started.
 891         if (needInfo) {
 892                 xs << xml::EndTag("info");
 893                 xs << xml::CR();
 894                 xs.endDivision();
 895         }
 896 }
 897
 898
 899 void docbookFirstParagraphs(
 900                 Text const &text,
 901                 Buffer const &buf,
 902                 XMLStream &xs,
 903                 OutputParams const &runparams,
 904                 pit_type epit)
 905 {
 906         // Handle the beginning of the document, supposing it has sections.
 907         // Major role: output the first <info> tag.
 908
 909         ParagraphList const &paragraphs = text.paragraphs();
 910         pit_type bpit = runparams.par_begin;
 911         DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit);
 912         outputDocBookInfo(text, buf, xs, runparams, paragraphs, info);
 913 }
 914
 915
 916 void docbookSimpleAllParagraphs(
 917                 Text const & text,
 918                 Buffer const & buf,
 919                 XMLStream & xs,
 920                 OutputParams const & runparams)
 921 {
 922         // Handle the given text, supposing it has no sections (i.e. a "simple" text). The input may vary in length
 923         // between a single paragraph to a whole document.
 924
 925         // First, the <info> tag.
 926         ParagraphList const &paragraphs = text.paragraphs();
 927         pit_type bpit = runparams.par_begin;
 928         pit_type const epit = runparams.par_end;
 929         DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit);
 930         outputDocBookInfo(text, buf, xs, runparams, paragraphs, info);
 931
 932         // Then, the content. It starts where the <info> ends.
 933         bpit = info.epit;
 934         while (bpit < epit) {
 935                 auto par = paragraphs.iterator_at(bpit);
 936                 if (!hasOnlyNotes(*par))
 937                         makeAny(text, buf, xs, runparams, par);
 938                 bpit += 1;
 939         }
 940 }
 941
 942
 943 void docbookParagraphs(Text const &text,
 944                                            Buffer const &buf,
 945                                            XMLStream &xs,
 946                                            OutputParams const &runparams) {
 947         ParagraphList const &paragraphs = text.paragraphs();
 948         if (runparams.par_begin == runparams.par_end) {
 949                 runparams.par_begin = 0;
 950                 runparams.par_end = paragraphs.size();
 951         }
 952         pit_type bpit = runparams.par_begin;
 953         pit_type const epit = runparams.par_end;
 954         LASSERT(bpit < epit,
 955                         {
 956                                 xs << XMLStream::ESCAPE_NONE << "<!-- DocBook output error! -->\n";
 957                                 return;
 958                         });
 959
 960         std::stack<std::pair<int, string>> headerLevels; // Used to determine when to open/close sections: store the depth
 961         // of the section and the tag that was used to open it.
 962
 963         // Detect whether the document contains sections. If there are no sections, there can be no automatically
 964         // discovered abstract.
 965         bool documentHasSections;
 966         pit_type eppit;
 967         tie(documentHasSections, eppit) = hasDocumentSectioning(paragraphs, bpit, epit);
 968
 969         if (documentHasSections) {
 970                 docbookFirstParagraphs(text, buf, xs, runparams, eppit);
 971                 bpit = eppit;
 972         } else {
 973                 docbookSimpleAllParagraphs(text, buf, xs, runparams);
 974                 return;
 975         }
 976
 977         bool currentlyInAppendix = false;
 978
 979         while (bpit < epit) {
 980                 OutputParams ourparams = runparams;
 981
 982                 auto par = paragraphs.iterator_at(bpit);
 983                 if (par->params().startOfAppendix())
 984                         currentlyInAppendix = true;
 985                 Layout const &style = par->layout();
 986                 ParagraphList::const_iterator const lastStartedPar = par;
 987                 ParagraphList::const_iterator send;
 988
 989                 if (hasOnlyNotes(*par)) {
 990                         bpit += 1;
 991                         continue;
 992                 }
 993
 994                 // Think about adding <section> and/or </section>s.
 995                 const bool isLayoutSectioning = style.category() == from_utf8("Sectioning");
 996                 if (isLayoutSectioning) {
 997                         int level = style.toclevel;
 998
 999                         // Need to close a previous section if it has the same level or a higher one (close <section> if opening a <h2>
1000                         // after a <h2>, <h3>, <h4>, <h5> or <h6>). More examples:
1001                         //   - current: h2; back: h1; do not close any <section>
1002                         //   - current: h1; back: h2; close two <section> (first the <h2>, then the <h1>, so a new <h1> can come)
1003                         while (!headerLevels.empty() && level <= headerLevels.top().first) {
1004                                 int stackLevel = headerLevels.top().first;
1005                                 docstring stackTag = from_utf8("</" + headerLevels.top().second + ">");
1006                                 headerLevels.pop();
1007
1008                                 // Output the tag only if it corresponds to a legit section.
1009                                 if (stackLevel != Layout::NOT_IN_TOC)
1010                                         xs << XMLStream::ESCAPE_NONE << stackTag << xml::CR();
1011                         }
1012
1013                         // Open the new section: first push it onto the stack, then output it in DocBook.
1014                         string sectionTag = (currentlyInAppendix && style.docbooksectiontag() == "chapter") ?
1015                                                                 "appendix" : style.docbooksectiontag();
1016                         headerLevels.push(std::make_pair(level, sectionTag));
1017
1018                         // Some sectioning-like elements should not be output (such as FrontMatter).
1019                         if (level != Layout::NOT_IN_TOC) {
1020                                 // Look for a label in the title, i.e. a InsetLabel as a child.
1021                                 docstring id = docstring();
1022                                 for (pos_type i = 0; i < par->size(); ++i) {
1023                                         Inset const *inset = par->getInset(i);
1024                                         if (inset) {
1025                                                 if (auto label = dynamic_cast<InsetLabel const *>(inset)) {
1026                                                         // Generate the attributes for the section if need be.
1027                                                         id += "xml:id=\"" + xml::cleanID(label->screenLabel()) + "\"";
1028
1029                                                         // Don't output the ID as a DocBook <anchor>.
1030                                                         ourparams.docbook_anchors_to_ignore.emplace(label->screenLabel());
1031
1032                                                         // Cannot have multiple IDs per tag.
1033                                                         break;
1034                                                 }
1035                                         }
1036                                 }
1037
1038                                 // Write the open tag for this section.
1039                                 docstring tag = from_utf8("<" + sectionTag);
1040                                 if (!id.empty())
1041                                         tag += from_utf8(" ") + id;
1042                                 tag += from_utf8(">");
1043                                 xs << XMLStream::ESCAPE_NONE << tag;
1044                                 xs << xml::CR();
1045                         }
1046                 }
1047
1048                 // Close all sections before the bibliography.
1049                 // TODO: Only close all when the bibliography is at the end of the document? Or force to output the bibliography at the end of the document? Or don't care (as allowed by DocBook)?
1050                 auto insetsLength = distance(par->insetList().begin(), par->insetList().end());
1051                 if (insetsLength > 0) {
1052                         Inset const *firstInset = par->getInset(0);
1053                         if (firstInset && dynamic_cast<InsetBibtex const *>(firstInset)) {
1054                                 while (!headerLevels.empty()) {
1055                                         int level = headerLevels.top().first;
1056                                         docstring tag = from_utf8("</" + headerLevels.top().second + ">");
1057                                         headerLevels.pop();
1058
1059                                         // Output the tag only if it corresponds to a legit section.
1060                                         if (level != Layout::NOT_IN_TOC) {
1061                                                 xs << XMLStream::ESCAPE_NONE << tag;
1062                                                 xs << xml::CR();
1063                                         }
1064                                 }
1065                         }
1066                 }
1067
1068                 // Generate this paragraph.
1069                 makeAny(text, buf, xs, ourparams, par);
1070                 bpit += 1;
1071         }
1072
1073         // If need be, close <section>s, but only at the end of the document (otherwise, dealt with at the beginning
1074         // of the loop).
1075         while (!headerLevels.empty() && headerLevels.top().first > Layout::NOT_IN_TOC) {
1076                 docstring tag = from_utf8("</" + headerLevels.top().second + ">");
1077                 headerLevels.pop();
1078                 xs << XMLStream::ESCAPE_NONE << tag;
1079                 xs << xml::CR();
1080         }
1081 }
1082
1083 } // namespace lyx