src/output_docbook.cpp

   1 /**
   2  * \file output_docbook.cpp
   3  * This file is part of LyX, the document processor.
   4  * Licence details can be found in the file COPYING.
   5  *
   6  * \author Lars Gullik Bjønnes
   7  * \author José Matos
   8  *
   9  * Full author contact details are available in file CREDITS.
  10  */
  11
  12 #include <config.h>
  13
  14 #include "Buffer.h"
  15 #include "buffer_funcs.h"
  16 #include "BufferParams.h"
  17 #include "Font.h"
  18 #include "InsetList.h"
  19 #include "Layout.h"
  20 #include "OutputParams.h"
  21 #include "Paragraph.h"
  22 #include "ParagraphList.h"
  23 #include "ParagraphParameters.h"
  24 #include "xml.h"
  25 #include "Text.h"
  26 #include "TextClass.h"
  27
  28 #include "insets/InsetBibtex.h"
  29 #include "insets/InsetBibitem.h"
  30 #include "insets/InsetLabel.h"
  31 #include "insets/InsetNote.h"
  32
  33 #include "support/convert.h"
  34 #include "support/debug.h"
  35 #include "support/lassert.h"
  36 #include "support/lstrings.h"
  37 #include "support/textutils.h"
  38
  39 #include "support/regex.h"
  40
  41 #include <stack>
  42 #include <iostream>
  43 #include <algorithm>
  44 #include <sstream>
  45
  46 using namespace std;
  47 using namespace lyx::support;
  48
  49 namespace lyx {
  50
  51 namespace {
  52
  53 std::string fontToDocBookTag(xml::FontTypes type)
  54 {
  55         switch (type) {
  56         case xml::FontTypes::FT_EMPH:
  57         case xml::FontTypes::FT_BOLD:
  58                 return "emphasis";
  59         case xml::FontTypes::FT_NOUN:
  60                 return "person";
  61         case xml::FontTypes::FT_UBAR:
  62         case xml::FontTypes::FT_WAVE:
  63         case xml::FontTypes::FT_DBAR:
  64         case xml::FontTypes::FT_SOUT:
  65         case xml::FontTypes::FT_XOUT:
  66         case xml::FontTypes::FT_ITALIC:
  67         case xml::FontTypes::FT_UPRIGHT:
  68         case xml::FontTypes::FT_SLANTED:
  69         case xml::FontTypes::FT_SMALLCAPS:
  70         case xml::FontTypes::FT_ROMAN:
  71         case xml::FontTypes::FT_SANS:
  72                 return "emphasis";
  73         case xml::FontTypes::FT_TYPE:
  74                 return "code";
  75         case xml::FontTypes::FT_SIZE_TINY:
  76         case xml::FontTypes::FT_SIZE_SCRIPT:
  77         case xml::FontTypes::FT_SIZE_FOOTNOTE:
  78         case xml::FontTypes::FT_SIZE_SMALL:
  79         case xml::FontTypes::FT_SIZE_NORMAL:
  80         case xml::FontTypes::FT_SIZE_LARGE:
  81         case xml::FontTypes::FT_SIZE_LARGER:
  82         case xml::FontTypes::FT_SIZE_LARGEST:
  83         case xml::FontTypes::FT_SIZE_HUGE:
  84         case xml::FontTypes::FT_SIZE_HUGER:
  85         case xml::FontTypes::FT_SIZE_INCREASE:
  86         case xml::FontTypes::FT_SIZE_DECREASE:
  87                 return "emphasis";
  88         default:
  89                 return "";
  90         }
  91 }
  92
  93
  94 string fontToRole(xml::FontTypes type)
  95 {
  96         // Specific fonts are achieved with roles. The only common ones are "" for basic emphasis,
  97         // and "bold"/"strong" for bold. With some specific options, other roles are copied into
  98         // HTML output (via the DocBook XSLT sheets); otherwise, if not recognised, they are just ignored.
  99         // Hence, it is not a problem to have many roles by default here.
 100         // See https://www.sourceware.org/ml/docbook/2003-05/msg00269.html
 101         switch (type) {
 102         case xml::FontTypes::FT_ITALIC:
 103         case xml::FontTypes::FT_EMPH:
 104                 return "";
 105         case xml::FontTypes::FT_BOLD:
 106                 return "bold";
 107         case xml::FontTypes::FT_NOUN: // Outputs a <person>
 108         case xml::FontTypes::FT_TYPE: // Outputs a <code>
 109                 return "";
 110         case xml::FontTypes::FT_UBAR:
 111                 return "underline";
 112
 113         // All other roles are non-standard for DocBook.
 114
 115         case xml::FontTypes::FT_WAVE:
 116                 return "wave";
 117         case xml::FontTypes::FT_DBAR:
 118                 return "dbar";
 119         case xml::FontTypes::FT_SOUT:
 120                 return "sout";
 121         case xml::FontTypes::FT_XOUT:
 122                 return "xout";
 123         case xml::FontTypes::FT_UPRIGHT:
 124                 return "upright";
 125         case xml::FontTypes::FT_SLANTED:
 126                 return "slanted";
 127         case xml::FontTypes::FT_SMALLCAPS:
 128                 return "smallcaps";
 129         case xml::FontTypes::FT_ROMAN:
 130                 return "roman";
 131         case xml::FontTypes::FT_SANS:
 132                 return "sans";
 133         case xml::FontTypes::FT_SIZE_TINY:
 134                 return "tiny";
 135         case xml::FontTypes::FT_SIZE_SCRIPT:
 136                 return "size_script";
 137         case xml::FontTypes::FT_SIZE_FOOTNOTE:
 138                 return "size_footnote";
 139         case xml::FontTypes::FT_SIZE_SMALL:
 140                 return "size_small";
 141         case xml::FontTypes::FT_SIZE_NORMAL:
 142                 return "size_normal";
 143         case xml::FontTypes::FT_SIZE_LARGE:
 144                 return "size_large";
 145         case xml::FontTypes::FT_SIZE_LARGER:
 146                 return "size_larger";
 147         case xml::FontTypes::FT_SIZE_LARGEST:
 148                 return "size_largest";
 149         case xml::FontTypes::FT_SIZE_HUGE:
 150                 return "size_huge";
 151         case xml::FontTypes::FT_SIZE_HUGER:
 152                 return "size_huger";
 153         case xml::FontTypes::FT_SIZE_INCREASE:
 154                 return "size_increase";
 155         case xml::FontTypes::FT_SIZE_DECREASE:
 156                 return "size_decrease";
 157         default:
 158                 return "";
 159         }
 160 }
 161
 162 string fontToAttribute(xml::FontTypes type) {
 163         // If there is a role (i.e. nonstandard use of a tag), output the attribute. Otherwise, the sheer tag is sufficient
 164         // for the font.
 165         string role = fontToRole(type);
 166         if (!role.empty()) {
 167                 return "role='" + role + "'";
 168         } else {
 169                 return "";
 170         }
 171 }
 172
 173 } // end anonymous namespace
 174
 175
 176 xml::FontTag docbookStartFontTag(xml::FontTypes type)
 177 {
 178         return xml::FontTag(from_utf8(fontToDocBookTag(type)), from_utf8(fontToAttribute(type)), type);
 179 }
 180
 181
 182 xml::EndFontTag docbookEndFontTag(xml::FontTypes type)
 183 {
 184         return xml::EndFontTag(from_utf8(fontToDocBookTag(type)), type);
 185 }
 186
 187
 188 namespace {
 189
 190 // Convenience functions to open and close tags. First, very low-level ones to ensure a consistent new-line behaviour.
 191 // Block style:
 192 //        Content before
 193 //        <blocktag>
 194 //          Contents of the block.
 195 //        </blocktag>
 196 //        Content after
 197 // Paragraph style:
 198 //        Content before
 199 //          <paratag>Contents of the paragraph.</paratag>
 200 //        Content after
 201 // Inline style:
 202 //    Content before<inlinetag>Contents of the paragraph.</inlinetag>Content after
 203
 204 void openInlineTag(XMLStream & xs, const std::string & tag, const std::string & attr)
 205 {
 206         xs << xml::StartTag(tag, attr);
 207 }
 208
 209
 210 void closeInlineTag(XMLStream & xs, const std::string & tag)
 211 {
 212         xs << xml::EndTag(tag);
 213 }
 214
 215
 216 void openParTag(XMLStream & xs, const std::string & tag, const std::string & attr)
 217 {
 218         if (!xs.isLastTagCR())
 219                 xs << xml::CR();
 220         xs << xml::StartTag(tag, attr);
 221 }
 222
 223
 224 void closeParTag(XMLStream & xs, const std::string & tag)
 225 {
 226         xs << xml::EndTag(tag);
 227         xs << xml::CR();
 228 }
 229
 230
 231 void openBlockTag(XMLStream & xs, const std::string & tag, const std::string & attr)
 232 {
 233         if (!xs.isLastTagCR())
 234                 xs << xml::CR();
 235         xs << xml::StartTag(tag, attr);
 236         xs << xml::CR();
 237 }
 238
 239
 240 void closeBlockTag(XMLStream & xs, const std::string & tag)
 241 {
 242         if (!xs.isLastTagCR())
 243                 xs << xml::CR();
 244         xs << xml::EndTag(tag);
 245         xs << xml::CR();
 246 }
 247
 248
 249 void openTag(XMLStream & xs, const std::string & tag, const std::string & attr, const std::string & tagtype)
 250 {
 251         if (tag.empty() || tag == "NONE")
 252                 return;
 253
 254         if (tag == "para" || tagtype == "paragraph") // Special case for <para>: always considered as a paragraph.
 255                 openParTag(xs, tag, attr);
 256         else if (tagtype == "block")
 257                 openBlockTag(xs, tag, attr);
 258         else if (tagtype == "inline")
 259                 openInlineTag(xs, tag, attr);
 260         else
 261                 xs.writeError("Unrecognised tag type '" + tagtype + "' for '" + tag + " " + attr + "'");
 262 }
 263
 264
 265 void closeTag(XMLStream & xs, const std::string & tag, const std::string & tagtype)
 266 {
 267         if (tag.empty() || tag == "NONE")
 268                 return;
 269
 270         if (tag == "para" || tagtype == "paragraph") // Special case for <para>: always considered as a paragraph.
 271                 closeParTag(xs, tag);
 272         else if (tagtype == "block")
 273                 closeBlockTag(xs, tag);
 274         else if (tagtype == "inline")
 275                 closeInlineTag(xs, tag);
 276         else
 277                 xs.writeError("Unrecognised tag type '" + tagtype + "' for '" + tag + "'");
 278 }
 279
 280
 281 // Higher-level convenience functions.
 282
 283 void openParTag(XMLStream & xs, const Paragraph * par, const Paragraph * prevpar)
 284 {
 285         Layout const & lay = par->layout();
 286
 287         if (par == prevpar)
 288                 prevpar = nullptr;
 289
 290         // When should the wrapper be opened here? Only if the previous paragraph has the SAME wrapper tag
 291         // (usually, they won't have the same layout) and the CURRENT one allows merging.
 292         // The main use case is author information in several paragraphs: if the name of the author is the
 293         // first paragraph of an author, then merging with the previous tag does not make sense. Say the
 294         // next paragraph is the affiliation, then it should be output in the same <author> tag (different
 295         // layout, same wrapper tag).
 296         bool openWrapper = lay.docbookwrappertag() != "NONE";
 297         if (prevpar != nullptr) {
 298                 Layout const & prevlay = prevpar->layout();
 299                 if (prevlay.docbookwrappertag() != "NONE") {
 300                         openWrapper = prevlay.docbookwrappertag() == lay.docbookwrappertag()
 301                                         && !lay.docbookwrappermergewithprevious();
 302                 }
 303         }
 304
 305         // Main logic.
 306         if (openWrapper)
 307                 openTag(xs, lay.docbookwrappertag(), lay.docbookwrapperattr(), lay.docbookwrappertagtype());
 308
 309         const string & tag = lay.docbooktag();
 310         if (tag != "NONE") {
 311                 auto xmltag = xml::ParTag(tag, lay.docbookattr());
 312                 if (!xs.isTagOpen(xmltag, 1)) // Don't nest a paragraph directly in a paragraph.
 313                         // TODO: required or not?
 314                         // TODO: avoid creating a ParTag object just for this query...
 315                         openTag(xs, lay.docbooktag(), lay.docbookattr(), lay.docbooktagtype());
 316         }
 317
 318         openTag(xs, lay.docbookitemtag(), lay.docbookitemattr(), lay.docbookitemtagtype());
 319         openTag(xs, lay.docbookiteminnertag(), lay.docbookiteminnerattr(), lay.docbookiteminnertagtype());
 320 }
 321
 322
 323 void closeParTag(XMLStream & xs, Paragraph const * par, Paragraph const * nextpar)
 324 {
 325         if (par == nextpar)
 326                 nextpar = nullptr;
 327
 328         // See comment in openParTag.
 329         Layout const & lay = par->layout();
 330         bool closeWrapper = lay.docbookwrappertag() != "NONE";
 331         if (nextpar != nullptr) {
 332                 Layout const & nextlay = nextpar->layout();
 333                 if (nextlay.docbookwrappertag() != "NONE") {
 334                         closeWrapper = nextlay.docbookwrappertag() == lay.docbookwrappertag()
 335                                         && !nextlay.docbookwrappermergewithprevious();
 336                 }
 337         }
 338
 339         // Main logic.
 340         closeTag(xs, lay.docbookiteminnertag(), lay.docbookiteminnertagtype());
 341         closeTag(xs, lay.docbookitemtag(), lay.docbookitemtagtype());
 342         closeTag(xs, lay.docbooktag(), lay.docbooktagtype());
 343         if (closeWrapper)
 344                 closeTag(xs, lay.docbookwrappertag(), lay.docbookwrappertagtype());
 345 }
 346
 347
 348 void openLabelTag(XMLStream & xs, Layout const & lay) // Mostly for definition lists.
 349 {
 350         openTag(xs, lay.docbookitemlabeltag(), lay.docbookitemlabelattr(), lay.docbookitemlabeltagtype());
 351 }
 352
 353
 354 void closeLabelTag(XMLStream & xs, Layout const & lay)
 355 {
 356         closeTag(xs, lay.docbookitemlabeltag(), lay.docbookitemlabeltagtype());
 357 }
 358
 359
 360 void openItemTag(XMLStream & xs, Layout const & lay)
 361 {
 362         openTag(xs, lay.docbookitemtag(), lay.docbookitemattr(), lay.docbookitemtagtype());
 363 }
 364
 365
 366 void closeItemTag(XMLStream & xs, Layout const & lay)
 367 {
 368         closeTag(xs, lay.docbookitemtag(), lay.docbookitemtagtype());
 369 }
 370
 371
 372 void makeParagraphBibliography(
 373                 Buffer const & buf,
 374                 XMLStream & xs,
 375                 OutputParams const & runparams,
 376                 Text const & text,
 377                 ParagraphList::const_iterator const & pbegin)
 378 {
 379         auto const begin = text.paragraphs().begin();
 380         auto const end = text.paragraphs().end();
 381         auto pend = pbegin;
 382         ++pend;
 383
 384         // Find the paragraph *before* pbegin.
 385         ParagraphList::const_iterator pbegin_before = begin;
 386         if (pbegin != begin) {
 387                 ParagraphList::const_iterator pbegin_before_next = begin;
 388                 ++pbegin_before_next;
 389
 390                 while (pbegin_before_next != pbegin) {
 391                         ++pbegin_before;
 392                         ++pbegin_before_next;
 393                 }
 394         }
 395
 396         ParagraphList::const_iterator par = pbegin;
 397
 398         // If this is the first paragraph in a bibliography, open the bibliography tag.
 399         if (pbegin != begin && pbegin_before->layout().latextype != LATEX_BIB_ENVIRONMENT) {
 400                 xs << xml::StartTag("bibliography");
 401                 xs << xml::CR();
 402         }
 403
 404         // Generate the required paragraphs, but only if they are .
 405         for (; par != pend; ++par) {
 406                 // Start the precooked bibliography entry. This is very much like opening a paragraph tag.
 407                 // Don't forget the citation ID!
 408                 docstring attr;
 409                 for (auto i = 0; i < par->size(); ++i) {
 410                         Inset const *ip = par->getInset(0);
 411                         if (ip != nullptr && ip->lyxCode() == BIBITEM_CODE) {
 412                                 const auto * bibitem = dynamic_cast<const InsetBibitem*>(par->getInset(i));
 413                                 attr = from_utf8("xml:id='") + bibitem->getParam("key") + from_utf8("'");
 414                                 break;
 415                         }
 416                 }
 417                 xs << xml::StartTag(from_utf8("bibliomixed"), attr);
 418
 419                 // Generate the entry.
 420                 par->simpleDocBookOnePar(buf, xs, runparams, text.outerFont(distance(begin, par)), true, true, 0);
 421
 422                 // End the precooked bibliography entry.
 423                 xs << xml::EndTag("bibliomixed");
 424                 xs << xml::CR();
 425         }
 426
 427         // If this is the last paragraph in a bibliography, close the bibliography tag.
 428         if (par == end || par->layout().latextype != LATEX_BIB_ENVIRONMENT) {
 429                 xs << xml::EndTag("bibliography");
 430                 xs << xml::CR();
 431         }
 432 }
 433
 434
 435 void makeParagraph(
 436                 Buffer const & buf,
 437                 XMLStream & xs,
 438                 OutputParams const & runparams,
 439                 Text const & text,
 440                 ParagraphList::const_iterator const & par)
 441 {
 442         auto const begin = text.paragraphs().begin();
 443         auto const end = text.paragraphs().end();
 444         auto prevpar = text.paragraphs().getParagraphBefore(par);
 445
 446         // We want to open the paragraph tag if:
 447         //   (i) the current layout permits multiple paragraphs
 448         //  (ii) we are either not already inside a paragraph (HTMLIsBlock) OR
 449         //         we are, but this is not the first paragraph
 450         //
 451         // But there is also a special case, and we first see whether we are in it.
 452         // We do not want to open the paragraph tag if this paragraph contains
 453         // only one item, and that item is "inline", i.e., not HTMLIsBlock (such
 454         // as a branch). On the other hand, if that single item has a font change
 455         // applied to it, then we still do need to open the paragraph.
 456         //
 457         // Obviously, this is very fragile. The main reason we need to do this is
 458         // because of branches, e.g., a branch that contains an entire new section.
 459         // We do not really want to wrap that whole thing in a <div>...</div>.
 460         bool special_case = false;
 461         Inset const *specinset = par->size() == 1 ? par->getInset(0) : nullptr;
 462         if (specinset && !specinset->getLayout().htmlisblock()) { // TODO: Convert htmlisblock to a DocBook parameter?
 463                 Layout const &style = par->layout();
 464                 FontInfo const first_font = style.labeltype == LABEL_MANUAL ?
 465                                                                         style.labelfont : style.font;
 466                 FontInfo const our_font =
 467                                 par->getFont(buf.masterBuffer()->params(), 0,
 468                                                          text.outerFont(std::distance(begin, par))).fontInfo();
 469
 470                 if (first_font == our_font)
 471                         special_case = true;
 472         }
 473
 474         // Plain layouts must be ignored.
 475         if (!special_case && buf.params().documentClass().isPlainLayout(par->layout()) && !runparams.docbook_force_pars)
 476                 special_case = true;
 477         // TODO: Could get rid of this with a DocBook equivalent to htmlisblock?
 478         if (!special_case && par->size() == 1 && par->getInset(0)) {
 479                 Inset const * firstInset = par->getInset(0);
 480
 481                 // Floats cannot be in paragraphs.
 482                 special_case = to_utf8(firstInset->layoutName()).substr(0, 6) == "Float:";
 483
 484                 // Bibliographies cannot be in paragraphs.
 485                 if (!special_case && firstInset->asInsetCommand())
 486                         special_case = firstInset->asInsetCommand()->params().getCmdName() == "bibtex";
 487
 488                 // Equations do not deserve their own paragraph (DocBook allows them outside paragraphs).
 489                 if (!special_case && firstInset->asInsetMath())
 490                         special_case = true;
 491
 492                 // ERTs are in comments, not paragraphs.
 493                 if (!special_case && firstInset->lyxCode() == lyx::ERT_CODE)
 494                         special_case = true;
 495
 496                 // Listings should not get into their own paragraph.
 497                 if (!special_case && firstInset->lyxCode() == lyx::LISTINGS_CODE)
 498                         special_case = true;
 499         }
 500
 501         bool const open_par = runparams.docbook_make_pars
 502                                                   && !runparams.docbook_in_par
 503                                                   && !special_case;
 504
 505         // We want to issue the closing tag if either:
 506         //   (i)  We opened it, and either docbook_in_par is false,
 507         //              or we're not in the last paragraph, anyway.
 508         //   (ii) We didn't open it and docbook_in_par is true,
 509         //              but we are in the first par, and there is a next par.
 510         auto nextpar = par;
 511         ++nextpar;
 512         bool const close_par = open_par && (!runparams.docbook_in_par);
 513
 514         // Determine if this paragraph has some real content. Things like new pages are not caught
 515         // by Paragraph::empty(), even though they do not generate anything useful in DocBook.
 516         odocstringstream os2;
 517         XMLStream xs2(os2);
 518         par->simpleDocBookOnePar(buf, xs2, runparams, text.outerFont(distance(begin, par)), open_par, close_par, 0);
 519
 520         docstring cleaned = os2.str();
 521         static const lyx::regex reg("[ \\r\\n]*");
 522         cleaned = from_utf8(lyx::regex_replace(to_utf8(cleaned), reg, string("")));
 523
 524         if (!cleaned.empty()) {
 525                 if (open_par)
 526                         openParTag(xs, &*par, prevpar);
 527
 528                 xs << XMLStream::ESCAPE_NONE << os2.str();
 529
 530                 if (close_par)
 531                         closeParTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr);
 532         }
 533 }
 534
 535
 536 void makeAny(
 537                 Text const &text,
 538                 Buffer const &buf,
 539                 XMLStream &xs,
 540                 OutputParams const &ourparams,
 541                 ParagraphList::const_iterator par);
 542
 543
 544 void makeEnvironment(
 545                 Buffer const &buf,
 546                 XMLStream &xs,
 547                 OutputParams const &runparams,
 548                 Text const &text,
 549                 ParagraphList::const_iterator const & par)
 550 {
 551         auto const end = text.paragraphs().end();
 552
 553         // Output the opening tag for this environment, but only if it has not been previously opened (condition
 554         // implemented in openParTag).
 555         auto prevpar = text.paragraphs().getParagraphBefore(par);
 556         openParTag(xs, &*par, prevpar); // TODO: switch in layout for par/block?
 557
 558         // Generate the contents of this environment. There is a special case if this is like some environment.
 559         Layout const & style = par->layout();
 560         if (style.latextype == LATEX_COMMAND) {
 561                 // Nothing to do (otherwise, infinite loops).
 562         } else if (style.latextype == LATEX_ENVIRONMENT ||
 563                         style.latextype == LATEX_LIST_ENVIRONMENT ||
 564                         style.latextype == LATEX_ITEM_ENVIRONMENT) {
 565                 // Open a wrapper tag if needed.
 566                 if (style.docbookitemwrappertag() != "NONE") {
 567                         xs << xml::StartTag(style.docbookitemwrappertag(), style.docbookitemwrapperattr());
 568                         xs << xml::CR();
 569                 }
 570
 571                 // Generate the label, if need be. If it is taken from the text, sep != 0 and corresponds to the first
 572                 // character after the label.
 573                 pos_type sep = 0;
 574                 if (style.labeltype != LABEL_NO_LABEL && style.docbookitemlabeltag() != "NONE") {
 575                         // At least one condition must be met:
 576                         //  - this environment is not a list
 577                         //  - if this is a list, the label must not be manual (i.e. it must be taken from the layout)
 578                         if (style.latextype != LATEX_LIST_ENVIRONMENT || style.labeltype != LABEL_MANUAL) {
 579                                 // Usual cases: maybe there is something specified at the layout level. Highly unlikely, though.
 580                                 docstring const lbl = par->params().labelString();
 581
 582                                 if (lbl.empty()) {
 583                                         xs << xml::CR();
 584                                 } else {
 585                                         openLabelTag(xs, style);
 586                                         xs << lbl;
 587                                         closeLabelTag(xs, style);
 588                                 }
 589                         } else {
 590                                 // Only variablelist gets here (or similar items defined as an extension in the layout).
 591                                 openLabelTag(xs, style);
 592                                 sep = par->firstWordDocBook(xs, runparams);
 593                                 closeLabelTag(xs, style);
 594                         }
 595                 }
 596
 597                 // Maybe the item is completely empty, i.e. if the first word ends at the end of the current paragraph
 598                 // AND if the next paragraph doesn't have the same depth (if there is such a paragraph).
 599                 // Common case: there is only the first word on the line, but there is a nested list instead
 600                 // of more text.
 601                 bool emptyItem = false;
 602                 if (sep == par->size()) { // If the separator is already at the end of this paragraph...
 603                         auto next_par = par;
 604                         ++next_par;
 605                         if (next_par == text.paragraphs().end()) // There is no next paragraph.
 606                                 emptyItem = true;
 607                         else // There is a next paragraph: check depth.
 608                                 emptyItem = par->params().depth() >= next_par->params().depth();
 609                 }
 610
 611                 if (emptyItem) {
 612                         // Avoid having an empty item, this is not valid DocBook. A single character is enough to force
 613                         // generation of a full <para>.
 614                         // TODO: this always worked only by magic...
 615                         xs << ' ';
 616                 } else {
 617                         // Generate the rest of the paragraph, if need be.
 618                         par->simpleDocBookOnePar(buf, xs, runparams, text.outerFont(std::distance(text.paragraphs().begin(), par)),
 619                                                                  true, true, sep);
 620                 }
 621         } else {
 622                 makeAny(text, buf, xs, runparams, par);
 623         }
 624
 625         // Close the environment.
 626         auto nextpar = par;
 627         ++nextpar;
 628         closeParTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr); // TODO: switch in layout for par/block?
 629 }
 630
 631
 632 void makeCommand(
 633                 Buffer const & buf,
 634                 XMLStream & xs,
 635                 OutputParams const & runparams,
 636                 Text const & text,
 637                 ParagraphList::const_iterator const & par)
 638 {
 639         // Unlike XHTML, no need for labels, as they are handled by DocBook tags.
 640         auto const begin = text.paragraphs().begin();
 641         auto const end = text.paragraphs().end();
 642         auto nextpar = par;
 643         ++nextpar;
 644
 645         // Generate this command.
 646         auto prevpar = text.paragraphs().getParagraphBefore(par);
 647         openParTag(xs, &*par, prevpar);
 648
 649         par->simpleDocBookOnePar(buf, xs, runparams,
 650                                  text.outerFont(distance(begin, par)));
 651
 652         closeParTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr);
 653 }
 654
 655
 656 void makeAny(
 657                 Text const &text,
 658                 Buffer const &buf,
 659                 XMLStream &xs,
 660                 OutputParams const &ourparams,
 661                 ParagraphList::const_iterator par)
 662 {
 663         switch (par->layout().latextype) {
 664         case LATEX_COMMAND:
 665                 makeCommand(buf, xs, ourparams, text, par);
 666                 break;
 667         case LATEX_ENVIRONMENT:
 668         case LATEX_LIST_ENVIRONMENT:
 669         case LATEX_ITEM_ENVIRONMENT:
 670                 makeEnvironment(buf, xs, ourparams, text, par);
 671                 break;
 672         case LATEX_PARAGRAPH:
 673                 makeParagraph(buf, xs, ourparams, text, par);
 674                 break;
 675         case LATEX_BIB_ENVIRONMENT:
 676                 makeParagraphBibliography(buf, xs, ourparams, text, par);
 677                 break;
 678         }
 679 }
 680
 681 } // end anonymous namespace
 682
 683
 684 using DocBookDocumentSectioning = tuple<bool, pit_type>;
 685
 686
 687 struct DocBookInfoTag
 688 {
 689         const set<pit_type> shouldBeInInfo;
 690         const set<pit_type> mustBeInInfo;
 691         const set<pit_type> abstract;
 692         pit_type bpit;
 693         pit_type epit;
 694
 695         DocBookInfoTag(const set<pit_type> & shouldBeInInfo, const set<pit_type> & mustBeInInfo,
 696                                    const set<pit_type> & abstract, pit_type bpit, pit_type epit) :
 697                                    shouldBeInInfo(shouldBeInInfo), mustBeInInfo(mustBeInInfo), abstract(abstract),
 698                                    bpit(bpit), epit(epit) {}
 699 };
 700
 701
 702 DocBookDocumentSectioning hasDocumentSectioning(ParagraphList const &paragraphs, pit_type bpit, pit_type const epit) {
 703         bool documentHasSections = false;
 704
 705         while (bpit < epit) {
 706                 Layout const &style = paragraphs[bpit].layout();
 707                 documentHasSections |= style.category() == from_utf8("Sectioning");
 708
 709                 if (documentHasSections)
 710                         break;
 711                 bpit += 1;
 712         }
 713         // Paragraphs before the first section: [ runparams.par_begin ; eppit )
 714
 715         return make_tuple(documentHasSections, bpit);
 716 }
 717
 718
 719 bool hasOnlyNotes(Paragraph const & par)
 720 {
 721         // Precondition: the paragraph is not empty. Otherwise, the function will always return true...
 722         for (int i = 0; i < par.size(); ++i)
 723                 // If you find something that is not an inset (like actual text) or an inset that is not a note,
 724                 // return false.
 725                 if (!par.isInset(i) || !dynamic_cast<InsetNote *>(par.insetList().get(i)))
 726                         return false;
 727         return true;
 728 }
 729
 730
 731 DocBookInfoTag getParagraphsWithInfo(ParagraphList const &paragraphs, pit_type bpit, pit_type const epit) {
 732         set<pit_type> shouldBeInInfo;
 733         set<pit_type> mustBeInInfo;
 734         set<pit_type> abstract;
 735
 736         // Find the first non empty paragraph by mutating bpit.
 737         while (bpit < epit) {
 738                 Paragraph const &par = paragraphs[bpit];
 739                 if (par.empty() || hasOnlyNotes(par))
 740                         bpit += 1;
 741                 else
 742                         break;
 743         }
 744
 745         // Find the last info-like paragraph.
 746         pit_type cpit = bpit;
 747         bool hasAbstractLayout = false;
 748         while (cpit < epit) {
 749                 // Skip paragraphs only containing one note.
 750                 Paragraph const & par = paragraphs[cpit];
 751                 if (hasOnlyNotes(par)) {
 752                         cpit += 1;
 753                         continue;
 754                 }
 755
 756                 if (par.layout().docbookabstract())
 757                         hasAbstractLayout = true;
 758
 759                 // Based on layout information, store this paragraph in one set: should be in <info>, must be.
 760                 Layout const &style = par.layout();
 761
 762                 if (style.docbookininfo() == "always") {
 763                         mustBeInInfo.emplace(cpit);
 764                 } else if (style.docbookininfo() == "maybe") {
 765                         shouldBeInInfo.emplace(cpit);
 766                 } else {
 767                         // Hypothesis: the <info> parts should be grouped together near the beginning bpit.
 768                         // There may be notes in between, but nothing else.
 769                         break;
 770                 }
 771                 cpit += 1;
 772         }
 773         // Now, cpit points to the last paragraph that has things that could go in <info>.
 774         // bpit is the beginning of the <info> part.
 775
 776         // Go once again through the list of paragraphs to find the abstract. If there is an abstract
 777         // layout, only consider it. Otherwise, an abstract is just a sequence of paragraphs with text.
 778         if (hasAbstractLayout) {
 779                 pit_type pit = bpit;
 780                 while (pit < cpit) { // Don't overshoot the <info> part.
 781                         if (paragraphs[pit].layout().docbookabstract())
 782                                 abstract.emplace(pit);
 783                         pit++;
 784                 }
 785         } else {
 786                 pit_type lastAbstract = epit + 1; // A nonsensical value.
 787                 docstring lastAbstractLayout;
 788
 789                 pit_type pit = bpit;
 790                 while (pit < cpit) { // Don't overshoot the <info> part.
 791                         const Paragraph & par = paragraphs.at(pit);
 792                         if (!par.insetList().empty()) {
 793                                 for (const auto &i : par.insetList()) {
 794                                         if (i.inset->getText(0) != nullptr) {
 795                                                 if (lastAbstract == epit + 1) {
 796                                                         // First paragraph that matches the heuristic definition of abstract.
 797                                                         lastAbstract = pit;
 798                                                         lastAbstractLayout = par.layout().name();
 799                                                 } else if (pit > lastAbstract + 1 || par.layout().name() != lastAbstractLayout) {
 800                                                         // This is either too far from the last abstract paragraph or doesn't
 801                                                         // have the right layout name, BUT there has already been an abstract
 802                                                         // in this document: done with detecting the abstract.
 803                                                         goto done; // Easier to get out of two nested loops.
 804                                                 }
 805
 806                                                 abstract.emplace(pit);
 807                                                 break;
 808                                         }
 809                                 }
 810                         }
 811                         pit++;
 812                 }
 813         }
 814
 815         done:
 816         return DocBookInfoTag(shouldBeInInfo, mustBeInInfo, abstract, bpit, cpit);
 817 }
 818
 819
 820 void outputDocBookInfo(
 821                 Text const & text,
 822                 Buffer const & buf,
 823                 XMLStream & xs,
 824                 OutputParams const & runparams,
 825                 ParagraphList const & paragraphs,
 826                 DocBookInfoTag const & info)
 827 {
 828         // Perform an additional check on the abstract. Sometimes, there are many paragraphs that should go
 829         // into the abstract, but none generates actual content. Thus, first generate to a temporary stream,
 830         // then only create the <abstract> tag if these paragraphs generate some content.
 831         // This check must be performed *before* a decision on whether or not to output <info> is made.
 832         bool hasAbstract = !info.abstract.empty();
 833         docstring abstract;
 834         if (hasAbstract) {
 835                 // Generate the abstract XML into a string before further checks.
 836                 odocstringstream os2;
 837                 {
 838                         XMLStream xs2(os2);
 839                         auto bpit = *std::min_element(info.abstract.begin(), info.abstract.end());
 840                         auto epit = 1 + *std::max_element(info.abstract.begin(), info.abstract.end());
 841                         // info.abstract is inclusive, epit is exclusive, hence +1 for looping.
 842
 843                         while (bpit < epit) {
 844                                 makeAny(text, buf, xs2, runparams, paragraphs.iterator_at(bpit));
 845                                 bpit += 1;
 846                         }
 847                 }
 848
 849                 // Actually output the abstract if there is something to do. Don't count line feeds or spaces in this,
 850                 // even though they must be properly output if there is some abstract.
 851                 abstract = os2.str();
 852                 static const lyx::regex reg("[ \\r\\n]*");
 853                 docstring abstractContent = from_utf8(lyx::regex_replace(to_utf8(abstract), reg, string("")));
 854
 855                 // Nothing? Then there is no abstract!
 856                 if (abstractContent.empty())
 857                         hasAbstract = false;
 858         }
 859
 860         // The abstract must go in <info>. Otherwise, decide whether to open <info> based on the layouts.
 861         bool needInfo = !info.mustBeInInfo.empty() || hasAbstract;
 862
 863         // Start the <info> tag if required.
 864         if (needInfo) {
 865                 xs.startDivision(false);
 866                 xs << xml::StartTag("info");
 867                 xs << xml::CR();
 868         }
 869
 870         // Output the elements that should go in <info>, before and after the abstract.
 871         for (auto pit : info.shouldBeInInfo) { // Typically, the title: these elements are so important and ubiquitous
 872                 // that mandating a wrapper like <info> would repel users. Thus, generate them first.
 873                 makeAny(text, buf, xs, runparams, paragraphs.iterator_at(pit));
 874         }
 875         for (auto pit : info.mustBeInInfo) {
 876                 if (info.abstract.find(pit) == info.abstract.end()) // The abstract must be in info, but is dealt with after.
 877                         makeAny(text, buf, xs, runparams, paragraphs.iterator_at(pit));
 878         }
 879
 880         // Always output the abstract as the last item of the <info>, as it requires special treatment (especially if
 881         // it contains several paragraphs that are empty).
 882         if (hasAbstract) {
 883 //              string tag = paragraphs[*info.abstract.begin()].layout().docbookforceabstracttag();
 884 //              if (tag == "NONE")
 885 //                      tag = "abstract";
 886 //
 887 //              xs << xml::StartTag(tag);
 888 //              xs << xml::CR();
 889                 xs << XMLStream::ESCAPE_NONE << abstract;
 890 //              xs << xml::EndTag(tag);
 891 //              xs << xml::CR();
 892         }
 893
 894         // End the <info> tag if it was started.
 895         if (needInfo) {
 896                 xs << xml::EndTag("info");
 897                 xs << xml::CR();
 898                 xs.endDivision();
 899         }
 900 }
 901
 902
 903 void docbookFirstParagraphs(
 904                 Text const &text,
 905                 Buffer const &buf,
 906                 XMLStream &xs,
 907                 OutputParams const &runparams,
 908                 pit_type epit)
 909 {
 910         // Handle the beginning of the document, supposing it has sections.
 911         // Major role: output the first <info> tag.
 912
 913         ParagraphList const &paragraphs = text.paragraphs();
 914         pit_type bpit = runparams.par_begin;
 915         DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit);
 916         outputDocBookInfo(text, buf, xs, runparams, paragraphs, info);
 917 }
 918
 919
 920 void docbookSimpleAllParagraphs(
 921                 Text const & text,
 922                 Buffer const & buf,
 923                 XMLStream & xs,
 924                 OutputParams const & runparams)
 925 {
 926         // Handle the given text, supposing it has no sections (i.e. a "simple" text). The input may vary in length
 927         // between a single paragraph to a whole document.
 928
 929         // First, the <info> tag.
 930         ParagraphList const &paragraphs = text.paragraphs();
 931         pit_type bpit = runparams.par_begin;
 932         pit_type const epit = runparams.par_end;
 933         DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit);
 934         outputDocBookInfo(text, buf, xs, runparams, paragraphs, info);
 935
 936         // Then, the content. It starts where the <info> ends.
 937         bpit = info.epit;
 938         while (bpit < epit) {
 939                 auto par = paragraphs.iterator_at(bpit);
 940                 if (!hasOnlyNotes(*par))
 941                         makeAny(text, buf, xs, runparams, par);
 942                 bpit += 1;
 943         }
 944 }
 945
 946
 947 void docbookParagraphs(Text const &text,
 948                                            Buffer const &buf,
 949                                            XMLStream &xs,
 950                                            OutputParams const &runparams) {
 951         ParagraphList const &paragraphs = text.paragraphs();
 952         if (runparams.par_begin == runparams.par_end) {
 953                 runparams.par_begin = 0;
 954                 runparams.par_end = paragraphs.size();
 955         }
 956         pit_type bpit = runparams.par_begin;
 957         pit_type const epit = runparams.par_end;
 958         LASSERT(bpit < epit,
 959                         {
 960                                 xs << XMLStream::ESCAPE_NONE << "<!-- DocBook output error! -->\n";
 961                                 return;
 962                         });
 963
 964         std::stack<std::pair<int, string>> headerLevels; // Used to determine when to open/close sections: store the depth
 965         // of the section and the tag that was used to open it.
 966
 967         // Detect whether the document contains sections. If there are no sections, there can be no automatically
 968         // discovered abstract.
 969         bool documentHasSections;
 970         pit_type eppit;
 971         tie(documentHasSections, eppit) = hasDocumentSectioning(paragraphs, bpit, epit);
 972
 973         if (documentHasSections) {
 974                 docbookFirstParagraphs(text, buf, xs, runparams, eppit);
 975                 bpit = eppit;
 976         } else {
 977                 docbookSimpleAllParagraphs(text, buf, xs, runparams);
 978                 return;
 979         }
 980
 981         bool currentlyInAppendix = false;
 982
 983         while (bpit < epit) {
 984                 OutputParams ourparams = runparams;
 985
 986                 auto par = paragraphs.iterator_at(bpit);
 987                 if (par->params().startOfAppendix())
 988                         currentlyInAppendix = true;
 989                 Layout const &style = par->layout();
 990                 ParagraphList::const_iterator const lastStartedPar = par;
 991                 ParagraphList::const_iterator send;
 992
 993                 if (hasOnlyNotes(*par)) {
 994                         bpit += 1;
 995                         continue;
 996                 }
 997
 998                 // Think about adding <section> and/or </section>s.
 999                 const bool isLayoutSectioning = style.category() == from_utf8("Sectioning");
1000                 if (isLayoutSectioning) {
1001                         int level = style.toclevel;
1002
1003                         // Need to close a previous section if it has the same level or a higher one (close <section> if opening a <h2>
1004                         // after a <h2>, <h3>, <h4>, <h5> or <h6>). More examples:
1005                         //   - current: h2; back: h1; do not close any <section>
1006                         //   - current: h1; back: h2; close two <section> (first the <h2>, then the <h1>, so a new <h1> can come)
1007                         while (!headerLevels.empty() && level <= headerLevels.top().first) {
1008                                 int stackLevel = headerLevels.top().first;
1009                                 docstring stackTag = from_utf8("</" + headerLevels.top().second + ">");
1010                                 headerLevels.pop();
1011
1012                                 // Output the tag only if it corresponds to a legit section.
1013                                 if (stackLevel != Layout::NOT_IN_TOC)
1014                                         xs << XMLStream::ESCAPE_NONE << stackTag << xml::CR();
1015                         }
1016
1017                         // Open the new section: first push it onto the stack, then output it in DocBook.
1018                         string sectionTag = (currentlyInAppendix && style.docbooksectiontag() == "chapter") ?
1019                                                                 "appendix" : style.docbooksectiontag();
1020                         headerLevels.push(std::make_pair(level, sectionTag));
1021
1022                         // Some sectioning-like elements should not be output (such as FrontMatter).
1023                         if (level != Layout::NOT_IN_TOC) {
1024                                 // Look for a label in the title, i.e. a InsetLabel as a child.
1025                                 docstring id = docstring();
1026                                 for (pos_type i = 0; i < par->size(); ++i) {
1027                                         Inset const *inset = par->getInset(i);
1028                                         if (inset) {
1029                                                 if (auto label = dynamic_cast<InsetLabel const *>(inset)) {
1030                                                         // Generate the attributes for the section if need be.
1031                                                         id += "xml:id=\"" + xml::cleanID(label->screenLabel()) + "\"";
1032
1033                                                         // Don't output the ID as a DocBook <anchor>.
1034                                                         ourparams.docbook_anchors_to_ignore.emplace(label->screenLabel());
1035
1036                                                         // Cannot have multiple IDs per tag.
1037                                                         break;
1038                                                 }
1039                                         }
1040                                 }
1041
1042                                 // Write the open tag for this section.
1043                                 docstring tag = from_utf8("<" + sectionTag);
1044                                 if (!id.empty())
1045                                         tag += from_utf8(" ") + id;
1046                                 tag += from_utf8(">");
1047                                 xs << XMLStream::ESCAPE_NONE << tag;
1048                                 xs << xml::CR();
1049                         }
1050                 }
1051
1052                 // Close all sections before the bibliography.
1053                 // TODO: Only close all when the bibliography is at the end of the document? Or force to output the bibliography at the end of the document? Or don't care (as allowed by DocBook)?
1054                 auto insetsLength = distance(par->insetList().begin(), par->insetList().end());
1055                 if (insetsLength > 0) {
1056                         Inset const *firstInset = par->getInset(0);
1057                         if (firstInset && dynamic_cast<InsetBibtex const *>(firstInset)) {
1058                                 while (!headerLevels.empty()) {
1059                                         int level = headerLevels.top().first;
1060                                         docstring tag = from_utf8("</" + headerLevels.top().second + ">");
1061                                         headerLevels.pop();
1062
1063                                         // Output the tag only if it corresponds to a legit section.
1064                                         if (level != Layout::NOT_IN_TOC) {
1065                                                 xs << XMLStream::ESCAPE_NONE << tag;
1066                                                 xs << xml::CR();
1067                                         }
1068                                 }
1069                         }
1070                 }
1071
1072                 // Generate this paragraph.
1073                 makeAny(text, buf, xs, ourparams, par);
1074                 bpit += 1;
1075         }
1076
1077         // If need be, close <section>s, but only at the end of the document (otherwise, dealt with at the beginning
1078         // of the loop).
1079         while (!headerLevels.empty() && headerLevels.top().first > Layout::NOT_IN_TOC) {
1080                 docstring tag = from_utf8("</" + headerLevels.top().second + ">");
1081                 headerLevels.pop();
1082                 xs << XMLStream::ESCAPE_NONE << tag;
1083                 xs << xml::CR();
1084         }
1085 }
1086
1087 } // namespace lyx