src/output_docbook.cpp

   1 /**
   2  * \file output_docbook.cpp
   3  * This file is part of LyX, the document processor.
   4  * Licence details can be found in the file COPYING.
   5  *
   6  * \author Lars Gullik Bjønnes
   7  * \author José Matos
   8  *
   9  * Full author contact details are available in file CREDITS.
  10  */
  11
  12 #include <config.h>
  13
  14 #include "Buffer.h"
  15 #include "buffer_funcs.h"
  16 #include "BufferParams.h"
  17 #include "Font.h"
  18 #include "InsetList.h"
  19 #include "Layout.h"
  20 #include "OutputParams.h"
  21 #include "Paragraph.h"
  22 #include "ParagraphList.h"
  23 #include "ParagraphParameters.h"
  24 #include "xml.h"
  25 #include "Text.h"
  26 #include "TextClass.h"
  27
  28 #include "insets/InsetBibtex.h"
  29 #include "insets/InsetBibitem.h"
  30 #include "insets/InsetLabel.h"
  31 #include "insets/InsetNote.h"
  32
  33 #include "support/convert.h"
  34 #include "support/debug.h"
  35 #include "support/lassert.h"
  36 #include "support/lstrings.h"
  37 #include "support/textutils.h"
  38
  39 #include "support/regex.h"
  40
  41 #include <stack>
  42 #include <iostream>
  43 #include <algorithm>
  44 #include <sstream>
  45
  46 // #define DOCBOOK_DEBUG_NEWLINES
  47
  48 using namespace std;
  49 using namespace lyx::support;
  50
  51 namespace lyx {
  52
  53 namespace {
  54
  55 std::string fontToDocBookTag(xml::FontTypes type)
  56 {
  57         switch (type) {
  58         case xml::FontTypes::FT_EMPH:
  59         case xml::FontTypes::FT_BOLD:
  60                 return "emphasis";
  61         case xml::FontTypes::FT_NOUN:
  62                 return "person";
  63         case xml::FontTypes::FT_UBAR:
  64         case xml::FontTypes::FT_WAVE:
  65         case xml::FontTypes::FT_DBAR:
  66         case xml::FontTypes::FT_SOUT:
  67         case xml::FontTypes::FT_XOUT:
  68         case xml::FontTypes::FT_ITALIC:
  69         case xml::FontTypes::FT_UPRIGHT:
  70         case xml::FontTypes::FT_SLANTED:
  71         case xml::FontTypes::FT_SMALLCAPS:
  72         case xml::FontTypes::FT_ROMAN:
  73         case xml::FontTypes::FT_SANS:
  74                 return "emphasis";
  75         case xml::FontTypes::FT_TYPE:
  76                 return "code";
  77         case xml::FontTypes::FT_SIZE_TINY:
  78         case xml::FontTypes::FT_SIZE_SCRIPT:
  79         case xml::FontTypes::FT_SIZE_FOOTNOTE:
  80         case xml::FontTypes::FT_SIZE_SMALL:
  81         case xml::FontTypes::FT_SIZE_NORMAL:
  82         case xml::FontTypes::FT_SIZE_LARGE:
  83         case xml::FontTypes::FT_SIZE_LARGER:
  84         case xml::FontTypes::FT_SIZE_LARGEST:
  85         case xml::FontTypes::FT_SIZE_HUGE:
  86         case xml::FontTypes::FT_SIZE_HUGER:
  87         case xml::FontTypes::FT_SIZE_INCREASE:
  88         case xml::FontTypes::FT_SIZE_DECREASE:
  89                 return "emphasis";
  90         default:
  91                 return "";
  92         }
  93 }
  94
  95
  96 string fontToRole(xml::FontTypes type)
  97 {
  98         // Specific fonts are achieved with roles. The only common ones are "" for basic emphasis,
  99         // and "bold"/"strong" for bold. With some specific options, other roles are copied into
 100         // HTML output (via the DocBook XSLT sheets); otherwise, if not recognised, they are just ignored.
 101         // Hence, it is not a problem to have many roles by default here.
 102         // See https://www.sourceware.org/ml/docbook/2003-05/msg00269.html
 103         switch (type) {
 104         case xml::FontTypes::FT_ITALIC:
 105         case xml::FontTypes::FT_EMPH:
 106                 return "";
 107         case xml::FontTypes::FT_BOLD:
 108                 return "bold";
 109         case xml::FontTypes::FT_NOUN: // Outputs a <person>
 110         case xml::FontTypes::FT_TYPE: // Outputs a <code>
 111                 return "";
 112         case xml::FontTypes::FT_UBAR:
 113                 return "underline";
 114
 115         // All other roles are non-standard for DocBook.
 116
 117         case xml::FontTypes::FT_WAVE:
 118                 return "wave";
 119         case xml::FontTypes::FT_DBAR:
 120                 return "dbar";
 121         case xml::FontTypes::FT_SOUT:
 122                 return "sout";
 123         case xml::FontTypes::FT_XOUT:
 124                 return "xout";
 125         case xml::FontTypes::FT_UPRIGHT:
 126                 return "upright";
 127         case xml::FontTypes::FT_SLANTED:
 128                 return "slanted";
 129         case xml::FontTypes::FT_SMALLCAPS:
 130                 return "smallcaps";
 131         case xml::FontTypes::FT_ROMAN:
 132                 return "roman";
 133         case xml::FontTypes::FT_SANS:
 134                 return "sans";
 135         case xml::FontTypes::FT_SIZE_TINY:
 136                 return "tiny";
 137         case xml::FontTypes::FT_SIZE_SCRIPT:
 138                 return "size_script";
 139         case xml::FontTypes::FT_SIZE_FOOTNOTE:
 140                 return "size_footnote";
 141         case xml::FontTypes::FT_SIZE_SMALL:
 142                 return "size_small";
 143         case xml::FontTypes::FT_SIZE_NORMAL:
 144                 return "size_normal";
 145         case xml::FontTypes::FT_SIZE_LARGE:
 146                 return "size_large";
 147         case xml::FontTypes::FT_SIZE_LARGER:
 148                 return "size_larger";
 149         case xml::FontTypes::FT_SIZE_LARGEST:
 150                 return "size_largest";
 151         case xml::FontTypes::FT_SIZE_HUGE:
 152                 return "size_huge";
 153         case xml::FontTypes::FT_SIZE_HUGER:
 154                 return "size_huger";
 155         case xml::FontTypes::FT_SIZE_INCREASE:
 156                 return "size_increase";
 157         case xml::FontTypes::FT_SIZE_DECREASE:
 158                 return "size_decrease";
 159         default:
 160                 return "";
 161         }
 162 }
 163
 164 string fontToAttribute(xml::FontTypes type) {
 165         // If there is a role (i.e. nonstandard use of a tag), output the attribute. Otherwise, the sheer tag is sufficient
 166         // for the font.
 167         string role = fontToRole(type);
 168         if (!role.empty()) {
 169                 return "role='" + role + "'";
 170         } else {
 171                 return "";
 172         }
 173 }
 174
 175 } // end anonymous namespace
 176
 177
 178 xml::FontTag docbookStartFontTag(xml::FontTypes type)
 179 {
 180         return xml::FontTag(from_utf8(fontToDocBookTag(type)), from_utf8(fontToAttribute(type)), type);
 181 }
 182
 183
 184 xml::EndFontTag docbookEndFontTag(xml::FontTypes type)
 185 {
 186         return xml::EndFontTag(from_utf8(fontToDocBookTag(type)), type);
 187 }
 188
 189
 190 namespace {
 191
 192 // convenience functions
 193
 194 void openParTag(XMLStream & xs, const Paragraph * par, const Paragraph * prevpar)
 195 {
 196 #ifdef DOCBOOK_DEBUG_NEWLINES
 197         xs << XMLStream::ESCAPE_NONE << "<!-- openParTag -->";
 198 #endif
 199
 200         Layout const & lay = par->layout();
 201
 202         if (par == prevpar)
 203                 prevpar = nullptr;
 204
 205         // When should the wrapper be opened here? Only if the previous paragraph has the SAME wrapper tag
 206         // (usually, they won't have the same layout) and the CURRENT one allows merging.
 207         // The main use case is author information in several paragraphs: if the name of the author is the
 208         // first paragraph of an author, then merging with the previous tag does not make sense. Say the
 209         // next paragraph is the affiliation, then it should be output in the same <author> tag (different
 210         // layout, same wrapper tag).
 211         bool openWrapper = lay.docbookwrappertag() != "NONE";
 212         if (prevpar != nullptr) {
 213                 Layout const & prevlay = prevpar->layout();
 214                 if (prevlay.docbookwrappertag() != "NONE") {
 215                         openWrapper = prevlay.docbookwrappertag() == lay.docbookwrappertag()
 216                                         && !lay.docbookwrappermergewithprevious();
 217                 }
 218         }
 219
 220         // Main logic.
 221         if (openWrapper) {
 222                 xs << xml::StartTag(lay.docbookwrappertag(), lay.docbookwrapperattr());
 223                 xs << xml::CR();
 224         }
 225
 226         string tag = lay.docbooktag();
 227         if (tag != "NONE") {
 228                 auto xmltag = xml::ParTag(tag, lay.docbookattr());
 229                 if (!xs.isTagOpen(xmltag, 1)) // Don't nest a paragraph directly in a paragraph. TODO: required or not?
 230                         xs << xmltag;
 231         }
 232
 233         if (lay.docbookitemtag() != "NONE") {
 234                 xs << xml::StartTag(lay.docbookitemtag(), lay.docbookitemattr());
 235                 xs << xml::CR();
 236         }
 237
 238         if (lay.docbookiteminnertag() != "NONE")
 239                 xs << xml::StartTag(lay.docbookiteminnertag(), lay.docbookiteminnerattr());
 240
 241 #ifdef DOCBOOK_DEBUG_NEWLINES
 242         xs << XMLStream::ESCAPE_NONE << "<!-- /openParTag -->";
 243 #endif
 244 }
 245
 246
 247 void closeParTag(XMLStream & xs, Paragraph const * par, Paragraph const * nextpar)
 248 {
 249 #ifdef DOCBOOK_DEBUG_NEWLINES
 250         xs << XMLStream::ESCAPE_NONE << "<!-- closeParTag -->";
 251 #endif
 252
 253         if (par == nextpar)
 254                 nextpar = nullptr;
 255
 256         // See comment in openParTag.
 257         Layout const & lay = par->layout();
 258         bool closeWrapper = lay.docbookwrappertag() != "NONE";
 259         if (nextpar != nullptr) {
 260                 Layout const & nextlay = nextpar->layout();
 261                 if (nextlay.docbookwrappertag() != "NONE") {
 262                         closeWrapper = nextlay.docbookwrappertag() == lay.docbookwrappertag()
 263                                         && !nextlay.docbookwrappermergewithprevious();
 264                 }
 265         }
 266
 267         // Main logic.
 268         if (lay.docbookiteminnertag() != "NONE") {
 269                 xs << xml::EndTag(lay.docbookiteminnertag());
 270                 xs << xml::CR();
 271         }
 272
 273         if (lay.docbookitemtag() != "NONE") {
 274                 xs << xml::EndTag(lay.docbookitemtag());
 275                 xs << xml::CR();
 276         }
 277
 278         if (lay.docbooktag() != "NONE") {
 279                 xs << xml::EndTag(lay.docbooktag());
 280                 xs << xml::CR();
 281         }
 282
 283         if (closeWrapper) {
 284                 xs << xml::EndTag(lay.docbookwrappertag());
 285                 xs << xml::CR();
 286         }
 287
 288 #ifdef DOCBOOK_DEBUG_NEWLINES
 289         xs << XMLStream::ESCAPE_NONE << "<!-- /closeParTag -->";
 290 #endif
 291 }
 292
 293
 294 void openBlockTag(XMLStream & xs, const Paragraph * par, const Paragraph * prevpar)
 295 {
 296 #ifdef DOCBOOK_DEBUG_NEWLINES
 297         xs << XMLStream::ESCAPE_NONE << "<!-- openBlockTag -->";
 298 #endif
 299
 300         // Similar as openParTag, but with a line feed after.
 301         openParTag(xs, par, prevpar);
 302         xs << xml::CR();
 303
 304 #ifdef DOCBOOK_DEBUG_NEWLINES
 305         xs << XMLStream::ESCAPE_NONE << "<!-- /openBlockTag -->";
 306 #endif
 307 }
 308
 309
 310 void closeBlockTag(XMLStream & xs, const Paragraph * par, const Paragraph * prevpar)
 311 {
 312 #ifdef DOCBOOK_DEBUG_NEWLINES
 313         xs << XMLStream::ESCAPE_NONE << "<!-- closeBlockTag -->";
 314 #endif
 315
 316         // Similar as closeParTag, but with a line feed before.
 317         xs << xml::CR();
 318         closeParTag(xs, par, prevpar);
 319
 320 #ifdef DOCBOOK_DEBUG_NEWLINES
 321         xs << XMLStream::ESCAPE_NONE << "<!-- /closeBlockTag -->";
 322 #endif
 323 }
 324
 325
 326 void openLabelTag(XMLStream & xs, Layout const & lay) // Mostly for definition lists.
 327 {
 328 #ifdef DOCBOOK_DEBUG_NEWLINES
 329         xs << XMLStream::ESCAPE_NONE << "<!-- openLabelTag -->";
 330 #endif
 331
 332         xs << xml::StartTag(lay.docbookitemlabeltag(), lay.docbookitemlabelattr());
 333
 334 #ifdef DOCBOOK_DEBUG_NEWLINES
 335         xs << XMLStream::ESCAPE_NONE << "<!-- /openLabelTag -->";
 336 #endif
 337 }
 338
 339
 340 void closeLabelTag(XMLStream & xs, Layout const & lay)
 341 {
 342 #ifdef DOCBOOK_DEBUG_NEWLINES
 343         xs << XMLStream::ESCAPE_NONE << "<!-- closeLabelTag -->";
 344 #endif
 345
 346         xs << xml::EndTag(lay.docbookitemlabeltag());
 347         xs << xml::CR();
 348
 349 #ifdef DOCBOOK_DEBUG_NEWLINES
 350         xs << XMLStream::ESCAPE_NONE << "<!-- closeLabelTag -->";
 351 #endif
 352 }
 353
 354
 355 void openItemTag(XMLStream & xs, Layout const & lay)
 356 {
 357 #ifdef DOCBOOK_DEBUG_NEWLINES
 358         xs << XMLStream::ESCAPE_NONE << "<!-- openItemTag -->";
 359 #endif
 360
 361         xs << xml::StartTag(lay.docbookitemtag(), lay.docbookitemattr());
 362
 363 #ifdef DOCBOOK_DEBUG_NEWLINES
 364         xs << XMLStream::ESCAPE_NONE << "<!-- /openItemTag -->";
 365 #endif
 366 }
 367
 368
 369 void closeItemTag(XMLStream & xs, Layout const & lay)
 370 {
 371 #ifdef DOCBOOK_DEBUG_NEWLINES
 372         xs << XMLStream::ESCAPE_NONE << "<!-- closeItemTag -->";
 373 #endif
 374
 375         xs << xml::EndTag(lay.docbookitemtag());
 376         xs << xml::CR();
 377
 378 #ifdef DOCBOOK_DEBUG_NEWLINES
 379         xs << XMLStream::ESCAPE_NONE << "<!-- /closeItemTag -->";
 380 #endif
 381 }
 382
 383
 384 void makeParagraphBibliography(
 385                 Buffer const & buf,
 386                 XMLStream & xs,
 387                 OutputParams const & runparams,
 388                 Text const & text,
 389                 ParagraphList::const_iterator const & pbegin)
 390 {
 391         auto const begin = text.paragraphs().begin();
 392         auto const end = text.paragraphs().end();
 393         auto pend = pbegin;
 394         ++pend;
 395
 396         // Find the paragraph *before* pbegin.
 397         ParagraphList::const_iterator pbegin_before = begin;
 398         if (pbegin != begin) {
 399                 ParagraphList::const_iterator pbegin_before_next = begin;
 400                 ++pbegin_before_next;
 401
 402                 while (pbegin_before_next != pbegin) {
 403                         ++pbegin_before;
 404                         ++pbegin_before_next;
 405                 }
 406         }
 407
 408         ParagraphList::const_iterator par = pbegin;
 409
 410         // If this is the first paragraph in a bibliography, open the bibliography tag.
 411         if (pbegin != begin && pbegin_before->layout().latextype != LATEX_BIB_ENVIRONMENT) {
 412                 xs << xml::StartTag("bibliography");
 413                 xs << xml::CR();
 414         }
 415
 416         // Generate the required paragraphs, but only if they are .
 417         for (; par != pend; ++par) {
 418                 // Start the precooked bibliography entry. This is very much like opening a paragraph tag.
 419                 // Don't forget the citation ID!
 420                 docstring attr;
 421                 for (auto i = 0; i < par->size(); ++i) {
 422                         Inset const *ip = par->getInset(0);
 423                         if (ip != nullptr && ip->lyxCode() == BIBITEM_CODE) {
 424                                 const auto * bibitem = dynamic_cast<const InsetBibitem*>(par->getInset(i));
 425                                 attr = from_utf8("xml:id='") + bibitem->getParam("key") + from_utf8("'");
 426                                 break;
 427                         }
 428                 }
 429                 xs << xml::StartTag(from_utf8("bibliomixed"), attr);
 430
 431                 // Generate the entry.
 432                 par->simpleDocBookOnePar(buf, xs, runparams, text.outerFont(distance(begin, par)), true, true, 0);
 433
 434                 // End the precooked bibliography entry.
 435                 xs << xml::EndTag("bibliomixed");
 436                 xs << xml::CR();
 437         }
 438
 439         // If this is the last paragraph in a bibliography, close the bibliography tag.
 440         if (par == end || par->layout().latextype != LATEX_BIB_ENVIRONMENT) {
 441                 xs << xml::EndTag("bibliography");
 442                 xs << xml::CR();
 443         }
 444 }
 445
 446
 447 void makeParagraph(
 448                 Buffer const & buf,
 449                 XMLStream & xs,
 450                 OutputParams const & runparams,
 451                 Text const & text,
 452                 ParagraphList::const_iterator const & par)
 453 {
 454         auto const begin = text.paragraphs().begin();
 455         auto const end = text.paragraphs().end();
 456         auto prevpar = text.paragraphs().getParagraphBefore(par);
 457
 458         // We want to open the paragraph tag if:
 459         //   (i) the current layout permits multiple paragraphs
 460         //  (ii) we are either not already inside a paragraph (HTMLIsBlock) OR
 461         //         we are, but this is not the first paragraph
 462         //
 463         // But there is also a special case, and we first see whether we are in it.
 464         // We do not want to open the paragraph tag if this paragraph contains
 465         // only one item, and that item is "inline", i.e., not HTMLIsBlock (such
 466         // as a branch). On the other hand, if that single item has a font change
 467         // applied to it, then we still do need to open the paragraph.
 468         //
 469         // Obviously, this is very fragile. The main reason we need to do this is
 470         // because of branches, e.g., a branch that contains an entire new section.
 471         // We do not really want to wrap that whole thing in a <div>...</div>.
 472         bool special_case = false;
 473         Inset const *specinset = par->size() == 1 ? par->getInset(0) : nullptr;
 474         if (specinset && !specinset->getLayout().htmlisblock()) { // TODO: Convert htmlisblock to a DocBook parameter?
 475                 Layout const &style = par->layout();
 476                 FontInfo const first_font = style.labeltype == LABEL_MANUAL ?
 477                                                                         style.labelfont : style.font;
 478                 FontInfo const our_font =
 479                                 par->getFont(buf.masterBuffer()->params(), 0,
 480                                                          text.outerFont(std::distance(begin, par))).fontInfo();
 481
 482                 if (first_font == our_font)
 483                         special_case = true;
 484         }
 485
 486         // Plain layouts must be ignored.
 487         if (!special_case && buf.params().documentClass().isPlainLayout(par->layout()) && !runparams.docbook_force_pars)
 488                 special_case = true;
 489         // TODO: Could get rid of this with a DocBook equivalent to htmlisblock?
 490         if (!special_case && par->size() == 1 && par->getInset(0)) {
 491                 Inset const * firstInset = par->getInset(0);
 492
 493                 // Floats cannot be in paragraphs.
 494                 special_case = to_utf8(firstInset->layoutName()).substr(0, 6) == "Float:";
 495
 496                 // Bibliographies cannot be in paragraphs.
 497                 if (!special_case && firstInset->asInsetCommand())
 498                         special_case = firstInset->asInsetCommand()->params().getCmdName() == "bibtex";
 499
 500                 // Equations do not deserve their own paragraph (DocBook allows them outside paragraphs).
 501                 if (!special_case && firstInset->asInsetMath())
 502                         special_case = true;
 503
 504                 // ERTs are in comments, not paragraphs.
 505                 if (!special_case && firstInset->lyxCode() == lyx::ERT_CODE)
 506                         special_case = true;
 507
 508                 // Listings should not get into their own paragraph.
 509                 if (!special_case && firstInset->lyxCode() == lyx::LISTINGS_CODE)
 510                         special_case = true;
 511         }
 512
 513         bool const open_par = runparams.docbook_make_pars
 514                                                   && !runparams.docbook_in_par
 515                                                   && !special_case;
 516
 517         // We want to issue the closing tag if either:
 518         //   (i)  We opened it, and either docbook_in_par is false,
 519         //              or we're not in the last paragraph, anyway.
 520         //   (ii) We didn't open it and docbook_in_par is true,
 521         //              but we are in the first par, and there is a next par.
 522         auto nextpar = par;
 523         ++nextpar;
 524         bool const close_par = open_par && (!runparams.docbook_in_par);
 525
 526         // Determine if this paragraph has some real content. Things like new pages are not caught
 527         // by Paragraph::empty(), even though they do not generate anything useful in DocBook.
 528         odocstringstream os2;
 529         XMLStream xs2(os2);
 530         par->simpleDocBookOnePar(buf, xs2, runparams, text.outerFont(distance(begin, par)), open_par, close_par, 0);
 531
 532         docstring cleaned = os2.str();
 533         static const lyx::regex reg("[ \\r\\n]*");
 534         cleaned = from_utf8(lyx::regex_replace(to_utf8(cleaned), reg, string("")));
 535
 536         if (!cleaned.empty()) {
 537                 if (open_par)
 538                         openParTag(xs, &*par, prevpar);
 539
 540                 xs << XMLStream::ESCAPE_NONE << os2.str();
 541
 542                 if (close_par)
 543                         closeParTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr);
 544         }
 545 }
 546
 547
 548 void makeAny(
 549                 Text const &text,
 550                 Buffer const &buf,
 551                 XMLStream &xs,
 552                 OutputParams const &ourparams,
 553                 ParagraphList::const_iterator par);
 554
 555
 556 void makeEnvironment(
 557                 Buffer const &buf,
 558                 XMLStream &xs,
 559                 OutputParams const &runparams,
 560                 Text const &text,
 561                 ParagraphList::const_iterator const & par)
 562 {
 563         auto const end = text.paragraphs().end();
 564
 565         // Output the opening tag for this environment, but only if it has not been previously opened (condition
 566         // implemented in openParTag).
 567         auto prevpar = text.paragraphs().getParagraphBefore(par);
 568         openParTag(xs, &*par, prevpar); // TODO: switch in layout for par/block?
 569
 570         // Generate the contents of this environment. There is a special case if this is like some environment.
 571         Layout const & style = par->layout();
 572         if (style.latextype == LATEX_COMMAND) {
 573                 // Nothing to do (otherwise, infinite loops).
 574         } else if (style.latextype == LATEX_ENVIRONMENT ||
 575                         style.latextype == LATEX_LIST_ENVIRONMENT ||
 576                         style.latextype == LATEX_ITEM_ENVIRONMENT) {
 577                 // Open a wrapper tag if needed.
 578                 if (style.docbookitemwrappertag() != "NONE") {
 579                         xs << xml::StartTag(style.docbookitemwrappertag(), style.docbookitemwrapperattr());
 580                         xs << xml::CR();
 581                 }
 582
 583                 // Generate the label, if need be. If it is taken from the text, sep != 0 and corresponds to the first
 584                 // character after the label.
 585                 pos_type sep = 0;
 586                 if (style.labeltype != LABEL_NO_LABEL && style.docbookitemlabeltag() != "NONE") {
 587                         // At least one condition must be met:
 588                         //  - this environment is not a list
 589                         //  - if this is a list, the label must not be manual (i.e. it must be taken from the layout)
 590                         if (style.latextype != LATEX_LIST_ENVIRONMENT || style.labeltype != LABEL_MANUAL) {
 591                                 // Usual cases: maybe there is something specified at the layout level. Highly unlikely, though.
 592                                 docstring const lbl = par->params().labelString();
 593
 594                                 if (lbl.empty()) {
 595                                         xs << xml::CR();
 596                                 } else {
 597                                         openLabelTag(xs, style);
 598                                         xs << lbl;
 599                                         closeLabelTag(xs, style);
 600                                 }
 601                         } else {
 602                                 // Only variablelist gets here (or similar items defined as an extension in the layout).
 603                                 openLabelTag(xs, style);
 604                                 sep = par->firstWordDocBook(xs, runparams);
 605                                 closeLabelTag(xs, style);
 606                         }
 607                 }
 608
 609                 // Maybe the item is completely empty, i.e. if the first word ends at the end of the current paragraph
 610                 // AND if the next paragraph doesn't have the same depth (if there is such a paragraph).
 611                 // Common case: there is only the first word on the line, but there is a nested list instead
 612                 // of more text.
 613                 bool emptyItem = false;
 614                 if (sep == par->size()) { // If the separator is already at the end of this paragraph...
 615                         auto next_par = par;
 616                         ++next_par;
 617                         if (next_par == text.paragraphs().end()) // There is no next paragraph.
 618                                 emptyItem = true;
 619                         else // There is a next paragraph: check depth.
 620                                 emptyItem = par->params().depth() >= next_par->params().depth();
 621                 }
 622
 623                 if (emptyItem) {
 624                         // Avoid having an empty item, this is not valid DocBook. A single character is enough to force
 625                         // generation of a full <para>.
 626                         // TODO: this always worked only by magic...
 627                         xs << ' ';
 628                 } else {
 629                         // Generate the rest of the paragraph, if need be.
 630                         par->simpleDocBookOnePar(buf, xs, runparams, text.outerFont(std::distance(text.paragraphs().begin(), par)),
 631                                                                  true, true, sep);
 632                 }
 633         } else {
 634                 makeAny(text, buf, xs, runparams, par);
 635         }
 636
 637         // Close the environment.
 638         auto nextpar = par;
 639         ++nextpar;
 640         closeParTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr); // TODO: switch in layout for par/block?
 641 }
 642
 643
 644 void makeCommand(
 645                 Buffer const & buf,
 646                 XMLStream & xs,
 647                 OutputParams const & runparams,
 648                 Text const & text,
 649                 ParagraphList::const_iterator const & par)
 650 {
 651         // Unlike XHTML, no need for labels, as they are handled by DocBook tags.
 652         auto const begin = text.paragraphs().begin();
 653         auto const end = text.paragraphs().end();
 654         auto nextpar = par;
 655         ++nextpar;
 656
 657         // Generate this command.
 658         auto prevpar = text.paragraphs().getParagraphBefore(par);
 659         openParTag(xs, &*par, prevpar);
 660
 661         par->simpleDocBookOnePar(buf, xs, runparams,
 662                                  text.outerFont(distance(begin, par)));
 663
 664         closeParTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr);
 665 }
 666
 667
 668 void makeAny(
 669                 Text const &text,
 670                 Buffer const &buf,
 671                 XMLStream &xs,
 672                 OutputParams const &ourparams,
 673                 ParagraphList::const_iterator par)
 674 {
 675         switch (par->layout().latextype) {
 676         case LATEX_COMMAND:
 677                 makeCommand(buf, xs, ourparams, text, par);
 678                 break;
 679         case LATEX_ENVIRONMENT:
 680         case LATEX_LIST_ENVIRONMENT:
 681         case LATEX_ITEM_ENVIRONMENT:
 682                 makeEnvironment(buf, xs, ourparams, text, par);
 683                 break;
 684         case LATEX_PARAGRAPH:
 685                 makeParagraph(buf, xs, ourparams, text, par);
 686                 break;
 687         case LATEX_BIB_ENVIRONMENT:
 688                 makeParagraphBibliography(buf, xs, ourparams, text, par);
 689                 break;
 690         }
 691 }
 692
 693 } // end anonymous namespace
 694
 695
 696 using DocBookDocumentSectioning = tuple<bool, pit_type>;
 697
 698
 699 struct DocBookInfoTag
 700 {
 701         const set<pit_type> shouldBeInInfo;
 702         const set<pit_type> mustBeInInfo;
 703         const set<pit_type> abstract;
 704         pit_type bpit;
 705         pit_type epit;
 706
 707         DocBookInfoTag(const set<pit_type> & shouldBeInInfo, const set<pit_type> & mustBeInInfo,
 708                                    const set<pit_type> & abstract, pit_type bpit, pit_type epit) :
 709                                    shouldBeInInfo(shouldBeInInfo), mustBeInInfo(mustBeInInfo), abstract(abstract),
 710                                    bpit(bpit), epit(epit) {}
 711 };
 712
 713
 714 DocBookDocumentSectioning hasDocumentSectioning(ParagraphList const &paragraphs, pit_type bpit, pit_type const epit) {
 715         bool documentHasSections = false;
 716
 717         while (bpit < epit) {
 718                 Layout const &style = paragraphs[bpit].layout();
 719                 documentHasSections |= style.category() == from_utf8("Sectioning");
 720
 721                 if (documentHasSections)
 722                         break;
 723                 bpit += 1;
 724         }
 725         // Paragraphs before the first section: [ runparams.par_begin ; eppit )
 726
 727         return make_tuple(documentHasSections, bpit);
 728 }
 729
 730
 731 bool hasOnlyNotes(Paragraph const & par)
 732 {
 733         // Precondition: the paragraph is not empty. Otherwise, the function will always return true...
 734         for (int i = 0; i < par.size(); ++i)
 735                 // If you find something that is not an inset (like actual text) or an inset that is not a note,
 736                 // return false.
 737                 if (!par.isInset(i) || !dynamic_cast<InsetNote *>(par.insetList().get(i)))
 738                         return false;
 739         return true;
 740 }
 741
 742
 743 DocBookInfoTag getParagraphsWithInfo(ParagraphList const &paragraphs, pit_type bpit, pit_type const epit) {
 744         set<pit_type> shouldBeInInfo;
 745         set<pit_type> mustBeInInfo;
 746         set<pit_type> abstract;
 747
 748         // Find the first non empty paragraph by mutating bpit.
 749         while (bpit < epit) {
 750                 Paragraph const &par = paragraphs[bpit];
 751                 if (par.empty() || hasOnlyNotes(par))
 752                         bpit += 1;
 753                 else
 754                         break;
 755         }
 756
 757         // Find the last info-like paragraph.
 758         pit_type cpit = bpit;
 759         bool hasAbstractLayout = false;
 760         while (cpit < epit) {
 761                 // Skip paragraphs only containing one note.
 762                 Paragraph const & par = paragraphs[cpit];
 763                 if (hasOnlyNotes(par)) {
 764                         cpit += 1;
 765                         continue;
 766                 }
 767
 768                 if (par.layout().docbookabstract())
 769                         hasAbstractLayout = true;
 770
 771                 // Based on layout information, store this paragraph in one set: should be in <info>, must be.
 772                 Layout const &style = par.layout();
 773
 774                 if (style.docbookininfo() == "always") {
 775                         mustBeInInfo.emplace(cpit);
 776                 } else if (style.docbookininfo() == "maybe") {
 777                         shouldBeInInfo.emplace(cpit);
 778                 } else {
 779                         // Hypothesis: the <info> parts should be grouped together near the beginning bpit.
 780                         // There may be notes in between, but nothing else.
 781                         break;
 782                 }
 783                 cpit += 1;
 784         }
 785         // Now, cpit points to the last paragraph that has things that could go in <info>.
 786         // bpit is the beginning of the <info> part.
 787
 788         // Go once again through the list of paragraphs to find the abstract. If there is an abstract
 789         // layout, only consider it. Otherwise, an abstract is just a sequence of paragraphs with text.
 790         if (hasAbstractLayout) {
 791                 pit_type pit = bpit;
 792                 while (pit < cpit) { // Don't overshoot the <info> part.
 793                         if (paragraphs[pit].layout().docbookabstract())
 794                                 abstract.emplace(pit);
 795                         pit++;
 796                 }
 797         } else {
 798                 pit_type lastAbstract = epit + 1; // A nonsensical value.
 799                 docstring lastAbstractLayout;
 800
 801                 pit_type pit = bpit;
 802                 while (pit < cpit) { // Don't overshoot the <info> part.
 803                         const Paragraph & par = paragraphs.at(pit);
 804                         if (!par.insetList().empty()) {
 805                                 for (const auto &i : par.insetList()) {
 806                                         if (i.inset->getText(0) != nullptr) {
 807                                                 if (lastAbstract == epit + 1) {
 808                                                         // First paragraph that matches the heuristic definition of abstract.
 809                                                         lastAbstract = pit;
 810                                                         lastAbstractLayout = par.layout().name();
 811                                                 } else if (pit > lastAbstract + 1 || par.layout().name() != lastAbstractLayout) {
 812                                                         // This is either too far from the last abstract paragraph or doesn't
 813                                                         // have the right layout name, BUT there has already been an abstract
 814                                                         // in this document: done with detecting the abstract.
 815                                                         goto done; // Easier to get out of two nested loops.
 816                                                 }
 817
 818                                                 abstract.emplace(pit);
 819                                                 break;
 820                                         }
 821                                 }
 822                         }
 823                         pit++;
 824                 }
 825         }
 826
 827         done:
 828         return DocBookInfoTag(shouldBeInInfo, mustBeInInfo, abstract, bpit, cpit);
 829 }
 830
 831
 832 void outputDocBookInfo(
 833                 Text const & text,
 834                 Buffer const & buf,
 835                 XMLStream & xs,
 836                 OutputParams const & runparams,
 837                 ParagraphList const & paragraphs,
 838                 DocBookInfoTag const & info)
 839 {
 840         // Perform an additional check on the abstract. Sometimes, there are many paragraphs that should go
 841         // into the abstract, but none generates actual content. Thus, first generate to a temporary stream,
 842         // then only create the <abstract> tag if these paragraphs generate some content.
 843         // This check must be performed *before* a decision on whether or not to output <info> is made.
 844         bool hasAbstract = !info.abstract.empty();
 845         docstring abstract;
 846         if (hasAbstract) {
 847                 // Generate the abstract XML into a string before further checks.
 848                 odocstringstream os2;
 849                 {
 850                         XMLStream xs2(os2);
 851                         auto bpit = *std::min_element(info.abstract.begin(), info.abstract.end());
 852                         auto epit = 1 + *std::max_element(info.abstract.begin(), info.abstract.end());
 853                         // info.abstract is inclusive, epit is exclusive, hence +1 for looping.
 854
 855                         while (bpit < epit) {
 856                                 makeAny(text, buf, xs2, runparams, paragraphs.iterator_at(bpit));
 857                                 bpit += 1;
 858                         }
 859                 }
 860
 861                 // Actually output the abstract if there is something to do. Don't count line feeds or spaces in this,
 862                 // even though they must be properly output if there is some abstract.
 863                 abstract = os2.str();
 864                 static const lyx::regex reg("[ \\r\\n]*");
 865                 docstring abstractContent = from_utf8(lyx::regex_replace(to_utf8(abstract), reg, string("")));
 866
 867                 // Nothing? Then there is no abstract!
 868                 if (abstractContent.empty())
 869                         hasAbstract = false;
 870         }
 871
 872         // The abstract must go in <info>. Otherwise, decide whether to open <info> based on the layouts.
 873         bool needInfo = !info.mustBeInInfo.empty() || hasAbstract;
 874
 875         // Start the <info> tag if required.
 876         if (needInfo) {
 877                 xs.startDivision(false);
 878                 xs << xml::StartTag("info");
 879                 xs << xml::CR();
 880         }
 881
 882         // Output the elements that should go in <info>, before and after the abstract.
 883         for (auto pit : info.shouldBeInInfo) { // Typically, the title: these elements are so important and ubiquitous
 884                 // that mandating a wrapper like <info> would repel users. Thus, generate them first.
 885                 makeAny(text, buf, xs, runparams, paragraphs.iterator_at(pit));
 886         }
 887         for (auto pit : info.mustBeInInfo) {
 888                 if (info.abstract.find(pit) == info.abstract.end()) // The abstract must be in info, but is dealt with after.
 889                         makeAny(text, buf, xs, runparams, paragraphs.iterator_at(pit));
 890         }
 891
 892         // Always output the abstract as the last item of the <info>, as it requires special treatment (especially if
 893         // it contains several paragraphs that are empty).
 894         if (hasAbstract) {
 895 //              string tag = paragraphs[*info.abstract.begin()].layout().docbookforceabstracttag();
 896 //              if (tag == "NONE")
 897 //                      tag = "abstract";
 898 //
 899 //              xs << xml::StartTag(tag);
 900 //              xs << xml::CR();
 901                 xs << XMLStream::ESCAPE_NONE << abstract;
 902 //              xs << xml::EndTag(tag);
 903 //              xs << xml::CR();
 904         }
 905
 906         // End the <info> tag if it was started.
 907         if (needInfo) {
 908                 xs << xml::EndTag("info");
 909                 xs << xml::CR();
 910                 xs.endDivision();
 911         }
 912 }
 913
 914
 915 void docbookFirstParagraphs(
 916                 Text const &text,
 917                 Buffer const &buf,
 918                 XMLStream &xs,
 919                 OutputParams const &runparams,
 920                 pit_type epit)
 921 {
 922         // Handle the beginning of the document, supposing it has sections.
 923         // Major role: output the first <info> tag.
 924
 925         ParagraphList const &paragraphs = text.paragraphs();
 926         pit_type bpit = runparams.par_begin;
 927         DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit);
 928         outputDocBookInfo(text, buf, xs, runparams, paragraphs, info);
 929 }
 930
 931
 932 void docbookSimpleAllParagraphs(
 933                 Text const & text,
 934                 Buffer const & buf,
 935                 XMLStream & xs,
 936                 OutputParams const & runparams)
 937 {
 938         // Handle the given text, supposing it has no sections (i.e. a "simple" text). The input may vary in length
 939         // between a single paragraph to a whole document.
 940
 941         // First, the <info> tag.
 942         ParagraphList const &paragraphs = text.paragraphs();
 943         pit_type bpit = runparams.par_begin;
 944         pit_type const epit = runparams.par_end;
 945         DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit);
 946         outputDocBookInfo(text, buf, xs, runparams, paragraphs, info);
 947
 948         // Then, the content. It starts where the <info> ends.
 949         bpit = info.epit;
 950         while (bpit < epit) {
 951                 auto par = paragraphs.iterator_at(bpit);
 952                 if (!hasOnlyNotes(*par))
 953                         makeAny(text, buf, xs, runparams, par);
 954                 bpit += 1;
 955         }
 956 }
 957
 958
 959 void docbookParagraphs(Text const &text,
 960                                            Buffer const &buf,
 961                                            XMLStream &xs,
 962                                            OutputParams const &runparams) {
 963         ParagraphList const &paragraphs = text.paragraphs();
 964         if (runparams.par_begin == runparams.par_end) {
 965                 runparams.par_begin = 0;
 966                 runparams.par_end = paragraphs.size();
 967         }
 968         pit_type bpit = runparams.par_begin;
 969         pit_type const epit = runparams.par_end;
 970         LASSERT(bpit < epit,
 971                         {
 972                                 xs << XMLStream::ESCAPE_NONE << "<!-- DocBook output error! -->\n";
 973                                 return;
 974                         });
 975
 976         std::stack<std::pair<int, string>> headerLevels; // Used to determine when to open/close sections: store the depth
 977         // of the section and the tag that was used to open it.
 978
 979         // Detect whether the document contains sections. If there are no sections, there can be no automatically
 980         // discovered abstract.
 981         bool documentHasSections;
 982         pit_type eppit;
 983         tie(documentHasSections, eppit) = hasDocumentSectioning(paragraphs, bpit, epit);
 984
 985         if (documentHasSections) {
 986                 docbookFirstParagraphs(text, buf, xs, runparams, eppit);
 987                 bpit = eppit;
 988         } else {
 989                 docbookSimpleAllParagraphs(text, buf, xs, runparams);
 990                 return;
 991         }
 992
 993         bool currentlyInAppendix = false;
 994
 995         while (bpit < epit) {
 996                 OutputParams ourparams = runparams;
 997
 998                 auto par = paragraphs.iterator_at(bpit);
 999                 if (par->params().startOfAppendix())
1000                         currentlyInAppendix = true;
1001                 Layout const &style = par->layout();
1002                 ParagraphList::const_iterator const lastStartedPar = par;
1003                 ParagraphList::const_iterator send;
1004
1005                 if (hasOnlyNotes(*par)) {
1006                         bpit += 1;
1007                         continue;
1008                 }
1009
1010                 // Think about adding <section> and/or </section>s.
1011                 const bool isLayoutSectioning = style.category() == from_utf8("Sectioning");
1012                 if (isLayoutSectioning) {
1013                         int level = style.toclevel;
1014
1015                         // Need to close a previous section if it has the same level or a higher one (close <section> if opening a <h2>
1016                         // after a <h2>, <h3>, <h4>, <h5> or <h6>). More examples:
1017                         //   - current: h2; back: h1; do not close any <section>
1018                         //   - current: h1; back: h2; close two <section> (first the <h2>, then the <h1>, so a new <h1> can come)
1019                         while (!headerLevels.empty() && level <= headerLevels.top().first) {
1020                                 int stackLevel = headerLevels.top().first;
1021                                 docstring stackTag = from_utf8("</" + headerLevels.top().second + ">");
1022                                 headerLevels.pop();
1023
1024                                 // Output the tag only if it corresponds to a legit section.
1025                                 if (stackLevel != Layout::NOT_IN_TOC)
1026                                         xs << XMLStream::ESCAPE_NONE << stackTag << xml::CR();
1027                         }
1028
1029                         // Open the new section: first push it onto the stack, then output it in DocBook.
1030                         string sectionTag = (currentlyInAppendix && style.docbooksectiontag() == "chapter") ?
1031                                                                 "appendix" : style.docbooksectiontag();
1032                         headerLevels.push(std::make_pair(level, sectionTag));
1033
1034                         // Some sectioning-like elements should not be output (such as FrontMatter).
1035                         if (level != Layout::NOT_IN_TOC) {
1036                                 // Look for a label in the title, i.e. a InsetLabel as a child.
1037                                 docstring id = docstring();
1038                                 for (pos_type i = 0; i < par->size(); ++i) {
1039                                         Inset const *inset = par->getInset(i);
1040                                         if (inset) {
1041                                                 if (auto label = dynamic_cast<InsetLabel const *>(inset)) {
1042                                                         // Generate the attributes for the section if need be.
1043                                                         id += "xml:id=\"" + xml::cleanID(label->screenLabel()) + "\"";
1044
1045                                                         // Don't output the ID as a DocBook <anchor>.
1046                                                         ourparams.docbook_anchors_to_ignore.emplace(label->screenLabel());
1047
1048                                                         // Cannot have multiple IDs per tag.
1049                                                         break;
1050                                                 }
1051                                         }
1052                                 }
1053
1054                                 // Write the open tag for this section.
1055                                 docstring tag = from_utf8("<" + sectionTag);
1056                                 if (!id.empty())
1057                                         tag += from_utf8(" ") + id;
1058                                 tag += from_utf8(">");
1059                                 xs << XMLStream::ESCAPE_NONE << tag;
1060                                 xs << xml::CR();
1061                         }
1062                 }
1063
1064                 // Close all sections before the bibliography.
1065                 // TODO: Only close all when the bibliography is at the end of the document? Or force to output the bibliography at the end of the document? Or don't care (as allowed by DocBook)?
1066                 auto insetsLength = distance(par->insetList().begin(), par->insetList().end());
1067                 if (insetsLength > 0) {
1068                         Inset const *firstInset = par->getInset(0);
1069                         if (firstInset && dynamic_cast<InsetBibtex const *>(firstInset)) {
1070                                 while (!headerLevels.empty()) {
1071                                         int level = headerLevels.top().first;
1072                                         docstring tag = from_utf8("</" + headerLevels.top().second + ">");
1073                                         headerLevels.pop();
1074
1075                                         // Output the tag only if it corresponds to a legit section.
1076                                         if (level != Layout::NOT_IN_TOC) {
1077                                                 xs << XMLStream::ESCAPE_NONE << tag;
1078                                                 xs << xml::CR();
1079                                         }
1080                                 }
1081                         }
1082                 }
1083
1084                 // Generate this paragraph.
1085                 makeAny(text, buf, xs, ourparams, par);
1086                 bpit += 1;
1087         }
1088
1089         // If need be, close <section>s, but only at the end of the document (otherwise, dealt with at the beginning
1090         // of the loop).
1091         while (!headerLevels.empty() && headerLevels.top().first > Layout::NOT_IN_TOC) {
1092                 docstring tag = from_utf8("</" + headerLevels.top().second + ">");
1093                 headerLevels.pop();
1094                 xs << XMLStream::ESCAPE_NONE << tag;
1095                 xs << xml::CR();
1096         }
1097 }
1098
1099 } // namespace lyx