src/output_docbook.cpp

   1 /**
   2  * \file output_docbook.cpp
   3  * This file is part of LyX, the document processor.
   4  * Licence details can be found in the file COPYING.
   5  *
   6  * \author Lars Gullik Bjønnes
   7  * \author José Matos
   8  *
   9  * Full author contact details are available in file CREDITS.
  10  */
  11
  12 #include <config.h>
  13
  14 #include "Buffer.h"
  15 #include "buffer_funcs.h"
  16 #include "BufferParams.h"
  17 #include "Font.h"
  18 #include "InsetList.h"
  19 #include "Paragraph.h"
  20 #include "ParagraphList.h"
  21 #include "ParagraphParameters.h"
  22 #include "xml.h"
  23 #include "Text.h"
  24 #include "TextClass.h"
  25
  26 #include "insets/InsetBibtex.h"
  27 #include "insets/InsetBibitem.h"
  28 #include "insets/InsetLabel.h"
  29 #include "insets/InsetNote.h"
  30
  31 #include "support/lassert.h"
  32
  33 #include "support/regex.h"
  34
  35 #include <stack>
  36 #include <iostream>
  37 #include <algorithm>
  38 #include <sstream>
  39
  40 using namespace std;
  41 using namespace lyx::support;
  42
  43 namespace lyx {
  44
  45 namespace {
  46
  47 std::string fontToDocBookTag(xml::FontTypes type)
  48 {
  49         switch (type) {
  50         case xml::FontTypes::FT_EMPH:
  51         case xml::FontTypes::FT_BOLD:
  52                 return "emphasis";
  53         case xml::FontTypes::FT_NOUN:
  54                 return "person";
  55         case xml::FontTypes::FT_UBAR:
  56         case xml::FontTypes::FT_WAVE:
  57         case xml::FontTypes::FT_DBAR:
  58         case xml::FontTypes::FT_SOUT:
  59         case xml::FontTypes::FT_XOUT:
  60         case xml::FontTypes::FT_ITALIC:
  61         case xml::FontTypes::FT_UPRIGHT:
  62         case xml::FontTypes::FT_SLANTED:
  63         case xml::FontTypes::FT_SMALLCAPS:
  64         case xml::FontTypes::FT_ROMAN:
  65         case xml::FontTypes::FT_SANS:
  66                 return "emphasis";
  67         case xml::FontTypes::FT_TYPE:
  68                 return "code";
  69         case xml::FontTypes::FT_SIZE_TINY:
  70         case xml::FontTypes::FT_SIZE_SCRIPT:
  71         case xml::FontTypes::FT_SIZE_FOOTNOTE:
  72         case xml::FontTypes::FT_SIZE_SMALL:
  73         case xml::FontTypes::FT_SIZE_NORMAL:
  74         case xml::FontTypes::FT_SIZE_LARGE:
  75         case xml::FontTypes::FT_SIZE_LARGER:
  76         case xml::FontTypes::FT_SIZE_LARGEST:
  77         case xml::FontTypes::FT_SIZE_HUGE:
  78         case xml::FontTypes::FT_SIZE_HUGER:
  79         case xml::FontTypes::FT_SIZE_INCREASE:
  80         case xml::FontTypes::FT_SIZE_DECREASE:
  81                 return "emphasis";
  82         default:
  83                 return "";
  84         }
  85 }
  86
  87
  88 string fontToRole(xml::FontTypes type)
  89 {
  90         // Specific fonts are achieved with roles. The only common ones are "" for basic emphasis,
  91         // and "bold"/"strong" for bold. With some specific options, other roles are copied into
  92         // HTML output (via the DocBook XSLT sheets); otherwise, if not recognised, they are just ignored.
  93         // Hence, it is not a problem to have many roles by default here.
  94         // See https://www.sourceware.org/ml/docbook/2003-05/msg00269.html
  95         switch (type) {
  96         case xml::FontTypes::FT_ITALIC:
  97         case xml::FontTypes::FT_EMPH:
  98                 return "";
  99         case xml::FontTypes::FT_BOLD:
 100                 return "bold";
 101         case xml::FontTypes::FT_NOUN: // Outputs a <person>
 102         case xml::FontTypes::FT_TYPE: // Outputs a <code>
 103                 return "";
 104         case xml::FontTypes::FT_UBAR:
 105                 return "underline";
 106
 107         // All other roles are non-standard for DocBook.
 108
 109         case xml::FontTypes::FT_WAVE:
 110                 return "wave";
 111         case xml::FontTypes::FT_DBAR:
 112                 return "dbar";
 113         case xml::FontTypes::FT_SOUT:
 114                 return "sout";
 115         case xml::FontTypes::FT_XOUT:
 116                 return "xout";
 117         case xml::FontTypes::FT_UPRIGHT:
 118                 return "upright";
 119         case xml::FontTypes::FT_SLANTED:
 120                 return "slanted";
 121         case xml::FontTypes::FT_SMALLCAPS:
 122                 return "smallcaps";
 123         case xml::FontTypes::FT_ROMAN:
 124                 return "roman";
 125         case xml::FontTypes::FT_SANS:
 126                 return "sans";
 127         case xml::FontTypes::FT_SIZE_TINY:
 128                 return "tiny";
 129         case xml::FontTypes::FT_SIZE_SCRIPT:
 130                 return "size_script";
 131         case xml::FontTypes::FT_SIZE_FOOTNOTE:
 132                 return "size_footnote";
 133         case xml::FontTypes::FT_SIZE_SMALL:
 134                 return "size_small";
 135         case xml::FontTypes::FT_SIZE_NORMAL:
 136                 return "size_normal";
 137         case xml::FontTypes::FT_SIZE_LARGE:
 138                 return "size_large";
 139         case xml::FontTypes::FT_SIZE_LARGER:
 140                 return "size_larger";
 141         case xml::FontTypes::FT_SIZE_LARGEST:
 142                 return "size_largest";
 143         case xml::FontTypes::FT_SIZE_HUGE:
 144                 return "size_huge";
 145         case xml::FontTypes::FT_SIZE_HUGER:
 146                 return "size_huger";
 147         case xml::FontTypes::FT_SIZE_INCREASE:
 148                 return "size_increase";
 149         case xml::FontTypes::FT_SIZE_DECREASE:
 150                 return "size_decrease";
 151         default:
 152                 return "";
 153         }
 154 }
 155
 156
 157 string fontToAttribute(xml::FontTypes type) {
 158         // If there is a role (i.e. nonstandard use of a tag), output the attribute. Otherwise, the sheer tag is sufficient
 159         // for the font.
 160         string role = fontToRole(type);
 161         if (!role.empty()) {
 162                 return "role='" + role + "'";
 163         } else {
 164                 return "";
 165         }
 166 }
 167
 168
 169 xml::FontTag docbookStartFontTag(xml::FontTypes type)
 170 {
 171         return xml::FontTag(from_utf8(fontToDocBookTag(type)), from_utf8(fontToAttribute(type)), type);
 172 }
 173
 174
 175 xml::EndFontTag docbookEndFontTag(xml::FontTypes type)
 176 {
 177         return xml::EndFontTag(from_utf8(fontToDocBookTag(type)), type);
 178 }
 179
 180
 181 // Convenience functions to open and close tags. First, very low-level ones to ensure a consistent new-line behaviour.
 182 // Block style:
 183 //        Content before
 184 //        <blocktag>
 185 //          Contents of the block.
 186 //        </blocktag>
 187 //        Content after
 188 // Paragraph style:
 189 //        Content before
 190 //          <paratag>Contents of the paragraph.</paratag>
 191 //        Content after
 192 // Inline style:
 193 //    Content before<inlinetag>Contents of the paragraph.</inlinetag>Content after
 194
 195 void openInlineTag(XMLStream & xs, const std::string & tag, const std::string & attr)
 196 {
 197         xs << xml::StartTag(tag, attr);
 198 }
 199
 200
 201 void closeInlineTag(XMLStream & xs, const std::string & tag)
 202 {
 203         xs << xml::EndTag(tag);
 204 }
 205
 206
 207 void openParTag(XMLStream & xs, const std::string & tag, const std::string & attr)
 208 {
 209         if (!xs.isLastTagCR())
 210                 xs << xml::CR();
 211         xs << xml::StartTag(tag, attr);
 212 }
 213
 214
 215 void closeParTag(XMLStream & xs, const std::string & tag)
 216 {
 217         xs << xml::EndTag(tag);
 218         xs << xml::CR();
 219 }
 220
 221
 222 void openBlockTag(XMLStream & xs, const std::string & tag, const std::string & attr)
 223 {
 224         if (!xs.isLastTagCR())
 225                 xs << xml::CR();
 226         xs << xml::StartTag(tag, attr);
 227         xs << xml::CR();
 228 }
 229
 230
 231 void closeBlockTag(XMLStream & xs, const std::string & tag)
 232 {
 233         if (!xs.isLastTagCR())
 234                 xs << xml::CR();
 235         xs << xml::EndTag(tag);
 236         xs << xml::CR();
 237 }
 238
 239
 240 void openTag(XMLStream & xs, const std::string & tag, const std::string & attr, const std::string & tagtype)
 241 {
 242         if (tag.empty() || tag == "NONE")
 243                 return;
 244
 245         if (tag == "para" || tagtype == "paragraph") // Special case for <para>: always considered as a paragraph.
 246                 openParTag(xs, tag, attr);
 247         else if (tagtype == "block")
 248                 openBlockTag(xs, tag, attr);
 249         else if (tagtype == "inline")
 250                 openInlineTag(xs, tag, attr);
 251         else
 252                 xs.writeError("Unrecognised tag type '" + tagtype + "' for '" + tag + " " + attr + "'");
 253 }
 254
 255
 256 void closeTag(XMLStream & xs, const std::string & tag, const std::string & tagtype)
 257 {
 258         if (tag.empty() || tag == "NONE")
 259                 return;
 260
 261         if (tag == "para" || tagtype == "paragraph") // Special case for <para>: always considered as a paragraph.
 262                 closeParTag(xs, tag);
 263         else if (tagtype == "block")
 264                 closeBlockTag(xs, tag);
 265         else if (tagtype == "inline")
 266                 closeInlineTag(xs, tag);
 267         else
 268                 xs.writeError("Unrecognised tag type '" + tagtype + "' for '" + tag + "'");
 269 }
 270
 271
 272 // Higher-level convenience functions.
 273
 274 void openParTag(XMLStream & xs, const Paragraph * par, const Paragraph * prevpar)
 275 {
 276         Layout const & lay = par->layout();
 277
 278         if (par == prevpar)
 279                 prevpar = nullptr;
 280
 281         // When should the wrapper be opened here? Only if the previous paragraph has the SAME wrapper tag
 282         // (usually, they won't have the same layout) and the CURRENT one allows merging.
 283         // The main use case is author information in several paragraphs: if the name of the author is the
 284         // first paragraph of an author, then merging with the previous tag does not make sense. Say the
 285         // next paragraph is the affiliation, then it should be output in the same <author> tag (different
 286         // layout, same wrapper tag).
 287         bool openWrapper = lay.docbookwrappertag() != "NONE";
 288         if (prevpar != nullptr) {
 289                 Layout const & prevlay = prevpar->layout();
 290                 if (prevlay.docbookwrappertag() != "NONE") {
 291                         openWrapper = prevlay.docbookwrappertag() == lay.docbookwrappertag()
 292                                         && !lay.docbookwrappermergewithprevious();
 293                 }
 294         }
 295
 296         // Main logic.
 297         if (openWrapper)
 298                 openTag(xs, lay.docbookwrappertag(), lay.docbookwrapperattr(), lay.docbookwrappertagtype());
 299
 300         const string & tag = lay.docbooktag();
 301         if (tag != "NONE") {
 302                 auto xmltag = xml::ParTag(tag, lay.docbookattr());
 303                 if (!xs.isTagOpen(xmltag, 1)) // Don't nest a paragraph directly in a paragraph.
 304                         // TODO: required or not?
 305                         // TODO: avoid creating a ParTag object just for this query...
 306                         openTag(xs, lay.docbooktag(), lay.docbookattr(), lay.docbooktagtype());
 307         }
 308
 309         openTag(xs, lay.docbookitemtag(), lay.docbookitemattr(), lay.docbookitemtagtype());
 310         openTag(xs, lay.docbookiteminnertag(), lay.docbookiteminnerattr(), lay.docbookiteminnertagtype());
 311 }
 312
 313
 314 void closeParTag(XMLStream & xs, Paragraph const * par, Paragraph const * nextpar)
 315 {
 316         if (par == nextpar)
 317                 nextpar = nullptr;
 318
 319         // See comment in openParTag.
 320         Layout const & lay = par->layout();
 321         bool closeWrapper = lay.docbookwrappertag() != "NONE";
 322         if (nextpar != nullptr) {
 323                 Layout const & nextlay = nextpar->layout();
 324                 if (nextlay.docbookwrappertag() != "NONE") {
 325                         closeWrapper = nextlay.docbookwrappertag() == lay.docbookwrappertag()
 326                                         && !nextlay.docbookwrappermergewithprevious();
 327                 }
 328         }
 329
 330         // Main logic.
 331         closeTag(xs, lay.docbookiteminnertag(), lay.docbookiteminnertagtype());
 332         closeTag(xs, lay.docbookitemtag(), lay.docbookitemtagtype());
 333         closeTag(xs, lay.docbooktag(), lay.docbooktagtype());
 334         if (closeWrapper)
 335                 closeTag(xs, lay.docbookwrappertag(), lay.docbookwrappertagtype());
 336 }
 337
 338
 339 void openLabelTag(XMLStream & xs, Layout const & lay) // Mostly for definition lists.
 340 {
 341         openTag(xs, lay.docbookitemlabeltag(), lay.docbookitemlabelattr(), lay.docbookitemlabeltagtype());
 342 }
 343
 344
 345 void closeLabelTag(XMLStream & xs, Layout const & lay)
 346 {
 347         closeTag(xs, lay.docbookitemlabeltag(), lay.docbookitemlabeltagtype());
 348 }
 349
 350
 351 void openItemTag(XMLStream & xs, Layout const & lay)
 352 {
 353         openTag(xs, lay.docbookitemtag(), lay.docbookitemattr(), lay.docbookitemtagtype());
 354 }
 355
 356
 357 void closeItemTag(XMLStream & xs, Layout const & lay)
 358 {
 359         closeTag(xs, lay.docbookitemtag(), lay.docbookitemtagtype());
 360 }
 361
 362
 363 void makeParagraphBibliography(
 364                 Buffer const & buf,
 365                 XMLStream & xs,
 366                 OutputParams const & runparams,
 367                 Text const & text,
 368                 ParagraphList::const_iterator const & pbegin)
 369 {
 370         auto const begin = text.paragraphs().begin();
 371         auto const end = text.paragraphs().end();
 372         auto pend = pbegin;
 373         ++pend;
 374
 375         // Find the paragraph *before* pbegin.
 376         ParagraphList::const_iterator pbegin_before = begin;
 377         if (pbegin != begin) {
 378                 ParagraphList::const_iterator pbegin_before_next = begin;
 379                 ++pbegin_before_next;
 380
 381                 while (pbegin_before_next != pbegin) {
 382                         ++pbegin_before;
 383                         ++pbegin_before_next;
 384                 }
 385         }
 386
 387         ParagraphList::const_iterator par = pbegin;
 388
 389         // If this is the first paragraph in a bibliography, open the bibliography tag.
 390         if (pbegin != begin && pbegin_before->layout().latextype != LATEX_BIB_ENVIRONMENT) {
 391                 xs << xml::StartTag("bibliography");
 392                 xs << xml::CR();
 393         }
 394
 395         // Generate the required paragraphs, but only if they are .
 396         for (; par != pend; ++par) {
 397                 // Start the precooked bibliography entry. This is very much like opening a paragraph tag.
 398                 // Don't forget the citation ID!
 399                 docstring attr;
 400                 for (auto i = 0; i < par->size(); ++i) {
 401                         Inset const *ip = par->getInset(0);
 402                         if (ip != nullptr && ip->lyxCode() == BIBITEM_CODE) {
 403                                 const auto * bibitem = dynamic_cast<const InsetBibitem*>(par->getInset(i));
 404                                 attr = from_utf8("xml:id='") + bibitem->getParam("key") + from_utf8("'");
 405                                 break;
 406                         }
 407                 }
 408                 xs << xml::StartTag(from_utf8("bibliomixed"), attr);
 409
 410                 // Generate the entry.
 411                 par->simpleDocBookOnePar(buf, xs, runparams, text.outerFont(distance(begin, par)), true, true, 0);
 412
 413                 // End the precooked bibliography entry.
 414                 xs << xml::EndTag("bibliomixed");
 415                 xs << xml::CR();
 416         }
 417
 418         // If this is the last paragraph in a bibliography, close the bibliography tag.
 419         if (par == end || par->layout().latextype != LATEX_BIB_ENVIRONMENT) {
 420                 xs << xml::EndTag("bibliography");
 421                 xs << xml::CR();
 422         }
 423 }
 424
 425
 426 void makeParagraph(
 427                 Buffer const & buf,
 428                 XMLStream & xs,
 429                 OutputParams const & runparams,
 430                 Text const & text,
 431                 ParagraphList::const_iterator const & par)
 432 {
 433         auto const begin = text.paragraphs().begin();
 434         auto const end = text.paragraphs().end();
 435         auto prevpar = text.paragraphs().getParagraphBefore(par);
 436
 437         // We want to open the paragraph tag if:
 438         //   (i) the current layout permits multiple paragraphs
 439         //  (ii) we are either not already inside a paragraph (HTMLIsBlock) OR
 440         //         we are, but this is not the first paragraph
 441         //
 442         // But there is also a special case, and we first see whether we are in it.
 443         // We do not want to open the paragraph tag if this paragraph contains
 444         // only one item, and that item is "inline", i.e., not HTMLIsBlock (such
 445         // as a branch). On the other hand, if that single item has a font change
 446         // applied to it, then we still do need to open the paragraph.
 447         //
 448         // Obviously, this is very fragile. The main reason we need to do this is
 449         // because of branches, e.g., a branch that contains an entire new section.
 450         // We do not really want to wrap that whole thing in a <div>...</div>.
 451         bool special_case = false;
 452         Inset const *specinset = par->size() == 1 ? par->getInset(0) : nullptr;
 453         if (specinset && !specinset->getLayout().htmlisblock()) { // TODO: Convert htmlisblock to a DocBook parameter?
 454                 Layout const &style = par->layout();
 455                 FontInfo const first_font = style.labeltype == LABEL_MANUAL ?
 456                                                                         style.labelfont : style.font;
 457                 FontInfo const our_font =
 458                                 par->getFont(buf.masterBuffer()->params(), 0,
 459                                                          text.outerFont(std::distance(begin, par))).fontInfo();
 460
 461                 if (first_font == our_font)
 462                         special_case = true;
 463         }
 464
 465         // Plain layouts must be ignored.
 466         if (!special_case && buf.params().documentClass().isPlainLayout(par->layout()) && !runparams.docbook_force_pars)
 467                 special_case = true;
 468         // TODO: Could get rid of this with a DocBook equivalent to htmlisblock?
 469         if (!special_case && par->size() == 1 && par->getInset(0)) {
 470                 Inset const * firstInset = par->getInset(0);
 471
 472                 // Floats cannot be in paragraphs.
 473                 special_case = to_utf8(firstInset->layoutName()).substr(0, 6) == "Float:";
 474
 475                 // Bibliographies cannot be in paragraphs.
 476                 if (!special_case && firstInset->asInsetCommand())
 477                         special_case = firstInset->asInsetCommand()->params().getCmdName() == "bibtex";
 478
 479                 // Equations do not deserve their own paragraph (DocBook allows them outside paragraphs).
 480                 if (!special_case && firstInset->asInsetMath())
 481                         special_case = true;
 482
 483                 // ERTs are in comments, not paragraphs.
 484                 if (!special_case && firstInset->lyxCode() == lyx::ERT_CODE)
 485                         special_case = true;
 486
 487                 // Listings should not get into their own paragraph.
 488                 if (!special_case && firstInset->lyxCode() == lyx::LISTINGS_CODE)
 489                         special_case = true;
 490         }
 491
 492         bool const open_par = runparams.docbook_make_pars
 493                                                   && !runparams.docbook_in_par
 494                                                   && !special_case;
 495
 496         // We want to issue the closing tag if either:
 497         //   (i)  We opened it, and either docbook_in_par is false,
 498         //              or we're not in the last paragraph, anyway.
 499         //   (ii) We didn't open it and docbook_in_par is true,
 500         //              but we are in the first par, and there is a next par.
 501         auto nextpar = par;
 502         ++nextpar;
 503         bool const close_par = open_par && (!runparams.docbook_in_par);
 504
 505         // Determine if this paragraph has some real content. Things like new pages are not caught
 506         // by Paragraph::empty(), even though they do not generate anything useful in DocBook.
 507         odocstringstream os2;
 508         XMLStream xs2(os2);
 509         par->simpleDocBookOnePar(buf, xs2, runparams, text.outerFont(distance(begin, par)), open_par, close_par, 0);
 510
 511         docstring cleaned = os2.str();
 512         static const lyx::regex reg("[ \\r\\n]*");
 513         cleaned = from_utf8(lyx::regex_replace(to_utf8(cleaned), reg, string("")));
 514
 515         if (!cleaned.empty()) {
 516                 if (open_par)
 517                         openParTag(xs, &*par, prevpar);
 518
 519                 xs << XMLStream::ESCAPE_NONE << os2.str();
 520
 521                 if (close_par)
 522                         closeParTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr);
 523         }
 524 }
 525
 526
 527 void makeAny(
 528                 Text const &text,
 529                 Buffer const &buf,
 530                 XMLStream &xs,
 531                 OutputParams const &ourparams,
 532                 ParagraphList::const_iterator par);
 533
 534
 535 void makeEnvironment(
 536                 Buffer const &buf,
 537                 XMLStream &xs,
 538                 OutputParams const &runparams,
 539                 Text const &text,
 540                 ParagraphList::const_iterator const & par)
 541 {
 542         auto const end = text.paragraphs().end();
 543
 544         // Output the opening tag for this environment, but only if it has not been previously opened (condition
 545         // implemented in openParTag).
 546         auto prevpar = text.paragraphs().getParagraphBefore(par);
 547         openParTag(xs, &*par, prevpar); // TODO: switch in layout for par/block?
 548
 549         // Generate the contents of this environment. There is a special case if this is like some environment.
 550         Layout const & style = par->layout();
 551         if (style.latextype == LATEX_COMMAND) {
 552                 // Nothing to do (otherwise, infinite loops).
 553         } else if (style.latextype == LATEX_ENVIRONMENT ||
 554                         style.latextype == LATEX_LIST_ENVIRONMENT ||
 555                         style.latextype == LATEX_ITEM_ENVIRONMENT) {
 556                 // Open a wrapper tag if needed.
 557                 if (style.docbookitemwrappertag() != "NONE") {
 558                         xs << xml::StartTag(style.docbookitemwrappertag(), style.docbookitemwrapperattr());
 559                         xs << xml::CR();
 560                 }
 561
 562                 // Generate the label, if need be. If it is taken from the text, sep != 0 and corresponds to the first
 563                 // character after the label.
 564                 pos_type sep = 0;
 565                 if (style.labeltype != LABEL_NO_LABEL && style.docbookitemlabeltag() != "NONE") {
 566                         // At least one condition must be met:
 567                         //  - this environment is not a list
 568                         //  - if this is a list, the label must not be manual (i.e. it must be taken from the layout)
 569                         if (style.latextype != LATEX_LIST_ENVIRONMENT || style.labeltype != LABEL_MANUAL) {
 570                                 // Usual cases: maybe there is something specified at the layout level. Highly unlikely, though.
 571                                 docstring const lbl = par->params().labelString();
 572
 573                                 if (lbl.empty()) {
 574                                         xs << xml::CR();
 575                                 } else {
 576                                         openLabelTag(xs, style);
 577                                         xs << lbl;
 578                                         closeLabelTag(xs, style);
 579                                 }
 580                         } else {
 581                                 // Only variablelist gets here (or similar items defined as an extension in the layout).
 582                                 openLabelTag(xs, style);
 583                                 sep = par->firstWordDocBook(xs, runparams);
 584                                 closeLabelTag(xs, style);
 585                         }
 586                 }
 587
 588                 // Maybe the item is completely empty, i.e. if the first word ends at the end of the current paragraph
 589                 // AND if the next paragraph doesn't have the same depth (if there is such a paragraph).
 590                 // Common case: there is only the first word on the line, but there is a nested list instead
 591                 // of more text.
 592                 bool emptyItem = false;
 593                 if (sep == par->size()) { // If the separator is already at the end of this paragraph...
 594                         auto next_par = par;
 595                         ++next_par;
 596                         if (next_par == text.paragraphs().end()) // There is no next paragraph.
 597                                 emptyItem = true;
 598                         else // There is a next paragraph: check depth.
 599                                 emptyItem = par->params().depth() >= next_par->params().depth();
 600                 }
 601
 602                 if (emptyItem) {
 603                         // Avoid having an empty item, this is not valid DocBook. A single character is enough to force
 604                         // generation of a full <para>.
 605                         // TODO: this always worked only by magic...
 606                         xs << ' ';
 607                 } else {
 608                         // Generate the rest of the paragraph, if need be.
 609                         par->simpleDocBookOnePar(buf, xs, runparams, text.outerFont(std::distance(text.paragraphs().begin(), par)),
 610                                                                  true, true, sep);
 611                 }
 612         } else {
 613                 makeAny(text, buf, xs, runparams, par);
 614         }
 615
 616         // Close the environment.
 617         auto nextpar = par;
 618         ++nextpar;
 619         closeParTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr); // TODO: switch in layout for par/block?
 620 }
 621
 622
 623 void makeCommand(
 624                 Buffer const & buf,
 625                 XMLStream & xs,
 626                 OutputParams const & runparams,
 627                 Text const & text,
 628                 ParagraphList::const_iterator const & par)
 629 {
 630         // Unlike XHTML, no need for labels, as they are handled by DocBook tags.
 631         auto const begin = text.paragraphs().begin();
 632         auto const end = text.paragraphs().end();
 633         auto nextpar = par;
 634         ++nextpar;
 635
 636         // Generate this command.
 637         auto prevpar = text.paragraphs().getParagraphBefore(par);
 638         openParTag(xs, &*par, prevpar);
 639
 640         par->simpleDocBookOnePar(buf, xs, runparams,
 641                                  text.outerFont(distance(begin, par)));
 642
 643         closeParTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr);
 644 }
 645
 646
 647 void makeAny(
 648                 Text const &text,
 649                 Buffer const &buf,
 650                 XMLStream &xs,
 651                 OutputParams const &ourparams,
 652                 ParagraphList::const_iterator par)
 653 {
 654         switch (par->layout().latextype) {
 655         case LATEX_COMMAND:
 656                 makeCommand(buf, xs, ourparams, text, par);
 657                 break;
 658         case LATEX_ENVIRONMENT:
 659         case LATEX_LIST_ENVIRONMENT:
 660         case LATEX_ITEM_ENVIRONMENT:
 661                 makeEnvironment(buf, xs, ourparams, text, par);
 662                 break;
 663         case LATEX_PARAGRAPH:
 664                 makeParagraph(buf, xs, ourparams, text, par);
 665                 break;
 666         case LATEX_BIB_ENVIRONMENT:
 667                 makeParagraphBibliography(buf, xs, ourparams, text, par);
 668                 break;
 669         }
 670 }
 671
 672 } // end anonymous namespace
 673
 674
 675 using DocBookDocumentSectioning = tuple<bool, pit_type>;
 676
 677
 678 struct DocBookInfoTag
 679 {
 680         const set<pit_type> shouldBeInInfo;
 681         const set<pit_type> mustBeInInfo;
 682         const set<pit_type> abstract;
 683         pit_type bpit;
 684         pit_type epit;
 685
 686         DocBookInfoTag(const set<pit_type> & shouldBeInInfo, const set<pit_type> & mustBeInInfo,
 687                                    const set<pit_type> & abstract, pit_type bpit, pit_type epit) :
 688                                    shouldBeInInfo(shouldBeInInfo), mustBeInInfo(mustBeInInfo), abstract(abstract),
 689                                    bpit(bpit), epit(epit) {}
 690 };
 691
 692
 693 DocBookDocumentSectioning hasDocumentSectioning(ParagraphList const &paragraphs, pit_type bpit, pit_type const epit) {
 694         bool documentHasSections = false;
 695
 696         while (bpit < epit) {
 697                 Layout const &style = paragraphs[bpit].layout();
 698                 documentHasSections |= style.category() == from_utf8("Sectioning");
 699
 700                 if (documentHasSections)
 701                         break;
 702                 bpit += 1;
 703         }
 704         // Paragraphs before the first section: [ runparams.par_begin ; eppit )
 705
 706         return make_tuple(documentHasSections, bpit);
 707 }
 708
 709
 710 bool hasOnlyNotes(Paragraph const & par)
 711 {
 712         // Precondition: the paragraph is not empty. Otherwise, the function will always return true...
 713         for (int i = 0; i < par.size(); ++i)
 714                 // If you find something that is not an inset (like actual text) or an inset that is not a note,
 715                 // return false.
 716                 if (!par.isInset(i) || !dynamic_cast<InsetNote *>(par.insetList().get(i)))
 717                         return false;
 718         return true;
 719 }
 720
 721
 722 DocBookInfoTag getParagraphsWithInfo(ParagraphList const &paragraphs, pit_type bpit, pit_type const epit) {
 723         set<pit_type> shouldBeInInfo;
 724         set<pit_type> mustBeInInfo;
 725         set<pit_type> abstract;
 726
 727         // Find the first non empty paragraph by mutating bpit.
 728         while (bpit < epit) {
 729                 Paragraph const &par = paragraphs[bpit];
 730                 if (par.empty() || hasOnlyNotes(par))
 731                         bpit += 1;
 732                 else
 733                         break;
 734         }
 735
 736         // Find the last info-like paragraph.
 737         pit_type cpit = bpit;
 738         bool hasAbstractLayout = false;
 739         while (cpit < epit) {
 740                 // Skip paragraphs only containing one note.
 741                 Paragraph const & par = paragraphs[cpit];
 742                 if (hasOnlyNotes(par)) {
 743                         cpit += 1;
 744                         continue;
 745                 }
 746
 747                 if (par.layout().docbookabstract())
 748                         hasAbstractLayout = true;
 749
 750                 // Based on layout information, store this paragraph in one set: should be in <info>, must be.
 751                 Layout const &style = par.layout();
 752
 753                 if (style.docbookininfo() == "always") {
 754                         mustBeInInfo.emplace(cpit);
 755                 } else if (style.docbookininfo() == "maybe") {
 756                         shouldBeInInfo.emplace(cpit);
 757                 } else {
 758                         // Hypothesis: the <info> parts should be grouped together near the beginning bpit.
 759                         // There may be notes in between, but nothing else.
 760                         break;
 761                 }
 762                 cpit += 1;
 763         }
 764         // Now, cpit points to the last paragraph that has things that could go in <info>.
 765         // bpit is the beginning of the <info> part.
 766
 767         // Go once again through the list of paragraphs to find the abstract. If there is an abstract
 768         // layout, only consider it. Otherwise, an abstract is just a sequence of paragraphs with text.
 769         if (hasAbstractLayout) {
 770                 pit_type pit = bpit;
 771                 while (pit < cpit) { // Don't overshoot the <info> part.
 772                         if (paragraphs[pit].layout().docbookabstract())
 773                                 abstract.emplace(pit);
 774                         pit++;
 775                 }
 776         } else {
 777                 pit_type lastAbstract = epit + 1; // A nonsensical value.
 778                 docstring lastAbstractLayout;
 779
 780                 pit_type pit = bpit;
 781                 while (pit < cpit) { // Don't overshoot the <info> part.
 782                         const Paragraph & par = paragraphs.at(pit);
 783                         if (!par.insetList().empty()) {
 784                                 for (const auto &i : par.insetList()) {
 785                                         if (i.inset->getText(0) != nullptr) {
 786                                                 if (lastAbstract == epit + 1) {
 787                                                         // First paragraph that matches the heuristic definition of abstract.
 788                                                         lastAbstract = pit;
 789                                                         lastAbstractLayout = par.layout().name();
 790                                                 } else if (pit > lastAbstract + 1 || par.layout().name() != lastAbstractLayout) {
 791                                                         // This is either too far from the last abstract paragraph or doesn't
 792                                                         // have the right layout name, BUT there has already been an abstract
 793                                                         // in this document: done with detecting the abstract.
 794                                                         goto done; // Easier to get out of two nested loops.
 795                                                 }
 796
 797                                                 abstract.emplace(pit);
 798                                                 break;
 799                                         }
 800                                 }
 801                         }
 802                         pit++;
 803                 }
 804         }
 805
 806         done:
 807         return DocBookInfoTag(shouldBeInInfo, mustBeInInfo, abstract, bpit, cpit);
 808 }
 809
 810
 811 void outputDocBookInfo(
 812                 Text const & text,
 813                 Buffer const & buf,
 814                 XMLStream & xs,
 815                 OutputParams const & runparams,
 816                 ParagraphList const & paragraphs,
 817                 DocBookInfoTag const & info)
 818 {
 819         // Perform an additional check on the abstract. Sometimes, there are many paragraphs that should go
 820         // into the abstract, but none generates actual content. Thus, first generate to a temporary stream,
 821         // then only create the <abstract> tag if these paragraphs generate some content.
 822         // This check must be performed *before* a decision on whether or not to output <info> is made.
 823         bool hasAbstract = !info.abstract.empty();
 824         docstring abstract;
 825         if (hasAbstract) {
 826                 // Generate the abstract XML into a string before further checks.
 827                 odocstringstream os2;
 828                 {
 829                         XMLStream xs2(os2);
 830                         auto bpit = *std::min_element(info.abstract.begin(), info.abstract.end());
 831                         auto epit = 1 + *std::max_element(info.abstract.begin(), info.abstract.end());
 832                         // info.abstract is inclusive, epit is exclusive, hence +1 for looping.
 833
 834                         while (bpit < epit) {
 835                                 makeAny(text, buf, xs2, runparams, paragraphs.iterator_at(bpit));
 836                                 bpit += 1;
 837                         }
 838                 }
 839
 840                 // Actually output the abstract if there is something to do. Don't count line feeds or spaces in this,
 841                 // even though they must be properly output if there is some abstract.
 842                 abstract = os2.str();
 843                 static const lyx::regex reg("[ \\r\\n]*");
 844                 docstring abstractContent = from_utf8(lyx::regex_replace(to_utf8(abstract), reg, string("")));
 845
 846                 // Nothing? Then there is no abstract!
 847                 if (abstractContent.empty())
 848                         hasAbstract = false;
 849         }
 850
 851         // The abstract must go in <info>. Otherwise, decide whether to open <info> based on the layouts.
 852         bool needInfo = !info.mustBeInInfo.empty() || hasAbstract;
 853
 854         // Start the <info> tag if required.
 855         if (needInfo) {
 856                 xs.startDivision(false);
 857                 xs << xml::StartTag("info");
 858                 xs << xml::CR();
 859         }
 860
 861         // Output the elements that should go in <info>, before and after the abstract.
 862         for (auto pit : info.shouldBeInInfo) { // Typically, the title: these elements are so important and ubiquitous
 863                 // that mandating a wrapper like <info> would repel users. Thus, generate them first.
 864                 makeAny(text, buf, xs, runparams, paragraphs.iterator_at(pit));
 865         }
 866         for (auto pit : info.mustBeInInfo) {
 867                 if (info.abstract.find(pit) == info.abstract.end()) // The abstract must be in info, but is dealt with after.
 868                         makeAny(text, buf, xs, runparams, paragraphs.iterator_at(pit));
 869         }
 870
 871         // Always output the abstract as the last item of the <info>, as it requires special treatment (especially if
 872         // it contains several paragraphs that are empty).
 873         if (hasAbstract) {
 874 //              string tag = paragraphs[*info.abstract.begin()].layout().docbookforceabstracttag();
 875 //              if (tag == "NONE")
 876 //                      tag = "abstract";
 877 //
 878 //              xs << xml::StartTag(tag);
 879 //              xs << xml::CR();
 880                 xs << XMLStream::ESCAPE_NONE << abstract;
 881 //              xs << xml::EndTag(tag);
 882 //              xs << xml::CR();
 883         }
 884
 885         // End the <info> tag if it was started.
 886         if (needInfo) {
 887                 xs << xml::EndTag("info");
 888                 xs << xml::CR();
 889                 xs.endDivision();
 890         }
 891 }
 892
 893
 894 void docbookFirstParagraphs(
 895                 Text const &text,
 896                 Buffer const &buf,
 897                 XMLStream &xs,
 898                 OutputParams const &runparams,
 899                 pit_type epit)
 900 {
 901         // Handle the beginning of the document, supposing it has sections.
 902         // Major role: output the first <info> tag.
 903
 904         ParagraphList const &paragraphs = text.paragraphs();
 905         pit_type bpit = runparams.par_begin;
 906         DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit);
 907         outputDocBookInfo(text, buf, xs, runparams, paragraphs, info);
 908 }
 909
 910
 911 void docbookSimpleAllParagraphs(
 912                 Text const & text,
 913                 Buffer const & buf,
 914                 XMLStream & xs,
 915                 OutputParams const & runparams)
 916 {
 917         // Handle the given text, supposing it has no sections (i.e. a "simple" text). The input may vary in length
 918         // between a single paragraph to a whole document.
 919
 920         // First, the <info> tag.
 921         ParagraphList const &paragraphs = text.paragraphs();
 922         pit_type bpit = runparams.par_begin;
 923         pit_type const epit = runparams.par_end;
 924         DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit);
 925         outputDocBookInfo(text, buf, xs, runparams, paragraphs, info);
 926
 927         // Then, the content. It starts where the <info> ends.
 928         bpit = info.epit;
 929         while (bpit < epit) {
 930                 auto par = paragraphs.iterator_at(bpit);
 931                 if (!hasOnlyNotes(*par))
 932                         makeAny(text, buf, xs, runparams, par);
 933                 bpit += 1;
 934         }
 935 }
 936
 937
 938 void docbookParagraphs(Text const &text,
 939                                            Buffer const &buf,
 940                                            XMLStream &xs,
 941                                            OutputParams const &runparams) {
 942         ParagraphList const &paragraphs = text.paragraphs();
 943         if (runparams.par_begin == runparams.par_end) {
 944                 runparams.par_begin = 0;
 945                 runparams.par_end = paragraphs.size();
 946         }
 947         pit_type bpit = runparams.par_begin;
 948         pit_type const epit = runparams.par_end;
 949         LASSERT(bpit < epit,
 950                         {
 951                                 xs << XMLStream::ESCAPE_NONE << "<!-- DocBook output error! -->\n";
 952                                 return;
 953                         });
 954
 955         std::stack<std::pair<int, string>> headerLevels; // Used to determine when to open/close sections: store the depth
 956         // of the section and the tag that was used to open it.
 957
 958         // Detect whether the document contains sections. If there are no sections, there can be no automatically
 959         // discovered abstract.
 960         bool documentHasSections;
 961         pit_type eppit;
 962         tie(documentHasSections, eppit) = hasDocumentSectioning(paragraphs, bpit, epit);
 963
 964         if (documentHasSections) {
 965                 docbookFirstParagraphs(text, buf, xs, runparams, eppit);
 966                 bpit = eppit;
 967         } else {
 968                 docbookSimpleAllParagraphs(text, buf, xs, runparams);
 969                 return;
 970         }
 971
 972         bool currentlyInAppendix = false;
 973
 974         while (bpit < epit) {
 975                 OutputParams ourparams = runparams;
 976
 977                 auto par = paragraphs.iterator_at(bpit);
 978                 if (par->params().startOfAppendix())
 979                         currentlyInAppendix = true;
 980                 Layout const &style = par->layout();
 981                 ParagraphList::const_iterator const lastStartedPar = par;
 982                 ParagraphList::const_iterator send;
 983
 984                 if (hasOnlyNotes(*par)) {
 985                         bpit += 1;
 986                         continue;
 987                 }
 988
 989                 // Think about adding <section> and/or </section>s.
 990                 const bool isLayoutSectioning = style.category() == from_utf8("Sectioning");
 991                 if (isLayoutSectioning) {
 992                         int level = style.toclevel;
 993
 994                         // Need to close a previous section if it has the same level or a higher one (close <section> if opening a <h2>
 995                         // after a <h2>, <h3>, <h4>, <h5> or <h6>). More examples:
 996                         //   - current: h2; back: h1; do not close any <section>
 997                         //   - current: h1; back: h2; close two <section> (first the <h2>, then the <h1>, so a new <h1> can come)
 998                         while (!headerLevels.empty() && level <= headerLevels.top().first) {
 999                                 int stackLevel = headerLevels.top().first;
1000                                 docstring stackTag = from_utf8("</" + headerLevels.top().second + ">");
1001                                 headerLevels.pop();
1002
1003                                 // Output the tag only if it corresponds to a legit section.
1004                                 if (stackLevel != Layout::NOT_IN_TOC)
1005                                         xs << XMLStream::ESCAPE_NONE << stackTag << xml::CR();
1006                         }
1007
1008                         // Open the new section: first push it onto the stack, then output it in DocBook.
1009                         string sectionTag = (currentlyInAppendix && style.docbooksectiontag() == "chapter") ?
1010                                                                 "appendix" : style.docbooksectiontag();
1011                         headerLevels.push(std::make_pair(level, sectionTag));
1012
1013                         // Some sectioning-like elements should not be output (such as FrontMatter).
1014                         if (level != Layout::NOT_IN_TOC) {
1015                                 // Look for a label in the title, i.e. a InsetLabel as a child.
1016                                 docstring id = docstring();
1017                                 for (pos_type i = 0; i < par->size(); ++i) {
1018                                         Inset const *inset = par->getInset(i);
1019                                         if (inset) {
1020                                                 if (auto label = dynamic_cast<InsetLabel const *>(inset)) {
1021                                                         // Generate the attributes for the section if need be.
1022                                                         id += "xml:id=\"" + xml::cleanID(label->screenLabel()) + "\"";
1023
1024                                                         // Don't output the ID as a DocBook <anchor>.
1025                                                         ourparams.docbook_anchors_to_ignore.emplace(label->screenLabel());
1026
1027                                                         // Cannot have multiple IDs per tag.
1028                                                         break;
1029                                                 }
1030                                         }
1031                                 }
1032
1033                                 // Write the open tag for this section.
1034                                 docstring tag = from_utf8("<" + sectionTag);
1035                                 if (!id.empty())
1036                                         tag += from_utf8(" ") + id;
1037                                 tag += from_utf8(">");
1038                                 xs << XMLStream::ESCAPE_NONE << tag;
1039                                 xs << xml::CR();
1040                         }
1041                 }
1042
1043                 // Close all sections before the bibliography.
1044                 // TODO: Only close all when the bibliography is at the end of the document? Or force to output the bibliography at the end of the document? Or don't care (as allowed by DocBook)?
1045                 auto insetsLength = distance(par->insetList().begin(), par->insetList().end());
1046                 if (insetsLength > 0) {
1047                         Inset const *firstInset = par->getInset(0);
1048                         if (firstInset && dynamic_cast<InsetBibtex const *>(firstInset)) {
1049                                 while (!headerLevels.empty()) {
1050                                         int level = headerLevels.top().first;
1051                                         docstring tag = from_utf8("</" + headerLevels.top().second + ">");
1052                                         headerLevels.pop();
1053
1054                                         // Output the tag only if it corresponds to a legit section.
1055                                         if (level != Layout::NOT_IN_TOC) {
1056                                                 xs << XMLStream::ESCAPE_NONE << tag;
1057                                                 xs << xml::CR();
1058                                         }
1059                                 }
1060                         }
1061                 }
1062
1063                 // Generate this paragraph.
1064                 makeAny(text, buf, xs, ourparams, par);
1065                 bpit += 1;
1066         }
1067
1068         // If need be, close <section>s, but only at the end of the document (otherwise, dealt with at the beginning
1069         // of the loop).
1070         while (!headerLevels.empty() && headerLevels.top().first > Layout::NOT_IN_TOC) {
1071                 docstring tag = from_utf8("</" + headerLevels.top().second + ">");
1072                 headerLevels.pop();
1073                 xs << XMLStream::ESCAPE_NONE << tag;
1074                 xs << xml::CR();
1075         }
1076 }
1077
1078 } // namespace lyx