src/output_docbook.cpp

   1 /**
   2  * \file output_docbook.cpp
   3  * This file is part of LyX, the document processor.
   4  * Licence details can be found in the file COPYING.
   5  *
   6  * \author Lars Gullik Bjønnes
   7  * \author José Matos
   8  *
   9  * Full author contact details are available in file CREDITS.
  10  */
  11
  12 #include <config.h>
  13
  14 #include "Buffer.h"
  15 #include "buffer_funcs.h"
  16 #include "BufferParams.h"
  17 #include "Font.h"
  18 #include "InsetList.h"
  19 #include "Paragraph.h"
  20 #include "ParagraphList.h"
  21 #include "ParagraphParameters.h"
  22 #include "xml.h"
  23 #include "Text.h"
  24 #include "TextClass.h"
  25
  26 #include "insets/InsetBibtex.h"
  27 #include "insets/InsetBibitem.h"
  28 #include "insets/InsetLabel.h"
  29 #include "insets/InsetNote.h"
  30
  31 #include "support/lassert.h"
  32
  33 #include "support/regex.h"
  34
  35 #include <stack>
  36 #include <iostream>
  37 #include <algorithm>
  38 #include <sstream>
  39
  40 using namespace std;
  41 using namespace lyx::support;
  42
  43 namespace lyx {
  44
  45 namespace {
  46
  47 std::string fontToDocBookTag(xml::FontTypes type)
  48 {
  49         switch (type) {
  50         case xml::FontTypes::FT_EMPH:
  51         case xml::FontTypes::FT_BOLD:
  52                 return "emphasis";
  53         case xml::FontTypes::FT_NOUN:
  54                 return "person";
  55         case xml::FontTypes::FT_UBAR:
  56         case xml::FontTypes::FT_WAVE:
  57         case xml::FontTypes::FT_DBAR:
  58         case xml::FontTypes::FT_SOUT:
  59         case xml::FontTypes::FT_XOUT:
  60         case xml::FontTypes::FT_ITALIC:
  61         case xml::FontTypes::FT_UPRIGHT:
  62         case xml::FontTypes::FT_SLANTED:
  63         case xml::FontTypes::FT_SMALLCAPS:
  64         case xml::FontTypes::FT_ROMAN:
  65         case xml::FontTypes::FT_SANS:
  66                 return "emphasis";
  67         case xml::FontTypes::FT_TYPE:
  68                 return "code";
  69         case xml::FontTypes::FT_SIZE_TINY:
  70         case xml::FontTypes::FT_SIZE_SCRIPT:
  71         case xml::FontTypes::FT_SIZE_FOOTNOTE:
  72         case xml::FontTypes::FT_SIZE_SMALL:
  73         case xml::FontTypes::FT_SIZE_NORMAL:
  74         case xml::FontTypes::FT_SIZE_LARGE:
  75         case xml::FontTypes::FT_SIZE_LARGER:
  76         case xml::FontTypes::FT_SIZE_LARGEST:
  77         case xml::FontTypes::FT_SIZE_HUGE:
  78         case xml::FontTypes::FT_SIZE_HUGER:
  79         case xml::FontTypes::FT_SIZE_INCREASE:
  80         case xml::FontTypes::FT_SIZE_DECREASE:
  81                 return "emphasis";
  82         default:
  83                 return "";
  84         }
  85 }
  86
  87
  88 string fontToRole(xml::FontTypes type)
  89 {
  90         // Specific fonts are achieved with roles. The only common ones are "" for basic emphasis,
  91         // and "bold"/"strong" for bold. With some specific options, other roles are copied into
  92         // HTML output (via the DocBook XSLT sheets); otherwise, if not recognised, they are just ignored.
  93         // Hence, it is not a problem to have many roles by default here.
  94         // See https://www.sourceware.org/ml/docbook/2003-05/msg00269.html
  95         switch (type) {
  96         case xml::FontTypes::FT_ITALIC:
  97         case xml::FontTypes::FT_EMPH:
  98                 return "";
  99         case xml::FontTypes::FT_BOLD:
 100                 return "bold";
 101         case xml::FontTypes::FT_NOUN: // Outputs a <person>
 102         case xml::FontTypes::FT_TYPE: // Outputs a <code>
 103                 return "";
 104         case xml::FontTypes::FT_UBAR:
 105                 return "underline";
 106
 107         // All other roles are non-standard for DocBook.
 108
 109         case xml::FontTypes::FT_WAVE:
 110                 return "wave";
 111         case xml::FontTypes::FT_DBAR:
 112                 return "dbar";
 113         case xml::FontTypes::FT_SOUT:
 114                 return "sout";
 115         case xml::FontTypes::FT_XOUT:
 116                 return "xout";
 117         case xml::FontTypes::FT_UPRIGHT:
 118                 return "upright";
 119         case xml::FontTypes::FT_SLANTED:
 120                 return "slanted";
 121         case xml::FontTypes::FT_SMALLCAPS:
 122                 return "smallcaps";
 123         case xml::FontTypes::FT_ROMAN:
 124                 return "roman";
 125         case xml::FontTypes::FT_SANS:
 126                 return "sans";
 127         case xml::FontTypes::FT_SIZE_TINY:
 128                 return "tiny";
 129         case xml::FontTypes::FT_SIZE_SCRIPT:
 130                 return "size_script";
 131         case xml::FontTypes::FT_SIZE_FOOTNOTE:
 132                 return "size_footnote";
 133         case xml::FontTypes::FT_SIZE_SMALL:
 134                 return "size_small";
 135         case xml::FontTypes::FT_SIZE_NORMAL:
 136                 return "size_normal";
 137         case xml::FontTypes::FT_SIZE_LARGE:
 138                 return "size_large";
 139         case xml::FontTypes::FT_SIZE_LARGER:
 140                 return "size_larger";
 141         case xml::FontTypes::FT_SIZE_LARGEST:
 142                 return "size_largest";
 143         case xml::FontTypes::FT_SIZE_HUGE:
 144                 return "size_huge";
 145         case xml::FontTypes::FT_SIZE_HUGER:
 146                 return "size_huger";
 147         case xml::FontTypes::FT_SIZE_INCREASE:
 148                 return "size_increase";
 149         case xml::FontTypes::FT_SIZE_DECREASE:
 150                 return "size_decrease";
 151         default:
 152                 return "";
 153         }
 154 }
 155
 156
 157 string fontToAttribute(xml::FontTypes type) {
 158         // If there is a role (i.e. nonstandard use of a tag), output the attribute. Otherwise, the sheer tag is sufficient
 159         // for the font.
 160         string role = fontToRole(type);
 161         if (!role.empty()) {
 162                 return "role='" + role + "'";
 163         } else {
 164                 return "";
 165         }
 166 }
 167
 168
 169 xml::FontTag docbookStartFontTag(xml::FontTypes type)
 170 {
 171         return xml::FontTag(from_utf8(fontToDocBookTag(type)), from_utf8(fontToAttribute(type)), type);
 172 }
 173
 174
 175 xml::EndFontTag docbookEndFontTag(xml::FontTypes type)
 176 {
 177         return xml::EndFontTag(from_utf8(fontToDocBookTag(type)), type);
 178 }
 179
 180
 181 // Convenience functions to open and close tags. First, very low-level ones to ensure a consistent new-line behaviour.
 182 // Block style:
 183 //        Content before
 184 //        <blocktag>
 185 //          Contents of the block.
 186 //        </blocktag>
 187 //        Content after
 188 // Paragraph style:
 189 //        Content before
 190 //          <paratag>Contents of the paragraph.</paratag>
 191 //        Content after
 192 // Inline style:
 193 //    Content before<inlinetag>Contents of the paragraph.</inlinetag>Content after
 194
 195 void openInlineTag(XMLStream & xs, const std::string & tag, const std::string & attr)
 196 {
 197         xs << xml::StartTag(tag, attr);
 198 }
 199
 200
 201 void closeInlineTag(XMLStream & xs, const std::string & tag)
 202 {
 203         xs << xml::EndTag(tag);
 204 }
 205
 206
 207 void openParTag(XMLStream & xs, const std::string & tag, const std::string & attr)
 208 {
 209         if (!xs.isLastTagCR())
 210                 xs << xml::CR();
 211         xs << xml::StartTag(tag, attr);
 212 }
 213
 214
 215 void closeParTag(XMLStream & xs, const std::string & tag)
 216 {
 217         xs << xml::EndTag(tag);
 218         xs << xml::CR();
 219 }
 220
 221
 222 void openBlockTag(XMLStream & xs, const std::string & tag, const std::string & attr)
 223 {
 224         if (!xs.isLastTagCR())
 225                 xs << xml::CR();
 226         xs << xml::StartTag(tag, attr);
 227         xs << xml::CR();
 228 }
 229
 230
 231 void closeBlockTag(XMLStream & xs, const std::string & tag)
 232 {
 233         if (!xs.isLastTagCR())
 234                 xs << xml::CR();
 235         xs << xml::EndTag(tag);
 236         xs << xml::CR();
 237 }
 238
 239
 240 void openTag(XMLStream & xs, const std::string & tag, const std::string & attr, const std::string & tagtype)
 241 {
 242         if (tag.empty() || tag == "NONE")
 243                 return;
 244
 245         if (tag == "para" || tagtype == "paragraph") // Special case for <para>: always considered as a paragraph.
 246                 openParTag(xs, tag, attr);
 247         else if (tagtype == "block")
 248                 openBlockTag(xs, tag, attr);
 249         else if (tagtype == "inline")
 250                 openInlineTag(xs, tag, attr);
 251         else
 252                 xs.writeError("Unrecognised tag type '" + tagtype + "' for '" + tag + " " + attr + "'");
 253 }
 254
 255
 256 void closeTag(XMLStream & xs, const std::string & tag, const std::string & tagtype)
 257 {
 258         if (tag.empty() || tag == "NONE")
 259                 return;
 260
 261         if (tag == "para" || tagtype == "paragraph") // Special case for <para>: always considered as a paragraph.
 262                 closeParTag(xs, tag);
 263         else if (tagtype == "block")
 264                 closeBlockTag(xs, tag);
 265         else if (tagtype == "inline")
 266                 closeInlineTag(xs, tag);
 267         else
 268                 xs.writeError("Unrecognised tag type '" + tagtype + "' for '" + tag + "'");
 269 }
 270
 271
 272 // Higher-level convenience functions.
 273
 274 void openParTag(XMLStream & xs, const Paragraph * par, const Paragraph * prevpar)
 275 {
 276         Layout const & lay = par->layout();
 277
 278         if (par == prevpar)
 279                 prevpar = nullptr;
 280
 281         // When should the wrapper be opened here? Only if the previous paragraph has the SAME wrapper tag
 282         // (usually, they won't have the same layout) and the CURRENT one allows merging.
 283         // The main use case is author information in several paragraphs: if the name of the author is the
 284         // first paragraph of an author, then merging with the previous tag does not make sense. Say the
 285         // next paragraph is the affiliation, then it should be output in the same <author> tag (different
 286         // layout, same wrapper tag).
 287         bool openWrapper = lay.docbookwrappertag() != "NONE";
 288         if (prevpar != nullptr) {
 289                 Layout const & prevlay = prevpar->layout();
 290                 if (prevlay.docbookwrappertag() != "NONE") {
 291                         openWrapper = prevlay.docbookwrappertag() == lay.docbookwrappertag()
 292                                         && !lay.docbookwrappermergewithprevious();
 293                 }
 294         }
 295
 296         // Main logic.
 297         if (openWrapper)
 298                 openTag(xs, lay.docbookwrappertag(), lay.docbookwrapperattr(), lay.docbookwrappertagtype());
 299
 300         const string & tag = lay.docbooktag();
 301         if (tag != "NONE") {
 302                 auto xmltag = xml::ParTag(tag, lay.docbookattr());
 303                 if (!xs.isTagOpen(xmltag, 1)) // Don't nest a paragraph directly in a paragraph.
 304                         // TODO: required or not?
 305                         // TODO: avoid creating a ParTag object just for this query...
 306                         openTag(xs, lay.docbooktag(), lay.docbookattr(), lay.docbooktagtype());
 307         }
 308
 309         openTag(xs, lay.docbookitemtag(), lay.docbookitemattr(), lay.docbookitemtagtype());
 310         openTag(xs, lay.docbookiteminnertag(), lay.docbookiteminnerattr(), lay.docbookiteminnertagtype());
 311 }
 312
 313
 314 void closeParTag(XMLStream & xs, Paragraph const * par, Paragraph const * nextpar)
 315 {
 316         if (par == nextpar)
 317                 nextpar = nullptr;
 318
 319         // See comment in openParTag.
 320         Layout const & lay = par->layout();
 321         bool closeWrapper = lay.docbookwrappertag() != "NONE";
 322         if (nextpar != nullptr) {
 323                 Layout const & nextlay = nextpar->layout();
 324                 if (nextlay.docbookwrappertag() != "NONE") {
 325                         closeWrapper = nextlay.docbookwrappertag() == lay.docbookwrappertag()
 326                                         && !nextlay.docbookwrappermergewithprevious();
 327                 }
 328         }
 329
 330         // Main logic.
 331         closeTag(xs, lay.docbookiteminnertag(), lay.docbookiteminnertagtype());
 332         closeTag(xs, lay.docbookitemtag(), lay.docbookitemtagtype());
 333         closeTag(xs, lay.docbooktag(), lay.docbooktagtype());
 334         if (closeWrapper)
 335                 closeTag(xs, lay.docbookwrappertag(), lay.docbookwrappertagtype());
 336 }
 337
 338
 339 void openLabelTag(XMLStream & xs, Layout const & lay) // Mostly for definition lists.
 340 {
 341         openTag(xs, lay.docbookitemlabeltag(), lay.docbookitemlabelattr(), lay.docbookitemlabeltagtype());
 342 }
 343
 344
 345 void closeLabelTag(XMLStream & xs, Layout const & lay)
 346 {
 347         closeTag(xs, lay.docbookitemlabeltag(), lay.docbookitemlabeltagtype());
 348 }
 349
 350
 351 void openItemTag(XMLStream & xs, Layout const & lay)
 352 {
 353         openTag(xs, lay.docbookitemtag(), lay.docbookitemattr(), lay.docbookitemtagtype());
 354 }
 355
 356
 357 void closeItemTag(XMLStream & xs, Layout const & lay)
 358 {
 359         closeTag(xs, lay.docbookitemtag(), lay.docbookitemtagtype());
 360 }
 361
 362
 363 void makeAny(
 364                 Text const &,
 365                 Buffer const &,
 366                 XMLStream &,
 367                 OutputParams const &,
 368                 ParagraphList::const_iterator);
 369
 370
 371 void makeParagraphBibliography(
 372                 Buffer const & buf,
 373                 XMLStream & xs,
 374                 OutputParams const & runparams,
 375                 Text const & text,
 376                 ParagraphList::const_iterator const & par)
 377 {
 378         // If this is the first paragraph in a bibliography, open the bibliography tag.
 379         auto pbegin_before = text.paragraphs().getParagraphBefore(par);
 380         if (pbegin_before->layout().latextype != LATEX_BIB_ENVIRONMENT) {
 381                 xs << xml::StartTag("bibliography");
 382                 xs << xml::CR();
 383         }
 384
 385         // Start the precooked bibliography entry. This is very much like opening a paragraph tag.
 386         // Don't forget the citation ID!
 387         docstring attr;
 388         for (auto i = 0; i < par->size(); ++i) {
 389                 Inset const *ip = par->getInset(i);
 390                 if (!ip)
 391                         continue;
 392                 if (const auto * bibitem = dynamic_cast<const InsetBibitem*>(ip)) {
 393                         attr = from_utf8("xml:id='") + bibitem->getParam("key") + from_utf8("'");
 394                         break;
 395                 }
 396         }
 397         xs << xml::StartTag(from_utf8("bibliomixed"), attr);
 398
 399         // Generate the entry.
 400         auto const begin = text.paragraphs().begin();
 401         par->simpleDocBookOnePar(buf, xs, runparams, text.outerFont(std::distance(begin, par)), true, true, 0);
 402
 403         // End the precooked bibliography entry.
 404         xs << xml::EndTag("bibliomixed");
 405         xs << xml::CR();
 406
 407         // If this is the last paragraph in a bibliography, close the bibliography tag.
 408         auto const end = text.paragraphs().end();
 409         bool endBibliography = par == end;
 410         if (!endBibliography) {
 411                 auto nextpar = par;
 412                 ++nextpar;
 413                 endBibliography = par->layout().latextype != LATEX_BIB_ENVIRONMENT;
 414         }
 415
 416         if (endBibliography) {
 417                 xs << xml::EndTag("bibliography");
 418                 xs << xml::CR();
 419         }
 420 }
 421
 422
 423 void makeParagraph(
 424                 Buffer const & buf,
 425                 XMLStream & xs,
 426                 OutputParams const & runparams,
 427                 Text const & text,
 428                 ParagraphList::const_iterator const & par)
 429 {
 430         auto const begin = text.paragraphs().begin();
 431         auto const end = text.paragraphs().end();
 432         auto prevpar = text.paragraphs().getParagraphBefore(par);
 433
 434         // We want to open the paragraph tag if:
 435         //   (i) the current layout permits multiple paragraphs
 436         //  (ii) we are either not already inside a paragraph (HTMLIsBlock) OR
 437         //         we are, but this is not the first paragraph
 438         //
 439         // But there is also a special case, and we first see whether we are in it.
 440         // We do not want to open the paragraph tag if this paragraph contains
 441         // only one item, and that item is "inline", i.e., not HTMLIsBlock (such
 442         // as a branch). On the other hand, if that single item has a font change
 443         // applied to it, then we still do need to open the paragraph.
 444         //
 445         // Obviously, this is very fragile. The main reason we need to do this is
 446         // because of branches, e.g., a branch that contains an entire new section.
 447         // We do not really want to wrap that whole thing in a <div>...</div>.
 448         bool special_case = false;
 449         Inset const *specinset = par->size() == 1 ? par->getInset(0) : nullptr;
 450         if (specinset && !specinset->getLayout().htmlisblock()) { // TODO: Convert htmlisblock to a DocBook parameter?
 451                 Layout const &style = par->layout();
 452                 FontInfo const first_font = style.labeltype == LABEL_MANUAL ?
 453                                                                         style.labelfont : style.font;
 454                 FontInfo const our_font =
 455                                 par->getFont(buf.masterBuffer()->params(), 0,
 456                                                          text.outerFont(std::distance(begin, par))).fontInfo();
 457
 458                 if (first_font == our_font)
 459                         special_case = true;
 460         }
 461
 462         // Plain layouts must be ignored.
 463         if (!special_case && buf.params().documentClass().isPlainLayout(par->layout()) && !runparams.docbook_force_pars)
 464                 special_case = true;
 465         // TODO: Could get rid of this with a DocBook equivalent to htmlisblock?
 466         if (!special_case && par->size() == 1 && par->getInset(0)) {
 467                 Inset const * firstInset = par->getInset(0);
 468
 469                 // Floats cannot be in paragraphs.
 470                 special_case = to_utf8(firstInset->layoutName()).substr(0, 6) == "Float:";
 471
 472                 // Bibliographies cannot be in paragraphs.
 473                 if (!special_case && firstInset->asInsetCommand())
 474                         special_case = firstInset->asInsetCommand()->params().getCmdName() == "bibtex";
 475
 476                 // Equations do not deserve their own paragraph (DocBook allows them outside paragraphs).
 477                 if (!special_case && firstInset->asInsetMath())
 478                         special_case = true;
 479
 480                 // ERTs are in comments, not paragraphs.
 481                 if (!special_case && firstInset->lyxCode() == lyx::ERT_CODE)
 482                         special_case = true;
 483
 484                 // Listings should not get into their own paragraph.
 485                 if (!special_case && firstInset->lyxCode() == lyx::LISTINGS_CODE)
 486                         special_case = true;
 487         }
 488
 489         bool const open_par = runparams.docbook_make_pars
 490                                                   && !runparams.docbook_in_par
 491                                                   && !special_case;
 492
 493         // We want to issue the closing tag if either:
 494         //   (i)  We opened it, and either docbook_in_par is false,
 495         //              or we're not in the last paragraph, anyway.
 496         //   (ii) We didn't open it and docbook_in_par is true,
 497         //              but we are in the first par, and there is a next par.
 498         auto nextpar = par;
 499         ++nextpar;
 500         bool const close_par = open_par && (!runparams.docbook_in_par);
 501
 502         // Determine if this paragraph has some real content. Things like new pages are not caught
 503         // by Paragraph::empty(), even though they do not generate anything useful in DocBook.
 504         odocstringstream os2;
 505         XMLStream xs2(os2);
 506         par->simpleDocBookOnePar(buf, xs2, runparams, text.outerFont(distance(begin, par)), open_par, close_par, 0);
 507
 508         docstring cleaned = os2.str();
 509         static const lyx::regex reg("[ \\r\\n]*");
 510         cleaned = from_utf8(lyx::regex_replace(to_utf8(cleaned), reg, string("")));
 511
 512         if (!cleaned.empty()) {
 513                 if (open_par)
 514                         openParTag(xs, &*par, prevpar);
 515
 516                 xs << XMLStream::ESCAPE_NONE << os2.str();
 517
 518                 if (close_par)
 519                         closeParTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr);
 520         }
 521 }
 522
 523
 524 void makeEnvironment(
 525                 Buffer const &buf,
 526                 XMLStream &xs,
 527                 OutputParams const &runparams,
 528                 Text const &text,
 529                 ParagraphList::const_iterator const & par)
 530 {
 531         auto const end = text.paragraphs().end();
 532
 533         // Output the opening tag for this environment, but only if it has not been previously opened (condition
 534         // implemented in openParTag).
 535         auto prevpar = text.paragraphs().getParagraphBefore(par);
 536         openParTag(xs, &*par, prevpar); // TODO: switch in layout for par/block?
 537
 538         // Generate the contents of this environment. There is a special case if this is like some environment.
 539         Layout const & style = par->layout();
 540         if (style.latextype == LATEX_COMMAND) {
 541                 // Nothing to do (otherwise, infinite loops).
 542         } else if (style.latextype == LATEX_ENVIRONMENT ||
 543                         style.latextype == LATEX_LIST_ENVIRONMENT ||
 544                         style.latextype == LATEX_ITEM_ENVIRONMENT) {
 545                 // Open a wrapper tag if needed.
 546                 if (style.docbookitemwrappertag() != "NONE") {
 547                         xs << xml::StartTag(style.docbookitemwrappertag(), style.docbookitemwrapperattr());
 548                         xs << xml::CR();
 549                 }
 550
 551                 // Generate the label, if need be. If it is taken from the text, sep != 0 and corresponds to the first
 552                 // character after the label.
 553                 pos_type sep = 0;
 554                 if (style.labeltype != LABEL_NO_LABEL && style.docbookitemlabeltag() != "NONE") {
 555                         // At least one condition must be met:
 556                         //  - this environment is not a list
 557                         //  - if this is a list, the label must not be manual (i.e. it must be taken from the layout)
 558                         if (style.latextype != LATEX_LIST_ENVIRONMENT || style.labeltype != LABEL_MANUAL) {
 559                                 // Usual cases: maybe there is something specified at the layout level. Highly unlikely, though.
 560                                 docstring const lbl = par->params().labelString();
 561
 562                                 if (lbl.empty()) {
 563                                         xs << xml::CR();
 564                                 } else {
 565                                         openLabelTag(xs, style);
 566                                         xs << lbl;
 567                                         closeLabelTag(xs, style);
 568                                 }
 569                         } else {
 570                                 // Only variablelist gets here (or similar items defined as an extension in the layout).
 571                                 openLabelTag(xs, style);
 572                                 sep = par->firstWordDocBook(xs, runparams);
 573                                 closeLabelTag(xs, style);
 574                         }
 575                 }
 576
 577                 // Maybe the item is completely empty, i.e. if the first word ends at the end of the current paragraph
 578                 // AND if the next paragraph doesn't have the same depth (if there is such a paragraph).
 579                 // Common case: there is only the first word on the line, but there is a nested list instead
 580                 // of more text.
 581                 bool emptyItem = false;
 582                 if (sep == par->size()) { // If the separator is already at the end of this paragraph...
 583                         auto next_par = par;
 584                         ++next_par;
 585                         if (next_par == text.paragraphs().end()) // There is no next paragraph.
 586                                 emptyItem = true;
 587                         else // There is a next paragraph: check depth.
 588                                 emptyItem = par->params().depth() >= next_par->params().depth();
 589                 }
 590
 591                 if (emptyItem) {
 592                         // Avoid having an empty item, this is not valid DocBook. A single character is enough to force
 593                         // generation of a full <para>.
 594                         // TODO: this always worked only by magic...
 595                         xs << ' ';
 596                 } else {
 597                         // Generate the rest of the paragraph, if need be.
 598                         par->simpleDocBookOnePar(buf, xs, runparams, text.outerFont(std::distance(text.paragraphs().begin(), par)),
 599                                                                  true, true, sep);
 600                 }
 601         } else {
 602                 makeAny(text, buf, xs, runparams, par);
 603         }
 604
 605         // Close the environment.
 606         auto nextpar = par;
 607         ++nextpar;
 608         closeParTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr); // TODO: switch in layout for par/block?
 609 }
 610
 611
 612 void makeCommand(
 613                 Buffer const & buf,
 614                 XMLStream & xs,
 615                 OutputParams const & runparams,
 616                 Text const & text,
 617                 ParagraphList::const_iterator const & par)
 618 {
 619         // Unlike XHTML, no need for labels, as they are handled by DocBook tags.
 620         auto const begin = text.paragraphs().begin();
 621         auto const end = text.paragraphs().end();
 622         auto nextpar = par;
 623         ++nextpar;
 624
 625         // Generate this command.
 626         auto prevpar = text.paragraphs().getParagraphBefore(par);
 627         openParTag(xs, &*par, prevpar);
 628
 629         par->simpleDocBookOnePar(buf, xs, runparams,
 630                                  text.outerFont(distance(begin, par)));
 631
 632         closeParTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr);
 633 }
 634
 635
 636 void makeAny(
 637                 Text const &text,
 638                 Buffer const &buf,
 639                 XMLStream &xs,
 640                 OutputParams const &ourparams,
 641                 ParagraphList::const_iterator par)
 642 {
 643         switch (par->layout().latextype) {
 644         case LATEX_COMMAND:
 645                 makeCommand(buf, xs, ourparams, text, par);
 646                 break;
 647         case LATEX_ENVIRONMENT:
 648         case LATEX_LIST_ENVIRONMENT:
 649         case LATEX_ITEM_ENVIRONMENT:
 650                 makeEnvironment(buf, xs, ourparams, text, par);
 651                 break;
 652         case LATEX_PARAGRAPH:
 653                 makeParagraph(buf, xs, ourparams, text, par);
 654                 break;
 655         case LATEX_BIB_ENVIRONMENT:
 656                 makeParagraphBibliography(buf, xs, ourparams, text, par);
 657                 break;
 658         }
 659 }
 660
 661 } // end anonymous namespace
 662
 663
 664 using DocBookDocumentSectioning = tuple<bool, pit_type>;
 665
 666
 667 struct DocBookInfoTag
 668 {
 669         const set<pit_type> shouldBeInInfo;
 670         const set<pit_type> mustBeInInfo;
 671         const set<pit_type> abstract;
 672         pit_type bpit;
 673         pit_type epit;
 674
 675         DocBookInfoTag(const set<pit_type> & shouldBeInInfo, const set<pit_type> & mustBeInInfo,
 676                                    const set<pit_type> & abstract, pit_type bpit, pit_type epit) :
 677                                    shouldBeInInfo(shouldBeInInfo), mustBeInInfo(mustBeInInfo), abstract(abstract),
 678                                    bpit(bpit), epit(epit) {}
 679 };
 680
 681
 682 DocBookDocumentSectioning hasDocumentSectioning(ParagraphList const &paragraphs, pit_type bpit, pit_type const epit) {
 683         bool documentHasSections = false;
 684
 685         while (bpit < epit) {
 686                 Layout const &style = paragraphs[bpit].layout();
 687                 documentHasSections |= style.category() == from_utf8("Sectioning");
 688
 689                 if (documentHasSections)
 690                         break;
 691                 bpit += 1;
 692         }
 693         // Paragraphs before the first section: [ runparams.par_begin ; eppit )
 694
 695         return make_tuple(documentHasSections, bpit);
 696 }
 697
 698
 699 bool hasOnlyNotes(Paragraph const & par)
 700 {
 701         // Precondition: the paragraph is not empty. Otherwise, the function will always return true...
 702         for (int i = 0; i < par.size(); ++i)
 703                 // If you find something that is not an inset (like actual text) or an inset that is not a note,
 704                 // return false.
 705                 if (!par.isInset(i) || !dynamic_cast<InsetNote *>(par.insetList().get(i)))
 706                         return false;
 707         return true;
 708 }
 709
 710
 711 DocBookInfoTag getParagraphsWithInfo(ParagraphList const &paragraphs, pit_type bpit, pit_type const epit) {
 712         set<pit_type> shouldBeInInfo;
 713         set<pit_type> mustBeInInfo;
 714         set<pit_type> abstract;
 715
 716         // Find the first non empty paragraph by mutating bpit.
 717         while (bpit < epit) {
 718                 Paragraph const &par = paragraphs[bpit];
 719                 if (par.empty() || hasOnlyNotes(par))
 720                         bpit += 1;
 721                 else
 722                         break;
 723         }
 724
 725         // Find the last info-like paragraph.
 726         pit_type cpit = bpit;
 727         bool hasAbstractLayout = false;
 728         while (cpit < epit) {
 729                 // Skip paragraphs only containing one note.
 730                 Paragraph const & par = paragraphs[cpit];
 731                 if (hasOnlyNotes(par)) {
 732                         cpit += 1;
 733                         continue;
 734                 }
 735
 736                 if (par.layout().docbookabstract())
 737                         hasAbstractLayout = true;
 738
 739                 // Based on layout information, store this paragraph in one set: should be in <info>, must be.
 740                 Layout const &style = par.layout();
 741
 742                 if (style.docbookininfo() == "always") {
 743                         mustBeInInfo.emplace(cpit);
 744                 } else if (style.docbookininfo() == "maybe") {
 745                         shouldBeInInfo.emplace(cpit);
 746                 } else {
 747                         // Hypothesis: the <info> parts should be grouped together near the beginning bpit.
 748                         // There may be notes in between, but nothing else.
 749                         break;
 750                 }
 751                 cpit += 1;
 752         }
 753         // Now, cpit points to the last paragraph that has things that could go in <info>.
 754         // bpit is the beginning of the <info> part.
 755
 756         // Go once again through the list of paragraphs to find the abstract. If there is an abstract
 757         // layout, only consider it. Otherwise, an abstract is just a sequence of paragraphs with text.
 758         if (hasAbstractLayout) {
 759                 pit_type pit = bpit;
 760                 while (pit < cpit) { // Don't overshoot the <info> part.
 761                         if (paragraphs[pit].layout().docbookabstract())
 762                                 abstract.emplace(pit);
 763                         pit++;
 764                 }
 765         } else {
 766                 pit_type lastAbstract = epit + 1; // A nonsensical value.
 767                 docstring lastAbstractLayout;
 768
 769                 pit_type pit = bpit;
 770                 while (pit < cpit) { // Don't overshoot the <info> part.
 771                         const Paragraph & par = paragraphs.at(pit);
 772                         if (!par.insetList().empty()) {
 773                                 for (const auto &i : par.insetList()) {
 774                                         if (i.inset->getText(0) != nullptr) {
 775                                                 if (lastAbstract == epit + 1) {
 776                                                         // First paragraph that matches the heuristic definition of abstract.
 777                                                         lastAbstract = pit;
 778                                                         lastAbstractLayout = par.layout().name();
 779                                                 } else if (pit > lastAbstract + 1 || par.layout().name() != lastAbstractLayout) {
 780                                                         // This is either too far from the last abstract paragraph or doesn't
 781                                                         // have the right layout name, BUT there has already been an abstract
 782                                                         // in this document: done with detecting the abstract.
 783                                                         goto done; // Easier to get out of two nested loops.
 784                                                 }
 785
 786                                                 abstract.emplace(pit);
 787                                                 break;
 788                                         }
 789                                 }
 790                         }
 791                         pit++;
 792                 }
 793         }
 794
 795         done:
 796         return DocBookInfoTag(shouldBeInInfo, mustBeInInfo, abstract, bpit, cpit);
 797 }
 798
 799
 800 void outputDocBookInfo(
 801                 Text const & text,
 802                 Buffer const & buf,
 803                 XMLStream & xs,
 804                 OutputParams const & runparams,
 805                 ParagraphList const & paragraphs,
 806                 DocBookInfoTag const & info)
 807 {
 808         // Perform an additional check on the abstract. Sometimes, there are many paragraphs that should go
 809         // into the abstract, but none generates actual content. Thus, first generate to a temporary stream,
 810         // then only create the <abstract> tag if these paragraphs generate some content.
 811         // This check must be performed *before* a decision on whether or not to output <info> is made.
 812         bool hasAbstract = !info.abstract.empty();
 813         docstring abstract;
 814         if (hasAbstract) {
 815                 // Generate the abstract XML into a string before further checks.
 816                 odocstringstream os2;
 817                 {
 818                         XMLStream xs2(os2);
 819                         auto bpit = *std::min_element(info.abstract.begin(), info.abstract.end());
 820                         auto epit = 1 + *std::max_element(info.abstract.begin(), info.abstract.end());
 821                         // info.abstract is inclusive, epit is exclusive, hence +1 for looping.
 822
 823                         while (bpit < epit) {
 824                                 makeAny(text, buf, xs2, runparams, paragraphs.iterator_at(bpit));
 825                                 bpit += 1;
 826                         }
 827                 }
 828
 829                 // Actually output the abstract if there is something to do. Don't count line feeds or spaces in this,
 830                 // even though they must be properly output if there is some abstract.
 831                 abstract = os2.str();
 832                 static const lyx::regex reg("[ \\r\\n]*");
 833                 docstring abstractContent = from_utf8(lyx::regex_replace(to_utf8(abstract), reg, string("")));
 834
 835                 // Nothing? Then there is no abstract!
 836                 if (abstractContent.empty())
 837                         hasAbstract = false;
 838         }
 839
 840         // The abstract must go in <info>. Otherwise, decide whether to open <info> based on the layouts.
 841         bool needInfo = !info.mustBeInInfo.empty() || hasAbstract;
 842
 843         // Start the <info> tag if required.
 844         if (needInfo) {
 845                 xs.startDivision(false);
 846                 xs << xml::StartTag("info");
 847                 xs << xml::CR();
 848         }
 849
 850         // Output the elements that should go in <info>, before and after the abstract.
 851         for (auto pit : info.shouldBeInInfo) { // Typically, the title: these elements are so important and ubiquitous
 852                 // that mandating a wrapper like <info> would repel users. Thus, generate them first.
 853                 makeAny(text, buf, xs, runparams, paragraphs.iterator_at(pit));
 854         }
 855         for (auto pit : info.mustBeInInfo) {
 856                 if (info.abstract.find(pit) == info.abstract.end()) // The abstract must be in info, but is dealt with after.
 857                         makeAny(text, buf, xs, runparams, paragraphs.iterator_at(pit));
 858         }
 859
 860         // Always output the abstract as the last item of the <info>, as it requires special treatment (especially if
 861         // it contains several paragraphs that are empty).
 862         if (hasAbstract) {
 863 //              string tag = paragraphs[*info.abstract.begin()].layout().docbookforceabstracttag();
 864 //              if (tag == "NONE")
 865 //                      tag = "abstract";
 866 //
 867 //              xs << xml::StartTag(tag);
 868 //              xs << xml::CR();
 869                 xs << XMLStream::ESCAPE_NONE << abstract;
 870 //              xs << xml::EndTag(tag);
 871 //              xs << xml::CR();
 872         }
 873
 874         // End the <info> tag if it was started.
 875         if (needInfo) {
 876                 xs << xml::EndTag("info");
 877                 xs << xml::CR();
 878                 xs.endDivision();
 879         }
 880 }
 881
 882
 883 void docbookFirstParagraphs(
 884                 Text const &text,
 885                 Buffer const &buf,
 886                 XMLStream &xs,
 887                 OutputParams const &runparams,
 888                 pit_type epit)
 889 {
 890         // Handle the beginning of the document, supposing it has sections.
 891         // Major role: output the first <info> tag.
 892
 893         ParagraphList const &paragraphs = text.paragraphs();
 894         pit_type bpit = runparams.par_begin;
 895         DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit);
 896         outputDocBookInfo(text, buf, xs, runparams, paragraphs, info);
 897 }
 898
 899
 900 void docbookSimpleAllParagraphs(
 901                 Text const & text,
 902                 Buffer const & buf,
 903                 XMLStream & xs,
 904                 OutputParams const & runparams)
 905 {
 906         // Handle the given text, supposing it has no sections (i.e. a "simple" text). The input may vary in length
 907         // between a single paragraph to a whole document.
 908
 909         // First, the <info> tag.
 910         ParagraphList const &paragraphs = text.paragraphs();
 911         pit_type bpit = runparams.par_begin;
 912         pit_type const epit = runparams.par_end;
 913         DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit);
 914         outputDocBookInfo(text, buf, xs, runparams, paragraphs, info);
 915
 916         // Then, the content. It starts where the <info> ends.
 917         bpit = info.epit;
 918         while (bpit < epit) {
 919                 auto par = paragraphs.iterator_at(bpit);
 920                 if (!hasOnlyNotes(*par))
 921                         makeAny(text, buf, xs, runparams, par);
 922                 bpit += 1;
 923         }
 924 }
 925
 926
 927 void docbookParagraphs(Text const &text,
 928                                            Buffer const &buf,
 929                                            XMLStream &xs,
 930                                            OutputParams const &runparams) {
 931         ParagraphList const &paragraphs = text.paragraphs();
 932         if (runparams.par_begin == runparams.par_end) {
 933                 runparams.par_begin = 0;
 934                 runparams.par_end = paragraphs.size();
 935         }
 936         pit_type bpit = runparams.par_begin;
 937         pit_type const epit = runparams.par_end;
 938         LASSERT(bpit < epit,
 939                         {
 940                                 xs << XMLStream::ESCAPE_NONE << "<!-- DocBook output error! -->\n";
 941                                 return;
 942                         });
 943
 944         std::stack<std::pair<int, string>> headerLevels; // Used to determine when to open/close sections: store the depth
 945         // of the section and the tag that was used to open it.
 946
 947         // Detect whether the document contains sections. If there are no sections, there can be no automatically
 948         // discovered abstract.
 949         bool documentHasSections;
 950         pit_type eppit;
 951         tie(documentHasSections, eppit) = hasDocumentSectioning(paragraphs, bpit, epit);
 952
 953         if (documentHasSections) {
 954                 docbookFirstParagraphs(text, buf, xs, runparams, eppit);
 955                 bpit = eppit;
 956         } else {
 957                 docbookSimpleAllParagraphs(text, buf, xs, runparams);
 958                 return;
 959         }
 960
 961         bool currentlyInAppendix = false;
 962
 963         while (bpit < epit) {
 964                 OutputParams ourparams = runparams;
 965
 966                 auto par = paragraphs.iterator_at(bpit);
 967                 if (par->params().startOfAppendix())
 968                         currentlyInAppendix = true;
 969                 Layout const &style = par->layout();
 970                 ParagraphList::const_iterator const lastStartedPar = par;
 971                 ParagraphList::const_iterator send;
 972
 973                 if (hasOnlyNotes(*par)) {
 974                         bpit += 1;
 975                         continue;
 976                 }
 977
 978                 // Think about adding <section> and/or </section>s.
 979                 const bool isLayoutSectioning = style.category() == from_utf8("Sectioning");
 980                 if (isLayoutSectioning) {
 981                         int level = style.toclevel;
 982
 983                         // Need to close a previous section if it has the same level or a higher one (close <section> if opening a <h2>
 984                         // after a <h2>, <h3>, <h4>, <h5> or <h6>). More examples:
 985                         //   - current: h2; back: h1; do not close any <section>
 986                         //   - current: h1; back: h2; close two <section> (first the <h2>, then the <h1>, so a new <h1> can come)
 987                         while (!headerLevels.empty() && level <= headerLevels.top().first) {
 988                                 int stackLevel = headerLevels.top().first;
 989                                 docstring stackTag = from_utf8("</" + headerLevels.top().second + ">");
 990                                 headerLevels.pop();
 991
 992                                 // Output the tag only if it corresponds to a legit section.
 993                                 if (stackLevel != Layout::NOT_IN_TOC)
 994                                         xs << XMLStream::ESCAPE_NONE << stackTag << xml::CR();
 995                         }
 996
 997                         // Open the new section: first push it onto the stack, then output it in DocBook.
 998                         string sectionTag = (currentlyInAppendix && style.docbooksectiontag() == "chapter") ?
 999                                                                 "appendix" : style.docbooksectiontag();
1000                         headerLevels.push(std::make_pair(level, sectionTag));
1001
1002                         // Some sectioning-like elements should not be output (such as FrontMatter).
1003                         if (level != Layout::NOT_IN_TOC) {
1004                                 // Look for a label in the title, i.e. a InsetLabel as a child.
1005                                 docstring id = docstring();
1006                                 for (pos_type i = 0; i < par->size(); ++i) {
1007                                         Inset const *inset = par->getInset(i);
1008                                         if (inset) {
1009                                                 if (auto label = dynamic_cast<InsetLabel const *>(inset)) {
1010                                                         // Generate the attributes for the section if need be.
1011                                                         id += "xml:id=\"" + xml::cleanID(label->screenLabel()) + "\"";
1012
1013                                                         // Don't output the ID as a DocBook <anchor>.
1014                                                         ourparams.docbook_anchors_to_ignore.emplace(label->screenLabel());
1015
1016                                                         // Cannot have multiple IDs per tag.
1017                                                         break;
1018                                                 }
1019                                         }
1020                                 }
1021
1022                                 // Write the open tag for this section.
1023                                 docstring tag = from_utf8("<" + sectionTag);
1024                                 if (!id.empty())
1025                                         tag += from_utf8(" ") + id;
1026                                 tag += from_utf8(">");
1027                                 xs << XMLStream::ESCAPE_NONE << tag;
1028                                 xs << xml::CR();
1029                         }
1030                 }
1031
1032                 // Close all sections before the bibliography.
1033                 // TODO: Only close all when the bibliography is at the end of the document? Or force to output the bibliography at the end of the document? Or don't care (as allowed by DocBook)?
1034                 auto insetsLength = distance(par->insetList().begin(), par->insetList().end());
1035                 if (insetsLength > 0) {
1036                         Inset const *firstInset = par->getInset(0);
1037                         if (firstInset && dynamic_cast<InsetBibtex const *>(firstInset)) {
1038                                 while (!headerLevels.empty()) {
1039                                         int level = headerLevels.top().first;
1040                                         docstring tag = from_utf8("</" + headerLevels.top().second + ">");
1041                                         headerLevels.pop();
1042
1043                                         // Output the tag only if it corresponds to a legit section.
1044                                         if (level != Layout::NOT_IN_TOC) {
1045                                                 xs << XMLStream::ESCAPE_NONE << tag;
1046                                                 xs << xml::CR();
1047                                         }
1048                                 }
1049                         }
1050                 }
1051
1052                 // Generate this paragraph.
1053                 makeAny(text, buf, xs, ourparams, par);
1054                 bpit += 1;
1055         }
1056
1057         // If need be, close <section>s, but only at the end of the document (otherwise, dealt with at the beginning
1058         // of the loop).
1059         while (!headerLevels.empty() && headerLevels.top().first > Layout::NOT_IN_TOC) {
1060                 docstring tag = from_utf8("</" + headerLevels.top().second + ">");
1061                 headerLevels.pop();
1062                 xs << XMLStream::ESCAPE_NONE << tag;
1063                 xs << xml::CR();
1064         }
1065 }
1066
1067 } // namespace lyx