src/output_docbook.cpp

   1 /**
   2  * \file output_docbook.cpp
   3  * This file is part of LyX, the document processor.
   4  * Licence details can be found in the file COPYING.
   5  *
   6  * \author Lars Gullik Bjønnes
   7  * \author José Matos
   8  *
   9  * Full author contact details are available in file CREDITS.
  10  */
  11
  12 #include <config.h>
  13
  14 #include "Buffer.h"
  15 #include "buffer_funcs.h"
  16 #include "BufferParams.h"
  17 #include "Font.h"
  18 #include "InsetList.h"
  19 #include "Paragraph.h"
  20 #include "ParagraphList.h"
  21 #include "ParagraphParameters.h"
  22 #include "xml.h"
  23 #include "Text.h"
  24 #include "TextClass.h"
  25
  26 #include "insets/InsetBibtex.h"
  27 #include "insets/InsetBibitem.h"
  28 #include "insets/InsetLabel.h"
  29 #include "insets/InsetNote.h"
  30
  31 #include "support/lassert.h"
  32
  33 #include "support/regex.h"
  34
  35 #include <stack>
  36 #include <iostream>
  37 #include <algorithm>
  38 #include <sstream>
  39
  40 using namespace std;
  41 using namespace lyx::support;
  42
  43 namespace lyx {
  44
  45 namespace {
  46
  47 std::string fontToDocBookTag(xml::FontTypes type)
  48 {
  49         switch (type) {
  50         case xml::FontTypes::FT_EMPH:
  51         case xml::FontTypes::FT_BOLD:
  52                 return "emphasis";
  53         case xml::FontTypes::FT_NOUN:
  54                 return "person";
  55         case xml::FontTypes::FT_UBAR:
  56         case xml::FontTypes::FT_WAVE:
  57         case xml::FontTypes::FT_DBAR:
  58         case xml::FontTypes::FT_SOUT:
  59         case xml::FontTypes::FT_XOUT:
  60         case xml::FontTypes::FT_ITALIC:
  61         case xml::FontTypes::FT_UPRIGHT:
  62         case xml::FontTypes::FT_SLANTED:
  63         case xml::FontTypes::FT_SMALLCAPS:
  64         case xml::FontTypes::FT_ROMAN:
  65         case xml::FontTypes::FT_SANS:
  66                 return "emphasis";
  67         case xml::FontTypes::FT_TYPE:
  68                 return "code";
  69         case xml::FontTypes::FT_SIZE_TINY:
  70         case xml::FontTypes::FT_SIZE_SCRIPT:
  71         case xml::FontTypes::FT_SIZE_FOOTNOTE:
  72         case xml::FontTypes::FT_SIZE_SMALL:
  73         case xml::FontTypes::FT_SIZE_NORMAL:
  74         case xml::FontTypes::FT_SIZE_LARGE:
  75         case xml::FontTypes::FT_SIZE_LARGER:
  76         case xml::FontTypes::FT_SIZE_LARGEST:
  77         case xml::FontTypes::FT_SIZE_HUGE:
  78         case xml::FontTypes::FT_SIZE_HUGER:
  79         case xml::FontTypes::FT_SIZE_INCREASE:
  80         case xml::FontTypes::FT_SIZE_DECREASE:
  81                 return "emphasis";
  82         default:
  83                 return "";
  84         }
  85 }
  86
  87
  88 string fontToRole(xml::FontTypes type)
  89 {
  90         // Specific fonts are achieved with roles. The only common ones are "" for basic emphasis,
  91         // and "bold"/"strong" for bold. With some specific options, other roles are copied into
  92         // HTML output (via the DocBook XSLT sheets); otherwise, if not recognised, they are just ignored.
  93         // Hence, it is not a problem to have many roles by default here.
  94         // See https://www.sourceware.org/ml/docbook/2003-05/msg00269.html
  95         switch (type) {
  96         case xml::FontTypes::FT_ITALIC:
  97         case xml::FontTypes::FT_EMPH:
  98                 return "";
  99         case xml::FontTypes::FT_BOLD:
 100                 return "bold";
 101         case xml::FontTypes::FT_NOUN: // Outputs a <person>
 102         case xml::FontTypes::FT_TYPE: // Outputs a <code>
 103                 return "";
 104         case xml::FontTypes::FT_UBAR:
 105                 return "underline";
 106
 107         // All other roles are non-standard for DocBook.
 108
 109         case xml::FontTypes::FT_WAVE:
 110                 return "wave";
 111         case xml::FontTypes::FT_DBAR:
 112                 return "dbar";
 113         case xml::FontTypes::FT_SOUT:
 114                 return "sout";
 115         case xml::FontTypes::FT_XOUT:
 116                 return "xout";
 117         case xml::FontTypes::FT_UPRIGHT:
 118                 return "upright";
 119         case xml::FontTypes::FT_SLANTED:
 120                 return "slanted";
 121         case xml::FontTypes::FT_SMALLCAPS:
 122                 return "smallcaps";
 123         case xml::FontTypes::FT_ROMAN:
 124                 return "roman";
 125         case xml::FontTypes::FT_SANS:
 126                 return "sans";
 127         case xml::FontTypes::FT_SIZE_TINY:
 128                 return "tiny";
 129         case xml::FontTypes::FT_SIZE_SCRIPT:
 130                 return "size_script";
 131         case xml::FontTypes::FT_SIZE_FOOTNOTE:
 132                 return "size_footnote";
 133         case xml::FontTypes::FT_SIZE_SMALL:
 134                 return "size_small";
 135         case xml::FontTypes::FT_SIZE_NORMAL:
 136                 return "size_normal";
 137         case xml::FontTypes::FT_SIZE_LARGE:
 138                 return "size_large";
 139         case xml::FontTypes::FT_SIZE_LARGER:
 140                 return "size_larger";
 141         case xml::FontTypes::FT_SIZE_LARGEST:
 142                 return "size_largest";
 143         case xml::FontTypes::FT_SIZE_HUGE:
 144                 return "size_huge";
 145         case xml::FontTypes::FT_SIZE_HUGER:
 146                 return "size_huger";
 147         case xml::FontTypes::FT_SIZE_INCREASE:
 148                 return "size_increase";
 149         case xml::FontTypes::FT_SIZE_DECREASE:
 150                 return "size_decrease";
 151         default:
 152                 return "";
 153         }
 154 }
 155
 156
 157 string fontToAttribute(xml::FontTypes type) {
 158         // If there is a role (i.e. nonstandard use of a tag), output the attribute. Otherwise, the sheer tag is sufficient
 159         // for the font.
 160         string role = fontToRole(type);
 161         if (!role.empty()) {
 162                 return "role='" + role + "'";
 163         } else {
 164                 return "";
 165         }
 166 }
 167
 168
 169 xml::FontTag docbookStartFontTag(xml::FontTypes type)
 170 {
 171         return xml::FontTag(from_utf8(fontToDocBookTag(type)), from_utf8(fontToAttribute(type)), type);
 172 }
 173
 174
 175 xml::EndFontTag docbookEndFontTag(xml::FontTypes type)
 176 {
 177         return xml::EndFontTag(from_utf8(fontToDocBookTag(type)), type);
 178 }
 179
 180
 181 // Convenience functions to open and close tags. First, very low-level ones to ensure a consistent new-line behaviour.
 182 // Block style:
 183 //        Content before
 184 //        <blocktag>
 185 //          Contents of the block.
 186 //        </blocktag>
 187 //        Content after
 188 // Paragraph style:
 189 //        Content before
 190 //          <paratag>Contents of the paragraph.</paratag>
 191 //        Content after
 192 // Inline style:
 193 //    Content before<inlinetag>Contents of the paragraph.</inlinetag>Content after
 194
 195 void openInlineTag(XMLStream & xs, const std::string & tag, const std::string & attr)
 196 {
 197         xs << xml::StartTag(tag, attr);
 198 }
 199
 200
 201 void closeInlineTag(XMLStream & xs, const std::string & tag)
 202 {
 203         xs << xml::EndTag(tag);
 204 }
 205
 206
 207 void openParTag(XMLStream & xs, const std::string & tag, const std::string & attr)
 208 {
 209         if (!xs.isLastTagCR())
 210                 xs << xml::CR();
 211         xs << xml::StartTag(tag, attr);
 212 }
 213
 214
 215 void closeParTag(XMLStream & xs, const std::string & tag)
 216 {
 217         xs << xml::EndTag(tag);
 218         xs << xml::CR();
 219 }
 220
 221
 222 void openBlockTag(XMLStream & xs, const std::string & tag, const std::string & attr)
 223 {
 224         if (!xs.isLastTagCR())
 225                 xs << xml::CR();
 226         xs << xml::StartTag(tag, attr);
 227         xs << xml::CR();
 228 }
 229
 230
 231 void closeBlockTag(XMLStream & xs, const std::string & tag)
 232 {
 233         if (!xs.isLastTagCR())
 234                 xs << xml::CR();
 235         xs << xml::EndTag(tag);
 236         xs << xml::CR();
 237 }
 238
 239
 240 void openTag(XMLStream & xs, const std::string & tag, const std::string & attr, const std::string & tagtype)
 241 {
 242         if (tag.empty() || tag == "NONE")
 243                 return;
 244
 245         if (tag == "para" || tagtype == "paragraph") // Special case for <para>: always considered as a paragraph.
 246                 openParTag(xs, tag, attr);
 247         else if (tagtype == "block")
 248                 openBlockTag(xs, tag, attr);
 249         else if (tagtype == "inline")
 250                 openInlineTag(xs, tag, attr);
 251         else
 252                 xs.writeError("Unrecognised tag type '" + tagtype + "' for '" + tag + " " + attr + "'");
 253 }
 254
 255
 256 void closeTag(XMLStream & xs, const std::string & tag, const std::string & tagtype)
 257 {
 258         if (tag.empty() || tag == "NONE")
 259                 return;
 260
 261         if (tag == "para" || tagtype == "paragraph") // Special case for <para>: always considered as a paragraph.
 262                 closeParTag(xs, tag);
 263         else if (tagtype == "block")
 264                 closeBlockTag(xs, tag);
 265         else if (tagtype == "inline")
 266                 closeInlineTag(xs, tag);
 267         else
 268                 xs.writeError("Unrecognised tag type '" + tagtype + "' for '" + tag + "'");
 269 }
 270
 271
 272 // Higher-level convenience functions.
 273
 274 void openParTag(XMLStream & xs, const Paragraph * par, const Paragraph * prevpar)
 275 {
 276         Layout const & lay = par->layout();
 277
 278         if (par == prevpar)
 279                 prevpar = nullptr;
 280
 281         // When should the wrapper be opened here? Only if the previous paragraph has the SAME wrapper tag
 282         // (usually, they won't have the same layout) and the CURRENT one allows merging.
 283         // The main use case is author information in several paragraphs: if the name of the author is the
 284         // first paragraph of an author, then merging with the previous tag does not make sense. Say the
 285         // next paragraph is the affiliation, then it should be output in the same <author> tag (different
 286         // layout, same wrapper tag).
 287         bool openWrapper = lay.docbookwrappertag() != "NONE";
 288         if (prevpar != nullptr) {
 289                 Layout const & prevlay = prevpar->layout();
 290                 if (prevlay.docbookwrappertag() != "NONE") {
 291                         openWrapper = prevlay.docbookwrappertag() == lay.docbookwrappertag()
 292                                         && !lay.docbookwrappermergewithprevious();
 293                 }
 294         }
 295
 296         // Main logic.
 297         if (openWrapper)
 298                 openTag(xs, lay.docbookwrappertag(), lay.docbookwrapperattr(), lay.docbookwrappertagtype());
 299
 300         const string & tag = lay.docbooktag();
 301         if (tag != "NONE") {
 302                 auto xmltag = xml::ParTag(tag, lay.docbookattr());
 303                 if (!xs.isTagOpen(xmltag, 1)) // Don't nest a paragraph directly in a paragraph.
 304                         // TODO: required or not?
 305                         // TODO: avoid creating a ParTag object just for this query...
 306                         openTag(xs, lay.docbooktag(), lay.docbookattr(), lay.docbooktagtype());
 307         }
 308
 309         openTag(xs, lay.docbookitemtag(), lay.docbookitemattr(), lay.docbookitemtagtype());
 310         openTag(xs, lay.docbookiteminnertag(), lay.docbookiteminnerattr(), lay.docbookiteminnertagtype());
 311 }
 312
 313
 314 void closeParTag(XMLStream & xs, Paragraph const * par, Paragraph const * nextpar)
 315 {
 316         if (par == nextpar)
 317                 nextpar = nullptr;
 318
 319         // See comment in openParTag.
 320         Layout const & lay = par->layout();
 321         bool closeWrapper = lay.docbookwrappertag() != "NONE";
 322         if (nextpar != nullptr) {
 323                 Layout const & nextlay = nextpar->layout();
 324                 if (nextlay.docbookwrappertag() != "NONE") {
 325                         closeWrapper = nextlay.docbookwrappertag() == lay.docbookwrappertag()
 326                                         && !nextlay.docbookwrappermergewithprevious();
 327                 }
 328         }
 329
 330         // Main logic.
 331         closeTag(xs, lay.docbookiteminnertag(), lay.docbookiteminnertagtype());
 332         closeTag(xs, lay.docbookitemtag(), lay.docbookitemtagtype());
 333         closeTag(xs, lay.docbooktag(), lay.docbooktagtype());
 334         if (closeWrapper)
 335                 closeTag(xs, lay.docbookwrappertag(), lay.docbookwrappertagtype());
 336 }
 337
 338
 339 void openLabelTag(XMLStream & xs, Layout const & lay) // Mostly for definition lists.
 340 {
 341         openTag(xs, lay.docbookitemlabeltag(), lay.docbookitemlabelattr(), lay.docbookitemlabeltagtype());
 342 }
 343
 344
 345 void closeLabelTag(XMLStream & xs, Layout const & lay)
 346 {
 347         closeTag(xs, lay.docbookitemlabeltag(), lay.docbookitemlabeltagtype());
 348 }
 349
 350
 351 void openItemTag(XMLStream & xs, Layout const & lay)
 352 {
 353         openTag(xs, lay.docbookitemtag(), lay.docbookitemattr(), lay.docbookitemtagtype());
 354 }
 355
 356
 357 void closeItemTag(XMLStream & xs, Layout const & lay)
 358 {
 359         closeTag(xs, lay.docbookitemtag(), lay.docbookitemtagtype());
 360 }
 361
 362
// Forward declaration: makeEnvironment() calls makeAny(), which is defined
// after it and itself dispatches back to makeEnvironment().
void makeAny(
		Text const &,
		Buffer const &,
		XMLStream &,
		OutputParams const &,
		ParagraphList::const_iterator);
 369
 370
 371 void makeParagraphBibliography(
 372                 Buffer const & buf,
 373                 XMLStream & xs,
 374                 OutputParams const & runparams,
 375                 Text const & text,
 376                 ParagraphList::const_iterator const & par)
 377 {
 378         // If this is the first paragraph in a bibliography, open the bibliography tag.
 379         auto pbegin_before = text.paragraphs().getParagraphBefore(par);
 380         if (pbegin_before->layout().latextype != LATEX_BIB_ENVIRONMENT) {
 381                 xs << xml::StartTag("bibliography");
 382                 xs << xml::CR();
 383         }
 384
 385         // Start the precooked bibliography entry. This is very much like opening a paragraph tag.
 386         // Don't forget the citation ID!
 387         docstring attr;
 388         for (auto i = 0; i < par->size(); ++i) {
 389                 Inset const *ip = par->getInset(0);
 390                 if (ip != nullptr && ip->lyxCode() == BIBITEM_CODE) {
 391                         const auto * bibitem = dynamic_cast<const InsetBibitem*>(par->getInset(i));
 392                         attr = from_utf8("xml:id='") + bibitem->getParam("key") + from_utf8("'");
 393                         break;
 394                 }
 395         }
 396         xs << xml::StartTag(from_utf8("bibliomixed"), attr);
 397
 398         // Generate the entry.
 399         auto const begin = text.paragraphs().begin();
 400         par->simpleDocBookOnePar(buf, xs, runparams, text.outerFont(std::distance(begin, par)), true, true, 0);
 401
 402         // End the precooked bibliography entry.
 403         xs << xml::EndTag("bibliomixed");
 404         xs << xml::CR();
 405
 406         // If this is the last paragraph in a bibliography, close the bibliography tag.
 407         auto const end = text.paragraphs().end();
 408         bool endBibliography = par == end;
 409         if (!endBibliography) {
 410                 auto nextpar = par;
 411                 ++nextpar;
 412                 endBibliography = par->layout().latextype != LATEX_BIB_ENVIRONMENT;
 413         }
 414
 415         if (endBibliography) {
 416                 xs << xml::EndTag("bibliography");
 417                 xs << xml::CR();
 418         }
 419 }
 420
 421
/**
 * Outputs a paragraph with a LATEX_PARAGRAPH layout.
 *
 * The paragraph is first rendered into a scratch stream; it is copied to
 * \p xs (surrounded by its paragraph tags when applicable) only if the
 * rendering contains something other than spaces and line breaks.
 *
 * \param buf        buffer the paragraph belongs to
 * \param xs         stream receiving the DocBook output
 * \param runparams  output parameters (docbook_make_pars, docbook_in_par and
 *                   docbook_force_pars are read here)
 * \param text       text containing the paragraph
 * \param par        paragraph to output
 */
void makeParagraph(
		Buffer const & buf,
		XMLStream & xs,
		OutputParams const & runparams,
		Text const & text,
		ParagraphList::const_iterator const & par)
{
	auto const begin = text.paragraphs().begin();
	auto const end = text.paragraphs().end();
	auto prevpar = text.paragraphs().getParagraphBefore(par);

	// Paragraph tags are normally opened when runparams asks for paragraphs
	// and we are not already inside one (see open_par below).
	//
	// But there is also a special case, and we first see whether we are in it.
	// We do not want to open the paragraph tag if this paragraph contains
	// only one item, and that item is "inline", i.e., its inset layout is not
	// htmlisblock (such as a branch). On the other hand, if that single item
	// has a font change applied to it, then we still do need to open the
	// paragraph.
	//
	// Obviously, this is very fragile. The main reason we need to do this is
	// because of branches, e.g., a branch that contains an entire new section:
	// we do not really want to wrap that whole thing in a paragraph tag.
	bool special_case = false;
	// Only a paragraph made of exactly one position is a candidate.
	Inset const *specinset = par->size() == 1 ? par->getInset(0) : nullptr;
	if (specinset && !specinset->getLayout().htmlisblock()) { // TODO: Convert htmlisblock to a DocBook parameter?
		Layout const &style = par->layout();
		// Font the layout prescribes: the label font for manual labels, the
		// regular layout font otherwise.
		FontInfo const first_font = style.labeltype == LABEL_MANUAL ?
									style.labelfont : style.font;
		// Font actually used at position 0 of the paragraph.
		FontInfo const our_font =
				par->getFont(buf.masterBuffer()->params(), 0,
							 text.outerFont(std::distance(begin, par))).fontInfo();

		// No font change on the single item: no paragraph tag needed.
		if (first_font == our_font)
			special_case = true;
	}

	// Plain layouts must be ignored (unless paragraphs are explicitly forced).
	if (!special_case && buf.params().documentClass().isPlainLayout(par->layout()) && !runparams.docbook_force_pars)
		special_case = true;
	// TODO: Could get rid of this with a DocBook equivalent to htmlisblock?
	// Single-inset paragraphs whose inset must not be wrapped in paragraph tags.
	if (!special_case && par->size() == 1 && par->getInset(0)) {
		Inset const * firstInset = par->getInset(0);

		// Floats cannot be in paragraphs. This assignment may only turn the
		// flag on: special_case is known to be false at this point.
		special_case = to_utf8(firstInset->layoutName()).substr(0, 6) == "Float:";

		// Bibliographies cannot be in paragraphs.
		if (!special_case && firstInset->asInsetCommand())
			special_case = firstInset->asInsetCommand()->params().getCmdName() == "bibtex";

		// Equations do not deserve their own paragraph (DocBook allows them outside paragraphs).
		if (!special_case && firstInset->asInsetMath())
			special_case = true;

		// ERTs are in comments, not paragraphs.
		if (!special_case && firstInset->lyxCode() == lyx::ERT_CODE)
			special_case = true;

		// Listings should not get into their own paragraph.
		if (!special_case && firstInset->lyxCode() == lyx::LISTINGS_CODE)
			special_case = true;
	}

	// Open the paragraph tags only when paragraphs are requested, we are not
	// already inside one, and none of the special cases above applies.
	bool const open_par = runparams.docbook_make_pars
						  && !runparams.docbook_in_par
						  && !special_case;

	// The closing tags are issued exactly when the opening ones are: open_par
	// already implies !docbook_in_par, so close_par always equals open_par.
	auto nextpar = par;
	++nextpar;
	bool const close_par = open_par && (!runparams.docbook_in_par);

	// Determine if this paragraph has some real content. Things like new pages are not caught
	// by Paragraph::empty(), even though they do not generate anything useful in DocBook.
	// To find out, render the paragraph into a scratch stream first.
	odocstringstream os2;
	XMLStream xs2(os2);
	par->simpleDocBookOnePar(buf, xs2, runparams, text.outerFont(distance(begin, par)), open_par, close_par, 0);

	// Drop spaces, carriage returns and line feeds: whatever remains is real content.
	docstring cleaned = os2.str();
	static const lyx::regex reg("[ \\r\\n]*");
	cleaned = from_utf8(lyx::regex_replace(to_utf8(cleaned), reg, string("")));

	if (!cleaned.empty()) {
		if (open_par)
			openParTag(xs, &*par, prevpar);

		// The scratch output is already XML: copy it (whitespace included) without escaping.
		xs << XMLStream::ESCAPE_NONE << os2.str();

		if (close_par)
			closeParTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr);
	}
}
 521
 522
/**
 * Outputs a paragraph whose layout is an environment: opens the tags of the
 * layout, generates the optional label and the contents, then closes the tags.
 *
 * \param buf        buffer the paragraph belongs to
 * \param xs         stream receiving the DocBook output
 * \param runparams  output parameters forwarded to the paragraph
 * \param text       text containing the paragraph
 * \param par        paragraph to output
 */
void makeEnvironment(
		Buffer const &buf,
		XMLStream &xs,
		OutputParams const &runparams,
		Text const &text,
		ParagraphList::const_iterator const & par)
{
	auto const end = text.paragraphs().end();

	// Output the opening tag for this environment, but only if it has not been previously opened (condition
	// implemented in openParTag).
	auto prevpar = text.paragraphs().getParagraphBefore(par);
	openParTag(xs, &*par, prevpar); // TODO: switch in layout for par/block?

	// Generate the contents of this environment. There is a special case if this is like some environment.
	Layout const & style = par->layout();
	if (style.latextype == LATEX_COMMAND) {
		// Nothing to do (otherwise, infinite loops).
	} else if (style.latextype == LATEX_ENVIRONMENT ||
			style.latextype == LATEX_LIST_ENVIRONMENT ||
			style.latextype == LATEX_ITEM_ENVIRONMENT) {
		// Open a wrapper tag if needed.
		// NOTE(review): no matching end tag for the item wrapper is written in this
		// function; confirm that it is closed elsewhere.
		if (style.docbookitemwrappertag() != "NONE") {
			xs << xml::StartTag(style.docbookitemwrappertag(), style.docbookitemwrapperattr());
			xs << xml::CR();
		}

		// Generate the label, if need be. If it is taken from the text, sep != 0 and corresponds to the first
		// character after the label.
		pos_type sep = 0;
		if (style.labeltype != LABEL_NO_LABEL && style.docbookitemlabeltag() != "NONE") {
			// At least one condition must be met:
			//  - this environment is not a list
			//  - if this is a list, the label must not be manual (i.e. it must be taken from the layout)
			if (style.latextype != LATEX_LIST_ENVIRONMENT || style.labeltype != LABEL_MANUAL) {
				// Usual cases: maybe there is something specified at the layout level. Highly unlikely, though.
				docstring const lbl = par->params().labelString();

				if (lbl.empty()) {
					// No label text: only a line break is written, no label tag.
					xs << xml::CR();
				} else {
					openLabelTag(xs, style);
					xs << lbl;
					closeLabelTag(xs, style);
				}
			} else {
				// Only variablelist gets here (or similar items defined as an extension in the layout).
				// The label is the first word of the paragraph; sep receives the value returned by
				// firstWordDocBook() and is where the rest of the paragraph is generated from below.
				openLabelTag(xs, style);
				sep = par->firstWordDocBook(xs, runparams);
				closeLabelTag(xs, style);
			}
		}

		// Maybe the item is completely empty, i.e. if the first word ends at the end of the current paragraph
		// AND if the next paragraph doesn't have the same depth (if there is such a paragraph).
		// Common case: there is only the first word on the line, but there is a nested list instead
		// of more text.
		bool emptyItem = false;
		if (sep == par->size()) { // If the separator is already at the end of this paragraph...
			auto next_par = par;
			++next_par;
			if (next_par == text.paragraphs().end()) // There is no next paragraph.
				emptyItem = true;
			else // There is a next paragraph: the item is empty unless the next paragraph is nested deeper.
				emptyItem = par->params().depth() >= next_par->params().depth();
		}

		if (emptyItem) {
			// Avoid having an empty item, this is not valid DocBook. A single character is enough to force
			// generation of a full <para>.
			// TODO: this always worked only by magic...
			xs << ' ';
		} else {
			// Generate the rest of the paragraph, if need be (starting at position sep).
			par->simpleDocBookOnePar(buf, xs, runparams, text.outerFont(std::distance(text.paragraphs().begin(), par)),
									 true, true, sep);
		}
	} else {
		// Any other kind of layout is dispatched according to its type.
		makeAny(text, buf, xs, runparams, par);
	}

	// Close the environment.
	auto nextpar = par;
	++nextpar;
	closeParTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr); // TODO: switch in layout for par/block?
}
 609
 610
 611 void makeCommand(
 612                 Buffer const & buf,
 613                 XMLStream & xs,
 614                 OutputParams const & runparams,
 615                 Text const & text,
 616                 ParagraphList::const_iterator const & par)
 617 {
 618         // Unlike XHTML, no need for labels, as they are handled by DocBook tags.
 619         auto const begin = text.paragraphs().begin();
 620         auto const end = text.paragraphs().end();
 621         auto nextpar = par;
 622         ++nextpar;
 623
 624         // Generate this command.
 625         auto prevpar = text.paragraphs().getParagraphBefore(par);
 626         openParTag(xs, &*par, prevpar);
 627
 628         par->simpleDocBookOnePar(buf, xs, runparams,
 629                                  text.outerFont(distance(begin, par)));
 630
 631         closeParTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr);
 632 }
 633
 634
 635 void makeAny(
 636                 Text const &text,
 637                 Buffer const &buf,
 638                 XMLStream &xs,
 639                 OutputParams const &ourparams,
 640                 ParagraphList::const_iterator par)
 641 {
 642         switch (par->layout().latextype) {
 643         case LATEX_COMMAND:
 644                 makeCommand(buf, xs, ourparams, text, par);
 645                 break;
 646         case LATEX_ENVIRONMENT:
 647         case LATEX_LIST_ENVIRONMENT:
 648         case LATEX_ITEM_ENVIRONMENT:
 649                 makeEnvironment(buf, xs, ourparams, text, par);
 650                 break;
 651         case LATEX_PARAGRAPH:
 652                 makeParagraph(buf, xs, ourparams, text, par);
 653                 break;
 654         case LATEX_BIB_ENVIRONMENT:
 655                 makeParagraphBibliography(buf, xs, ourparams, text, par);
 656                 break;
 657         }
 658 }
 659
 660 } // end anonymous namespace
 661
 662
// Result of hasDocumentSectioning(): whether a paragraph of the "Sectioning"
// category was found, and the paragraph index at which the scan stopped.
using DocBookDocumentSectioning = tuple<bool, pit_type>;
 664
 665
 666 struct DocBookInfoTag
 667 {
 668         const set<pit_type> shouldBeInInfo;
 669         const set<pit_type> mustBeInInfo;
 670         const set<pit_type> abstract;
 671         pit_type bpit;
 672         pit_type epit;
 673
 674         DocBookInfoTag(const set<pit_type> & shouldBeInInfo, const set<pit_type> & mustBeInInfo,
 675                                    const set<pit_type> & abstract, pit_type bpit, pit_type epit) :
 676                                    shouldBeInInfo(shouldBeInInfo), mustBeInInfo(mustBeInInfo), abstract(abstract),
 677                                    bpit(bpit), epit(epit) {}
 678 };
 679
 680
 681 DocBookDocumentSectioning hasDocumentSectioning(ParagraphList const &paragraphs, pit_type bpit, pit_type const epit) {
 682         bool documentHasSections = false;
 683
 684         while (bpit < epit) {
 685                 Layout const &style = paragraphs[bpit].layout();
 686                 documentHasSections |= style.category() == from_utf8("Sectioning");
 687
 688                 if (documentHasSections)
 689                         break;
 690                 bpit += 1;
 691         }
 692         // Paragraphs before the first section: [ runparams.par_begin ; eppit )
 693
 694         return make_tuple(documentHasSections, bpit);
 695 }
 696
 697
 698 bool hasOnlyNotes(Paragraph const & par)
 699 {
 700         // Precondition: the paragraph is not empty. Otherwise, the function will always return true...
 701         for (int i = 0; i < par.size(); ++i)
 702                 // If you find something that is not an inset (like actual text) or an inset that is not a note,
 703                 // return false.
 704                 if (!par.isInset(i) || !dynamic_cast<InsetNote *>(par.insetList().get(i)))
 705                         return false;
 706         return true;
 707 }
 708
 709
 710 DocBookInfoTag getParagraphsWithInfo(ParagraphList const &paragraphs, pit_type bpit, pit_type const epit) {
 711         set<pit_type> shouldBeInInfo;
 712         set<pit_type> mustBeInInfo;
 713         set<pit_type> abstract;
 714
 715         // Find the first non empty paragraph by mutating bpit.
 716         while (bpit < epit) {
 717                 Paragraph const &par = paragraphs[bpit];
 718                 if (par.empty() || hasOnlyNotes(par))
 719                         bpit += 1;
 720                 else
 721                         break;
 722         }
 723
 724         // Find the last info-like paragraph.
 725         pit_type cpit = bpit;
 726         bool hasAbstractLayout = false;
 727         while (cpit < epit) {
 728                 // Skip paragraphs only containing one note.
 729                 Paragraph const & par = paragraphs[cpit];
 730                 if (hasOnlyNotes(par)) {
 731                         cpit += 1;
 732                         continue;
 733                 }
 734
 735                 if (par.layout().docbookabstract())
 736                         hasAbstractLayout = true;
 737
 738                 // Based on layout information, store this paragraph in one set: should be in <info>, must be.
 739                 Layout const &style = par.layout();
 740
 741                 if (style.docbookininfo() == "always") {
 742                         mustBeInInfo.emplace(cpit);
 743                 } else if (style.docbookininfo() == "maybe") {
 744                         shouldBeInInfo.emplace(cpit);
 745                 } else {
 746                         // Hypothesis: the <info> parts should be grouped together near the beginning bpit.
 747                         // There may be notes in between, but nothing else.
 748                         break;
 749                 }
 750                 cpit += 1;
 751         }
 752         // Now, cpit points to the last paragraph that has things that could go in <info>.
 753         // bpit is the beginning of the <info> part.
 754
 755         // Go once again through the list of paragraphs to find the abstract. If there is an abstract
 756         // layout, only consider it. Otherwise, an abstract is just a sequence of paragraphs with text.
 757         if (hasAbstractLayout) {
 758                 pit_type pit = bpit;
 759                 while (pit < cpit) { // Don't overshoot the <info> part.
 760                         if (paragraphs[pit].layout().docbookabstract())
 761                                 abstract.emplace(pit);
 762                         pit++;
 763                 }
 764         } else {
 765                 pit_type lastAbstract = epit + 1; // A nonsensical value.
 766                 docstring lastAbstractLayout;
 767
 768                 pit_type pit = bpit;
 769                 while (pit < cpit) { // Don't overshoot the <info> part.
 770                         const Paragraph & par = paragraphs.at(pit);
 771                         if (!par.insetList().empty()) {
 772                                 for (const auto &i : par.insetList()) {
 773                                         if (i.inset->getText(0) != nullptr) {
 774                                                 if (lastAbstract == epit + 1) {
 775                                                         // First paragraph that matches the heuristic definition of abstract.
 776                                                         lastAbstract = pit;
 777                                                         lastAbstractLayout = par.layout().name();
 778                                                 } else if (pit > lastAbstract + 1 || par.layout().name() != lastAbstractLayout) {
 779                                                         // This is either too far from the last abstract paragraph or doesn't
 780                                                         // have the right layout name, BUT there has already been an abstract
 781                                                         // in this document: done with detecting the abstract.
 782                                                         goto done; // Easier to get out of two nested loops.
 783                                                 }
 784
 785                                                 abstract.emplace(pit);
 786                                                 break;
 787                                         }
 788                                 }
 789                         }
 790                         pit++;
 791                 }
 792         }
 793
 794         done:
 795         return DocBookInfoTag(shouldBeInInfo, mustBeInInfo, abstract, bpit, cpit);
 796 }
 797
 798
 799 void outputDocBookInfo(
 800                 Text const & text,
 801                 Buffer const & buf,
 802                 XMLStream & xs,
 803                 OutputParams const & runparams,
 804                 ParagraphList const & paragraphs,
 805                 DocBookInfoTag const & info)
 806 {
 807         // Perform an additional check on the abstract. Sometimes, there are many paragraphs that should go
 808         // into the abstract, but none generates actual content. Thus, first generate to a temporary stream,
 809         // then only create the <abstract> tag if these paragraphs generate some content.
 810         // This check must be performed *before* a decision on whether or not to output <info> is made.
 811         bool hasAbstract = !info.abstract.empty();
 812         docstring abstract;
 813         if (hasAbstract) {
 814                 // Generate the abstract XML into a string before further checks.
 815                 odocstringstream os2;
 816                 {
 817                         XMLStream xs2(os2);
 818                         auto bpit = *std::min_element(info.abstract.begin(), info.abstract.end());
 819                         auto epit = 1 + *std::max_element(info.abstract.begin(), info.abstract.end());
 820                         // info.abstract is inclusive, epit is exclusive, hence +1 for looping.
 821
 822                         while (bpit < epit) {
 823                                 makeAny(text, buf, xs2, runparams, paragraphs.iterator_at(bpit));
 824                                 bpit += 1;
 825                         }
 826                 }
 827
 828                 // Actually output the abstract if there is something to do. Don't count line feeds or spaces in this,
 829                 // even though they must be properly output if there is some abstract.
 830                 abstract = os2.str();
 831                 static const lyx::regex reg("[ \\r\\n]*");
 832                 docstring abstractContent = from_utf8(lyx::regex_replace(to_utf8(abstract), reg, string("")));
 833
 834                 // Nothing? Then there is no abstract!
 835                 if (abstractContent.empty())
 836                         hasAbstract = false;
 837         }
 838
 839         // The abstract must go in <info>. Otherwise, decide whether to open <info> based on the layouts.
 840         bool needInfo = !info.mustBeInInfo.empty() || hasAbstract;
 841
 842         // Start the <info> tag if required.
 843         if (needInfo) {
 844                 xs.startDivision(false);
 845                 xs << xml::StartTag("info");
 846                 xs << xml::CR();
 847         }
 848
 849         // Output the elements that should go in <info>, before and after the abstract.
 850         for (auto pit : info.shouldBeInInfo) { // Typically, the title: these elements are so important and ubiquitous
 851                 // that mandating a wrapper like <info> would repel users. Thus, generate them first.
 852                 makeAny(text, buf, xs, runparams, paragraphs.iterator_at(pit));
 853         }
 854         for (auto pit : info.mustBeInInfo) {
 855                 if (info.abstract.find(pit) == info.abstract.end()) // The abstract must be in info, but is dealt with after.
 856                         makeAny(text, buf, xs, runparams, paragraphs.iterator_at(pit));
 857         }
 858
 859         // Always output the abstract as the last item of the <info>, as it requires special treatment (especially if
 860         // it contains several paragraphs that are empty).
 861         if (hasAbstract) {
 862 //              string tag = paragraphs[*info.abstract.begin()].layout().docbookforceabstracttag();
 863 //              if (tag == "NONE")
 864 //                      tag = "abstract";
 865 //
 866 //              xs << xml::StartTag(tag);
 867 //              xs << xml::CR();
 868                 xs << XMLStream::ESCAPE_NONE << abstract;
 869 //              xs << xml::EndTag(tag);
 870 //              xs << xml::CR();
 871         }
 872
 873         // End the <info> tag if it was started.
 874         if (needInfo) {
 875                 xs << xml::EndTag("info");
 876                 xs << xml::CR();
 877                 xs.endDivision();
 878         }
 879 }
 880
 881
 882 void docbookFirstParagraphs(
 883                 Text const &text,
 884                 Buffer const &buf,
 885                 XMLStream &xs,
 886                 OutputParams const &runparams,
 887                 pit_type epit)
 888 {
 889         // Handle the beginning of the document, supposing it has sections.
 890         // Major role: output the first <info> tag.
 891
 892         ParagraphList const &paragraphs = text.paragraphs();
 893         pit_type bpit = runparams.par_begin;
 894         DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit);
 895         outputDocBookInfo(text, buf, xs, runparams, paragraphs, info);
 896 }
 897
 898
 899 void docbookSimpleAllParagraphs(
 900                 Text const & text,
 901                 Buffer const & buf,
 902                 XMLStream & xs,
 903                 OutputParams const & runparams)
 904 {
 905         // Handle the given text, supposing it has no sections (i.e. a "simple" text). The input may vary in length
 906         // between a single paragraph to a whole document.
 907
 908         // First, the <info> tag.
 909         ParagraphList const &paragraphs = text.paragraphs();
 910         pit_type bpit = runparams.par_begin;
 911         pit_type const epit = runparams.par_end;
 912         DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit);
 913         outputDocBookInfo(text, buf, xs, runparams, paragraphs, info);
 914
 915         // Then, the content. It starts where the <info> ends.
 916         bpit = info.epit;
 917         while (bpit < epit) {
 918                 auto par = paragraphs.iterator_at(bpit);
 919                 if (!hasOnlyNotes(*par))
 920                         makeAny(text, buf, xs, runparams, par);
 921                 bpit += 1;
 922         }
 923 }
 924
 925
 926 void docbookParagraphs(Text const &text,
 927                                            Buffer const &buf,
 928                                            XMLStream &xs,
 929                                            OutputParams const &runparams) {
 930         ParagraphList const &paragraphs = text.paragraphs();
 931         if (runparams.par_begin == runparams.par_end) {
 932                 runparams.par_begin = 0;
 933                 runparams.par_end = paragraphs.size();
 934         }
 935         pit_type bpit = runparams.par_begin;
 936         pit_type const epit = runparams.par_end;
 937         LASSERT(bpit < epit,
 938                         {
 939                                 xs << XMLStream::ESCAPE_NONE << "<!-- DocBook output error! -->\n";
 940                                 return;
 941                         });
 942
 943         std::stack<std::pair<int, string>> headerLevels; // Used to determine when to open/close sections: store the depth
 944         // of the section and the tag that was used to open it.
 945
 946         // Detect whether the document contains sections. If there are no sections, there can be no automatically
 947         // discovered abstract.
 948         bool documentHasSections;
 949         pit_type eppit;
 950         tie(documentHasSections, eppit) = hasDocumentSectioning(paragraphs, bpit, epit);
 951
 952         if (documentHasSections) {
 953                 docbookFirstParagraphs(text, buf, xs, runparams, eppit);
 954                 bpit = eppit;
 955         } else {
 956                 docbookSimpleAllParagraphs(text, buf, xs, runparams);
 957                 return;
 958         }
 959
 960         bool currentlyInAppendix = false;
 961
 962         while (bpit < epit) {
 963                 OutputParams ourparams = runparams;
 964
 965                 auto par = paragraphs.iterator_at(bpit);
 966                 if (par->params().startOfAppendix())
 967                         currentlyInAppendix = true;
 968                 Layout const &style = par->layout();
 969                 ParagraphList::const_iterator const lastStartedPar = par;
 970                 ParagraphList::const_iterator send;
 971
 972                 if (hasOnlyNotes(*par)) {
 973                         bpit += 1;
 974                         continue;
 975                 }
 976
 977                 // Think about adding <section> and/or </section>s.
 978                 const bool isLayoutSectioning = style.category() == from_utf8("Sectioning");
 979                 if (isLayoutSectioning) {
 980                         int level = style.toclevel;
 981
 982                         // Need to close a previous section if it has the same level or a higher one (close <section> if opening a <h2>
 983                         // after a <h2>, <h3>, <h4>, <h5> or <h6>). More examples:
 984                         //   - current: h2; back: h1; do not close any <section>
 985                         //   - current: h1; back: h2; close two <section> (first the <h2>, then the <h1>, so a new <h1> can come)
 986                         while (!headerLevels.empty() && level <= headerLevels.top().first) {
 987                                 int stackLevel = headerLevels.top().first;
 988                                 docstring stackTag = from_utf8("</" + headerLevels.top().second + ">");
 989                                 headerLevels.pop();
 990
 991                                 // Output the tag only if it corresponds to a legit section.
 992                                 if (stackLevel != Layout::NOT_IN_TOC)
 993                                         xs << XMLStream::ESCAPE_NONE << stackTag << xml::CR();
 994                         }
 995
 996                         // Open the new section: first push it onto the stack, then output it in DocBook.
 997                         string sectionTag = (currentlyInAppendix && style.docbooksectiontag() == "chapter") ?
 998                                                                 "appendix" : style.docbooksectiontag();
 999                         headerLevels.push(std::make_pair(level, sectionTag));
1000
1001                         // Some sectioning-like elements should not be output (such as FrontMatter).
1002                         if (level != Layout::NOT_IN_TOC) {
1003                                 // Look for a label in the title, i.e. a InsetLabel as a child.
1004                                 docstring id = docstring();
1005                                 for (pos_type i = 0; i < par->size(); ++i) {
1006                                         Inset const *inset = par->getInset(i);
1007                                         if (inset) {
1008                                                 if (auto label = dynamic_cast<InsetLabel const *>(inset)) {
1009                                                         // Generate the attributes for the section if need be.
1010                                                         id += "xml:id=\"" + xml::cleanID(label->screenLabel()) + "\"";
1011
1012                                                         // Don't output the ID as a DocBook <anchor>.
1013                                                         ourparams.docbook_anchors_to_ignore.emplace(label->screenLabel());
1014
1015                                                         // Cannot have multiple IDs per tag.
1016                                                         break;
1017                                                 }
1018                                         }
1019                                 }
1020
1021                                 // Write the open tag for this section.
1022                                 docstring tag = from_utf8("<" + sectionTag);
1023                                 if (!id.empty())
1024                                         tag += from_utf8(" ") + id;
1025                                 tag += from_utf8(">");
1026                                 xs << XMLStream::ESCAPE_NONE << tag;
1027                                 xs << xml::CR();
1028                         }
1029                 }
1030
1031                 // Close all sections before the bibliography.
1032                 // TODO: Only close all when the bibliography is at the end of the document? Or force to output the bibliography at the end of the document? Or don't care (as allowed by DocBook)?
1033                 auto insetsLength = distance(par->insetList().begin(), par->insetList().end());
1034                 if (insetsLength > 0) {
1035                         Inset const *firstInset = par->getInset(0);
1036                         if (firstInset && dynamic_cast<InsetBibtex const *>(firstInset)) {
1037                                 while (!headerLevels.empty()) {
1038                                         int level = headerLevels.top().first;
1039                                         docstring tag = from_utf8("</" + headerLevels.top().second + ">");
1040                                         headerLevels.pop();
1041
1042                                         // Output the tag only if it corresponds to a legit section.
1043                                         if (level != Layout::NOT_IN_TOC) {
1044                                                 xs << XMLStream::ESCAPE_NONE << tag;
1045                                                 xs << xml::CR();
1046                                         }
1047                                 }
1048                         }
1049                 }
1050
1051                 // Generate this paragraph.
1052                 makeAny(text, buf, xs, ourparams, par);
1053                 bpit += 1;
1054         }
1055
1056         // If need be, close <section>s, but only at the end of the document (otherwise, dealt with at the beginning
1057         // of the loop).
1058         while (!headerLevels.empty() && headerLevels.top().first > Layout::NOT_IN_TOC) {
1059                 docstring tag = from_utf8("</" + headerLevels.top().second + ">");
1060                 headerLevels.pop();
1061                 xs << XMLStream::ESCAPE_NONE << tag;
1062                 xs << xml::CR();
1063         }
1064 }
1065
1066 } // namespace lyx