src/output_docbook.cpp

   1 /**
   2  * \file output_docbook.cpp
   3  * This file is part of LyX, the document processor.
   4  * Licence details can be found in the file COPYING.
   5  *
   6  * \author Lars Gullik Bjønnes
   7  * \author José Matos
   8  *
   9  * Full author contact details are available in file CREDITS.
  10  */
  11
  12 #include <config.h>
  13
  14 #include "Buffer.h"
  15 #include "buffer_funcs.h"
  16 #include "BufferParams.h"
  17 #include "Font.h"
  18 #include "InsetList.h"
  19 #include "Paragraph.h"
  20 #include "ParagraphList.h"
  21 #include "ParagraphParameters.h"
  22 #include "xml.h"
  23 #include "Text.h"
  24 #include "TextClass.h"
  25
  26 #include "insets/InsetBibtex.h"
  27 #include "insets/InsetBibitem.h"
  28 #include "insets/InsetLabel.h"
  29 #include "insets/InsetNote.h"
  30
  31 #include "support/lassert.h"
  32
  33 #include <stack>
  34 #include <iostream>
  35 #include <algorithm>
  36 #include <sstream>
  37
  38 using namespace std;
  39 using namespace lyx::support;
  40
  41 namespace lyx {
  42
  43 namespace {
  44
  45 std::string fontToDocBookTag(xml::FontTypes type)
  46 {
  47         switch (type) {
  48         case xml::FontTypes::FT_EMPH:
  49         case xml::FontTypes::FT_BOLD:
  50                 return "emphasis";
  51         case xml::FontTypes::FT_NOUN:
  52                 return "person";
  53         case xml::FontTypes::FT_UBAR:
  54         case xml::FontTypes::FT_WAVE:
  55         case xml::FontTypes::FT_DBAR:
  56         case xml::FontTypes::FT_SOUT:
  57         case xml::FontTypes::FT_XOUT:
  58         case xml::FontTypes::FT_ITALIC:
  59         case xml::FontTypes::FT_UPRIGHT:
  60         case xml::FontTypes::FT_SLANTED:
  61         case xml::FontTypes::FT_SMALLCAPS:
  62         case xml::FontTypes::FT_ROMAN:
  63         case xml::FontTypes::FT_SANS:
  64                 return "emphasis";
  65         case xml::FontTypes::FT_TYPE:
  66                 return "code";
  67         case xml::FontTypes::FT_SIZE_TINY:
  68         case xml::FontTypes::FT_SIZE_SCRIPT:
  69         case xml::FontTypes::FT_SIZE_FOOTNOTE:
  70         case xml::FontTypes::FT_SIZE_SMALL:
  71         case xml::FontTypes::FT_SIZE_NORMAL:
  72         case xml::FontTypes::FT_SIZE_LARGE:
  73         case xml::FontTypes::FT_SIZE_LARGER:
  74         case xml::FontTypes::FT_SIZE_LARGEST:
  75         case xml::FontTypes::FT_SIZE_HUGE:
  76         case xml::FontTypes::FT_SIZE_HUGER:
  77         case xml::FontTypes::FT_SIZE_INCREASE:
  78         case xml::FontTypes::FT_SIZE_DECREASE:
  79                 return "emphasis";
  80         default:
  81                 return "";
  82         }
  83 }
  84
  85
  86 string fontToRole(xml::FontTypes type)
  87 {
  88         // Specific fonts are achieved with roles. The only common ones are "" for basic emphasis,
  89         // and "bold"/"strong" for bold. With some specific options, other roles are copied into
  90         // HTML output (via the DocBook XSLT sheets); otherwise, if not recognised, they are just ignored.
  91         // Hence, it is not a problem to have many roles by default here.
  92         // See https://www.sourceware.org/ml/docbook/2003-05/msg00269.html
  93         switch (type) {
  94         case xml::FontTypes::FT_ITALIC:
  95         case xml::FontTypes::FT_EMPH:
  96                 return "";
  97         case xml::FontTypes::FT_BOLD:
  98                 return "bold";
  99         case xml::FontTypes::FT_NOUN: // Outputs a <person>
 100         case xml::FontTypes::FT_TYPE: // Outputs a <code>
 101                 return "";
 102         case xml::FontTypes::FT_UBAR:
 103                 return "underline";
 104
 105         // All other roles are non-standard for DocBook.
 106
 107         case xml::FontTypes::FT_WAVE:
 108                 return "wave";
 109         case xml::FontTypes::FT_DBAR:
 110                 return "dbar";
 111         case xml::FontTypes::FT_SOUT:
 112                 return "sout";
 113         case xml::FontTypes::FT_XOUT:
 114                 return "xout";
 115         case xml::FontTypes::FT_UPRIGHT:
 116                 return "upright";
 117         case xml::FontTypes::FT_SLANTED:
 118                 return "slanted";
 119         case xml::FontTypes::FT_SMALLCAPS:
 120                 return "smallcaps";
 121         case xml::FontTypes::FT_ROMAN:
 122                 return "roman";
 123         case xml::FontTypes::FT_SANS:
 124                 return "sans";
 125         case xml::FontTypes::FT_SIZE_TINY:
 126                 return "tiny";
 127         case xml::FontTypes::FT_SIZE_SCRIPT:
 128                 return "size_script";
 129         case xml::FontTypes::FT_SIZE_FOOTNOTE:
 130                 return "size_footnote";
 131         case xml::FontTypes::FT_SIZE_SMALL:
 132                 return "size_small";
 133         case xml::FontTypes::FT_SIZE_NORMAL:
 134                 return "size_normal";
 135         case xml::FontTypes::FT_SIZE_LARGE:
 136                 return "size_large";
 137         case xml::FontTypes::FT_SIZE_LARGER:
 138                 return "size_larger";
 139         case xml::FontTypes::FT_SIZE_LARGEST:
 140                 return "size_largest";
 141         case xml::FontTypes::FT_SIZE_HUGE:
 142                 return "size_huge";
 143         case xml::FontTypes::FT_SIZE_HUGER:
 144                 return "size_huger";
 145         case xml::FontTypes::FT_SIZE_INCREASE:
 146                 return "size_increase";
 147         case xml::FontTypes::FT_SIZE_DECREASE:
 148                 return "size_decrease";
 149         default:
 150                 return "";
 151         }
 152 }
 153
 154
 155 string fontToAttribute(xml::FontTypes type) {
 156         // If there is a role (i.e. nonstandard use of a tag), output the attribute. Otherwise, the sheer tag is sufficient
 157         // for the font.
 158         string role = fontToRole(type);
 159         if (!role.empty()) {
 160                 return "role='" + role + "'";
 161         } else {
 162                 return "";
 163         }
 164 }
 165
 166
 167 // Convenience functions to open and close tags. First, very low-level ones to ensure a consistent new-line behaviour.
 168 // Block style:
 169 //        Content before
 170 //        <blocktag>
 171 //          Contents of the block.
 172 //        </blocktag>
 173 //        Content after
 174 // Paragraph style:
 175 //        Content before
 176 //          <paratag>Contents of the paragraph.</paratag>
 177 //        Content after
 178 // Inline style:
 179 //    Content before<inlinetag>Contents of the paragraph.</inlinetag>Content after
 180
 181 void openInlineTag(XMLStream & xs, const std::string & tag, const std::string & attr)
 182 {
 183         xs << xml::StartTag(tag, attr);
 184 }
 185
 186
 187 void closeInlineTag(XMLStream & xs, const std::string & tag)
 188 {
 189         xs << xml::EndTag(tag);
 190 }
 191
 192
 193 void openParTag(XMLStream & xs, const std::string & tag, const std::string & attr)
 194 {
 195         if (!xs.isLastTagCR())
 196                 xs << xml::CR();
 197         xs << xml::StartTag(tag, attr);
 198 }
 199
 200
 201 void closeParTag(XMLStream & xs, const std::string & tag)
 202 {
 203         xs << xml::EndTag(tag);
 204         xs << xml::CR();
 205 }
 206
 207
 208 void openBlockTag(XMLStream & xs, const std::string & tag, const std::string & attr)
 209 {
 210         if (!xs.isLastTagCR())
 211                 xs << xml::CR();
 212         xs << xml::StartTag(tag, attr);
 213         xs << xml::CR();
 214 }
 215
 216
 217 void closeBlockTag(XMLStream & xs, const std::string & tag)
 218 {
 219         if (!xs.isLastTagCR())
 220                 xs << xml::CR();
 221         xs << xml::EndTag(tag);
 222         xs << xml::CR();
 223 }
 224
 225
 226 void openTag(XMLStream & xs, const std::string & tag, const std::string & attr, const std::string & tagtype)
 227 {
 228         if (tag.empty() || tag == "NONE")
 229                 return;
 230
 231         if (tag == "para" || tagtype == "paragraph") // Special case for <para>: always considered as a paragraph.
 232                 openParTag(xs, tag, attr);
 233         else if (tagtype == "block")
 234                 openBlockTag(xs, tag, attr);
 235         else if (tagtype == "inline")
 236                 openInlineTag(xs, tag, attr);
 237         else
 238                 xs.writeError("Unrecognised tag type '" + tagtype + "' for '" + tag + " " + attr + "'");
 239 }
 240
 241
 242 void closeTag(XMLStream & xs, const std::string & tag, const std::string & tagtype)
 243 {
 244         if (tag.empty() || tag == "NONE")
 245                 return;
 246
 247         if (tag == "para" || tagtype == "paragraph") // Special case for <para>: always considered as a paragraph.
 248                 closeParTag(xs, tag);
 249         else if (tagtype == "block")
 250                 closeBlockTag(xs, tag);
 251         else if (tagtype == "inline")
 252                 closeInlineTag(xs, tag);
 253         else
 254                 xs.writeError("Unrecognised tag type '" + tagtype + "' for '" + tag + "'");
 255 }
 256
 257
 258 // Higher-level convenience functions.
 259
 260 void openParTag(XMLStream & xs, const Paragraph * par, const Paragraph * prevpar)
 261 {
 262         Layout const & lay = par->layout();
 263
 264         if (par == prevpar)
 265                 prevpar = nullptr;
 266
 267         // When should the wrapper be opened here? Only if the previous paragraph has the SAME wrapper tag
 268         // (usually, they won't have the same layout) and the CURRENT one allows merging.
 269         // The main use case is author information in several paragraphs: if the name of the author is the
 270         // first paragraph of an author, then merging with the previous tag does not make sense. Say the
 271         // next paragraph is the affiliation, then it should be output in the same <author> tag (different
 272         // layout, same wrapper tag).
 273         bool openWrapper = lay.docbookwrappertag() != "NONE";
 274         if (prevpar != nullptr) {
 275                 Layout const & prevlay = prevpar->layout();
 276                 if (prevlay.docbookwrappertag() != "NONE") {
 277                         openWrapper = prevlay.docbookwrappertag() == lay.docbookwrappertag()
 278                                         && !lay.docbookwrappermergewithprevious();
 279                 }
 280         }
 281
 282         // Main logic.
 283         if (openWrapper)
 284                 openTag(xs, lay.docbookwrappertag(), lay.docbookwrapperattr(), lay.docbookwrappertagtype());
 285
 286         const string & tag = lay.docbooktag();
 287         if (tag != "NONE") {
 288                 auto xmltag = xml::ParTag(tag, lay.docbookattr());
 289                 if (!xs.isTagOpen(xmltag, 1)) // Don't nest a paragraph directly in a paragraph.
 290                         // TODO: required or not?
 291                         // TODO: avoid creating a ParTag object just for this query...
 292                         openTag(xs, lay.docbooktag(), lay.docbookattr(), lay.docbooktagtype());
 293         }
 294
 295         openTag(xs, lay.docbookitemtag(), lay.docbookitemattr(), lay.docbookitemtagtype());
 296         openTag(xs, lay.docbookiteminnertag(), lay.docbookiteminnerattr(), lay.docbookiteminnertagtype());
 297 }
 298
 299
 300 void closeParTag(XMLStream & xs, Paragraph const * par, Paragraph const * nextpar)
 301 {
 302         if (par == nextpar)
 303                 nextpar = nullptr;
 304
 305         // See comment in openParTag.
 306         Layout const & lay = par->layout();
 307         bool closeWrapper = lay.docbookwrappertag() != "NONE";
 308         if (nextpar != nullptr) {
 309                 Layout const & nextlay = nextpar->layout();
 310                 if (nextlay.docbookwrappertag() != "NONE") {
 311                         closeWrapper = nextlay.docbookwrappertag() == lay.docbookwrappertag()
 312                                         && !nextlay.docbookwrappermergewithprevious();
 313                 }
 314         }
 315
 316         // Main logic.
 317         closeTag(xs, lay.docbookiteminnertag(), lay.docbookiteminnertagtype());
 318         closeTag(xs, lay.docbookitemtag(), lay.docbookitemtagtype());
 319         closeTag(xs, lay.docbooktag(), lay.docbooktagtype());
 320         if (closeWrapper)
 321                 closeTag(xs, lay.docbookwrappertag(), lay.docbookwrappertagtype());
 322 }
 323
 324
 325 void openLabelTag(XMLStream & xs, Layout const & lay) // Mostly for definition lists.
 326 {
 327         openTag(xs, lay.docbookitemlabeltag(), lay.docbookitemlabelattr(), lay.docbookitemlabeltagtype());
 328 }
 329
 330
 331 void closeLabelTag(XMLStream & xs, Layout const & lay)
 332 {
 333         closeTag(xs, lay.docbookitemlabeltag(), lay.docbookitemlabeltagtype());
 334 }
 335
 336
 337 void openItemTag(XMLStream & xs, Layout const & lay)
 338 {
 339         openTag(xs, lay.docbookitemtag(), lay.docbookitemattr(), lay.docbookitemtagtype());
 340 }
 341
 342
 343 void closeItemTag(XMLStream & xs, Layout const & lay)
 344 {
 345         closeTag(xs, lay.docbookitemtag(), lay.docbookitemtagtype());
 346 }
 347
 348
 349 void makeAny(
 350                 Text const &,
 351                 Buffer const &,
 352                 XMLStream &,
 353                 OutputParams const &,
 354                 ParagraphList::const_iterator);
 355
 356
 357 void makeParagraphBibliography(
 358                 Buffer const & buf,
 359                 XMLStream & xs,
 360                 OutputParams const & runparams,
 361                 Text const & text,
 362                 ParagraphList::const_iterator const & par)
 363 {
 364         // If this is the first paragraph in a bibliography, open the bibliography tag.
 365         auto pbegin_before = text.paragraphs().getParagraphBefore(par);
 366         if (pbegin_before->layout().latextype != LATEX_BIB_ENVIRONMENT) {
 367                 xs << xml::StartTag("bibliography");
 368                 xs << xml::CR();
 369         }
 370
 371         // Start the precooked bibliography entry. This is very much like opening a paragraph tag.
 372         // Don't forget the citation ID!
 373         docstring attr;
 374         for (auto i = 0; i < par->size(); ++i) {
 375                 Inset const *ip = par->getInset(i);
 376                 if (!ip)
 377                         continue;
 378                 if (const auto * bibitem = dynamic_cast<const InsetBibitem*>(ip)) {
 379                         attr = from_utf8("xml:id='") + bibitem->getParam("key") + from_utf8("'");
 380                         break;
 381                 }
 382         }
 383         xs << xml::StartTag(from_utf8("bibliomixed"), attr);
 384
 385         // Generate the entry. Concatenate the different parts of the paragraph if any.
 386         auto const begin = text.paragraphs().begin();
 387         auto pars = par->simpleDocBookOnePar(buf, runparams, text.outerFont(std::distance(begin, par)), 0);
 388         for (auto & parXML : pars)
 389                 xs << XMLStream::ESCAPE_NONE << parXML;
 390
 391         // End the precooked bibliography entry.
 392         xs << xml::EndTag("bibliomixed");
 393         xs << xml::CR();
 394
 395         // If this is the last paragraph in a bibliography, close the bibliography tag.
 396         auto const end = text.paragraphs().end();
 397         bool endBibliography = par == end;
 398         if (!endBibliography) {
 399                 auto nextpar = par;
 400                 ++nextpar;
 401                 endBibliography = par->layout().latextype != LATEX_BIB_ENVIRONMENT;
 402         }
 403
 404         if (endBibliography) {
 405                 xs << xml::EndTag("bibliography");
 406                 xs << xml::CR();
 407         }
 408 }
 409
 410
 411 void makeParagraph(
 412                 Buffer const & buf,
 413                 XMLStream & xs,
 414                 OutputParams const & runparams,
 415                 Text const & text,
 416                 ParagraphList::const_iterator const & par)
 417 {
 418         auto const begin = text.paragraphs().begin();
 419         auto const end = text.paragraphs().end();
 420         auto prevpar = text.paragraphs().getParagraphBefore(par);
 421
 422         // We want to open the paragraph tag if:
 423         //   (i) the current layout permits multiple paragraphs
 424         //  (ii) we are either not already inside a paragraph (HTMLIsBlock) OR
 425         //         we are, but this is not the first paragraph
 426         //
 427         // But there is also a special case, and we first see whether we are in it.
 428         // We do not want to open the paragraph tag if this paragraph contains
 429         // only one item, and that item is "inline", i.e., not HTMLIsBlock (such
 430         // as a branch). On the other hand, if that single item has a font change
 431         // applied to it, then we still do need to open the paragraph.
 432         //
 433         // Obviously, this is very fragile. The main reason we need to do this is
 434         // because of branches, e.g., a branch that contains an entire new section.
 435         // We do not really want to wrap that whole thing in a <div>...</div>.
 436         bool special_case = false;
 437         Inset const *specinset = par->size() == 1 ? par->getInset(0) : nullptr;
 438         if (specinset && !specinset->getLayout().htmlisblock()) { // TODO: Convert htmlisblock to a DocBook parameter?
 439                 Layout const &style = par->layout();
 440                 FontInfo const first_font = style.labeltype == LABEL_MANUAL ?
 441                                                                         style.labelfont : style.font;
 442                 FontInfo const our_font =
 443                                 par->getFont(buf.masterBuffer()->params(), 0,
 444                                                          text.outerFont(std::distance(begin, par))).fontInfo();
 445
 446                 if (first_font == our_font)
 447                         special_case = true;
 448         }
 449
 450         // Plain layouts must be ignored.
 451         if (!special_case && buf.params().documentClass().isPlainLayout(par->layout()) && !runparams.docbook_force_pars)
 452                 special_case = true;
 453         // TODO: Could get rid of this with a DocBook equivalent to htmlisblock?
 454         if (!special_case && par->size() == 1 && par->getInset(0)) {
 455                 Inset const * firstInset = par->getInset(0);
 456
 457                 // Floats cannot be in paragraphs.
 458                 special_case = to_utf8(firstInset->layoutName()).substr(0, 6) == "Float:";
 459
 460                 // Bibliographies cannot be in paragraphs.
 461                 if (!special_case && firstInset->asInsetCommand())
 462                         special_case = firstInset->asInsetCommand()->params().getCmdName() == "bibtex";
 463
 464                 // Equations do not deserve their own paragraph (DocBook allows them outside paragraphs).
 465                 if (!special_case && firstInset->asInsetMath())
 466                         special_case = true;
 467
 468                 // ERTs are in comments, not paragraphs.
 469                 if (!special_case && firstInset->lyxCode() == lyx::ERT_CODE)
 470                         special_case = true;
 471
 472                 // Listings should not get into their own paragraph.
 473                 if (!special_case && firstInset->lyxCode() == lyx::LISTINGS_CODE)
 474                         special_case = true;
 475         }
 476
 477         bool const open_par = runparams.docbook_make_pars
 478                                                   && !runparams.docbook_in_par
 479                                                   && !special_case;
 480
 481         // We want to issue the closing tag if either:
 482         //   (i)  We opened it, and either docbook_in_par is false,
 483         //              or we're not in the last paragraph, anyway.
 484         //   (ii) We didn't open it and docbook_in_par is true,
 485         //              but we are in the first par, and there is a next par.
 486         auto nextpar = par;
 487         ++nextpar;
 488         bool const close_par = open_par && (!runparams.docbook_in_par);
 489
 490         // Determine if this paragraph has some real content. Things like new pages are not caught
 491         // by Paragraph::empty(), even though they do not generate anything useful in DocBook.
 492         // Thus, remove all spaces (including new lines: \r, \n) before checking for emptiness.
 493         // std::all_of allows doing this check without having to copy the string.
 494         // Open and close tags around each contained paragraph.
 495         auto pars = par->simpleDocBookOnePar(buf, runparams, text.outerFont(distance(begin, par)), 0);
 496         for (auto & parXML : pars) {
 497                 if (!std::all_of(parXML.begin(), parXML.end(), ::isspace)) {
 498                         if (open_par)
 499                                 openParTag(xs, &*par, prevpar);
 500
 501                         xs << XMLStream::ESCAPE_NONE << parXML;
 502
 503                         if (close_par)
 504                                 closeParTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr);
 505                 }
 506         }
 507 }
 508
 509
 510 void makeEnvironment(
 511                 Buffer const &buf,
 512                 XMLStream &xs,
 513                 OutputParams const &runparams,
 514                 Text const &text,
 515                 ParagraphList::const_iterator const & par)
 516 {
 517         auto const end = text.paragraphs().end();
 518
 519         // Output the opening tag for this environment, but only if it has not been previously opened (condition
 520         // implemented in openParTag).
 521         auto prevpar = text.paragraphs().getParagraphBefore(par);
 522         openParTag(xs, &*par, prevpar); // TODO: switch in layout for par/block?
 523
 524         // Generate the contents of this environment. There is a special case if this is like some environment.
 525         Layout const & style = par->layout();
 526         if (style.latextype == LATEX_COMMAND) {
 527                 // Nothing to do (otherwise, infinite loops).
 528         } else if (style.latextype == LATEX_ENVIRONMENT ||
 529                         style.latextype == LATEX_LIST_ENVIRONMENT ||
 530                         style.latextype == LATEX_ITEM_ENVIRONMENT) {
 531                 // Open a wrapper tag if needed.
 532                 if (style.docbookitemwrappertag() != "NONE")
 533                         openTag(xs, style.docbookitemwrappertag(), style.docbookitemwrapperattr(), style.docbookitemwrappertagtype());
 534
 535                 // Generate the label, if need be. If it is taken from the text, sep != 0 and corresponds to the first
 536                 // character after the label.
 537                 pos_type sep = 0;
 538                 if (style.labeltype != LABEL_NO_LABEL && style.docbookitemlabeltag() != "NONE") {
 539                         // At least one condition must be met:
 540                         //  - this environment is not a list
 541                         //  - if this is a list, the label must not be manual (i.e. it must be taken from the layout)
 542                         if (style.latextype != LATEX_LIST_ENVIRONMENT || style.labeltype != LABEL_MANUAL) {
 543                                 // Usual cases: maybe there is something specified at the layout level. Highly unlikely, though.
 544                                 docstring const lbl = par->params().labelString();
 545
 546                                 if (lbl.empty()) {
 547                                         xs << xml::CR();
 548                                 } else {
 549                                         openLabelTag(xs, style);
 550                                         xs << lbl;
 551                                         closeLabelTag(xs, style);
 552                                 }
 553                         } else {
 554                                 // Only variablelist gets here (or similar items defined as an extension in the layout).
 555                                 openLabelTag(xs, style);
 556                                 sep = par->firstWordDocBook(xs, runparams);
 557                                 closeLabelTag(xs, style);
 558                         }
 559                 }
 560
 561                 // Maybe the item is completely empty, i.e. if the first word ends at the end of the current paragraph
 562                 // AND if the next paragraph doesn't have the same depth (if there is such a paragraph).
 563                 // Common case: there is only the first word on the line, but there is a nested list instead
 564                 // of more text.
 565                 bool emptyItem = false;
 566                 if (sep == par->size()) { // If the separator is already at the end of this paragraph...
 567                         auto next_par = par;
 568                         ++next_par;
 569                         if (next_par == text.paragraphs().end()) // There is no next paragraph.
 570                                 emptyItem = true;
 571                         else // There is a next paragraph: check depth.
 572                                 emptyItem = par->params().depth() >= next_par->params().depth();
 573                 }
 574
 575                 if (emptyItem) {
 576                         // Avoid having an empty item, this is not valid DocBook. A single character is enough to force
 577                         // generation of a full <para>.
 578                         // TODO: this always worked only by magic...
 579                         xs << ' ';
 580                 } else {
 581                         // Generate the rest of the paragraph, if need be. Open as many inner tags as necessary.
 582                         auto pars = par->simpleDocBookOnePar(buf, runparams, text.outerFont(std::distance(text.paragraphs().begin(), par)), sep);
 583                         auto p = pars.begin();
 584                         while (true) {
 585                                 xs << XMLStream::ESCAPE_NONE << *p;
 586                                 ++p;
 587                                 if (p != pars.end()) {
 588                                         closeTag(xs, par->layout().docbookiteminnertag(), par->layout().docbookiteminnertagtype());
 589                                         openTag(xs, par->layout().docbookiteminnertag(), par->layout().docbookiteminnerattr(), par->layout().docbookiteminnertagtype());
 590                                 } else
 591                                         break;
 592                         }
 593                 }
 594         } else {
 595                 makeAny(text, buf, xs, runparams, par);
 596         }
 597
 598         // Close the environment.
 599         auto nextpar = par;
 600         ++nextpar;
 601         closeParTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr); // TODO: switch in layout for par/block?
 602 }
 603
 604
 605 void makeCommand(
 606                 Buffer const & buf,
 607                 XMLStream & xs,
 608                 OutputParams const & runparams,
 609                 Text const & text,
 610                 ParagraphList::const_iterator const & par)
 611 {
 612         // Unlike XHTML, no need for labels, as they are handled by DocBook tags.
 613         auto const begin = text.paragraphs().begin();
 614         auto const end = text.paragraphs().end();
 615         auto nextpar = par;
 616         ++nextpar;
 617
 618         // Generate this command.
 619         auto prevpar = text.paragraphs().getParagraphBefore(par);
 620         openParTag(xs, &*par, prevpar);
 621
 622         auto pars = par->simpleDocBookOnePar(buf, runparams,text.outerFont(distance(begin, par)));
 623         for (auto & parXML : pars)
 624                 // TODO: decide what to do with openParTag/closeParTag in new lines.
 625                 xs << XMLStream::ESCAPE_NONE << parXML;
 626
 627         closeParTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr);
 628 }
 629
 630
 631 void makeAny(
 632                 Text const &text,
 633                 Buffer const &buf,
 634                 XMLStream &xs,
 635                 OutputParams const &ourparams,
 636                 ParagraphList::const_iterator par)
 637 {
 638         switch (par->layout().latextype) {
 639         case LATEX_COMMAND:
 640                 makeCommand(buf, xs, ourparams, text, par);
 641                 break;
 642         case LATEX_ENVIRONMENT:
 643         case LATEX_LIST_ENVIRONMENT:
 644         case LATEX_ITEM_ENVIRONMENT:
 645                 makeEnvironment(buf, xs, ourparams, text, par);
 646                 break;
 647         case LATEX_PARAGRAPH:
 648                 makeParagraph(buf, xs, ourparams, text, par);
 649                 break;
 650         case LATEX_BIB_ENVIRONMENT:
 651                 makeParagraphBibliography(buf, xs, ourparams, text, par);
 652                 break;
 653         }
 654 }
 655
 656
 657 using DocBookDocumentSectioning = tuple<bool, pit_type>;
 658
 659
 660 struct DocBookInfoTag
 661 {
 662         const set<pit_type> shouldBeInInfo;
 663         const set<pit_type> mustBeInInfo;
 664         const set<pit_type> abstract;
 665         pit_type bpit;
 666         pit_type epit;
 667
 668         DocBookInfoTag(const set<pit_type> & shouldBeInInfo, const set<pit_type> & mustBeInInfo,
 669                                    const set<pit_type> & abstract, pit_type bpit, pit_type epit) :
 670                                    shouldBeInInfo(shouldBeInInfo), mustBeInInfo(mustBeInInfo), abstract(abstract),
 671                                    bpit(bpit), epit(epit) {}
 672 };
 673
 674
 675 DocBookDocumentSectioning hasDocumentSectioning(ParagraphList const &paragraphs, pit_type bpit, pit_type const epit) {
 676         bool documentHasSections = false;
 677
 678         while (bpit < epit) {
 679                 Layout const &style = paragraphs[bpit].layout();
 680                 documentHasSections |= style.category() == from_utf8("Sectioning");
 681
 682                 if (documentHasSections)
 683                         break;
 684                 bpit += 1;
 685         }
 686         // Paragraphs before the first section: [ runparams.par_begin ; eppit )
 687
 688         return make_tuple(documentHasSections, bpit);
 689 }
 690
 691
 692 bool hasOnlyNotes(Paragraph const & par)
 693 {
 694         // Precondition: the paragraph is not empty. Otherwise, the function will always return true...
 695         for (int i = 0; i < par.size(); ++i)
 696                 // If you find something that is not an inset (like actual text) or an inset that is not a note,
 697                 // return false.
 698                 if (!par.isInset(i) || !dynamic_cast<InsetNote *>(par.insetList().get(i)))
 699                         return false;
 700         return true;
 701 }
 702
 703
 704 DocBookInfoTag getParagraphsWithInfo(ParagraphList const &paragraphs, pit_type bpit, pit_type const epit) {
 705         set<pit_type> shouldBeInInfo;
 706         set<pit_type> mustBeInInfo;
 707         set<pit_type> abstract;
 708
 709         // Find the first non empty paragraph by mutating bpit.
 710         while (bpit < epit) {
 711                 Paragraph const &par = paragraphs[bpit];
 712                 if (par.empty() || hasOnlyNotes(par))
 713                         bpit += 1;
 714                 else
 715                         break;
 716         }
 717
 718         // Find the last info-like paragraph.
 719         pit_type cpit = bpit;
 720         bool hasAbstractLayout = false;
 721         while (cpit < epit) {
 722                 // Skip paragraphs only containing one note.
 723                 Paragraph const & par = paragraphs[cpit];
 724                 if (hasOnlyNotes(par)) {
 725                         cpit += 1;
 726                         continue;
 727                 }
 728
 729                 if (par.layout().docbookabstract())
 730                         hasAbstractLayout = true;
 731
 732                 // Based on layout information, store this paragraph in one set: should be in <info>, must be.
 733                 Layout const &style = par.layout();
 734
 735                 if (style.docbookininfo() == "always") {
 736                         mustBeInInfo.emplace(cpit);
 737                 } else if (style.docbookininfo() == "maybe") {
 738                         shouldBeInInfo.emplace(cpit);
 739                 } else {
 740                         // Hypothesis: the <info> parts should be grouped together near the beginning bpit.
 741                         // There may be notes in between, but nothing else.
 742                         break;
 743                 }
 744                 cpit += 1;
 745         }
 746         // Now, cpit points to the last paragraph that has things that could go in <info>.
 747         // bpit is the beginning of the <info> part.
 748
 749         // Go once again through the list of paragraphs to find the abstract. If there is an abstract
 750         // layout, only consider it. Otherwise, an abstract is just a sequence of paragraphs with text.
 751         if (hasAbstractLayout) {
 752                 pit_type pit = bpit;
 753                 while (pit < cpit) { // Don't overshoot the <info> part.
 754                         if (paragraphs[pit].layout().docbookabstract())
 755                                 abstract.emplace(pit);
 756                         pit++;
 757                 }
 758         } else {
 759                 pit_type lastAbstract = epit + 1; // A nonsensical value.
 760                 docstring lastAbstractLayout;
 761
 762                 pit_type pit = bpit;
 763                 while (pit < cpit) { // Don't overshoot the <info> part.
 764                         const Paragraph & par = paragraphs.at(pit);
 765                         if (!par.insetList().empty()) {
 766                                 for (const auto &i : par.insetList()) {
 767                                         if (i.inset->getText(0) != nullptr) {
 768                                                 if (lastAbstract == epit + 1) {
 769                                                         // First paragraph that matches the heuristic definition of abstract.
 770                                                         lastAbstract = pit;
 771                                                         lastAbstractLayout = par.layout().name();
 772                                                 } else if (pit > lastAbstract + 1 || par.layout().name() != lastAbstractLayout) {
 773                                                         // This is either too far from the last abstract paragraph or doesn't
 774                                                         // have the right layout name, BUT there has already been an abstract
 775                                                         // in this document: done with detecting the abstract.
 776                                                         goto done; // Easier to get out of two nested loops.
 777                                                 }
 778
 779                                                 abstract.emplace(pit);
 780                                                 break;
 781                                         }
 782                                 }
 783                         }
 784                         pit++;
 785                 }
 786         }
 787
 788         done:
 789         return DocBookInfoTag(shouldBeInInfo, mustBeInInfo, abstract, bpit, cpit);
 790 }
 791
 792 } // end anonymous namespace
 793
 794
 795 xml::FontTag docbookStartFontTag(xml::FontTypes type)
 796 {
 797         return xml::FontTag(from_utf8(fontToDocBookTag(type)), from_utf8(fontToAttribute(type)), type);
 798 }
 799
 800
 801 xml::EndFontTag docbookEndFontTag(xml::FontTypes type)
 802 {
 803         return xml::EndFontTag(from_utf8(fontToDocBookTag(type)), type);
 804 }
 805
 806
 807 void outputDocBookInfo(
 808                 Text const & text,
 809                 Buffer const & buf,
 810                 XMLStream & xs,
 811                 OutputParams const & runparams,
 812                 ParagraphList const & paragraphs,
 813                 DocBookInfoTag const & info)
 814 {
 815         // Perform an additional check on the abstract. Sometimes, there are many paragraphs that should go
 816         // into the abstract, but none generates actual content. Thus, first generate to a temporary stream,
 817         // then only create the <abstract> tag if these paragraphs generate some content.
 818         // This check must be performed *before* a decision on whether or not to output <info> is made.
 819         bool hasAbstract = !info.abstract.empty();
 820         docstring abstract;
 821         if (hasAbstract) {
 822                 // Generate the abstract XML into a string before further checks.
 823                 odocstringstream os2;
 824                 {
 825                         XMLStream xs2(os2);
 826                         auto bpit = *std::min_element(info.abstract.begin(), info.abstract.end());
 827                         auto epit = 1 + *std::max_element(info.abstract.begin(), info.abstract.end());
 828                         // info.abstract is inclusive, epit is exclusive, hence +1 for looping.
 829
 830                         while (bpit < epit) {
 831                                 makeAny(text, buf, xs2, runparams, paragraphs.iterator_at(bpit));
 832                                 bpit += 1;
 833                         }
 834                 }
 835
 836                 // Actually output the abstract if there is something to do. Don't count line feeds or spaces in this,
 837                 // even though they must be properly output if there is some abstract.
 838                 abstract = os2.str();
 839                 docstring cleaned = abstract;
 840                 cleaned.erase(std::remove_if(cleaned.begin(), cleaned.end(), ::isspace), cleaned.end());
 841
 842                 // Nothing? Then there is no abstract!
 843                 if (cleaned.empty())
 844                         hasAbstract = false;
 845         }
 846
 847         // The abstract must go in <info>. Otherwise, decide whether to open <info> based on the layouts.
 848         bool needInfo = !info.mustBeInInfo.empty() || hasAbstract;
 849
 850         // Start the <info> tag if required.
 851         if (needInfo) {
 852                 xs.startDivision(false);
 853                 xs << xml::StartTag("info");
 854                 xs << xml::CR();
 855         }
 856
 857         // Output the elements that should go in <info>, before and after the abstract.
 858         for (auto pit : info.shouldBeInInfo) { // Typically, the title: these elements are so important and ubiquitous
 859                 // that mandating a wrapper like <info> would repel users. Thus, generate them first.
 860                 makeAny(text, buf, xs, runparams, paragraphs.iterator_at(pit));
 861         }
 862         for (auto pit : info.mustBeInInfo) {
 863                 if (info.abstract.find(pit) == info.abstract.end()) // The abstract must be in info, but is dealt with after.
 864                         makeAny(text, buf, xs, runparams, paragraphs.iterator_at(pit));
 865         }
 866
 867         // Always output the abstract as the last item of the <info>, as it requires special treatment (especially if
 868         // it contains several paragraphs that are empty).
 869         if (hasAbstract) {
 870 //              string tag = paragraphs[*info.abstract.begin()].layout().docbookforceabstracttag();
 871 //              if (tag == "NONE")
 872 //                      tag = "abstract";
 873 //
 874 //              xs << xml::StartTag(tag);
 875 //              xs << xml::CR();
 876                 xs << XMLStream::ESCAPE_NONE << abstract;
 877 //              xs << xml::EndTag(tag);
 878 //              xs << xml::CR();
 879         }
 880
 881         // End the <info> tag if it was started.
 882         if (needInfo) {
 883                 xs << xml::EndTag("info");
 884                 xs << xml::CR();
 885                 xs.endDivision();
 886         }
 887 }
 888
 889
 890 void docbookFirstParagraphs(
 891                 Text const &text,
 892                 Buffer const &buf,
 893                 XMLStream &xs,
 894                 OutputParams const &runparams,
 895                 pit_type epit)
 896 {
 897         // Handle the beginning of the document, supposing it has sections.
 898         // Major role: output the first <info> tag.
 899
 900         ParagraphList const &paragraphs = text.paragraphs();
 901         pit_type bpit = runparams.par_begin;
 902         DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit);
 903         outputDocBookInfo(text, buf, xs, runparams, paragraphs, info);
 904 }
 905
 906
 907 void docbookSimpleAllParagraphs(
 908                 Text const & text,
 909                 Buffer const & buf,
 910                 XMLStream & xs,
 911                 OutputParams const & runparams)
 912 {
 913         // Handle the given text, supposing it has no sections (i.e. a "simple" text). The input may vary in length
 914         // between a single paragraph to a whole document.
 915
 916         // First, the <info> tag.
 917         ParagraphList const &paragraphs = text.paragraphs();
 918         pit_type bpit = runparams.par_begin;
 919         pit_type const epit = runparams.par_end;
 920         DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit);
 921         outputDocBookInfo(text, buf, xs, runparams, paragraphs, info);
 922
 923         // Then, the content. It starts where the <info> ends.
 924         bpit = info.epit;
 925         while (bpit < epit) {
 926                 auto par = paragraphs.iterator_at(bpit);
 927                 if (!hasOnlyNotes(*par))
 928                         makeAny(text, buf, xs, runparams, par);
 929                 bpit += 1;
 930         }
 931 }
 932
 933
 934 void docbookParagraphs(Text const &text,
 935                                            Buffer const &buf,
 936                                            XMLStream &xs,
 937                                            OutputParams const &runparams) {
 938         ParagraphList const &paragraphs = text.paragraphs();
 939         if (runparams.par_begin == runparams.par_end) {
 940                 runparams.par_begin = 0;
 941                 runparams.par_end = paragraphs.size();
 942         }
 943         pit_type bpit = runparams.par_begin;
 944         pit_type const epit = runparams.par_end;
 945         LASSERT(bpit < epit,
 946                         {
 947                                 xs << XMLStream::ESCAPE_NONE << "<!-- DocBook output error! -->\n";
 948                                 return;
 949                         });
 950
 951         std::stack<std::pair<int, string>> headerLevels; // Used to determine when to open/close sections: store the depth
 952         // of the section and the tag that was used to open it.
 953
 954         // Detect whether the document contains sections. If there are no sections, there can be no automatically
 955         // discovered abstract.
 956         bool documentHasSections;
 957         pit_type eppit;
 958         tie(documentHasSections, eppit) = hasDocumentSectioning(paragraphs, bpit, epit);
 959
 960         if (documentHasSections) {
 961                 docbookFirstParagraphs(text, buf, xs, runparams, eppit);
 962                 bpit = eppit;
 963         } else {
 964                 docbookSimpleAllParagraphs(text, buf, xs, runparams);
 965                 return;
 966         }
 967
 968         bool currentlyInAppendix = false;
 969
 970         while (bpit < epit) {
 971                 OutputParams ourparams = runparams;
 972
 973                 auto par = paragraphs.iterator_at(bpit);
 974                 if (par->params().startOfAppendix())
 975                         currentlyInAppendix = true;
 976                 Layout const &style = par->layout();
 977                 ParagraphList::const_iterator const lastStartedPar = par;
 978                 ParagraphList::const_iterator send;
 979
 980                 if (hasOnlyNotes(*par)) {
 981                         bpit += 1;
 982                         continue;
 983                 }
 984
 985                 // Think about adding <section> and/or </section>s.
 986                 const bool isLayoutSectioning = style.category() == from_utf8("Sectioning");
 987                 if (isLayoutSectioning) {
 988                         int level = style.toclevel;
 989
 990                         // Need to close a previous section if it has the same level or a higher one (close <section> if opening a <h2>
 991                         // after a <h2>, <h3>, <h4>, <h5> or <h6>). More examples:
 992                         //   - current: h2; back: h1; do not close any <section>
 993                         //   - current: h1; back: h2; close two <section> (first the <h2>, then the <h1>, so a new <h1> can come)
 994                         while (!headerLevels.empty() && level <= headerLevels.top().first) {
 995                                 int stackLevel = headerLevels.top().first;
 996                                 docstring stackTag = from_utf8("</" + headerLevels.top().second + ">");
 997                                 headerLevels.pop();
 998
 999                                 // Output the tag only if it corresponds to a legit section.
1000                                 if (stackLevel != Layout::NOT_IN_TOC)
1001                                         xs << XMLStream::ESCAPE_NONE << stackTag << xml::CR();
1002                         }
1003
1004                         // Open the new section: first push it onto the stack, then output it in DocBook.
1005                         string sectionTag = (currentlyInAppendix && style.docbooksectiontag() == "chapter") ?
1006                                                                 "appendix" : style.docbooksectiontag();
1007                         headerLevels.push(std::make_pair(level, sectionTag));
1008
1009                         // Some sectioning-like elements should not be output (such as FrontMatter).
1010                         if (level != Layout::NOT_IN_TOC) {
1011                                 // Look for a label in the title, i.e. a InsetLabel as a child.
1012                                 docstring id = docstring();
1013                                 for (pos_type i = 0; i < par->size(); ++i) {
1014                                         Inset const *inset = par->getInset(i);
1015                                         if (inset) {
1016                                                 if (auto label = dynamic_cast<InsetLabel const *>(inset)) {
1017                                                         // Generate the attributes for the section if need be.
1018                                                         id += "xml:id=\"" + xml::cleanID(label->screenLabel()) + "\"";
1019
1020                                                         // Don't output the ID as a DocBook <anchor>.
1021                                                         ourparams.docbook_anchors_to_ignore.emplace(label->screenLabel());
1022
1023                                                         // Cannot have multiple IDs per tag.
1024                                                         break;
1025                                                 }
1026                                         }
1027                                 }
1028
1029                                 // Write the open tag for this section.
1030                                 docstring tag = from_utf8("<" + sectionTag);
1031                                 if (!id.empty())
1032                                         tag += from_utf8(" ") + id;
1033                                 tag += from_utf8(">");
1034                                 xs << XMLStream::ESCAPE_NONE << tag;
1035                                 xs << xml::CR();
1036                         }
1037                 }
1038
1039                 // Close all sections before the bibliography.
1040                 // TODO: Only close all when the bibliography is at the end of the document? Or force to output the bibliography at the end of the document? Or don't care (as allowed by DocBook)?
1041                 auto insetsLength = distance(par->insetList().begin(), par->insetList().end());
1042                 if (insetsLength > 0) {
1043                         Inset const *firstInset = par->getInset(0);
1044                         if (firstInset && dynamic_cast<InsetBibtex const *>(firstInset)) {
1045                                 while (!headerLevels.empty()) {
1046                                         int level = headerLevels.top().first;
1047                                         docstring tag = from_utf8("</" + headerLevels.top().second + ">");
1048                                         headerLevels.pop();
1049
1050                                         // Output the tag only if it corresponds to a legit section.
1051                                         if (level != Layout::NOT_IN_TOC) {
1052                                                 xs << XMLStream::ESCAPE_NONE << tag;
1053                                                 xs << xml::CR();
1054                                         }
1055                                 }
1056                         }
1057                 }
1058
1059                 // Generate this paragraph.
1060                 makeAny(text, buf, xs, ourparams, par);
1061                 bpit += 1;
1062         }
1063
1064         // If need be, close <section>s, but only at the end of the document (otherwise, dealt with at the beginning
1065         // of the loop).
1066         while (!headerLevels.empty() && headerLevels.top().first > Layout::NOT_IN_TOC) {
1067                 docstring tag = from_utf8("</" + headerLevels.top().second + ">");
1068                 headerLevels.pop();
1069                 xs << XMLStream::ESCAPE_NONE << tag;
1070                 xs << xml::CR();
1071         }
1072 }
1073
1074 } // namespace lyx