src/output_docbook.cpp

   1 /**
   2  * \file output_docbook.cpp
   3  * This file is part of LyX, the document processor.
   4  * Licence details can be found in the file COPYING.
   5  *
   6  * \author Lars Gullik Bjønnes
   7  * \author José Matos
   8  *
   9  * Full author contact details are available in file CREDITS.
  10  */
  11
  12 #include <config.h>
  13
  14 #include "Buffer.h"
  15 #include "buffer_funcs.h"
  16 #include "BufferParams.h"
  17 #include "Font.h"
  18 #include "InsetList.h"
  19 #include "Paragraph.h"
  20 #include "ParagraphList.h"
  21 #include "ParagraphParameters.h"
  22 #include "xml.h"
  23 #include "Text.h"
  24 #include "TextClass.h"
  25
  26 #include "insets/InsetBibtex.h"
  27 #include "insets/InsetBibitem.h"
  28 #include "insets/InsetLabel.h"
  29 #include "insets/InsetNote.h"
  30
  31 #include "support/lassert.h"
  32
  33 #include <stack>
  34 #include <iostream>
  35 #include <algorithm>
  36 #include <sstream>
  37
  38 using namespace std;
  39 using namespace lyx::support;
  40
  41 namespace lyx {
  42
  43 namespace {
  44
  45 std::string fontToDocBookTag(xml::FontTypes type)
  46 {
  47         switch (type) {
  48         case xml::FontTypes::FT_EMPH:
  49         case xml::FontTypes::FT_BOLD:
  50                 return "emphasis";
  51         case xml::FontTypes::FT_NOUN:
  52                 return "person";
  53         case xml::FontTypes::FT_UBAR:
  54         case xml::FontTypes::FT_WAVE:
  55         case xml::FontTypes::FT_DBAR:
  56         case xml::FontTypes::FT_SOUT:
  57         case xml::FontTypes::FT_XOUT:
  58         case xml::FontTypes::FT_ITALIC:
  59         case xml::FontTypes::FT_UPRIGHT:
  60         case xml::FontTypes::FT_SLANTED:
  61         case xml::FontTypes::FT_SMALLCAPS:
  62         case xml::FontTypes::FT_ROMAN:
  63         case xml::FontTypes::FT_SANS:
  64                 return "emphasis";
  65         case xml::FontTypes::FT_TYPE:
  66                 return "code";
  67         case xml::FontTypes::FT_SIZE_TINY:
  68         case xml::FontTypes::FT_SIZE_SCRIPT:
  69         case xml::FontTypes::FT_SIZE_FOOTNOTE:
  70         case xml::FontTypes::FT_SIZE_SMALL:
  71         case xml::FontTypes::FT_SIZE_NORMAL:
  72         case xml::FontTypes::FT_SIZE_LARGE:
  73         case xml::FontTypes::FT_SIZE_LARGER:
  74         case xml::FontTypes::FT_SIZE_LARGEST:
  75         case xml::FontTypes::FT_SIZE_HUGE:
  76         case xml::FontTypes::FT_SIZE_HUGER:
  77         case xml::FontTypes::FT_SIZE_INCREASE:
  78         case xml::FontTypes::FT_SIZE_DECREASE:
  79                 return "emphasis";
  80         default:
  81                 return "";
  82         }
  83 }
  84
  85
  86 string fontToRole(xml::FontTypes type)
  87 {
  88         // Specific fonts are achieved with roles. The only common ones are "" for basic emphasis,
  89         // and "bold"/"strong" for bold. With some specific options, other roles are copied into
  90         // HTML output (via the DocBook XSLT sheets); otherwise, if not recognised, they are just ignored.
  91         // Hence, it is not a problem to have many roles by default here.
  92         // See https://www.sourceware.org/ml/docbook/2003-05/msg00269.html
  93         switch (type) {
  94         case xml::FontTypes::FT_ITALIC:
  95         case xml::FontTypes::FT_EMPH:
  96                 return "";
  97         case xml::FontTypes::FT_BOLD:
  98                 return "bold";
  99         case xml::FontTypes::FT_NOUN: // Outputs a <person>
 100         case xml::FontTypes::FT_TYPE: // Outputs a <code>
 101                 return "";
 102         case xml::FontTypes::FT_UBAR:
 103                 return "underline";
 104
 105         // All other roles are non-standard for DocBook.
 106
 107         case xml::FontTypes::FT_WAVE:
 108                 return "wave";
 109         case xml::FontTypes::FT_DBAR:
 110                 return "dbar";
 111         case xml::FontTypes::FT_SOUT:
 112                 return "sout";
 113         case xml::FontTypes::FT_XOUT:
 114                 return "xout";
 115         case xml::FontTypes::FT_UPRIGHT:
 116                 return "upright";
 117         case xml::FontTypes::FT_SLANTED:
 118                 return "slanted";
 119         case xml::FontTypes::FT_SMALLCAPS:
 120                 return "smallcaps";
 121         case xml::FontTypes::FT_ROMAN:
 122                 return "roman";
 123         case xml::FontTypes::FT_SANS:
 124                 return "sans";
 125         case xml::FontTypes::FT_SIZE_TINY:
 126                 return "tiny";
 127         case xml::FontTypes::FT_SIZE_SCRIPT:
 128                 return "size_script";
 129         case xml::FontTypes::FT_SIZE_FOOTNOTE:
 130                 return "size_footnote";
 131         case xml::FontTypes::FT_SIZE_SMALL:
 132                 return "size_small";
 133         case xml::FontTypes::FT_SIZE_NORMAL:
 134                 return "size_normal";
 135         case xml::FontTypes::FT_SIZE_LARGE:
 136                 return "size_large";
 137         case xml::FontTypes::FT_SIZE_LARGER:
 138                 return "size_larger";
 139         case xml::FontTypes::FT_SIZE_LARGEST:
 140                 return "size_largest";
 141         case xml::FontTypes::FT_SIZE_HUGE:
 142                 return "size_huge";
 143         case xml::FontTypes::FT_SIZE_HUGER:
 144                 return "size_huger";
 145         case xml::FontTypes::FT_SIZE_INCREASE:
 146                 return "size_increase";
 147         case xml::FontTypes::FT_SIZE_DECREASE:
 148                 return "size_decrease";
 149         default:
 150                 return "";
 151         }
 152 }
 153
 154
 155 string fontToAttribute(xml::FontTypes type) {
 156         // If there is a role (i.e. nonstandard use of a tag), output the attribute. Otherwise, the sheer tag is sufficient
 157         // for the font.
 158         string role = fontToRole(type);
 159         if (!role.empty()) {
 160                 return "role='" + role + "'";
 161         } else {
 162                 return "";
 163         }
 164 }
 165
 166
 167 // Convenience functions to open and close tags. First, very low-level ones to ensure a consistent new-line behaviour.
 168 // Block style:
 169 //        Content before
 170 //        <blocktag>
 171 //          Contents of the block.
 172 //        </blocktag>
 173 //        Content after
 174 // Paragraph style:
 175 //        Content before
 176 //          <paratag>Contents of the paragraph.</paratag>
 177 //        Content after
 178 // Inline style:
 179 //    Content before<inlinetag>Contents of the paragraph.</inlinetag>Content after
 180
 181 void openInlineTag(XMLStream & xs, const std::string & tag, const std::string & attr)
 182 {
 183         xs << xml::StartTag(tag, attr);
 184 }
 185
 186
 187 void closeInlineTag(XMLStream & xs, const std::string & tag)
 188 {
 189         xs << xml::EndTag(tag);
 190 }
 191
 192
 193 void openParTag(XMLStream & xs, const std::string & tag, const std::string & attr)
 194 {
 195         if (!xs.isLastTagCR())
 196                 xs << xml::CR();
 197         xs << xml::StartTag(tag, attr);
 198 }
 199
 200
 201 void closeParTag(XMLStream & xs, const std::string & tag)
 202 {
 203         xs << xml::EndTag(tag);
 204         xs << xml::CR();
 205 }
 206
 207
 208 void openBlockTag(XMLStream & xs, const std::string & tag, const std::string & attr)
 209 {
 210         if (!xs.isLastTagCR())
 211                 xs << xml::CR();
 212         xs << xml::StartTag(tag, attr);
 213         xs << xml::CR();
 214 }
 215
 216
 217 void closeBlockTag(XMLStream & xs, const std::string & tag)
 218 {
 219         if (!xs.isLastTagCR())
 220                 xs << xml::CR();
 221         xs << xml::EndTag(tag);
 222         xs << xml::CR();
 223 }
 224
 225
 226 void openTag(XMLStream & xs, const std::string & tag, const std::string & attr, const std::string & tagtype)
 227 {
 228         if (tag.empty() || tag == "NONE")
 229                 return;
 230
 231         if (tag == "para" || tagtype == "paragraph") // Special case for <para>: always considered as a paragraph.
 232                 openParTag(xs, tag, attr);
 233         else if (tagtype == "block")
 234                 openBlockTag(xs, tag, attr);
 235         else if (tagtype == "inline")
 236                 openInlineTag(xs, tag, attr);
 237         else
 238                 xs.writeError("Unrecognised tag type '" + tagtype + "' for '" + tag + " " + attr + "'");
 239 }
 240
 241
 242 void closeTag(XMLStream & xs, const std::string & tag, const std::string & tagtype)
 243 {
 244         if (tag.empty() || tag == "NONE")
 245                 return;
 246
 247         if (tag == "para" || tagtype == "paragraph") // Special case for <para>: always considered as a paragraph.
 248                 closeParTag(xs, tag);
 249         else if (tagtype == "block")
 250                 closeBlockTag(xs, tag);
 251         else if (tagtype == "inline")
 252                 closeInlineTag(xs, tag);
 253         else
 254                 xs.writeError("Unrecognised tag type '" + tagtype + "' for '" + tag + "'");
 255 }
 256
 257
 258 // Higher-level convenience functions.
 259
 260 void openParTag(XMLStream & xs, const Paragraph * par, const Paragraph * prevpar)
 261 {
 262         Layout const & lay = par->layout();
 263
 264         if (par == prevpar)
 265                 prevpar = nullptr;
 266
 267         // When should the wrapper be opened here? Only if the previous paragraph has the SAME wrapper tag
 268         // (usually, they won't have the same layout) and the CURRENT one allows merging.
 269         // The main use case is author information in several paragraphs: if the name of the author is the
 270         // first paragraph of an author, then merging with the previous tag does not make sense. Say the
 271         // next paragraph is the affiliation, then it should be output in the same <author> tag (different
 272         // layout, same wrapper tag).
 273         bool openWrapper = lay.docbookwrappertag() != "NONE";
 274         if (prevpar != nullptr) {
 275                 Layout const & prevlay = prevpar->layout();
 276                 if (prevlay.docbookwrappertag() != "NONE") {
 277                         openWrapper = prevlay.docbookwrappertag() == lay.docbookwrappertag()
 278                                         && !lay.docbookwrappermergewithprevious();
 279                 }
 280         }
 281
 282         // Main logic.
 283         if (openWrapper)
 284                 openTag(xs, lay.docbookwrappertag(), lay.docbookwrapperattr(), lay.docbookwrappertagtype());
 285
 286         const string & tag = lay.docbooktag();
 287         if (tag != "NONE") {
 288                 auto xmltag = xml::ParTag(tag, lay.docbookattr());
 289                 if (!xs.isTagOpen(xmltag, 1)) // Don't nest a paragraph directly in a paragraph.
 290                         // TODO: required or not?
 291                         // TODO: avoid creating a ParTag object just for this query...
 292                         openTag(xs, lay.docbooktag(), lay.docbookattr(), lay.docbooktagtype());
 293         }
 294
 295         openTag(xs, lay.docbookitemtag(), lay.docbookitemattr(), lay.docbookitemtagtype());
 296         openTag(xs, lay.docbookiteminnertag(), lay.docbookiteminnerattr(), lay.docbookiteminnertagtype());
 297 }
 298
 299
 300 void closeParTag(XMLStream & xs, Paragraph const * par, Paragraph const * nextpar)
 301 {
 302         if (par == nextpar)
 303                 nextpar = nullptr;
 304
 305         // See comment in openParTag.
 306         Layout const & lay = par->layout();
 307         bool closeWrapper = lay.docbookwrappertag() != "NONE";
 308         if (nextpar != nullptr) {
 309                 Layout const & nextlay = nextpar->layout();
 310                 if (nextlay.docbookwrappertag() != "NONE") {
 311                         closeWrapper = nextlay.docbookwrappertag() == lay.docbookwrappertag()
 312                                         && !nextlay.docbookwrappermergewithprevious();
 313                 }
 314         }
 315
 316         // Main logic.
 317         closeTag(xs, lay.docbookiteminnertag(), lay.docbookiteminnertagtype());
 318         closeTag(xs, lay.docbookitemtag(), lay.docbookitemtagtype());
 319         closeTag(xs, lay.docbooktag(), lay.docbooktagtype());
 320         if (closeWrapper)
 321                 closeTag(xs, lay.docbookwrappertag(), lay.docbookwrappertagtype());
 322 }
 323
 324
 325 void openLabelTag(XMLStream & xs, Layout const & lay) // Mostly for definition lists.
 326 {
 327         openTag(xs, lay.docbookitemlabeltag(), lay.docbookitemlabelattr(), lay.docbookitemlabeltagtype());
 328 }
 329
 330
 331 void closeLabelTag(XMLStream & xs, Layout const & lay)
 332 {
 333         closeTag(xs, lay.docbookitemlabeltag(), lay.docbookitemlabeltagtype());
 334 }
 335
 336
 337 void openItemTag(XMLStream & xs, Layout const & lay)
 338 {
 339         openTag(xs, lay.docbookitemtag(), lay.docbookitemattr(), lay.docbookitemtagtype());
 340 }
 341
 342
 343 void closeItemTag(XMLStream & xs, Layout const & lay)
 344 {
 345         closeTag(xs, lay.docbookitemtag(), lay.docbookitemtagtype());
 346 }
 347
 348
 349 void makeAny(
 350                 Text const &,
 351                 Buffer const &,
 352                 XMLStream &,
 353                 OutputParams const &,
 354                 ParagraphList::const_iterator);
 355
 356
 357 void makeParagraphBibliography(
 358                 Buffer const & buf,
 359                 XMLStream & xs,
 360                 OutputParams const & runparams,
 361                 Text const & text,
 362                 ParagraphList::const_iterator const & par)
 363 {
 364         // If this is the first paragraph in a bibliography, open the bibliography tag.
 365         auto pbegin_before = text.paragraphs().getParagraphBefore(par);
 366         if (pbegin_before->layout().latextype != LATEX_BIB_ENVIRONMENT) {
 367                 xs << xml::StartTag("bibliography");
 368                 xs << xml::CR();
 369         }
 370
 371         // Start the precooked bibliography entry. This is very much like opening a paragraph tag.
 372         // Don't forget the citation ID!
 373         docstring attr;
 374         for (auto i = 0; i < par->size(); ++i) {
 375                 Inset const *ip = par->getInset(i);
 376                 if (!ip)
 377                         continue;
 378                 if (const auto * bibitem = dynamic_cast<const InsetBibitem*>(ip)) {
 379                         attr = from_utf8("xml:id='") + bibitem->getParam("key") + from_utf8("'");
 380                         break;
 381                 }
 382         }
 383         xs << xml::StartTag(from_utf8("bibliomixed"), attr);
 384
 385         // Generate the entry.
 386         auto const begin = text.paragraphs().begin();
 387         par->simpleDocBookOnePar(buf, xs, runparams, text.outerFont(std::distance(begin, par)), true, true, 0);
 388
 389         // End the precooked bibliography entry.
 390         xs << xml::EndTag("bibliomixed");
 391         xs << xml::CR();
 392
 393         // If this is the last paragraph in a bibliography, close the bibliography tag.
 394         auto const end = text.paragraphs().end();
 395         bool endBibliography = par == end;
 396         if (!endBibliography) {
 397                 auto nextpar = par;
 398                 ++nextpar;
 399                 endBibliography = par->layout().latextype != LATEX_BIB_ENVIRONMENT;
 400         }
 401
 402         if (endBibliography) {
 403                 xs << xml::EndTag("bibliography");
 404                 xs << xml::CR();
 405         }
 406 }
 407
 408
 409 void makeParagraph(
 410                 Buffer const & buf,
 411                 XMLStream & xs,
 412                 OutputParams const & runparams,
 413                 Text const & text,
 414                 ParagraphList::const_iterator const & par)
 415 {
 416         auto const begin = text.paragraphs().begin();
 417         auto const end = text.paragraphs().end();
 418         auto prevpar = text.paragraphs().getParagraphBefore(par);
 419
 420         // We want to open the paragraph tag if:
 421         //   (i) the current layout permits multiple paragraphs
 422         //  (ii) we are either not already inside a paragraph (HTMLIsBlock) OR
 423         //         we are, but this is not the first paragraph
 424         //
 425         // But there is also a special case, and we first see whether we are in it.
 426         // We do not want to open the paragraph tag if this paragraph contains
 427         // only one item, and that item is "inline", i.e., not HTMLIsBlock (such
 428         // as a branch). On the other hand, if that single item has a font change
 429         // applied to it, then we still do need to open the paragraph.
 430         //
 431         // Obviously, this is very fragile. The main reason we need to do this is
 432         // because of branches, e.g., a branch that contains an entire new section.
 433         // We do not really want to wrap that whole thing in a <div>...</div>.
 434         bool special_case = false;
 435         Inset const *specinset = par->size() == 1 ? par->getInset(0) : nullptr;
 436         if (specinset && !specinset->getLayout().htmlisblock()) { // TODO: Convert htmlisblock to a DocBook parameter?
 437                 Layout const &style = par->layout();
 438                 FontInfo const first_font = style.labeltype == LABEL_MANUAL ?
 439                                                                         style.labelfont : style.font;
 440                 FontInfo const our_font =
 441                                 par->getFont(buf.masterBuffer()->params(), 0,
 442                                                          text.outerFont(std::distance(begin, par))).fontInfo();
 443
 444                 if (first_font == our_font)
 445                         special_case = true;
 446         }
 447
 448         // Plain layouts must be ignored.
 449         if (!special_case && buf.params().documentClass().isPlainLayout(par->layout()) && !runparams.docbook_force_pars)
 450                 special_case = true;
 451         // TODO: Could get rid of this with a DocBook equivalent to htmlisblock?
 452         if (!special_case && par->size() == 1 && par->getInset(0)) {
 453                 Inset const * firstInset = par->getInset(0);
 454
 455                 // Floats cannot be in paragraphs.
 456                 special_case = to_utf8(firstInset->layoutName()).substr(0, 6) == "Float:";
 457
 458                 // Bibliographies cannot be in paragraphs.
 459                 if (!special_case && firstInset->asInsetCommand())
 460                         special_case = firstInset->asInsetCommand()->params().getCmdName() == "bibtex";
 461
 462                 // Equations do not deserve their own paragraph (DocBook allows them outside paragraphs).
 463                 if (!special_case && firstInset->asInsetMath())
 464                         special_case = true;
 465
 466                 // ERTs are in comments, not paragraphs.
 467                 if (!special_case && firstInset->lyxCode() == lyx::ERT_CODE)
 468                         special_case = true;
 469
 470                 // Listings should not get into their own paragraph.
 471                 if (!special_case && firstInset->lyxCode() == lyx::LISTINGS_CODE)
 472                         special_case = true;
 473         }
 474
 475         bool const open_par = runparams.docbook_make_pars
 476                                                   && !runparams.docbook_in_par
 477                                                   && !special_case;
 478
 479         // We want to issue the closing tag if either:
 480         //   (i)  We opened it, and either docbook_in_par is false,
 481         //              or we're not in the last paragraph, anyway.
 482         //   (ii) We didn't open it and docbook_in_par is true,
 483         //              but we are in the first par, and there is a next par.
 484         auto nextpar = par;
 485         ++nextpar;
 486         bool const close_par = open_par && (!runparams.docbook_in_par);
 487
 488         // Determine if this paragraph has some real content. Things like new pages are not caught
 489         // by Paragraph::empty(), even though they do not generate anything useful in DocBook.
 490         // Thus, remove all spaces (including new lines: \r, \n) before checking for emptiness.
 491         odocstringstream os2;
 492         XMLStream xs2(os2);
 493         par->simpleDocBookOnePar(buf, xs2, runparams, text.outerFont(distance(begin, par)), open_par, close_par, 0);
 494
 495         docstring cleaned = os2.str();
 496         cleaned.erase(std::remove_if(cleaned.begin(), cleaned.end(), ::isspace), cleaned.end());
 497
 498         if (!cleaned.empty()) {
 499                 if (open_par)
 500                         openParTag(xs, &*par, prevpar);
 501
 502                 xs << XMLStream::ESCAPE_NONE << os2.str();
 503
 504                 if (close_par)
 505                         closeParTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr);
 506         }
 507 }
 508
 509
 510 void makeEnvironment(
 511                 Buffer const &buf,
 512                 XMLStream &xs,
 513                 OutputParams const &runparams,
 514                 Text const &text,
 515                 ParagraphList::const_iterator const & par)
 516 {
 517         auto const end = text.paragraphs().end();
 518
 519         // Output the opening tag for this environment, but only if it has not been previously opened (condition
 520         // implemented in openParTag).
 521         auto prevpar = text.paragraphs().getParagraphBefore(par);
 522         openParTag(xs, &*par, prevpar); // TODO: switch in layout for par/block?
 523
 524         // Generate the contents of this environment. There is a special case if this is like some environment.
 525         Layout const & style = par->layout();
 526         if (style.latextype == LATEX_COMMAND) {
 527                 // Nothing to do (otherwise, infinite loops).
 528         } else if (style.latextype == LATEX_ENVIRONMENT ||
 529                         style.latextype == LATEX_LIST_ENVIRONMENT ||
 530                         style.latextype == LATEX_ITEM_ENVIRONMENT) {
 531                 // Open a wrapper tag if needed.
 532                 if (style.docbookitemwrappertag() != "NONE") {
 533                         xs << xml::StartTag(style.docbookitemwrappertag(), style.docbookitemwrapperattr());
 534                         xs << xml::CR();
 535                 }
 536
 537                 // Generate the label, if need be. If it is taken from the text, sep != 0 and corresponds to the first
 538                 // character after the label.
 539                 pos_type sep = 0;
 540                 if (style.labeltype != LABEL_NO_LABEL && style.docbookitemlabeltag() != "NONE") {
 541                         // At least one condition must be met:
 542                         //  - this environment is not a list
 543                         //  - if this is a list, the label must not be manual (i.e. it must be taken from the layout)
 544                         if (style.latextype != LATEX_LIST_ENVIRONMENT || style.labeltype != LABEL_MANUAL) {
 545                                 // Usual cases: maybe there is something specified at the layout level. Highly unlikely, though.
 546                                 docstring const lbl = par->params().labelString();
 547
 548                                 if (lbl.empty()) {
 549                                         xs << xml::CR();
 550                                 } else {
 551                                         openLabelTag(xs, style);
 552                                         xs << lbl;
 553                                         closeLabelTag(xs, style);
 554                                 }
 555                         } else {
 556                                 // Only variablelist gets here (or similar items defined as an extension in the layout).
 557                                 openLabelTag(xs, style);
 558                                 sep = par->firstWordDocBook(xs, runparams);
 559                                 closeLabelTag(xs, style);
 560                         }
 561                 }
 562
 563                 // Maybe the item is completely empty, i.e. if the first word ends at the end of the current paragraph
 564                 // AND if the next paragraph doesn't have the same depth (if there is such a paragraph).
 565                 // Common case: there is only the first word on the line, but there is a nested list instead
 566                 // of more text.
 567                 bool emptyItem = false;
 568                 if (sep == par->size()) { // If the separator is already at the end of this paragraph...
 569                         auto next_par = par;
 570                         ++next_par;
 571                         if (next_par == text.paragraphs().end()) // There is no next paragraph.
 572                                 emptyItem = true;
 573                         else // There is a next paragraph: check depth.
 574                                 emptyItem = par->params().depth() >= next_par->params().depth();
 575                 }
 576
 577                 if (emptyItem) {
 578                         // Avoid having an empty item, this is not valid DocBook. A single character is enough to force
 579                         // generation of a full <para>.
 580                         // TODO: this always worked only by magic...
 581                         xs << ' ';
 582                 } else {
 583                         // Generate the rest of the paragraph, if need be.
 584                         par->simpleDocBookOnePar(buf, xs, runparams, text.outerFont(std::distance(text.paragraphs().begin(), par)),
 585                                                                  true, true, sep);
 586                 }
 587         } else {
 588                 makeAny(text, buf, xs, runparams, par);
 589         }
 590
 591         // Close the environment.
 592         auto nextpar = par;
 593         ++nextpar;
 594         closeParTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr); // TODO: switch in layout for par/block?
 595 }
 596
 597
 598 void makeCommand(
 599                 Buffer const & buf,
 600                 XMLStream & xs,
 601                 OutputParams const & runparams,
 602                 Text const & text,
 603                 ParagraphList::const_iterator const & par)
 604 {
 605         // Unlike XHTML, no need for labels, as they are handled by DocBook tags.
 606         auto const begin = text.paragraphs().begin();
 607         auto const end = text.paragraphs().end();
 608         auto nextpar = par;
 609         ++nextpar;
 610
 611         // Generate this command.
 612         auto prevpar = text.paragraphs().getParagraphBefore(par);
 613         openParTag(xs, &*par, prevpar);
 614
 615         par->simpleDocBookOnePar(buf, xs, runparams,
 616                                  text.outerFont(distance(begin, par)));
 617
 618         closeParTag(xs, &*par, (nextpar != end) ? &*nextpar : nullptr);
 619 }
 620
 621
 622 void makeAny(
 623                 Text const &text,
 624                 Buffer const &buf,
 625                 XMLStream &xs,
 626                 OutputParams const &ourparams,
 627                 ParagraphList::const_iterator par)
 628 {
 629         switch (par->layout().latextype) {
 630         case LATEX_COMMAND:
 631                 makeCommand(buf, xs, ourparams, text, par);
 632                 break;
 633         case LATEX_ENVIRONMENT:
 634         case LATEX_LIST_ENVIRONMENT:
 635         case LATEX_ITEM_ENVIRONMENT:
 636                 makeEnvironment(buf, xs, ourparams, text, par);
 637                 break;
 638         case LATEX_PARAGRAPH:
 639                 makeParagraph(buf, xs, ourparams, text, par);
 640                 break;
 641         case LATEX_BIB_ENVIRONMENT:
 642                 makeParagraphBibliography(buf, xs, ourparams, text, par);
 643                 break;
 644         }
 645 }
 646
 647
 648 using DocBookDocumentSectioning = tuple<bool, pit_type>;
 649
 650
 651 struct DocBookInfoTag
 652 {
 653         const set<pit_type> shouldBeInInfo;
 654         const set<pit_type> mustBeInInfo;
 655         const set<pit_type> abstract;
 656         pit_type bpit;
 657         pit_type epit;
 658
 659         DocBookInfoTag(const set<pit_type> & shouldBeInInfo, const set<pit_type> & mustBeInInfo,
 660                                    const set<pit_type> & abstract, pit_type bpit, pit_type epit) :
 661                                    shouldBeInInfo(shouldBeInInfo), mustBeInInfo(mustBeInInfo), abstract(abstract),
 662                                    bpit(bpit), epit(epit) {}
 663 };
 664
 665
 666 DocBookDocumentSectioning hasDocumentSectioning(ParagraphList const &paragraphs, pit_type bpit, pit_type const epit) {
 667         bool documentHasSections = false;
 668
 669         while (bpit < epit) {
 670                 Layout const &style = paragraphs[bpit].layout();
 671                 documentHasSections |= style.category() == from_utf8("Sectioning");
 672
 673                 if (documentHasSections)
 674                         break;
 675                 bpit += 1;
 676         }
 677         // Paragraphs before the first section: [ runparams.par_begin ; eppit )
 678
 679         return make_tuple(documentHasSections, bpit);
 680 }
 681
 682
 683 bool hasOnlyNotes(Paragraph const & par)
 684 {
 685         // Precondition: the paragraph is not empty. Otherwise, the function will always return true...
 686         for (int i = 0; i < par.size(); ++i)
 687                 // If you find something that is not an inset (like actual text) or an inset that is not a note,
 688                 // return false.
 689                 if (!par.isInset(i) || !dynamic_cast<InsetNote *>(par.insetList().get(i)))
 690                         return false;
 691         return true;
 692 }
 693
 694
 695 DocBookInfoTag getParagraphsWithInfo(ParagraphList const &paragraphs, pit_type bpit, pit_type const epit) {
 696         set<pit_type> shouldBeInInfo;
 697         set<pit_type> mustBeInInfo;
 698         set<pit_type> abstract;
 699
 700         // Find the first non empty paragraph by mutating bpit.
 701         while (bpit < epit) {
 702                 Paragraph const &par = paragraphs[bpit];
 703                 if (par.empty() || hasOnlyNotes(par))
 704                         bpit += 1;
 705                 else
 706                         break;
 707         }
 708
 709         // Find the last info-like paragraph.
 710         pit_type cpit = bpit;
 711         bool hasAbstractLayout = false;
 712         while (cpit < epit) {
 713                 // Skip paragraphs only containing one note.
 714                 Paragraph const & par = paragraphs[cpit];
 715                 if (hasOnlyNotes(par)) {
 716                         cpit += 1;
 717                         continue;
 718                 }
 719
 720                 if (par.layout().docbookabstract())
 721                         hasAbstractLayout = true;
 722
 723                 // Based on layout information, store this paragraph in one set: should be in <info>, must be.
 724                 Layout const &style = par.layout();
 725
 726                 if (style.docbookininfo() == "always") {
 727                         mustBeInInfo.emplace(cpit);
 728                 } else if (style.docbookininfo() == "maybe") {
 729                         shouldBeInInfo.emplace(cpit);
 730                 } else {
 731                         // Hypothesis: the <info> parts should be grouped together near the beginning bpit.
 732                         // There may be notes in between, but nothing else.
 733                         break;
 734                 }
 735                 cpit += 1;
 736         }
 737         // Now, cpit points to the last paragraph that has things that could go in <info>.
 738         // bpit is the beginning of the <info> part.
 739
 740         // Go once again through the list of paragraphs to find the abstract. If there is an abstract
 741         // layout, only consider it. Otherwise, an abstract is just a sequence of paragraphs with text.
 742         if (hasAbstractLayout) {
 743                 pit_type pit = bpit;
 744                 while (pit < cpit) { // Don't overshoot the <info> part.
 745                         if (paragraphs[pit].layout().docbookabstract())
 746                                 abstract.emplace(pit);
 747                         pit++;
 748                 }
 749         } else {
 750                 pit_type lastAbstract = epit + 1; // A nonsensical value.
 751                 docstring lastAbstractLayout;
 752
 753                 pit_type pit = bpit;
 754                 while (pit < cpit) { // Don't overshoot the <info> part.
 755                         const Paragraph & par = paragraphs.at(pit);
 756                         if (!par.insetList().empty()) {
 757                                 for (const auto &i : par.insetList()) {
 758                                         if (i.inset->getText(0) != nullptr) {
 759                                                 if (lastAbstract == epit + 1) {
 760                                                         // First paragraph that matches the heuristic definition of abstract.
 761                                                         lastAbstract = pit;
 762                                                         lastAbstractLayout = par.layout().name();
 763                                                 } else if (pit > lastAbstract + 1 || par.layout().name() != lastAbstractLayout) {
 764                                                         // This is either too far from the last abstract paragraph or doesn't
 765                                                         // have the right layout name, BUT there has already been an abstract
 766                                                         // in this document: done with detecting the abstract.
 767                                                         goto done; // Easier to get out of two nested loops.
 768                                                 }
 769
 770                                                 abstract.emplace(pit);
 771                                                 break;
 772                                         }
 773                                 }
 774                         }
 775                         pit++;
 776                 }
 777         }
 778
 779         done:
 780         return DocBookInfoTag(shouldBeInInfo, mustBeInInfo, abstract, bpit, cpit);
 781 }
 782
 783 } // end anonymous namespace
 784
 785
 786 xml::FontTag docbookStartFontTag(xml::FontTypes type)
 787 {
 788         return xml::FontTag(from_utf8(fontToDocBookTag(type)), from_utf8(fontToAttribute(type)), type);
 789 }
 790
 791
 792 xml::EndFontTag docbookEndFontTag(xml::FontTypes type)
 793 {
 794         return xml::EndFontTag(from_utf8(fontToDocBookTag(type)), type);
 795 }
 796
 797
 798 void outputDocBookInfo(
 799                 Text const & text,
 800                 Buffer const & buf,
 801                 XMLStream & xs,
 802                 OutputParams const & runparams,
 803                 ParagraphList const & paragraphs,
 804                 DocBookInfoTag const & info)
 805 {
 806         // Perform an additional check on the abstract. Sometimes, there are many paragraphs that should go
 807         // into the abstract, but none generates actual content. Thus, first generate to a temporary stream,
 808         // then only create the <abstract> tag if these paragraphs generate some content.
 809         // This check must be performed *before* a decision on whether or not to output <info> is made.
 810         bool hasAbstract = !info.abstract.empty();
 811         docstring abstract;
 812         if (hasAbstract) {
 813                 // Generate the abstract XML into a string before further checks.
 814                 odocstringstream os2;
 815                 {
 816                         XMLStream xs2(os2);
 817                         auto bpit = *std::min_element(info.abstract.begin(), info.abstract.end());
 818                         auto epit = 1 + *std::max_element(info.abstract.begin(), info.abstract.end());
 819                         // info.abstract is inclusive, epit is exclusive, hence +1 for looping.
 820
 821                         while (bpit < epit) {
 822                                 makeAny(text, buf, xs2, runparams, paragraphs.iterator_at(bpit));
 823                                 bpit += 1;
 824                         }
 825                 }
 826
 827                 // Actually output the abstract if there is something to do. Don't count line feeds or spaces in this,
 828                 // even though they must be properly output if there is some abstract.
 829                 abstract = os2.str();
 830                 docstring cleaned = abstract;
 831                 cleaned.erase(std::remove_if(cleaned.begin(), cleaned.end(), ::isspace), cleaned.end());
 832
 833                 // Nothing? Then there is no abstract!
 834                 if (cleaned.empty())
 835                         hasAbstract = false;
 836         }
 837
 838         // The abstract must go in <info>. Otherwise, decide whether to open <info> based on the layouts.
 839         bool needInfo = !info.mustBeInInfo.empty() || hasAbstract;
 840
 841         // Start the <info> tag if required.
 842         if (needInfo) {
 843                 xs.startDivision(false);
 844                 xs << xml::StartTag("info");
 845                 xs << xml::CR();
 846         }
 847
 848         // Output the elements that should go in <info>, before and after the abstract.
 849         for (auto pit : info.shouldBeInInfo) { // Typically, the title: these elements are so important and ubiquitous
 850                 // that mandating a wrapper like <info> would repel users. Thus, generate them first.
 851                 makeAny(text, buf, xs, runparams, paragraphs.iterator_at(pit));
 852         }
 853         for (auto pit : info.mustBeInInfo) {
 854                 if (info.abstract.find(pit) == info.abstract.end()) // The abstract must be in info, but is dealt with after.
 855                         makeAny(text, buf, xs, runparams, paragraphs.iterator_at(pit));
 856         }
 857
 858         // Always output the abstract as the last item of the <info>, as it requires special treatment (especially if
 859         // it contains several paragraphs that are empty).
 860         if (hasAbstract) {
 861 //              string tag = paragraphs[*info.abstract.begin()].layout().docbookforceabstracttag();
 862 //              if (tag == "NONE")
 863 //                      tag = "abstract";
 864 //
 865 //              xs << xml::StartTag(tag);
 866 //              xs << xml::CR();
 867                 xs << XMLStream::ESCAPE_NONE << abstract;
 868 //              xs << xml::EndTag(tag);
 869 //              xs << xml::CR();
 870         }
 871
 872         // End the <info> tag if it was started.
 873         if (needInfo) {
 874                 xs << xml::EndTag("info");
 875                 xs << xml::CR();
 876                 xs.endDivision();
 877         }
 878 }
 879
 880
 881 void docbookFirstParagraphs(
 882                 Text const &text,
 883                 Buffer const &buf,
 884                 XMLStream &xs,
 885                 OutputParams const &runparams,
 886                 pit_type epit)
 887 {
 888         // Handle the beginning of the document, supposing it has sections.
 889         // Major role: output the first <info> tag.
 890
 891         ParagraphList const &paragraphs = text.paragraphs();
 892         pit_type bpit = runparams.par_begin;
 893         DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit);
 894         outputDocBookInfo(text, buf, xs, runparams, paragraphs, info);
 895 }
 896
 897
 898 void docbookSimpleAllParagraphs(
 899                 Text const & text,
 900                 Buffer const & buf,
 901                 XMLStream & xs,
 902                 OutputParams const & runparams)
 903 {
 904         // Handle the given text, supposing it has no sections (i.e. a "simple" text). The input may vary in length
 905         // between a single paragraph to a whole document.
 906
 907         // First, the <info> tag.
 908         ParagraphList const &paragraphs = text.paragraphs();
 909         pit_type bpit = runparams.par_begin;
 910         pit_type const epit = runparams.par_end;
 911         DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit);
 912         outputDocBookInfo(text, buf, xs, runparams, paragraphs, info);
 913
 914         // Then, the content. It starts where the <info> ends.
 915         bpit = info.epit;
 916         while (bpit < epit) {
 917                 auto par = paragraphs.iterator_at(bpit);
 918                 if (!hasOnlyNotes(*par))
 919                         makeAny(text, buf, xs, runparams, par);
 920                 bpit += 1;
 921         }
 922 }
 923
 924
 925 void docbookParagraphs(Text const &text,
 926                                            Buffer const &buf,
 927                                            XMLStream &xs,
 928                                            OutputParams const &runparams) {
 929         ParagraphList const &paragraphs = text.paragraphs();
 930         if (runparams.par_begin == runparams.par_end) {
 931                 runparams.par_begin = 0;
 932                 runparams.par_end = paragraphs.size();
 933         }
 934         pit_type bpit = runparams.par_begin;
 935         pit_type const epit = runparams.par_end;
 936         LASSERT(bpit < epit,
 937                         {
 938                                 xs << XMLStream::ESCAPE_NONE << "<!-- DocBook output error! -->\n";
 939                                 return;
 940                         });
 941
 942         std::stack<std::pair<int, string>> headerLevels; // Used to determine when to open/close sections: store the depth
 943         // of the section and the tag that was used to open it.
 944
 945         // Detect whether the document contains sections. If there are no sections, there can be no automatically
 946         // discovered abstract.
 947         bool documentHasSections;
 948         pit_type eppit;
 949         tie(documentHasSections, eppit) = hasDocumentSectioning(paragraphs, bpit, epit);
 950
 951         if (documentHasSections) {
 952                 docbookFirstParagraphs(text, buf, xs, runparams, eppit);
 953                 bpit = eppit;
 954         } else {
 955                 docbookSimpleAllParagraphs(text, buf, xs, runparams);
 956                 return;
 957         }
 958
 959         bool currentlyInAppendix = false;
 960
 961         while (bpit < epit) {
 962                 OutputParams ourparams = runparams;
 963
 964                 auto par = paragraphs.iterator_at(bpit);
 965                 if (par->params().startOfAppendix())
 966                         currentlyInAppendix = true;
 967                 Layout const &style = par->layout();
 968                 ParagraphList::const_iterator const lastStartedPar = par;
 969                 ParagraphList::const_iterator send;
 970
 971                 if (hasOnlyNotes(*par)) {
 972                         bpit += 1;
 973                         continue;
 974                 }
 975
 976                 // Think about adding <section> and/or </section>s.
 977                 const bool isLayoutSectioning = style.category() == from_utf8("Sectioning");
 978                 if (isLayoutSectioning) {
 979                         int level = style.toclevel;
 980
 981                         // Need to close a previous section if it has the same level or a higher one (close <section> if opening a <h2>
 982                         // after a <h2>, <h3>, <h4>, <h5> or <h6>). More examples:
 983                         //   - current: h2; back: h1; do not close any <section>
 984                         //   - current: h1; back: h2; close two <section> (first the <h2>, then the <h1>, so a new <h1> can come)
 985                         while (!headerLevels.empty() && level <= headerLevels.top().first) {
 986                                 int stackLevel = headerLevels.top().first;
 987                                 docstring stackTag = from_utf8("</" + headerLevels.top().second + ">");
 988                                 headerLevels.pop();
 989
 990                                 // Output the tag only if it corresponds to a legit section.
 991                                 if (stackLevel != Layout::NOT_IN_TOC)
 992                                         xs << XMLStream::ESCAPE_NONE << stackTag << xml::CR();
 993                         }
 994
 995                         // Open the new section: first push it onto the stack, then output it in DocBook.
 996                         string sectionTag = (currentlyInAppendix && style.docbooksectiontag() == "chapter") ?
 997                                                                 "appendix" : style.docbooksectiontag();
 998                         headerLevels.push(std::make_pair(level, sectionTag));
 999
1000                         // Some sectioning-like elements should not be output (such as FrontMatter).
1001                         if (level != Layout::NOT_IN_TOC) {
1002                                 // Look for a label in the title, i.e. a InsetLabel as a child.
1003                                 docstring id = docstring();
1004                                 for (pos_type i = 0; i < par->size(); ++i) {
1005                                         Inset const *inset = par->getInset(i);
1006                                         if (inset) {
1007                                                 if (auto label = dynamic_cast<InsetLabel const *>(inset)) {
1008                                                         // Generate the attributes for the section if need be.
1009                                                         id += "xml:id=\"" + xml::cleanID(label->screenLabel()) + "\"";
1010
1011                                                         // Don't output the ID as a DocBook <anchor>.
1012                                                         ourparams.docbook_anchors_to_ignore.emplace(label->screenLabel());
1013
1014                                                         // Cannot have multiple IDs per tag.
1015                                                         break;
1016                                                 }
1017                                         }
1018                                 }
1019
1020                                 // Write the open tag for this section.
1021                                 docstring tag = from_utf8("<" + sectionTag);
1022                                 if (!id.empty())
1023                                         tag += from_utf8(" ") + id;
1024                                 tag += from_utf8(">");
1025                                 xs << XMLStream::ESCAPE_NONE << tag;
1026                                 xs << xml::CR();
1027                         }
1028                 }
1029
1030                 // Close all sections before the bibliography.
1031                 // TODO: Only close all when the bibliography is at the end of the document? Or force to output the bibliography at the end of the document? Or don't care (as allowed by DocBook)?
1032                 auto insetsLength = distance(par->insetList().begin(), par->insetList().end());
1033                 if (insetsLength > 0) {
1034                         Inset const *firstInset = par->getInset(0);
1035                         if (firstInset && dynamic_cast<InsetBibtex const *>(firstInset)) {
1036                                 while (!headerLevels.empty()) {
1037                                         int level = headerLevels.top().first;
1038                                         docstring tag = from_utf8("</" + headerLevels.top().second + ">");
1039                                         headerLevels.pop();
1040
1041                                         // Output the tag only if it corresponds to a legit section.
1042                                         if (level != Layout::NOT_IN_TOC) {
1043                                                 xs << XMLStream::ESCAPE_NONE << tag;
1044                                                 xs << xml::CR();
1045                                         }
1046                                 }
1047                         }
1048                 }
1049
1050                 // Generate this paragraph.
1051                 makeAny(text, buf, xs, ourparams, par);
1052                 bpit += 1;
1053         }
1054
1055         // If need be, close <section>s, but only at the end of the document (otherwise, dealt with at the beginning
1056         // of the loop).
1057         while (!headerLevels.empty() && headerLevels.top().first > Layout::NOT_IN_TOC) {
1058                 docstring tag = from_utf8("</" + headerLevels.top().second + ">");
1059                 headerLevels.pop();
1060                 xs << XMLStream::ESCAPE_NONE << tag;
1061                 xs << xml::CR();
1062         }
1063 }
1064
1065 } // namespace lyx