DocBook: use the same normalisation for biblio IDs and their references.

[features.git] / src / output_docbook.cpp
diff --git a/src/output_docbook.cpp b/src/output_docbook.cpp

index e9d826b9bd30f0decc641f794262dbe511ff65b6..e9bd621898ed7e485a4fc91cf79bf48ffc44f421 100644 (file)
--- a/src/output_docbook.cpp
+++ b/src/output_docbook.cpp
@@ -11,12 +11,13 @@
  
  #include <config.h>
  
+#include "output_docbook.h"
+
  #include "Buffer.h"
  #include "buffer_funcs.h"
  #include "BufferParams.h"
  #include "Font.h"
  #include "InsetList.h"
-#include "output_docbook.h"
  #include "Paragraph.h"
  #include "ParagraphList.h"
  #include "ParagraphParameters.h"
@@ -27,9 +28,11 @@
  #include "insets/InsetBibtex.h"
  #include "insets/InsetBibitem.h"
  #include "insets/InsetLabel.h"
+#include "mathed/InsetMath.h"
  #include "insets/InsetNote.h"
  
  #include "support/lassert.h"
+#include "support/textutils.h"
  
  #include <stack>
  #include <iostream>
@@ -373,7 +376,8 @@ void makeBibliography(
                 if (!ip)
                         continue;
                 if (const auto * bibitem = dynamic_cast<const InsetBibitem*>(ip)) {
-                       attr = from_utf8("xml:id='") + bibitem->getParam("key") + from_utf8("'");
+                       auto id = xml::cleanID(bibitem->getParam("key"));
+                       attr = from_utf8("xml:id='") + id + from_utf8("'");
                         break;
                 }
         }
@@ -446,33 +450,35 @@ void makeParagraph(
         // Plain layouts must be ignored.
         special_case |= buf.params().documentClass().isPlainLayout(par->layout()) && !runparams.docbook_force_pars;
         // Equations do not deserve their own paragraph (DocBook allows them outside paragraphs).
+       // Exception: any case that generates an <inlineequation> must still get a paragraph to be valid.
         special_case |= nInsets == (size_t) par->size() && std::all_of(par->insetList().begin(), par->insetList().end(), [](InsetList::Element inset) {
-               return inset.inset && inset.inset->asInsetMath();
+               return inset.inset && inset.inset->asInsetMath() && inset.inset->asInsetMath()->getType() != hullSimple;
+       });
+       // Floats cannot be in paragraphs.
+       special_case |= nInsets == (size_t) par->size() && std::all_of(par->insetList().begin(), par->insetList().end(), [](InsetList::Element inset) {
+               return inset.inset->lyxCode() == FLOAT_CODE;
+       });
+       // Bibliographies cannot be in paragraphs. Bibitems should still be handled as paragraphs, though
+       // (see makeParagraphBibliography).
+       special_case |= nInsets == (size_t) par->size() && std::all_of(par->insetList().begin(), par->insetList().end(), [](InsetList::Element inset) {
+               return inset.inset->lyxCode() == BIBTEX_CODE;
+       });
+       // ERTs are in comments, not paragraphs.
+       special_case |= nInsets == (size_t) par->size() && std::all_of(par->insetList().begin(), par->insetList().end(), [](InsetList::Element inset) {
+               return inset.inset->lyxCode() == ERT_CODE;
+       });
+       // Listings should not get into their own paragraph.
+       special_case |= nInsets == (size_t) par->size() && std::all_of(par->insetList().begin(), par->insetList().end(), [](InsetList::Element inset) {
+               return inset.inset->lyxCode() == LISTINGS_CODE;
+       });
+       // Boxes cannot get into their own paragraph.
+       special_case |= nInsets == (size_t) par->size() && std::all_of(par->insetList().begin(), par->insetList().end(), [](InsetList::Element inset) {
+               return inset.inset->lyxCode() == BOX_CODE;
+       });
+       // Includes should not have a paragraph.
+       special_case |= nInsets == (size_t) par->size() && std::all_of(par->insetList().begin(), par->insetList().end(), [](InsetList::Element inset) {
+               return inset.inset->lyxCode() == INCLUDE_CODE;
         });
-
-       // TODO: Could get rid of this with a DocBook equivalent to htmlisblock? Not for all cases, unfortunately... See above for those that have been determined not to be allowable for this potential refactoring.
-       if (!special_case && par->size() == 1 && par->getInset(0)) {
-               Inset const * firstInset = par->getInset(0);
-
-               // Floats cannot be in paragraphs.
-               special_case = to_utf8(firstInset->layoutName()).substr(0, 6) == "Float:";
-
-               // Bibliographies cannot be in paragraphs.
-               if (!special_case && firstInset->asInsetCommand())
-                       special_case = firstInset->asInsetCommand()->params().getCmdName() == "bibtex";
-
-               // ERTs are in comments, not paragraphs.
-               if (!special_case && firstInset->lyxCode() == lyx::ERT_CODE)
-                       special_case = true;
-
-               // Listings should not get into their own paragraph.
-               if (!special_case && firstInset->lyxCode() == lyx::LISTINGS_CODE)
-                       special_case = true;
-
-               // Boxes cannot get into their own paragraph.
-               if (!special_case && firstInset->lyxCode() == lyx::BOX_CODE)
-                       special_case = true;
-       }
  
         bool const open_par = runparams.docbook_make_pars
                                                   && !runparams.docbook_in_par
@@ -548,6 +554,8 @@ void makeEnvironment(Text const &text,
                                 closeTag(xs, par->layout().docbookiteminnertag(), par->layout().docbookiteminnertagtype());
                                 ++p;
  
+                               // Insert a new line after each "paragraph" (i.e. line in the listing), except for the last one.
+                               // Otherwise, there would be one more new line in the output than in the LyX document.
                                 if (p != pars.end())
                                         xs << xml::CR();
                         }
@@ -618,9 +626,14 @@ ParagraphList::const_iterator makeListEnvironment(Text const &text,
  
         // Handle the content of the list environment, item by item.
         while (par != envend) {
-               Layout const & style = par->layout();
+               // Skip this paragraph if it is both empty and the last one (otherwise, there may be deeper paragraphs after).
+               auto nextpar = par;
+               ++nextpar;
+               if (par->empty() && nextpar == envend)
+                       break;
  
                 // Open the item wrapper.
+               Layout const & style = par->layout();
                 openTag(xs, style.docbookitemwrappertag(), style.docbookitemwrapperattr(), style.docbookitemwrappertagtype());
  
                 // Generate the label, if need be. If it is taken from the text, sep != 0 and corresponds to the first
@@ -713,7 +726,11 @@ void makeCommand(
  
  bool isLayoutSectioning(Layout const & lay)
  {
-       return lay.category() == from_utf8("Sectioning");
+       if (lay.docbooksection()) // Special case: some DocBook styles must be handled as sections.
+               return true;
+       else if (lay.category() == from_utf8("Sectioning")) // Generic case.
+               return lay.toclevel != Layout::NOT_IN_TOC;
+       return false;
  }
  
  
@@ -761,14 +778,26 @@ bool hasOnlyNotes(Paragraph const & par)
                 // return false.
                 if (!par.isInset(i) || par.getInset(i)->lyxCode() != NOTE_CODE)
                         return false;
+
+       // An empty paragraph may still require some output.
+       if (par.layout().docbooksection())
+               return false;
+
+       // There should really be no content here.
         return true;
  }
  
  
  DocBookInfoTag getParagraphsWithInfo(ParagraphList const &paragraphs,
                                                                          pit_type bpit, pit_type const epit,
-                                                                        // Typically, bpit is the beginning of the document and epit the end *or* the first section.
-                                                                        bool documentHasSections) {
+                                                                        // Typically, bpit is the beginning of the document and epit the end of the
+                                                                        // document *or* the first section.
+                                                                        bool documentHasSections,
+                                                                        bool detectUnlayoutedAbstract
+                                                                        // Whether paragraphs with no specific layout should be detected as abstracts.
+                                                                        // For inner sections, an abstract should only be detected if it has a specific
+                                                                        // layout. For others, anything that might look like an abstract should be sought.
+                                                                        ) {
         set<pit_type> shouldBeInInfo;
         set<pit_type> mustBeInInfo;
         set<pit_type> abstractWithLayout;
@@ -789,18 +818,19 @@ DocBookInfoTag getParagraphsWithInfo(ParagraphList const &paragraphs,
         for (; cpit < epit; ++cpit) {
                 // Skip paragraphs that don't generate anything in DocBook.
                 Paragraph const & par = paragraphs[cpit];
+               Layout const &style = par.layout();
                 if (hasOnlyNotes(par))
                         continue;
  
-               // There should never be any section here. (Just a sanity check: if this fails, this function could end up
-               // processing the whole document.)
-               if (isLayoutSectioning(par.layout())) {
+               // There should never be any section here, except for the first paragraph (a title can be part of <info>).
+               // (Just a sanity check: if this fails, this function could end up processing the whole document.)
+               if (cpit != bpit && isLayoutSectioning(par.layout())) {
                         LYXERR0("Assertion failed: section found in potential <info> paragraphs.");
                         break;
                 }
  
                 // If this is marked as an abstract by the layout, put it in the right set.
-               if (par.layout().docbookabstract()) {
+               if (style.docbookabstract()) {
                         hasAbstractLayout = true;
                         abstractWithLayout.emplace(cpit);
                         continue;
@@ -808,13 +838,11 @@ DocBookInfoTag getParagraphsWithInfo(ParagraphList const &paragraphs,
  
                 // Based on layout information, store this paragraph in one set: should be in <info>, must be,
                 // or abstract (either because of layout or of position).
-               Layout const &style = par.layout();
-
                 if (style.docbookininfo() == "always")
                         mustBeInInfo.emplace(cpit);
                 else if (style.docbookininfo() == "maybe")
                         shouldBeInInfo.emplace(cpit);
-               else if (documentHasSections && !hasAbstractLayout)
+               else if (documentHasSections && !hasAbstractLayout && detectUnlayoutedAbstract)
                         abstractNoLayout.emplace(cpit);
                 else // This should definitely not be in <info>.
                         break;
@@ -887,16 +915,33 @@ void outputDocBookInfo(
         docstring abstract;
         if (hasAbstract) {
                 // Generate the abstract XML into a string before further checks.
+               // Usually, makeAny only generates one paragraph at a time. However, for the specific case of lists, it might
+               // generate more than one paragraph, as indicated in the return value.
                 odocstringstream os2;
                 XMLStream xs2(os2);
-               for (auto const & p : info.abstract)
-                       makeAny(text, buf, xs2, runparams, paragraphs.iterator_at(p));
+
+               set<pit_type> doneParas;
+               for (auto const & p : info.abstract) {
+                       if (doneParas.find(p) == doneParas.end()) {
+                               auto oldPar = paragraphs.iterator_at(p);
+                               auto newPar = makeAny(text, buf, xs2, runparams, oldPar);
+
+                               // Insert the indices of all the paragraphs that were just generated (typically, one).
+                               // **Make the hypothesis that, when an abstract has a list, all its items are consecutive.**
+                               pit_type id = p;
+                               while (oldPar != newPar) {
+                                       doneParas.emplace(id);
+                                       ++oldPar;
+                                       ++id;
+                               }
+                       }
+               }
  
                 // Actually output the abstract if there is something to do. Don't count line feeds or spaces in this,
                 // even though they must be properly output if there is some abstract.
                 abstract = os2.str();
                 docstring cleaned = abstract;
-               cleaned.erase(std::remove_if(cleaned.begin(), cleaned.end(), ::isspace), cleaned.end());
+               cleaned.erase(std::remove_if(cleaned.begin(), cleaned.end(), lyx::isSpace), cleaned.end());
  
                 // Nothing? Then there is no abstract!
                 if (cleaned.empty())
@@ -922,7 +967,7 @@ void outputDocBookInfo(
  
         // If there is no title, generate one (required for the document to be valid).
         // This code is called for the main document, for table cells, etc., so be precise in this condition.
-       if (text.isMainText() && info.mustBeInInfo.empty()) {
+       if (text.isMainText() && info.shouldBeInInfo.empty() && !runparams.inInclude) {
                 xs << xml::StartTag("title");
                 xs << "Untitled Document";
                 xs << xml::EndTag("title");
@@ -976,7 +1021,7 @@ void docbookSimpleAllParagraphs(
         ParagraphList const &paragraphs = text.paragraphs();
  
         // First, the <info> tag.
-       DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit, false);
+       DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit, false, true);
         outputDocBookInfo(text, buf, xs, runparams, paragraphs, info);
  
         // Then, the content. It starts where the <info> ends.
@@ -1024,7 +1069,7 @@ void docbookParagraphs(Text const &text,
         }
  
         // Output the first <info> tag (or just the title).
-       DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, eppit, true);
+       DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, eppit, true, true);
         outputDocBookInfo(text, buf, xs, runparams, paragraphs, info);
         bpit = info.epit;
  
@@ -1082,7 +1127,8 @@ void docbookParagraphs(Text const &text,
                                                         // Don't output the ID as a DocBook <anchor>.
                                                         ourparams.docbook_anchors_to_ignore.emplace(label->screenLabel());
  
-                                                       // Cannot have multiple IDs per tag.
+                                                       // Cannot have multiple IDs per tag. If there is another ID inset in the document, it will
+                                                       // be output as a DocBook anchor.
                                                         break;
                                                 }
                                         }
@@ -1116,30 +1162,84 @@ void docbookParagraphs(Text const &text,
                         }
                 }
  
-               // Generate this paragraph.
-               par = makeAny(text, buf, xs, ourparams, par);
-
-               // Some special sections may require abstracts (mostly parts, in books).
+               // Generate the <info> tag if a section was just opened.
+               // Some sections may require abstracts (mostly parts, in books: DocBookForceAbstractTag will not be NONE),
+               // others can still have an abstract (it must be detected so that it can be output at the right place).
                 // TODO: docbookforceabstracttag is a bit contrived here, but it does the job. Having another field just for this would be cleaner, but that's just for <part> and <partintro>, so it's probably not worth the effort.
-               if (isLayoutSectioning(style) && style.docbookforceabstracttag() != "NONE") {
+               if (isLayoutSectioning(style)) {
                         // This abstract may be found between the next paragraph and the next title.
                         pit_type cpit = std::distance(text.paragraphs().begin(), par);
-                       pit_type ppit = std::get<1>(hasDocumentSectioning(paragraphs, cpit, epit));
+                       pit_type ppit = std::get<1>(hasDocumentSectioning(paragraphs, cpit + 1L, epit));
  
                         // Generate this abstract (this code corresponds to parts of outputDocBookInfo).
-                       DocBookInfoTag secInfo = getParagraphsWithInfo(paragraphs, cpit, ppit, true);
+                       DocBookInfoTag secInfo = getParagraphsWithInfo(paragraphs, cpit, ppit, true,
+                                                                                                 style.docbookforceabstracttag() != "NONE");
+
+                       if (!secInfo.mustBeInInfo.empty() || !secInfo.shouldBeInInfo.empty() || !secInfo.abstract.empty()) {
+                               // Generate the <info>, if required. If DocBookForceAbstractTag != NONE, this abstract will not be in
+                               // <info>, unlike other ("standard") abstracts.
+                               bool hasStandardAbstract = !secInfo.abstract.empty() && style.docbookforceabstracttag() == "NONE";
+                               bool needInfo = !secInfo.mustBeInInfo.empty() || hasStandardAbstract;
+
+                               if (needInfo) {
+                                       xs.startDivision(false);
+                                       xs << xml::StartTag("info");
+                                       xs << xml::CR();
+                               }
  
-                       if (!secInfo.abstract.empty()) {
-                               xs << xml::StartTag(style.docbookforceabstracttag());
-                               xs << xml::CR();
-                               for (auto const &p : secInfo.abstract)
-                                       makeAny(text, buf, xs, runparams, paragraphs.iterator_at(p));
-                               xs << xml::EndTag(style.docbookforceabstracttag());
-                               xs << xml::CR();
-                       }
+                               // Output the elements that should go in <info>, before and after the abstract.
+                               for (auto pit : secInfo.shouldBeInInfo) // Typically, the title: these elements are so important and ubiquitous
+                                       // that mandating a wrapper like <info> would repel users. Thus, generate them first.
+                                       makeAny(text, buf, xs, ourparams, paragraphs.iterator_at(pit));
+                               for (auto pit : secInfo.mustBeInInfo)
+                                       makeAny(text, buf, xs, ourparams, paragraphs.iterator_at(pit));
+
+                               // Deal with the abstract in <info> if it is standard (i.e. its tag is <abstract>).
+                               if (!secInfo.abstract.empty() && hasStandardAbstract) {
+                                       if (!secInfo.abstractLayout) {
+                                               xs << xml::StartTag("abstract");
+                                               xs << xml::CR();
+                                       }
  
-                       // Skip all the text that just has been generated.
-                       par = paragraphs.iterator_at(ppit);
+                                       for (auto const &p : secInfo.abstract)
+                                               makeAny(text, buf, xs, ourparams, paragraphs.iterator_at(p));
+
+                                       if (!secInfo.abstractLayout) {
+                                               xs << xml::EndTag("abstract");
+                                               xs << xml::CR();
+                                       }
+                               }
+
+                               // End the <info> tag if it was started.
+                               if (needInfo) {
+                                       if (!xs.isLastTagCR())
+                                               xs << xml::CR();
+
+                                       xs << xml::EndTag("info");
+                                       xs << xml::CR();
+                                       xs.endDivision();
+                               }
+
+                               // Deal with the abstract outside <info> if it is not standard (i.e. its tag is layout-defined).
+                               if (!secInfo.abstract.empty() && !hasStandardAbstract) {
+                                       // Assert: style.docbookforceabstracttag() != NONE.
+                                       xs << xml::StartTag(style.docbookforceabstracttag());
+                                       xs << xml::CR();
+                                       for (auto const &p : secInfo.abstract)
+                                               makeAny(text, buf, xs, ourparams, paragraphs.iterator_at(p));
+                                       xs << xml::EndTag(style.docbookforceabstracttag());
+                                       xs << xml::CR();
+                               }
+
+                               // Skip all the text that has just been generated.
+                               par = paragraphs.iterator_at(secInfo.epit);
+                       } else {
+                               // No <info> tag to generate, proceed as for normal paragraphs.
+                               par = makeAny(text, buf, xs, ourparams, par);
+                       }
+               } else {
+                       // Generate this paragraph, as it has nothing special.
+                       par = makeAny(text, buf, xs, ourparams, par);
                 }
         }