DocBook: simplify code to handle abstracts.

author Thibaut Cuvelier <tcuvelier@lyx.org>

Sun, 2 Aug 2020 18:43:39 +0000 (20:43 +0200)

committer Thibaut Cuvelier <tcuvelier@lyx.org>

Mon, 3 Aug 2020 14:04:58 +0000 (16:04 +0200)
author Thibaut Cuvelier <tcuvelier@lyx.org>
Sun, 2 Aug 2020 18:43:39 +0000 (20:43 +0200)
committer Thibaut Cuvelier <tcuvelier@lyx.org>
Mon, 3 Aug 2020 14:04:58 +0000 (16:04 +0200)
diff --git a/autotests/export/docbook/bibliography_precooked_aastex.xml b/autotests/export/docbook/bibliography_precooked_aastex.xml

index 2c9f5bd32aab762ba31d6cab4d7a277910435370..e15bfddc51e0e445f8d9b2ce69a322af457fe44b 100644 (file)
--- a/autotests/export/docbook/bibliography_precooked_aastex.xml
+++ b/autotests/export/docbook/bibliography_precooked_aastex.xml
@@ -19,8 +19,10 @@
  <affiliation role="alternate"><orgname>Patron, Alonso's Bar and Grill</orgname></affiliation></author>
  <abstract><para>
  <para>This is a preliminary report on surface photometry of the major fraction of known globular clusters, to see which of them show the signs of a collapsed core. We also explore some diversionary mathematics and recreational tables. </para>
-<!-- Output Error: Tried to close `Keywords' when tag was not open. Tag discarded. -->
+<!-- Output Error: Tried to close `keyword' when tag was not open. Tag discarded. -->
+<!-- Output Error: Tried to close `keywordset' when tag was not open. Tag discarded. -->
  
+<keywordset><keyword>clusters: globular, peanut—bosons: bozos</keyword></keywordset>
  <!-- Output Error: Closing tag `info' when other tags are open, namely: -->
  <!-- Output Error: para -->
  </para><!-- Output Error: abstract -->
diff --git a/lib/layouts/aastex.layout b/lib/layouts/aastex.layout

index 445d25f536031198acba94b093ef7add5eb7a234..97c8b64940590c8cac1362d9b6a573e94d2eec5a 100644 (file)
--- a/lib/layouts/aastex.layout
+++ b/lib/layouts/aastex.layout
@@ -282,6 +282,9 @@ Style Abstract
           Series        Bold
           Size          Normal
         EndFont
+       DocBookTag            abstract
+       DocBookItemTag        para
+       DocBookInInfo         always
  End
  
  
@@ -331,6 +334,9 @@ Style Keywords
           Shape         Italic
           Size          Normal
         EndFont
+       DocBookTag          keyword
+       DocBookWrapperTag   keywordset
+       DocBookInInfo       always
  End
  
  
diff --git a/src/output_docbook.cpp b/src/output_docbook.cpp

index d0536eee3ad7c07080c79520e3ea1c85d48b78cb..fd38940064c744f4dcbee4d581d7e2d3aaa92640 100644 (file)
--- a/src/output_docbook.cpp
+++ b/src/output_docbook.cpp
@@ -50,7 +50,7 @@ namespace lyx {
  
  namespace {
  
-std::string const fontToDocBookTag(xml::FontTypes type)
+std::string fontToDocBookTag(xml::FontTypes type)
  {
         switch (type) {
         case xml::FontTypes::FT_EMPH:
@@ -90,6 +90,7 @@ std::string const fontToDocBookTag(xml::FontTypes type)
         }
  }
  
+
  string fontToRole(xml::FontTypes type)
  {
         // Specific fonts are achieved with roles. The only common ones are "" for basic emphasis,
@@ -103,14 +104,13 @@ string fontToRole(xml::FontTypes type)
                 return "";
         case xml::FontTypes::FT_BOLD:
                 return "bold";
-       case xml::FontTypes::FT_NOUN:
-               return ""; // Outputs a <person>
-       case xml::FontTypes::FT_TYPE:
-               return ""; // Outputs a <code>
+       case xml::FontTypes::FT_NOUN: // Outputs a <person>
+       case xml::FontTypes::FT_TYPE: // Outputs a <code>
+               return "";
         case xml::FontTypes::FT_UBAR:
                 return "underline";
  
-               // All other roles are non-standard for DocBook.
+       // All other roles are non-standard for DocBook.
  
         case xml::FontTypes::FT_WAVE:
                 return "wave";
@@ -814,7 +814,21 @@ pair<ParagraphList::const_iterator, ParagraphList::const_iterator> makeAny(
  
  
  using DocBookDocumentSectioning = tuple<bool, pit_type>;
-using DocBookInfoTag = tuple<set<pit_type>, set<pit_type>, pit_type, pit_type>;
+
+
+struct DocBookInfoTag
+{
+       const set<pit_type> shouldBeInInfo;
+       const set<pit_type> mustBeInInfo;
+       const set<pit_type> abstract;
+       pit_type bpit;
+       pit_type epit;
+
+       DocBookInfoTag(const set<pit_type> & shouldBeInInfo, const set<pit_type> & mustBeInInfo,
+                                  const set<pit_type> & abstract, pit_type bpit, pit_type epit) :
+                                  shouldBeInInfo(shouldBeInInfo), mustBeInInfo(mustBeInInfo), abstract(abstract),
+                                  bpit(bpit), epit(epit) {}
+};
  
  
  DocBookDocumentSectioning hasDocumentSectioning(ParagraphList const &paragraphs, pit_type bpit, pit_type const epit) {
@@ -847,6 +861,7 @@ bool hasOnlyNotes(Paragraph const & par)
  DocBookInfoTag getParagraphsWithInfo(ParagraphList const &paragraphs, pit_type bpit, pit_type const epit) {
         set<pit_type> shouldBeInInfo;
         set<pit_type> mustBeInInfo;
+       set<pit_type> abstract;
  
         // Find the first non empty paragraph by mutating bpit.
         while (bpit < epit) {
@@ -859,14 +874,18 @@ DocBookInfoTag getParagraphsWithInfo(ParagraphList const &paragraphs, pit_type b
  
         // Find the last info-like paragraph.
         pit_type cpit = bpit;
+       bool hasAbstractLayout = false;
         while (cpit < epit) {
                 // Skip paragraphs only containing one note.
-               Paragraph const &par = paragraphs[cpit];
+               Paragraph const & par = paragraphs[cpit];
                 if (hasOnlyNotes(par)) {
                         cpit += 1;
                         continue;
                 }
  
+               if (par.layout().name() == from_ascii("Abstract"))
+                       hasAbstractLayout = true;
+
                 // Based on layout information, store this paragraph in one set: should be in <info>, must be.
                 Layout const &style = par.layout();
  
@@ -876,42 +895,55 @@ DocBookInfoTag getParagraphsWithInfo(ParagraphList const &paragraphs, pit_type b
                         shouldBeInInfo.emplace(cpit);
                 } else {
                         // Hypothesis: the <info> parts should be grouped together near the beginning bpit.
+                       // There may be notes in between, but nothing else.
                         break;
                 }
                 cpit += 1;
         }
         // Now, cpit points to the last paragraph that has things that could go in <info>.
-       // bpit is still the beginning of the <info> part.
-
-       return make_tuple(shouldBeInInfo, mustBeInInfo, bpit, cpit);
-}
-
-
-bool hasAbstractBetween(ParagraphList const &paragraphs, pit_type const bpitAbstract, pit_type const epitAbstract)
-{
-       // Hypothesis: the paragraphs between bpitAbstract and epitAbstract can be considered an abstract because they
-       // are just after a document or part title.
-       if (epitAbstract - bpitAbstract <= 0)
-               return false;
-
-       // If there is something between these paragraphs, check if it's compatible with an abstract (i.e. some text).
-       pit_type bpit = bpitAbstract;
-       while (bpit < epitAbstract) {
-               const Paragraph &p = paragraphs.at(bpit);
-
-               if (p.layout().name() == from_ascii("Abstract"))
-                       return true;
+       // bpit is the beginning of the <info> part.
+
+       // Go once again through the list of paragraphs to find the abstract. If there is an abstract
+       // layout, only consider it. Otherwise, an abstract is just a sequence of paragraphs with text.
+       if (hasAbstractLayout) {
+               pit_type pit = bpit;
+               while (pit < cpit) { // Don't overshoot the <info> part.
+                       if (paragraphs[pit].layout().name() == from_ascii("Abstract"))
+                               abstract.emplace(pit);
+                       pit++;
+               }
+       } else {
+               pit_type lastAbstract = epit + 1; // A nonsensical value.
+               docstring lastAbstractLayout;
+
+               pit_type pit = bpit;
+               while (pit < cpit) { // Don't overshoot the <info> part.
+                       const Paragraph & par = paragraphs.at(pit);
+                       if (!par.insetList().empty()) {
+                               for (const auto &i : par.insetList()) {
+                                       if (i.inset->getText(0) != nullptr) {
+                                               if (lastAbstract == epit + 1) {
+                                                       // First paragraph that matches the heuristic definition of abstract.
+                                                       lastAbstract = pit;
+                                                       lastAbstractLayout = par.layout().name();
+                                               } else if (pit > lastAbstract + 1 || par.layout().name() != lastAbstractLayout) {
+                                                       // This is either too far from the last abstract paragraph or doesn't
+                                                       // have the right layout name, BUT there has already been an abstract
+                                                       // in this document: done with detecting the abstract.
+                                                       goto done; // Easier to get out of two nested loops.
+                                               }
  
-               if (!p.insetList().empty()) {
-                       for (const auto &i : p.insetList()) {
-                               if (i.inset->getText(0) != nullptr) {
-                                       return true;
+                                               abstract.emplace(pit);
+                                               break;
+                                       }
                                 }
                         }
+                       pit++;
                 }
-               bpit++;
         }
-       return false;
+
+       done:
+       return DocBookInfoTag(shouldBeInInfo, mustBeInInfo, abstract, bpit, cpit);
  }
  
  
@@ -947,26 +979,18 @@ void outputDocBookInfo(
                 XMLStream & xs,
                 OutputParams const & runparams,
                 ParagraphList const & paragraphs,
-               DocBookInfoTag const & info,
-               pit_type bpitAbstract,
-               pit_type const epitAbstract)
+               DocBookInfoTag const & info)
  {
-       // Consider everything between bpitAbstract and epitAbstract (excluded) as paragraphs for the abstract.
-       // Use bpitAbstract >= epitAbstract to indicate there is no abstract.
-
-       set<pit_type> shouldBeInInfo;
-       set<pit_type> mustBeInInfo;
-       pit_type bpitInfo;
-       pit_type epitInfo;
-       tie(shouldBeInInfo, mustBeInInfo, bpitInfo, epitInfo) = info;
-
         // Perform an additional check on the abstract. Sometimes, there are many paragraphs that should go
         // into the abstract, but none generates actual content. Thus, first generate to a temporary stream,
         // then only create the <abstract> tag if these paragraphs generate some content.
         // This check must be performed *before* a decision on whether or not to output <info> is made.
-       bool hasAbstract = hasAbstractBetween(paragraphs, bpitAbstract, epitAbstract);
+       bool hasAbstract = !info.abstract.empty();
         docstring abstract;
         if (hasAbstract) {
+               pit_type bpitAbstract = *std::min_element(info.abstract.begin(), info.abstract.end());
+               pit_type epitAbstract = *std::max_element(info.abstract.begin(), info.abstract.end());
+
                 odocstringstream os2;
                 XMLStream xs2(os2);
                 generateDocBookParagraphWithoutSectioning(text, buf, xs2, runparams, paragraphs, bpitAbstract, epitAbstract);
@@ -983,7 +1007,7 @@ void outputDocBookInfo(
         }
  
         // The abstract must go in <info>.
-       bool needInfo = !mustBeInInfo.empty() || hasAbstract;
+       bool needInfo = !info.mustBeInInfo.empty() || hasAbstract;
  
         // Start the <info> tag if required.
         if (needInfo) {
@@ -993,10 +1017,10 @@ void outputDocBookInfo(
         }
  
         // Output the elements that should go in <info>.
-       generateDocBookParagraphWithoutSectioning(text, buf, xs, runparams, paragraphs, bpitInfo, epitInfo);
+       generateDocBookParagraphWithoutSectioning(text, buf, xs, runparams, paragraphs, info.bpit, info.epit);
  
         if (hasAbstract && !abstract.empty()) { // The second test is probably superfluous.
-               string tag = paragraphs[bpitAbstract].layout().docbookforceabstracttag();
+               string tag = paragraphs[*info.abstract.begin()].layout().docbookforceabstracttag();
                 if (tag == "NONE")
                         tag = "abstract";
  
@@ -1029,7 +1053,7 @@ void docbookFirstParagraphs(
         ParagraphList const &paragraphs = text.paragraphs();
         pit_type bpit = runparams.par_begin;
         DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit);
-       outputDocBookInfo(text, buf, xs, runparams, paragraphs, info, get<3>(info), epit);
+       outputDocBookInfo(text, buf, xs, runparams, paragraphs, info);
  }
  
  
@@ -1056,8 +1080,8 @@ void docbookSimpleAllParagraphs(
         pit_type bpit = runparams.par_begin;
         pit_type const epit = runparams.par_end;
         DocBookInfoTag info = getParagraphsWithInfo(paragraphs, bpit, epit);
-       outputDocBookInfo(text, buf, xs, runparams, paragraphs, info, 0, 0);
-       bpit = get<3>(info); // Generate the content starting from the end of the <info> part.
+       outputDocBookInfo(text, buf, xs, runparams, paragraphs, info);
+       bpit = info.bpit;
  
         // Then, the content.
         ParagraphList::const_iterator const pend =
author	Thibaut Cuvelier <tcuvelier@lyx.org>
	Sun, 2 Aug 2020 18:43:39 +0000 (20:43 +0200)
committer	Thibaut Cuvelier <tcuvelier@lyx.org>
	Mon, 3 Aug 2020 14:04:58 +0000 (16:04 +0200)
autotests/export/docbook/bibliography_precooked_aastex.xml		patch | blob | history
lib/layouts/aastex.layout		patch | blob | history
src/output_docbook.cpp		patch | blob | history