git.lyx.org Git - lyx.git/commitdiff
DocBook: ensure that <info>-related insets in the abstract are not generated in the abstract.
author Thibaut Cuvelier <tcuvelier@lyx.org>
Wed, 18 Nov 2020 00:51:05 +0000 (01:51 +0100)
committer Thibaut Cuvelier <tcuvelier@lyx.org>
Fri, 20 Nov 2020 17:41:02 +0000 (18:41 +0100)
This helps generate more conformant DocBook files.

Also implement wrapper tags for InsetText.

autotests/export/docbook/svglo.lyx [new file with mode: 0644]
autotests/export/docbook/svglo.xml [new file with mode: 0644]
lib/layouts/svglobal3.layout
src/OutputParams.h
src/Paragraph.cpp
src/insets/InsetLayout.cpp
src/insets/InsetLayout.h
src/insets/InsetText.cpp
src/output_docbook.cpp

diff --git a/autotests/export/docbook/svglo.lyx b/autotests/export/docbook/svglo.lyx
new file mode 100644 (file)
index 0000000..397f144
--- /dev/null
@@ -0,0 +1,215 @@
+#LyX 2.4 created this file. For more info see https://www.lyx.org/
+\lyxformat 599
+\begin_document
+\begin_header
+\save_transient_properties true
+\origin unavailable
+\textclass svglobal3
+\begin_preamble
+\RequirePackage{fix-cm}
+
+\smartqed  % flush right qed marks, e.g. at end of proof
+\end_preamble
+\use_default_options true
+\maintain_unincluded_children no
+\language english
+\language_package default
+\inputencoding utf8
+\fontencoding auto
+\font_roman "default" "default"
+\font_sans "default" "default"
+\font_typewriter "default" "default"
+\font_math "auto" "auto"
+\font_default_family default
+\use_non_tex_fonts false
+\font_sc false
+\font_roman_osf false
+\font_sans_osf false
+\font_typewriter_osf false
+\font_sf_scale 100 100
+\font_tt_scale 100 100
+\use_microtype false
+\use_dash_ligatures false
+\graphics default
+\default_output_format default
+\output_sync 0
+\bibtex_command bibtex
+\index_command default
+\paperfontsize default
+\spacing single
+\use_hyperref false
+\papersize default
+\use_geometry false
+\use_package amsmath 1
+\use_package amssymb 1
+\use_package cancel 1
+\use_package esint 1
+\use_package mathdots 1
+\use_package mathtools 1
+\use_package mhchem 1
+\use_package stackrel 1
+\use_package stmaryrd 1
+\use_package undertilde 1
+\cite_engine basic
+\cite_engine_type default
+\biblio_style plain
+\use_bibtopic false
+\use_indices false
+\paperorientation portrait
+\suppress_date false
+\justification true
+\use_refstyle 0
+\use_minted 0
+\use_lineno 0
+\index Index
+\shortcut idx
+\color #008000
+\end_index
+\secnumdepth 3
+\tocdepth 3
+\paragraph_separation indent
+\paragraph_indentation default
+\is_math_indent 0
+\math_numbering_side default
+\quotes_style english
+\dynamic_quotes 0
+\papercolumns 1
+\papersides 1
+\paperpagestyle default
+\tablestyle default
+\tracking_changes false
+\output_changes false
+\change_bars false
+\postpone_fragile_content false
+\html_math_output 0
+\html_css_as_file 0
+\html_be_strict false
+\docbook_table_output 0
+\end_header
+
+\begin_body
+
+\begin_layout Title
+Title
+\end_layout
+
+\begin_layout Abstract
+Abstract text.
+\begin_inset Flex Keywords
+status open
+
+\begin_layout Plain Layout
+First keyword 
+\begin_inset ERT
+status collapsed
+
+\begin_layout Plain Layout
+
+
+\backslash
+and 
+\end_layout
+
+\end_inset
+
+Second keyword 
+\begin_inset ERT
+status collapsed
+
+\begin_layout Plain Layout
+
+
+\backslash
+and 
+\end_layout
+
+\end_inset
+
+More
+\end_layout
+
+\end_inset
+
+\begin_inset Flex PACS
+status open
+
+\begin_layout Plain Layout
+PACS code1 
+\begin_inset ERT
+status collapsed
+
+\begin_layout Plain Layout
+
+
+\backslash
+and 
+\end_layout
+
+\end_inset
+
+PACS code2 
+\begin_inset ERT
+status collapsed
+
+\begin_layout Plain Layout
+
+
+\backslash
+and 
+\end_layout
+
+\end_inset
+
+more
+\end_layout
+
+\end_inset
+
+\begin_inset Flex Subclass
+status open
+
+\begin_layout Plain Layout
+MSC code1 
+\begin_inset ERT
+status collapsed
+
+\begin_layout Plain Layout
+
+
+\backslash
+and 
+\end_layout
+
+\end_inset
+
+MSC code2 
+\begin_inset ERT
+status collapsed
+
+\begin_layout Plain Layout
+
+
+\backslash
+and 
+\end_layout
+
+\end_inset
+
+more
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+Your text comes here.
+ Separate text sections with
+\end_layout
+
+\end_body
+\end_document
diff --git a/autotests/export/docbook/svglo.xml b/autotests/export/docbook/svglo.xml
new file mode 100644 (file)
index 0000000..9615ca9
--- /dev/null
@@ -0,0 +1,19 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- This DocBook file was created by LyX 2.4.0dev
+  See http://www.lyx.org/ for more information -->
+<article xml:lang="en_US" xmlns="http://docbook.org/ns/docbook" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:m="http://www.w3.org/1998/Math/MathML" xmlns:xi="http://www.w3.org/2001/XInclude" version="5.2">
+<info>
+<title>Title</title>
+<subjectset role='pacs'><subject>PACS code1 <!-- \and  -->
+PACS code2 <!-- \and  -->
+more</subject></subjectset><subjectset role='mcs'><subject>MSC code1 <!-- \and  -->
+MSC code2 <!-- \and  -->
+more</subject></subjectset><keywordset><keyword>First keyword <!-- \and  -->
+Second keyword <!-- \and  -->
+More</keyword></keywordset><abstract>
+<para>Abstract text.   </para>
+</abstract>
+
+</info>
+<para>Your text comes here. Separate text sections with</para>
+</article>
\ No newline at end of file
diff --git a/lib/layouts/svglobal3.layout b/lib/layouts/svglobal3.layout
index c65c2a7cd3e813a9983e511f0258b3202b577290..7a9083ae592d42ae52e338738b32fdd565b38803 100644 (file)
@@ -83,6 +83,11 @@ InsetLayout Flex:Subclass
        CopyStyle       Flex:Keywords
        LatexName       subclass
        LabelString     "Mathematics Subject Classification"
+       DocBookTag          subject
+       DocBookTagType      paragraph
+       DocBookWrapperTag   subjectset
+       DocBookWrapperAttr  role='mcs'
+       DocBookInInfo       always
 End
 
 InsetLayout Flex:CRSC
diff --git a/src/OutputParams.h b/src/OutputParams.h
index e64e623079c6d590e40e32b2cefeee1a1925a36c..ec9461371e1f2cc22ebbc2363d01e9ded03e338c 100644 (file)
@@ -376,6 +376,9 @@ public:
        /// Is the current context a table?
        bool docbook_in_table = false;
 
+       /// Should the layouts that should/must go into <info> be generated?
+       bool docbook_generate_info = true;
+
        /// Are we generating this material for inclusion in a TOC-like entity?
        bool for_toc = false;
 
diff --git a/src/Paragraph.cpp b/src/Paragraph.cpp
index c668f3b686734b8c267a6a6e6a8bbce5035e6c67..aa6bd303041cf15c7674667c7d98196e1ad98e1d 100644 (file)
@@ -3365,7 +3365,7 @@ std::vector<docstring> Paragraph::simpleDocBookOnePar(Buffer const & buf,
 
                // If this is an InsetNewline, generate a new paragraph. Also reset the fonts, so that tags are closed in
                // this paragraph.
-               if (getInset(i) != nullptr && getInset(i)->lyxCode() == NEWLINE_CODE) {
+               if (getInset(i) && getInset(i)->lyxCode() == NEWLINE_CODE) {
                        if (!ignore_fonts)
                                xs->closeFontTags();
 
diff --git a/src/insets/InsetLayout.cpp b/src/insets/InsetLayout.cpp
index c22aea9aeda252788486e6dd8fd54884af290268..dcb863c8cd511265ae5d91887e1d9ad428f39786 100644 (file)
@@ -91,9 +91,10 @@ bool InsetLayout::read(Lexer & lex, TextClass const & tclass,
                IL_HTMLSTYLE,
                IL_HTMLPREAMBLE,
                IL_DOCBOOKTAG,
-               IL_DOCBOOKTAGTYPE,
                IL_DOCBOOKATTR,
+               IL_DOCBOOKTAGTYPE,
                IL_DOCBOOKSECTION,
+               IL_DOCBOOKININFO,
                IL_DOCBOOKWRAPPERTAG,
                IL_DOCBOOKWRAPPERTAGTYPE,
                IL_DOCBOOKWRAPPERATTR,
@@ -142,6 +143,7 @@ bool InsetLayout::read(Lexer & lex, TextClass const & tclass,
                { "decoration", IL_DECORATION },
                { "display", IL_DISPLAY },
                { "docbookattr", IL_DOCBOOKATTR },
+               { "docbookininfo", IL_DOCBOOKININFO },
                { "docbooksection", IL_DOCBOOKSECTION },
                { "docbooktag", IL_DOCBOOKTAG },
                { "docbooktagtype", IL_DOCBOOKTAGTYPE },
@@ -491,11 +493,14 @@ bool InsetLayout::read(Lexer & lex, TextClass const & tclass,
                case IL_DOCBOOKTAG:
                        lex >> docbooktag_;
                        break;
+               case IL_DOCBOOKATTR:
+                       lex >> docbookattr_;
+                       break;
                case IL_DOCBOOKTAGTYPE:
                        lex >> docbooktagtype_;
                        break;
-               case IL_DOCBOOKATTR:
-                       lex >> docbookattr_;
+               case IL_DOCBOOKININFO:
+                       lex >> docbookininfo_;
                        break;
                case IL_DOCBOOKSECTION:
                        lex >> docbooksection_;
@@ -638,6 +643,17 @@ docstring InsetLayout::htmlstyle() const
        return retval;
 }
 
+
+std::string const & InsetLayout::docbookininfo() const
+{
+       // Same as Layout::docbookininfo: a three-valued setting ("never", "always" or "maybe"). Only titles should be "maybe": otherwise, metadata is "always", content is "never".
+       // An empty or unrecognized value is replaced by "never" on access, which is why docbookininfo_ is written from this const getter.
+       if (docbookininfo_.empty() || (docbookininfo_ != "never" && docbookininfo_ != "always" && docbookininfo_ != "maybe"))
+               docbookininfo_ = "never";
+       return docbookininfo_;
+}
+
+
 void InsetLayout::readArgument(Lexer & lex)
 {
        Layout::latexarg arg;
diff --git a/src/insets/InsetLayout.h b/src/insets/InsetLayout.h
index 8cc83ddaaacee68e1e76fc460e3e373ba457b2d1..6f2f3ddfe02410a230f46c29be42c706ea40a5d9 100644 (file)
@@ -154,6 +154,8 @@ public:
        ///
        std::string docbookattr() const { return docbookattr_; }
        ///
+       std::string const & docbookininfo() const;
+       ///
        bool docbooksection() const { return docbooksection_; }
        ///
        std::string docbookwrappertag() const { return docbookwrappertag_; }
@@ -295,6 +297,8 @@ private:
        ///
        std::string docbookattr_;
        ///
+       mutable std::string docbookininfo_;
+       ///
        bool docbooksection_ = false;
        ///
        std::string docbookwrappertag_;
diff --git a/src/insets/InsetText.cpp b/src/insets/InsetText.cpp
index 9e5f4ac02ee185af575eb188f5d55f231904921a..99ddfeead286394bf6ce172ddc50e3a6802d7f3c 100644 (file)
@@ -616,16 +616,28 @@ void InsetText::docbook(XMLStream & xs, OutputParams const & rp, XHTMLOptions op
        }
 
        InsetLayout const & il = getLayout();
-       if (opts & WriteOuterTag && !il.docbooktag().empty() && il.docbooktag() != "NONE" && il.docbooktag() != "IGNORE") {
-               docstring attrs = docstring();
-               if (!il.docbookattr().empty())
-                       attrs += from_ascii(il.docbookattr());
-               if (il.docbooktag() == "link")
-                       attrs += from_ascii(" xlink:href=\"") + text_.asString() + from_ascii("\"");
-               xs << xml::StartTag(il.docbooktag(), attrs);
+
+       // Maybe this is an <info> paragraph that should not be generated at all (i.e. right now, its place is somewhere
+       // else, typically outside the current paragraph).
+       if (!rp.docbook_generate_info && il.docbookininfo() != "never")
+               return;
+
+       // Start outputting this inset.
+       if (opts & WriteOuterTag) {
+               if (!il.docbookwrappertag().empty() && il.docbookwrappertag() != "NONE" && il.docbookwrappertag() != "IGNORE")
+                       xs << xml::StartTag(il.docbookwrappertag(), il.docbookwrapperattr());
+
+               if (!il.docbooktag().empty() && il.docbooktag() != "NONE" && il.docbooktag() != "IGNORE") {
+                       docstring attrs = docstring();
+                       if (!il.docbookattr().empty())
+                               attrs += from_ascii(il.docbookattr());
+                       if (il.docbooktag() == "link")
+                               attrs += from_ascii(" xlink:href=\"") + text_.asString() + from_ascii("\"");
+                       xs << xml::StartTag(il.docbooktag(), attrs);
+               }
        }
 
-       // No need for labels that are generated from counters.
+       // No need for labels that are generated from counters. They should be handled by the external DocBook processor.
 
        // With respect to XHTML, paragraphs are still allowed here.
        if (!allowMultiPar())
@@ -637,8 +649,13 @@ void InsetText::docbook(XMLStream & xs, OutputParams const & rp, XHTMLOptions op
        docbookParagraphs(text_, buffer(), xs, runparams);
        xs.endDivision();
 
-       if (opts & WriteOuterTag && !il.docbooktag().empty() && il.docbooktag() != "NONE" && il.docbooktag() != "IGNORE")
-               xs << xml::EndTag(il.docbooktag());
+       if (opts & WriteOuterTag) {
+               if (!il.docbooktag().empty() && il.docbooktag() != "NONE" && il.docbooktag() != "IGNORE")
+                       xs << xml::EndTag(il.docbooktag());
+
+               if (!il.docbookwrappertag().empty() && il.docbookwrappertag() != "NONE" && il.docbookwrappertag() != "IGNORE")
+                       xs << xml::EndTag(il.docbookwrappertag());
+       }
 }
 
 
diff --git a/src/output_docbook.cpp b/src/output_docbook.cpp
index fad06582a5285dd23a554dd928ad3b9aa6ddb4cf..76d1cdc8bdb759753920858f3192371dee5a0b63 100644 (file)
@@ -160,11 +160,10 @@ string fontToAttribute(xml::FontTypes type) {
        // If there is a role (i.e. nonstandard use of a tag), output the attribute. Otherwise, the sheer tag is sufficient
        // for the font.
        string role = fontToRole(type);
-       if (!role.empty()) {
+       if (!role.empty())
                return "role='" + role + "'";
-       } else {
+       else
                return "";
-       }
 }
 
 
@@ -412,10 +411,6 @@ void makeParagraph(
                OutputParams const & runparams,
                ParagraphList::const_iterator const & par)
 {
-       // If this kind of layout should be ignored, already leave.
-       if (par->layout().docbooktag() == "IGNORE")
-               return;
-
        // Useful variables.
        auto const begin = text.paragraphs().begin();
        auto const end = text.paragraphs().end();
@@ -511,7 +506,7 @@ void makeParagraph(
        //              or we're not in the last paragraph, anyway.
        //   (ii) We didn't open it and docbook_in_par is true,
        //              but we are in the first par, and there is a next par.
-       bool const close_par = open_par && (!runparams.docbook_in_par);
+       bool const close_par = open_par && !runparams.docbook_in_par;
 
        // Determine if this paragraph has some real content. Things like new pages are not caught
        // by Paragraph::empty(), even though they do not generate anything useful in DocBook.
@@ -542,10 +537,6 @@ void makeEnvironment(Text const &text,
                      OutputParams const &runparams,
                      ParagraphList::const_iterator const & par)
 {
-       // If this kind of layout should be ignored, already leave.
-       if (par->layout().docbooktag() == "IGNORE")
-               return;
-
        // Useful variables.
        auto const end = text.paragraphs().end();
        auto nextpar = par;
@@ -648,13 +639,6 @@ ParagraphList::const_iterator makeListEnvironment(Text const &text,
        auto const end = text.paragraphs().end();
        auto const envend = findEndOfEnvironment(par, end);
 
-       // If this kind of layout should be ignored, already leave.
-       if (begin->layout().docbooktag() == "IGNORE") {
-               auto nextpar = par;
-               ++nextpar;
-               return nextpar;
-       }
-
        // Output the opening tag for this environment.
        Layout const & envstyle = par->layout();
        openTag(xs, envstyle.docbookwrappertag(), envstyle.docbookwrapperattr(), envstyle.docbookwrappertagtype());
@@ -741,9 +725,6 @@ void makeCommand(
                OutputParams const & runparams,
                ParagraphList::const_iterator const & par)
 {
-       // If this kind of layout should be ignored, already leave.
-       if (par->layout().docbooktag() == "IGNORE")
-               return;
 
        // Useful variables.
        // Unlike XHTML, no need for labels, as they are handled by DocBook tags.
@@ -909,30 +890,77 @@ DocBookInfoTag getParagraphsWithInfo(ParagraphList const &paragraphs,
 } // end anonymous namespace
 
 
+std::set<const Inset *> gatherInfo(ParagraphList::const_iterator par)
+{
+       // Collects the text insets of the given paragraph whose layout asks for <info> (docbookininfo() is not "never").
+       // Its structure is highly similar to makeAny and its friends. It's only made to be called on what should become the document's <abstract>.
+       std::set<const Inset *> values;
+
+       // A layout whose DocBook tag is "IGNORE" contributes nothing: return the empty set.
+       if (par->layout().docbooktag() == "IGNORE")
+               return values;
+
+       // Insets bound for <info> are recorded whole; any other text inset is searched recursively, as the abstract may hide many
+       // things that DocBook doesn't want to be inside it. NOTE(review): the set is ordered by pointer value, not by document position.
+       for (pos_type i = 0; i < par->size(); ++i) {
+               if (par->getInset(i) && par->getInset(i)->asInsetText()) {
+                       InsetText const *inset = par->getInset(i)->asInsetText();
+
+                       if (inset->getLayout().docbookininfo() != "never") {
+                               values.insert(inset);
+                       } else {
+                               auto subpar = inset->paragraphs().begin();
+                               while (subpar != inset->paragraphs().end()) {
+                                       values.merge(gatherInfo(subpar));
+                                       ++subpar;
+                               }
+                       }
+               }
+       }
+
+       return values;
+}
+
+
 ParagraphList::const_iterator makeAny(Text const &text,
                                       Buffer const &buf,
                                       XMLStream &xs,
                                       OutputParams const &runparams,
                                       ParagraphList::const_iterator par)
 {
-       switch (par->layout().latextype) {
-       case LATEX_COMMAND:
-               makeCommand(text, buf, xs, runparams, par);
-               break;
-       case LATEX_ENVIRONMENT:
-               makeEnvironment(text, buf, xs, runparams, par);
-               break;
-       case LATEX_LIST_ENVIRONMENT:
-       case LATEX_ITEM_ENVIRONMENT:
-               // Only case when makeAny() might consume more than one paragraph.
-               return makeListEnvironment(text, buf, xs, runparams, par);
-       case LATEX_PARAGRAPH:
-               makeParagraph(text, buf, xs, runparams, par);
-               break;
-       case LATEX_BIB_ENVIRONMENT:
-               makeBibliography(text, buf, xs, runparams, par);
-               break;
+       bool ignoreParagraph = false;
+
+       // A layout whose DocBook tag is "IGNORE" produces no output.
+       ignoreParagraph |= par->layout().docbooktag() == "IGNORE";
+
+       // Layouts destined for <info> (docbookininfo() is not "never") are skipped when runparams.docbook_generate_info
+       // is false. This does not apply to the abstract itself.
+       bool isAbstract = par->layout().docbookabstract() || par->layout().docbooktag() == "abstract";
+       ignoreParagraph |= !isAbstract && par->layout().docbookininfo() != "never" && !runparams.docbook_generate_info;
+
+       // Switch on the type of paragraph to call the right handler.
+       if (!ignoreParagraph) {
+               switch (par->layout().latextype) {
+               case LATEX_COMMAND:
+                       makeCommand(text, buf, xs, runparams, par);
+                       break;
+               case LATEX_ENVIRONMENT:
+                       makeEnvironment(text, buf, xs, runparams, par);
+                       break;
+               case LATEX_LIST_ENVIRONMENT:
+               case LATEX_ITEM_ENVIRONMENT:
+                       // Only case when makeAny() might consume more than one paragraph.
+                       return makeListEnvironment(text, buf, xs, runparams, par);
+               case LATEX_PARAGRAPH:
+                       makeParagraph(text, buf, xs, runparams, par);
+                       break;
+               case LATEX_BIB_ENVIRONMENT:
+                       makeBibliography(text, buf, xs, runparams, par);
+                       break;
+               }
        }
+
+       // For cases that are not lists, and for skipped paragraphs, the next paragraph to handle is the one that follows.
        ++par;
        return par;
 }
@@ -964,6 +992,9 @@ void outputDocBookInfo(
        // This check must be performed *before* a decision on whether or not to output <info> is made.
        bool hasAbstract = !info.abstract.empty();
        docstring abstract;
+       set<const Inset *> infoInsets; // Paragraphs that should go into <info>, but are hidden in an <abstract>
+       // paragraph. (This happens for quite a few layouts, unfortunately.)
+
        if (hasAbstract) {
                // Generate the abstract XML into a string before further checks.
                // Usually, makeAny only generates one paragraph at a time. However, for the specific case of lists, it might
@@ -971,14 +1002,20 @@ void outputDocBookInfo(
                odocstringstream os2;
                XMLStream xs2(os2);
 
-               set<pit_type> doneParas;
+               auto rp = runparams;
+               rp.docbook_generate_info = false;
+
+               set<pit_type> doneParas; // Paragraphs that have already been converted (mostly to deal with lists).
                for (auto const & p : info.abstract) {
                        if (doneParas.find(p) == doneParas.end()) {
                                auto oldPar = paragraphs.iterator_at(p);
-                               auto newPar = makeAny(text, buf, xs2, runparams, oldPar);
+                               auto newPar = makeAny(text, buf, xs2, rp, oldPar);
+
+                               infoInsets.merge(gatherInfo(oldPar));
 
                                // Insert the indices of all the paragraphs that were just generated (typically, one).
                                // **Make the hypothesis that, when an abstract has a list, all its items are consecutive.**
+                               // Otherwise, makeAny and makeListEnvironment would have to be adapted too.
                                pit_type id = p;
                                while (oldPar != newPar) {
                                        doneParas.emplace(id);
@@ -1009,13 +1046,11 @@ void outputDocBookInfo(
                xs << xml::CR();
        }
 
-       // Output the elements that should go in <info>, before and after the abstract.
+       // Output the elements that should go in <info>.
+       // - First, the title.
        for (auto pit : info.shouldBeInInfo) // Typically, the title: these elements are so important and ubiquitous
                // that mandating a wrapper like <info> would repel users. Thus, generate them first.
                makeAny(text, buf, xs, runparams, paragraphs.iterator_at(pit));
-       for (auto pit : info.mustBeInInfo)
-               makeAny(text, buf, xs, runparams, paragraphs.iterator_at(pit));
-
        // If there is no title, generate one (required for the document to be valid).
        // This code is called for the main document, for table cells, etc., so be precise in this condition.
        if (text.isMainText() && info.shouldBeInInfo.empty() && !runparams.inInclude) {
@@ -1025,8 +1060,14 @@ void outputDocBookInfo(
                xs << xml::CR();
        }
 
-       // Always output the abstract as the last item of the <info>, as it requires special treatment (especially if
-       // it contains several paragraphs that are empty).
+       // - Then, other metadata.
+       for (auto pit : info.mustBeInInfo)
+               makeAny(text, buf, xs, runparams, paragraphs.iterator_at(pit));
+       for (auto const * inset : infoInsets)
+               inset->docbook(xs, runparams);
+
+       // - Finally, always output the abstract as the last item of the <info>, as it requires special treatment
+       // (especially if it contains several paragraphs that are empty).
        if (hasAbstract) {
                if (info.abstractLayout) {
                        xs << XMLStream::ESCAPE_NONE << abstract;