From: Thibaut Cuvelier
Date: Mon, 7 Feb 2022 02:24:11 +0000 (+0100)
Subject: DocBook: refactor ERT recognition as a map, add many coded letters.
X-Git-Url: https://git.lyx.org/gitweb/?a=commitdiff_plain;h=f6edfaa1511cc08b6987da89ebafefeffc2acbf9;p=features.git

DocBook: refactor ERT recognition as a map, add many coded letters.
---

diff --git a/src/insets/InsetERT.cpp b/src/insets/InsetERT.cpp
index 87abc632b8..397f1189eb 100644
--- a/src/insets/InsetERT.cpp
+++ b/src/insets/InsetERT.cpp
@@ -88,6 +88,318 @@ int InsetERT::plaintext(odocstringstream & os,
 }
 
 
+/// Raw LaTeX snippets (punctuation, logos, accented letters) that InsetERT::docbook()
+/// replaces by plain Unicode text instead of emitting the ERT as a comment.
+/// The lookup is done on the whole trimmed ERT, so keys must match it exactly.
+/// Keys must be unique: with this initializer list, std::map keeps the first of two
+/// equal keys and silently drops the other (acute is \', grave is \`).
+/// Values with non-ASCII characters are built with from_utf8; from_ascii is
+/// reserved for pure-ASCII strings.
+static const std::map<docstring, docstring> raw_latex_encoding_to_unicode_xml{
+	// Punctuation.
+	{from_ascii("\\textquotesingle"), from_ascii("'")},
+	{from_ascii("!`"), from_utf8("¡")}, // inverted exclamation mark
+	{from_ascii("?`"), from_utf8("¿")}, // inverted question mark
+
+	// Logos.
+	{from_ascii("\\LaTeX"), from_ascii("LaTeX")},
+	{from_ascii("\\LaTeXe"), from_utf8("LaTeX2ε")}, // LaTeX 2 epsilon
+	{from_ascii("\\LyX"), from_ascii("LyX")},
+	{from_ascii("\\TeX"), from_ascii("TeX")},
+
+	// Accented letters (alphabetical order of description, ligatures after accents).
+	// Missing letters are only indicated for vowels (only few consonants have diacritics).
+	// Only symbols that can be expressed as one Unicode character are present; symbols with more than one
+	// diacritic are also omitted.
+	{from_ascii("\\'{a}"), from_utf8("á")}, // a acute
+	{from_ascii("\\' a"), from_utf8("á")},
+	{from_ascii("\\'{A}"), from_utf8("Á")}, // A acute
+	{from_ascii("\\' A"), from_utf8("Á")},
+	{from_ascii("\\u{a}"), from_utf8("ă")}, // a breve
+	{from_ascii("\\u a"), from_utf8("ă")},
+	{from_ascii("\\u{A}"), from_utf8("Ă")}, // A breve
+	{from_ascii("\\u A"), from_utf8("Ă")},
+	{from_ascii("\\v{a}"), from_utf8("ǎ")}, // a caron
+	{from_ascii("\\v a"), from_utf8("ǎ")},
+	{from_ascii("\\v{A}"), from_utf8("Ǎ")}, // A caron
+	{from_ascii("\\v A"), from_utf8("Ǎ")},
+	// No a cedilla.
+	{from_ascii("\\^{a}"), from_utf8("â")}, // a circumflex
+	{from_ascii("\\^ a"), from_utf8("â")},
+	{from_ascii("\\^{A}"), from_utf8("Â")}, // A circumflex
+	{from_ascii("\\^ A"), from_utf8("Â")},
+	{from_ascii("\\\"{a}"), from_utf8("ä")}, // a diaeresis
+	{from_ascii("\\\" a"), from_utf8("ä")},
+	{from_ascii("\\\"{A}"), from_utf8("Ä")}, // A diaeresis
+	{from_ascii("\\\" A"), from_utf8("Ä")},
+	// No a double acute.
+	{from_ascii("\\`{a}"), from_utf8("à")}, // a grave
+	{from_ascii("\\` a"), from_utf8("à")},
+	{from_ascii("\\`{A}"), from_utf8("À")}, // A grave
+	{from_ascii("\\` A"), from_utf8("À")},
+	{from_ascii("\\~{a}"), from_utf8("ã")}, // a tilde
+	{from_ascii("\\~ a"), from_utf8("ã")},
+	{from_ascii("\\~{A}"), from_utf8("Ã")}, // A tilde
+	{from_ascii("\\~ A"), from_utf8("Ã")},
+	{from_ascii("\\aa"), from_utf8("å")}, // a ring
+	{from_ascii("\\r{a}"), from_utf8("å")},
+	{from_ascii("\\r a"), from_utf8("å")},
+	{from_ascii("\\AA"), from_utf8("Å")}, // A ring
+	{from_ascii("\\r{A}"), from_utf8("Å")},
+	{from_ascii("\\r A"), from_utf8("Å")},
+	{from_ascii("\\ae"), from_utf8("æ")}, // ae ligature
+	{from_ascii("\\AE"), from_utf8("Æ")}, // AE ligature
+	{from_ascii("\\v{c}"), from_utf8("č")}, // c caron
+	{from_ascii("\\v c"), from_utf8("č")},
+	{from_ascii("\\v{C}"), from_utf8("Č")}, // C caron
+	{from_ascii("\\v C"), from_utf8("Č")},
+	{from_ascii("\\c{c}"), from_utf8("ç")}, // c cedilla
+	{from_ascii("\\c c"), from_utf8("ç")},
+	{from_ascii("\\c{C}"), from_utf8("Ç")}, // C cedilla
+	{from_ascii("\\c C"), from_utf8("Ç")},
+	{from_ascii("\\v{d}"), from_utf8("ď")}, // d caron
+	{from_ascii("\\v d"), from_utf8("ď")},
+	{from_ascii("\\v{D}"), from_utf8("Ď")}, // D caron
+	{from_ascii("\\v D"), from_utf8("Ď")},
+	{from_ascii("\\'{e}"), from_utf8("é")}, // e acute
+	{from_ascii("\\' e"), from_utf8("é")},
+	{from_ascii("\\'{E}"), from_utf8("É")}, // E acute
+	{from_ascii("\\' E"), from_utf8("É")},
+	{from_ascii("\\u{e}"), from_utf8("ĕ")}, // e breve
+	{from_ascii("\\u e"), from_utf8("ĕ")},
+	{from_ascii("\\u{E}"), from_utf8("Ĕ")}, // E breve
+	{from_ascii("\\u E"), from_utf8("Ĕ")},
+	{from_ascii("\\v{e}"), from_utf8("ě")}, // e caron
+	{from_ascii("\\v e"), from_utf8("ě")},
+	{from_ascii("\\v{E}"), from_utf8("Ě")}, // E caron
+	{from_ascii("\\v E"), from_utf8("Ě")},
+	{from_ascii("\\c{e}"), from_utf8("ȩ")}, // e cedilla
+	{from_ascii("\\c e"), from_utf8("ȩ")},
+	{from_ascii("\\c{E}"), from_utf8("Ȩ")}, // E cedilla
+	{from_ascii("\\c E"), from_utf8("Ȩ")},
+	{from_ascii("\\^{e}"), from_utf8("ê")}, // e circumflex
+	{from_ascii("\\^ e"), from_utf8("ê")},
+	{from_ascii("\\^{E}"), from_utf8("Ê")}, // E circumflex
+	{from_ascii("\\^ E"), from_utf8("Ê")},
+	{from_ascii("\\\"{e}"), from_utf8("ë")}, // e diaeresis
+	{from_ascii("\\\" e"), from_utf8("ë")},
+	{from_ascii("\\\"{E}"), from_utf8("Ë")}, // E diaeresis
+	{from_ascii("\\\" E"), from_utf8("Ë")},
+	// No e double acute.
+	{from_ascii("\\`{e}"), from_utf8("è")}, // e grave
+	{from_ascii("\\` e"), from_utf8("è")},
+	{from_ascii("\\`{E}"), from_utf8("È")}, // E grave
+	{from_ascii("\\` E"), from_utf8("È")},
+	{from_ascii("\\~{e}"), from_utf8("ẽ")}, // e tilde
+	{from_ascii("\\~ e"), from_utf8("ẽ")},
+	{from_ascii("\\~{E}"), from_utf8("Ẽ")}, // E tilde
+	{from_ascii("\\~ E"), from_utf8("Ẽ")},
+	// No e ring.
+	{from_ascii("\\u{g}"), from_utf8("ğ")}, // g breve
+	{from_ascii("\\u g"), from_utf8("ğ")},
+	{from_ascii("\\u{G}"), from_utf8("Ğ")}, // G breve
+	{from_ascii("\\u G"), from_utf8("Ğ")},
+	{from_ascii("\\v{g}"), from_utf8("ǧ")}, // g caron
+	{from_ascii("\\v g"), from_utf8("ǧ")},
+	{from_ascii("\\v{G}"), from_utf8("Ǧ")}, // G caron
+	{from_ascii("\\v G"), from_utf8("Ǧ")},
+	{from_ascii("\\c{g}"), from_utf8("ģ")}, // g cedilla
+	{from_ascii("\\c g"), from_utf8("ģ")},
+	{from_ascii("\\c{G}"), from_utf8("Ģ")}, // G cedilla
+	{from_ascii("\\c G"), from_utf8("Ģ")},
+	{from_ascii("\\i"), from_utf8("ı")}, // i dotless
+	{from_ascii("\\'{i}"), from_utf8("í")}, // i acute
+	{from_ascii("\\' i"), from_utf8("í")},
+	{from_ascii("\\'{I}"), from_utf8("Í")}, // I acute
+	{from_ascii("\\' I"), from_utf8("Í")},
+	{from_ascii("\\u{i}"), from_utf8("ĭ")}, // i breve
+	{from_ascii("\\u i"), from_utf8("ĭ")},
+	{from_ascii("\\u{I}"), from_utf8("Ĭ")}, // I breve
+	{from_ascii("\\u I"), from_utf8("Ĭ")},
+	{from_ascii("\\v{i}"), from_utf8("ǐ")}, // i caron
+	{from_ascii("\\v i"), from_utf8("ǐ")},
+	{from_ascii("\\v{I}"), from_utf8("Ǐ")}, // I caron
+	{from_ascii("\\v I"), from_utf8("Ǐ")},
+	// No i cedilla.
+	{from_ascii("\\^{i}"), from_utf8("î")}, // i circumflex
+	{from_ascii("\\^ i"), from_utf8("î")},
+	{from_ascii("\\^{I}"), from_utf8("Î")}, // I circumflex
+	{from_ascii("\\^ I"), from_utf8("Î")},
+	{from_ascii("\\\"{i}"), from_utf8("ï")}, // i diaeresis
+	{from_ascii("\\\" i"), from_utf8("ï")},
+	{from_ascii("\\\"{I}"), from_utf8("Ï")}, // I diaeresis
+	{from_ascii("\\\" I"), from_utf8("Ï")},
+	// No i double acute.
+	{from_ascii("\\`{i}"), from_utf8("ì")}, // i grave
+	{from_ascii("\\` i"), from_utf8("ì")},
+	{from_ascii("\\`{I}"), from_utf8("Ì")}, // I grave
+	{from_ascii("\\` I"), from_utf8("Ì")},
+	{from_ascii("\\~{i}"), from_utf8("ĩ")}, // i tilde
+	{from_ascii("\\~ i"), from_utf8("ĩ")},
+	{from_ascii("\\~{I}"), from_utf8("Ĩ")}, // I tilde
+	{from_ascii("\\~ I"), from_utf8("Ĩ")},
+	// No i ring.
+	{from_ascii("\\j"), from_utf8("ȷ")}, // j dotless
+	{from_ascii("\\v{k}"), from_utf8("ǩ")}, // k caron
+	{from_ascii("\\v k"), from_utf8("ǩ")},
+	{from_ascii("\\v{K}"), from_utf8("Ǩ")}, // K caron
+	{from_ascii("\\v K"), from_utf8("Ǩ")},
+	{from_ascii("\\c{k}"), from_utf8("ķ")}, // k cedilla
+	{from_ascii("\\c k"), from_utf8("ķ")},
+	{from_ascii("\\c{K}"), from_utf8("Ķ")}, // K cedilla
+	{from_ascii("\\c K"), from_utf8("Ķ")},
+	{from_ascii("\\v{l}"), from_utf8("ľ")}, // l caron
+	{from_ascii("\\v l"), from_utf8("ľ")},
+	{from_ascii("\\v{L}"), from_utf8("Ľ")}, // L caron
+	{from_ascii("\\v L"), from_utf8("Ľ")},
+	{from_ascii("\\c{l}"), from_utf8("ļ")}, // l cedilla
+	{from_ascii("\\c l"), from_utf8("ļ")},
+	{from_ascii("\\c{L}"), from_utf8("Ļ")}, // L cedilla
+	{from_ascii("\\c L"), from_utf8("Ļ")},
+	{from_ascii("\\l"), from_utf8("ł")}, // l stroke
+	{from_ascii("\\L"), from_utf8("Ł")}, // L stroke
+	{from_ascii("\\v{n}"), from_utf8("ň")}, // n caron
+	{from_ascii("\\v n"), from_utf8("ň")},
+	{from_ascii("\\v{N}"), from_utf8("Ň")}, // N caron
+	{from_ascii("\\v N"), from_utf8("Ň")},
+	{from_ascii("\\c{n}"), from_utf8("ņ")}, // n cedilla
+	{from_ascii("\\c n"), from_utf8("ņ")},
+	{from_ascii("\\c{N}"), from_utf8("Ņ")}, // N cedilla
+	{from_ascii("\\c N"), from_utf8("Ņ")},
+	{from_ascii("\\~{n}"), from_utf8("ñ")}, // n tilde
+	{from_ascii("\\~ n"), from_utf8("ñ")},
+	{from_ascii("\\~{N}"), from_utf8("Ñ")}, // N tilde
+	{from_ascii("\\~ N"), from_utf8("Ñ")},
+	{from_ascii("\\'{o}"), from_utf8("ó")}, // o acute
+	{from_ascii("\\' o"), from_utf8("ó")},
+	{from_ascii("\\'{O}"), from_utf8("Ó")}, // O acute
+	{from_ascii("\\' O"), from_utf8("Ó")},
+	{from_ascii("\\u{o}"), from_utf8("ŏ")}, // o breve
+	{from_ascii("\\u o"), from_utf8("ŏ")},
+	{from_ascii("\\u{O}"), from_utf8("Ŏ")}, // O breve
+	{from_ascii("\\u O"), from_utf8("Ŏ")},
+	{from_ascii("\\v{o}"), from_utf8("ǒ")}, // o caron
+	{from_ascii("\\v o"), from_utf8("ǒ")},
+	{from_ascii("\\v{O}"), from_utf8("Ǒ")}, // O caron
+	{from_ascii("\\v O"), from_utf8("Ǒ")},
+	// No o cedilla.
+	{from_ascii("\\^{o}"), from_utf8("ô")}, // o circumflex
+	{from_ascii("\\^ o"), from_utf8("ô")},
+	{from_ascii("\\^{O}"), from_utf8("Ô")}, // O circumflex
+	{from_ascii("\\^ O"), from_utf8("Ô")},
+	{from_ascii("\\\"{o}"), from_utf8("ö")}, // o diaeresis
+	{from_ascii("\\\" o"), from_utf8("ö")},
+	{from_ascii("\\\"{O}"), from_utf8("Ö")}, // O diaeresis
+	{from_ascii("\\\" O"), from_utf8("Ö")},
+	{from_ascii("\\H{o}"), from_utf8("ő")}, // o double acute
+	{from_ascii("\\H o"), from_utf8("ő")},
+	{from_ascii("\\H{O}"), from_utf8("Ő")}, // O double acute
+	{from_ascii("\\H O"), from_utf8("Ő")},
+	{from_ascii("\\`{o}"), from_utf8("ò")}, // o grave
+	{from_ascii("\\` o"), from_utf8("ò")},
+	{from_ascii("\\`{O}"), from_utf8("Ò")}, // O grave
+	{from_ascii("\\` O"), from_utf8("Ò")},
+	{from_ascii("\\o"), from_utf8("ø")}, // o stroke
+	{from_ascii("\\O"), from_utf8("Ø")}, // O stroke
+	{from_ascii("\\~{o}"), from_utf8("õ")}, // o tilde
+	{from_ascii("\\~ o"), from_utf8("õ")},
+	{from_ascii("\\~{O}"), from_utf8("Õ")}, // O tilde
+	{from_ascii("\\~ O"), from_utf8("Õ")},
+	// No o ring.
+	{from_ascii("\\oe"), from_utf8("œ")}, // oe ligature
+	{from_ascii("\\OE"), from_utf8("Œ")}, // OE ligature
+	{from_ascii("\\v{r}"), from_utf8("ř")}, // r caron
+	{from_ascii("\\v r"), from_utf8("ř")},
+	{from_ascii("\\v{R}"), from_utf8("Ř")}, // R caron
+	{from_ascii("\\v R"), from_utf8("Ř")},
+	{from_ascii("\\c{r}"), from_utf8("ŗ")}, // r cedilla
+	{from_ascii("\\c r"), from_utf8("ŗ")},
+	{from_ascii("\\c{R}"), from_utf8("Ŗ")}, // R cedilla
+	{from_ascii("\\c R"), from_utf8("Ŗ")},
+	{from_ascii("\\v{s}"), from_utf8("š")}, // s caron
+	{from_ascii("\\v s"), from_utf8("š")},
+	{from_ascii("\\v{S}"), from_utf8("Š")}, // S caron
+	{from_ascii("\\v S"), from_utf8("Š")},
+	{from_ascii("\\c{s}"), from_utf8("ş")}, // s cedilla
+	{from_ascii("\\c s"), from_utf8("ş")},
+	{from_ascii("\\c{S}"), from_utf8("Ş")}, // S cedilla
+	{from_ascii("\\c S"), from_utf8("Ş")},
+	{from_ascii("\\v{t}"), from_utf8("ť")}, // t caron
+	{from_ascii("\\v t"), from_utf8("ť")},
+	{from_ascii("\\v{T}"), from_utf8("Ť")}, // T caron
+	{from_ascii("\\v T"), from_utf8("Ť")},
+	{from_ascii("\\c{t}"), from_utf8("ţ")}, // t cedilla
+	{from_ascii("\\c t"), from_utf8("ţ")},
+	{from_ascii("\\c{T}"), from_utf8("Ţ")}, // T cedilla
+	{from_ascii("\\c T"), from_utf8("Ţ")},
+	{from_ascii("\\'{u}"), from_utf8("ú")}, // u acute
+	{from_ascii("\\' u"), from_utf8("ú")},
+	{from_ascii("\\'{U}"), from_utf8("Ú")}, // U acute
+	{from_ascii("\\' U"), from_utf8("Ú")},
+	{from_ascii("\\u{u}"), from_utf8("ŭ")}, // u breve
+	{from_ascii("\\u u"), from_utf8("ŭ")},
+	{from_ascii("\\u{U}"), from_utf8("Ŭ")}, // U breve
+	{from_ascii("\\u U"), from_utf8("Ŭ")},
+	{from_ascii("\\v{u}"), from_utf8("ǔ")}, // u caron
+	{from_ascii("\\v u"), from_utf8("ǔ")},
+	{from_ascii("\\v{U}"), from_utf8("Ǔ")}, // U caron
+	{from_ascii("\\v U"), from_utf8("Ǔ")},
+	// No u cedilla.
+	{from_ascii("\\^{u}"), from_utf8("û")}, // u circumflex
+	{from_ascii("\\^ u"), from_utf8("û")},
+	{from_ascii("\\^{U}"), from_utf8("Û")}, // U circumflex
+	{from_ascii("\\^ U"), from_utf8("Û")},
+	{from_ascii("\\\"{u}"), from_utf8("ü")}, // u diaeresis
+	{from_ascii("\\\" u"), from_utf8("ü")},
+	{from_ascii("\\\"{U}"), from_utf8("Ü")}, // U diaeresis
+	{from_ascii("\\\" U"), from_utf8("Ü")},
+	{from_ascii("\\H{u}"), from_utf8("ű")}, // u double acute
+	{from_ascii("\\H u"), from_utf8("ű")},
+	{from_ascii("\\H{U}"), from_utf8("Ű")}, // U double acute
+	{from_ascii("\\H U"), from_utf8("Ű")},
+	{from_ascii("\\`{u}"), from_utf8("ù")}, // u grave
+	{from_ascii("\\` u"), from_utf8("ù")},
+	{from_ascii("\\`{U}"), from_utf8("Ù")}, // U grave
+	{from_ascii("\\` U"), from_utf8("Ù")},
+	{from_ascii("\\~{u}"), from_utf8("ũ")}, // u tilde
+	{from_ascii("\\~ u"), from_utf8("ũ")},
+	{from_ascii("\\~{U}"), from_utf8("Ũ")}, // U tilde
+	{from_ascii("\\~ U"), from_utf8("Ũ")},
+	{from_ascii("\\r{u}"), from_utf8("ů")}, // u ring
+	{from_ascii("\\r u"), from_utf8("ů")},
+	{from_ascii("\\r{U}"), from_utf8("Ů")}, // U ring
+	{from_ascii("\\r U"), from_utf8("Ů")},
+	{from_ascii("\\'{y}"), from_utf8("ý")}, // y acute
+	{from_ascii("\\' y"), from_utf8("ý")},
+	{from_ascii("\\'{Y}"), from_utf8("Ý")}, // Y acute
+	{from_ascii("\\' Y"), from_utf8("Ý")},
+	// No y breve.
+	// No y cedilla.
+	{from_ascii("\\^{y}"), from_utf8("ŷ")}, // y circumflex
+	{from_ascii("\\^ y"), from_utf8("ŷ")},
+	{from_ascii("\\^{Y}"), from_utf8("Ŷ")}, // Y circumflex
+	{from_ascii("\\^ Y"), from_utf8("Ŷ")},
+	{from_ascii("\\\"{y}"), from_utf8("ÿ")}, // y diaeresis
+	{from_ascii("\\\" y"), from_utf8("ÿ")},
+	{from_ascii("\\\"{Y}"), from_utf8("Ÿ")}, // Y diaeresis
+	{from_ascii("\\\" Y"), from_utf8("Ÿ")},
+	// No y double acute.
+	{from_ascii("\\`{y}"), from_utf8("ỳ")}, // y grave
+	{from_ascii("\\` y"), from_utf8("ỳ")},
+	{from_ascii("\\`{Y}"), from_utf8("Ỳ")}, // Y grave
+	{from_ascii("\\` Y"), from_utf8("Ỳ")},
+	{from_ascii("\\~{y}"), from_utf8("ỹ")}, // y tilde
+	{from_ascii("\\~ y"), from_utf8("ỹ")},
+	{from_ascii("\\~{Y}"), from_utf8("Ỹ")}, // Y tilde
+	{from_ascii("\\~ Y"), from_utf8("Ỹ")},
+	// No y ring.
+	{from_ascii("\\v{z}"), from_utf8("ž")}, // z caron
+	{from_ascii("\\v z"), from_utf8("ž")},
+	{from_ascii("\\v{Z}"), from_utf8("Ž")}, // Z caron
+	{from_ascii("\\v Z"), from_utf8("Ž")},
+};
+
+
 void InsetERT::docbook(XMLStream & xs, OutputParams const & runparams) const
 {
 	auto const begin = paragraphs().begin();
@@ -131,26 +443,36 @@ void InsetERT::docbook(XMLStream & xs, OutputParams const & runparams) const
 	// auto lay = getLayout();
 	// }
 
-	// Output the ERT as a comment with the appropriate escaping if the command is not recognised.
-	if (trim(os.str()) == from_ascii("\\textquotesingle")) {
-		xs << "'";
-	} else if (trim(os.str()) == from_ascii("\\TeX") || trim(os.str()) == from_ascii("\\TeX{}")) {
-		xs << "TeX";
-	} else if (trim(os.str()) == from_ascii("\\LaTeX") || trim(os.str()) == from_ascii("\\LaTeX{}")) {
-		xs << "LaTeX";
-	} else if (trim(os.str()) == from_ascii("\\LaTeXe") || trim(os.str()) == from_ascii("\\LaTeXe{}")) {
-		xs << "LaTeX2ε";
-	} else if (trim(os.str()) == from_ascii("\\LyX") || trim(os.str()) == from_ascii("\\LyX{}")) {
-		xs << "LyX";
-	} else if (trim(os.str()) == from_ascii("\\oe") || trim(os.str()) == from_ascii("\\oe{}")) {
-		xs << "œ";
-	} else if (trim(os.str()) == from_ascii("\\OE") || trim(os.str()) == from_ascii("\\OE{}")) {
-		xs << "Œ";
+	// Try to recognise some commands to have a nicer DocBook output. First step: some commands have a direct mapping
+	// to DocBook, mostly because the mapping is simply text or an XML entity.
+	docstring const os_trimmed = trim(os.str());
+	bool output_as_comment = true;
+
+	auto command_raw_translation = raw_latex_encoding_to_unicode_xml.find(os_trimmed);
+	if (command_raw_translation != raw_latex_encoding_to_unicode_xml.end()) {
+		xs << command_raw_translation->second;
+		output_as_comment = false;
 	} else {
-		xs << XMLStream::ESCAPE_NONE << "<!-- ";
-		xs << XMLStream::ESCAPE_COMMENTS << os.str();
-		xs << XMLStream::ESCAPE_NONE << " -->";
-	}
+		// If the trimmed ERT ends with {}, try a mapping without it. Only a trailing {} counts: cutting at the
+		// first {} found anywhere would silently drop whatever follows it (e.g. in "\LaTeX{} rocks").
+		docstring const braces = from_ascii("{}");
+		if (os_trimmed.size() >= braces.size() && os_trimmed.compare(os_trimmed.size() - braces.size(), braces.size(), braces) == 0) {
+			auto key = os_trimmed.substr(0, os_trimmed.size() - braces.size());
+			auto command_braces_translation = raw_latex_encoding_to_unicode_xml.find(key);
+
+			if (command_braces_translation != raw_latex_encoding_to_unicode_xml.end()) {
+				xs << command_braces_translation->second;
+				output_as_comment = false;
+			}
+		}
+	}
+
+	// Otherwise, output the ERT as a comment with the appropriate escaping if the command is not recognised.
+	if (output_as_comment) {
+		xs << XMLStream::ESCAPE_NONE << "<!-- ";
+		xs << XMLStream::ESCAPE_COMMENTS << os.str();
+		xs << XMLStream::ESCAPE_NONE << " -->";
+	}
 }
 
 