From: Juergen Spitzmueller Date: Sun, 11 Mar 2018 17:04:23 +0000 (+0100) Subject: tex2lyx: update quote handling X-Git-Tag: lyx-2.4.0dev-acb2ca7b~3710 X-Git-Url: https://git.lyx.org/gitweb/?a=commitdiff_plain;h=8184f08f4af6efea6d1499e3f8c8d3c20ebb1b97;p=features.git tex2lyx: update quote handling * Consider new quote styles * Consider changed quote styles * Try to be a bit smarter with ambiguous quotation marks --- diff --git a/src/tex2lyx/Preamble.cpp b/src/tex2lyx/Preamble.cpp index 01010140d9..50a56e82ea 100644 --- a/src/tex2lyx/Preamble.cpp +++ b/src/tex2lyx/Preamble.cpp @@ -88,35 +88,51 @@ const char * const known_coded_languages[] = {"french", "afrikaans", "albanian", "vietnamese", "welsh", 0}; +/// languages with british quotes (.lyx names) +const char * const known_british_quotes_languages[] = {"british", "welsh", 0}; + +/// languages with cjk quotes (.lyx names) +const char * const known_cjk_quotes_languages[] = {"chinese-traditional", +"japanese", "japanese-cjk", 0}; + +/// languages with cjk-angle quotes (.lyx names) +const char * const known_cjkangle_quotes_languages[] = {"korean", 0}; + /// languages with danish quotes (.lyx names) const char * const known_danish_quotes_languages[] = {"danish", 0}; /// languages with english quotes (.lyx names) const char * const known_english_quotes_languages[] = {"american", "australian", "bahasa", "bahasam", "brazilian", "canadian", "chinese-simplified", "english", -"esperanto", "hebrew", "irish", "korean", "newzealand", "portuguese", "scottish", -"thai", 0}; +"esperanto", "farsi", "interlingua", "irish", "newzealand", "scottish", +"thai", "turkish", "vietnamese", 0}; /// languages with french quotes (.lyx names) -const char * const known_french_quotes_languages[] = {"albanian", -"arabic_arabi", "arabic_arabtex", "asturian", "basque", "canadien", "catalan", -"french", "friulan", "galician", "greek", "italian", "norsk", "nynorsk", -"piedmontese", "polutonikogreek", "russian", "spanish", "spanish-mexico", -"turkish", "turkmen", "ukrainian", "vietnamese", 0}; +const char * const known_french_quotes_languages[] = {"ancientgreek", +"arabic_arabi", "arabic_arabtex", "asturian", "belarusian", "breton", +"canadien", "catalan", "french", "friulan", "galician", "italian", "occitan", +"piedmontese", "portuguese", "spanish", "spanish-mexico", 0}; /// languages with german quotes (.lyx names) const char * const known_german_quotes_languages[] = {"austrian", "bulgarian", -"czech", "german", "georgian", "icelandic", "lithuanian", "lowersorbian", "macedonian", -"naustrian", "ngerman", "romansh", "serbian", "serbian-latin", "slovak", "slovene", +"czech", "estonian", "georgian", "german", "icelandic", "latvian", "lithuanian", +"lowersorbian", "macedonian", "naustrian", "ngerman", "romansh", "slovak", "slovene", "uppersorbian", 0}; /// languages with polish quotes (.lyx names) const char * const known_polish_quotes_languages[] = {"afrikaans", "bosnian", "croatian", -"dutch", "estonian", "magyar", "polish", "romanian", 0}; +"dutch", "magyar", "polish", "romanian", "serbian", "serbian-latin", 0}; + +/// languages with russian quotes (.lyx names) +const char * const known_russian_quotes_languages[] = {"russian", "ukrainian", 0}; /// languages with swedish quotes (.lyx names) -const char * const known_swedish_quotes_languages[] = {"finnish", -"swedish", 0}; +const char * const known_swedish_quotes_languages[] = {"finnish", "swedish", 0}; + +/// languages with swiss quotes (.lyx names) +const char * const known_swiss_quotes_languages[] = {"albanian", +"armenian", "basque", "german-ch", "german-ch-old", +"norsk", "nynorsk", "turkmen", "ukrainian", "vietnamese", 0}; /// known language packages from the times before babel const char * const known_old_language_packages[] = {"french", "frenchle", @@ -1214,33 +1230,6 @@ void Preamble::handle_if(Parser & p, bool in_lyx_preamble) bool Preamble::writeLyXHeader(ostream & os, bool subdoc, string const & outfiledir) { - // set the quote language - // LyX only knows the following quotes languages: - // english, swedish, german, polish, french and danish - // (quotes for "japanese" and "chinese-traditional" are missing because - // they wouldn't be useful: https://www.lyx.org/trac/ticket/6383) - // conversion list taken from - // https://en.wikipedia.org/wiki/Quotation_mark,_non-English_usage - // (quotes for kazakh and interlingua are unknown) - // danish - if (is_known(h_language, known_danish_quotes_languages)) - h_quotes_style = "danish"; - // french - else if (is_known(h_language, known_french_quotes_languages)) - h_quotes_style = "french"; - // german - else if (is_known(h_language, known_german_quotes_languages)) - h_quotes_style = "german"; - // polish - else if (is_known(h_language, known_polish_quotes_languages)) - h_quotes_style = "polish"; - // swedish - else if (is_known(h_language, known_swedish_quotes_languages)) - h_quotes_style = "swedish"; - //english - else if (is_known(h_language, known_english_quotes_languages)) - h_quotes_style = "english"; - if (contains(h_float_placement, "H")) registerAutomaticallyLoadedPackage("float"); if (h_spacing != "single" && h_spacing != "default") @@ -2264,6 +2253,47 @@ void Preamble::parse(Parser & p, string const & forceclass, h_options += ',' + lyx2babel(default_language); } } + + // Finally, set the quote style. + // LyX knows the following quotes styles: + // british, cjk, cjkangle, danish, english, french, german, + // polish, russian, swedish and swiss + // conversion list taken from + // https://en.wikipedia.org/wiki/Quotation_mark,_non-English_usage + // (quotes for kazakh are unknown) + // british + if (is_known(h_language, known_british_quotes_languages)) + h_quotes_style = "british"; + // cjk + else if (is_known(h_language, known_cjk_quotes_languages)) + h_quotes_style = "cjk"; + // cjkangle + else if (is_known(h_language, known_cjkangle_quotes_languages)) + h_quotes_style = "cjkangle"; + // danish + else if (is_known(h_language, known_danish_quotes_languages)) + h_quotes_style = "danish"; + // french + else if (is_known(h_language, known_french_quotes_languages)) + h_quotes_style = "french"; + // german + else if (is_known(h_language, known_german_quotes_languages)) + h_quotes_style = "german"; + // polish + else if (is_known(h_language, known_polish_quotes_languages)) + h_quotes_style = "polish"; + // russian + else if (is_known(h_language, known_russian_quotes_languages)) + h_quotes_style = "russian"; + // swedish + else if (is_known(h_language, known_swedish_quotes_languages)) + h_quotes_style = "swedish"; + // swiss + else if (is_known(h_language, known_swiss_quotes_languages)) + h_quotes_style = "swiss"; + // english + else if (is_known(h_language, known_english_quotes_languages)) + h_quotes_style = "english"; } diff --git a/src/tex2lyx/Preamble.h b/src/tex2lyx/Preamble.h index 9c70dca62a..0d3ff0113f 100644 --- a/src/tex2lyx/Preamble.h +++ b/src/tex2lyx/Preamble.h @@ -54,6 +54,8 @@ public: std::string docLanguage() const { return h_language; } /// The language of text which is not explicitly marked std::string defaultLanguage() const { return default_language; } + /// The quotation marks style + std::string quotesStyle() const { return h_quotes_style; } /// bool usePolyglossia() const; /// diff --git a/src/tex2lyx/TODO.txt b/src/tex2lyx/TODO.txt index 0ec2ca633a..5b654fbfb3 100644 --- a/src/tex2lyx/TODO.txt +++ b/src/tex2lyx/TODO.txt @@ -75,23 +75,6 @@ Format LaTeX feature LyX feature \twocolumn[]{}{} Layout Twocolumn, InsetArgument \item[]<> InsetArgument \begin{enumerate|itemize|...}[] InsetArgument -520 Plain InsetQuote Style: - \textquotesingle \begin_inset Quotes qls, \begin_inset Quotes qrs - \textquotedbl \begin_inset Quotes qld, \begin_inset Quotes qrd -521 New Quote Styles InsetQuote - - british \begin_inset Quotes b.. - - swiss \begin_inset Quotes c.. - - swedishg \begin_inset Quotes w.. - - frenchin \begin_inset Quotes i.. - - russian \begin_inset Quotes r.. - Change default behavior \begin_inset Quotes f.. - of French quote style: - - Inner quotes are now ``...''. - - Former french style is now - called "swiss" -523 CJK Quote Styles InsetQuote - - cjk (corner brackets) \begin_inset Quotes j.. - - cjkangle (angle brackets) \begin_inset Quotes k.. 526 Plural and capitalized refstyles InsetRef 533 Multibib support \begin{btUnit}...\end{btUnit} \multibib {none|part|chapter|section|subsection} diff --git a/src/tex2lyx/test/CJK.lyx.lyx b/src/tex2lyx/test/CJK.lyx.lyx index 051c337f34..087301b4b3 100644 --- a/src/tex2lyx/test/CJK.lyx.lyx +++ b/src/tex2lyx/test/CJK.lyx.lyx @@ -74,7 +74,7 @@ \paragraph_indentation default \is_math_indent 0 \math_numbering_side default -\quotes_style english +\quotes_style cjk \dynamic_quotes 0 \papercolumns 1 \papersides 1 diff --git a/src/tex2lyx/test/CJKutf8.lyx.lyx b/src/tex2lyx/test/CJKutf8.lyx.lyx index 3f36955076..27b56ad53e 100644 --- a/src/tex2lyx/test/CJKutf8.lyx.lyx +++ b/src/tex2lyx/test/CJKutf8.lyx.lyx @@ -74,7 +74,7 @@ \paragraph_indentation default \is_math_indent 0 \math_numbering_side default -\quotes_style english +\quotes_style cjk \dynamic_quotes 0 \papercolumns 1 \papersides 1 diff --git a/src/tex2lyx/test/XeTeX-polyglossia.lyx.lyx b/src/tex2lyx/test/XeTeX-polyglossia.lyx.lyx index c90d85f9b1..c62361b221 100644 --- a/src/tex2lyx/test/XeTeX-polyglossia.lyx.lyx +++ b/src/tex2lyx/test/XeTeX-polyglossia.lyx.lyx @@ -73,7 +73,7 @@ \paragraph_indentation default \is_math_indent 0 \math_numbering_side default -\quotes_style english +\quotes_style british \dynamic_quotes 0 \papercolumns 1 \papersides 1 diff --git a/src/tex2lyx/text.cpp b/src/tex2lyx/text.cpp index 9ef22fb03f..bdf90926e8 100644 --- a/src/tex2lyx/text.cpp +++ b/src/tex2lyx/text.cpp @@ -201,13 +201,14 @@ bool need_commentbib = false; char const * const known_quotes[] = { "dq", "guillemotleft", "flqq", "og", "guillemotright", "frqq", "fg", "glq", "glqq", "textquoteleft", "grq", "grqq", "quotedblbase", "textquotedblleft", "quotesinglbase", "textquoteright", "flq", -"guilsinglleft", "frq", "guilsinglright", 0}; +"guilsinglleft", "frq", "guilsinglright", "textquotedblright", "textquotesingle", +"textquotedbl", 0}; /// the same as known_quotes with .lyx names -char const * const known_coded_quotes[] = { "prd", "ard", "ard", "ard", -"ald", "ald", "ald", "gls", "gld", "els", "els", "grd", -"gld", "grd", "gls", "ers", "fls", -"fls", "frs", "frs", 0}; +char const * const known_coded_quotes[] = { "qrd", "ard", "ard", "ard", +"ald", "ald", "ald", "gls", "gld", "els", "els", "eld", +"gld", "eld", "gls", "ers", "ars", +"ars", "als", "als", "erd", "qrs", "qrd", 0}; /// LaTeX names for font sizes char const * const known_sizes[] = { "tiny", "scriptsize", "footnotesize", @@ -446,6 +447,78 @@ bool translate_len(string const & length, string & valstring, string & unit) return true; } + +/// If we have ambiguous quotation marks, make a smart guess +/// based on main quote style +string guessQuoteStyle(string in, bool const opening) +{ + string res = in; + if (prefixIs(in, "qr")) {// straight quote + if (!opening) + res = subst(res, "r", "l"); + } else if (in == "eld") {// `` + if (preamble.quotesStyle() == "german") + res = "grd"; + else if (preamble.quotesStyle() == "british") + res = "bls"; + else if (preamble.quotesStyle() == "french") + res = "fls"; + else if (preamble.quotesStyle() == "russian") + res = "rrs"; + } else if (in == "erd") {// '' + if (preamble.quotesStyle() == "polish") + res = "prd"; + else if (preamble.quotesStyle() == "british") + res = "brs"; + else if (preamble.quotesStyle() == "french") + res = "frs"; + else if (preamble.quotesStyle() == "swedish") + res = opening ? "sld" : "srd"; + } else if (in == "els") {// ` + if (preamble.quotesStyle() == "german") + res = "grs"; + else if (preamble.quotesStyle() == "british") + res = "bld"; + } else if (in == "ers") {// ' + if (preamble.quotesStyle() == "polish") + res = "prs"; + else if (preamble.quotesStyle() == "british") + res = "brd"; + else if (preamble.quotesStyle() == "swedish") + res = opening ? "sls" : "srs"; + } else if (in == "ard") {// >> + if (preamble.quotesStyle() == "swiss") + res = "cld"; + else if (preamble.quotesStyle() == "french") + res = "fld"; + else if (preamble.quotesStyle() == "russian") + res = "rld"; + } else if (in == "ald") {// << + if (preamble.quotesStyle() == "swiss") + res = "crd"; + else if (preamble.quotesStyle() == "french") + res = "frd"; + else if (preamble.quotesStyle() == "russian") + res = "rrd"; + } else if (in == "ars") {// > + if (preamble.quotesStyle() == "swiss") + res = "cls"; + } else if (in == "als") {// < + if (preamble.quotesStyle() == "swiss") + res = "crs"; + } else if (in == "gld") {// ,, + if (preamble.quotesStyle() == "polish") + res = "pld"; + else if (preamble.quotesStyle() == "russian") + res = "rls"; + } else if (in == "gls") {// , + if (preamble.quotesStyle() == "polish") + res = "pls"; + } + return res; +} + + } // namespace @@ -2634,14 +2707,17 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, continue; } - // Basic support for english quotes. This should be - // extended to other quotes, but is not so easy (a - // left english quote is the same as a right german - // quote...) + // Basic support for quotes. We try to disambiguate + // quotes from the context (e.g., a left english quote is + // the same as a right german quote...). + // Try to make a smart guess about the side + Token const prev = p.prev_token(); + bool const opening = (prev.cat() != catSpace && prev.character() != 0 + && prev.character() != '\n' && prev.character() != '~'); if (t.asInput() == "`" && p.next_token().asInput() == "`") { context.check_layout(os); begin_inset(os, "Quotes "); - os << "eld"; + os << guessQuoteStyle("eld", opening); end_inset(os); p.get_token(); skip_braces(p); @@ -2650,7 +2726,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, if (t.asInput() == "'" && p.next_token().asInput() == "'") { context.check_layout(os); begin_inset(os, "Quotes "); - os << "erd"; + os << guessQuoteStyle("erd", opening); end_inset(os); p.get_token(); skip_braces(p); @@ -2660,7 +2736,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, if (t.asInput() == ">" && p.next_token().asInput() == ">") { context.check_layout(os); begin_inset(os, "Quotes "); - os << "ald"; + os << guessQuoteStyle("ald", opening); end_inset(os); p.get_token(); skip_braces(p); @@ -2681,9 +2757,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, if (!has_chunk) { context.check_layout(os); begin_inset(os, "Quotes "); - //FIXME: this is a right danish quote; - // why not a left french quote? - os << "ard"; + os << guessQuoteStyle("ard", opening); end_inset(os); p.get_token(); skip_braces(p); @@ -2809,8 +2883,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, is_known(next.cs(), known_quotes) && end.cat() == catEnd) { // Something like {\textquoteright} (e.g. - // from writer2latex). LyX writes - // \textquoteright{}, so we may skip the + // from writer2latex). We may skip the // braces here for better readability. parse_text_snippet(p, os, FLAG_BRACE_LAST, outer, context); @@ -4375,7 +4448,13 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, if ((where = is_known(t.cs(), known_quotes))) { context.check_layout(os); begin_inset(os, "Quotes "); - os << known_coded_quotes[where - known_quotes]; + string quotetype = known_coded_quotes[where - known_quotes]; + // try to make a smart guess about the side + Token const prev = p.prev_token(); + bool const opening = (prev.cat() != catSpace && prev.character() != 0 + && prev.character() != '\n' && prev.character() != '~'); + quotetype = guessQuoteStyle(quotetype, opening); + os << quotetype; end_inset(os); // LyX adds {} after the quote, so we have to eat // spaces here if there are any before a possible @@ -4386,7 +4465,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, } if ((where = is_known(t.cs(), known_sizes)) && - context.new_layout_allowed) { + context.new_layout_allowed) { context.check_layout(os); TeXFont const oldFont = context.font; context.font.size = known_coded_sizes[where - known_sizes]; @@ -4551,13 +4630,6 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, continue; } - if (t.cs() == "textquotedbl") { - context.check_layout(os); - os << "\""; - skip_braces(p); - continue; - } - if (t.cs() == "_" || t.cs() == "&" || t.cs() == "#" || t.cs() == "$" || t.cs() == "{" || t.cs() == "}" || t.cs() == "%" || t.cs() == "-") {