From: Günter Milde Date: Mon, 30 Dec 2019 16:48:20 +0000 (+0100) Subject: ctests: supported-languages Fix inputenc Error: Invalid UTF-8 byte "A0". X-Git-Url: https://git.lyx.org/gitweb/?a=commitdiff_plain;h=bd4c66b18b3f00b9b34cad32d94e250811a7c64a;p=features.git ctests: supported-languages Fix inputenc Error: Invalid UTF-8 byte "A0". This happens with "inputenc: auto-legacy" if a language with default encoding "utf8" (e.g. Turkmen or Mongolian) is used in a Quote (or another environment). --- diff --git a/autotests/export/latex/languages/supported-languages.lyx b/autotests/export/latex/languages/supported-languages.lyx index 7813d13c2b..6a7832078f 100644 --- a/autotests/export/latex/languages/supported-languages.lyx +++ b/autotests/export/latex/languages/supported-languages.lyx @@ -1825,6 +1825,21 @@ Dummy paragraph to set language inside quote \end_inset +\end_layout + +\begin_layout Standard + +\lang czech +\begin_inset Note Note +status open + +\begin_layout Plain Layout +Dummy paragraph to reset input encoding +\end_layout + +\end_inset + + \end_layout \begin_layout Standard @@ -1960,29 +1975,6 @@ F0 ð ñ ò ó ô õ ö ÷ ø ù ú û ü ý þ ÿ \end_inset -\end_layout - -\begin_layout Standard - -\lang czech -\begin_inset Note Note -status open - -\begin_layout Plain Layout -Czech dummy paragraph to reset input-encoding with -\begin_inset Quotes eld -\end_inset - -auto-legacy -\begin_inset Quotes erd -\end_inset - -. -\end_layout - -\end_inset - - \end_layout \begin_layout Labeling @@ -3082,29 +3074,6 @@ magyar nyelv a komi, a mari és a mordvin nyelvek. \end_layout -\begin_layout Standard - -\lang czech -\begin_inset Note Note -status open - -\begin_layout Plain Layout -Czech dummy paragraph to reset input-encoding with -\begin_inset Quotes eld -\end_inset - -auto-legacy -\begin_inset Quotes erd -\end_inset - -. 
-\end_layout - -\end_inset - - -\end_layout - \begin_layout Labeling \labelwidthstring 00.00.0000 Babel magyar @@ -3852,6 +3821,20 @@ Kurmancî an kurmanciya jorîn yek ji zaravayên zimanê kurdî ye. \end_layout \begin_layout Standard + +\lang czech +\begin_inset Note Note +status open + +\begin_layout Plain Layout +Dummy paragraph to reset input encoding with inputenc=auto-legacy +\end_layout + +\end_inset + + +\lang english + \begin_inset Branch debug inverted 0 status collapsed @@ -5085,6 +5068,155 @@ status open \end_inset +\end_layout + +\begin_layout Standard + +\lang czech +\begin_inset Note Note +status open + +\begin_layout Plain Layout +Dummy paragraph to reset input encoding with inputenc=auto-legacy +\end_layout + +\end_inset + + +\lang english + +\begin_inset Branch debug +inverted 0 +status collapsed + +\begin_layout Standard +The language is +\begin_inset Quotes eld +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +languagename +\end_layout + +\end_inset + + +\begin_inset Quotes erd +\end_inset + +, the input encoding +\begin_inset Quotes eld +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +inputencodingname +\end_layout + +\end_inset + + +\begin_inset Quotes erd +\end_inset + + and the font encoding +\begin_inset Quotes eld +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +makeatletter +\backslash +f@encoding +\backslash +makeatother +\end_layout + +\end_inset + + +\begin_inset Quotes erd +\end_inset + +. 
+\end_layout + +\begin_layout Standard +A0   ¡ ¢ £ ¤ Â¥ ¦ § +\begin_inset Note Note +status open + +\begin_layout Plain Layout +¨ +\end_layout + +\end_inset + + © ª « ¬ ® +\begin_inset Note Note +status open + +\begin_layout Plain Layout +¯ +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Standard +B0 ° ± ² ³ +\begin_inset Note Note +status open + +\begin_layout Plain Layout +´ +\end_layout + +\end_inset + + µ ¶ · ¸ ¹ º » ¼ ½ ¾ ¿ +\end_layout + +\begin_layout Standard +C0 À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï +\end_layout + +\begin_layout Standard +D0 Ð Ñ Ò Ó Ô Õ Ö × Ø Ù Ú Û Ü Ý Þ ß +\end_layout + +\begin_layout Standard +E0 à á â ã ä Ã¥ æ ç è é ê ë ì í î ï +\end_layout + +\begin_layout Standard +F0 ð ñ ò ó ô õ ö ÷ ø ù ú û ü ý þ ÿ +\end_layout + +\end_inset + + \end_layout \begin_layout Labeling @@ -5901,47 +6033,196 @@ Türkmen dili Türkmen dili - türki dilleriň oguz dilleri toparyna degişlidir. \end_layout -\begin_layout Labeling -\labelwidthstring 00.00.0000 -Babel turkmen -\end_layout - -\begin_layout Labeling -\labelwidthstring 00.00.0000 -Polyglossia turkmen -\end_layout - -\begin_layout Labeling -\labelwidthstring 00.00.0000 -LangCode tk_TM -\end_layout - -\begin_layout Subsection* -Ukrainian -\end_layout - \begin_layout Standard -\begin_inset Branch conflict-mk -inverted 1 -status open -\begin_layout Quote +\lang czech +\begin_inset Note Note +status open -\emph on -\lang ukrainian -Украї́нська мо́ва -\emph default - (історичні назви — ру́ська, руси́нська) — національна мова українців. - Належить до слов'янської групи індоєвропейської мовної сім'ї. - Число мовців — близько 45 млн, більшість яких живе в Україні. 
+\begin_layout Plain Layout +Dummy paragraph to reset input encoding with inputenc=auto-legacy \end_layout \end_inset -\end_layout +\lang english -\begin_layout Labeling +\begin_inset Branch debug +inverted 0 +status collapsed + +\begin_layout Standard +The language is +\begin_inset Quotes eld +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +languagename +\end_layout + +\end_inset + + +\begin_inset Quotes erd +\end_inset + +, the input encoding +\begin_inset Quotes eld +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +inputencodingname +\end_layout + +\end_inset + + +\begin_inset Quotes erd +\end_inset + + and the font encoding +\begin_inset Quotes eld +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +makeatletter +\backslash +f@encoding +\backslash +makeatother +\end_layout + +\end_inset + + +\begin_inset Quotes erd +\end_inset + +. +\end_layout + +\begin_layout Standard +A0   ¡ ¢ £ ¤ Â¥ ¦ § +\begin_inset Note Note +status open + +\begin_layout Plain Layout +¨ +\end_layout + +\end_inset + + © ª « ¬ ® +\begin_inset Note Note +status open + +\begin_layout Plain Layout +¯ +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Standard +B0 ° ± ² ³ +\begin_inset Note Note +status open + +\begin_layout Plain Layout +´ +\end_layout + +\end_inset + + µ ¶ · ¸ ¹ º » ¼ ½ ¾ ¿ +\end_layout + +\begin_layout Standard +C0 À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï +\end_layout + +\begin_layout Standard +D0 Ð Ñ Ò Ó Ô Õ Ö × Ø Ù Ú Û Ü Ý Þ ß +\end_layout + +\begin_layout Standard +E0 à á â ã ä Ã¥ æ ç è é ê ë ì í î ï +\end_layout + +\begin_layout Standard +F0 ð ñ ò ó ô õ ö ÷ ø ù ú û ü ý þ ÿ +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 +Babel turkmen +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 +Polyglossia turkmen +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 
+LangCode tk_TM +\end_layout + +\begin_layout Subsection* +Ukrainian +\end_layout + +\begin_layout Standard +\begin_inset Branch conflict-mk +inverted 1 +status open + +\begin_layout Quote + +\emph on +\lang ukrainian +Украї́нська мо́ва +\emph default + (історичні назви — ру́ська, руси́нська) — національна мова українців. + Належить до слов'янської групи індоєвропейської мовної сім'ї. + Число мовців — близько 45 млн, більшість яких живе в Україні. +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Labeling \labelwidthstring 00.00.0000 Babel ukrainian \end_layout @@ -6043,6 +6324,155 @@ Việt ngữ \end_inset +\end_layout + +\begin_layout Standard + +\lang czech +\begin_inset Note Note +status open + +\begin_layout Plain Layout +Dummy paragraph to reset input encoding with inputenc=auto-legacy +\end_layout + +\end_inset + + +\lang english + +\begin_inset Branch debug +inverted 0 +status collapsed + +\begin_layout Standard +The language is +\begin_inset Quotes eld +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +languagename +\end_layout + +\end_inset + + +\begin_inset Quotes erd +\end_inset + +, the input encoding +\begin_inset Quotes eld +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +inputencodingname +\end_layout + +\end_inset + + +\begin_inset Quotes erd +\end_inset + + and the font encoding +\begin_inset Quotes eld +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +makeatletter +\backslash +f@encoding +\backslash +makeatother +\end_layout + +\end_inset + + +\begin_inset Quotes erd +\end_inset + +. 
+\end_layout + +\begin_layout Standard +A0   ¡ ¢ £ ¤ Â¥ ¦ § +\begin_inset Note Note +status open + +\begin_layout Plain Layout +¨ +\end_layout + +\end_inset + + © ª « ¬ ® +\begin_inset Note Note +status open + +\begin_layout Plain Layout +¯ +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Standard +B0 ° ± ² ³ +\begin_inset Note Note +status open + +\begin_layout Plain Layout +´ +\end_layout + +\end_inset + + µ ¶ · ¸ ¹ º » ¼ ½ ¾ ¿ +\end_layout + +\begin_layout Standard +C0 À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï +\end_layout + +\begin_layout Standard +D0 Ð Ñ Ò Ó Ô Õ Ö × Ø Ù Ú Û Ü Ý Þ ß +\end_layout + +\begin_layout Standard +E0 à á â ã ä Ã¥ æ ç è é ê ë ì í î ï +\end_layout + +\begin_layout Standard +F0 ð ñ ò ó ô õ ö ÷ ø ù ú û ü ý þ ÿ +\end_layout + +\end_inset + + \end_layout \begin_layout Labeling diff --git a/autotests/export/latex/languages/supported-languages_babel_auto-legacy.lyx b/autotests/export/latex/languages/supported-languages_babel_auto-legacy.lyx index fdda28ae69..107fb04e90 100644 --- a/autotests/export/latex/languages/supported-languages_babel_auto-legacy.lyx +++ b/autotests/export/latex/languages/supported-languages_babel_auto-legacy.lyx @@ -169,9 +169,42 @@ auto-legacy , i.e. each language uses its own default legacy input encoding. +\end_layout -\emph on - +\begin_layout Quote + +\series bold +Bug +\series default + (in LyX or LaTeX?): If a language change happens in an environment (e.g. + a Quote), the input encoding +\begin_inset Quotes eld +\end_inset + +utf8 +\begin_inset Quotes erd +\end_inset + + is not properly switched back after leaving the environment: the +\backslash +inputencodingname is back to the encoding used before, but charcters in + the +\begin_inset Quotes eld +\end_inset + +high-bit +\begin_inset Quotes erd +\end_inset + + range produce utf8-errors (see +\begin_inset Quotes eld +\end_inset + +nested-inputenc_auto-legacy.lyx +\begin_inset Quotes erd +\end_inset + +). 
+\end_layout \begin_layout Standard diff --git a/development/autotests/ignoredTests b/development/autotests/ignoredTests index 3061a1d675..7f8b8448e5 100644 --- a/development/autotests/ignoredTests +++ b/development/autotests/ignoredTests @@ -179,6 +179,7 @@ export/export/latex/inputenc-.*_systemF # 11522 still open but already tested export/export/latex/languages/supported-languages_babel_(pdf|dvi)._systemF export/export/latex/languages/supported-languages_babel_auto-legacy_.*_systemF export/export/latex/lyxbugs/6197-polish-amssymb_pdf5_systemF +export/export/latex/languages/nested-inputenc_auto-legacy_.*_systemF # wrong output but does not fail: export/export/latex/lyxbugs/3059-language-in-tables_.*_systemF diff --git a/development/autotests/invertedTests b/development/autotests/invertedTests index d94486f0ec..5c54fdbffa 100644 --- a/development/autotests/invertedTests +++ b/development/autotests/invertedTests @@ -84,6 +84,14 @@ export/examples/ja/Graphics_and_Insets/XY-Pic.*_systemF export/doc/he/.*pdf5_systemF export/.*/fa/Welcome_(dvi3|pdf5)_systemF +# Bug (in LyX or LaTeX?): +# If a language change happens in an environment, LyX does not write an +# explicit \inputencoding{} when switching back to the "outer" +# language. However, without explicit \inputencoding, the input encoding +# “utf8” is not properly switched back and high-bit characters like "ä" produce +# an "inputenc Error: Invalid UTF-8 byte "A0"". +export/export/latex/languages/nested-inputenc_auto-legacy_pdf2 + # ================================================ Sublabel: lyxbugs