From 5ec18f7bc0de1f3abd3cd4a57e57ff3606a82495 Mon Sep 17 00:00:00 2001 From: =?utf8?q?G=C3=BCnter=20Milde?= Date: Thu, 24 Jan 2019 00:36:33 +0100 Subject: [PATCH] Support for documents in other scripts with parts in CJK scripts. Fix CJK support for documents using input-encoding "utf8-cjk" when the main language is not Chinese, Korean, or Japanese. --- .../latex/CJK/en-de-el-ru-ja_utf8-cjk.lyx | 68 ++++-- .../latex/CJK/en-de-el-ru-ko_utf8-cjk.lyx | 43 +++- .../latex/CJK/en-de-el-ru-zh_CN_utf8-cjk.lyx | 107 +++++---- .../latex/CJK/en-de-el-ru-zh_TW_utf8-cjk.lyx | 196 +++++++++++++++++ .../latex/CJK/ja-en-de-el-ru_utf8-cjk.lyx | 4 +- .../export/latex/CJK/ja-en-de-el-ru_utf8.lyx | 4 +- .../latex/CJK/ko-en-de-el-ru_utf8-cjk.lyx | 119 ++++++++-- .../CJK/micro-sign_utf8-cjk_libertine.lyx | 207 ++++++++++++++++++ development/autotests/invertedTests | 12 +- development/autotests/unreliableTests | 4 + lib/doc/zh_CN/Intro.lyx | 18 +- lib/doc/zh_CN/Tutorial.lyx | 13 +- lib/encodings | 4 +- lib/examples/ja/multilingual.lyx | 37 +++- src/output_latex.cpp | 32 +-- 15 files changed, 748 insertions(+), 120 deletions(-) create mode 100644 autotests/export/latex/CJK/en-de-el-ru-zh_TW_utf8-cjk.lyx create mode 100644 autotests/export/latex/CJK/micro-sign_utf8-cjk_libertine.lyx diff --git a/autotests/export/latex/CJK/en-de-el-ru-ja_utf8-cjk.lyx b/autotests/export/latex/CJK/en-de-el-ru-ja_utf8-cjk.lyx index 085fae6664..4982e7b9a9 100644 --- a/autotests/export/latex/CJK/en-de-el-ru-ja_utf8-cjk.lyx +++ b/autotests/export/latex/CJK/en-de-el-ru-ja_utf8-cjk.lyx @@ -81,31 +81,39 @@ \begin_body \begin_layout Standard -For multi-lingual document, with parts in European languages, you can use - the CJK LaTeX bundle. +For multi-lingual document, with parts in European languages and parts in + Chinese, Korean, or Japanese, you can use the CJK LaTeX bundle. \end_layout \begin_layout Enumerate +The main language of this document is English. 
+\end_layout -\lang japanese-cjk -\SpecialChar LyX -は優秀な取扱説明書を同梱していますので、これをお使いください!まずは -\family sans -ヘルプ\SpecialChar menuseparator -はじめの一歩 -\family default -から始めてください。これは、各取扱説明書を簡潔に紹介しています。つぎに -\family sans -ヘルプ\SpecialChar menuseparator -入門篇 -\family default -をお読みになれば、\SpecialChar LyX -の使い方を学ぶことができます。 +\begin_layout Enumerate +Set Language>Encoding to +\begin_inset Quotes eld +\end_inset + +utf8-cjk +\begin_inset Quotes erd +\end_inset + + and Fonts>CJK to, e.g. + +\begin_inset Quotes eld +\end_inset + +min +\begin_inset Quotes erd +\end_inset + +. \end_layout \begin_layout Enumerate Short texts may be written without setting the language (hyphenation will - be missing and spell-checking complain): Greetings; Grüße; Приветы; Λογος + be missing and spell-checking complain): Greetings; Grüße; Приветы; χαιρετισμός +, 挨拶 (あいさつ). \end_layout \begin_layout Enumerate @@ -160,6 +168,32 @@ Russisch: µm Снег. \end_layout +\begin_layout Description + +\lang japanese-cjk +Japanese: ( +\begin_inset Quotes eld +\end_inset + +Japanese CJK +\begin_inset Quotes erd +\end_inset + +) \SpecialChar LyX +は優秀な取扱説明書を同梱していますので、これをお使いください!まずは +\family sans +ヘルプ\SpecialChar menuseparator +はじめの一歩 +\family default +から始めてください。これは、各取扱説明書を簡潔に紹介しています。つぎに +\family sans +ヘルプ\SpecialChar menuseparator +入門篇 +\family default +をお読みになれば、\SpecialChar LyX +の使い方を学ぶことができます。 +\end_layout + \end_deeper \end_body \end_document diff --git a/autotests/export/latex/CJK/en-de-el-ru-ko_utf8-cjk.lyx b/autotests/export/latex/CJK/en-de-el-ru-ko_utf8-cjk.lyx index 3af0908c10..40263082c4 100644 --- a/autotests/export/latex/CJK/en-de-el-ru-ko_utf8-cjk.lyx +++ b/autotests/export/latex/CJK/en-de-el-ru-ko_utf8-cjk.lyx @@ -11,7 +11,7 @@ \language_package default \inputencoding utf8-cjk \fontencoding default -\font_roman "cmr" "NanumMyeongjo" +\font_roman "libertine" "NanumMyeongjo" \font_sans "default" "NanumGothic" \font_typewriter "default" "NanumGothicCoding" \font_math "auto" "auto" @@ -81,24 +81,45 @@ \begin_body 
\begin_layout Standard -For multi-lingual document, with parts in European languages, you can use - the CJK LaTeX bundle. +For multi-lingual documents with parts in European languages and parts in + Chinese, Korean, or Japanese, you can use the CJK LaTeX bundle. \end_layout \begin_layout Enumerate +The main language of this document is English. +\end_layout -\lang korean -LyX 는 훌륭한 문서를 동반합니다 - 사용하세요! +\begin_layout Enumerate +Set Language>Encoding to +\begin_inset Quotes eld +\end_inset + +utf8-cjk +\begin_inset Quotes erd +\end_inset + + and Fonts>CJK to, e. +\begin_inset space ~ +\end_inset + +g., +\begin_inset Quotes eld +\end_inset + +mj +\begin_inset Quotes erd +\end_inset + +. \end_layout \begin_layout Enumerate Short texts may be written without setting the language (hyphenation will - be missing and spell-checking complain): Greetings; Grüße; Приветы; Λογος + be missing and spell-checking complain): Greetings; Grüße; Приветы; χαιρετισμός +, 인사. \end_layout \begin_layout Enumerate - -\lang japanese-cjk For longer text parts, it is recommended to set the correct language: \end_layout @@ -150,6 +171,12 @@ Russisch: µm Снег. \end_layout +\begin_layout Description + +\lang korean +Korean: LyX 는 훌륭한 문서를 동반합니다 - 사용하세요! 
+\end_layout + \end_deeper \end_body \end_document diff --git a/autotests/export/latex/CJK/en-de-el-ru-zh_CN_utf8-cjk.lyx b/autotests/export/latex/CJK/en-de-el-ru-zh_CN_utf8-cjk.lyx index 9f832bf239..ed02936f9f 100644 --- a/autotests/export/latex/CJK/en-de-el-ru-zh_CN_utf8-cjk.lyx +++ b/autotests/export/latex/CJK/en-de-el-ru-zh_CN_utf8-cjk.lyx @@ -5,13 +5,19 @@ \save_transient_properties true \origin unavailable \textclass article +\begin_preamble +% Fix the MIKRO SIGN symbol: +% CJKutf8 overwrites \textmu with $\mu$, +% use the symbol from "textcomp" or the Greek fonts instead +\DeclareTextSymbolDefault{\textmu}{TS1} +\end_preamble \use_default_options true \maintain_unincluded_children false \language english \language_package default \inputencoding utf8-cjk \fontencoding auto -\font_roman "default" "default" +\font_roman "libertine" "default" \font_sans "default" "default" \font_typewriter "default" "default" \font_math "auto" "auto" @@ -21,7 +27,7 @@ \font_osf false \font_sf_scale 100 100 \font_tt_scale 100 100 -\font_cjk bsmi +\font_cjk gbsn \use_microtype false \use_dash_ligatures true \graphics default @@ -81,7 +87,16 @@ \begin_body \begin_layout Standard -Test the +For multi-lingual document, with parts in European languages and parts in + Chinese, Korean, or Japanese, you can use the CJK LaTeX bundle. +\end_layout + +\begin_layout Enumerate +The main language of this document is English. +\end_layout + +\begin_layout Enumerate +Set Language>Encoding to \begin_inset Quotes eld \end_inset @@ -89,72 +104,90 @@ utf8-cjk \begin_inset Quotes erd \end_inset - input encoding. -\end_layout - -\begin_layout Standard -English: Grüße aus Österreich 5 -\begin_inset space ~ + and Fonts>CJK to +\begin_inset Quotes eld \end_inset -µm Schnee. 
-\end_layout +gbsn +\begin_inset Quotes erd +\end_inset -\begin_layout Standard + or +\begin_inset Quotes eld +\end_inset -\lang ngerman -Deutsch: Grüße aus Österreich 5 -\begin_inset space ~ +gkai +\begin_inset Quotes erd \end_inset -µm Schnee. -\begin_inset Foot -status open +. +\end_layout -\begin_layout Plain Layout +\begin_layout Enumerate +Short texts may be written without setting the language (hyphenation will + be missing and spell-checking complain): Greetings; Grüße; Приветы; χαιρετισμός +, 迎接. +\end_layout -\lang ngerman -Gibts Brötchen? +\begin_layout Enumerate +For longer text parts, it is recommended to set the correct language: \end_layout -\end_inset +\begin_deeper +\begin_layout Description +\lang chinese-simplified +Chinesisch: 就是说我们都已经习惯于关心字符排版的细枝末节,几乎所有的字处理软件也都是这种理念。人们还在使用 Tab 键创建空白;你还需要关心什么东西 +应该出现在页面上的什么位置;强调某处文本就是改变一下字体 +\end_layout +\begin_layout Description +English: Greetings from Österreich 5 +\begin_inset space ~ +\end_inset + +µm snow. \end_layout -\begin_layout Standard +\begin_layout Description \lang ngerman Griechisch: \lang greek - Γρυσσε αυσ Οεστερρειχ 5 -\lang english - -\begin_inset space ~ -\end_inset - -μm -\lang greek - Σνη. + Η +\series bold +Ελλάδα +\series default + (στην καθαρεύουσα Ελλάς), συνταγματικό όνομα Ελληνική Δημοκρατία, είναι + χώρα της νοτιοανατολικής Ευρώπης στο νοτιότερο άκρο της Βαλκανικής χερσονήσου. \end_layout -\begin_layout Standard +\begin_layout Description \lang ngerman Russisch: \lang russian - Привет с новом годом! У нас эсть 5 + Привет с +\series bold +новым +\series default + годом! У нас эсть 5 \begin_inset space ~ \end_inset µm Снег. \end_layout -\begin_layout Standard +\begin_layout Description -\lang chinese-simplified -Chinesisch: 是一文作 +\lang ngerman +Deutsch: Grüße aus Österreich 5 +\begin_inset space ~ +\end_inset + +µm Schnee. 
\end_layout +\end_deeper \end_body \end_document diff --git a/autotests/export/latex/CJK/en-de-el-ru-zh_TW_utf8-cjk.lyx b/autotests/export/latex/CJK/en-de-el-ru-zh_TW_utf8-cjk.lyx new file mode 100644 index 0000000000..5d8c721a1a --- /dev/null +++ b/autotests/export/latex/CJK/en-de-el-ru-zh_TW_utf8-cjk.lyx @@ -0,0 +1,196 @@ +#LyX 2.4 created this file. For more info see https://www.lyx.org/ +\lyxformat 566 +\begin_document +\begin_header +\save_transient_properties true +\origin unavailable +\textclass article +\begin_preamble +% Fix the MIKRO SIGN symbol: +% CJKutf8 overwrites \textmu with $\mu$, +% use the symbol from "textcomp" or the Greek fonts instead +\DeclareTextSymbolDefault{\textmu}{TS1} +\end_preamble +\use_default_options true +\maintain_unincluded_children false +\language english +\language_package default +\inputencoding utf8-cjk +\fontencoding auto +\font_roman "libertine" "default" +\font_sans "default" "default" +\font_typewriter "default" "default" +\font_math "auto" "auto" +\font_default_family default +\use_non_tex_fonts false +\font_sc false +\font_osf false +\font_sf_scale 100 100 +\font_tt_scale 100 100 +\font_cjk bsmi +\use_microtype false +\use_dash_ligatures true +\graphics default +\default_output_format pdf2 +\output_sync 0 +\bibtex_command default +\index_command default +\float_placement class +\float_alignment class +\paperfontsize default +\spacing single +\use_hyperref false +\papersize default +\use_geometry false +\use_package amsmath 1 +\use_package amssymb 1 +\use_package cancel 1 +\use_package esint 1 +\use_package mathdots 1 +\use_package mathtools 1 +\use_package mhchem 1 +\use_package stackrel 1 +\use_package stmaryrd 1 +\use_package undertilde 1 +\cite_engine basic +\cite_engine_type default +\biblio_style plain +\use_bibtopic false +\use_indices false +\paperorientation portrait +\suppress_date false +\justification true +\use_refstyle 1 +\use_minted 0 +\index Index +\shortcut idx +\color #008000 +\end_index +\secnumdepth 
3 +\tocdepth 3 +\paragraph_separation indent +\paragraph_indentation default +\is_math_indent 0 +\math_numbering_side default +\quotes_style english +\dynamic_quotes 0 +\papercolumns 1 +\papersides 1 +\paperpagestyle default +\tracking_changes false +\output_changes false +\html_math_output 0 +\html_css_as_file 0 +\html_be_strict false +\end_header + +\begin_body + +\begin_layout Standard +For multi-lingual document, with parts in European languages and parts in + Chinese, Korean, or Japanese, you can use the CJK LaTeX bundle. +\end_layout + +\begin_layout Enumerate +The main language of this document is English. +\end_layout + +\begin_layout Enumerate +Set Language>Encoding to +\begin_inset Quotes eld +\end_inset + +utf8-cjk +\begin_inset Quotes erd +\end_inset + + and Fonts>CJK to +\begin_inset Quotes eld +\end_inset + +bsmi +\begin_inset Quotes erd +\end_inset + + or +\begin_inset Quotes eld +\end_inset + +bkai +\begin_inset Quotes erd +\end_inset + +. +\end_layout + +\begin_layout Enumerate +Short texts may be written without setting the language (hyphenation will + be missing and spell-checking complain): Greetings; Grüße; Приветы; χαιρετισμός +, 迎接. +\end_layout + +\begin_layout Enumerate +For longer text parts, it is recommended to set the correct language: +\end_layout + +\begin_deeper +\begin_layout Description + +\lang chinese-traditional +(traditional) +\begin_inset space ~ +\end_inset + +Chinese: 香港《蘋果日報》在2月14日引述中國導演馮小剛的說法,稱許多簡體中文是「不合理的簡化」,而負責香港教育的官員則是在「對抗這股浪潮」。 +\end_layout + +\begin_layout Description +English: Greetings from Österreich 5 +\begin_inset space ~ +\end_inset + +µm snow. +\end_layout + +\begin_layout Description + +\lang ngerman +Griechisch: +\lang greek + Η +\series bold +Ελλάδα +\series default + (στην καθαρεύουσα Ελλάς), συνταγματικό όνομα Ελληνική Δημοκρατία, είναι + χώρα της νοτιοανατολικής Ευρώπης στο νοτιότερο άκρο της Βαλκανικής χερσονήσου. 
+\end_layout + +\begin_layout Description + +\lang ngerman +Russisch: +\lang russian + Привет с +\series bold +новым +\series default + годом! У нас эсть 5 +\begin_inset space ~ +\end_inset + +µm Снег. +\end_layout + +\begin_layout Description + +\lang ngerman +Deutsch: Grüße aus Österreich 5 +\begin_inset space ~ +\end_inset + +µm Schnee. +\end_layout + +\end_deeper +\end_body +\end_document diff --git a/autotests/export/latex/CJK/ja-en-de-el-ru_utf8-cjk.lyx b/autotests/export/latex/CJK/ja-en-de-el-ru_utf8-cjk.lyx index e32349b6e3..b22bc5a75f 100644 --- a/autotests/export/latex/CJK/ja-en-de-el-ru_utf8-cjk.lyx +++ b/autotests/export/latex/CJK/ja-en-de-el-ru_utf8-cjk.lyx @@ -83,7 +83,7 @@ \begin_layout Standard \lang english -For multi-lingual document, with parts in European languages, you can use +For multi-lingual documents with parts in European languages, you can use the CJK LaTeX bundle. \end_layout @@ -105,7 +105,7 @@ For multi-lingual document, with parts in European languages, you can use \begin_layout Enumerate Short texts may be written without setting the language (hyphenation will - be missing and spell-checking complain): Greetings; Grüße; Приветы; Λογος + be missing and spell-checking complain): Greetings; Grüße; Приветы; χαιρετισμός \end_layout \begin_layout Enumerate diff --git a/autotests/export/latex/CJK/ja-en-de-el-ru_utf8.lyx b/autotests/export/latex/CJK/ja-en-de-el-ru_utf8.lyx index a61648820e..84555d72bc 100644 --- a/autotests/export/latex/CJK/ja-en-de-el-ru_utf8.lyx +++ b/autotests/export/latex/CJK/ja-en-de-el-ru_utf8.lyx @@ -83,7 +83,7 @@ \begin_layout Standard \lang english -For multi-lingual document, with parts in European languages, you can use +For multi-lingual documents with parts in European languages, you can use the CJK LaTeX bundle. 
\end_layout @@ -105,7 +105,7 @@ For multi-lingual document, with parts in European languages, you can use \begin_layout Enumerate Short texts may be written without setting the language (hyphenation will - be missing and spell-checking complain): Greetings; Grüße; Приветы; Λογος + be missing and spell-checking complain): Greetings; Grüße; Приветы; χαιρετισμός \end_layout \begin_layout Enumerate diff --git a/autotests/export/latex/CJK/ko-en-de-el-ru_utf8-cjk.lyx b/autotests/export/latex/CJK/ko-en-de-el-ru_utf8-cjk.lyx index d4ed2d2899..3c54f159f6 100644 --- a/autotests/export/latex/CJK/ko-en-de-el-ru_utf8-cjk.lyx +++ b/autotests/export/latex/CJK/ko-en-de-el-ru_utf8-cjk.lyx @@ -11,7 +11,7 @@ \language_package default \inputencoding utf8-cjk \fontencoding default -\font_roman "cmr" "NanumMyeongjo" +\font_roman "lmodern" "NanumMyeongjo" \font_sans "default" "NanumGothic" \font_typewriter "default" "NanumGothicCoding" \font_math "auto" "auto" @@ -92,14 +92,22 @@ LyX 는 훌륭한 문서를 동반합니다 - 사용하세요! \end_layout \begin_layout Enumerate -Short texts may be written without setting the language (hyphenation will - be missing and spell-checking complain): Greetings; Grüße; Приветы; Λογος + +\series bold +Short +\series default + texts may be written without setting the language (hyphenation will be + missing and spell-checking complain): Greetings; Grüße; Приветы; Λογος \end_layout \begin_layout Enumerate \lang japanese-cjk -For longer text parts, it is recommended to set the correct language: +For +\series bold +longer +\series default + text parts, it is recommended to set the correct language: \end_layout \begin_deeper @@ -115,16 +123,6 @@ English: Greetings from Österreich 5 \begin_layout Description -\lang ngerman -Deutsch: Grüße aus Österreich 5 -\begin_inset space ~ -\end_inset - -µm Schnee. -\end_layout - -\begin_layout Description - \lang ngerman Griechisch: \lang greek @@ -152,6 +150,99 @@ Russisch: µm Снег. 
\end_layout +\begin_layout Description + +\lang ngerman +Deutsch: Grüße aus Österreich 5 +\begin_inset space ~ +\end_inset + +µm Schnee. +\end_layout + +\begin_layout Description +Language +\begin_inset space ~ +\end_inset + +order +\begin_inset space ~ +\end_inset + +bug: +\end_layout + +\begin_deeper +\begin_layout Itemize +The last language argument becomes the main document language. +\end_layout + +\begin_layout Itemize +CJK-languages don't add to the language documents, because they have no + Babel-name. +\end_layout + +\begin_layout Itemize +LyX sorts languages alphabetically +\begin_inset Foot +status open + +\begin_layout Plain Layout +or by order in +\begin_inset Quotes eld +\end_inset + +lib/languages +\begin_inset Quotes erd +\end_inset + +? +\end_layout + +\end_inset + + and moves the document language to the end of the +\begin_inset Quotes eld +\end_inset + +languages list +\begin_inset Quotes erd +\end_inset + +. +\end_layout + +\begin_layout Itemize +If the main language is empty, Babel wrongly assumes the last in list to + be the document language (here +\begin_inset Quotes eld +\end_inset + +russian +\begin_inset Quotes erd +\end_inset + +). +\end_layout + +\begin_layout Itemize +As +\begin_inset Quotes eld +\end_inset + +russian +\begin_inset Quotes erd +\end_inset + + requires font encoding T2A, babel sets it as +\emph on +document font encoding, +\emph default +leading to suboptimal replacement fonts being used (bitmap instead of LatinMode +rn) for Latin words unless a specific language is set. +\end_layout + +\end_deeper \end_deeper \end_body \end_document diff --git a/autotests/export/latex/CJK/micro-sign_utf8-cjk_libertine.lyx b/autotests/export/latex/CJK/micro-sign_utf8-cjk_libertine.lyx new file mode 100644 index 0000000000..e724d62272 --- /dev/null +++ b/autotests/export/latex/CJK/micro-sign_utf8-cjk_libertine.lyx @@ -0,0 +1,207 @@ +#LyX 2.4 created this file. 
For more info see https://www.lyx.org/ +\lyxformat 566 +\begin_document +\begin_header +\save_transient_properties true +\origin unavailable +\textclass article +\begin_preamble +% Fix the MIKRO SIGN symbol: +% CJKutf8 overwrites \textmu with $\mu$, +% use the symbol from "textcomp" or the Greek fonts instead +%\DeclareTextSymbolDefault{\textmu}{TS1} +\end_preamble +\use_default_options true +\maintain_unincluded_children false +\language english +\language_package default +\inputencoding utf8-cjk +\fontencoding auto +\font_roman "libertine" "default" +\font_sans "default" "default" +\font_typewriter "default" "default" +\font_math "auto" "auto" +\font_default_family default +\use_non_tex_fonts false +\font_sc false +\font_osf false +\font_sf_scale 100 100 +\font_tt_scale 100 100 +\font_cjk gkai +\use_microtype false +\use_dash_ligatures true +\graphics default +\default_output_format pdf2 +\output_sync 0 +\bibtex_command default +\index_command default +\float_placement class +\float_alignment class +\paperfontsize default +\spacing single +\use_hyperref false +\papersize default +\use_geometry false +\use_package amsmath 1 +\use_package amssymb 1 +\use_package cancel 1 +\use_package esint 1 +\use_package mathdots 1 +\use_package mathtools 1 +\use_package mhchem 1 +\use_package stackrel 1 +\use_package stmaryrd 1 +\use_package undertilde 1 +\cite_engine basic +\cite_engine_type default +\biblio_style plain +\use_bibtopic false +\use_indices false +\paperorientation portrait +\suppress_date false +\justification true +\use_refstyle 1 +\use_minted 0 +\index Index +\shortcut idx +\color #008000 +\end_index +\secnumdepth 3 +\tocdepth 3 +\paragraph_separation indent +\paragraph_indentation default +\is_math_indent 0 +\math_numbering_side default +\quotes_style english +\dynamic_quotes 0 +\papercolumns 1 +\papersides 1 +\paperpagestyle default +\tracking_changes false +\output_changes false +\html_math_output 0 +\html_css_as_file 0 +\html_be_strict false +\end_header + 
+\begin_body + +\begin_layout Standard +For multi-lingual documents with parts in European languages, you can use + the CJK LaTeX bundle. + However, +\end_layout + +\begin_layout Itemize +CJKutf8 defines the MICRO SIGN +\begin_inset Quotes eld +\end_inset + +µ +\begin_inset Quotes erd +\end_inset + + as mathematical symbol (italic) +\begin_inset Formula $\mu$ +\end_inset + +. +\end_layout + +\begin_layout Itemize +The package +\begin_inset Quotes eld +\end_inset + +textcomp +\begin_inset Quotes erd +\end_inset + + overwrites this definition with a correct looking symbol, but only if loaded + +\series bold +after +\series default +CJKutf8. +\end_layout + +\begin_layout Itemize +LyX gets this right for +\begin_inset Quotes eld +\end_inset + +textcomp +\begin_inset Quotes erd +\end_inset + + but not for +\begin_inset Quotes eld +\end_inset + +libertine +\begin_inset Quotes erd +\end_inset + + that internally loads textcomp and is called before CJKutf8 by LyX. +\end_layout + +\begin_layout Itemize +A workaround is adding +\end_layout + +\begin_deeper +\begin_layout LyX-Code + +\backslash +DeclareTextSymbolDefault{ +\backslash +textmu}{TS1} +\end_layout + +\begin_layout Standard +in the LaTeX preamble. +\end_layout + +\end_deeper +\begin_layout Standard +Example: +\end_layout + +\begin_layout Enumerate + +\lang chinese-simplified +In order to include Chinese text (强调某处文本就是改变一下字体), we use CJKutf8. +\end_layout + +\begin_layout Enumerate +Short texts in Latin, Greek, and Cyrillic may be written without setting + the language (hyphenation will be missing and spell-checking complain): + Greetings; Grüße; Приветы; χαιρετισμός. +\end_layout + +\begin_layout Enumerate +Setting the correct language does not help: +\end_layout + +\begin_deeper +\begin_layout Description + +\lang ngerman +Deutsch: Grüße aus 5 +\begin_inset space ~ +\end_inset + +µm Schnee. 
+\end_layout + +\begin_layout Description + +\lang ngerman +Griechisch: +\lang greek +χαιρετισμός +\end_layout + +\end_deeper +\end_body +\end_document diff --git a/development/autotests/invertedTests b/development/autotests/invertedTests index 7f2ce1d2a3..0238dab8e0 100644 --- a/development/autotests/invertedTests +++ b/development/autotests/invertedTests @@ -78,17 +78,19 @@ export/examples/(|es/)modernCV_lyx(16|20) # (similar to the Greek font encoding LGR). export/export/latex/fa-OT1_pdf2 -# No "system font" (fontspec) setup for multiple scripts (Japanese+Latin+Greek) +# No "system font" (fontspec) setup for multiple scripts (CJK+Latin+Greek) # -> Missing character (Greek letter omikron with tonos), # suboptimal character spacing in non-Japanese text parts. export/examples/ja/multilingual_.*_systemF +# +# Noto Fonts not found with LuaTeX. +# "Hand-compiling" with XeTeX works but the test still fails: +export/doc/zh_CN/Intro_.*_systemF +export/doc/zh_CN/Tutorial_.*_systemF # CJK for multilingual documents: -# input-encoding "utf8-cjk" is an easy fix -export/export/latex/CJK/en-de-el-ru-.*_utf8-cjk_pdf2 -export/examples/ja/multilingual_(dvi|pdf[23]?) # -# input-encoding "utf8" should work, too: +# input-encoding "utf8" should work for documents using a CJK language: export/export/latex/CJK/.*-en-de-el-ru_utf8_pdf2 # # but not so easy if the main language does not require CJK: diff --git a/development/autotests/unreliableTests b/development/autotests/unreliableTests index f7a8fc5e7e..40b1878b9d 100644 --- a/development/autotests/unreliableTests +++ b/development/autotests/unreliableTests @@ -120,3 +120,7 @@ export/templates/acmart_pdf # lyx2lyx back-conversion of "Date" info-inset writes # the name of the day in English instead of Japanese. export/examples/ja/multilingual_lyx.* + +# CJKutf8 uses $\mu$ for \textmu unless overwritten by textcomp. 
+# libertine loads textcomp and is loaded before CJKutf8 +export/export/latex/CJK/micro-sign_utf8-cjk_libertine.* diff --git a/lib/doc/zh_CN/Intro.lyx b/lib/doc/zh_CN/Intro.lyx index ea480096cb..4a0ed510b6 100644 --- a/lib/doc/zh_CN/Intro.lyx +++ b/lib/doc/zh_CN/Intro.lyx @@ -1,5 +1,5 @@ -#LyX 2.3 created this file. For more info see http://www.lyx.org/ -\lyxformat 544 +#LyX 2.4 created this file. For more info see https://www.lyx.org/ +\lyxformat 566 \begin_document \begin_header \save_transient_properties true @@ -16,10 +16,10 @@ \language chinese-simplified \language_package default \inputencoding utf8-cjk -\fontencoding global -\font_roman "default" "default" -\font_sans "default" "default" -\font_typewriter "default" "default" +\fontencoding auto +\font_roman "default" "Noto Serif" +\font_sans "default" "Noto Sans CJK SC" +\font_typewriter "default" "Noto Sans Mono CJK SC" \font_math "auto" "auto" \font_default_family default \use_non_tex_fonts false @@ -31,10 +31,12 @@ \use_microtype false \use_dash_ligatures false \graphics default -\default_output_format pdf2 +\default_output_format default \output_sync 0 \bibtex_command default \index_command default +\float_placement class +\float_alignment class \paperfontsize 12 \spacing onehalf \use_hyperref true @@ -695,6 +697,8 @@ reference "tab:单位" \begin_layout Standard \begin_inset Float table +placement document +alignment document wide false sideways false status open diff --git a/lib/doc/zh_CN/Tutorial.lyx b/lib/doc/zh_CN/Tutorial.lyx index 4414e72fd8..c900232e68 100644 --- a/lib/doc/zh_CN/Tutorial.lyx +++ b/lib/doc/zh_CN/Tutorial.lyx @@ -1,5 +1,5 @@ -#LyX 2.3 created this file. For more info see http://www.lyx.org/ -\lyxformat 544 +#LyX 2.4 created this file. 
For more info see https://www.lyx.org/ +\lyxformat 566 \begin_document \begin_header \save_transient_properties true @@ -22,9 +22,9 @@ \language chinese-simplified \language_package default \inputencoding utf8-cjk -\fontencoding global -\font_roman "default" "default" -\font_sans "default" "default" +\fontencoding auto +\font_roman "default" "Noto Serif" +\font_sans "default" "Noto Sans CJK SC" \font_typewriter "default" "default" \font_math "auto" "auto" \font_default_family default @@ -37,11 +37,12 @@ \use_microtype false \use_dash_ligatures false \graphics default -\default_output_format pdf2 +\default_output_format default \output_sync 0 \bibtex_command default \index_command default \float_placement th +\float_alignment class \paperfontsize 12 \spacing onehalf \use_hyperref true diff --git a/lib/encodings b/lib/encodings index 8ab1cce270..612be5f813 100644 --- a/lib/encodings +++ b/lib/encodings @@ -50,8 +50,8 @@ Encoding utf8 utf8 "Unicode (utf8)" UTF-8 variable inputenc End -# This one is used by many CJK packages. utf8 is supposed to be the successor, -# but does not have all features of utf8x yet. +# extended utf8 support from the "ucs" package: +# Larger set of supported characters but conflicts with some packages. 
Encoding utf8x utf8x "Unicode (ucs-extended) (utf8x)" UTF-8 variable inputenc End diff --git a/lib/examples/ja/multilingual.lyx b/lib/examples/ja/multilingual.lyx index ecafcbdf97..4c83cacbe8 100644 --- a/lib/examples/ja/multilingual.lyx +++ b/lib/examples/ja/multilingual.lyx @@ -11,6 +11,9 @@ \usepackage{textcomp} \end_preamble \use_default_options false +\begin_modules +logicalmkup +\end_modules \maintain_unincluded_children false \language japanese-cjk \language_package auto @@ -103,14 +106,14 @@ arg "long" \begin_layout Standard \lang english -For multi-lingual documents, with parts in European languages, you can use +For multi-lingual documents with parts in European languages, you can use the CJK LaTeX bundle: \end_layout \begin_layout Itemize \lang english -Set a standard document classe (the document classes starting with +Set a standard document class (document classes starting with \begin_inset Quotes bld \end_inset @@ -124,7 +127,7 @@ Japanese \begin_layout Itemize \lang english -Set the language to +Set the document language to \begin_inset Quotes bld \end_inset @@ -145,6 +148,32 @@ Unicode (CJK) (utf8) \begin_layout Itemize +\lang english +Select a Japanese CJK-font, e.g. + +\begin_inset Flex Code +status collapsed + +\begin_layout Plain Layout + +\lang english +\begin_inset Quotes sld +\end_inset + +min +\begin_inset Quotes srd +\end_inset + + +\end_layout + +\end_inset + +. +\end_layout + +\begin_layout Itemize + \lang english Some fonts are substituted with fallback \begin_inset Quotes bld @@ -241,7 +270,7 @@ Deutsch: Grüße aus Österreich! \lang ngerman Griechisch: \lang greek - χαιρετισμός + χαιρετισμός! 
\end_layout \begin_layout Description diff --git a/src/output_latex.cpp b/src/output_latex.cpp index 21e4d2fa78..d2fba2f063 100644 --- a/src/output_latex.cpp +++ b/src/output_latex.cpp @@ -1262,7 +1262,7 @@ void TeXOnePar(Buffer const & buf, // if this is a CJK-paragraph and the next isn't, close CJK // also if the next paragraph is a multilingual environment (because of nesting) if (nextpar - && state->open_encoding_ == CJK + && (state->open_encoding_ == CJK && bparams.encoding().iconvName() != "UTF-8") && (nextpar_language->encoding()->package() != Encoding::CJK || (nextpar->layout().isEnvironment() && nextpar->isMultiLingual(bparams))) // inbetween environments, CJK has to be closed later (nesting!) @@ -1402,17 +1402,19 @@ void latexParagraphs(Buffer const & buf, } // Open a CJK environment at the beginning of the main buffer - // if the document's language is a CJK language - // (but not in child documents) + // (but not in child documents or documents using system fonts) + // if the document's language is a CJK language (with some exceptions) + // or the document encoding is utf8-cjk: OutputState * state = getOutputState(); if (maintext && !is_child && !bparams.useNonTeXFonts - && bparams.language->encoding()->package() == Encoding::CJK - && (bparams.encoding().name() == "utf8-cjk" - || bparams.encoding().iconvName() != "UTF-8")) { - docstring const cjkenc = (bparams.encoding().name() == "utf8-cjk" - && LaTeXFeatures::isAvailable("CJKutf8")) ? - from_ascii("UTF8") - : from_ascii(bparams.encoding().latexName()); + && ((bparams.language->encoding()->package() == Encoding::CJK + && (bparams.encoding().iconvName() != "UTF-8" + || bparams.encoding().name() == "utf8-cjk" + || bparams.encoding().name() == "utf8" )) + || (bparams.encoding().name() == "utf8-cjk" + && LaTeXFeatures::isAvailable("CJKutf8")))) { + docstring const cjkenc = bparams.encoding().iconvName() == "UTF-8" + ? 
from_ascii("UTF8") : from_ascii(bparams.encoding().latexName()); os << "\\begin{CJK}{" << cjkenc << "}{" << from_ascii(bparams.fonts_cjk) << "}%\n"; state->open_encoding_ = CJK; @@ -1624,8 +1626,7 @@ pair switchEncoding(odocstream & os, BufferParams const & bparams, // // 2019-01-08 Possibly no longer required since tis620-0 is supported // by inputenc (but check special encodings "utf8-plain" and "default"). - if (oldEnc.package() == Encoding::none - || newEnc.package() == Encoding::none) + if (oldEnc.package() == Encoding::none || newEnc.package() == Encoding::none) return make_pair(false, 0); LYXERR(Debug::LATEX, "Changing LaTeX encoding from " @@ -1682,10 +1683,9 @@ pair switchEncoding(odocstream & os, BufferParams const & bparams, os << "\\egroup"; count += 7; } - docstring const cjkenc = (bparams.encoding().name() == "utf8-cjk" - && LaTeXFeatures::isAvailable("CJKutf8")) ? - from_ascii("UTF8") - : from_ascii(bparams.encoding().latexName()); + docstring const cjkenc = (bparams.encoding().iconvName() == "UTF-8" + && LaTeXFeatures::isAvailable("CJKutf8")) + ? from_ascii("UTF8") : from_ascii(newEnc.latexName()); os << "\\begin{CJK}{" << cjkenc << "}{" << from_ascii(bparams.fonts_cjk) << "}"; state->open_encoding_ = CJK; -- 2.39.5