X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=lib%2Fencodings;h=c79db405becdc2e3034bab1fd3531f5425424c37;hb=a88e6e1ea5861f8275adbe4c962c2629fb482613;hp=b67b330999bed7adf680c181c17dc544ca5ecb45;hpb=ced73c33e662ef04dfc627b8000547d376d8d4b6;p=lyx.git diff --git a/lib/encodings b/lib/encodings index b67b330999..c79db405be 100644 --- a/lib/encodings +++ b/lib/encodings @@ -1,134 +1,223 @@ # FIXME: Have a look at the encodings known by the inputenc package and add # missing ones. Caution: File format change! -# Note that you can only add single byte encodings to this file without -# changing some C++ code. -# The only multibyte encoding that is currently supported is utf8, and this -# support is hardcoded to the iconv name "UTF-8". +# Note that you can only add singlebyte encodings to this file. +# LyX does not support the output of multibyte encodings (e.g. utf16). +# It does support singlebyte encodings with variable width (e.g. utf8). +# These are marked with the "variable" keyword. +# Fixed width encodings are marked with the "fixed" keyword. +# The code points of TeX control characters like {, } and \ can occur in the +# second byte of some variable width encodings. These encodings must not be +# set as document encodings and are marked with the "variableunsafe" keyword. +# They are only needed for proper tex2lyx import. -# Order of names: LyX name LaTeX name iconv name +# Most encodings require loading a latex package such as "inputenc" or "CJK". +# There is no "japanese" latex package, rather this keyword indicates to LyX +# to switch the buffer format and use platex instead of standard (pdf)latex. +# In this case, TeX control characters in high bytes is not a problem. +# The invocation is platex -kanji= -Encoding utf8 utf8 UTF-8 +# Syntax: Encoding End + +# LyX name: Name used by the file format and in lib/languages. Must be unique! +# LaTeX name: Used in the latex export or passed to platex as command-line switch. +# GUI name: Displayed in document settings. +# iconv name: Used by iconv. +# width: One of fixed, variable, or variableunsafe (see above). +# package: One of none, inputenc, CJK, or japanese (see above). + +# encodings used by inputenc.sty + +Encoding utf8 utf8 "Unicode (utf8)" UTF-8 variable inputenc End # This one is used by many CJK packages. utf8 is supposed to be the successor, # but does not have all features of utf8x yet. -Encoding utf8x utf8x UTF-8 +Encoding utf8x utf8x "Unicode (ucs-extended) (utf8x)" UTF-8 variable inputenc End -# A plain utf8 encoding that does not use the inputenc package. -# Such an encoding is required for XeTeX. -Encoding utf8-plain utf8-plain UTF-8 +# This encoding is used to typeset Armenian using the armTeX package +Encoding armscii8 armscii8 "Armenian (ArmSCII8)" ARMSCII-8 fixed inputenc End -# This encoding is used to typeset Armenian using the armtex package -Encoding armscii8 armscii8 ARMSCII-8 +Encoding iso8859-1 latin1 "Western European (ISO 8859-1)" ISO-8859-1 fixed inputenc End -Encoding iso8859-1 latin1 ISO-8859-1 +Encoding iso8859-2 latin2 "Central European (ISO 8859-2)" ISO-8859-2 fixed inputenc End -Encoding iso8859-2 latin2 ISO-8859-2 +Encoding iso8859-3 latin3 "South European (ISO 8859-3)" ISO-8859-3 fixed inputenc End -Encoding iso8859-3 latin3 ISO-8859-3 +Encoding iso8859-4 latin4 "Baltic (ISO 8859-4)" ISO-8859-4 fixed inputenc End -Encoding iso8859-4 latin4 ISO-8859-4 -End - -Encoding iso8859-5 iso88595 ISO-8859-5 +Encoding iso8859-5 iso88595 "Cyrillic (ISO 8859-5)" ISO-8859-5 fixed inputenc End # Not standard, see http://tug.ctan.org/tex-archive/language/arabic/arabi/arabi/texmf/latex/arabi/ -Encoding iso8859-6 8859-6 ISO-8859-6 +Encoding iso8859-6 8859-6 "Arabic (ISO 8859-6)" ISO-8859-6 fixed inputenc End -Encoding iso8859-7 iso-8859-7 ISO-8859-7 +Encoding iso8859-7 iso-8859-7 "Greek (ISO 8859-7)" ISO-8859-7 fixed inputenc End -Encoding iso8859-8 8859-8 ISO-8859-8 +Encoding iso8859-8 8859-8 "Hebrew (ISO 8859-8)" ISO-8859-8 fixed inputenc End -Encoding iso8859-9 latin5 ISO-8859-9 +Encoding iso8859-9 latin5 "Turkish (ISO 8859-9)" ISO-8859-9 fixed inputenc End # Not standard, see http://www.vtex.lt/tex/littex/index.html -Encoding iso8859-13 l7xenc ISO-8859-13 +Encoding iso8859-13 l7xenc "Baltic (ISO 8859-13)" ISO-8859-13 fixed inputenc +End + +Encoding iso8859-15 latin9 "Western European (ISO 8859-15)" ISO-8859-15 fixed inputenc End -Encoding iso8859-15 latin9 ISO-8859-15 +Encoding iso8859-16 latin10 "South-Eastern European (ISO 8859-16)" ISO-8859-16 fixed inputenc End -Encoding iso8859-16 latin10 ISO-8859-16 +Encoding applemac applemac "Western European (Macintosh Roman)" Macintosh fixed inputenc End -Encoding cp437 cp437 CP437 +Encoding cp437 cp437 "DOS (CP 437)" CP437 fixed inputenc End # cp437, but on position 225 is sz instead of beta -Encoding cp437de cp437de CP437 +Encoding cp437de cp437de "DOS-de (CP 437-de)" CP437 fixed inputenc End -Encoding cp850 cp850 CP850 +Encoding cp850 cp850 "Western European (CP 850)" CP850 fixed inputenc End -Encoding cp852 cp852 CP852 +Encoding cp852 cp852 "Central European (CP 852)" CP852 fixed inputenc End -Encoding cp855 cp855 CP855 +Encoding cp855 cp855 "Cyrillic (CP 855)" CP855 fixed inputenc End -Encoding cp858 cp858 CP858 +Encoding cp858 cp858 "Western European (CP 858)" CP858 fixed inputenc End -Encoding cp862 cp862 CP862 +Encoding cp862 cp862 "Hebrew (CP 862)" CP862 fixed inputenc End -Encoding cp865 cp865 CP865 +Encoding cp865 cp865 "Nordic languages (CP 865)" CP865 fixed inputenc End -Encoding cp866 cp866 CP866 +Encoding cp866 cp866 "Cyrillic (CP 866)" CP866 fixed inputenc End -Encoding cp1250 cp1250 CP1250 +Encoding cp1250 cp1250 "Central European (CP 1250)" CP1250 fixed inputenc End -Encoding cp1251 cp1251 CP1251 +Encoding cp1251 cp1251 "Cyrillic (CP 1251)" CP1251 fixed inputenc End -Encoding cp1252 cp1252 CP1252 +# "ansinew" is harcoded as a synonym of this (see Encodings::fromLaTeXName) +Encoding cp1252 cp1252 "Western European (CP 1252)" CP1252 fixed inputenc End -Encoding cp1255 cp1255 CP1255 +Encoding cp1255 cp1255 "Hebrew (CP 1255)" CP1255 fixed inputenc End # Not standard, see http://tug.ctan.org/tex-archive/language/arabic/arabi/arabi/texmf/latex/arabi/ -Encoding cp1256 cp1256 CP1256 +Encoding cp1256 cp1256 "Arabic (CP 1256)" CP1256 fixed inputenc End -Encoding cp1257 cp1257 CP1257 +Encoding cp1257 cp1257 "Baltic (CP 1257)" CP1257 fixed inputenc End -Encoding koi8 koi8-r KOI8-R +Encoding koi8-r koi8-r "Cyrillic (KOI8-R)" KOI8-R fixed inputenc End -Encoding koi8-u koi8-u KOI8-U +Encoding koi8-u koi8-u "Cyrillic (KOI8-U)" KOI8-U fixed inputenc End -# This one needs hardcoded support, since the inputenc package does not know -# tis620-0, and thailatex sets up babel directly to use tis620-0, so the value -# for inputenc is never output to .tex files (but needed for the hardcoded -# tis620-0 support). -Encoding tis620-0 tis620-0 TIS620-0 +Encoding pt154 pt154 "Cyrillic (pt 154)" PT154 fixed inputenc +End + +Encoding pt254 pt254 "Cyrillic (pt 254)" PT254 fixed inputenc +End + +# encodings used by CJK.sty + +# The following encodings that are supported by the CJK package are not +# included here, because they are not widely used and lack proper iconv support: +# Bg5+, GBt +# See the NOTES file of libiconv for details. + +# For traditional chinese +Encoding big5 Bg5 "Chinese (traditional) (Big5)" BIG5 variableunsafe CJK +End + +# For japanese +# We use CP932 and not SJIS, since it is a super set of SJIS except that it +# translates SJIS 0x5c to U+005C (REVERSE SOLIDUS aka backslash) instead of +# U+00A5 (YEN SIGN). Strictly speaking this is wrong, but there is a long +# standing confusion which translation is correct, and using CP932 is also +# consistent with the SJIS example file /doc/latex/cjk/examples/SJIS.tex +# of the CJK package. The CP932 extensions over SJIS do not matter here. +Encoding shift-jis SJIS "Japanese (CJK) (SJIS)" CP932 variableunsafe CJK +End + +# The following encodings need hardcoded support of the encodable unicode +# range, but are known by iconv: + +# For simplified chinese +Encoding euc-cn GB "Chinese (simplified) (EUC-CN)" EUC-CN variable CJK End -Encoding pt154 pt154 PT154 +# For simplified chinese +Encoding gbk GBK "Chinese (simplified) (GBK)" GBK variable CJK End -Encoding pt254 pt254 PT254 +# For japanese +Encoding jis JIS "Japanese (CJK) (JIS)" ISO-2022-JP variable CJK +End + +# For korean +Encoding euc-kr KS "Korean (EUC-KR)" EUC-KR variable CJK +End + +# The CJK package has yet another name for utf8... +Encoding utf8-cjk UTF8 "Unicode (CJK) (utf8)" UTF-8 variable CJK +End + +# For traditional chinese +Encoding euc-tw EUC-TW "Chinese (traditional) (EUC-TW)" EUC-TW variable CJK +End + +# For japanese +Encoding euc-jp EUC-JP "Japanese (CJK) (EUC-JP)" EUC-JP variable CJK +End + +# encodings that do not use a package + +# Traditional Japanese TeX programs require the japanese package. +# that is incompatible with CJK and inputenc. +Encoding euc-jp-platex euc "Japanese (pLaTeX) (EUC-JP)" EUC-JP variable japanese +End +Encoding jis-platex jis "Japanese (pLaTeX) (JIS)" ISO-2022-JP variable japanese +End +Encoding shift-jis-platex sjis "Japanese (pLaTeX) (SJIS)" CP932 variable japanese +End +Encoding utf8-platex utf8 "Japanese (pLaTeX) (UTF8)" UTF-8 variable japanese +End + +# This one needs hardcoded support, since the inputenc package does not know +# tis620-0, and thailatex sets up babel directly to use tis620-0, so the +# LaTeX name is never output to .tex files (but needed for the hardcoded +# tis620-0 support). +Encoding tis620-0 tis620-0 "Thai (TIS 620-0)" TIS620-0 fixed none +End + +# A plain utf8 encoding that does not use the inputenc package. +# Such an encoding is required for XeTeX. +Encoding utf8-plain utf8-plain "Unicode (XeTeX) (utf8)" UTF-8 variable none End # Pure 7bit ASCII encoding (partially hardcoded in LyX) -Encoding ascii ascii ascii +Encoding ascii ascii "ASCII" ascii fixed none End