X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=lib%2Fencodings;h=924c996b967117cbaa2979c142237888fc3a1770;hb=bc6214b63bf9c1b4680c8b4ed9a08c3328878940;hp=d275d746b01f09271a706ef8f2ef13c4d79dad56;hpb=db200daf6f74b6746ad09c634fee55179e74f69c;p=lyx.git diff --git a/lib/encodings b/lib/encodings index d275d746b0..924c996b96 100644 --- a/lib/encodings +++ b/lib/encodings @@ -3,14 +3,50 @@ # Note that you can only add singlebyte encodings to this file. # LyX does not support the output of multibyte encodings (e.g. utf16). -# It does support singlebyte encodings with variable with (e.g. utf8). +# It does support singlebyte encodings with variable width (e.g. utf8). # These are marked with the "variable" keyword. # Fixed width encodings are marked with the "fixed" keyword. - -# Syntax: Encoding fixed|variable End - -# encodings used by inputenc.sty - +# The code points of TeX control characters like {, } and \ can occur in the +# second byte of some variable width encodings. These encodings must not be +# set as document encodings and are marked with the "variableunsafe" keyword. +# They are only needed for proper tex2lyx import. + +# Most encodings require loading a latex package such as "inputenc" or "CJK". +# There is no "japanese" latex package; rather, this keyword indicates to LyX +# to switch the buffer format and use platex instead of standard (pdf)latex. +# In this case, TeX control characters in high bytes are not a problem. +# The invocation is platex -kanji=<LaTeX name> + +# The set of "iconv" supported encodings is system dependent. + +# For GNU libiconv, supported encodings are listed at +# https://www.gnu.org/software/libiconv/ +# and available via the `iconv --list` command. + +# Syntax: Encoding <LyX name> <LaTeX name> <GUI name> <iconv name> <width> <package> End + +# LyX name: Name used by the file format and in lib/languages. Must be unique! +# LaTeX name: Used in the latex export or passed to platex as command-line switch. +# GUI name: Displayed in document settings. +# iconv name: Used by iconv. 
+# width: One of fixed, variable, or variableunsafe (see above). +# package: One of none, inputenc, CJK, or japanese (see above). + +# Encodings used with inputenc.sty +# ================================ + +# "inputenc" is a base LaTeX package that provides an extensible framework +# for conversion of a document encoding into a "LaTeX Internal Character +# Representation" (LICR) and a set of encoding definitions +# (.def files). Additional encoding definitions are provided by +# several language support packages. +# +# The following encodings from http://www.ctan.org/pkg/latex-cyrillic are +# not included, because they are not widely used and lack iconv support: +# ctt, dbk, isoir111, koi8-ru, lcyenc, maccyr, macukr, mik, mls, mnk, mos, +# and pt254. + +# inputenc's standard utf8 support: Encoding utf8 utf8 "Unicode (utf8)" UTF-8 variable inputenc End @@ -19,7 +55,7 @@ End Encoding utf8x utf8x "Unicode (ucs-extended) (utf8x)" UTF-8 variable inputenc End -# This encoding is used to typeset Armenian using the armTeX package +# from http://www.ctan.org/pkg/armtex Encoding armscii8 armscii8 "Armenian (ArmSCII8)" ARMSCII-8 fixed inputenc End @@ -35,13 +71,15 @@ End Encoding iso8859-4 latin4 "Baltic (ISO 8859-4)" ISO-8859-4 fixed inputenc End +# from http://www.ctan.org/pkg/latex-cyrillic Encoding iso8859-5 iso88595 "Cyrillic (ISO 8859-5)" ISO-8859-5 fixed inputenc End -# Not standard, see http://tug.ctan.org/tex-archive/language/arabic/arabi/arabi/texmf/latex/arabi/ +# from http://www.ctan.org/pkg/arabi Encoding iso8859-6 8859-6 "Arabic (ISO 8859-6)" ISO-8859-6 fixed inputenc End +# from http://www.ctan.org/pkg/greek-inputenc Encoding iso8859-7 iso-8859-7 "Greek (ISO 8859-7)" ISO-8859-7 fixed inputenc End @@ -51,8 +89,7 @@ End Encoding iso8859-9 latin5 "Turkish (ISO 8859-9)" ISO-8859-9 fixed inputenc End -# Not standard, see http://www.vtex.lt/tex/littex/index.html -Encoding iso8859-13 l7xenc "Baltic (ISO 8859-13)" ISO-8859-13 fixed inputenc +Encoding iso8859-13 latin7 
"Baltic (ISO 8859-13)" ISO-8859-13 fixed inputenc End Encoding iso8859-15 latin9 "Western European (ISO 8859-15)" ISO-8859-15 fixed inputenc @@ -61,10 +98,13 @@ End Encoding iso8859-16 latin10 "South-Eastern European (ISO 8859-16)" ISO-8859-16 fixed inputenc End +Encoding applemac applemac "Western European (Macintosh Roman)" Macintosh fixed inputenc +End + Encoding cp437 cp437 "DOS (CP 437)" CP437 fixed inputenc End -# cp437, but on position 225 is sz instead of beta +# like cp437, but with sz instead of beta at position 225 Encoding cp437de cp437de "DOS-de (CP 437-de)" CP437 fixed inputenc End @@ -74,9 +114,12 @@ End Encoding cp852 cp852 "Central European (CP 852)" CP852 fixed inputenc End +# from http://www.ctan.org/pkg/latex-cyrillic Encoding cp855 cp855 "Cyrillic (CP 855)" CP855 fixed inputenc End +# GNU iconv supports cp858 only if configured with "--enable-extra-encodings" +# (see https://www.gnu.org/software/libiconv/) Encoding cp858 cp858 "Western European (CP 858)" CP858 fixed inputenc End @@ -86,40 +129,43 @@ End Encoding cp865 cp865 "Nordic languages (CP 865)" CP865 fixed inputenc End +# from http://www.ctan.org/pkg/latex-cyrillic Encoding cp866 cp866 "Cyrillic (CP 866)" CP866 fixed inputenc End Encoding cp1250 cp1250 "Central European (CP 1250)" CP1250 fixed inputenc End +# from http://www.ctan.org/pkg/latex-cyrillic Encoding cp1251 cp1251 "Cyrillic (CP 1251)" CP1251 fixed inputenc End +# "ansinew" is hardcoded as a synonym of this (see Encodings::fromLaTeXName) Encoding cp1252 cp1252 "Western European (CP 1252)" CP1252 fixed inputenc End Encoding cp1255 cp1255 "Hebrew (CP 1255)" CP1255 fixed inputenc End -# Not standard, see http://tug.ctan.org/tex-archive/language/arabic/arabi/arabi/texmf/latex/arabi/ +# from http://www.ctan.org/pkg/arabi Encoding cp1256 cp1256 "Arabic (CP 1256)" CP1256 fixed inputenc End Encoding cp1257 cp1257 "Baltic (CP 1257)" CP1257 fixed inputenc End -Encoding koi8 koi8-r "Cyrillic (KOI8-R)" KOI8-R fixed inputenc +# from 
http://www.ctan.org/pkg/latex-cyrillic +Encoding koi8-r koi8-r "Cyrillic (KOI8-R)" KOI8-R fixed inputenc End +# from http://www.ctan.org/pkg/latex-cyrillic Encoding koi8-u koi8-u "Cyrillic (KOI8-U)" KOI8-U fixed inputenc End +# from http://www.ctan.org/pkg/latex-cyrillic Encoding pt154 pt154 "Cyrillic (pt 154)" PT154 fixed inputenc End -Encoding pt254 pt254 "Cyrillic (pt 254)" PT254 fixed inputenc -End - # encodings used by CJK.sty # The following encodings that are supported by the CJK package are not @@ -127,16 +173,19 @@ End # Bg5+, GBt # See the NOTES file of libiconv for details. -# The following encodings can't be handled directly, because the code points -# of TeX control characters like {, } and \ can occur in the second byte: - # For traditional chinese -#Encoding big5 Bg5 "Chinese (traditional) (Big5)" BIG5 variable CJK -#End +Encoding big5 Bg5 "Chinese (traditional) (Big5)" BIG5 variableunsafe CJK +End # For japanese -#Encoding shift-jis SJIS "Japanese (CJK) (SJIS)" SJIS variable CJK -#End +# We use CP932 and not SJIS, since it is a superset of SJIS except that it +# translates SJIS 0x5c to U+005C (REVERSE SOLIDUS aka backslash) instead of +# U+00A5 (YEN SIGN). Strictly speaking this is wrong, but there is a +# long-standing confusion about which translation is correct, and using CP932 is also +# consistent with the SJIS example file /doc/latex/cjk/examples/SJIS.tex +# of the CJK package. The CP932 extensions over SJIS do not matter here. +Encoding shift-jis SJIS "Japanese (CJK) (SJIS)" CP932 variableunsafe CJK +End # The following encodings need hardcoded support of the encodable unicode # range, but are known by iconv: @@ -171,13 +220,15 @@ End # encodings that do not use a package -# Traditional Japanese TeX programs require neither CJK nor inputenc -# package. -Encoding euc-jp-plain EUC-JP-pLaTeX "Japanese (non-CJK) (EUC-JP)" EUC-JP variable none +# Traditional Japanese TeX programs require the japanese package. 
+# It is incompatible with CJK and inputenc. +Encoding euc-jp-platex euc "Japanese (pLaTeX) (EUC-JP)" EUC-JP variable japanese +End +Encoding jis-platex jis "Japanese (pLaTeX) (JIS)" ISO-2022-JP variable japanese End -Encoding jis-plain JIS-pLaTeX "Japanese (non-CJK) (JIS)" ISO-2022-JP variable none +Encoding shift-jis-platex sjis "Japanese (pLaTeX) (SJIS)" CP932 variable japanese End -Encoding shift-jis-plain SJIS-pLaTeX "Japanese (non-CJK) (SJIS)" CP932 variable none +Encoding utf8-platex utf8 "Japanese (pLaTeX) (UTF8)" UTF-8 variable japanese End # This one needs hardcoded support, since the inputenc package does not know @@ -195,4 +246,3 @@ End # Pure 7bit ASCII encoding (partially hardcoded in LyX) Encoding ascii ascii "ASCII" ascii fixed none End -