3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
6 * \author Lars Gullik Bjønnes
7 * \author Jean-Marc Lasgouttes
10 * Full author contact details are available in file CREDITS.
24 #ifndef CXX_GLOBAL_CSTD
36 char_type tab_iso8859_1[256] = {
37 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
38 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
39 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
40 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
41 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
42 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
43 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
44 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
45 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
46 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
47 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
48 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
49 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
50 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
51 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
52 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff
55 #ifdef USE_UNICODE_FOR_SYMBOLS
56 char_type tab_symbol[256] = {
57 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
58 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
60 0x0020, 0x0021, 0x2200, 0x0023, 0x2203, 0x0025, 0x0026, 0x220b,
61 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
63 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
65 // 0x2245, 0x0391, 0x0392, 0x03a7, 0x0394, 0x0395, 0x03a6, 0x0393,
66 // 0x0397, 0x0399, 0x03d1, 0x039a, 0x039b, 0x039c, 0x039d, 0x039f,
68 // 0x03a0, 0x0398, 0x03a1, 0x03a3, 0x03a4, 0x03a5, 0x03c2, 0x03a9,
69 // 0x039e, 0x03a8, 0x0396, 0x005b, 0x2234, 0x005d, 0x22a5, 0xffff,
70 0x2245, 0x0041, 0x0042, 0x0058, 0x0394, 0x0045, 0x03c2, 0x03a9,
71 0x0048, 0x0049, 0x03d1, 0x004b, 0x039b, 0x004d, 0x004e, 0x004f,
73 0x03a0, 0x0398, 0x0050, 0x03a3, 0x0054, 0x0059, 0x03c2, 0x03a9,
74 0x039e, 0x03a8, 0x005a, 0x005b, 0x2234, 0x005d, 0x22a5, 0xffff,
76 0xffff, 0x03b1, 0x03b2, 0x03c7, 0x03b4, 0x03b5, 0x03d5, 0x03b3,
77 0x03b7, 0x03b9, 0x03c6, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03bf,
79 0x03c0, 0x03b8, 0x03c1, 0x03c3, 0x03c4, 0x03c5, 0x03d6, 0x03c9,
80 0x03be, 0x03c8, 0x03b6, 0x007b, 0x007c, 0x007d, 0x007e, 0xffff,
82 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
83 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
85 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
86 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
88 0xffff, 0x03d2, 0x2032, 0x2264, 0x2215, 0x221e, 0xffff, 0x2663,
89 0x2666, 0x2665, 0x2660, 0x2194, 0x2190, 0x2191, 0x2192, 0x2193,
91 0x00b0, 0x00b1, 0x2033, 0x2265, 0x00d7, 0x221d, 0x2202, 0x2022,
92 0x00f7, 0x2260, 0x2261, 0x2248, 0x22ef, 0xffff, 0xffff, 0x21b5,
94 0x2135, 0x2111, 0x211c, 0x2118, 0x2297, 0x2295, 0x2205, 0x2229,
95 0x222a, 0x2283, 0x2287, 0x2284, 0x2282, 0x2286, 0x2208, 0x2209,
97 0x2220, 0x2207, 0x00ae, 0x00a9, 0x2122, 0x220f, 0x221a, 0x22c5,
98 0x00ac, 0x2227, 0x2228, 0x21d4, 0x21d0, 0x21d1, 0x21d2, 0x21d3,
100 0x2662, 0x2329, 0x00ae, 0x00a9, 0x2122, 0x2211, 0xffff, 0xffff,
101 0xffff, 0x2308, 0xffff, 0x230a, 0xffff, 0xffff, 0xffff, 0xffff,
103 0xffff, 0x232a, 0x222b, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
104 0xffff, 0x2309, 0xffff, 0x230b, 0xffff, 0xffff, 0xffff, 0xffff
// Arabic lookup table with four entries per letter. transformChar() reads
// arabic_table2[c - arabic_start][form] when lyxrc.font_norm_type is
// LyXRC::ISO_10646_1, so the row is the character's offset from
// arabic_start and the column is the Letter_Form value.
// The trailing comment on each row names the input code the row is for.
// NOTE(review): which letter form each column stands for is not visible
// here -- check the Letter_Form enum.
// NOTE(review): the row labels jump from 0xda straight to 0xe1 and stop at
// 0xf2 although 63 rows are declared. The initializer is positional, so
// the rows for 0xdb-0xe0 must be present in the full file for the 0xe1+
// rows to land on the right index -- confirm.
109 char_type arabic_table2[63][4] = {
110 {0x41, 0x41, 0x41, 0x41}, // 0xc1 = hamza
111 {0x42, 0xa1, 0x42, 0xa1}, // 0xc2 = ligature madda on alef
112 {0x43, 0xa2, 0x43, 0xa2}, // 0xc3 = ligature hamza on alef
113 {0x44, 0xa3, 0x44, 0xa3}, // 0xc4 = ligature hamza on waw
114 {0x45, 0xa4, 0x45, 0xa4}, // 0xc5 = ligature hamza under alef
115 {0x46, 0xf9, 0xf8, 0xa0}, // 0xc6 = ligature hamza on ya
116 {0x47, 0xa5, 0x47, 0xa5}, // 0xc7 = alef
117 {0x48, 0xae, 0xac, 0xad}, // 0xc8 = baa
118 {0x49, 0xb1, 0x49, 0xb1}, // 0xc9 = taa marbuta
119 {0x4a, 0xb4, 0xb2, 0xb3}, // 0xca = taa
120 {0x4b, 0xb7, 0xb5, 0xb6}, // 0xcb = thaa
121 {0x4c, 0xba, 0xb8, 0xb9}, // 0xcc = jeem
122 {0x4d, 0xbd, 0xbb, 0xbc}, // 0xcd = haa
123 {0x4e, 0xc0, 0xbe, 0xbf}, // 0xce = khaa
124 {0x4f, 0xa6, 0x4f, 0xa6}, // 0xcf = dal
126 {0x50, 0xa7, 0x50, 0xa7}, // 0xd0 = thal
127 {0x51, 0xa8, 0x51, 0xa8}, // 0xd1 = ra
128 {0x52, 0xa9, 0x52, 0xa9}, // 0xd2 = zain
129 {0x53, 0xc3, 0xc1, 0xc2}, // 0xd3 = seen
130 {0x54, 0xc6, 0xc4, 0xc5}, // 0xd4 = sheen
131 {0x55, 0xc9, 0xc7, 0xc8}, // 0xd5 = sad
132 {0x56, 0xcc, 0xca, 0xcb}, // 0xd6 = dad
133 {0x57, 0xcf, 0xcd, 0xce}, // 0xd7 = tah
134 {0x58, 0xd2, 0xd0, 0xd1}, // 0xd8 = zah
135 {0x59, 0xd5, 0xd3, 0xd4}, // 0xd9 = ain
136 {0x5a, 0xd8, 0xd6, 0xd7}, // 0xda = ghain
// NOTE(review): rows labelled 0xdb-0xe0 are not visible at this point.
144 {0x61, 0xdb, 0xd9, 0xda}, // 0xe1 = fa
145 {0x62, 0xde, 0xdc, 0xdd}, // 0xe2 = qaf
146 {0x63, 0xe1, 0xdf, 0xe0}, // 0xe3 = kaf
147 {0x64, 0xe4, 0xe2, 0xe3}, // 0xe4 = lam
148 {0x65, 0xe7, 0xe5, 0xe6}, // 0xe5 = meem
149 {0x66, 0xea, 0xe8, 0xe9}, // 0xe6 = noon
150 {0x67, 0xed, 0xeb, 0xec}, // 0xe7 = ha
151 {0x68, 0xaa, 0x68, 0xaa}, // 0xe8 = waw
152 {0x69, 0xab, 0x69, 0xab}, // 0xe9 = alef maksura
153 {0x6a, 0xf0, 0xee, 0xef}, // 0xea = ya
// From here on all four columns hold the same value.
154 {0x6b, 0x6b, 0x6b, 0x6b}, // 0xeb = fathatan
155 {0x6c, 0x6c, 0x6c, 0x6c}, // 0xec = dammatan
156 {0x6d, 0x6d, 0x6d, 0x6d}, // 0xed = kasratan
157 {0x6e, 0x6e, 0x6e, 0x6e}, // 0xee = fatha
158 {0x6f, 0x6f, 0x6f, 0x6f}, // 0xef = damma
160 {0x70, 0x70, 0x70, 0x70}, // 0xf0 = kasra
161 {0x71, 0x71, 0x71, 0x71}, // 0xf1 = shadda
162 {0x72, 0x72, 0x72, 0x72}, // 0xf2 = sukun
// Arabic lookup table with two entries per letter. is_arabic() treats a
// character as Arabic when column 0 of its row is non-zero, and
// transformChar() reads arabic_table[c - arabic_start][form >> 1] when
// lyxrc.font_norm_type is not LyXRC::ISO_10646_1.
// The trailing comment on each row names the input code the row is for.
// NOTE(review): which letter forms share a column after the form >> 1
// shift is not visible here -- check the Letter_Form enum.
// NOTE(review): the row labels jump from 0xda straight to 0xe1 and stop at
// 0xf2 although 63 rows are declared. The initializer is positional, so
// the rows for 0xdb-0xe0 must be present in the full file for the 0xe1+
// rows to land on the right index -- confirm.
179 char_type arabic_table[63][2] = {
180 {0xc1, 0xc1}, // 0xc1 = hamza
181 {0xc2, 0xc2}, // 0xc2 = ligature madda on alef
182 {0xc3, 0xc3}, // 0xc3 = ligature hamza on alef
183 {0xc4, 0xc4}, // 0xc4 = ligature hamza on waw
184 {0xc5, 0xc5}, // 0xc5 = ligature hamza under alef
185 {0xc6, 0xc0}, // 0xc6 = ligature hamza on ya
186 {0xc7, 0xc7}, // 0xc7 = alef
187 {0xc8, 0xeb}, // 0xc8 = baa
188 {0xc9, 0xc9}, // 0xc9 = taa marbuta
189 {0xca, 0xec}, // 0xca = taa
190 {0xcb, 0xed}, // 0xcb = thaa
191 {0xcc, 0xee}, // 0xcc = jeem
192 {0xcd, 0xef}, // 0xcd = haa
193 {0xce, 0xf0}, // 0xce = khaa
194 {0xcf, 0xcf}, // 0xcf = dal
196 {0xd0, 0xd0}, // 0xd0 = thal
197 {0xd1, 0xd1}, // 0xd1 = ra
198 {0xd2, 0xd2}, // 0xd2 = zain
199 {0xd3, 0xf1}, // 0xd3 = seen
200 {0xd4, 0xf2}, // 0xd4 = sheen
201 {0xd5, 0xf3}, // 0xd5 = sad
202 {0xd6, 0xf4}, // 0xd6 = dad
203 {0xd7, 0xd7}, // 0xd7 = tah
204 {0xd8, 0xd8}, // 0xd8 = zah
205 {0xd9, 0xf5}, // 0xd9 = ain
206 {0xda, 0xf6}, // 0xda = ghain
// NOTE(review): rows labelled 0xdb-0xe0 are not visible at this point.
214 {0xe1, 0xf7}, // 0xe1 = fa
215 {0xe2, 0xf8}, // 0xe2 = qaf
216 {0xe3, 0xf9}, // 0xe3 = kaf
217 {0xe4, 0xfa}, // 0xe4 = lam
218 {0xe5, 0xfb}, // 0xe5 = meem
219 {0xe6, 0xfc}, // 0xe6 = noon
220 {0xe7, 0xfd}, // 0xe7 = ha
221 {0xe8, 0xe8}, // 0xe8 = waw
222 {0xe9, 0xe9}, // 0xe9 = alef maksura
223 {0xea, 0xfe}, // 0xea = ya
// From here on both columns hold the same value.
224 {0xa8, 0xa8}, // 0xeb = fathatan
225 {0xa9, 0xa9}, // 0xec = dammatan
226 {0xaa, 0xaa}, // 0xed = kasratan
227 {0xab, 0xab}, // 0xee = fatha
228 {0xac, 0xac}, // 0xef = damma
230 {0xad, 0xad}, // 0xf0 = kasra
231 {0xae, 0xae}, // 0xf1 = shadda
232 {0xaf, 0xaf}, // 0xf2 = sukun
// Code that corresponds to row 0 of arabic_table and arabic_table2 (their
// first rows are labelled 0xc1). is_arabic() and transformChar() subtract
// it from a character to obtain the row index.
249 char_type const arabic_start = 0xc1;
255 char_type Encoding::ucs(char_type c) const
257 BOOST_ASSERT(c < 256);
258 return encoding_table[c];
262 bool Encodings::isComposeChar_hebrew(char_type c)
264 return c <= 0xd2 && c >= 0xc0 &&
265 c != 0xce && c != 0xd0;
269 // Special Arabic letters are the ones that do not get connected from the
270 // left. They are hamza, alef_madda, alef_hamza, waw_hamza, alef_hamza_under,
271 // alef, taa_marbuta, dal, thal, ra, zain, waw, alef_maksura
// Visible tests, using the labels from the arabic_table row comments:
//   0xc1-0xc5  hamza and the madda/hamza ligatures on alef and waw
//   0xc7 alef, 0xc9 taa marbuta, 0xcf dal, 0xe8 waw
//   0xd0-0xd2  thal, ra, zain
// NOTE(review): the expression ends in a dangling `||`, so its last operand
// is not visible here. The list above includes alef maksura (labelled 0xe9
// in the tables), which none of the visible tests covers -- confirm that
// the missing operand is the 0xe9 check.
273 bool Encodings::is_arabic_special(char_type c)
275 return (c >= 0xc1 && c <= 0xc5) ||
276 c == 0xc7 || c == 0xc9 ||
277 c == 0xcf || c == 0xe8 ||
278 (c >= 0xd0 && c <= 0xd2) ||
282 bool Encodings::isComposeChar_arabic(char_type c)
284 return c >= 0xeb && c <= 0xf2;
288 bool Encodings::is_arabic(char_type c)
290 return c >= arabic_start && arabic_table[c-arabic_start][0];
// Map the character c to the table entry for the requested letter form.
// When lyxrc.font_norm_type is LyXRC::ISO_10646_1 the four-column
// arabic_table2 is indexed directly with form; otherwise the two-column
// arabic_table is indexed with form >> 1.
// NOTE(review): both lookups index with c - arabic_start and no range check
// is visible in these lines; confirm that c is validated (for example with
// is_arabic) before they run, otherwise the index can leave the 63 rows.
294 char_type Encodings::transformChar(char_type c,
295 Encodings::Letter_Form form)
300 if (lyxrc.font_norm_type == LyXRC::ISO_10646_1)
301 return arabic_table2[c-arabic_start][form];
303 return arabic_table[c-arabic_start][form >> 1];
// Look up an encoding by name in encodinglist and return a pointer to it.
// NOTE(review): only the lookup and the "found" test are visible here; what
// is returned for a hit and for a miss is not shown -- confirm against the
// full function before relying on either.
307 Encoding const * Encodings::getEncoding(string const & encoding) const
309 EncodingList::const_iterator it = encodinglist.find(encoding);
// it == encodinglist.end() means no encoding is registered under that name.
310 if (it != encodinglist.end())
// Set up the built-in encodings.
// Registers "iso8859-1" (second argument "latin1") backed by the identity
// table tab_iso8859_1, then constructs the "symbol" encoding.
// NOTE(review): the target that receives the "symbol" Encoding and the
// #else / #endif lines are not visible here; presumably tab_symbol is used
// when USE_UNICODE_FOR_SYMBOLS is defined and tab_iso8859_1 otherwise --
// confirm.
316 Encodings::Encodings()
318 encodinglist["iso8859-1"] = Encoding("iso8859-1", "latin1", tab_iso8859_1);
320 #ifdef USE_UNICODE_FOR_SYMBOLS
321 Encoding("symbol", "", tab_symbol);
323 Encoding("symbol", "", tab_iso8859_1);
327 void Encodings::read(string const & filename)
335 struct keyword_item encodingtags[et_last - 1] = {
336 { "encoding", et_encoding },
340 LyXLex lex(encodingtags, et_last - 1);
341 lex.setFile(filename);
347 string const name = lex.getString();
349 string const latexname = lex.getString();
350 lyxerr[Debug::INFO] << "Reading encoding " << name << endl;
351 char_type table[256];
352 for (unsigned int i = 0; i < 256; ++i) {
354 string const tmp = lex.getString();
355 table[i] = ::strtol(tmp.c_str(), 0 , 16);
357 encodinglist[name] = Encoding(name, latexname, table);
358 if (lex.lex() != et_end)
359 lex.printError("Encodings::read: "
364 lex.printError("Encodings::read: Misplaced end");
366 case LyXLex::LEX_FEOF:
369 lex.printError("Encodings::read: "
370 "Unknown tag: `$$Token'");