3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
6 * \author Lars Gullik Bjønnes
7 * \author Jean-Marc Lasgouttes
10 * Full author contact details are available in file CREDITS.
24 #ifndef CXX_GLOBAL_CSTD
// Code table for the built-in "iso8859-1" encoding: 256 entries where
// entry i holds the value i (0x00 through 0xff), i.e. an identity mapping.
// Encodings::Encodings() passes this table to the "iso8859-1" Encoding and
// also to the "symbol" Encoding when USE_UNICODE_FOR_SYMBOLS is not defined.
36 Uchar tab_iso8859_1[256] = {
37 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
38 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
39 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
40 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
41 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
42 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
43 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
44 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
45 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
46 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
47 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
48 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
49 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
50 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
51 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
52 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff
55 #ifdef USE_UNICODE_FOR_SYMBOLS
// Code table for the "symbol" encoding (see Encodings::Encodings()): the
// array index is the 8-bit symbol-font code, the entry is the 16-bit code
// it is mapped to. Entries 0x00-0x1f and 0x30-0x3f map to themselves.
// NOTE(review): 0xffff presumably marks codes that have no usable
// equivalent -- confirm against the code that consumes this table.
56 Uchar tab_symbol[256] = {
57 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
58 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
60 0x0020, 0x0021, 0x2200, 0x0023, 0x2203, 0x0025, 0x0026, 0x220b,
61 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
63 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
// Reference rows for 0x40-0x5f using the Greek capital code points
// throughout. The active rows below replace the capitals that look like
// Latin letters (Alpha, Beta, Chi, Epsilon, ...) by the Latin letter codes
// (0x0041 'A', 0x0042 'B', 0x0058 'X', 0x0045 'E', ...) and keep the rest.
65 // 0x2245, 0x0391, 0x0392, 0x03a7, 0x0394, 0x0395, 0x03a6, 0x0393,
66 // 0x0397, 0x0399, 0x03d1, 0x039a, 0x039b, 0x039c, 0x039d, 0x039f,
68 // 0x03a0, 0x0398, 0x03a1, 0x03a3, 0x03a4, 0x03a5, 0x03c2, 0x03a9,
69 // 0x039e, 0x03a8, 0x0396, 0x005b, 0x2234, 0x005d, 0x22a5, 0xffff,
// 0x46 and 0x47 are Phi (0x03a6) and Gamma (0x0393), exactly as in the
// reference row: neither has a Latin look-alike. They used to repeat the
// 0x56/0x57 entries (0x03c2, 0x03a9) of the row two lines below, which
// made Phi and Gamma come out as final sigma and Omega.
70 0x2245, 0x0041, 0x0042, 0x0058, 0x0394, 0x0045, 0x03a6, 0x0393,
71 0x0048, 0x0049, 0x03d1, 0x004b, 0x039b, 0x004d, 0x004e, 0x004f,
73 0x03a0, 0x0398, 0x0050, 0x03a3, 0x0054, 0x0059, 0x03c2, 0x03a9,
74 0x039e, 0x03a8, 0x005a, 0x005b, 0x2234, 0x005d, 0x22a5, 0xffff,
76 0xffff, 0x03b1, 0x03b2, 0x03c7, 0x03b4, 0x03b5, 0x03d5, 0x03b3,
77 0x03b7, 0x03b9, 0x03c6, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03bf,
79 0x03c0, 0x03b8, 0x03c1, 0x03c3, 0x03c4, 0x03c5, 0x03d6, 0x03c9,
80 0x03be, 0x03c8, 0x03b6, 0x007b, 0x007c, 0x007d, 0x007e, 0xffff,
82 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
83 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
85 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
86 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
88 0xffff, 0x03d2, 0x2032, 0x2264, 0x2215, 0x221e, 0xffff, 0x2663,
89 0x2666, 0x2665, 0x2660, 0x2194, 0x2190, 0x2191, 0x2192, 0x2193,
91 0x00b0, 0x00b1, 0x2033, 0x2265, 0x00d7, 0x221d, 0x2202, 0x2022,
92 0x00f7, 0x2260, 0x2261, 0x2248, 0x22ef, 0xffff, 0xffff, 0x21b5,
94 0x2135, 0x2111, 0x211c, 0x2118, 0x2297, 0x2295, 0x2205, 0x2229,
95 0x222a, 0x2283, 0x2287, 0x2284, 0x2282, 0x2286, 0x2208, 0x2209,
97 0x2220, 0x2207, 0x00ae, 0x00a9, 0x2122, 0x220f, 0x221a, 0x22c5,
98 0x00ac, 0x2227, 0x2228, 0x21d4, 0x21d0, 0x21d1, 0x21d2, 0x21d3,
100 0x2662, 0x2329, 0x00ae, 0x00a9, 0x2122, 0x2211, 0xffff, 0xffff,
101 0xffff, 0x2308, 0xffff, 0x230a, 0xffff, 0xffff, 0xffff, 0xffff,
103 0xffff, 0x232a, 0x222b, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
104 0xffff, 0x2309, 0xffff, 0x230b, 0xffff, 0xffff, 0xffff, 0xffff
// Glyph codes for Arabic characters, consulted by Encodings::transformChar()
// when lyxrc.font_norm_type == LyXRC::ISO_10646_1. The row is selected by
// (c - arabic_start) and the column by the Encodings::Letter_Form value.
// The trailing comment on each row gives the input character code and the
// letter name. Rows whose four entries are equal (hamza and the marks
// fathatan .. sukun) produce the same code whatever the form.
// NOTE(review): which column belongs to which letter form depends on the
// order of the Letter_Form enumerators, declared elsewhere -- confirm.
109 unsigned char arabic_table2[63][4] = {
110 {0x41, 0x41, 0x41, 0x41}, // 0xc1 = hamza
111 {0x42, 0xa1, 0x42, 0xa1}, // 0xc2 = ligature madda on alef
112 {0x43, 0xa2, 0x43, 0xa2}, // 0xc3 = ligature hamza on alef
113 {0x44, 0xa3, 0x44, 0xa3}, // 0xc4 = ligature hamza on waw
114 {0x45, 0xa4, 0x45, 0xa4}, // 0xc5 = ligature hamza under alef
115 {0x46, 0xf9, 0xf8, 0xa0}, // 0xc6 = ligature hamza on ya
116 {0x47, 0xa5, 0x47, 0xa5}, // 0xc7 = alef
117 {0x48, 0xae, 0xac, 0xad}, // 0xc8 = baa
118 {0x49, 0xb1, 0x49, 0xb1}, // 0xc9 = taa marbuta
119 {0x4a, 0xb4, 0xb2, 0xb3}, // 0xca = taa
120 {0x4b, 0xb7, 0xb5, 0xb6}, // 0xcb = thaa
121 {0x4c, 0xba, 0xb8, 0xb9}, // 0xcc = jeem
122 {0x4d, 0xbd, 0xbb, 0xbc}, // 0xcd = haa
123 {0x4e, 0xc0, 0xbe, 0xbf}, // 0xce = khaa
124 {0x4f, 0xa6, 0x4f, 0xa6}, // 0xcf = dal
126 {0x50, 0xa7, 0x50, 0xa7}, // 0xd0 = thal
127 {0x51, 0xa8, 0x51, 0xa8}, // 0xd1 = ra
128 {0x52, 0xa9, 0x52, 0xa9}, // 0xd2 = zain
129 {0x53, 0xc3, 0xc1, 0xc2}, // 0xd3 = seen
130 {0x54, 0xc6, 0xc4, 0xc5}, // 0xd4 = sheen
131 {0x55, 0xc9, 0xc7, 0xc8}, // 0xd5 = sad
132 {0x56, 0xcc, 0xca, 0xcb}, // 0xd6 = dad
133 {0x57, 0xcf, 0xcd, 0xce}, // 0xd7 = tah
134 {0x58, 0xd2, 0xd0, 0xd1}, // 0xd8 = zah
135 {0x59, 0xd5, 0xd3, 0xd4}, // 0xd9 = ain
136 {0x5a, 0xd8, 0xd6, 0xd7}, // 0xda = ghain
144 {0x61, 0xdb, 0xd9, 0xda}, // 0xe1 = fa
145 {0x62, 0xde, 0xdc, 0xdd}, // 0xe2 = qaf
146 {0x63, 0xe1, 0xdf, 0xe0}, // 0xe3 = kaf
147 {0x64, 0xe4, 0xe2, 0xe3}, // 0xe4 = lam
148 {0x65, 0xe7, 0xe5, 0xe6}, // 0xe5 = meem
149 {0x66, 0xea, 0xe8, 0xe9}, // 0xe6 = noon
150 {0x67, 0xed, 0xeb, 0xec}, // 0xe7 = ha
151 {0x68, 0xaa, 0x68, 0xaa}, // 0xe8 = waw
152 {0x69, 0xab, 0x69, 0xab}, // 0xe9 = alef maksura
153 {0x6a, 0xf0, 0xee, 0xef}, // 0xea = ya
154 {0x6b, 0x6b, 0x6b, 0x6b}, // 0xeb = fathatan
155 {0x6c, 0x6c, 0x6c, 0x6c}, // 0xec = dammatan
156 {0x6d, 0x6d, 0x6d, 0x6d}, // 0xed = kasratan
157 {0x6e, 0x6e, 0x6e, 0x6e}, // 0xee = fatha
158 {0x6f, 0x6f, 0x6f, 0x6f}, // 0xef = damma
160 {0x70, 0x70, 0x70, 0x70}, // 0xf0 = kasra
161 {0x71, 0x71, 0x71, 0x71}, // 0xf1 = shadda
162 {0x72, 0x72, 0x72, 0x72}, // 0xf2 = sukun
// Two-column glyph table for Arabic characters. Encodings::transformChar()
// reads it as arabic_table[c - arabic_start][form >> 1] on the path where
// lyxrc.font_norm_type is not LyXRC::ISO_10646_1, and Encodings::is_arabic()
// tests column 0 of the row: a zero there means the code is not treated as
// an Arabic character. The trailing comment on each row gives the input
// character code and the letter name.
// NOTE(review): because of the `form >> 1` index, two consecutive
// Letter_Form values share each column; which forms those are depends on
// the enumerator order declared elsewhere -- confirm.
179 unsigned char arabic_table[63][2] = {
180 {0xc1, 0xc1}, // 0xc1 = hamza
181 {0xc2, 0xc2}, // 0xc2 = ligature madda on alef
182 {0xc3, 0xc3}, // 0xc3 = ligature hamza on alef
183 {0xc4, 0xc4}, // 0xc4 = ligature hamza on waw
184 {0xc5, 0xc5}, // 0xc5 = ligature hamza under alef
185 {0xc6, 0xc0}, // 0xc6 = ligature hamza on ya
186 {0xc7, 0xc7}, // 0xc7 = alef
187 {0xc8, 0xeb}, // 0xc8 = baa
188 {0xc9, 0xc9}, // 0xc9 = taa marbuta
189 {0xca, 0xec}, // 0xca = taa
190 {0xcb, 0xed}, // 0xcb = thaa
191 {0xcc, 0xee}, // 0xcc = jeem
192 {0xcd, 0xef}, // 0xcd = haa
193 {0xce, 0xf0}, // 0xce = khaa
194 {0xcf, 0xcf}, // 0xcf = dal
196 {0xd0, 0xd0}, // 0xd0 = thal
197 {0xd1, 0xd1}, // 0xd1 = ra
198 {0xd2, 0xd2}, // 0xd2 = zain
199 {0xd3, 0xf1}, // 0xd3 = seen
200 {0xd4, 0xf2}, // 0xd4 = sheen
201 {0xd5, 0xf3}, // 0xd5 = sad
202 {0xd6, 0xf4}, // 0xd6 = dad
203 {0xd7, 0xd7}, // 0xd7 = tah
204 {0xd8, 0xd8}, // 0xd8 = zah
205 {0xd9, 0xf5}, // 0xd9 = ain
206 {0xda, 0xf6}, // 0xda = ghain
214 {0xe1, 0xf7}, // 0xe1 = fa
215 {0xe2, 0xf8}, // 0xe2 = qaf
216 {0xe3, 0xf9}, // 0xe3 = kaf
217 {0xe4, 0xfa}, // 0xe4 = lam
218 {0xe5, 0xfb}, // 0xe5 = meem
219 {0xe6, 0xfc}, // 0xe6 = noon
220 {0xe7, 0xfd}, // 0xe7 = ha
221 {0xe8, 0xe8}, // 0xe8 = waw
222 {0xe9, 0xe9}, // 0xe9 = alef maksura
223 {0xea, 0xfe}, // 0xea = ya
224 {0xa8, 0xa8}, // 0xeb = fathatan
225 {0xa9, 0xa9}, // 0xec = dammatan
226 {0xaa, 0xaa}, // 0xed = kasratan
227 {0xab, 0xab}, // 0xee = fatha
228 {0xac, 0xac}, // 0xef = damma
230 {0xad, 0xad}, // 0xf0 = kasra
231 {0xae, 0xae}, // 0xf1 = shadda
232 {0xaf, 0xaf}, // 0xf2 = sukun
// Character code that corresponds to row 0 (hamza) of arabic_table and
// arabic_table2; it is subtracted from a character code to get the row index.
249 unsigned char const arabic_start = 0xc1;
// Returns true when c lies in the inclusive range 0xc0..0xd2, except for
// the two codes 0xce and 0xd0, which are excluded explicitly.
// NOTE(review): presumably this is the range of Hebrew combining points in
// the 8-bit Hebrew code page, with 0xce and 0xd0 being non-combining
// punctuation -- confirm against the code page in use.
254 bool Encodings::isComposeChar_hebrew(unsigned char c)
256 return c <= 0xd2 && c >= 0xc0 &&
257 c != 0xce && c != 0xd0;
261 // Special Arabic letters are the ones that do not get connected from the left.
262 // They are: hamza, alef madda, alef hamza, waw hamza, alef hamza under,
263 // alef, taa marbuta, dal, thal, ra, zain, waw, alef maksura.
//
// Codes tested below, using the names from the arabic_table comments:
//   0xc1..0xc5  hamza, madda on alef, hamza on alef, hamza on waw,
//               hamza under alef
//   0xc7 alef, 0xc9 taa marbuta, 0xcf dal, 0xe8 waw
//   0xd0..0xd2  thal, ra, zain
// NOTE(review): the last visible line ends with `||`, so the expression
// has one more operand; presumably it tests 0xe9 (alef maksura), the only
// letter of the list above not covered by the lines shown -- confirm.
265 bool Encodings::is_arabic_special(unsigned char c)
267 return (c >= 0xc1 && c <= 0xc5) ||
268 c == 0xc7 || c == 0xc9 ||
269 c == 0xcf || c == 0xe8 ||
270 (c >= 0xd0 && c <= 0xd2) ||
// Returns true when c lies in the inclusive range 0xeb..0xf2. According to
// the arabic_table comments these are the marks fathatan, dammatan,
// kasratan, fatha, damma, kasra, shadda and sukun.
274 bool Encodings::isComposeChar_arabic(unsigned char c)
276 return c >= 0xeb && c <= 0xf2;
// Returns true when c is at least arabic_start (0xc1) and column 0 of its
// arabic_table row is non-zero. The range test comes first so the table is
// never indexed with a negative value; since c is an unsigned char the
// index is at most 0xff - 0xc1 = 62, the last row of the 63-row table.
280 bool Encodings::is_arabic(unsigned char c)
282 return c >= arabic_start && arabic_table[c-arabic_start][0];
// Maps the character code c to the glyph code for the requested letter form.
//   c    - input character code; used as row index (c - arabic_start).
//   form - letter form; used as column index.
// With lyxrc.font_norm_type == LyXRC::ISO_10646_1 the four-column
// arabic_table2 is indexed by `form` directly; otherwise the two-column
// arabic_table is indexed by `form >> 1`, so pairs of consecutive form
// values share a column.
// NOTE(review): both lookups require c >= arabic_start, otherwise the row
// index is negative; confirm that a preceding check (e.g. is_arabic(c)) or
// the caller guarantees this.
286 unsigned char Encodings::transformChar(unsigned char c,
287 Encodings::Letter_Form form)
292 if (lyxrc.font_norm_type == LyXRC::ISO_10646_1)
293 return arabic_table2[c-arabic_start][form];
295 return arabic_table[c-arabic_start][form >> 1];
// Looks up the Encoding stored in encodinglist under the key `encoding`.
// The search uses find(); comparing the iterator with encodinglist.end()
// distinguishes a registered name from an unknown one. The result is
// returned as a pointer to a const Encoding.
299 Encoding const * Encodings::getEncoding(string const & encoding) const
301 EncodingList::const_iterator it = encodinglist.find(encoding);
302 if (it != encodinglist.end())
// Sets up the built-in encodings. "iso8859-1" (second argument "latin1",
// the position Encodings::read() fills with the LaTeX name) is stored in
// encodinglist with the identity table tab_iso8859_1. A "symbol" Encoding
// with an empty second argument is then built from tab_symbol when
// USE_UNICODE_FOR_SYMBOLS is defined.
// NOTE(review): the second Encoding("symbol", ...) expression, built from
// tab_iso8859_1, is presumably the alternative used when the macro is not
// defined -- confirm.
308 Encodings::Encodings()
310 encodinglist["iso8859-1"] = Encoding("iso8859-1", "latin1", tab_iso8859_1);
312 #ifdef USE_UNICODE_FOR_SYMBOLS
313 Encoding("symbol", "", tab_symbol);
315 Encoding("symbol", "", tab_iso8859_1);
// Reads encoding definitions from the file `filename` and stores them in
// encodinglist. The file is tokenised by a LyXLex set up with the keyword
// table encodingtags ("encoding" -> et_encoding, ...). For an et_encoding
// entry the function reads, in this order: the encoding name, its LaTeX
// name, and 256 tokens that are parsed as hexadecimal numbers into the code
// table; the resulting Encoding replaces any entry already stored under the
// same name. Problems in the file (an entry not followed by et_end, a
// misplaced end, an unknown tag) are reported through lex.printError().
319 void Encodings::read(string const & filename)
327 struct keyword_item encodingtags[et_last - 1] = {
328 { "encoding", et_encoding },
332 LyXLex lex(encodingtags, et_last - 1);
333 lex.setFile(filename);
339 string const name = lex.getString();
341 string const latexname = lex.getString();
342 lyxerr[Debug::INFO] << "Reading encoding " << name << endl;
// One token per table slot, parsed as base-16.
// NOTE(review): strtol is called with a null end pointer, so a token that
// is not a valid hex number is not detected and yields 0 -- confirm that
// this is acceptable for malformed files.
344 for (unsigned int i = 0; i < 256; ++i) {
346 string const tmp = lex.getString();
347 table[i] = ::strtol(tmp.c_str(), 0 , 16);
349 encodinglist[name] = Encoding(name, latexname, table);
// Each encoding entry must be closed by the et_end keyword.
350 if (lex.lex() != et_end)
351 lex.printError("Encodings::read: "
356 lex.printError("Encodings::read: Misplaced end");
358 case LyXLex::LEX_FEOF:
361 lex.printError("Encodings::read: "
362 "Unknown tag: `$$Token'");