2 # -*- coding: utf-8 -*-
4 # file unicodesymbols.py
5 # This file is part of LyX, the document processor.
6 # Licence details can be found in the file COPYING.
10 # Full author contact details are available in file CREDITS
12 # This script reads a unicode symbol file and completes it in the given range
15 import os, re, string, sys, unicodedata
18 return ("Usage: %s start stop inputfile outputfile\n" % prog_name +
19 "or %s start stop <inputfile >outputfile" % prog_name)
23 sys.stderr.write(message + '\n')
28 " Remove end of line char(s)."
29 if line[-2:-1] == '\r':
31 elif line[-1:] == '\r' or line[-1:] == '\n':
34 # file with no EOL in last line
39 " Read input file and strip lineendings."
42 line = input.readline()
49 if tokens[0][0:2] == "0x":
50 char = int(tokens[0][2:], 16)
51 elif tokens[0][0:3] == "#0x":
52 char = int(tokens[0][3:], 16)
53 lines.append([char, line])
# Writes entry text to `output`, terminating each written line with
# os.linesep (the platform's native line separator).
# NOTE(review): `lines` is presumably the list of [code point, text] pairs
# built by the reader -- confirm against the caller.
57 def write(output, lines):
58 " Write output file with native lineendings."
# Element 1 of an entry is its text; element 0 (the code point) is not written.
60 output.write(line[1] + os.linesep)
# Walks the code points in range(start, stop) and, for each one that has no
# entry of its own in `lines`, builds a '#0x...' entry carrying the character's
# Unicode name and combining flag.
# NOTE(review): the skip loop below only works if `lines` is ordered by code
# point (element 0), with -1 marking comment lines -- confirm against the reader.
63 def complete(lines, start, stop):
# `stop` is exclusive, as with any range().
65 for i in range(start, stop):
66 # This catches both comments (lines[l][0] == -1) and code points less than i
67 while l < len(lines) and lines[l][0] < i:
# Either the list is exhausted or the entry at index l has a different code
# point, so code point i has no entry of its own.
71 if l >= len(lines) or lines[l][0] != i:
# The "" default makes unicodedata.name() return an empty string instead of
# raising ValueError for a character that has no name.
73 name = unicodedata.name(c, "")
# unicodedata.combining() returns 0 when the character has no combining class,
# so this branch is taken only for combining characters.
75 if unicodedata.combining(c):
76 combining = "combining"
# The entry text starts with '#0x' followed by the code point as at least four
# hex digits. Only the fourth quoted field (the combining flag) and the trailing
# comment (the character name) are filled in; the other fields are emitted empty.
79 line = [i, '#0x%04x "" "" "%s" "" "" # %s' % (i, combining, name)]
92 input = open(argv[3], 'rb')
93 output = open(argv[4], 'wb')
96 if argv[1][:2] == "0x":
97 start = int(argv[1][2:], 16)
100 if argv[2][:2] == "0x":
101 stop = int(argv[2][2:], 16)
107 complete(lines, start, stop)
118 if __name__ == "__main__":