development/tools/unicodesymbols.py

   1 #! /usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 # file unicodesymbols.py
   5 # This file is part of LyX, the document processor.
   6 # Licence details can be found in the file COPYING.
   7
   8 # author Georg Baum
   9
  10 # Full author contact details are available in file CREDITS
  11
  12 # This script reads a unicode symbol file and completes it in the given range
  13
  14
  15 import os, re, string, sys, unicodedata
  16
  17 def usage(prog_name):
  18     return ("Usage: %s start stop inputfile outputfile\n" % prog_name +
  19             "or     %s start stop <inputfile >outputfile" % prog_name)
  20
  21
  22 def error(message):
  23     sys.stderr.write(message + '\n')
  24     sys.exit(1)
  25
  26
  27 def trim_eol(line):
  28     " Remove end of line char(s)."
  29     if line[-2:-1] == '\r':
  30         return line[:-2]
  31     elif line[-1:] == '\r' or line[-1:] == '\n':
  32         return line[:-1]
  33     else:
  34         # file with no EOL in last line
  35         return line
  36
  37
  38 def read(input):
  39     " Read input file and strip lineendings."
  40     lines = list()
  41     while 1:
  42         line = input.readline()
  43         if not line:
  44             break
  45         line = trim_eol(line)
  46         tokens = line.split()
  47         char = -1
  48         if len(tokens) > 0:
  49             if tokens[0][0:2] == "0x":
  50                 char = int(tokens[0][2:], 16)
  51             elif tokens[0][0:3] == "#0x":
  52                 char = int(tokens[0][3:], 16)
  53         lines.append([char, line])
  54     return lines
  55
  56
  57 def write(output, lines):
  58     " Write output file with native lineendings."
  59     for line in lines:
  60         output.write(line[1] + os.linesep)
  61
  62
  63 def complete(lines, start, stop):
  64     l = 0
  65     for i in range(start, stop):
  66         # This catches both comments (lines[l][0] == -1) and code points less than i
  67         while l < len(lines) and lines[l][0] < i:
  68 #            print lines[l]
  69             l = l + 1
  70             continue
  71         if l >= len(lines) or lines[l][0] != i:
  72             c = unichr(i)
  73             name = unicodedata.name(c, "")
  74             if name != "":
  75                 if unicodedata.combining(c):
  76                     combining = "combining"
  77                 else:
  78                     combining = ""
  79                 line = [i, '#0x%04x ""                         "" "%s" "" "" # %s' % (i, combining, name)]
  80                 lines.insert(l, line)
  81 #                print lines[l]
  82                 l = l + 1
  83
  84
  85 def main(argv):
  86
  87     # Open files
  88     if len(argv) == 3:
  89         input = sys.stdin
  90         output = sys.stdout
  91     elif len(argv) == 5:
  92         input = open(argv[3], 'rb')
  93         output = open(argv[4], 'wb')
  94     else:
  95         error(usage(argv[0]))
  96     if argv[1][:2] == "0x":
  97         start = int(argv[1][2:], 16)
  98     else:
  99         start = int(argv[1])
 100     if argv[2][:2] == "0x":
 101         stop = int(argv[2][2:], 16)
 102     else:
 103         stop = int(argv[2])
 104
 105     # Do the real work
 106     lines = read(input)
 107     complete(lines, start, stop)
 108     write(output, lines)
 109
 110     # Close files
 111     if len(argv) == 3:
 112         input.close()
 113         output.close()
 114
 115     return 0
 116
 117
 118 if __name__ == "__main__":
 119     main(sys.argv)