development/tools/unicodesymbols.py

   1 #! /usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
# file unicodesymbols.py
   5 # This file is part of LyX, the document processor.
   6 # Licence details can be found in the file COPYING.
   7
   8 # author Georg Baum
   9
  10 # Full author contact details are available in file CREDITS
  11
  12 # This script reads a unicode symbol file and completes it in the given range
  13
  14
  15 import os, re, string, sys, unicodedata
  16 import io
  17
  18 def usage(prog_name):
  19     return ("Usage: %s start stop inputfile outputfile\n" % prog_name +
  20             "or     %s start stop <inputfile >outputfile" % prog_name)
  21
  22
  23 def error(message):
  24     sys.stderr.write(message + '\n')
  25     sys.exit(1)
  26
  27
  28 def trim_eol(line):
  29     " Remove end of line char(s)."
  30     if line[-1:] == '\n':
  31         return line[:-1]
  32     else:
  33         # file with no EOL in last line
  34         return line
  35
  36
  37 def read(input):
  38     " Read input file and strip lineendings."
  39     lines = list()
  40     while 1:
  41         line = input.readline()
  42         if not line:
  43             break
  44         line = trim_eol(line)
  45         tokens = line.split()
  46         char = -1
  47         if len(tokens) > 0:
  48             if tokens[0][0:2] == "0x":
  49                 char = int(tokens[0][2:], 16)
  50             elif tokens[0][0:3] == "#0x":
  51                 char = int(tokens[0][3:], 16)
  52         lines.append([char, line])
  53     return lines
  54
  55
  56 def write(output, lines):
  57     " Write output file."
  58     for line in lines:
  59         output.write(line[1] + '\n')
  60
  61
  62 def complete(lines, start, stop):
  63     l = 0
  64     for i in range(start, stop):
  65         # This catches both comments (lines[l][0] == -1) and code points less than i
  66         while l < len(lines) and lines[l][0] < i:
  67 #            print lines[l]
  68             l = l + 1
  69             continue
  70         if l >= len(lines) or lines[l][0] != i:
  71             if sys.version_info[0] < 3:
  72                 c = unichr(i)
  73             else:
  74                 c = chr(i)
  75             name = unicodedata.name(c, "")
  76             if name != "":
  77                 if unicodedata.combining(c):
  78                     combining = "combining"
  79                 else:
  80                     combining = ""
  81                 line = [i, '#0x%04x ""                         "" "%s" "" "" # %s' % (i, combining, name)]
  82                 lines.insert(l, line)
  83 #                print lines[l]
  84                 l = l + 1
  85
  86
  87 def main(argv):
  88
  89     # Open files
  90     if len(argv) == 3:
  91         input = sys.stdin
  92         output = sys.stdout
  93     elif len(argv) == 5:
  94         input = io.open(argv[3], 'r', encoding='utf_8')
  95         output = io.open(argv[4], 'w', encoding='utf_8')
  96     else:
  97         error(usage(argv[0]))
  98     if argv[1][:2] == "0x":
  99         start = int(argv[1][2:], 16)
 100     else:
 101         start = int(argv[1])
 102     if argv[2][:2] == "0x":
 103         stop = int(argv[2][2:], 16)
 104     else:
 105         stop = int(argv[2])
 106
 107     # Do the real work
 108     lines = read(input)
 109     complete(lines, start, stop)
 110     write(output, lines)
 111
 112     # Close files
 113     if len(argv) == 3:
 114         input.close()
 115         output.close()
 116
 117     return 0
 118
 119
 120 if __name__ == "__main__":
 121     main(sys.argv)