]> git.lyx.org Git - features.git/blob - po/lyx_pot.py
* lyx_pot.py
[features.git] / po / lyx_pot.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 # file lyx_pot.py
5 # This file is part of LyX, the document processor.
6 # Licence details can be found in the file COPYING.
7 #
8 # \author Bo Peng
9 #
10 # Full author contact details are available in file CREDITS
11
12 # Usage: use
13 #     lyx_pot.py -h
14 # to get usage message
15
16 # This script will extract translatable strings from input files and write
17 # to output in gettext .pot format.
18 #
19 import sys, os, re, getopt
20 if sys.version_info < (2, 4, 0):
21     from sets import Set as set
22
def relativePath(path, base):
    '''Return path relative to the top source directory base.

    Both arguments are resolved with os.path.realpath() and normalised
    before they are compared component by component.  The result always
    uses '/' as separator, so that the source references written to the
    .pot files are the same on Windows and *nix.

    If path is not located under base, a message is printed and the first
    len(base) components are stripped anyway (best effort, no exception).
    If path resolves to base itself, '.' is returned.
    '''
    path_parts = os.path.normpath(os.path.realpath(path)).split(os.sep)
    base_parts = os.path.normpath(os.path.realpath(base)).split(os.sep)
    if path_parts[:len(base_parts)] != base_parts:
        print("Path %s is not under top source directory" % path)
    remainder = path_parts[len(base_parts):]
    if not remainder:
        # os.path.join() needs at least one component; without this guard
        # a path equal to base would raise a TypeError.
        return '.'
    # replace all \ by / such that we get the same comments on Windows and *nix
    return os.path.join(*remainder).replace('\\', '/')
34
35
def writeString(outfile, infile, basefile, lineno, string):
    '''Append one gettext entry for string to outfile.

    Backslashes in string are doubled and double quotes are dropped; if
    nothing is left, no entry is written.  The '#:' source reference is
    built from infile (made relative to basefile) and lineno.
    '''
    msgid = string.replace('\\', '\\\\')
    msgid = msgid.replace('"', '')
    if msgid:
        location = relativePath(infile, basefile)
        entry = '#: %s:%d\nmsgid "%s"\nmsgstr ""\n' % (location, lineno, msgid)
        # the extra newline leaves a blank line between entries
        outfile.write(entry + '\n')
42
43
def ui_l10n(input_files, output, base):
    '''Generate pot file from lib/ui/*

    Every line of every input file is tried against the patterns below,
    in order; the first one that matches supplies the string.  Submenu
    names additionally get their underscores replaced by spaces.  Double
    quotes are removed and empty strings are skipped.
    '''
    potfile = open(output, 'w')
    # (pattern, replace underscores by spaces?) -- order matters
    extractors = [
        (re.compile(r'^[^#]*Submenu\s+"([^"]*)"'), True),
        (re.compile(r'^[^#]*PopupMenu\s+"[^"]+"\s+"([^"]*)"'), False),
        (re.compile(r'^[^#]*IconPalette\s+"[^"]+"\s+"([^"]*)"'), False),
        (re.compile(r'^[^#]*Toolbar\s+"[^"]+"\s+"([^"]*)"'), False),
        (re.compile(r'[^#]*Item\s+"([^"]*)"'), False),
        (re.compile(r'[^#]*TableInsert\s+"([^"]*)"'), False),
    ]
    for src in input_files:
        uifile = open(src)
        lineno = 0
        for line in uifile.readlines():
            lineno += 1
            text = None
            for pattern, underscores_to_spaces in extractors:
                found = pattern.match(line)
                if found:
                    text = found.group(1)
                    if underscores_to_spaces:
                        text = text.replace('_', ' ')
                    break
            if text is None:
                continue
            text = text.replace('"', '')
            if text != "":
                entry = '#: %s:%d\nmsgid "%s"\nmsgstr ""\n' % \
                    (relativePath(src, base), lineno, text)
                potfile.write(entry + '\n')
        uifile.close()
    potfile.close()
77
78
def layouts_l10n(input_files, output, base, layouttranslations):
    '''Generate pot file from lib/layouts/*.{layout,inc,module}

    input_files        -- layout, include and module files to scan
    output             -- name of the file that is (over)written
    base               -- top source directory; used for the '#:' source
                          references and to find po/LINGUAS and po/*.po
    layouttranslations -- if false, every translatable string found in
                          input_files is written to output as a .pot entry.
                          If true, a layouttranslations file is written
                          instead: the keys are the strings of the
                          Lang/Babel preambles, the GuiName of floats, the
                          ListName entries and all keys already present in
                          the old output file; the translations come from
                          the old output file and from po/<lang>.po (read
                          with the polib library).
    '''
    Style = re.compile(r'^Style\s+(.*)', re.IGNORECASE)
    # include ???LabelString???, but exclude comment lines
    LabelString = re.compile(r'^[^#]*LabelString\S*\s+(.*)')
    GuiName = re.compile(r'\s*GuiName\s+(.*)')
    ListName = re.compile(r'\s*ListName\s+(.*)')
    CategoryName = re.compile(r'\s*Category\s+(.*)')
    NameRE = re.compile(r'DeclareLyXModule.*{(.*)}')
    InsetLayout = re.compile(r'^InsetLayout\s+\"?(.*)\"?')
    FlexCheck = re.compile(r'^Flex:(.*)')
    DescBegin = re.compile(r'#+\s*DescriptionBegin\s*$')
    DescEnd = re.compile(r'#+\s*DescriptionEnd\s*$')
    Category = re.compile(r'#Category: (.*)$')
    # The alternation must be grouped: '(Lang)|(Babel)Preamble' would match
    # every line that merely contains "Lang".
    I18nPreamble = re.compile(r'^\s*(Lang|Babel)Preamble\s*$')
    EndI18nPreamble = re.compile(r'^\s*End(Lang|Babel)Preamble\s*$')
    I18nString = re.compile(r'_\(([^\)]+)\)')
    CounterFormat = re.compile(r'\s*PrettyFormat\s+"?(.*)"?')
    CiteFormat = re.compile(r'\s*CiteFormat')
    KeyVal = re.compile(r'^\s*_\w+\s+(.*)$')
    Float = re.compile(r'\s*Float')
    End = re.compile(r'\s*End')
    Comment = re.compile(r'\s*#')
    Translation = re.compile(r'\s*Translation\s+(.*)\s*$')
    KeyValPair = re.compile(r'\s*"(.*)"\s+"(.*)"')

    oldlanguages = []
    languages = []
    keyset = set()
    oldtrans = dict()
    if layouttranslations:
        linguas_file = os.path.join(base, 'po/LINGUAS')
        linguas = open(linguas_file)
        try:
            for line in linguas.readlines():
                if Comment.search(line) is None:
                    languages.extend(line.split())
        finally:
            linguas.close()

        # read old translations if available
        try:
            oldfile = open(output)
            try:
                lang = ''
                for line in oldfile.readlines():
                    if Comment.search(line):
                        continue
                    if line.strip() == '':
                        continue
                    res = Translation.search(line)
                    if res:
                        lang = res.group(1)
                        if lang not in languages:
                            # language without an entry in po/LINGUAS:
                            # remember it, its block is copied as it is
                            oldlanguages.append(lang)
                            languages.append(lang)
                        oldtrans[lang] = dict()
                        continue
                    res = End.search(line)
                    if res:
                        lang = ''
                        continue
                    res = KeyValPair.search(line)
                    if res and lang != '':
                        key = res.group(1).decode('utf-8')
                        val = res.group(2).decode('utf-8')
                        # undo the escaping applied when the file was written
                        key = key.replace('\\"', '"').replace('\\\\', '\\')
                        val = val.replace('\\"', '"').replace('\\\\', '\\')
                        oldtrans[lang][key] = val
                        keyset.add(key)
                        continue
                    print("Error: Unable to handle line:")
                    print(line)
            finally:
                oldfile.close()
        except IOError:
            print("Warning: Unable to open %s for reading." % output)
            print("         Old translations will be lost.")

        # walon is not a known document language
        # FIXME: Do not hardcode, read from lib/languages!
        if 'wa' in languages:
            languages.remove('wa')

    out = open(output, 'w')
    for src in input_files:
        readingDescription = False
        readingI18nPreamble = False
        readingFloat = False
        readingCiteFormats = False
        descLines = []
        lineno = 0
        srcfile = open(src)
        try:
            lines = srcfile.readlines()
        finally:
            srcfile.close()
        for line in lines:
            lineno += 1
            if readingDescription:
                res = DescEnd.search(line)
                if res is not None:
                    readingDescription = False
                    desc = " ".join(descLines)
                    if not layouttranslations:
                        writeString(out, src, base, lineno + 1, desc)
                    continue
                # drop the leading comment character of the description line
                descLines.append(line[1:].strip())
                continue
            res = DescBegin.search(line)
            if res is not None:
                readingDescription = True
                # start from scratch, so that a second description block in
                # the same file does not repeat the text of the first one
                descLines = []
                continue
            if readingI18nPreamble:
                res = EndI18nPreamble.search(line)
                if res is not None:
                    readingI18nPreamble = False
                    continue
                res = I18nString.search(line)
                if res is not None:
                    string = res.group(1)
                    if layouttranslations:
                        keyset.add(string)
                    else:
                        writeString(out, src, base, lineno, string)
                continue
            res = I18nPreamble.search(line)
            if res is not None:
                readingI18nPreamble = True
                continue
            res = NameRE.search(line)
            if res is not None:
                string = res.group(1)
                if not layouttranslations:
                    writeString(out, src, base, lineno + 1, string)
                continue
            res = Style.search(line)
            if res is not None:
                string = res.group(1)
                string = string.replace('_', ' ')
                if not layouttranslations:
                    writeString(out, src, base, lineno, string)
                continue
            res = LabelString.search(line)
            if res is not None:
                string = res.group(1)
                if not layouttranslations:
                    writeString(out, src, base, lineno, string)
                continue
            res = GuiName.search(line)
            if res is not None:
                string = res.group(1)
                if layouttranslations:
                    # gui name must only be added for floats
                    if readingFloat:
                        keyset.add(string)
                else:
                    writeString(out, src, base, lineno, string)
                continue
            res = CategoryName.search(line)
            if res is not None:
                string = res.group(1)
                if not layouttranslations:
                    writeString(out, src, base, lineno, string)
                continue
            res = ListName.search(line)
            if res is not None:
                string = res.group(1)
                if layouttranslations:
                    keyset.add(string.strip('"'))
                else:
                    writeString(out, src, base, lineno, string)
                continue
            res = InsetLayout.search(line)
            if res is not None:
                string = res.group(1)
                string = string.replace('_', ' ')
                # Only the part after "Flex:" is translated, the
                # InsetLayout name itself is not.
                m = FlexCheck.search(string)
                if m:
                    if not layouttranslations:
                        writeString(out, src, base, lineno, m.group(1))
                continue
            res = Category.search(line)
            if res is not None:
                string = res.group(1)
                if not layouttranslations:
                    writeString(out, src, base, lineno, string)
                continue
            res = CounterFormat.search(line)
            if res is not None:
                string = res.group(1)
                if not layouttranslations:
                    writeString(out, src, base, lineno, string)
                continue
            res = Float.search(line)
            if res is not None:
                readingFloat = True
                continue
            # No 'continue' for the next two checks: the same line is also
            # tested against the patterns that follow.
            res = CiteFormat.search(line)
            if res is not None:
                readingCiteFormats = True
            res = End.search(line)
            if res is not None:
                readingCiteFormats = False
                readingFloat = False
            if readingCiteFormats:
                res = KeyVal.search(line)
                if res is not None:
                    val = res.group(1)
                    if not layouttranslations:
                        writeString(out, src, base, lineno, val)

    if layouttranslations:
        # Extract translations of layout files
        import polib

        # Sort languages and key to minimize the diff between different runs
        # with changed translations
        languages.sort()
        keys = list(keyset)
        keys.sort()

        out.write('''# This file has been automatically generated by po/lyx_pot.py.
# PLEASE MODIFY ONLY THE LAGUAGES HAVING NO .po FILE! If you want to regenerate
# this file from the translations, run `make ../lib/layouttranslations' in po.
# Python polib library is needed for building the output file.
#
# This file should remain fixed during minor LyX releases.
# For more comments see README.localization file.''' + '\n')
        for lang in languages:
            out.write('\nTranslation %s' % lang + '\n')
            if lang in oldtrans:
                trans = oldtrans[lang]
            else:
                trans = dict()
            if lang not in oldlanguages:
                poname = os.path.join(base, 'po/' + lang + '.po')
                po = polib.pofile(poname)
                # Iterate through po entries and not keys for speed reasons.
                for entry in po:
                    if not entry.translated():
                        continue
                    # membership test against the set, not the sorted list
                    if entry.msgid in keyset:
                        key = entry.msgid
                        val = entry.msgstr
                        # some translators keep untranslated entries
                        if val != key:
                            trans[key] = val
            for key in keys:
                if key in trans:
                    val = trans[key].replace('\\', '\\\\').replace('"', '\\"')
                    key = key.replace('\\', '\\\\').replace('"', '\\"')
                    out.write('\t"%s" "%s"' %
                              (key.encode('utf-8'), val.encode('utf-8')) + '\n')
                # also print untranslated entries to help translators
                elif lang not in oldlanguages:
                    key = key.replace('\\', '\\\\').replace('"', '\\"')
                    out.write('\t"%s" "%s"' %
                              (key.encode('utf-8'), key.encode('utf-8')) + '\n')
            out.write('End' + '\n')

    out.close()
338
339
def qt4_l10n(input_files, output, base):
    '''Generate pot file from src/frontends/qt4/ui/*.ui

    Every line of the form <string>...</string> yields one entry, except
    for the line that directly follows a <property name="shortcut"> line.
    XML entities are decoded, and backslashes and double quotes are
    escaped for the msgid.
    '''
    output = open(output, 'w')
    pat = re.compile(r'\s*<string>(.*)</string>')
    prop = re.compile(r'\s*<property.*name.*=.*shortcut')
    # All substitutions are done in ONE pass over the text.  Chaining
    # str.replace() calls that start with '&amp;' decodes text such as
    # '&amp;lt;' twice (to '<' instead of '&lt;').
    special = re.compile(r'&amp;|&quot;|&lt;|&gt;|&#x0a;|\\|"')
    replacement = {
        '&amp;': '&',
        '&lt;': '<',
        '&gt;': '>',
        '&quot;': r'\"',
        '"': r'\"',
        '\\': '\\\\',
        '&#x0a;': r'\n',
    }

    def translate(found):
        return replacement[found.group(0)]

    try:
        for src in input_files:
            uifile = open(src)
            try:
                skipNextLine = False
                for lineno, line in enumerate(uifile.readlines()):
                    # skip the line after <property name=shortcut>
                    if skipNextLine:
                        skipNextLine = False
                        continue
                    if prop.match(line):
                        skipNextLine = True
                        continue
                    # get lines that match <string>...</string>
                    res = pat.match(line)
                    if res:
                        string = special.sub(translate, res.group(1))
                        output.write('#: %s:%d\nmsgid "%s"\nmsgstr ""\n' %
                            (relativePath(src, base), lineno + 1, string) + '\n')
            finally:
                uifile.close()
    finally:
        output.close()
367
368
def languages_l10n(input_files, output, base):
    '''Generate pot file from lib/languages

    Writes one entry for the value of every GuiName line that is not
    preceded by a '#' on the same line.
    '''
    GuiName = re.compile(r'^[^#]*GuiName\s+(.*)')
    out = open(output, 'w')
    try:
        for src in input_files:
            langfile = open(src)
            try:
                lineno = 0
                for line in langfile.readlines():
                    lineno += 1
                    res = GuiName.search(line)
                    if res is not None:
                        writeString(out, src, base, lineno, res.group(1))
            finally:
                # the input file was previously left open
                langfile.close()
    finally:
        out.close()
387
388
def external_l10n(input_files, output, base):
    '''Generate pot file from lib/external_templates

    Template and GuiName values become one entry each.  The lines between
    HelpText and HelpTextEnd are collected into a single multi-line msgid,
    one "...\\n" string per input line.  Double quotes are removed from
    all strings.
    '''
    output = open(output, 'w')
    Template = re.compile(r'^Template\s+(.*)')
    GuiName = re.compile(r'\s*GuiName\s+(.*)')
    HelpTextStart = re.compile(r'\s*HelpText\s')
    HelpTextSection = re.compile(r'\s*(\S.*)\s*$')
    HelpTextEnd = re.compile(r'\s*HelpTextEnd\s')
    try:
        for src in input_files:
            template_file = open(src)
            try:
                inHelp = False
                hadHelp = False
                prev_help_string = ''
                for lineno, line in enumerate(template_file.readlines()):
                    res = Template.match(line) or GuiName.match(line)
                    if res:
                        # The entry is written right here.  A shared emit
                        # step after the if/elif chain would write the last
                        # Template/GuiName string once more on HelpTextEnd.
                        string = res.group(1).replace('"', '')
                        if string != "" and not inHelp:
                            output.write('#: %s:%d\nmsgid "%s"\nmsgstr ""\n' %
                                (relativePath(src, base), lineno + 1, string) + '\n')
                    elif inHelp:
                        if HelpTextEnd.match(line):
                            if hadHelp:
                                # terminate the multi-line msgid
                                output.write('\nmsgstr ""\n' + '\n')
                            inHelp = False
                            hadHelp = False
                            prev_help_string = ''
                        else:
                            res = HelpTextSection.match(line)
                            if res:
                                help_string = res.group(1).replace('"', '')
                                if help_string != "" and prev_help_string == '':
                                    # first help line: open the msgid
                                    output.write('#: %s:%d\nmsgid ""\n"%s\\n"' %
                                        (relativePath(src, base), lineno + 1, help_string) + '\n')
                                    hadHelp = True
                                elif help_string != "":
                                    output.write('"%s\\n"' % help_string + '\n')
                                prev_help_string = help_string
                    elif HelpTextStart.match(line):
                        inHelp = True
                        prev_help_string = ''
            finally:
                template_file.close()
    finally:
        output.close()
436
437
def formats_l10n(input_files, output, base):
    '''Generate pot file from configure.py

    Only the first file of input_files is read.  For every \\Format line
    the GUI name is written as one entry and, if the shortcut field is not
    empty, "name|shortcut" as a second entry.
    '''
    output = open(output, 'w')
    # '\\Format' matches a literal backslash followed by "Format".  The
    # sequence '\F' is not a valid regular expression escape.
    GuiName = re.compile(r'.*\\Format\s+\S+\s+\S+\s+"([^"]*)"\s+(\S*)\s+.*')
    GuiName2 = re.compile(r'.*\\Format\s+\S+\s+\S+\s+([^"]\S+)\s+(\S*)\s+.*')
    try:
        formats_file = open(input_files[0])
        try:
            for lineno, line in enumerate(formats_file.readlines()):
                # quoted GUI name first, unquoted single-word name second
                res = GuiName.match(line) or GuiName2.match(line)
                if res is None:
                    continue
                label = res.group(1).replace('\\', '\\\\').replace('"', '')
                shortcut = res.group(2).replace('"', '')
                labelsc = ""
                if shortcut != "":
                    labelsc = label + "|" + shortcut
                if label != "":
                    output.write('#: %s:%d\nmsgid "%s"\nmsgstr ""\n' %
                        (relativePath(input_files[0], base), lineno + 1, label) + '\n')
                if labelsc != "":
                    output.write('#: %s:%d\nmsgid "%s"\nmsgstr ""\n' %
                        (relativePath(input_files[0], base), lineno + 1, labelsc) + '\n')
        finally:
            formats_file.close()
    finally:
        output.close()
466
467
def encodings_l10n(input_files, output, base):
    '''Generate pot file from lib/encodings

    Only the first file of input_files is read.  The quoted GUI name of
    every line starting with "Encoding" is written as one entry; lines
    that cannot be parsed are reported, but do not abort the run.
    '''
    potfile = open(output, 'w')
    # assuming only one encodings file
    #                 Encoding utf8      utf8    "Unicode (utf8)" UTF-8    variable inputenc
    reg = re.compile(r'Encoding [\w-]+\s+[\w-]+\s+"([\w \-\(\)]+)"\s+[\w-]+\s+(fixed|variable)\s+\w+.*')
    encodings_file = open(input_files[0])
    lineno = 0
    for line in encodings_file.readlines():
        lineno += 1
        if line.startswith('Encoding'):
            found = reg.match(line)
            if found is None:
                # No need to abort if the parsing fails
                print("Error: Unable to handle line:")
                print(line)
            else:
                entry = '#: %s:%d\nmsgid "%s"\nmsgstr ""\n' % \
                    (relativePath(input_files[0], base), lineno, found.groups()[0])
                potfile.write(entry + '\n')
    encodings_file.close()
    potfile.close()
488
489
490
# Help text printed for -h/--help.  The option names listed here must match
# the ones passed to getopt.getopt() in the main section.
Usage = '''
lyx_pot.py [-b|--base top_src_dir] [-o|--output output_file] [-h|--help] [-s|--src_file filename] -t|--type input_type input_files

where
    --base:
        path to the top source directory. default to '.'
    --output:
        output pot file, default to './lyx.pot'
    --src_file
        filename that contains a list of input files in each line
    --type: input_type can be
        ui: lib/ui/*
        layouts: lib/layouts/*
        layouttranslations: create lib/layouttranslations from po/*.po and lib/layouts/*
        qt4: qt4 ui files
        languages: file lib/languages
        encodings: file lib/encodings
        external: external templates file
        formats: formats predefined in lib/configure.py
'''
511
if __name__ == '__main__':
    # Command line driver: parse the options, collect the input files and
    # call the extractor that belongs to the requested input type.
    input_type = None
    output = 'lyx.pot'
    base = '.'
    input_files = []
    try:
        optlist, args = getopt.getopt(sys.argv[1:], 'ht:o:b:s:',
            ['help', 'type=', 'output=', 'base=', 'src_file='])
    except getopt.GetoptError:
        # Unknown option or missing argument: show the reason and the usage
        # message instead of a traceback.  sys.exc_info() is used because
        # the syntax to bind the exception differs between Python versions.
        print(sys.exc_info()[1])
        print(Usage)
        sys.exit(1)
    for (opt, value) in optlist:
        if opt in ['-h', '--help']:
            print(Usage)
            sys.exit(0)
        elif opt in ['-o', '--output']:
            output = value
        elif opt in ['-b', '--base']:
            base = value
        elif opt in ['-t', '--type']:
            input_type = value
        elif opt in ['-s', '--src_file']:
            listing = open(value)
            try:
                # one file name per line; blank lines are skipped, they
                # would otherwise end up as empty file names
                input_files = [f.strip() for f in listing if f.strip()]
            finally:
                listing.close()

    if input_type not in ['ui', 'layouts', 'layouttranslations', 'qt4', 'languages', 'encodings', 'external', 'formats'] or output is None:
        print('Wrong input type or output filename.')
        sys.exit(1)

    # files given on the command line are processed after those of --src_file
    input_files += args

    if input_type == 'ui':
        ui_l10n(input_files, output, base)
    elif input_type == 'layouts':
        layouts_l10n(input_files, output, base, False)
    elif input_type == 'layouttranslations':
        layouts_l10n(input_files, output, base, True)
    elif input_type == 'qt4':
        qt4_l10n(input_files, output, base)
    elif input_type == 'external':
        external_l10n(input_files, output, base)
    elif input_type == 'formats':
        formats_l10n(input_files, output, base)
    elif input_type == 'encodings':
        encodings_l10n(input_files, output, base)
    else:
        # only 'languages' is left after the check above
        languages_l10n(input_files, output, base)
555
556