]> git.lyx.org Git - lyx.git/blob - po/lyx_pot.py
Add comment
[lyx.git] / po / lyx_pot.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 # file lyx_pot.py
5 # This file is part of LyX, the document processor.
6 # Licence details can be found in the file COPYING.
7 #
8 # \author Bo Peng
9 #
10 # Full author contact details are available in file CREDITS
11
12 # Usage: use
13 #     lyx_pot.py -h
14 # to get usage message
15
16 # This script will extract translatable strings from input files and write
17 # to output in gettext .pot format.
18 #
19 import sys, os, re, getopt
20 if sys.version_info < (2, 4, 0):
21     from sets import Set as set
22
def relativePath(path, base):
    '''Return path relative to the top source dir base.

    Both arguments are resolved with os.path.realpath() before they are
    compared. The result always uses '/' as separator, so that the same
    location comments are produced on Windows and *nix.

    If path is not located under base, a message is printed and the
    components following the first len(base) components are still returned.
    If no component is left (e.g. path is base itself), an empty string is
    returned.
    '''
    path_parts = os.path.normpath(os.path.realpath(path)).split(os.sep)
    base_parts = os.path.normpath(os.path.realpath(base)).split(os.sep)
    if path_parts[:len(base_parts)] != base_parts:
        print("Path %s is not under top source directory" % path)
    tail = path_parts[len(base_parts):]
    if not tail:
        # os.path.join() requires at least one argument
        return ''
    # replace all \ by / such that we get the same comments on Windows and *nix
    return os.path.join(*tail).replace('\\', '/')
34
35
def writeString(outfile, infile, basefile, lineno, string):
    '''Write one pot entry (location comment, msgid, empty msgstr) to outfile.

    Backslashes in string are doubled and double quotes are removed. Nothing
    is written if the resulting string is empty. The location comment is
    built from infile (made relative to basefile) and lineno.
    '''
    msgid = string.replace('\\', '\\\\')
    msgid = msgid.replace('"', '')
    if not msgid:
        return
    location = relativePath(infile, basefile)
    # each entry is followed by an empty line
    outfile.write('#: %s:%d\nmsgid "%s"\nmsgstr ""\n\n' %
                  (location, lineno, msgid))
42
43
def ui_l10n(input_files, output, base):
    '''Generate pot file from lib/ui/*

    input_files -- list of ui definition files to scan
    output      -- name of the pot file to write (overwritten)
    base        -- top source directory, used for the location comments

    The first quoted string of Submenu, Item and TableInsert lines and the
    second quoted string of PopupMenu, IconPalette and Toolbar lines are
    extracted. Underscores in Submenu strings are replaced by spaces.
    '''
    Submenu = re.compile(r'^[^#]*Submenu\s+"([^"]*)"')
    Popupmenu = re.compile(r'^[^#]*PopupMenu\s+"[^"]+"\s+"([^"]*)"')
    IconPalette = re.compile(r'^[^#]*IconPalette\s+"[^"]+"\s+"([^"]*)"')
    Toolbar = re.compile(r'^[^#]*Toolbar\s+"[^"]+"\s+"([^"]*)"')
    Item = re.compile(r'[^#]*Item\s+"([^"]*)"')
    TableInsert = re.compile(r'[^#]*TableInsert\s+"([^"]*)"')
    # tried in this order, the first matching pattern wins
    patterns = (Submenu, Popupmenu, IconPalette, Toolbar, Item, TableInsert)

    out = open(output, 'w')
    try:
        for src in input_files:
            infile = open(src)
            try:
                for lineno, line in enumerate(infile.readlines()):
                    string = None
                    for pattern in patterns:
                        res = pattern.match(line)
                        if res:
                            string = res.group(1)
                            if pattern is Submenu:
                                string = string.replace('_', ' ')
                            break
                    if string is None:
                        continue
                    string = string.replace('"', '')
                    if string != "":
                        out.write('#: %s:%d\nmsgid "%s"\nmsgstr ""\n\n' %
                                  (relativePath(src, base), lineno + 1, string))
            finally:
                # close the input even if scanning or writing failed
                infile.close()
    finally:
        out.close()
77
78
def layouts_l10n(input_files, output, base, layouttranslations):
    '''Generate pot file from lib/layouts/*.{layout,inc,module}

    input_files        -- list of layout files to scan
    output             -- name of the file to write (overwritten)
    base               -- top source directory
    layouttranslations -- selects the kind of output, see below

    If layouttranslations is false, a pot entry is written (via writeString)
    for every translatable string found in the input files.

    If layouttranslations is true, no pot entries are written. Instead the
    strings found in Lang/Babel preambles, the GuiName of floats and the
    ListName values are collected as keys, together with the keys of an
    already existing output file. Then one "Translation <lang> ... End"
    section is written per language, for all languages listed in
    base/po/LINGUAS and all languages found in the existing output file.
    The translations of languages listed in po/LINGUAS are read from
    base/po/<lang>.po, which requires the polib module.
    '''
    Style = re.compile(r'^Style\s+(.*)', re.IGNORECASE)
    # include ???LabelString???, but exclude comment lines
    LabelString = re.compile(r'^[^#]*LabelString\S*\s+(.*)')
    GuiName = re.compile(r'\s*GuiName\s+(.*)')
    ListName = re.compile(r'\s*ListName\s+(.*)')
    CategoryName = re.compile(r'\s*Category\s+(.*)')
    NameRE = re.compile(r'DeclareLyXModule.*{(.*)}')
    InsetLayout = re.compile(r'^InsetLayout\s+\"?(.*)\"?')
    FlexCheck = re.compile(r'^Flex:(.*)')
    DescBegin = re.compile(r'#+\s*DescriptionBegin\s*$')
    DescEnd = re.compile(r'#+\s*DescriptionEnd\s*$')
    Category = re.compile(r'#Category: (.*)$')
    # The alternation must be grouped: without the outer parentheses the
    # pattern matches every line that contains "Lang".
    I18nPreamble = re.compile(r'\s*((Lang)|(Babel))Preamble\s*$')
    EndI18nPreamble = re.compile(r'\s*End((Lang)|(Babel))Preamble\s*$')
    I18nString = re.compile(r'_\(([^\)]+)\)')
    CounterFormat = re.compile(r'\s*PrettyFormat\s+"?(.*)"?')
    CiteFormat = re.compile(r'\s*CiteFormat')
    KeyVal = re.compile(r'^\s*_\w+\s+(.*)$')
    Float = re.compile(r'\s*Float')
    End = re.compile(r'\s*End')
    Comment = re.compile(r'\s*#')
    Translation = re.compile(r'\s*Translation\s+(.*)\s*$')
    KeyValPair = re.compile(r'\s*"(.*)"\s+"(.*)"')

    oldlanguages = []
    languages = []
    keyset = set()
    oldtrans = dict()
    if layouttranslations:
        linguas_file = os.path.join(base, 'po/LINGUAS')
        linguas = open(linguas_file)
        try:
            for line in linguas.readlines():
                # lines containing a '#' are skipped completely
                if Comment.search(line) is None:
                    languages.extend(line.split())
        finally:
            linguas.close()

        # read old translations if available
        try:
            oldfile = open(output)
            try:
                lang = ''
                for line in oldfile.readlines():
                    if Comment.search(line):
                        continue
                    if line.strip() == '':
                        continue
                    res = Translation.search(line)
                    if res:
                        lang = res.group(1)
                        if lang not in languages:
                            oldlanguages.append(lang)
                            languages.append(lang)
                        oldtrans[lang] = dict()
                        continue
                    if End.search(line):
                        lang = ''
                        continue
                    res = KeyValPair.search(line)
                    if res and lang != '':
                        key = res.group(1).decode('utf-8')
                        val = res.group(2).decode('utf-8')
                        # undo the escaping done when the file was written
                        key = key.replace('\\"', '"').replace('\\\\', '\\')
                        val = val.replace('\\"', '"').replace('\\\\', '\\')
                        oldtrans[lang][key] = val
                        keyset.add(key)
                        continue
                    print("Error: Unable to handle line:")
                    print(line)
            finally:
                oldfile.close()
        except IOError:
            # no (readable) old translations: start from scratch
            pass

        # Walloon is not a known document language
        # FIXME: Do not hardcode, read from lib/languages!
        if 'wa' in languages:
            languages.remove('wa')

    out = open(output, 'w')
    try:
        for src in input_files:
            readingDescription = False
            readingI18nPreamble = False
            readingFloat = False
            readingCiteFormats = False
            descLines = []
            srcfile = open(src)
            try:
                lines = srcfile.readlines()
            finally:
                srcfile.close()
            lineno = 0
            for line in lines:
                lineno += 1
                if readingDescription:
                    res = DescEnd.search(line)
                    if res is not None:
                        readingDescription = False
                        desc = " ".join(descLines)
                        if not layouttranslations:
                            writeString(out, src, base, lineno + 1, desc)
                        continue
                    # drop the leading comment character
                    descLines.append(line[1:].strip())
                    continue
                res = DescBegin.search(line)
                if res is not None:
                    readingDescription = True
                    # do not mix the lines of different description blocks
                    descLines = []
                    continue
                if readingI18nPreamble:
                    res = EndI18nPreamble.search(line)
                    if res is not None:
                        readingI18nPreamble = False
                        continue
                    res = I18nString.search(line)
                    if res is not None:
                        string = res.group(1)
                        if layouttranslations:
                            keyset.add(string)
                        else:
                            writeString(out, src, base, lineno, string)
                    continue
                res = I18nPreamble.search(line)
                if res is not None:
                    readingI18nPreamble = True
                    continue
                res = NameRE.search(line)
                if res is not None:
                    string = res.group(1)
                    if not layouttranslations:
                        writeString(out, src, base, lineno + 1, string)
                    continue
                res = Style.search(line)
                if res is not None:
                    string = res.group(1)
                    string = string.replace('_', ' ')
                    if not layouttranslations:
                        writeString(out, src, base, lineno, string)
                    continue
                res = LabelString.search(line)
                if res is not None:
                    string = res.group(1)
                    if not layouttranslations:
                        writeString(out, src, base, lineno, string)
                    continue
                res = GuiName.search(line)
                if res is not None:
                    string = res.group(1)
                    if layouttranslations:
                        # gui name must only be added for floats
                        if readingFloat:
                            keyset.add(string)
                    else:
                        writeString(out, src, base, lineno, string)
                    continue
                res = CategoryName.search(line)
                if res is not None:
                    string = res.group(1)
                    if not layouttranslations:
                        writeString(out, src, base, lineno, string)
                    continue
                res = ListName.search(line)
                if res is not None:
                    string = res.group(1)
                    if layouttranslations:
                        keyset.add(string.strip('"'))
                    else:
                        writeString(out, src, base, lineno, string)
                    continue
                res = InsetLayout.search(line)
                if res is not None:
                    string = res.group(1)
                    string = string.replace('_', ' ')
                    # Only the part after "Flex:" is used in translation
                    m = FlexCheck.search(string)
                    if m:
                        if not layouttranslations:
                            writeString(out, src, base, lineno, m.group(1))
                    continue
                res = Category.search(line)
                if res is not None:
                    string = res.group(1)
                    if not layouttranslations:
                        writeString(out, src, base, lineno, string)
                    continue
                res = CounterFormat.search(line)
                if res is not None:
                    string = res.group(1)
                    if not layouttranslations:
                        writeString(out, src, base, lineno, string)
                    continue
                res = Float.search(line)
                if res is not None:
                    readingFloat = True
                    continue
                res = CiteFormat.search(line)
                if res is not None:
                    readingCiteFormats = True
                res = End.search(line)
                if res is not None:
                    readingCiteFormats = False
                    readingFloat = False
                if readingCiteFormats:
                    res = KeyVal.search(line)
                    if res is not None:
                        val = res.group(1)
                        if not layouttranslations:
                            writeString(out, src, base, lineno, val)

        if layouttranslations:
            # Extract translations of layout files
            import polib

            # Sort languages and key to minimize the diff between different
            # runs with changed translations
            languages.sort()
            keys = list(keyset)
            keys.sort()

            out.write('''# This file has been automatically generated by po/lyx_pot.py.
# PLEASE MODIFY ONLY THE LAGUAGES HAVING NO .po FILE! If you want to regenerate
# this file from the translations, run `make ../lib/layouttranslations' in po.
# Python polib library is needed for building the output file.
#
# This file should remain fixed during minor LyX releases.''' + '\n')
            for lang in languages:
                out.write('\nTranslation %s\n' % lang)
                if lang in oldtrans:
                    trans = oldtrans[lang]
                else:
                    trans = dict()
                if not lang in oldlanguages:
                    poname = os.path.join(base, 'po/' + lang + '.po')
                    po = polib.pofile(poname)
                    # Iterate through po entries and not keys for speed
                    # reasons, and test membership against the set instead
                    # of the sorted list.
                    for entry in po:
                        if not entry.translated():
                            continue
                        if entry.msgid in keyset:
                            key = entry.msgid
                            val = entry.msgstr
                            # some translators keep untranslated entries
                            if val != key:
                                trans[key] = val
                for key in keys:
                    if key in trans:
                        val = trans[key].replace('\\', '\\\\').replace('"', '\\"')
                        key = key.replace('\\', '\\\\').replace('"', '\\"')
                        out.write('\t"%s" "%s"\n' %
                                  (key.encode('utf-8'), val.encode('utf-8')))
                    # to print untranslated entries, uncomment the following lines
                    #else:
                    #    key = key.replace('\\', '\\\\').replace('"', '\\"')
                    #    out.write('\t"%s" "%s"\n' %
                    #              (key.encode('utf-8'), key.encode('utf-8')))
                out.write('End\n')
    finally:
        out.close()
336
337
def qt4_l10n(input_files, output, base):
    '''Generate pot file from src/frontends/qt4/ui/*.ui

    input_files -- list of .ui files to scan
    output      -- name of the pot file to write (overwritten)
    base        -- top source directory, used for the location comments

    Every line of the form <string>...</string> yields one entry, except
    for the line directly following a <property name="shortcut"> line.
    '''
    pat = re.compile(r'\s*<string>(.*)</string>')
    prop = re.compile(r'\s*<property.*name.*=.*shortcut')
    out = open(output, 'w')
    try:
        for src in input_files:
            infile = open(src)
            try:
                skipNextLine = False
                for lineno, line in enumerate(infile.readlines()):
                    # skip the line after <property name=shortcut>
                    if skipNextLine:
                        skipNextLine = False
                        continue
                    if prop.match(line):
                        skipNextLine = True
                        continue
                    # get lines that match <string>...</string>
                    res = pat.match(line)
                    if not res:
                        continue
                    string = res.group(1)
                    # resolve the XML entities ...
                    string = string.replace('&amp;', '&').replace('&quot;', '"')
                    string = string.replace('&lt;', '<').replace('&gt;', '>')
                    # ... and escape the result for the pot file
                    string = string.replace('\\', '\\\\').replace('"', r'\"')
                    string = string.replace('&#x0a;', r'\n')
                    out.write('#: %s:%d\nmsgid "%s"\nmsgstr ""\n\n' %
                              (relativePath(src, base), lineno + 1, string))
            finally:
                # close the input even if scanning or writing failed
                infile.close()
    finally:
        out.close()
365
366
def languages_l10n(input_files, output, base):
    '''Generate pot file from lib/languages

    input_files -- list of files to scan
    output      -- name of the pot file to write (overwritten)
    base        -- top source directory, used for the location comments

    One entry is written (via writeString) for the value of every GuiName
    line that is not preceded by a '#' on the same line.
    '''
    GuiName = re.compile(r'^[^#]*GuiName\s+(.*)')
    out = open(output, 'w')
    try:
        for src in input_files:
            infile = open(src)
            try:
                lines = infile.readlines()
            finally:
                infile.close()
            for index, line in enumerate(lines):
                res = GuiName.search(line)
                if res is not None:
                    # line numbers in the location comments start at 1
                    writeString(out, src, base, index + 1, res.group(1))
    finally:
        out.close()
385
386
def external_l10n(input_files, output, base):
    '''Generate pot file from lib/external_templates

    input_files -- list of template files to scan
    output      -- name of the pot file to write (overwritten)
    base        -- top source directory, used for the location comments

    The values of Template and GuiName lines yield one entry each (unless
    they occur inside a help text). The non-empty lines between HelpText
    and HelpTextEnd are combined into one multi-line entry.
    '''
    Template = re.compile(r'^Template\s+(.*)')
    GuiName = re.compile(r'\s*GuiName\s+(.*)')
    HelpTextStart = re.compile(r'\s*HelpText\s')
    HelpTextSection = re.compile(r'\s*(\S.*)\s*$')
    HelpTextEnd = re.compile(r'\s*HelpTextEnd\s')
    out = open(output, 'w')
    try:
        for src in input_files:
            infile = open(src)
            try:
                inHelp = False
                hadHelp = False
                prev_help_string = ''
                for lineno, line in enumerate(infile.readlines()):
                    res = Template.match(line) or GuiName.match(line)
                    if res:
                        string = res.group(1)
                    elif inHelp:
                        if HelpTextEnd.match(line):
                            if hadHelp:
                                # terminate the multi-line msgid
                                out.write('\nmsgstr ""\n\n')
                            inHelp = False
                            hadHelp = False
                            prev_help_string = ''
                        else:
                            res = HelpTextSection.match(line)
                            if res:
                                help_string = res.group(1).replace('"', '')
                                if help_string != "" and prev_help_string == '':
                                    # first line: start a new multi-line msgid
                                    out.write('#: %s:%d\nmsgid ""\n"%s\\n"\n' %
                                              (relativePath(src, base),
                                               lineno + 1, help_string))
                                    hadHelp = True
                                elif help_string != "":
                                    out.write('"%s\\n"\n' % help_string)
                                prev_help_string = help_string
                        # Help text lines never yield a Template/GuiName
                        # entry. Falling through would use a stale (or not
                        # yet assigned) string.
                        continue
                    elif HelpTextStart.match(line):
                        inHelp = True
                        prev_help_string = ''
                        continue
                    else:
                        continue
                    string = string.replace('"', '')
                    if string != "" and not inHelp:
                        out.write('#: %s:%d\nmsgid "%s"\nmsgstr ""\n\n' %
                                  (relativePath(src, base), lineno + 1, string))
            finally:
                # close the input even if scanning or writing failed
                infile.close()
    finally:
        out.close()
434
435
def formats_l10n(input_files, output, base):
    '''Generate pot file from configure.py

    input_files -- list of input files, only the first one is read
    output      -- name of the pot file to write (overwritten)
    base        -- top source directory, used for the location comments

    For every line containing a \\Format definition, the third field (the
    label, quoted or not) yields one entry. If the following field (the
    shortcut) is not empty, a second entry "label|shortcut" is written.
    '''
    # "\\Format" matches a literal backslash followed by "Format". A single
    # backslash would form the escape "\F", which the re module rejects.
    GuiName = re.compile(r'.*\\Format\s+\S+\s+\S+\s+"([^"]*)"\s+(\S*)\s+.*')
    GuiName2 = re.compile(r'.*\\Format\s+\S+\s+\S+\s+([^"]\S+)\s+(\S*)\s+.*')
    out = open(output, 'w')
    try:
        infile = open(input_files[0])
        try:
            for lineno, line in enumerate(infile.readlines()):
                # try the quoted label first, then the unquoted one
                res = GuiName.match(line) or GuiName2.match(line)
                if not res:
                    continue
                label = res.group(1)
                shortcut = res.group(2).replace('"', '')
                label = label.replace('\\', '\\\\').replace('"', '')
                labelsc = ""
                if shortcut != "":
                    labelsc = label + "|" + shortcut
                if label != "":
                    out.write('#: %s:%d\nmsgid "%s"\nmsgstr ""\n\n' %
                              (relativePath(input_files[0], base),
                               lineno + 1, label))
                if labelsc != "":
                    out.write('#: %s:%d\nmsgid "%s"\nmsgstr ""\n\n' %
                              (relativePath(input_files[0], base),
                               lineno + 1, labelsc))
        finally:
            infile.close()
    finally:
        out.close()
464
465
def encodings_l10n(input_files, output, base):
    '''Generate pot file from lib/encodings

    input_files -- list of input files, only the first one is read
    output      -- name of the pot file to write (overwritten)
    base        -- top source directory, used for the location comments

    The quoted GUI name of every line starting with "Encoding" yields one
    entry. Lines starting with "Encoding" that cannot be parsed are reported
    on standard output and skipped.
    '''
    # assuming only one encodings file
    #                 Encoding utf8      utf8    "Unicode (utf8)" UTF-8    variable inputenc
    reg = re.compile(r'Encoding [\w-]+\s+[\w-]+\s+"([\w \-\(\)]+)"\s+[\w-]+\s+(fixed|variable)\s+\w+.*')
    out = open(output, 'w')
    try:
        infile = open(input_files[0])
        try:
            for lineno, line in enumerate(infile.readlines()):
                if not line.startswith('Encoding'):
                    continue
                res = reg.match(line)
                if res:
                    out.write('#: %s:%d\nmsgid "%s"\nmsgstr ""\n\n' %
                              (relativePath(input_files[0], base),
                               lineno + 1, res.group(1)))
                else:
                    # No need to abort if the parsing fails
                    print("Error: Unable to handle line:")
                    print(line)
        finally:
            infile.close()
    finally:
        out.close()
486
487
488
# Usage message, printed for -h/--help and after invalid command line options
Usage = '''
lyx_pot.py [-b|--base top_src_dir] [-o|--output output_file] [-h|--help] [-s|--src_file filename] -t|--type input_type input_files

where
    --base:
        path to the top source directory. default to '.'
    --output:
        output pot file, default to './lyx.pot'
    --src_file
        filename that contains a list of input files in each line
    --type: input_type can be
        ui: lib/ui/*
        layouts: lib/layouts/*
        layouttranslations: create lib/layouttranslations from po/*.po and lib/layouts/*
        qt4: qt4 ui files
        languages: file lib/languages
        encodings: file lib/encodings
        external: external templates file
        formats: formats predefined in lib/configure.py
'''

if __name__ == '__main__':
    input_type = None
    output = 'lyx.pot'
    base = '.'
    input_files = []
    #
    try:
        optlist, args = getopt.getopt(sys.argv[1:], 'ht:o:b:s:',
            ['help', 'type=', 'output=', 'base=', 'src_file='])
    except getopt.GetoptError:
        # unknown option or missing option argument
        print(str(sys.exc_info()[1]))
        print(Usage)
        sys.exit(1)
    for (opt, value) in optlist:
        if opt in ['-h', '--help']:
            print(Usage)
            sys.exit(0)
        elif opt in ['-o', '--output']:
            output = value
        elif opt in ['-b', '--base']:
            base = value
        elif opt in ['-t', '--type']:
            input_type = value
        elif opt in ['-s', '--src_file']:
            # one input file name per line, blank lines are ignored
            listfile = open(value)
            try:
                input_files = [f.strip() for f in listfile if f.strip()]
            finally:
                listfile.close()

    if input_type not in ['ui', 'layouts', 'layouttranslations', 'qt4', 'languages', 'encodings', 'external', 'formats'] or not output:
        print('Wrong input type or output filename.')
        sys.exit(1)

    # files given on the command line are processed after those of --src_file
    input_files += args

    if input_type == 'ui':
        ui_l10n(input_files, output, base)
    elif input_type == 'layouts':
        layouts_l10n(input_files, output, base, False)
    elif input_type == 'layouttranslations':
        layouts_l10n(input_files, output, base, True)
    elif input_type == 'qt4':
        qt4_l10n(input_files, output, base)
    elif input_type == 'external':
        external_l10n(input_files, output, base)
    elif input_type == 'formats':
        formats_l10n(input_files, output, base)
    elif input_type == 'encodings':
        encodings_l10n(input_files, output, base)
    else:
        # the only remaining valid type is 'languages'
        languages_l10n(input_files, output, base)
553
554