]> git.lyx.org Git - features.git/blob - po/lyx_pot.py
Remove unneeded layout translations as hinted on the users list.
[features.git] / po / lyx_pot.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 # file lyx_pot.py
5 # This file is part of LyX, the document processor.
6 # Licence details can be found in the file COPYING.
7 #
8 # \author Bo Peng
9 #
10 # Full author contact details are available in file CREDITS
11
12 # Usage: use
13 #     lyx_pot.py -h
14 # to get usage message
15
16 # This script will extract translatable strings from input files and write
17 # to output in gettext .pot format.
18 #
19 import sys, os, re, getopt
20 if sys.version_info < (2, 4, 0):
21     from sets import Set as set
22
def relativePath(path, base):
    '''Return path relative to the top source directory base.

    Both arguments are resolved with os.path.realpath() and normalised
    before they are compared component by component.  The result always
    uses '/' as separator.

    If path is not located below base, a warning is printed and the
    leading components are stripped anyway.  An empty string is returned
    if nothing is left after stripping (e.g. if path and base are the
    same directory).
    '''
    # full pathnames, split into their components
    path_parts = os.path.normpath(os.path.realpath(path)).split(os.sep)
    base_parts = os.path.normpath(os.path.realpath(base)).split(os.sep)
    if path_parts[:len(base_parts)] != base_parts:
        print("Path %s is not under top source directory" % path)
    remainder = path_parts[len(base_parts):]
    if not remainder:
        # os.path.join() cannot be called without arguments
        return ''
    # replace all \ by / such that we get the same comments on Windows and *nix
    return os.path.join(*remainder).replace('\\', '/')
34
35
def writeString(outfile, infile, basefile, lineno, string):
    '''Write one .pot entry for string to the open file outfile.

    outfile:  file object the entry is appended to
    infile:   name of the file the string was found in
    basefile: top source directory, infile is reported relative to it
    lineno:   line number used in the "#:" reference comment
    string:   the message; backslashes are doubled and double quotes are
              removed.  Nothing is written if the result is empty.
    '''
    msgid = string.replace('\\', '\\\\').replace('"', '')
    if msgid == "":
        return
    entry = '#: %s:%d\nmsgid "%s"\nmsgstr ""\n' % \
        (relativePath(infile, basefile), lineno, msgid)
    # the extra newline is the one a print statement would have added
    outfile.write(entry + '\n')
42
43
def ui_l10n(input_files, output, base):
    '''Generate pot file from lib/ui/*

    Every line of every file in input_files is tried against the patterns
    below (in this order); the first one that matches provides the string.
    Strings that are empty are skipped.  The result is written to the file
    named output, file names are reported relative to base.
    '''
    Submenu = re.compile(r'^[^#]*Submenu\s+"([^"]*)"')
    Popupmenu = re.compile(r'^[^#]*PopupMenu\s+"[^"]+"\s+"([^"]*)"')
    IconPalette = re.compile(r'^[^#]*IconPalette\s+"[^"]+"\s+"([^"]*)"')
    Toolbar = re.compile(r'^[^#]*Toolbar\s+"[^"]+"\s+"([^"]*)"')
    Item = re.compile(r'[^#]*Item\s+"([^"]*)"')
    TableInsert = re.compile(r'[^#]*TableInsert\s+"([^"]*)"')
    patterns = (Submenu, Popupmenu, IconPalette, Toolbar, Item, TableInsert)

    out = open(output, 'w')
    try:
        for src in input_files:
            infile = open(src)
            try:
                lines = infile.readlines()
            finally:
                infile.close()
            for lineno, line in enumerate(lines):
                string = None
                for pattern in patterns:
                    res = pattern.match(line)
                    if res:
                        string = res.group(1)
                        # underscores are turned into blanks for Submenu only
                        if pattern is Submenu:
                            string = string.replace('_', ' ')
                        break
                if string is None:
                    continue
                string = string.replace('"', '')
                if string != "":
                    out.write('#: %s:%d\nmsgid "%s"\nmsgstr ""\n' % \
                        (relativePath(src, base), lineno+1, string) + '\n')
    finally:
        out.close()
77
78
def layouts_l10n(input_files, output, base, layouttranslations):
    '''Generate pot file from lib/layouts/*.{layout,inc,module}

    input_files: layout files to scan
    output:      name of the file to write
    base:        top source directory
    layouttranslations:
        If false, the translatable strings found in input_files are
        written to output in .pot format.
        If true, output is rewritten as a layouttranslations file instead:
        the languages are taken from po/LINGUAS (plus the ones found in
        the old output file), the keys are the strings marked with _(...)
        in Lang/Babel preambles, the GuiName and ListName of floats that
        use the float package and are not predefined, and all keys of the
        old output file.  The values are the old translations, updated
        from po/<lang>.po (this needs the polib module).

    NOTE: the layouttranslations mode decodes and encodes the strings
    by hand (str.decode()/encode()), i.e. it relies on byte strings being
    read from and written to the files.
    '''
    Style = re.compile(r'^Style\s+(.*)', re.IGNORECASE)
    # include ???LabelString???, but exclude comment lines
    LabelString = re.compile(r'^[^#]*LabelString\S*\s+(.*)')
    GuiName = re.compile(r'\s*GuiName\s+(.*)')
    ListName = re.compile(r'\s*ListName\s+(.*)')
    CategoryName = re.compile(r'\s*Category\s+(.*)')
    NameRE = re.compile(r'DeclareLyXModule.*{(.*)}')
    InsetLayout = re.compile(r'^InsetLayout\s+\"?(.*)\"?')
    FlexCheck = re.compile(r'^Flex:(.*)')
    DescBegin = re.compile(r'#+\s*DescriptionBegin\s*$')
    DescEnd = re.compile(r'#+\s*DescriptionEnd\s*$')
    Category = re.compile(r'#Category: (.*)$')
    # The alternation has to be grouped: without the parentheses the
    # pattern means "Lang anywhere" or "BabelPreamble at the end", such
    # that any line containing "Lang" would start a preamble.
    I18nPreamble = re.compile(r'^\s*(Lang|Babel)Preamble\s*$')
    EndI18nPreamble = re.compile(r'^\s*End(Lang|Babel)Preamble\s*$')
    I18nString = re.compile(r'_\(([^\)]+)\)')
    CounterFormat = re.compile(r'\s*PrettyFormat\s+"?(.*)"?')
    CiteFormat = re.compile(r'\s*CiteFormat')
    KeyVal = re.compile(r'^\s*_\w+\s+(.*)$')
    Float = re.compile(r'\s*Float\s*$')
    UsesFloatPkg = re.compile(r'\s*UsesFloatPkg\s+(.*)')
    IsPredefined = re.compile(r'\s*IsPredefined\s+(.*)')
    # NOTE(review): End and Comment are used with search(), so they also
    # match in the middle of a line -- confirm that this is intended.
    End = re.compile(r'\s*End')
    Comment = re.compile(r'\s*#')
    Translation = re.compile(r'\s*Translation\s+(.*)\s*$')
    KeyValPair = re.compile(r'\s*"(.*)"\s+"(.*)"')

    oldlanguages = []
    languages = []
    keyset = set()
    oldtrans = dict()
    if layouttranslations:
        linguas_file = os.path.join(base, 'po/LINGUAS')
        linguas = open(linguas_file)
        try:
            for line in linguas.readlines():
                if Comment.search(line) is None:
                    languages.extend(line.split())
        finally:
            linguas.close()

        # read old translations if available
        # The file is read completely and closed again here, since it is
        # opened for writing below.
        try:
            oldfile = open(output)
            try:
                oldlines = oldfile.readlines()
            finally:
                oldfile.close()
        except IOError:
            oldlines = []
            print("Warning: Unable to open %s for reading." % output)
            print("         Old translations will be lost.")
        lang = ''
        for line in oldlines:
            if Comment.search(line):
                continue
            if line.strip() == '':
                continue
            res = Translation.search(line)
            if res:
                lang = res.group(1)
                if lang not in languages:
                    # language without .po file: known from the old file only
                    oldlanguages.append(lang)
                    languages.append(lang)
                oldtrans[lang] = dict()
                continue
            if End.search(line):
                lang = ''
                continue
            res = KeyValPair.search(line)
            if res and lang != '':
                key = res.group(1).decode('utf-8')
                val = res.group(2).decode('utf-8')
                key = key.replace('\\"', '"').replace('\\\\', '\\')
                val = val.replace('\\"', '"').replace('\\\\', '\\')
                oldtrans[lang][key] = val
                keyset.add(key)
                continue
            print("Error: Unable to handle line:")
            print(line)

        # walon is not a known document language
        # FIXME: Do not hardcode, read from lib/languages!
        if 'wa' in languages:
            languages.remove('wa')

    out = open(output, 'w')
    try:
        for src in input_files:
            readingDescription = False
            readingI18nPreamble = False
            readingFloat = False
            readingCiteFormats = False
            isPredefined = False
            usesFloatPkg = True
            listname = ''
            floatname = ''
            descLines = []
            infile = open(src)
            try:
                lines = infile.readlines()
            finally:
                infile.close()
            lineno = 0
            for line in lines:
                lineno += 1
                if readingDescription:
                    if DescEnd.search(line) is not None:
                        readingDescription = False
                        desc = " ".join(descLines)
                        # NOTE(review): the reference points one line past
                        # the DescriptionEnd line -- kept as it was.
                        if not layouttranslations:
                            writeString(out, src, base, lineno + 1, desc)
                        continue
                    # strip the leading comment character
                    descLines.append(line[1:].strip())
                    continue
                if DescBegin.search(line) is not None:
                    readingDescription = True
                    # do not mix up the text of several descriptions
                    descLines = []
                    continue
                if readingI18nPreamble:
                    if EndI18nPreamble.search(line) is not None:
                        readingI18nPreamble = False
                        continue
                    res = I18nString.search(line)
                    if res is not None:
                        string = res.group(1)
                        if layouttranslations:
                            keyset.add(string)
                        else:
                            writeString(out, src, base, lineno, string)
                    continue
                if I18nPreamble.search(line) is not None:
                    readingI18nPreamble = True
                    continue
                res = NameRE.search(line)
                if res is not None:
                    string = res.group(1)
                    if not layouttranslations:
                        writeString(out, src, base, lineno + 1, string)
                    continue
                res = Style.search(line)
                if res is not None:
                    string = res.group(1)
                    string = string.replace('_', ' ')
                    if not layouttranslations:
                        writeString(out, src, base, lineno, string)
                    continue
                res = LabelString.search(line)
                if res is not None:
                    string = res.group(1)
                    if not layouttranslations:
                        writeString(out, src, base, lineno, string)
                    continue
                res = GuiName.search(line)
                if res is not None:
                    string = res.group(1)
                    if layouttranslations:
                        # gui name must only be added for floats
                        if readingFloat:
                            floatname = string
                    else:
                        writeString(out, src, base, lineno, string)
                    continue
                res = CategoryName.search(line)
                if res is not None:
                    string = res.group(1)
                    if not layouttranslations:
                        writeString(out, src, base, lineno, string)
                    continue
                res = ListName.search(line)
                if res is not None:
                    string = res.group(1)
                    if layouttranslations:
                        listname = string.strip('"')
                    else:
                        writeString(out, src, base, lineno, string)
                    continue
                res = InsetLayout.search(line)
                if res is not None:
                    string = res.group(1)
                    string = string.replace('_', ' ')
                    # Only the xxx of Flex:xxx is used in translation
                    m = FlexCheck.search(string)
                    if m:
                        if not layouttranslations:
                            writeString(out, src, base, lineno, m.group(1))
                    continue
                res = Category.search(line)
                if res is not None:
                    string = res.group(1)
                    if not layouttranslations:
                        writeString(out, src, base, lineno, string)
                    continue
                res = CounterFormat.search(line)
                if res is not None:
                    string = res.group(1)
                    if not layouttranslations:
                        writeString(out, src, base, lineno, string)
                    continue
                if Float.search(line) is not None:
                    readingFloat = True
                    continue
                res = IsPredefined.search(line)
                if res is not None:
                    isPredefined = (res.group(1).lower() == 'true')
                    continue
                res = UsesFloatPkg.search(line)
                if res is not None:
                    usesFloatPkg = (res.group(1).lower() == 'true')
                    continue
                if CiteFormat.search(line) is not None:
                    readingCiteFormats = True
                    continue
                if End.search(line) is not None:
                    # If a float is predefined by the package and it does not need
                    # the float package then it uses the standard babel translations.
                    # This is even true for MarginFigure, MarginTable (both from
                    # tufte-book.layout) and Planotable, Plate (both from aguplus.inc).
                    if layouttranslations and readingFloat and usesFloatPkg and not isPredefined:
                        if floatname != '':
                            keyset.add(floatname)
                        if listname != '':
                            keyset.add(listname)
                    isPredefined = False
                    usesFloatPkg = True
                    listname = ''
                    floatname = ''
                    readingCiteFormats = False
                    readingFloat = False
                    continue
                if readingCiteFormats:
                    res = KeyVal.search(line)
                    if res is not None:
                        val = res.group(1)
                        if not layouttranslations:
                            writeString(out, src, base, lineno, val)

        if layouttranslations:
            # Extract translations of layout files
            import polib

            # Sort languages and key to minimize the diff between different runs
            # with changed translations
            languages.sort()
            keys = list(keyset)
            keys.sort()

            out.write('''# This file has been automatically generated by po/lyx_pot.py.
# PLEASE MODIFY ONLY THE LAGUAGES HAVING NO .po FILE! If you want to regenerate
# this file from the translations, run `make ../lib/layouttranslations' in po.
# Python polib library is needed for building the output file.
#
# This file should remain fixed during minor LyX releases.
# For more comments see README.localization file.''' + '\n')
            for lang in languages:
                out.write('\nTranslation %s' % lang + '\n')
                if lang in oldtrans:
                    trans = oldtrans[lang]
                else:
                    trans = dict()
                if lang not in oldlanguages:
                    poname = os.path.join(base, 'po/' + lang + '.po')
                    po = polib.pofile(poname)
                    # Iterate through po entries and not keys for speed reasons.
                    for entry in po:
                        if not entry.translated():
                            continue
                        # look up in the set, not in the sorted list
                        if entry.msgid in keyset:
                            key = entry.msgid
                            val = entry.msgstr
                            # some translators keep untranslated entries
                            if val != key:
                                trans[key] = val
                for key in keys:
                    if key in trans:
                        val = trans[key].replace('\\', '\\\\').replace('"', '\\"')
                        quoted = key.replace('\\', '\\\\').replace('"', '\\"')
                        out.write('\t"%s" "%s"' % \
                                 (quoted.encode('utf-8'), val.encode('utf-8')) + '\n')
                    # also print untranslated entries to help translators
                    elif lang not in oldlanguages:
                        quoted = key.replace('\\', '\\\\').replace('"', '\\"')
                        out.write('\t"%s" "%s"' % \
                                 (quoted.encode('utf-8'), quoted.encode('utf-8')) + '\n')
                out.write('End' + '\n')
    finally:
        out.close()
375
376
def qt4_l10n(input_files, output, base):
    '''Generate pot file from src/frontends/qt4/ui/*.ui

    The contents of all <string>...</string> lines are written to the
    file named output, except for the line that follows a property line
    with a name matching "shortcut".  File names are reported relative
    to base.
    '''
    pat = re.compile(r'\s*<string>(.*)</string>')
    prop = re.compile(r'\s*<property.*name.*=.*shortcut')
    out = open(output, 'w')
    try:
        for src in input_files:
            infile = open(src)
            try:
                lines = infile.readlines()
            finally:
                infile.close()
            skipNextLine = False
            for lineno, line in enumerate(lines):
                # skip the line after <property name=shortcut>
                if skipNextLine:
                    skipNextLine = False
                    continue
                if prop.match(line):
                    skipNextLine = True
                    continue
                # get lines that match <string>...</string>
                res = pat.match(line)
                if not res:
                    continue
                string = res.group(1)
                string = string.replace('&quot;', '"')
                string = string.replace('&lt;', '<').replace('&gt;', '>')
                string = string.replace('\\', '\\\\').replace('"', r'\"')
                string = string.replace('&#x0a;', r'\n')
                # &amp; has to be decoded last, otherwise an escaped entity
                # like &amp;lt; would be decoded twice
                string = string.replace('&amp;', '&')
                out.write('#: %s:%d\nmsgid "%s"\nmsgstr ""\n' % \
                    (relativePath(src, base), lineno+1, string) + '\n')
    finally:
        out.close()
404
405
def languages_l10n(input_files, output, base):
    '''Generate pot file from lib/languages

    The value of every GuiName entry that is not commented out is written
    to the file named output.  File names are reported relative to base.
    '''
    GuiName = re.compile(r'^[^#]*GuiName\s+(.*)')
    out = open(output, 'w')
    try:
        for src in input_files:
            infile = open(src)
            try:
                lines = infile.readlines()
            finally:
                infile.close()
            for index, line in enumerate(lines):
                res = GuiName.search(line)
                if res is not None:
                    # line numbers start at 1
                    writeString(out, src, base, index + 1, res.group(1))
    finally:
        out.close()
424
425
def external_l10n(input_files, output, base):
    '''Generate pot file from lib/external_templates

    Template and GuiName values are written as single line entries (not
    while reading a help text).  The lines between HelpText and
    HelpTextEnd are collected into one multi line entry.  The result is
    written to the file named output, file names are reported relative
    to base.
    '''
    Template = re.compile(r'^Template\s+(.*)')
    GuiName = re.compile(r'\s*GuiName\s+(.*)')
    HelpTextStart = re.compile(r'\s*HelpText\s')
    HelpTextSection = re.compile(r'\s*(\S.*)\s*$')
    HelpTextEnd = re.compile(r'\s*HelpTextEnd\s')
    out = open(output, 'w')
    try:
        for src in input_files:
            infile = open(src)
            try:
                lines = infile.readlines()
            finally:
                infile.close()
            inHelp = False
            hadHelp = False
            prev_help_string = ''
            for lineno, line in enumerate(lines):
                res = Template.match(line) or GuiName.match(line)
                if res:
                    # Only these lines produce a single line entry.  The
                    # string must not be kept for the following lines,
                    # otherwise it would be written again at HelpTextEnd.
                    string = res.group(1).replace('"', '')
                    if string != "" and not inHelp:
                        out.write('#: %s:%d\nmsgid "%s"\nmsgstr ""\n' % \
                            (relativePath(src, base), lineno+1, string) + '\n')
                    continue
                if inHelp:
                    if HelpTextEnd.match(line):
                        # finish the multi line entry if one was started
                        if hadHelp:
                            out.write('\nmsgstr ""\n' + '\n')
                        inHelp = False
                        hadHelp = False
                        prev_help_string = ''
                        continue
                    res = HelpTextSection.match(line)
                    if res:
                        help_string = res.group(1).replace('"', '')
                        if help_string != "" and prev_help_string == '':
                            # first line of the help text: start the entry
                            out.write('#: %s:%d\nmsgid ""\n"%s\\n"' % \
                                (relativePath(src, base), lineno+1, help_string) + '\n')
                            hadHelp = True
                        elif help_string != "":
                            out.write('"%s\\n"' % help_string + '\n')
                        prev_help_string = help_string
                elif HelpTextStart.match(line):
                    inHelp = True
                    prev_help_string = ''
    finally:
        out.close()
473
474
def formats_l10n(input_files, output, base):
    '''Generate pot file from configure.py

    Only the first file of input_files is read.  For every line with a
    Format definition the label (fourth field, quoted or not) and, if
    there is a shortcut (fifth field), also "label|shortcut" are written
    to the file named output.
    '''
    # These patterns used to start with '.*\Format'.  '\F' is no valid
    # escape: it has been treated as a plain 'F' by older python versions
    # and is rejected by newer ones.  The patterns below match what the
    # old ones have always matched.
    GuiName = re.compile(r'.*Format\s+\S+\s+\S+\s+"([^"]*)"\s+(\S*)\s+.*')
    GuiName2 = re.compile(r'.*Format\s+\S+\s+\S+\s+([^"]\S+)\s+(\S*)\s+.*')
    out = open(output, 'w')
    try:
        infile = open(input_files[0])
        try:
            lines = infile.readlines()
        finally:
            infile.close()
        for lineno, line in enumerate(lines):
            # try the quoted label first
            res = GuiName.match(line) or GuiName2.match(line)
            if not res:
                continue
            label = res.group(1).replace('\\', '\\\\').replace('"', '')
            shortcut = res.group(2).replace('"', '')
            labelsc = ""
            if shortcut != "":
                labelsc = label + "|" + shortcut
            if label != "":
                out.write('#: %s:%d\nmsgid "%s"\nmsgstr ""\n' % \
                    (relativePath(input_files[0], base), lineno+1, label) + '\n')
            if labelsc != "":
                out.write('#: %s:%d\nmsgid "%s"\nmsgstr ""\n' % \
                    (relativePath(input_files[0], base), lineno+1, labelsc) + '\n')
    finally:
        out.close()
503
504
def encodings_l10n(input_files, output, base):
    '''Generate pot file from lib/encodings

    Only the first file of input_files is read.  The quoted gui name of
    every line starting with "Encoding" is written to the file named
    output.  Lines that cannot be parsed are reported and skipped.
    '''
    # assuming only one encodings file
    #                 Encoding utf8      utf8    "Unicode (utf8)" UTF-8    variable inputenc
    # (raw string: the backslashes belong to the regular expression)
    reg = re.compile(r'Encoding [\w-]+\s+[\w-]+\s+"([\w \-\(\)]+)"\s+[\w-]+\s+(fixed|variable)\s+\w+.*')
    out = open(output, 'w')
    try:
        infile = open(input_files[0])
        try:
            lines = infile.readlines()
        finally:
            infile.close()
        for lineno, line in enumerate(lines):
            if not line.startswith('Encoding'):
                continue
            res = reg.match(line)
            if res:
                out.write('#: %s:%d\nmsgid "%s"\nmsgstr ""\n' % \
                    (relativePath(input_files[0], base), lineno+1, res.group(1)) + '\n')
            else:
                # No need to abort if the parsing fails
                print("Error: Unable to handle line:")
                print(line)
    finally:
        out.close()
525
526
527
# Usage message, printed for -h|--help
Usage = '''
lyx_pot.py [-b|--base top_src_dir] [-o|--output output_file] [-h|--help] [-s|--src_file filename] -t|--type input_type input_files

where
    --base:
        path to the top source directory. default to '.'
    --output:
        output pot file, default to './lyx.pot'
    --src_file
        filename that contains a list of input files in each line
    --type can be
        ui: lib/ui/*
        layouts: lib/layouts/*
        layouttranslations: create lib/layouttranslations from po/*.po and lib/layouts/*
        qt4: qt4 ui files
        languages: file lib/languages
        encodings: file lib/encodings
        external: external templates file
        formats: formats predefined in lib/configure.py
'''
548
if __name__ == '__main__':
    input_type = None
    output = 'lyx.pot'
    base = '.'
    input_files = []
    #
    try:
        optlist, args = getopt.getopt(sys.argv[1:], 'ht:o:b:s:',
            ['help', 'type=', 'output=', 'base=', 'src_file='])
    except getopt.GetoptError:
        # unknown option or missing argument: show the usage message
        # instead of a traceback
        print(str(sys.exc_info()[1]))
        print(Usage)
        sys.exit(1)
    for (opt, value) in optlist:
        if opt in ['-h', '--help']:
            print(Usage)
            sys.exit(0)
        elif opt in ['-o', '--output']:
            output = value
        elif opt in ['-b', '--base']:
            base = value
        elif opt in ['-t', '--type']:
            input_type = value
        elif opt in ['-s', '--src_file']:
            # one file name per line, empty lines are ignored
            listfile = open(value)
            try:
                input_files = [f.strip() for f in listfile.readlines() if f.strip()]
            finally:
                listfile.close()

    if input_type not in ['ui', 'layouts', 'layouttranslations', 'qt4', 'languages', 'encodings', 'external', 'formats'] or output is None:
        print('Wrong input type or output filename.')
        sys.exit(1)

    # files given on the command line are added to the ones from --src_file
    input_files += args

    if input_type == 'ui':
        ui_l10n(input_files, output, base)
    elif input_type == 'layouts':
        layouts_l10n(input_files, output, base, False)
    elif input_type == 'layouttranslations':
        layouts_l10n(input_files, output, base, True)
    elif input_type == 'qt4':
        qt4_l10n(input_files, output, base)
    elif input_type == 'external':
        external_l10n(input_files, output, base)
    elif input_type == 'formats':
        formats_l10n(input_files, output, base)
    elif input_type == 'encodings':
        encodings_l10n(input_files, output, base)
    else:
        languages_l10n(input_files, output, base)
592
593