Dash Core  0.12.2.1
P2P Digital Currency
update-translations.py
Go to the documentation of this file.
1 #!/usr/bin/python
2 # Copyright (c) 2014 Wladimir J. van der Laan
3 # Distributed under the MIT software license, see the accompanying
4 # file COPYING or http://www.opensource.org/licenses/mit-license.php.
5 '''
6 Run this script from the root of the repository to update all translations from
7 transifex.
8 It will do the following automatically:
9 
10 - fetch all translations using the tx tool
11 - post-process them into valid and committable format
12  - remove invalid control characters
13  - remove location tags (makes diffs less noisy)
14 
15 TODO:
16 - auto-add new translations to the build system according to the translation process
17 '''
18 from __future__ import division, print_function
19 import subprocess
20 import re
21 import sys
22 import os
23 import io
24 import xml.etree.ElementTree as ET
25 
26 # Name of transifex tool
27 TX = 'tx'
28 # Name of source language file
29 SOURCE_LANG = 'dash_en.ts'
30 # Directory with locale files
31 LOCALE_DIR = 'src/qt/locale'
32 # Minimum number of messages for translation to be considered at all
33 MIN_NUM_MESSAGES = 10
34 
36  if not os.path.exists('.git'):
37  print('No .git directory found')
38  print('Execute this script at the root of the repository', file=sys.stderr)
39  exit(1)
40 
42  if subprocess.call([TX, 'pull', '-f', '-a']):
43  print('Error while fetching translations', file=sys.stderr)
44  exit(1)
45 
def find_format_specifiers(s):
    '''Find all format specifiers in a string.

    Scans s for '%' characters and collects the single character that
    follows each one. The character directly after a '%' is consumed as its
    specifier, so '%%' yields '%' and the scan resumes after both characters.

    A '%' that is the very last character of s has no specifier; this is
    reported on stdout and otherwise ignored (best effort).

    Returns the list of specifier characters in order of appearance.
    '''
    pos = 0
    specifiers = []
    while True:
        percent = s.find('%', pos)
        if percent < 0:
            break
        try:
            specifiers.append(s[percent+1])
        except IndexError:
            # '%' is the final character: there is nothing after it to collect
            print('Failed to get specifier')
        # continue after the '%' and the character that followed it
        pos = percent+2
    return specifiers
60 
def split_format_specifiers(specifiers):
    '''Partition specifier characters into Qt-style and strprintf-style.

    Characters '1'..'9' are Qt numbered placeholders; everything else is
    treated as a strprintf specifier.

    Returns a tuple (qt, printf): qt is a set, because numbered placeholders
    may appear in any order, and printf is a list, because the order of the
    remaining specifiers must be preserved.
    '''
    qt_digits = frozenset('123456789')
    numbered = set()
    positional = []
    for spec in specifiers:
        if spec in qt_digits:
            numbered.add(spec)
        else:
            positional.append(spec)
    return numbered, positional
73 
def sanitize_string(s):
    '''Sanitize string for printing.

    Returns a copy of s with every newline replaced by a single space, so
    that the string can be embedded in a one-line message.
    '''
    return s.replace('\n',' ')
77 
def check_format_specifiers(source, translation, errors, numerus):
    '''Check that a translation uses the same format specifiers as its source.

    source      -- the source-language message
    translation -- the translated message to validate
    errors      -- list to which a human-readable message is appended for
                   every problem found
    numerus     -- True if the message is a numerus (plural) form

    Returns True if the translation is acceptable, False otherwise.
    '''
    source_f = split_format_specifiers(find_format_specifiers(source))
    # assert that no source messages contain both Qt and strprintf format specifiers
    # if this fails, go change the source as this is hacky and confusing!
    #assert(not(source_f[0] and source_f[1]))
    try:
        translation_f = split_format_specifiers(find_format_specifiers(translation))
    except IndexError:
        errors.append("Parse error in translation for '%s': '%s'" % (sanitize_string(source), sanitize_string(translation)))
        return False
    else:
        if source_f != translation_f:
            if numerus and source_f == (set(), ['n']) and translation_f == (set(), []) and translation.find('%') == -1:
                # Allow numerus translations to omit %n specifier (usually when it only has one possible value)
                return True
            errors.append("Mismatch between '%s' and '%s'" % (sanitize_string(source), sanitize_string(translation)))
            return False
    return True
96 
def all_ts_files(suffix=''):
    '''Yield (filename, filepath) for every translation file in LOCALE_DIR.

    Only directory entries ending in '.ts' + suffix are considered, and the
    source language file (SOURCE_LANG + suffix) is skipped. When a suffix
    is given it is stripped again, so both the yielded filename and the
    yielded filepath refer to the plain '.ts' name.
    '''
    wanted_ending = '.ts' + suffix
    source_name = SOURCE_LANG + suffix
    for entry in os.listdir(LOCALE_DIR):
        # process only language files, and do not process source language
        if entry == source_name or not entry.endswith(wanted_ending):
            continue
        # drop the provided suffix, if any
        name = entry[:-len(suffix)] if suffix else entry
        yield (name, os.path.join(LOCALE_DIR, name))
106 
# Matches the bytes 0x00-0x09, 0x0b, 0x0c and 0x0e-0x1f, i.e. every byte below
# 0x20 except line feed (0x0a) and carriage return (0x0d).
FIX_RE = re.compile(b'[\x00-\x09\x0b\x0c\x0e-\x1f]')
def remove_invalid_characters(s):
    '''Remove invalid characters from translation string.

    s is a bytes object; every byte matched by FIX_RE is dropped and the
    remaining bytes are returned unchanged.
    '''
    return FIX_RE.sub(b'', s)
111 
112 # Override cdata escape function to make our output match Qt's (optional, just for cleaner diffs for
113 # comparison, disable by default)
114 _orig_escape_cdata = None
def escape_cdata(text):
    '''Escape text like the saved original escaper, then also escape quotes.

    Delegates to the function stored in _orig_escape_cdata and afterwards
    replaces single quotes with &apos; and double quotes with &quot;.
    '''
    return _orig_escape_cdata(text).replace("'", '&apos;').replace('"', '&quot;')
120 
def postprocess_translations(reduce_diff_hacks=False):
    '''Validate and clean up all translation files in LOCALE_DIR.

    Every translation file is first renamed to '<name>.orig'. Each '.orig'
    file is then read, stripped of invalid control characters, parsed as
    XML and processed message by message:

    - translations whose format specifiers do not match the source are
      cleared and marked type="unfinished"
    - location tags are removed
    - messages whose translation is marked unfinished are removed

    The result is written back under the original file name, unless fewer
    than MIN_NUM_MESSAGES messages remain, in which case nothing is written
    (the content then only exists in the '.orig' copy).

    reduce_diff_hacks -- if True, override ElementTree's cdata escaping with
                         escape_cdata and rewrite ' />' as '/>' in the
                         output, to keep diffs against Qt's own output small.

    Returns True if at least one translation failed the format specifier
    check, False otherwise.
    '''
    global _orig_escape_cdata

    print('Checking and postprocessing...')

    if reduce_diff_hacks:
        # Install the override only once. Saving ET._escape_cdata a second
        # time would store escape_cdata as its own "original" and make it
        # recurse forever.
        if ET._escape_cdata is not escape_cdata:
            _orig_escape_cdata = ET._escape_cdata
            ET._escape_cdata = escape_cdata

    for (filename,filepath) in all_ts_files():
        os.rename(filepath, filepath+'.orig')

    have_errors = False
    for (filename,filepath) in all_ts_files('.orig'):
        # pre-fixups to cope with transifex output
        parser = ET.XMLParser(encoding='utf-8') # need to override encoding because 'utf8' is not understood only 'utf-8'
        with open(filepath + '.orig', 'rb') as f:
            data = f.read()
        # remove control characters; this must be done over the entire file otherwise the XML parser will fail
        data = remove_invalid_characters(data)
        tree = ET.parse(io.BytesIO(data), parser=parser)

        # iterate over all messages in file
        root = tree.getroot()
        for context in root.findall('context'):
            for message in context.findall('message'):
                numerus = message.get('numerus') == 'yes'
                source = message.find('source').text
                translation_node = message.find('translation')
                # pick all numerusforms
                if numerus:
                    translations = [i.text for i in translation_node.findall('numerusform')]
                else:
                    translations = [translation_node.text]

                for translation in translations:
                    if translation is None:
                        continue
                    errors = []
                    valid = check_format_specifiers(source, translation, errors, numerus)

                    for error in errors:
                        print('%s: %s' % (filename, error))

                    if not valid: # set type to unfinished and clear string if invalid
                        translation_node.clear()
                        translation_node.set('type', 'unfinished')
                        have_errors = True

                # Remove location tags
                for location in message.findall('location'):
                    message.remove(location)

                # Remove entire message if it is an unfinished translation
                if translation_node.get('type') == 'unfinished':
                    context.remove(message)

        # check if document is (virtually) empty, and remove it if so
        num_messages = sum(len(context.findall('message')) for context in root.findall('context'))
        if num_messages < MIN_NUM_MESSAGES:
            print('Removing %s, as it contains only %i messages' % (filepath, num_messages))
            continue

        # write fixed-up tree
        # if diff reduction requested, replace some XML to 'sanitize' to qt formatting
        if reduce_diff_hacks:
            out = io.BytesIO()
            tree.write(out, encoding='utf-8')
            out = out.getvalue()
            out = out.replace(b' />', b'/>')
            with open(filepath, 'wb') as f:
                f.write(out)
        else:
            tree.write(filepath, encoding='utf-8')
    return have_errors
198 
199 if __name__ == '__main__':
201  # fetch_all_translations()
203 
def split_format_specifiers(specifiers)
def postprocess_translations(reduce_diff_hacks=False)
def all_ts_files(suffix='')
def check_format_specifiers(source, translation, errors, numerus)