1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
"""Grep XLIFF, Gettext PO and TMX localization files

Matches are output to snippet files of the same type which can then be reviewed
and later merged using pomerge

See: http://translate.sourceforge.net/wiki/toolkit/pogrep for examples and
usage instructions
"""
30
31 from translate.storage import factory
32 from translate.misc import optrecurse
33 from translate.misc.multistring import multistring
34 from translate.lang import data
35 import re
36 import locale
37
38
40 """Just a small data structure that represents a search match."""
41
42
def __init__(self, unit, part='target', part_n=0, start=0, end=0):
    """Record where a search hit was found.

    :param unit: the unit in which the match occurred
    :param part: name of the part of the unit that matched; the accessors
                 in this class recognise 'target', 'source', 'notes' and
                 'locations'
    :param part_n: index of the matching string within that part (used to
                   index plural strings, notes or locations)
    :param start: start of the match -- presumably a character offset into
                  the matched string; confirm against the code that creates
                  these objects
    :param end: end of the match, same convention as ``start``
    """
    self.unit = unit
    self.part = part
    self.part_n = part_n
    self.start = start
    self.end = end
49
50
52 if self.part == 'target':
53 if self.unit.hasplural():
54 getter = lambda: self.unit.target.strings[self.part_n]
55 else:
56 getter = lambda: self.unit.target
57 return getter
58 elif self.part == 'source':
59 if self.unit.hasplural():
60 getter = lambda: self.unit.source.strings[self.part_n]
61 else:
62 getter = lambda: self.unit.source
63 return getter
64 elif self.part == 'notes':
65 def getter():
66 return self.unit.getnotes()[self.part_n]
67 return getter
68 elif self.part == 'locations':
69 def getter():
70 return self.unit.getlocations()[self.part_n]
71 return getter
72
74 if self.part == 'target':
75 if self.unit.hasplural():
76 def setter(value):
77 strings = self.unit.target.strings
78 strings[self.part_n] = value
79 self.unit.target = strings
80 else:
81 def setter(value):
82 self.unit.target = value
83 return setter
84
85
94
97
99 """Calculate the real index in the unnormalized string that corresponds to
100 the index nfc_index in the normalized string."""
101 length = nfc_index
102 max_length = len(string)
103 while len(data.normalize(string[:length])) <= nfc_index:
104 if length == max_length:
105 return length
106 length += 1
107 return length - 1
108
109
121
def __init__(self, searchstring, searchparts, ignorecase=False, useregexp=False,
             invertmatch=False, accelchar=None, encoding='utf-8', includeheader=False,
             max_matches=0):
    """Set up a filter that looks for ``searchstring`` in translation units.

    :param searchstring: text or regular expression to look for; byte
                         strings are decoded using ``encoding``
    :param searchparts: collection naming the parts to search. 'msgid',
                        'msgstr' and 'comment' are accepted as aliases for
                        'source', 'target' and 'notes'. When empty or None,
                        source and target are searched.
    :param ignorecase: match without regard to case
    :param useregexp: treat ``searchstring`` as a regular expression
    :param invertmatch: report strings that do NOT match
    :param accelchar: accelerator marker removed from text before matching
    :param encoding: encoding used to decode a byte-string ``searchstring``
    :param includeheader: stored on the instance; not used in this method
    :param max_matches: upper bound on collected matches (0 disables it)
    :raises re.error: if ``useregexp`` is set and the pattern is invalid
    """
    if isinstance(searchstring, bytes):
        searchstring = searchstring.decode(encoding)
    self.searchstring = data.normalize(searchstring)

    if searchparts:
        # Gettext-flavoured names are aliases for the generic part names.
        self.search_source = ('source' in searchparts) or ('msgid' in searchparts)
        self.search_target = ('target' in searchparts) or ('msgstr' in searchparts)
        self.search_notes = ('notes' in searchparts) or ('comment' in searchparts)
        self.search_locations = 'locations' in searchparts
    else:
        self.search_source = True
        self.search_target = True
        self.search_notes = False
        self.search_locations = False

    self.ignorecase = ignorecase
    self.useregexp = useregexp
    if self.useregexp:
        # A regular expression must never be lowercased: doing so turns
        # classes such as \S, \W, \D and \B into their opposites. Case
        # insensitivity is requested from the regex engine instead;
        # re.UNICODE makes the folding cover non-ASCII letters as well.
        pattern_flags = 0
        if self.ignorecase:
            pattern_flags = re.IGNORECASE | re.UNICODE
        self.searchpattern = re.compile(self.searchstring, pattern_flags)
    elif self.ignorecase:
        # Plain-text search: compare lowercased text on both sides.
        self.searchstring = self.searchstring.lower()

    self.invertmatch = invertmatch
    self.accelchar = accelchar
    self.includeheader = includeheader
    self.max_matches = max_matches
154
def matches(self, teststr):
    """Tell whether ``teststr`` satisfies the configured search.

    The text is normalized, lowercased when case is ignored, and stripped
    of accelerator markers before it is compared with the search string or
    pattern.

    :param teststr: the text to examine; ``None`` never matches, even when
                    the match is inverted
    :return: True or False; the result is negated when ``invertmatch`` is
             set
    """
    if teststr is None:
        return False
    teststr = data.normalize(teststr)
    if self.ignorecase:
        teststr = teststr.lower()
    if self.accelchar:
        # A doubled marker is replaced by a placeholder first (presumably
        # it stands for an escaped literal), then the remaining single
        # markers are dropped. Plain string replacement avoids treating
        # the marker as regular-expression syntax.
        teststr = teststr.replace(self.accelchar * 2, "#")
        teststr = teststr.replace(self.accelchar, "")
    if self.useregexp:
        found = self.searchpattern.search(teststr) is not None
    else:
        found = self.searchstring in teststr
    if self.invertmatch:
        found = not found
    return found
171
173 """runs filters on an element"""
174 if unit.isheader(): return []
175
176 if self.search_source:
177 if isinstance(unit.source, multistring):
178 strings = unit.source.strings
179 else:
180 strings = [unit.source]
181 for string in strings:
182 if self.matches(string):
183 return True
184
185 if self.search_target:
186 if isinstance(unit.target, multistring):
187 strings = unit.target.strings
188 else:
189 strings = [unit.target]
190 for string in strings:
191 if self.matches(string):
192 return True
193
194 if self.search_notes:
195 return self.matches(unit.getnotes())
196 if self.search_locations:
197 return self.matches(u" ".join(unit.getlocations()))
198 return False
199
214
216 if not self.searchstring:
217 return [], []
218
219 searchstring = self.searchstring
220 flags = re.LOCALE | re.MULTILINE | re.UNICODE
221
222 if self.ignorecase:
223 flags |= re.IGNORECASE
224 if not self.useregexp:
225 searchstring = re.escape(searchstring)
226 self.re_search = re.compile(u'(%s)' % (searchstring), flags)
227
228 matches = []
229 indexes = []
230
231 for index, unit in enumerate(units):
232 old_length = len(matches)
233
234 if self.search_target:
235 if unit.hasplural():
236 targets = unit.target.strings
237 else:
238 targets = [unit.target]
239 matches.extend(find_matches(unit, 'target', targets, self.re_search))
240 if self.search_source:
241 if unit.hasplural():
242 sources = unit.source.strings
243 else:
244 sources = [unit.source]
245 matches.extend(find_matches(unit, 'source', sources, self.re_search))
246 if self.search_notes:
247 matches.extend(find_matches(unit, 'notes', unit.getnotes(), self.re_search))
248
249 if self.search_locations:
250 matches.extend(find_matches(unit, 'locations', unit.getlocations(), self.re_search))
251
252
253
254
255 if self.max_matches and len(matches) > self.max_matches:
256 raise Exception("Too many matches found")
257
258 if len(matches) > old_length:
259 old_length = len(matches)
260 indexes.append(index)
261
262 return matches, indexes
263
265 """a specialized Option Parser for the grep tool..."""
def parse_args(self, args=None, values=None):
    """Parse the command line, handling implicit input/output arguments.

    The first freestanding argument becomes ``options.searchstring``. The
    remaining freestanding arguments fill in whichever of ``--input`` and
    ``--output`` were not given explicitly: without either option, the
    last argument is the output and everything before it is the input.

    NOTE(review): the ``None`` defaults mirror optparse's own
    ``parse_args`` signature -- confirm against the original header.

    :return: ``(options, args)`` with ``args`` emptied of everything that
             was consumed
    """
    (options, args) = optrecurse.optparse.OptionParser.parse_args(self, args, values)

    # The search string is mandatory and always comes first.
    if args:
        options.searchstring = args[0]
        args = args[1:]
    else:
        self.error("At least one argument must be given for the search string")

    if args and not options.input:
        if not options.output:
            # Keep the last argument back: it will become the output.
            options.input = args[:-1]
            args = args[-1:]
        else:
            options.input = args
            args = []
    if args and not options.output:
        options.output = args[-1]
        args = args[:-1]
    if args:
        self.error("You have used an invalid combination of --input, --output and freestanding args")

    # A single input is passed on as a plain value rather than a list.
    if isinstance(options.input, list) and len(options.input) == 1:
        options.input = options.input[0]
    return (options, args)
290
def set_usage(self, usage=None):
    """Set the usage string.

    :param usage: explicit usage text, handed to the parent class. When
                  None, a usage line is built from ``%prog searchstring``
                  followed by ``getusagestring()`` for every option in
                  ``option_list``.
    """
    if usage is None:
        option_usages = [self.getusagestring(option) for option in self.option_list]
        self.usage = "%prog searchstring " + " ".join(option_usages)
    else:
        super(GrepOptionParser, self).set_usage(usage)
297
306
def rungrep(inputfile, outputfile, templatefile, checkfilter):
    """Read inputfile, filter it with checkfilter and write the result.

    :param inputfile: input handed to ``factory.getobject``
    :param outputfile: object with a ``write`` method that receives the
                       filtered store as a string
    :param templatefile: accepted but not used by this function
    :param checkfilter: object whose ``filterfile`` method produces the
                        filtered store
    :return: False when the filtered store is empty (nothing is written),
             True otherwise
    """
    fromfile = factory.getobject(inputfile)
    tofile = checkfilter.filterfile(fromfile)
    if tofile.isempty():
        return False
    outputfile.write(str(tofile))
    return True
315
def cmdlineparser():
    """Build and return the option parser for the grep tool.

    Every supported extension maps to an (output extension, handler) pair
    with ``rungrep`` as the handler; the ``None`` key maps to PO output.
    The module docstring is used as the parser description.
    """
    formats = {
        "po": ("po", rungrep),
        "pot": ("pot", rungrep),
        "mo": ("mo", rungrep),
        "gmo": ("gmo", rungrep),
        "tmx": ("tmx", rungrep),
        "xliff": ("xliff", rungrep),
        "xlf": ("xlf", rungrep),
        "xlff": ("xlff", rungrep),
        None: ("po", rungrep),
    }
    parser = GrepOptionParser(formats)
    parser.add_option("", "--search", dest="searchparts",
        action="append", type="choice",
        choices=["source", "target", "notes", "locations", "msgid", "msgstr", "comment"],
        metavar="SEARCHPARTS", help="searches the given parts (source, target, notes and locations)")
    parser.add_option("-I", "--ignore-case", dest="ignorecase",
        action="store_true", default=False, help="ignore case distinctions")
    parser.add_option("-e", "--regexp", dest="useregexp",
        action="store_true", default=False, help="use regular expression matching")
    parser.add_option("-v", "--invert-match", dest="invertmatch",
        action="store_true", default=False, help="select non-matching lines")
    parser.add_option("", "--accelerator", dest="accelchar",
        action="store", type="choice", choices=["&", "_", "~"],
        metavar="ACCELERATOR", help="ignores the given accelerator when matching")
    parser.add_option("", "--header", dest="includeheader",
        action="store_true", default=False,
        help="include a PO header in the output")
    parser.set_usage()
    # 'checkfilter' is added to the parser's passthrough list; presumably
    # this forwards it to the format handler -- confirm in the parser class.
    parser.passthrough.append('checkfilter')
    parser.description = __doc__
    return parser
342
def main():
    """Build the command-line parser and run it."""
    parser = cmdlineparser()
    parser.run()


if __name__ == '__main__':
    main()
349