1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 from UserDict import UserDict
22
23 """Module to provide a cache of statistics in a database.
24
25 @organization: Zuza Software Foundation
26 @copyright: 2007 Zuza Software Foundation
27 @license: U{GPL <http://www.fsf.org/licensing/licenses/gpl.html>}
28 """
29
30 from translate import __version__ as toolkitversion
31 from translate.storage import factory
32 from translate.misc.multistring import multistring
33 from translate.lang.common import Common
34
35 try:
36 from sqlite3 import dbapi2
37 except ImportError:
38 from pysqlite2 import dbapi2
39 import os.path
40 import re
41 import sys
42 import stat
43
# Pre-compiled patterns shared by the module.  Raw string literals are used
# so that regex escapes such as \s and \D are never interpreted (or warned
# about) as Python string escapes.

# The literal prefix "_n: " at the very start of a string.
kdepluralre = re.compile(r"^_n: ")
# A <br> tag, optionally self-closing and with optional whitespace: <br>, <br/>, <br />.
brtagre = re.compile(r"<br\s*?/?>")
# Any tag-like span: "<", one or more non-">" characters, ">".
xmltagre = re.compile(r"<[^>]+>")
# A literal dot with a non-digit character on either side.
numberre = re.compile(r"\D\.\D")

# Human-readable names for the numeric unit states (0, 1, 2).
state_strings = {0: "untranslated", 1: "translated", 2: "fuzzy"}
50
60
62 """Counts the words in the unit's source and target, taking plurals into
63 account. The target words are only counted if the unit is translated."""
64 (sourcewords, targetwords) = (0, 0)
65 if isinstance(unit.source, multistring):
66 sourcestrings = unit.source.strings
67 else:
68 sourcestrings = [unit.source or ""]
69 for s in sourcestrings:
70 sourcewords += wordcount(s)
71 if not unit.istranslated():
72 return sourcewords, targetwords
73 if isinstance(unit.target, multistring):
74 targetstrings = unit.target.strings
75 else:
76 targetstrings = [unit.target or ""]
77 for s in targetstrings:
78 targetwords += wordcount(s)
79 return sourcewords, targetwords
80
82 - def __init__(self, record_keys, record_values=None, compute_derived_values = lambda x: x):
89
91 return tuple(self[key] for key in self.record_keys)
92
99
106
109
111 """Modifies f to commit database changes if it executes without exceptions.
112 Otherwise it rolls back the database.
113
114 ALL publicly accessible methods in StatsCache MUST be decorated with this
115 decorator.
116 """
117
118 def decorated_f(self, *args, **kwargs):
119 try:
120 result = f(self, *args, **kwargs)
121 self.con.commit()
122 return result
123 except:
124
125
126
127 if self.con:
128 self.con.rollback()
129 raise
130 return decorated_f
131
132 UNTRANSLATED, TRANSLATED, FUZZY = 0, 1, 2
134 """Returns the numeric database state for the unit."""
135 if unit.istranslated():
136 return TRANSLATED
137 if unit.isfuzzy() and unit.target:
138 return FUZZY
139 return UNTRANSLATED
140
142 keys = ['translatedsourcewords',
143 'fuzzysourcewords',
144 'untranslatedsourcewords',
145 'translated',
146 'fuzzy',
147 'untranslated',
148 'translatedtargetwords']
149
152
154 self.cur = cur
155 self.cur.execute("""
156 CREATE TABLE IF NOT EXISTS filetotals(
157 fileid INTEGER PRIMARY KEY AUTOINCREMENT,
158 translatedsourcewords INTEGER NOT NULL,
159 fuzzysourcewords INTEGER NOT NULL,
160 untranslatedsourcewords INTEGER NOT NULL,
161 translated INTEGER NOT NULL,
162 fuzzy INTEGER NOT NULL,
163 untranslated INTEGER NOT NULL,
164 translatedtargetwords INTEGER NOT NULL);""")
165
def new_record(cls, state_for_db=None, sourcewords=None, targetwords=None):
    """Build a totals record for a single unit, or an empty record.

    @param state_for_db: one of UNTRANSLATED, TRANSLATED or FUZZY.  When it
    is None (the default) no counter is set on the record.
    @param sourcewords: source word count stored under the counter that
    matches the state.
    @param targetwords: target word count; only stored for TRANSLATED.
    @return: a Record created over cls.keys with
    cls._compute_derived_values passed as its compute_derived_values hook.
    """
    record = Record(cls.keys, compute_derived_values=cls._compute_derived_values)
    # States are plain integers, so compare them by value.  Identity ("is")
    # on integers only works by accident of interpreter small-int caching.
    # The states are mutually exclusive, hence a single if/elif chain.
    if state_for_db == UNTRANSLATED:
        record['untranslated'] = 1
        record['untranslatedsourcewords'] = sourcewords
    elif state_for_db == TRANSLATED:
        record['translated'] = 1
        record['translatedsourcewords'] = sourcewords
        record['translatedtargetwords'] = targetwords
    elif state_for_db == FUZZY:
        record['fuzzy'] = 1
        record['fuzzysourcewords'] = sourcewords
    return record

new_record = classmethod(new_record)
182
184 record["total"] = record["untranslated"] + \
185 record["translated"] + \
186 record["fuzzy"]
187 record["totalsourcewords"] = record["untranslatedsourcewords"] + \
188 record["translatedsourcewords"] + \
189 record["fuzzysourcewords"]
190 record["review"] = 0
191 _compute_derived_values = classmethod(_compute_derived_values)
192
199
201 self.cur.execute("""
202 INSERT OR REPLACE into filetotals
203 VALUES (%(fileid)d, %(vals)s);
204 """ % {'fileid': fileid, 'vals': record.as_string_for_db()})
205
207 self.cur.execute("""
208 DELETE FROM filetotals
209 WHERE fileid=?;
210 """, (fileid,))
211
213 """Returns a dictionary with all statistics initialised to 0."""
214 return FileTotals.new_record()
215
218
220 return {"total": [], "translated": [], "fuzzy": [], "untranslated": []}
221
223 return {"sourcewordcount": [], "targetwordcount": []}
224
225
226
227
228
229
230
232 file_stat = os.stat(file_path)
233 assert not stat.S_ISDIR(file_stat.st_mode)
234 return file_stat.st_mtime, file_stat.st_size
235
237 return os.path.extsep + 'pending'
238
241
242
244 """An object instantiated as a singleton for each statsfile that provides
245 access to the database cache from a pool of StatsCache objects."""
246 _caches = {}
247 defaultfile = None
248 con = None
249 """This cache's connection"""
250 cur = None
251 """The current cursor"""
252
254 def make_database(statsfile):
255 def connect(cache):
256 cache.con = dbapi2.connect(statsfile)
257 cache.cur = cache.con.cursor()
258
259 def clear_old_data(cache):
260 try:
261 cache.cur.execute("""SELECT toolkitbuild FROM files""")
262 val = cache.cur.fetchone()
263
264
265 if val is None or val[0] < toolkitversion.build:
266 cache.con.close()
267 del cache
268 os.unlink(statsfile)
269 return True
270 return False
271 except dbapi2.OperationalError:
272 return False
273
274 cache = cls._caches[statsfile] = object.__new__(cls)
275 connect(cache)
276 if clear_old_data(cache):
277 connect(cache)
278 cache.create()
279 return cache
280
281 if not statsfile:
282 if not cls.defaultfile:
283 userdir = os.path.expanduser("~")
284 cachedir = None
285 if os.name == "nt":
286 cachedir = os.path.join(userdir, "Translate Toolkit")
287 else:
288 cachedir = os.path.join(userdir, ".translate_toolkit")
289 if not os.path.exists(cachedir):
290 os.mkdir(cachedir)
291 cls.defaultfile = os.path.realpath(os.path.join(cachedir, "stats.db"))
292 statsfile = cls.defaultfile
293 else:
294 statsfile = os.path.realpath(statsfile)
295
296 if statsfile in cls._caches:
297 return cls._caches[statsfile]
298
299 return make_database(statsfile)
300
302 """Create all tables and indexes."""
303 self.file_totals = FileTotals(self.cur)
304
305 self.cur.execute("""CREATE TABLE IF NOT EXISTS files(
306 fileid INTEGER PRIMARY KEY AUTOINCREMENT,
307 path VARCHAR NOT NULL UNIQUE,
308 st_mtime INTEGER NOT NULL,
309 st_size INTEGER NOT NULL,
310 toolkitbuild INTEGER NOT NULL);""")
311
312 self.cur.execute("""CREATE UNIQUE INDEX IF NOT EXISTS filepathindex
313 ON files (path);""")
314
315 self.cur.execute("""CREATE TABLE IF NOT EXISTS units(
316 id INTEGER PRIMARY KEY AUTOINCREMENT,
317 unitid VARCHAR NOT NULL,
318 fileid INTEGER NOT NULL,
319 unitindex INTEGER NOT NULL,
320 source VARCHAR NOT NULL,
321 target VARCHAR,
322 state INTEGER,
323 sourcewords INTEGER,
324 targetwords INTEGER);""")
325
326 self.cur.execute("""CREATE INDEX IF NOT EXISTS fileidindex
327 ON units(fileid);""")
328
329 self.cur.execute("""CREATE TABLE IF NOT EXISTS checkerconfigs(
330 configid INTEGER PRIMARY KEY AUTOINCREMENT,
331 config VARCHAR);""")
332
333 self.cur.execute("""CREATE INDEX IF NOT EXISTS configindex
334 ON checkerconfigs(config);""")
335
336 self.cur.execute("""CREATE TABLE IF NOT EXISTS uniterrors(
337 errorid INTEGER PRIMARY KEY AUTOINCREMENT,
338 unitindex INTEGER NOT NULL,
339 fileid INTEGER NOT NULL,
340 configid INTEGER NOT NULL,
341 name VARCHAR NOT NULL,
342 message VARCHAR);""")
343
344 self.cur.execute("""CREATE INDEX IF NOT EXISTS uniterrorindex
345 ON uniterrors(fileid, configid);""")
346 create = transaction(create)
347
def _getfileid(self, filename, check_mod_info=True, store=None):
    """Return the fileid that represents the given file in the stats cache.

    If the file has no row in the files table, or its recorded modification
    time and size differ from the values on disk, its statistics are
    recalculated and the id of the freshly cached file is returned.

    With check_mod_info set to False, a file that is already known only has
    its recorded modification info refreshed; its existing id is returned.

    The optional argument store can be used to avoid unnecessary reparsing
    of already loaded translation files.  It can be a TranslationFile
    object or a callback that returns one.
    """
    realpath = os.path.realpath(filename)
    self.cur.execute("""SELECT fileid, st_mtime, st_size FROM files
        WHERE path=?;""", (realpath,))
    row = self.cur.fetchone()
    mod_info = get_mod_info(realpath)

    if row:
        fileid, cached_mtime, cached_size = row
        if not check_mod_info:
            # The caller vouches for the cached data: just record the
            # current modification info and reuse the existing id.
            self.cur.execute("""UPDATE files
                SET st_mtime=?, st_size=?
                WHERE fileid=?;""", (mod_info[0], mod_info[1], fileid))
            return fileid
        if (cached_mtime, cached_size) == mod_info:
            return fileid

    # From here on the file is either unknown or stale, so it is re-cached.
    # That path is only valid when modification checking is enabled.
    assert check_mod_info
    if callable(store):
        store = store()
    elif not store:
        store = factory.getobject(realpath)

    return self._cachestore(store, realpath, mod_info)
382
384 """See if this checker configuration has been used before."""
385 config = str(checker.config.__dict__)
386 self.cur.execute("""SELECT configid, config FROM checkerconfigs WHERE
387 config=?;""", (config,))
388 configrow = self.cur.fetchone()
389 if not configrow or configrow[1] != config:
390 return None
391 else:
392 return configrow[0]
393
395 """Cache the statistics for the supplied unit(s)."""
396 unitvalues = []
397 for index, unit in enumerate(units):
398 if unit.istranslatable():
399 sourcewords, targetwords = wordsinunit(unit)
400 if unitindex:
401 index = unitindex
402
403 unitvalues.append((unit.getid(), fileid, index, \
404 unit.source, unit.target, \
405 sourcewords, targetwords, \
406 statefordb(unit)))
407 file_totals_record = file_totals_record + FileTotals.new_record(statefordb(unit), sourcewords, targetwords)
408
409 self.cur.executemany("""INSERT INTO units
410 (unitid, fileid, unitindex, source, target, sourcewords, targetwords, state)
411 values (?, ?, ?, ?, ?, ?, ?, ?);""",
412 unitvalues)
413 self.file_totals[fileid] = file_totals_record
414 if unitindex:
415 return state_strings[statefordb(units[0])]
416 return ""
417
419 """Calculates and caches the statistics of the given store
420 unconditionally."""
421 self.cur.execute("""DELETE FROM files WHERE
422 path=?;""", (realpath,))
423 self.cur.execute("""INSERT INTO files
424 (fileid, path, st_mtime, st_size, toolkitbuild) values (NULL, ?, ?, ?, ?);""",
425 (realpath, mod_info[0], mod_info[1], toolkitversion.build))
426 fileid = self.cur.lastrowid
427 self.cur.execute("""DELETE FROM units WHERE
428 fileid=?""", (fileid,))
429 self._cacheunitstats(store.units, fileid)
430 return fileid
431
433 """Retrieves the statistics for the given file if possible, otherwise
434 delegates to cachestore()."""
435 return self.file_totals[self._getfileid(filename, store=store)]
436 filetotals = transaction(filetotals)
437
439 """Helper method for cachestorechecks() and recacheunit()"""
440
441
442 dummy = (-1, fileid, configid, "noerror", "")
443 unitvalues = [dummy]
444
445 errornames = []
446 for index, unit in enumerate(units):
447 if unit.istranslatable():
448
449 if unitindex:
450 index = unitindex
451 failures = checker.run_filters(unit)
452 for checkname, checkmessage in failures.iteritems():
453 unitvalues.append((index, fileid, configid, checkname, checkmessage))
454 errornames.append("check-" + checkname)
455 checker.setsuggestionstore(None)
456
457 if unitindex:
458
459
460 unitvalues.remove(dummy)
461 errornames.append("total")
462
463
464 self.cur.executemany("""INSERT INTO uniterrors
465 (unitindex, fileid, configid, name, message)
466 values (?, ?, ?, ?, ?);""",
467 unitvalues)
468 return errornames
469
471 """Calculates and caches the error statistics of the given store
472 unconditionally."""
473
474
475 self.cur.execute("""DELETE FROM uniterrors WHERE
476 fileid=?;""", (fileid,))
477 self._cacheunitschecks(store.units, fileid, configid, checker)
478 return fileid
479
481 values = self.cur.execute("""
482 SELECT state, sourcewords, targetwords
483 FROM units
484 WHERE fileid=? AND unitid=?
485 """, (fileid, unitid))
486 result = values.fetchone()
487 if result is not None:
488 return result
489 else:
490 print >> sys.stderr, """WARNING: Database in inconsistent state.
491 fileid %d and unitid %d have no entries in the table units.""" % (fileid, unitid)
492
493
494
495 return []
496
498 """Recalculate all information for a specific unit. This is necessary
499 for updating all statistics when a translation of a unit took place,
500 for example.
501
502 This method assumes that everything was up to date before (file totals,
503 checks, checker config, etc.)."""
504 fileid = self._getfileid(filename, check_mod_info=False)
505 configid = self._get_config_id(fileid, checker)
506 unitid = unit.getid()
507
508 totals_without_unit = self.file_totals[fileid] - \
509 FileTotals.new_record(*self.get_unit_stats(fileid, unitid))
510 self.cur.execute("""SELECT unitindex FROM units WHERE
511 fileid=? AND unitid=?;""", (fileid, unitid))
512 unitindex = self.cur.fetchone()[0]
513 self.cur.execute("""DELETE FROM units WHERE
514 fileid=? AND unitid=?;""", (fileid, unitid))
515 state = [self._cacheunitstats([unit], fileid, unitindex, totals_without_unit)]
516
517 self.cur.execute("""DELETE FROM uniterrors WHERE
518 fileid=? AND unitindex=?;""", (fileid, unitindex))
519 if os.path.exists(suggestion_filename(filename)):
520 checker.setsuggestionstore(factory.getobject(suggestion_filename(filename), ignore=suggestion_extension()))
521 state.extend(self._cacheunitschecks([unit], fileid, configid, checker, unitindex))
522 return state
523 recacheunit = transaction(recacheunit)
524
def _checkerrors(self, filename, fileid, configid, checker, store):
    """Return the first cached error row and the cursor holding the rest.

    The uniterrors table is queried for the given fileid and configid.  If
    that yields no row, the checks for the store are run and cached, and
    the query is repeated.

    store may be a loaded store object, a callback returning one, or a
    false value, in which case the file is loaded through the factory.

    @return: a (first_row, cursor) tuple; first_row is None when the query
    produced no rows.
    """
    def geterrors():
        self.cur.execute("""SELECT
            name,
            unitindex
            FROM uniterrors WHERE fileid=? and configid=?
            ORDER BY unitindex;""", (fileid, configid))
        return self.cur.fetchone(), self.cur

    cached = geterrors()
    if cached[0] is not None:
        return cached

    # Nothing cached for this file/configuration pair yet: obtain the store
    # and run the checks before querying again.
    if callable(store):
        store = store()
    elif not store:
        store = factory.getobject(filename)

    if os.path.exists(suggestion_filename(filename)):
        suggestions = factory.getobject(suggestion_filename(filename),
                                        ignore=suggestion_extension())
        checker.setsuggestionstore(suggestions)
    self._cachestorechecks(fileid, store, checker, configid)
    return geterrors()
549
def _geterrors(self, filename, fileid, configid, checker, store):
    """Return every cached error row for the file and checker configuration.

    Delegates to _checkerrors() for the first row and the cursor, then
    drains the cursor.

    @return: a list of rows as produced by the cursor.  The list is empty
    when _checkerrors() found no row at all, so callers never see a None
    entry they would have to index into.
    """
    first, cur = self._checkerrors(filename, fileid, configid, checker, store)
    if first is None:
        # fetchone() returned nothing, so there are no rows to collect.
        return []
    rows = [first]
    rows.extend(cur.fetchall())
    return rows
556
558 configid = self._getstoredcheckerconfig(checker)
559 if configid:
560 return configid
561 self.cur.execute("""INSERT INTO checkerconfigs
562 (configid, config) values (NULL, ?);""",
563 (str(checker.config.__dict__),))
564 return self.cur.lastrowid
565
def filechecks(self, filename, checker, store=None):
    """Return the error statistics for the given file.

    Cached error rows are used when available; otherwise the lookup
    helpers run and cache the checks first.

    @return: the mapping produced by emptyfilechecks(), extended with one
    'check-<name>' key per failing check, each holding the list of unit
    indices that failed it.
    """
    fileid = self._getfileid(filename, store=store)
    configid = self._get_config_id(fileid, checker)
    rows = self._geterrors(filename, fileid, configid, checker, store)

    errors = emptyfilechecks()
    for row in rows:
        unitindex = row[1]
        # Rows with unit index -1 are placeholders, not real failures.
        if unitindex != -1:
            checkkey = 'check-' + row[0]
            if checkkey not in errors:
                errors[checkkey] = []
            errors[checkkey].append(unitindex)

    return errors
filechecks = transaction(filechecks)
584
586 fileid = self._getfileid(filename)
587 configid = self._get_config_id(fileid, checker)
588 self._checkerrors(filename, fileid, configid, checker, None)
589 self.cur.execute("""SELECT
590 name,
591 unitindex
592 FROM uniterrors
593 WHERE fileid=? and configid=? and name=?;""", (fileid, configid, name))
594 return self.cur.fetchone() is not None
595 file_fails_test = transaction(file_fails_test)
596
598 """Return a dictionary of unit stats mapping sets of unit
599 indices with those states"""
600 stats = emptyfilestats()
601 fileid = self._getfileid(filename, store=store)
602
603 self.cur.execute("""SELECT
604 state,
605 unitindex
606 FROM units WHERE fileid=?
607 ORDER BY unitindex;""", (fileid,))
608 values = self.cur.fetchall()
609
610 for value in values:
611 stats[state_strings[value[0]]].append(value[1])
612 stats["total"].append(value[1])
613
614 return stats
615
616 - def filestats(self, filename, checker, store=None):
623 filestats = transaction(filestats)
624
def unitstats(self, filename, _lang=None, store=None):
    """Return a dictionary of property names mapping to arrays which map
    unit indices to property values.

    Note the difference from filestats: filestats supplies sets of unit
    indices that share a given property, whereas this method supplies
    arrays that map unit indices to the values themselves.

    The _lang argument is accepted but not used here.
    """
    stats = emptyunitstats()
    fileid = self._getfileid(filename, store=store)

    self.cur.execute("""SELECT
        sourcewords, targetwords
        FROM units WHERE fileid=?
        ORDER BY unitindex;""", (fileid,))

    # Each row is (sourcewords, targetwords), already ordered by unit index.
    for row in self.cur.fetchall():
        stats["sourcewordcount"].append(row[0])
        stats["targetwordcount"].append(row[1])

    return stats
unitstats = transaction(unitstats)
651