Package translate :: Package storage :: Module xliff
[hide private]
[frames] | no frames]

Source Code for Module translate.storage.xliff

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2005-2009 Zuza Software Foundation 
  5  # 
  6  # This file is part of the Translate Toolkit. 
  7  # 
  8  # This program is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  # 
 13  # This program is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with this program; if not, see <http://www.gnu.org/licenses/>. 
 20   
 21  """Module for handling XLIFF files for translation. 
 22   
 23  The official recommendation is to use the extension .xlf for XLIFF files. 
 24  """ 
 25   
 26  from lxml import etree 
 27   
 28  from translate.misc.multistring import multistring 
 29  from translate.misc.xml_helpers import * 
 30  from translate.storage import base, lisa 
 31  from translate.storage.lisa import getXMLspace 
 32  from translate.storage.placeables.lisa import xml_to_strelem, strelem_to_xml 
 33   
 34  # TODO: handle translation types 
 35   
class xliffunit(lisa.LISAunit):
    """A single term in the xliff file.

    The unit wraps one XML element (C{self.xmlelement}, a C{<trans-unit>}
    by default) and exposes its C{<source>}, C{<target>}, C{<alt-trans>},
    C{<note>} and C{<context-group>} children.
    """

    rootNode = "trans-unit"
    languageNode = "source"
    textNode = ""
    namespace = 'urn:oasis:names:tc:xliff:document:1.1'

    # Value assumed for xml:space when an element does not specify it.
    _default_xml_space = "default"

    #TODO: id and all the trans-unit level stuff

    def __init__(self, source, empty=False, **kwargs):
        """Override the constructor to set xml:space="preserve".

        When C{empty} is true nothing is initialised at all (the parent
        constructor is not called either).
        """
        if empty:
            return
        super(xliffunit, self).__init__(source, empty, **kwargs)
        lisa.setXMLspace(self.xmlelement, "preserve")

    def createlanguageNode(self, lang, text, purpose):
        """Returns an xml Element setup with given parameters.

        @param lang: Currently unused (see the TODO below).
        @param text: Text content of the new element.
        @param purpose: Tag name of the element to create, e.g. "source" or
        "target". Must not be empty.
        """
        #TODO: for now we do source, but we have to test if it is target,
        # perhaps with parameter. Alternatively, we can use lang, if
        # supplied, since an xliff file has to conform to the bilingual
        # nature promised by the header.
        assert purpose
        langset = etree.Element(self.namespaced(purpose))
        #TODO: check language
#        lisa.setXMLlang(langset, lang)

#        self.createPHnodes(langset, text)
        langset.text = text
        return langset

    def getlanguageNodes(self):
        """We override this to get source and target nodes.

        Returns C{[source, target]}, C{[source]} if there is no target, or
        C{[]} if there is no source (a target without a source is not
        reported).
        """
        nodes = []
        for tag in (self.languageNode, 'target'):
            try:
                nodes.append(self.xmlelement.iterchildren(self.namespaced(tag)).next())
            except StopIteration:
                # Stop at the first missing node so that the target can only
                # ever appear at index 1.
                break
        return nodes

    def set_rich_source(self, value, sourcelang='en'):
        """Replace the content of the source node with C{value[0]},
        creating the source node first if it does not exist yet."""
        sourcelanguageNode = self.get_source_dom()
        if sourcelanguageNode is None:
            sourcelanguageNode = self.createlanguageNode(sourcelang, u'', "source")
            self.set_source_dom(sourcelanguageNode)

        # Clear sourcelanguageNode first
        del sourcelanguageNode[:]
        sourcelanguageNode.text = None

        strelem_to_xml(sourcelanguageNode, value[0])

    def get_rich_source(self):
        """Return a one-element list with the source node converted by
        C{xml_to_strelem}."""
        return [xml_to_strelem(self.source_dom, getXMLspace(self.xmlelement, self._default_xml_space))]
    rich_source = property(get_rich_source, set_rich_source)

    def set_rich_target(self, value, lang='xx', append=False):
        """Replace the content of the target node with C{value[0]}.

        A C{value} of None installs a fresh, empty target node instead
        (C{append} is not used in that case).
        """
        if value is None:
            self.set_target_dom(self.createlanguageNode(lang, u'', "target"))
            return

        languageNode = self.get_target_dom()
        if languageNode is None:
            languageNode = self.createlanguageNode(lang, u'', "target")
            self.set_target_dom(languageNode, append)

        # Clear languageNode first
        del languageNode[:]
        languageNode.text = None

        strelem_to_xml(languageNode, value[0])

    def get_rich_target(self, lang=None):
        """retrieves the "target" text (second entry), or the entry in the
        specified language, if it exists"""
        return [xml_to_strelem(self.get_target_dom(lang), getXMLspace(self.xmlelement, self._default_xml_space))]
    rich_target = property(get_rich_target, set_rich_target)

    def addalttrans(self, txt, origin=None, lang=None, sourcetxt=None, matchquality=None):
        """Adds an alt-trans tag and alt-trans components to the unit.

        @type txt: String
        @param txt: Alternative translation of the source text.
        @param origin: Optional value for the "origin" attribute.
        @param lang: Optional language to set on the alt-trans element.
        @param sourcetxt: Optional source text; adds a <source> child.
        @param matchquality: Optional value for the "match-quality"
        attribute.
        """
        # Byte strings are assumed to be UTF-8 encoded.
        if isinstance(txt, str):
            txt = txt.decode("utf-8")
        alttrans = etree.SubElement(self.xmlelement, self.namespaced("alt-trans"))
        lisa.setXMLspace(alttrans, "preserve")
        if sourcetxt:
            if isinstance(sourcetxt, str):
                sourcetxt = sourcetxt.decode("utf-8")
            altsource = etree.SubElement(alttrans, self.namespaced("source"))
            altsource.text = sourcetxt
        alttarget = etree.SubElement(alttrans, self.namespaced("target"))
        alttarget.text = txt
        if matchquality:
            alttrans.set("match-quality", matchquality)
        if origin:
            alttrans.set("origin", origin)
        if lang:
            lisa.setXMLlang(alttrans, lang)

    def getalttrans(self, origin=None):
        """Returns <alt-trans> for the given origin as a list of units. No
        origin means all alternatives.

        Raises StopIteration if a matching <alt-trans> has no <target>.
        """
        translist = []
        for node in self.xmlelement.iterdescendants(self.namespaced("alt-trans")):
            if not self.correctorigin(node, origin):
                continue
            xmlspace = getXMLspace(node, self._default_xml_space)

            # We build some mini units that keep the xmlelement. This
            # makes it easier to delete it if it is passed back to us.
            newunit = base.TranslationUnit(self.source)

            # the source tag is optional
            sourcenode = node.iterdescendants(self.namespaced("source"))
            try:
                newunit.source = lisa.getText(sourcenode.next(), xmlspace)
            except StopIteration:
                pass

            # must have one or more targets
            targetnode = node.iterdescendants(self.namespaced("target"))
            newunit.target = lisa.getText(targetnode.next(), xmlspace)
            #TODO: support multiple targets better
            #TODO: support notes in alt-trans
            newunit.xmlelement = node

            translist.append(newunit)
        return translist

    def delalttrans(self, alternative):
        """Removes the supplied alternative from the list of alt-trans tags"""
        self.xmlelement.remove(alternative.xmlelement)

    def addnote(self, text, origin=None):
        """Add a note specifically in a "note" tag.

        The text is stored with surrounding whitespace stripped; C{origin},
        if given, is stored in the "from" attribute.
        """
        if isinstance(text, str):
            text = text.decode("utf-8")
        note = etree.SubElement(self.xmlelement, self.namespaced("note"))
        note.text = text.strip()
        if origin:
            note.set("from", origin)

    def getnotelist(self, origin=None):
        """Private method that returns the text from notes matching 'origin'
        or all notes. Duplicate texts are returned only once, in order of
        first appearance."""
        xmlspace = getXMLspace(self.xmlelement, self._default_xml_space)
        # TODO: consider using xpath to construct the list directly
        # or to simply get the correct text from the outset (just remember
        # to check for duplication).
        seen = set()
        notelist = []
        for note in self.xmlelement.iterdescendants(self.namespaced("note")):
            if not self.correctorigin(note, origin):
                continue
            notetext = lisa.getText(note, xmlspace)
            if notetext not in seen:
                seen.add(notetext)
                notelist.append(notetext)
        return notelist

    def getnotes(self, origin=None):
        """Return the matching notes joined with newlines."""
        return '\n'.join(self.getnotelist(origin=origin))

    def removenotes(self, origin="translator"):
        """Remove all the notes matching C{origin} (translator notes by
        default)."""
        # Take a snapshot first: removing nodes while iterating over the
        # live tree can skip elements.
        notes = list(self.xmlelement.iterdescendants(self.namespaced("note")))
        for note in notes:
            if self.correctorigin(note, origin=origin):
                # The note may be nested deeper than a direct child, so
                # remove it from its actual parent.
                note.getparent().remove(note)

    def adderror(self, errorname, errortext):
        """Adds an error message to this unit, stored as a note of the form
        "errorname: errortext" with origin "pofilter"."""
        #TODO: consider factoring out: some duplication between XLIFF and TMX
        text = errorname + ': ' + errortext
        self.addnote(text, origin="pofilter")

    def geterrors(self):
        """Get all error messages as a dictionary mapping error name to
        error text."""
        #TODO: consider factoring out: some duplication between XLIFF and TMX
        notelist = self.getnotelist(origin="pofilter")
        errordict = {}
        for note in notelist:
            if ': ' in note:
                # Split on the first separator only: the error text itself
                # may contain ': '.
                errorname, errortext = note.split(': ', 1)
            else:
                # addnote() strips the note, so an error with empty text is
                # stored as "name:".
                errorname, errortext = note.rstrip(':'), ''
            errordict[errorname] = errortext
        return errordict

    def isapproved(self):
        """States whether this unit is approved."""
        return self.xmlelement.get("approved") == "yes"

    def markapproved(self, value=True):
        """Mark this unit as approved.

        With a false C{value} the attribute is only written (as "no") if the
        unit is currently approved.
        """
        if value:
            self.xmlelement.set("approved", "yes")
        elif self.isapproved():
            self.xmlelement.set("approved", "no")

    def isreview(self):
        """States whether this unit needs to be reviewed"""
        targetnode = self.getlanguageNode(lang=None, index=1)
        return targetnode is not None and \
                "needs-review" in targetnode.get("state", "")

    def markreviewneeded(self, needsreview=True, explanation=None):
        """Marks the unit to indicate whether it needs review. Adds an
        optional explanation as a note. Does nothing if the unit has no
        target node."""
        targetnode = self.getlanguageNode(lang=None, index=1)
        if targetnode is None:
            return
        if needsreview:
            targetnode.set("state", "needs-review-translation")
            if explanation:
                self.addnote(explanation, origin="translator")
        elif "state" in targetnode.attrib:
            # Only delete the attribute if present; an unconditional del
            # raises KeyError for targets that never had a state.
            del targetnode.attrib["state"]

    def isfuzzy(self):
        """A unit is considered fuzzy when it is not approved. The state
        attributes of the target are not consulted."""
        return not self.isapproved()

    def markfuzzy(self, value=True):
        """Mark the unit fuzzy (unapproved, target state
        "needs-review-translation") or not fuzzy (approved, target state
        attributes removed)."""
        self.markapproved(not value)
        targetnode = self.getlanguageNode(lang=None, index=1)
        if targetnode is None:
            return
        if value:
            targetnode.set("state", "needs-review-translation")
        else:
            for attribute in ["state", "state-qualifier"]:
                if attribute in targetnode.attrib:
                    del targetnode.attrib[attribute]

    def settarget(self, text, lang='xx', append=False):
        """Sets the target string to the given value."""
        super(xliffunit, self).settarget(text, lang, append)
        if text:
            self.marktranslated()

    # istranslated() is deliberately not overridden: a version based on
    # the target's state == "translated" would almost always return False.
    # This way pocount, etc. works well.

    def istranslatable(self):
        """Return False only if the "translate" attribute is "no"
        (case-insensitive)."""
        value = self.xmlelement.get("translate")
        if value and value.lower() == 'no':
            return False
        return True

    def marktranslated(self):
        """Set the target's state to "translated". Does nothing if the unit
        has no target node."""
        targetnode = self.getlanguageNode(lang=None, index=1)
        if targetnode is None:
            return
        if self.isfuzzy() and "state-qualifier" in targetnode.attrib:
            #TODO: consider
            del targetnode.attrib["state-qualifier"]
        targetnode.set("state", "translated")

    def setid(self, id):
        """Set the "id" attribute of the unit."""
        self.xmlelement.set("id", id)

    def getid(self):
        """Return the "id" attribute of the unit, or "" if it has none."""
        return self.xmlelement.get("id") or ""

    def addlocation(self, location):
        """Store the location as the unit's id (replacing any previous id)."""
        self.setid(location)

    def getlocations(self):
        """Return a one-element list containing the unit's id."""
        return [self.getid()]

    def createcontextgroup(self, name, contexts=None, purpose=None):
        """Add the context group to the trans-unit with contexts a list with
        (type, text) tuples describing each context.

        C{contexts} must not be empty.
        """
        assert contexts
        group = etree.Element(self.namespaced("context-group"))
        # context-group tags must appear at the start within <group>
        # tags. Otherwise it must be appended to the end of a group
        # of tags.
        if self.xmlelement.tag == self.namespaced("group"):
            self.xmlelement.insert(0, group)
        else:
            self.xmlelement.append(group)
        group.set("name", name)
        if purpose:
            group.set("purpose", purpose)
        for contexttype, text in contexts:
            if isinstance(text, str):
                text = text.decode("utf-8")
            context = etree.SubElement(group, self.namespaced("context"))
            context.text = text
            context.set("context-type", contexttype)

    def getcontextgroups(self, name):
        """Returns the contexts in the context groups with the specified
        name, as a list with one list of (context-type, text) pairs per
        matching group."""
        xmlspace = getXMLspace(self.xmlelement, self._default_xml_space)
        groups = []
        grouptags = self.xmlelement.iterdescendants(self.namespaced("context-group"))
        #TODO: combine name in query
        for group in grouptags:
            if group.get("name") != name:
                continue
            contexts = group.iterdescendants(self.namespaced("context"))
            pairs = [(context.get("context-type"), lisa.getText(context, xmlspace))
                     for context in contexts]
            groups.append(pairs) #not extend
        return groups

    def getrestype(self):
        """returns the restype attribute in the trans-unit tag"""
        return self.xmlelement.get("restype")

    def merge(self, otherunit, overwrite=False, comments=True, authoritative=False):
        """Merge C{otherunit} into this unit, then update the translated
        and fuzzy markers to match the result.

        C{authoritative} is accepted but not used here.
        """
        #TODO: consider other attributes like "approved"
        super(xliffunit, self).merge(otherunit, overwrite, comments)
        if self.target:
            self.marktranslated()
            if otherunit.isfuzzy():
                self.markfuzzy()
            elif otherunit.source == self.source:
                self.markfuzzy(False)
        if comments:
            self.addnote(otherunit.getnotes())

    def correctorigin(self, node, origin):
        """Check against node tag's origin (e.g note or alt-trans).

        Returns True if C{origin} is None or is a substring of the node's
        "from" or "origin" attribute.
        """
        if origin is None:
            return True
        return origin in node.get("from", "") or origin in node.get("origin", "")

    def multistring_to_rich(cls, mstr):
        """Override L{TranslationUnit.multistring_to_rich} which is used by the
        C{rich_source} and C{rich_target} properties."""
        strings = mstr
        if isinstance(mstr, multistring):
            strings = mstr.strings
        elif isinstance(mstr, basestring):
            strings = [mstr]

        return [xml_to_strelem(s) for s in strings]
    multistring_to_rich = classmethod(multistring_to_rich)

    def rich_to_multistring(cls, elem_list):
        """Override L{TranslationUnit.rich_to_multistring} which is used by the
        C{rich_source} and C{rich_target} properties."""
        return multistring([unicode(elem) for elem in elem_list])
    rich_to_multistring = classmethod(rich_to_multistring)


class xlifffile(lisa.LISAfile):
    """Class representing a XLIFF file store.

    Units are added to the <body> of the <file> node that was selected last
    (see L{switchfile}); C{self._filename} names that file and
    C{self._messagenum} is the counter used to number new units in it.
    """
    UnitClass = xliffunit
    Name = _("XLIFF Translation File")
    Mimetypes = ["application/x-xliff", "application/x-xliff+xml"]
    Extensions = ["xlf", "xliff"]
    rootNode = "xliff"
    bodyNode = "body"
    XMLskeleton = '''<?xml version="1.0" ?>
<xliff version='1.1' xmlns='urn:oasis:names:tc:xliff:document:1.1'>
<file original='NoName' source-language='en' datatype='plaintext'>
<body>
</body>
</file>
</xliff>'''
    namespace = 'urn:oasis:names:tc:xliff:document:1.1'
    # xliff units have alttrans tags which can be used to store suggestions
    suggestions_in_format = True

    def __init__(self, *args, **kwargs):
        """Initialise the store and reset the current-file bookkeeping to
        the default "NoName" file."""
        lisa.LISAfile.__init__(self, *args, **kwargs)
        self._filename = "NoName"
        self._messagenum = 0

    def initbody(self):
        """Initialise the body, then take over the source and target
        language recorded on the first <file> node, if any."""
        super(xlifffile, self).initbody()
        filenode = self.document.getroot().iterchildren(self.namespaced('file')).next()
        sourcelanguage = filenode.get('source-language')
        if sourcelanguage:
            self.setsourcelanguage(sourcelanguage)
        targetlanguage = filenode.get('target-language')
        if targetlanguage:
            self.settargetlanguage(targetlanguage)

    def addheader(self):
        """Initialise the file header: write the store's source (and, if
        set, target) language onto the first <file> node."""
        filenode = self.document.getroot().iterchildren(self.namespaced("file")).next()
        filenode.set("source-language", self.sourcelanguage)
        if self.targetlanguage:
            filenode.set("target-language", self.targetlanguage)

    def createfilenode(self, filename, sourcelanguage=None, targetlanguage=None, datatype='plaintext'):
        """creates a filenode with the given filename. All parameters are
        needed for XLIFF compliance.

        Languages that are not given default to those of the store. The new
        node gets an empty body and is returned without being attached to
        the document.
        """
        self.removedefaultfile()
        if sourcelanguage is None:
            sourcelanguage = self.sourcelanguage
        if targetlanguage is None:
            targetlanguage = self.targetlanguage
        filenode = etree.Element(self.namespaced("file"))
        filenode.set("original", filename)
        filenode.set("source-language", sourcelanguage)
        if targetlanguage:
            filenode.set("target-language", targetlanguage)
        filenode.set("datatype", datatype)
        etree.SubElement(filenode, self.namespaced(self.bodyNode))
        return filenode

    def getfilename(self, filenode):
        """returns the name of the given file"""
        return filenode.get("original")

    def setfilename(self, filenode, filename):
        """set the name of the given file"""
        return filenode.set("original", filename)

    def getfilenames(self):
        """returns all filenames in this XLIFF file (file nodes without a
        name are left out)"""
        filenodes = self.document.getroot().iterchildren(self.namespaced("file"))
        return [filename
                for filename in (self.getfilename(filenode) for filenode in filenodes)
                if filename]

    def getfilenode(self, filename):
        """finds the filenode with the given name, or None if there is no
        such node"""
        filenodes = self.document.getroot().iterchildren(self.namespaced("file"))
        for filenode in filenodes:
            if self.getfilename(filenode) == filename:
                return filenode
        return None

    def getdatatype(self, filename=None):
        """Returns the datatype of the stored file. If no filename is given,
        the datatype of the first file is given. Returns "" if the file
        cannot be found or the first file is the default "NoName" file."""
        if filename:
            node = self.getfilenode(filename)
            if node is not None:
                return node.get("datatype")
        else:
            filenames = self.getfilenames()
            if filenames and filenames[0] != "NoName":
                return self.getdatatype(filenames[0])
        return ""

    def getdate(self, filename=None):
        """Returns the date attribute for the file. If no filename is given,
        the date of the first file is given. If the date attribute is not
        specified, None is returned."""
        if filename:
            node = self.getfilenode(filename)
            if node is not None:
                return node.get("date")
        else:
            filenames = self.getfilenames()
            if filenames and filenames[0] != "NoName":
                return self.getdate(filenames[0])
        return None

    def removedefaultfile(self):
        """We want to remove the default file-tag as soon as possible if we
        know if still present and empty.

        Only done when there is more than one file node; at most one node
        is removed.
        """
        root = self.document.getroot()
        filenodes = list(root.iterchildren(self.namespaced("file")))
        if len(filenodes) <= 1:
            return
        unittag = self.namespaced(self.UnitClass.rootNode)
        for filenode in filenodes:
            if filenode.get("original") == "NoName" and \
                    not list(filenode.iterdescendants(unittag)):
                root.remove(filenode)
                break

    def getheadernode(self, filenode, createifmissing=False):
        """finds the header node for the given filenode, optionally creating
        it; returns None if it is missing and not created"""
        # TODO: Deprecated?
        headernode = filenode.iterchildren(self.namespaced("header"))
        try:
            return headernode.next()
        except StopIteration:
            pass
        if not createifmissing:
            return None
        return etree.SubElement(filenode, self.namespaced("header"))

    def getbodynode(self, filenode, createifmissing=False):
        """finds the body node for the given filenode, optionally creating
        it; returns None if it is missing and not created"""
        bodynode = filenode.iterchildren(self.namespaced("body"))
        try:
            return bodynode.next()
        except StopIteration:
            pass
        if not createifmissing:
            return None
        return etree.SubElement(filenode, self.namespaced("body"))

    def addsourceunit(self, source, filename="NoName", createifmissing=False):
        """adds the given trans-unit to the last used body node. If the
        filename has changed it uses the slow method instead (will create
        the nodes required if asked).

        Returns the new unit, or None if the file could not be selected.
        """
        if self._filename != filename:
            if not self.switchfile(filename, createifmissing):
                return None
        unit = super(xlifffile, self).addsourceunit(source)
        self._messagenum += 1
        unit.setid("%d" % self._messagenum)
        return unit

    def switchfile(self, filename, createifmissing=False):
        """Make the body of the named file the current body (will create the
        nodes required if asked). Returns success.

        On failure the current file, body and unit counter are left
        untouched.
        """
        filenode = self.getfilenode(filename)
        if filenode is None:
            if not createifmissing:
                return False
            filenode = self.createfilenode(filename)
            self.document.getroot().append(filenode)

        bodynode = self.getbodynode(filenode, createifmissing=createifmissing)
        if bodynode is None:
            return False

        # Commit the switch only now that it is known to have succeeded;
        # otherwise a later call with the same filename would skip the
        # switch and use a stale (or missing) body.
        self._filename = filename
        self.body = bodynode
        self._messagenum = len(list(self.body.iterdescendants(self.namespaced("trans-unit"))))
        #TODO: was 0 based before - consider
    #    messagenum = len(self.units)
        #TODO: we want to number them consecutively inside a body/file tag
        #instead of globally in the whole XLIFF file, but using
        #len(self.units) will be much faster
        return True

    def creategroup(self, filename="NoName", createifmissing=False, restype=None):
        """adds a group tag into the specified file; returns the group, or
        None if the file could not be selected"""
        if self._filename != filename:
            if not self.switchfile(filename, createifmissing):
                return None
        group = etree.SubElement(self.body, self.namespaced("group"))
        if restype:
            group.set("restype", restype)
        return group

    def __str__(self):
        """Serialise the store, dropping an unused default file node first."""
        self.removedefaultfile()
        return super(xlifffile, self).__str__()

    def parsestring(cls, storestring):
        """Parses the string to return the correct file object.

        If the content looks like PO-derived XLIFF (first unit has a
        "gettext-domain-header" restype, or the datatype is "po") and this
        is not already the PO XLIFF class, it is reparsed as a PoXliffFile.
        """
        xliff = super(xlifffile, cls).parsestring(storestring)
        if xliff.units:
            header = xliff.units[0]
            if ("gettext-domain-header" in (header.getrestype() or "") \
                    or xliff.getdatatype() == "po") \
                    and cls.__name__.lower() != "poxlifffile":
                # Imported here rather than at module level
                # (NOTE(review): presumably to avoid a circular import).
                from translate.storage import poxliff
                xliff = poxliff.PoXliffFile.parsestring(storestring)
        return xliff
    parsestring = classmethod(parsestring)