Package translate :: Package storage :: Module pypo
[hide private]
[frames | no frames]

Source Code for Module translate.storage.pypo

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2002-2009 Zuza Software Foundation 
  5  # 
  6  # This file is part of the Translate Toolkit. 
  7  # 
  8  # This program is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  # 
 13  # This program is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with this program; if not, see <http://www.gnu.org/licenses/>. 
 20   
 21  """Classes that hold units of .po files (pounit) or entire files (pofile). 
 22  Gettext-style .po (or .pot) files are used in translations for KDE et al. (see kbabel).""" 
 23   
 24  from __future__ import generators 
 25  import copy 
 26  import cStringIO 
 27  import re 
 28   
 29  from translate.lang import data 
 30  from translate.misc.multistring import multistring 
 31  from translate.misc import quote 
 32  from translate.misc import textwrap 
 33  from translate.storage import pocommon, base, poparser 
 34  from translate.storage.pocommon import encodingToUse 
 35   
 36  lsep = "\n#: " 
 37  """Seperator for #: entries""" 
 38   
 39  # general functions for quoting / unquoting po strings 
 40   
 41  po_unescape_map = {"\\r": "\r", "\\t": "\t", '\\"': '"', '\\n': '\n', '\\\\': '\\'} 
 42  po_escape_map = dict([(value, key) for (key, value) in po_unescape_map.items()]) 
 43   
 44   
def escapeforpo(line):
    """Escapes a line for po format. assumes no \\n occurs in the line.

    Every character that has an entry in C{po_escape_map} is replaced by its
    escape sequence; all other characters are copied unchanged.

    @param line: unescaped text
    @return: the escaped text
    """
    # All keys of po_escape_map are single characters, so one pass with a
    # table lookup per character gives the same result as locating, sorting
    # and splicing every special position.
    # Joining on line[:0] (an empty string of the same type as the input)
    # keeps the str/unicode type of the result identical to the input's.
    return line[:0].join([po_escape_map.get(char, char) for char in line])
63 64
def unescapehandler(escape):
    """Translate one PO escape sequence into the character it represents.

    @param escape: an escape sequence such as C{\\\\n}
    @return: the mapped character, or C{escape} itself when it has no entry
        in C{po_unescape_map}
    """
    if escape in po_unescape_map:
        return po_unescape_map[escape]
    return escape
67 68
def wrapline(line):
    """Wrap text for po files.

    The text is wrapped at 76 columns without touching its whitespace.

    @param line: the text to wrap
    @return: list of wrapped lines
    """
    wrapped = textwrap.wrap(line, 76, replace_whitespace=False,
                            expand_tabs=False, drop_whitespace=False)

    # Lines should not start with a space: move a leading space to the end
    # of the line before it.
    for position in range(1, len(wrapped)):
        current = wrapped[position]
        if current.startswith(' '):
            wrapped[position] = current[1:]
            wrapped[position - 1] += ' '
    return wrapped
83 84
def quoteforpo(text):
    """Quote and escape the given text as the lines of a PO string.

    @param text: unescaped text, or None
    @return: list of double-quoted, escaped lines (empty when text is None)
    """
    if text is None:
        return []
    quoted = []
    paragraphs = text.split("\n")
    final = paragraphs[-1]
    if len(paragraphs) > 1 or len(paragraphs[0]) > 71:
        # Multi-line strings start with an empty leader line, except for a
        # single line that merely ends in a newline.
        if not (len(paragraphs) == 2 and not final):
            quoted.append('""')
        for paragraph in paragraphs[:-1]:
            #TODO: We should only wrap after escaping
            pieces = wrapline(paragraph)
            if not pieces:
                # An empty paragraph is just the newline that ended it.
                quoted.append('"\\n"')
                continue
            for piece in pieces[:-1]:
                quoted.append('"' + escapeforpo(piece) + '"')
            if pieces[-1]:
                quoted.append('"' + escapeforpo(pieces[-1]) + '\\n"')
    if final:
        for piece in wrapline(final):
            quoted.append('"' + escapeforpo(piece) + '"')
    return quoted
107 108
def extractpoline(line):
    """Remove quote and unescape line from po file.

    @param line: a quoted line from a po file (msgid or msgstr)
    @return: the first element of the result of
        C{quote.extractwithoutquotes}, with escapes mapped through
        L{unescapehandler}
    """
    result = quote.extractwithoutquotes(line, '"', '"', '\\',
                                        includeescapes=unescapehandler)
    return result[0]
116 117
def unquotefrompo(postr):
    """Unquote a list of PO lines and concatenate them into one string.

    @param postr: iterable of quoted lines from a po file
    @return: the concatenation of each line passed through L{extractpoline}
    """
    pieces = []
    for quoted_line in postr:
        pieces.append(extractpoline(quoted_line))
    return "".join(pieces)
120 121
def is_null(lst):
    """Tell whether a list of PO lines holds no text at all.

    @param lst: list of quoted PO lines
    @return: True for an empty list or a list holding only C{'""'}
    """
    if lst == []:
        return True
    return len(lst) == 1 and lst[0] == '""'
124 125
def extractstr(string):
    """Cut out the part of a string from its first to its last double quote.

    @param string: text expected to contain a quoted section
    @return: the text from the first C{"} up to and including the last one;
        when there is no quote at all, the slice starting at index -1 with a
        C{"} appended
    """
    first_quote = string.find('"')
    last_quote = string.rfind('"')
    if last_quote == -1:
        return string[first_quote:] + '"'
    return string[first_quote:last_quote + 1]
133 134
class pounit(pocommon.pounit):
    """One entry of a PO file.

    The parts of the entry are kept as lists of raw PO lines (quoted and
    escaped, as produced by L{quoteforpo}).  C{msgstr} becomes a dictionary
    mapping plural index to such a list when several forms are assigned.
    """
    # othercomments = []      #   # this is another comment
    # automaticcomments = []  #   #. comment extracted from the source code
    # sourcecomments = []     #   #: sourcefile.xxx:35
    # prev_msgctxt = []       #   #| The previous values that msgctxt and msgid held
    # prev_msgid = []         #
    # prev_msgid_plural = []  #
    # typecomments = []       #   #, fuzzy
    # msgidcomments = []      #   _: within msgid
    # msgctxt
    # msgid = []
    # msgstr = []

    # Our homegrown way to indicate what must be copied in a shallow
    # fashion
    __shallow__ = ['_store']

    def __init__(self, source=None, encoding="UTF-8"):
        """Create a blank unit and hand C{source} to the base class.

        @param source: initial source text, passed to pocommon.pounit
        @param encoding: encoding name, normalised through encodingToUse
        """
        self._encoding = encodingToUse(encoding)
        self.obsolete = False
        self._initallcomments(blankall=True)
        self.prev_msgctxt = []
        self.prev_msgid = []
        self.prev_msgid_plural = []
        self.msgctxt = []
        self.msgid = []
        self.msgid_pluralcomments = []
        self.msgid_plural = []
        self.msgstr = []
        pocommon.pounit.__init__(self, source)

    def _initallcomments(self, blankall=False):
        """Initialises allcomments

        @param blankall: when true, every comment list is reset to empty
        """
        if blankall:
            self.othercomments = []
            self.automaticcomments = []
            self.sourcecomments = []
            self.typecomments = []
            self.msgidcomments = []

    def _get_all_comments(self):
        """Return the five comment lists (the lists themselves, not copies)."""
        return [self.othercomments,
                self.automaticcomments,
                self.sourcecomments,
                self.typecomments,
                self.msgidcomments,
               ]

    allcomments = property(_get_all_comments)

    def _get_source_vars(self, msgid, msgid_plural):
        """Build a multistring from raw msgid / msgid_plural lines.

        @param msgid: list of quoted PO lines of the singular
        @param msgid_plural: list of quoted PO lines of the plural
        @return: multistring holding the unquoted singular and, when this
            unit has a plural, the unquoted plural
        """
        multi = multistring(unquotefrompo(msgid), self._encoding)
        # NOTE(review): the test looks at self.msgid_plural, not at the
        # msgid_plural argument, also when called for the previous msgid.
        if self.hasplural():
            pluralform = unquotefrompo(msgid_plural)
            if isinstance(pluralform, str):
                pluralform = pluralform.decode(self._encoding)
            multi.strings.append(pluralform)
        return multi

    def _set_source_vars(self, source):
        """Turn a source value into raw msgid / msgid_plural lines.

        @param source: a string, a multistring or a list of strings; only
            the first two entries of a list are used
        @return: tuple (msgid, msgid_plural) of lists of quoted PO lines
        """
        if isinstance(source, str):
            source = source.decode(self._encoding)
        if isinstance(source, multistring):
            source = source.strings
        msgid_plural = []
        if isinstance(source, list):
            msgid = quoteforpo(source[0])
            if len(source) > 1:
                msgid_plural = quoteforpo(source[1])
        else:
            msgid = quoteforpo(source)
        return msgid, msgid_plural

    def getsource(self):
        """Returns the unescaped msgid"""
        return self._get_source_vars(self.msgid, self.msgid_plural)

    def setsource(self, source):
        """Sets the msgid to the given (unescaped) value.

        @param source: an unescaped source string.
        """
        self._rich_source = None
        self.msgid, self.msgid_plural = self._set_source_vars(source)
    source = property(getsource, setsource)

    def _get_prev_source(self):
        """Returns the unescaped previous msgid"""
        return self._get_source_vars(self.prev_msgid, self.prev_msgid_plural)

    def _set_prev_source(self, source):
        """Sets the previous msgid to the given (unescaped) value.

        @param source: an unescaped source string.
        """
        self.prev_msgid, self.prev_msgid_plural = self._set_source_vars(source)
    prev_source = property(_get_prev_source, _set_prev_source)

    def gettarget(self):
        """Returns the unescaped msgstr"""
        if isinstance(self.msgstr, dict):
            # Plural forms are read in key order; dict.values() alone gives
            # no ordering guarantee.
            forms = [unquotefrompo(self.msgstr[key]) for key in sorted(self.msgstr)]
            multi = multistring(forms, self._encoding)
        else:
            multi = multistring(unquotefrompo(self.msgstr), self._encoding)
        return multi

    def settarget(self, target):
        """Sets the msgstr to the given (unescaped) value

        @param target: a string, multistring, list or dict of strings
        @raise ValueError: if the unit has no plural and a list or dict with
            a length other than one is given
        """
        self._rich_target = None
        if isinstance(target, str):
            target = target.decode(self._encoding)
        if self.hasplural():
            # The multistring test must come before the basestring test.
            if isinstance(target, multistring):
                target = target.strings
            elif isinstance(target, basestring):
                target = [target]
        elif isinstance(target, (dict, list)):
            if len(target) == 1:
                target = target[0]
            else:
                raise ValueError("po msgid element has no plural but msgstr has %d elements (%s)" % (len(target), target))
        if isinstance(target, list):
            self.msgstr = dict([(i, quoteforpo(targetstring)) for i, targetstring in enumerate(target)])
        elif isinstance(target, dict):
            self.msgstr = dict([(i, quoteforpo(targetstring)) for i, targetstring in target.iteritems()])
        else:
            self.msgstr = quoteforpo(target)
    target = property(gettarget, settarget)

    def getalttrans(self):
        """Return a list of alternate units.

        Previous msgid and current msgstr is combined to form a single
        alternative unit."""
        prev_source = self.prev_source
        if prev_source and self.isfuzzy():
            unit = type(self)(prev_source)
            unit.target = self.target
            # Already released versions of Virtaal (0.6.x) only supported XLIFF
            # alternatives, and expect .xmlelement.get().
            # This can be removed soon:
            unit.xmlelement = dict()
            return [unit]
        return []

    def getnotes(self, origin=None):
        """Return comments based on origin value (programmer, developer, source code and translator)

        @param origin: None for translator and automatic comments together,
            "translator", or one of "programmer", "developer", "source code"
        @raise ValueError: for any other origin
        """
        if origin is None:
            comments = u"".join([comment[2:] for comment in self.othercomments])
            comments += u"".join([comment[3:] for comment in self.automaticcomments])
        elif origin == "translator":
            comments = u"".join([comment[2:] for comment in self.othercomments])
        elif origin in ["programmer", "developer", "source code"]:
            comments = u"".join([comment[3:] for comment in self.automaticcomments])
        else:
            raise ValueError("Comment type not valid")
        # Let's drop the last newline
        return comments[:-1]

    def addnote(self, text, origin=None, position="append"):
        """This is modeled on the XLIFF method. See xliff.py::xliffunit.addnote

        @param text: the note; empty or whitespace-only text is ignored
        @param origin: "programmer", "developer" or "source code" add an
            automatic ("#. ") comment, anything else a translator ("# ") one
        @param position: "append" or "prepend" relative to existing comments;
            any other value makes the new note replace them
        """
        # ignore empty strings and strings without non-space characters
        if not (text and text.strip()):
            return
        text = data.forceunicode(text)
        commentlist = self.othercomments
        linestart = "# "
        autocomments = False
        if origin in ["programmer", "developer", "source code"]:
            autocomments = True
            commentlist = self.automaticcomments
            linestart = "#. "
        text = text.split("\n")
        newcomments = [linestart + line + "\n" for line in text]
        if position == "append":
            newcomments = commentlist + newcomments
        elif position == "prepend":
            newcomments = newcomments + commentlist

        if autocomments:
            self.automaticcomments = newcomments
        else:
            self.othercomments = newcomments

    def removenotes(self):
        """Remove all the translator's notes (other comments)"""
        self.othercomments = []

    def __deepcopy__(self, memo=None):
        """Deep copy this unit, sharing the attributes named in __shallow__.

        @param memo: the memo dictionary supplied by copy.deepcopy
        @return: the new unit
        """
        # A literal {} default would be one dictionary shared by all calls.
        if memo is None:
            memo = {}
        # Make an instance to serve as the copy
        new_unit = self.__class__()
        # Register the copy before copying any member, so that a member that
        # refers back to this unit resolves to the copy instead of recursing.
        memo[id(self)] = new_unit
        # We'll be testing membership frequently, so make a set from
        # self.__shallow__
        shallow = set(self.__shallow__)
        # Make deep copies of all members which are not in shallow
        for key, value in self.__dict__.iteritems():
            if key not in shallow:
                setattr(new_unit, key, copy.deepcopy(value, memo))
        # Make shallow copies of all members which are in shallow
        for key in shallow:
            setattr(new_unit, key, getattr(self, key))
        # Return our copied unit
        return new_unit

    def copy(self):
        """Return a deep copy of this unit (see L{__deepcopy__})."""
        return copy.deepcopy(self)

    def _msgidlen(self):
        """Return the length of the unquoted msgid plus, if any, the plural."""
        if self.hasplural():
            return len(unquotefrompo(self.msgid)) + len(unquotefrompo(self.msgid_plural))
        else:
            return len(unquotefrompo(self.msgid))

    def _msgstrlen(self):
        """Return the length of the unquoted msgstr.

        For plural units the non-empty forms are joined with newlines first.
        """
        if isinstance(self.msgstr, dict):
            combinedstr = "\n".join(filter(None, [unquotefrompo(msgstr) for msgstr in self.msgstr.itervalues()]))
            return len(combinedstr)
        else:
            return len(unquotefrompo(self.msgstr))

    def merge(self, otherpo, overwrite=False, comments=True, authoritative=False):
        """Merges the otherpo (with the same msgid) into this one.

        Overwrite non-blank self.msgstr only if overwrite is True
        merge comments only if comments is True

        @param otherpo: the unit to merge from; anything that is not a pounit
            is handed to the base class merge
        @param authoritative: when true, automatic, msgid and source comments
            of otherpo are not brought across
        """

        def mergelists(list1, list2, split=False):
            """Merge the lines of list2 into list1 (in place)."""
            #decode where necessary
            if unicode in [type(item) for item in list2] + [type(item) for item in list1]:
                for position, item in enumerate(list1):
                    if isinstance(item, str):
                        list1[position] = item.decode("utf-8")
                for position, item in enumerate(list2):
                    if isinstance(item, str):
                        list2[position] = item.decode("utf-8")

            #Determine the newline style of list1
            lineend = ""
            if list1 and list1[0]:
                # Two-character endings are tried first: a line ending in
                # "\r\n" also ends in "\n" and would otherwise be mistaken
                # for a plain "\n" line.
                for candidate in ("\r\n", "\n\r", "\n", "\r"):
                    if list1[0].endswith(candidate):
                        lineend = candidate
                        break
            else:
                lineend = "\n"

            #Split if directed to do so:
            if split:
                splitlist1 = []
                splitlist2 = []
                prefix = "#"
                for item in list1:
                    words = item.split()
                    # a blank line has neither prefix nor entries
                    if words:
                        prefix = words[0]
                        splitlist1.extend(words[1:])
                for item in list2:
                    words = item.split()
                    if words:
                        prefix = words[0]
                        splitlist2.extend(words[1:])
                list1.extend(["%s %s%s" % (prefix, item, lineend) for item in splitlist2 if not item in splitlist1])
            else:
                #Normal merge, but conform to list1 newline style
                if list1 != list2:
                    for item in list2:
                        if lineend:
                            item = item.rstrip() + lineend
                        # avoid duplicate comment lines (this might cause some problems)
                        if item not in list1 or len(item) < 5:
                            list1.append(item)

        if not isinstance(otherpo, pounit):
            super(pounit, self).merge(otherpo, overwrite, comments)
            return
        if comments:
            mergelists(self.othercomments, otherpo.othercomments)
            mergelists(self.typecomments, otherpo.typecomments)
            if not authoritative:
                # We don't bring across otherpo.automaticcomments as we consider ourself
                # to be the the authority.  Same applies to otherpo.msgidcomments
                mergelists(self.automaticcomments, otherpo.automaticcomments)
                mergelists(self.msgidcomments, otherpo.msgidcomments)
                mergelists(self.sourcecomments, otherpo.sourcecomments, split=True)
        if not self.istranslated() or overwrite:
            # Remove kde-style comments from the translation (if any).
            if self._extract_msgidcomments(otherpo.target):
                otherpo.target = otherpo.target.replace('_: ' + otherpo._extract_msgidcomments() + '\n', '')
            self.target = otherpo.target
            if self.source != otherpo.source or self.getcontext() != otherpo.getcontext():
                self.markfuzzy()
            else:
                self.markfuzzy(otherpo.isfuzzy())
        elif not otherpo.istranslated():
            if self.source != otherpo.source:
                self.markfuzzy()
        else:
            if self.target != otherpo.target:
                self.markfuzzy()

    def isheader(self):
        """Tell whether this unit is a header: blank msgid, msgid comments
        and msgctxt, but a non-blank msgstr."""
        #return (self._msgidlen() == 0) and (self._msgstrlen() > 0) and (len(self.msgidcomments) == 0)
        #rewritten here for performance:
        return (is_null(self.msgid)
                and not is_null(self.msgstr)
                and self.msgidcomments == []
                and is_null(self.msgctxt))

    def isblank(self):
        """Tell whether msgid, msgstr and msgctxt are all empty (a header or
        a unit with msgid comments is never blank)."""
        if self.isheader() or len(self.msgidcomments):
            return False
        if (self._msgidlen() == 0) and (self._msgstrlen() == 0) and (is_null(self.msgctxt)):
            return True
        return False
        # TODO: remove:
        # Before, the equivalent of the following was the final return statement:
        # return len(self.source.strip()) == 0

    def hastypecomment(self, typecomment):
        """Check whether the given type comment is present

        @param typecomment: the flag to look for, taken literally
        """
        # check for word boundaries properly by using a regular expression...
        # The flag is escaped so that characters such as "+" are matched
        # literally instead of being read as regular expression syntax.
        pattern = re.compile("\\b%s\\b" % re.escape(typecomment))
        for tcline in self.typecomments:
            if pattern.search(tcline):
                return True
        return False

    def hasmarkedcomment(self, commentmarker):
        """Check whether the given comment marker is present as # (commentmarker) ..."""
        commentmarker = "(%s)" % commentmarker
        for comment in self.othercomments:
            if comment.replace("#", "", 1).strip().startswith(commentmarker):
                return True
        return False

    def settypecomment(self, typecomment, present=True):
        """Alters whether a given typecomment is present

        When a change is needed, all type comments are rewritten as a single
        sorted "#, " line (or removed when none are left).
        """
        if self.hastypecomment(typecomment) != present:
            typecomments = re.findall(r"\b[-\w]+\b", "\n".join(self.typecomments))
            if present:
                typecomments.append(typecomment)
            else:
                # Drop every occurrence: list.remove() would leave duplicates
                # behind and raise ValueError when the flag is not a token.
                typecomments = [flag for flag in typecomments if flag != typecomment]
            if typecomments:
                typecomments.sort()
                self.typecomments = ["#, %s\n" % ", ".join(typecomments)]
            else:
                self.typecomments = []

    def isfuzzy(self):
        """Tell whether the "fuzzy" type comment is present."""
        return self.hastypecomment('fuzzy')

    def markfuzzy(self, present=True):
        """Set the unit state to fuzzy or, when C{present} is false, to
        untranslated or translated depending on whether msgstr is empty."""
        if present:
            self.set_state_n(self.STATE[self.S_FUZZY][0])
        elif (self.hasplural() and not self._msgstrlen()) or is_null(self.msgstr):
            self.set_state_n(self.STATE[self.S_UNTRANSLATED][0])
        else:
            self.set_state_n(self.STATE[self.S_TRANSLATED][0])

    def _domarkfuzzy(self, present=True):
        """Add or remove the "fuzzy" type comment."""
        self.settypecomment("fuzzy", present)

    def infer_state(self):
        """Set the state from the obsolete flag and the fuzzy type comment."""
        if self.obsolete:
            self.makeobsolete()
        else:
            self.markfuzzy(self.hastypecomment('fuzzy'))

    def isobsolete(self):
        """Return the obsolete flag."""
        return self.obsolete

    def makeobsolete(self):
        """Makes this unit obsolete"""
        super(pounit, self).makeobsolete()
        self.obsolete = True
        self.sourcecomments = []
        self.automaticcomments = []

    def resurrect(self):
        """Makes an obsolete unit normal"""
        super(pounit, self).resurrect()
        self.obsolete = False

    def hasplural(self):
        """returns whether this pounit contains plural strings..."""
        return len(self.msgid_plural) > 0

    def parse(self, src):
        """Parse C{src} (PO text) into this unit via poparser.parse_unit."""
        return poparser.parse_unit(poparser.ParseState(cStringIO.StringIO(src), pounit), self)

    def _getmsgpartstr(self, partname, partlines, partcomments=""):
        """Render one part (msgctxt, msgid, msgstr, ...) as PO text.

        @param partname: keyword to write, e.g. "msgid" or "#~ msgstr"
        @param partlines: list of quoted PO lines, or a dict of such lists
            keyed by plural index
        @param partcomments: quoted msgid comment lines to place first
        @return: the text of the part, newline terminated
        """
        if isinstance(partlines, dict):
            # one "name[index]" part per plural form, in index order
            return "".join([self._getmsgpartstr("%s[%d]" % (partname, partkey), partlines[partkey], partcomments)
                            for partkey in sorted(partlines)])
        partstr = partname + " "
        partstartline = 0
        if len(partlines) > 0 and len(partcomments) == 0:
            partstr += partlines[0]
            partstartline = 1
        elif len(partcomments) > 0:
            if len(partlines) > 0 and len(unquotefrompo(partlines[:1])) == 0:
                # if there is a blank leader line, it must come before the comment
                partstr += partlines[0] + '\n'
                # but if the whole string is blank, leave it in
                if len(partlines) > 1:
                    partstartline += 1
            else:
                # All partcomments should start on a newline
                partstr += '""\n'
            # combine comments into one if more than one
            if len(partcomments) > 1:
                combinedcomment = []
                for comment in partcomments:
                    comment = unquotefrompo([comment])
                    if comment.startswith("_:"):
                        comment = comment[len("_:"):]
                    if comment.endswith("\\n"):
                        comment = comment[:-len("\\n")]
                    #Before we used to strip. Necessary in some cases?
                    combinedcomment.append(comment)
                partcomments = quoteforpo("_:%s" % "".join(combinedcomment))
            # comments first, no blank leader line needed
            partstr += "\n".join(partcomments)
            partstr = quote.rstripeol(partstr)
        else:
            partstr += '""'
        partstr += '\n'
        # add the rest
        for partline in partlines[partstartline:]:
            partstr += partline + '\n'
        return partstr

    def _encodeifneccessary(self, output):
        """encodes unicode strings and returns other strings unchanged"""
        if isinstance(output, unicode):
            encoding = encodingToUse(getattr(self, "_encoding", "UTF-8"))
            return output.encode(encoding)
        return output

    def __str__(self):
        """convert to a string. double check that unicode is handled somehow here"""
        output = self._getoutput()
        return self._encodeifneccessary(output)

    def _getoutput(self):
        """return this po element as a string"""

        def add_prev_msgid_lines(lines, prefix, header, var):
            """Append one previous-value part to lines, each line prefixed."""
            if len(var) > 0:
                lines.append("%s %s %s\n" % (prefix, header, var[0]))
                lines.extend("%s %s\n" % (prefix, line) for line in var[1:])

        def add_prev_msgid_info(lines, prefix):
            """Append previous msgctxt, msgid and msgid_plural to lines."""
            add_prev_msgid_lines(lines, prefix, 'msgctxt', self.prev_msgctxt)
            add_prev_msgid_lines(lines, prefix, 'msgid', self.prev_msgid)
            add_prev_msgid_lines(lines, prefix, 'msgid_plural', self.prev_msgid_plural)

        lines = []
        lines.extend(self.othercomments)
        if self.isobsolete():
            lines.extend(self.typecomments)
            obsoletelines = []
            add_prev_msgid_info(obsoletelines, prefix="#~|")
            if self.msgctxt:
                obsoletelines.append(self._getmsgpartstr("#~ msgctxt", self.msgctxt))
            obsoletelines.append(self._getmsgpartstr("#~ msgid", self.msgid, self.msgidcomments))
            if self.msgid_plural or self.msgid_pluralcomments:
                obsoletelines.append(self._getmsgpartstr("#~ msgid_plural", self.msgid_plural, self.msgid_pluralcomments))
            obsoletelines.append(self._getmsgpartstr("#~ msgstr", self.msgstr))
            for index, obsoleteline in enumerate(obsoletelines):
                # We need to account for a multiline msgid or msgstr here
                obsoletelines[index] = obsoleteline.replace('\n"', '\n#~ "')
            lines.extend(obsoletelines)
            return u"".join(lines)
        # if there's no msgid don't do msgid and string, unless we're the header
        # this will also discard any comments other than plain othercomments...
        if is_null(self.msgid):
            if not (self.isheader() or self.getcontext() or self.sourcecomments):
                return u"".join(lines)
        lines.extend(self.automaticcomments)
        lines.extend(self.sourcecomments)
        lines.extend(self.typecomments)
        add_prev_msgid_info(lines, prefix="#|")
        if self.msgctxt:
            lines.append(self._getmsgpartstr(u"msgctxt", self.msgctxt))
        lines.append(self._getmsgpartstr(u"msgid", self.msgid, self.msgidcomments))
        if self.msgid_plural or self.msgid_pluralcomments:
            lines.append(self._getmsgpartstr(u"msgid_plural", self.msgid_plural, self.msgid_pluralcomments))
        lines.append(self._getmsgpartstr(u"msgstr", self.msgstr))
        postr = u"".join(lines)
        return postr

    def getlocations(self):
        """Get a list of locations from sourcecomments in the PO unit

        rtype: List
        return: A list of the locations with '#: ' stripped

        """
        locations = []
        for sourcecomment in self.sourcecomments:
            locations += quote.rstripeol(sourcecomment)[3:].split()
        for i, loc in enumerate(locations):
            locations[i] = pocommon.unquote_plus(loc)
        return locations

    def addlocation(self, location):
        """Add a location to sourcecomments in the PO unit

        @param location: Text location e.g. 'file.c:23' does not include #:
        @type location: String

        """
        # a location containing a space is passed through quote_plus first
        if location.find(" ") != -1:
            location = pocommon.quote_plus(location)
        self.sourcecomments.append("#: %s\n" % location)

    def _extract_msgidcomments(self, text=None):
        """Extract KDE style msgid comments from the unit.

        @param text: text to extract from; when empty or None the unit's own
            msgidcomments are used
        @rtype: String
        @return: Returns the extracted msgidcomments found in this unit's msgid.
        """

        if not text:
            text = unquotefrompo(self.msgidcomments)
        return text.split('\n')[0].replace('_: ', '', 1)

    def setmsgidcomment(self, msgidcomment):
        """Replace the msgid comments by one KDE style comment, or clear them
        when C{msgidcomment} is empty."""
        if msgidcomment:
            self.msgidcomments = ['"_: %s\\n"' % msgidcomment]
        else:
            self.msgidcomments = []

    msgidcomment = property(_extract_msgidcomments, setmsgidcomment)

    def getcontext(self):
        """Get the message context."""
        return unquotefrompo(self.msgctxt) + self._extract_msgidcomments()

    def setcontext(self, context):
        """Set msgctxt to the given (unescaped) context."""
        context = data.forceunicode(context)
        self.msgctxt = quoteforpo(context)

    def getid(self):
        """Returns a unique identifier for this unit."""
        context = self.getcontext()
        # Gettext does not consider the plural to determine duplicates, only
        # the msgid. For generation of .mo files, we might want to use this
        # code to generate the entry for the hash table, but for now, it is
        # commented out for conformance to gettext.
#        id = '\0'.join(self.source.strings)
        unit_id = self.source
        if self.msgidcomments:
            unit_id = u"_: %s\n%s" % (context, unit_id)
        elif context:
            unit_id = u"%s\04%s" % (context, unit_id)
        return unit_id
698 699
class pofile(pocommon.pofile):
    """A .po file containing various units"""
    UnitClass = pounit

    def parse(self, input):
        """Parses the given file or file source string.

        @param input: an open file-like object or a str holding PO text
        """
        if hasattr(input, 'name'):
            self.filename = input.name
        elif not getattr(self, 'filename', ''):
            self.filename = ''
        if isinstance(input, str):
            input = cStringIO.StringIO(input)
        # clear units to get rid of automatically generated headers before parsing
        self.units = []
        poparser.parse_units(poparser.ParseState(input, pounit), self)

    def removeduplicates(self, duplicatestyle="merge"):
        """Make sure each msgid is unique ; merge comments etc from duplicates into original

        @param duplicatestyle: "merge" merges a duplicate into the first unit
            with the same id; "msgctxt" keeps duplicates and appends their
            locations to msgctxt to tell them apart
        """
        # TODO: can we handle consecutive calls to removeduplicates()? What
        # about files already containing msgctxt? - test
        id_dict = {}
        uniqueunits = []
        # Units are not hashable, so the units that already received their
        # disambiguating comment/context are remembered by identity.
        marked = set()

        def addcomment(thepo):
            thepo.msgidcomments.append('"_: %s\\n"' % " ".join(thepo.getlocations()))
            marked.add(id(thepo))

        def addcontext(thepo):
            thepo.msgctxt.append('"%s"' % escapeforpo(" ".join(thepo.getlocations())))
            marked.add(id(thepo))

        for thepo in self.units:
            unit_id = thepo.getid()
            if thepo.isheader() and not thepo.getlocations():
                # header msgids shouldn't be merged...
                uniqueunits.append(thepo)
            elif unit_id in id_dict:
                if duplicatestyle == "merge":
                    if unit_id:
                        id_dict[unit_id].merge(thepo)
                    else:
                        addcomment(thepo)
                        uniqueunits.append(thepo)
                elif duplicatestyle == "msgctxt":
                    origpo = id_dict[unit_id]
                    # The original unit gets its locations as context once
                    # only, however many duplicates follow it.
                    if id(origpo) not in marked:
                        addcontext(origpo)
                    addcontext(thepo)
                    uniqueunits.append(thepo)
            else:
                if not unit_id:
                    if duplicatestyle == "merge":
                        addcomment(thepo)
                    else:
                        addcontext(thepo)
                id_dict[unit_id] = thepo
                uniqueunits.append(thepo)
        self.units = uniqueunits

    def __str__(self):
        """Convert to a string. double check that unicode is handled somehow here"""
        output = self._getoutput()
        if isinstance(output, unicode):
            try:
                return output.encode(getattr(self, "_encoding", "UTF-8"))
            except UnicodeEncodeError:
                # The declared encoding cannot represent the content: switch
                # the header, the file and every unit to UTF-8 and retry.
                self.updateheader(add=True, Content_Type="text/plain; charset=UTF-8")
                self._encoding = "UTF-8"
                for unit in self.units:
                    unit._encoding = "UTF-8"
                return self._getoutput().encode("UTF-8")

        return output

    def _getoutput(self):
        """convert the units back to lines"""
        text = u"".join([unit._getoutput() + u"\n" for unit in self.units]).rstrip()
        #After the last pounit we will have \n\n and we only want to end in \n:
        if text:
            text += u"\n"
        return text

    def encode(self, lines):
        """encode any unicode strings in lines in self._encoding"""
        encoding = self._encoding
        # an unset encoding or the placeholder "charset" falls back to UTF-8
        if encoding is None or encoding.lower() == "charset":
            encoding = 'UTF-8'
        newlines = []
        for line in lines:
            if isinstance(line, unicode):
                line = line.encode(encoding)
            newlines.append(line)
        return newlines

    def decode(self, lines):
        """decode any non-unicode strings in lines with self._encoding

        @raise UnicodeError: when a line cannot be decoded; the message names
            the encoding and the offending line
        """
        newlines = []
        for line in lines:
            if isinstance(line, str) and self._encoding is not None and self._encoding.lower() != "charset":
                try:
                    line = line.decode(self._encoding)
                except UnicodeError as e:
                    raise UnicodeError("Error decoding line with encoding %r: %s. Line is %r" % (self._encoding, e, line))
            newlines.append(line)
        return newlines

    def unit_iter(self):
        """Iterate over the units that are neither headers nor obsolete."""
        for unit in self.units:
            if not (unit.isheader() or unit.isobsolete()):
                yield unit
817