1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 """Classes that hold units of .po files (pounit) or entire files (pofile).
23
24 Gettext-style .po (or .pot) files are used in translations for KDE, GNOME and
25 many other projects.
26
27 This uses libgettextpo from the gettext package. Any version before 0.17 will
28 at least cause some subtle bugs or may not work at all. Developers might want
29 to have a look at gettext-tools/libgettextpo/gettext-po.h from the gettext
30 package for the public API of the library.
31 """
32
33 from ctypes import c_size_t, c_int, c_uint, c_char_p, c_long, CFUNCTYPE, POINTER
34 from ctypes import Structure, cdll
35 import ctypes.util
36 import os
37 import re
38 import sys
39 import tempfile
40
41 from translate.lang import data
42 from translate.misc.multistring import multistring
43 from translate.storage import base, pocommon
44 from translate.storage import pypo
45 from translate.storage.pocommon import encodingToUse
46
47 lsep = " "
"""Separator for #: entries"""
49
50 STRING = c_char_p
51
52
53
56
57
# ctypes signatures for the two error callbacks that are later stored in the
# xerror / xerror2 members of the po_xerror_handler structure.
# xerror receives: severity, message, filename, lineno, column, multiline flag
# and the message text (same order as the parameters of xerror_cb).
xerror_prototype = CFUNCTYPE(None, c_int, POINTER(po_message), STRING, c_uint, c_uint, c_int, STRING)
# xerror2 receives the severity followed by two such groups of six values
# (same order as the parameters of xerror2_cb).
xerror2_prototype = CFUNCTYPE(None, c_int, POINTER(po_message), STRING, c_uint, c_uint, c_int, STRING, POINTER(po_message), STRING, c_uint, c_uint, c_int, STRING)
60
61
62
66
67
69 _fields_ = [
70 ('error', CFUNCTYPE(None, c_int, c_int, STRING)),
71 ('error_at_line', CFUNCTYPE(None, c_int, c_int, STRING, c_uint, STRING)),
72 ('multiline_warning', CFUNCTYPE(None, STRING, STRING)),
73 ('multiline_error', CFUNCTYPE(None, STRING, STRING)),
74 ]
75
76
77
def xerror_cb(severity, message, filename, lineno, column, multilint_p, message_text):
    """Report a diagnostic raised by libgettextpo and abort on errors.

    Every call is echoed to stderr as a single space-separated line that
    starts with the tag "xerror_cb".

    @param severity: severity code supplied by the library
    @param message: the message the diagnostic refers to
    @param filename: name of the file being processed
    @param lineno: line number of the problem
    @param column: column number of the problem
    @param multilint_p: multi-line flag supplied by the library
    @param message_text: text of the diagnostic
    @raise ValueError: when C{severity} is 1 or higher; the exception carries
        C{message_text}
    """
    fields = ("xerror_cb", severity, message, filename, lineno, column,
              multilint_p, message_text)
    # Write to the stream directly: the "print >> stream" statement only
    # exists on Python 2, while on Python 3 it is evaluated as a shift
    # expression and fails with a TypeError before the severity check runs.
    sys.stderr.write(" ".join([str(field) for field in fields]) + "\n")
    if severity >= 1:
        raise ValueError(message_text)
82
83
def xerror2_cb(severity, message1, filename1, lineno1, column1, multiline_p1, message_text1, message2, filename2, lineno2, column2, multiline_p2, message_text2):
    """Report a two-part diagnostic raised by libgettextpo and abort on errors.

    Every call is echoed to stderr as a single space-separated line that
    starts with the tag "xerror2_cb".

    @param severity: severity code supplied by the library
    @param message1: message the first part refers to
    @param filename1: file name for the first part
    @param lineno1: line number for the first part
    @param column1: column number for the first part
    @param multiline_p1: multi-line flag for the first part
    @param message_text1: text of the first part
    @param message2: message the second part refers to
    @param filename2: file name for the second part
    @param lineno2: line number for the second part
    @param column2: column number for the second part
    @param multiline_p2: multi-line flag for the second part
    @param message_text2: text of the second part
    @raise ValueError: when C{severity} is 1 or higher; the exception carries
        C{message_text1}
    """
    fields = ("xerror2_cb", severity,
              message1, filename1, lineno1, column1, multiline_p1, message_text1,
              message2, filename2, lineno2, column2, multiline_p2, message_text2)
    # Write to the stream directly: the "print >> stream" statement only
    # exists on Python 2, while on Python 3 it is evaluated as a shift
    # expression and fails with a TypeError before the severity check runs.
    sys.stderr.write(" ".join([str(field) for field in fields]) + "\n")
    if severity >= 1:
        raise ValueError(message_text1)
88
89
90
# Locate and load the libgettextpo shared library into ``gpo``.
gpo = None
# Ask the platform's library lookup for either spelling of the library name
# and keep the first one that loads.
names = ['gettextpo', 'libgettextpo']
for name in names:
    lib_location = ctypes.util.find_library(name)
    if lib_location:
        gpo = cdll.LoadLibrary(lib_location)
        if gpo:
            break
else:
    # The loop finished without loading anything: as a last resort hand the
    # conventional Unix file name straight to the loader.
    try:
        gpo = cdll.LoadLibrary('libgettextpo.so')
    except OSError:
        # Surface the failure as an ImportError so that importing this module
        # fails in the usual way when the library is not installed.
        raise ImportError("gettext PO library not found")
108
109
110
# Declare return and argument types for the libgettextpo functions used in
# this module.
# NOTE(review): library handles (files, messages, file positions) are declared
# as c_int throughout - confirm this is safe on platforms where a pointer is
# wider than an int.

# Reading and writing files
gpo.po_file_read_v3.argtypes = [STRING, POINTER(po_xerror_handler)]
gpo.po_file_write_v2.argtypes = [c_int, STRING, POINTER(po_xerror_handler)]
# ctypes only honours the attribute named "restype"; the previous spelling
# "retype" merely created an unused attribute on the function object.
gpo.po_file_write_v2.restype = c_int

# Header access
gpo.po_file_domain_header.restype = STRING
gpo.po_header_field.restype = STRING
gpo.po_header_field.argtypes = [STRING, STRING]

# File positions (locations)
gpo.po_filepos_file.restype = STRING
gpo.po_message_filepos.restype = c_int
gpo.po_message_filepos.argtypes = [c_int, c_int]
gpo.po_message_add_filepos.argtypes = [c_int, STRING, c_size_t]

# Reading message properties
gpo.po_message_comments.restype = STRING
gpo.po_message_extracted_comments.restype = STRING
gpo.po_message_prev_msgctxt.restype = STRING
gpo.po_message_prev_msgid.restype = STRING
gpo.po_message_prev_msgid_plural.restype = STRING
gpo.po_message_is_format.restype = c_int
gpo.po_message_is_format.argtypes = [c_int, STRING]
gpo.po_message_set_format.argtypes = [c_int, STRING, c_int]
gpo.po_message_msgctxt.restype = STRING
gpo.po_message_msgid.restype = STRING
gpo.po_message_msgid_plural.restype = STRING
gpo.po_message_msgstr.restype = STRING
gpo.po_message_msgstr_plural.restype = STRING

# Changing message properties
gpo.po_message_set_comments.argtypes = [c_int, STRING]
gpo.po_message_set_extracted_comments.argtypes = [c_int, STRING]
gpo.po_message_set_fuzzy.argtypes = [c_int, c_int]
gpo.po_message_set_msgctxt.argtypes = [c_int, STRING]
146
147
# Error-handler structure handed to po_file_read_v3 and po_file_write_v2.
# Its two members are the Python callbacks wrapped in their ctypes prototypes.
xerror_handler = po_xerror_handler()
xerror_handler.xerror = xerror_prototype(xerror_cb)
xerror_handler.xerror2 = xerror2_prototype(xerror2_cb)
151
152
155
156
159
160
163
164
def get_libgettextpo_version():
    """Returns the libgettextpo version

    @rtype: three-value tuple
    @return: libgettextpo version in the following format::
        (major version, minor version, subminor version)
    """
    # gettext-po.h declares libgettextpo_version as a C int holding
    # (major << 16) + (minor << 8) + subminor.
    packed = c_int.in_dll(gpo, 'libgettextpo_version').value
    major = packed >> 16
    # Mask each component to its own byte; without the mask the minor value
    # still contains the major bits and the subminor arithmetic goes wrong
    # as soon as the major version is non-zero.
    minor = (packed >> 8) & 0xff
    subminor = packed & 0xff
    return major, minor, subminor
177
178
179 -class pounit(pocommon.pounit):
180
181 - def __init__(self, source=None, encoding='utf-8', gpo_message=None):
193
204
209 msgid_plural = property(None, setmsgid_plural)
210
212
def remove_msgid_comments(text):
    """Drop a leading "_:" comment line from text.

    Falsy input and text that does not start with "_:" are returned
    unchanged.  Otherwise the result is the line following the
    "_: ..." line, or an empty unicode string when there is no such line.
    """
    if not text or not text.startswith("_:"):
        return text
    match = re.search(r"_: .*\n(.*)", text)
    if match is None:
        return u""
    return match.group(1)
224 singular = remove_msgid_comments((gpo.po_message_msgid(self._gpo_message) or "").decode(self._encoding))
225 if singular:
226 if self.hasplural():
227 multi = multistring(singular, self._encoding)
228 pluralform = (gpo.po_message_msgid_plural(self._gpo_message) or "").decode(self._encoding)
229 multi.strings.append(pluralform)
230 return multi
231 else:
232 return singular
233 else:
234 return u""
235
248 source = property(getsource, setsource)
249
251 if self.hasplural():
252 plurals = []
253 nplural = 0
254 plural = gpo.po_message_msgstr_plural(self._gpo_message, nplural)
255 while plural:
256 plurals.append(plural.decode(self._encoding))
257 nplural += 1
258 plural = gpo.po_message_msgstr_plural(self._gpo_message, nplural)
259 if plurals:
260 multi = multistring(plurals, encoding=self._encoding)
261 else:
262 multi = multistring(u"")
263 else:
264 multi = (gpo.po_message_msgstr(self._gpo_message) or "").decode(self._encoding)
265 return multi
266
268
269 if self.hasplural():
270 if isinstance(target, multistring):
271 target = target.strings
272 elif isinstance(target, basestring):
273 target = [target]
274
275 elif isinstance(target, (dict, list)):
276 if len(target) == 1:
277 target = target[0]
278 else:
279 raise ValueError("po msgid element has no plural but msgstr has %d elements (%s)" % (len(target), target))
280
281
282
283
284
285 if isinstance(target, (dict, list)):
286 i = 0
287 message = gpo.po_message_msgstr_plural(self._gpo_message, i)
288 while message is not None:
289 gpo.po_message_set_msgstr_plural(self._gpo_message, i, None)
290 i += 1
291 message = gpo.po_message_msgstr_plural(self._gpo_message, i)
292
293 if isinstance(target, list):
294 for i in range(len(target)):
295 targetstring = target[i]
296 if isinstance(targetstring, unicode):
297 targetstring = targetstring.encode(self._encoding)
298 gpo.po_message_set_msgstr_plural(self._gpo_message, i, targetstring)
299
300 elif isinstance(target, dict):
301 for i, targetstring in enumerate(target.itervalues()):
302 gpo.po_message_set_msgstr_plural(self._gpo_message, i, targetstring)
303
304 else:
305 if isinstance(target, unicode):
306 target = target.encode(self._encoding)
307 if target is None:
308 gpo.po_message_set_msgstr(self._gpo_message, "")
309 else:
310 gpo.po_message_set_msgstr(self._gpo_message, target)
311 target = property(gettarget, settarget)
312
314 """The unique identifier for this unit according to the convensions in
315 .mo files."""
316 id = (gpo.po_message_msgid(self._gpo_message) or "").decode(self._encoding)
317
318
319
320
321
322
323
324 context = gpo.po_message_msgctxt(self._gpo_message)
325 if context:
326 id = u"%s\04%s" % (context.decode(self._encoding), id)
327 return id
328
330 if origin == None:
331 comments = gpo.po_message_comments(self._gpo_message) + \
332 gpo.po_message_extracted_comments(self._gpo_message)
333 elif origin == "translator":
334 comments = gpo.po_message_comments(self._gpo_message)
335 elif origin in ["programmer", "developer", "source code"]:
336 comments = gpo.po_message_extracted_comments(self._gpo_message)
337 else:
338 raise ValueError("Comment type not valid")
339
340 if comments and get_libgettextpo_version() < (0, 17, 0):
341 comments = "\n".join([line for line in comments.split("\n")])
342
343 return comments[:-1].decode(self._encoding)
344
def addnote(self, text, origin=None, position="append"):
    """Add a note (comment) to the unit.

    @param text: the note; empty or whitespace-only text is ignored
    @param origin: "programmer", "developer" or "source code" stores the
        result as extracted comments, any other value as ordinary comments
    @param position: "append" puts the text after the existing notes,
        "merge" adds only lines not already found in the existing notes
        (lines shorter than five characters are always added), and any
        other value puts the text before the existing notes
    """
    # Nothing to do for empty or whitespace-only text.
    if not text or not text.strip():
        return
    text = data.forceunicode(text)
    oldnotes = self.getnotes(origin)
    newnotes = None
    if not oldnotes:
        # No existing notes: just normalise the line endings.
        newnotes = "\n".join([piece.rstrip("\r") for piece in text.split("\n")])
    elif position == "append":
        newnotes = oldnotes + "\n" + text
    elif position == "merge":
        if oldnotes != text:
            merged = oldnotes.split("\n")
            for candidate in text.split("\n"):
                candidate = candidate.rstrip("\r")
                # Skip lines already present in the old notes; very short
                # lines are always kept.
                if candidate not in oldnotes or len(candidate) < 5:
                    merged.append(candidate)
            newnotes = "\n".join(merged)
    else:
        newnotes = text + '\n' + oldnotes

    if not newnotes:
        return

    # Library versions before 0.17.0 get a leading space on every non-empty
    # line.
    needs_space = get_libgettextpo_version() < (0, 17, 0)
    prepared = [(" " + line) if (line and needs_space) else line
                for line in newnotes.split("\n")]
    newnotes = "\n".join(prepared).encode(self._encoding)
    if origin in ("programmer", "developer", "source code"):
        gpo.po_message_set_extracted_comments(self._gpo_message, newnotes)
    else:
        gpo.po_message_set_comments(self._gpo_message, newnotes)
382
384 gpo.po_message_set_comments(self._gpo_message, "")
385
387 newpo = self.__class__()
388 newpo._gpo_message = self._gpo_message
389 return newpo
390
391 - def merge(self, otherpo, overwrite=False, comments=True, authoritative=False):
425
427
428
429 return self.getid() == "" and len(self.target) > 0
430
433
436
439
446
448 return gpo.po_message_is_fuzzy(self._gpo_message)
449
451 gpo.po_message_set_fuzzy(self._gpo_message, present)
452
454
455
456 gpo.po_message_set_obsolete(self._gpo_message, True)
457 self.infer_state()
458
460 gpo.po_message_set_obsolete(self._gpo_message, False)
461 self.infer_state()
462
464 return gpo.po_message_msgid_plural(self._gpo_message) is not None
465
477
481 msgidcomment = property(_extract_msgidcomments, setmsgidcomment)
482
484 pf = pofile(noheader=True)
485 pf.addunit(self)
486 return str(pf)
487
489 locations = []
490 i = 0
491 location = gpo.po_message_filepos(self._gpo_message, i)
492 while location:
493 locname = gpo.po_filepos_file(location)
494 locline = gpo.po_filepos_start_line(location)
495 if locline == -1:
496 locstring = locname
497 else:
498 locstring = locname + ":" + str(locline)
499 locations.append(pocommon.unquote_plus(locstring))
500 i += 1
501 location = gpo.po_message_filepos(self._gpo_message, i)
502 return locations
503
505 if location.find(" ") != -1:
506 location = pocommon.quote_plus(location)
507 parts = location.split(":")
508 file = parts[0]
509 if len(parts) == 2:
510 line = int(parts[1] or "0")
511 else:
512 line = -1
513 gpo.po_message_add_filepos(self._gpo_message, file, line)
514
def getcontext(self):
    """Return the context of this unit.

    The message's msgctxt, decoded with the unit's encoding, is used when
    it is non-empty; otherwise the result of _extract_msgidcomments() is
    returned.
    """
    raw_context = gpo.po_message_msgctxt(self._gpo_message)
    if not raw_context:
        return self._extract_msgidcomments()
    return raw_context.decode(self._encoding)
522
def setcontext(self, context):
    """Set the msgctxt of the underlying message.

    @param context: the new context text
    """
    context = data.forceunicode(context)
    # po_message_set_msgctxt is declared with a byte-string (c_char_p)
    # argument, so unicode text is encoded with the unit's encoding first,
    # the same way settarget prepares its values before calling the library.
    if isinstance(context, unicode):
        context = context.encode(self._encoding)
    gpo.po_message_set_msgctxt(self._gpo_message, context)
526
561 buildfromunit = classmethod(buildfromunit)
562
563
564 -class pofile(pocommon.pofile):
565 UnitClass = pounit
566
def __init__(self, inputfile=None, encoding=None, unitclass=pounit, noheader=False):
    """Create a store, either empty or initialised from inputfile.

    @param inputfile: when None a fresh in-memory PO file and message
        iterator are created; otherwise the value is handed, together with
        encoding, to the base class initialiser
    @param encoding: forwarded to the base class when inputfile is given
    @param unitclass: accepted but not used by this method
    @param noheader: when creating an empty store, skip init_headers()
    """
    self._gpo_memory_file = None
    self._gpo_message_iterator = None
    self.units = []
    self.sourcelanguage = None
    self.targetlanguage = None
    self._encoding = 'utf-8'

    if inputfile is not None:
        super(pofile, self).__init__(inputfile=inputfile, encoding=encoding)
        return

    # No input given: start from an empty library-side file.
    self._gpo_memory_file = gpo.po_file_create()
    self._gpo_message_iterator = gpo.po_message_iterator(self._gpo_memory_file, None)
    if not noheader:
        self.init_headers()
581
def addunit(self, unit, new=True):
    """Add unit to this store through the base class.

    @param unit: the unit to add
    @param new: when true, the unit's message is first inserted through the
        store's libgettextpo message iterator; pass False for a unit that
        wraps a message obtained from the library-side file itself
    """
    if new:
        iterator = self._gpo_message_iterator
        gpo.po_message_insert(iterator, unit._gpo_message)
    super(pofile, self).addunit(unit)
586
588 header._store = self
589 self.units.insert(0, header)
590 gpo.po_message_iterator_free(self._gpo_message_iterator)
591 self._gpo_message_iterator = gpo.po_message_iterator(self._gpo_memory_file, None)
592 gpo.po_message_insert(self._gpo_message_iterator, header._gpo_message)
593 while gpo.po_next_message(self._gpo_message_iterator):
594 pass
595
597 """make sure each msgid is unique ; merge comments etc from duplicates into original"""
598
599
600 id_dict = {}
601 uniqueunits = []
602
603
604 markedpos = []
605
606 def addcomment(thepo):
607 thepo.msgidcomment = " ".join(thepo.getlocations())
608 markedpos.append(thepo)
609 for thepo in self.units:
610 id = thepo.getid()
611 if thepo.isheader() and not thepo.getlocations():
612
613 uniqueunits.append(thepo)
614 elif id in id_dict:
615 if duplicatestyle == "merge":
616 if id:
617 id_dict[id].merge(thepo)
618 else:
619 addcomment(thepo)
620 uniqueunits.append(thepo)
621 elif duplicatestyle == "msgctxt":
622 origpo = id_dict[id]
623 if origpo not in markedpos:
624 gpo.po_message_set_msgctxt(origpo._gpo_message, " ".join(origpo.getlocations()))
625 markedpos.append(thepo)
626 gpo.po_message_set_msgctxt(thepo._gpo_message, " ".join(thepo.getlocations()))
627 uniqueunits.append(thepo)
628 else:
629 if not id:
630 if duplicatestyle == "merge":
631 addcomment(thepo)
632 else:
633 gpo.po_message_set_msgctxt(thepo._gpo_message, " ".join(thepo.getlocations()))
634 id_dict[id] = thepo
635 uniqueunits.append(thepo)
636 new_gpo_memory_file = gpo.po_file_create()
637 new_gpo_message_iterator = gpo.po_message_iterator(new_gpo_memory_file, None)
638 for unit in uniqueunits:
639 gpo.po_message_insert(new_gpo_message_iterator, unit._gpo_message)
640 gpo.po_message_iterator_free(self._gpo_message_iterator)
641 self._gpo_message_iterator = new_gpo_message_iterator
642 self._gpo_memory_file = new_gpo_memory_file
643 self.units = uniqueunits
644
646
647 def obsolete_workaround():
648
649
650
651 for unit in self.units:
652 if unit.isobsolete():
653 gpo.po_message_set_extracted_comments(unit._gpo_message, "")
654 location = gpo.po_message_filepos(unit._gpo_message, 0)
655 while location:
656 gpo.po_message_remove_filepos(unit._gpo_message, 0)
657 location = gpo.po_message_filepos(unit._gpo_message, 0)
658 outputstring = ""
659 if self._gpo_memory_file:
660 obsolete_workaround()
661 f, fname = tempfile.mkstemp(prefix='translate', suffix='.po')
662 os.close(f)
663 self._gpo_memory_file = gpo.po_file_write_v2(self._gpo_memory_file, fname, xerror_handler)
664 f = open(fname)
665 outputstring = f.read()
666 f.close()
667 os.remove(fname)
668 return outputstring
669
671 """Returns True if the object doesn't contain any translation units."""
672 if len(self.units) == 0:
673 return True
674
675 if self.units[0].isheader():
676 units = self.units[1:]
677 else:
678 units = self.units
679
680 for unit in units:
681 if not unit.isblank() and not unit.isobsolete():
682 return False
683 return True
684
686 if hasattr(input, 'name'):
687 self.filename = input.name
688 elif not getattr(self, 'filename', ''):
689 self.filename = ''
690
691 if hasattr(input, "read"):
692 posrc = input.read()
693 input.close()
694 input = posrc
695
696 needtmpfile = not os.path.isfile(input)
697 if needtmpfile:
698
699 fd, fname = tempfile.mkstemp(prefix='translate', suffix='.po')
700 os.write(fd, input)
701 input = fname
702 os.close(fd)
703
704 self._gpo_memory_file = gpo.po_file_read_v3(input, xerror_handler)
705 if self._gpo_memory_file is None:
706 print >> sys.stderr, "Error:"
707
708 if needtmpfile:
709 os.remove(input)
710
711 self.units = []
712
713 self._header = gpo.po_file_domain_header(self._gpo_memory_file, None)
714 if self._header:
715 charset = gpo.po_header_field(self._header, "Content-Type")
716 if charset:
717 charset = re.search("charset=([^\\s]+)", charset).group(1)
718 self._encoding = encodingToUse(charset)
719 self._gpo_message_iterator = gpo.po_message_iterator(self._gpo_memory_file, None)
720 newmessage = gpo.po_next_message(self._gpo_message_iterator)
721 while newmessage:
722 newunit = pounit(gpo_message=newmessage, encoding=self._encoding)
723 self.addunit(newunit, new=False)
724 newmessage = gpo.po_next_message(self._gpo_message_iterator)
725 self._free_iterator()
726
728
729
730 return
731 self._free_iterator()
732 if self._gpo_memory_file is not None:
733 gpo.po_file_free(self._gpo_memory_file)
734 self._gpo_memory_file = None
735
737
738
739 return
740 if self._gpo_message_iterator is not None:
741 gpo.po_message_iterator_free(self._gpo_message_iterator)
742 self._gpo_message_iterator = None
743