1
2
3
4
5
6
7 """
8 **polib** allows you to manipulate, create, modify gettext files (pot, po
and mo files). You can load existing files, iterate through its entries,
10 add, modify entries, comments or metadata, etc... or create new po files
11 from scratch.
12
13 **polib** provides a simple and pythonic API, exporting only three
14 convenience functions (*pofile*, *mofile* and *detect_encoding*), and the
15 four core classes, *POFile*, *MOFile*, *POEntry* and *MOEntry* for creating
16 new files/entries.
17
18 **Basic example**:
19
20 >>> import polib
21 >>> # load an existing po file
22 >>> po = polib.pofile('tests/test_utf8.po')
23 >>> for entry in po:
24 ... # do something with entry...
25 ... pass
26 >>> # add an entry
27 >>> entry = polib.POEntry(msgid='Welcome', msgstr='Bienvenue')
28 >>> entry.occurrences = [('welcome.py', '12'), ('anotherfile.py', '34')]
29 >>> po.append(entry)
30 >>> # to save our modified po file:
31 >>> # po.save()
32 >>> # or you may want to compile the po file
33 >>> # po.save_as_mofile('tests/test_utf8.mo')
34 """
35
# Package metadata.
__author__ = 'David JEAN LOUIS <izimobil@gmail.com>'
__version__ = '0.4.2'
# Names exported by ``from polib import *``.
__all__ = ['pofile', 'POFile', 'POEntry', 'mofile', 'MOFile', 'MOEntry',
           'detect_encoding', 'escape', 'unescape']

import codecs
import struct
import textwrap

# Encoding used when a file's charset cannot be detected and the caller
# did not supply one.
default_encoding = 'utf-8'
46
47
48
def pofile(fpath, **kwargs):
    """
    Convenience function that parses the po/pot file *fpath* and returns
    a POFile instance.

    **Keyword arguments**:
      - *fpath*: string, full or relative path to the po/pot file to parse
      - *wrapwidth*: integer, the wrap width, only useful when -w option was
        passed to xgettext (optional, default to 78)
      - *autodetect_encoding*: boolean, if set to False the function will
        not try to detect the po file encoding (optional, default to True)
      - *encoding*: string, an encoding, only relevant if autodetect_encoding
        is set to False

    **Example**:

    >>> import polib
    >>> po = polib.pofile('tests/test_weird_occurrences.po')
    >>> po #doctest: +ELLIPSIS
    <POFile instance at ...>
    >>> import os, tempfile
    >>> for fname in ['test_iso-8859-15.po', 'test_utf8.po']:
    ...     orig_po = polib.pofile('tests/'+fname)
    ...     tmpf = tempfile.NamedTemporaryFile().name
    ...     orig_po.save(tmpf)
    ...     try:
    ...         new_po = polib.pofile(tmpf)
    ...         for old, new in zip(orig_po, new_po):
    ...             if old.msgid != new.msgid:
    ...                 old.msgid
    ...                 new.msgid
    ...             if old.msgstr != new.msgstr:
    ...                 old.msgstr
    ...                 new.msgstr
    ...     finally:
    ...         os.unlink(tmpf)
    """
    # Any truthy value enables detection; the flag is documented as a boolean.
    if kwargs.get('autodetect_encoding', True):
        enc = detect_encoding(fpath)
    else:
        enc = kwargs.get('encoding', default_encoding)
    instance = _POFileParser(fpath, enc).parse()
    instance.wrapwidth = kwargs.get('wrapwidth', 78)
    return instance
94
95
96
97
def mofile(fpath, **kwargs):
    """
    Convenience function that parses the mo file *fpath* and returns
    a MOFile instance.

    **Keyword arguments**:
      - *fpath*: string, full or relative path to the mo file to parse
      - *wrapwidth*: integer, the wrap width, only useful when -w option was
        passed to xgettext to generate the po file that was used to format
        the mo file (optional, default to 78)
      - *autodetect_encoding*: boolean, if set to False the function will
        not try to detect the mo file encoding (optional, default to True)
      - *encoding*: string, an encoding, only relevant if autodetect_encoding
        is set to False

    **Example**:

    >>> import polib
    >>> mo = polib.mofile('tests/test_utf8.mo')
    >>> mo #doctest: +ELLIPSIS
    <MOFile instance at ...>
    >>> import os, tempfile
    >>> for fname in ['test_iso-8859-15.mo', 'test_utf8.mo']:
    ...     orig_mo = polib.mofile('tests/'+fname)
    ...     tmpf = tempfile.NamedTemporaryFile().name
    ...     orig_mo.save(tmpf)
    ...     try:
    ...         new_mo = polib.mofile(tmpf)
    ...         for old, new in zip(orig_mo, new_mo):
    ...             if old.msgid != new.msgid:
    ...                 old.msgstr
    ...                 new.msgstr
    ...     finally:
    ...         os.unlink(tmpf)
    """
    # Any truthy value enables detection; the flag is documented as a boolean.
    if kwargs.get('autodetect_encoding', True):
        # mo files are binary, so the header is searched in binary mode.
        enc = detect_encoding(fpath, True)
    else:
        enc = kwargs.get('encoding', default_encoding)
    instance = _MOFileParser(fpath, enc).parse()
    instance.wrapwidth = kwargs.get('wrapwidth', 78)
    return instance
141
142
143
144
def detect_encoding(fpath, binary_mode=False):
    """
    Try to detect the encoding used by the file *fpath*. The function
    returns the charset named in the first ``Content-Type`` header line it
    finds, or polib default *encoding* if it is unable to detect it.

    **Keyword arguments**:
      - *fpath*: string, full or relative path to the po or mo file.
      - *binary_mode*: boolean, kept for backward compatibility. The file
        is always scanned as bytes, so a po file in a legacy charset cannot
        trigger a decoding error before the header is reached.

    **Examples**:

    >>> print(detect_encoding('tests/test_noencoding.po'))
    utf-8
    >>> print(detect_encoding('tests/test_utf8.po'))
    UTF-8
    >>> print(detect_encoding('tests/test_utf8.mo', True))
    UTF-8
    >>> print(detect_encoding('tests/test_iso-8859-15.po'))
    ISO_8859-15
    >>> print(detect_encoding('tests/test_iso-8859-15.mo', True))
    ISO_8859-15
    """
    import re
    rx = re.compile(r'"?Content-Type:.+? charset=([\w_\-:\.]+)')
    # The context manager closes the file even if reading fails part-way.
    with open(fpath, 'rb') as fhandle:
        for line in fhandle:
            if not isinstance(line, str):
                # Python 3 yields bytes here; latin-1 maps every byte to a
                # character so the str pattern can be applied safely.
                line = line.decode('latin-1')
            match = rx.search(line)
            if match:
                return match.group(1).strip()
    return default_encoding
180
181
182
183
def escape(st):
    """
    Return *st* with the special characters backslash-escaped.

    Backslash, tab, carriage return, newline and double quote are each
    replaced by their two-character escape sequence.

    **Examples**:

    >>> escape('\\t and \\n and \\r and " and \\\\')
    '\\\\t and \\\\n and \\\\r and \\\\" and \\\\\\\\'
    """
    # The backslash goes first so the backslashes introduced by the later
    # replacements are not escaped a second time.
    substitutions = (
        ('\\', r'\\'),
        ('\t', r'\t'),
        ('\r', r'\r'),
        ('\n', r'\n'),
        ('"', r'\"'),
    )
    for raw, escaped in substitutions:
        st = st.replace(raw, escaped)
    return st
199
200
201
202
def unescape(st):
    """
    Unescape special chars and return the given string *st*.

    The string is decoded in a single left-to-right pass, so an escaped
    backslash followed by ``n``, ``t``, ``r`` or ``"`` is not mistaken for
    another escape sequence. Unknown sequences are left untouched.

    **Examples**:

    >>> unescape('\\\\t and \\\\n and \\\\r and \\\\" and \\\\\\\\')
    '\\t and \\n and \\r and " and \\\\'
    >>> unescape('\\\\\\\\n')
    '\\\\n'
    """
    import re
    decoded = {'n': '\n', 'r': '\r', 't': '\t', '"': '"', '\\': '\\'}

    def replace(match):
        return decoded[match.group(1)]

    # Each backslash pair is consumed exactly once, which chained
    # str.replace() calls cannot guarantee.
    return re.sub(r'\\([\\nrt"])', replace, st)
218
219
220
221
"""
Common parent class for the POFile and MOFile classes.
This class must **not** be instantiated directly.
"""
227
# NOTE(review): the ``def`` line was missing from the reviewed excerpt. The
# parameter names come from the body; the defaults are inferred from the
# docstring (wrapwidth 78) and from callers (``POFile()`` with no argument,
# ``save()`` testing ``fpath is None``). The ``encoding`` default is
# presumed to be ``default_encoding`` -- confirm against the original file.
def __init__(self, fpath=None, wrapwidth=78, encoding=default_encoding):
    """
    Constructor.

    **Keyword arguments**:
      - *fpath*: string, path to po or mo file
      - *wrapwidth*: integer, the wrap width, only useful when -w option
        was passed to xgettext to generate the po file that was used to
        format the mo file, default to 78 (optional).
      - *encoding*: string, the encoding stored on the instance and used
        by save() when writing text output.
    """
    list.__init__(self)
    # path of the file this instance was loaded from / is saved to
    self.fpath = fpath
    # wrap width handed to each entry when the file is rendered
    self.wrapwidth = wrapwidth
    # encoding used when the file is written back as text
    self.encoding = encoding
    # free-form header comment placed before the metadata entry
    self.header = ''
    # metadata of the file, as a name -> value dict
    self.metadata = {}
    self.metadata_is_fuzzy = 0
250
def __str__(self):
    """
    String representation of the file.

    The metadata entry comes first, then every non-obsolete entry in list
    order, then the obsolete entries. Each entry is rendered with the
    file's wrap width and the pieces are joined with newlines.
    """
    entries = [self.metadata_as_entry()]
    entries += [e for e in self if not e.obsolete]
    # Obsolete entries are always written last.
    entries += self.obsolete_entries()
    return '\n'.join([entry.__str__(self.wrapwidth) for entry in entries])
261
def __repr__(self):
    """Return the official string representation of the object.

    The result has the form ``<ClassName instance at HEXID>`` where HEXID
    is ``id(self)`` in hexadecimal.
    """
    return '<%s instance at %x>' % (self.__class__.__name__, id(self))
265
def metadata_as_entry(self):
    """
    Return the metadata as an entry.

    The returned POEntry has an empty msgid. Its msgstr holds one
    ``name: value`` line per item returned by ``self.ordered_metadata()``
    and ends with a newline; it stays empty when there is no metadata.
    """
    # NOTE(review): ordered_metadata() is not visible in the reviewed
    # excerpt; it is assumed to yield (name, value) pairs -- confirm.
    e = POEntry(msgid='')
    mdata = self.ordered_metadata()
    if mdata:
        strs = []
        for name, value in mdata:
            # Strip the whitespace around each line of a multi-line value.
            value = '\n'.join([v.strip() for v in value.split('\n')])
            strs.append('%s: %s' % (name, value))
        e.msgstr = '\n'.join(strs) + '\n'
    return e
278
def save(self, fpath=None, repr_method='__str__'):
    """
    Save the file to *fpath*, or to the path the instance already knows
    (``self.fpath``) when *fpath* is not provided.

    **Keyword arguments**:
      - *fpath*: string, full or relative path to the file.
      - *repr_method*: string, name of the method called to produce the
        output. ``'to_binary'`` is written to a file opened in binary
        mode; anything else is written as text encoded with
        ``self.encoding``.

    Raises IOError when neither *fpath* nor ``self.fpath`` is set.
    """
    if self.fpath is None and fpath is None:
        raise IOError('You must provide a file path to save() method')
    contents = getattr(self, repr_method)()
    if fpath is None:
        fpath = self.fpath
    if repr_method == 'to_binary':
        fhandle = open(fpath, 'wb')
    else:
        fhandle = codecs.open(fpath, 'w', self.encoding)
    try:
        fhandle.write(contents)
    finally:
        # Release the handle even when the write (or the encoding) fails.
        fhandle.close()
301
def find(self, st, by='msgid'):
    """
    Return the first entry whose *by* attribute equals *st*, or None when
    no entry matches.

    **Keyword arguments**:
      - *st*: string, the string to search for
      - *by*: string, the name of the attribute to compare (msgid by
        default)

    **Examples**:

    >>> po = pofile('tests/test_utf8.po')
    >>> entry = po.find('Thursday')
    >>> print(entry.msgstr)
    Jueves
    >>> entry = po.find('Some unexistant msgid')
    >>> entry is None
    True
    >>> entry = po.find('Jueves', 'msgstr')
    >>> print(entry.msgid)
    Thursday
    """
    matching = (entry for entry in self if getattr(entry, by) == st)
    return next(matching, None)
328
361
363 """Return the mofile binary representation."""
364 import struct
365 import array
366 output = ''
367 offsets = []
368 ids = strs = ''
369 entries = self.translated_entries()
370
371 def cmp(_self, other):
372 if _self.msgid > other.msgid:
373 return 1
374 elif _self.msgid < other.msgid:
375 return -1
376 else:
377 return 0
378 entries.sort(cmp)
379
380 mentry = self.metadata_as_entry()
381 mentry.msgstr = mentry.msgstr.replace('\\n', '').lstrip() + '\n'
382 entries = [mentry] + entries
383 entries_len = len(entries)
384 for e in entries:
385
386
387 msgid = e.msgid
388 if e.msgid_plural:
389 msgid = msgid + '\0' + e.msgid_plural
390 indexes = e.msgstr_plural.keys()
391 indexes.sort()
392 msgstr = []
393 for index in indexes:
394 msgstr.append(e.msgstr_plural[index])
395 msgstr = '\0'.join(msgstr)
396 else:
397 msgstr = e.msgstr
398
399 offsets.append((len(ids), len(msgid), len(strs), len(msgstr)))
400 ids += e.msgid + '\0'
401 strs += e.msgstr + '\0'
402
403 keystart = 7*4+16*entries_len
404
405 valuestart = keystart + len(ids)
406 koffsets = []
407 voffsets = []
408
409
410 for o1, l1, o2, l2 in offsets:
411 koffsets += [l1, o1+keystart]
412 voffsets += [l2, o2+valuestart]
413 offsets = koffsets + voffsets
414 output = struct.pack("IIIIIII",
415 0x950412de,
416 0,
417 entries_len,
418 7*4,
419 7*4+entries_len*8,
420 0, 0)
421 output += array.array("I", offsets).tostring()
422 output += ids
423 output += strs
424 return output
425
426
427
428
class POFile(_BaseFile):
    '''
    Po (or Pot) file reader/writer.
    POFile objects inherit the list objects methods.

    **Example**:

    >>> po = POFile()
    >>> entry1 = POEntry(
    ...     msgid="Some english text",
    ...     msgstr="Un texte en anglais"
    ... )
    >>> entry1.occurrences = [('testfile', 12),('another_file', 1)]
    >>> entry1.comment = "Some useful comment"
    >>> entry2 = POEntry(
    ...     msgid="Peace in some languages",
    ...     msgstr="Pace سلام שלום Hasîtî 和平"
    ... )
    >>> entry2.occurrences = [('testfile', 15),('another_file', 5)]
    >>> entry2.comment = "Another useful comment"
    >>> entry3 = POEntry(
    ...     msgid='Some entry with quotes " \\"',
    ...     msgstr='Un message unicode avec des quotes " \\"'
    ... )
    >>> entry3.comment = "Test string quoting"
    >>> po.append(entry1)
    >>> po.append(entry2)
    >>> po.append(entry3)
    >>> po.header = "Some Header"
    >>> print(po)
    # Some Header
    msgid ""
    msgstr ""
    <BLANKLINE>
    #. Some useful comment
    #: testfile:12 another_file:1
    msgid "Some english text"
    msgstr "Un texte en anglais"
    <BLANKLINE>
    #. Another useful comment
    #: testfile:15 another_file:5
    msgid "Peace in some languages"
    msgstr "Pace سلام שלום Hasîtî 和平"
    <BLANKLINE>
    #. Test string quoting
    msgid "Some entry with quotes \\" \\""
    msgstr "Un message unicode avec des quotes \\" \\""
    <BLANKLINE>
    '''

    def __str__(self):
        """
        Return the string representation of the po file: the header
        comment lines followed by the entries.
        """
        lines = []
        for header in self.header.split('\n'):
            if header[:1] in [',', ':']:
                # lines starting with ',' or ':' are glued to the '#'
                lines.append('#%s\n' % header)
            else:
                lines.append('# %s\n' % header)
        return ''.join(lines) + _BaseFile.__str__(self)

    def save_as_mofile(self, fpath):
        """
        Save the binary representation of the file to *fpath*.

        **Keyword arguments**:
          - *fpath*: string, full or relative path to the file.
        """
        _BaseFile.save(self, fpath, 'to_binary')

    def percent_translated(self):
        """
        Convenience method that returns the percentage of translated
        messages, rounded down. Obsolete entries are ignored; a file
        without any non-obsolete entry counts as 100% translated.

        **Example**:

        >>> import polib
        >>> po = polib.pofile('tests/test_pofile_helpers.po')
        >>> po.percent_translated()
        50
        >>> po = POFile()
        >>> po.percent_translated()
        100
        """
        total = len([e for e in self if not e.obsolete])
        if total == 0:
            return 100
        translated = len(self.translated_entries())
        # Integer arithmetic: a float quotient can land just below a whole
        # number and be truncated one point too low.
        return (100 * translated) // total

    def translated_entries(self):
        """
        Convenience method that returns the list of translated entries.

        **Example**:

        >>> import polib
        >>> po = polib.pofile('tests/test_pofile_helpers.po')
        >>> len(po.translated_entries())
        6
        """
        return [e for e in self if e.translated() and not e.obsolete]

    def untranslated_entries(self):
        """
        Convenience method that returns the list of untranslated entries.

        **Example**:

        >>> import polib
        >>> po = polib.pofile('tests/test_pofile_helpers.po')
        >>> len(po.untranslated_entries())
        6
        """
        return [e for e in self if not e.translated() and not e.obsolete]

    def fuzzy_entries(self):
        """
        Convenience method that returns the list of 'fuzzy' entries.

        **Example**:

        >>> import polib
        >>> po = polib.pofile('tests/test_pofile_helpers.po')
        >>> len(po.fuzzy_entries())
        2
        """
        return [e for e in self if 'fuzzy' in e.flags]

    def obsolete_entries(self):
        """
        Convenience method that returns the list of obsolete entries.

        **Example**:

        >>> import polib
        >>> po = polib.pofile('tests/test_pofile_helpers.po')
        >>> len(po.obsolete_entries())
        4
        """
        return [e for e in self if e.obsolete]

    def merge(self, refpot):
        """
        XXX this could not work if encodings are different, needs thinking
        and general refactoring of how polib handles encoding...

        Convenience method that merges the current pofile with the pot
        file provided. It is meant to behave like the gettext msgmerge
        utility:

          - every entry of *refpot* is merged into the entry of this file
            that has the same msgid; a new entry is appended when there is
            none
          - entries of this file whose msgid is absent from *refpot* are
            flagged obsolete

        **Keyword argument**:
          - *refpot*: object POFile, the reference catalog.

        **Example**:

        >>> import polib
        >>> refpot = polib.pofile('tests/test_merge.pot')
        >>> po = polib.pofile('tests/test_merge_before.po')
        >>> po.merge(refpot)
        >>> expected_po = polib.pofile('tests/test_merge_after.po')
        >>> po.__str__() == expected_po.__str__()
        True
        """
        # Index the entries by msgid once instead of scanning the whole
        # list for every reference entry. As find() does, the first entry
        # wins when a msgid appears several times.
        by_msgid = {}
        for entry in self:
            by_msgid.setdefault(entry.msgid, entry)
        for entry in refpot:
            e = by_msgid.get(entry.msgid)
            if e is None:
                e = POEntry()
                self.append(e)
                by_msgid[entry.msgid] = e
            e.merge(entry)
        # ok, now we must "obsolete" entries that are not in the refpot
        # anymore
        reference_ids = set([entry.msgid for entry in refpot])
        for entry in self:
            if entry.msgid not in reference_ids:
                entry.obsolete = True
608
609
610
611
class MOFile(_BaseFile):
    '''
    Mo file reader/writer.
    MOFile objects inherit the list objects methods.

    **Example**:

    >>> mo = MOFile()
    >>> entry1 = POEntry(
    ...     msgid="Some english text",
    ...     msgstr="Un texte en anglais"
    ... )
    >>> entry2 = POEntry(
    ...     msgid="I need my dirty cheese",
    ...     msgstr="Je veux mon sale fromage"
    ... )
    >>> entry3 = MOEntry(
    ...     msgid='Some entry with quotes " \\"',
    ...     msgstr='Un message unicode avec des quotes " \\"'
    ... )
    >>> mo.append(entry1)
    >>> mo.append(entry2)
    >>> mo.append(entry3)
    >>> print(mo)
    msgid ""
    msgstr ""
    <BLANKLINE>
    msgid "Some english text"
    msgstr "Un texte en anglais"
    <BLANKLINE>
    msgid "I need my dirty cheese"
    msgstr "Je veux mon sale fromage"
    <BLANKLINE>
    msgid "Some entry with quotes \\" \\""
    msgstr "Un message unicode avec des quotes \\" \\""
    <BLANKLINE>
    '''

    def __init__(self, *args, **kwargs):
        """
        MOFile constructor. Mo files have two other properties:
          - magic_number: the magic_number of the binary file,
          - version: the version of the mo spec.
        """
        _BaseFile.__init__(self, *args, **kwargs)
        self.magic_number = None
        self.version = 0

    # NOTE(review): the ``def`` line of this method was missing from the
    # reviewed excerpt; the name mirrors POFile.save_as_mofile -- confirm.
    def save_as_pofile(self, fpath):
        """
        Save the string representation of the file to *fpath*.

        **Keyword argument**:
          - *fpath*: string, full or relative path to the file.
        """
        _BaseFile.save(self, fpath)

    def save(self, fpath=None):
        """
        Save the binary representation of the file to *fpath*.

        **Keyword argument**:
          - *fpath*: string, full or relative path to the file (optional;
            as in the base class, the path stored on the instance is used
            when it is omitted).
        """
        _BaseFile.save(self, fpath, 'to_binary')

    def percent_translated(self):
        """
        Convenience method to keep the same interface with POFile instances.
        Always returns 100.
        """
        return 100

    def translated_entries(self):
        """
        Convenience method to keep the same interface with POFile instances.
        Returns the file itself: every entry is treated as translated.
        """
        return self

    def untranslated_entries(self):
        """
        Convenience method to keep the same interface with POFile instances.
        Always returns an empty list.
        """
        return []

    def fuzzy_entries(self):
        """
        Convenience method to keep the same interface with POFile instances.
        Always returns an empty list.
        """
        return []

    def obsolete_entries(self):
        """
        Convenience method to keep the same interface with POFile instances.
        Always returns an empty list.
        """
        return []
707
708
709
710
class _BaseEntry(object):
    """
    Base class for POEntry or MOEntry objects.
    This class must *not* be instantiated directly.
    """

    def __init__(self, *args, **kwargs):
        """
        Base Entry constructor.

        Recognised keyword arguments: *msgid*, *msgstr*, *msgid_plural*,
        *msgstr_plural* (dict of plural index -> translation), *obsolete*
        and *encoding*. Positional arguments are ignored.
        """
        read = kwargs.get
        self.msgid = read('msgid', '')
        self.msgstr = read('msgstr', '')
        self.msgid_plural = read('msgid_plural', '')
        self.msgstr_plural = read('msgstr_plural', {})
        self.obsolete = read('obsolete', False)
        self.encoding = read('encoding', default_encoding)

    def __repr__(self):
        """Return the official string representation of the object."""
        return '<%s instance at %x>' % (self.__class__.__name__, id(self))

    def __str__(self, wrapwidth=78):
        """
        Common string representation of the POEntry and MOEntry objects:
        the msgid, the optional msgid_plural, then either every plural
        msgstr (in index order) or the single msgstr, and a final empty
        line. Obsolete entries get a ``#~ `` prefix on every line.
        """
        prefix = '#~ ' if self.obsolete else ''
        lines = list(self._str_field("msgid", prefix, "", self.msgid))
        if self.msgid_plural:
            lines.extend(self._str_field("msgid_plural", prefix, "",
                                         self.msgid_plural))
        if not self.msgstr_plural:
            # singular translation
            lines.extend(self._str_field("msgstr", prefix, "", self.msgstr))
        else:
            # one msgstr[N] per plural form, in index order
            plurals = self.msgstr_plural
            for index in sorted(plurals):
                lines.extend(self._str_field("msgstr", prefix,
                                             '[%s]' % index, plurals[index]))
        lines.append('')
        return '\n'.join(lines)

    def _str_field(self, fieldname, delflag, plural_index, field):
        """
        Return the list of output lines for one field. A value spanning
        several lines is written as an empty first string followed by one
        quoted string per line.
        """
        pieces = field.splitlines(True)
        if len(pieces) > 1:
            first, rest = '', pieces
        else:
            first, rest = field, []
        out = ['%s%s%s "%s"' % (delflag, fieldname, plural_index,
                                escape(first))]
        out.extend(['%s"%s"' % (delflag, escape(piece)) for piece in rest])
        return out
773
774
775
776
class POEntry(_BaseEntry):
    """
    Represents a po file entry.

    **Examples**:

    >>> entry = POEntry(msgid='Welcome', msgstr='Bienvenue')
    >>> entry.occurrences = [('welcome.py', 12), ('anotherfile.py', 34)]
    >>> print(entry)
    #: welcome.py:12 anotherfile.py:34
    msgid "Welcome"
    msgstr "Bienvenue"
    <BLANKLINE>
    >>> entry = POEntry()
    >>> entry.occurrences = [('src/some-very-long-filename-that-should-not-be-wrapped-even-if-it-is-larger-than-the-wrap-limit.c', 32), ('src/eggs.c', 45)]
    >>> entry.comment = 'A plural translation. This is a very very very long line please do not wrap, this is just for testing comment wrapping...'
    >>> entry.tcomment = 'A plural translation. This is a very very very long line please do not wrap, this is just for testing comment wrapping...'
    >>> entry.flags.append('c-format')
    >>> entry.msgid = 'I have spam but no egg !'
    >>> entry.msgid_plural = 'I have spam and %d eggs !'
    >>> entry.msgstr_plural[0] = "J'ai du jambon mais aucun oeuf !"
    >>> entry.msgstr_plural[1] = "J'ai du jambon et %d oeufs !"
    >>> print(entry)
    #. A plural translation. This is a very very very long line please do not
    #. wrap, this is just for testing comment wrapping...
    # A plural translation. This is a very very very long line please do not wrap,
    # this is just for testing comment wrapping...
    #: src/some-very-long-filename-that-should-not-be-wrapped-even-if-it-is-larger-than-the-wrap-limit.c:32
    #: src/eggs.c:45
    #, c-format
    msgid "I have spam but no egg !"
    msgid_plural "I have spam and %d eggs !"
    msgstr[0] "J'ai du jambon mais aucun oeuf !"
    msgstr[1] "J'ai du jambon et %d oeufs !"
    <BLANKLINE>
    """

    def __init__(self, *args, **kwargs):
        """
        POEntry constructor.

        On top of the base entry keyword arguments it accepts *comment*
        (extracted comment), *tcomment* (translator comment),
        *occurrences* (list of ``(file, line)`` pairs) and *flags* (list
        of strings).
        """
        _BaseEntry.__init__(self, *args, **kwargs)
        self.comment = kwargs.get('comment', '')
        self.tcomment = kwargs.get('tcomment', '')
        self.occurrences = kwargs.get('occurrences', [])
        self.flags = kwargs.get('flags', [])

    @staticmethod
    def _comment_lines(text, prefix, wrapwidth):
        """Return *text* as comment lines starting with *prefix*, wrapped
        at *wrapwidth* columns when *wrapwidth* is positive."""
        lines = []
        for line in text.split('\n'):
            if wrapwidth > 0 and len(line) > wrapwidth - len(prefix):
                lines += textwrap.wrap(line, wrapwidth,
                                       initial_indent=prefix,
                                       subsequent_indent=prefix,
                                       break_long_words=False)
            else:
                lines.append(prefix + line)
        return lines

    def __str__(self, wrapwidth=78):
        """
        Return the string representation of the entry: extracted comments
        (``#.``), translator comments (``#``), occurrences (``#:``) and
        flags (``#,``), followed by the common msgid/msgstr part.
        Obsolete entries are written without comments.
        """
        if self.obsolete:
            return _BaseEntry.__str__(self)
        ret = []
        # comment first, if any (with text wrapping as xgettext does)
        if self.comment != '':
            ret += self._comment_lines(self.comment, '#. ', wrapwidth)
        # translator comment, if any (with text wrapping as xgettext does)
        if self.tcomment != '':
            ret += self._comment_lines(self.tcomment, '# ', wrapwidth)
        # occurrences (with text wrapping as xgettext does)
        if self.occurrences:
            filelist = []
            for fpath, lineno in self.occurrences:
                if lineno:
                    filelist.append('%s:%s' % (fpath, lineno))
                else:
                    filelist.append(fpath)
            filestr = ' '.join(filelist)
            if wrapwidth > 0 and len(filestr) + 3 > wrapwidth:
                # Wrap between references only: a file path is never
                # split, even when it contains hyphens or is longer than
                # the wrap width.
                ret += textwrap.wrap(filestr, wrapwidth,
                                     initial_indent='#: ',
                                     subsequent_indent='#: ',
                                     break_long_words=False,
                                     break_on_hyphens=False)
            else:
                ret.append('#: ' + filestr)
        # flags
        if self.flags:
            ret.append('#, %s' % ', '.join(self.flags))
        ret.append(_BaseEntry.__str__(self))
        return '\n'.join(ret)

    def _sort_key(self):
        """Return the tuple entries are ordered by: obsolete entries
        first, then sorted occurrences, then msgid."""
        # NOTE(review): on Python 3 this raises TypeError if line numbers
        # mix ints and strings for the same file -- confirm callers use a
        # single type.
        return (not self.obsolete, sorted(self.occurrences), self.msgid)

    def __cmp__(self, other):
        '''
        Called by comparison operations if rich comparison is not defined
        (Python 2). Returns -1, 0 or 1.

        Obsolete entries sort before the others; entries are then ordered
        by their sorted occurrences (file, then line) and finally by
        msgid.

        **Tests**:
        >>> a = POEntry(msgid='a', occurrences=[('b.py', 1), ('b.py', 3)])
        >>> b = POEntry(msgid='b', occurrences=[('b.py', 1), ('b.py', 3)])
        >>> c1 = POEntry(msgid='c1', occurrences=[('a.py', 1), ('b.py', 1)])
        >>> c2 = POEntry(msgid='c2', occurrences=[('a.py', 1), ('a.py', 3)])
        >>> po = POFile()
        >>> po.append(a)
        >>> po.append(b)
        >>> po.append(c1)
        >>> po.append(c2)
        >>> po.sort()
        >>> [e.msgid for e in po]
        ['c2', 'c1', 'a', 'b']
        '''
        mine, theirs = self._sort_key(), other._sort_key()
        return (mine > theirs) - (mine < theirs)

    # Rich comparisons: Python 3 ignores __cmp__, so list.sort() needs
    # these. Equality is deliberately left untouched.
    def __lt__(self, other):
        return self._sort_key() < other._sort_key()

    def __le__(self, other):
        return self._sort_key() <= other._sort_key()

    def __gt__(self, other):
        return self._sort_key() > other._sort_key()

    def __ge__(self, other):
        return self._sort_key() >= other._sort_key()

    def translated(self):
        """
        Return True if the entry has been translated or False.

        Obsolete and fuzzy entries are never considered translated. A
        plural entry is translated only when none of its plural forms is
        empty.
        """
        if self.obsolete or 'fuzzy' in self.flags:
            return False
        if self.msgstr != '':
            return True
        if self.msgstr_plural:
            for pos in self.msgstr_plural:
                if self.msgstr_plural[pos] == '':
                    return False
            return True
        return False

    def merge(self, other):
        """
        Merge the current entry with the given pot entry: msgid,
        occurrences, extracted comment, flags and msgid_plural are taken
        from *other*, and an empty translation is added for every plural
        index of *other* that this entry does not have yet.
        """
        self.msgid = other.msgid
        # Copy the lists so that later changes to one entry do not leak
        # into the other.
        self.occurrences = list(other.occurrences)
        self.comment = other.comment
        self.flags = list(other.flags)
        self.msgid_plural = other.msgid_plural
        if other.msgstr_plural:
            for pos in other.msgstr_plural:
                # keep an existing translation, otherwise start empty
                self.msgstr_plural.setdefault(pos, '')
994
995
996
997
class MOEntry(_BaseEntry):
    """
    Represents a mo file entry.

    **Examples**:

    >>> entry = MOEntry()
    >>> entry.msgid = 'translate me !'
    >>> entry.msgstr = 'traduisez moi !'
    >>> print(entry)
    msgid "translate me !"
    msgstr "traduisez moi !"
    <BLANKLINE>
    """

    def __str__(self, wrapwidth=78):
        """
        Return the string representation of the entry, which is exactly
        the common representation provided by the base class.
        """
        return super(MOEntry, self).__str__(wrapwidth)
1018
1019
1020
1021
class _POFileParser(object):
    """
    A finite state machine to parse efficiently and correctly po
    file format.
    """

    # Line prefix -> symbol fed to the state machine. The prefixes are
    # mutually exclusive, so their order is irrelevant.
    _LINE_SYMBOLS = (
        ('#:', 'OC'),               # occurrences
        ('msgid "', 'MI'),          # msgid
        ('msgstr "', 'MS'),         # msgstr
        ('"', 'MC'),                # msgid/msgstr continuation
        ('msgid_plural "', 'MP'),   # msgid plural
        ('msgstr[', 'MX'),          # msgstr plural
        ('#, ', 'FL'),              # flags
        ('# ', 'TC'),               # translator comment
        ('#.', 'GC'),               # generated comment
    )

    # NOTE(review): the ``def`` line was missing from the reviewed
    # excerpt; the default of *enc* is presumed to be
    # ``default_encoding`` -- confirm.
    def __init__(self, fpath, enc=default_encoding):
        """
        Constructor.

        **Keyword arguments**:
          - *fpath*: string, path to the po file
          - *enc*: string, the encoding used to decode the file; when the
            codec is unknown the parser falls back to ``default_encoding``
        """
        # Plain 'r' mode: the parser strips every line, so universal
        # newlines ('U', rejected by recent Python versions) is not needed.
        try:
            self.fhandle = codecs.open(fpath, 'r', enc)
        except LookupError:
            enc = default_encoding
            self.fhandle = codecs.open(fpath, 'r', enc)
        self.instance = POFile(fpath=fpath, encoding=enc)
        self.transitions = {}
        self.current_entry = POEntry()
        self.current_state = 'ST'
        self.current_token = None
        # two memo flags used in handlers
        self.msgstr_index = 0
        self.entry_obsolete = 0
        # Configure the state machine by adding transitions.
        # Symbols/states:
        #   * ST: beginning of the file (start)
        #   * HE: header
        #   * TC: a translator comment
        #   * GC: a generated comment
        #   * OC: a file/line occurrence
        #   * FL: a flags line
        #   * MI: a msgid
        #   * MP: a msgid plural
        #   * MS: a msgstr
        #   * MX: a msgstr plural
        #   * MC: a msgid or msgstr continuation line
        all_ = ['ST', 'HE', 'GC', 'OC', 'FL', 'TC', 'MS', 'MP', 'MX', 'MI']

        self.add('TC', ['ST', 'HE'], 'HE')
        self.add('TC', ['GC', 'OC', 'FL', 'TC', 'MS', 'MP', 'MX', 'MI'], 'TC')
        self.add('GC', all_, 'GC')
        self.add('OC', all_, 'OC')
        self.add('FL', all_, 'FL')
        self.add('MI', ['ST', 'HE', 'GC', 'OC', 'FL', 'TC', 'MS', 'MX'], 'MI')
        self.add('MP', ['TC', 'GC', 'MI'], 'MP')
        self.add('MS', ['MI', 'MP', 'TC'], 'MS')
        self.add('MX', ['MI', 'MX', 'MP', 'TC'], 'MX')
        self.add('MC', ['MI', 'MP', 'MS', 'MX'], 'MC')

    def parse(self):
        """
        Run the state machine, parse the file line by line and call process()
        with the current matched symbol.

        Returns the POFile instance. Raises IOError (from process()) on a
        syntax error; the file handle is closed in every case.
        """
        try:
            linenum = 0
            for line in self.fhandle:
                linenum += 1
                line = line.strip()
                if line == '':
                    continue
                if line[:3] == '#~ ':
                    # obsolete entry: parse the rest of the line normally
                    line = line[3:]
                    self.entry_obsolete = 1
                else:
                    self.entry_obsolete = 0
                self.current_token = line
                symbol = None
                if line == '#':
                    # an empty translator comment
                    symbol = 'TC'
                else:
                    for prefix, candidate in self._LINE_SYMBOLS:
                        if line.startswith(prefix):
                            symbol = candidate
                            break
                # lines that match no known prefix are ignored
                if symbol is not None:
                    self.process(symbol, linenum)
        finally:
            self.fhandle.close()

        if self.current_entry:
            # since entries are added when another entry is found, we must
            # add the last entry here (only if there are lines)
            self.instance.append(self.current_entry)
        # before returning the instance, check if there's metadata and if
        # so extract it in a dict
        firstentry = self.instance[0]
        if firstentry.msgid == '':  # metadata found
            # remove the entry
            firstentry = self.instance.pop(0)
            self.instance.metadata_is_fuzzy = firstentry.flags
            key = None
            for msg in firstentry.msgstr.splitlines():
                try:
                    key, val = msg.split(':', 1)
                    self.instance.metadata[key] = val.strip()
                except ValueError:
                    # no colon: continuation of the previous value
                    if key is not None:
                        self.instance.metadata[key] += '\n' + msg.strip()
        return self.instance

    def add(self, symbol, states, next_state):
        """
        Add a transition to the state machine.
        Keywords arguments:

        symbol     -- string, the matched token (two chars symbol)
        states     -- list, a list of states (two chars symbols)
        next_state -- the next state the fsm will have after the action
        """
        action = getattr(self, 'handle_%s' % next_state.lower())
        for state in states:
            self.transitions[(symbol, state)] = (action, next_state)

    def process(self, symbol, linenum):
        """
        Process the transition corresponding to the current state and the
        symbol provided.

        Keywords arguments:
        symbol  -- string, the matched token (two chars symbol)
        linenum -- integer, the current line number of the parsed file

        Raises IOError when no transition exists for the symbol in the
        current state or when the handler fails.
        """
        try:
            (action, state) = self.transitions[(symbol, self.current_state)]
            if action():
                self.current_state = state
        except Exception:
            raise IOError('Syntax error in po file (line %s)' % linenum)

    # state handlers

    def _flush_entry(self):
        """Store the current entry and start a new one when the previous
        line completed a msgstr."""
        if self.current_state in ['MC', 'MS', 'MX']:
            self.instance.append(self.current_entry)
            self.current_entry = POEntry()

    def handle_he(self):
        """Handle a header comment."""
        if self.instance.header != '':
            self.instance.header += '\n'
        self.instance.header += self.current_token[2:]
        return 1

    def handle_tc(self):
        """Handle a translator comment."""
        self._flush_entry()
        if self.current_entry.tcomment != '':
            self.current_entry.tcomment += '\n'
        self.current_entry.tcomment += self.current_token[2:]
        return True

    def handle_gc(self):
        """Handle a generated comment."""
        self._flush_entry()
        if self.current_entry.comment != '':
            self.current_entry.comment += '\n'
        self.current_entry.comment += self.current_token[3:]
        return True

    def handle_oc(self):
        """Handle a file:num occurence."""
        self._flush_entry()
        for occurrence in self.current_token[3:].split():
            try:
                fil, line = occurrence.split(':')
            except ValueError:
                # not exactly one colon: keep the token as the file name
                self.current_entry.occurrences.append((occurrence, ''))
                continue
            if not line.isdigit():
                fil = fil + line
                line = ''
            self.current_entry.occurrences.append((fil, line))
        return True

    def handle_fl(self):
        """Handle a flags line."""
        self._flush_entry()
        self.current_entry.flags += self.current_token[3:].split(', ')
        return True

    def handle_mi(self):
        """Handle a msgid."""
        self._flush_entry()
        self.current_entry.obsolete = self.entry_obsolete
        self.current_entry.msgid = unescape(self.current_token[7:-1])
        return True

    def handle_mp(self):
        """Handle a msgid plural."""
        self.current_entry.msgid_plural = unescape(self.current_token[14:-1])
        return True

    def handle_ms(self):
        """Handle a msgstr."""
        self.current_entry.msgstr = unescape(self.current_token[8:-1])
        return True

    def handle_mx(self):
        """Handle a msgstr plural."""
        token = self.current_token
        # Read the index up to the closing bracket so that indexes with
        # more than one digit are handled too.
        close = token.index(']')
        index = token[7:close]
        value = token[token.index('"', close) + 1:-1]
        self.current_entry.msgstr_plural[index] = unescape(value)
        self.msgstr_index = index
        return True

    def handle_mc(self):
        """Handle a msgid or msgstr continuation line."""
        text = unescape(self.current_token[1:-1])
        if self.current_state == 'MI':
            self.current_entry.msgid += text
        elif self.current_state == 'MP':
            self.current_entry.msgid_plural += text
        elif self.current_state == 'MS':
            self.current_entry.msgstr += text
        elif self.current_state == 'MX':
            self.current_entry.msgstr_plural[self.msgstr_index] += text
        # don't change the current state
        return False
1269
1270
1271
1272
class _MOFileParser(object):
    """
    A class to parse binary mo files.
    """
    # Value of the magic number, read as a little-endian unsigned int, for
    # big-endian and little-endian files respectively.
    BIG_ENDIAN = 0xde120495
    LITTLE_ENDIAN = 0x950412de

    # NOTE(review): the ``def`` line was missing from the reviewed
    # excerpt; the default of *enc* is presumed to be
    # ``default_encoding`` -- confirm.
    def __init__(self, fpath, enc=default_encoding):
        """_MOFileParser constructor.

        **Keyword arguments**:
          - *fpath*: string, path to the mo file
          - *enc*: string, the encoding used to decode the strings
        """
        self.fhandle = open(fpath, 'rb')
        self.instance = MOFile(fpath=fpath, encoding=enc)

    # NOTE(review): the ``def`` line of this method was missing from the
    # reviewed excerpt and its name is not referenced anywhere visible --
    # confirm the name.
    def parse_magicnumber(self):
        """
        Placeholder kept for interface compatibility: it does nothing.
        The magic number is read and validated by parse().
        """

    def parse(self):
        """
        Build the instance with the file handle provided in the
        constructor.

        Returns the MOFile instance. Raises IOError when the magic number
        is wrong or the file is truncated; the file handle is closed in
        every case.
        """
        try:
            magic_number = self._readbinary('<I', 4)
            if magic_number == self.LITTLE_ENDIAN:
                ii = '<II'
            elif magic_number == self.BIG_ENDIAN:
                ii = '>II'
            else:
                raise IOError('Invalid mo file, magic number is incorrect !')
            self.instance.magic_number = magic_number
            # parse the version number and the number of strings
            self.instance.version, numofstrings = self._readbinary(ii, 8)
            # original strings and translation strings index table offsets
            msgids_hash_offset, msgstrs_hash_offset = \
                self._readbinary(ii, 8)
            # each index record is a (length, offset) pair
            self.fhandle.seek(msgids_hash_offset)
            msgids_index = [self._readbinary(ii, 8)
                            for _ in range(numofstrings)]
            self.fhandle.seek(msgstrs_hash_offset)
            msgstrs_index = [self._readbinary(ii, 8)
                             for _ in range(numofstrings)]
            for i in range(numofstrings):
                msgid = self._readtext(*msgids_index[i])
                msgstr = self._readtext(*msgstrs_index[i])
                if i == 0 and msgid == '':
                    # the entry with an empty msgid holds the metadata
                    self.instance.metadata = self._parse_metadata(msgstr)
                    continue
                if '\0' in msgid:
                    # plural entry: "singular NUL plural" maps to the
                    # plural forms joined by NUL
                    msgid, msgid_plural = msgid.split('\0', 1)
                    forms = dict(enumerate(msgstr.split('\0')))
                    entry = MOEntry(msgid=msgid, msgid_plural=msgid_plural,
                                    msgstr_plural=forms)
                else:
                    entry = MOEntry(msgid=msgid, msgstr=msgstr)
                self.instance.append(entry)
        finally:
            self.fhandle.close()
        return self.instance

    def _parse_metadata(self, raw):
        """Return the ``name: value`` lines of *raw* as a dict."""
        metadata = {}
        for line in raw.split('\n'):
            tokens = line.split(':', 1)
            if tokens[0] != '':
                try:
                    metadata[tokens[0]] = tokens[1].strip()
                except IndexError:
                    # a line without colon is stored with an empty value
                    metadata[tokens[0]] = ''
        return metadata

    def _readtext(self, length, offset):
        """Read *length* bytes at *offset* and decode them with the
        instance encoding (or the default one if the codec is unknown)."""
        self.fhandle.seek(offset)
        data = self.fhandle.read(length)
        try:
            return data.decode(self.instance.encoding)
        except LookupError:
            return data.decode(default_encoding)

    def _readbinary(self, fmt, numbytes):
        """
        Private method that unpack n bytes of data using format <fmt>.
        It returns a tuple or a mixed value if the tuple length is 1.

        Raises IOError when fewer than *numbytes* bytes could be read.
        """
        data = self.fhandle.read(numbytes)
        try:
            tup = struct.unpack(fmt, data)
        except struct.error:
            raise IOError('Invalid mo file, unexpected end of file !')
        if len(tup) == 1:
            return tup[0]
        return tup
1350
1351
1352
1353
if __name__ == '__main__':
    # **Main function**::
    #   - to **test** the module just run: *python polib.py [-v]*
    #   - to **profile** the module: *python polib.py -p <some_pofile.po>*
    import sys
    if len(sys.argv) > 2 and sys.argv[1] == '-p':
        def test(f):
            """Load the po or mo file *f* and render it as a string."""
            if f.endswith('po'):
                p = pofile(f)
            else:
                p = mofile(f)
            str(p)
        import profile
        # Pass the path as a variable rather than pasting it into the
        # profiled source, so quotes or backslashes in it cannot break
        # the command.
        profile.runctx('test(path)', globals(), {'path': sys.argv[2]})
    else:
        import doctest
        doctest.testmod()
1373
1374
1375