1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 """Classes that hold units of .properties, and similar, files that are used in
23 translating Java, Mozilla, MacOS and other software.
24
25 The L{propfile} class is a monolingual class with L{propunit} providing unit
26 level access.
27
28 The .properties store has become a general key value pair class with
29 L{Dialect} providing the ability to change the behaviour of the parsing
30 and handling of the various dialects.
31
32 Currently we support::
33 * Java .properties
34 * Mozilla .properties
35 * Adobe Flex files
36 * MacOS X .strings files
37 * Skype .lang files
38
39
40 Dialects
41 ========
42 The following provides references and descriptions of the various dialects supported::
43
44 Java
45 ----
46 Java .properties are supported completely except for the ability to drop
47 pairs that are not translated.
48
49 The following U{.properties file
50 description<http://java.sun.com/j2se/1.4.2/docs/api/java/util/Properties.html#load(java.io.InputStream)>}
51 and U{example <http://www.exampledepot.com/egs/java.util/Props.html>} give
52 some good references to the .properties specification.
53
Properties files may also hold Java
55 U{MessageFormat<http://java.sun.com/j2se/1.4.2/docs/api/java/text/MessageFormat.html>}
56 messages. No special handling is provided in this storage class for
57 MessageFormat, but this may be implemented in future.
58
All delimiter types, comments, line continuations and the handling of spaces
in delimiters are supported.
61
62 Mozilla
63 -------
64 Mozilla files use '=' as a delimiter, are UTF-8 encoded and thus don't need \\u
escaping. Any \\U values will be converted to correct Unicode characters.

Strings
68 -------
69 Mac OS X strings files are implemented using
70 U{these<http://developer.apple.com/mac/library/documentation/MacOSX/Conceptual/BPInternational/Articles/StringsFiles.html>}
71 U{two<http://developer.apple.com/mac/library/documentation/Cocoa/Conceptual/LoadingResources/Strings/Strings.html>}
72 articles as references.
73
74 Flex
75 ----
76 Adobe Flex files seem to be normal .properties files but in UTF-8 just like
77 Mozilla files. This
78 U{page<http://livedocs.adobe.com/flex/3/html/help.html?content=l10n_3.html>}
79 provides the information used to implement the dialect.
80
81 Skype
82 -----
83 Skype .lang files seem to be UTF-16 encoded .properties files.
84
85 Implementation
86 ==============
87
88 A simple summary of what is permissible follows.
89
90 Comments supported::
91 # a comment
92 ! a comment
93 // a comment (only at the beginning of a line)
94 /* a comment (not across multiple lines) */
95
96 Name and Value pairs::
97 # Delimiters
98 key = value
99 key : value
100 key value
101
102 # Space in key and around value
103 \ key\ = \ value
104
105 # Note that the b and c are escaped for epydoc rendering
106 b = a string with escape sequences \\t \\n \\r \\\\ \\" \\' \\ (space) \u0123
107 c = a string with a continuation line \\
108 continuation line
109
110 # Special cases
111 # key with no value
112 key
113 # value no key (extractable in prop2po but not mergeable in po2prop)
114 =value
115
116 # .strings specific
117 "key" = "value";
118 '"
119 """
120
121 import re
122 import warnings
123 import logging
124
125 from translate.lang import data
126 from translate.misc import quote
127 from translate.misc.typecheck import accepts, returns, IsOneOf
128 from translate.storage import base
129
130
131
132
133 eol = "\n"
134
135
@accepts(unicode, [unicode])
@returns(IsOneOf(type(None), unicode), int)
def _find_delimiter(line, delimiters):
    """Find the type and position of the delimiter in a property line.

    Property files can be delimited by "=", ":" or whitespace (space for now).
    We find the position of the first unescaped occurrence of each delimiter,
    then pick the one that appears first.  A space only wins over another
    delimiter when there is non-whitespace text between the space and that
    delimiter (so C{key = value} is delimited by "=", while
    C{key value=more} is delimited by the space).

    @param line: A properties line
    @type line: unicode
    @param delimiters: valid delimiters
    @type delimiters: list
    @return: delimiter character and offset within L{line}; C{(None, -1)}
    when no delimiter is found
    @rtype: Tuple (delimiter char, Offset Integer)
    """
    # Leading whitespace can never act as the delimiter, so the search for
    # every delimiter starts at the first non-whitespace character.
    start = len(line) - len(line.lstrip())

    # Offset of the first unescaped occurrence of each delimiter (-1: none).
    positions = {}
    for delimiter in delimiters:
        positions[delimiter] = -1
        pos = line.find(delimiter, start)
        while pos != -1:
            # A delimiter at offset 0 has no preceding character that could
            # escape it; line[pos - 1] would wrap around to the end of the
            # line and wrongly reject it when the line ends in a backslash.
            if pos == 0 or line[pos - 1] != u"\\":
                positions[delimiter] = pos
                break
            pos = line.find(delimiter, pos + 1)

    # Earliest delimiter other than the space.
    mindelimiter = None
    minpos = -1
    for delimiter in delimiters:
        pos = positions[delimiter]
        if pos == -1 or delimiter == u" ":
            continue
        if minpos == -1 or pos < minpos:
            minpos = pos
            mindelimiter = delimiter

    spacepos = positions.get(u" ", -1)
    if mindelimiter is None and spacepos != -1:
        # Only a space was found, so the space is the delimiter.
        return (u" ", spacepos)
    if mindelimiter is not None and -1 < spacepos < minpos:
        # A space precedes the other delimiter.  It is the real delimiter
        # only if something other than whitespace sits between the two;
        # otherwise it is just padding in front of the delimiter.
        if len(line[spacepos:minpos].strip()) > 0:
            return (u" ", spacepos)
    return (mindelimiter, minpos)
184
"""Misspelled name that is kept around in case someone relies on it.
188
189 Deprecated."""
190 warnings.warn("deprecated use Dialect.find_delimiter instead", DeprecationWarning)
191 return _find_delimiter(line, DialectJava.delimiters)
192
197 """Determine whether L{line} has a line continuation marker.
198
199 .properties files can be terminated with a backslash (\\) indicating
200 that the 'value' continues on the next line. Continuation is only
valid if there are an odd number of backslashes (an even number
202 would result in a set of N/2 slashes not an escape)
203
204 @param line: A properties line
205 @type line: str
206 @return: Does L{line} end with a line continuation
207 @rtype: Boolean
208 """
209 pos = -1
210 count = 0
211 if len(line) == 0:
212 return False
213
214
215 while len(line) >= -pos and line[pos:][0] == "\\":
216 pos -= 1
217 count += 1
218 return (count % 2) == 1
219
220
@accepts(unicode)
@returns(unicode)
def _key_strip(key):
    """Clean up the whitespace found around a key.

    Trailing whitespace is removed, except that when the remaining key ends
    in a backslash the single character that followed it is put back, since
    it was escaped by that backslash.  Leading whitespace is always removed.

    @param key: A properties key
    @type key: unicode
    @return: Key without any unneeded whitespace
    @rtype: unicode
    """
    trimmed = key.rstrip()
    if trimmed.endswith("\\"):
        # The backslash escaped the first stripped character: restore it.
        kept = len(trimmed)
        trimmed = trimmed + key[kept:kept + 1]
    return trimmed.lstrip()
236
237 dialects = {}
238 default_dialect = "java"
243
247
281
287 register_dialect(DialectJava)
298 register_dialect(DialectJavaUtf8)
304 register_dialect(DialectFlex)
310 register_dialect(DialectMozilla)
321 register_dialect(DialectSkype)
325 name = "strings"
326 default_encoding = "utf-16"
327 delimiters = [u"="]
328 pair_terminator = u";"
329 key_wrap_char = u'"'
330 value_wrap_char = u'"'
331 drop_comments = ["/* No comment provided by engineer. */"]
332
"""Strip unneeded characters from the key"""
335 newkey = key.rstrip().rstrip('"')
336
337 if newkey[-1:] == "\\":
338 newkey += key[len(newkey):len(newkey)+1]
339 return newkey.lstrip().lstrip('"')
340 key_strip = classmethod(key_strip)
341
"""Strip unneeded characters from the value"""
344 newvalue = value.rstrip().rstrip(';').rstrip('"')
345
346 if newvalue[-1:] == "\\":
347 newvalue += value[len(newvalue):len(newvalue)+1]
348 return newvalue.lstrip().lstrip('"')
349 value_strip = classmethod(value_strip)
350
351 - def encode(cls, string, encoding=None):
353 encode = classmethod(encode)
354 register_dialect(DialectStrings)
355
356
357 -class propunit(base.TranslationUnit):
358 """an element of a properties file i.e. a name and value, and any comments
359 associated"""
360
361 - def __init__(self, source="", personality="java"):
371
376
380
381 source = property(getsource, setsource)
382
387
389 translation = quote.propertiesdecode(self.translation)
390 translation = re.sub(u"\\\\ ", u" ", translation)
391 return translation
392
393 target = property(gettarget, settarget)
394
400 encoding = property(_get_encoding)
401
408
425
428
def addnote(self, text, origin=None, position="append"):
    """Add a note to this unit.

    Notes whose origin is 'programmer', 'developer', 'source code' or None
    are converted with C{data.forceunicode} and appended to
    C{self.comments}; C{position} is not consulted for them.  Notes from
    any other origin are handed to the base class implementation.

    @param text: the note text
    @param origin: who the note comes from
    @param position: passed through to the base class for other origins
    @return: None for the origins handled here, otherwise whatever the
    base class returns
    """
    handled_here = ['programmer', 'developer', 'source code', None]
    if origin not in handled_here:
        parent = super(propunit, self)
        return parent.addnote(text, origin=origin, position=position)
    self.comments.append(data.forceunicode(text))
436
438 if origin in ['programmer', 'developer', 'source code', None]:
439 return u'\n'.join(self.comments)
440 else:
441 return super(propunit, self).getnotes(origin)
442
445
447 """returns whether this is a blank element, containing only
448 comments."""
449 return not (self.name or self.value)
450
452 return bool(self.name)
453
456
459
460
461 -class propfile(base.TranslationStore):
462 """this class represents a .properties file, made up of propunits"""
463 UnitClass = propunit
464
def __init__(self, inputfile=None, personality="java", encoding=None):
    """Construct a propfile, optionally reading in from inputfile.

    @param inputfile: an object with C{read()} and C{close()} methods (and
    optionally a C{name} attribute, recorded as C{self.filename}), or
    None to create an empty store.  It is always closed after reading.
    @param personality: name of the dialect, resolved with C{get_dialect}
    @param encoding: encoding to use; falls back to the dialect's
    C{default_encoding} when not given
    """
    super(propfile, self).__init__(unitclass=self.UnitClass)
    self.personality = get_dialect(personality)
    self.encoding = encoding or self.personality.default_encoding
    self.filename = getattr(inputfile, 'name', '')
    if inputfile is not None:
        try:
            propsrc = inputfile.read()
        finally:
            # Release the handle even when reading fails.
            inputfile.close()
        self.parse(propsrc)
475
def parse(self, propsrc):
    """Read the source of a properties file and add its entries as units.

    The source is decoded with C{self.detect_encoding} (trying the
    dialect's default encoding, then UTF-8 and UTF-16) and the detected
    encoding is stored in C{self.encoding}.  The text is then handled line
    by line:

      - comment lines (starting with C{#}, C{!}, C{/*} or C{//}, or ending
        with C{*/}) are collected on the pending unit unless listed in the
        dialect's C{drop_comments};
      - a blank line flushes the pending unit if it has any content;
      - any other line is split into name and value at the delimiter the
        dialect finds; a value ending in an odd number of backslashes is
        continued on the following line(s).

    @param propsrc: the raw content of the properties file
    """
    text, encoding = self.detect_encoding(
        propsrc,
        default_encodings=[self.personality.default_encoding,
                           'utf-8', 'utf-16'])
    self.encoding = encoding
    propsrc = text

    newunit = propunit("", self.personality.name)
    inmultilinevalue = False

    for line in propsrc.split(u"\n"):
        line = quote.rstripeol(line)
        stripped = line.strip()
        if inmultilinevalue:
            # We are inside a continued value: append this line to it.
            newunit.value += line.lstrip()
            # See whether the value continues on yet another line.
            inmultilinevalue = is_line_continuation(newunit.value)
            if inmultilinevalue:
                # Still waiting for more: drop the continuation backslash.
                newunit.value = newunit.value[:-1]
            else:
                # The value is complete: store the unit and start afresh.
                self.addunit(newunit)
                newunit = propunit("", self.personality.name)
        elif (stripped.startswith((u'#', u'!', u"/*", u"//")) or
              stripped.endswith(u"*/")):
            # A comment line.  The end-of-comment test has to look at the
            # LAST two characters of the line.
            if line not in self.personality.drop_comments:
                newunit.comments.append(line)
        elif not stripped:
            # A blank line ends whatever has been collected so far.
            if str(newunit).strip():
                self.addunit(newunit)
                newunit = propunit("", self.personality.name)
        else:
            newunit.delimiter, delimiter_pos = \
                self.personality.find_delimiter(line)
            if delimiter_pos == -1:
                # No delimiter: the whole line is a key without a value.
                newunit.name = self.personality.key_strip(line)
                newunit.value = u""
                self.addunit(newunit)
                newunit = propunit("", self.personality.name)
            else:
                newunit.name = self.personality.key_strip(
                    line[:delimiter_pos])
                rawvalue = line[delimiter_pos + 1:]
                if is_line_continuation(rawvalue.lstrip()):
                    # The value continues on the next line: keep the unit
                    # pending, minus the trailing continuation backslash.
                    inmultilinevalue = True
                    newunit.value = rawvalue.lstrip()[:-1]
                else:
                    newunit.value = self.personality.value_strip(rawvalue)
                    self.addunit(newunit)
                    newunit = propunit("", self.personality.name)

    # Store a leftover unit: an unterminated continued value or trailing
    # comments that were not followed by a key.
    if inmultilinevalue or len(newunit.comments) > 0:
        self.addunit(newunit)
531
533 """convert the units back to lines"""
534 lines = []
535 for unit in self.units:
536 lines.append(str(unit))
537 return "".join(lines)
538
541 Name = _("Java Properties")
542 Exensions = ['properties']
543
545 kwargs['personality'] = "java"
546 kwargs['encoding'] = "auto"
547 super(javafile, self).__init__(*args, **kwargs)
548
551 Name = _("Java Properties (UTF-8)")
552 Exensions = ['properties']
553
555 kwargs['personality'] = "java-utf8"
556 kwargs['encoding'] = "utf-8"
557 super(javautf8file, self).__init__(*args, **kwargs)
558
567