1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
"""This module stores information and functionality that relates to languages: their names, plural forms and language codes."""
23
24 import unicodedata
25
26 from translate.storage.placeables import StringElem
27
28
# Each value is (English name from iso-codes, nplurals, plural equation).
# The equation is the bare C expression only: no "nplurals=...; plural="
# prefix and no trailing semicolon, so that callers can build the Gettext
# "Plural-Forms" header from the two fields themselves.
languages = {
    'af': (u'Afrikaans', 2, '(n != 1)'),
    'ak': (u'Akan', 2, 'n > 1'),
    'am': (u'Amharic', 2, 'n > 1'),
    'an': (u'Aragonese', 2, '(n != 1)'),
    'ar': (u'Arabic', 6, 'n==0 ? 0 : n==1 ? 1 : n==2 ? 2 : n%100>=3 && n%100<=10 ? 3 : n%100>=11 ? 4 : 5'),
    'arn': (u'Mapudungun; Mapuche', 2, 'n > 1'),
    'ast': (u'Asturian; Bable; Leonese; Asturleonese', 2, '(n != 1)'),
    'az': (u'Azerbaijani', 2, '(n != 1)'),
    'be': (u'Belarusian', 3, 'n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2'),
    'bg': (u'Bulgarian', 2, '(n != 1)'),
    'bn': (u'Bengali', 2, '(n != 1)'),
    'bn_IN': (u'Bengali (India)', 2, '(n != 1)'),
    'bo': (u'Tibetan', 1, '0'),
    'br': (u'Breton', 2, 'n > 1'),
    'bs': (u'Bosnian', 3, 'n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2'),
    'ca': (u'Catalan; Valencian', 2, '(n != 1)'),
    'ca@valencia': (u'Catalan; Valencian (Valencia)', 2, '(n != 1)'),
    'cs': (u'Czech', 3, '(n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2'),
    'csb': (u'Kashubian', 3, 'n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2'),
    'cy': (u'Welsh', 2, '(n==2) ? 1 : 0'),
    'da': (u'Danish', 2, '(n != 1)'),
    'de': (u'German', 2, '(n != 1)'),
    'dz': (u'Dzongkha', 1, '0'),
    'el': (u'Greek, Modern (1453-)', 2, '(n != 1)'),
    'en': (u'English', 2, '(n != 1)'),
    'en_GB': (u'English (United Kingdom)', 2, '(n != 1)'),
    'en_ZA': (u'English (South Africa)', 2, '(n != 1)'),
    'eo': (u'Esperanto', 2, '(n != 1)'),
    'es': (u'Spanish; Castilian', 2, '(n != 1)'),
    'et': (u'Estonian', 2, '(n != 1)'),
    'eu': (u'Basque', 2, '(n != 1)'),
    'fa': (u'Persian', 1, '0'),
    'fi': (u'Finnish', 2, '(n != 1)'),
    'fil': (u'Filipino; Pilipino', 2, '(n > 1)'),
    'fo': (u'Faroese', 2, '(n != 1)'),
    'fr': (u'French', 2, '(n > 1)'),
    'fur': (u'Friulian', 2, '(n != 1)'),
    'fy': (u'Frisian', 2, '(n != 1)'),
    'ga': (u'Irish', 3, 'n==1 ? 0 : n==2 ? 1 : 2'),
    # The equation has four outcomes (0..3), so nplurals must be 4.
    'gd': (u'Gaelic; Scottish Gaelic', 4, '(n==1 || n==11) ? 0 : (n==2 || n==12) ? 1 : (n > 2 && n < 20) ? 2 : 3'),
    'gl': (u'Galician', 2, '(n != 1)'),
    'gu': (u'Gujarati', 2, '(n != 1)'),
    'gun': (u'Gun', 2, '(n > 1)'),
    'ha': (u'Hausa', 2, '(n != 1)'),
    'he': (u'Hebrew', 2, '(n != 1)'),
    'hi': (u'Hindi', 2, '(n != 1)'),
    'hy': (u'Armenian', 1, '0'),
    'hr': (u'Croatian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
    'hu': (u'Hungarian', 2, '(n != 1)'),
    'ia': (u"Interlingua (International Auxiliary Language Association)", 2, '(n != 1)'),
    'id': (u'Indonesian', 1, '0'),
    'is': (u'Icelandic', 2, '(n != 1)'),
    'it': (u'Italian', 2, '(n != 1)'),
    'ja': (u'Japanese', 1, '0'),
    'jv': (u'Javanese', 2, '(n != 1)'),
    'ka': (u'Georgian', 1, '0'),
    'kk': (u'Kazakh', 1, '0'),
    'km': (u'Central Khmer', 1, '0'),
    'kn': (u'Kannada', 2, '(n != 1)'),
    'ko': (u'Korean', 1, '0'),
    'ku': (u'Kurdish', 2, '(n != 1)'),
    'kw': (u'Cornish', 4, '(n==1) ? 0 : (n==2) ? 1 : (n == 3) ? 2 : 3'),
    'ky': (u'Kirghiz; Kyrgyz', 1, '0'),
    'lb': (u'Luxembourgish; Letzeburgesch', 2, '(n != 1)'),
    'ln': (u'Lingala', 2, '(n > 1)'),
    'lo': (u'Lao', 1, '0'),
    'lt': (u'Lithuanian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && (n%100<10 || n%100>=20) ? 1 : 2)'),
    'lv': (u'Latvian', 3, '(n%10==1 && n%100!=11 ? 0 : n != 0 ? 1 : 2)'),
    'mai': (u'Maithili', 2, '(n != 1)'),
    'mfe': (u'Morisyen', 2, '(n > 1)'),
    'mg': (u'Malagasy', 2, '(n > 1)'),
    'mi': (u'Maori', 2, '(n > 1)'),
    'mk': (u'Macedonian', 2, 'n==1 || n%10==1 ? 0 : 1'),
    'ml': (u'Malayalam', 2, '(n != 1)'),
    'mn': (u'Mongolian', 2, '(n != 1)'),
    'mr': (u'Marathi', 2, '(n != 1)'),
    'ms': (u'Malay', 1, '0'),
    'mt': (u'Maltese', 4, '(n==1 ? 0 : n==0 || ( n%100>1 && n%100<11) ? 1 : (n%100>10 && n%100<20 ) ? 2 : 3)'),
    'nah': (u'Nahuatl languages', 2, '(n != 1)'),
    'nap': (u'Neapolitan', 2, '(n != 1)'),
    'nb': (u'Bokmål, Norwegian; Norwegian Bokmål', 2, '(n != 1)'),
    'ne': (u'Nepali', 2, '(n != 1)'),
    'nl': (u'Dutch; Flemish', 2, '(n != 1)'),
    'nn': (u'Norwegian Nynorsk; Nynorsk, Norwegian', 2, '(n != 1)'),
    'nso': (u'Pedi; Sepedi; Northern Sotho', 2, '(n != 1)'),
    'oc': (u'Occitan (post 1500)', 2, '(n > 1)'),
    'or': (u'Oriya', 2, '(n != 1)'),
    'pa': (u'Panjabi; Punjabi', 2, '(n != 1)'),
    'pap': (u'Papiamento', 2, '(n != 1)'),
    'pl': (u'Polish', 3, '(n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
    'pms': (u'Piemontese', 2, '(n != 1)'),
    'ps': (u'Pushto; Pashto', 2, '(n != 1)'),
    'pt': (u'Portuguese', 2, '(n != 1)'),
    'pt_BR': (u'Portuguese (Brazil)', 2, '(n > 1)'),
    'rm': (u'Romansh', 2, '(n != 1)'),
    'ro': (u'Romanian', 3, '(n==1 ? 0 : (n==0 || (n%100 > 0 && n%100 < 20)) ? 1 : 2)'),
    'ru': (u'Russian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
    'sco': (u'Scots', 2, '(n != 1)'),
    'si': (u'Sinhala; Sinhalese', 2, '(n != 1)'),
    'sk': (u'Slovak', 3, '(n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2'),
    'sl': (u'Slovenian', 4, '(n%100==1 ? 0 : n%100==2 ? 1 : n%100==3 || n%100==4 ? 2 : 3)'),
    'so': (u'Somali', 2, '(n != 1)'),
    'son': (u'Songhai languages', 2, '(n != 1)'),
    'sq': (u'Albanian', 2, '(n != 1)'),
    'sr': (u'Serbian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
    'st': (u'Sotho, Southern', 2, '(n != 1)'),
    'su': (u'Sundanese', 1, '0'),
    'sv': (u'Swedish', 2, '(n != 1)'),
    'sw': (u'Swahili', 2, '(n != 1)'),
    'ta': (u'Tamil', 2, '(n != 1)'),
    'te': (u'Telugu', 2, '(n != 1)'),
    'tg': (u'Tajik', 2, '(n != 1)'),
    'ti': (u'Tigrinya', 2, '(n > 1)'),
    'th': (u'Thai', 1, '0'),
    'tk': (u'Turkmen', 2, '(n != 1)'),
    'tr': (u'Turkish', 1, '0'),
    'tt': (u'Tatar', 1, '0'),
    'ug': (u'Uighur; Uyghur', 1, '0'),
    'uk': (u'Ukrainian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
    'vi': (u'Vietnamese', 1, '0'),
    'wa': (u'Walloon', 2, '(n > 1)'),
    'yo': (u'Yoruba', 2, '(n != 1)'),
    'zh_CN': (u'Chinese (China)', 1, '0'),
    'zh_HK': (u'Chinese (Hong Kong)', 1, '0'),
    'zh_TW': (u'Chinese (Taiwan)', 1, '0'),
    'zu': (u'Zulu', 2, '(n != 1)'),
}
"""Dictionary of language data.
The language code is the dictionary key (which may contain country codes and modifiers).
The value is a tuple: (Full name in English from iso-codes, nplurals, plural equation).

Note that the English names should not be used in user facing places - it
should always be passed through the function returned from tr_lang(), or at
least passed through _fix_language_name()."""
167
# Maps the verbose English names to a shorter rendering; consulted by
# _fix_language_name() before its generic shortening rule.
_fixed_names = {
    u"Asturian; Bable; Leonese; Asturleonese": u"Asturian",
    u"Bokmål, Norwegian; Norwegian Bokmål": u"Norwegian Bokmål",
    u"Catalan; Valencian": u"Catalan",
    u"Central Khmer": u"Khmer",
    u"Chichewa; Chewa; Nyanja": u"Chewa; Nyanja",
    u"Divehi; Dhivehi; Maldivian": u"Divehi",
    u"Dutch; Flemish": u"Dutch",
    u"Filipino; Pilipino": u"Filipino",
    u"Gaelic; Scottish Gaelic": u"Scottish Gaelic",
    u"Greek, Modern (1453-)": u"Greek",
    u"Interlingua (International Auxiliary Language Association)": u"Interlingua",
    u"Kirghiz; Kyrgyz": u"Kirghiz",
    u"Klingon; tlhIngan-Hol": u"Klingon",
    u"Limburgan; Limburger; Limburgish": u"Limburgish",
    u"Low German; Low Saxon; German, Low; Saxon, Low": u"Low German",
    u"Luxembourgish; Letzeburgesch": u"Luxembourgish",
    u"Ndebele, South; South Ndebele": u"Southern Ndebele",
    u"Norwegian Nynorsk; Nynorsk, Norwegian": u"Norwegian Nynorsk",
    u"Occitan (post 1500)": u"Occitan",
    u"Panjabi; Punjabi": u"Punjabi",
    u"Pedi; Sepedi; Northern Sotho": u"Northern Sotho",
    u"Pushto; Pashto": u"Pashto",
    u"Sinhala; Sinhalese": u"Sinhala",
    u"Sotho, Southern": u"Sotho",
    u"Spanish; Castilian": u"Spanish",
    u"Uighur; Uyghur": u"Uighur",
}
196
197
def simplercode(code):
    """This attempts to simplify the given language code by ignoring country
    codes, for example.

    The last component of the code (everything from the final C{-}, C{_} or
    C{@} separator onwards, as located via normalize_code()) is dropped.

    @see:
      - U{http://www.rfc-editor.org/rfc/bcp/bcp47.txt}
      - U{http://www.rfc-editor.org/rfc/rfc4646.txt}
      - U{http://www.rfc-editor.org/rfc/rfc4647.txt}
      - U{http://www.w3.org/International/articles/language-tags/}

    @param code: The language code to simplify
    @return: The code without its last component, C{""} if there is no
        separator left to strip, or C{code} itself if it is empty or None.
    """
    if not code:
        return code

    # The separator is located in the normalized form, but the slice is taken
    # from the original so that the caller's spelling/case is preserved.
    # NOTE(review): this relies on normalize_code() keeping the string length
    # unchanged - confirm against its definition.
    normalized = normalize_code(code)
    separator = normalized.rfind('-')
    if separator >= 0:
        return code[:separator]
    return ""
217
218
expansion_factors = {
    'af': 0.1,
    'ar': -0.09,
    'es': 0.21,
    'fr': 0.28,
    'it': 0.2,
}
"""Source to target string length expansion factors."""
227
228 import gettext
229 import locale
230 import re
231 import os
232
# Caches filled lazily by gettext_lang() / gettext_country(): they map a
# language code to the gettext function translating into that language.
iso639 = {}
"""ISO 639 language codes"""
iso3166 = {}
"""ISO 3166 country codes"""

# A language code: 2-3 lowercase letters, an optional "_"/"-" separated
# 2-3 letter uppercase region, and an optional "@modifier".
langcode_re = re.compile(r"^[a-z]{2,3}([_-][A-Z]{2,3}|)(@[a-zA-Z0-9]+|)$")
# Same shape as langcode_re, but matched case-insensitively.
langcode_ire = re.compile(r"^[a-z]{2,3}([_-][a-z]{2,3})?(@[a-z0-9]+)?$", re.IGNORECASE)
# Only the region (plus optional modifier) part that may follow a language.
variant_re = re.compile(r"^[_-][A-Z]{2,3}(@[a-zA-Z0-9]+|)$")
241
242
def languagematch(languagecode, otherlanguagecode):
    """Matches a language code to another, ignoring regions in the second.

    @param languagecode: The code to look for. If None, otherlanguagecode is
        only checked for being a well-formed language code.
    @param otherlanguagecode: The code that is tested against languagecode
    @return: A true value if the codes are equal, or if otherlanguagecode is
        languagecode followed by a region (and optional modifier); a false
        value otherwise. Note that the result is not always a bool: regex
        match objects (or None) are returned as-is.
    """
    if languagecode is None:
        return langcode_re.match(otherlanguagecode)
    return languagecode == otherlanguagecode or \
        (otherlanguagecode.startswith(languagecode) and
         variant_re.match(otherlanguagecode[len(languagecode):]))
249
# Splits a name of the form "Language (Country)". The parenthesised part may
# not contain digits and is at most 25 characters long, so qualifiers such as
# "(post 1500)" are not taken for a country.
dialect_name_re = re.compile(r"(.+)\s\(([^)\d]{,25})\)$")
251
252
253
254
def tr_lang(langcode=None):
    """Gives a function that can translate a language name, even in the form C{"language (country)"},
    into the language with iso code langcode, or the system language if no language is specified.

    @param langcode: The code of the language to translate the names into
    @return: A function taking an (English) language name and returning its
        translated, tidied-up form.
    """
    langfunc = gettext_lang(langcode)
    countryfunc = gettext_country(langcode)

    def handlelanguage(name):
        # "Language (Country)" names are translated piecewise: the language
        # part and the country part are looked up separately.
        match = dialect_name_re.match(name)
        if match:
            language, country = match.groups()
            return u"%s (%s)" % (_fix_language_name(langfunc(language)), countryfunc(country))
        return _fix_language_name(langfunc(name))

    return handlelanguage
270
271
def _fix_language_name(name):
    """Identify and replace some unsightly names present in iso-codes.

    If the name is present in _fixed_names we assume it is untranslated and
    we replace it with a more usable rendering. If the remaining part is long
    and includes a semi-colon, we only take the text up to the semi-colon to
    keep things neat.

    @param name: The language name to tidy up
    @return: The replacement from _fixed_names, the text before the first
        usable semi-colon, or the name unchanged.
    """
    if name in _fixed_names:
        return _fixed_names[name]
    if len(name) > 11:
        # Only semi-colons from the sixth character onwards are considered,
        # so at least five characters of the name are always kept.
        split_point = name[5:].find(u';')
        if split_point >= 0:
            return name[:5 + split_point]
    return name
288
289
def _cached_gettext(domain, cache, langcode):
    """Return the gettext function of `domain` for `langcode`, memoised in `cache`."""
    # Normalise *before* the cache lookup so that None and "" share one slot;
    # otherwise a None langcode would never hit the cache.
    if not langcode:
        langcode = ""
    if langcode not in cache:
        if langcode:
            t = gettext.translation(domain, languages=[langcode], fallback=True)
        elif os.name == "nt":
            # Without an explicit language list gettext consults the
            # LANGUAGE/LC_ALL/LC_MESSAGES/LANG environment variables; on
            # Windows the locale module is asked instead (presumably because
            # those variables are usually unset there - confirm).
            default = locale.getdefaultlocale()[0]
            # getdefaultlocale() may yield None, which gettext cannot expand.
            t = gettext.translation(domain, languages=[default] if default else None, fallback=True)
        else:
            t = gettext.translation(domain, fallback=True)
        # ugettext only exists on Python 2; on Python 3 gettext already
        # returns text strings.
        cache[langcode] = getattr(t, "ugettext", t.gettext)
    return cache[langcode]


def gettext_lang(langcode=None):
    """Returns a gettext function to translate language names into the given
    language, or the system language if no language is specified.

    @param langcode: The code of the target language (None or empty for the
        system language)
    @return: The translation function for the C{iso_639} domain. Because of
        C{fallback=True} it returns its argument unchanged when no catalogue
        is found.
    """
    return _cached_gettext('iso_639', iso639, langcode)


def gettext_country(langcode=None):
    """Returns a gettext function to translate country names into the given
    language, or the system language if no language is specified.

    @param langcode: The code of the target language (None or empty for the
        system language)
    @return: The translation function for the C{iso_3166} domain. Because of
        C{fallback=True} it returns its argument unchanged when no catalogue
        is found.
    """
    return _cached_gettext('iso_3166', iso3166, langcode)
322
323
def normalize(string, normal_form="NFC"):
    """Return a unicode string in its normalized form

    @param string: The string to be normalized
    @param normal_form: NFC (default), NFD, NFKC, NFKD
    @return: Normalized string, or None if string is None
    """
    if string is None:
        return None
    return unicodedata.normalize(normal_form, string)
335
336
def forceunicode(string):
    """Ensures that the string is in unicode.

    @param string: A text string
    @type string: Unicode, String
    @return: String converted to Unicode and normalized as needed.
    @rtype: Unicode
    """
    if string is None:
        return None
    # `bytes` is the same type as `str` on Python 2, so this is the original
    # check there, while on Python 3 only real byte strings get decoded.
    if isinstance(string, bytes):
        # Byte-string subclasses may carry their own `encoding` attribute;
        # plain byte strings are taken to be UTF-8.
        encoding = getattr(string, "encoding", "utf-8")
        string = string.decode(encoding)
    elif isinstance(string, StringElem):
        # type(u"") is `unicode` on Python 2 and `str` on Python 3.
        string = type(u"")(string)
    return string
353
354
def normalized_unicode(string):
    """Forces the string to unicode and does normalization.

    @param string: A text string (or None)
    @return: The result of normalize() applied to forceunicode(string)
    """
    return normalize(forceunicode(string))
358
359
364
365
def simplify_to_common(language_code, languages=languages):
    """Simplify language code to the most commonly used form for the
    language, stripping country information for languages that tend
    not to be localized differently for different countries

    @param language_code: The language code to simplify
    @param languages: Mapping whose keys are the known language codes
        (defaults to the module level C{languages} dictionary)
    @return: language_code itself if it is known or cannot be simplified any
        further, otherwise the result of simplifying its shorter form.
    """
    simpler = simplercode(language_code)
    if simpler == "":
        # Nothing left to strip: give back what we have.
        return language_code
    wanted = normalize_code(language_code)
    # Codes are compared in normalized form so that e.g. case and the kind of
    # separator used do not matter.
    if any(normalize_code(key) == wanted for key in languages):
        return language_code
    # Keep using the caller's mapping for the shorter candidates as well.
    return simplify_to_common(simpler, languages)
375