1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 """Manage the OmegaT glossary format
22
23 OmegaT glossary format is used by the
24 U{OmegaT<http://www.omegat.org/en/omegat.html>} computer aided
25 translation tool.
26
27 It is a bilingual format derived from the base classes, with L{OmegaTFile}
28 and L{OmegaTUnit} providing file- and unit-level access.
29
30 Format Implementation
31 =====================
32 The OmegaT glossary format is a simple Tab Separated Value (TSV) file
33 with the columns: source, target, comment.
34
35 The dialect of the TSV files is specified by L{OmegaTDialect}.
36
37 Encoding
38 --------
39 The files are either UTF-8 or encoded using the system default. UTF-8
40 encoded files use the .utf8 extension while system encoded files use
41 the .tab extension.
42 """
43
44 import csv
45 import locale
46 import sys
47
48 from translate.storage import base
49
50 OMEGAT_FIELDNAMES = ["source", "target", "comment"]
51 """Field names for an OmegaT glossary unit"""
52
53
68 csv.register_dialect("omegat", OmegaTDialect)
69
70
72 """An OmegaT glossary unit"""
73
79
81 """Get the dictionary of values for an OmegaT line"""
82 return self._dict
83
85 """Set the dictionary of values for an OmegaT line
86
87 @param newdict: a new dictionary with OmegaT line elements
88 @type newdict: Dict
89 """
90
91 self._dict = newdict
92 dict = property(getdict, setdict)
93
95 if key not in self._dict:
96 return None
97 elif self._dict[key]:
98 return self._dict[key].decode('utf-8')
99 else:
100 return ""
101
103 if newvalue is None:
104 self._dict[key] = None
105 if isinstance(newvalue, unicode):
106 newvalue = newvalue.encode('utf-8')
107 if not key in self._dict or newvalue != self._dict[key]:
108 self._dict[key] = newvalue
109
112
def addnote(self, text, origin=None, position="append"):
    """Store a note in the unit's C{comment} field.

    @param text: the note text to record
    @param origin: not used by this implementation
    @param position: when equal to "append" and a non-empty comment already
        exists, C{text} is joined to it with a newline; for any other value
        (or when there is no existing comment) C{text} replaces the comment
    """
    existing = self._get_field('comment')
    # Appending only makes sense when there is already a non-empty comment.
    should_append = (position == "append"
                     and existing is not None
                     and existing != u'')
    if should_append:
        text = existing + '\n' + text
    self._set_field('comment', text)
119
122
125
129 source = property(getsource, setsource)
130
133
137 target = property(gettarget, settarget)
138
140 self._dict['target-lang'] = newlang
141 targetlang = property(None, settargetlang)
142
144 return str(self._dict)
145
147 return bool(self._dict.get('target', None))
148
149
151 """An OmegaT glossary file"""
152 Name = _("OmegaT Glossary")
153 Mimetypes = ["application/x-omegat-glossary"]
154 Extensions = ["utf8"]
155
157 """Construct an OmegaT glossary, optionally reading in from
158 inputfile."""
159 self.UnitClass = unitclass
160 base.TranslationStore.__init__(self, unitclass=unitclass)
161 self.filename = ''
162 self.extension = ''
163 self._encoding = self._get_encoding()
164 if inputfile is not None:
165 self.parse(inputfile)
166
169
171 """Parse the given file or file source string"""
172 if hasattr(input, 'name'):
173 self.filename = input.name
174 elif not getattr(self, 'filename', ''):
175 self.filename = ''
176 if hasattr(input, "read"):
177 tmsrc = input.read()
178 input.close()
179 input = tmsrc
180 try:
181 input = input.decode(self._encoding).encode('utf-8')
182 except:
183 raise ValueError("OmegaT files are either UTF-8 encoded or use the default system encoding")
184 lines = csv.DictReader(input.split("\n"), fieldnames=OMEGAT_FIELDNAMES,
185 dialect="omegat")
186 for line in lines:
187 newunit = OmegaTUnit()
188 newunit.dict = line
189 self.addunit(newunit)
190
192 output = csv.StringIO()
193 writer = csv.DictWriter(output, fieldnames=OMEGAT_FIELDNAMES,
194 dialect="omegat")
195 unit_count = 0
196 for unit in self.units:
197 if unit.istranslated():
198 unit_count += 1
199 writer.writerow(unit.dict)
200 if unit_count == 0:
201 return ""
202 output.reset()
203 decoded = "".join(output.readlines()).decode('utf-8')
204 try:
205 return decoded.encode(self._encoding)
206 except UnicodeEncodeError:
207 return decoded.encode('utf-8')
208
209
211 """An OmegaT glossary file in the default system encoding"""
212 Name = _("OmegaT Glossary")
213 Mimetypes = ["application/x-omegat-glossary"]
214 Extensions = ["tab"]
215
217 return locale.getdefaultlocale()[1]
218