1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 """classes that hold units of .dtd files (dtdunit) or entire files (dtdfile)
23 these are specific .dtd files for localisation used by mozilla"""
24
25 from translate.storage import base
26 from translate.misc import quote
27
28 import re
29 import warnings
30 try:
31 from lxml import etree
32 import StringIO
33 except ImportError:
34 etree = None
35
36 labelsuffixes = (".label", ".title")
"""Label suffixes: entries with this suffix are able to be combined with accesskeys
found in entries ending with L{accesskeysuffixes}"""
39 accesskeysuffixes = (".accesskey", ".accessKey", ".akey")
40 """Accesskey Suffixes: entries with this suffix may be combined with labels
41 ending in L{labelsuffixes} into accelerator notation"""
42
51
64
66 """Find and remove ampersands that are not part of an entity definition.
67
A stray & in a DTD file can break an application's ability to parse the file. In Mozilla
69 localisation this is very important and these can break the parsing of files used in XUL
70 and thus break interface rendering. Tracking down the problem is very difficult,
71 thus by removing potential broken & and warning the users we can ensure that the output
72 DTD will always be parsable.
73
74 @type name: String
75 @param name: Entity name
76 @type value: String
77 @param value: Entity text value
78 @rtype: String
79 @return: Entity value without bad ampersands
80 """
def is_valid_entity_name(name):
    """Check that supplied L{name} is a valid entity name.

    A name is accepted when it is alphanumeric once any dots have been
    removed (e.g. C{brand.name}), or when it is a C{#} followed by
    alphanumeric characters (e.g. C{#38} or C{#x26}).

    @type name: String
    @param name: Text found between an C{&} and the following C{;}
    @rtype: Boolean
    @return: True if L{name} is accepted as an entity name, False otherwise
             (including for the empty string)
    """
    if name.replace('.', '').isalnum():
        return True
    # startswith() rather than name[0]: an empty name, as produced by the
    # text "&;", must be rejected instead of raising IndexError.
    return name.startswith('#') and name[1:].isalnum()
88
89 amppos = 0
90 invalid_amps = []
91 while amppos >= 0:
92 amppos = value.find("&", amppos)
93 if amppos != -1:
94 amppos += 1
95 semipos = value.find(";", amppos)
96 if semipos != -1:
97 if is_valid_entity_name(value[amppos:semipos]):
98 continue
99 invalid_amps.append(amppos-1)
100 if len(invalid_amps) > 0:
101 warnings.warn("invalid ampersands in dtd entity %s" % (name))
102 adjustment = 0
103 for amppos in invalid_amps:
104 value = value[:amppos-adjustment] + value[amppos-adjustment+1:]
105 adjustment += 1
106 return value
107
108 -class dtdunit(base.TranslationUnit):
109 """this class represents an entity definition from a dtd file (and possibly associated comments)"""
111 """construct the dtdunit, prepare it for parsing"""
112 super(dtdunit, self).__init__(source)
113 self.comments = []
114 self.unparsedlines = []
115 self.incomment = False
116 self.inentity = False
117 self.entity = "FakeEntityOnlyForInitialisationAndTesting"
118 self.source = source
119
120
122 """Sets the definition to the quoted value of source"""
123 self.definition = quotefordtd(source)
124
126 """gets the unquoted source string"""
127 return unquotefromdtd(self.definition)
128 source = property(getsource, setsource)
129
135
137 """gets the unquoted target string"""
138 return unquotefromdtd(self.definition)
139 target = property(gettarget, settarget)
140
142 """returns whether this dtdunit doesn't actually have an entity definition"""
143
144
145 return self.entity is None
146
def parse(self, dtdsrc):
    """Read the first dtd element from the source code into this object.

    The text is consumed line by line.  Comments, lines containing a
    C{%...;} reference and lines outside any entity are collected on the
    way.  Parsing stops once the quoted definition of the first
    C{<!ENTITY ...>} has been read completely, or when the text runs out.

    C{self.incomment}, C{self.inentity} and C{self.unparsedlines} are read
    before they are assigned here, so they must already be set on the
    object.  Parser state is kept on the object as well.

    @type dtdsrc: String
    @param dtdsrc: DTD source text, possibly spanning several lines
    @rtype: Integer
    @return: Number of lines consumed (0 if L{dtdsrc} is empty)
    @raise ValueError: if an entity definition does not start with a
                       single or double quote
    """
    self.comments = []
    # Every comment category is an alias of the one list, so comments of
    # all types end up in self.comments in the order they were read.
    self.locfilenotes = self.comments
    self.locgroupstarts = self.comments
    self.locgroupends = self.comments
    self.locnotes = self.comments
    self.entity = None
    self.definition = ''
    if not dtdsrc:
        return 0
    lines = dtdsrc.split("\n")
    linesprocessed = 0
    comment = ""
    # Column within the current line.  Initialised up front so that a call
    # resuming in the "parameter" state left behind by an earlier call
    # starts at column 0 instead of hitting an unbound local.
    e = 0
    for line in lines:
        line += "\n"
        linesprocessed += 1

        if not self.incomment:
            if '<!--' in line:
                self.incomment = True
                self.continuecomment = False
                # Only the comment type is worked out here; the comment
                # text itself is extracted again further down.
                comment = quote.extract(line, "<!--", "-->", None, 0)[0]
                if 'LOCALIZATION NOTE' in comment:
                    pos = quote.findend(comment, 'LOCALIZATION NOTE')
                    # Skip the spaces after the marker; the bounds check
                    # keeps a comment that ends right there from raising
                    # IndexError.
                    while pos < len(comment) and comment[pos] == ' ':
                        pos += 1
                    if comment.startswith('FILE', pos):
                        self.commenttype = "locfile"
                    elif comment.startswith('BEGIN', pos):
                        self.commenttype = "locgroupstart"
                    elif comment.startswith('END', pos):
                        self.commenttype = "locgroupend"
                    else:
                        self.commenttype = "locnote"
                else:
                    # plain comment
                    self.commenttype = "comment"
            elif not self.inentity and re.search("%.*;", line):
                # A "%...;" line outside an entity is kept verbatim as a
                # comment and takes no further part in parsing.
                self.comments.append(("comment", line))
                continue

        if self.incomment:
            (comment, self.incomment) = quote.extract(line, "<!--", "-->", None, self.continuecomment)
            self.continuecomment = self.incomment
            # Strip the comment out of what is still to be parsed.
            line = line.replace(comment, "", 1)
            if not self.incomment:
                # The comment ended on this line: make it end in a newline,
                # absorbing the rest of the line when that is only whitespace.
                if line.isspace():
                    comment += line
                    line = ''
                else:
                    comment += '\n'
            # Record the comment together with the type worked out when
            # the comment started.
            commentpair = (self.commenttype, comment)
            if self.commenttype == "locfile":
                self.locfilenotes.append(commentpair)
            elif self.commenttype == "locgroupstart":
                self.locgroupstarts.append(commentpair)
            elif self.commenttype == "locgroupend":
                self.locgroupends.append(commentpair)
            elif self.commenttype == "locnote":
                self.locnotes.append(commentpair)
            elif self.commenttype == "comment":
                self.comments.append(commentpair)

        if not self.inentity and not self.incomment:
            entitypos = line.find('<!ENTITY')
            if entitypos != -1:
                self.inentity = True
                beforeentity = line[:entitypos].strip()
                # Text starting with "#" in front of the entity is kept so
                # that it can be written out again with the entity.
                if beforeentity.startswith("#"):
                    self.hashprefix = beforeentity
                self.entitypart = "start"
            else:
                self.unparsedlines.append(line)

        if self.inentity:
            if self.entitypart == "start":
                # Drop everything up to and including "<!ENTITY".
                e = quote.findend(line, '<!ENTITY')
                line = line[e:]
                self.entitypart = "name"
                self.entitytype = "internal"
            if self.entitypart == "name":
                e = 0
                while e < len(line) and line[e].isspace():
                    e += 1
                self.entity = ''
                if e < len(line) and line[e] == '%':
                    # "<!ENTITY % name ..." is treated as an external entity.
                    self.entitytype = "external"
                    self.entityparameter = ""
                    e += 1
                    while e < len(line) and line[e].isspace():
                        e += 1
                while e < len(line) and not line[e].isspace():
                    self.entity += line[e]
                    e += 1
                while e < len(line) and line[e].isspace():
                    e += 1
                if self.entity:
                    if self.entitytype == "external":
                        self.entitypart = "parameter"
                    else:
                        self.entitypart = "definition"
                    # Remember the start position and the quote character.
                    if e == len(line):
                        # Nothing else on this line: carry on with the
                        # next one, starting at column 0.
                        self.entityhelp = None
                        e = 0
                        continue
                    elif self.entitypart == "definition":
                        self.entityhelp = (e, line[e])
                        self.instring = False
            if self.entitypart == "parameter":
                while e < len(line) and line[e].isspace():
                    e += 1
                paramstart = e
                while e < len(line) and line[e].isalnum():
                    e += 1
                self.entityparameter += line[paramstart:e]
                while e < len(line) and line[e].isspace():
                    e += 1
                line = line[e:]
                e = 0
                if not line:
                    continue
                if line[0] in ('"', "'"):
                    self.entitypart = "definition"
                    self.entityhelp = (e, line[e])
                    self.instring = False
            if self.entitypart == "definition":
                if self.entityhelp is None:
                    # The definition starts on a later line than the name:
                    # look for its first non-blank character.
                    e = 0
                    while e < len(line) and line[e].isspace():
                        e += 1
                    if e == len(line):
                        continue
                    self.entityhelp = (e, line[e])
                    self.instring = False

                e = self.entityhelp[0]
                if self.entityhelp[1] == "'":
                    (defpart, self.instring) = quote.extract(line[e:], "'", "'", startinstring=self.instring, allowreentry=False)
                elif self.entityhelp[1] == '"':
                    (defpart, self.instring) = quote.extract(line[e:], '"', '"', startinstring=self.instring, allowreentry=False)
                else:
                    raise ValueError("Unexpected quote character... %r" % (self.entityhelp[1]))
                # Any following line is read from its first column, using
                # the same quote character.
                self.entityhelp = (0, self.entityhelp[1])
                self.definition += defpart
                if not self.instring:
                    # The quoted definition is complete: this unit is done.
                    self.inentity = False
                    break

    return linesprocessed
327
334
336 """convert the dtd entity back to string form"""
337 lines = []
338 lines.extend([comment for commenttype, comment in self.comments])
339 lines.extend(self.unparsedlines)
340 if self.isnull():
341 result = "".join(lines)
342 return result.rstrip() + "\n"
343
344
345
346
347 if len(self.entity) > 0:
348 if getattr(self, 'entitytype', None) == 'external':
349 entityline = '<!ENTITY % '+self.entity+' '+self.entityparameter+' '+self.definition+'>'
350 else:
351 entityline = '<!ENTITY '+self.entity+' '+self.definition+'>'
352 if getattr(self, 'hashprefix', None):
353 entityline = self.hashprefix + " " + entityline
354 if isinstance(entityline, unicode):
355 entityline = entityline.encode('UTF-8')
356 lines.append(entityline+'\n')
357 return "".join(lines)
358
359 -class dtdfile(base.TranslationStore):
360 """this class represents a .dtd file, made up of dtdunits"""
361 UnitClass = dtdunit
363 """construct a dtdfile, optionally reading in from inputfile"""
364 base.TranslationStore.__init__(self, unitclass = self.UnitClass)
365 self.filename = getattr(inputfile, 'name', '')
366 if inputfile is not None:
367 dtdsrc = inputfile.read()
368 self.parse(dtdsrc)
369 self.makeindex()
370
def parse(self, dtdsrc):
    """Read the source code of a dtd file and add its dtdunits to self.units.

    The text is cut into chunks of lines.  A chunk ends after the first
    line that starts with a closing quote followed by C{>}, once an
    C{<!ENTITY} has been seen in that chunk, or at the end of the text.
    Each chunk is then handed to fresh dtdunit objects until one of them
    reports that it consumed no lines.

    A unit is kept when it consumed at least one line and either holds an
    entity or has unparsed lines.  An exception raised while parsing a
    unit is reported with C{warnings.warn} rather than propagated.

    @type dtdsrc: String
    @param dtdsrc: Complete text of the dtd file
    """
    lines = dtdsrc.split("\n")
    total = len(lines)
    # Raw string: "\s" is an invalid escape in a normal string literal.
    # Compiled once here instead of on every line.
    closes_entity = re.compile(r"""["']\s*>""").match
    start = 0
    end = 0
    while end < total:
        if start == end:
            end += 1
        foundentity = False
        while end < total:
            if '<!ENTITY' in lines[end]:
                foundentity = True
            if foundentity and closes_entity(lines[end]):
                end += 1
                break
            end += 1

        linesprocessed = 1
        while linesprocessed >= 1:
            newdtd = dtdunit()
            try:
                linesprocessed = newdtd.parse("\n".join(lines[start:end]))
                if linesprocessed >= 1 and (not newdtd.isnull() or newdtd.unparsedlines):
                    self.units.append(newdtd)
            except Exception as e:
                # NOTE(review): after a failure linesprocessed still holds
                # the count from the previous unit (or the initial 1), so
                # start advances by that amount - confirm this recovery is
                # what is wanted.
                warnings.warn("%s\nError occured between lines %d and %d:\n%s" % (e, start+1, end, "\n".join(lines[start:end])))
            start += linesprocessed
401
403 """convert to a string. double check that unicode is handled somehow here"""
404 source = self.getoutput()
405 if not self._valid_store():
406 warnings.warn("DTD file '%s' does not validate" % self.filename)
407 return None
408 if isinstance(source, unicode):
409 return source.encode(getattr(self, "encoding", "UTF-8"))
410 return source
411
413 """convert the units back to source"""
414 sources = [str(dtd) for dtd in self.units]
415 return "".join(sources)
416
418 """makes self.index dictionary keyed on entities"""
419 self.index = {}
420 for dtd in self.units:
421 if not dtd.isnull():
422 self.index[dtd.entity] = dtd
423
425 """Validate the store to determine if it is valid
426
427 This uses ElementTree to parse the DTD
428
429 @return: If the store passes validation
430 @rtype: Boolean
431 """
432 if etree is not None:
433 try:
434
435 dtd = etree.DTD(StringIO.StringIO(re.sub("#expand", "", self.getoutput())))
436 except etree.DTDParseError:
437 return False
438 return True
439