Package zephir :: Package monitor :: Module xmlite
[frames] | no frames]

Source Code for Module zephir.monitor.xmlite

  1  # -*- coding: UTF-8 -*- 
  2  """ Copyright (C) 2003 Peter Ohler 
  3   
  4      XMLite is free software; you can redistribute it and/or modify it under 
  5      the terms of the GNU General Public License as published by the Free 
  6      Software Foundation; either version 2, or (at your option) any later 
  7      version. 
  8   
  9      XMLite is distributed in the hope that it will be useful, 
 10      but WITHOUT ANY WARRANTY; without even the implied warranty of 
 11      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 12      GNU General Public License for more details. 
 13   
 14      You download a copy of the GNU General Public License at 
 15      http://www.gnu.org/licenses/gpl.txt or obtain a copy of the GNU General 
 16      Public License by writing to the Free Software Foundation, Inc., 59 Temple 
 17      Place - Suite 330, Boston, MA 02111-1307, USA. 
 18   
 19      XMLite - extremely light weight XML parser and printer 
 20   
 21      The xmlite module is an extremely light weight XML parser and printer. It 
 22      does not use the DOM or SAX interfaces but instead works with a simple 
 23      list or rather nested lists to represent an XML document. The parser takes 
 24      as input a string or filename and returns a list with all the elements of 
 25      the XML file. 
 26   
 27      The first item in the top level XML list is a dict object with 'version', 
 28      'encoding', and 'standalone' keys. If there are any decl tags such as 
 29      'DOCTYPE' they will be next in the list and will be tuples with the decl 
 30      tag name and the value of the tag as the second item of the tuple. 
 31   
 32      Comments are included as lists of two items. The first item is None and 
 33      the second is a string which is the comment text. 
 34   
 35      CDATA are tuples of two items. The first item is 'CDATA' and the second is 
 36      the CDATA content. 
 37   
 38      XML elements are lists. The first item in the list is the element tag or 
 39      name. The second item is a dict object which includes all the attributes of 
 40      the element. Any remaining list items are either comments, strings, CDATA, 
 41      or more elements as lists. 
 42   
 43      Author: Peter Ohler, peter@ohler.com 
 44      $Id: xmlite.py,v 1.1.1.1 2004/05/12 09:26:11 sam Exp $ 
 45  """ 
 46   
 47  import os 
 48  import sys 
 49  import string 
 50   
class XmlException(Exception):
    """ XML Exception for reporting errors in parsing of an XML file or
        string.

        Attributes:
          msg  - the error message
          line - 1 based line number of the error, or -1 if unknown
          char - offset of the error within that line, or -1 if unknown
    """

    def __init__(self, msg, s, pos):
        """ Pass in the error message, string being parsed, and the position
            in the string where the error was detected. Pass None as the
            string when no position information is available.
        """
        # Deriving from Exception (and initialising it) is what makes the
        # class raisable on Python 3 and gives it a useful args tuple.
        Exception.__init__(self, msg)
        self.msg = msg
        if s is None:
            self.line = -1
            self.char = -1
        else:
            # Count '\n' rather than os.linesep: a string read in text mode
            # has '\n' line ends on every platform, and '\r\n' input still
            # contains exactly one '\n' per line.
            self.line = 1 + s.count('\n', 0, pos)
            if self.line > 1:
                self.char = pos - s.rfind('\n', 0, pos)
            else:
                self.char = pos

    def __str__(self):
        if self.line < 0:
            return self.msg
        return "%s at %d of line %d" % (self.msg, self.char, self.line)
75
def printXml(xml, indent = 0):
    """ Print out a list that matches the expected XML list format. Other
        formats may not print out correctly. The output format is XML and is
        written to sys.stdout.

        xml    - a string (text), a tuple (CDATA or decl), or a list (a
                 comment, an element, or the top level document list whose
                 first item is the prolog dict)
        indent - number of spaces put in front of each line

        Raises an XmlException if xml, or the attributes of an element, are
        not of an expected type.
    """
    # sys.stdout.write is used instead of the print statement so the code
    # runs unchanged on Python 2 and Python 3.
    out = sys.stdout.write
    istr = ' ' * indent

    if isinstance(xml, str):
        out("%s%s\n" % (istr, expandCodedChars(xml)))
        return
    if isinstance(xml, tuple):
        if 'CDATA' == xml[0]:
            out("%s<![CDATA[%s]]>\n" % (istr, xml[1]))
        else:
            out("%s<!%s %s>\n" % (istr, xml[0], xml[1]))
        return
    if not isinstance(xml, list):
        raise XmlException("Invalid format", None, 0)

    tag = xml[0]
    if tag is None: # a comment
        out("%s<!-- %s -->\n" % (istr, xml[1]))
        return

    if isinstance(tag, dict): # the very top of the xml
        # The XML declaration requires version, encoding, standalone in that
        # order; dict iteration order is arbitrary on older Pythons, so the
        # known keys are emitted first and any other keys follow.
        known = ('version', 'encoding', 'standalone')
        keys = [k for k in known if k in tag]
        keys.extend([k for k in tag if k not in known])
        parts = ["<?xml"]
        for k in keys:
            if tag[k] is not None:
                parts.append('%s="%s"' % (k, tag[k]))
        parts.append("?>")
        out(" ".join(parts) + "\n")
        for e in xml[1:]:
            printXml(e, indent + 2)
        return

    attrs = None
    if len(xml) > 1:
        attrs = xml[1]
    children = xml[2:]
    if children:
        close = ">"
    else:
        close = "/>"

    if attrs is None:
        out("%s<%s%s\n" % (istr, tag, close))
    else:
        if not isinstance(attrs, dict):
            raise XmlException("Invalid format", None, 0)
        out("%s<%s\n" % (istr, tag))
        # One attribute per line; the tag is closed on the line of the last
        # attribute, separated from it by a single space.
        pad = ' ' * (indent + 3)
        lines = ['%s%s="%s"' % (pad, a, expandCodedChars(attrs[a]))
                 for a in attrs]
        if lines:
            out("\n".join(lines) + " ")
        out(close + "\n")

    for e in children:
        printXml(e, indent + 2)
    if children:
        out("%s</%s>\n" % (istr, tag))
129
def printAttrs(attrs, indent):
    """ Write the attributes of an element to sys.stdout, one per line and
        each indented by indent spaces.

        attrs  - dict of attribute names to values, or None for no attributes
        indent - number of spaces put in front of each attribute

        The last attribute is followed by a single space instead of a newline
        so that the caller can close the tag on the same line. Nothing is
        written if attrs is None or empty.

        Raises an XmlException if attrs is neither a dict nor None.
    """
    if attrs is None:
        return
    if not isinstance(attrs, dict):
        # There is no parsed string or position to report here.
        raise XmlException("Invalid format", None, 0)
    istr = ' ' * indent
    # Strings are expanded and special characters are replaced with
    # character codes.
    lines = ['%s%s="%s"' % (istr, a, expandCodedChars(attrs[a]))
             for a in attrs]
    if lines:
        sys.stdout.write("\n".join(lines) + " ")
145
def toStr(xml, s = "", indent = 0):
    """ Return a string that is an XML document.

        xml    - a string (text), a tuple (CDATA or decl), or a list (a
                 comment, an element, or the top level document list whose
                 first item is the prolog dict)
        s      - text generated so far, the new output is appended to it
        indent - number of spaces put in front of each line

        Text items are appended as given, they are not expanded into
        character codes.

        Raises an XmlException if xml is not of an expected type.
    """
    istr = ' ' * indent
    if isinstance(xml, str):
        return s + "%s%s\n" % (istr, xml)
    if isinstance(xml, tuple):
        if 'CDATA' == xml[0]:
            return s + "%s<![CDATA[%s]]>\n" % (istr, xml[1])
        return s + "%s<!%s %s>\n" % (istr, xml[0], xml[1])
    if not isinstance(xml, list):
        # There is no parsed string or position to report here.
        raise XmlException("Invalid format", None, 0)

    tag = xml[0]
    if tag is None: # a comment
        return s + "%s<!-- %s -->\n" % (istr, xml[1])

    if isinstance(tag, dict): # the very top of the xml
        # The XML declaration requires version, encoding, standalone in that
        # order; any other keys follow.
        known = ('version', 'encoding', 'standalone')
        keys = [k for k in known if k in tag]
        keys.extend([k for k in tag if k not in known])
        parts = ["<?xml"]
        for k in keys:
            if tag[k] is not None:
                parts.append('%s="%s"' % (k, tag[k]))
        parts.append("?>")
        s = s + " ".join(parts) + "\n"
        for e in xml[1:]:
            s = toStr(e, s, indent + 2)
        return s

    attrs = None
    if len(xml) > 1:
        attrs = xml[1]
    children = xml[2:]
    if children:
        close = ">\n"
    else:
        close = "/>\n"

    if attrs is None:
        s = s + "%s<%s%s" % (istr, tag, close)
    else:
        s = s + "%s<%s\n" % (istr, tag)
        s = attrsToStr(attrs, s, indent + 3)
        s = s + close

    for e in children:
        s = toStr(e, s, indent + 2)
    if children:
        s = s + "%s</%s>\n" % (istr, tag)
    return s
184
def attrsToStr(attrs, s, indent):
    """ Append the attributes of an element to s and return the result.

        attrs  - dict of attribute names to values, or None for no attributes
        s      - text generated so far
        indent - number of spaces put in front of each attribute

        Each attribute is placed on its own line. The last one is not followed
        by a newline so that the caller can close the tag right after it.

        Raises an XmlException if attrs is neither a dict nor None.
    """
    if attrs is None:
        return s
    if not isinstance(attrs, dict):
        # s is the output built so far, not a string being parsed, so no
        # position information is passed on.
        raise XmlException("Invalid format", None, 0)
    istr = ' ' * indent
    # Strings are expanded and special characters are replaced with
    # character codes.
    lines = ['%s%s="%s"' % (istr, a, expandCodedChars(attrs[a]))
             for a in attrs]
    return s + "\n".join(lines)
201
def load(filename):
    """ Load complete file into memory and then parse the string.

        filename - path of the XML file to read

        Returns the XML list produced by parse(). Any error raised by open()
        or read() is passed on to the caller.
    """
    # open() raises on failure rather than returning None, so there is
    # nothing to check here.
    f = open(filename, "r")
    try:
        s = f.read()
    finally:
        # close the file even if the read fails
        f.close()

    return parse(s)
212
def parse(s):
    """ Make one pass over the string s and parse it directly into an XML
        list, which is returned.

        Raises an XmlException if the top level of the document is malformed.
    """
    # stage: 0 - before prolog, 1 - after prolog, 2 - after root, 3 - done
    stage = 0
    doc = []
    pos = 0

    while stage < 3:
        # running off the end of the string while skipping white space is
        # the normal way out of the loop
        try:
            while s[pos] in string.whitespace:
                pos += 1
        except IndexError:
            break
        # every element at the top level starts with '<'
        if s[pos] != '<':
            raise XmlException("Expected a '<' character", s, pos)
        pos += 1
        marker = s[pos]
        if marker == '?': # prolog
            if stage != 0:
                raise XmlException("Prolog must be the first element", s, pos)
            pos = readProlog(s, pos + 1, doc)
            stage = 1
        elif marker == '!': # comment or decl
            pos += 1
            if s[pos:pos + 2] == '--':
                pos = readComment(s, pos + 2, doc)
            elif stage > 1:
                raise XmlException("DECLs must appear before other element", s, pos)
            else:
                pos = readDecl(s, pos, doc)
                stage = 1
        else: # element
            pos = readElement(s, pos, doc)
            stage = 2
    return doc
251
def readProlog(s, i, x):
    """ Read the prolog of the string s, starting at position i just after
        the '<?', and append a dict with 'version', 'encoding', and
        'standalone' keys to x. Keys not present in the prolog are None.

        Returns the position just past the closing '?>'.

        Raises an XmlException if the prolog is malformed or has an attribute
        other than the three above.
    """
    prolog = { 'version': None, 'encoding': None, 'standalone': None }

    if 'xml' != s[i:i + 3]:
        raise XmlException("Expected 'xml' in prolog", s, i)
    i += 3
    while '?' != s[i]:
        token, i = readNameToken(s, i)

        while s[i] in string.whitespace:
            i += 1
        c = s[i]
        if '=' == c:
            i += 1
            if token not in prolog:
                # token is None when the '=' has no name in front of it, so
                # the message is formatted instead of concatenated.
                raise XmlException("Invalid prolog attribute: '%s'" % (token,), s, i)
            prolog[token], i = readQuotedValue(s, i)
        elif '?' == c:
            break
        else:
            raise XmlException("Expected '=' or '?' in prolog", s, i)

    i += 1 # past ?
    if '>' != s[i]:
        raise XmlException("Expected '>' after '?' in prolog", s, i)
    i += 1
    x.append(prolog)

    return i
# Characters that end a name token: white space and the XML punctuation that
# can follow a tag or attribute name.
nonNameStr = (
    " \t\n\r"       # blank, tab and line ends
    "?=/><"         # tag and attribute punctuation
    "\x0b\x0c"      # vertical tab and form feed
)
def readNameToken(s, i):
    """ Skip white space and then read a name, which runs up to the first
        character found in nonNameStr.

        Returns the name and the position just past it. The name is None if
        no name characters were found.
    """
    while s[i] in string.whitespace:
        i += 1
    begin = i
    while s[i] not in nonNameStr:
        i += 1
    token = s[begin:i]
    if not token:
        return None, i
    return token, i
298
def readQuotedValue(s, i):
    """ Skip white space and then read a quoted value. The value may be
        enclosed in either double or single quotes and ends at the next
        occurrence of the quote character that opened it.

        Returns the value, with coded characters replaced, and the position
        just past the closing quote. The value is None if the quotes are
        empty.

        Raises an XmlException if the first non white space character is not
        a quote.
    """
    while s[i] in string.whitespace:
        i += 1
    quote = s[i]
    # XML allows attribute values to be delimited by " or by '
    if quote not in ('"', "'"):
        raise XmlException("Expected '\"' character", s, i)
    i += 1
    start = i
    while quote != s[i]:
        i += 1
    if start == i:
        return None, i + 1
    return replaceCodedChars(s[start:i]), i + 1
311
def readComment(s, i, x):
    """ Read a comment whose text starts at position i, just after the
        '<!--', and append it to x as [None, text] with the text stripped of
        surrounding white space.

        Returns the position just past the closing '-->'.

        Raises an XmlException if the comment is not terminated.
    """
    close = s.find('-->', i)
    if close < 0:
        raise XmlException("Comment not terminated", s, i)
    text = s[i:close].strip()
    x.append([None, text])

    return close + 3
319
def readDecl(s, i, x):
    """ Read a decl such as DOCTYPE, starting at its name just after the
        '<!', and append it to x as a (name, value) tuple. The value is
        everything after the name up to the '>' that closes the decl, nested
        '<' and '>' pairs included.

        Returns the position just past the closing '>'.
    """
    name, i = readNameToken(s, i)
    while s[i] in string.whitespace:
        i += 1
    begin = i
    nesting = 1
    while True:
        ch = s[i]
        if ch == '<':
            nesting += 1
        elif ch == '>':
            nesting -= 1
            if nesting == 0: # done, the end
                break
        i += 1
    x.append((name, s[begin:i]))

    return i + 1
338
def readElement(s, i, x):
    """ Read an element, its attributes, and all of its children, starting at
        the element name just after the opening '<', and append it to x.

        The element is appended as [name, attrs, child, ...] where attrs is a
        dict or None if the element has no attributes. Children are comments,
        CDATA tuples, text strings, and nested elements.

        Returns the position just past the element.

        Raises an XmlException if the element is malformed or its end tag
        does not match its name.
    """
    name, i = readNameToken(s, i)
    element = [name, None]

    while s[i] in string.whitespace:
        i += 1
    if '/' == s[i]:
        i += 1
        if '>' == s[i]: # empty element, no attributes and no children
            x.append(element)
            return i + 1
        raise XmlException("Expected '>' after '/'", s, i)

    # read attribute names until the close (/ or >) is reached
    attrs = None
    while 1:
        attrName, i = readNameToken(s, i)
        while s[i] in string.whitespace:
            i += 1
        c = s[i]
        i += 1
        if '=' == c:
            while s[i] in string.whitespace:
                i += 1
            value, i = readQuotedValue(s, i)
            if attrs is None:
                # the dict is only created once the first attribute is seen
                attrs = { attrName : value }
                element[1] = attrs
            else:
                attrs[attrName] = value
        elif '/' == c:
            if '>' != s[i]:
                raise XmlException("Expected '>' after '/'", s, i)
            # no children
            x.append(element)
            return i + 1
        elif '>' == c:
            break
        else:
            raise XmlException("Format error", s, i)

    # read children
    while 1:
        while s[i] in string.whitespace:
            i += 1
        if '<' == s[i]:
            i += 1
            c = s[i]
            if '!' == c: # better be a comment or CDATA
                i += 1
                if '--' == s[i:i + 2]:
                    i = readComment(s, i + 2, element)
                elif '[CDATA[' == s[i:i + 7]:
                    # skip all 7 characters of '[CDATA[' so that only the
                    # content is stored
                    i = readCData(s, i + 7, element)
                else:
                    raise XmlException("Comment format error", s, i)
            elif '/' == c: # end of element
                i += 1
                endName, i = readNameToken(s, i)
                while s[i] in string.whitespace:
                    i += 1
                if '>' != s[i]:
                    raise XmlException("Expected '>' to close element end tag", s, i)
                if endName != element[0]:
                    raise XmlException("Element end tag name mismatch", s, i)
                i += 1
                break
            else: # read sub element
                i = readElement(s, i, element)
        else:
            i = readText(s, i, element)

    x.append(element)
    return i
414
def readCData(s, i, x):
    """ Read CDATA content starting at position i and append it to x as a
        ('CDATA', content) tuple. The content runs up to the next ']]>'.

        Returns the position just past the closing ']]>'.

        Raises an XmlException if there is no closing ']]>'.
    """
    close = s.find(']]>', i)
    if close < 0:
        raise XmlException("No CDATA closure", s, i)
    content = s[i:close]
    x.append(('CDATA', content))

    return close + 3
423
def readText(s, i, x):
    """ Read the text that runs from position i up to the next '<' and append
        it to x, stripped of surrounding white space and with coded
        characters replaced.

        Returns the position of the '<' that ended the text.

        Raises an XmlException if there is no '<' after position i.
    """
    stop = s.find('<', i)
    if stop < 0:
        raise XmlException("No text closure", s, i)
    raw = s[i:stop].strip()
    x.append(replaceCodedChars(raw))

    return stop
432
def replaceCodedChars(text):
    """ Return text with every coded character, such as '&lt;' or '&#65;',
        replaced by the character it stands for. Text without an '&' is
        returned as is.
    """
    if '&' not in text:
        return text

    pieces = []
    cursor = 0
    while True:
        amp = text.find('&', cursor)
        if amp < 0:
            # no more codes, keep the rest of the text
            pieces.append(text[cursor:])
            break
        decoded, after = readCodedChar(text, amp)
        pieces.append(text[cursor:amp])
        pieces.append(decoded)
        # carry on after the code so a decoded '&' is not read again
        cursor = after
    return "".join(pieces)
451
# Named character codes understood by readCodedChar.
_codedChars = {
    'nbsp': ' ',
    'lt': '<',
    'gt': '>',
    'amp': '&',
    'quot': '"',
    'apos': "'",
}

def readCodedChar(s, i):
    """ Read the coded character that starts with the '&' at position i.

        Supported are the names nbsp, lt, gt, amp, quot, and apos, decimal
        codes such as '&#65;', and hexadecimal codes such as '&#x41;'.

        Returns the character and the position just past the closing ';'.

        Raises an XmlException if the code is not terminated by a ';' within
        10 characters, is not a known name, or is not a valid number.
    """
    # 10 characters is enough for the longest numeric code, '&#x10FFFF;' or
    # '&#1114111;', while a stray '&' still does not pair up with a ';' much
    # further along in the text.
    end = s.find(';', i, i + 10)
    if 0 > end:
        raise XmlException("Invalid coded character. Not terminated by ';'", None, -1)
    code = s[i + 1:end]
    if code.startswith('#'):
        digits = code[1:]
        try:
            if digits.startswith('x'):
                c = chr(int(digits[1:], 16))
            else:
                c = chr(int(digits))
        except ValueError:
            # not a number, or a number chr() can not turn into a character
            raise XmlException("Invalid coded character '%s'" % code, None, -1)
    elif code in _codedChars:
        c = _codedChars[code]
    else:
        raise XmlException("Invalid coded character '%s'" % code, None, -1)

    return c, end + 1
477
def expandCodedChars(s):
    """ Return s with the characters that have a special meaning in XML
        replaced by their character codes: '&', '<', '>', '"', and "'".

        In a run of two or more spaces every space but the first is replaced
        by '&nbsp;'. A single space is left alone.
    """
    # '&' has to be done first, otherwise the '&' of the codes added below
    # would be expanded again.
    s = s.replace('&', '&amp;')
    s = s.replace('<', '&lt;')
    s = s.replace('>', '&gt;')
    s = s.replace('"', '&quot;')
    s = s.replace("'", '&apos;')

    # todo handle nbsp correctly, needs read adjustment for strip
    if '  ' in s:
        # pairs of spaces first, then the odd space left over after a pair
        s = s.replace('  ', ' &nbsp;')
        s = s.replace('&nbsp; ', '&nbsp;&nbsp;')

    return s
496