xmlutils.py 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207
  1. # File: xmlutils.py
  2. # Library: DOPAL - DO Python Azureus Library
  3. #
  4. # This program is free software; you can redistribute it and/or modify
  5. # it under the terms of the GNU General Public License as published by
  6. # the Free Software Foundation; version 2 of the License.
  7. #
  8. # This program is distributed in the hope that it will be useful,
  9. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. # GNU General Public License for more details ( see the COPYING file ).
  12. #
  13. # You should have received a copy of the GNU General Public License
  14. # along with this program; if not, write to the Free Software
  15. # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  16. '''
  17. XML utility functions.
  18. '''
  19. # Given an object which has the same interface as xml.dom.Node:
  20. # a) Join all concurrent text nodes together.
  21. # b) Strip all trailing and leading whitespace from each text node.
  22. #
  23. # This function will recursively process the tree structure given in the node
  24. # object. No value will be returned by this function, instead the given object
  25. # will be modified.
  26. def normalise_xml_structure(xml_node):
  27. # Concurrent text nodes should be joined together.
  28. xml_node.normalize()
  29. # Strip all text nodes which are empty of content (whitespace is not
  30. # content).
  31. from xml.dom import Node
  32. nodes_to_delete = []
  33. for node in xml_node.childNodes:
  34. if node.nodeType == Node.TEXT_NODE:
  35. stripped_text = node.nodeValue.strip()
  36. if stripped_text:
  37. node.nodeValue = stripped_text
  38. else:
  39. nodes_to_delete.append(node)
  40. else:
  41. normalise_xml_structure(node)
  42. for node in nodes_to_delete:
  43. xml_node.removeChild(node)
  44. node.unlink()
  45. def get_text_content(node):
  46. from xml.dom import Node
  47. # Text content is stored directly in this node.
  48. if node.nodeType == Node.TEXT_NODE:
  49. return node.nodeValue
  50. # Otherwise, must be in a child node.
  51. #elif len(node.childNodes) == 1 and \
  52. # node.firstChild.nodeType == Node.TEXT_NODE:
  53. # return node.firstChild.nodeValue
  54. # Sometimes happens for attributes with no real value.
  55. elif len(node.childNodes) == 0:
  56. return ''
  57. text_node = None
  58. err_text = None
  59. for child in node.childNodes:
  60. if child.nodeType == Node.TEXT_NODE:
  61. if text_node is None:
  62. text_node = child
  63. else:
  64. err_text = "contained multiple text nodes"
  65. break
  66. else:
  67. if text_node is None:
  68. if len(node.childNodes) != 1:
  69. err_text = "contained multiple nodes, but none were text"
  70. else:
  71. err_text = "did not contain a character string as its value"
  72. else:
  73. return text_node.nodeValue
  74. raise ValueError, ("the node %s " % node.nodeName) + err_text
  75. from xml.sax.saxutils import quoteattr, escape
  76. # This base class will be removed when XMLObject is removed.
  77. class _XMLObjectBase(object):
  78. def __init__(self, tag_name):
  79. self.tag_name = tag_name
  80. self.attributes = {}
  81. self.contents = []
  82. def add_attribute(self, attribute_name, attribute_value):
  83. self.attributes[attribute_name] = attribute_value
  84. def add_content(self, content):
  85. self.contents.append(content)
  86. def to_string(self, out=None, indent=0):
  87. if out is None:
  88. # We use StringIO instead of cStringIO not to lose unicode strings.
  89. import StringIO
  90. out = StringIO.StringIO()
  91. return_as_string = True
  92. else:
  93. return_as_string = False
  94. indent_string = ' ' * indent
  95. out.write(indent_string)
  96. out.write('<')
  97. out.write(self.tag_name)
  98. for attr_name, attr_value in self.attributes.items():
  99. out.write(' ')
  100. out.write(attr_name)
  101. out.write('=')
  102. out.write(quoteattr(attr_value))
  103. # If we have no contents, we'll close the tag here.
  104. if not self.contents:
  105. out.write(' />\n')
  106. else:
  107. out.write('>')
  108. # If we have one piece of content, which is just a string, then
  109. # we'll put it on the same line as the opening tag is on.
  110. if len(self.contents) == 1 and not hasattr(self.contents[0], 'to_string'):
  111. out.write(escape(self.contents[0]))
  112. # Otherwise, we assume we have some more XML blocks to write out,
  113. # so we'll indent them and put them on newlines.
  114. elif self.contents:
  115. out.write('\n')
  116. for content in self.contents:
  117. content.to_string(out, indent+2)
  118. out.write(indent_string)
  119. # Write out the closing tag (if we haven't written it already).
  120. if self.contents:
  121. out.write('</')
  122. out.write(self.tag_name)
  123. out.write('>\n')
  124. # If the invocation of this method was not passed a buffer to write
  125. # into, then we return the string representation.
  126. if return_as_string:
  127. return out.getvalue()
  128. return None
  129. class XMLObject(_XMLObjectBase):
  130. '''
  131. B{Deprecated:} An object representing a block of XML.
  132. @attention: B{Deprecated:} This class does not provide any guarantees in
  133. the way that byte strings are handled. Use L{UXMLObject} instead.
  134. '''
  135. def __init__(self, tag_name):
  136. from dopal.errors import DopalPendingDeprecationWarning
  137. import warnings
  138. warnings.warn("XMLObject is deprecated - use UXMLObject instead", DopalPendingDeprecationWarning)
  139. _XMLObjectBase.__init__(self, tag_name)
  140. class UXMLObject(_XMLObjectBase):
  141. '''
  142. An object representing a block of XML.
  143. Any string which is added to this block (either through the L{add_content}
  144. or L{add_attribute} methods should be a unicode string, rather than a byte
  145. string. If it is a byte string, then it must be a string which contains
  146. text in the system's default encoding - attempting to add text encoding in
  147. other formats is not allowed.
  148. '''
  149. def to_string(self, out=None, indent=0):
  150. result = _XMLObjectBase.to_string(self, out, indent)
  151. if result is None:
  152. return None
  153. return unicode(result)
  154. def encode(self, encoding='UTF-8'):
  155. return (('<?xml version="1.0" encoding="%s"?>\n' % encoding) + self.to_string()).encode(encoding)
  156. def __unicode__(self):
  157. return self.to_string()
  158. def make_xml_ref_for_az_object(object_id):
  159. '''
  160. Creates an XML block which represents a remote object in Azureus with the given object ID.
  161. @param object_id: The object ID to reference.
  162. @type object_id: int / long
  163. @return: A L{UXMLObject} instance.
  164. '''
  165. object_id_block = UXMLObject('_object_id')
  166. object_id_block.add_content(str(object_id))
  167. object_block = UXMLObject('OBJECT')
  168. object_block.add_content(object_id_block)
  169. return object_block