| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207 |
- # File: xmlutils.py
- # Library: DOPAL - DO Python Azureus Library
- #
- # This program is free software; you can redistribute it and/or modify
- # it under the terms of the GNU General Public License as published by
- # the Free Software Foundation; version 2 of the License.
- #
- # This program is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- # GNU General Public License for more details ( see the COPYING file ).
- #
- # You should have received a copy of the GNU General Public License
- # along with this program; if not, write to the Free Software
- # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- '''
- XML utility functions.
- '''
- # Given an object which has the same interface as xml.dom.Node:
- # a) Join all adjacent text nodes together.
- # b) Strip all leading and trailing whitespace from each text node, and
- #    remove any text node which contains nothing but whitespace.
- #
- # This function recursively processes the tree structure rooted at the given
- # node object. No value is returned by this function; instead, the given
- # object is modified in place.
def normalise_xml_structure(xml_node):
    '''
    Tidy up the text nodes of a DOM tree, modifying the tree in place.

    The node's own C{normalize} method is called first, then every text node
    which is a direct child has its surrounding whitespace stripped. Text
    nodes which are left empty by the stripping are removed from the tree and
    unlinked. All non-text children are processed recursively in the same way.

    @param xml_node: An object with the same interface as C{xml.dom.Node}.
    @return: C{None} - the given node is modified instead.
    '''
    from xml.dom import Node

    # Adjacent text nodes should be merged before we look at them.
    xml_node.normalize()

    # Removal is deferred until after the loop, so that the child list is not
    # modified while it is being iterated over.
    blank_text_nodes = []
    for child in xml_node.childNodes:
        if child.nodeType != Node.TEXT_NODE:
            normalise_xml_structure(child)
            continue

        trimmed = child.nodeValue.strip()
        if trimmed:
            child.nodeValue = trimmed
        else:
            # Whitespace is not content.
            blank_text_nodes.append(child)

    for blank in blank_text_nodes:
        xml_node.removeChild(blank)
        blank.unlink()
def get_text_content(node):
    '''
    Return the text held by a DOM node.

    The value is determined as follows:
      - a text node yields its own C{nodeValue};
      - a node with no children yields the empty string (this sometimes
        happens for attributes with no real value);
      - otherwise, the node must have exactly one text node amongst its
        children, and the C{nodeValue} of that child is returned. Any
        non-text siblings of that child are ignored.

    @param node: An object with the same interface as C{xml.dom.Node}.
    @return: The text content of the node.
    @raise ValueError: If the node has children, but the number of text nodes
       amongst them is not exactly one.
    '''
    from xml.dom import Node

    # Text content is stored directly in this node.
    if node.nodeType == Node.TEXT_NODE:
        return node.nodeValue

    children = node.childNodes

    # Sometimes happens for attributes with no real value.
    if len(children) == 0:
        return ''

    # Otherwise, the text must be in a child node.
    text_children = [child for child in children
                     if child.nodeType == Node.TEXT_NODE]
    if len(text_children) == 1:
        return text_children[0].nodeValue

    if text_children:
        err_text = "contained multiple text nodes"
    elif len(children) != 1:
        err_text = "contained multiple nodes, but none were text"
    else:
        err_text = "did not contain a character string as its value"

    # The call form of raise is used because the "raise Class, value" form is
    # a syntax error on Python 3.
    raise ValueError(("the node %s " % node.nodeName) + err_text)
- from xml.sax.saxutils import quoteattr, escape
- # This base class will be removed when XMLObject is removed.
- class _XMLObjectBase(object):
- def __init__(self, tag_name):
- self.tag_name = tag_name
- self.attributes = {}
- self.contents = []
- def add_attribute(self, attribute_name, attribute_value):
- self.attributes[attribute_name] = attribute_value
- def add_content(self, content):
- self.contents.append(content)
- def to_string(self, out=None, indent=0):
- if out is None:
- # We use StringIO instead of cStringIO not to lose unicode strings.
- import StringIO
- out = StringIO.StringIO()
- return_as_string = True
- else:
- return_as_string = False
- indent_string = ' ' * indent
- out.write(indent_string)
- out.write('<')
- out.write(self.tag_name)
- for attr_name, attr_value in self.attributes.items():
- out.write(' ')
- out.write(attr_name)
- out.write('=')
- out.write(quoteattr(attr_value))
- # If we have no contents, we'll close the tag here.
- if not self.contents:
- out.write(' />\n')
- else:
- out.write('>')
- # If we have one piece of content, which is just a string, then
- # we'll put it on the same line as the opening tag is on.
- if len(self.contents) == 1 and not hasattr(self.contents[0], 'to_string'):
- out.write(escape(self.contents[0]))
- # Otherwise, we assume we have some more XML blocks to write out,
- # so we'll indent them and put them on newlines.
- elif self.contents:
- out.write('\n')
- for content in self.contents:
- content.to_string(out, indent+2)
- out.write(indent_string)
- # Write out the closing tag (if we haven't written it already).
- if self.contents:
- out.write('</')
- out.write(self.tag_name)
- out.write('>\n')
- # If the invocation of this method was not passed a buffer to write
- # into, then we return the string representation.
- if return_as_string:
- return out.getvalue()
- return None
class XMLObject(_XMLObjectBase):
    '''
    B{Deprecated:} An object representing a block of XML.

    @attention: B{Deprecated:} This class does not provide any guarantees in
       the way that byte strings are handled. Use L{UXMLObject} instead.
    '''

    def __init__(self, tag_name):
        '''
        Create the XML block, issuing a C{DopalPendingDeprecationWarning}.

        @param tag_name: The name to use for the opening and closing tags.
        '''
        from dopal.errors import DopalPendingDeprecationWarning
        import warnings

        # stacklevel=2 makes the warning refer to the code which created the
        # object, rather than to this line.
        warnings.warn("XMLObject is deprecated - use UXMLObject instead",
                      DopalPendingDeprecationWarning, stacklevel=2)
        _XMLObjectBase.__init__(self, tag_name)
class UXMLObject(_XMLObjectBase):
    '''
    An object representing a block of XML.

    Any string which is added to this block (either through the
    L{add_content} or L{add_attribute} methods) should be a unicode string,
    rather than a byte string. If it is a byte string, then it must be a
    string which contains text in the system's default encoding - attempting
    to add text encoded in any other encoding is not allowed.
    '''

    def to_string(self, out=None, indent=0):
        '''
        Write this block out as XML text.

        @param out: A file-like object to write into. If C{None}, the text is
           returned instead.
        @param indent: The number of spaces to put before the opening tag.
        @return: The XML text as a unicode string if C{out} was C{None},
           otherwise C{None}.
        '''
        result = _XMLObjectBase.to_string(self, out, indent)
        if result is None:
            return None

        try:
            text_type = unicode
        except NameError:
            # Python 3 has no unicode builtin - str is the unicode text type.
            text_type = str
        return text_type(result)

    def encode(self, encoding='UTF-8'):
        '''
        Return this block as an encoded XML document, which starts with an
        XML declaration naming the encoding.

        @param encoding: The name of the encoding to use.
        @return: The encoded document.
        '''
        declaration = '<?xml version="1.0" encoding="%s"?>\n' % encoding
        return (declaration + self.to_string()).encode(encoding)

    def __unicode__(self):
        return self.to_string()
def make_xml_ref_for_az_object(object_id):
    '''
    Build the XML block used to refer to a remote Azureus object.

    The result is an C{OBJECT} element, which contains a single C{_object_id}
    element holding the string form of the given ID.

    @param object_id: The object ID to reference.
    @type object_id: int / long
    @return: A L{UXMLObject} instance.
    '''
    id_element = UXMLObject('_object_id')
    id_element.add_content(str(object_id))

    reference = UXMLObject('OBJECT')
    reference.add_content(id_element)
    return reference
|