# zurllib.py
# Written by John Hoffman
# see LICENSE.txt for license information
  3. from httplib import HTTPConnection, HTTPSConnection, HTTPException
  4. from urlparse import urlparse
  5. from bencode import bdecode
  6. import socket
  7. from gzip import GzipFile
  8. from StringIO import StringIO
  9. from urllib import quote, unquote
  10. from __init__ import product_name, version_short
  11. VERSION = product_name+'/'+version_short
  12. MAX_REDIRECTS = 10
  13. class btHTTPcon(HTTPConnection): # attempt to add automatic connection timeout
  14. def connect(self):
  15. HTTPConnection.connect(self)
  16. try:
  17. self.sock.settimeout(30)
  18. except:
  19. pass
  20. class btHTTPScon(HTTPSConnection): # attempt to add automatic connection timeout
  21. def connect(self):
  22. HTTPSConnection.connect(self)
  23. try:
  24. self.sock.settimeout(30)
  25. except:
  26. pass
  27. class urlopen:
  28. def __init__(self, url):
  29. self.tries = 0
  30. self._open(url.strip())
  31. self.error_return = None
  32. def _open(self, url):
  33. self.tries += 1
  34. if self.tries > MAX_REDIRECTS:
  35. raise IOError, ('http error', 500,
  36. "Internal Server Error: Redirect Recursion")
  37. (scheme, netloc, path, pars, query, fragment) = urlparse(url)
  38. if scheme != 'http' and scheme != 'https':
  39. raise IOError, ('url error', 'unknown url type', scheme, url)
  40. url = path
  41. if pars:
  42. url += ';'+pars
  43. if query:
  44. url += '?'+query
  45. # if fragment:
  46. try:
  47. if scheme == 'http':
  48. self.connection = btHTTPcon(netloc)
  49. else:
  50. self.connection = btHTTPScon(netloc)
  51. self.connection.request('GET', url, None,
  52. { 'User-Agent': VERSION,
  53. 'Accept-Encoding': 'gzip' } )
  54. self.response = self.connection.getresponse()
  55. except HTTPException, e:
  56. raise IOError, ('http error', str(e))
  57. status = self.response.status
  58. if status in (301,302):
  59. try:
  60. self.connection.close()
  61. except:
  62. pass
  63. self._open(self.response.getheader('Location'))
  64. return
  65. if status != 200:
  66. try:
  67. data = self._read()
  68. d = bdecode(data)
  69. if d.has_key('failure reason'):
  70. self.error_return = data
  71. return
  72. except:
  73. pass
  74. raise IOError, ('http error', status, self.response.reason)
  75. def read(self):
  76. if self.error_return:
  77. return self.error_return
  78. return self._read()
  79. def _read(self):
  80. data = self.response.read()
  81. if self.response.getheader('Content-Encoding','').find('gzip') >= 0:
  82. try:
  83. compressed = StringIO(data)
  84. f = GzipFile(fileobj = compressed)
  85. data = f.read()
  86. except:
  87. raise IOError, ('http error', 'got corrupt response')
  88. return data
  89. def close(self):
  90. self.connection.close()