# ebencode.py (8.2 KB)
  1. # The contents of this file are subject to the BitTorrent Open Source License
  2. # Version 1.1 (the License). You may not copy or use this file, in either
  3. # source code or executable form, except in compliance with the License. You
  4. # may obtain a copy of the License at http://www.bittorrent.com/license/.
  5. #
  6. # Software distributed under the License is distributed on an AS IS basis,
  7. # WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  8. # for the specific language governing rights and limitations under the
  9. # License.
  10. from binascii import a2b_hex, b2a_hex
  11. import types
  12. import decimal
  13. class EBError(ValueError):
  14. pass
  15. class EBObject(object):
  16. def __init__(self):
  17. pass
  18. def get_int(self):
  19. raise EBError
  20. return 0
  21. def get_string(self):
  22. raise EBError
  23. return ''
  24. def get_ustring(self):
  25. raise EBError
  26. return u''
  27. def get_list(self):
  28. raise EBError
  29. return [EBObject()]
  30. def get_dict(self):
  31. raise EBError
  32. return {u'': EBObject()}
  33. class IntEBObject(EBObject):
  34. def __init__(self, i):
  35. self.v = i
  36. def get_int(self):
  37. return self.v
  38. class StringEBObject(EBObject):
  39. def __init__(self, s):
  40. self.v = s
  41. def get_string(self):
  42. return self.v
  43. class UStringEBObject(EBObject):
  44. def __init__(self, u):
  45. self.v = u
  46. def get_ustring(self):
  47. return self.v
  48. class ListEBObject(EBObject):
  49. def __init__(self, l):
  50. self.v = l
  51. def get_list(self):
  52. return self.v
  53. class DictEBObject(EBObject):
  54. def __init__(self, d):
  55. self.v = d
  56. def get_dict(self):
  57. return self.v
  58. def toint(s):
  59. return int(b2a_hex(s), 16)
  60. def tostr(i):
  61. if i == 0:
  62. return ''
  63. h = hex(i)[2:]
  64. if h[-1] == 'L':
  65. h = h[:-1]
  66. if len(h) & 1 == 1:
  67. h = '0' + h
  68. return a2b_hex(h)
  69. def read_int(s, pos):
  70. y = ord(s[pos])
  71. pos += 1
  72. if not y & 0x80:
  73. return y, pos
  74. elif not y & 0x40:
  75. y = y & 0x7f
  76. return toint(s[pos:pos + y]), pos + y
  77. else:
  78. y = y & 0x3F
  79. z = toint(s[pos:pos + y])
  80. pos += y
  81. return toint(s[pos:pos + z]), pos + z
  82. def decode_none(s, pos):
  83. return None, pos
  84. def decode_int(s, pos):
  85. i, pos = read_int(s, pos)
  86. return i, pos
  87. def decode_decimal(s, pos):
  88. i, pos = read_int(s, pos)
  89. r = s[pos:pos + i]
  90. return decimal.Decimal(r), pos + i
  91. def decode_bool(s, pos):
  92. i, pos = read_int(s, pos)
  93. return bool(i), pos
  94. def decode_negative_int(s, pos):
  95. i, pos = read_int(s, pos)
  96. return -i, pos
  97. def decode_string(s, pos):
  98. i, pos = read_int(s, pos)
  99. r = s[pos:pos + i]
  100. return r, pos + i
  101. def decode_float(s, pos):
  102. r, newpos = decode_string(s, pos)
  103. f = float(r)
  104. return f, newpos
  105. def decode_ustring(s, pos):
  106. i, pos = read_int(s, pos)
  107. r = s[pos:pos + i].decode('utf-8')
  108. return r, pos + i
  109. def decode_list(s, pos):
  110. r = []
  111. while s[pos] != ']':
  112. next, pos = decode_obj(s, pos)
  113. r.append(next)
  114. return r, pos + 1
  115. def decode_dict(s, pos):
  116. r = {}
  117. while s[pos] != '}':
  118. key, pos = decode_obj(s, pos)
  119. val, pos = decode_obj(s, pos)
  120. r[key] = val
  121. return r, pos + 1
  122. def decode_obj(s, pos):
  123. c = s[pos]
  124. pos += 1
  125. if c == 'n':
  126. return decode_none(s, pos)
  127. elif c == 'i':
  128. return decode_int(s, pos)
  129. elif c == 'd':
  130. return decode_decimal(s, pos)
  131. elif c == 'b':
  132. return decode_bool(s, pos)
  133. elif c == '-':
  134. return decode_negative_int(s, pos)
  135. elif c == 'f':
  136. return decode_float(s, pos)
  137. elif c == 's':
  138. return decode_string(s, pos)
  139. elif c == 'u':
  140. return decode_ustring(s, pos)
  141. elif c == '[':
  142. return decode_list(s, pos)
  143. elif c == '{':
  144. return decode_dict(s, pos)
  145. else:
  146. raise EBError('invalid type character: %s' % str(c))
  147. class EBIndexError(IndexError, EBError):
  148. pass
  149. def ebdecode(x):
  150. try:
  151. r, pos = decode_obj(x, 0)
  152. except IndexError:
  153. raise EBIndexError('apparently truncated string')
  154. except UnicodeDecodeError:
  155. raise EBError('invalid utf-8')
  156. if pos != len(x):
  157. raise EBError('excess data after valid prefix')
  158. return r
  159. class EBencached(object):
  160. __slots__ = ['bencoded']
  161. def __init__(self, s):
  162. self.bencoded = s
  163. def encode_bencached(x,r):
  164. r.append(x.bencoded)
  165. def make_int(i):
  166. if i < 0x80:
  167. return chr(i)
  168. s = tostr(i)
  169. if len(s) < 0x40:
  170. return chr(0x80 | len(s)) + s
  171. s2 = tostr(len(s))
  172. return chr(0xC0 | len(s2)) + s2 + s
  173. def encode_none(v, r):
  174. r.extend(('n', ''))
  175. def encode_int(i, r):
  176. if i >= 0:
  177. r.extend(('i', make_int(i)))
  178. else:
  179. r.extend(('-', make_int(-i)))
  180. def encode_decimal(d, r):
  181. s = str(d)
  182. r.extend(('d', make_int(len(s)), str(s)))
  183. def encode_bool(b, r):
  184. r.extend(('b', make_int(int(bool(b)))))
  185. def encode_float(f, r):
  186. s = repr(f)
  187. r.extend(('f', make_int(len(s)), s))
  188. def encode_string(s, r):
  189. r.extend(('s', make_int(len(s)), s))
  190. def encode_unicode_string(u, r):
  191. s = u.encode('utf-8')
  192. r.extend(('u', make_int(len(s)), s))
  193. def encode_list(x, r):
  194. r.append('[')
  195. for i in x:
  196. encode_func[type(i)](i, r)
  197. r.append(']')
  198. def encode_dict(x, r):
  199. r.append('{')
  200. ilist = x.items()
  201. ilist.sort()
  202. for k, v in ilist:
  203. encode_func[type(k)](k, r)
  204. encode_func[type(v)](v, r)
  205. r.append('}')
  206. encode_func = {}
  207. encode_func[EBencached] = encode_bencached
  208. encode_func[types.NoneType] = encode_none
  209. encode_func[int] = encode_int
  210. encode_func[long] = encode_int
  211. encode_func[decimal.Decimal] = encode_decimal
  212. encode_func[bool] = encode_bool
  213. encode_func[float] = encode_float
  214. encode_func[str] = encode_string
  215. encode_func[unicode] = encode_unicode_string
  216. encode_func[list] = encode_list
  217. encode_func[tuple] = encode_list
  218. encode_func[dict] = encode_dict
  219. def encode_wrapped(x, r):
  220. encode_func[type(x.v)](x.v, r)
  221. encode_func[IntEBObject] = encode_wrapped
  222. encode_func[StringEBObject] = encode_wrapped
  223. encode_func[UStringEBObject] = encode_wrapped
  224. encode_func[ListEBObject] = encode_wrapped
  225. encode_func[DictEBObject] = encode_wrapped
  226. def ebencode(x):
  227. r = []
  228. encode_func[type(x)](x, r)
  229. return ''.join(r)
  230. def c(v):
  231. s = ebencode(v)
  232. r = ebdecode(s)
  233. assert v == r
  234. if isinstance(v, bool):
  235. assert isinstance(r, bool)
  236. elif isinstance(v, (int, long)) and isinstance(r, (int, long)):
  237. # assume it's right
  238. pass
  239. else:
  240. assert type(v) == type(r), '%s is not %s' % (type(v), type(r))
  241. assert ebencode(r) == s
  242. c(None)
  243. c(0)
  244. c(3)
  245. c(3l)
  246. c(500)
  247. c(-4)
  248. c(True)
  249. c(False)
  250. c(4.0)
  251. c(-4.0)
  252. c(2 ** 5000 + 27)
  253. c('abc')
  254. c(decimal.Decimal('4.5'))
  255. c(u'pqr')
  256. c([1, 2])
  257. c([2, 'abc', u'pqr'])
  258. c({})
  259. c([[]])
  260. c({u'a': 2})
  261. c({u'abc': 2, u'pqr': 4})
  262. c([[1, 2], ['abc', 'pqr']])
  263. ##class StreamEbdecode:
  264. ## def __init__(self):
  265. ## self.buf = ''
  266. ## self.bufint = None
  267. ## self.returns = []
  268. ##
  269. ## def add(self, stuff):
  270. ## self.buf += stuff
  271. ## try:
  272. ## while True:
  273. ## if self.bufint is None:
  274. ## mylength, pos = read_int(self.buf, 0)
  275. ## else:
  276. ## mylength, pos = self.bufint, 0
  277. ## if pos + mylength > len(self.buf):
  278. ## self.bufint = mylength
  279. ## self.buf = self.buf[pos:]
  280. ## break
  281. ## mything = ebdecode(self.buf[pos:pos + mylength])
  282. ## self.returns.append(mything)
  283. ## self.buf = self.buf[pos + mylength:]
  284. ## self.bufint = None
  285. ## except IndexError:
  286. ## pass
  287. ##
  288. ## def next(self):
  289. ## return self.returns.pop(0)
  290. ##
  291. ##def streamwrap(thing):
  292. ## x = ebencode(thing)
  293. ## return make_int(len(x)) + x
  294. ##
  295. ##def c2(v):
  296. ## b = ''
  297. ## for i in v:
  298. ## b += streamwrap(i)
  299. ## r = []
  300. ## mystream = StreamEbdecode()
  301. ## for i in xrange(0, len(b), 11):
  302. ## mystream.add(b[i:min(i + 11, len(b))])
  303. ## try:
  304. ## while True:
  305. ## r.append(mystream.next())
  306. ## except IndexError:
  307. ## pass
  308. ## assert r == v
  309. ##
  310. ##c2(['a'])
  311. ##c2(range(5000))
  312. ##c2([''.join(str(i) for i in xrange(j)) for j in xrange(300)])