bencode.py 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319
  1. # Written by Petru Paler, Uoti Urpala, Ross Cohen and John Hoffman
  2. # see LICENSE.txt for license information
  3. from types import IntType, LongType, StringType, ListType, TupleType, DictType
  4. try:
  5. from types import BooleanType
  6. except ImportError:
  7. BooleanType = None
  8. try:
  9. from types import UnicodeType
  10. except ImportError:
  11. UnicodeType = None
  12. from cStringIO import StringIO
  13. def decode_int(x, f):
  14. f += 1
  15. newf = x.index('e', f)
  16. try:
  17. n = int(x[f:newf])
  18. except:
  19. n = long(x[f:newf])
  20. if x[f] == '-':
  21. if x[f + 1] == '0':
  22. raise ValueError
  23. elif x[f] == '0' and newf != f+1:
  24. raise ValueError
  25. return (n, newf+1)
  26. def decode_string(x, f):
  27. colon = x.index(':', f)
  28. try:
  29. n = int(x[f:colon])
  30. except (OverflowError, ValueError):
  31. n = long(x[f:colon])
  32. if x[f] == '0' and colon != f+1:
  33. raise ValueError
  34. colon += 1
  35. return (x[colon:colon+n], colon+n)
  36. def decode_unicode(x, f):
  37. s, f = decode_string(x, f+1)
  38. return (s.decode('UTF-8'),f)
  39. def decode_list(x, f):
  40. r, f = [], f+1
  41. while x[f] != 'e':
  42. v, f = decode_func[x[f]](x, f)
  43. r.append(v)
  44. return (r, f + 1)
  45. def decode_dict(x, f):
  46. r, f = {}, f+1
  47. lastkey = None
  48. while x[f] != 'e':
  49. k, f = decode_string(x, f)
  50. if lastkey >= k:
  51. raise ValueError
  52. lastkey = k
  53. r[k], f = decode_func[x[f]](x, f)
  54. return (r, f + 1)
  55. decode_func = {}
  56. decode_func['l'] = decode_list
  57. decode_func['d'] = decode_dict
  58. decode_func['i'] = decode_int
  59. decode_func['0'] = decode_string
  60. decode_func['1'] = decode_string
  61. decode_func['2'] = decode_string
  62. decode_func['3'] = decode_string
  63. decode_func['4'] = decode_string
  64. decode_func['5'] = decode_string
  65. decode_func['6'] = decode_string
  66. decode_func['7'] = decode_string
  67. decode_func['8'] = decode_string
  68. decode_func['9'] = decode_string
  69. #decode_func['u'] = decode_unicode
  70. def bdecode(x, sloppy = 0):
  71. try:
  72. r, l = decode_func[x[0]](x, 0)
  73. # except (IndexError, KeyError):
  74. except (IndexError, KeyError, ValueError):
  75. raise ValueError, "bad bencoded data"
  76. if not sloppy and l != len(x):
  77. raise ValueError, "bad bencoded data"
  78. return r
  79. def test_bdecode():
  80. try:
  81. bdecode('0:0:')
  82. assert 0
  83. except ValueError:
  84. pass
  85. try:
  86. bdecode('ie')
  87. assert 0
  88. except ValueError:
  89. pass
  90. try:
  91. bdecode('i341foo382e')
  92. assert 0
  93. except ValueError:
  94. pass
  95. assert bdecode('i4e') == 4L
  96. assert bdecode('i0e') == 0L
  97. assert bdecode('i123456789e') == 123456789L
  98. assert bdecode('i-10e') == -10L
  99. try:
  100. bdecode('i-0e')
  101. assert 0
  102. except ValueError:
  103. pass
  104. try:
  105. bdecode('i123')
  106. assert 0
  107. except ValueError:
  108. pass
  109. try:
  110. bdecode('')
  111. assert 0
  112. except ValueError:
  113. pass
  114. try:
  115. bdecode('i6easd')
  116. assert 0
  117. except ValueError:
  118. pass
  119. try:
  120. bdecode('35208734823ljdahflajhdf')
  121. assert 0
  122. except ValueError:
  123. pass
  124. try:
  125. bdecode('2:abfdjslhfld')
  126. assert 0
  127. except ValueError:
  128. pass
  129. assert bdecode('0:') == ''
  130. assert bdecode('3:abc') == 'abc'
  131. assert bdecode('10:1234567890') == '1234567890'
  132. try:
  133. bdecode('02:xy')
  134. assert 0
  135. except ValueError:
  136. pass
  137. try:
  138. bdecode('l')
  139. assert 0
  140. except ValueError:
  141. pass
  142. assert bdecode('le') == []
  143. try:
  144. bdecode('leanfdldjfh')
  145. assert 0
  146. except ValueError:
  147. pass
  148. assert bdecode('l0:0:0:e') == ['', '', '']
  149. try:
  150. bdecode('relwjhrlewjh')
  151. assert 0
  152. except ValueError:
  153. pass
  154. assert bdecode('li1ei2ei3ee') == [1, 2, 3]
  155. assert bdecode('l3:asd2:xye') == ['asd', 'xy']
  156. assert bdecode('ll5:Alice3:Bobeli2ei3eee') == [['Alice', 'Bob'], [2, 3]]
  157. try:
  158. bdecode('d')
  159. assert 0
  160. except ValueError:
  161. pass
  162. try:
  163. bdecode('defoobar')
  164. assert 0
  165. except ValueError:
  166. pass
  167. assert bdecode('de') == {}
  168. assert bdecode('d3:agei25e4:eyes4:bluee') == {'age': 25, 'eyes': 'blue'}
  169. assert bdecode('d8:spam.mp3d6:author5:Alice6:lengthi100000eee') == {'spam.mp3': {'author': 'Alice', 'length': 100000}}
  170. try:
  171. bdecode('d3:fooe')
  172. assert 0
  173. except ValueError:
  174. pass
  175. try:
  176. bdecode('di1e0:e')
  177. assert 0
  178. except ValueError:
  179. pass
  180. try:
  181. bdecode('d1:b0:1:a0:e')
  182. assert 0
  183. except ValueError:
  184. pass
  185. try:
  186. bdecode('d1:a0:1:a0:e')
  187. assert 0
  188. except ValueError:
  189. pass
  190. try:
  191. bdecode('i03e')
  192. assert 0
  193. except ValueError:
  194. pass
  195. try:
  196. bdecode('l01:ae')
  197. assert 0
  198. except ValueError:
  199. pass
  200. try:
  201. bdecode('9999:x')
  202. assert 0
  203. except ValueError:
  204. pass
  205. try:
  206. bdecode('l0:')
  207. assert 0
  208. except ValueError:
  209. pass
  210. try:
  211. bdecode('d0:0:')
  212. assert 0
  213. except ValueError:
  214. pass
  215. try:
  216. bdecode('d0:')
  217. assert 0
  218. except ValueError:
  219. pass
  220. bencached_marker = []
  221. class Bencached:
  222. def __init__(self, s):
  223. self.marker = bencached_marker
  224. self.bencoded = s
  225. BencachedType = type(Bencached('')) # insufficient, but good as a filter
  226. def encode_bencached(x,r):
  227. assert x.marker == bencached_marker
  228. r.append(x.bencoded)
  229. def encode_int(x,r):
  230. r.extend(('i',str(x),'e'))
  231. def encode_bool(x,r):
  232. encode_int(int(x),r)
  233. def encode_string(x,r):
  234. r.extend((str(len(x)),':',x))
  235. def encode_unicode(x,r):
  236. #r.append('u')
  237. encode_string(x.encode('UTF-8'),r)
  238. def encode_list(x,r):
  239. r.append('l')
  240. for e in x:
  241. encode_func[type(e)](e, r)
  242. r.append('e')
  243. def encode_dict(x,r):
  244. r.append('d')
  245. ilist = x.items()
  246. ilist.sort()
  247. for k,v in ilist:
  248. r.extend((str(len(k)),':',k))
  249. encode_func[type(v)](v, r)
  250. r.append('e')
  251. encode_func = {}
  252. encode_func[BencachedType] = encode_bencached
  253. encode_func[IntType] = encode_int
  254. encode_func[LongType] = encode_int
  255. encode_func[StringType] = encode_string
  256. encode_func[ListType] = encode_list
  257. encode_func[TupleType] = encode_list
  258. encode_func[DictType] = encode_dict
  259. if BooleanType:
  260. encode_func[BooleanType] = encode_bool
  261. if UnicodeType:
  262. encode_func[UnicodeType] = encode_unicode
  263. def bencode(x):
  264. r = []
  265. try:
  266. encode_func[type(x)](x, r)
  267. except:
  268. print "*** error *** could not encode type %s (value: %s)" % (type(x), x)
  269. assert 0
  270. return ''.join(r)
  271. def test_bencode():
  272. assert bencode(4) == 'i4e'
  273. assert bencode(0) == 'i0e'
  274. assert bencode(-10) == 'i-10e'
  275. assert bencode(12345678901234567890L) == 'i12345678901234567890e'
  276. assert bencode('') == '0:'
  277. assert bencode('abc') == '3:abc'
  278. assert bencode('1234567890') == '10:1234567890'
  279. assert bencode([]) == 'le'
  280. assert bencode([1, 2, 3]) == 'li1ei2ei3ee'
  281. assert bencode([['Alice', 'Bob'], [2, 3]]) == 'll5:Alice3:Bobeli2ei3eee'
  282. assert bencode({}) == 'de'
  283. assert bencode({'age': 25, 'eyes': 'blue'}) == 'd3:agei25e4:eyes4:bluee'
  284. assert bencode({'spam.mp3': {'author': 'Alice', 'length': 100000}}) == 'd8:spam.mp3d6:author5:Alice6:lengthi100000eee'
  285. try:
  286. bencode({1: 'foo'})
  287. assert 0
  288. except AssertionError:
  289. pass
# Optional step: when the psyco package can be imported, bind it to the two
# public entry points (presumably to speed them up -- psyco is not shown
# here). When the import fails, the module works unchanged without it.
try:
    import psyco
    psyco.bind(bdecode)
    psyco.bind(bencode)
except ImportError:
    pass
  295. pass