# parsedir.py
# The contents of this file are subject to the BitTorrent Open Source License
# Version 1.1 (the License). You may not copy or use this file, in either
# source code or executable form, except in compliance with the License. You
# may obtain a copy of the License at http://www.bittorrent.com/license/.
#
# Software distributed under the License is distributed on an AS IS basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
# for the specific language governing rights and limitations under the
# License.

# Written by John Hoffman and Uoti Urpala and David Harrison
import os
import sys  # DEBUG
import logging

from BTL.translation import _
from BTL.hash import sha
from BTL.bencode import bencode, bdecode
from BTL.btformats import check_message
from BTL.ConvertedMetainfo import ConvertedMetainfo
from BTL.defer import defer_to_thread, wrap_task
from BTL.coro import coroutine
from twisted.internet import reactor

def dtt(f, *a, **k):
    # Run f(*a, **k) on a reactor thread-pool thread and return a deferred
    # that fires back on the reactor thread.
    return defer_to_thread(reactor.callFromThread, reactor.callInThread,
                           f, *a, **k)

log = logging.getLogger("BTL.parsedir")
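
# Usage pattern for dtt inside a BTL.coro coroutine (this is exactly how
# async_parsedir below consumes it):
#
#   df = dtt(os.listdir, directory)   # schedule os.listdir on a thread
#   yield df                          # suspend until the deferred fires
#   contents = df.getResult()         # re-raises here if the call failed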

def like_gettorrent(path):
    # Read the .torrent file, bdecode it, and convert the metainfo.
    f = open(path, 'rb')
    try:
        data = f.read()
    finally:
        f.close()
    b = bdecode(data)
    metainfo = ConvertedMetainfo(b)
    return metainfo

NOISY = False
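
# A minimal usage sketch for like_gettorrent (the path is hypothetical, and
# any ConvertedMetainfo attribute other than infohash is an assumption):
#
#   metainfo = like_gettorrent('/var/spool/torrents/example.torrent')
#   print metainfo.infohash.encode('hex')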

def parsedir(directory, parsed, files, blocked, errfunc,
             include_metainfo=True):
    """Recursively searches the tree rooted at the passed 'directory' for
    .torrent files.

    THIS IS BLOCKING. Run this in a thread if you don't want it to block
    the program. Or better yet, use async_parsedir.

    The directory, parsed, files, and blocked arguments are passed
    from the previous iteration of parsedir.

    @param directory: root of the search for .torrent files.
    @param parsed: dict mapping infohash to (path, ConvertedMetainfo).
    @param files: dict mapping path -> [(modification time, size), infohash].
    @param blocked: dict used as a set. The keys are the paths of files
        that were not parsed on a prior call to parsedir for some reason.
        Valid reasons are that the .torrent file is unparseable or that a
        torrent with a matching infohash is already in the parsed set.
    @param errfunc: error-reporting callback.
    @param include_metainfo: deprecated?
    @return: the tuple (new parsed, new files, new blocked, added, removed)
        where 'new parsed', 'new files', and 'new blocked' are updated
        versions of 'parsed', 'files', and 'blocked' respectively. 'added'
        and 'removed' contain the changes made to the first three members
        of the tuple. 'added' and 'removed' are dicts mapping from
        infohash to the same (path, ConvertedMetainfo) pair that is in
        or was in parsed.
    """
    if NOISY:
        errfunc('checking dir')
    dirs_to_check = [directory]
    new_files = {}        # maps path -> [(modification time, size), infohash]
    new_blocked = {}      # used as a set.
    while dirs_to_check:  # first, recurse directories and gather torrents
        directory = dirs_to_check.pop()
        errfunc("parsing directory %s" % directory)
        try:
            dir_contents = os.listdir(directory)
        except (IOError, OSError), e:
            errfunc(_("Could not read directory ") + directory)
            continue
        for f in dir_contents:
            if f.endswith('.torrent'):
                p = os.path.join(directory, f)
                try:
                    new_files[p] = [(os.path.getmtime(p),
                                     os.path.getsize(p)), 0]
                except (IOError, OSError), e:
                    errfunc(_("Could not stat ") + p + " : " +
                            unicode(e.args[0]))
        for f in dir_contents:
            p = os.path.join(directory, f)
            if os.path.isdir(p):
                dirs_to_check.append(p)

    new_parsed = {}
    to_add = []
    added = {}
    removed = {}

    # files[path] = [(modification_time, size), infohash]; the hash is 0 if
    # the file has not been successfully parsed.
    for p, v in new_files.items():  # re-add old items and check for changes
        oldval = files.get(p)
        if oldval is None:          # new file
            to_add.append(p)
            continue
        h = oldval[1]
        if oldval[0] == v[0]:       # file is unchanged from last parse
            if h:
                if p in blocked:      # parseable + blocked means duplicate
                    to_add.append(p)  # other duplicate may have gone away
                else:
                    new_parsed[h] = parsed[h]
                new_files[p] = oldval
            else:
                new_blocked[p] = None  # same broken unparseable file
            continue
        if p not in blocked and h in parsed:  # modified; remove + re-add
            if NOISY:
                errfunc(_("removing %s (will re-add)") % p)
            removed[h] = parsed[h]
        to_add.append(p)

    to_add.sort()
    for p in to_add:        # then, parse new and changed torrents
        new_file = new_files[p]
        v = new_file[0]     # new_file[0] is the file's (mod time, size).
        infohash = new_file[1]
        if infohash in new_parsed:  # duplicate, i.e., same infohash.
            if p not in blocked or files[p][0] != v:
                errfunc(_("**warning** %s is a duplicate torrent for %s") %
                        (p, new_parsed[infohash][0]))
            new_blocked[p] = None
            continue

        if NOISY:
            errfunc('adding ' + p)
        try:
            metainfo = like_gettorrent(p)
            new_file[1] = metainfo.infohash
            if metainfo.infohash in new_parsed:
                errfunc(_("**warning** %s is a duplicate torrent for %s") %
                        (p, new_parsed[metainfo.infohash][0]))
                new_blocked[p] = None
                continue
        except Exception, e:
            errfunc(_("**warning** %s has errors") % p)
            new_blocked[p] = None
            continue

        if NOISY:
            errfunc(_("... successful"))
        new_parsed[metainfo.infohash] = (p, metainfo)
        added[metainfo.infohash] = (p, metainfo)

    for p, v in files.iteritems():  # and finally, mark removed torrents
        if p not in new_files and p not in blocked:
            if NOISY:
                errfunc(_("removing %s") % p)
            removed[v[1]] = parsed[v[1]]

    if NOISY:
        errfunc(_("done checking"))
    return (new_parsed, new_files, new_blocked, added, removed)
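
# A minimal polling sketch for parsedir (hypothetical: 'watch_dir', 'report',
# and the 10-second interval are illustrative, not part of this module):
#
#   import time
#   parsed, files, blocked = {}, {}, {}
#   def report(msg):
#       log.warning(msg)
#   while True:
#       (parsed, files, blocked, added,
#        removed) = parsedir(watch_dir, parsed, files, blocked, report)
#       # start torrents listed in 'added'; stop those listed in 'removed'
#       time.sleep(10)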

@coroutine
def async_parsedir(directory, parsed, files, blocked,
                   include_metainfo=True):
    """Recursively searches the tree rooted at the passed 'directory' for
    .torrent files. async_parsedir differs from parsedir in three ways:
    it is non-blocking, it returns a deferred, and it reports all errors
    to the BTL.parsedir logger rather than through an errfunc callback.

    The directory, parsed, files, and blocked arguments are passed
    from the previous iteration of parsedir.

    @param directory: root of the search for .torrent files.
    @param parsed: dict mapping infohash to (path, ConvertedMetainfo).
    @param files: dict mapping path -> [(modification time, size), infohash].
    @param blocked: dict used as a set. The keys are the paths of files
        that were not parsed on a prior call to parsedir for some reason.
        Valid reasons are that the .torrent file is unparseable or that a
        torrent with a matching infohash is already in the parsed set.
    @param include_metainfo: deprecated?
    @return: the tuple (new parsed, new files, new blocked, added, removed)
        where 'new parsed', 'new files', and 'new blocked' are updated
        versions of 'parsed', 'files', and 'blocked' respectively. 'added'
        and 'removed' contain the changes made to the first three members
        of the tuple. 'added' and 'removed' are dicts mapping from
        infohash to the same (path, ConvertedMetainfo) pair that is in
        or was in parsed.
    """
    log.info('async_parsedir %s' % directory)
    dirs_to_check = [directory]
    new_files = {}        # maps path -> [(modification time, size), infohash]
    new_blocked = {}      # used as a set.
    while dirs_to_check:  # first, recurse directories and gather torrents
        directory = dirs_to_check.pop()
        if NOISY:
            log.info("parsing directory %s" % directory)
        try:
            df = dtt(os.listdir, directory)
            yield df
            dir_contents = df.getResult()
        except (IOError, OSError), e:
            log.error(_("Could not read directory ") + directory)
            continue
        for f in dir_contents:
            if f.endswith('.torrent'):
                p = os.path.join(directory, f)
                try:
                    df = dtt(os.path.getmtime, p)
                    yield df
                    tmt = df.getResult()
                    df = dtt(os.path.getsize, p)
                    yield df
                    sz = df.getResult()
                    new_files[p] = [(tmt, sz), 0]
                except (IOError, OSError), e:
                    log.error(_("Could not stat ") + p + " : " +
                              unicode(e.args[0]))
        for f in dir_contents:
            p = os.path.join(directory, f)
            df = dtt(os.path.isdir, p)
            yield df
            is_dir = df.getResult()
            if is_dir:
                dirs_to_check.append(p)
    if NOISY:
        log.info("Finished parsing directories.")

    new_parsed = {}
    to_add = []
    added = {}
    removed = {}

    # files[path] = [(modification_time, size), infohash]; the hash is 0 if
    # the file has not been successfully parsed.
    for p, v in new_files.items():  # re-add old items and check for changes
        oldval = files.get(p)
        if oldval is None:          # new file
            to_add.append(p)
            continue
        h = oldval[1]
        if oldval[0] == v[0]:       # file is unchanged from last parse
            if h:
                if p in blocked:      # parseable + blocked means duplicate
                    to_add.append(p)  # other duplicate may have gone away
                else:
                    new_parsed[h] = parsed[h]
                new_files[p] = oldval
            else:
                new_blocked[p] = None  # same broken unparseable file
            continue
        if p not in blocked and h in parsed:  # modified; remove + re-add
            if NOISY:
                log.info(_("removing %s (will re-add)") % p)
            removed[h] = parsed[h]
        to_add.append(p)

    to_add.sort()
    for p in to_add:        # then, parse new and changed torrents
        new_file = new_files[p]
        v = new_file[0]     # new_file[0] is the file's (mod time, size).
        infohash = new_file[1]
        if infohash in new_parsed:  # duplicate, i.e., same infohash.
            if p not in blocked or files[p][0] != v:
                log.warning(_("%s is a duplicate torrent for %s") %
                            (p, new_parsed[infohash][0]))
            new_blocked[p] = None
            continue

        if NOISY:
            log.info('adding ' + p)
        try:
            df = dtt(like_gettorrent, p)
            yield df
            metainfo = df.getResult()
            new_file[1] = metainfo.infohash
            if metainfo.infohash in new_parsed:
                log.warning(_("%s is a duplicate torrent for %s") %
                            (p, new_parsed[metainfo.infohash][0]))
                new_blocked[p] = None
                continue
        except Exception, e:
            log.warning(_("%s has errors") % p)
            new_blocked[p] = None
            continue

        if NOISY:
            log.info(_("... successful"))
        new_parsed[metainfo.infohash] = (p, metainfo)
        added[metainfo.infohash] = (p, metainfo)

    for p, v in files.iteritems():  # and finally, mark removed torrents
        if p not in new_files and p not in blocked:
            if NOISY:
                log.info(_("removing %s") % p)
            removed[v[1]] = parsed[v[1]]

    if NOISY:
        log.info(_("done checking"))
    yield (new_parsed, new_files, new_blocked, added, removed)
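
# A minimal usage sketch for async_parsedir (hypothetical 'watch_dir'; this
# assumes a running Twisted reactor, since dtt dispatches work through it,
# and that the deferred returned by the @coroutine wrapper fires with the
# value of the final yield, as the docstring describes):
#
#   def _done(result):
#       parsed, files, blocked, added, removed = result
#       log.info('%d added, %d removed' % (len(added), len(removed)))
#   df = async_parsedir(watch_dir, {}, {}, {})
#   df.addCallback(_done)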