| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306 |
- # The contents of this file are subject to the BitTorrent Open Source License
- # Version 1.1 (the License). You may not copy or use this file, in either
- # source code or executable form, except in compliance with the License. You
- # may obtain a copy of the License at http://www.bittorrent.com/license/.
- #
- # Software distributed under the License is distributed on an AS IS basis,
- # WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
- # for the specific language governing rights and limitations under the
- # License.
- # Written by John Hoffman and Uoti Urpala and David Harrison
- import os
- import sys #DEBUG
- from BTL.translation import _
- from BTL.hash import sha
- from BTL.bencode import bencode, bdecode
- from BTL.btformats import check_message
- from BTL.ConvertedMetainfo import ConvertedMetainfo
- from BTL.defer import defer_to_thread, wrap_task
- from BTL.coro import coroutine
- from twisted.internet import reactor
def dtt(f, *args, **kwargs):
    """Hand the call f(*args, **kwargs) to defer_to_thread.

    The Twisted reactor's callFromThread and callInThread hooks are passed
    along with the callable and its arguments.

    @param f: callable to invoke.
    @return: whatever defer_to_thread returns; async_parsedir yields that
        object and then calls getResult() on it.
    """
    from_thread = reactor.callFromThread
    in_thread = reactor.callInThread
    return defer_to_thread(from_thread, in_thread, f, *args, **kwargs)
# Module-level logger named "BTL.parsedir".  async_parsedir reports its
# errors, warnings, and progress through it instead of taking an errfunc.
- import logging
- log = logging.getLogger("BTL.parsedir")
def like_gettorrent(path):
    """Read the .torrent file at 'path' and wrap it in a ConvertedMetainfo.

    @param path: filesystem path of the .torrent file to load.
    @return: ConvertedMetainfo built from the bdecoded file contents.

    Exceptions raised while opening, reading, decoding, or converting the
    file are not caught here; parsedir and async_parsedir catch them and
    record the path as blocked.
    """
    f = open(path, 'rb')
    try:
        # Close the handle explicitly instead of leaving it to garbage
        # collection, so a scan over many files cannot pile up open
        # file descriptors.
        data = f.read()
    finally:
        f.close()
    return ConvertedMetainfo(bdecode(data))
# When true, parsedir and async_parsedir emit extra progress messages
# ('adding ...', 'removing ...', 'done checking', and so on).
- NOISY = False
- def parsedir(directory, parsed, files, blocked, errfunc,
- include_metainfo=True):
- """Recurses breadth-first starting from the passed 'directory'
- looking for .torrrent files.
- THIS IS BLOCKING. Run this in a thread if you don't want it to block
- the program. Or better yet, use async_parsedir.
- The directory, parsed, files, and blocked arguments are passed
- from the previous iteration of parsedir.
- @param directory: root of the breadth-first search for .torrent files.
- @param parsed: dict mapping infohash to (path,ConvertedMetainfo).
- @param files: dict mapping path -> [(modification time, size), infohash]
- @param blocked: dict used as set. keys are list of paths of files
- that were not parsed on a prior call to parsedir for some reason.
- Valid reasons are that the .torrent file is unparseable or that a
- torrent with a matching infohash is alread in the parsed set.
- @param errfunc: error-reporting callback.
- @param include_metainfo: deprecated?
- @return: The tuple (new parsed, new files, new blocked, added, removed)
- where 'new parsed', 'new files', and 'new blocked' are updated
- versions of 'parsed', 'files', and 'blocked' respectively. 'added'
- and 'removed' contain the changes made to the first three members
- of the tuple. 'added' and 'removed' are dicts mapping from
- infohash on to the same torrent-specific info dict that is in
- or was in parsed.
- """
- if NOISY:
- errfunc('checking dir')
- dirs_to_check = [directory]
- new_files = {} # maps path -> [(modification time, size),infohash]
- new_blocked = {} # used as a set.
- while dirs_to_check: # first, recurse directories and gather torrents
- directory = dirs_to_check.pop()
- errfunc( "parsing directory %s" % directory )
- try:
- dir_contents = os.listdir(directory)
- except (IOError, OSError), e:
- errfunc(_("Could not read directory ") + directory)
- continue
- for f in dir_contents:
- if f.endswith('.torrent'):
- p = os.path.join(directory, f)
- try:
- new_files[p] = [(os.path.getmtime(p),os.path.getsize(p)),0]
- except (IOError, OSError), e:
- errfunc(_("Could not stat ") + p + " : " + unicode(e.args[0]))
- for f in dir_contents:
- p = os.path.join(directory, f)
- if os.path.isdir(p):
- dirs_to_check.append(p)
- new_parsed = {}
- to_add = []
- added = {}
- removed = {}
- # files[path] = [(modification_time, size),infohash], hash is 0 if the file
- # has not been successfully parsed
- for p,v in new_files.items(): # re-add old items and check for changes
- oldval = files.get(p)
- if oldval is None: # new file
- to_add.append(p)
- continue
- h = oldval[1]
- if oldval[0] == v[0]: # file is unchanged from last parse
- if h:
- if p in blocked: # parseable + blocked means duplicate
- to_add.append(p) # other duplicate may have gone away
- else:
- new_parsed[h] = parsed[h]
- new_files[p] = oldval
- else:
- new_blocked[p] = None # same broken unparseable file
- continue
- if p not in blocked and h in parsed: # modified; remove+add
- if NOISY:
- errfunc(_("removing %s (will re-add)") % p)
- removed[h] = parsed[h]
- to_add.append(p)
- to_add.sort()
- for p in to_add: # then, parse new and changed torrents
- new_file = new_files[p]
- v = new_file[0] # new_file[0] is the file's (mod time,sz).
- infohash = new_file[1]
- if infohash in new_parsed: # duplicate, i.e., have same infohash.
- if p not in blocked or files[p][0] != v:
- errfunc(_("**warning** %s is a duplicate torrent for %s") %
- (p, new_parsed[infohash][0]))
- new_blocked[p] = None
- continue
- if NOISY:
- errfunc('adding '+p)
- try:
- metainfo = like_gettorrent(p)
- new_file[1] = metainfo.infohash
- if new_parsed.has_key(metainfo.infohash):
- errfunc(_("**warning** %s is a duplicate torrent for %s") %
- (p, new_parsed[metainfo.infohash][0]))
- new_blocked[p] = None
- continue
- except Exception ,e:
- errfunc(_("**warning** %s has errors") % p)
- new_blocked[p] = None
- continue
- if NOISY:
- errfunc(_("... successful"))
- #new_parsed[h] = a
- #added[h] = a
- new_parsed[metainfo.infohash] = (p,metainfo)
- added[metainfo.infohash] = (p,metainfo)
- for p,v in files.iteritems(): # and finally, mark removed torrents
- if p not in new_files and p not in blocked:
- if NOISY:
- errfunc(_("removing %s") % p)
- removed[v[1]] = parsed[v[1]]
- if NOISY:
- errfunc(_("done checking"))
- return (new_parsed, new_files, new_blocked, added, removed)
- @coroutine
- def async_parsedir(directory, parsed, files, blocked,
- include_metainfo=True):
- """Recurses breadth-first starting from the passed 'directory'
- looking for .torrrent files. async_parsedir differs from
- parsedir in three ways: it is non-blocking, it returns a deferred,
- and it reports all errors to the logger BTL.parsedir meaning
- it does not use an errfunc.
- The directory, parsed, files, and blocked arguments are passed
- from the previous iteration of parsedir.
- @param directory: root of the breadth-first search for .torrent files.
- @param parsed: dict mapping infohash to (path,ConvertedMetainfo).
- @param files: dict mapping path -> [(modification time, size), infohash]
- @param blocked: dict used as set. keys are list of paths of files
- that were not parsed on a prior call to parsedir for some reason.
- Valid reasons are that the .torrent file is unparseable or that a
- torrent with a matching infohash is alread in the parsed set.
- @param include_metainfo: deprecated?
- @return: The tuple (new parsed, new files, new blocked, added, removed)
- where 'new parsed', 'new files', and 'new blocked' are updated
- versions of 'parsed', 'files', and 'blocked' respectively. 'added'
- and 'removed' contain the changes made to the first three members
- of the tuple. 'added' and 'removed' are dicts mapping from
- infohash on to the same torrent-specific info dict that is in
- or was in parsed.
- """
- log.info('async_parsedir %s' % directory )
- dirs_to_check = [directory]
- new_files = {} # maps path -> [(modification time, size),infohash]
- new_blocked = {} # used as a set.
- while dirs_to_check: # first, recurse directories and gather torrents
- directory = dirs_to_check.pop()
- if NOISY:
- log.info( "parsing directory %s" % directory )
- try:
- df = dtt(os.listdir,directory)
- yield df
- dir_contents = df.getResult()
- except (IOError, OSError), e:
- log.error(_("Could not read directory ") + directory)
- continue
- for f in dir_contents:
- if f.endswith('.torrent'):
- p = os.path.join(directory, f)
- try:
- df = dtt(os.path.getmtime,p)
- yield df
- tmt = df.getResult()
- df = dtt(os.path.getsize,p)
- yield df
- sz = df.getResult()
- new_files[p] = [(tmt,sz),0]
- except (IOError, OSError), e:
- log.error(_("Could not stat ") + p + " : " + unicode(e.args[0]))
- for f in dir_contents:
- p = os.path.join(directory, f)
- df = dtt(os.path.isdir,p)
- yield df
- is_dir = df.getResult()
- if is_dir:
- dirs_to_check.append(p)
- if NOISY:
- log.info( "Finished parsing directories." )
- new_parsed = {}
- to_add = []
- added = {}
- removed = {}
- # files[path] = [(modification_time, size),infohash], hash is 0 if the file
- # has not been successfully parsed
- for p,v in new_files.items(): # re-add old items and check for changes
- oldval = files.get(p)
- if oldval is None: # new file
- to_add.append(p)
- continue
- h = oldval[1]
- if oldval[0] == v[0]: # file is unchanged from last parse
- if h:
- if p in blocked: # parseable + blocked means duplicate
- to_add.append(p) # other duplicate may have gone away
- else:
- new_parsed[h] = parsed[h]
- new_files[p] = oldval
- else:
- new_blocked[p] = None # same broken unparseable file
- continue
- if p not in blocked and h in parsed: # modified; remove+add
- if NOISY:
- log.info(_("removing %s (will re-add)") % p)
- removed[h] = parsed[h]
- to_add.append(p)
- to_add.sort()
- for p in to_add: # then, parse new and changed torrents
- new_file = new_files[p]
- v = new_file[0] # new_file[0] is the file's (mod time,sz).
- infohash = new_file[1]
- if infohash in new_parsed: # duplicate, i.e., have same infohash.
- if p not in blocked or files[p][0] != v:
- log.warning(_("%s is a duplicate torrent for %s") %
- (p, new_parsed[infohash][0]))
- new_blocked[p] = None
- continue
- if NOISY:
- log.info('adding '+p)
- try:
- df = dtt(like_gettorrent,p)
- yield df
- metainfo = df.getResult()
- new_file[1] = metainfo.infohash
- if new_parsed.has_key(metainfo.infohash):
- log.warning(_("%s is a duplicate torrent for %s") %
- (p, new_parsed[metainfo.infohash][0]))
- new_blocked[p] = None
- continue
- except Exception ,e:
- log.warning(_("%s has errors") % p)
- new_blocked[p] = None
- continue
- if NOISY:
- log.info(_("... successful"))
- new_parsed[metainfo.infohash] = (p,metainfo)
- added[metainfo.infohash] = (p,metainfo)
- for p,v in files.iteritems(): # and finally, mark removed torrents
- if p not in new_files and p not in blocked:
- if NOISY:
- log.info(_("removing %s") % p)
- removed[v[1]] = parsed[v[1]]
- if NOISY:
- log.info(_("done checking"))
- yield (new_parsed, new_files, new_blocked, added, removed)
|