# The contents of this file are subject to the BitTorrent Open Source License
# Version 1.1 (the License).  You may not copy or use this file, in either
# source code or executable form, except in compliance with the License.  You
# may obtain a copy of the License at http://www.bittorrent.com/license/.
#
# Software distributed under the License is distributed on an AS IS basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied.  See the License
# for the specific language governing rights and limitations under the
# License.
#
# Written by John Hoffman and Uoti Urpala and David Harrison

import os
import sys  # DEBUG
import logging

from BTL.translation import _
from BTL.hash import sha
from BTL.bencode import bencode, bdecode
from BTL.btformats import check_message
from BTL.ConvertedMetainfo import ConvertedMetainfo
from BTL.defer import defer_to_thread, wrap_task
from BTL.coro import coroutine
from twisted.internet import reactor

log = logging.getLogger("BTL.parsedir")

NOISY = False


def dtt(f, *a, **k):
    """Run f(*a, **k) in the reactor's thread pool; returns a deferred
    for the result."""
    return defer_to_thread(reactor.callFromThread, reactor.callInThread,
                           f, *a, **k)


def like_gettorrent(path):
    """Read and bdecode the .torrent file at 'path' into a
    ConvertedMetainfo."""
    data = open(path, 'rb').read()
    b = bdecode(data)
    metainfo = ConvertedMetainfo(b)
    return metainfo
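
# A minimal sketch of calling like_gettorrent directly; the path below is
# hypothetical:
#
#     metainfo = like_gettorrent('/tmp/example.torrent')
#     print metainfo.infohash    # the torrent's infohash, as computed by
#                                # ConvertedMetainfo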


def parsedir(directory, parsed, files, blocked, errfunc,
             include_metainfo=True):
    """Recursively searches the directory tree rooted at 'directory' for
    .torrent files.  THIS IS BLOCKING.  Run it in a thread if you do not
    want it to block the program, or better yet, use async_parsedir.

    The directory, parsed, files, and blocked arguments are passed from
    the previous iteration of parsedir.

    @param directory: root of the search for .torrent files.
    @param parsed: dict mapping infohash to (path, ConvertedMetainfo).
    @param files: dict mapping path -> [(modification time, size), infohash].
    @param blocked: dict used as a set.  Keys are the paths of files that
        were not parsed on a prior call to parsedir for some reason.  Valid
        reasons are that the .torrent file is unparseable or that a torrent
        with a matching infohash is already in the parsed set.
    @param errfunc: error-reporting callback.
    @param include_metainfo: deprecated; unused here.
    @return: the tuple (new parsed, new files, new blocked, added, removed)
        where 'new parsed', 'new files', and 'new blocked' are updated
        versions of 'parsed', 'files', and 'blocked' respectively.  'added'
        and 'removed' contain the changes made to the first three members
        of the tuple; both are dicts mapping infohash onto the same
        (path, ConvertedMetainfo) pair that is in or was in 'parsed'.
    """
    if NOISY:
        errfunc('checking dir')
    dirs_to_check = [directory]
    new_files = {}     # maps path -> [(modification time, size), infohash]
    new_blocked = {}   # used as a set
    while dirs_to_check:   # first, walk directories and gather torrents
        directory = dirs_to_check.pop()
        errfunc("parsing directory %s" % directory)
        try:
            dir_contents = os.listdir(directory)
        except (IOError, OSError), e:
            errfunc(_("Could not read directory ") + directory)
            continue
        for f in dir_contents:
            if f.endswith('.torrent'):
                p = os.path.join(directory, f)
                try:
                    new_files[p] = [(os.path.getmtime(p),
                                     os.path.getsize(p)), 0]
                except (IOError, OSError), e:
                    errfunc(_("Could not stat ") + p + " : " +
                            unicode(e.args[0]))
        for f in dir_contents:
            p = os.path.join(directory, f)
            if os.path.isdir(p):
                dirs_to_check.append(p)

    new_parsed = {}
    to_add = []
    added = {}
    removed = {}
    # files[path] = [(modification_time, size), infohash]; hash is 0 if the
    # file has not been successfully parsed.
    for p, v in new_files.items():   # re-add old items and check for changes
        oldval = files.get(p)
        if oldval is None:           # new file
            to_add.append(p)
            continue
        h = oldval[1]
        if oldval[0] == v[0]:        # file is unchanged from last parse
            if h:
                if p in blocked:      # parseable + blocked means duplicate
                    to_add.append(p)  # other duplicate may have gone away
                else:
                    new_parsed[h] = parsed[h]
                new_files[p] = oldval
            else:
                new_blocked[p] = None  # same broken unparseable file
            continue
        if p not in blocked and h in parsed:  # modified; remove, then re-add
            if NOISY:
                errfunc(_("removing %s (will re-add)") % p)
            removed[h] = parsed[h]
        to_add.append(p)

    to_add.sort()
    for p in to_add:   # then, parse new and changed torrents
        new_file = new_files[p]
        v = new_file[0]          # the file's (modification time, size)
        infohash = new_file[1]
        if infohash in new_parsed:   # duplicate, i.e., same infohash
            if p not in blocked or files[p][0] != v:
                errfunc(_("**warning** %s is a duplicate torrent for %s") %
                        (p, new_parsed[infohash][0]))
            new_blocked[p] = None
            continue
        if NOISY:
            errfunc('adding ' + p)
        try:
            metainfo = like_gettorrent(p)
            new_file[1] = metainfo.infohash
            if metainfo.infohash in new_parsed:
                errfunc(_("**warning** %s is a duplicate torrent for %s") %
                        (p, new_parsed[metainfo.infohash][0]))
                new_blocked[p] = None
                continue
        except Exception, e:
            errfunc(_("**warning** %s has errors") % p)
            new_blocked[p] = None
            continue
        if NOISY:
            errfunc(_("... successful"))
        new_parsed[metainfo.infohash] = (p, metainfo)
        added[metainfo.infohash] = (p, metainfo)

    for p, v in files.iteritems():   # and finally, mark removed torrents
        if p not in new_files and p not in blocked:
            if NOISY:
                errfunc(_("removing %s") % p)
            removed[v[1]] = parsed[v[1]]

    if NOISY:
        errfunc(_("done checking"))
    return (new_parsed, new_files, new_blocked, added, removed)
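
# A minimal sketch of driving parsedir iteratively.  The watch directory is
# hypothetical; each call receives the state returned by the previous one,
# so only new and changed .torrent files are re-parsed:
#
#     parsed, files, blocked = {}, {}, {}
#     parsed, files, blocked, added, removed = \
#         parsedir('/var/spool/torrents', parsed, files, blocked, log.error)
#     # ... later, rescan with the prior state to pick up changes:
#     parsed, files, blocked, added, removed = \
#         parsedir('/var/spool/torrents', parsed, files, blocked, log.error)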


@coroutine
def async_parsedir(directory, parsed, files, blocked, include_metainfo=True):
    """Recursively searches the directory tree rooted at 'directory' for
    .torrent files.

    async_parsedir differs from parsedir in three ways: it is non-blocking,
    it returns a deferred, and it reports all errors to the BTL.parsedir
    logger rather than through an errfunc callback.

    The directory, parsed, files, and blocked arguments are passed from
    the previous iteration of parsedir.

    @param directory: root of the search for .torrent files.
    @param parsed: dict mapping infohash to (path, ConvertedMetainfo).
    @param files: dict mapping path -> [(modification time, size), infohash].
    @param blocked: dict used as a set.  Keys are the paths of files that
        were not parsed on a prior call to parsedir for some reason.  Valid
        reasons are that the .torrent file is unparseable or that a torrent
        with a matching infohash is already in the parsed set.
    @param include_metainfo: deprecated; unused here.
    @return: the tuple (new parsed, new files, new blocked, added, removed)
        where 'new parsed', 'new files', and 'new blocked' are updated
        versions of 'parsed', 'files', and 'blocked' respectively.  'added'
        and 'removed' contain the changes made to the first three members
        of the tuple; both are dicts mapping infohash onto the same
        (path, ConvertedMetainfo) pair that is in or was in 'parsed'.
    """
    log.info('async_parsedir %s' % directory)
    dirs_to_check = [directory]
    new_files = {}     # maps path -> [(modification time, size), infohash]
    new_blocked = {}   # used as a set
    while dirs_to_check:   # first, walk directories and gather torrents
        directory = dirs_to_check.pop()
        if NOISY:
            log.info("parsing directory %s" % directory)
        try:
            df = dtt(os.listdir, directory)
            yield df
            dir_contents = df.getResult()
        except (IOError, OSError), e:
            log.error(_("Could not read directory ") + directory)
            continue
        for f in dir_contents:
            if f.endswith('.torrent'):
                p = os.path.join(directory, f)
                try:
                    df = dtt(os.path.getmtime, p)
                    yield df
                    tmt = df.getResult()
                    df = dtt(os.path.getsize, p)
                    yield df
                    sz = df.getResult()
                    new_files[p] = [(tmt, sz), 0]
                except (IOError, OSError), e:
                    log.error(_("Could not stat ") + p + " : " +
                              unicode(e.args[0]))
        for f in dir_contents:
            p = os.path.join(directory, f)
            df = dtt(os.path.isdir, p)
            yield df
            is_dir = df.getResult()
            if is_dir:
                dirs_to_check.append(p)
    if NOISY:
        log.info("Finished parsing directories.")

    new_parsed = {}
    to_add = []
    added = {}
    removed = {}
    # files[path] = [(modification_time, size), infohash]; hash is 0 if the
    # file has not been successfully parsed.
    for p, v in new_files.items():   # re-add old items and check for changes
        oldval = files.get(p)
        if oldval is None:           # new file
            to_add.append(p)
            continue
        h = oldval[1]
        if oldval[0] == v[0]:        # file is unchanged from last parse
            if h:
                if p in blocked:      # parseable + blocked means duplicate
                    to_add.append(p)  # other duplicate may have gone away
                else:
                    new_parsed[h] = parsed[h]
                new_files[p] = oldval
            else:
                new_blocked[p] = None  # same broken unparseable file
            continue
        if p not in blocked and h in parsed:  # modified; remove, then re-add
            if NOISY:
                log.info(_("removing %s (will re-add)") % p)
            removed[h] = parsed[h]
        to_add.append(p)

    to_add.sort()
    for p in to_add:   # then, parse new and changed torrents
        new_file = new_files[p]
        v = new_file[0]          # the file's (modification time, size)
        infohash = new_file[1]
        if infohash in new_parsed:   # duplicate, i.e., same infohash
            if p not in blocked or files[p][0] != v:
                log.warning(_("%s is a duplicate torrent for %s") %
                            (p, new_parsed[infohash][0]))
            new_blocked[p] = None
            continue
        if NOISY:
            log.info('adding ' + p)
        try:
            df = dtt(like_gettorrent, p)
            yield df
            metainfo = df.getResult()
            new_file[1] = metainfo.infohash
            if metainfo.infohash in new_parsed:
                log.warning(_("%s is a duplicate torrent for %s") %
                            (p, new_parsed[metainfo.infohash][0]))
                new_blocked[p] = None
                continue
        except Exception, e:
            log.warning(_("%s has errors") % p)
            new_blocked[p] = None
            continue
        if NOISY:
            log.info(_("... successful"))
        new_parsed[metainfo.infohash] = (p, metainfo)
        added[metainfo.infohash] = (p, metainfo)

    for p, v in files.iteritems():   # and finally, mark removed torrents
        if p not in new_files and p not in blocked:
            if NOISY:
                log.info(_("removing %s") % p)
            removed[v[1]] = parsed[v[1]]

    if NOISY:
        log.info(_("done checking"))
    yield (new_parsed, new_files, new_blocked, added, removed)
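
# A minimal sketch of calling async_parsedir from under a running Twisted
# reactor.  The watch directory is hypothetical, and this assumes the
# deferred returned by the @coroutine wrapper supports addCallback
# (BTL.defer semantics):
#
#     def on_scan(result):
#         parsed, files, blocked, added, removed = result
#         for infohash, (path, metainfo) in added.iteritems():
#             log.info('added torrent at %s' % path)
#
#     df = async_parsedir('/var/spool/torrents', {}, {}, {})
#     df.addCallback(on_scan)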