#!/Users/martijn/.virtualenvs/ranch/bin/python

import datetime
from concurrent.futures import ThreadPoolExecutor
from functools import partial
from pprint import pprint
import json

from bs4 import BeautifulSoup
from progressbar import ProgressBar
import requests
import percache


def get_uri(uri):
    r = requests.get(uri)
    return r.text


def get_json(uri):
    r = requests.get(uri)
    return r.json()


class Downloader(object):
    def __init__(self, base_url, entry, query, defaults=None, cache=None):
        self.base_url = base_url
        self.defaults_url = defaults
        self.entry = entry
        self.query = query
        self.data = {}

        self.endpoints_total = 0

        if cache:
            self._get_uri = cache(get_uri)
            self._get_json = cache(get_json)
        else:
            self._get_uri = get_uri
            self._get_json = get_json

    def _download_list(self):
        page = self._get_uri(self.base_url + self.entry)
        self.page = BeautifulSoup(page, 'html.parser')

    def _download_endpoints(self):
        self.endpoints_total += 1
        print(self.endpoints_total)
        bar = ProgressBar(max_value=self.endpoints_total)

        if self.defaults_url:
            self.data['url'] = self.defaults_url
            self._download_endpoint(self.data)

        tasks = self._find_and_download_urls(self.data['subs'])
        bar_range = iter(range(self.endpoints_total))

        futures = []
        with ThreadPoolExecutor(max_workers=None) as executor:
            for task in tasks:
                f = executor.submit(task)
                f.add_done_callback(lambda _: bar.update(next(bar_range)))
                futures.append(f)

        bar.finish()

    def _find_and_download_urls(self, data):
        for key, value in data.items():
            if 'url' in value:
                yield partial(self._download_endpoint, value)

            if 'subs' in value:
                yield from self._find_and_download_urls(value['subs'])

    def _download_endpoint(self, value):
        json = self._get_json(self.base_url + value['url'])
        value['details'] = json

    def _parse_list(self):
        for anchor in self.page.find_all('a'):
            p = self._parse_list_anchor(anchor)
            if p:
                value = self.add_depth(p)
                value['url'] = anchor['href']

                self.endpoints_total += 1

    def _parse_list_anchor(self, anchor):
        if not anchor['href'].startswith(self.query):
            return False

        value = anchor['href'][len(self.query):]
        return value.split('/')

    def add_depth(self, keys):
        depth = self.data
        for key in keys:
            depth = depth.setdefault('subs', {}).setdefault(key, {})
        return depth

    def run(self):
        self._download_list()
        self._parse_list()
        self._download_endpoints()

        return self.data

    def print(self):
        pprint(self.data)


if __name__ == '__main__':
    cache = None
    if input('Generate or use cache? [Y/n] ').lower() in ['yes', 'y', '']:
        loc = input('Cache location: [/tmp/uri-cache] ')
        if loc == '':
            loc = '/tmp/uri-cache'

        cache = percache.Cache(loc)

    d = Downloader('http://i18napis.appspot.com', '/address',
                   '/address/data/', defaults='/address/data/ZZ', cache=cache)
    data = d.run()

    default_filename = 'data/address-export.{}.json'.format(
        datetime.datetime.now().isoformat()
    )

    filename = input('Where to store? [{}] '.format(default_filename))
    if filename == '':
        filename = default_filename

    with open(filename, 'w') as export:
        json.dump(data, export)

    if cache:
        cache.close()
