#!/usr/bin/env python
"""
git-word-blame: word-by-word blame for git

usage: 
    git word-blame <path>
"""
import itertools
import sys
from pathlib import Path
import argparse
from collections import Counter
import os.path

import git
import mwpersistence
import deltas
import mwreverts
from lys import L, render, raw

from gitwordblame import wikiwho_engine
from gitwordblame.themes import THEMES, COMMON_CSS


def get_git_config(repo, key, default=None):
    """Look up a git configuration value, falling back to ``default``.

    Calls ``repo.git.config('--get', key)``. If that call raises any
    ``Exception`` the lookup is treated as "not configured" and ``default``
    is returned instead.
    """
    try:
        value = repo.git.config('--get', key)
    except Exception:
        value = default
    return value


def _remote_web_url(url):
    """Map one git remote URL to a web base URL, or None if unrecognised."""
    # Strip only a *trailing* ".git": cutting at the first occurrence would
    # truncate project names such as "user.github.io" or ".github".
    if url.endswith('.git'):
        url = url[:-len('.git')]

    for host in ('github.com', 'framagit.org'):
        # "git@host:owner/project" (scp-like) and "ssh://git@host/owner/project"
        # or "https://user@host/owner/project" (URL with a user part).
        for separator in (':', '/'):
            _, marker, project = url.partition('@' + host + separator)
            if marker:
                return 'https://' + host + '/' + project

    # URL-style GitHub remotes are returned unchanged (minus ".git").
    if '/github.com' in url:
        return url
    return None


def get_github_url(path):
    """Return the web URL of the project hosting the repository at ``path``.

    The repository is located with ``git.Repo(path,
    search_parent_directories=True)`` and its remotes are inspected in order;
    the first GitHub or Framagit remote wins.

    Returns the base URL as a string (no trailing ``.git``), or ``None`` after
    printing a warning on stderr when no remote is recognised.
    """
    repo = git.Repo(path, search_parent_directories=True)

    for remote in repo.remotes:
        web_url = _remote_web_url(remote.url)
        if web_url is not None:
            return web_url

    print('WARNING: Could not find an online link for the commits', file=sys.stderr)
    return None


def git_file_history(path):
    """Open the repository containing ``path`` and prepare its file history.

    Returns a tuple ``(repo, git_root, file_in_repo, list_commits)`` where
    ``git_root`` is the output of ``git rev-parse --show-toplevel``,
    ``file_in_repo`` is ``path`` expressed relative to that root, and
    ``list_commits`` is a generator function yielding ``(commit, contents)``
    pairs in the reverse of the order ``repo.iter_commits`` produces them.
    ``contents`` is the file decoded as UTF-8 at that commit.

    Raises ``NotImplementedError`` when ``path`` is a directory.
    """
    repo = git.Repo(path, search_parent_directories=True)
    git_root = repo.git.rev_parse("--show-toplevel")
    target = Path(path)

    try:
        # Resolve both sides so relative paths and symlinked directories still
        # yield a path relative to the work tree root.
        file_in_repo = target.resolve().relative_to(Path(git_root).resolve()).as_posix()
    except ValueError:
        # Not located under git_root after resolution: keep the previous
        # prefix-stripping behaviour rather than failing here.
        file_in_repo = str(target).replace(git_root + '/', '')

    if target.is_dir():
        raise NotImplementedError('Blaming directories is not yet supported')

    def list_commits():
        # The root-relative path is used for the history query too, so the
        # result does not depend on the caller's current directory.
        for commit in reversed(list(repo.iter_commits(paths=file_in_repo))):
            try:
                blob = commit.tree / file_in_repo
            except KeyError:
                # The file does not exist under this name in that commit
                # (e.g. it was renamed): skip it.
                continue
            yield commit, blob.data_stream.read().decode('utf-8')

    return repo, git_root, file_in_repo, list_commits


def get_revision(revisions):
    """Pick the revision a token is attributed to.

    ``revisions`` is the sequence stored on a token (``token.revisions``).
    The first entry is returned, which attributes the token to its original
    authorship. Callers use the result as an index into their list of
    ``(commit, contents)`` pairs.

    An alternative "last introduction" strategy used to follow the ``return``
    statement; it was unreachable and has been removed.

    Raises ``IndexError`` if ``revisions`` is empty.
    """
    return revisions[0]


def author_stats(tokens, revisions, dest):
    """Write ``author_stats.tsv`` in ``dest``: characters attributed per author.

    Each token is attributed to the author of the commit selected by
    ``get_revision(token.revisions)``; one unit is counted per item obtained
    by iterating the token. Rows are written most-common first with the
    author's percentage share of the total.
    """
    counts = Counter()
    for token in tokens:
        commit, _ = revisions[get_revision(token.revisions)]
        author = str(commit.author)
        # One count per iterated item; an empty token adds no entry at all.
        counts.update(author for _ in token)

    total = sum(counts.values())

    # Explicit UTF-8 so non-ASCII author names are written the same way
    # regardless of the platform's locale encoding.
    with open(os.path.join(dest, "author_stats.tsv"), 'w', encoding='utf-8') as f:
        print('author', 'characters', 'characters %', sep='\t', file=f)
        for author, n in counts.most_common():
            print(author, n, round(n/total*100, 2), sep='\t', file=f)


def commit_stats(tokens, revisions, dest):
    """Write ``commit_stats.tsv`` in ``dest``: characters attributed per commit.

    Each token is attributed to the commit selected by
    ``get_revision(token.revisions)``; one unit is counted per item obtained
    by iterating the token. Every row holds the commit, the first line of its
    message, the character count and its percentage share of the total,
    most-common first.
    """
    counts = Counter()
    for token in tokens:
        commit, _ = revisions[get_revision(token.revisions)]
        # The key already contains the tab separating the first two columns.
        label = '%s\t%s' % (commit, commit.message.split('\n')[0])
        # One count per iterated item; an empty token adds no entry at all.
        counts.update(label for _ in token)

    total = sum(counts.values())

    # Explicit UTF-8 so non-ASCII commit messages are written the same way
    # regardless of the platform's locale encoding.
    with open(os.path.join(dest, "commit_stats.tsv"), 'w', encoding='utf-8') as f:
        print('commit', 'message', 'characters', 'characters %', sep='\t', file=f)
        for label, n in counts.most_common():
            print(label, n, round(n/total*100, 2), sep='\t', file=f)


def text_output(tokens, revisions, dest):
    """Write the plain-text blame to the file ``text-output`` in ``dest``.

    Tokens are written back to back. Whenever the commit a token is
    attributed to differs from the previous token's, a line break followed by
    a ``[ commit author first-line-of-message ]`` header line is emitted
    first.
    """
    # Explicit UTF-8: the tokens come from file contents decoded as UTF-8, so
    # the locale encoding may not be able to represent them.
    with open(os.path.join(dest, "text-output"), 'w', encoding='utf-8') as f:
        last_commit = None
        for token in tokens:
            commit, _ = revisions[get_revision(token.revisions)]
            commit_id = str(commit)
            if commit_id != last_commit:
                print(file=f)
                print('[', commit, commit.author, commit.message.split('\n')[0], ']', file=f)
                last_commit = commit_id
            print(token, end='', file=f)


def html_output(tokens, revisions, path, file_in_repo, THEME, LINK_TO_COMMIT, dest):
    """Write the two HTML blame pages into ``dest``.

    Consecutive tokens attributed to the same commit are merged into a single
    ``<a>`` element carrying the commit, author and file as ``data-*``
    attributes. Two files are produced, ``word-blame-by-commit.html`` and
    ``word-blame-by-author.html``; they differ only in the attribute used to
    highlight related spans on hover.

    When ``LINK_TO_COMMIT`` is true and ``get_github_url(path)`` finds a URL,
    each span links to that commit online. Otherwise a click handler shows
    the matching ``git show`` command instead.

    ``THEME`` must be a key of ``THEMES``.
    """
    def commit_of(token):
        commit, _ = revisions[get_revision(token.revisions)]
        return commit

    github_link = get_github_url(path) if LINK_TO_COMMIT else None

    tokens_span = []
    # groupby only merges *adjacent* tokens, which is what is wanted here:
    # one span per uninterrupted run of tokens from the same commit.
    for _, group in itertools.groupby(tokens, key=lambda token: str(commit_of(token))):
        group = list(group)
        commit = commit_of(group[0])
        span = L.a(
            title=commit.message[:600] + '\n - ' + str(commit.author),
            href="%s/commit/%s" % (github_link, commit) if github_link else None,
            target="blank",
            data_file=file_in_repo,
            data_commit=str(commit),
            data_author=str(commit.author),
        ) / [str(token) for token in group]
        tokens_span.append(span)

    for highlight_by in ('commit', 'author'):
        export_file = os.path.join(dest, "word-blame-by-%s.html" % highlight_by)
        html = (
            L.head / (
                raw("""<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">"""),
                L.title / 'git-word-blame',
            ),
            L.body / (
                L.style / raw(THEMES[THEME]),
                L.style / raw(COMMON_CSS),
                L.pre('#content') / tokens_span,
                L.script / raw("""
                    var select_by = "%s";
                    document.querySelectorAll('pre > a').forEach(a => {
                        a.onmouseenter = () => {
                          document.querySelectorAll('pre > a').forEach(x => x.className = '');
                          document.querySelectorAll('a[data-' + select_by + '="' + a.attributes['data-' + select_by].value + '"]').forEach(x => x.className = "hovered");
                        };
                    });
                """ % (highlight_by, )),
                L.script / raw("""
                    document.querySelectorAll('pre > a').forEach(a => {
                        a.onclick = () => {
                           var commit = a.attributes['data-commit'].value;
                           var file = a.attributes['data-file'].value;
                           alert('git show --color-words ' + commit + ' ' + file);
                           return false;
                        };
                    });
                """) if not github_link else None,
            ),

        )

        # The page declares charset=UTF-8, so write it as UTF-8 instead of the
        # locale encoding, and close the file deterministically.
        with open(export_file, 'w', encoding='utf-8') as f:
            f.write(render(html))


def get_tokens_authorship(revisions):
    """Compute token authorship with the mwpersistence engine.

    ``revisions`` is a sequence of ``(commit, filecontents)`` pairs. Each
    file content is fed in order to a ``mwpersistence.DiffState`` under its
    position in the sequence as revision id. The tokens returned by the last
    update are returned.

    Returns an empty list when there is no revision to process.
    """
    # Without this, an empty history left ``tokens`` unbound and the final
    # return raised UnboundLocalError.
    tokens = []
    if not revisions:
        return tokens

    state = mwpersistence.DiffState(
        deltas.SegmentMatcher(),
        revert_radius=15,
        revert_detector=mwreverts.Detector(),
    )

    for i, (_, filecontents) in enumerate(revisions):
        tokens, _, _ = state.update(filecontents, revision=i)
        # Progress feedback every 10 revisions.
        if i and i % 10 == 0:
            print('...', i, 'revisions processed')
    return tokens


def main(args):
    """Run the word blame for ``args.path[0]`` and write results to ``args.dest``.

    Settings are read from the git configuration of the repository:
    ``word-blame.engine`` (``wikiwho`` by default, or ``mwpersistence``),
    ``word-blame.theme``, ``word-blame.limit`` (maximum number of most recent
    revisions processed; values <= 0 disable the limit) and
    ``word-blame.link-to-online-commit``.

    Raises ``ValueError`` when the configured engine is not recognised.
    """
    # TODO: move to a class
    path = args.path[0]
    output_directory = args.dest

    repo, _, file_in_repo, list_commits = git_file_history(path)
    revisions = list(list_commits())

    engine = get_git_config(repo, 'word-blame.engine', 'wikiwho')
    theme = get_git_config(repo, 'word-blame.theme', 'solarized-dark')
    limit = int(get_git_config(repo, 'word-blame.limit', '2000'))
    link_to_commit = get_git_config(repo, 'word-blame.link-to-online-commit', 'false') == 'true'

    print(len(revisions), 'revision(s) to process')

    if limit > 0 and len(revisions) > limit:
        # Keep only the most recent revisions: processing more is too slow for now.
        revisions = revisions[-limit:]
        print('  -> shortened to', limit, 'revisions')

    if engine == 'mwpersistence':
        tokens = get_tokens_authorship(revisions)
    elif engine == 'wikiwho':
        tokens = list(wikiwho_engine.get_tokens_authorship(revisions))
    else:
        # ValueError is a subclass of Exception, so existing handlers still
        # catch it.
        raise ValueError('Unknown engine: ' + engine)

    os.makedirs(output_directory, exist_ok=True)

    text_output(tokens, revisions, dest=output_directory)
    author_stats(tokens, revisions, dest=output_directory)
    commit_stats(tokens, revisions, dest=output_directory)
    html_output(tokens, revisions, path, file_in_repo, theme, link_to_commit, dest=output_directory)

    print('results in', output_directory)
    for produced in (
        'author_stats.tsv',
        'commit_stats.tsv',
        'word-blame-by-commit.html',
        'word-blame-by-author.html',
        'text-output',
    ):
        print(' - ' + produced)


if __name__ == '__main__':
    # Command-line entry point: the module docstring doubles as the help
    # description, kept verbatim thanks to the raw formatter.
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    # File to blame (a one-element list because of nargs=1).
    parser.add_argument('path', nargs=1, help='')
    # Optional output directory.
    parser.add_argument(
        'dest',
        nargs='?',
        default='/tmp/word-blame-output/',
        help='path for the directory where the results will be stored',
    )
    args = parser.parse_args()

    main(args)
