#!/usr/bin/env python3
#
# Script to check a GitHub org for commits without a DCO signoff that should have one.
#
# Loads config file ( dco_org_check.yaml by default, override with -c command line arg ) for credentials and other config options ( refer to README.md for more details )
#
# Copyright this project and its contributors
# SPDX-License-Identifier: Apache-2.0
#
# encoding=utf8

import re
import csv
import yaml
import io
import os
import sys
import base64
import shutil
import time
from github import Github, GithubException, RateLimitExceededException

def loadconfig(config_file):
    """Load the YAML config file and fill in defaults for optional settings.

    Args:
        config_file: Path of the YAML configuration file to read.

    Returns:
        dict: The parsed configuration. 'token' and 'org' must be present in
        the file; 'csvfile', 'dco_signoffs_directories',
        'create_prior_commits_file' and 'create_prior_commits_dir' are set to
        their defaults when the file does not define them.

    Raises:
        SystemExit: If the file cannot be read or is not valid YAML.
        Exception: If the file is not a YAML mapping, or 'token' / 'org' is
            missing.
    """
    # Only trap the failures that loading can legitimately produce, so that
    # Ctrl-C and programming errors are not reported as a config problem.
    try:
        with open(config_file, 'r') as stream:
            data_loaded = yaml.safe_load(stream)
    except (OSError, UnicodeError) as err:
        sys.exit("{} config file could not be read: {}".format(config_file, err))
    except yaml.YAMLError as err:
        sys.exit("{} config file is not valid YAML: {}".format(config_file, err))

    # An empty file parses to None; treat it as an empty mapping so the
    # required-key checks below report what is actually missing.
    if data_loaded is None:
        data_loaded = {}
    if not isinstance(data_loaded, dict):
        raise Exception('config file must contain a YAML mapping')

    for required_key in ('token', 'org'):
        if required_key not in data_loaded:
            raise Exception('\'{}\' is not defined'.format(required_key))

    # Optional settings: keep whatever the file provides, default the rest.
    data_loaded.setdefault('csvfile', "dco_issues.csv")
    data_loaded.setdefault('dco_signoffs_directories', ["dco-signoffs"])
    data_loaded.setdefault('create_prior_commits_file', 0)
    data_loaded.setdefault('create_prior_commits_dir', 'dco-signoffs')

    return data_loaded

def has_sign_off(commit_message):
    """Look for a 'Signed-off-by: <someone>' trailer in a commit message.

    Returns the regex match object (truthy) for the first such line, with the
    signer text in group 1, or None when the message has no sign-off.
    """
    signoff_pattern = re.compile("Signed-off-by: (.+)")
    return signoff_pattern.search(commit_message)

def get_past_signoffs(org, signoff_dirs, g, max_attempts=3):
    """Collect the retroactive sign-off files stored in the org's repos.

    Runs one GitHub code search per directory name in signoff_dirs
    ("org:<org> path:<dir>") and decodes every hit.

    Args:
        org: Name of the GitHub organization to search.
        signoff_dirs: Iterable of directory paths that hold sign-off files.
        g: Authenticated PyGithub client; only search_code() is used.
        max_attempts: How many times a directory search is tried when the
            API reports a rate limit or a 502 before giving up on it.

    Returns:
        list: One (repository name, file path, decoded file content as
        bytes) tuple per search hit.
    """
    signoffs = []

    for signoff_dir in signoff_dirs:
        query = "org:" + org + " path:" + signoff_dir
        found = []

        for _attempt in range(max(1, max_attempts)):
            # Rebuild the hits on every attempt so a retry after a partial
            # failure never adds the same file twice.
            found = []
            try:
                for result in g.search_code(query):
                    found.append((result.repository.name, result.path, base64.b64decode(result.content)))
                break
            except RateLimitExceededException:
                print("Sleeping for an hour as we hit the API rate limit....")
                time.sleep(3600)  # sleep 1 hour, then try this directory again
            except GithubException as e:
                if e.status != 502:
                    # Not a transient server error: report it and move on.
                    print(e.data)
                    break
                print("Server error - retrying...")

        # Best effort: keep whatever the last attempt managed to gather.
        signoffs.extend(found)

    return signoffs

def has_past_signoff(commit_url, signoffs):
    """Tell whether a commit is covered by a retroactive sign-off file.

    Args:
        commit_url: Commit page URL of the form
            https://github.com/<owner>/<repo>/commit/<sha>.
        signoffs: Sequence of (repository name, file path, file content as
            bytes) entries, as produced by get_past_signoffs().

    Returns:
        int: 1 if any sign-off file belonging to the commit's repository
        contains the commit sha, otherwise 0. A URL that does not look like
        a GitHub commit URL yields 0 instead of raising.
    """
    url_search = re.search(r"https://github\.com/.*/(.*)/commit/(.*)", commit_url)
    if url_search is None:
        return 0

    repo = url_search.group(1)
    # Sign-off file contents are bytes, so compare against the encoded sha.
    sha = url_search.group(2).encode()

    for signoff in signoffs:
        if signoff[0] == repo and sha in signoff[2]:
            return 1

    return 0

def is_merge_commit(commit):
    """Return 1 when the commit has more than one parent (a merge), else 0."""
    parent_count = len(commit.parents)
    return int(parent_count > 1)

def cleanup_previous_run(config):
    """Delete the output left behind by an earlier run.

    Removes the CSV report named by config['csvfile'] when it exists as a
    regular file. When config['create_prior_commits_file'] is truthy, also
    removes the config['create_prior_commits_dir'] directory tree.
    """
    previous_csv = config['csvfile']
    if os.path.isfile(previous_csv):
        os.remove(previous_csv)

    if not config['create_prior_commits_file']:
        return

    # Errors during removal (for example a directory that is not there) are ignored.
    shutil.rmtree(config['create_prior_commits_dir'], ignore_errors=True)

from argparse import ArgumentParser

# Command line handling: the only option is the location of the config file.
parser = ArgumentParser()
parser.add_argument("-c", "--config", dest="configfile", default="dco_org_check.yaml", help="name of YAML config file (defaults to dco_org_check.yaml)")
args = parser.parse_args()

config = loadconfig(args.configfile)
# Remove the report (and optionally the sign-off directory) of an earlier run.
cleanup_previous_run(config)

# The csv module requires newline='' so that newlines embedded in quoted
# fields (commit messages are multi-line) are written unmodified. UTF-8 is set
# explicitly so non-ASCII author names and messages do not depend on the
# platform's default encoding.
csvfile = open(config['csvfile'], mode='w', newline='', encoding='utf-8')
csv_writer = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_ALL)
g = Github(config['token'])

# Retroactive sign-off files already committed to the org's repositories.
past_signoffs = get_past_signoffs(config['org'],config['dco_signoffs_directories'],g)

# Upper bound on the number of passes made over one repository when the API
# reports a rate limit or a transient 502 while its commits are being listed.
max_repo_attempts = 3

# Walk every repository of the org and report each commit that has no
# Signed-off-by line, is not a merge commit, and is not listed in a
# retroactive sign-off file.
for repo in g.get_organization(config['org']).get_repos():
    # Check if we are only looking at certain repos
    if 'only_repos' in config and repo.name not in config['only_repos']:
        continue
    # Check if there are ignore repos defined
    if 'ignore_repos' in config and repo.name in config['ignore_repos']:
        continue

    print("Searching repo {}...".format(repo.name))

    # Commits already reported for this repo, so that a retry pass does not
    # write duplicate CSV rows or duplicate sign-off file lines.
    reported_shas = set()

    for _attempt in range(max_repo_attempts):
        try:
            # Parse commits
            for commit in repo.get_commits():
                sha = commit.sha
                if sha in reported_shas:
                    continue

                html_url = commit.commit.html_url
                commit_message = commit.commit.message
                author_name = commit.commit.author.name
                author_email = commit.commit.author.email
                author_date = commit.commit.author.date

                if has_sign_off(commit_message):
                    continue
                if is_merge_commit(commit):
                    continue
                if has_past_signoff(html_url,past_signoffs):
                    continue

                # Every value was fetched above, so writing the row makes no
                # API call and needs no GithubException handling of its own.
                csv_writer.writerow([html_url,commit_message,author_name,author_email,author_date])

                if config['create_prior_commits_file']:
                    os.makedirs(config['create_prior_commits_dir'], exist_ok=True)

                    # Author names come from commit metadata and are not
                    # trusted: neutralise path separators so the file always
                    # lands inside the sign-off directory.
                    safe_author = re.sub(r'[\\/]', '_', author_name)
                    commitfilename = os.path.join(config['create_prior_commits_dir'], safe_author+'-'+repo.name+'.txt')

                    # The declaration header is written once, when the file is created.
                    needs_header = not os.path.isfile(commitfilename)
                    with open(commitfilename, mode='a', encoding='utf-8') as fh:
                        if needs_header:
                            fh.write("I, "+author_name+" hereby sign-off-by all of my past commits to this repo subject to the Developer Certificate of Origin (DCO), Version 1.1. In the past I have used emails: "+author_email+"\n\n")
                        fh.write(sha+" "+commit_message+"\n")

                reported_shas.add(sha)
            # The whole commit list was processed without an API failure.
            break
        except RateLimitExceededException:
            print("Sleeping for an hour as we hit the API rate limit....")
            time.sleep(3600) # sleep 1 hour, then make another pass over this repo
        except GithubException as e:
            if e.status != 502:
                # Not a transient server error: report it and go to the next repo.
                print(e.data)
                break
            print("Server error - retrying...")

# Flush and release the report now that every repository has been processed.
csvfile.close()
