#!/usr/bin/env python

import os
from joblib import load
from sys import argv

from paras.domain_extraction.extract_residues import extract_stach_codes
import paras.data
import paras.data.train_and_test_data.structure_alignments
from paras.feature_extraction.get_sequence_features import get_features, to_feature_vectors
from paras.domain_extraction.extract_adomains import find_adomains
from paras.common.parsers import parse_specificities

# Path to 'specificities.txt', resolved relative to the directory that holds
# the paras.data package.
SPECIFICITIES_FILE = os.path.join(os.path.dirname(paras.data.__file__), 'specificities.txt')
# Parsed once, at import time. run_paras indexes this by sequence id to obtain
# the expected label for each domain (presumably a dict-like mapping of
# id -> specificity; confirm against parse_specificities).
SPECIFICITIES = parse_specificities(SPECIFICITIES_FILE)


def run_paras(sequence_file, alignment_file, classifier_file):
    """Print the score of a stored classifier on the domains in a sequence file.

    Steps, in order:
      1. ``find_adomains`` is run on ``sequence_file`` with ``'.'`` as its
         second argument (presumably the output location; confirm against
         ``find_adomains``).
      2. ``extract_stach_codes`` is run on the result together with
         ``alignment_file``; only the second of its two return values is used.
      3. That value is turned into feature vectors via ``get_features`` and
         ``to_feature_vectors``.
      4. The expected label for every id is looked up in ``SPECIFICITIES``
         using the part of the id before the first ``'|'``.
      5. The classifier is loaded with ``joblib.load`` and the result of its
         ``score(feature_vectors, expected_labels)`` call is printed.

    :param sequence_file: input handed to ``find_adomains``.
    :param alignment_file: input handed to ``extract_stach_codes``.
    :param classifier_file: path handed to ``joblib.load``.
    :return: None; the score is written to stdout.

    NOTE(review): an id whose prefix is missing from ``SPECIFICITIES`` will
    presumably raise ``KeyError`` (assuming a dict-like mapping).
    NOTE(review): ``joblib.load`` unpickles the file, so only classifier files
    from a trusted source should be passed in.
    """
    domain_sequences = find_adomains(sequence_file, '.')

    # The first return value (stach codes) is not needed for scoring.
    _, signatures_by_id = extract_stach_codes(domain_sequences, alignment_file)
    ids, feature_vectors = to_feature_vectors(get_features(signatures_by_id))

    # Ids look like '<key>|...'; only the leading field keys SPECIFICITIES.
    expected_labels = [SPECIFICITIES[seq_id.split('|')[0]] for seq_id in ids]

    model = load(classifier_file)
    print(model.score(feature_vectors, expected_labels))


if __name__ == "__main__":
    # Expected invocation:
    #   <script> SEQUENCE_FILE ALIGNMENT_FILE CLASSIFIER_FILE
    # Exit with a usage message (status 1, written to stderr) instead of a
    # bare IndexError traceback when positional arguments are missing.
    # Extra trailing arguments are still ignored, as before.
    if len(argv) < 4:
        raise SystemExit(
            "Usage: %s <sequence_file> <alignment_file> <classifier_file>" % argv[0]
        )

    sequence_file = argv[1]
    alignment_file = argv[2]
    classifier_file = argv[3]

    run_paras(sequence_file, alignment_file, classifier_file)