#!/usr/bin/env python3

import argparse
import json
import os
import signal
import sys
from threading import Lock

import requests

if __name__ == '__main__':  # noqa
    pkg_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))  # noqa
    sys.path.insert(0, pkg_root)  # noqa

from hca.util.pool import ThreadPool

from dcp_diag.finders import Finder
from dcp_diag.component_agents import DataStoreAgent
from dcp_diag.component_agents import AnalysisAgent
from dcp_diag.component_agents import AzulAgent


# The low nibble of a verbosity word is the level; higher bits are flags.
VERBOSITY_MASK = 0x0f
V_SILENT = 0x00
V_SUMMARY = 0x01
V_BAD_DETAIL = 0x02
V_GOOD_DETAIL = 0x03

# Flag: emit the message only when stdout is a terminal.
V_TTY_ONLY = 0x10

# Current threshold; messages with a level above it are dropped.
verbosity_level = V_SUMMARY


def output(message, verbosity=V_SILENT):
    """Write message to stdout (no newline added) if its verbosity allows it.

    The message is dropped when V_TTY_ONLY is set and stdout is not a
    terminal, or when its level (verbosity & VERBOSITY_MASK) is above the
    module-wide verbosity_level. Written messages are flushed immediately.
    """
    tty_only = verbosity & V_TTY_ONLY
    if tty_only and not sys.stdout.isatty():
        return

    if (verbosity & VERBOSITY_MASK) > verbosity_level:
        return

    stream = sys.stdout
    stream.write(message)
    stream.flush()


class AnalyzeSubmission:

    """
    # Start with Submission ID, then check:
    # ✔︎    Primary bundles (known by Ingest)
    # ✔︎    Primary bundles in DSS/AWS (direct access)
    # ✔︎    Primary bundles in DSS/GCP (direct access)
    # ✔︎    Primary bundles searchable in DSS/AWS using Project ID
    # ✔︎    Primary bundles searchable in DSS/GCP using Project ID
    # ✔︎    Analysis workflows searchable in Secondary-Analysis using Project ID
    # ✔    Secondary bundles searchable in DSS/AWS using files.analysis_process_json.input_bundles
    # ✔    Secondary bundles searchable in DSS/GCP using files.analysis_process_json.input_bundles
    # todo  Secondary bundles searchable in DSS/AWS using Project ID
    # todo  Secondary bundles searchable in DSS/GCP using Project ID

    # Cache results so we can reload and update later?
    """

    class AnalysisState:

        """
        bundle_map holds a map of gathered bundle information:

        "<uuid>": {
            "type": "primary",
            "aws": {
                "dss_presence": bool,
                "in_dss_project_search": bool,
                "results_bundles": [
                    "<uuid>.<version>"
                ]
            },
            "gcp": {
                "dss_presence": bool,
                "in_dss_project_search": bool
                "results_bundles": [
                    "<uuid>.<version>"
                ]
            },
            "fqid": "<uuid>.<timestamp>",
            "analysis_workflows": {},
            "present_in_azul": bool,
            "azul_result_bundles": [
                "<uuid>.<version>"
            ]
        },
        """

        SAVEFILE_SCHEMA_VERSION = 1

        def __init__(self, submission_id):
            self.submission_id = submission_id
            self.project_uuid = None
            self.bundle_map = {}
            self.lock = Lock()
            self._state_filename = f"{self.submission_id}.json"
            self._raw_data = None

        @property
        def primary_bundle_count(self):
            return len(list(self.iter_bundles('primary')))

        def iter_bundles(self, bundle_type=None):
            """Yield (uuid, bundle_info) pairs from bundle_map.

            When bundle_type is truthy only entries whose 'type' equals it are
            yielded; otherwise every entry is yielded.
            """
            for bundle_uuid, entry in self.bundle_map.items():
                if not bundle_type or entry['type'] == bundle_type:
                    yield bundle_uuid, entry

        @property
        def analysis_workflow_count(self):
            """Return the number of the analysis workflows from bundle_map, if the map is empty, return 0."""
            return sum([len(info['analysis_workflows']) for uuid, info in self.iter_bundles('primary')])

        @property
        def succeeded_analysis_workflow_count(self):
            """Return the number of succeeded analysis workflows from bundle_map, if the map is empty, return 0."""
            return sum([wf_body['status'] == 'Succeeded'
                        for uuid, info in self.iter_bundles('primary')
                        for wf_id, wf_body in info['analysis_workflows'].items()])

        @property
        def failed_analysis_workflow_count(self):
            """Return the number of failed or aborted analysis workflows from bundle_map,
                if the map is empty, return 0.
            """
            return sum([wf_body['status'] in ('Failed', 'Aborted')
                        for uuid, info in self.iter_bundles('primary')
                        for wf_id, wf_body in info['analysis_workflows'].items()])

        @property
        def ongoing_analysis_workflow_count(self):
            """Return the number of ongoing analysis workflows from bundle_map, if the map is empty, return 0."""
            return sum([wf_body['status'] not in ('Failed', 'Aborted', 'Succeeded')
                        for uuid, info in self.iter_bundles('primary')
                        for wf_id, wf_body in info['analysis_workflows'].items()])

        def iter_succeeded_analysis_workflows(self):
            """Return a generator over the IDs of all succeeded analysis workflows.

            Only workflows recorded on primary bundles with status 'Succeeded'
            are yielded; failed, aborted and still-running workflows are left
            out so callers do not mistake them for finished work.
            """
            for _uuid, info in self.iter_bundles('primary'):
                for workflow_id, workflow in info['analysis_workflows'].items():
                    if workflow['status'] == 'Succeeded':
                        yield workflow_id

        def savefile_is_good(self):
            if not os.path.isfile(self._state_filename):
                return False
            try:
                self._load_data()
            except json.decoder.JSONDecodeError:  # in some rare cases the file may exist but got broken
                return False
            return 'version' in self._raw_data and self._raw_data['version'] == self.SAVEFILE_SCHEMA_VERSION

        def save(self):
            """Write the submission ID, project UUID and bundle_map to the save file.

            The file is overwritten with indented JSON that also records
            SAVEFILE_SCHEMA_VERSION under 'version'.
            """
            output(f"\tSaving state in {self._state_filename}...", V_SUMMARY | V_TTY_ONLY)
            snapshot = {
                'version': self.SAVEFILE_SCHEMA_VERSION,
                'submission_id': self.submission_id,
                'project_uuid': self.project_uuid,
                'bundle_map': self.bundle_map
            }
            with open(self._state_filename, 'w') as fp:
                fp.write(json.dumps(snapshot, indent=4))
            output("done.\n", V_SUMMARY | V_TTY_ONLY)

        def load(self):
            """Populate this state from the save file.

            Reuses data already parsed by _load_data() when there is any,
            otherwise reads the file first. The raw data attribute is deleted
            once its fields have been copied onto the instance.
            """
            output(f"\tLoading state from {self._state_filename}...", V_SUMMARY)
            if not self._raw_data:
                self._load_data()
            raw = self._raw_data
            for field in ('submission_id', 'project_uuid', 'bundle_map'):
                setattr(self, field, raw[field])
            del self._raw_data
            output("done\n", V_SUMMARY)

        def _load_data(self):
            with open(self._state_filename, 'r') as fp:
                self._raw_data = json.loads(fp.read())

    class IngestSubmissionGrabber:

        def __init__(self, deployment, state):
            self.deployment = deployment
            self.state = state

        def get_submission_project_and_primary_bundle_list_from_ingest(self):
            """Look up the submission in Ingest and record its project and primary bundles.

            Stores the project UUID on the state, then registers every bundle
            of the submission in the state's bundle_map.
            """
            output("\tRetrieving submission...", V_SUMMARY | V_TTY_ONLY)
            ingest_finder = Finder.factory(finder_name="ingest", deployment=self.deployment)
            expression = f"subm_id={self.state.submission_id}"
            submission = ingest_finder.find(expression)
            output("done.\n", V_SUMMARY | V_TTY_ONLY)
            output(f"\tSubmission ID: {submission.envelope_id}\n", V_SUMMARY)

            self.state.project_uuid = submission.project().uuid
            output(f"\tProject UUID: {self.state.project_uuid}\n", V_SUMMARY)

            self._get_primary_bundle_list_from_ingest(submission)

        def _get_primary_bundle_list_from_ingest(self, subm):
            """Register each bundle of subm as a fresh 'primary' entry in bundle_map.

            Prints the bundle count, and the sorted bundle UUIDs when the
            verbosity level is at least V_GOOD_DETAIL.
            """
            output("\tRetrieving submission's primary bundle list...", V_SUMMARY | V_TTY_ONLY)

            bundle_map = self.state.bundle_map
            for pri_uuid in subm.bundles():
                bundle_map[pri_uuid] = {'type': 'primary', 'aws': {}, 'gcp': {}}
            output("done.\n", V_SUMMARY | V_TTY_ONLY)

            output(f"\tIngest created {self.state.primary_bundle_count} bundles.\n", V_SUMMARY)
            if verbosity_level < V_GOOD_DETAIL:
                return
            primary_uuids = sorted(uuid for uuid, _info in self.state.iter_bundles('primary'))
            for bundle_uuid in primary_uuids:
                print(f"\t    {bundle_uuid}")

    class DSSBundlePresenceChecker:

        def __init__(self, deployment, state, options):
            self.deployment = deployment
            self.state = state
            self.options = options

            self.primary_bundle_count = self.state.primary_bundle_count
            self.checked_bundles = {
                'aws': len([k for k, v in self.state.iter_bundles('primary') if v['aws'].get('dss_presence')]),
                'gcp': len([k for k, v in self.state.iter_bundles('primary') if v['gcp'].get('dss_presence')])
            }

        def check(self):
            """Check every primary bundle's manifest in both replicas using a thread pool.

            Replicas already recorded as present are skipped; replicas never
            checked before get a 'dss_presence' of None until the worker
            reports back.
            """
            output("\tChecking for bundle manifests:", V_SUMMARY | V_TTY_ONLY)
            workers = ThreadPool(self.options.jobs)
            for bundle_uuid, bundle_info in self.state.iter_bundles('primary'):
                for replica in ('aws', 'gcp'):
                    replica_data = bundle_info[replica]
                    if replica_data.get('dss_presence'):
                        continue
                    replica_data.setdefault('dss_presence', None)
                    workers.add_task(self._check_bundle_manifest_exists, bundle_uuid, replica)
            workers.wait_for_completion()
            output("...done.\n", V_SUMMARY | V_TTY_ONLY)

        def _check_bundle_manifest_exists(self, bundle_uuid, replica):
            """Record whether the bundle's manifest can be fetched from one replica.

            Runs on a pool thread. On success the bundle's fqid and a True
            'dss_presence' are stored; an AssertionError from the agent is
            treated as the bundle being absent from that replica.
            """
            dss = DataStoreAgent(self.deployment)
            bundle_info = self.state.bundle_map[bundle_uuid]
            try:
                manifest = dss.bundle_manifest(bundle_uuid, replica)
            except AssertionError:
                with self.state.lock:
                    bundle_info[replica]['dss_presence'] = False
                    output(f"\rbundle {bundle_uuid} is missing from {replica.upper()}\n", V_BAD_DETAIL)
            else:
                with self.state.lock:
                    bundle = manifest['bundle']
                    bundle_info['fqid'] = ".".join([bundle['uuid'], bundle['version']])
                    bundle_info[replica]['dss_presence'] = True
            with self.state.lock:
                # Create the workflow entry if it is missing, but keep workflows
                # loaded from a save file when a bundle is merely re-checked.
                bundle_info.setdefault('analysis_workflows', {})
                # The counter is shared by all pool threads; "+=" is not atomic.
                self.checked_bundles[replica] += 1
            self._print_progress()

        def _print_progress(self):
            """Rewrite the terminal progress line with the per-replica checked counts."""
            done = self.checked_bundles
            total = self.primary_bundle_count
            output(f"\r\tChecking for bundle manifests: "
                   f"AWS: {done['aws']}/{total}"
                   f" GCP: {done['gcp']}/{total}", V_SUMMARY | V_TTY_ONLY)

        def print_results(self):
            """Report, per replica, how many primary bundles are present and absent.

            Present bundles are listed at V_GOOD_DETAIL; the absent count and
            the absent bundles are only reported at V_BAD_DETAIL or above.
            """
            for replica in ('aws', 'gcp'):
                present_bundles, absent_bundles = {}, {}
                for uuid, info in self.state.iter_bundles('primary'):
                    bucket = present_bundles if info[replica].get('dss_presence') else absent_bundles
                    bucket[uuid] = info

                output(f"\t{len(present_bundles)} bundle are present in {replica.upper()}\n", V_SUMMARY)
                if present_bundles and verbosity_level >= V_GOOD_DETAIL:
                    for uuid in sorted(present_bundles):
                        print(f"\t    {present_bundles[uuid].get('fqid', uuid)}")
                if absent_bundles and verbosity_level >= V_BAD_DETAIL:
                    output(f"\t{len(absent_bundles)} bundle is absent from {replica.upper()}\n", V_SUMMARY)
                    for uuid in sorted(absent_bundles):
                        print(f"\t    {absent_bundles[uuid].get('fqid', uuid)}")

    class SearchDSSbyProjectUUID:

        def __init__(self, deployment, state, options):
            self.deployment = deployment
            self.state = state
            self.options = options

        def check(self):
            """Search both DSS replicas for non-analysis bundles of this project.

            Every hit is flagged 'in_dss_project_search' for its replica.
            Bundles not already in bundle_map are added with type 'extra'.
            A hit whose fqid differs from the one already recorded for that
            bundle fails the assertion.
            """
            output("\tSearching DSS...", V_SUMMARY | V_TTY_ONLY)
            dss = DataStoreAgent(self.deployment)
            belongs_to_project = {
                "match": {
                    "files.project_json.provenance.document_id": self.state.project_uuid
                }
            }
            is_analysis_bundle = {
                "match": {
                    "files.analysis_process_json.type.text": "analysis"
                }
            }
            query = {
                "query": {
                    "bool": {
                        "must": [belongs_to_project],
                        "must_not": [is_analysis_bundle]
                    }
                }
            }

            for replica in ('aws', 'gcp'):
                for result in dss.search(query, replica=replica):
                    with self.state.lock:
                        found_fqid = result['bundle_fqid']
                        bundle_uuid = found_fqid.split('.', 1)[0]

                        # Unknown to Ingest: register it as an extra bundle.
                        bundle_info = self.state.bundle_map.setdefault(
                            bundle_uuid, {'type': 'extra', 'aws': {}, 'gcp': {}})

                        bundle_info[replica]['in_dss_project_search'] = True
                        if 'fqid' in bundle_info:
                            assert(bundle_info['fqid'] == found_fqid)
                        else:
                            bundle_info['fqid'] = found_fqid
            output("done.\n", V_SUMMARY | V_TTY_ONLY)

        def print_results(self):
            """Print the project search results for AWS, then for GCP."""
            self._print_results_for_replica('aws')
            self._print_results_for_replica('gcp')

        def _print_results_for_replica(self, replica):
            """Summarise one replica's project search.

            Reports how many primary bundles were and were not found by the
            project search, and how many extra bundles were found. Individual
            bundles are listed according to the verbosity level.
            """
            indexed, not_indexed = {}, {}
            for uuid, info in self.state.iter_bundles('primary'):
                bucket = indexed if info[replica].get('in_dss_project_search') else not_indexed
                bucket[uuid] = info
            extras = {
                uuid: info for uuid, info in self.state.iter_bundles('extra')
                if info[replica].get('in_dss_project_search')
            }
            where = replica.upper()

            output(f"\tIn {where} DSS, "
                   f"{len(indexed)} primary bundles are indexed by project\n", V_SUMMARY)
            if verbosity_level >= V_GOOD_DETAIL:
                for uuid in sorted(indexed):
                    print(f"\t    {indexed[uuid].get('fqid', uuid)}")

            if not_indexed:
                output(
                    f"\tIn {where} DSS, "
                    f"{len(not_indexed)} primary bundles are NOT indexed by project\n",
                    V_SUMMARY)
                if verbosity_level >= V_BAD_DETAIL:
                    for bundle_uuid, bundle_info in not_indexed.items():
                        print(f"\t    {bundle_info.get('fqid', bundle_uuid)}")

            if extras:
                output(
                    f"\tIn {where} DSS, "
                    f"{len(extras)} extra bundles are associated with this project\n",
                    V_SUMMARY)
                if verbosity_level >= V_BAD_DETAIL:
                    for bundle_uuid, bundle_info in extras.items():
                        print(f"\t    {bundle_info.get('fqid', bundle_uuid)}")

    class SearchAnalysisWorkflowsbyProjectUUID:

        def __init__(self, deployment, state, options):
            self.deployment = deployment
            self.state = state
            self.options = options
            # FIXME: Use a better way to authenticate instead of asking for service account JSON key
            # FIXME: If use OAuth, this should align with the Ingest Agent
            self.service_account_key = self.options.credentials

            self.analysis_workflow_count = self.state.analysis_workflow_count
            self.succeeded_analysis_workflow_count = self.state.succeeded_analysis_workflow_count
            self.failed_analysis_workflow_count = self.state.failed_analysis_workflow_count
            self.ongoing_analysis_workflow_count = self.state.ongoing_analysis_workflow_count
            self.succeeded_workflows = set(self.state.iter_succeeded_analysis_workflows())
            self.failed_workflows = set([])
            self.ongoing_workflows = set([])
            self.errors = set([])
            self.errors_count = 0
            self.remaining_analysis_workflows_summary = None

        def check(self):
            """List the project's workflows, then fetch details for those not known to have succeeded."""
            # TODO: this method is a very slow synchronous loop, speed up by using multi-threading with the options.jobs
            output("\tSearching for secondary analysis workflows:\n", V_SUMMARY | V_TTY_ONLY)
            agent = AnalysisAgent(deployment=self.deployment,
                                  service_account_key=self.service_account_key)

            # TODO: remove the following line once there are no more scalability concerns of the analysis agent
            with agent.ignore_logging_msg():
                listed_ids = self._get_workflows_summary(analysis_agent=agent)

                # workflows in the skip set are not queried again, which saves I/O
                self.remaining_analysis_workflows_summary = listed_ids - self.succeeded_workflows

                self._get_workflows_detailed_info(analysis_agent=agent)

            output("...done.\n", V_SUMMARY | V_TTY_ONLY)

        def _get_workflows_summary(self, analysis_agent):
            """Return the set of workflow UUIDs the analysis service lists for this project.

            Also updates analysis_workflow_count with the number of listed
            workflows. If the listing request fails with an HTTP error the
            error is reported and an empty set is returned, so the caller can
            still do set arithmetic on the result.
            """
            try:
                workflows = analysis_agent.query_by_project_uuid(project_uuid=self.state.project_uuid,
                                                                 with_labels=False)
            except requests.exceptions.HTTPError as err:
                output(f"An error occurred when trying to fetch the workflow list: {err}")
                return set()

            self.analysis_workflow_count = len(workflows)

            for workflow in workflows:
                output(f"\t    {workflow.uuid}\n", V_GOOD_DETAIL)

            return {workflow.uuid for workflow in workflows}

        def _get_workflows_detailed_info(self, analysis_agent):
            """Fetch each remaining workflow and file it under its bundle in the state.

            The workflow is stored in the 'analysis_workflows' of the bundle
            named by its 'bundle-uuid' label and tallied by status. HTTP errors
            are recorded per workflow and do not stop the loop. A workflow
            whose bundle is not in bundle_map raises KeyError.
            """
            already_done = len(self.succeeded_workflows)

            for position, workflow_id in enumerate(self.remaining_analysis_workflows_summary, start=1):
                output("\r\tSearching for secondary analysis workflows: "
                       f"{position + already_done}/{self.analysis_workflow_count}", V_SUMMARY | V_TTY_ONLY)

                try:
                    details = analysis_agent.query_by_workflow_uuid(uuid=workflow_id)

                    owning_bundle = self.state.bundle_map[details.labels['bundle-uuid']]
                    # TODO: control the detail of workflow based on "V_BAD_DETAIL"
                    owning_bundle['analysis_workflows'][workflow_id] = {
                        'labels': details.labels,
                        'start': details.start_time,
                        'end': details.end_time,
                        'id': details.uuid,
                        'name': details.name,
                        'status': details.status,
                        'submission': details.submission_time
                    }

                    if details.status == 'Succeeded':
                        self.succeeded_workflows.add(workflow_id)
                        self.succeeded_analysis_workflow_count += 1
                    elif details.status in ('Failed', 'Aborted'):
                        self.failed_workflows.add(workflow_id)
                        self.failed_analysis_workflow_count += 1
                    else:
                        self.ongoing_workflows.add(workflow_id)
                        self.ongoing_analysis_workflow_count += 1

                except requests.exceptions.HTTPError:
                    self.errors.add(workflow_id)
                    self.errors_count += 1
                    output(f"\rAn error occurred when querying for workflow {workflow_id}\n", V_BAD_DETAIL)

        def print_results(self):
            """Print the succeeded / in-progress / failed tallies, plus the error tally if any."""
            # TODO: implement duplication checkers, so that we can see if there are duplicated
            #       workflows ran for the same bundle. Similar to the PHASE 5

            # TODO: make it print the workflow ids when user wants verbosity (V_GOOD_DETAIL)
            total = self.analysis_workflow_count
            output(f"\r\tWorkflows are succeeded  : {self.succeeded_analysis_workflow_count}/{total}\n"
                   f"\tWorkflows are in progress: {self.ongoing_analysis_workflow_count}/{total}\n"
                   f"\tWorkflows are failed     : {self.failed_analysis_workflow_count}/{total}\n")
            if not self.errors:
                return
            # show request errors separately so they are not mistaken for workflow failures
            output(f"\r\tError fetching requests   : {self.errors_count}/{total}\n")

    class SearchDSSforSecondaryBundles:

        def __init__(self, deployment, state, options):
            self.deployment = deployment
            self.state = state
            self.options = options

            self.primary_bundle_count = self.state.primary_bundle_count
            self.checked_bundles = {
                'aws': len([k for (k, v) in self.state.iter_bundles() if len(v['aws'].get('results_bundles', [])) > 0]),
                'gcp': len([k for (k, v) in self.state.iter_bundles() if len(v['gcp'].get('results_bundles', [])) > 0])
            }

        def check(self):
            """Search both replicas for the secondary bundles of every primary bundle, using a thread pool.

            Replicas that already have results bundles recorded are skipped;
            the others get an empty 'results_bundles' list if they have none yet.
            """
            output("\tSearching for secondary bundles: ", V_SUMMARY | V_TTY_ONLY)
            workers = ThreadPool(self.options.jobs)
            for pri_uuid, bundle_info in self.state.iter_bundles('primary'):
                for replica in ('aws', 'gcp'):
                    known_results = bundle_info[replica].setdefault('results_bundles', [])
                    if len(known_results) > 0:
                        continue
                    workers.add_task(self._find_secondary_bundles_for_primary_bundle, pri_uuid, replica)
            workers.wait_for_completion()
            output("...done.\n", V_SUMMARY | V_TTY_ONLY)

        def _find_secondary_bundles_for_primary_bundle(self, pri_uuid, replica):
            """Search one replica for bundles that list pri_uuid as an analysis input.

            Runs on a pool thread. The fqid of every hit is appended to the
            primary bundle's 'results_bundles' for that replica.
            """
            dss = DataStoreAgent(self.deployment)
            query = {
                "query": {
                    "match": {
                        "files.analysis_process_json.input_bundles": pri_uuid
                    }
                }
            }
            results = dss.search(query, replica=replica)
            with self.state.lock:
                if len(results) > 0:
                    found = self.state.bundle_map[pri_uuid][replica]['results_bundles']
                    for result in results:
                        found.append(result['bundle_fqid'])
                # The counter is shared by all pool threads; "+=" is not atomic.
                self.checked_bundles[replica] += 1
            self._print_progress()

        def _print_progress(self):
            """Rewrite the terminal progress line with the per-replica searched counts."""
            done = self.checked_bundles
            total = self.primary_bundle_count
            output(f"\r\tSearching for secondary bundles: "
                   f"AWS: {done['aws']}/{total}"
                   f" GCP: {done['gcp']}/{total}", V_SUMMARY | V_TTY_ONLY)

        def print_results(self):
            """Print the secondary bundle summary for AWS, then for GCP."""
            for replica in ('aws', 'gcp'):
                self._print_results_for_replica(replica)

        def _print_results_for_replica(self, replica):
            """Print, in increasing order of results-bundle count, how many primary bundles have that count.

            Bundles without results are listed at V_BAD_DETAIL, bundles with
            results at V_GOOD_DETAIL.
            """
            # results-bundle count -> {primary uuid: results bundles}
            grouped = {}
            for pri_uuid, info in self.state.iter_bundles('primary'):
                secondaries = info[replica].get('results_bundles')
                grouped.setdefault(len(secondaries), {})[pri_uuid] = secondaries

            for n_results in sorted(grouped):
                pri_sec = grouped[n_results]
                output(f"\tIn {replica.upper()} there are {len(pri_sec)} primary bundles "
                       f"with {n_results} results bundles\n",
                       V_SUMMARY)
                needed_level = V_BAD_DETAIL if n_results == 0 else V_GOOD_DETAIL
                if verbosity_level < needed_level:
                    continue
                for pri in sorted(pri_sec):
                    bundle_fqid = self.state.bundle_map[pri].get('fqid', pri)
                    print(f"\t    primary: {bundle_fqid} secondary: {pri_sec[pri]}")

    class SearchAzulForPrimaryBundles:

        def __init__(self, deployment, state, options):
            self.deployment = deployment
            self.state = state
            self.options = options

            self.primary_bundle_count = self.state.primary_bundle_count

        def check(self):
            """Flag every primary bundle with whether Azul lists it for this project.

            A bundle counts as present when its UUID equals the part before
            the first '.' of any fqid Azul returns for the project.
            """
            output("\tCounting bundles in webservice...", V_SUMMARY | V_TTY_ONLY)
            agent = AzulAgent(self.deployment)
            project_bundle_fqids = agent.get_project_bundle_fqids(self.state.project_uuid)
            # Build the lookup once instead of re-splitting every fqid for every bundle.
            indexed_uuids = {fqid.split('.')[0] for fqid in project_bundle_fqids}
            for primary_bundle_uuid, bundle_info in self.state.iter_bundles('primary'):
                bundle_info['present_in_azul'] = primary_bundle_uuid in indexed_uuids
            output("done.\n", V_SUMMARY | V_TTY_ONLY)

        def print_results(self):
            """Report how many primary bundles are and are not indexed in Azul.

            Indexed bundles are listed at V_GOOD_DETAIL, bundles that are not
            indexed at V_BAD_DETAIL.
            """
            primary_bundles_indexed_by_project = {
                k: v for k, v in self.state.iter_bundles('primary') if v.get('present_in_azul')
            }
            primary_bundles_not_indexed_by_project = {
                k: v for k, v in self.state.iter_bundles('primary') if not v.get('present_in_azul')
            }
            output(f"\tIn Azul, {len(primary_bundles_indexed_by_project)} primary bundles are indexed\n", V_SUMMARY)
            if verbosity_level >= V_GOOD_DETAIL:
                for bundle_uuid in sorted(primary_bundles_indexed_by_project):
                    bundle_info = primary_bundles_indexed_by_project[bundle_uuid]
                    print(f"\t    {bundle_info.get('fqid', bundle_uuid)}")

            if primary_bundles_not_indexed_by_project:
                output(f"\tIn Azul, {len(primary_bundles_not_indexed_by_project)} primary bundles are NOT indexed\n",
                       V_SUMMARY)
                if verbosity_level >= V_BAD_DETAIL:
                    for bundle_uuid in sorted(primary_bundles_not_indexed_by_project):
                        # Look the bundle up in the dict being iterated; the indexed
                        # dict never contains these UUIDs.
                        bundle_info = primary_bundles_not_indexed_by_project[bundle_uuid]
                        print(f"\t    {bundle_info.get('fqid', bundle_uuid)}")

    class SearchAzulForSecondaryBundles:

        def __init__(self, deployment, state, options):
            self.deployment = deployment
            self.state = state
            self.options = options

            self.primary_bundle_count = self.state.primary_bundle_count
            self.azul_result_bundle_group_count = len([k for (k, v) in self.state.iter_bundles()
                                                       if len(v.get('azul_result_bundles', [])) > 0])

        def check(self):
            """Record, per primary bundle, which of its results bundles Azul lists for the project.

            Only the results bundles found in the AWS replica are considered.
            Each primary bundle's 'azul_result_bundles' list is rebuilt from
            scratch, so the group counter is restarted as well and counts
            primary bundles with at least one match.
            """
            output("\tCounting secondary bundles in webservice...", V_SUMMARY | V_TTY_ONLY)
            agent = AzulAgent(self.deployment)
            project_bundle_fqids = set(agent.get_project_bundle_fqids(self.state.project_uuid))
            # The lists below are rebuilt, so a count carried over from a saved
            # state would be added on top of the fresh one.
            self.azul_result_bundle_group_count = 0
            for _primary_bundle_uuid, primary_bundle_state in self.state.iter_bundles('primary'):
                matched = []
                for fqid in primary_bundle_state['aws']['results_bundles']:
                    if fqid in project_bundle_fqids and fqid not in matched:
                        matched.append(fqid)
                primary_bundle_state['azul_result_bundles'] = matched
                if matched:
                    self.azul_result_bundle_group_count += 1
                    self._print_progress()
            output("done.\n", V_SUMMARY | V_TTY_ONLY)

        def _print_progress(self):
            """Rewrite the terminal progress line with the group count over the primary bundle count."""
            found = self.azul_result_bundle_group_count
            total = self.primary_bundle_count
            output(f"\r\tSearching for secondary bundles: "
                   f"{found}/{total}", V_SUMMARY | V_TTY_ONLY)

        def print_results(self):
            """Print, in increasing order of Azul results-bundle count, how many primary bundles have that count.

            Bundles without results are listed at V_BAD_DETAIL, bundles with
            results at V_GOOD_DETAIL.
            """
            # results-bundle count -> {primary uuid: results bundles}
            grouped = {}
            for pri_uuid, info in self.state.iter_bundles('primary'):
                secondaries = info['azul_result_bundles']
                grouped.setdefault(len(secondaries), {})[pri_uuid] = secondaries

            for n_results in sorted(grouped):
                pri_sec = grouped[n_results]
                output(f"\tIn Azul there are {len(pri_sec)} primary bundles with {n_results} results bundles\n",
                       V_SUMMARY)
                needed_level = V_BAD_DETAIL if n_results == 0 else V_GOOD_DETAIL
                if verbosity_level < needed_level:
                    continue
                for pri in sorted(pri_sec):
                    bundle_fqid = self.state.bundle_map[pri].get('fqid', pri)
                    print(f"\t    primary: {bundle_fqid} secondary: {pri_sec[pri]}")

    def __init__(self):
        """Parse the command line and run all analysis phases for one submission.

        Phase 1 either loads a saved state or fetches the submission from
        Ingest. Phases 2-7 then run their checkers in order, each printing its
        results and saving the state afterwards. Phase 4 is skipped when no
        credentials file is given.
        """
        parser = argparse.ArgumentParser()
        parser.add_argument('-d', '--deployment', help="search this deployment")
        parser.add_argument('submission_id')
        # 'count' adds one to the default for every -v given.
        parser.add_argument('-v', '--verbose', default=V_SUMMARY, action='count', dest='verbosity',
                            help="provide more detail (can be added multiple times)")
        parser.add_argument('-j', '--jobs', type=int, default=10,
                            help="concurrently level to use (default: 10)")
        parser.add_argument('-f', '--fresh', action='store_true',
                            help="don't start with saved state (if present)")
        parser.add_argument('-c', '--credentials', type=str, default='',
                            help="path to the JSON file containing credentials to query for analysis "
                                 "service(if present), otherwise will skip searching for workflows")

        args = parser.parse_args()
        # output() reads this module-level threshold.
        global verbosity_level
        verbosity_level = args.verbosity

        self.deployment = self._choose_deployment(args)
        self.state = self.AnalysisState(args.submission_id)

        if self.state.savefile_is_good() and not args.fresh:
            output("\nPHASE 1: Loading cached state:\n", V_SUMMARY)
            self.state.load()
            output(f"\tSubmission ID: {self.state.submission_id}\n", V_SUMMARY)
            output(f"\tProject UUID: {self.state.project_uuid}\n", V_SUMMARY)
            output(f"\tIngest created {self.state.primary_bundle_count} bundles.\n", V_SUMMARY)
            try:
                output(f"\tSecondary Analysis ran {self.state.analysis_workflow_count} analysis workflows.\n",
                       V_SUMMARY)
            except KeyError:
                # A saved state may lack 'analysis_workflows' entries; the line is simply not printed.
                pass
        else:
            output("\nPHASE 1: Get submission primary bundle list from Ingest:\n", V_SUMMARY)
            checker1 = self.IngestSubmissionGrabber(deployment=self.deployment, state=self.state)
            checker1.get_submission_project_and_primary_bundle_list_from_ingest()
            self.state.save()

        # From now on we have data worth saving on Ctrl-C
        signal.signal(signal.SIGINT, self._save_on_signal)

        output("\nPHASE 2: Checking bundles are present in DSS:\n", V_SUMMARY)
        checker2 = self.DSSBundlePresenceChecker(self.deployment, self.state, options=args)
        checker2.check()
        checker2.print_results()
        self.state.save()

        output("\nPHASE 3: Check DSS for primary bundles with this project UUID:\n", V_SUMMARY)
        checker3 = self.SearchDSSbyProjectUUID(deployment=self.deployment, state=self.state, options=args)
        checker3.check()
        checker3.print_results()
        self.state.save()

        # Only query for the analysis workflows if the path to the service account JSON key is provided
        if args.credentials:
            output("\nPHASE 4: Check Secondary Analysis for workflows with this project UUID:\n", V_SUMMARY)
            checker4 = self.SearchAnalysisWorkflowsbyProjectUUID(deployment=self.deployment,
                                                                 state=self.state,
                                                                 options=args)
            checker4.check()
            checker4.print_results()
            self.state.save()
        else:
            output("\nPHASE 4: No auth information provided, skip checking Secondary Analysis for workflows.\n")

        output("\nPHASE 5: Check DSS for secondary bundles:\n", V_SUMMARY)
        checker5 = self.SearchDSSforSecondaryBundles(deployment=self.deployment, state=self.state, options=args)
        checker5.check()
        checker5.print_results()
        self.state.save()

        output("\nPHASE 6: Check Azul for primary bundles:\n", V_SUMMARY)
        checker6 = self.SearchAzulForPrimaryBundles(deployment=self.deployment, state=self.state, options=args)
        checker6.check()
        checker6.print_results()
        self.state.save()

        # Phase 7 reads the AWS 'results_bundles' lists that phase 5 fills in.
        output("\nPHASE 7: Check Azul for secondary bundles:\n", V_SUMMARY)
        checker7 = self.SearchAzulForSecondaryBundles(deployment=self.deployment, state=self.state, options=args)
        checker7.check()
        checker7.print_results()
        self.state.save()

    def _choose_deployment(self, args):
        """Return the deployment to use, or exit with status 1 if none is chosen.

        The --deployment argument wins. Otherwise the DEPLOYMENT_STAGE
        environment variable is offered and must be confirmed by answering
        exactly 'y'. With neither available an explanation is printed.
        """
        if 'deployment' in args and args.deployment:
            deployment = args.deployment
        elif 'DEPLOYMENT_STAGE' in os.environ:
            deployment = os.environ['DEPLOYMENT_STAGE']
            answer = input(f"Use deployment {deployment}? (y/n): ")
            # Compare by value: "is not" tests object identity, which is not
            # guaranteed to hold for equal strings.
            if answer != 'y':
                sys.exit(1)
        else:
            print("You must supply the --deployment argument or set environment variable DEPLOYMENT_STAGE")
            sys.exit(1)
        output(f"Using deployment: {deployment}\n", V_SUMMARY)
        return deployment

    def _save_on_signal(self, sig, frame):
        """Signal handler: save the state gathered so far, then exit with status 0.

        sig and frame are the standard handler arguments and are not used.
        """
        print("\n")
        self.state.save()
        # sys.exit is always available; the bare exit() helper is only
        # installed by the site module.
        sys.exit(0)


# Constructing the class parses sys.argv and runs every phase. This happens
# whenever the module is executed or imported.
AnalyzeSubmission()
