#!/usr/bin/env python3

"""
Loads triples from a Parquet file into TrustGraph processing.
"""

import pulsar
from pulsar.schema import JsonSchema
from trustgraph.schema import Triples, Triple, Value, Metadata
from trustgraph.schema import triples_store_queue
import argparse
import os
import time
import pyarrow as pa
import pyarrow.parquet as pq

from trustgraph.log_level import LogLevel

class Loader:
    """Publishes triples read from a Parquet file to a Pulsar queue.

    The file must provide 's', 'p' and 'o' columns.  Every row is sent as
    its own Triples message holding a single Triple, tagged with the
    configured user and collection.
    """

    def __init__(
            self,
            pulsar_host,
            output_queue,
            log_level,
            file,
            user,
            collection,
    ):
        """Connect to Pulsar and create the producer used by run().

        Args:
            pulsar_host: Pulsar service URL handed to pulsar.Client.
            output_queue: Topic the Triples messages are produced to.
            log_level: Object whose to_pulsar() result configures the
                Pulsar console logger.
            file: Path of the Parquet file to read.
            user: User ID placed in each message's metadata.
            collection: Collection ID placed in each message's metadata.
        """

        self.file = file
        self.user = user
        self.collection = collection

        self.client = pulsar.Client(
            pulsar_host,
            logger=pulsar.ConsoleLogger(log_level.to_pulsar())
        )

        self.producer = self.client.create_producer(
            topic=output_queue,
            schema=JsonSchema(Triples),
            chunking_enabled=True,
        )

    def run(self):
        """Read the Parquet file and send one Triples message per row.

        Subjects and predicates are always flagged as URIs; an object is
        flagged as a URI only when its value starts with "https:".

        Errors are reported on stdout rather than raised.

        Returns:
            True if every row was sent, False if a required column is
            missing or an exception interrupted the load.
        """

        try:

            print("Reading file...")
            table = pq.read_table(self.file)
            print("Loaded.")

            names = set(table.column_names)

            # Report every missing column, then stop: fetching an absent
            # column below would only fail with a less helpful error.
            missing = [c for c in ("s", "p", "o") if c not in names]
            if missing:
                for c in missing:
                    print(f"No '{c}' column")
                return False

            sc = table.column("s")
            pc = table.column("p")
            oc = table.column("o")

            for s, p, o in zip(sc, pc, oc):

                # Convert the Arrow scalar once; it is needed for both the
                # value and the URI test.
                obj = o.as_py()

                r = Triples(
                    metadata=Metadata(
                        metadata=[],
                        user=self.user,
                        collection=self.collection,
                    ),
                    triples=[
                        Triple(
                            s=Value(value=s.as_py(), is_uri=True),
                            p=Value(value=p.as_py(), is_uri=True),
                            o=Value(
                                value=obj,
                                is_uri=obj.startswith("https:"),
                            ),
                        )
                    ]
                )

                self.producer.send(r)

            return True

        except Exception as e:
            print(e, flush=True)
            return False

    def __del__(self):
        """Close the Pulsar client, if one was created."""
        # __del__ also runs when __init__ raised before self.client was
        # assigned, so the attribute may be missing.
        client = getattr(self, "client", None)
        if client is not None:
            client.close()

def main():
    """Parse command-line arguments and load the requested file.

    A Loader is built from the parsed options and run.  If building or
    running it raises, the exception is printed and the whole attempt is
    repeated after a 10 second pause; the loop ends after the first
    attempt that completes without raising.
    """

    parser = argparse.ArgumentParser(
        prog='loader',
        description=__doc__,
    )

    # The PULSAR_HOST environment variable overrides the built-in default.
    default_pulsar_host = os.getenv("PULSAR_HOST", 'pulsar://localhost:6650')
    default_output_queue = triples_store_queue
    default_user = 'trustgraph'
    default_collection = 'default'
    default_log_level = LogLevel.ERROR

    parser.add_argument(
        '-p', '--pulsar-host',
        default=default_pulsar_host,
        help=f'Pulsar host (default: {default_pulsar_host})',
    )

    parser.add_argument(
        '-o', '--output-queue',
        default=default_output_queue,
        help=f'Output queue (default: {default_output_queue})'
    )

    parser.add_argument(
        '-u', '--user',
        default=default_user,
        help=f'User ID (default: {default_user})'
    )

    parser.add_argument(
        '-c', '--collection',
        default=default_collection,
        help=f'Collection ID (default: {default_collection})'
    )

    parser.add_argument(
        '-l', '--log-level',
        type=LogLevel,
        default=default_log_level,
        choices=list(LogLevel),
        # Describe the option that is actually being set, and show the
        # real default instead of a hard-coded (and wrong) value.
        help=f'Log level (default: {default_log_level})'
    )

    parser.add_argument(
        '-f', '--file',
        required=True,
        help='File to load'
    )

    args = parser.parse_args()

    while True:

        try:
            p = Loader(
                pulsar_host=args.pulsar_host,
                output_queue=args.output_queue,
                log_level=args.log_level,
                file=args.file,
                user=args.user,
                collection=args.collection,
            )

            p.run()

            print("File loaded.")
            break

        except Exception as e:

            print("Exception:", e, flush=True)
            print("Will retry...", flush=True)

        # Only reached after a failed attempt; pause before retrying.
        time.sleep(10)

# Run the loader only when executed as a script, so that importing this
# module does not start a load.
if __name__ == "__main__":
    main()

