#!/usr/bin/env python3

"""
Concatenates multiple parquet files into a single parquet output
"""

import pyarrow as pa
import pyarrow.parquet as pq
import pandas as pd
import sys
import argparse

def build_parser():
    """Build the command-line parser for the combine-parquet tool.

    Returns:
        An ``argparse.ArgumentParser`` accepting ``-i/--input`` (zero or
        more input paths) and ``-o/--output`` (the destination path).
    """
    parser = argparse.ArgumentParser(
        prog="combine-parquet",
        description=__doc__,
    )

    parser.add_argument(
        '-i', '--input',
        nargs='*',
        # Without an explicit default, omitting -i leaves the attribute as
        # None, and iterating over it raises TypeError.
        default=[],
        help='Input files',
    )

    parser.add_argument(
        '-o', '--output',
        help='Output file',
    )

    return parser


def concat_frames(frames):
    """Stack data frames row-wise into a single frame.

    Args:
        frames: Sequence of pandas DataFrames, in the desired output order.

    Returns:
        ``None`` for an empty sequence; the sole frame, untouched, when
        there is exactly one; otherwise a new frame holding all rows with
        a fresh 0..n-1 index.
    """
    if not frames:
        return None
    if len(frames) == 1:
        # A lone input is passed through as-is (its index is not reset).
        return frames[0]
    # One concat over the whole list avoids re-copying the accumulated
    # rows once per input file.
    return pd.concat(frames, ignore_index=True)


def main(argv=None):
    """Read every input parquet file and write them out as one file.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Does nothing when no input files are given. Exits with a usage error
    when inputs are given without an output path, before any file is read.
    """
    parser = build_parser()
    args = parser.parse_args(argv)

    if not args.input:
        return

    # Fail fast: otherwise a missing -o is only discovered after all
    # inputs have been loaded and combined.
    if not args.output:
        parser.error("-o/--output is required when input files are given")

    frames = [pq.read_table(path).to_pandas() for path in args.input]
    combined = concat_frames(frames)

    table = pa.Table.from_pandas(combined)
    pq.write_table(table, args.output)


if __name__ == "__main__":
    main()
