#!/usr/bin/env python3
# coding: utf-8
#
# Copyright 2020 by Leipzig University Library, http://ub.uni-leipzig.de
#                   The Finc Authors, http://finc.info
#                   Robert Schenk, <robert.schenk@uni-leipzig.de>
#                   Martin Czygan, <martin.czygan@uni-leipzig.de>
#
# This file is part of some open source application.
#
# Some open source application is free software: you can redistribute
# it and/or modify it under the terms of the GNU General Public
# License as published by the Free Software Foundation, either
# version 3 of the License, or (at your option) any later version.
#
# Some open source application is distributed in the hope that it will
# be useful, but WITHOUT ANY WARRANTY; without even the implied warranty
# of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Foobar. If not, see <http://www.gnu.org/licenses/>.
#
# @license GPL-3.0+ <http://spdx.org/licenses/GPL-3.0+>

"""

Source: Persee
SID: 39
Ticket: #3133, #11349, #18619
Origin: OAI

"""


import os
import re
import subprocess
import sys
from io import BytesIO

import marcx
import pymarc
from siskin.arguments import FincArgumentParser
from siskin.configuration import Config
from siskin.utils import marc_clean_record, xmlstream


##################################################################################
# 1. Parse arguments and prepare outputfile
##################################################################################

SID = "39"

# The argument parser provides the command line options as well as the
# per-source file naming and housekeeping helpers used below.
fip = FincArgumentParser()

# Command line arguments
inputfile, outputformat = fip.args.inputfile, fip.args.outputformat

# Name of the output file, e.g. 196-output-20200701.fincmarc.mrc for SID 196
outputfilename = fip.outputfilename(SID)

# Path used for the input files of this source
path = fip.sid_path(SID)

# Drop old output and input files; how many are kept is specified
# in input-hist-size and output-hist-size
fip.remove_old_outputfiles(SID)
fip.remove_old_inputfiles(SID)

# The output file is always opened in binary mode. For XML output the handle
# is wrapped in a pymarc XMLWriter, otherwise records are written to it directly.
output_handle = open(outputfilename, "wb")
outputfile = pymarc.XMLWriter(output_handle) if outputformat == "xml" else output_handle


##################################################################################
# 2. Get input data
##################################################################################

# Get current ISSN list over AMSL-API
config = Config.instance()


def _config_value(section, option):
    """Return the configured value of *option* in *section*, or "" if it cannot be read."""
    try:
        return config.get(section, option)
    except Exception:
        # A missing section or option is not fatal by itself; the caller decides
        # what an empty value means. Unlike a bare except this does not swallow
        # SystemExit or KeyboardInterrupt.
        return ""


issn_list_url = _config_value(SID, "issn-list-url")
if not issn_list_url:
    sys.exit("No issn-list-url found. Add entry issn-list-url to siskin.ini")

issn_path = os.path.join(path, "39_issn_file")

# Download without a shell so that the URL and the path need no quoting.
# A failed download must abort: continuing with an empty ISSN list would
# silently drop the additional collection from every record.
try:
    with open(issn_path, "wb") as download:
        subprocess.run(["curl", "-sL", "--fail", issn_list_url], stdout=download, check=True)
except (OSError, subprocess.CalledProcessError) as err:
    sys.exit("Could not download ISSN list from %s: %s" % (issn_list_url, err))

# Write ISSNs into list, removing line breaks (including "\r") and skipping
# blank lines, so that an empty string never ends up in the list.
# The handle stays bound to the name issn_file because the end of the script
# calls issn_file.close(); closing an already closed file is a no-op.
with open(issn_path, "r") as issn_file:
    issn_list = [issn for issn in (line.strip() for line in issn_file) if issn]

# Check if inputfile exists and harvest new if necessary
if not inputfile:
    inputfile = _config_value(SID, "input")
    if not inputfile:
        print("No inputfile given. Starting new harvesting ...")
        inputfile = fip.inputfilename(SID)
        harvest_args = ["-format", "marc", "-from", "2000-01-01", "http://oai.persee.fr/oai"]
        try:
            sync = subprocess.run(["metha-sync"] + harvest_args)
            if sync.returncode != 0:
                # Only warn here and let metha-cat decide whether usable data exists.
                print("Warning: metha-sync exited with status %d" % sync.returncode, file=sys.stderr)
            with open(inputfile, "wb") as harvested:
                subprocess.run(["metha-cat"] + harvest_args, stdout=harvested, check=True)
        except (OSError, subprocess.CalledProcessError) as err:
            sys.exit("Harvesting failed: %s" % err)


##################################################################################
# 3. Process data
##################################################################################

# Build the ISSN lookup once; membership tests on a set do not scan the
# whole list for every record.
issn_lookup = frozenset(issn_list)


def _first_subfield(record, tag, code):
    """Return subfield *code* of the first *tag* field of *record*, or "" if absent.

    Uses get_fields() / get_subfields(), which return empty lists for missing
    tags or codes, so no exception handling is required.
    """
    fields = record.get_fields(tag)
    if not fields:
        return ""
    values = fields[0].get_subfields(code)
    return values[0] if values else ""


try:
    for raw_record in xmlstream(inputfile, "record"):

        parsed = pymarc.marcxml.parse_xml_to_array(BytesIO(raw_record))

        record = marcx.Record.from_record(parsed[0])
        record.force_utf8 = True
        record.strict = False

        # Kick records without title
        if not record.get_fields("245"):
            continue

        # Identifier: a record without 001 cannot get a finc ID, so it is skipped
        # instead of aborting the whole run.
        id_fields = record.get_fields("001")
        if not id_fields:
            print("Skipping record without 001 field", file=sys.stderr)
            continue
        f001 = id_fields[0].data.replace("-", "").replace("_", "")
        record.remove_fields("001")
        record.add("001", data="finc-39-%s" % f001)

        # ISSN of the record itself and of its parents (760 and 787)
        f022a = _first_subfield(record, "022", "a")
        f760x = _first_subfield(record, "760", "x")
        f787x = _first_subfield(record, "787", "x")

        # Collection and sealing according to ISSN. Empty values are ignored so
        # that a missing ISSN can never match an empty entry of the ISSN list.
        collections = ["a", f001, "b", SID, "c", "sid-39-col-persee"]
        if any(issn and issn in issn_lookup for issn in (f022a, f760x, f787x)):
            collections.extend(["c", "sid-39-col-perseeadlr"])

        record.add("980", subfields=collections)

        # Remove empty subfields
        marc_clean_record(record)

        # Write record to file
        if outputformat == "xml":
            outputfile.write(record)
        else:
            outputfile.write(record.as_marc())
finally:
    # Close both files even if a record fails to be processed.
    issn_file.close()
    outputfile.close()
