#!/usr/bin/env python3
# coding: utf-8
#
# Copyright 2020 by Leipzig University Library, http://ub.uni-leipzig.de
#                   The Finc Authors, http://finc.info
#                   Robert Schenk, <robert.schenk@uni-leipzig.de>
#
# This file is part of some open source application.
#
# Some open source application is free software: you can redistribute
# it and/or modify it under the terms of the GNU General Public
# License as published by the Free Software Foundation, either
# version 3 of the License, or (at your option) any later version.
#
# Some open source application is distributed in the hope that it will
# be useful, but WITHOUT ANY WARRANTY; without even the implied warranty
# of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Foobar. If not, see <http://www.gnu.org/licenses/>.
#
# @license GPL-3.0+ <http://spdx.org/licenses/GPL-3.0+>

"""

Source: HeBIS-Verbundkatalog - Bibliographie zum Archivwesen der Archivschule Marburg
Source: HeBIS-Verbundkatalog - Gutenberg-Museum Mainz
SID: 182
Ticket: #16049
Origin: local files

"""


import os
import re
import sys
from io import BytesIO

import marcx
import pymarc

from siskin.configuration import Config
from siskin.utils import marc_clean_record, xmlstream
from siskin.arguments import FincArgumentParser


##################################################################################
# 1. Parse arguments and prepare outputfile
##################################################################################

# Source identifier, passed to the argument parser helpers and the config lookup
SID = "182"

fip = FincArgumentParser()

# Command line arguments used by this script
inputfolder, outputformat = fip.args.inputfolder, fip.args.outputformat

# Name of the file the converted records are written to
outputfilename = fip.outputfilename(SID)

# Clean up old output files of this source (the number kept is presumably
# governed by output-hist-size -- see FincArgumentParser)
fip.remove_old_outputfiles(SID)

# Always write in binary mode; for XML output the file handle is wrapped
# in a pymarc XML writer, otherwise records are written to it directly
outputfile = open(outputfilename, "wb")
if outputformat == "xml":
    outputfile = pymarc.XMLWriter(outputfile)


##################################################################################
# 2. Get input data
##################################################################################

# Fall back to the configured input folder if --inputfolder was not given
if not inputfolder:
    config = Config.instance()
    try:
        inputfolder = config.get(SID, "input")
    except Exception:
        # Catch lookup failures only; a bare except would also swallow
        # KeyboardInterrupt and SystemExit and hide them behind this message
        sys.exit("Input folder required. Use --inputfolder.")

# Walk the input folder and convert every matching file record by record.
# The try/finally makes sure outputfile.close() runs even if a record fails
# to convert; the exception itself still propagates and aborts the run.
try:
    for root, _, files in os.walk(inputfolder):

        for inputfile in files:

            # Skip files that neither carry "input" in their name nor end in .xml.
            # NOTE(review): a file matching only one of the two criteria is still
            # processed -- confirm that `and` (rather than `or`) is intended.
            if "input" not in inputfile and not inputfile.endswith(".xml"):
                continue

            # The file name decides which of the two collections the records belong to
            if "archiv" in inputfile:
                database = "Archivwesen"
            else:
                database = "Gutenberg"

            inputfilepath = os.path.join(root, inputfile)

            ##################################################################################
            # 3. Process data
            ##################################################################################

            for record_xml in xmlstream(inputfilepath, "record"):

                # Each chunk is parsed on its own; only the first parsed record is used
                oldrecord = pymarc.marcxml.parse_xml_to_array(BytesIO(record_xml))[0]

                newrecord = marcx.Record.from_record(oldrecord)
                newrecord.force_utf8 = True
                newrecord.strict = False

                # Adapt Leader Pos 7, if article instead of bookchapter.
                # A non-empty 773$x marks the record as an article. Depending on the
                # pymarc version a missing field/subfield either yields None
                # (TypeError on subscripting) or raises KeyError -- both mean "no 773$x".
                try:
                    is_article = newrecord["773"]["x"]
                except (KeyError, TypeError):
                    is_article = ""

                if is_article:
                    newrecord.leader = newrecord.leader[:7] + "b" + newrecord.leader[8:]

                # Identifier: prefix the original 001 with the source id.
                # Assumes every record carries an 001 control field -- TODO confirm;
                # a record without one aborts the run here.
                f001 = newrecord["001"].data
                newrecord.remove_fields("001")
                newrecord.add("001", data=SID + "-" + f001)

                # Remove old classifications
                for tag in ("082", "083", "084", "085"):
                    newrecord.remove_fields(tag)

                # Set collection and class facet
                if database == "Archivwesen":
                    newrecord.add("082", a="002")
                    f980c = "sid-182-col-archivwesen"
                else:
                    newrecord.add("084", a="06.90", _2="bkl")
                    f980c = "sid-182-col-gutenberg"

                # Collection and sealing: original id, source id and collection name
                collections = ["a", f001, "b", SID, "c", f980c]
                newrecord.add("980", subfields=collections)

                # Remove empty subfields
                marc_clean_record(newrecord)

                # Write record to file: the XML writer takes the record object,
                # the plain binary file takes the serialized MARC bytes
                if outputformat == "xml":
                    outputfile.write(newrecord)
                else:
                    outputfile.write(newrecord.as_marc())
finally:
    outputfile.close()
