#!/usr/bin/env python3
# coding: utf-8
#
# Copyright 2020 by Leipzig University Library, http://ub.uni-leipzig.de
#                   The Finc Authors, http://finc.info
#                   Robert Schenk, <robert.schenk@uni-leipzig.de>
#
# This file is part of some open source application.
#
# Some open source application is free software: you can redistribute
# it and/or modify it under the terms of the GNU General Public
# License as published by the Free Software Foundation, either
# version 3 of the License, or (at your option) any later version.
#
# Some open source application is distributed in the hope that it will
# be useful, but WITHOUT ANY WARRANTY; without even the implied warranty
# of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Foobar. If not, see <http://www.gnu.org/licenses/>.
#
# @license GPL-3.0+ <http://spdx.org/licenses/GPL-3.0+>

"""

Source: UrMEL
SID: 187
Ticket: #15951
Origin: OAI

"""


import os
import re
import sys

import xmltodict
import marcx
import pymarc

from siskin.mappings import formats
from siskin.configuration import Config
from siskin.arguments import FincArgumentParser
from siskin.utils import check_isbn, check_issn, marc_build_field_008


def clean_language(language):
    """
    Returns the three-letter MARC language code for a two-letter language code.

    Only "de" (-> "ger") and "en" (-> "eng") are known. Every other value is
    printed and terminates the program via sys.exit, so that an unmapped
    language is noticed instead of being written into the records.
    """
    if language == "de":
        return "ger"
    if language == "en":
        return "eng"
    print(language)
    # Format instead of concatenating: the value may be None or a list (missing
    # or repeated element) and must still produce the exit message, not a TypeError.
    sys.exit("check language: %s" % (language,))


##################################################################################
# 1. Parse arguments and prepare outputfile
##################################################################################

SID = "187"

fip = FincArgumentParser()

# Command line arguments used by this script
inputfolder = fip.args.inputfolder
outputformat = fip.args.outputformat

# Name of the output file, example from the parser: 196-output-20200701.fincmarc.mrc
outputfilename = fip.outputfilename(SID)

# Base path of this source, used below to locate the input files
current_path = fip.sid_path(SID)

# Remove old output files (the amount kept is specified in output-hist-size)
fip.remove_old_outputfiles(SID)

# The output is always opened in binary mode; for XML it is wrapped in a
# pymarc writer, otherwise the serialized records are written directly
outputfile = open(outputfilename, "wb")
if outputformat == "xml":
    outputfile = pymarc.XMLWriter(outputfile)


##################################################################################
# 2. Get input data
################################################################################

if not inputfolder:
    # No folder given on the command line: look for one in the configuration
    config = Config.instance()
    try:
        inputfolder = config.get(SID, "input")
    except Exception:
        # Presumably a missing section or option; in that case the data is
        # harvested below. Exception (not a bare except) keeps Ctrl-C working.
        inputfolder = ""

if not inputfolder:

    # Neither argument nor configuration: harvest all sets via OAI
    inputfolder = current_path + "/inputfiles/"

    # The shell redirect below cannot create a missing directory
    os.makedirs(inputfolder, exist_ok=True)

    sets = ['jportal_jpjournal_00001103',
            'jportal_jpjournal_00001120',
            'jportal_jpjournal_00001109',
            'jportal_jpjournal_00001110',
            'jportal_jpjournal_00001311',
            'jportal_jpjournal_00001014',
            'jportal_jpjournal_00001102',
            'jportal_jpjournal_00001225']

    for i, setname in enumerate(sets, start=1):

        # One numbered input file per set
        harvestfile = os.path.join(inputfolder, "187_input%d.xml" % i)

        os.system("metha-sync -set '%s' https://zs.thulb.uni-jena.de/oai2" % setname)
        os.system("metha-cat -set '%s' https://zs.thulb.uni-jena.de/oai2 > '%s'" % (setname, harvestfile))
 
# Parent journal of each harvested set: setSpec -> (value for 773$t, value for 773$w)
parent_works = {
    "jportal_jpjournal_00001103": ("Annalen der Naturphilosophie", "572422962"),
    "jportal_jpjournal_00001120": ("Deutsch als Fremdsprache", "745617522"),
    "jportal_jpjournal_00001109": ("Jenaische Beyträge zur neuesten gelehrten Geschichte", "756302161"),
    "jportal_jpjournal_00001110": ("Neue Berichte von Gelehrten Sachen", "756825113"),
    "jportal_jpjournal_00001311": ("Erneuerte Berichte von gelehrten Sachen", "756825156"),
    "jportal_jpjournal_00001014": ("Neue Zeitungen von gelehrten Sachen", "728151391"),
    "jportal_jpjournal_00001102": ("Signale für die musikalische Welt", "756825202"),
    "jportal_jpjournal_00001225": ("Magazin für Schullehrer...", "820686344"),
}

for root, _, files in os.walk(inputfolder):

    for inputfilename in files:
        if not inputfilename.endswith(".xml"):
            continue

        inputfilepath = os.path.join(root, inputfilename)

        # Read the whole file up front, so the handle is closed even if a
        # record below aborts the run
        with open(inputfilepath, "r", encoding="utf-8") as inputfile:
            xmlfile = inputfile.read()

        # "Record" is forced to a list as well, otherwise a file with a single
        # record yields a dict and the loop below would iterate over its keys
        xmlrecords = xmltodict.parse(xmlfile, force_list=["Record", "dc:creator", "dc:contributor"])

        # An empty <Records/> element is parsed as None
        records = (xmlrecords["Records"] or {}).get("Record", [])

        ##################################################################################
        # 3. Process data
        ##################################################################################

        for xmlrecord in records:

            header = xmlrecord["header"]

            # Skip deleted records; the status attribute may be missing on others
            if header.get("@status") == "deleted":
                continue

            # Records without a set cannot be assigned to a parent work
            try:
                setspec = header["setSpec"]
            except KeyError:
                continue

            # Records without Dublin Core metadata are skipped
            try:
                dc = xmlrecord["metadata"]["oai_dc:dc"]
            except (KeyError, TypeError):
                continue

            title = dc["dc:title"]
            if title == "Inhalt":
                continue

            marcrecord = marcx.Record(force_utf8=True)
            marcrecord.strict = False

            # Format
            record_format = dc["dc:type"]
            if record_format == "article":
                record_format = "Article"
            elif record_format == "volume":
                record_format = "Book"
            else:
                sys.exit("Unknown format: %s" % (record_format,))

            # Leader
            leader = formats[record_format]["Leader"]
            marcrecord.leader = leader

            # Identifier
            # NOTE(review): assumes dc:identifier is a list with at least two
            # entries (see also the link further down) - confirm against the data
            identifier = dc["dc:identifier"][0]
            match1 = re.search(r"jparticle_(\d+)", identifier)
            match2 = re.search(r"jpvolume_(\d+)", identifier)
            if match1:
                f001 = match1.group(1)
            elif match2:
                f001 = match2.group(1)
            else:
                continue
            marcrecord.add("001", data="187-" + f001)

            # Access facet
            f007 = formats[record_format]["e007"]
            marcrecord.add("007", data=f007)

            # Periodicity
            year = dc["dc:date"]
            periodicity = formats[record_format]["008"]
            language = clean_language(dc["dc:language"])
            f008 = marc_build_field_008(year, periodicity, language)
            marcrecord.add("008", data=f008)

            # Language (already cleaned for field 008)
            marcrecord.add("041", a=language)

            # First creator
            persons = dc.get("dc:creator") or []
            if persons:
                f100a = persons[0]
                marcrecord.add("100", a=f100a, _4="aut")

            # Title
            # Start from the plain title, so that a " :: " title which does not
            # match the pattern never reuses the values of the previous record
            f245a = title
            f245c = ""
            if " :: " in title:
                # The part before " :: " goes to $c, the part after it to $a
                match = re.search(r"(.*?)\s::\s(.*?)\s:", title)
                if match:
                    f245c, f245a = match.groups()
            elif " / " in title:
                f245 = title.split(" / ")
                f245a = f245[0]
                f245c = f245[-1]
            marcrecord.add("245", a=f245a, c=f245c)

            # Imprint of digitalization
            f260b = dc["dc:publisher"]
            f260c = year
            marcrecord.add("260", b=f260b, c=f260c)

            # Original imprint, taken from the title as "place, publisher, year"
            if " :: " in title:
                match = re.search(r".*?\s::\s.*?\s:\s(.*?),\s(.*?),\s(\d\d\d\d)", title)
                if match:
                    f260a, f260b, f260c = match.groups()
                    marcrecord.add("260", a=f260a, b=f260b, c=f260c)

            # Extent
            f300a = dc.get("dc:format")
            if f300a:
                f300a = f300a.replace("SizeOrDuration", "")
                f300a = f300a.strip()
                f300a = f300a.replace(" - ", "-")
                f300a = f300a.replace("*", "")
                # Strip leading zeros from both numbers of a range
                f300a = re.sub(r"-0+", "-", f300a)
                f300a = re.sub(r"^0+", "", f300a)
                if "-" in f300a:
                    f300a = "S. " + f300a
                marcrecord.add("300", a=f300a)

            # RDA-content
            f336b = formats[record_format]["336b"]
            marcrecord.add("336", b=f336b)

            # RDA-carrier
            f338b = formats[record_format]["338b"]
            marcrecord.add("338", b=f338b)

            # Rights
            f500a = dc["dc:rights"]
            marcrecord.add("500", a=f500a)

            # GND-content and -carrier
            f655a = formats[record_format]["655a"]
            f6552 = formats[record_format]["6552"]
            marcrecord.add("655", a=f655a, _2=f6552)

            # Additional creators
            for f700a in persons[1:]:
                marcrecord.add("700", a=f700a, _4="aut")

            # Editors and other contributors
            editors = dc.get("dc:contributor") or []
            for f700a in editors:
                marcrecord.add("700", a=f700a, _4="edt")

            # Parent work
            # Only plain strings are looked up; anything else (e.g. a list of
            # several setSpec values) is reported as unknown
            parent = parent_works.get(setspec) if isinstance(setspec, str) else None
            if parent is None:
                sys.exit("Set unbekannt: %s" % (setspec,))
            f773t, f773w = parent
            marcrecord.add("773", t=f773t, w=f773w)

            # Link to Digitalization
            # NOTE(review): the derivate id in the link is the same for every
            # record - confirm that this is intended
            url = dc["dc:identifier"][1]
            match = re.search(r"\d/(.*?)$", url)
            if match:
                image = match.group(1)
                f856u = "https://zs.thulb.uni-jena.de/rsc/viewer/jportal_derivate_00234793/" + image
                marcrecord.add("856", q="text/html", _3="Link zur Ressource", u=f856u)
            else:
                sys.exit("Wrong URL pattern: " + url)

            # SWB-content
            f935c = formats[record_format]["935c"]
            marcrecord.add("935", c=f935c)

            # Collection and sealing
            marcrecord.add("980", a=f001, b="187", c="sid-187-col-urmel")

            if outputformat == "xml":
                outputfile.write(marcrecord)
            else:
                outputfile.write(marcrecord.as_marc())

outputfile.close()
