#!/usr/bin/env python3
# coding: utf-8
#
# Copyright 2020 by Leipzig University Library, http://ub.uni-leipzig.de
#                   The Finc Authors, http://finc.info
#                   Robert Schenk, <robert.schenk@uni-leipzig.de>
#
# This file is part of some open source application.
#
# Some open source application is free software: you can redistribute
# it and/or modify it under the terms of the GNU General Public
# License as published by the Free Software Foundation, either
# version 3 of the License, or (at your option) any later version.
#
# Some open source application is distributed in the hope that it will
# be useful, but WITHOUT ANY WARRANTY; without even the implied warranty
# of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Foobar. If not, see <http://www.gnu.org/licenses/>.
#
# @license GPL-3.0+ <http://spdx.org/licenses/GPL-3.0+>

"""

Source: IZI-Datenbank
SID: 78
Ticket: #14692, #17107
Origin: local file

"""


import os
import re
import sys

import marcx
import pymarc
import xmltodict

from siskin.arguments import FincArgumentParser
from siskin.configuration import Config
from siskin.mappings import formats
from siskin.utils import marc_clean_record, marc_build_imprint, marc_build_field_008


# Maps the German language names used in the SPRACHE element of the IZI
# export to the three-letter MARC language codes written to 008 and 041$a.
# Names missing from this table are reported on stdout by the 041 handling.
lang_map = {
    "deutsch": "ger",
    "englisch": "eng",
    "franzoesisch": "fre",
    "spanisch": "spa",
    "portugiesisch": "por",
    "italienisch": "ita",
    "tuerkisch": "tur",
    "niederlaendisch": "dut",
    "norwegisch": "nor",
    "russisch": "rus",
    # The MARC code for Japanese is "jpn"; "jap" is not a valid code.
    "japanisch": "jpn",
    "schwedisch": "swe",
}


##################################################################################
# 1. Parse arguments and prepare outputfile
##################################################################################

# Source identifier of the IZI-Datenbank; used for the output file name,
# the clean-up of old output files and the configuration lookup.
SID = "78"

fip = FincArgumentParser()

# Get arguments
inputfile = fip.args.inputfile
outputformat = fip.args.outputformat

# Generates string for outputfilename; presumably of the form
# <SID>-output-<date>.fincmarc.mrc, e.g. 78-output-20200701.fincmarc.mrc
outputfilename = fip.outputfilename(SID)

# Removes n old outputfiles as specified in output-hist-size
fip.remove_old_outputfiles(SID)

# Set output format for MARC record.
# The "xml" branch needs pymarc, which has to be imported at the top of the
# file; without that import this branch fails with a NameError.
# For every other format the records are written as bytes by the main loop.
if outputformat == "xml":
    outputfile = pymarc.XMLWriter(open(outputfilename, "wb"))
else:
    outputfile = open(outputfilename, "wb")


##################################################################################
# 2. Get input data
##################################################################################

# Without an input file on the command line, use the path configured for
# this source.
if not inputfile:
    inputfile = Config.instance().get(SID, "input")

# NOTE: ``inputfile`` is rebound from the path to the open file handle; the
# handle is kept open and closed at the very end of the script.
inputfile = open(inputfile, "r")

# Parse the complete XML export into nested dictionaries.
xmlrecords = xmltodict.parse(inputfile.read())


##################################################################################
# 3. Process data
##################################################################################

# Converts every <Datensatz> of the IZI export into one MARC record and
# writes it to ``outputfile`` (pymarc XMLWriter for "xml", raw MARC bytes
# otherwise). Records without a NUMMER are skipped. Both files are closed
# after the last record.

# Publication types (PUBLIKATIONSTYP substrings) that mark a thesis.
THESIS_TYPES = ("Bachelorarbeit", "Masterarbeit", "Diplomarbeit", "Magisterarbeit", "Dissertation")

# QUELLE containing a page range such as "... S. 12-34" marks an article.
ARTICLE_PAGES = re.compile(r".*\sS\.\s\d+-\d+")
# QUELLE of the form "<place>: <publisher> <year>, ..." or "... <year>. ...".
IMPRINT = re.compile(r"(.*?):\s(.*?)\s(\d\d\d\d)[,\.]\s.*")
# Everything following "<year>, " / "<year>. " in QUELLE is used as extent.
EXTENT = re.compile(r".*\s\d\d\d\d[,\.]\s(.*)")
# ". <digit>" inside a series title is rewritten to " ; <digit>".
SERIES_VOLUME = re.compile(r"\.\s(\d)")

# xmltodict yields a list only for repeated elements; an export with exactly
# one <Datensatz> yields a single mapping, which must not be iterated as if
# it were a list of records.
xmlrecord_list = xmlrecords["IZI_Datensaetze"]["Datensatz"]
if not isinstance(xmlrecord_list, list):
    xmlrecord_list = [xmlrecord_list]

for xmlrecord in xmlrecord_list:

    # A record without identifier cannot be written; skip it right away.
    f001 = xmlrecord["NUMMER"]
    if not f001:
        continue

    marcrecord = marcx.Record(force_utf8=True)
    marcrecord.strict = False

    # Format recognition
    pubtype = xmlrecord["PUBLIKATIONSTYP"] or ""
    url = xmlrecord["URL"]
    isbn = xmlrecord["ISBN"]
    zeitschrift = xmlrecord["ZEITSCHRIFT"]
    quelle = xmlrecord["QUELLE"]

    # Always computed per record, so that a record without QUELLE neither
    # raises a NameError nor inherits the value of the previous record.
    artikel = bool(quelle and ARTICLE_PAGES.search(quelle))

    if any(thesis_type in pubtype for thesis_type in THESIS_TYPES):
        record_format = "Thesis"
    elif isbn:
        record_format = "Book"
    elif zeitschrift or artikel or "beitrag" in pubtype:
        record_format = "Article"
    elif quelle:
        record_format = "Loose-leaf"
    else:
        record_format = "Book"

    format_spec = formats[record_format]

    # Leader
    marcrecord.leader = format_spec["Leader"]

    # Identifier
    marcrecord.add("001", data="finc-78-" + f001)

    # Access facet: records with a URL are treated as electronic
    if url:
        f007 = format_spec["e007"]
    else:
        f007 = format_spec["p007"]
    marcrecord.add("007", data=f007)

    # Languages are given as a "; "-separated list of German names. More
    # than three languages are collapsed to "mul" in 008 and 041 and are
    # listed in a 500 footnote instead.
    sprache = xmlrecord["SPRACHE"]
    languages = sprache.split("; ") if sprache else []
    multilingual = len(languages) > 3

    # Periodicity
    year = xmlrecord["JAHR"]
    periodicity = format_spec["008"]
    if multilingual:
        language = "mul"
    elif languages:
        language = lang_map.get(languages[0], "")
    else:
        language = ""
    f008 = marc_build_field_008(year, periodicity, language)
    marcrecord.add("008", data=f008)

    # ISBN
    if isbn:
        for f020a in isbn.split("; "):
            marcrecord.add("020", a=f020a)

    # Language
    if multilingual:
        marcrecord.add("041", a="mul")
    elif languages:
        subfields = []
        for language_name in languages:
            f041a = lang_map.get(language_name, "")
            if f041a:
                subfields.append("a")
                subfields.append(f041a)
            else:
                print("Die Sprache %s fehlt in der Lang_Map!" % language_name)
        marcrecord.add("041", subfields=subfields)

    # First creator
    persons = xmlrecord["AUTOR"]
    persons = persons.split("; ") if persons else []
    if persons:
        marcrecord.add("100", a=persons[0])

    # First corporate creator
    corporates = xmlrecord["KORP_URHEBER"]
    corporates = corporates.split("; ") if corporates else []
    if corporates:
        marcrecord.add("110", a=corporates[0])

    # Main title
    f245a = xmlrecord["TITEL"]
    f245b = xmlrecord["BAND"]
    if f245b:
        marcrecord.add("245", a=f245a, b=f245b)
    else:
        marcrecord.add("245", a=f245a)

    # Imprint: start from empty values for every record, so that a record
    # without (matching) QUELLE does not reuse the previous record's imprint.
    f260a = ""
    f260b = ""
    f260c = ""
    if quelle:
        match = IMPRINT.search(quelle)
        if match:
            f260a, f260b, f260c = match.groups()

    # The dedicated JAHR element takes precedence over a year found in QUELLE
    if year:
        f260c = year

    subfields = marc_build_imprint(f260a, f260b, f260c)
    marcrecord.add("260", subfields=subfields)

    # Extension
    if quelle:
        match = EXTENT.search(quelle)
        if match:
            # group(1) is the captured text; groups() would pass a tuple
            marcrecord.add("300", a=match.group(1))

    # RDA-content
    marcrecord.add("336", b=format_spec["336b"])

    # RDA-carrier
    marcrecord.add("338", b=format_spec["338b"])

    # Series
    f490a = xmlrecord["REIHENTITEL"]
    if f490a:
        f490a = f490a.replace("(", "")
        f490a = f490a.replace(")", "")
        f490a = SERIES_VOLUME.sub(r" ; \1", f490a)
        marcrecord.add("490", a=f490a)

    # Footnote: uses the same "more than three languages" test as 008/041
    if multilingual:
        f500a = "Text auf " + sprache.replace(";", ",").title()
        marcrecord.add("500", a=f500a)

    # Subject headings (the element may be empty)
    subjects = xmlrecord["SCHLAGWORT"]
    if subjects:
        for f650a in subjects.split("; "):
            marcrecord.add("650", a=f650a)

    # GND-content and -carrier
    marcrecord.add("655", a=format_spec["655a"], _2=format_spec["6552"])

    # Additional creators
    for f700a in persons[1:]:
        marcrecord.add("700", a=f700a)

    editors = xmlrecord["HRSG_MITARBEITER"]
    if editors:
        for f700a in editors.split("; "):
            marcrecord.add("700", a=f700a)

    # Additional corporate creators (the first one is already in 110)
    for f710a in corporates[1:]:
        if f710a:
            marcrecord.add("710", a=f710a.replace(" (Hrsg.)", ""))

    # Corporate editors: no entry of KORP_HRSG is written to any other
    # field, so all of them are added here, including the first one.
    corporate_editors = xmlrecord["KORP_HRSG"]
    if corporate_editors:
        for f710a in corporate_editors.split("; "):
            if f710a:
                marcrecord.add("710", a=f710a.replace(" (Hrsg.)", ""))

    # Parent work
    # NOTE(review): journal and collection are added as two separate 773
    # fields, and empty values are passed through to marcx unchanged --
    # confirm that this is intended.
    f773g = zeitschrift
    marcrecord.add("773", g=f773g)

    f773t = xmlrecord["SAMMELWERK"]
    marcrecord.add("773", t=f773t)

    # Link to fulltext
    if url:
        marcrecord.add("856", q="text/html", _3="Link zur Ressource", u=url, z="kostenfrei")

    # Link to catalog entry
    f856u = "http://www.izi-datenbank.de/details/" + f001
    marcrecord.add("856", q="text/html", _3="Link zum Datensatz", u=f856u)

    # SWB-content
    marcrecord.add("935", c=format_spec["935c"])

    # Collection and sealing
    marcrecord.add("980", a=f001, b="78", c="sid-78-col-izi")

    # Write record to file
    if outputformat == "xml":
        outputfile.write(marcrecord)
    else:
        outputfile.write(marcrecord.as_marc())

inputfile.close()
outputfile.close()
