#!/usr/bin/env python
# coding: utf-8
#
# Copyright 2020 by Leipzig University Library, http://ub.uni-leipzig.de
#                   The Finc Authors, http://finc.info
#                   Robert Schenk, <robert.schenk@uni-leipzig.de>
#
# This file is part of some open source application.
#
# Some open source application is free software: you can redistribute
# it and/or modify it under the terms of the GNU General Public
# License as published by the Free Software Foundation, either
# version 3 of the License, or (at your option) any later version.
#
# Some open source application is distributed in the hope that it will
# be useful, but WITHOUT ANY WARRANTY; without even the implied warranty
# of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Foobar.  If not, see <http://www.gnu.org/licenses/>.
#
# @license GPL-3.0+ <http://spdx.org/licenses/GPL-3.0+>

"""

Source: Deutsche Kinemathek (VK Film)
SID: 155
Ticket: #8696
Origin: local file

"""


import os
import re
import sys

import xmltodict

import marcx
import pymarc
from siskin.mappings import formats
from siskin.configuration import Config
from siskin.utils import check_isbn, check_issn, marc_build_field_008, marc_build_field_773g, xmlstream, marc_get_languages
from siskin.arguments import FincArgumentParser


def get_field(xmlrecord, number):
    """
    Return the content of the first category with the given number.

    xmlrecord is the dict produced by xmltodict.parse() for one record, i.e.
    the categories are expected below xmlrecord["record"]["categorie"].
    number is the category number as a string, e.g. "20".

    Returns the value stored under the "field" key of the first matching
    category. An empty string is returned when no category matches, when the
    matching category has no "field" key or when its content is empty.
    """
    categories = xmlrecord["record"]["categorie"]

    # xmltodict yields a single dict instead of a list when an element occurs
    # only once; iterating that dict directly would walk over its keys.
    if isinstance(categories, dict):
        categories = [categories]

    for category in categories:
        if category.get("number") == number:
            # An empty element is parsed as None, but callers treat the
            # result as a string (.lower(), .replace(), "in" checks).
            return category.get("field") or ""
    return ""


def get_url(xmlrecord):
    """
    Return the URL stored in category "8e" of a record, or "" if there is none.

    xmlrecord is the dict produced by xmltodict.parse() for one record. Two
    layouts of category "8e" are handled:

    * without a "subfield" key: the value of its "field" key is returned as is
    * with a "subfield" key: the text of the first subfield whose code
      attribute is "u" and whose text contains "http" is returned

    If a category "8e" has subfields but none of them qualifies, the search
    continues with the next category "8e".
    """
    categories = xmlrecord["record"]["categorie"]

    # xmltodict yields a single dict instead of a list for a lone element.
    if isinstance(categories, dict):
        categories = [categories]

    for category in categories:
        if category.get("number") != "8e":
            continue

        if "subfield" not in category:
            return category.get("field") or ""

        subfields = category["subfield"]
        if not subfields:
            continue

        # A lone subfield is not wrapped in a list by xmltodict either.
        if isinstance(subfields, (dict, str)):
            subfields = [subfields]

        for subfield in subfields:
            # Subfields without attributes are parsed as plain strings and
            # therefore cannot carry the code "u".
            if not isinstance(subfield, dict):
                continue
            # The text has to be inspected directly: joining the subfield
            # dict only concatenates its keys ("@code", "#text"), so the
            # former "http" check could never succeed.
            text = subfield.get("#text") or ""
            if subfield.get("@code") == "u" and "http" in text:
                return text
    return ""


def is_journal(xmlrecord):
    """
    Tell whether a record is to be treated as a journal.

    A record counts as a journal when it has an ISSN (category "88") and a
    publishing history (category "76p") but neither a parent (category "70")
    nor an ISBN (category "87").
    """
    history, parent, isbn, issn = (
        get_field(xmlrecord, number) for number in ("76p", "70", "87", "88")
    )
    has_journal_marks = bool(issn and history)
    has_monograph_marks = bool(parent or isbn)
    return has_journal_marks and not has_monograph_marks


##################################################################################
# 1. Parse arguments and prepare outputfile
##################################################################################

SID = "155"

# Command line handling is delegated to the shared argument parser
fip = FincArgumentParser()
inputfile, outputformat = fip.args.inputfile, fip.args.outputformat

# Name of the output file as generated by the parser for this source,
# presumably of the form 155-output-20200701.fincmarc.mrc
outputfilename = fip.outputfilename(SID)

# Clean up old output files of this source (per the original note: as many
# as specified in output-hist-size)
fip.remove_old_outputfiles(SID)

# The file is always opened in binary mode; for XML output it is wrapped in
# a pymarc writer, otherwise the raw handle receives the binary MARC records
outputfile = open(outputfilename, "wb")
if outputformat == "xml":
    outputfile = pymarc.XMLWriter(outputfile)


##################################################################################
# 2. Get input data
################################################################################

# Without an input file on the command line, use the "input" entry that the
# configuration holds for this source
if not inputfile:
    inputfile = Config.instance().get(SID, "input")


##################################################################################
# 3. Process data
##################################################################################

# Converts every <record> of the input file into one MARC record and writes
# it to the output file. Records without title, without identifier, journals
# and websites without URL are skipped.

# Patterns are compiled once for all records. Raw strings keep "\d" from
# being read as an (invalid) string escape sequence.
page_range_pattern = re.compile(r"(\d+)-(\d+)")
series_pattern = re.compile(r"(.*?) ; (\d+)")

# Lower-cased carrier code (category 770) -> key of the formats mapping
carrier_formats = {
    "dvd": "DVD-Video",
    "ftp": "Remote-Computerfile",
    "blu": "Blu-Ray-Disc",
    "onl": "Website",
    "schall": "Vinyl-Record",
    "cdr": "CD-ROM",
}

for xmlrecord in xmlstream(inputfile, "record"):

    xmlrecord = xmltodict.parse(xmlrecord)

    # xmltodict yields None for an empty record and a single dict (instead of
    # a list) for a record with only one category. The latter is normalized
    # in place so that the helper functions can iterate over the categories.
    record = xmlrecord["record"] or {}
    categories = record.get("categorie") or []
    if isinstance(categories, dict):
        categories = record["categorie"] = [categories]

    # Kick record without title
    if not any(category.get("number") == "20" for category in categories):
        continue

    # Kick journals
    if is_journal(xmlrecord):
        continue

    marcrecord = marcx.Record(force_utf8=True)
    marcrecord.strict = False

    # Format mapping: a page range marks an article, a thesis note a thesis,
    # everything else is decided by the carrier with "Book" as fallback
    pages = get_field(xmlrecord, "708")
    carrier = get_field(xmlrecord, "770").lower()
    thesis = get_field(xmlrecord, "818")

    if "-" in pages:
        record_format = "Article"
    elif thesis:
        record_format = "Thesis"
    else:
        record_format = carrier_formats.get(carrier, "Book")

    # Kick websites without URL
    url = get_url(xmlrecord)
    if record_format == "Website" and not url:
        continue

    # All format dependent values of this record
    format_spec = formats[record_format]

    # Leader
    marcrecord.leader = format_spec["Leader"]

    # Identifier
    f001 = get_field(xmlrecord, "00")
    if not f001:
        continue
    marcrecord.add("001", data=SID + "-" + f001)

    # Access type: electronic if the record has a URL, physical otherwise
    f007 = format_spec["e007"] if url else format_spec["p007"]
    marcrecord.add("007", data=f007)

    # Periodicity
    year = get_field(xmlrecord, "76")
    periodicity = format_spec["008"]
    language = get_field(xmlrecord, "37")
    language = marc_get_languages(language[:3])
    f008 = marc_build_field_008(year, periodicity, language)
    marcrecord.add("008", data=f008)

    # ISBN
    f020a = check_isbn(get_field(xmlrecord, "87"))
    marcrecord.add("020", a=f020a)

    # ISSN
    f022a = check_issn(get_field(xmlrecord, "88"))
    marcrecord.add("022", a=f022a)

    # Language
    marcrecord.add("041", a=language)

    # Notation
    f084a = get_field(xmlrecord, "30")
    marcrecord.add("084", a=f084a)

    # First creator (person)
    f100a = get_field(xmlrecord, "40")
    marcrecord.add("100", a=f100a, _4="aut")

    # First creator (corporation)
    f110a = get_field(xmlrecord, "60")
    marcrecord.add("110", a=f110a)

    # Main title and responsibility: category 19 is only preferred when it
    # carries a " : " separator that category 20 lacks
    f19 = get_field(xmlrecord, "19")
    f20 = get_field(xmlrecord, "20")
    if " : " in f19 and " : " not in f20:
        f245 = f19
    else:
        f245 = f20
    f245 = f245.replace("¬", "")
    f245 = f245.replace("\n", " ")
    f245 = f245.replace("   ", "")
    if ":" in f245:
        # Split at the first colon only, otherwise everything behind a
        # second colon would be lost
        f245a, f245b = (part.strip() for part in f245.split(":", 1))
    else:
        f245a = f245
        f245b = ""
    f245c = get_field(xmlrecord, "39")
    marcrecord.add("245", a=f245a, b=f245b, c=f245c)

    # Alternative title
    f246a = get_field(xmlrecord, "24")
    f246a = f246a.replace("\n", "")
    marcrecord.add("246", a=f246a)

    # Imprint
    f260a = get_field(xmlrecord, "74")
    f260b = get_field(xmlrecord, "75")
    f260c = year
    subfields = ("a", f260a, "b", f260b, "c", f260c)
    marcrecord.add("260", subfields=subfields)

    # Dimension: a page range is turned into a number of pages
    f300a = get_field(xmlrecord, "77")
    match = page_range_pattern.search(f300a)
    if match:
        spage, epage = match.groups()
        f300a = str(int(epage) - int(spage) + 1) + " S."
    marcrecord.add("300", a=f300a)

    # RDA-Content
    f336b = format_spec["336b"]
    marcrecord.add("336", b=f336b)

    # RDA-Carrier
    f338b = format_spec["338b"]
    marcrecord.add("338", b=f338b)

    # Series
    f490 = get_field(xmlrecord, "85")
    match = series_pattern.search(f490)
    if match:
        f490a, f490v = match.groups()
    else:
        f490a = f490
        f490v = ""
    marcrecord.add("490", a=f490a, v=f490v)

    # Footnote
    f500a = get_field(xmlrecord, "81")
    marcrecord.add("500", a=f500a)

    # Thesis note
    marcrecord.add("502", a=thesis)

    # Subject heading: duplicates are dropped while the order of first
    # occurrence is kept, so that repeated runs produce identical output
    f650 = []
    for tag in ("31", "312", "313", "314", "315"):
        for subject in get_field(xmlrecord, tag).split("; "):
            subject = subject.replace("\n", "")
            subject = subject.replace("   ", "")
            if subject not in f650:
                f650.append(subject)
    for f650a in f650:
        marcrecord.add("650", a=f650a)

    # GND-Content- and Carrier
    f655a = format_spec["655a"]
    f6552 = format_spec["6552"]
    marcrecord.add("655", a=f655a, _2=f6552)

    # Further creators (persons), categories 41 to 58
    for i in range(41, 59):
        f700a = get_field(xmlrecord, str(i))
        marcrecord.add("700", a=f700a)

    # Further creators (corporations)
    f710a = get_field(xmlrecord, "61")
    marcrecord.add("710", a=f710a)

    # Parent journal
    if record_format == "Article":
        f773t = get_field(xmlrecord, "70")
        volume = get_field(xmlrecord, "704")
        issue = get_field(xmlrecord, "706")
        match = page_range_pattern.search(pages)
        if match:
            spage, epage = match.groups()
        else:
            spage = ""
            epage = ""
        # The ISSN has to pass the ISSN check, not the ISBN check
        issn = check_issn(get_field(xmlrecord, "88"))
        f773g = marc_build_field_773g(volume, year, issue, spage, epage)
        marcrecord.add("773", t=f773t, g=f773g, x=issn)

    # Link to fulltext or further information
    if url:
        marcrecord.add("856", q="text/html", _3="Link zur Ressource", u=url)

    # Collection facet for entire Verbundkatalog Film
    marcrecord.add("912", a="vkfilm")

    # SWB-Content
    f935c = format_spec["935c"]
    marcrecord.add("935", c=f935c)

    # Collection and sealing
    marcrecord.add("980", a=f001, b=SID, c="sid-155-col-kinemathek")

    # Write record to file
    if outputformat == "xml":
        outputfile.write(marcrecord)
    else:
        outputfile.write(marcrecord.as_marc())

outputfile.close()
