#!/usr/bin/env python3
# coding: utf-8
#
# Copyright 2020 by Leipzig University Library, http://ub.uni-leipzig.de
#                   The Finc Authors, http://finc.info
#                   Robert Schenk, <robert.schenk@uni-leipzig.de>
#
# This file is part of some open source application.
#
# Some open source application is free software: you can redistribute
# it and/or modify it under the terms of the GNU General Public
# License as published by the Free Software Foundation, either
# version 3 of the License, or (at your option) any later version.
#
# Some open source application is distributed in the hope that it will
# be useful, but WITHOUT ANY WARRANTY; without even the implied warranty
# of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with some open source application. If not, see <http://www.gnu.org/licenses/>.
#
# @license GPL-3.0+ <http://spdx.org/licenses/GPL-3.0+>

"""

Source: Kunsthochschule für Medien Köln (VK Film)
SID: 109
Ticket: #8391, #17737
Origin: Cloud

"""


import os
import sys
import re
from io import BytesIO, StringIO

import marcx
import pymarc
import xmltodict

from siskin.configuration import Config
from siskin.mappings import formats
from siskin.utils import check_isbn, check_issn, marc_build_field_008, marc_get_languages, xmlstream, convert_to_finc_id
from siskin.arguments import FincArgumentParser


def get_datafield(record, tag, code, all=False):
    """
    Return string value for (record, tag, code) or list of values if all is True.

    `record` is a nested mapping that carries its datafields under
    record["ns0:record"]["ns0:datafield"]; every datafield has an "@tag" and
    its subfields under "ns0:subfield", every subfield an "@code" and
    (usually) its content under "#text".

    With all=False the text of the first matching subfield is returned, or ""
    if that text is a list. With all=True the texts of all matching subfields
    are returned as a list. In both modes "" is returned when nothing matches.

    Datafields and subfields may each be given as a single mapping or as a
    list of mappings. Datafields without subfields and subfields without
    "#text" are skipped instead of raising KeyError.

    The parameter name `all` shadows the builtin but is kept, because callers
    pass it as a keyword.
    """
    datafields = record["ns0:record"]["ns0:datafield"]
    # A lone datafield arrives as a mapping rather than a list (presumably the
    # same xmltodict behaviour the subfield handling already accounts for).
    if not isinstance(datafields, list):
        datafields = [datafields]

    values = []
    for field in datafields:
        if field["@tag"] != tag:
            continue
        subfields = field.get("ns0:subfield")
        if subfields is None:
            continue
        if not isinstance(subfields, list):
            subfields = [subfields]
        for subfield in subfields:
            if subfield["@code"] != code:
                continue
            text = subfield.get("#text")
            if text is None:
                # Subfield without content, nothing to return or collect.
                continue
            if not all:
                return "" if isinstance(text, list) else text
            values.append(text)

    # Callers rely on "" (not an empty list) when nothing was found.
    if not values:
        return ""
    return values


def remove_brackets(field):
    """
    Return field without the markers "<<" and ">>".

    An empty list is turned into an empty string; any other value is expected
    to offer str.replace.
    """
    if isinstance(field, list) and not field:
        return ""
    cleaned = field
    for marker in ("<<", ">>"):
        cleaned = cleaned.replace(marker, "")
    return cleaned


##################################################################################
# 1. Parse arguments and prepare outputfile
##################################################################################

SID = "109"

fip = FincArgumentParser()

# Values taken from the parsed arguments
outputformat = fip.args.outputformat
inputfile = fip.args.inputfile

# Name of the output file, example: 196-output-20200701.fincmarc.mrc
outputfilename = fip.outputfilename(SID)

# Remove old output files as specified in output-hist-size
fip.remove_old_outputfiles(SID)

# For "xml" the records go through a pymarc XMLWriter, otherwise the binary
# file handle itself is used as the output
binary_handle = open(outputfilename, "wb")
outputfile = pymarc.XMLWriter(binary_handle) if outputformat == "xml" else binary_handle


##################################################################################
# 2. Get input data
##################################################################################

# Without an input file argument, fall back to the "input" entry configured for this SID
if not inputfile:
    config = Config.instance()
    inputfile = config.get(SID, "input")


##################################################################################
# 3. Process data
##################################################################################

# First pass: collect every id that some record names as its parent (field 010 $a).
# A set is used so that the membership tests below do not scan a list per record.
parent_ids = set()
parent_titles = {}

for oldrecord in xmlstream(inputfile, "record"):

    record = xmltodict.parse(oldrecord)
    parent_id = get_datafield(record, "010", "a")

    if len(parent_id) > 0:
        parent_ids.add(parent_id)

# Second pass: remember the title (field 331 $a) of every record that is a parent,
# keyed by its own id (field 001 $a).
for oldrecord in xmlstream(inputfile, "record"):

    record = xmltodict.parse(oldrecord)
    record_id = get_datafield(record, "001", "a")

    if record_id in parent_ids:
        parent_titles[record_id] = get_datafield(record, "331", "a")

# Page range such as "S. 12-34" in the extent statement. Written as a raw string so
# the backslashes reach the regex engine without relying on invalid string escapes,
# and compiled once instead of once per record.
PAGE_RANGE_PATTERN = re.compile(r"S\.\s\d+\s?-\s?\d+")

# Substrings of the extent statement (field 433 $a) that decide the format
VIDEO_CARRIER_MARKERS = ("Videokassette", "VHS", "DVD")
BOOK_MARKERS = ("S.", "Bl.", "Ill.", " p.", "XI", "XV", "X,", "Bde.", ": graph")
VIDEO_CASSETTE_MARKERS = ("Videokassette", "VHS", "Min")
OBJECT_MARKERS = ("Plakat", "Kassette", "Box", "Karton", "Postkarten", "Teile", "USB", "Schachtel", "Schautafel", "Medienkombination", "Tafel", "Faltbl",
                  "Schuber")

# Third pass: build and write one MARC record per input record
for oldrecord in xmlstream(inputfile, "record"):

    record = xmltodict.parse(oldrecord)
    marcrecord = marcx.Record(force_utf8=True)
    marcrecord.strict = False

    record_id = get_datafield(record, "001", "a")
    parent_id = get_datafield(record, "010", "a")
    title = get_datafield(record, "331", "a")

    # Records without a title and records with "Brockhaus" in the title are left out
    if "Brockhaus" in title or len(title) == 0:
        continue

    f245a = title
    f245p = ""
    f773w = ""
    f773t = ""
    is_child = False

    # If the title of the parent is known, the parent title becomes the main title,
    # the own title becomes the part title and a link to the parent is prepared
    parent_title = parent_titles.get(parent_id) if parent_id else None
    if parent_title:
        f245a = parent_title
        f245p = title
        f773w = "(DE-576)" + parent_id
        f773t = parent_title
        is_child = True

    # Values that are needed more than once below
    extent = get_datafield(record, "433", "a")
    isbn = get_datafield(record, "540", "a")
    year = get_datafield(record, "425", "a")
    languages = get_datafield(record, "037", "a")
    is_parent = record_id in parent_ids

    # Format, the first matching rule wins
    extent_text = u'%s' % extent

    if is_parent:
        format_key = "Multipart"
    elif len(isbn) > 0 and not any(marker in extent_text for marker in VIDEO_CARRIER_MARKERS):
        format_key = "Book"
    elif any(marker in extent_text for marker in BOOK_MARKERS):
        format_key = "Book"
    elif "CD" in extent_text:
        format_key = "CD-Audio"
    elif "DVD" in extent_text:
        format_key = "DVD-Video"
    elif "Blu-ray" in extent_text:
        format_key = "Blu-Ray-Disc"
    elif any(marker in extent_text for marker in VIDEO_CASSETTE_MARKERS):
        format_key = "Video-Cassette"
    elif "Losebl.-Ausg." in extent_text:
        format_key = "Loose-leaf"
    elif PAGE_RANGE_PATTERN.search(extent_text):
        # NOTE(review): never reached - the pattern requires "S.", which the Book
        # rule above already catches. Kept unchanged; confirm the intended order.
        format_key = "Article"
    elif any(marker in extent_text for marker in OBJECT_MARKERS):
        format_key = "Object"
    elif is_parent and len(isbn) == 0:  # journal
        # NOTE(review): never reached - parents are already handled by the first
        # rule. Kept unchanged; confirm whether journals were meant to be skipped.
        continue
    else:
        format_key = "Book"

    format_entry = formats[format_key]

    # Leader, for child records everything from position 19 on is replaced by "c4500"
    leader = format_entry["Leader"]
    if is_child:
        leader = leader[:19] + "c4500"
    marcrecord.leader = leader

    # Identifier
    f001 = record_id
    marcrecord.add("001", data="finc-109-" + f001)

    # Access type
    f007 = format_entry["p007"]
    marcrecord.add("007", data=f007)

    # Periodicity
    language = marc_get_languages(languages)
    periodicity = format_entry["008"]
    f008 = marc_build_field_008(year, periodicity, language)
    marcrecord.add("008", data=f008)

    # ISBN, taken from field 540 and from field 570
    f020a = check_isbn(isbn)
    marcrecord.add("020", a=f020a)
    f020a = check_isbn(get_datafield(record, "570", "a"))
    marcrecord.add("020", a=f020a)

    # Language
    f041a = marc_get_languages(languages)
    marcrecord.add("041", a=f041a)

    # First creator
    f100a = remove_brackets(get_datafield(record, "100", "a"))
    marcrecord.add("100", a=f100a, _4="aut")

    # Main title, part title and responsibility
    f245a = remove_brackets(f245a)
    f245c = get_datafield(record, "359", "a")
    f245p = remove_brackets(f245p)
    f245 = ["a", f245a, "c", f245c, "p", f245p]
    marcrecord.add("245", subfields=f245)

    # Imprint
    f260a = get_datafield(record, "410", "a")
    f260b = remove_brackets(get_datafield(record, "412", "a"))
    f260c = year
    subfields = ["a", f260a, "b", f260b, "c", f260c]
    marcrecord.add("260", subfields=subfields)

    # Extent
    f300a = remove_brackets(extent)
    f300b = get_datafield(record, "434", "a")
    f300 = ["a", f300a, "b", f300b]
    marcrecord.add("300", subfields=f300)

    # RDA-Content
    f336b = format_entry["336b"]
    marcrecord.add("336", b=f336b)

    # RDA-Carrier
    f338b = format_entry["338b"]
    marcrecord.add("338", b=f338b)

    # Series, "title ; volume" is split into $a and $v. Any other number of
    # parts is handed over as the list of parts in $a.
    f490 = get_datafield(record, "451", "a")
    if len(f490) > 0:
        f490 = f490.split(" ; ")
        if len(f490) == 2:
            f490a = f490[0]
            f490v = f490[1]
        else:
            f490a = f490
            f490v = ""
        marcrecord.add("490", a=f490a, v=f490v)

    # Subject headings from fields 710 and 711, duplicates within a field are
    # dropped. dict.fromkeys keeps the order of the source, whereas a set would
    # make the order of the 650 fields vary between runs.
    for subject_tag in ("710", "711"):
        for f650a in dict.fromkeys(get_datafield(record, subject_tag, "a", all=True)):
            f650a = remove_brackets(f650a)
            marcrecord.add("650", a=f650a)

    # GND-Content and carrier
    f655a = format_entry["655a"]
    f6552 = format_entry["6552"]
    marcrecord.add("655", a=f655a, _2=f6552)

    # Additional creators
    for tag in range(101, 200):
        f700a = get_datafield(record, str(tag), "a")
        f700a = remove_brackets(f700a)
        marcrecord.add("700", a=f700a)

    # Additional institutions
    for tag in range(200, 300):
        f710a = get_datafield(record, str(tag), "a")
        f710a = remove_brackets(f710a)
        marcrecord.add("710", a=f710a)

    # Parent work
    if f773t and f773w:
        marcrecord.add("773", t=f773t, w=f773w, indicators="18")

    # Links, only written when the value looks like a URL
    f856u = get_datafield(record, "655", "u")
    f8563 = get_datafield(record, "655", "x")
    if len(f8563) == 0:
        f8563 = u"zusätzliche Informationen"
    if "http" in f856u:
        marcrecord.add("856", q="text/html", _3=f8563, u=f856u)

    # Collection facet for entire Verbundkatalog Film
    marcrecord.add("912", a="vkfilm")

    # SWB-Inhaltstyp
    f935c = format_entry["935c"]
    marcrecord.add("935", c=f935c)

    # Collection
    collection = ["a", f001, "b", SID, "c", "sid-109-col-kunsthochschulekoeln"]
    marcrecord.add("980", subfields=collection)

    # Convert all identifier in 001, 770, 772 ... to Finc schema
    marcrecord = convert_to_finc_id(SID, marcrecord, encode=False, finc_prefix=True)

    # Write output file
    if outputformat == "xml":
        outputfile.write(marcrecord)
    else:
        outputfile.write(marcrecord.as_marc())

outputfile.close()
