#!/usr/bin/env python3
# coding: utf-8
#
# Copyright 2020 by Leipzig University Library, http://ub.uni-leipzig.de
#                   The Finc Authors, http://finc.info
#                   Robert Schenk, <robert.schenk@uni-leipzig.de>
#                   Martin Czygan, <martin.czygan@uni-leipzig.de>
#
# This file is part of some open source application.
#
# Some open source application is free software: you can redistribute
# it and/or modify it under the terms of the GNU General Public
# License as published by the Free Software Foundation, either
# version 3 of the License, or (at your option) any later version.
#
# Some open source application is distributed in the hope that it will
# be useful, but WITHOUT ANY WARRANTY; without even the implied warranty
# of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with some open source application.  If not, see <http://www.gnu.org/licenses/>.
#
# @license GPL-3.0+ <http://spdx.org/licenses/GPL-3.0+>

"""

Source: mediarep
SID: 170
Ticket: #14064, #15766, #16137, #18260
Origin: OAI

"""


import io
import os
import re
import sys

import xmltodict
from io import BytesIO

import marcx
import pymarc
from siskin.configuration import Config
from siskin.mappings import formats
from siskin.utils import marc_build_field_008, check_isbn, check_issn, marc_get_languages
from siskin.arguments import FincArgumentParser


def delistify(value, first=True, concat=None):
    """
    Collapse a list or tuple into a single value.

    Anything that is not a list or tuple is handed back untouched. An empty
    sequence yields the empty string. For a non-empty sequence:

    * first=True (default): the first element is returned,
    * first=False and concat given: the elements are joined with concat,
    * first=False and concat=None: the sequence itself is returned unchanged.
    """
    if not isinstance(value, (list, tuple)):
        return value
    if not value:
        return ""
    if first:
        return value[0]
    if concat is None:
        # No strategy selected, hand the sequence back as it came in.
        return value
    return concat.join(value)


##################################################################################
# 1. Parse arguments and prepare outputfile
##################################################################################

# Source identifier of this data source, passed to all FincArgumentParser helpers
SID = "170"

fip = FincArgumentParser()

# Command line arguments
inputfile, outputformat = fip.args.inputfile, fip.args.outputformat

# Name of the output file (original note: looks like
# 196-output-20200701.fincmarc.mrc for SID 196)
outputfilename = fip.outputfilename(SID)

# Housekeeping before the new output file is created: drops old input and
# output files (original note: as many as input-hist-size and
# output-hist-size specify)
fip.remove_old_outputfiles(SID)
fip.remove_old_inputfiles(SID)

# The records go into a binary file handle; for XML output that handle is
# wrapped into a pymarc.XMLWriter
raw_output = open(outputfilename, "wb")
outputfile = pymarc.XMLWriter(raw_output) if outputformat == "xml" else raw_output


##################################################################################
# 2. Get input data
################################################################################

if not inputfile:
    # No input file was given on the command line: harvest the OAI endpoint
    # with metha and dump the result into the default input file for this SID.
    inputfile = fip.inputfilename(SID)
    os.system('metha-sync -rm -format oai_dc https://mediarep.org/oai/request')
    os.system('metha-cat -format oai_dc https://mediarep.org/oai/request > %s' % inputfile)

# Read the whole harvest at once; the context manager makes sure the file
# handle is closed again (it was left open before).
with open(inputfile, "rb") as handle:
    xmlfile = handle.read()

# An empty <Records/> root is parsed to None, hence the fallback to {}.
records_node = xmltodict.parse(xmlfile)["Records"] or {}
xmlrecords = records_node.get("Record") or []

# xmltodict only builds a list when an element is repeated. A harvest with
# exactly one record would otherwise be a single mapping and the record loop
# would iterate over its keys.
if not isinstance(xmlrecords, list):
    xmlrecords = [xmlrecords]


##################################################################################
# 3. Process data
##################################################################################

# Maps a dc:type value to the key used in siskin.mappings.formats. Everything
# that is not listed here is treated as "Article".
FORMAT_BY_DC_TYPE = {
    "book": "Book",
    "bookPart": "Chapter",
    "doctoralThesis": "Thesis",
    "MovingImage": "Online-Video",
}

# Subjects containing three consecutive digits are treated as DDC notations
DDC_PATTERN = re.compile(r"\d\d\d")

# The numeric part of the OAI identifier becomes part of field 001
IDENTIFIER_PATTERN = re.compile(r"oai:mediarep\.org:doc/(\d+)")


def _as_list(value):
    """
    Returns value as a list. The parsed XML holds a plain value for an element
    that occurs once and a list for a repeated element; this evens out both
    cases. None and the empty string become an empty list.
    """
    if value is None or value == "":
        return []
    if isinstance(value, (list, tuple)):
        return list(value)
    return [value]


for xmlrecord in xmlrecords:

    # Records without metadata or without a title are skipped
    dc = (xmlrecord.get("metadata") or {}).get("oai_dc:dc") or {}
    if not dc.get("dc:title"):
        continue

    # Identifier: without a parsable identifier there is no value for field
    # 001 and 980a, so the record is reported and skipped. Previously the
    # message referenced f001, which is undefined (or left over from the
    # previous record) in this case.
    oai_identifier = xmlrecord["header"]["identifier"]
    id_match = IDENTIFIER_PATTERN.match(oai_identifier)
    if not id_match:
        print("Der Identifier konnte nicht zerlegt werden: " + oai_identifier)
        continue
    f001 = id_match.group(1)

    marcrecord = marcx.Record(force_utf8=True)
    marcrecord.strict = False

    # Format mapping: the first known dc:type wins. dc:type may be missing,
    # a single string or a list.
    record_format = "Article"
    for dc_type in _as_list(dc.get("dc:type")):
        if isinstance(dc_type, str) and dc_type in FORMAT_BY_DC_TYPE:
            record_format = FORMAT_BY_DC_TYPE[dc_type]
            break
    format_mapping = formats[record_format]

    # Leader
    marcrecord.leader = format_mapping["Leader"]

    # Identifier
    marcrecord.add("001", data="finc-170-" + f001)

    # Access type
    marcrecord.add("007", data=format_mapping["e007"])

    # Periodicity
    year = dc.get("dc:date") or ""
    periodicity = format_mapping["008"]
    language = marc_get_languages(dc.get("dc:language", ""))
    if language in ("de", "deu"):
        language = "ger"
    f008 = marc_build_field_008(year, periodicity, language)
    marcrecord.add("008", data=f008)

    # All dc:identifier values as a list of strings; used for ISBN, ISSN and
    # the links in field 856
    identifiers = [value for value in _as_list(dc.get("dc:identifier")) if isinstance(value, str)]

    # ISBN: take the first identifier that passes check_isbn. The loop used to
    # stop unconditionally after the first identifier.
    # NOTE(review): assumes check_isbn returns a falsy value for anything that
    # is not an ISBN - confirm against siskin.utils
    for candidate in identifiers:
        f020a = check_isbn(candidate)
        if f020a:
            marcrecord.add("020", a=f020a)
            break

    # ISSN: same strategy as for the ISBN, the "issn:" prefix is removed first
    # NOTE(review): assumes check_issn returns a falsy value for anything that
    # is not an ISSN - confirm against siskin.utils
    for candidate in identifiers:
        f022a = check_issn(candidate.replace("issn:", ""))
        if f022a:
            marcrecord.add("022", a=f022a)
            break

    # Language
    language = marc_get_languages(dc.get("dc:language", ""))
    marcrecord.add("041", a=language)

    # Topic
    raw_subjects = dc.get("dc:subject")
    if isinstance(raw_subjects, list):
        # Only the first subject that looks like a DDC notation is used
        for subject in raw_subjects:
            if subject and DDC_PATTERN.search(subject):
                marcrecord.add("082", a=subject, _2="ddc")
                break
    elif raw_subjects and DDC_PATTERN.search(raw_subjects):
        # NOTE(review): a single subject goes to field 084 while a list of
        # subjects goes to field 082. Kept as it was - confirm whether this
        # difference is intended.
        marcrecord.add("084", a=raw_subjects, _2="ddc")

    # First creator
    creators = _as_list(dc.get("dc:creator"))
    if creators:
        marcrecord.add("100", a=creators[0], _4="aut")

    # Title
    marcrecord.add("245", a=dc["dc:title"])

    # Imprint
    f260b = dc.get("dc:publisher") or ""
    f260c = dc.get("dc:date") or ""
    publisher = ["b", delistify(f260b), "c", delistify(f260c)]
    marcrecord.add("260", subfields=publisher)

    # RDA-Content
    marcrecord.add("336", b=format_mapping["336b"])

    # RDA-Carrier
    marcrecord.add("338", b=format_mapping["338b"])

    # Description
    if dc.get("dc:description"):
        marcrecord.add("520", a=dc["dc:description"])

    # Subject headings: every subject that is not a DDC notation. The branch
    # for a single subject used to test the wrong variable (subject instead
    # of subjects).
    for subject in _as_list(raw_subjects):
        if subject and not DDC_PATTERN.search(subject):
            marcrecord.add("650", a=subject)

    # GND-Content and -Carrier
    marcrecord.add("655", a=format_mapping["655a"], _2=format_mapping["6552"])

    # Additional creators
    for creator in creators[1:]:
        marcrecord.add("700", a=creator)

    # Parent journal: within a list only entries containing a colon are used,
    # a single source is always used
    raw_sources = dc.get("dc:source")
    if isinstance(raw_sources, list):
        sources = [source for source in raw_sources if source and ":" in source]
    elif raw_sources:
        sources = [raw_sources]
    else:
        sources = []
    for source in sources:
        # Drop a leading "In: "; with no or more than one "In: " the first
        # part is used
        parts = source.split("In: ")
        marcrecord.add("773", a=parts[1] if len(parts) == 2 else parts[0])

    # Link to record and fulltext
    for f856u in identifiers:
        if "https://mediarep.org" in f856u:
            marcrecord.add("856", q="text/html", _3="Link zur Ressource", u=f856u)
        elif "doi.org" in f856u:
            marcrecord.add("856", q="text/html", _3="Zitierlink (DOI)", u=f856u)

    # SWB-Content
    marcrecord.add("935", c=format_mapping["935c"])

    # Collection
    collection = ["a", f001, "b", SID, "c", "sid-170-col-mediarep"]
    marcrecord.add("980", subfields=collection)

    # Write record to file
    if outputformat == "xml":
        outputfile.write(marcrecord)
    else:
        outputfile.write(marcrecord.as_marc())

outputfile.close()
