#!/usr/bin/env python3
# coding: utf-8
#
# Copyright 2020 by Leipzig University Library, http://ub.uni-leipzig.de
#                   The Finc Authors, http://finc.info
#                   Robert Schenk, <robert.schenk@uni-leipzig.de>
#
# This file is part of some open source application.
#
# Some open source application is free software: you can redistribute
# it and/or modify it under the terms of the GNU General Public
# License as published by the Free Software Foundation, either
# version 3 of the License, or (at your option) any later version.
#
# Some open source application is distributed in the hope that it will
# be useful, but WITHOUT ANY WARRANTY; without even the implied warranty
# of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Foobar. If not, see <http://www.gnu.org/licenses/>.
#
# @license GPL-3.0+ <http://spdx.org/licenses/GPL-3.0+>

"""

Source:  Filmuniversität Konrad Wolf Potsdam (VK Film)
sid: 127
Ticket: #17547
Origin: Website

"""


import io
import re
import sys

import marcx
import pymarc
import xmltodict

from siskin.arguments import FincArgumentParser
from siskin.configuration import Config
from siskin.mab import MabXMLFile
from siskin.mappings import formats
from siskin.utils import (
    check_isbn,
    check_issn,
    convert_to_finc_id,
    marc_build_field_008,
    marc_clean_record,
    marc_get_languages,
)


def get_valid_language(record, field):
    """
    Takes languages codes and returns the first in a valid form.

    The value of `field` is read from `record` (empty string when the field
    is missing) and split on the first separator that applies, in this order
    of precedence: "$$", ";" and "$". The first part is returned with
    surrounding whitespace removed, so "ger ; eng" and "ger;eng" both yield
    "ger".
    """
    languages = record.field(field, alt="")

    if "$$" in languages:
        separator = "$$"
    elif ";" in languages:
        # Split on the bare semicolon and strip afterwards. Splitting on
        # " ; " returned the whole value for codes written without spaces.
        separator = ";"
    else:
        separator = "$"

    return languages.split(separator)[0].strip()


def get_clean_url(record, url):
    """
    Takes a URL and returns it without subfield codes and other fragments.

    Handled forms:

        http://www.shortfilm.ch                 -> returned unchanged
        http://www.ses.fi$$9htm                 -> http://www.ses.fi
        htm$$uhttp://host/path/$$               -> http://host/path/

    The `record` argument is not used; it is kept so existing callers keep
    working.
    """
    # The value starts with the URL: cut everything from the first "$" on.
    leading = re.search(r"^(http.*?)\$", url)
    if leading:
        return leading.group(1)

    if "$" in url:
        # The URL follows a subfield code, e.g. "htm$$uhttp://...$$". Pick the
        # URL itself; taking the text between the first two "$" would yield an
        # empty string for adjacent "$$".
        embedded = re.search(r"https?://[^$]+", url)
        if embedded:
            return embedded.group(0)

        # No URL recognizable: fall back to the text enclosed by "$".
        enclosed = re.search(r"\$(.*?)\$", url)
        if enclosed:
            return enclosed.group(1)

    return url


def get_clean_subject(subject):
    """
    Takes a subject and returns it without old subfield codes and other invalid chars.

    Every "/C/" is replaced by " ; ". After that the subject is cut off in
    front of the first "/<char>$" sequence or, if there is none, in front of
    the first "$". A subject without such a marker is returned as is.
    """
    subject = subject.replace("/C/", " ; ")

    # Raw strings keep the regex escapes intact without relying on Python
    # passing unknown string escapes through. The more specific pattern is
    # tried first.
    for pattern in (r"(.*?)/\w\$", r"(.*?)\$"):
        match = re.search(pattern, subject)
        if match:
            return match.group(1)

    return subject


##################################################################################
# 1. Parse arguments and prepare outputfile
##################################################################################

SID = "127"

fip = FincArgumentParser()

# Get arguments
inputfile = fip.args.inputfile
outputformat = fip.args.outputformat

# Generates string for outputfilename, example: 127-output-20200701.fincmarc.mrc
outputfilename = fip.outputfilename(SID)

# Removes n old outputfiles as specified in output-hist-size
fip.remove_old_outputfiles(SID)

# Open the output file once. For XML output the binary file handle is wrapped
# in a pymarc XMLWriter (needs the module-level "import pymarc"); for any
# other format the raw handle is used and records are written as binary MARC.
outputfile = open(outputfilename, "wb")
if outputformat == "xml":
    outputfile = pymarc.XMLWriter(outputfile)


##################################################################################
# 2. Get input data
##################################################################################

# Without an input file on the command line, use the "input" value that the
# configuration holds for this source.
if not inputfile:
    config = Config.instance()
    inputfile = config.get(SID, "input")

# NOTE(review): the replace pair presumably makes MabXMLFile drop replacement
# characters from the input before parsing - confirm against siskin.mab.
reader = MabXMLFile(inputfile, replace=(u"�", ""), encoding='utf-8')


##################################################################################
# 3. Process data
##################################################################################

# Two passes over the input are needed: the identifiers of all parent records
# have to be known before their titles can be picked up.
# NOTE(review): this relies on the reader being iterable more than once.

# Pass 1: collect the identifiers (MAB field 010) that records use to point
# to their parent record. A set keeps the membership tests below and in the
# main loop cheap, regardless of the number of records.
parent_ids = set()

for record in reader:

    parent_id = record.field("010", alt="")
    if parent_id:
        parent_ids.add(parent_id)

# Pass 2: remember the title (MAB field 331) of every record that is
# referenced as a parent, keyed by its own identifier (MAB field 001).
parent_titles = {}

for record in reader:

    record_id = record.field("001")
    if record_id in parent_ids:
        parent_titles[record_id] = record.field("331")

# Maps the code in MAB field 050 to the finc format name. Codes that are not
# listed here fall back to "Book".
F050_FORMATS = {
    "a|||||||||||||": "Book",  # book
    "|||||ce|d|||||": "CD-Video",  # video on optical storage medium
    "|||||ce|||||||": "CD-Video",  # video
    "|||||ca|||||||": "Video-Cassette",  # video tape cassette
    "|||||aa|||||||": "CD-Audio",  # audio CD
    "|||||ba|||||||": "Film-Role",  # film reel
    "|||||aj|||||||": "Vinyl-Record",  # vinyl record
    "a|a|||||||||||": "Book",  # book
    "||||||||d|||||": "CD-ROM",  # CD-ROM
    "|||||ba|d|||||": "Film-Role",  # film reel on optical storage medium?
    "||||||||g|||||": "Remote-Computerfile",  # computer file, remote access
    "a|a|||||z|||||": "Local-Computerfile",  # computer file
    "a|||||||g|||||": "Remote-Computerfile",  # computer file, remote access
    "|||||ad|||||||": "Audio-Cassette",  # compact cassette
    "a|a|||||g|||||": "Remote-Computerfile",  # computer file, remote access
    "||||||||z|||||": "Local-Computerfile",  # computer file
    "|||||ac|||||||": "Audio-Cassette",  # audio tape
    "||||||||e|||||": "Object",  # plug-in module
    "|||||dc|||||||": "CD-Video",  # videodisc
    "||||||||a|||||": "Local-Computerfile",  # computer file
    "a|||||||d|||||": "CD-ROM",  # optical storage medium
    "||a|||||d|||||": "CD-ROM",  # optical storage medium
    "|||||aa|d|||||": "CD-Audio",  # audio CD
    "|||||ba| |||||": "Film-Role",  # film reel
}

# Fragments of MAB field 655 that mark a link to the resource itself.
FULLTEXT_MARKERS = (
    "Volltext",
    "http://www.bibliothek.uni-regensburg.de/ezeit/",
    "http://www.oapen.org/search?identifier",
    "http://dx.doi.org/",
)

# MAB fields and subfields that are copied to MARC field 520.
ABSTRACT_FIELDS = ("750", "753", "754", "755", "756", "757", "758")
ABSTRACT_SUBFIELDS = "abcdefghijklm"

# MAB fields 100, 104, 108, ..., 196 are read as creators.
CREATOR_FIELDS = [str(number) for number in range(100, 200, 4)]


def _detect_format(record, record_id):
    """
    Returns the finc format name for a MAB record.

    Checked in this order: referenced as a parent record (Multipart), field
    519 present (Thesis), field 052 present (Journal), field 051 starting
    with "a" (Article), then the lookup of field 050 with "Book" as default.
    """
    if record_id in parent_ids:
        return "Multipart"
    if record.field("519", alt=""):
        return "Thesis"
    if record.field("052", alt=""):
        return "Journal"
    if record.field("051", alt="").startswith("a"):
        return "Article"
    return F050_FORMATS.get(record.field("050", alt=""), "Book")


def _convert_record(record):
    """
    Builds the MARC record for one MAB record and returns it.

    Fields are added in a fixed order, which is also the order in which they
    end up in the written record.
    """
    marcrecord = marcx.Record(force_utf8=True)
    marcrecord.strict = False

    record_id = record.field("001")
    url = record.field("655", alt="")
    year = record.field("425", alt="")

    # Format mapping
    format_name = _detect_format(record, record_id)
    mapping = formats[format_name]

    # Leader
    marcrecord.leader = mapping["Leader"]

    # Identifier
    marcrecord.add("001", data="127-" + record_id)

    # Access type: any link other than one to d-nb.info makes the record an
    # electronic resource
    is_electronic = bool(url) and "http://d-nb.info" not in url
    marcrecord.add("007", data=mapping["e007"] if is_electronic else mapping["p007"])

    # Periodicity
    language = get_valid_language(record, "037")
    f008 = marc_build_field_008(year, mapping["008"], marc_get_languages(language))
    marcrecord.add("008", data=f008)

    # ISBN
    for isbn in record.fields("540"):
        marcrecord.add("020", a=check_isbn(isbn))

    # ISSN (the last one is reused for the parent work in field 773)
    f022a = ""
    for issn in record.fields("542"):
        f022a = check_issn(issn)
        marcrecord.add("022", a=f022a)

    # Language
    marcrecord.add("041", a=language)

    # First creator
    f100a = record.field("100", alt="").split("$$")[0]
    if f100a:
        marcrecord.add("100", a=f100a, _4="aut")

    # Worktitle
    marcrecord.add("240", a=record.field("304"))

    # Main title, subtitle and responsibility
    # If the record points to a parent record with a known title, the parent
    # title becomes the main title and the own title the part title. The
    # decision is made from this record's data only, so nothing carries over
    # from the previously converted record.
    f010 = record.field("010", alt="")
    own_title = record.field("331", alt="")
    parent_title = parent_titles.get(f010) if f010 else None
    if parent_title:
        f245a, f245p = parent_title, own_title
    else:
        f245a, f245p = own_title, ""
    f245b = record.field("335", alt="")
    f245c = record.field("359", alt="")
    f245n = record.field("089", alt="")
    marcrecord.add("245", a=f245a, b=f245b, c=f245c, n=f245n, p=f245p)

    # Parallel- and alternative title
    marcrecord.add("246", a=record.field("340", alt=""))
    marcrecord.add("246", a=record.field("370", alt=""))

    # Attached work
    marcrecord.add("249", a=record.field("361", alt=""))

    # Edition
    marcrecord.add("250", a=record.field("403", alt=""))

    # Imprint
    f260a = record.field("410", alt="")
    f260b = record.field("412", alt="")
    marcrecord.add("260", a=f260a, b=f260b, c=year)

    # Extent
    f300a = record.field("433", alt="")
    f300b = record.field("434", alt="")
    f300c = record.field("435", alt="")
    marcrecord.add("300", a=f300a, b=f300b, c=f300c)

    # RDA-content
    marcrecord.add("336", b=mapping["336b"])

    # RDA-carrier
    marcrecord.add("338", b=mapping["338b"])

    # Footnote (a note mentioning "FSK" goes to field 521 instead, see below)
    f501 = record.field("501", alt="")
    has_fsk_note = "FSK" in f501
    if not has_fsk_note:
        marcrecord.add("500", a=f501)

    # University note
    marcrecord.add("502", a=record.field("519", alt=""))
    marcrecord.add("502", a=record.field("520", alt=""))

    # Abstract
    for field in ABSTRACT_FIELDS:
        marcrecord.add("520", a=record.field(field, alt=""))

    for subfield in ABSTRACT_SUBFIELDS:
        marcrecord.add("520", a=record.field("750", subfield, alt=""))

    # System requirements
    marcrecord.add("538", a=record.field("651", alt=""))

    # FSK note
    if has_fsk_note:
        marcrecord.add("521", a=f501)

    # Subject headings
    raw_subjects = list(record.fields("710") or [])
    f720 = record.field("720", alt="")
    separator = " / " if " / " in f720 else " ; "
    raw_subjects.extend(f720.split(separator))

    # A dict removes duplicates and keeps the order of first occurrence, so
    # the output is the same on every run. A missing field 720 yields an
    # empty string, which is skipped.
    unique_subjects = dict.fromkeys(get_clean_subject(subject) for subject in raw_subjects)
    for subject in unique_subjects:
        if subject:
            marcrecord.add("650", a=subject)

    # GND-content and -carrier
    marcrecord.add("655", a=mapping["655a"], _2=mapping["6552"])

    # Additional creators (everyone but the first creator)
    for field in CREATOR_FIELDS:
        f700a = record.field(field, alt="").split("$$")[0]
        if f700a != f100a:
            marcrecord.add("700", a=f700a)

    # Parent work
    f773g = record.field("596", alt="")
    match = re.search(r"(.*)\. - Sign", f773g)
    if match:
        f773g = match.group(1)
    if f773g and re.match(r"\d\d\d\d", year):
        f773g = year + ". - " + f773g
    f773t = record.field("590", alt="") or record.field("597", alt="")
    marcrecord.add("773", t=f773t, g=f773g, w=f010, x=f022a)

    # Link to the resource or to the table of contents
    # The value sometimes starts with htm$$u and sometimes ends with $$:
    # http://www.shortfilm.ch
    # http://www.ses.fi$$9htm
    # htm$$uhttp://www.freidok.uni-freiburg.de/volltexte/127/$$
    if url:

        if any(marker in url for marker in FULLTEXT_MARKERS):
            marcrecord.add("856", _3="Link zur Ressource", u=get_clean_url(record, url))

        if "Inhalt" in url:
            marcrecord.add("856", _3="Link zum Inhaltsverzeichnis", u=get_clean_url(record, url))

    # Link to the record in the catalogue of the library
    marcrecord.add("856",
                   _3="Link zu Filmuniversität Babelsberg Konrad Wolf",
                   u="http://server8.bibl.filmuniversitaet.de/F/?func=find-c&ccl_term=idn=%s&local_base=HFF01" % record_id)

    # Collection
    marcrecord.add("912", a="vkfilm")

    # SWB-content
    marcrecord.add("935", c=mapping["935c"])

    # Collection and sealing
    collections = ["a", record_id, "b", SID, "c", "sid-127-col-filmunivpotsdam"]
    marcrecord.add("980", subfields=collections)

    # Convert all identifier in 001, 770, 772 ... to Finc schema
    return convert_to_finc_id(SID, marcrecord, encode=False, finc_prefix=True)


# Convert and write record by record. The output is closed even if a record
# fails to convert.
try:
    for record in reader:

        marcrecord = _convert_record(record)

        # Write record to file
        if outputformat == "xml":
            outputfile.write(marcrecord)
        else:
            outputfile.write(marcrecord.as_marc())
finally:
    outputfile.close()
