#!/usr/bin/env python3
# coding: utf-8
#
# Copyright 2020 by Leipzig University Library, http://ub.uni-leipzig.de
#                   The Finc Authors, http://finc.info
#                   Robert Schenk, <robert.schenk@uni-leipzig.de>
#                   Martin Czygan, <martin.czygan@uni-leipzig.de>
#
# This file is part of some open source application.
#
# Some open source application is free software: you can redistribute
# it and/or modify it under the terms of the GNU General Public
# License as published by the Free Software Foundation, either
# version 3 of the License, or (at your option) any later version.
#
# Some open source application is distributed in the hope that it will
# be useful, but WITHOUT ANY WARRANTY; without even the implied warranty
# of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with some open source application. If not, see <http://www.gnu.org/licenses/>.
#
# @license GPL-3.0+ <http://spdx.org/licenses/GPL-3.0+>

"""

Source: Filmakademie Baden-Württemberg (VK Film)
SID: 151
Ticket: #8700, #15871, #16012, #18258
Origin: HTTP

"""


import os
import sys
import re
import time
import requests

import marcx
import xmltodict

from urllib.parse import urlencode
from xml.dom.minidom import parseString
from siskin.mappings import formats, roles
from siskin.configuration import Config
from siskin.utils import marc_build_field_008, check_isbn, xmlstream, marc_get_languages
from siskin.arguments import FincArgumentParser
from six.moves import html_parser


# Module-level HTML parser instance.
# NOTE(review): nothing in the visible part of this script references
# `parser` — confirm whether it (and the six.moves import) can be dropped.
parser = html_parser.HTMLParser()


def get_controlfield(xmlrecord, tag):
    """
    Return the text of the first controlfield carrying the given tag.

    `xmlrecord` is the dict xmltodict produced for one <record> element.
    An empty string is returned when the record has no controlfield at all,
    when no controlfield has that tag, or when the matching one is empty, so
    callers can treat "" as "no value".
    """
    record = xmlrecord["record"] or {}
    fields = record.get("controlfield") or []
    # xmltodict yields a bare dict instead of a list when an element occurs
    # only once; normalise so the loop below always iterates over fields.
    if isinstance(fields, dict):
        fields = [fields]
    for field in fields:
        # Elements without attributes are parsed to plain strings; they
        # cannot carry a tag and are therefore skipped.
        if not isinstance(field, dict):
            continue
        if field.get("@tag") == tag:
            return field.get("#text") or ""
    return ""


def get_datafield(xmlrecord, tag):
    """
    Return the text of the first datafield carrying the given tag.

    Only the first datafield with a matching "@tag" is considered; its
    "#text" value is returned. If no datafield matches, "" is returned.
    """
    candidates = (entry for entry in xmlrecord["record"]["datafield"] if entry["@tag"] == tag)
    first_hit = next(candidates, None)
    if first_hit is None:
        return ""
    return first_hit["#text"]


def get_subfield(xmlrecord, tag, code):
    """
    Return the text of the first subfield `code` found in a datafield `tag`.

    All datafields with the given tag are searched in document order.
    Subfields without a code are treated as having the code "". An empty
    string is returned when nothing matches or the matching subfield has no
    text.
    """
    for datafield in xmlrecord["record"]["datafield"]:
        if datafield["@tag"] != tag:
            continue
        # Datafields holding only text have no "subfield" entry at all.
        subfields = datafield.get("subfield") or []
        # Without force_list a lone subfield arrives as a bare dict.
        if isinstance(subfields, dict):
            subfields = [subfields]
        for subfield in subfields:
            # Subfields without attributes are plain strings (or None) and
            # cannot carry a code; skip them instead of catching the error.
            if not isinstance(subfield, dict):
                continue
            if subfield.get("@code", "") == code:
                return subfield.get("#text") or ""
    return ""


def get_creators(xmlrecord):
    """
    Collect the additional creators of a record together with their roles.

    Every datafield whose tag is a number from 101 to 196 contributes one
    [creator, role] pair: subfield "a" supplies the creator and subfield "b"
    the role. A role written in square brackets is lower-cased, stripped of
    dots and looked up in the `roles` mapping ("" if unknown).

    NOTE(review): a role without square brackets is passed through verbatim
    instead of being mapped — confirm that this is intended.
    """
    values = []
    for datafield in xmlrecord["record"]["datafield"]:
        try:
            tag = int(datafield["@tag"])
        except (KeyError, TypeError, ValueError):
            # Missing or non-numeric tags cannot be creator fields.
            continue
        if not 101 <= tag <= 196:
            continue

        creator = ""
        role = ""
        # Datafields holding only text have no "subfield" entry at all.
        subfields = datafield.get("subfield") or []
        # Without force_list a lone subfield arrives as a bare dict.
        if isinstance(subfields, dict):
            subfields = [subfields]
        for subfield in subfields:
            # Subfields without attributes are plain strings (or None).
            if not isinstance(subfield, dict):
                continue
            code = subfield.get("@code")
            text = subfield.get("#text") or ""
            if code == "a":
                creator = text
            elif code == "b":
                role = text
                # Raw string: "\[" is an invalid escape in a normal literal.
                match = re.search(r"\[(.*?)\]", role)
                if match:
                    role = match.group(1).lower().replace(".", "")
                    role = roles.get(role, "")
        values.append([creator, role])
    return values


def get_subjects(xmlrecord):
    """
    Return the subject headings of a record.

    The result lists, in document order, the text of every subfield "a"
    found in a datafield with tag "710". Subfields without a code or without
    text are ignored, mirroring how get_subfield tolerates them.
    """
    values = []
    for datafield in xmlrecord["record"]["datafield"]:
        if datafield["@tag"] != "710":
            continue
        # Datafields holding only text have no "subfield" entry at all.
        subfields = datafield.get("subfield") or []
        # Without force_list a lone subfield arrives as a bare dict.
        if isinstance(subfields, dict):
            subfields = [subfields]
        for subfield in subfields:
            # Subfields without attributes are plain strings (or None).
            if not isinstance(subfield, dict):
                continue
            text = subfield.get("#text")
            if subfield.get("@code") == "a" and text:
                values.append(text)
    return values


def get_languages(xmlrecord):
    """
    Return the text of every datafield with tag "037", in document order.

    The result is an empty list when the record has no such datafield.
    """
    return [
        entry["#text"]
        for entry in xmlrecord["record"]["datafield"]
        if entry["@tag"] == "037"
    ]


##################################################################################
# 1. Parse arguments and prepare outputfile
##################################################################################

SID = "151"

fip = FincArgumentParser()

# Get arguments
inputfile = fip.args.inputfile
outputformat = fip.args.outputformat

# Generates string for outputfilename, example: 196-output-20200701.fincmarc.mrc
outputfilename = fip.outputfilename(SID)

# Removes n old inputfiles and outputfiles as specified in input-hist-size and output-hist-size
fip.remove_old_outputfiles(SID)
fip.remove_old_inputfiles(SID)

# Set output format for MARC record
if outputformat == "xml":
    # pymarc is used here but was never imported at module level, so the XML
    # branch failed with a NameError. Import it where it is needed.
    import pymarc

    outputfile = pymarc.XMLWriter(open(outputfilename, "wb"))
else:
    # Binary MARC: records are written as raw bytes via as_marc() later on.
    outputfile = open(outputfilename, "wb")


##################################################################################
# 2. Get input data
##################################################################################

# Query parameters that carry credentials and must not be printed or logged.
SECRET_QUERY_KEYS = ("usr", "pwd")


def _loggable_url(baseurl, query):
    """Return the request URL for `query` with the credential parameters left out."""
    public = {key: value for key, value in query.items() if key not in SECRET_QUERY_KEYS}
    return "%s?%s" % (baseurl, urlencode(public))


# Input precedence: command line argument, then the "input" option of the
# configuration, and only if both are empty a fresh harvest over HTTP.
if not inputfile:
    config = Config.instance()
    try:
        inputfile = config.get(SID, "input")
    except Exception:
        # Best effort: a missing "input" option simply means "harvest".
        inputfile = ""

    if not inputfile:

        user = config.get(SID, "user")
        password = config.get(SID, "password")
        baseurl = "http://193.196.129.6:8031/alix"

        # First request: build a result set containing all title records.
        query = {
            'op': 'find',
            'base': 'B-TIT',
            'query': 'IDN=1 < *',
            'usr': user,
            'pwd': password
        }

        url = "%s?%s" % (baseurl, urlencode(query))
        result = requests.get(url)

        if result.status_code != 200:
            # Report the URL without usr/pwd so credentials do not leak.
            sys.exit("HTTP error %s: %s" % (result.status_code, _loggable_url(baseurl, query)))

        dd = xmltodict.parse(result.text)
        set_number = int(dd["find"]["set_number"])
        no_entries = int(dd["find"]["no_entries"])

        inputfile = fip.inputfilename(SID)
        # The XML declaration below promises UTF-8, so write UTF-8 explicitly
        # instead of relying on the platform default encoding. The context
        # manager closes the file even if a request or parse step fails.
        with open(inputfile, "w", encoding="utf-8") as ifile:
            ifile.write("""<?xml version="1.0" encoding="UTF-8" ?>""")
            ifile.write("<collection>")

            # Fetch the records of the result set one by one (1-based).
            for entry in range(1, no_entries + 1):
                query = {
                    'op': 'getrec',
                    'set_number': set_number,
                    'number_entry': entry,
                    'usr': user,
                    'pwd': password
                }

                url = "%s?%s" % (baseurl, urlencode(query))

                # Retry until the request goes through. Only network level
                # errors are retried, so Ctrl-C still aborts the script.
                while True:
                    try:
                        result = requests.get(url)
                        break
                    except requests.exceptions.RequestException:
                        time.sleep(60)

                if result.status_code != 200:
                    # Skip this record, but keep harvesting the others.
                    print("HTTP error %s: %s" % (result.status_code, _loggable_url(baseurl, query)))
                    continue

                doc = parseString(result.text.encode('utf-8').strip())
                # The first child of the response root is renamed to <record>
                # and stripped of its "base" and "idn" attributes.
                document = doc.childNodes[0].childNodes[0]
                document.tagName = 'record'
                document.removeAttribute('base')
                document.removeAttribute('idn')
                ifile.write(document.toxml())

            ifile.write("</collection>")


##################################################################################
# 3. Process data
##################################################################################

# Ordered rules mapping the physical description (field 433) to a format.
# The first matching pattern wins, so the order below is significant.
# Patterns are raw strings and compiled once instead of once per record.
FORMAT_RULES = [
    (re.compile(r"\d\]?\sS\."), "Book"),
    (re.compile(r"\d\]?\sSeit"), "Book"),
    (re.compile(r"\d\]?\sBl"), "Book"),
    (re.compile(r"\s?Illl?\."), "Book"),
    (re.compile(r"[XVI],\s"), "Book"),
    (re.compile(r"^\d+\s[SsPp]"), "Book"),
    (re.compile(r"^DVD"), "DVD-Video"),
    (re.compile(r"^Blu.?-[Rr]ay"), "Blu-Ray-Disc"),
    (re.compile(r"^H[DC] [Cc][Aa][Mm]"), "CD-Video"),
    (re.compile(r"^HDCAM"), "CD-Video"),
    (re.compile(r"[Bb]et.?-?[Cc]am"), "CD-Video"),
    (re.compile(r"CD"), "CD-Audio"),
    (re.compile(r"[kKCc]asss?ette"), "Video-Cassette"),
    (re.compile(r"^VHS"), "Video-Cassette"),
    (re.compile(r"^Noten"), "Score"),
    (re.compile(r"^Losebl"), "Loose-leaf"),
    (re.compile(r"^Film\s?\["), "CD-Video"),
    (re.compile(r"\d\smin"), "CD-Video"),
    (re.compile(r"S\.\s\d+\s?-\s?\d+"), "Article"),
]

# Format used when no rule matches.
DEFAULT_FORMAT = "Book"

# A role given in square brackets.
ROLE_IN_BRACKETS = re.compile(r"\[(.*?)\]")

# 335 S. : zahlr. Ill. ; 32 cm
EXTENT_DETAILS_SIZE = re.compile(r"(.*)\s?:\s(.*);\s(.*)")
# 289 S.: Zahlr. Ill.
EXTENT_DETAILS = re.compile(r"(.*)\s?:\s(.*)")
# 106 S. ; 21 cm
EXTENT_SIZE = re.compile(r"(.*)\s?;\s(.*)")


def _detect_format(form):
    """Return the format name of the first rule matching `form`, else the default."""
    for pattern, name in FORMAT_RULES:
        if pattern.search(form):
            return name
    return DEFAULT_FORMAT


for oldrecord in xmlstream(inputfile, "record"):

    # A tuple makes xmltodict compare whole element names; the bare string
    # "subfield" only worked through substring containment.
    xmlrecord = xmltodict.parse(oldrecord, force_list=("subfield",))
    marcrecord = marcx.Record(force_utf8=True)
    marcrecord.strict = False

    # Format recognition
    form = get_datafield(xmlrecord, "433")
    media_format = _detect_format(form)
    format_info = formats[media_format]

    # Leader
    leader = format_info["Leader"]
    marcrecord.leader = leader

    # Identifier
    f001 = get_controlfield(xmlrecord, "001")
    # Kick records without identifier
    if not f001:
        continue
    marcrecord.add("001", data="finc-151-" + f001)

    # Access facet (online or physical)
    f007 = format_info["p007"]
    marcrecord.add("007", data=f007)

    # Periodicity
    year = get_datafield(xmlrecord, "425")
    periodicity = format_info["008"]
    language = get_languages(xmlrecord)
    language = marc_get_languages(language)
    f008 = marc_build_field_008(year, periodicity, language)
    marcrecord.add("008", data=f008)

    # Language
    language = get_languages(xmlrecord)
    f041a = marc_get_languages(language)
    marcrecord.add("041", a=f041a)

    # ISBN
    isbn = get_subfield(xmlrecord, "540", "a")
    f020a = check_isbn(isbn)
    marcrecord.add("020", a=f020a)

    # First creator and role
    f100a = get_subfield(xmlrecord, "100", "a")
    # Reset per record: previously a role without square brackets left f1004
    # unbound (NameError) or carrying the value of the previous record.
    f1004 = ""
    role = get_subfield(xmlrecord, "100", "b")
    if role:
        match = ROLE_IN_BRACKETS.search(role)
        if match:
            role = match.group(1).lower().replace(".", "")
            f1004 = roles.get(role, "")
            if not f1004:
                print("Missing role: %s." % role)
    marcrecord.add("100", a=f100a, _4=f1004)

    # Title statement
    f245a = get_datafield(xmlrecord, "331")
    f245b = get_datafield(xmlrecord, "335")
    marcrecord.add("245", a=f245a, b=f245b)

    # Imprint (place, publisher, year)
    f260a = get_datafield(xmlrecord, "410")
    f260b = get_datafield(xmlrecord, "412")
    f260c = year  # field 425, already read for the 008 field
    publisher = ["a", f260a, "b", f260b, "c", f260c]
    marcrecord.add("260", subfields=publisher)

    # Extension and physical description
    f300 = form  # field 433, already read for the format recognition
    match_details_size = EXTENT_DETAILS_SIZE.search(f300)
    match_details = EXTENT_DETAILS.search(f300)
    match_size = EXTENT_SIZE.search(f300)

    f300a = f300
    f300b = ""
    f300c = ""

    # Most specific pattern first; the unsplit text is the fallback.
    if match_details_size:
        f300a, f300b, f300c = match_details_size.groups()
    elif match_details:
        f300a, f300b = match_details.groups()
    elif match_size:
        f300a, f300c = match_size.groups()

    physicaldescription = ["a", f300a, "b", f300b, "c", f300c]
    marcrecord.add("300", subfields=physicaldescription)

    # RDA-content
    f336b = format_info["336b"]
    marcrecord.add("336", b=f336b)

    # RDA-carrier
    f338b = format_info["338b"]
    marcrecord.add("338", b=f338b)

    # Subject headings
    subjects = get_subjects(xmlrecord)
    for f650a in subjects:
        marcrecord.add("650", a=f650a)

    # Additional creators and their roles
    creators = get_creators(xmlrecord)
    for creator in creators:
        f700a = creator[0]
        f7004 = creator[1]
        marcrecord.add("700", a=f700a, _4=f7004)

    # Collection
    marcrecord.add("912", a="vkfilm")

    # SWB-content
    f935c = format_info["935c"]
    marcrecord.add("935", c=f935c)

    # Collection and sealing
    collections = ["a", f001, "b", SID, "c", "sid-151-col-filmakademiebawue"]
    marcrecord.add("980", subfields=collections)

    # Write record to file
    if outputformat == "xml":
        outputfile.write(marcrecord)
    else:
        outputfile.write(marcrecord.as_marc())

outputfile.close()
