#!/usr/bin/env python3
# coding: utf-8
#
# Copyright 2020 by Leipzig University Library, http://ub.uni-leipzig.de
#                   The Finc Authors, http://finc.info
#                   Robert Schenk, <robert.schenk@uni-leipzig.de>
#
# This file is part of some open source application.
#
# Some open source application is free software: you can redistribute
# it and/or modify it under the terms of the GNU General Public
# License as published by the Free Software Foundation, either
# version 3 of the License, or (at your option) any later version.
#
# Some open source application is distributed in the hope that it will
# be useful, but WITHOUT ANY WARRANTY; without even the implied warranty
# of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with some open source application.  If not, see <http://www.gnu.org/licenses/>.
#
# @license GPL-3.0+ <http://spdx.org/licenses/GPL-3.0+>

"""

Source: IMSLP
SID: 15
Ticket: #1240, #12288, #18000
Origin: HTTP

"""


import json
import os
import re
import shutil
import subprocess
import sys
import time
from xml.sax.saxutils import escape, unescape

import marcx
import pymarc
import requests
import xmltodict

from siskin.arguments import FincArgumentParser
from siskin.configuration import Config
from siskin.mappings import formats
from siskin.utils import marc_build_field_008, marc_get_languages, scrape_html_listing


# Quote characters and their entities; passed to xml.sax.saxutils as
# additional replacements.
html_escape_table = {
    '"': "&quot;",
    "'": "&apos;",
}
# Inverse mapping (entity -> character) for the way back.
html_unescape_table = dict((entity, char) for char, entity in html_escape_table.items())


def html_escape(text):
    """
    Return `text` with markup characters and quotes replaced by entities.

    Delegates to xml.sax.saxutils.escape with the quote entities of
    `html_escape_table`, see also: https://wiki.python.org/moin/EscapingHtml
    """
    escaped = escape(text, entities=html_escape_table)
    return escaped


def html_unescape(text):
    """
    Return `text` with entities replaced by the characters they stand for.

    Delegates to xml.sax.saxutils.unescape with the quote entities of
    `html_unescape_table`, see also: https://wiki.python.org/moin/EscapingHtml
    """
    unescaped = unescape(text, entities=html_unescape_table)
    return unescaped


def get_field(tag, source=None):
    """
    Return the value stored under `tag`, or an empty string if there is none.

    By default the value is read from the module-level `record`, i.e. the
    document that the processing loop is currently working on. A different
    mapping can be passed as `source`.

    An empty string is returned when the tag is missing, when the source
    cannot be indexed by `tag`, or when the stored value is None. The latter
    matters because callers apply string operations (regex matching,
    `.title()`, `len()`) directly to the result.
    """
    if source is None:
        try:
            source = record
        except NameError:
            # No record has been parsed yet
            return ""
    try:
        value = source[tag]
    except (KeyError, IndexError, TypeError):
        return ""
    return "" if value is None else value


##################################################################################
# 1. Parse arguments and prepare outputfile
##################################################################################

# Source identifier of IMSLP, used for file names, paths and config lookups
SID = "15"

fip = FincArgumentParser()

# Get arguments
inputfolder = fip.args.inputfolder
outputformat = fip.args.outputformat
filemap = fip.args.filemap

# Generates string for outputfilename, example: 196-output-20200701.fincmarc.mrc
outputfilename = fip.outputfilename(SID)

# Removes n old input and outputfiles as specified in input-hist-size and output-hist-size
fip.remove_old_outputfiles(SID)
fip.remove_old_inputfiles(SID)

# Set output format for MARC record
if outputformat == "xml":
    # Needs the top-level `import pymarc`; without it this branch fails with
    # a NameError. The writer takes over the binary file handle.
    outputfile = pymarc.XMLWriter(open(outputfilename, "wb"))
else:
    # Binary MARC: records are serialized by the caller and written as bytes
    outputfile = open(outputfilename, "wb")


##################################################################################
# 2. Get input data
################################################################################

# Without an input folder on the command line, try the configuration next.
if not inputfolder:

    config = Config.instance()
    try:
        inputfolder = config.get(SID, "input")
    except Exception:
        # Presumably the option is simply not configured; in that case the
        # latest dump is downloaded below.
        inputfolder = ""

# Still no input folder: download and unpack the latest dump.
if not inputfolder:

    listings_url = config.get(SID, "listings-url")
    links = sorted(link for link in scrape_html_listing(listings_url) if "imslpOut_" in link)

    if not links:
        raise ValueError("Could not find any suitable links: %s" % listings_url)

    # The links are sorted as strings and the last one is taken as the latest
    latest_file_url = links[-1]
    latest_file = latest_file_url.split("/")[-1]
    path = fip.sid_path(SID)
    inputfiles_dir = os.path.join(path, "inputfiles")

    # Start with an empty folder for the extracted files
    shutil.rmtree(inputfiles_dir, ignore_errors=True)
    os.makedirs(inputfiles_dir, exist_ok=True)

    # The URL stems from a scraped HTML page, so it is handed over as a plain
    # argument and never passes through a shell. A failing download or
    # extraction aborts the run instead of silently yielding an empty output.
    subprocess.run(["wget", "-P", path, latest_file_url], check=True)
    subprocess.run(["tar", "-zxf", os.path.join(path, latest_file), "-C", inputfiles_dir], check=True)

    inputfolder = path + "/inputfiles/"

# Optional JSON file, later queried per record identifier for "viaf" and "title"
if filemap:
    with open(filemap, "r", encoding="utf-8") as handle:
        filemap = json.load(handle)
else:
    filemap = ""


##################################################################################
# 3. Process data
##################################################################################

# All records of this source are mapped as scores, so the format dependent
# values are the same for every record and only looked up once.
record_format = formats["Score"]

# Leading four digits of a date; raw string, because "\d" is an invalid escape
# sequence in a plain string literal.
year_pattern = re.compile(r"(\d\d\d\d)")

for root, _, files in os.walk(inputfolder):

    for filename in files:
        if not filename.endswith(".xml"):
            continue

        # Each XML file holds one document; the file is closed right after reading
        inputfilepath = os.path.join(root, filename)
        with open(inputfilepath, "r", encoding="utf-8") as inputfile:
            record = xmltodict.parse(inputfile.read())
        record = record["document"]

        # Skip documents without a title
        try:
            title = record["title"]
        except (KeyError, TypeError):
            continue
        if not title:
            continue

        marcrecord = marcx.Record(force_utf8=True)
        marcrecord.strict = False

        f650a = []
        # Reset for every record, so that a record without subject neither
        # fails nor inherits the epoch of the previous record
        f590a = ""

        # Leader
        marcrecord.leader = record_format["Leader"]

        # Identifier
        f001 = record["identifier"]["#text"]
        marcrecord.add("001", data="finc-15-%s" % f001)

        # Access type
        marcrecord.add("007", data=record_format["e007"])

        # Periodicity
        languages = marc_get_languages(get_field("languages"))
        date = get_field("date")
        match = year_pattern.match(date)
        # Field 008 gets the four-digit year if the date starts with one,
        # otherwise the date as delivered
        year = match.group(1) if match else date
        f008 = marc_build_field_008(year, record_format["008"], languages[0])
        marcrecord.add("008", data=f008)

        # Language
        marcrecord.add("041", a=languages)

        # Composer
        f100a = record["creator"]["mainForm"]
        f1000 = filemap.get(f001, {}).get("viaf", "") if filemap else ""
        marcrecord.add("100", a=f100a, e="cmp", _0=f1000)

        # Worktitle
        if filemap:
            f240a = filemap.get(f001, {}).get("title", "")
            marcrecord.add("240", a=f240a)

        # Main title
        f245a = html_unescape(title)
        marcrecord.add("245", a=f245a)

        # Alternative title
        f246a = html_unescape(get_field("additionalTitle"))
        marcrecord.add("246", a=f246a)

        # RDA-content
        marcrecord.add("336", b=record_format["336b"])

        # RDA-carrier
        marcrecord.add("338", b=record_format["338b"])

        # Year of composition, here the complete date as delivered
        marcrecord.add("260", c=date)
        marcrecord.add("650", y=date)

        # Footnote, limited to 8000 characters
        f500a = get_field("abstract")
        if len(f500a) > 8000:
            f500a = f500a[:8000]
        marcrecord.add("500", a=f500a)

        # Style and epoch
        subjects = get_field("subject")
        if subjects:
            if isinstance(subjects, list):
                # [OrderedDict([('mainForm', 'Piano piece')]), OrderedDict([('mainForm', 'Romantic')])]
                # Only the second entry (epoch) is mapped; the first one is
                # presumably the style and is not used.
                epoch = subjects[1]["mainForm"]
            else:
                epoch = subjects["mainForm"]
            epoch = epoch.title()
            f650a.append(epoch)
            f590a = epoch

        # Instrumentation
        instrumentation = get_field("music_arrangement_of").title()
        f650a.append(instrumentation)
        f590b = instrumentation
        marcrecord.add("590", subfields=["a", f590a, "b", f590b])

        # GND-content and -carrier
        # Belongs into field 655 (values come from the "655a" and "6552"
        # mappings), not into 338 which already holds the RDA carrier.
        marcrecord.add("655", a=record_format["655a"], _2=record_format["6552"])

        # Subject headings, each heading only once
        seen = set()
        for heading in f650a:
            heading = heading.title()
            if heading not in seen:
                seen.add(heading)
                marcrecord.add("689", a=heading)

        # Contributors
        try:
            f700a = record["contributor"]["mainForm"]
        except (KeyError, TypeError):
            # No contributor, or not a single element with a "mainForm"
            # (e.g. a list of several contributors); nothing is mapped then
            pass
        else:
            marcrecord.add("700", a=f700a, e="ctb")

        # Link to URL
        f856u = record["url"]["#text"]
        marcrecord.add("856", q="text/html", _3="Petrucci Musikbibliothek", u=f856u)

        # SWB-content
        marcrecord.add("935", c=record_format["935c"])

        # Local field for music libraries
        marcrecord.add("970", c="PN")

        # Collection
        marcrecord.add("980", a=f001, b="15", c="sid-15-col-imslp")

        # Write output file
        if outputformat == "xml":
            outputfile.write(marcrecord)
        else:
            outputfile.write(marcrecord.as_marc())

outputfile.close()
