#!/usr/bin/env python3
# coding: utf-8
#
# Copyright 2020 by Leipzig University Library, http://ub.uni-leipzig.de
#                   The Finc Authors, http://finc.info
#                   Robert Schenk, <robert.schenk@uni-leipzig.de>
#
# This file is part of some open source application.
#
# Some open source application is free software: you can redistribute
# it and/or modify it under the terms of the GNU General Public
# License as published by the Free Software Foundation, either
# version 3 of the License, or (at your option) any later version.
#
# Some open source application is distributed in the hope that it will
# be useful, but WITHOUT ANY WARRANTY; without even the implied warranty
# of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Foobar.  If not, see <http://www.gnu.org/licenses/>.
#
# @license GPL-3.0+ <http://spdx.org/licenses/GPL-3.0+>

"""

Source: Music Treasures Consortium
SID: 10
Ticket: #1016, #5798, #17983
Origin: HTTP

"""


import sys
import re
import json
import time
import requests

import marcx

from siskin.configuration import Config
from siskin.mappings import formats
from siskin.arguments import FincArgumentParser
from siskin.utils import marc_build_field_008, marc_get_languages


def get_field(jsonrecord, tag):
    """Return the value stored under ``tag`` in ``jsonrecord``.

    If the value cannot be looked up (missing key, or ``jsonrecord`` is not
    subscriptable with ``tag``), an empty string is returned so that callers
    can test the result against ``""`` or simply iterate over it.
    """
    try:
        return jsonrecord[tag]
    except (KeyError, IndexError, TypeError):
        # Only lookup failures mean "field not present"; anything else
        # (e.g. KeyboardInterrupt) must not be silently swallowed.
        return ""


##################################################################################
# 1. Parse arguments and prepare outputfile
##################################################################################

SID = "10"

fip = FincArgumentParser()

# Get arguments
inputfilename = fip.args.inputfile
outputformat = fip.args.outputformat

# Ask the argument parser for the output filename of this source
# (the naming scheme itself is defined by FincArgumentParser).
outputfilename = fip.outputfilename(SID)

# Removes n old input and outputfiles as specified in input-hist-size and output-hist-size
fip.remove_old_outputfiles(SID)
fip.remove_old_inputfiles(SID)

# Set output format for MARC record
if outputformat == "xml":
    # pymarc is referenced here but not imported at file level; import it
    # locally so that the XML branch does not fail with a NameError.
    import pymarc

    outputfile = pymarc.XMLWriter(open(outputfilename, "wb"))
else:
    outputfile = open(outputfilename, "wb")


##################################################################################
# 2. Get input data
################################################################################

# Harvest the collection page by page when no input file was supplied.
# Each page is stored as one JSON document per line in the input file.
if not inputfilename:

    inputfilename = fip.inputfilename(SID)

    page = 1
    retry_count = 5
    headers = {'user-agent': 'Mozilla/5.0 (Windows NT x.y; rv:10.0) Gecko/20100101 Firefox/10.0'}

    # The context manager guarantees the harvest file is flushed and closed,
    # no matter how the loop is left (last page reached or sys.exit).
    with open(inputfilename, "w", encoding="utf-8") as inputfile:

        while True:

            url = "https://www.loc.gov/collections/music-treasures-consortium/?sp=%s&fo=json" % page
            result = requests.get(url, headers=headers)

            if result.status_code >= 500:
                # status_code is an int, so it has to be converted before it
                # can be concatenated into the message.
                if retry_count == 0:
                    sys.exit("Error: " + str(result.status_code) + ". Maximum number of attempts exceeded.")
                print("Error: " + str(result.status_code) + ". Try again ...")
                retry_count -= 1
                # Give the server a moment before asking for the same page again.
                time.sleep(5)
                continue

            check = json.loads(result.text)

            # A JSON body carrying status 404 marks the end of the result pages.
            if check.get("status") == 404:
                break

            # Re-serialise the page so that it is guaranteed to occupy exactly
            # one line, which is what the line-wise processing step expects.
            inputfile.write(json.dumps(check))
            inputfile.write("\n")

            page += 1
            retry_count = 5
            time.sleep(5)

# The harvest is written as UTF-8, so read it back with the same encoding
# instead of the platform default.
inputfile = open(inputfilename, "r", encoding="utf-8")


##################################################################################
# 3. Process data
##################################################################################

# Every input line holds one harvested result page (a JSON document); each
# entry of its "content" -> "results" list becomes one MARC record.
for line in inputfile:

    # Skip blank lines instead of failing inside json.loads
    if not line.strip():
        continue

    jsonobject = json.loads(line)
    jsonrecords = jsonobject["content"]["results"]

    for jsonrecord in jsonrecords:

        marcrecord = marcx.Record(force_utf8=True)
        marcrecord.strict = False

        # Format: derived from the first "original_format" entry; records
        # without that field fall back to "Book" like any unknown format.
        original_formats = get_field(jsonrecord, "original_format")
        original_format = original_formats[0] if original_formats else ""
        if "book" in original_format:
            record_format = "Book"
        elif "notated music" in original_format:
            record_format = "Score"
        elif "web page" in original_format:
            record_format = "Score"
        elif "photo" in original_format:
            record_format = "Image"
        else:
            record_format = "Book"

        # Leader
        leader = formats[record_format]["Leader"]
        marcrecord.leader = leader

        # Identifier: last path segment of the record's "id" URL
        f001 = jsonrecord["id"]
        f001 = f001.rstrip("/").split("/")[-1]
        marcrecord.add("001", data="finc-10-" + f001)

        # Access type
        f007 = formats[record_format]["e007"]
        marcrecord.add("007", data=f007)

        # Field 008: year, periodicity and language
        language = get_field(jsonrecord, "language")
        language = marc_get_languages(language)
        date = get_field(jsonrecord, "date")
        match = re.match(r"(\d\d\d\d)", date)
        # Reset the year for every record: without a leading four-digit year
        # the variable would otherwise be undefined (first record) or still
        # carry the year of the previous record.
        # NOTE(review): assumes marc_build_field_008 accepts an empty year - confirm.
        year = match.group(1) if match else ""
        periodicity = formats[record_format]["008"]
        f008 = marc_build_field_008(year, periodicity, language)
        marcrecord.add("008", data=f008)

        # Language
        marcrecord.add("041", a=language)

        # 1. Creator: first contributor, if there is any
        contributors = get_field(jsonrecord, "contributor")
        if contributors:
            marcrecord.add("100", a=contributors[0].title())

        # Main title
        f245a = get_field(jsonrecord, "title")
        marcrecord.add("245", a=f245a)

        # Alternative title
        atitles = get_field(jsonrecord, "other_title")
        for atitle in atitles:
            marcrecord.add("246", a=atitle)

        # Imprint
        marcrecord.add("260", c=date)

        # RDA-content
        f336b = formats[record_format]["336b"]
        marcrecord.add("336", b=f336b)

        # RDA-carrier
        f338b = formats[record_format]["338b"]
        marcrecord.add("338", b=f338b)

        # Subjects
        subjects = get_field(jsonrecord, "subject")
        for subject in subjects:
            marcrecord.add("650", a=subject.title())

        # Additional creators: all contributors after the first one
        for person in contributors[1:]:
            marcrecord.add("700", a=person.title())

        # Link to resource
        f856u = get_field(jsonrecord, "url")
        marcrecord.add("856", q="text/html", _3="Link zur Ressource", u=f856u)

        # SWB-content
        f935c = formats[record_format]["935c"]
        marcrecord.add("935", c=f935c)

        # Collection
        marcrecord.add("980", a=f001, b="10", c="sid-10-col-musictreasures")

        # Write output file
        if outputformat == "xml":
            outputfile.write(marcrecord)
        else:
            outputfile.write(marcrecord.as_marc())

inputfile.close()
outputfile.close()
