#!/usr/bin/env python
# coding: utf-8
#
# Copyright 2020 by Leipzig University Library, http://ub.uni-leipzig.de
#                   The Finc Authors, http://finc.info
#                   Robert Schenk, <robert.schenk@uni-leipzig.de>
#
# This file is part of some open source application.
#
# Some open source application is free software: you can redistribute
# it and/or modify it under the terms of the GNU General Public
# License as published by the Free Software Foundation, either
# version 3 of the License, or (at your option) any later version.
#
# Some open source application is distributed in the hope that it will
# be useful, but WITHOUT ANY WARRANTY; without even the implied warranty
# of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Foobar.  If not, see <http://www.gnu.org/licenses/>.
#
# @license GPL-3.0+ <http://spdx.org/licenses/GPL-3.0+>

"""

Source: MediArxiv - Open Archive for Media, Film, and Communication Studies
SID: 191
Ticket: #14959
Origin: OSF (https://api.osf.io/v2/preprints/?filter[provider]=mediarxiv)

"""


import os
import re
import sys

import requests
import json

import marcx
import pymarc
from siskin.configuration import Config
from siskin.arguments import FincArgumentParser
from siskin.mappings import formats
from siskin.utils import marc_build_field_008


def get_osf_records(page, timeout=60):
    """
    Fetch one result page of MediArxiv preprints from the OSF API.

    Args:
        page: Number of the result page to request, as an integer.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the harvest forever.

    Returns:
        The decoded JSON document as returned by the API, or None if the
        document has no "data" member. The caller uses None as the signal
        that there are no further pages.

    Raises:
        requests.exceptions.RequestException: On connection problems or when
            the timeout expires.
        ValueError: If the response body is not valid JSON.
    """
    response = requests.get(
        "https://api.osf.io/v2/preprints/?filter[provider]=mediarxiv&format=json&page=%d" % page,
        timeout=timeout,
    )
    result = response.json()

    # Presumably the API answers an out-of-range page with an error document
    # that lacks "data" -- TODO confirm against the OSF API documentation.
    if "data" not in result:
        return None
    return result


def get_authors(url, timeout=60):
    """
    Fetch the contributors of a preprint and return their names.

    Args:
        url: Address of the contributors list of one preprint.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of names in the form "Surname, Forename Middlenames". Empty
        name parts are left out, so no dangling comma or trailing blank is
        produced. The list is never empty: if the response carries no usable
        contributor, [""] is returned so that callers can always access the
        first element.

    Raises:
        requests.exceptions.RequestException: On connection problems or when
            the timeout expires.
        ValueError: If the response body is not valid JSON.
        KeyError: If a contributor has neither user data nor the error
            fallback with the name parts.
    """
    response = requests.get(url, timeout=timeout)
    result = response.json()

    # Probe the first contributor only; a response without embedded users is
    # treated as "no authors available".
    try:
        result["data"][0]["embeds"]["users"]
    except (KeyError, IndexError, TypeError):
        return [""]

    authors = []

    for contributor in result["data"]:

        users = contributor["embeds"]["users"]
        try:
            attributes = users["data"]["attributes"]
        except (KeyError, TypeError):
            # Fallback used by the original code: when the embedded user has
            # no "data", the name parts are read from the first error entry.
            attributes = users["errors"][0]["meta"]

        given_names = " ".join(
            part.strip()
            for part in (attributes["given_name"], attributes["middle_names"])
            if part and part.strip()
        )
        surname = (attributes["family_name"] or "").strip()

        name = ", ".join(part for part in (surname, given_names) if part)
        if name:
            authors.append(name)

    return authors or [""]


##################################################################################
# 1. Parse arguments and prepare outputfile
##################################################################################

# Source identifier of MediArxiv (see the module docstring)
SID = "191"

fip = FincArgumentParser()

# Requested output format, taken from the parsed arguments
outputformat = fip.args.outputformat

# Name of the output file
# (presumably of the form 191-output-20200701.fincmarc.mrc)
outputfilename = fip.outputfilename(SID)

# Removes n old output files as specified in output-hist-size
fip.remove_old_outputfiles(SID)

# The output file is always opened in binary mode. For XML output the handle
# is wrapped in a pymarc.XMLWriter; otherwise the raw handle is written to.
outputfile = open(outputfilename, "wb")
if outputformat == "xml":
    outputfile = pymarc.XMLWriter(outputfile)


##################################################################################
# 2. Get and process input data
################################################################################

page = 1

# Values that are identical for every record of this source; looked up once
# instead of once per record.
record_format = "Article"
format_spec = formats[record_format]
language = "eng"

# Walk through the result pages until get_osf_records signals the end by
# returning None. The try/finally guarantees that the output file is closed
# even if a request or a record conversion fails.
try:
    while True:

        jsonrecords = get_osf_records(page)

        if not jsonrecords:
            break

        for jsonrecord in jsonrecords["data"]:

            # Identifier; records without one are skipped
            f001 = jsonrecord["id"]
            if not f001:
                continue

            # Link to the resource; records without one are skipped. Checked
            # before anything else so that no author request is made for a
            # record that is dropped anyway.
            resource_url = jsonrecord["links"]["preprint_doi"]
            if not resource_url:
                continue

            marcrecord = marcx.Record(force_utf8=True)
            marcrecord.strict = False

            # Leader
            marcrecord.leader = format_spec["Leader"]

            # Identifier
            marcrecord.add("001", data="191-" + f001)

            # Access facet
            marcrecord.add("007", data=format_spec["e007"])

            # Periodicity
            # NOTE(review): the complete date_published value is passed as the
            # first argument, exactly as before -- confirm that
            # marc_build_field_008 copes with a full date.
            date_published = jsonrecord["attributes"]["date_published"]
            periodicity = format_spec["008"]
            f008 = marc_build_field_008(date_published, periodicity, language)
            marcrecord.add("008", data=f008)

            # Language
            marcrecord.add("041", a=language)

            # First author
            contributors_url = jsonrecord["relationships"]["contributors"]["links"]["related"]["href"]
            authors = get_authors(contributors_url)
            marcrecord.add("100", a=authors[0], _4="aut")

            # Main title and remainder of title
            title = jsonrecord["attributes"]["title"]
            if isinstance(title, list):
                # Unexpected shape; print it so that it can be inspected
                print(title)
            if ": " in title:
                # Split at the first separator only, so that a title with
                # several ": " keeps everything after the first one.
                f245a, f245b = title.split(": ", 1)
            else:
                f245a, f245b = title, ""
            marcrecord.add("245", a=f245a, b=f245b)

            # Publication statement; the first four characters of the date
            # are used as the year
            marcrecord.add("260", b="MediArXiv", c=date_published[:4])

            # RDA content type
            marcrecord.add("336", b=format_spec["336b"])

            # RDA carrier type
            marcrecord.add("338", b=format_spec["338b"])

            # Abstract
            marcrecord.add("520", a=jsonrecord["attributes"]["description"])

            # Subject headings
            for subject in jsonrecord["attributes"]["tags"]:
                marcrecord.add("650", a=subject.title())

            # GND content and carrier type
            marcrecord.add("655", a=format_spec["655a"], _2=format_spec["6552"])

            # Further authors
            for author in authors[1:]:
                marcrecord.add("700", a=author, _4="aut")

            # Link to the resource
            marcrecord.add("856", q="text/html", _3="Link zur Ressource", u=resource_url)

            # SWB content type
            marcrecord.add("935", c=format_spec["935c"])

            # Collection information
            collections = ["a", f001, "b", "191", "c", "sid-191-col-mediarxiv"]
            marcrecord.add("980", subfields=collections)

            if outputformat == "xml":
                outputfile.write(marcrecord)
            else:
                outputfile.write(marcrecord.as_marc())

        page += 1
finally:
    outputfile.close()
