#!/usr/bin/env python

import argparse
import os
import pysrt
import re
import subprocess
import sys
import math

from moviepy.editor import VideoFileClip, TextClip, ImageClip, concatenate_videoclips

from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.nlp.stemmers import Stemmer
from sumy.utils import get_stop_words

from sumy.summarizers.luhn import LuhnSummarizer
from sumy.summarizers.edmundson import EdmundsonSummarizer
from sumy.summarizers.lsa import LsaSummarizer
from sumy.summarizers.text_rank import TextRankSummarizer
from sumy.summarizers.lex_rank import LexRankSummarizer

# Maps the algorithm name accepted by the -S/--summarizer command-line option
# to the sumy summarizer class that summarize() instantiates for it.
SUMMARIZERS = {
    'luhn': LuhnSummarizer,
    'edmundson': EdmundsonSummarizer,
    'lsa': LsaSummarizer,
    'text-rank': TextRankSummarizer,
    'lex-rank': LexRankSummarizer
}

def create_summary(filename, regions):
    """Build a clip made of the selected time regions of a video.

    Args:
        filename: Path of the input video, opened with ``VideoFileClip``.
        regions: Iterable of ``(start, end)`` pairs handed to
            ``VideoFileClip.subclip`` (the caller in this file passes the
            second-based pairs produced by ``srt_item_to_range``). The
            sub-clips are joined in iteration order.

    Returns:
        The clip returned by ``concatenate_videoclips`` for all sub-clips.

    Raises:
        ValueError: If ``regions`` is empty, since there is nothing to join.
    """
    input_video = VideoFileClip(filename)
    subclips = [input_video.subclip(start, end) for (start, end) in regions]
    if not subclips:
        # Fail with a clear message instead of an obscure error from moviepy.
        raise ValueError("no regions selected; cannot build a summary clip")
    return concatenate_videoclips(subclips)

def srt_item_to_range(item):
    """Return the ``(start, end)`` times of a subtitle item in seconds.

    Args:
        item: Object whose ``start`` and ``end`` attributes each expose
            ``hours``, ``minutes``, ``seconds`` and ``milliseconds``.

    Returns:
        A 2-tuple of floats: start and end offsets in seconds.
    """
    def as_seconds(stamp):
        # Sum the whole-second parts first, then add the fractional part.
        whole = stamp.hours*60*60 + stamp.minutes*60 + stamp.seconds
        return whole + stamp.milliseconds/1000.

    return as_seconds(item.start), as_seconds(item.end)

def srt_to_doc(srt_file):
    """Flatten subtitle items into one plain-text document.

    Each kept item becomes one sentence of the form ``"(<index>) <text>. "``,
    where ``<index>`` is the item's position in ``srt_file``; ``summarize``
    parses that marker to map a sentence back to its subtitle.

    Items whose text starts with ``"["`` are skipped. Inside the text, every
    ``.``, ``?`` and ``!`` is removed so the only sentence terminator is the
    trailing period added here.

    Args:
        srt_file: Iterable of items exposing a ``text`` attribute.

    Returns:
        The concatenated document, or an empty string if no item is kept.
    """
    sentences = []
    for index, item in enumerate(srt_file):
        if item.text.startswith("["):
            continue
        # Join wrapped subtitle lines with a space; deleting the newline
        # outright would fuse the last word of one line with the first word
        # of the next.
        body = " ".join(item.text.split())
        for mark in ".?!":
            body = body.replace(mark, "")
        sentences.append("(%d) %s. " % (index, body))
    return "".join(sentences)

def total_duration_of_regions(regions):
    """Return the summed length of all ``(start, end)`` regions.

    Args:
        regions: Iterable of ``(start, end)`` pairs.

    Returns:
        The sum of ``end - start`` over all pairs; ``0`` for no regions.
    """
    # A generator expression replaces the tuple-parameter lambda, which is
    # Python 2-only syntax.
    return sum(end - start for (start, end) in regions)

def summarize(srt_file, summarizer, n_sentences, language):
    """Pick the most relevant subtitles and return their time ranges.

    The subtitles are flattened with ``srt_to_doc``, summarized with the
    chosen sumy algorithm, and each selected sentence is mapped back to its
    subtitle through the ``(<index>)`` marker that ``srt_to_doc`` prepends.

    Args:
        srt_file: Indexable collection of subtitle items.
        summarizer: Key of ``SUMMARIZERS`` naming the algorithm to use.
        n_sentences: Sentence count passed to the sumy summarizer.
        language: Language name given to the sumy tokenizer, stemmer and
            stop-word lookup.

    Returns:
        A list of ``(start, end)`` pairs (see ``srt_item_to_range``), one per
        selected sentence that carries an index marker.

    NOTE(review): only ``stop_words`` is configured on the summarizer here;
    the Edmundson algorithm may need additional word lists. Confirm against
    the sumy documentation.
    """
    parser = PlaintextParser.from_string(srt_to_doc(srt_file), Tokenizer(language))
    algorithm = SUMMARIZERS[summarizer](Stemmer(language))
    algorithm.stop_words = get_stop_words(language)

    # Raw string: "\(" is not a valid escape sequence in a normal literal.
    marker = re.compile(r"\(([0-9]+)\)")
    regions = []
    for sentence in algorithm(parser.document, n_sentences):
        found = marker.search(str(sentence))
        if found is None:
            # A sentence without a marker cannot be mapped to a subtitle,
            # so it is skipped.
            continue
        regions.append(srt_item_to_range(srt_file[int(found.group(1))]))
    return regions

def find_summary_regions(srt_filename, summarizer="lsa", duration=30, language="english"):
    """Select subtitle time ranges whose total length approximates *duration*.

    The sentence count is first estimated from the average subtitle length
    and then adjusted one sentence at a time: upwards until the summary is
    at least ``duration`` long, or downwards until it is no longer than
    ``duration``.

    Args:
        srt_filename: Path of the subtitle file, opened with ``pysrt.open``.
        summarizer: Key of ``SUMMARIZERS`` naming the algorithm to use.
        duration: Target total length of the summary, in the same unit as
            the ranges from ``srt_item_to_range`` (seconds).
        language: Language name forwarded to ``summarize``.

    Returns:
        A list of ``(start, end)`` pairs as returned by ``summarize``.

    Raises:
        ValueError: If the subtitle file has no entries, or its entries have
            no positive total duration.
    """
    srt_file = pysrt.open(srt_filename)
    subtitle_count = len(srt_file)
    if subtitle_count == 0:
        raise ValueError("subtitle file contains no entries: %s" % srt_filename)

    spoken_time = total_duration_of_regions(map(srt_item_to_range, srt_file))
    if spoken_time <= 0:
        raise ValueError("subtitles have no positive duration: %s" % srt_filename)

    avg_subtitle_duration = spoken_time / subtitle_count
    n_sentences = int(duration / avg_subtitle_duration)
    summary = summarize(srt_file, summarizer, n_sentences, language)
    total_time = total_duration_of_regions(summary)

    if total_time < duration:
        # There can never be more sentences than subtitles, so stop growing
        # at that point; otherwise a target longer than the whole subtitle
        # track would loop forever.
        while total_time < duration and n_sentences < subtitle_count:
            n_sentences += 1
            summary = summarize(srt_file, summarizer, n_sentences, language)
            total_time = total_duration_of_regions(summary)
    else:
        # Never ask for a negative number of sentences.
        while total_time > duration and n_sentences > 0:
            n_sentences -= 1
            summary = summarize(srt_file, summarizer, n_sentences, language)
            total_time = total_duration_of_regions(summary)
    return summary

# Command-line entry point: pick summary regions from the subtitles, cut them
# out of the video and write the result to disk.
if __name__ == '__main__':
    parser = argparse.ArgumentParser("videodigest: Automatic Video Summaries")
    parser.add_argument('-i', '--video-file', help="Input video file", required=True)
    parser.add_argument('-s', '--subtitles-file', help="Input subtitle file (srt)", required=True)
    parser.add_argument('-t', '--duration', type=int, help="Duration of summary", default=60)
    parser.add_argument('-L', '--language', help="Language of subtitles", default="english")
    parser.add_argument('-S', '--summarizer', help="Auto-summarization algorithm ("+' | '.join(SUMMARIZERS.keys())+")", default="lsa")
    parser.add_argument('-o', '--output', help="Output file")

    args = parser.parse_args()

    if args.summarizer not in SUMMARIZERS:
        # Parenthesised single-argument form prints the same text on
        # Python 2 and Python 3.
        print('Summarizer must be one of: ' + ', '.join(SUMMARIZERS.keys()))
        sys.exit(1)

    regions = find_summary_regions(args.subtitles_file,
                                   summarizer=args.summarizer,
                                   duration=args.duration,
                                   language=args.language)
    summary = create_summary(args.video_file, regions)

    dst = args.output
    if not dst:
        # Default output name: "<input without extension>_summarized.mp4".
        base = os.path.splitext(args.video_file)[0]
        dst = "{0}_summarized.mp4".format(base)

    # write_videofile is the current name of the deprecated to_videofile alias.
    summary.write_videofile(
        dst,
        codec="libx264",
        temp_audiofile="temp.m4a",
        remove_temp=True,
        audio_codec="aac",
    )

