import logging
import os
import re

import srt
from moviepy import editor

from . import utils


# Cut media
class Cutter:
    """Cut a media file along its subtitles and re-voice spoken segments.

    ``args`` is the parsed command-line namespace.  ``run`` reads
    ``args.inputs``, ``args.lang``, ``args.force``, ``args.encoding`` and
    ``args.bitrate`` from it.
    """

    # Subtitle text that marks a segment containing no speech.
    _NO_SPEECH = "< No Speech >"

    def __init__(self, args):
        self.args = args

    def run(self):
        """Cut the media, dub each spoken segment with TTS and save the result.

        Returns:
            ``(output_fn, new_subs_fn)`` on success, or ``None`` when
            ``utils.check_exists`` reports that the output must not be
            rewritten or when no subtitle is left to cut.

        Raises:
            AssertionError: if the inputs lack a media file or an srt file.
        """
        fns = self._classify_inputs(self.args.inputs)

        output_fn = utils.change_ext(utils.add_cut(fns["media"]), "mp4")
        output_fn = utils.add_anything(output_fn, self.args.lang)
        print(output_fn)

        if utils.check_exists(output_fn, self.args.force):
            return None

        subs = self._load_subs(fns)
        if not subs:
            # Previously this ended in an IndexError on ``segments[-1]``
            # after the media file had already been opened.
            logging.warning(f'No subtitles selected from {fns["srt"]}, nothing to cut')
            return None

        base, _ = os.path.splitext(fns["media"])
        segments = self._plan_segments(subs, base)

        media = editor.VideoFileClip(fns["media"])
        final_clips = []
        try:
            # The video used to stop right after the last spoken segment, so
            # the last segment is stretched to (almost) the end of the media;
            # the 0.5 s margin keeps the cut from failing.  (by zcb)
            # NOTE(review): "video_duration" and the "new_*" values of that
            # segment are not recomputed after this override.
            segments[-1]["video_end"] = media.duration - 0.5

            self._render_segments(media, segments, base)

            print(segments)
            # Write the subtitles that match the new timeline.
            new_subs = utils.gen_srt_from_new_segments(segments)
            new_subs_fn = utils.add_anything(fns["srt"], "cut")
            with open(new_subs_fn, "wb") as f:
                f.write(srt.compose(new_subs).encode(self.args.encoding, "replace"))

            # Append one by one so that already opened clips are still closed
            # if a later file fails to open.
            for seg in segments:
                final_clips.append(editor.VideoFileClip(seg["new_fn"]))

            final_clip = editor.concatenate_videoclips(final_clips)
            logging.info(
                f"Reduced duration from {media.duration:.1f} to {final_clip.duration:.1f}"
            )

            # An alternative to bitrate is crf, e.g. ffmpeg_params=['-crf', '18']
            final_clip.write_videofile(
                output_fn, audio_codec="aac", bitrate=self.args.bitrate
            )
        finally:
            # Release every reader before the temporary files are deleted.
            for opened in final_clips:
                opened.close()
            media.close()

        logging.info(f"Saved media to {output_fn}")

        # Remove the temporary files.
        media_dir = os.path.dirname(fns["media"])
        utils.delete_files(media_dir, f"{os.path.basename(base)}_temp_")
        # NOTE(review): if delete_files matches by name prefix this also
        # removes every dotfile in the media directory -- confirm intent.
        utils.delete_files(media_dir, ".")
        return output_fn, new_subs_fn

    @staticmethod
    def _classify_inputs(inputs):
        """Map the input filenames to the "srt", "media" and "md" roles."""
        fns = {"srt": None, "media": None, "md": None}
        for fn in inputs:
            ext = os.path.splitext(fn)[1][1:]
            # Any extension other than srt/md is taken to be the media file.
            fns[ext if ext in fns else "media"] = fn

        # Raised explicitly instead of using ``assert`` so that the check
        # survives ``python -O``; the exception type and messages are kept
        # for backward compatibility.
        if not fns["media"]:
            raise AssertionError("must provide a media filename")
        if not fns["srt"]:
            raise AssertionError("must provide a srt filename")
        return fns

    def _load_subs(self, fns):
        """Parse the srt file and keep only the subtitles listed in the md file."""
        with open(fns["srt"], encoding=self.args.encoding) as f:
            subs = list(srt.parse(f.read()))

        if not fns["md"]:
            logging.info(f'Cut {fns["media"]} based on {fns["srt"]}')
            return subs

        md = utils.MD(fns["md"], self.args.encoding)
        wanted = set()
        # Every task line is used, whatever its mark; the subtitle index is
        # the number following the opening "[" of the sentence.
        for _mark, sent in md.tasks():
            m = re.match(r"\[(\d+)", sent.strip())
            if m:
                wanted.add(int(m.group(1)))
        subs = [s for s in subs if s.index in wanted]
        logging.info(f'Cut {fns["media"]} based on {fns["srt"]} and {fns["md"]}')
        return subs

    def _plan_segments(self, subs, base):
        """Generate the TTS audio and compute the old and new timing per subtitle."""
        segments = []
        # End of the previous segment on the new timeline.  Initialised up
        # front because the first subtitle need not carry index 1 (e.g. after
        # the md filter), which used to raise UnboundLocalError.
        new_v_end = 0.0
        # Avoid disordered subtitles
        for x in sorted(subs, key=lambda s: s.start):
            v_start = 0.000 if x.index == 1 else x.start.total_seconds()
            v_end = x.end.total_seconds()
            v_duration = round(v_end - v_start, 3)

            tts_fn = ""
            new_duration = v_duration
            if x.content != self._NO_SPEECH:
                # Generate the new audio with edge-tts.
                candidate_fn = f"{base}_temp_{x.index}_tts.mp3"
                if utils.text_to_audio_by_edge_tts(x.content, candidate_fn, self.args.lang):
                    tts_fn = candidate_fn
                    new_duration = utils.get_mp3_duration(tts_fn)
                else:
                    # A falsy result is taken to mean that no audio was
                    # produced: keep the original clip, sound and duration
                    # rather than failing later on a missing mp3.
                    logging.warning(
                        f"TTS failed for subtitle {x.index}, keeping the original audio"
                    )

            # Remember the new start and end for the subtitle generation:
            #   new start = max(new end of the previous segment, old start)
            #   new end   = new start + new duration
            new_v_start = 0.000 if x.index == 1 else max(new_v_end, v_start)
            new_v_end = round(new_v_start + new_duration, 3)
            segments.append(
                {
                    "idx": x.index,
                    "video_start": v_start,
                    "video_end": v_end,
                    "video_duration": v_duration,
                    "content": x.content,
                    "tts_fn": tts_fn,
                    "new_duration": new_duration,
                    "new_start": new_v_start,
                    "new_end": new_v_end,
                }
            )
        return segments

    def _render_segments(self, media, segments, base):
        """Write one temporary clip per segment and store its path as "new_fn"."""
        clips = [media.subclip(s["video_start"], s["video_end"]) for s in segments]
        for n, (clip, seg) in enumerate(zip(clips, segments), start=1):
            # Save the original clip first.
            old_clip_fn = f"{base}_temp_{n}_old.mp4"
            if not seg["tts_fn"]:
                # No speech (or no TTS audio): keep the clip with its sound.
                clip.write_videofile(
                    old_clip_fn, audio_codec="aac", bitrate=self.args.bitrate
                )
                seg["new_fn"] = old_clip_fn
                continue

            # Spoken clip: its own sound is dropped further down.
            clip.write_videofile(old_clip_fn, bitrate=self.args.bitrate)
            # Change its duration to the length of the TTS audio.
            new_clip_fn = f"{base}_temp_{n}_new_no_audio.mp4"
            utils.change_video_duration(old_clip_fn, new_clip_fn, seg["new_duration"])

            # Merge the re-timed clip with the TTS audio.
            new_clip_with_audio_fn = f"{base}_temp_{n}_new_with_audio.mp4"
            new_clip_data = editor.VideoFileClip(new_clip_fn)
            try:
                tts_audio_data = editor.AudioFileClip(seg["tts_fn"])
                try:
                    new_clip_with_audio = new_clip_data.without_audio().set_audio(
                        tts_audio_data
                    )
                    new_clip_with_audio.write_videofile(
                        new_clip_with_audio_fn,
                        audio_codec="aac",
                        bitrate=self.args.bitrate,
                    )
                finally:
                    tts_audio_data.close()
            finally:
                new_clip_data.close()
            seg["new_fn"] = new_clip_with_audio_fn