import logging
import os
import re
import datetime
import time

import ffmpeg
import numpy as np
import opencc
import srt
from moviepy.editor import VideoFileClip  
import asyncio
import edge_tts
from textwrap import wrap
import subprocess


def load_audio(file: str, sr: int = 16000) -> np.ndarray:
    """Decode a media file to mono float32 samples through the ffmpeg CLI.

    Args:
        file: Path of the media file handed to ffmpeg.
        sr: Sample rate requested from ffmpeg for the decoded stream.

    Returns:
        A flattened float32 array; ffmpeg emits signed 16-bit PCM, which is
        divided by 32768.0.

    Raises:
        RuntimeError: If ffmpeg fails; the message carries ffmpeg's stderr.
    """
    # Describe the pipeline first: raw s16le, single channel, written to stdout.
    stream = ffmpeg.input(file, threads=0).output(
        "-", format="s16le", acodec="pcm_s16le", ac=1, ar=sr
    )
    try:
        raw, _ = stream.run(
            cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True
        )
    except ffmpeg.Error as e:
        raise RuntimeError(f"Failed to load audio: {e.stderr.decode()}") from e

    samples = np.frombuffer(raw, np.int16).flatten()
    return samples.astype(np.float32) / 32768.0


def is_video(filename):
    """Return True if *filename* has one of the known video extensions.

    The extension is compared case-insensitively, so ``clip.MP4`` is treated
    the same as ``clip.mp4``.
    """
    _, ext = os.path.splitext(filename)
    return ext.lower() in (".mp4", ".mov", ".mkv", ".avi", ".flv", ".f4v", ".webm")


def is_audio(filename):
    """Return True if *filename* has one of the known audio extensions.

    The extension is compared case-insensitively, so ``song.MP3`` is treated
    the same as ``song.mp3``.
    """
    _, ext = os.path.splitext(filename)
    return ext.lower() in (".ogg", ".wav", ".mp3", ".flac", ".m4a")


def change_ext(filename, new_ext):
    """Return *filename* with its extension replaced by *new_ext*.

    *new_ext* may be given with or without the leading dot.
    """
    stem = os.path.splitext(filename)[0]
    suffix = new_ext if new_ext.startswith(".") else "." + new_ext
    return stem + suffix


def add_cut(filename):
    """Return *filename* with a cut marker inserted before the extension.

    A name whose stem already ends in ``_cut`` gets an extra underscore in
    front of that marker (``x_cut.mp4`` -> ``x__cut.mp4``); any other name
    gets ``_cut`` appended to the stem.
    """
    stem, suffix = os.path.splitext(filename)
    marker = "_cut"
    if not stem.endswith(marker):
        return stem + marker + suffix
    return stem[:-4] + "_" + stem[-4:] + suffix


def add_anything(filename, anything):
    """Return *filename* with ``_<anything>`` inserted before the extension."""
    stem, suffix = os.path.splitext(filename)
    return f"{stem}_{anything}{suffix}"


# a very simple markdown parser
class MD:
    """Minimal reader/writer for the markdown task lists used while editing.

    Lines are kept in ``self.lines`` *without* trailing newlines; ``write``
    joins them with ``"\\n"``. Task lines have the form ``- [ ] text`` or
    ``- [x] text``.
    """

    def __init__(self, filename, encoding):
        """Remember *filename*/*encoding* and load the file if a name is given."""
        self.lines = []
        # The attribute keeps its existing spelling ("MAKR") because it is
        # part of the class's visible surface.
        self.EDIT_DONE_MAKR = "<-- Mark if you are done editing."
        self.encoding = encoding
        self.filename = filename
        if filename:
            self.load_file()

    def load_file(self):
        """Load the file's lines into ``self.lines`` if the file exists."""
        if os.path.exists(self.filename):
            with open(self.filename, encoding=self.encoding) as f:
                # Strip the line terminator: write() re-inserts "\n" between
                # entries, so keeping it here would double every newline on
                # each load/write round trip.
                self.lines = [line.rstrip("\n") for line in f]

    def clear(self):
        """Drop all lines held in memory (the file on disk is untouched)."""
        self.lines = []

    def write(self):
        """Write ``self.lines`` to ``self.filename``, newline-separated.

        Characters that cannot be encoded are replaced rather than raising.
        """
        with open(self.filename, "wb") as f:
            f.write("\n".join(self.lines).encode(self.encoding, "replace"))

    def tasks(self):
        """Return ``(is_marked, text)`` for every task line, in file order."""
        ret = []
        for line in self.lines:
            mark, task = self._parse_task_status(line)
            if mark is not None:
                ret.append((mark, task))
        return ret

    def done_editing(self):
        """Return True if the "done editing" task is present and checked."""
        return any(m and self.EDIT_DONE_MAKR in t for m, t in self.tasks())

    def add(self, line):
        """Append a raw line."""
        self.lines.append(line)

    def add_task(self, mark, contents):
        """Append a task line, checked when *mark* is truthy."""
        self.add(f'- [{"x" if mark else " "}] {contents.strip()}')

    def add_done_editing(self, mark):
        """Append the special "done editing" task."""
        self.add_task(mark, self.EDIT_DONE_MAKR)

    def add_video(self, video_fn):
        """Append an HTML ``<video>`` element pointing at *video_fn*."""
        ext = os.path.splitext(video_fn)[1][1:]
        self.add(
            f'\n<video controls="true" allowfullscreen="true"> <source src="{video_fn}" type="video/{ext}"> </video>\n'
        )

    def _parse_task_status(self, line):
        """Return ``(is_marked, rest)`` for a task line, else ``(None, line)``.

        Both ``[x]`` and ``[X]`` count as checked; the result is lower-cased
        before comparison, so the pattern accepts either case.
        """
        m = re.match(r"- +\[([ xX])\] +(.*)", line)
        if not m:
            return None, line
        return m.groups()[0].lower() == "x", m.groups()[1]


def check_exists(output, force):
    """Report whether work producing *output* should be skipped.

    Returns True only when *output* already exists and *force* is falsy;
    in every other case returns False. An informational message is logged
    whenever the file exists.
    """
    if not os.path.exists(output):
        return False
    if force:
        logging.info(f"{output} exists. Will overwrite it")
        return False
    logging.info(
        f"{output} exists, skipping... Use the --force flag to overwrite"
    )
    return True


def expand_segments(segments, expand_head, expand_tail, total_length):
    """Pad each segment's start and end without crossing its neighbours.

    A segment's start is moved earlier by *expand_head* but not before the
    previous segment's end (or 0 for the first one). Its end is moved later
    by *expand_tail* but not past the next segment's start (or
    *total_length* for the last one). Returns new ``{"start", "end"}`` dicts.
    """
    last = len(segments) - 1
    padded = []
    for idx, seg in enumerate(segments):
        lower_bound = segments[idx - 1]["end"] if idx > 0 else 0
        upper_bound = segments[idx + 1]["start"] if idx < last else total_length
        padded.append(
            {
                "start": max(seg["start"] - expand_head, lower_bound),
                "end": min(seg["end"] + expand_tail, upper_bound),
            }
        )
    return padded


def remove_short_segments(segments, threshold):
    """Return the segments whose duration is strictly greater than *threshold*."""
    kept = []
    for seg in segments:
        duration = seg["end"] - seg["start"]
        if duration > threshold:
            kept.append(seg)
    return kept


def merge_adjacent_segments(segments, threshold):
    """Merge runs of segments whose gap to the running end is below *threshold*.

    The first segment of each run absorbs the following ones by taking over
    their ``end``; that dict is updated in place and is the object placed in
    the returned list.
    """
    merged = []
    count = len(segments)
    pos = 0
    while pos < count:
        current = segments[pos]
        nxt = pos + 1
        # Keep absorbing followers while they start close enough to the
        # (possibly already extended) end of the current segment.
        while nxt < count and segments[nxt]["start"] < current["end"] + threshold:
            current["end"] = segments[nxt]["end"]
            nxt += 1
        merged.append(current)
        pos = nxt
    return merged


def compact_rst(sub_fn, encoding):
    """Convert an .srt file to/from a one-line-per-subtitle "compact" form.

    If the file stem ends in ``_compact`` the compact file is expanded back
    into a regular .srt (written next to it without the suffix). Otherwise a
    ``<stem>_compact.srt`` file is written with one
    ``start --> end text`` line per subtitle, the text passed through the
    OpenCC ``t2s`` converter.

    A non-``.srt`` extension is only logged via ``logging.fatal``; processing
    continues afterwards.
    """
    converter = opencc.OpenCC("t2s")

    suffix = "_compact"
    stem, ext = os.path.splitext(sub_fn)
    if ext != ".srt":
        logging.fatal("only .srt file is supported")

    if not stem.endswith(suffix):
        # Regular .srt -> compact: one line per subtitle.
        with open(sub_fn, encoding=encoding) as f:
            parsed = srt.parse(f.read())
        with open(stem + suffix + ext, "wb") as f:
            for sub in parsed:
                begin = srt.timedelta_to_srt_timestamp(sub.start)
                finish = srt.timedelta_to_srt_timestamp(sub.end)
                text = converter.convert(sub.content.strip())
                f.write(f"{begin} --> {finish} {text}\n".encode(encoding, "replace"))
        return

    # Compact -> regular .srt. Each useful line is "start --> end text...",
    # i.e. at least four space-separated fields; shorter lines are skipped.
    with open(sub_fn, encoding=encoding) as f:
        rows = f.readlines()
    restored = []
    for row in rows:
        fields = row.split(" ")
        if len(fields) < 4:
            continue
        restored.append(
            srt.Subtitle(
                index=0,
                start=srt.srt_timestamp_to_timedelta(fields[0]),
                end=srt.srt_timestamp_to_timedelta(fields[2]),
                content=" ".join(fields[3:]).strip(),
            )
        )
    with open(stem[: -len(suffix)] + ext, "wb") as f:
        f.write(srt.compose(restored).encode(encoding, "replace"))


def trans_srt_to_md(encoding, force, srt_fn, video_fn=None):
    """Generate the editable markdown task list for an .srt file.

    The markdown file is written next to *srt_fn* with a ``.md`` extension.
    It starts with the unchecked "done editing" task, optionally embeds
    *video_fn*, and then lists every subtitle as an unchecked task prefixed
    with ``[index,mm:ss]``.

    Args:
        encoding: Encoding used to read the .srt and write the .md.
        force: When falsy and the .md already exists, nothing is written.
        srt_fn: Path of the source .srt file.
        video_fn: Optional video path to embed in the markdown.
    """
    base, ext = os.path.splitext(srt_fn)
    if ext != ".srt":
        # logging.fatal only logs; processing continues as before.
        logging.fatal("only .srt file is supported")
    md_fn = base + ".md"

    # Honour the skip decision: check_exists() logs "skipping..." when the
    # target exists and force is off, so the file must not be overwritten.
    if check_exists(md_fn, force):
        return

    with open(srt_fn, encoding=encoding) as f:
        subs = srt.parse(f.read())

    md = MD(md_fn, encoding)
    md.clear()
    md.add_done_editing(False)
    if video_fn:
        if not is_video(video_fn):
            logging.fatal(f"{video_fn} may not be a video")
        md.add_video(os.path.basename(video_fn))
    md.add(
        f"\nTexts generated from [{os.path.basename(srt_fn)}]({os.path.basename(srt_fn)})."
        "Mark the sentences to keep for autocut.\n"
        "The format is [subtitle_index,duration_in_second] subtitle context.\n\n"
    )

    for s in subs:
        sec = s.start.seconds
        pre = f"[{s.index},{sec // 60:02d}:{sec % 60:02d}]"
        md.add_task(False, f"{pre:11} {s.content.strip()}")
    md.write()



def change_video_duration(input_video_path, output_video_path, target_duration):
    """Re-time a video so that it lasts *target_duration* seconds.

    The clip is sped up or slowed down by ``original_duration /
    target_duration`` and written to *output_video_path*.

    Args:
        input_video_path: Path of the source video.
        output_video_path: Path the re-timed video is written to.
        target_duration: Desired duration in seconds; must be non-zero.
    """
    # Load the source video.
    video = VideoFileClip(input_video_path)
    try:
        # Speed factor needed to reach the target duration.
        speed = video.duration / target_duration

        # Apply the speed change and write the new file.
        video.speedx(speed).write_videofile(output_video_path)
    finally:
        # Close the clip even when writing fails, as
        # combine_video_with_subtitle does after reading the width.
        video.close()


# Voice used when the language is not listed below.
_EDGE_TTS_DEFAULT_VOICE = "zh-CN-XiaoxiaoNeural"

# Total attempt budget for one synthesis request (counted from ``retry``).
_EDGE_TTS_MAX_ATTEMPTS = 10

# Language name -> edge-tts voice.
# Several entries previously pointed at a voice for a *different* language
# because the locale prefix was mismatched (e.g. German -> gl-ES, Spanish ->
# sw-KE, Dutch -> de-AT, Greek -> gu-IN); those are corrected here.
# NOTE(review): confirm the corrected names against `edge-tts --list-voices`.
_EDGE_TTS_VOICES = {
    "en": "en-US-JennyNeural",
    "Afrikaans": "af-ZA-AdriNeural",
    "Arabic": "ar-AE-FatimaNeural",
    # NOTE(review): no Armenian voice identified; previous value kept. TODO confirm.
    "Armenian": "ar-AE-HamdanNeural",
    "Azerbaijani": "az-AZ-BanuNeural",
    # NOTE(review): no Belarusian voice identified; previous value kept. TODO confirm.
    "Belarusian": "bs-BA-VesnaNeural",
    "Bosnian": "bs-BA-VesnaNeural",
    "Bulgarian": "bg-BG-KalinaNeural",
    "Catalan": "ca-ES-JoanaNeural",
    "Croatian": "hr-HR-GabrijelaNeural",
    "Czech": "cs-CZ-VlastaNeural",
    "Danish": "da-DK-ChristelNeural",
    "Dutch": "nl-NL-ColetteNeural",
    "Estonian": "et-EE-AnuNeural",
    "Finnish": "fi-FI-NooraNeural",
    "French": "fr-FR-DeniseNeural",
    "Galician": "gl-ES-SabelaNeural",
    "German": "de-DE-KatjaNeural",
    "Greek": "el-GR-AthinaNeural",
    "Hebrew": "he-IL-HilaNeural",
    "Hindi": "hi-IN-SwaraNeural",
    "Hungarian": "hu-HU-NoemiNeural",
    "Icelandic": "is-IS-GudrunNeural",
    "Indonesian": "id-ID-GadisNeural",
    "Italian": "it-IT-ElsaNeural",
    "Japanese": "ja-JP-NanamiNeural",
    "Kannada": "kn-IN-SapnaNeural",
    "Kazakh": "kk-KZ-AigulNeural",
    "Korean": "ko-KR-SunHiNeural",
    "Latvian": "lv-LV-EveritaNeural",
    "Lithuanian": "lt-LT-OnaNeural",
    "Macedonian": "mk-MK-MarijaNeural",
    "Malay": "ms-MY-YasminNeural",
    "Marathi": "mr-IN-AarohiNeural",
    # NOTE(review): no Maori voice identified; previous value kept. TODO confirm.
    "Maori": "mr-IN-AarohiNeural",
    "Nepali": "ne-NP-HemkalaNeural",
    "Norwegian": "nb-NO-PernilleNeural",
    "Persian": "fa-IR-DilaraNeural",
    "Polish": "pl-PL-ZofiaNeural",
    "Portuguese": "pt-BR-FranciscaNeural",
    "Romanian": "ro-RO-AlinaNeural",
    "Russian": "ru-RU-DmitryNeural",
    "Serbian": "sr-RS-SophieNeural",
    "Slovak": "sk-SK-ViktoriaNeural",
    "Slovenian": "sl-SI-PetraNeural",
    "Spanish": "es-ES-ElviraNeural",
    "Swahili": "sw-KE-ZuriNeural",
    "Swedish": "sv-SE-SofieNeural",
    "Tagalog": "fil-PH-BlessicaNeural",
    "Tamil": "ta-SG-VenbaNeural",
    "Thai": "th-TH-PremwadeeNeural",
    "Turkish": "tr-TR-EmelNeural",
    "Ukrainian": "uk-UA-PolinaNeural",
    "Urdu": "ur-IN-GulNeural",
    "Vietnamese": "vi-VN-HoaiMyNeural",
    "Welsh": "cy-GB-NiaNeural",
}


def text_to_audio_by_edge_tts(text, local_voice_file, lang, retry=1):
    """Synthesize *text* to an audio file with edge-tts.

    Args:
        text: Text to speak.
        local_voice_file: Path the audio stream is written to.
        lang: Language name used to pick a voice; unknown values fall back
            to the default Chinese voice.
        retry: Attempt counter to start from. Attempts continue, one second
            apart, until the counter reaches 10.

    Returns:
        True once the audio file has been written.

    Raises:
        RuntimeError: If every attempt fails, or the output file is missing
            after a successful run.
    """
    logging.info(f"开始edge-tts生成语音：{text}")
    voice = _EDGE_TTS_VOICES.get(lang, _EDGE_TTS_DEFAULT_VOICE)
    rate = '+10%'
    volume = '+0%'

    async def _main() -> None:
        communicate = edge_tts.Communicate(text=text, voice=voice, rate=rate, volume=volume)
        with open(local_voice_file, "wb") as file:
            async for chunk in communicate.stream():
                if chunk["type"] == "audio":
                    file.write(chunk["data"])

    # Retry in a loop. The previous recursive retry referenced an unassigned
    # variable, dropped the `lang` argument and reset the counter, so it
    # could never succeed.
    attempt = retry
    while True:
        try:
            asyncio.run(_main())
            break
        except Exception as e:
            if attempt >= _EDGE_TTS_MAX_ATTEMPTS:
                # Best effort: do not leave a truncated audio file behind,
                # since _main() creates the file before streaming starts.
                try:
                    os.remove(local_voice_file)
                except OSError:
                    pass
                raise RuntimeError(f"Failed to generate voice from edge-tts: {text}") from e
            time.sleep(1)
            logging.info(f"Retry {attempt} to generate voice from edge-tts: {text}")
            attempt += 1

    # asyncio.run() only returns after _main() has finished writing, so a
    # single existence check replaces the former 60-second polling loop.
    if not os.path.exists(local_voice_file):
        logging.error("轮询等待edge-tts生成语音和字幕异常")
        raise RuntimeError("Failed to generate voice from edge-tts")
    return True


def get_mp3_duration(file_path):
    """Return the duration of an MP3 file in seconds, or 0 if unreadable.

    The lookup stays best-effort: any error while opening or parsing the
    file is logged and reported as a duration of 0.
    """
    from mutagen.mp3 import MP3

    try:
        return MP3(file_path).info.length
    except Exception as e:
        # `except Exception` instead of a bare `except` so that
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        logging.warning("Failed to read MP3 duration from %s: %s", file_path, e)
        return 0


def delete_files(current_dir, prefix):
    """Delete every entry in *current_dir* whose name starts with *prefix*.

    Each outcome is printed; an entry that cannot be removed (``OSError``)
    is reported and skipped.
    """
    # Snapshot the matching names from the directory listing first.
    matching = [name for name in os.listdir(current_dir) if name.startswith(prefix)]
    for name in matching:
        target = os.path.join(current_dir, name)
        try:
            os.remove(target)
            print(f"文件 {target} 删除成功")
        except OSError as err:
            print(f"删除文件 {target} 失败: {err}")


def remove_chinese(text):
    """Return *text* with characters in U+4E00..U+9FFF removed.

    Every removed character is printed as it is encountered.
    """
    kept = []
    for ch in text:
        if not ('\u4e00' <= ch <= '\u9fff'):
            kept.append(ch)
        else:
            print(f'有中文:{ch}')
    return ''.join(kept)


def re_gen_subtitle_wrap(srt_file, fontsize, max_width, scaling, wrap_srt_file):
    """Rewrite an .srt file with subtitle text wrapped to the video width.

    The maximum characters per line is derived from *fontsize*, *scaling*
    and *max_width* (90% of the width is usable, and one character is taken
    to be ``2 * fontsize * scaling`` wide). Entries whose text is exactly
    ``< No Speech >`` are written with empty text. Both files are UTF-8.
    """
    char_ratio = 2
    usable_ratio = 0.9
    # Character width estimate, scaled relative to a 1024-wide video.
    char_width = float(char_ratio) * float(fontsize) * scaling
    # Number of characters after which a line is wrapped.
    line_limit = int(max_width * float(usable_ratio) / float(char_width))

    with open(srt_file, 'r', encoding='utf-8') as src:
        raw = src.read()
    entry_re = re.compile(r'(\d+)\n(.*?) --> (.*?)\n(.*?)(?:\n\n|$)', re.DOTALL)
    entries = entry_re.findall(raw)

    with open(wrap_srt_file, "w", encoding="utf-8") as dst:
        for number, begin, finish, body in entries:
            pieces = wrap(body, width=line_limit)
            wrapped = "" if body == "< No Speech >" else "\n".join(pieces)
            # index line, timing line, text, blank separator line
            dst.write(f"{number}\n{begin} --> {finish}\n{wrapped}\n\n")


def gen_srt_from_new_segments(segments):
    """Build ``srt.Subtitle`` objects from re-timed segments.

    Each segment supplies ``new_start`` / ``new_end`` (seconds) and
    ``content``; the content is stripped and passed through the OpenCC
    ``t2s`` converter. All subtitles are created with ``index=0``.
    """
    converter = opencc.OpenCC("t2s")
    subtitles = []
    for seg in segments:
        begin, finish, text = seg["new_start"], seg["new_end"], seg["content"]
        subtitles.append(
            srt.Subtitle(
                index=0,
                start=datetime.timedelta(seconds=begin),
                end=datetime.timedelta(seconds=finish),
                content=converter.convert(text.strip()),
            )
        )
    return subtitles


def combine_video_with_subtitle(video_file, subtitle_file):
    """Burn *subtitle_file* into *video_file* with ffmpeg.

    A wrapped copy of the subtitle file (``<name>_wrap``) is generated to
    fit the video width, then ffmpeg renders it onto the video. The audio
    stream is copied unchanged.

    Returns:
        Path of the produced video (``<video>_sub``).

    Raises:
        RuntimeError: If ffmpeg exits with a non-zero status.
        OSError: If the ffmpeg executable cannot be started.
    """
    import shlex  # only needed to print a copy-pasteable command line

    print(f"开始给视频加字幕（合并）")
    result_file = add_anything(video_file, 'sub')
    font_name = 'STHeitiSC-Light'
    font_size = 8
    font_color = '000000'
    back_color = '00FFFF'
    border_style = 3
    outline = 1
    shadow = 0
    marginv = 50
    # Open the video only to read its width, and always release it.
    clip = VideoFileClip(video_file)
    try:
        video_width = clip.w
    finally:
        clip.close()
    # Scale factor relative to a 1024-wide reference video.
    scaling = float(video_width) / 1024
    # Regenerate the subtitle file with line wrapping for this font size.
    wrap_sub_fn = add_anything(subtitle_file, 'wrap')
    re_gen_subtitle_wrap(subtitle_file, font_size, video_width, scaling, wrap_sub_fn)
    # Subtitle style passed to the ffmpeg subtitles filter.
    style = f'FontName={font_name},FontSize={font_size},PrimaryColour=&H{font_color},OutlineColour=&H{back_color},' \
            f'BorderStyle={border_style},Outline={outline},Shadow={shadow},MarginV={marginv}'
    # Pass an argument list (no shell) so paths containing spaces or shell
    # metacharacters are handed to ffmpeg verbatim.
    # NOTE(review): characters special to ffmpeg's filter syntax (':', "'",
    # '\\') inside wrap_sub_fn are still not escaped.
    cmd = [
        "ffmpeg", "-y",
        "-i", video_file,
        "-vf", f"subtitles={wrap_sub_fn}:force_style='{style}'",
        "-c:a", "copy",
        result_file,
    ]
    print(" ".join(shlex.quote(part) for part in cmd))
    result = subprocess.call(cmd)
    # 0 means success; anything else is a failure.
    if result != 0:
        # Raising a plain string is itself a TypeError; raise a real exception.
        raise RuntimeError("combine_video_with_subtitle 给视频加字幕异常")
    return result_file
