From 40830af43b5d1312a9eac56a365a3a9a8a078201 Mon Sep 17 00:00:00 2001
From: zhouchengbo <zhouchengbo@wmdigit.com>
Date: Sat, 25 Nov 2023 16:31:01 +0800
Subject: [PATCH] =?UTF-8?q?=E5=B0=86=E6=B5=81=E7=A8=8B=E6=94=B9=E4=B8=BA?=
 =?UTF-8?q?=E4=B8=A4=E6=AD=A5?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 README.md                                   |  3 +-
 app/video_cut/autocut/utils.py              | 41 +++++++++++-
 app/video_cut/autocut/wmdigit_cut.py        | 32 ++++-----
 app/video_cut/autocut/wmdigit_transcribe.py | 22 +++---
 app/video_cut/main.py                       | 74 +++++++++++++--------
 start.py                                    | 10 +--
 6 files changed, 118 insertions(+), 64 deletions(-)

diff --git a/README.md b/README.md
index 51b783d..990849e 100644
--- a/README.md
+++ b/README.md
@@ -56,6 +56,5 @@ gunicorn start:app -c ./gunicorn.conf.py
 
 mac调试
 source /Users/zhouchengbo/Projects/active311
-<!-- python test.py --wmdigit ./inputs/20231103/3068_1698978622.mp4 --force --lang=en --vad=0 -->
-直接flask起来后调
 flask --app start run --debug
+<!-- python test.py --wmdigit ./inputs/20231103/3068_1698978622.mp4 --force --lang=en --vad=0 -->
diff --git a/app/video_cut/autocut/utils.py b/app/video_cut/autocut/utils.py
index c7d6a22..151b98c 100644
--- a/app/video_cut/autocut/utils.py
+++ b/app/video_cut/autocut/utils.py
@@ -3,6 +3,7 @@ import os
 import re
 import datetime
 import time
+import json
 
 import ffmpeg
 import numpy as np
@@ -496,10 +497,11 @@ def combine_video_with_subtitle(video_file, subtitle_file):
     print(f"开始给视频加字幕(合并)")
     result_file = add_anything(video_file, 'sub')
     font_name = 'STHeitiSC-Light'
-    font_size = 8
+    font_size = 9
     font_color = '000000'
-    back_color = '00FFFF'
-    border_style = 3
+    # back_color = '00FFFF'  # 黄色
+    back_color = 'FFFFFF'  # 白色
+    border_style = 1  # 1=描边,3=整个字体背景
     outline = 1
     shadow = 0
     marginv= 50
@@ -526,3 +528,36 @@ def combine_video_with_subtitle(video_file, subtitle_file):
         return result_file
     else:
         raise "combine_video_with_subtitle 给视频加字幕异常"    
+
+
+def gen_subjson_from_subs(subs):
+    """Turn subtitle objects into plain dicts: a 1-based ``index``, ``start``
+    and ``end`` passed through ``str()``, and ``content`` left unchanged.
+    """
+    return [
+        {
+            'index': str(number),
+            'start': str(entry.start),
+            'end': str(entry.end),
+            'content': entry.content,
+        }
+        for number, entry in enumerate(subs, start=1)
+    ]
+
+
+def gen_srt_from_subjson(sub_json, srt_file):
+    """Write subtitle dicts (index/start/end/content) to ``srt_file`` in SRT format."""
+    def format_time(time):
+        # str(timedelta) gives "H:MM:SS[.ffffff]"; SRT requires "HH:MM:SS,mmm".
+        clock, _, frac = time.replace(',', '.').partition('.')
+        try:
+            hours, minutes, seconds = (int(part) for part in clock.split(':'))
+        except ValueError:
+            return time.replace('.', ',')  # unknown layout: keep the old behaviour
+        return f"{hours:02d}:{minutes:02d}:{seconds:02d},{(frac + '000')[:3]}"
+    with open(srt_file, "w", encoding="utf-8") as f:
+        for subtitle in sub_json:
+            start = format_time(subtitle['start'])
+            end = format_time(subtitle['end'])
+            f.write(f"{subtitle['index']}\n{start} --> {end}\n{subtitle['content']}\n\n")
+
diff --git a/app/video_cut/autocut/wmdigit_cut.py b/app/video_cut/autocut/wmdigit_cut.py
index ca223ee..81821d7 100644
--- a/app/video_cut/autocut/wmdigit_cut.py
+++ b/app/video_cut/autocut/wmdigit_cut.py
@@ -32,22 +32,22 @@ class Cutter:
         with open(fns["srt"], encoding=self.args.encoding) as f:
             subs = list(srt.parse(f.read()))
 
-        if fns["md"]:
-            md = utils.MD(fns["md"], self.args.encoding)
-            # if not md.done_editing():
-            #     return
-            index = []
-            for mark, sent in md.tasks():
-                # print(mark, sent)
-                # if not mark:
-                #     continue
-                m = re.match(r"\[(\d+)", sent.strip())
-                if m:
-                    index.append(int(m.groups()[0]))
-            subs = [s for s in subs if s.index in index]
-            logging.info(f'Cut {fns["media"]} based on {fns["srt"]} and {fns["md"]}')
-        else:
-            logging.info(f'Cut {fns["media"]} based on {fns["srt"]}')
+        # if fns["md"]:
+        #     md = utils.MD(fns["md"], self.args.encoding)
+        #     # if not md.done_editing():
+        #     #     return
+        #     index = []
+        #     for mark, sent in md.tasks():
+        #         # print(mark, sent)
+        #         # if not mark:
+        #         #     continue
+        #         m = re.match(r"\[(\d+)", sent.strip())
+        #         if m:
+        #             index.append(int(m.groups()[0]))
+        #     subs = [s for s in subs if s.index in index]
+        #     logging.info(f'Cut {fns["media"]} based on {fns["srt"]} and {fns["md"]}')
+        # else:
+        #     logging.info(f'Cut {fns["media"]} based on {fns["srt"]}')
 
         segments = []
         # Avoid disordered subtitles
diff --git a/app/video_cut/autocut/wmdigit_transcribe.py b/app/video_cut/autocut/wmdigit_transcribe.py
index 900ee49..2b63acd 100644
--- a/app/video_cut/autocut/wmdigit_transcribe.py
+++ b/app/video_cut/autocut/wmdigit_transcribe.py
@@ -49,14 +49,16 @@ class Transcribe:
                 speech_array_indices = self._detect_voice_activity(audio)
                 transcribe_results = self._transcribe(input, audio, speech_array_indices)
 
-                output = name + ".srt"
+                srt_fn = name + ".srt"
+                md_fn = name + ".md"
                 # print(transcribe_results)
-                self._save_srt(output, transcribe_results)
-                logging.info(f"Transcribed {input} to {output}")
-                self._save_md(name + ".md", output, input, bool(self.args.wmdigit))
-                logging.info(f'Saved texts to {name + ".md"} to mark sentences')
+                srt_json = self._save_srt(srt_fn, transcribe_results)
+                logging.info(f"Transcribed {input} to {srt_fn}")
+                self._save_md(md_fn, srt_fn, input, bool(self.args.wmdigit))
+                logging.info(f'Saved texts to {md_fn} to mark sentences')
+                return md_fn, srt_fn, srt_json
             except Exception as e:
-                if retry == 3:
+                if retry == 1:
                     raise RuntimeError(f"Failed to Transcribing {e}")
                 else:
                     time.sleep(1)
@@ -122,13 +124,17 @@ class Transcribe:
     def _save_srt(self, output, transcribe_results):
         subs = self.whisper_model.gen_srt(transcribe_results)
         # print(subs)
-        # 把字幕中的中文去掉
+        # 把翻译后的字幕中的中文去掉,有的翻译的不好
         if self.args.lang not in ("zh","Japanese"):
             for s in subs:
                 s.content = utils.remove_chinese(s.content)
-        
+        # 生成字幕文件
         with open(output, "wb") as f:
             f.write(srt.compose(subs).encode(self.args.encoding, "replace"))
+        # 生成字幕json
+        sub_json = utils.gen_subjson_from_subs(subs)
+        # print(sub_json)
+        return sub_json
 
     def _save_md(self, md_fn, srt_fn, video_fn, is_auto_edit=False):
         with open(srt_fn, encoding=self.args.encoding) as f:
diff --git a/app/video_cut/main.py b/app/video_cut/main.py
index f00801d..730633e 100644
--- a/app/video_cut/main.py
+++ b/app/video_cut/main.py
@@ -13,63 +13,83 @@ def validate_request():
         error("参数错误: 缺少instances参数")
 
     instance = request.json['instances']
-    if len(instance) <= 0 or 'video' not in instance[0] or 'lang' not in instance[0]:
-        error('参数错误: instances缺少:video,lang')
-
+    if len(instance) <= 0 or 'video' not in instance[0] or 'steps' not in instance[0] or 'lang' not in instance[0]:
+        error('参数错误: instances需要: video, steps, lang')
     video = instance[0]['video']
+    steps = instance[0]['steps']
     lang = instance[0]['lang']
-    with_sub = True if instance[0]['with_sub'] else False
-
     if len(video) <= 0:
         error('参数错误: video 参数不可为空')
+    if len(steps) <= 0 or ('step1' not in steps and 'step2' not in steps):
+        error('参数错误: steps 参数不可为空,取值为:step1、step2')
     if len(lang) <= 0:
-        error('参数错误: lang 参数不可为空')
+            error('参数错误: lang 参数不可为空')
+
+    with_sub = instance[0]['with_sub'] if 'with_sub' in instance[0] and instance[0]['with_sub'] else False
 
-    # if not video.startswith('http'):
-    #     error('video 必须是网络路径')
+    srt_json_in = ""
+    if len(steps) == 1 and 'step2' in steps:
+        if 'srt_json_in' not in instance[0]:
+            error('参数错误: instances需要: srt_json_in')
+        srt_json_in = instance[0]['srt_json_in']
+        if len(srt_json_in) <= 0:
+            error('参数错误: srt_json_in 参数不可为空')
 
-    return video, lang, with_sub
+    return video, steps, lang, with_sub, srt_json_in
 
 
 # 主线
 def video_cut_pipeline(logger, args, whispermodel):
     # print(args)
     time_record = []
-    media_file, lang, with_sub = validate_request()
+    media_file, steps, lang, with_sub, srt_json_in = validate_request()
     all_start_time = time.time()
-    srt_fn = utils.change_ext(media_file, "srt")
-    md_fn = utils.change_ext(media_file, "md")
     args.lang = lang
+    srt_json_out = ""
+    final_video_fn = ""
+    if 'step1' in steps:
+        # 1、从视频生成字幕
+        time_record, srt_json_out = step1(time_record, logger, args, whispermodel, media_file, lang)
+    if 'step2' in steps:
+        srt_json = srt_json_out if 'step1' in steps else srt_json_in
+        # 2、从字幕生成cut视频
+        time_record, final_video_fn = step2(time_record, logger, args, media_file, with_sub, srt_json)
+    time_record.append(f"所有步骤处理完毕。耗时: {time.time() - all_start_time:.4f} 秒")
+    for i in time_record:
+        print(i)
+    # 如果只有step1则返回字幕,如果只有step2或者1和2都有,则返回最终结果
+    if len(steps) == 1 and steps[0] == 'step1':
+        return srt_json_out
+    else:
+        return final_video_fn
 
-    # 1、视频生成srt和md
+
+def step1(time_record, logger, args, whispermodel, media_file, lang):
     start_time = time.time()
     if lang != "zh":
         prompt = f"Subtitles must be fully translated into {lang}"
     else:
         prompt = ""
     logger.info(f"Transcribe {media_file} lang={lang} promt={prompt}")
-
     args.inputs = [media_file]
-    wmdigit_transcribe.Transcribe(args, whispermodel).run()
+    md_fn, srt_fn, srt_json_out = wmdigit_transcribe.Transcribe(args, whispermodel).run()
     time_record.append(f"视频生成srt和md。耗时: {time.time() - start_time:.4f} 秒")
+    return time_record, srt_json_out
 
-    # 2、从字幕生成cut视频
+
+def step2(time_record, logger, args, media_file, with_sub, srt_json_in):
     start_time = time.time()
+    srt_fn = utils.change_ext(media_file, "srt")
+    md_fn = utils.change_ext(media_file, "md")
+    # 根据 srt_json_in 重新生成 srt 文件
+    utils.gen_srt_from_subjson(srt_json_in, srt_fn)
+    logger.info(f"Cut {media_file} srt={srt_fn} sub={with_sub}")
     args.inputs = [media_file, md_fn, srt_fn]
     final_video_fn, new_srt_fn = wmdigit_cut.Cutter(args).run()
     time_record.append(f"从字幕生成cut视频。耗时: {time.time() - start_time:.4f} 秒")
-    
-    # 3、加字幕
+    # 加字幕
     if with_sub:
         start_time = time.time()
         final_video_fn = utils.combine_video_with_subtitle(final_video_fn, new_srt_fn)
         time_record.append(f"加字幕耗时: {time.time() - start_time:.4f} 秒")
-
-    time_record.append(f"所有步骤处理完毕。耗时: {time.time() - all_start_time:.4f} 秒")
-    for i in time_record:
-        print(i)
-
-    return final_video_fn, srt_fn
-
-
-
+    return time_record, final_video_fn
\ No newline at end of file
diff --git a/start.py b/start.py
index d69cb6f..eb67f82 100644
--- a/start.py
+++ b/start.py
@@ -19,20 +19,14 @@ app.config['SQLALCHEMY_DATABASE_URI'] = 'mysql+pymysql://root:WMdigit.2018@rm-2z
 db.init_app(app)
 logger.info("start services")
 
-# 全局路径
-root = './'
-app_root = os.path.join(root, 'app')
-input_root = os.path.join(root, 'inputs')
-output_root = os.path.join(root, 'outputs')
-
 # 预加载模型
 args, whispermodel = main_args(logger, app.config['DEBUG'])
 
 # 对外接口
 @app.route('/wm_video_cut', methods=['POST'])
 def wm_video_cut():
-    final_video_url, srt_url = video_cut_pipeline(logger, args, whispermodel)
-    return jsonify({"result": {"final_video_url": final_video_url, "srt_url": srt_url}})
+    result = video_cut_pipeline(logger, args, whispermodel)
+    return jsonify({"result": result})
 
 
 @app.route('/upload_file', methods=['POST'])
-- 
2.18.1