From 40830af43b5d1312a9eac56a365a3a9a8a078201 Mon Sep 17 00:00:00 2001 From: zhouchengbo <zhouchengbo@wmdigit.com> Date: Sat, 25 Nov 2023 16:31:01 +0800 Subject: [PATCH] =?UTF-8?q?=E5=B0=86=E6=B5=81=E7=A8=8B=E6=94=B9=E4=B8=BA?= =?UTF-8?q?=E4=B8=A4=E6=AD=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 3 +- app/video_cut/autocut/utils.py | 41 +++++++++++- app/video_cut/autocut/wmdigit_cut.py | 32 ++++----- app/video_cut/autocut/wmdigit_transcribe.py | 22 +++--- app/video_cut/main.py | 74 +++++++++++++-------- start.py | 10 +-- 6 files changed, 118 insertions(+), 64 deletions(-) diff --git a/README.md b/README.md index 51b783d..990849e 100644 --- a/README.md +++ b/README.md @@ -56,6 +56,5 @@ gunicorn start:app -c ./gunicorn.conf.py mac调试 source /Users/zhouchengbo/Projects/active311 -<!-- python test.py --wmdigit ./inputs/20231103/3068_1698978622.mp4 --force --lang=en --vad=0 --> -直接flaskèµ·æ¥åŽè°ƒ flask --app start run --debug +<!-- python test.py --wmdigit ./inputs/20231103/3068_1698978622.mp4 --force --lang=en --vad=0 --> diff --git a/app/video_cut/autocut/utils.py b/app/video_cut/autocut/utils.py index c7d6a22..151b98c 100644 --- a/app/video_cut/autocut/utils.py +++ b/app/video_cut/autocut/utils.py @@ -3,6 +3,7 @@ import os import re import datetime import time +import json import ffmpeg import numpy as np @@ -496,10 +497,11 @@ def combine_video_with_subtitle(video_file, subtitle_file): print(f"å¼€å§‹ç»™è§†é¢‘åŠ å—幕(åˆå¹¶ï¼‰") result_file = add_anything(video_file, 'sub') font_name = 'STHeitiSC-Light' - font_size = 8 + font_size = 9 font_color = '000000' - back_color = '00FFFF' - border_style = 3 + # back_color = '00FFFF' # 黄色 + back_color = 'FFFFFF' # 白色 + border_style = 1 # 1=æè¾¹ï¼Œ3=整个å—体背景 outline = 1 shadow = 0 marginv= 50 @@ -526,3 +528,36 @@ def combine_video_with_subtitle(video_file, subtitle_file): return result_file else: raise "combine_video_with_subtitle ç»™è§†é¢‘åŠ å—幕异常" + + +def gen_subjson_from_subs(subs): + # 定义å—典列表,用于å˜å‚¨æ¯ä¸ªå—å¹•é¡¹çš„ä¿¡æ¯ + subtitle_list = [] + # é历æ¯ä¸ªå—幕项,将其转æ¢ä¸ºå—典形å¼ï¼Œæ·»åŠ åˆ°å—å…¸åˆ—è¡¨ä¸ + for i, subtitle in enumerate(subs): + subtitle_dict = { + 'index': str(i+1), + 'start': str(subtitle.start), + 'end': str(subtitle.end), + 'content': subtitle.content + } + subtitle_list.append(subtitle_dict) + return subtitle_list + + +def gen_srt_from_subjson(sub_json, srt_file): + def format_time(time): + return time.replace('.', ',') + # é历æ¯ä¸ªå—幕项 + with open(srt_file, "w", encoding="utf-8") as f: + for subtitle in sub_json: + # 获å–å—幕索引ã€å¼€å§‹æ—¶é—´ã€ç»“æŸæ—¶é—´å’Œå†…容 + index = subtitle['index'] + start = format_time(subtitle['start']) + end = format_time(subtitle['end']) + content = subtitle['content'] + # 构建å•个å—幕的文本 + subtitle_text = f"{index}\n{start} --> {end}\n{content}\n\n" + # å°†å•个å—å¹•æ·»åŠ åˆ°æ€»ä½“å—å¹•æ–‡æœ¬ä¸ + f.write(subtitle_text) + diff --git a/app/video_cut/autocut/wmdigit_cut.py b/app/video_cut/autocut/wmdigit_cut.py index ca223ee..81821d7 100644 --- a/app/video_cut/autocut/wmdigit_cut.py +++ b/app/video_cut/autocut/wmdigit_cut.py @@ -32,22 +32,22 @@ class Cutter: with open(fns["srt"], encoding=self.args.encoding) as f: subs = list(srt.parse(f.read())) - if fns["md"]: - md = utils.MD(fns["md"], self.args.encoding) - # if not md.done_editing(): - # return - index = [] - for mark, sent in md.tasks(): - # print(mark, sent) - # if not mark: - # continue - m = re.match(r"\[(\d+)", sent.strip()) - if m: - index.append(int(m.groups()[0])) - subs = [s for s in subs if s.index in index] - logging.info(f'Cut {fns["media"]} based on {fns["srt"]} and {fns["md"]}') - else: - logging.info(f'Cut {fns["media"]} based on {fns["srt"]}') + # if fns["md"]: + # md = utils.MD(fns["md"], self.args.encoding) + # # if not md.done_editing(): + # # return + # index = [] + # for mark, sent in md.tasks(): + # # print(mark, sent) + # # if not mark: + # # continue + # m = re.match(r"\[(\d+)", sent.strip()) + # if m: + # index.append(int(m.groups()[0])) + # subs = [s for s in subs if s.index in index] + # logging.info(f'Cut {fns["media"]} based on {fns["srt"]} and {fns["md"]}') + # else: + # logging.info(f'Cut {fns["media"]} based on {fns["srt"]}') segments = [] # Avoid disordered subtitles diff --git a/app/video_cut/autocut/wmdigit_transcribe.py b/app/video_cut/autocut/wmdigit_transcribe.py index 900ee49..2b63acd 100644 --- a/app/video_cut/autocut/wmdigit_transcribe.py +++ b/app/video_cut/autocut/wmdigit_transcribe.py @@ -49,14 +49,16 @@ class Transcribe: speech_array_indices = self._detect_voice_activity(audio) transcribe_results = self._transcribe(input, audio, speech_array_indices) - output = name + ".srt" + srt_fn = name + ".srt" + md_fn = name + ".md" # print(transcribe_results) - self._save_srt(output, transcribe_results) - logging.info(f"Transcribed {input} to {output}") - self._save_md(name + ".md", output, input, bool(self.args.wmdigit)) - logging.info(f'Saved texts to {name + ".md"} to mark sentences') + srt_json = self._save_srt(srt_fn, transcribe_results) + logging.info(f"Transcribed {input} to {srt_fn}") + self._save_md(md_fn, srt_fn, input, bool(self.args.wmdigit)) + logging.info(f'Saved texts to {md_fn} to mark sentences') + return md_fn, srt_fn, srt_json except Exception as e: - if retry == 3: + if retry == 1: raise RuntimeError(f"Failed to Transcribing {e}") else: time.sleep(1) @@ -122,13 +124,17 @@ class Transcribe: def _save_srt(self, output, transcribe_results): subs = self.whisper_model.gen_srt(transcribe_results) # print(subs) - # 把å—幕ä¸çš„䏿–‡åŽ»æŽ‰ + # 把翻译åŽçš„å—幕ä¸çš„䏿–‡åŽ»æŽ‰ï¼Œæœ‰çš„ç¿»è¯‘çš„ä¸å¥½ if self.args.lang not in ("zh","Japanese"): for s in subs: s.content = utils.remove_chinese(s.content) - + # 生æˆå—幕文件 with open(output, "wb") as f: f.write(srt.compose(subs).encode(self.args.encoding, "replace")) + # 生æˆå—幕json + sub_json = utils.gen_subjson_from_subs(subs) + # print(sub_json) + return sub_json def _save_md(self, md_fn, srt_fn, video_fn, is_auto_edit=False): with open(srt_fn, encoding=self.args.encoding) as f: diff --git a/app/video_cut/main.py b/app/video_cut/main.py index f00801d..730633e 100644 --- a/app/video_cut/main.py +++ b/app/video_cut/main.py @@ -13,63 +13,83 @@ def validate_request(): error("傿•°é”™è¯¯: 缺少instances傿•°") instance = request.json['instances'] - if len(instance) <= 0 or 'video' not in instance[0] or 'lang' not in instance[0]: - error('傿•°é”™è¯¯: instances缺少:video,lang') - + if len(instance) <= 0 or 'video' not in instance[0] or 'steps' not in instance[0] or 'lang' not in instance[0]: + error('傿•°é”™è¯¯: instances需è¦: video, steps, lang') video = instance[0]['video'] + steps = instance[0]['steps'] lang = instance[0]['lang'] - with_sub = True if instance[0]['with_sub'] else False - if len(video) <= 0: error('傿•°é”™è¯¯: video 傿•°ä¸å¯ä¸ºç©º') + if len(steps) <= 0 or ('step1' not in steps and 'step2' not in steps): + error('傿•°é”™è¯¯: steps 傿•°ä¸å¯ä¸ºç©ºï¼Œå–值为:step1ã€step2') if len(lang) <= 0: - error('傿•°é”™è¯¯: lang 傿•°ä¸å¯ä¸ºç©º') + error('傿•°é”™è¯¯: lang 傿•°ä¸å¯ä¸ºç©º') + + with_sub = instance[0]['with_sub'] if 'with_sub' in instance[0] and instance[0]['with_sub'] else False - # if not video.startswith('http'): - # error('video 必须是网络路径') + srt_json_in = "" + if len(steps) == 1 and 'step2' in steps: + if 'srt_json_in' not in instance[0]: + error('傿•°é”™è¯¯: instances需è¦: srt_json_in') + srt_json_in = instance[0]['srt_json_in'] + if len(srt_json_in) <= 0: + error('傿•°é”™è¯¯: srt_json_in 傿•°ä¸å¯ä¸ºç©º') - return video, lang, with_sub + return video, steps, lang, with_sub, srt_json_in # 主线 def video_cut_pipeline(logger, args, whispermodel): # print(args) time_record = [] - media_file, lang, with_sub = validate_request() + media_file, steps, lang, with_sub, srt_json_in = validate_request() all_start_time = time.time() - srt_fn = utils.change_ext(media_file, "srt") - md_fn = utils.change_ext(media_file, "md") args.lang = lang + srt_json_out = "" + final_video_fn = "" + if 'step1' in steps: + # 1ã€ä»Žè§†é¢‘生æˆå—幕 + time_record, srt_json_out = step1(time_record, logger, args, whispermodel, media_file, lang) + if 'step2' in steps: + srt_json = srt_json_out if 'step1' in steps else srt_json_in + # 2ã€ä»Žå—幕生æˆcut视频 + time_record, final_video_fn = step2(time_record, logger, args, media_file, with_sub, srt_json) + time_record.append(f"所有æ¥éª¤å¤„ç†å®Œæ¯•。耗时: {time.time() - all_start_time:.4f} ç§’") + for i in time_record: + print(i) + # å¦‚æžœåªæœ‰step1则返回å—å¹•ï¼Œå¦‚æžœåªæœ‰step2或者1å’Œ2都有,则返回最终结果 + if len(steps) == 1 and steps[0] == 'step1': + return srt_json_out + else: + return final_video_fn - # 1ã€è§†é¢‘生æˆsrtå’Œmd + +def step1(time_record, logger, args, whispermodel, media_file, lang): start_time = time.time() if lang != "zh": prompt = f"Subtitles must be fully translated into {lang}" else: prompt = "" logger.info(f"Transcribe {media_file} lang={lang} promt={prompt}") - args.inputs = [media_file] - wmdigit_transcribe.Transcribe(args, whispermodel).run() + md_fn, srt_fn, srt_json_out = wmdigit_transcribe.Transcribe(args, whispermodel).run() time_record.append(f"视频生æˆsrtå’Œmd。耗时: {time.time() - start_time:.4f} ç§’") + return time_record, srt_json_out - # 2ã€ä»Žå—幕生æˆcut视频 + +def step2(time_record, logger, args, media_file, with_sub, srt_json_in): start_time = time.time() + srt_fn = utils.change_ext(media_file, "srt") + md_fn = utils.change_ext(media_file, "md") + # æ ¹æ® srt_json_in 釿–°ç”Ÿæˆ srt 文件 + utils.gen_srt_from_subjson(srt_json_in, srt_fn) + logger.info(f"Cut {media_file} srt={srt_fn} sub={with_sub}") args.inputs = [media_file, md_fn, srt_fn] final_video_fn, new_srt_fn = wmdigit_cut.Cutter(args).run() time_record.append(f"从å—幕生æˆcut视频。耗时: {time.time() - start_time:.4f} ç§’") - - # 3ã€åŠ å—幕 + # åŠ å—幕 if with_sub: start_time = time.time() final_video_fn = utils.combine_video_with_subtitle(final_video_fn, new_srt_fn) time_record.append(f"åŠ å—幕耗时: {time.time() - start_time:.4f} ç§’") - - time_record.append(f"所有æ¥éª¤å¤„ç†å®Œæ¯•。耗时: {time.time() - all_start_time:.4f} ç§’") - for i in time_record: - print(i) - - return final_video_fn, srt_fn - - - + return time_record, final_video_fn \ No newline at end of file diff --git a/start.py b/start.py index d69cb6f..eb67f82 100644 --- a/start.py +++ b/start.py @@ -19,20 +19,14 @@ app.config['SQLALCHEMY_DATABASE_URI'] = 'mysql+pymysql://root:WMdigit.2018@rm-2z db.init_app(app) logger.info("start services") -# 全局路径 -root = './' -app_root = os.path.join(root, 'app') -input_root = os.path.join(root, 'inputs') -output_root = os.path.join(root, 'outputs') - # é¢„åŠ è½½æ¨¡åž‹ args, whispermodel = main_args(logger, app.config['DEBUG']) # å¯¹å¤–æŽ¥å£ @app.route('/wm_video_cut', methods=['POST']) def wm_video_cut(): - final_video_url, srt_url = video_cut_pipeline(logger, args, whispermodel) - return jsonify({"result": {"final_video_url": final_video_url, "srt_url": srt_url}}) + result = video_cut_pipeline(logger, args, whispermodel) + return jsonify({"result": result}) @app.route('/upload_file', methods=['POST']) -- 2.18.1