From 2c7a85ec44c183df865bcf5b6d399c8feee1878c Mon Sep 17 00:00:00 2001
From: jcy <goukenyi@outlook.com>
Date: Tue, 13 May 2025 09:45:31 +0800
Subject: [PATCH] =?UTF-8?q?=E5=88=A0=E9=99=A4=204=EF=BC=8C0/=E6=AF=95?=
 =?UTF-8?q?=E8=AE=BE.py?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 4，0/毕设.py | 525 ---------------------------------------------------
 1 file changed, 525 deletions(-)
 delete mode 100644 4，0/毕设.py

diff --git a/4，0/毕设.py b/4，0/毕设.py
deleted file mode 100644
index c6ac889..0000000
--- a/4，0/毕设.py
+++ /dev/null
@@ -1,525 +0,0 @@
-import os
-import re
-import base64
-import warnings
-import imageio
-import whisper
-import numpy as np
-import pdfkit
-from PIL import Image
-from skimage.metrics import structural_similarity as ssim
-from collections import defaultdict
-import subprocess
-from jinja2 import Environment
-import cv2
-from scipy.signal import find_peaks
-from skimage.feature import hog
-from skimage.color import rgb2gray
-
-# ======================== 全局配置 ========================
-warnings.filterwarnings("ignore", message="FP16 is not supported on CPU; using FP32 instead")
-VIDEO_PATH = "D:/python项目文件/1/input.mp4"  # 输入视频路径
-MODEL_DIR = "D:/whisper_models"  # Whisper模型目录
-FFMPEG_BIN = r"D:\Program Files\ffmpeg\bin"  # FFmpeg安装路径
-WKHTMLTOPDF_PATH = r"D:\wkhtmltopdf\bin\wkhtmltopdf.exe"  # wkhtmltopdf路径
-SSIM_THRESHOLD = 0.85  # 关键帧去重阈值
-FRAME_INTERVAL = 2  # 抽帧间隔（秒）
-OUTPUT_DIR = "D:\桌面文件\python\output"  # 输出目录
-TRANSITION_WORDS = ["接下来", "下一页", "如图"]  # 过渡词过滤列
-HOG_THRESHOLD = 0.7  # HOG特征相似度阈值
-COLOR_THRESHOLD = 0.8  # 颜色直方图相似度阈值
-WHISPER_MODEL = "base"  # Whisper模型大小
-PROFESSIONAL_TERMS = {
-    "人工智能": "AI",
-    "机器学习": "ML",
-    "深度学习": "DL",
-    "神经网络": "NN",
-    "卷积神经网络": "CNN",
-    "循环神经网络": "RNN",
-    "自然语言处理": "NLP",
-    "计算机视觉": "CV",
-    "大数据": "Big Data",
-    "云计算": "Cloud Computing"
-}  # 专业术语词典
-
-
-# ========================================================
-
-# ---------------------- 核心功能模块 ----------------------
-class VideoProcessor:
-    def __init__(self):
-        os.environ["PATH"] = FFMPEG_BIN + os.pathsep + os.environ["PATH"]
-
-    @staticmethod
-    def check_ffmpeg():
-        """验证FFmpeg可用性"""
-        try:
-            subprocess.run(["ffmpeg", "-version"], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-            print("[系统] FFmpeg验证成功")
-            return True
-        except Exception as e:
-            print(f"[错误] FFmpeg验证失败: {str(e)}")
-            return False
-
-    @staticmethod
-    def calculate_color_histogram(frame):
-        """计算颜色直方图特征"""
-        hist = cv2.calcHist([frame], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256])
-        cv2.normalize(hist, hist)
-        return hist.flatten()
-
-    @staticmethod
-    def calculate_hog_features(frame):
-        """计算HOG特征"""
-        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
-        features = hog(gray, orientations=8, pixels_per_cell=(16, 16),
-                       cells_per_block=(1, 1), visualize=False)
-        return features
-
-    @staticmethod
-    def is_ppt_transition(frame1, frame2):
-        """检测PPT页面切换"""
-        # 转换为灰度图
-        gray1 = cv2.cvtColor(frame1, cv2.COLOR_BGR2GRAY)
-        gray2 = cv2.cvtColor(frame2, cv2.COLOR_BGR2GRAY)
-
-        # 计算边缘
-        edges1 = cv2.Canny(gray1, 100, 200)
-        edges2 = cv2.Canny(gray2, 100, 200)
-
-        # 计算边缘差异
-        diff = cv2.absdiff(edges1, edges2)
-        return np.mean(diff) > 50  # 阈值可调整
-
-    @staticmethod
-    def extract_keyframes(video_path: str) -> tuple:
-        """提取去重关键帧及其时间戳（多特征融合）"""
-        try:
-            reader = imageio.get_reader(video_path)
-            fps = reader.get_meta_data()["fps"]
-            keyframes = []
-            timestamps = []
-            prev_frame = None
-            prev_features = None
-
-            for idx, frame in enumerate(reader):
-                curr_time = idx / fps
-                if curr_time - (timestamps[-1] if timestamps else 0) < FRAME_INTERVAL:
-                    continue
-
-                # 多特征相似度计算
-                if prev_frame is not None:
-                    # 1. SSIM相似度
-                    gray_prev = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY)
-                    gray_curr = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
-                    ssim_score = ssim(gray_prev, gray_curr)
-
-                    # 2. 颜色直方图相似度
-                    hist_prev = VideoProcessor.calculate_color_histogram(prev_frame)
-                    hist_curr = VideoProcessor.calculate_color_histogram(frame)
-                    color_sim = cv2.compareHist(hist_prev, hist_curr, cv2.HISTCMP_CORREL)
-
-                    # 3. HOG特征相似度
-                    hog_prev = VideoProcessor.calculate_hog_features(prev_frame)
-                    hog_curr = VideoProcessor.calculate_hog_features(frame)
-                    hog_sim = np.dot(hog_prev, hog_curr) / (np.linalg.norm(hog_prev) * np.linalg.norm(hog_curr))
-
-                    # 4. PPT页面切换检测
-                    is_transition = VideoProcessor.is_ppt_transition(prev_frame, frame)
-
-                    # 综合判断
-                    if (ssim_score > SSIM_THRESHOLD and
-                            color_sim > COLOR_THRESHOLD and
-                            hog_sim > HOG_THRESHOLD and
-                            not is_transition):
-                        continue
-
-                keyframes.append(Image.fromarray(frame))
-                timestamps.append(curr_time)
-                prev_frame = frame
-
-            reader.close()
-            print(f"[图像] 关键帧提取完成，共{len(keyframes)}帧")
-            return keyframes, timestamps
-        except Exception as e:
-            print(f"[错误] 关键帧提取失败: {str(e)}")
-            return [], []
-
-    @staticmethod
-    def transcribe_audio(video_path: str, model_name: str = WHISPER_MODEL) -> list:
-        """语音识别与时间戳获取（支持中英文混合）"""
-        try:
-            # 使用更大的模型提高准确率
-            model = whisper.load_model(model_name, device="cpu", download_root=MODEL_DIR)
-
-            # 配置转写参数
-            result = model.transcribe(
-                video_path,
-                fp16=False,
-                language="zh",
-                task="transcribe",
-                verbose=True,
-                initial_prompt="这是一段包含中英文的PPT讲解视频，可能包含专业术语。"
-            )
-
-            segments = result.get("segments", [])
-
-            # 后处理：专业术语替换
-            for seg in segments:
-                text = seg["text"]
-                for cn, en in PROFESSIONAL_TERMS.items():
-                    text = text.replace(cn, f"{cn}({en})")
-                seg["text"] = text
-
-            return segments
-        except Exception as e:
-            print(f"[错误] 语音识别失败: {str(e)}")
-            return []
-
-
-# ---------------------- 业务逻辑模块 ----------------------
-class ContentAligner:
-    @staticmethod
-    def generate_page_intervals(timestamps: list, duration: float) -> list:
-        """生成页面时间段"""
-        intervals = []
-        for i in range(len(timestamps)):
-            start = timestamps[i]
-            end = timestamps[i + 1] if i < len(timestamps) - 1 else duration
-            intervals.append((start, end))
-        return intervals
-
-    @staticmethod
-    def calculate_text_similarity(text1: str, text2: str) -> float:
-        """计算文本相似度"""
-        # 使用简单的词重叠度计算
-        words1 = set(re.findall(r'\w+', text1.lower()))
-        words2 = set(re.findall(r'\w+', text2.lower()))
-        if not words1 or not words2:
-            return 0.0
-        intersection = words1.intersection(words2)
-        union = words1.union(words2)
-        return len(intersection) / len(union)
-
-    @staticmethod
-    def find_best_match(segments: list, intervals: list) -> dict:
-        """为每个语音片段找到最佳匹配的页面"""
-        page_texts = defaultdict(list)
-        unmatched_segments = []
-
-        for seg in segments:
-            seg_start = seg["start"]
-            best_match = None
-            best_score = 0.0
-
-            # 1. 首先尝试时间戳匹配
-            for page_idx, (start, end) in enumerate(intervals):
-                if start <= seg_start < end:
-                    best_match = page_idx
-                    break
-
-            # 2. 如果时间戳匹配失败，尝试文本相似度匹配
-            if best_match is None:
-                for page_idx, (start, end) in enumerate(intervals):
-                    # 获取该页面的所有文本
-                    page_text = " ".join([s["text"] for s in segments if start <= s["start"] < end])
-                    similarity = ContentAligner.calculate_text_similarity(seg["text"], page_text)
-                    if similarity > best_score:
-                        best_score = similarity
-                        best_match = page_idx
-
-            # 3. 如果找到匹配，添加到对应页面
-            if best_match is not None:
-                page_texts[best_match].append(seg)
-            else:
-                unmatched_segments.append(seg)
-
-        # 4. 处理未匹配的片段
-        if unmatched_segments:
-            print(f"[警告] 发现{len(unmatched_segments)}个未匹配的语音片段")
-            # 将未匹配片段添加到最近的页面
-            for seg in unmatched_segments:
-                closest_page = min(range(len(intervals)),
-                                   key=lambda i: abs(seg["start"] - (intervals[i][0] + intervals[i][1]) / 2))
-                page_texts[closest_page].append(seg)
-
-        return page_texts
-
-    @staticmethod
-    def align_content(video_path: str, timestamps: list) -> list:
-        """语音-画面对齐主逻辑（改进版）"""
-        try:
-            reader = imageio.get_reader(video_path)
-            duration = reader.get_meta_data()["duration"]
-            reader.close()
-        except:
-            duration = timestamps[-1] + FRAME_INTERVAL
-
-        segments = VideoProcessor.transcribe_audio(video_path)
-        intervals = ContentAligner.generate_page_intervals(timestamps, duration)
-
-        # 使用改进的匹配算法
-        page_texts = ContentAligner.find_best_match(segments, intervals)
-
-        # 生成最终的对齐数据
-        aligned_data = []
-        for idx in range(len(intervals)):
-            text = " ".join([seg["text"] for seg in page_texts.get(idx, [])])
-            aligned_data.append({
-                "page": idx,
-                "start_time": intervals[idx][0],
-                "end_time": intervals[idx][1],
-                "text": text
-            })
-
-        return aligned_data
-
-
-# ---------------------- 摘要生成模块 ----------------------
-class SummaryGenerator:
-    @staticmethod
-    def optimize_text(text: str) -> str:
-        """文本浓缩优化"""
-        sentences = re.split(r'[。！？]', text)
-        filtered = []
-        seen = set()
-        for sent in sentences:
-            sent = sent.strip()
-            if (len(sent) >= 10
-                    and not any(word in sent for word in TRANSITION_WORDS)
-                    and sent not in seen):
-                filtered.append(sent)
-                seen.add(sent)
-        return '。'.join(filtered) + '。' if filtered else ""
-
-    @staticmethod
-    def generate_html(aligned_data: list, keyframes: list, output_dir: str):
-        """生成HTML报告"""
-        pages_data = []
-        temp_img_dir = os.path.join(output_dir, "_temp_images")
-        os.makedirs(temp_img_dir, exist_ok=True)
-
-        try:
-            for idx, frame in enumerate(keyframes):
-                img_path = os.path.join(temp_img_dir, f"page_{idx}.jpg")
-                frame.save(img_path)
-                with open(img_path, "rb") as f:
-                    img_data = base64.b64encode(f.read()).decode("utf-8")
-
-                pages_data.append({
-                    "num": idx + 1,
-                    "time": f"{aligned_data[idx]['start_time']:.1f}s - {aligned_data[idx]['end_time']:.1f}s",
-                    "image": f"data:image/jpeg;base64,{img_data}",
-                    "text": SummaryGenerator.optimize_text(aligned_data[idx]["text"])
-                })
-
-            env = Environment()
-            template = env.from_string("""
-            <!DOCTYPE html>
-            <html>
-            <head>
-                <meta charset="UTF-8">
-                <title>PPT视频摘要报告</title>
-                <style>
-                    .page { margin: 20px; padding: 15px; border: 1px solid #eee; }
-                    img { max-width: 800px; height: auto; }
-                    .timestamp { color: #666; font-size: 0.9em; }
-                    .content { margin-top: 10px; }
-                </style>
-            </head>
-            <body>
-                <h1>PPT视频结构化摘要</h1>
-                {% for page in pages %}
-                <div class="page">
-                    <h2>页面 {{ page.num }}</h2>
-                    <div class="timestamp">{{ page.time }}</div>
-                    <img src="{{ page.image }}" alt="页面截图">
-                    <div class="content">{{ page.text }}</div>
-                </div>
-                {% endfor %}
-            </body>
-            </html>
-            """)
-
-            output_path = os.path.join(output_dir, "summary.html")
-            with open(output_path, "w", encoding="utf-8") as f:
-                f.write(template.render(pages=pages_data))
-            print(f"[输出] HTML报告已生成: {output_path}")
-        finally:
-            for f in os.listdir(temp_img_dir):
-                os.remove(os.path.join(temp_img_dir, f))
-            os.rmdir(temp_img_dir)
-
-    @staticmethod
-    def generate_pdf(aligned_data: list, keyframes: list, output_dir: str):
-        """生成PDF报告（优化版）"""
-        temp_html = os.path.join(output_dir, "_temp_pdf.html")
-        temp_img_dir = os.path.join(output_dir, "_temp_pdf_images")
-        os.makedirs(temp_img_dir, exist_ok=True)
-
-        try:
-            # 使用绝对路径
-            abs_temp_img_dir = os.path.abspath(temp_img_dir)
-
-            html_content = """
-            <!DOCTYPE html>
-            <html>
-            <head>
-                <meta charset="UTF-8">
-                <style>
-                    @page { 
-                        margin: 20mm;
-                        size: A4;
-                    }
-                    body {
-                        font-family: "Microsoft YaHei", "SimSun", sans-serif;
-                        line-height: 1.6;
-                        color: #333;
-                    }
-                    .page {
-                        page-break-inside: avoid;
-                        margin-bottom: 30px;
-                        padding: 20px;
-                        border: 1px solid #eee;
-                        border-radius: 5px;
-                    }
-                    .page-number {
-                        text-align: center;
-                        font-size: 24pt;
-                        font-weight: bold;
-                        margin-bottom: 20px;
-                        color: #2c3e50;
-                    }
-                    .timestamp {
-                        color: #666;
-                        font-size: 12pt;
-                        margin-bottom: 15px;
-                    }
-                    .image-container {
-                        text-align: center;
-                        margin: 20px 0;
-                    }
-                    img {
-                        max-width: 90% !important;
-                        height: auto;
-                        display: block;
-                        margin: 0 auto;
-                        box-shadow: 0 2px 5px rgba(0,0,0,0.1);
-                    }
-                    .content {
-                        font-size: 14pt;
-                        line-height: 1.8;
-                        margin-top: 20px;
-                        padding: 15px;
-                        background: #f9f9f9;
-                        border-radius: 5px;
-                    }
-                    .professional-term {
-                        color: #2980b9;
-                        font-weight: bold;
-                    }
-                </style>
-            </head>
-            <body>
-                <h1 style="text-align: center; color: #2c3e50; margin-bottom: 40px;">PPT视频结构化摘要</h1>
-                {% for page in pages %}
-                <div class="page">
-                    <div class="page-number">第 {{ page.num }} 页</div>
-                    <div class="timestamp">时间区间：{{ page.time }}</div>
-                    <div class="image-container">
-                        <img src="{{ page.image_path }}" alt="页面截图">
-                    </div>
-                    <div class="content">{{ page.text }}</div>
-                </div>
-                {% endfor %}
-            </body>
-            </html>
-            """
-
-            pages_data = []
-            for idx, frame in enumerate(keyframes):
-                img_filename = f"page_{idx}.jpg"
-                img_path = os.path.join(abs_temp_img_dir, img_filename)
-                frame.save(img_path)
-                pages_data.append({
-                    "num": idx + 1,
-                    "time": f"{aligned_data[idx]['start_time']:.1f}s - {aligned_data[idx]['end_time']:.1f}s",
-                    "image_path": img_path,
-                    "text": SummaryGenerator.optimize_text(aligned_data[idx]["text"])
-                })
-
-            env = Environment()
-            template = env.from_string(html_content)
-            with open(temp_html, "w", encoding="utf-8") as f:
-                f.write(template.render(pages=pages_data))
-
-            # PDF生成选项
-            options = {
-                "enable-local-file-access": "",
-                "encoding": "UTF-8",
-                "margin-top": "20mm",
-                "margin-bottom": "20mm",
-                "margin-left": "20mm",
-                "margin-right": "20mm",
-                "no-stop-slow-scripts": "",
-                "quiet": "",
-                "dpi": "300",
-                "image-quality": "100",
-                "enable-smart-shrinking": "",
-                "print-media-type": ""
-            }
-            config = pdfkit.configuration(wkhtmltopdf=WKHTMLTOPDF_PATH)
-
-            pdf_path = os.path.join(output_dir, "summary.pdf")
-            pdfkit.from_file(
-                temp_html,
-                pdf_path,
-                configuration=config,
-                options=options
-            )
-            print(f"[输出] PDF报告已生成: {pdf_path}")
-
-        finally:
-            # 清理临时文件
-            if os.path.exists(temp_html):
-                os.remove(temp_html)
-            if os.path.exists(temp_img_dir):
-                for f in os.listdir(temp_img_dir):
-                    os.remove(os.path.join(temp_img_dir, f))
-                os.rmdir(temp_img_dir)
-
-    @classmethod
-    def generate_all(cls, aligned_data: list, keyframes: list, output_dir: str):
-        """生成所有格式报告"""
-        cls.generate_html(aligned_data, keyframes, output_dir)
-        cls.generate_pdf(aligned_data, keyframes, output_dir)
-
-
-# ---------------------- 主流程控制 ----------------------
-def main_process():
-    # 环境检查
-    processor = VideoProcessor()
-    if not processor.check_ffmpeg():
-        return
-    if not os.path.exists(VIDEO_PATH):
-        print(f"[错误] 视频文件不存在: {VIDEO_PATH}")
-        return
-
-    # 关键帧提取
-    keyframes, timestamps = processor.extract_keyframes(VIDEO_PATH)
-    if not keyframes:
-        print("[错误] 未提取到关键帧")
-        return
-
-    # 内容对齐
-    aligned_data = ContentAligner.align_content(VIDEO_PATH, timestamps)
-    if not aligned_data:
-        print("[警告] 未识别到有效语音内容")
-
-    # 生成摘要
-    os.makedirs(OUTPUT_DIR, exist_ok=True)
-    SummaryGenerator.generate_all(aligned_data, keyframes, OUTPUT_DIR)
-
-
-if __name__ == "__main__":
-    main_process()