{"id":2568,"date":"2025-11-27T17:47:02","date_gmt":"2025-11-27T09:47:02","guid":{"rendered":"https:\/\/sanlangcode.com\/?p=2568"},"modified":"2025-11-27T17:47:29","modified_gmt":"2025-11-27T09:47:29","slug":"%e5%a4%a7%e6%a8%a1%e5%9e%8b%e9%a9%b1%e5%8a%a8%e7%9a%84%e4%bc%9a%e8%ae%ae%e4%ba%ba%e5%a3%b0%e5%86%85%e5%ae%b9%e6%80%bb%e7%bb%93%ef%bc%9a%e6%8a%80%e6%9c%af%e5%ae%9e%e7%8e%b0%e4%b8%8e%e6%88%90%e7%86%9f","status":"publish","type":"post","link":"https:\/\/sanlangcode.com\/index.php\/2025\/11\/27\/%e5%a4%a7%e6%a8%a1%e5%9e%8b%e9%a9%b1%e5%8a%a8%e7%9a%84%e4%bc%9a%e8%ae%ae%e4%ba%ba%e5%a3%b0%e5%86%85%e5%ae%b9%e6%80%bb%e7%bb%93%ef%bc%9a%e6%8a%80%e6%9c%af%e5%ae%9e%e7%8e%b0%e4%b8%8e%e6%88%90%e7%86%9f\/","title":{"rendered":"\u5927\u6a21\u578b\u9a71\u52a8\u7684\u4f1a\u8bae\u4eba\u58f0\u5185\u5bb9\u603b\u7ed3\u6280\u672f\u6d45\u6790"},"content":{"rendered":"\n<h1 class=\"wp-block-heading\"><\/h1>\n\n\n\n<h2 class=\"wp-block-heading\">\u4e00\u3001\u5f15\u8a00<\/h2>\n\n\n\n<p>\u4f1a\u8bae\u5185\u5bb9\u603b\u7ed3\u662f\u63d0\u5347\u534f\u4f5c\u6548\u7387\u7684\u5173\u952e\u3002\u7ed3\u5408\u8bed\u97f3\u8bc6\u522b\u3001\u81ea\u7136\u8bed\u8a00\u5904\u7406\u548c\u5927\u8bed\u8a00\u6a21\u578b\uff0c\u53ef\u81ea\u52a8\u751f\u6210\u7ed3\u6784\u5316\u6458\u8981\u3002\u672c\u6587\u4ecb\u7ecd\u6280\u672f\u8981\u70b9\u4e0e\u6210\u719f\u5b9e\u73b0\u8def\u5f84\u3002<\/p>\n\n\n\n<h2 class=\"wp-block-heading\">\u4e8c\u3001\u6280\u672f\u67b6\u6784\u6982\u89c8<\/h2>\n\n\n\n<h3 class=\"wp-block-heading\">2.1 \u6574\u4f53\u6d41\u7a0b<\/h3>\n\n\n\n<pre class=\"wp-block-code\"><code>\u97f3\u9891\u8f93\u5165 \u2192 \u8bed\u97f3\u8bc6\u522b(ASR) \u2192 \u6587\u672c\u9884\u5904\u7406 \u2192 \u5927\u6a21\u578b\u603b\u7ed3 \u2192 \u7ed3\u6784\u5316\u8f93\u51fa\n    \u2193           \u2193              \u2193            \u2193            \u2193\n \u964d\u566a\u5904\u7406   \u8bf4\u8bdd\u4eba\u5206\u79bb    \u6587\u672c\u6e05\u6d17      \u5185\u5bb9\u7406\u89e3      
\u683c\u5f0f\u4f18\u5316<\/code><\/pre>\n\n\n\n<h3 class=\"wp-block-heading\">2.2 \u6838\u5fc3\u6a21\u5757<\/h3>\n\n\n\n<ol class=\"wp-block-list\">\n<li><strong>\u97f3\u9891\u5904\u7406\u6a21\u5757<\/strong>\uff1a\u964d\u566a\u3001\u683c\u5f0f\u8f6c\u6362\u3001\u5206\u6bb5<\/li>\n\n\n\n<li><strong>\u8bed\u97f3\u8bc6\u522b\u6a21\u5757<\/strong>\uff1aASR\u3001\u8bf4\u8bdd\u4eba\u8bc6\u522b\u3001\u65f6\u95f4\u6233<\/li>\n\n\n\n<li><strong>\u6587\u672c\u5904\u7406\u6a21\u5757<\/strong>\uff1a\u6e05\u6d17\u3001\u5206\u6bb5\u3001\u53bb\u91cd<\/li>\n\n\n\n<li><strong>\u5927\u6a21\u578b\u6a21\u5757<\/strong>\uff1a\u5185\u5bb9\u7406\u89e3\u3001\u6458\u8981\u751f\u6210\u3001\u7ed3\u6784\u5316<\/li>\n\n\n\n<li><strong>\u540e\u5904\u7406\u6a21\u5757<\/strong>\uff1a\u683c\u5f0f\u4f18\u5316\u3001\u5173\u952e\u4fe1\u606f\u63d0\u53d6<\/li>\n<\/ol>\n\n\n\n<h2 class=\"wp-block-heading\">\u4e09\u3001\u6838\u5fc3\u6280\u672f\u8981\u70b9<\/h2>\n\n\n\n<h3 class=\"wp-block-heading\">3.1 \u8bed\u97f3\u8bc6\u522b\uff08ASR\uff09<\/h3>\n\n\n\n<h4 class=\"wp-block-heading\">\u4e3b\u6d41\u65b9\u6848\u5bf9\u6bd4<\/h4>\n\n\n\n<figure class=\"wp-block-table\"><table class=\"has-fixed-layout\"><thead><tr><th>\u65b9\u6848<\/th><th>\u4f18\u52bf<\/th><th>\u9002\u7528\u573a\u666f<\/th><th>\u6210\u672c<\/th><\/tr><\/thead><tbody><tr><td><strong>Whisper (OpenAI)<\/strong><\/td><td>\u9ad8\u51c6\u786e\u7387\u3001\u591a\u8bed\u8a00\u3001\u5f00\u6e90<\/td><td>\u901a\u7528\u573a\u666f<\/td><td>\u514d\u8d39<\/td><\/tr><tr><td><strong>Azure Speech<\/strong><\/td><td>\u4f01\u4e1a\u7ea7\u3001\u9ad8\u53ef\u7528<\/td><td>\u5546\u4e1a\u5e94\u7528<\/td><td>\u6309\u91cf\u4ed8\u8d39<\/td><\/tr><tr><td><strong>Google 
Speech-to-Text<\/strong><\/td><td>\u4e91\u7aef\u670d\u52a1\u3001\u6613\u96c6\u6210<\/td><td>Web\u5e94\u7528<\/td><td>\u6309\u91cf\u4ed8\u8d39<\/td><\/tr><tr><td><strong>\u963f\u91cc\u4e91ASR<\/strong><\/td><td>\u4e2d\u6587\u4f18\u5316\u3001\u56fd\u5185\u670d\u52a1<\/td><td>\u56fd\u5185\u9879\u76ee<\/td><td>\u6309\u91cf\u4ed8\u8d39<\/td><\/tr><tr><td><strong>\u8baf\u98deASR<\/strong><\/td><td>\u4e2d\u6587\u8bc6\u522b\u4f18\u79c0<\/td><td>\u4e2d\u6587\u573a\u666f<\/td><td>\u6309\u91cf\u4ed8\u8d39<\/td><\/tr><\/tbody><\/table><\/figure>\n\n\n\n<h4 class=\"wp-block-heading\">Whisper \u5b9e\u73b0\u793a\u4f8b<\/h4>\n\n\n\n<pre class=\"wp-block-code\"><code>import whisper\n\n# \u52a0\u8f7d\u6a21\u578b\uff08base\/small\/medium\/large\uff09\nmodel = whisper.load_model(\"base\")\n\n# \u8f6c\u5f55\u97f3\u9891\nresult = model.transcribe(\"meeting.mp3\", \n    language=\"zh\",           # \u6307\u5b9a\u8bed\u8a00\n    task=\"transcribe\",       # \u8f6c\u5f55\u4efb\u52a1\n    verbose=True,           # \u663e\u793a\u8fdb\u5ea6\n    fp16=False              # \u4f7f\u7528FP32\u7cbe\u5ea6\n)\n\n# \u83b7\u53d6\u5e26\u65f6\u95f4\u6233\u7684\u6587\u672c\ntranscription = result&#91;\"text\"]\nsegments = result&#91;\"segments\"]  # \u5305\u542b\u65f6\u95f4\u6233\u7684\u7247\u6bb5<\/code><\/pre>\n\n\n\n<h4 class=\"wp-block-heading\">\u8bf4\u8bdd\u4eba\u5206\u79bb\uff08Speaker Diarization\uff09<\/h4>\n\n\n\n<pre class=\"wp-block-code\"><code>from pyannote.audio import Pipeline\n\n# \u52a0\u8f7d\u8bf4\u8bdd\u4eba\u5206\u79bb\u6a21\u578b\npipeline = Pipeline.from_pretrained(\n    \"pyannote\/speaker-diarization-3.1\",\n    use_auth_token=\"YOUR_TOKEN\"\n)\n\n# \u6267\u884c\u8bf4\u8bdd\u4eba\u5206\u79bb\ndiarization = pipeline(\"meeting.wav\")\n\n# \u83b7\u53d6\u8bf4\u8bdd\u4eba\u6807\u7b7e\nfor turn, _, speaker in diarization.itertracks(yield_label=True):\n    print(f\"{speaker}: {turn.start:.1f}s - {turn.end:.1f}s\")<\/code><\/pre>\n\n\n\n<h3 class=\"wp-block-heading\">3.2 
\u6587\u672c\u9884\u5904\u7406<\/h3>\n\n\n\n<h4 class=\"wp-block-heading\">\u5173\u952e\u5904\u7406\u6b65\u9aa4<\/h4>\n\n\n\n<pre class=\"wp-block-code\"><code>import re\nfrom datetime import datetime\n\ndef preprocess_transcription(segments, diarization):\n    \"\"\"\n    \u9884\u5904\u7406\u8f6c\u5f55\u6587\u672c\n    \"\"\"\n    processed_text = &#91;]\n\n    for segment in segments:\n        # 1. \u6587\u672c\u6e05\u6d17\n        text = clean_text(segment&#91;'text'])\n\n        # 2. \u8bf4\u8bdd\u4eba\u6807\u6ce8\n        speaker = get_speaker(segment&#91;'start'], diarization)\n\n        # 3. \u65f6\u95f4\u6233\u683c\u5f0f\u5316\n        timestamp = format_timestamp(segment&#91;'start'])\n\n        # 4. \u6784\u5efa\u7ed3\u6784\u5316\u6570\u636e\n        processed_text.append({\n            'speaker': speaker,\n            'timestamp': timestamp,\n            'text': text,\n            'duration': segment&#91;'end'] - segment&#91;'start']\n        })\n\n    return processed_text\n\ndef clean_text(text):\n    \"\"\"\u6587\u672c\u6e05\u6d17\"\"\"\n    # \u53bb\u9664\u586b\u5145\u8bcd\n    text = re.sub(r'\\b(\u55ef|\u554a|\u90a3\u4e2a|\u8fd9\u4e2a|\u5c31\u662f)\\b', '', text)\n    # \u53bb\u9664\u91cd\u590d\u6807\u70b9\n    text = re.sub(r'&#91;\u3002\uff0c]{2,}', '\u3002', text)\n    # \u53bb\u9664\u591a\u4f59\u7a7a\u683c\n    text = re.sub(r'\\s+', ' ', text)\n    return text.strip()<\/code><\/pre>\n\n\n\n<h3 class=\"wp-block-heading\">3.3 \u5927\u6a21\u578b\u603b\u7ed3<\/h3>\n\n\n\n<h4 class=\"wp-block-heading\">\u65b9\u6848\u4e00\uff1a\u4f7f\u7528 OpenAI GPT<\/h4>\n\n\n\n<pre class=\"wp-block-code\"><code>from openai import OpenAI\n\nclient = OpenAI(api_key=\"YOUR_API_KEY\")\n\ndef summarize_with_gpt(transcription_text):\n    \"\"\"\n    \u4f7f\u7528GPT\u603b\u7ed3\u4f1a\u8bae\u5185\u5bb9\n    \"\"\"\n    prompt = f\"\"\"\n\u8bf7\u5bf9\u4ee5\u4e0b\u4f1a\u8bae\u5185\u5bb9\u8fdb\u884c\u603b\u7ed3\uff0c\u8981\u6c42\uff1a\n1. 
\u63d0\u53d6\u5173\u952e\u8bae\u9898\u548c\u51b3\u7b56\n2. \u5217\u51fa\u884c\u52a8\u9879\uff08Action Items\uff09\n3. \u6807\u6ce8\u91cd\u8981\u65f6\u95f4\u8282\u70b9\n4. \u8bc6\u522b\u4e0d\u540c\u53d1\u8a00\u4eba\u7684\u4e3b\u8981\u89c2\u70b9\n\n\u4f1a\u8bae\u5185\u5bb9\uff1a\n{transcription_text}\n\"\"\"\n\n    response = client.chat.completions.create(\n        model=\"gpt-4-turbo-preview\",\n        messages=&#91;\n            {\"role\": \"system\", \"content\": \"\u4f60\u662f\u4e00\u4e2a\u4e13\u4e1a\u7684\u4f1a\u8bae\u8bb0\u5f55\u52a9\u624b\u3002\"},\n            {\"role\": \"user\", \"content\": prompt}\n        ],\n        temperature=0.3,\n        max_tokens=2000\n    )\n\n    return response.choices&#91;0].message.content<\/code><\/pre>\n\n\n\n<h4 class=\"wp-block-heading\">\u65b9\u6848\u4e8c\uff1a\u4f7f\u7528 Claude (Anthropic)<\/h4>\n\n\n\n<pre class=\"wp-block-code\"><code>import anthropic\n\nclient = anthropic.Anthropic(api_key=\"YOUR_API_KEY\")\n\ndef summarize_with_claude(transcription_text):\n    \"\"\"\n    \u4f7f\u7528Claude\u603b\u7ed3\u4f1a\u8bae\u5185\u5bb9\n    \"\"\"\n    message = client.messages.create(\n        model=\"claude-3-opus-20240229\",\n        max_tokens=2000,\n        temperature=0.3,\n        system=\"\u4f60\u662f\u4e00\u4e2a\u4e13\u4e1a\u7684\u4f1a\u8bae\u8bb0\u5f55\u52a9\u624b\uff0c\u64c5\u957f\u63d0\u53d6\u5173\u952e\u4fe1\u606f\u548c\u7ed3\u6784\u5316\u603b\u7ed3\u3002\",\n        messages=&#91;\n            {\n                \"role\": \"user\",\n                \"content\": f\"\u8bf7\u603b\u7ed3\u4ee5\u4e0b\u4f1a\u8bae\u5185\u5bb9\uff1a\\n\\n{transcription_text}\"\n            }\n        ]\n    )\n\n    return message.content&#91;0].text<\/code><\/pre>\n\n\n\n<h4 class=\"wp-block-heading\">\u65b9\u6848\u4e09\uff1a\u4f7f\u7528\u56fd\u4ea7\u5927\u6a21\u578b\uff08\u667a\u8c31GLM\u3001\u901a\u4e49\u5343\u95ee\u7b49\uff09<\/h4>\n\n\n\n<pre class=\"wp-block-code\"><code>import zhipuai\n\nzhipuai.api_key = 
\"YOUR_API_KEY\"\n\ndef summarize_with_glm(transcription_text):\n    \"\"\"\n    \u4f7f\u7528\u667a\u8c31GLM\u603b\u7ed3\u4f1a\u8bae\u5185\u5bb9\n    \"\"\"\n    response = zhipuai.model_api.invoke(\n        model=\"glm-4\",\n        prompt=&#91;\n            {\n                \"role\": \"system\",\n                \"content\": \"\u4f60\u662f\u4e00\u4e2a\u4e13\u4e1a\u7684\u4f1a\u8bae\u8bb0\u5f55\u52a9\u624b\u3002\"\n            },\n            {\n                \"role\": \"user\",\n                \"content\": f\"\u8bf7\u603b\u7ed3\u4ee5\u4e0b\u4f1a\u8bae\u5185\u5bb9\uff1a\\n\\n{transcription_text}\"\n            }\n        ],\n        temperature=0.3,\n        top_p=0.7\n    )\n\n    return response&#91;'data']&#91;'choices']&#91;0]&#91;'content']<\/code><\/pre>\n\n\n\n<h3 class=\"wp-block-heading\">3.4 \u7ed3\u6784\u5316\u8f93\u51fa<\/h3>\n\n\n\n<h4 class=\"wp-block-heading\">\u5b9a\u4e49\u8f93\u51fa\u683c\u5f0f<\/h4>\n\n\n\n<pre class=\"wp-block-code\"><code>def generate_structured_summary(raw_summary, transcription_data):\n    \"\"\"\n    \u751f\u6210\u7ed3\u6784\u5316\u4f1a\u8bae\u603b\u7ed3\n    \"\"\"\n    structured_summary = {\n        \"meeting_info\": {\n            \"date\": get_meeting_date(),\n            \"duration\": calculate_duration(transcription_data),\n            \"participants\": extract_participants(transcription_data)\n        },\n        \"key_topics\": extract_topics(raw_summary),\n        \"decisions\": extract_decisions(raw_summary),\n        \"action_items\": extract_action_items(raw_summary),\n        \"timeline\": build_timeline(transcription_data),\n        \"full_summary\": raw_summary\n    }\n\n    return structured_summary\n\ndef extract_action_items(text):\n    \"\"\"\n    \u63d0\u53d6\u884c\u52a8\u9879\n    \"\"\"\n    # \u4f7f\u7528\u6b63\u5219\u8868\u8fbe\u5f0f\u6216\u5927\u6a21\u578b\u63d0\u53d6\n    pattern = r'(?:\u884c\u52a8\u9879|\u5f85\u529e|TODO|Action Item)&#91;\uff1a:]\\s*(.+?)(?:\\n|$)'\n    action_items = 
re.findall(pattern, text, re.IGNORECASE)\n    return action_items<\/code><\/pre>\n\n\n\n<h2 class=\"wp-block-heading\">\u56db\u3001\u6210\u719f\u6280\u672f\u7ebf\u8def<\/h2>\n\n\n\n<h3 class=\"wp-block-heading\">4.1 \u7ebf\u8def\u4e00\uff1aWhisper + GPT-4\uff08\u63a8\u8350\uff09<\/h3>\n\n\n\n<p><strong>\u9002\u7528\u573a\u666f<\/strong>\uff1a\u901a\u7528\u573a\u666f\uff0c\u8ffd\u6c42\u9ad8\u51c6\u786e\u7387<\/p>\n\n\n\n<p><strong>\u6280\u672f\u6808<\/strong>\uff1a<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li>ASR: Whisper (large-v3)<\/li>\n\n\n\n<li>\u8bf4\u8bdd\u4eba\u5206\u79bb: pyannote.audio<\/li>\n\n\n\n<li>\u5927\u6a21\u578b: GPT-4 Turbo<\/li>\n\n\n\n<li>\u90e8\u7f72: Python + FastAPI<\/li>\n<\/ul>\n\n\n\n<p><strong>\u4f18\u52bf<\/strong>\uff1a<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u51c6\u786e\u7387\u9ad8<\/li>\n\n\n\n<li>\u591a\u8bed\u8a00\u652f\u6301<\/li>\n\n\n\n<li>\u5f00\u6e90\u53ef\u63a7<\/li>\n<\/ul>\n\n\n\n<p><strong>\u5b9e\u73b0\u793a\u4f8b<\/strong>\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>from fastapi import FastAPI, UploadFile, File\nimport whisper\nfrom openai import OpenAI\n\napp = FastAPI()\n\n# \u521d\u59cb\u5316\u6a21\u578b\uff08\u542f\u52a8\u65f6\u52a0\u8f7d\uff09\nwhisper_model = whisper.load_model(\"large-v3\")\nopenai_client = OpenAI()\n\n@app.post(\"\/summarize-meeting\")\nasync def summarize_meeting(audio: UploadFile = File(...)):\n    # 1. \u4fdd\u5b58\u97f3\u9891\u6587\u4ef6\n    audio_path = f\"\/tmp\/{audio.filename}\"\n    with open(audio_path, \"wb\") as f:\n        f.write(await audio.read())\n\n    # 2. \u8bed\u97f3\u8bc6\u522b\n    result = whisper_model.transcribe(audio_path, language=\"zh\")\n    transcription = result&#91;\"text\"]\n\n    # 3. \u5927\u6a21\u578b\u603b\u7ed3\n    summary = summarize_with_gpt(transcription)\n\n    # 4. 
\u8fd4\u56de\u7ed3\u679c\n    return {\n        \"transcription\": transcription,\n        \"summary\": summary\n    }<\/code><\/pre>\n\n\n\n<h3 class=\"wp-block-heading\">4.2 \u7ebf\u8def\u4e8c\uff1aAzure Speech + Claude<\/h3>\n\n\n\n<p><strong>\u9002\u7528\u573a\u666f<\/strong>\uff1a\u4f01\u4e1a\u7ea7\u5e94\u7528\uff0c\u9700\u8981\u9ad8\u53ef\u7528<\/p>\n\n\n\n<p><strong>\u6280\u672f\u6808<\/strong>\uff1a<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li>ASR: Azure Speech Services<\/li>\n\n\n\n<li>\u8bf4\u8bdd\u4eba\u5206\u79bb: Azure Speaker Recognition<\/li>\n\n\n\n<li>\u5927\u6a21\u578b: Claude 3 Opus<\/li>\n\n\n\n<li>\u90e8\u7f72: Node.js + Express<\/li>\n<\/ul>\n\n\n\n<p><strong>\u4f18\u52bf<\/strong>\uff1a<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u4f01\u4e1a\u7ea7SLA<\/li>\n\n\n\n<li>\u6613\u4e8e\u6269\u5c55<\/li>\n\n\n\n<li>\u591a\u533a\u57df\u90e8\u7f72<\/li>\n<\/ul>\n\n\n\n<p><strong>\u5b9e\u73b0\u793a\u4f8b<\/strong>\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>const sdk = require(\"microsoft-cognitiveservices-speech-sdk\");\nconst Anthropic = require(\"@anthropic-ai\/sdk\");\n\n\/\/ Azure Speech \u914d\u7f6e\nconst speechConfig = sdk.SpeechConfig.fromSubscription(\n    process.env.AZURE_SPEECH_KEY,\n    process.env.AZURE_SPEECH_REGION\n);\n\n\/\/ Claude \u5ba2\u6237\u7aef\nconst anthropic = new Anthropic({\n    apiKey: process.env.ANTHROPIC_API_KEY\n});\n\nasync function transcribeAndSummarize(audioBuffer) {\n    \/\/ 1. 
Azure Speech \u8bc6\u522b\n    const audioConfig = sdk.AudioConfig.fromStream(\n        sdk.AudioInputStream.createPushStream()\n    );\n    const recognizer = new sdk.SpeechRecognizer(speechConfig, audioConfig);\n\n    const transcription = await new Promise((resolve, reject) =&gt; {\n        let fullText = \"\";\n        recognizer.recognized = (s, e) =&gt; {\n            if (e.result.reason === sdk.ResultReason.RecognizedSpeech) {\n                fullText += e.result.text + \" \";\n            }\n        };\n        recognizer.sessionStopped = () =&gt; resolve(fullText);\n        recognizer.startContinuousRecognitionAsync();\n    });\n\n    \/\/ 2. Claude \u603b\u7ed3\n    const message = await anthropic.messages.create({\n        model: \"claude-3-opus-20240229\",\n        max_tokens: 2000,\n        messages: &#91;{\n            role: \"user\",\n            content: `\u8bf7\u603b\u7ed3\u4ee5\u4e0b\u4f1a\u8bae\u5185\u5bb9\uff1a\\n\\n${transcription}`\n        }]\n    });\n\n    return {\n        transcription,\n        summary: message.content&#91;0].text\n    };\n}<\/code><\/pre>\n\n\n\n<h3 class=\"wp-block-heading\">4.3 \u7ebf\u8def\u4e09\uff1a\u672c\u5730\u90e8\u7f72\u65b9\u6848\uff08Whisper + \u672c\u5730\u5927\u6a21\u578b\uff09<\/h3>\n\n\n\n<p><strong>\u9002\u7528\u573a\u666f<\/strong>\uff1a\u6570\u636e\u5b89\u5168\u8981\u6c42\u9ad8\uff0c\u9700\u8981\u79bb\u7ebf\u8fd0\u884c<\/p>\n\n\n\n<p><strong>\u6280\u672f\u6808<\/strong>\uff1a<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li>ASR: Whisper (\u672c\u5730\u90e8\u7f72)<\/li>\n\n\n\n<li>\u5927\u6a21\u578b: Llama 3 \/ Qwen \/ ChatGLM (\u672c\u5730\u90e8\u7f72)<\/li>\n\n\n\n<li>\u90e8\u7f72: Python + Ollama \/ vLLM<\/li>\n<\/ul>\n\n\n\n<p><strong>\u4f18\u52bf<\/strong>\uff1a<\/p>\n\n\n\n<ul 
class=\"wp-block-list\">\n<li>\u6570\u636e\u4e0d\u51fa\u672c\u5730<\/li>\n\n\n\n<li>\u65e0API\u8c03\u7528\u6210\u672c<\/li>\n\n\n\n<li>\u53ef\u5b9a\u5236\u5316<\/li>\n<\/ul>\n\n\n\n<p><strong>\u5b9e\u73b0\u793a\u4f8b<\/strong>\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>import whisper\nimport ollama\n\n# \u672c\u5730 Whisper\nwhisper_model = whisper.load_model(\"base\")\n\n# \u672c\u5730\u5927\u6a21\u578b (Ollama)\ndef summarize_with_local_llm(text):\n    response = ollama.chat(\n        model='llama3:8b',\n        messages=&#91;\n            {\n                'role': 'system',\n                'content': '\u4f60\u662f\u4e00\u4e2a\u4e13\u4e1a\u7684\u4f1a\u8bae\u8bb0\u5f55\u52a9\u624b\u3002'\n            },\n            {\n                'role': 'user',\n                'content': f'\u8bf7\u603b\u7ed3\u4ee5\u4e0b\u4f1a\u8bae\u5185\u5bb9\uff1a\\n\\n{text}'\n            }\n        ]\n    )\n    return response&#91;'message']&#91;'content']\n\n# \u5b8c\u6574\u6d41\u7a0b\ndef process_meeting(audio_path):\n    # 1. \u8bed\u97f3\u8bc6\u522b\n    result = whisper_model.transcribe(audio_path)\n    transcription = result&#91;\"text\"]\n\n    # 2. 
\u672c\u5730\u5927\u6a21\u578b\u603b\u7ed3\n    summary = summarize_with_local_llm(transcription)\n\n    return {\n        \"transcription\": transcription,\n        \"summary\": summary\n    }<\/code><\/pre>\n\n\n\n<h3 class=\"wp-block-heading\">4.4 \u7ebf\u8def\u56db\uff1a\u6d41\u5f0f\u5904\u7406\u65b9\u6848<\/h3>\n\n\n\n<p><strong>\u9002\u7528\u573a\u666f<\/strong>\uff1a\u5b9e\u65f6\u4f1a\u8bae\uff0c\u9700\u8981\u5b9e\u65f6\u603b\u7ed3<\/p>\n\n\n\n<p><strong>\u6280\u672f\u6808<\/strong>\uff1a<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li>ASR: \u6d41\u5f0f Whisper \/ WebRTC<\/li>\n\n\n\n<li>\u5927\u6a21\u578b: \u6d41\u5f0f API (GPT-4 Stream \/ Claude Stream)<\/li>\n\n\n\n<li>\u90e8\u7f72: WebSocket + Server-Sent Events<\/li>\n<\/ul>\n\n\n\n<p><strong>\u5b9e\u73b0\u793a\u4f8b<\/strong>\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>from fastapi import FastAPI, WebSocket\nimport asyncio\nimport whisper\n\napp = FastAPI()\nwhisper_model = whisper.load_model(\"base\")\n\n@app.websocket(\"\/ws\/meeting\")\nasync def websocket_endpoint(websocket: WebSocket):\n    await websocket.accept()\n\n    # \u97f3\u9891\u7f13\u51b2\u533a\n    audio_buffer = &#91;]\n\n    try:\n        while True:\n            # \u63a5\u6536\u97f3\u9891\u6570\u636e\n            data = await websocket.receive_bytes()\n            audio_buffer.append(data)\n\n            # \u6bcf5\u79d2\u5904\u7406\u4e00\u6b21\n            if len(audio_buffer) &gt;= 5:\n                # \u8f6c\u5f55\n                transcription = whisper_model.transcribe(\n                    b''.join(audio_buffer)\n                )&#91;\"text\"]\n\n                # \u6d41\u5f0f\u603b\u7ed3\n                async for chunk in stream_summarize(transcription):\n                    await websocket.send_json({\n                        \"type\": \"summary_chunk\",\n                        \"content\": chunk\n                    })\n\n                audio_buffer = &#91;]\n    except Exception as e:\n        await 
websocket.close()\n\nasync def stream_summarize(text):\n    \"\"\"\u6d41\u5f0f\u603b\u7ed3\"\"\"\n    async for chunk in openai_client.chat.completions.create(\n        model=\"gpt-4-turbo-preview\",\n        messages=&#91;{\"role\": \"user\", \"content\": f\"\u603b\u7ed3\uff1a{text}\"}],\n        stream=True\n    ):\n        if chunk.choices&#91;0].delta.content:\n            yield chunk.choices&#91;0].delta.content<\/code><\/pre>\n\n\n\n<h2 class=\"wp-block-heading\">\u4e94\u3001\u5173\u952e\u6280\u672f\u4f18\u5316<\/h2>\n\n\n\n<h3 class=\"wp-block-heading\">5.1 \u97f3\u9891\u9884\u5904\u7406\u4f18\u5316<\/h3>\n\n\n\n<pre class=\"wp-block-code\"><code>import librosa\nimport noisereduce as nr\n\ndef preprocess_audio(audio_path):\n    \"\"\"\n    \u97f3\u9891\u9884\u5904\u7406\uff1a\u964d\u566a\u3001\u6807\u51c6\u5316\n    \"\"\"\n    # \u52a0\u8f7d\u97f3\u9891\n    audio, sr = librosa.load(audio_path, sr=16000)\n\n    # \u964d\u566a\n    audio_denoised = nr.reduce_noise(\n        y=audio,\n        sr=sr,\n        stationary=False,\n        prop_decrease=0.8\n    )\n\n    # \u97f3\u91cf\u6807\u51c6\u5316\n    audio_normalized = librosa.util.normalize(audio_denoised)\n\n    return audio_normalized, sr<\/code><\/pre>\n\n\n\n<h3 class=\"wp-block-heading\">5.2 \u957f\u6587\u672c\u5206\u5757\u5904\u7406<\/h3>\n\n\n\n<pre class=\"wp-block-code\"><code>def chunk_text(text, max_length=4000):\n    \"\"\"\n    \u5c06\u957f\u6587\u672c\u5206\u5757\uff0c\u907f\u514d\u8d85\u51fa\u6a21\u578btoken\u9650\u5236\n    \"\"\"\n    chunks = &#91;]\n    sentences = text.split('\u3002')\n\n    current_chunk = \"\"\n    for sentence in sentences:\n        if len(current_chunk) + len(sentence) &lt; max_length:\n            current_chunk += sentence + \"\u3002\"\n        else:\n            chunks.append(current_chunk)\n            current_chunk = sentence + \"\u3002\"\n\n    if current_chunk:\n        chunks.append(current_chunk)\n\n    return chunks\n\ndef 
summarize_long_meeting(transcription):\n    \"\"\"\n    \u5904\u7406\u957f\u4f1a\u8bae\u5185\u5bb9\n    \"\"\"\n    chunks = chunk_text(transcription)\n    summaries = &#91;]\n\n    for chunk in chunks:\n        summary = summarize_with_gpt(chunk)\n        summaries.append(summary)\n\n    # \u5408\u5e76\u603b\u7ed3\n    final_summary = summarize_with_gpt(\"\\n\\n\".join(summaries))\n    return final_summary<\/code><\/pre>\n\n\n\n<h3 class=\"wp-block-heading\">5.3 \u63d0\u793a\u8bcd\u5de5\u7a0b\u4f18\u5316<\/h3>\n\n\n\n<pre class=\"wp-block-code\"><code>MEETING_SUMMARY_PROMPT = \"\"\"\n\u4f60\u662f\u4e00\u4e2a\u4e13\u4e1a\u7684\u4f1a\u8bae\u8bb0\u5f55\u52a9\u624b\u3002\u8bf7\u5bf9\u4ee5\u4e0b\u4f1a\u8bae\u5185\u5bb9\u8fdb\u884c\u7ed3\u6784\u5316\u603b\u7ed3\u3002\n\n\u8981\u6c42\uff1a\n1. **\u4f1a\u8bae\u57fa\u672c\u4fe1\u606f**\n   - \u4f1a\u8bae\u4e3b\u9898\n   - \u53c2\u4e0e\u4eba\u5458\n   - \u4f1a\u8bae\u65f6\u957f\n\n2. **\u6838\u5fc3\u8bae\u9898**\n   - \u5217\u51fa\u8ba8\u8bba\u7684\u4e3b\u8981\u8bae\u9898\uff083-5\u4e2a\uff09\n   - \u6bcf\u4e2a\u8bae\u9898\u7684\u5173\u952e\u89c2\u70b9\n\n3. **\u91cd\u8981\u51b3\u7b56**\n   - \u660e\u786e\u8bb0\u5f55\u6240\u6709\u51b3\u7b56\u4e8b\u9879\n   - \u6807\u6ce8\u51b3\u7b56\u4eba\u548c\u65f6\u95f4\n\n4. **\u884c\u52a8\u9879\uff08Action Items\uff09**\n   - \u8d1f\u8d23\u4eba\n   - \u622a\u6b62\u65f6\u95f4\n   - \u5177\u4f53\u4efb\u52a1\n\n5. **\u540e\u7eed\u8ddf\u8fdb**\n   - \u9700\u8981\u8fdb\u4e00\u6b65\u8ba8\u8bba\u7684\u4e8b\u9879\n   - \u5f85\u786e\u8ba4\u4fe1\u606f\n\n\u8bf7\u4f7f\u7528Markdown\u683c\u5f0f\u8f93\u51fa\uff0c\u786e\u4fdd\u7ed3\u6784\u6e05\u6670\u3002\n\n\u4f1a\u8bae\u5185\u5bb9\uff1a\n{transcription}\n\"\"\"\n\ndef summarize_with_optimized_prompt(transcription):\n    prompt = MEETING_SUMMARY_PROMPT.format(transcription=transcription)\n    # ... 
\u8c03\u7528\u5927\u6a21\u578b<\/code><\/pre>\n\n\n\n<h2 class=\"wp-block-heading\">\u516d\u3001\u6027\u80fd\u4f18\u5316\u4e0e\u6210\u672c\u63a7\u5236<\/h2>\n\n\n\n<h3 class=\"wp-block-heading\">6.1 \u7f13\u5b58\u7b56\u7565<\/h3>\n\n\n\n<pre class=\"wp-block-code\"><code>import redis\nimport hashlib\nimport json\n\nredis_client = redis.Redis(host='localhost', port=6379)\n\ndef get_cached_summary(audio_hash):\n    \"\"\"\u83b7\u53d6\u7f13\u5b58\u7684\u603b\u7ed3\"\"\"\n    cached = redis_client.get(f\"summary:{audio_hash}\")\n    if cached:\n        return json.loads(cached)\n    return None\n\ndef cache_summary(audio_hash, summary):\n    \"\"\"\u7f13\u5b58\u603b\u7ed3\"\"\"\n    redis_client.setex(\n        f\"summary:{audio_hash}\",\n        3600 * 24,  # 24\u5c0f\u65f6\u8fc7\u671f\n        json.dumps(summary)\n    )\n\ndef process_with_cache(audio_path):\n    # \u8ba1\u7b97\u97f3\u9891hash\n    with open(audio_path, 'rb') as f:\n        audio_hash = hashlib.md5(f.read()).hexdigest()\n\n    # \u68c0\u67e5\u7f13\u5b58\n    cached = get_cached_summary(audio_hash)\n    if cached:\n        return cached\n\n    # \u5904\u7406\u5e76\u7f13\u5b58\n    result = process_meeting(audio_path)\n    cache_summary(audio_hash, result)\n    return result<\/code><\/pre>\n\n\n\n<h3 class=\"wp-block-heading\">6.2 \u5f02\u6b65\u5904\u7406<\/h3>\n\n\n\n<pre class=\"wp-block-code\"><code>from celery import Celery\n\ncelery_app = Celery('meeting_processor')\n\n@celery_app.task\ndef process_meeting_async(audio_path):\n    \"\"\"\u5f02\u6b65\u5904\u7406\u4f1a\u8bae\u97f3\u9891\"\"\"\n    return process_meeting(audio_path)\n\n# \u8c03\u7528\ntask = process_meeting_async.delay(audio_path)\nresult = task.get()  # \u83b7\u53d6\u7ed3\u679c<\/code><\/pre>\n\n\n\n<h2 class=\"wp-block-heading\">\u4e03\u3001\u6700\u4f73\u5b9e\u8df5\u603b\u7ed3<\/h2>\n\n\n\n<h3 class=\"wp-block-heading\">7.1 \u6280\u672f\u9009\u578b\u5efa\u8bae<\/h3>\n\n\n\n<figure class=\"wp-block-table\"><table 
class=\"has-fixed-layout\"><thead><tr><th>\u573a\u666f<\/th><th>\u63a8\u8350\u65b9\u6848<\/th><th>\u7406\u7531<\/th><\/tr><\/thead><tbody><tr><td><strong>\u901a\u7528\u573a\u666f<\/strong><\/td><td>Whisper + GPT-4<\/td><td>\u5e73\u8861\u51c6\u786e\u7387\u548c\u6210\u672c<\/td><\/tr><tr><td><strong>\u4f01\u4e1a\u7ea7<\/strong><\/td><td>Azure Speech + Claude<\/td><td>\u9ad8\u53ef\u7528\u3001SLA\u4fdd\u969c<\/td><\/tr><tr><td><strong>\u6570\u636e\u654f\u611f<\/strong><\/td><td>\u672c\u5730Whisper + \u672c\u5730LLM<\/td><td>\u6570\u636e\u4e0d\u51fa\u672c\u5730<\/td><\/tr><tr><td><strong>\u5b9e\u65f6\u573a\u666f<\/strong><\/td><td>\u6d41\u5f0fASR + \u6d41\u5f0fLLM<\/td><td>\u4f4e\u5ef6\u8fdf<\/td><\/tr><tr><td><strong>\u6210\u672c\u654f\u611f<\/strong><\/td><td>Whisper + \u56fd\u4ea7\u5927\u6a21\u578b<\/td><td>\u6210\u672c\u66f4\u4f4e<\/td><\/tr><\/tbody><\/table><\/figure>\n\n\n\n<h3 class=\"wp-block-heading\">7.2 \u5b9e\u65bd\u6b65\u9aa4<\/h3>\n\n\n\n<ol class=\"wp-block-list\">\n<li><strong>MVP\u9636\u6bb5<\/strong>\uff1aWhisper + GPT-3.5\uff0c\u5feb\u901f\u9a8c\u8bc1<\/li>\n\n\n\n<li><strong>\u4f18\u5316\u9636\u6bb5<\/strong>\uff1a\u52a0\u5165\u8bf4\u8bdd\u4eba\u5206\u79bb\u3001\u6587\u672c\u9884\u5904\u7406<\/li>\n\n\n\n<li><strong>\u751f\u4ea7\u9636\u6bb5<\/strong>\uff1a\u6027\u80fd\u4f18\u5316\u3001\u7f13\u5b58\u3001\u5f02\u6b65\u5904\u7406<\/li>\n\n\n\n<li><strong>\u6269\u5c55\u9636\u6bb5<\/strong>\uff1a\u591a\u8bed\u8a00\u652f\u6301\u3001\u5b9e\u65f6\u5904\u7406<\/li>\n<\/ol>\n\n\n\n<h3 class=\"wp-block-heading\">7.3 \u6ce8\u610f\u4e8b\u9879<\/h3>\n\n\n\n<ul 
class=\"wp-block-list\">\n<li><strong>\u9690\u79c1\u5b89\u5168<\/strong>\uff1a\u654f\u611f\u4f1a\u8bae\u6570\u636e\u52a0\u5bc6\u5b58\u50a8<\/li>\n\n\n\n<li><strong>\u51c6\u786e\u7387\u4f18\u5316<\/strong>\uff1a\u9488\u5bf9\u7279\u5b9a\u9886\u57df\u5fae\u8c03\u6a21\u578b<\/li>\n\n\n\n<li><strong>\u6210\u672c\u63a7\u5236<\/strong>\uff1a\u5408\u7406\u4f7f\u7528\u7f13\u5b58\uff0c\u907f\u514d\u91cd\u590d\u8ba1\u7b97<\/li>\n\n\n\n<li><strong>\u7528\u6237\u4f53\u9a8c<\/strong>\uff1a\u63d0\u4f9b\u8fdb\u5ea6\u53cd\u9988\uff0c\u4f18\u5316\u54cd\u5e94\u65f6\u95f4<\/li>\n<\/ul>\n\n\n\n<h2 class=\"wp-block-heading\">\u516b\u3001\u603b\u7ed3<\/h2>\n\n\n\n<p>\u4f1a\u8bae\u5185\u5bb9\u603b\u7ed3\u7cfb\u7edf\u6d89\u53ca\u8bed\u97f3\u8bc6\u522b\u3001\u81ea\u7136\u8bed\u8a00\u5904\u7406\u548c\u5927\u8bed\u8a00\u6a21\u578b\u3002\u9009\u62e9\u5408\u9002\u7684\u6280\u672f\u6808\uff0c\u7ed3\u5408\u4f18\u5316\u7b56\u7565\uff0c\u53ef\u6784\u5efa\u9ad8\u6548\u3001\u51c6\u786e\u7684\u81ea\u52a8\u603b\u7ed3\u7cfb\u7edf\u3002\u968f\u7740\u6a21\u578b\u80fd\u529b\u63d0\u5347\uff0c\u8be5\u9886\u57df\u4ecd\u6709\u4f18\u5316\u7a7a\u95f4\u3002<\/p>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<p><\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u4e00\u3001\u5f15\u8a00 
\u4f1a\u8bae\u5185\u5bb9\u603b\u7ed3\u662f\u63d0\u5347\u534f\u4f5c&#46;&#46;&#46;<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[4],"tags":[],"class_list":["post-2568","post","type-post","status-publish","format-standard","hentry","category-4"],"_links":{"self":[{"href":"https:\/\/sanlangcode.com\/index.php\/wp-json\/wp\/v2\/posts\/2568","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/sanlangcode.com\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/sanlangcode.com\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/sanlangcode.com\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/sanlangcode.com\/index.php\/wp-json\/wp\/v2\/comments?post=2568"}],"version-history":[{"count":2,"href":"https:\/\/sanlangcode.com\/index.php\/wp-json\/wp\/v2\/posts\/2568\/revisions"}],"predecessor-version":[{"id":2570,"href":"https:\/\/sanlangcode.com\/index.php\/wp-json\/wp\/v2\/posts\/2568\/revisions\/2570"}],"wp:attachment":[{"href":"https:\/\/sanlangcode.com\/index.php\/wp-json\/wp\/v2\/media?parent=2568"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/sanlangcode.com\/index.php\/wp-json\/wp\/v2\/categories?post=2568"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/sanlangcode.com\/index.php\/wp-json\/wp\/v2\/tags?post=2568"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}