{"id":2571,"date":"2025-12-01T14:35:05","date_gmt":"2025-12-01T06:35:05","guid":{"rendered":"https:\/\/sanlangcode.com\/?p=2571"},"modified":"2025-12-01T14:35:05","modified_gmt":"2025-12-01T06:35:05","slug":"%e6%96%87%e6%9c%ac%e5%8e%8b%e7%bc%a9-vs-%e8%af%ad%e6%96%99%e5%8e%8b%e7%bc%a9%ef%bc%9a%e5%8e%9f%e7%90%86%e3%80%81%e7%ae%97%e6%b3%95%e4%b8%8e%e5%ba%94%e7%94%a8%e5%9c%ba%e6%99%af%e6%b7%b1%e5%ba%a6","status":"publish","type":"post","link":"https:\/\/sanlangcode.com\/index.php\/2025\/12\/01\/%e6%96%87%e6%9c%ac%e5%8e%8b%e7%bc%a9-vs-%e8%af%ad%e6%96%99%e5%8e%8b%e7%bc%a9%ef%bc%9a%e5%8e%9f%e7%90%86%e3%80%81%e7%ae%97%e6%b3%95%e4%b8%8e%e5%ba%94%e7%94%a8%e5%9c%ba%e6%99%af%e6%b7%b1%e5%ba%a6\/","title":{"rendered":"\u6587\u672c\u538b\u7f29 vs \u8bed\u6599\u538b\u7f29\uff1a\u539f\u7406\u3001\u7b97\u6cd5\u4e0e\u5e94\u7528\u573a\u666f\u6df1\u5ea6\u89e3\u6790"},"content":{"rendered":"\n<p><\/p>\n\n\n\n<h2 class=\"wp-block-heading\">\u5f15\u8a00<\/h2>\n\n\n\n<p>\u5728\u4fe1\u606f\u65f6\u4ee3\uff0c\u6570\u636e\u538b\u7f29\u662f\u5173\u952e\u6280\u672f\u3002\u6587\u672c\u538b\u7f29\u548c\u8bed\u6599\u538b\u7f29\u662f\u4e24\u79cd\u4e0d\u540c\u7684\u538b\u7f29\u601d\u8def\uff1a\u524d\u8005\u5728\u4fdd\u6301\u5185\u5bb9\u4e0d\u53d8\u7684\u524d\u63d0\u4e0b\u51cf\u5c11\u5b58\u50a8\u7a7a\u95f4\uff0c\u540e\u8005\u901a\u8fc7\u8bed\u4e49\u7406\u89e3\u51cf\u5c11\u4fe1\u606f\u91cf\u3002\u672c\u6587\u4ecb\u7ecd\u5b83\u4eec\u7684\u539f\u7406\u3001\u7ecf\u5178\u7b97\u6cd5\u548c\u4f7f\u7528\u573a\u666f\u3002<\/p>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h2 class=\"wp-block-heading\">\u7b2c\u4e00\u90e8\u5206\uff1a\u6587\u672c\u538b\u7f29\uff08Storage Compression\uff09<\/h2>\n\n\n\n<h3 class=\"wp-block-heading\">1.1 
\u4ec0\u4e48\u662f\u6587\u672c\u538b\u7f29\uff1f<\/h3>\n\n\n\n<p>\u6587\u672c\u538b\u7f29\u662f\u5728\u4fdd\u6301\u5185\u5bb9\u5b8c\u5168\u4e0d\u53d8\u7684\u524d\u63d0\u4e0b\uff0c\u901a\u8fc7\u7b97\u6cd5\u51cf\u5c11\u5b58\u50a8\u7a7a\u95f4\u7684\u6280\u672f\u3002\u538b\u7f29\u540e\u7684\u6570\u636e\u53ef\u4ee5\u7cbe\u786e\u8fd8\u539f\u4e3a\u539f\u59cb\u6587\u672c\u3002<\/p>\n\n\n\n<p><strong>\u6838\u5fc3\u7279\u5f81\uff1a<\/strong><\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u2705 \u65e0\u635f\u538b\u7f29\uff1a\u5185\u5bb9100%\u4fdd\u6301\u4e0d\u53d8<\/li>\n\n\n\n<li>\u2705 \u53ef\u7cbe\u786e\u8fd8\u539f\uff1a\u89e3\u538b\u540e\u5b57\u8282\u7ea7\u5b8c\u5168\u4e00\u81f4<\/li>\n\n\n\n<li>\u2705 \u5b58\u50a8\u4f18\u5316\uff1a\u51cf\u5c11\u5b58\u50a8\u7a7a\u95f4\u548c\u4f20\u8f93\u5e26\u5bbd<\/li>\n\n\n\n<li>\u2705 \u901a\u7528\u6027\u5f3a\uff1a\u9002\u7528\u4e8e\u4efb\u610f\u6587\u672c\u6570\u636e<\/li>\n<\/ul>\n\n\n\n<h3 class=\"wp-block-heading\">1.2 \u6587\u672c\u538b\u7f29\u7684\u57fa\u672c\u539f\u7406<\/h3>\n\n\n\n<h4 class=\"wp-block-heading\">1.2.1 \u4fe1\u606f\u71b5\u7406\u8bba<\/h4>\n\n\n\n<p>\u4fe1\u606f\u71b5\u8861\u91cf\u4fe1\u606f\u7684\u4e0d\u786e\u5b9a\u6027\u3002\u91cd\u590d\u6a21\u5f0f\u8d8a\u591a\uff0c\u71b5\u8d8a\u4f4e\uff0c\u538b\u7f29\u6f5c\u529b\u8d8a\u5927\u3002<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>\u9ad8\u71b5\u6587\u672c\uff08\u96be\u4ee5\u538b\u7f29\uff09\uff1a\n\"a7b3c9d2e8f1g5h4i6j0\"  # \u968f\u673a\u5b57\u7b26\uff0c\u91cd\u590d\u5c11\n\n\u4f4e\u71b5\u6587\u672c\uff08\u6613\u4e8e\u538b\u7f29\uff09\uff1a\n\"aaaaaaaaaa\"  # \u9ad8\u5ea6\u91cd\u590d\uff0c\u538b\u7f29\u6bd4\u9ad8<\/code><\/pre>\n\n\n\n<h4 class=\"wp-block-heading\">1.2.2 \u538b\u7f29\u7b56\u7565<\/h4>\n\n\n\n<ol 
class=\"wp-block-list\">\n<li>\u6d88\u9664\u5197\u4f59\uff1a\u8bc6\u522b\u5e76\u538b\u7f29\u91cd\u590d\u6a21\u5f0f<\/li>\n\n\n\n<li>\u7edf\u8ba1\u7f16\u7801\uff1a\u9ad8\u9891\u5b57\u7b26\u7528\u77ed\u7801\uff0c\u4f4e\u9891\u5b57\u7b26\u7528\u957f\u7801<\/li>\n\n\n\n<li>\u5b57\u5178\u538b\u7f29\uff1a\u7528\u6307\u9488\u5f15\u7528\u5df2\u51fa\u73b0\u7684\u5b57\u7b26\u4e32<\/li>\n<\/ol>\n\n\n\n<h3 class=\"wp-block-heading\">1.3 \u7ecf\u5178\u6587\u672c\u538b\u7f29\u7b97\u6cd5<\/h3>\n\n\n\n<h4 class=\"wp-block-heading\">\u7b97\u6cd51\uff1a\u6e38\u7a0b\u7f16\u7801\uff08Run-Length Encoding, RLE\uff09<\/h4>\n\n\n\n<p><strong>\u539f\u7406\uff1a<\/strong> \u5c06\u8fde\u7eed\u91cd\u590d\u7684\u5b57\u7b26\u7528&#8220;\u5b57\u7b26+\u6b21\u6570&#8221;\u8868\u793a<\/p>\n\n\n\n<p><strong>\u9002\u7528\u573a\u666f\uff1a<\/strong> \u5927\u91cf\u8fde\u7eed\u91cd\u590d\u5b57\u7b26\u7684\u6587\u672c\uff08\u5982\u7a7a\u683c\u3001\u5236\u8868\u7b26\uff09<\/p>\n\n\n\n<p><strong>\u5b9e\u73b0\u793a\u4f8b\uff1a<\/strong><\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>def rle_encode(text):\n    \"\"\"RLE\u7f16\u7801\"\"\"\n    if not text:\n        return \"\"\n\n    encoded = &#91;]\n    count = 1\n    char = text&#91;0]\n\n    for i in range(1, len(text)):\n        if text&#91;i] == char:\n            count += 1\n        else:\n            if count &gt; 1:\n                encoded.append(f\"{char}{count}\")\n            else:\n                encoded.append(char)\n            char = text&#91;i]\n            count = 1\n\n    # \u5904\u7406\u6700\u540e\u4e00\u4e2a\u5b57\u7b26\n    if count &gt; 1:\n        encoded.append(f\"{char}{count}\")\n    else:\n        encoded.append(char)\n\n    return \"\".join(encoded)\n\ndef rle_decode(encoded):\n    \"\"\"RLE\u89e3\u7801\"\"\"\n    decoded = &#91;]\n    i = 0\n\n    while i &lt; len(encoded):\n        char = encoded&#91;i]\n        if i + 1 &lt; len(encoded) and encoded&#91;i + 1].isdigit():\n            # \u63d0\u53d6\u6570\u5b57\n            
num_str = \"\"\n            j = i + 1\n            while j &lt; len(encoded) and encoded&#91;j].isdigit():\n                num_str += encoded&#91;j]\n                j += 1\n            count = int(num_str)\n            decoded.append(char * count)\n            i = j\n        else:\n            decoded.append(char)\n            i += 1\n\n    return \"\".join(decoded)\n\n# \u793a\u4f8b\noriginal = \"AAAAABBBCCCCCCDDD\"\ncompressed = rle_encode(original)  # \"A5B3C6D3\"\ndecompressed = rle_decode(compressed)  # \"AAAAABBBCCCCCCDDD\"\nassert original == decompressed  # \u2705 \u5b8c\u5168\u4e00\u81f4<\/code><\/pre>\n\n\n\n<p><strong>\u538b\u7f29\u6548\u679c\uff1a<\/strong><\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u539f\u59cb\uff1a17\u5b57\u8282<\/li>\n\n\n\n<li>\u538b\u7f29\uff1a8\u5b57\u8282<\/li>\n\n\n\n<li>\u538b\u7f29\u6bd4\uff1a47%<\/li>\n<\/ul>\n\n\n\n<h4 class=\"wp-block-heading\">\u7b97\u6cd52\uff1a\u970d\u592b\u66fc\u7f16\u7801\uff08Huffman Coding\uff09<\/h4>\n\n\n\n<p><strong>\u539f\u7406\uff1a<\/strong> \u6839\u636e\u5b57\u7b26\u51fa\u73b0\u9891\u7387\u6784\u5efa\u6700\u4f18\u524d\u7f00\u7801\u6811\uff0c\u9ad8\u9891\u5b57\u7b26\u7528\u77ed\u7801<\/p>\n\n\n\n<p><strong>\u9002\u7528\u573a\u666f\uff1a<\/strong> \u5b57\u7b26\u9891\u7387\u5206\u5e03\u4e0d\u5747\u5300\u7684\u6587\u672c<\/p>\n\n\n\n<p><strong>\u5b9e\u73b0\u793a\u4f8b\uff1a<\/strong><\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>import heapq\nfrom collections import Counter\n\nclass HuffmanNode:\n    def __init__(self, char, freq, left=None, right=None):\n        self.char = char\n        self.freq = freq\n        self.left = left\n        self.right = right\n\n    def __lt__(self, other):\n        return self.freq &lt; other.freq\n\ndef build_huffman_tree(text):\n    \"\"\"\u6784\u5efa\u970d\u592b\u66fc\u6811\"\"\"\n    freq = Counter(text)\n\n    # \u521b\u5efa\u53f6\u5b50\u8282\u70b9\n    heap = &#91;HuffmanNode(char, freq) for char, freq in freq.items()]\n    heapq.heapify(heap)\n\n    
# \u6784\u5efa\u6811\n    while len(heap) &gt; 1:\n        left = heapq.heappop(heap)\n        right = heapq.heappop(heap)\n        merged = HuffmanNode(None, left.freq + right.freq, left, right)\n        heapq.heappush(heap, merged)\n\n    return heap&#91;0]\n\ndef build_codes(node, code='', codes={}):\n    \"\"\"\u751f\u6210\u7f16\u7801\u8868\"\"\"\n    if node.char:\n        codes&#91;node.char] = code if code else '0'\n    else:\n        build_codes(node.left, code + '0', codes)\n        build_codes(node.right, code + '1', codes)\n    return codes\n\ndef huffman_encode(text, codes):\n    \"\"\"\u970d\u592b\u66fc\u7f16\u7801\"\"\"\n    return ''.join(codes&#91;char] for char in text)\n\ndef huffman_decode(encoded, codes):\n    \"\"\"\u970d\u592b\u66fc\u89e3\u7801\"\"\"\n    # \u53cd\u8f6c\u7f16\u7801\u8868\n    reverse_codes = {v: k for k, v in codes.items()}\n\n    decoded = &#91;]\n    current_code = ''\n\n    for bit in encoded:\n        current_code += bit\n        if current_code in reverse_codes:\n            decoded.append(reverse_codes&#91;current_code])\n            current_code = ''\n\n    return ''.join(decoded)\n\n# \u793a\u4f8b\ntext = \"this is an example of a huffman tree\"\ntree = build_huffman_tree(text)\ncodes = build_codes(tree)\nencoded = huffman_encode(text, codes)\ndecoded = huffman_decode(encoded, codes)\nassert text == decoded  # \u2705 \u5b8c\u5168\u4e00\u81f4\n\nprint(\"\u5b57\u7b26\u7f16\u7801\u8868\uff1a\")\nfor char, code in sorted(codes.items(), key=lambda x: len(x&#91;1])):\n    print(f\"'{char}': {code}\")<\/code><\/pre>\n\n\n\n<p><strong>\u538b\u7f29\u6548\u679c\uff1a<\/strong><\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u539f\u59cb\u6587\u672c\uff1a36\u5b57\u7b26 \u00d7 8\u4f4d = 288\u4f4d<\/li>\n\n\n\n<li>\u538b\u7f29\u540e\uff1a\u7ea6135\u4f4d\uff08\u53d6\u51b3\u4e8e\u9891\u7387\u5206\u5e03\uff09<\/li>\n\n\n\n<li>\u538b\u7f29\u6bd4\uff1a\u7ea647%<\/li>\n<\/ul>\n\n\n\n<h4 
class=\"wp-block-heading\">\u7b97\u6cd53\uff1aLZ77\u538b\u7f29\u7b97\u6cd5<\/h4>\n\n\n\n<p><strong>\u539f\u7406\uff1a<\/strong> \u7528&#8220;\u8ddd\u79bb+\u957f\u5ea6+\u4e0b\u4e00\u4e2a\u5b57\u7b26&#8221;\u7684\u4e09\u5143\u7ec4\u8868\u793a\u91cd\u590d\u5b57\u7b26\u4e32<\/p>\n\n\n\n<p><strong>\u9002\u7528\u573a\u666f\uff1a<\/strong> \u901a\u7528\u6587\u672c\u538b\u7f29\uff0c\u662fZIP\u3001GZIP\u7684\u57fa\u7840<\/p>\n\n\n\n<p><strong>\u5b9e\u73b0\u793a\u4f8b\uff1a<\/strong><\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>def lz77_compress(data, window_size=4096, lookahead_buffer=18):\n    \"\"\"\n    LZ77\u538b\u7f29\u7b97\u6cd5\n\n    Args:\n        data: \u8f93\u5165\u6570\u636e\n        window_size: \u641c\u7d22\u7a97\u53e3\u5927\u5c0f\n        lookahead_buffer: \u524d\u77bb\u7f13\u51b2\u533a\u5927\u5c0f\n    \"\"\"\n    compressed = &#91;]\n    i = 0\n\n    while i &lt; len(data):\n        # \u641c\u7d22\u7a97\u53e3\n        search_start = max(0, i - window_size)\n        search_end = i\n        search_buffer = data&#91;search_start:search_end]\n\n        # \u524d\u77bb\u7f13\u51b2\u533a\n        lookahead_end = min(i + lookahead_buffer, len(data))\n        lookahead = data&#91;i:lookahead_end]\n\n        # \u67e5\u627e\u6700\u957f\u5339\u914d\n        best_match = (0, 0, data&#91;i] if i &lt; len(data) else None)\n\n        for j in range(len(search_buffer)):\n            match_length = 0\n            while (match_length &lt; len(lookahead) and \n                   j + match_length &lt; len(search_buffer) and\n                   search_buffer&#91;j + match_length] == lookahead&#91;match_length]):\n                match_length += 1\n\n            if match_length &gt; best_match&#91;1]:\n                distance = len(search_buffer) - j\n                next_char = lookahead&#91;match_length] if match_length &lt; len(lookahead) else None\n                best_match = (distance, match_length, next_char)\n\n        compressed.append(best_match)\n        i += 
best_match&#91;1] + 1\n\n    return compressed\n\ndef lz77_decompress(compressed):\n    \"\"\"LZ77\u89e3\u538b\u7f29\"\"\"\n    decompressed = &#91;]\n\n    for distance, length, next_char in compressed:\n        if distance &gt; 0 and length &gt; 0:\n            # \u4ece\u5df2\u89e3\u538b\u6570\u636e\u4e2d\u590d\u5236\n            start = len(decompressed) - distance\n            for i in range(length):\n                decompressed.append(decompressed&#91;start + i])\n\n        if next_char:\n            decompressed.append(next_char)\n\n    return ''.join(decompressed)\n\n# \u793a\u4f8b\ntext = \"abracadabraabracadabra\"\ncompressed = lz77_compress(text)\ndecompressed = lz77_decompress(compressed)\nassert text == decompressed  # \u2705 \u5b8c\u5168\u4e00\u81f4\n\nprint(f\"\u539f\u59cb\u957f\u5ea6: {len(text)}\")\nprint(f\"\u538b\u7f29\u540e\u5143\u7ec4\u6570: {len(compressed)}\")\nprint(f\"\u538b\u7f29\u5143\u7ec4: {compressed&#91;:5]}...\")  # \u663e\u793a\u524d5\u4e2a<\/code><\/pre>\n\n\n\n<p><strong>\u538b\u7f29\u6548\u679c\uff1a<\/strong><\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u539f\u59cb\uff1a22\u5b57\u7b26<\/li>\n\n\n\n<li>\u538b\u7f29\uff1a\u7ea610-12\u4e2a\u4e09\u5143\u7ec4\uff08\u53d6\u51b3\u4e8e\u91cd\u590d\u6a21\u5f0f\uff09<\/li>\n\n\n\n<li>\u538b\u7f29\u6bd4\uff1a\u7ea645-55%<\/li>\n<\/ul>\n\n\n\n<h4 class=\"wp-block-heading\">\u7b97\u6cd54\uff1aLZ78\u538b\u7f29\u7b97\u6cd5<\/h4>\n\n\n\n<p><strong>\u539f\u7406\uff1a<\/strong> \u52a8\u6001\u6784\u5efa\u5b57\u5178\uff0c\u7528\u5b57\u5178\u7d22\u5f15+\u65b0\u5b57\u7b26\u8868\u793a\u5b57\u7b26\u4e32<\/p>\n\n\n\n<p><strong>\u9002\u7528\u573a\u666f\uff1a<\/strong> \u9002\u5408\u91cd\u590d\u6a21\u5f0f\u4e0d\u8fde\u7eed\u7684\u60c5\u51b5<\/p>\n\n\n\n<p><strong>\u5b9e\u73b0\u793a\u4f8b\uff1a<\/strong><\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>def lz78_compress(data):\n    \"\"\"LZ78\u538b\u7f29\"\"\"\n    dictionary = {}\n    compressed = &#91;]\n    i = 0\n\n    while i &lt; len(data):\n        
# \u67e5\u627e\u6700\u957f\u5339\u914d\n        match_index = 0\n        match_length = 0\n\n        for j in range(1, i + 1):\n            if data&#91;i:i+j] in dictionary:\n                match_index = dictionary&#91;data&#91;i:i+j]]\n                match_length = j\n            else:\n                break\n\n        # \u6dfb\u52a0\u65b0\u5b57\u7b26\u4e32\u5230\u5b57\u5178\n        if i + match_length &lt; len(data):\n            new_string = data&#91;i:i+match_length+1]\n            dictionary&#91;new_string] = len(dictionary) + 1\n            compressed.append((match_index, data&#91;i+match_length]))\n        else:\n            compressed.append((match_index, ''))\n\n        i += match_length + 1\n\n    return compressed, dictionary\n\ndef lz78_decompress(compressed):\n    \"\"\"LZ78\u89e3\u538b\u7f29\"\"\"\n    dictionary = {0: ''}\n    decompressed = &#91;]\n\n    for index, char in compressed:\n        if index in dictionary:\n            string = dictionary&#91;index] + char\n            decompressed.append(string)\n            dictionary&#91;len(dictionary)] = string\n        else:\n            decompressed.append(char)\n\n    return ''.join(decompressed)\n\n# \u793a\u4f8b\ntext = \"ababababab\"\ncompressed, dictionary = lz78_compress(text)\ndecompressed = lz78_decompress(compressed)\nassert text == decompressed  # \u2705 \u5b8c\u5168\u4e00\u81f4<\/code><\/pre>\n\n\n\n<h3 class=\"wp-block-heading\">1.4 \u73b0\u4ee3\u6587\u672c\u538b\u7f29\u683c\u5f0f<\/h3>\n\n\n\n<h4 class=\"wp-block-heading\">GZIP\uff08\u57fa\u4e8eDEFLATE\uff09<\/h4>\n\n\n\n<p><strong>\u7ec4\u6210\uff1a<\/strong> LZ77 + \u970d\u592b\u66fc\u7f16\u7801<\/p>\n\n\n\n<p><strong>\u7279\u70b9\uff1a<\/strong><\/p>\n\n\n\n<ul 
class=\"wp-block-list\">\n<li>\u538b\u7f29\u6bd4\uff1a\u901a\u5e382-10\u500d<\/li>\n\n\n\n<li>\u901f\u5ea6\uff1a\u5feb\u901f<\/li>\n\n\n\n<li>\u5e94\u7528\uff1aHTTP\u4f20\u8f93\u3001\u6587\u4ef6\u538b\u7f29<\/li>\n<\/ul>\n\n\n\n<p><strong>\u4f7f\u7528\u793a\u4f8b\uff1a<\/strong><\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>import gzip\n\n# \u538b\u7f29\ntext = \"\u91cd\u590d\u7684\u6587\u672c\u5185\u5bb9 \" * 1000\ncompressed = gzip.compress(text.encode('utf-8'))\nprint(f\"\u539f\u59cb: {len(text)} \u5b57\u8282\")\nprint(f\"\u538b\u7f29: {len(compressed)} \u5b57\u8282\")\nprint(f\"\u538b\u7f29\u6bd4: {len(compressed)\/len(text):.2%}\")\n\n# \u89e3\u538b\ndecompressed = gzip.decompress(compressed)\nassert text.encode('utf-8') == decompressed  # \u2705 \u5b8c\u5168\u4e00\u81f4<\/code><\/pre>\n\n\n\n<h4 class=\"wp-block-heading\">BZIP2\uff08\u57fa\u4e8eBurrows-Wheeler\u53d8\u6362\uff09<\/h4>\n\n\n\n<p><strong>\u7279\u70b9\uff1a<\/strong><\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u538b\u7f29\u6bd4\uff1a\u901a\u5e38\u6bd4GZIP\u9ad820-30%<\/li>\n\n\n\n<li>\u901f\u5ea6\uff1a\u8f83\u6162<\/li>\n\n\n\n<li>\u5e94\u7528\uff1a\u5907\u4efd\u3001\u5f52\u6863<\/li>\n<\/ul>\n\n\n\n<h4 class=\"wp-block-heading\">7Z\uff08LZMA\u7b97\u6cd5\uff09<\/h4>\n\n\n\n<p><strong>\u7279\u70b9\uff1a<\/strong><\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u538b\u7f29\u6bd4\uff1a\u901a\u5e38\u6700\u9ad8<\/li>\n\n\n\n<li>\u901f\u5ea6\uff1a\u6700\u6162<\/li>\n\n\n\n<li>\u5e94\u7528\uff1a\u957f\u671f\u5f52\u6863<\/li>\n<\/ul>\n\n\n\n<h3 class=\"wp-block-heading\">1.5 \u6587\u672c\u538b\u7f29\u7684\u4f7f\u7528\u573a\u666f<\/h3>\n\n\n\n<h4 class=\"wp-block-heading\">\u573a\u666f1\uff1a\u6587\u4ef6\u4f20\u8f93<\/h4>\n\n\n\n<pre class=\"wp-block-code\"><code># HTTP\u54cd\u5e94\u538b\u7f29\nimport gzip\nfrom flask import Flask, Response\n\napp = Flask(__name__)\n\n@app.route('\/api\/data')\ndef get_data():\n    data = \"\u5927\u91cf\u6587\u672c\u6570\u636e...\"\n    compressed = 
gzip.compress(data.encode('utf-8'))\n    return Response(\n        compressed,\n        mimetype='application\/gzip',\n        headers={'Content-Encoding': 'gzip'}\n    )<\/code><\/pre>\n\n\n\n<h4 class=\"wp-block-heading\">\u573a\u666f2\uff1a\u6570\u636e\u5907\u4efd<\/h4>\n\n\n\n<pre class=\"wp-block-code\"><code>import zipfile\nimport os\n\ndef backup_directory(source_dir, backup_file):\n    \"\"\"\u5907\u4efd\u76ee\u5f55\u5230ZIP\u6587\u4ef6\"\"\"\n    with zipfile.ZipFile(backup_file, 'w', zipfile.ZIP_DEFLATED) as zipf:\n        for root, dirs, files in os.walk(source_dir):\n            for file in files:\n                file_path = os.path.join(root, file)\n                arcname = os.path.relpath(file_path, source_dir)\n                zipf.write(file_path, arcname)\n\n    original_size = sum(\n        os.path.getsize(os.path.join(root, f))\n        for root, _, files in os.walk(source_dir)\n        for f in files\n    )\n    compressed_size = os.path.getsize(backup_file)\n\n    print(f\"\u539f\u59cb\u5927\u5c0f: {original_size:,} \u5b57\u8282\")\n    print(f\"\u538b\u7f29\u540e: {compressed_size:,} \u5b57\u8282\")\n    print(f\"\u538b\u7f29\u6bd4: {compressed_size\/original_size:.2%}\")<\/code><\/pre>\n\n\n\n<h4 class=\"wp-block-heading\">\u573a\u666f3\uff1a\u6570\u636e\u5e93\u538b\u7f29<\/h4>\n\n\n\n<pre class=\"wp-block-code\"><code># \u5b58\u50a8\u538b\u7f29\u7684\u6587\u672c\u5b57\u6bb5\nimport sqlite3\nimport gzip\n\ndef store_compressed_text(db_path, text):\n    \"\"\"\u5b58\u50a8\u538b\u7f29\u6587\u672c\u5230\u6570\u636e\u5e93\"\"\"\n    conn = sqlite3.connect(db_path)\n    cursor = conn.cursor()\n\n    compressed = gzip.compress(text.encode('utf-8'))\n    cursor.execute(\n        \"INSERT INTO documents (content) VALUES (?)\",\n        (compressed,)\n    )\n    conn.commit()\n    conn.close()\n\ndef retrieve_text(db_path, doc_id):\n    \"\"\"\u4ece\u6570\u636e\u5e93\u68c0\u7d22\u5e76\u89e3\u538b\u6587\u672c\"\"\"\n    conn = 
sqlite3.connect(db_path)\n    cursor = conn.cursor()\n\n    cursor.execute(\"SELECT content FROM documents WHERE id = ?\", (doc_id,))\n    compressed = cursor.fetchone()&#91;0]\n\n    text = gzip.decompress(compressed).decode('utf-8')\n    conn.close()\n    return text<\/code><\/pre>\n\n\n\n<h4 class=\"wp-block-heading\">\u573a\u666f4\uff1a\u65e5\u5fd7\u538b\u7f29<\/h4>\n\n\n\n<pre class=\"wp-block-code\"><code>import gzip\nfrom datetime import datetime\n\ndef compress_log_file(log_file, compressed_file):\n    \"\"\"\u538b\u7f29\u65e5\u5fd7\u6587\u4ef6\"\"\"\n    with open(log_file, 'rb') as f_in:\n        with gzip.open(compressed_file, 'wb') as f_out:\n            f_out.writelines(f_in)\n\n    original_size = os.path.getsize(log_file)\n    compressed_size = os.path.getsize(compressed_file)\n\n    print(f\"\u65e5\u5fd7\u538b\u7f29\u5b8c\u6210\")\n    print(f\"\u539f\u59cb: {original_size:,} \u5b57\u8282\")\n    print(f\"\u538b\u7f29: {compressed_size:,} \u5b57\u8282\")\n    print(f\"\u8282\u7701: {original_size - compressed_size:,} \u5b57\u8282\")<\/code><\/pre>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h2 class=\"wp-block-heading\">\u7b2c\u4e8c\u90e8\u5206\uff1a\u8bed\u6599\u538b\u7f29\uff08Information Compression\uff09<\/h2>\n\n\n\n<h3 class=\"wp-block-heading\">2.1 \u4ec0\u4e48\u662f\u8bed\u6599\u538b\u7f29\uff1f<\/h3>\n\n\n\n<p>\u8bed\u6599\u538b\u7f29\u901a\u8fc7\u8bed\u4e49\u7406\u89e3\u51cf\u5c11\u4fe1\u606f\u91cf\uff0c\u4fdd\u7559\u5173\u952e\u8bed\u4e49\uff0c\u4e22\u5f03\u7ec6\u8282\u3002\u672c\u8d28\u662f\u4fe1\u606f\u63d0\u53d6\u548c\u603b\u7ed3\uff0c\u800c\u975e\u5b58\u50a8\u4f18\u5316\u3002<\/p>\n\n\n\n<p><strong>\u6838\u5fc3\u7279\u5f81\uff1a<\/strong><\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u2705 \u8bed\u4e49\u4fdd\u7559\uff1a\u4fdd\u7559\u5173\u952e\u4fe1\u606f\u548c\u8bed\u4e49<\/li>\n\n\n\n<li>\u274c \u5185\u5bb9\u53d8\u5316\uff1a\u4e22\u5931\u7ec6\u8282\u4fe1\u606f<\/li>\n\n\n\n<li>\u274c 
\u65e0\u6cd5\u7cbe\u786e\u8fd8\u539f\uff1a\u53ea\u80fd\u8fd1\u4f3c\u6062\u590d<\/li>\n\n\n\n<li>\u2705 \u4fe1\u606f\u91cf\u51cf\u5c11\uff1a\u5927\u5e45\u51cf\u5c11\u4fe1\u606f\u91cf<\/li>\n<\/ul>\n\n\n\n<h3 class=\"wp-block-heading\">2.2 \u8bed\u6599\u538b\u7f29\u7684\u57fa\u672c\u539f\u7406<\/h3>\n\n\n\n<h4 class=\"wp-block-heading\">2.2.1 \u8bed\u4e49\u7406\u89e3<\/h4>\n\n\n\n<p>\u5927\u6a21\u578b\u7406\u89e3\u6587\u672c\u8bed\u4e49\uff0c\u533a\u5206\u91cd\u8981\u4fe1\u606f\u548c\u6b21\u8981\u4fe1\u606f\u3002<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>\u539f\u59cb\u6587\u672c\uff081000\u5b57\uff09\uff1a\n\"2024\u5e741\u670815\u65e5\u4e0a\u53489\u70b9\uff0c\u5728\u5317\u4eac\u5e02\u6d77\u6dc0\u533a\u4e2d\u5173\u6751\u5927\u88571\u53f7\uff0c\n\u516c\u53f8\u603b\u90e83\u697c\u4f1a\u8bae\u5ba4\u53ec\u5f00\u4e86\u5e74\u5ea6\u603b\u7ed3\u4f1a\u8bae\u3002\u53c2\u4f1a\u4eba\u5458\u5305\u62ec\uff1a\nCEO\u5f20\u4e09\uff08\u5de5\u53f7001\uff09\u3001CTO\u674e\u56db\uff08\u5de5\u53f7002\uff09...\"\n\n\u8bed\u6599\u538b\u7f29\u540e\uff08200\u5b57\uff09\uff1a\n\"2024\u5e741\u670815\u65e5\uff0c\u516c\u53f8\u53ec\u5f00\u5e74\u5ea6\u603b\u7ed3\u4f1a\u8bae\uff0c\u8ba8\u8bba2023\u5e74\u4e1a\u7ee9\n\u548c2024\u5e74\u89c4\u5212\uff0c\u51b3\u5b9a\u7814\u53d1\u9884\u7b97\u589e\u52a030%\uff0c\u6269\u62db50\u4eba\u3002\"\n\n\u4fe1\u606f\u91cf\u51cf\u5c1180%\uff0c\u4f46\u5173\u952e\u8bed\u4e49\u4fdd\u7559<\/code><\/pre>\n\n\n\n<h4 class=\"wp-block-heading\">2.2.2 \u538b\u7f29\u7b56\u7565<\/h4>\n\n\n\n<ol class=\"wp-block-list\">\n<li>\u6458\u8981\u751f\u6210\uff1a\u751f\u6210\u6587\u672c\u6458\u8981<\/li>\n\n\n\n<li>\u5173\u952e\u70b9\u63d0\u53d6\uff1a\u63d0\u53d6\u7ed3\u6784\u5316\u5173\u952e\u4fe1\u606f<\/li>\n\n\n\n<li>\u8bed\u4e49\u5d4c\u5165\uff1a\u8f6c\u6362\u4e3a\u4f4e\u7ef4\u5411\u91cf\u8868\u793a<\/li>\n\n\n\n<li>\u77e5\u8bc6\u56fe\u8c31\uff1a\u63d0\u53d6\u4e3a\u4e09\u5143\u7ec4\u7ed3\u6784<\/li>\n<\/ol>\n\n\n\n<h3 class=\"wp-block-heading\">2.3 
\u8bed\u6599\u538b\u7f29\u65b9\u6cd5<\/h3>\n\n\n\n<h4 class=\"wp-block-heading\">\u65b9\u6cd51\uff1a\u6458\u8981\u538b\u7f29\uff08Summarization\uff09<\/h4>\n\n\n\n<p><strong>\u539f\u7406\uff1a<\/strong> \u4f7f\u7528\u5927\u6a21\u578b\u751f\u6210\u6587\u672c\u6458\u8981<\/p>\n\n\n\n<p><strong>\u5b9e\u73b0\u793a\u4f8b\uff1a<\/strong><\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>import openai\n\nclass SummarizationCompressor:\n    \"\"\"\u6458\u8981\u538b\u7f29\u5668\"\"\"\n\n    def __init__(self, api_key, model=\"gpt-4\"):\n        self.api_key = api_key\n        self.model = model\n        openai.api_key = api_key\n\n    def compress(self, text, compression_ratio=0.1):\n        \"\"\"\n        \u538b\u7f29\u6587\u672c\u5230\u6307\u5b9a\u6bd4\u4f8b\n\n        Args:\n            text: \u539f\u59cb\u6587\u672c\n            compression_ratio: \u76ee\u6807\u538b\u7f29\u6bd4\uff080.1 = 10%\uff09\n        \"\"\"\n        prompt = f\"\"\"\u8bf7\u5c06\u4ee5\u4e0b\u6587\u672c\u538b\u7f29\u5230\u539f\u957f\u5ea6\u7684{compression_ratio*100}%\uff0c\n\u4fdd\u7559\u6240\u6709\u5173\u952e\u4fe1\u606f\u548c\u8bed\u4e49\uff1a\n\n\u539f\u6587\uff1a\n{text}\n\n\u8981\u6c42\uff1a\n1. \u4fdd\u7559\u6240\u6709\u5173\u952e\u4e8b\u5b9e\u3001\u6570\u636e\u548c\u7ed3\u8bba\n2. \u4f7f\u7528\u7b80\u6d01\u7684\u8bed\u8a00\n3. \u4fdd\u6301\u903b\u8f91\u7ed3\u6784\n4. 
\u8f93\u51fa\u538b\u7f29\u540e\u7684\u6587\u672c\uff1a\"\"\"\n\n        response = openai.ChatCompletion.create(\n            model=self.model,\n            messages=&#91;\n                {\n                    \"role\": \"system\",\n                    \"content\": \"\u4f60\u662f\u4e13\u4e1a\u7684\u6587\u672c\u538b\u7f29\u4e13\u5bb6\uff0c\u64c5\u957f\u5728\u4fdd\u6301\u8bed\u4e49\u5b8c\u6574\u6027\u7684\u540c\u65f6\u5927\u5e45\u538b\u7f29\u6587\u672c\u3002\"\n                },\n                {\"role\": \"user\", \"content\": prompt}\n            ],\n            temperature=0.3,\n            max_tokens=int(len(text) * compression_ratio)\n        )\n\n        return response.choices&#91;0].message.content\n\n    def extractive_summarize(self, text, num_sentences=3):\n        \"\"\"\u63d0\u53d6\u5f0f\u6458\u8981\uff08\u4fdd\u7559\u539f\u6587\u53e5\u5b50\uff09\"\"\"\n        prompt = f\"\"\"\u4ece\u4ee5\u4e0b\u6587\u672c\u4e2d\u63d0\u53d6\u6700\u91cd\u8981\u7684{num_sentences}\u53e5\u8bdd\uff1a\n\n{text}\n\n\u8981\u6c42\uff1a\n1. \u9009\u62e9\u6700\u80fd\u4ee3\u8868\u539f\u6587\u6838\u5fc3\u5185\u5bb9\u7684\u53e5\u5b50\n2. \u4fdd\u6301\u539f\u53e5\u4e0d\u53d8\n3. 
\u6309\u91cd\u8981\u6027\u6392\u5e8f\"\"\"\n\n        response = openai.ChatCompletion.create(\n            model=self.model,\n            messages=&#91;\n                {\"role\": \"system\", \"content\": \"\u4f60\u662f\u6587\u672c\u6458\u8981\u4e13\u5bb6\u3002\"},\n                {\"role\": \"user\", \"content\": prompt}\n            ],\n            temperature=0.2\n        )\n\n        return response.choices&#91;0].message.content\n\n# \u4f7f\u7528\u793a\u4f8b\ncompressor = SummarizationCompressor(api_key=\"your-key\")\noriginal = \"\u5f88\u957f\u7684\u6587\u672c\u5185\u5bb9...\"\ncompressed = compressor.compress(original, compression_ratio=0.1)\nprint(f\"\u539f\u59cb\u957f\u5ea6: {len(original)}\")\nprint(f\"\u538b\u7f29\u540e: {len(compressed)}\")\nprint(f\"\u4fe1\u606f\u91cf\u51cf\u5c11: {(1 - len(compressed)\/len(original))*100:.1f}%\")<\/code><\/pre>\n\n\n\n<p><strong>\u538b\u7f29\u6548\u679c\uff1a<\/strong><\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u4fe1\u606f\u91cf\u51cf\u5c11\uff1a70-90%<\/li>\n\n\n\n<li>\u8bed\u4e49\u4fdd\u7559\uff1a\u5173\u952e\u4fe1\u606f\u4fdd\u7559<\/li>\n\n\n\n<li>\u7ec6\u8282\u4e22\u5931\uff1a\u5177\u4f53\u7ec6\u8282\u88ab\u4e22\u5f03<\/li>\n<\/ul>\n\n\n\n<h4 class=\"wp-block-heading\">\u65b9\u6cd52\uff1a\u5173\u952e\u70b9\u63d0\u53d6\uff08Key Points Extraction\uff09<\/h4>\n\n\n\n<p><strong>\u539f\u7406\uff1a<\/strong> \u63d0\u53d6\u7ed3\u6784\u5316\u5173\u952e\u4fe1\u606f<\/p>\n\n\n\n<p><strong>\u5b9e\u73b0\u793a\u4f8b\uff1a<\/strong><\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>import json\n\nclass KeyPointsCompressor:\n    \"\"\"\u5173\u952e\u70b9\u63d0\u53d6\u538b\u7f29\u5668\"\"\"\n\n    def __init__(self, api_key):\n        self.api_key = api_key\n        openai.api_key = api_key\n\n    def extract_keypoints(self, text):\n        \"\"\"\u63d0\u53d6\u5173\u952e\u70b9\"\"\"\n        prompt = 
f\"\"\"\u8bf7\u5c06\u4ee5\u4e0b\u6587\u672c\u63d0\u53d6\u4e3a\u5173\u952e\u70b9\uff0c\u4f7f\u7528JSON\u683c\u5f0f\u8f93\u51fa\uff1a\n\n\u539f\u6587\uff1a\n{text}\n\n\u8f93\u51fa\u683c\u5f0f\uff1a\n{{\n    \"\u4e3b\u9898\": \"\u6587\u672c\u4e3b\u9898\",\n    \"\u5173\u952e\u70b9\": &#91;\n        \"\u8981\u70b91\",\n        \"\u8981\u70b92\",\n        \"\u8981\u70b93\"\n    ],\n    \"\u91cd\u8981\u6570\u636e\": {{\n        \"\u6570\u636e1\": \"\u503c1\",\n        \"\u6570\u636e2\": \"\u503c2\"\n    }},\n    \"\u7ed3\u8bba\": \"\u4e3b\u8981\u7ed3\u8bba\",\n    \"\u65f6\u95f4\": \"\u76f8\u5173\u65f6\u95f4\",\n    \"\u5730\u70b9\": \"\u76f8\u5173\u5730\u70b9\",\n    \"\u4eba\u7269\": &#91;\"\u4eba\u72691\", \"\u4eba\u72692\"]\n}}\"\"\"\n\n        response = openai.ChatCompletion.create(\n            model=\"gpt-4\",\n            messages=&#91;\n                {\n                    \"role\": \"system\",\n                    \"content\": \"\u4f60\u662f\u4fe1\u606f\u63d0\u53d6\u4e13\u5bb6\uff0c\u64c5\u957f\u5c06\u6587\u672c\u63d0\u53d6\u4e3a\u7ed3\u6784\u5316\u5173\u952e\u70b9\u3002\"\n                },\n                {\"role\": \"user\", \"content\": prompt}\n            ],\n            temperature=0.2\n        )\n\n        result = response.choices&#91;0].message.content\n\n        # \u5c1d\u8bd5\u89e3\u6790JSON\n        try:\n            # \u79fb\u9664\u53ef\u80fd\u7684markdown\u4ee3\u7801\u5757\u6807\u8bb0\n            if result.startswith(\"```json\"):\n                result = result&#91;7:]\n            if result.startswith(\"```\"):\n                result = result&#91;3:]\n            if result.endswith(\"```\"):\n                result = result&#91;:-3]\n            result = result.strip()\n\n            return json.loads(result)\n        except:\n            return {\"raw\": result}\n\n    def keypoints_to_text(self, keypoints):\n        \"\"\"\u4ece\u5173\u952e\u70b9\u6062\u590d\u6587\u672c\uff08\u8fd1\u4f3c\uff09\"\"\"\n        prompt = 
f\"\"\"\u8bf7\u6839\u636e\u4ee5\u4e0b\u5173\u952e\u70b9\u751f\u6210\u4e00\u6bb5\u8fde\u8d2f\u7684\u6587\u672c\uff1a\n\n\u5173\u952e\u70b9\uff1a\n{json.dumps(keypoints, ensure_ascii=False, indent=2)}\n\n\u8981\u6c42\uff1a\n1. \u751f\u6210\u81ea\u7136\u6d41\u7545\u7684\u6587\u672c\n2. \u5305\u542b\u6240\u6709\u5173\u952e\u4fe1\u606f\n3. \u4fdd\u6301\u903b\u8f91\u5173\u7cfb\"\"\"\n\n        response = openai.ChatCompletion.create(\n            model=\"gpt-4\",\n            messages=&#91;\n                {\"role\": \"system\", \"content\": \"\u4f60\u662f\u6587\u672c\u751f\u6210\u4e13\u5bb6\u3002\"},\n                {\"role\": \"user\", \"content\": prompt}\n            ]\n        )\n\n        return response.choices&#91;0].message.content\n\n# \u4f7f\u7528\u793a\u4f8b\ncompressor = KeyPointsCompressor(api_key=\"your-key\")\noriginal = \"\u8be6\u7ec6\u7684\u4f1a\u8bae\u8bb0\u5f55...\"\nkeypoints = compressor.extract_keypoints(original)\n\n# \u5b58\u50a8\u5173\u952e\u70b9\uff08\u6bd4\u539f\u6587\u5c0f\u5f97\u591a\uff09\ncompressed_size = len(json.dumps(keypoints, ensure_ascii=False))\noriginal_size = len(original)\nprint(f\"\u539f\u59cb: {original_size} \u5b57\u7b26\")\nprint(f\"\u538b\u7f29: {compressed_size} \u5b57\u7b26\")\nprint(f\"\u538b\u7f29\u6bd4: {compressed_size\/original_size:.2%}\")\n\n# \u6062\u590d\u6587\u672c\uff08\u8fd1\u4f3c\uff09\nrecovered = compressor.keypoints_to_text(keypoints)<\/code><\/pre>\n\n\n\n<h4 class=\"wp-block-heading\">\u65b9\u6cd53\uff1a\u8bed\u4e49\u5d4c\u5165\u538b\u7f29\uff08Semantic Embedding\uff09<\/h4>\n\n\n\n<p><strong>\u539f\u7406\uff1a<\/strong> \u5c06\u6587\u672c\u8f6c\u6362\u4e3a\u4f4e\u7ef4\u5411\u91cf\u8868\u793a<\/p>\n\n\n\n<p><strong>\u5b9e\u73b0\u793a\u4f8b\uff1a<\/strong><\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>from sentence_transformers import SentenceTransformer\nimport numpy as np\nimport pickle\nimport gzip\n\nclass SemanticEmbeddingCompressor:\n    \"\"\"\u8bed\u4e49\u5d4c\u5165\u538b\u7f29\u5668\"\"\"\n\n  
  def __init__(self, model_name='paraphrase-multilingual-MiniLM-L12-v2'):\n        self.model = SentenceTransformer(model_name)\n        self.embedding_dim = 384  # \u6839\u636e\u6a21\u578b\u8c03\u6574\n\n    def compress(self, text, chunk_size=512):\n        \"\"\"\n        \u5c06\u6587\u672c\u8f6c\u6362\u4e3a\u8bed\u4e49\u5d4c\u5165\u5411\u91cf\n\n        Args:\n            text: \u539f\u59cb\u6587\u672c\n            chunk_size: \u5206\u5757\u5927\u5c0f\n        \"\"\"\n        # \u5206\u5757\u5904\u7406\u957f\u6587\u672c\n        chunks = &#91;\n            text&#91;i:i+chunk_size] \n            for i in range(0, len(text), chunk_size)\n        ]\n\n        # \u751f\u6210\u5d4c\u5165\u5411\u91cf\n        embeddings = self.model.encode(chunks, convert_to_numpy=True)\n\n        # \u91cf\u5316\u538b\u7f29\n        compressed = self.quantize(embeddings)\n\n        return {\n            'embeddings': compressed,\n            'chunk_size': chunk_size,\n            'original_length': len(text),\n            'compressed_size': compressed&#91;'data'].nbytes\n        }\n\n    def quantize(self, embeddings, bits=8):\n        \"\"\"\u91cf\u5316\u5d4c\u5165\u5411\u91cf\u4ee5\u51cf\u5c11\u5b58\u50a8\"\"\"\n        min_val = embeddings.min()\n        max_val = embeddings.max()\n\n        # \u5f52\u4e00\u5316\u5230&#91;0, 1]\n        normalized = (embeddings - min_val) \/ (max_val - min_val + 1e-8)\n\n        # \u91cf\u5316\n        quantized = (normalized * (2**bits - 1)).astype(np.uint8)\n\n        return {\n            'data': quantized,\n            'min': min_val,\n            'max': max_val,\n            'shape': embeddings.shape\n        }\n\n    def decompress(self, compressed_data):\n        \"\"\"\u4ece\u5d4c\u5165\u5411\u91cf\u6062\u590d\uff08\u8fd1\u4f3c\uff09\"\"\"\n        quantized = compressed_data&#91;'embeddings']\n\n        # \u53cd\u91cf\u5316\n        normalized = quantized&#91;'data'].astype(np.float32) \/ (2**8 - 1)\n        embeddings = (\n            
normalized * (quantized&#91;'max'] - quantized&#91;'min']) + \n            quantized&#91;'min']\n        )\n\n        return embeddings\n\n    def save(self, compressed_data, filepath):\n        \"\"\"\u4fdd\u5b58\u538b\u7f29\u6570\u636e\"\"\"\n        with gzip.open(filepath, 'wb') as f:\n            pickle.dump(compressed_data, f)\n\n    def load(self, filepath):\n        \"\"\"\u52a0\u8f7d\u538b\u7f29\u6570\u636e\"\"\"\n        with gzip.open(filepath, 'rb') as f:\n            return pickle.load(f)\n\n# \u4f7f\u7528\u793a\u4f8b\ncompressor = SemanticEmbeddingCompressor()\ntext = \"\u5f88\u957f\u7684\u6587\u672c\u5185\u5bb9...\"\n\n# \u538b\u7f29\ncompressed = compressor.compress(text)\nprint(f\"\u539f\u59cb: {compressed&#91;'original_length']} \u5b57\u7b26\")\nprint(f\"\u538b\u7f29\u540e: {compressed&#91;'compressed_size']} \u5b57\u8282\")\nprint(f\"\u538b\u7f29\u6bd4: {compressed&#91;'compressed_size']\/compressed&#91;'original_length']:.2%}\")\n\n# \u4fdd\u5b58\ncompressor.save(compressed, 'compressed.pkl.gz')\n\n# \u52a0\u8f7d\nloaded = compressor.load('compressed.pkl.gz')\nembeddings = compressor.decompress(loaded)<\/code><\/pre>\n\n\n\n<p><strong>\u538b\u7f29\u6548\u679c\uff1a<\/strong><\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u6587\u672c\uff1a1000\u5b57\u7b26 \u00d7 1\u5b57\u8282 = 1000\u5b57\u8282<\/li>\n\n\n\n<li>\u5d4c\u5165\uff1a10\u5757 \u00d7 384\u7ef4 \u00d7 1\u5b57\u8282\uff08\u91cf\u5316\u540e\uff09= 3840\u5b57\u8282\uff08\u6309 chunk_size=100 \u5206\u5757\uff1b\u9ed8\u8ba4 512 \u65f6\u4e3a 2\u5757\u3001768\u5b57\u8282\uff09<\/li>\n\n\n\n<li>\u4f46\u652f\u6301\u8bed\u4e49\u68c0\u7d22\u548c\u76f8\u4f3c\u5ea6\u8ba1\u7b97<\/li>\n<\/ul>\n\n\n\n<h4 class=\"wp-block-heading\">\u65b9\u6cd54\uff1a\u77e5\u8bc6\u56fe\u8c31\u538b\u7f29\uff08Knowledge Graph\uff09<\/h4>\n\n\n\n<p><strong>\u539f\u7406\uff1a<\/strong> \u5c06\u6587\u672c\u63d0\u53d6\u4e3a\u77e5\u8bc6\u56fe\u8c31\uff08\u4e09\u5143\u7ec4\uff09<\/p>\n\n\n\n<p><strong>\u5b9e\u73b0\u793a\u4f8b\uff1a<\/strong><\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>class KnowledgeGraphCompressor:\n    
\"\"\"\u77e5\u8bc6\u56fe\u8c31\u538b\u7f29\u5668\"\"\"\n\n    def __init__(self, api_key):\n        self.api_key = api_key\n        openai.api_key = api_key\n\n    def text_to_kg(self, text):\n        \"\"\"\u5c06\u6587\u672c\u8f6c\u6362\u4e3a\u77e5\u8bc6\u56fe\u8c31\"\"\"\n        prompt = f\"\"\"\u8bf7\u5c06\u4ee5\u4e0b\u6587\u672c\u63d0\u53d6\u4e3a\u77e5\u8bc6\u56fe\u8c31\uff0c\u4f7f\u7528JSON\u683c\u5f0f\u8f93\u51fa\u4e09\u5143\u7ec4\uff1a\n\n\u539f\u6587\uff1a\n{text}\n\n\u8f93\u51fa\u683c\u5f0f\uff1a\n{{\n    \"entities\": &#91;\n        {{\"id\": 1, \"name\": \"\u5b9e\u4f531\", \"type\": \"\u7c7b\u578b\"}},\n        {{\"id\": 2, \"name\": \"\u5b9e\u4f532\", \"type\": \"\u7c7b\u578b\"}}\n    ],\n    \"relations\": &#91;\n        {{\"subject\": 1, \"predicate\": \"\u5173\u7cfb\", \"object\": 2}},\n        {{\"subject\": 2, \"predicate\": \"\u5173\u7cfb\", \"object\": 3}}\n    ],\n    \"attributes\": &#91;\n        {{\"entity\": 1, \"attribute\": \"\u5c5e\u6027\u540d\", \"value\": \"\u5c5e\u6027\u503c\"}}\n    ]\n}}\"\"\"\n\n        response = openai.ChatCompletion.create(\n            model=\"gpt-4\",\n            messages=&#91;\n                {\"role\": \"system\", \"content\": \"\u4f60\u662f\u77e5\u8bc6\u56fe\u8c31\u63d0\u53d6\u4e13\u5bb6\u3002\"},\n                {\"role\": \"user\", \"content\": prompt}\n            ],\n            temperature=0.2\n        )\n\n        result = response.choices&#91;0].message.content\n\n        # \u89e3\u6790JSON\n        try:\n            if result.startswith(\"```json\"):\n                result = result&#91;7:]\n            if result.startswith(\"```\"):\n                result = result&#91;3:]\n            if result.endswith(\"```\"):\n                result = result&#91;:-3]\n            return json.loads(result.strip())\n        except:\n            return {\"raw\": result}\n\n    def kg_to_text(self, kg):\n        \"\"\"\u4ece\u77e5\u8bc6\u56fe\u8c31\u6062\u590d\u6587\u672c\uff08\u8fd1\u4f3c\uff09\"\"\"\n        
prompt = f\"\"\"\u8bf7\u6839\u636e\u4ee5\u4e0b\u77e5\u8bc6\u56fe\u8c31\u751f\u6210\u4e00\u6bb5\u8fde\u8d2f\u7684\u6587\u672c\uff1a\n\n\u77e5\u8bc6\u56fe\u8c31\uff1a\n{json.dumps(kg, ensure_ascii=False, indent=2)}\n\n\u8981\u6c42\uff1a\n1. \u751f\u6210\u81ea\u7136\u6d41\u7545\u7684\u6587\u672c\n2. \u5305\u542b\u6240\u6709\u5173\u952e\u4fe1\u606f\n3. \u4fdd\u6301\u903b\u8f91\u5173\u7cfb\"\"\"\n\n        response = openai.ChatCompletion.create(\n            model=\"gpt-4\",\n            messages=&#91;\n                {\"role\": \"system\", \"content\": \"\u4f60\u662f\u6587\u672c\u751f\u6210\u4e13\u5bb6\u3002\"},\n                {\"role\": \"user\", \"content\": prompt}\n            ]\n        )\n\n        return response.choices&#91;0].message.content\n\n# \u4f7f\u7528\u793a\u4f8b\ncompressor = KnowledgeGraphCompressor(api_key=\"your-key\")\ntext = \"\u5f20\u4e09\u5728\u5317\u4eac\u5927\u5b66\u5de5\u4f5c\uff0c\u62c5\u4efb\u8ba1\u7b97\u673a\u79d1\u5b66\u6559\u6388...\"\nkg = compressor.text_to_kg(text)\n\n# \u5b58\u50a8\u77e5\u8bc6\u56fe\u8c31\uff08\u6bd4\u539f\u6587\u5c0f\u5f97\u591a\uff09\nkg_size = len(json.dumps(kg, ensure_ascii=False))\ntext_size = len(text)\nprint(f\"\u539f\u59cb: {text_size} \u5b57\u7b26\")\nprint(f\"\u77e5\u8bc6\u56fe\u8c31: {kg_size} \u5b57\u7b26\")\nprint(f\"\u538b\u7f29\u6bd4: {kg_size\/text_size:.2%}\")\n\n# \u6062\u590d\u6587\u672c\nrecovered = compressor.kg_to_text(kg)<\/code><\/pre>\n\n\n\n<h3 class=\"wp-block-heading\">2.4 \u8bed\u6599\u538b\u7f29\u7684\u4f7f\u7528\u573a\u666f<\/h3>\n\n\n\n<h4 class=\"wp-block-heading\">\u573a\u666f1\uff1a\u6587\u6863\u6458\u8981\u7cfb\u7edf<\/h4>\n\n\n\n<pre class=\"wp-block-code\"><code>class DocumentSummarizer:\n    \"\"\"\u6587\u6863\u6458\u8981\u7cfb\u7edf\"\"\"\n\n    def __init__(self, api_key):\n        self.compressor = SummarizationCompressor(api_key)\n\n    def process_document(self, document_path):\n        \"\"\"\u5904\u7406\u6587\u6863\u5e76\u751f\u6210\u6458\u8981\"\"\"\n        with 
open(document_path, 'r', encoding='utf-8') as f:\n            content = f.read()\n\n        # \u751f\u6210\u6458\u8981\n        summary = self.compressor.compress(content, compression_ratio=0.1)\n\n        # \u4fdd\u5b58\u6458\u8981\n        summary_path = document_path.replace('.txt', '_summary.txt')\n        with open(summary_path, 'w', encoding='utf-8') as f:\n            f.write(summary)\n\n        return summary\n\n# \u4f7f\u7528\nsummarizer = DocumentSummarizer(api_key=\"your-key\")\nsummary = summarizer.process_document(\"report.txt\")<\/code><\/pre>\n\n\n\n<h4 class=\"wp-block-heading\">\u573a\u666f2\uff1a\u77e5\u8bc6\u5e93\u6784\u5efa<\/h4>\n\n\n\n<pre class=\"wp-block-code\"><code>class KnowledgeBaseBuilder:\n    \"\"\"\u77e5\u8bc6\u5e93\u6784\u5efa\u5668\"\"\"\n\n    def __init__(self, api_key):\n        self.kg_compressor = KnowledgeGraphCompressor(api_key)\n        self.kb = &#91;]\n\n    def add_document(self, text):\n        \"\"\"\u6dfb\u52a0\u6587\u6863\u5230\u77e5\u8bc6\u5e93\"\"\"\n        kg = self.kg_compressor.text_to_kg(text)\n        self.kb.append({\n            'id': len(self.kb) + 1,\n            'kg': kg,\n            'timestamp': datetime.now().isoformat()\n        })\n\n    def search(self, query):\n        \"\"\"\u5728\u77e5\u8bc6\u5e93\u4e2d\u641c\u7d22\"\"\"\n        # \u4f7f\u7528\u8bed\u4e49\u76f8\u4f3c\u5ea6\u641c\u7d22\n        # \u5b9e\u73b0\u641c\u7d22\u903b\u8f91...\n        pass\n\n    def save(self, filepath):\n        \"\"\"\u4fdd\u5b58\u77e5\u8bc6\u5e93\"\"\"\n        with open(filepath, 'w', encoding='utf-8') as f:\n            json.dump(self.kb, f, ensure_ascii=False, indent=2)\n\n# \u4f7f\u7528\nkb = KnowledgeBaseBuilder(api_key=\"your-key\")\nkb.add_document(\"\u6587\u68631\u5185\u5bb9...\")\nkb.add_document(\"\u6587\u68632\u5185\u5bb9...\")\nkb.save(\"knowledge_base.json\")<\/code><\/pre>\n\n\n\n<h4 class=\"wp-block-heading\">\u573a\u666f3\uff1a\u65b0\u95fb\u6458\u8981\u670d\u52a1<\/h4>\n\n\n\n<pre 
class=\"wp-block-code\"><code>class NewsSummarizer:\n    \"\"\"\u65b0\u95fb\u6458\u8981\u670d\u52a1\"\"\"\n\n    def __init__(self, api_key):\n        self.api_key = api_key\n        self.compressor = SummarizationCompressor(api_key)\n\n    def summarize_news(self, news_article):\n        \"\"\"\u751f\u6210\u65b0\u95fb\u6458\u8981\"\"\"\n        # \u63d0\u53d6\u5173\u952e\u4fe1\u606f\n        keypoints = KeyPointsCompressor(self.api_key).extract_keypoints(news_article)\n\n        # \u751f\u6210\u6458\u8981\n        summary = self.compressor.compress(news_article, compression_ratio=0.15)\n\n        return {\n            'summary': summary,\n            'keypoints': keypoints,\n            'original_length': len(news_article),\n            'summary_length': len(summary)\n        }\n\n# \u4f7f\u7528\nsummarizer = NewsSummarizer(api_key=\"your-key\")\nresult = summarizer.summarize_news(\"\u957f\u7bc7\u65b0\u95fb\u6587\u7ae0...\")\nprint(f\"\u6458\u8981: {result&#91;'summary']}\")\nprint(f\"\u538b\u7f29\u6bd4: {result&#91;'summary_length']\/result&#91;'original_length']:.2%}\")<\/code><\/pre>\n\n\n\n<h4 class=\"wp-block-heading\">\u573a\u666f4\uff1a\u4f1a\u8bae\u8bb0\u5f55\u538b\u7f29<\/h4>\n\n\n\n<pre class=\"wp-block-code\"><code>class MeetingNotesCompressor:\n    \"\"\"\u4f1a\u8bae\u8bb0\u5f55\u538b\u7f29\u5668\"\"\"\n\n    def __init__(self, api_key):\n        self.compressor = KeyPointsCompressor(api_key)\n\n    def compress_meeting(self, meeting_notes):\n        \"\"\"\u538b\u7f29\u4f1a\u8bae\u8bb0\u5f55\"\"\"\n        keypoints = self.compressor.extract_keypoints(meeting_notes)\n\n        return {\n            'date': keypoints.get('\u65f6\u95f4', ''),\n            'location': keypoints.get('\u5730\u70b9', ''),\n            'participants': keypoints.get('\u4eba\u7269', &#91;]),\n            'key_points': keypoints.get('\u5173\u952e\u70b9', &#91;]),\n            'decisions': keypoints.get('\u7ed3\u8bba', ''),\n            'action_items': keypoints.get('\u91cd\u8981\u6570\u636e', {})\n        }\n\n# 
\u4f7f\u7528\ncompressor = MeetingNotesCompressor(api_key=\"your-key\")\nmeeting_notes = \"\u8be6\u7ec6\u7684\u4f1a\u8bae\u8bb0\u5f55...\"\ncompressed = compressor.compress_meeting(meeting_notes)\nprint(json.dumps(compressed, ensure_ascii=False, indent=2))<\/code><\/pre>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h2 class=\"wp-block-heading\">\u7b2c\u4e09\u90e8\u5206\uff1a\u5bf9\u6bd4\u5206\u6790<\/h2>\n\n\n\n<h3 class=\"wp-block-heading\">3.1 \u6838\u5fc3\u533a\u522b\u603b\u7ed3<\/h3>\n\n\n\n<figure class=\"wp-block-table\"><table class=\"has-fixed-layout\"><thead><tr><th>\u7ef4\u5ea6<\/th><th>\u6587\u672c\u538b\u7f29<\/th><th>\u8bed\u6599\u538b\u7f29<\/th><\/tr><\/thead><tbody><tr><td><strong>\u76ee\u6807<\/strong><\/td><td>\u51cf\u5c11\u5b58\u50a8\u7a7a\u95f4<\/td><td>\u51cf\u5c11\u4fe1\u606f\u91cf<\/td><\/tr><tr><td><strong>\u65b9\u6cd5<\/strong><\/td><td>\u7b97\u6cd5\u7f16\u7801<\/td><td>\u8bed\u4e49\u7406\u89e3<\/td><\/tr><tr><td><strong>\u5185\u5bb9\u53d8\u5316<\/strong><\/td><td>\u274c \u4e0d\u53d8<\/td><td>\u2705 \u53d8\u5316<\/td><\/tr><tr><td><strong>\u53ef\u8fd8\u539f\u6027<\/strong><\/td><td>\u2705 100%\u8fd8\u539f<\/td><td>\u274c \u65e0\u6cd5\u7cbe\u786e\u8fd8\u539f<\/td><\/tr><tr><td><strong>\u538b\u7f29\u6bd4<\/strong><\/td><td>2-10\u500d<\/td><td>10-100\u500d\uff08\u4fe1\u606f\u91cf\uff09<\/td><\/tr><tr><td><strong>\u901f\u5ea6<\/strong><\/td><td>\u5feb<\/td><td>\u6162<\/td><\/tr><tr><td><strong>\u6210\u672c<\/strong><\/td><td>\u4f4e<\/td><td>\u9ad8\uff08\u9700\u8981API\uff09<\/td><\/tr><tr><td><strong>\u9002\u7528\u573a\u666f<\/strong><\/td><td>\u6587\u4ef6\u4f20\u8f93\u3001\u5907\u4efd<\/td><td>\u6587\u6863\u6458\u8981\u3001\u77e5\u8bc6\u63d0\u53d6<\/td><\/tr><\/tbody><\/table><\/figure>\n\n\n\n<h3 class=\"wp-block-heading\">3.2 \u9009\u62e9\u6307\u5357<\/h3>\n\n\n\n<h4 class=\"wp-block-heading\">\u4f7f\u7528\u6587\u672c\u538b\u7f29\u7684\u573a\u666f\uff1a<\/h4>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u2705 
\u9700\u8981\u5b8c\u6574\u4fdd\u7559\u6587\u4ef6\u5185\u5bb9<\/li>\n\n\n\n<li>\u2705 \u6587\u4ef6\u4f20\u8f93\u548c\u5b58\u50a8\u4f18\u5316<\/li>\n\n\n\n<li>\u2705 \u4ee3\u7801\u3001\u914d\u7f6e\u6587\u4ef6\u538b\u7f29<\/li>\n\n\n\n<li>\u2705 \u6570\u636e\u5e93\u5907\u4efd<\/li>\n\n\n\n<li>\u2705 \u65e5\u5fd7\u5f52\u6863<\/li>\n<\/ul>\n\n\n\n<h4 class=\"wp-block-heading\">\u4f7f\u7528\u8bed\u6599\u538b\u7f29\u7684\u573a\u666f\uff1a<\/h4>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u2705 \u53ea\u9700\u8981\u5173\u952e\u4fe1\u606f<\/li>\n\n\n\n<li>\u2705 \u6587\u6863\u6458\u8981\u751f\u6210<\/li>\n\n\n\n<li>\u2705 \u77e5\u8bc6\u5e93\u6784\u5efa<\/li>\n\n\n\n<li>\u2705 \u4fe1\u606f\u68c0\u7d22\u7cfb\u7edf<\/li>\n\n\n\n<li>\u2705 \u5185\u5bb9\u5f52\u6863\uff08\u4fdd\u7559\u8981\u70b9\uff09<\/li>\n<\/ul>\n\n\n\n<h3 class=\"wp-block-heading\">3.3 \u6df7\u5408\u4f7f\u7528\u7b56\u7565<\/h3>\n\n\n\n<pre class=\"wp-block-code\"><code>class HybridCompressor:\n    \"\"\"\u6df7\u5408\u538b\u7f29\u7b56\u7565\"\"\"\n\n    def __init__(self, api_key):\n        self.text_compressor = gzip  # \u6587\u672c\u538b\u7f29\n        self.corpus_compressor = SummarizationCompressor(api_key)  # \u8bed\u6599\u538b\u7f29\n\n    def compress(self, text, mode='hybrid'):\n        \"\"\"\n        \u6df7\u5408\u538b\u7f29\n\n        mode: 'text' | 'corpus' | 'hybrid'\n        \"\"\"\n        if mode == 'text':\n            # \u7eaf\u6587\u672c\u538b\u7f29\n            return self.text_compressor.compress(text.encode('utf-8'))\n\n        elif mode == 'corpus':\n            # \u7eaf\u8bed\u6599\u538b\u7f29\n            summary = self.corpus_compressor.compress(text, compression_ratio=0.1)\n            return summary.encode('utf-8')\n\n        elif mode == 'hybrid':\n            # \u5148\u8bed\u6599\u538b\u7f29\uff0c\u518d\u6587\u672c\u538b\u7f29\n            summary = self.corpus_compressor.compress(text, compression_ratio=0.1)\n            compressed = 
self.text_compressor.compress(summary.encode('utf-8'))\n            return compressed\n\n# \u4f7f\u7528\ncompressor = HybridCompressor(api_key=\"your-key\")\ntext = \"\u5f88\u957f\u7684\u6587\u672c...\"\n\n# \u6587\u672c\u538b\u7f29\uff08\u4fdd\u6301\u5b8c\u6574\uff09\ntext_compressed = compressor.compress(text, mode='text')\n\n# \u8bed\u6599\u538b\u7f29\uff08\u4fdd\u7559\u5173\u952e\u4fe1\u606f\uff09\ncorpus_compressed = compressor.compress(text, mode='corpus')\n\n# \u6df7\u5408\u538b\u7f29\uff08\u6700\u5927\u538b\u7f29\uff09\nhybrid_compressed = compressor.compress(text, mode='hybrid')<\/code><\/pre>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h2 class=\"wp-block-heading\">\u7b2c\u56db\u90e8\u5206\uff1a\u5b9e\u9645\u5e94\u7528\u6848\u4f8b<\/h2>\n\n\n\n<h3 class=\"wp-block-heading\">\u6848\u4f8b1\uff1a\u5927\u578b\u6587\u6863\u7ba1\u7406\u7cfb\u7edf<\/h3>\n\n\n\n<p><strong>\u9700\u6c42\uff1a<\/strong> \u5b58\u50a8\u5927\u91cf\u6587\u6863\uff0c\u9700\u8981\u5feb\u901f\u68c0\u7d22\u548c\u8282\u7701\u7a7a\u95f4<\/p>\n\n\n\n<p><strong>\u89e3\u51b3\u65b9\u6848\uff1a<\/strong><\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>class DocumentManagementSystem:\n    \"\"\"\u6587\u6863\u7ba1\u7406\u7cfb\u7edf\"\"\"\n\n    def __init__(self, api_key):\n        self.text_compressor = gzip  # \u5b8c\u6574\u6587\u6863\u7528\u6587\u672c\u538b\u7f29\n        self.corpus_compressor = KeyPointsCompressor(api_key)  # \u7d22\u5f15\u7528\u8bed\u6599\u538b\u7f29\n\n    def store_document(self, doc_id, content):\n        \"\"\"\u5b58\u50a8\u6587\u6863\"\"\"\n        # \u5b8c\u6574\u6587\u6863\uff1a\u6587\u672c\u538b\u7f29\u5b58\u50a8\n        compressed_content = self.text_compressor.compress(\n            content.encode('utf-8')\n        )\n\n        # \u7d22\u5f15\uff1a\u8bed\u6599\u538b\u7f29\uff08\u5173\u952e\u70b9\uff09\n        keypoints = self.corpus_compressor.extract_keypoints(content)\n\n        # \u5b58\u50a8\u5230\u6570\u636e\u5e93\n        
db.store(doc_id, {\n            'content': compressed_content,\n            'keypoints': keypoints,\n            'original_size': len(content),\n            'compressed_size': len(compressed_content)\n        })\n\n    def search_documents(self, query):\n        \"\"\"\u641c\u7d22\u6587\u6863\"\"\"\n        # \u5728\u5173\u952e\u70b9\u4e2d\u641c\u7d22\uff08\u5feb\u901f\uff09\n        results = db.search_keypoints(query)\n        return results\n\n    def retrieve_document(self, doc_id):\n        \"\"\"\u68c0\u7d22\u5b8c\u6574\u6587\u6863\"\"\"\n        data = db.get(doc_id)\n        content = self.text_compressor.decompress(data&#91;'content'])\n        return content.decode('utf-8')<\/code><\/pre>\n\n\n\n<h3 class=\"wp-block-heading\">\u6848\u4f8b2\uff1a\u65b0\u95fb\u805a\u5408\u5e73\u53f0<\/h3>\n\n\n\n<p><strong>\u9700\u6c42\uff1a<\/strong> \u805a\u5408\u5927\u91cf\u65b0\u95fb\uff0c\u751f\u6210\u6458\u8981\uff0c\u8282\u7701\u5b58\u50a8<\/p>\n\n\n\n<p><strong>\u89e3\u51b3\u65b9\u6848\uff1a<\/strong><\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>class NewsAggregationPlatform:\n    \"\"\"\u65b0\u95fb\u805a\u5408\u5e73\u53f0\"\"\"\n\n    def __init__(self, api_key):\n        self.summarizer = SummarizationCompressor(api_key)\n        self.kg_extractor = KnowledgeGraphCompressor(api_key)\n\n    def process_news(self, article):\n        \"\"\"\u5904\u7406\u65b0\u95fb\u6587\u7ae0\"\"\"\n        # \u751f\u6210\u6458\u8981\n        summary = self.summarizer.compress(article, compression_ratio=0.15)\n\n        # \u63d0\u53d6\u77e5\u8bc6\u56fe\u8c31\n        kg = self.kg_extractor.text_to_kg(article)\n\n        return {\n            'title': kg.get('\u4e3b\u9898', ''),\n            'summary': summary,\n            'keypoints': kg.get('\u5173\u952e\u70b9', &#91;]),\n            'entities': &#91;e&#91;'name'] for e in kg.get('entities', &#91;])],\n            'timestamp': datetime.now().isoformat()\n        }<\/code><\/pre>\n\n\n\n<hr class=\"wp-block-separator 
has-alpha-channel-opacity\"\/>\n\n\n\n<h2 class=\"wp-block-heading\">\u603b\u7ed3<\/h2>\n\n\n\n<h3 class=\"wp-block-heading\">\u6587\u672c\u538b\u7f29<\/h3>\n\n\n\n<ul class=\"wp-block-list\">\n<li><strong>\u672c\u8d28\uff1a<\/strong> \u5b58\u50a8\u7a7a\u95f4\u4f18\u5316<\/li>\n\n\n\n<li><strong>\u65b9\u6cd5\uff1a<\/strong> \u7b97\u6cd5\u7f16\u7801\uff08LZ77\u3001\u970d\u592b\u66fc\u7b49\uff09<\/li>\n\n\n\n<li><strong>\u7279\u70b9\uff1a<\/strong> \u65e0\u635f\u3001\u53ef\u7cbe\u786e\u8fd8\u539f<\/li>\n\n\n\n<li><strong>\u9002\u7528\uff1a<\/strong> \u6587\u4ef6\u4f20\u8f93\u3001\u5907\u4efd\u3001\u5b58\u50a8\u4f18\u5316<\/li>\n<\/ul>\n\n\n\n<h3 class=\"wp-block-heading\">\u8bed\u6599\u538b\u7f29<\/h3>\n\n\n\n<ul class=\"wp-block-list\">\n<li><strong>\u672c\u8d28\uff1a<\/strong> \u4fe1\u606f\u91cf\u51cf\u5c11<\/li>\n\n\n\n<li><strong>\u65b9\u6cd5\uff1a<\/strong> \u8bed\u4e49\u7406\u89e3\u3001\u603b\u7ed3\u63d0\u53d6<\/li>\n\n\n\n<li><strong>\u7279\u70b9\uff1a<\/strong> \u6709\u635f\u3001\u4fdd\u7559\u5173\u952e\u8bed\u4e49<\/li>\n\n\n\n<li><strong>\u9002\u7528\uff1a<\/strong> \u6587\u6863\u6458\u8981\u3001\u77e5\u8bc6\u63d0\u53d6\u3001\u4fe1\u606f\u68c0\u7d22<\/li>\n<\/ul>\n\n\n\n<h3 class=\"wp-block-heading\">\u6700\u4f73\u5b9e\u8df5<\/h3>\n\n\n\n<ol class=\"wp-block-list\">\n<li>\u9700\u8981\u5b8c\u6574\u5185\u5bb9 \u2192 \u4f7f\u7528\u6587\u672c\u538b\u7f29<\/li>\n\n\n\n<li>\u53ea\u9700\u5173\u952e\u4fe1\u606f \u2192 \u4f7f\u7528\u8bed\u6599\u538b\u7f29<\/li>\n\n\n\n<li>\u6df7\u5408\u573a\u666f \u2192 \u7ed3\u5408\u4f7f\u7528\u4e24\u79cd\u65b9\u6cd5<\/li>\n<\/ol>\n\n\n\n<p><\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u5f15\u8a00 
\u5728\u4fe1\u606f\u65f6\u4ee3\uff0c\u6570\u636e\u538b\u7f29\u662f\u5173\u952e&#46;&#46;&#46;<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[4],"tags":[],"class_list":["post-2571","post","type-post","status-publish","format-standard","hentry","category-4"],"_links":{"self":[{"href":"https:\/\/sanlangcode.com\/index.php\/wp-json\/wp\/v2\/posts\/2571","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/sanlangcode.com\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/sanlangcode.com\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/sanlangcode.com\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/sanlangcode.com\/index.php\/wp-json\/wp\/v2\/comments?post=2571"}],"version-history":[{"count":1,"href":"https:\/\/sanlangcode.com\/index.php\/wp-json\/wp\/v2\/posts\/2571\/revisions"}],"predecessor-version":[{"id":2572,"href":"https:\/\/sanlangcode.com\/index.php\/wp-json\/wp\/v2\/posts\/2571\/revisions\/2572"}],"wp:attachment":[{"href":"https:\/\/sanlangcode.com\/index.php\/wp-json\/wp\/v2\/media?parent=2571"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/sanlangcode.com\/index.php\/wp-json\/wp\/v2\/categories?post=2571"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/sanlangcode.com\/index.php\/wp-json\/wp\/v2\/tags?post=2571"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}