{"id":2537,"date":"2025-06-26T16:02:05","date_gmt":"2025-06-26T08:02:05","guid":{"rendered":"https:\/\/sanlangcode.com\/?p=2537"},"modified":"2025-06-26T16:15:51","modified_gmt":"2025-06-26T08:15:51","slug":"docscanner-%e6%b7%b1%e5%ba%a6%e8%a7%a3%e6%9e%90%ef%bc%9a%e4%bb%8e%e6%89%ad%e6%9b%b2%e5%88%b0%e5%b9%b3%e6%95%b4%ef%bc%8c%e6%8f%ad%e7%a7%98%e6%96%87%e6%a1%a3%e5%9b%be%e5%83%8f%e6%a0%a1%e6%ad%a3%e8%83%8c","status":"publish","type":"post","link":"https:\/\/sanlangcode.com\/index.php\/2025\/06\/26\/docscanner-%e6%b7%b1%e5%ba%a6%e8%a7%a3%e6%9e%90%ef%bc%9a%e4%bb%8e%e6%89%ad%e6%9b%b2%e5%88%b0%e5%b9%b3%e6%95%b4%ef%bc%8c%e6%8f%ad%e7%a7%98%e6%96%87%e6%a1%a3%e5%9b%be%e5%83%8f%e6%a0%a1%e6%ad%a3%e8%83%8c\/","title":{"rendered":"DocScanner \u6df1\u5ea6\u89e3\u6790\uff1a\u4ece\u626d\u66f2\u5230\u5e73\u6574\uff0c\u63ed\u79d8\u6587\u6863\u56fe\u50cf\u6821\u6b63\u80cc\u540e\u7684 AI \u9b54\u6cd5"},"content":{"rendered":"\n<p>\u5728\u6570\u5b57\u5316\u65f6\u4ee3\uff0c\u6211\u4eec\u7ecf\u5e38\u9700\u8981\u7528\u624b\u673a\u62cd\u6444\u6587\u6863\u3001\u6536\u636e\u6216\u7b14\u8bb0\u3002\u4f46\u7531\u4e8e\u62cd\u6444\u89d2\u5ea6\u3001\u7eb8\u5f20\u5377\u66f2\u7b49\u95ee\u9898\uff0c\u5f97\u5230\u7684\u56fe\u50cf\u5f80\u5f80\u662f\u626d\u66f2\u53d8\u5f62\u7684\uff0c\u8fd9\u4e0d\u4ec5\u5f71\u54cd\u9605\u8bfb\uff0c\u4e5f\u6781\u5927\u5730\u5e72\u6270\u4e86\u540e\u7eed\u7684\u6587\u5b57\u8bc6\u522b\uff08OCR\uff09\u3002<\/p>\n\n\n\n<p>DocScanner \u9879\u76ee\u5e94\u8fd0\u800c\u751f\uff0c\u5b83\u662f\u4e00\u4e2a\u5f3a\u5927\u7684\u5de5\u5177\uff0c\u80fd\u591f\u5229\u7528\u6df1\u5ea6\u5b66\u4e60\u6280\u672f\uff0c\u5c06\u8fd9\u4e9b\u626d\u66f2\u7684\u6587\u6863\u56fe\u50cf\u201c\u4e00\u952e\u62c9\u5e73\u201d\uff0c\u6062\u590d\u6210\u5982\u540c\u626b\u63cf\u4eea\u626b\u63cf\u822c\u5e73\u6574\u7684\u56fe\u50cf\u3002<\/p>\n\n\n\n<p>\u672c\u6587\u5c06\u6df1\u5165\u5256\u6790 DocScanner 
\u7684\u5185\u90e8\u5de5\u4f5c\u539f\u7406\uff0c\u9010\u4e00\u62c6\u89e3\u5176\u6838\u5fc3\u7b97\u6cd5\u548c\u4ee3\u7801\u5b9e\u73b0\uff0c\u5e26\u4f60\u9886\u7565\u5176\u80cc\u540e\u7cbe\u5999\u7684 AI \u8bbe\u8ba1\u601d\u60f3\u3002<\/p>\n\n\n\n<h2 class=\"wp-block-heading\">\u6574\u4f53\u67b6\u6784\uff1a\u4e09\u6b65\u8d70\u7684\u827a\u672f<\/h2>\n\n\n\n<p>DocScanner \u7684\u6838\u5fc3\u6d41\u7a0b\u53ef\u4ee5\u6982\u62ec\u4e3a\u4e09\u4e2a\u4e3b\u8981\u9636\u6bb5\uff1a<strong>\u6587\u6863\u5206\u5272<\/strong>\u3001<strong>\u8fed\u4ee3\u6821\u6b63<\/strong>\u548c <strong>OCR \u8bc4\u4f30<\/strong>\u3002\u8fd9\u79cd\u5206\u800c\u6cbb\u4e4b\u7684\u7b56\u7565\uff0c\u4f7f\u5f97\u6a21\u578b\u5728\u590d\u6742\u80cc\u666f\u4e0b\u4f9d\u7136\u80fd\u4fdd\u6301\u51fa\u8272\u7684\u6027\u80fd\u3002<\/p>\n\n\n\n<p>\u4e0b\u9762\u662f\u6574\u4e2a\u5904\u7406\u6d41\u7a0b\u7684\u53ef\u89c6\u5316\u56fe\u8868\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>graph TD;\n    A&#91;\u8f93\u5165\u626d\u66f2\u56fe\u50cf] --&gt; B&#91;Stage 1: \u6587\u6863\u5206\u5272];\n    B --&gt; C&#91;U2NETP \u6a21\u578b];\n    C --&gt; D&#91;\u751f\u6210\u6587\u6863\u8499\u7248];\n    D --&gt; E&#91;\u5e94\u7528\u8499\u7248\u53bb\u9664\u80cc\u666f];\n    E --&gt; F&#91;Stage 2: \u8fed\u4ee3\u6821\u6b63];\n    F --&gt; G&#91;BasicEncoder \u63d0\u53d6\u7279\u5f81];\n    G --&gt; H&#91;\u521d\u59cb\u5316\u5f62\u53d8\u573a];\n    H --&gt; I&#91;\u8fed\u4ee3\u5faa\u73af\u5f00\u59cb];\n    I --&gt; J&#91;BasicUpdateBlock];\n    J --&gt; K&#91;\u9884\u6d4b\u5f62\u53d8\u589e\u91cf];\n    K --&gt; L&#91;\u66f4\u65b0\u603b\u5f62\u53d8\u573a];\n    L --&gt; I;\n    I -- \u5b8c\u6210 --&gt; M&#91;\u4e0a\u91c7\u6837];\n    M --&gt; N&#91;\u751f\u6210\u9ad8\u5206\u8fa8\u7387\u5750\u6807\u6620\u5c04];\n    N --&gt; O&#91;grid_sample \u91cd\u91c7\u6837];\n    A -- \u540c\u65f6 --&gt; O;\n    O --&gt; P&#91;\u8f93\u51fa\u6821\u6b63\u540e\u56fe\u50cf];\n    P --&gt; Q&#91;Stage 3: OCR\u8bc4\u4f30];\n    Q --&gt; 
R&#91;Tesseract \u63d0\u53d6\u6587\u672c];\n    R --&gt; S&#91;\u8ba1\u7b97CER\u548c\u7f16\u8f91\u8ddd\u79bb];\n\nsubgraph BasicUpdateBlock\n    direction LR\n    J_A&#91;\u8f93\u5165] --&gt; J_B&#91;\u8fd0\u52a8\u7f16\u7801\u5668];\n    J_B --&gt; J_C&#91;ConvGRU \u66f4\u65b0\u72b6\u6001];\n    J_C --&gt; J_D&#91;FlowHead \u9884\u6d4b\u589e\u91cf];\n    J_D --&gt; J_E&#91;\u8f93\u51fa];\nend\n\nstyle F fill:#f9f,stroke:#333,stroke-width:2px\nstyle B fill:#f9f,stroke:#333,stroke-width:2px\nstyle Q fill:#f9f,stroke:#333,stroke-width:2px<\/code><\/pre>\n\n\n\n<h2 class=\"wp-block-heading\">Stage 1: \u6587\u6863\u5206\u5272 &#8211; \u201c\u51c0\u5316\u201d\u8f93\u5165<\/h2>\n\n\n\n<p><strong>\u201cGarbage in, garbage out.\u201d<\/strong> \u8fd9\u662f\u673a\u5668\u5b66\u4e60\u9886\u57df\u7684\u540d\u8a00\u3002\u5982\u679c\u8f93\u5165\u5305\u542b\u4e86\u5927\u91cf\u65e0\u5173\u7684\u80cc\u666f\u4fe1\u606f\uff08\u5982\u684c\u9762\u3001\u624b\u6307\u7b49\uff09\uff0c\u6821\u6b63\u6a21\u578b\u7684\u6027\u80fd\u4f1a\u5927\u6253\u6298\u6263\u3002<\/p>\n\n\n\n<p>DocScanner \u7684\u7b2c\u4e00\u6b65\u5c31\u662f\u901a\u8fc7\u4e00\u4e2a\u8f7b\u91cf\u7ea7\u7684\u663e\u8457\u6027\u7269\u4f53\u68c0\u6d4b\u7f51\u7edc <strong>U2NETP<\/strong>\uff0c\u7cbe\u51c6\u5730\u5c06\u6587\u6863\u533a\u57df\u4ece\u590d\u6742\u7684\u80cc\u666f\u4e2d\u5206\u5272\u51fa\u6765\u3002<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code># inference.py: Net \u7684\u524d\u5411\u4f20\u64ad\nclass Net(nn.Module):\n    def __init__(self):\n        super(Net, self).__init__()\n        self.msk = U2NETP(3, 1) # \u5206\u5272\u7f51\u7edc\n        self.bm = DocScanner()  # \u6821\u6b63\u7f51\u7edc\n\n    def forward(self, x):\n        msk, _,_,_,_,_,_ = self.msk(x)\n        msk = (msk &gt; 0.5).float()\n        x = msk * x # \u5c06\u8499\u7248\u5e94\u7528\u5230\u8f93\u5165\u56fe\u50cf\u4e0a\uff0c\u80cc\u666f\u53d8\u4e3a0\n\n        # ... 
\u540e\u7eed\u9001\u5165\u6821\u6b63\u7f51\u7edc ...<\/code><\/pre>\n\n\n\n<p>\u8fd9\u4e2a\u7b80\u5355\u7684\u64cd\u4f5c\uff0c\u4e3a\u540e\u7eed\u7684\u6821\u6b63\u9636\u6bb5\u63d0\u4f9b\u4e86\u4e00\u4e2a\u5e72\u51c0\u3001\u65e0\u5e72\u6270\u7684\u8f93\u5165\uff0c\u662f\u6574\u4e2a\u7cfb\u7edf\u9c81\u68d2\u6027\u7684\u5173\u952e\u4fdd\u969c\u3002<\/p>\n\n\n\n<h2 class=\"wp-block-heading\">Stage 2: \u8fed\u4ee3\u6821\u6b63 &#8211; \u7b97\u6cd5\u6838\u5fc3<\/h2>\n\n\n\n<p>\u8fd9\u662f DocScanner \u9879\u76ee\u6700\u795e\u5947\u3001\u6700\u6838\u5fc3\u7684\u90e8\u5206\u3002\u5b83\u7684\u76ee\u6807\u662f\u5b66\u4e60\u4e00\u4e2a\u4ece\u626d\u66f2\u56fe\u50cf\u5230\u5e73\u6574\u56fe\u50cf\u7684<strong>\u50cf\u7d20\u6620\u5c04\u5173\u7cfb<\/strong>\u3002\u7b80\u5355\u6765\u8bf4\uff0c\u5c31\u662f\u627e\u5230\u4e00\u79cd\u201c\u53d8\u6362\u201d\uff0c\u80fd\u628a\u626d\u66f2\u56fe\u50cf\u4e0a\u7684\u6bcf\u4e00\u4e2a\u50cf\u7d20\u70b9\u201c\u642c\u201d\u5230\u5b83\u5e94\u8be5\u5728\u7684\u4f4d\u7f6e\uff0c\u4ece\u800c\u7ec4\u6210\u4e00\u5f20\u5e73\u6574\u7684\u56fe\u50cf\u3002<\/p>\n\n\n\n<p>\u8be5\u9879\u76ee\u5e76\u672a\u4f7f\u7528\u7b80\u5355\u7684\u5355\u6b21\u9884\u6d4b\u6a21\u578b\uff0c\u800c\u662f\u501f\u9274\u4e86\u5149\u6d41\u4f30\u8ba1\u9886\u57df\u9876\u5c16\u6a21\u578b RAFT \u7684\u601d\u60f3\uff0c\u91c7\u7528\u4e86\u4e00\u79cd<strong>\u8fed\u4ee3\u4f18\u5316<\/strong>\u7684\u7cbe\u5999\u67b6\u6784\u3002\u6a21\u578b\u4e0d\u4f1a\u4e00\u6b65\u5230\u4f4d\uff0c\u800c\u662f\u50cf\u4e00\u4f4d\u753b\u5bb6\u53cd\u590d\u4fee\u6539\u753b\u4f5c\u4e00\u6837\uff0c\u4e00\u8f6e\u4e00\u8f6e\u5730\u4f18\u5316\u9884\u6d4b\u7684\u53d8\u6362\u7ed3\u679c\uff0c\u76f4\u5230\u6700\u7ec8\u5b8c\u7f8e\u3002<\/p>\n\n\n\n<p>\u6df1\u5165\u5176\u5185\u90e8<\/p>\n\n\n\n<h3 class=\"wp-block-heading\">1. 
\u7279\u5f81\u63d0\u53d6\u5668 (<code>BasicEncoder<\/code>)<\/h3>\n\n\n\n<p>\u9996\u5148\uff0c\u4e00\u4e2a\u7c7b ResNet \u7684\u7f16\u7801\u5668 <code>BasicEncoder<\/code> \u4f1a\u4ece\u8f93\u5165\u7684\u6587\u6863\u56fe\u50cf\u4e2d\u63d0\u53d6\u51fa\u4e00\u4e2a\u6df1\u5c42\u7684\u7279\u5f81\u56fe\u8c31\u3002\u8fd9\u4e2a\u7279\u5f81\u56fe\u8c31\u5305\u542b\u4e86\u56fe\u50cf\u4e30\u5bcc\u7684\u51e0\u4f55\u548c\u7eb9\u7406\u4fe1\u606f\uff0c\u5e76\u4e14\u5176\u5c3a\u5bf8\u4ec5\u4e3a\u539f\u56fe\u7684 1\/8\uff0c\u5927\u5927\u964d\u4f4e\u4e86\u540e\u7eed\u8ba1\u7b97\u7684\u590d\u6742\u5ea6\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\">2. \u8fed\u4ee3\u4f18\u5316\u6838\u5fc3 (<code>BasicUpdateBlock<\/code>)<\/h3>\n\n\n\n<p>\u8fed\u4ee3\u7684\u6838\u5fc3\u5728\u4e8e <code>DocScanner<\/code> \u6a21\u578b\u7684 <code>forward<\/code> \u51fd\u6570\u4e2d\u7684\u4e00\u4e2a\u5faa\u73af\u3002<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code># model.py: DocScanner \u7684\u524d\u5411\u4f20\u64ad\uff08\u7b80\u5316\u7248\uff09\nclass DocScanner(nn.Module):\n    # ...\n    def forward(self, image1, iters=12, test_mode=False):\n        # ... \u63d0\u53d6\u7279\u5f81 fmap1 ...\n        net, inp = torch.split(fmap1, &#91;160, 160], dim=1) # net\u662fGRU\u72b6\u6001, inp\u662f\u8f93\u5165\n        # ... \u521d\u59cb\u5316\u5750\u6807\u7f51\u683c coords0, coords1 ...\n\n        for itr in range(iters):\n            coords1 = coords1.detach()\n            flow = coords1 - coords0 # \u5f53\u524d\u7684\u5f62\u53d8\u573a\n\n            # --- \u8fd9\u662f\u6838\u5fc3\u66f4\u65b0\u6a21\u5757 ---\n            net, up_mask, delta_flow = self.update_block(net, inp, warpfea, flow)\n\n            # --- \u66f4\u65b0\u5750\u6807\u7f51\u683c ---\n            coords1 = coords1 + delta_flow\n\n            # ... 
\u4e0a\u91c7\u6837\u5e76\u4fdd\u5b58\u9884\u6d4b\u7ed3\u679c ...\n\n        return bm_up<\/code><\/pre>\n\n\n\n<p>\u5728\u8fd9\u4e2a\u5faa\u73af\u91cc\uff0c<code>BasicUpdateBlock<\/code> \u6a21\u5757\u662f\u7edd\u5bf9\u7684\u4e3b\u89d2\u3002\u5728\u6bcf\u6b21\u8fed\u4ee3\u4e2d\uff1a<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u5b83\u63a5\u6536\u5f53\u524d\u7f51\u7edc\u7684\u201c\u8bb0\u5fc6\u201d\u72b6\u6001 (<code>net<\/code>)\u3001\u56fe\u50cf\u7279\u5f81 (<code>inp<\/code>)\u3001\u6839\u636e\u5f53\u524d\u5f62\u53d8\u573a\u201c\u626d\u66f2\u201d\u8fc7\u7684\u7279\u5f81 (<code>warpfea<\/code>) \u548c\u5f53\u524d\u7684\u5f62\u53d8\u573a (<code>flow<\/code>)\u3002<\/li>\n\n\n\n<li>\u5185\u90e8\u7684 <strong><code>ConvGRU<\/code><\/strong>\uff08\u5377\u79ef\u95e8\u63a7\u5faa\u73af\u5355\u5143\uff09\u4f1a\u50cf\u5927\u8111\u4e00\u6837\u66f4\u65b0\u5176\u201c\u8bb0\u5fc6\u201d\u72b6\u6001\u3002GRU \u7684\u5f15\u5165\u4f7f\u5f97\u6a21\u578b\u80fd\u591f\u8bb0\u4f4f\u524d\u51e0\u6b21\u8fed\u4ee3\u7684\u4f18\u5316\u4fe1\u606f\uff0c\u4ece\u800c\u505a\u51fa\u66f4\u660e\u667a\u7684\u5224\u65ad\u3002<\/li>\n\n\n\n<li>\u53e6\u4e00\u4e2a\u5b50\u6a21\u5757 <strong><code>FlowHead<\/code><\/strong> \u5219\u6839\u636e\u66f4\u65b0\u540e\u7684\u8bb0\u5fc6\u72b6\u6001\uff0c\u9884\u6d4b\u51fa\u4e00\u4e2a\u5fae\u5c0f\u7684\u4fee\u6b63\u91cf <code>delta_flow<\/code>\u3002<\/li>\n\n\n\n<li>\u8fd9\u4e2a <code>delta_flow<\/code> \u4f1a\u88ab\u52a0\u5230\u603b\u7684\u5f62\u53d8\u573a <code>coords1<\/code> \u4e0a\uff0c\u5b8c\u6210\u4e00\u6b21\u201c\u7cbe\u4fee\u201d\u3002<\/li>\n<\/ul>\n\n\n\n<p>\u7ecf\u8fc7 12 \u8f6e\u8fd9\u6837\u7684\u201c\u6df1\u601d\u719f\u8651\u201d\uff0c\u6a21\u578b\u6700\u7ec8\u4f1a\u5f97\u5230\u4e00\u4e2a\u9ad8\u5ea6\u7cbe\u786e\u7684\u5f62\u53d8\u573a\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\">3. 
\u6700\u7ec8\u6821\u6b63 (<code>grid_sample<\/code>)<\/h3>\n\n\n\n<p>\u8fed\u4ee3\u5b8c\u6210\u540e\uff0c\u6a21\u578b\u5c06\u4f4e\u5206\u8fa8\u7387\u7684\u3001\u4f18\u5316\u597d\u7684\u5f62\u53d8\u573a\u901a\u8fc7\u4e00\u4e2a\u5b66\u4e60\u5230\u7684\u4e0a\u91c7\u6837\u5668\uff08<code>upsample_flow<\/code>\uff09\u6062\u590d\u5230\u539f\u59cb\u56fe\u50cf\u7684\u5206\u8fa8\u7387\uff0c\u5f97\u5230\u6700\u7ec8\u7684\u5750\u6807\u6620\u5c04\u8868 <code>bm<\/code>\u3002<\/p>\n\n\n\n<p>\u6700\u540e\uff0cPyTorch \u4e2d\u5f3a\u5927\u7684 <code>grid_sample<\/code> \u51fd\u6570\u767b\u573a\u3002\u5b83\u5229\u7528\u8fd9\u5f20\u6620\u5c04\u8868\uff0c\u4ece\u539f\u59cb\u7684\u626d\u66f2\u56fe\u50cf\u4e2d\u7cbe\u51c6\u5730\u62fe\u53d6\u50cf\u7d20\uff0c\u7136\u540e\u50cf\u62fc\u56fe\u4e00\u6837\uff0c\u5c06\u8fd9\u4e9b\u50cf\u7d20\u70b9\u91cd\u65b0\u6392\u5217\u6210\u4e00\u5f20\u5e73\u6574\u3001\u6e05\u6670\u7684\u6587\u6863\u56fe\u50cf\u3002<\/p>\n\n\n\n<h2 class=\"wp-block-heading\">Stage 3: OCR \u8bc4\u4f30 &#8211; \u6548\u679c\u597d\u4e0d\u597d\uff0c\u6570\u636e\u8bf4\u4e86\u7b97<\/h2>\n\n\n\n<p>\u6821\u6b63\u540e\u7684\u56fe\u7247\u597d\u4e0d\u597d\uff0c\u9664\u4e86\u8089\u773c\u770b\uff0c\u8fd8\u9700\u8981\u5ba2\u89c2\u7684\u91cf\u5316\u6307\u6807\u3002\u9879\u76ee\u901a\u8fc7 <code>OCR_eval.py<\/code> \u811a\u672c\uff0c\u4f7f\u7528 <code>pytesseract<\/code>\uff08Tesseract OCR \u5f15\u64ce\u7684 Python \u5c01\u88c5\uff09\u6765\u8bc4\u4f30\u6821\u6b63\u6548\u679c\u3002<\/p>\n\n\n\n<p>\u8bc4\u4f30\u903b\u8f91\u975e\u5e38\u76f4\u89c2\uff1a<\/p>\n\n\n\n<ol class=\"wp-block-list\">\n<li>\u5bf9\u6821\u6b63\u540e\u7684\u56fe\u50cf\u8fdb\u884c OCR\uff0c\u63d0\u53d6\u8bc6\u522b\u51fa\u7684\u6587\u672c\u3002<\/li>\n\n\n\n<li>\u5c06\u8bc6\u522b\u6587\u672c\u4e0e\u539f\u59cb\u7684\u3001\u6b63\u786e\u7684\u201c\u771f\u503c\u201d\u6587\u672c\u8fdb\u884c\u6bd4\u8f83\u3002<\/li>\n\n\n\n<li>\u901a\u8fc7\u4e24\u4e2a\u6307\u6807\u6765\u91cf\u5316\u5dee\u5f02\uff1a\n<ul 
class=\"wp-block-list\">\n<li><strong>\u7f16\u8f91\u8ddd\u79bb (Levenshtein Distance)<\/strong>: \u6307\u4e24\u4e2a\u5b57\u7b26\u4e32\u4e4b\u95f4\uff0c\u7531\u4e00\u4e2a\u8f6c\u6210\u53e6\u4e00\u4e2a\u6240\u9700\u7684\u6700\u5c11\u7f16\u8f91\u64cd\u4f5c\u6b21\u6570\u3002\u8ddd\u79bb\u8d8a\u5c0f\uff0c\u8bf4\u660e\u8bc6\u522b\u8d8a\u51c6\u3002<\/li>\n\n\n\n<li><strong>\u5b57\u7b26\u9519\u8bef\u7387 (CER &#8211; Character Error Rate)<\/strong>: \u5373 <code>\u7f16\u8f91\u8ddd\u79bb \/ \u771f\u503c\u6587\u672c\u603b\u5b57\u7b26\u6570<\/code>\u3002\u8fd9\u662f\u5b66\u672f\u754c\u548c\u5de5\u4e1a\u754c\u8bc4\u4f30 OCR \u6027\u80fd\u6700\u5e38\u7528\u7684\u6307\u6807\u4e4b\u4e00\uff0c\u8d8a\u4f4e\u8d8a\u597d\u3002<\/li>\n<\/ul>\n<\/li>\n<\/ol>\n\n\n\n<pre class=\"wp-block-code\"><code># OCR_eval.py: \u6838\u5fc3\u8bc4\u4f30\u903b\u8f91\ndef cal_cer_ed(path_ours, tail='_rec'):\n    # ...\n    for i in range(1,N):\n        # ...\n        content_gt = pytesseract.image_to_string(gt)       # \u63d0\u53d6\u771f\u503c\u6587\u672c\n        content1 = pytesseract.image_to_string(img1)     # \u63d0\u53d6\u6a21\u578b\u8f93\u51fa\u7684\u6587\u672c\n\n        l1 = Levenshtein_Distance(content_gt, content1)  # \u8ba1\u7b97\u7f16\u8f91\u8ddd\u79bb\n\n        ed1.append(l1)\n        cer1.append(l1 \/ len(content_gt))                # \u8ba1\u7b97\u5b57\u7b26\u9519\u8bef\u7387\n        # ...\n    print('CER: ', (np.mean(cer1)+np.mean(cer2)) \/ 2.)\n    print('ED:  ', (np.mean(ed1)+np.mean(ed2)) \/ 2.)<\/code><\/pre>\n\n\n\n<p>\u901a\u8fc7\u8fd9\u79cd\u65b9\u5f0f\uff0c\u9879\u76ee\u53ef\u4ee5\u5ba2\u89c2\u3001\u91cf\u5316\u5730\u8bc1\u660e\u5176\u7b97\u6cd5\u7684\u6709\u6548\u6027\u3002<\/p>\n\n\n\n<h2 class=\"wp-block-heading\">\u5982\u4f55\u4f7f\u7528<\/h2>\n\n\n\n<p>DocScanner \u4e0d\u4ec5\u662f\u4e00\u4e2a\u7b97\u6cd5\u5e93\uff0c\u5b83\u8fd8\u63d0\u4f9b\u4e86\u5f00\u7bb1\u5373\u7528\u7684 Web \u5e94\u7528\u548c API\u3002<\/p>\n\n\n\n<ul 
class=\"wp-block-list\">\n<li><strong>\u4f9d\u8d56\u5b89\u88c5<\/strong>: \u9879\u76ee\u4f9d\u8d56 PyTorch, OpenCV \u7b49\u5e93\uff0c\u5177\u4f53\u89c1 <code>requirements.txt<\/code>\u3002<\/li>\n\n\n\n<li><strong>\u4ea4\u4e92\u5f0f\u5e94\u7528<\/strong>: \u8fd0\u884c <code>start_streamlit.sh<\/code> \u4f1a\u542f\u52a8\u4e00\u4e2a\u57fa\u4e8e Streamlit \u7684 Web \u5e94\u7528\u3002\u4f60\u53ea\u9700\u5728\u6d4f\u89c8\u5668\u4e2d\u4e0a\u4f20\u56fe\u7247\uff0c\u5373\u53ef\u5b9e\u65f6\u770b\u5230\u6821\u6b63\u6548\u679c\u3002<\/li>\n\n\n\n<li><strong>API \u670d\u52a1<\/strong>: \u8fd0\u884c <code>start_fastapi.sh<\/code> \u5219\u4f1a\u542f\u52a8\u4e00\u4e2a FastAPI \u670d\u52a1\uff0c\u8ba9\u5176\u4ed6\u7a0b\u5e8f\u53ef\u4ee5\u901a\u8fc7 API \u7684\u65b9\u5f0f\u8c03\u7528\u6587\u6863\u6821\u6b63\u529f\u80fd\u3002<\/li>\n<\/ul>\n\n\n\n<h2 class=\"wp-block-heading\">\u7ed3\u8bba<\/h2>\n\n\n\n<p>DocScanner \u662f\u4e00\u4e2a\u8bbe\u8ba1\u7cbe\u826f\u3001\u6280\u672f\u5148\u8fdb\u7684\u6587\u6863\u6821\u6b63\u9879\u76ee\u3002\u5b83\u901a\u8fc7 <strong>\u201c\u5206\u5272-\u6821\u6b63\u201d<\/strong> \u7684\u4e24\u9636\u6bb5\u8bbe\u8ba1\u63d0\u5347\u4e86\u9c81\u68d2\u6027\uff0c\u5e76\u521b\u9020\u6027\u5730\u5c06\u5149\u6d41\u9886\u57df\u7684 <strong>\u201c\u8fed\u4ee3\u4f18\u5316\u201d<\/strong> \u601d\u60f3\u5f15\u5165\u5230\u6587\u6863\u6821\u6b63\u4efb\u52a1\u4e2d\uff0c\u53d6\u5f97\u4e86\u5353\u8d8a\u7684\u6548\u679c\u3002<\/p>\n\n\n\n<p>\u901a\u8fc7\u5bf9\u5b83\u7684\u6df1\u5ea6\u5256\u6790\uff0c\u6211\u4eec\u4e0d\u4ec5\u5b66\u4e60\u5230\u4e86\u4e00\u4e2a\u5b9e\u7528\u7684 AI \u5de5\u5177\uff0c\u66f4\u80fd\u9886\u7565\u5230\u6df1\u5ea6\u5b66\u4e60\u5728\u89e3\u51b3\u5b9e\u9645\u95ee\u9898\u65f6\u5c55\u73b0\u51fa\u7684\u5f3a\u5927\u5a01\u529b\u4e0e\u4f18\u96c5\u8bbe\u8ba1\u3002<\/p>\n\n\n\n<p><\/p>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<p>DocScanner 
\u662f\u4e00\u4e2a\u5229\u7528\u6df1\u5ea6\u5b66\u4e60\u6a21\u578b\u5bf9\u626d\u66f2\u7684\u6587\u6863\u56fe\u50cf\u8fdb\u884c\u6821\u6b63\uff0c\u5e76\u5229\u7528 OCR \u6280\u672f\u63d0\u53d6\u6587\u672c\u5185\u5bb9\u7684\u5de5\u5177\u3002<\/p>\n\n\n\n<p>\u9879\u76ee\u7684\u6838\u5fc3\u6280\u672f\u6808\u5305\u62ec\uff1a<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li><strong>PyTorch<\/strong>: \u7528\u4e8e\u6df1\u5ea6\u5b66\u4e60\u6a21\u578b\u3002<\/li>\n\n\n\n<li><strong>OpenCV<\/strong> \u548c <strong>Pillow<\/strong>, <strong>scikit-image<\/strong>: \u7528\u4e8e\u56fe\u50cf\u5904\u7406\u3002<\/li>\n\n\n\n<li><strong>NumPy<\/strong>: \u7528\u4e8e\u79d1\u5b66\u8ba1\u7b97\u3002<\/li>\n<\/ul>\n\n\n\n<p><\/p>\n\n\n\n<p><code>app.py<\/code>\u9879\u76ee\u7684\u7aef\u5230\u7aef\u5de5\u4f5c\u6d41\u7a0b\uff1a<\/p>\n\n\n\n<ol class=\"wp-block-list\">\n<li><strong>\u4e0a\u4f20\u56fe\u7247<\/strong>: \u7528\u6237\u901a\u8fc7 Streamlit \u754c\u9762\u4e0a\u4f20\u4e00\u5f20\u6587\u6863\u56fe\u7247\u3002<\/li>\n\n\n\n<li><strong>\u52a0\u8f7d\u6a21\u578b<\/strong>: <code>load_model<\/code> \u51fd\u6570\u4f1a\u52a0\u8f7d\u4e24\u4e2a\u9884\u8bad\u7ec3\u6a21\u578b\uff1a<code>seg.pth<\/code>\uff08\u53ef\u80fd\u7528\u4e8e\u5206\u5272\uff09\u548c <code>DocScanner-L.pth<\/code>\uff08\u6838\u5fc3\u7684\u6821\u6b63\u6a21\u578b\uff09\u3002\u8fd9\u4e24\u4e2a\u6a21\u578b\u88ab\u52a0\u8f7d\u5230\u4e00\u4e2a\u53eb\u505a <code>Net<\/code> \u7684\u7f51\u7edc\u7ed3\u6784\u4e2d\u3002<\/li>\n\n\n\n<li><strong>\u56fe\u50cf\u6821\u6b63<\/strong>: <code>rectify_image<\/code> \u51fd\u6570\u662f\u6838\u5fc3\u5904\u7406\u6b65\u9aa4\u3002\n<ul class=\"wp-block-list\">\n<li>\u5b83\u9996\u5148\u5c06\u4e0a\u4f20\u7684\u56fe\u50cf\u9884\u5904\u7406\uff08\u7f29\u653e\u3001\u5f52\u4e00\u5316\u7b49\uff09\u3002<\/li>\n\n\n\n<li>\u7136\u540e\uff0c\u5c06\u9884\u5904\u7406\u540e\u7684\u56fe\u50cf\u8f93\u5165\u5230 <code>Net<\/code> \u6a21\u578b\u4e2d\uff0c\u6a21\u578b\u4f1a\u9884\u6d4b\u51fa\u4e00\u4e2a\u79f0\u4e3a 
<code>bm<\/code> \u7684\u4e1c\u897f\uff08\u8fd9\u5f88\u53ef\u80fd\u662f\u4e00\u4e2a\u201c\u53cd\u5411\u6620\u5c04\u201d\u6216\u201c\u5149\u6d41\u573a\u201d\uff09\u3002<\/li>\n\n\n\n<li>\u8fd9\u4e2a <code>bm<\/code> \u4f1a\u88ab\u7f29\u653e\u5230\u539f\u59cb\u56fe\u50cf\u7684\u5c3a\u5bf8\u3002<\/li>\n\n\n\n<li>\u6700\u540e\uff0c\u901a\u8fc7 <code>torch.nn.functional.grid_sample<\/code> \u51fd\u6570\uff0c\u5229\u7528\u8fd9\u4e2a <code>bm<\/code> \u5bf9\u539f\u59cb\u56fe\u50cf\u8fdb\u884c\u91cd\u91c7\u6837\uff0c\u5c31\u597d\u50cf\u662f\u628a\u626d\u66f2\u56fe\u50cf\u7684\u50cf\u7d20\u201c\u62c9\u201d\u56de\u5230\u6b63\u786e\u7684\u4f4d\u7f6e\uff0c\u4ece\u800c\u5b9e\u73b0\u6821\u6b63\u3002<\/li>\n<\/ul>\n<\/li>\n\n\n\n<li><strong>\u5c55\u793a\u7ed3\u679c<\/strong>: Streamlit \u5e94\u7528\u6700\u540e\u4f1a\u5c55\u793a\u51fa\u539f\u59cb\u56fe\u50cf\u548c\u6821\u6b63\u540e\u7684\u56fe\u50cf\u3002<\/li>\n<\/ol>\n\n\n\n<p>\u6838\u5fc3\u9b54\u6cd5\u5728\u4e8e <code>Net<\/code> \u6a21\u578b\u9884\u6d4b\u51fa\u7684 <code>bm<\/code> \u6620\u5c04\u4ee5\u53ca <code>grid_sample<\/code> \u7684\u5de7\u5999\u8fd0\u7528\u3002<\/p>\n\n\n\n<p>\u4e0b\u4e00\u6b65\uff0c\u641e\u6e05\u695a <code>Net<\/code> \u5230\u5e95\u662f\u4ec0\u4e48\u3002<br><\/p>\n\n\n\n<p>\u5173\u952e\u4fe1\u606f\u5982\u4e0b\uff1a<\/p>\n\n\n\n<ol class=\"wp-block-list\">\n<li><strong><code>Net<\/code> \u7c7b\u7684\u5de5\u4f5c\u6d41\u7a0b<\/strong>: \u8fd9\u4e2a\u7c7b\u5c01\u88c5\u4e86\u4e00\u4e2a\u975e\u5e38\u5173\u952e\u7684\u4e24\u9636\u6bb5\uff08Two-Stage\uff09\u5904\u7406\u6d41\u7a0b\u3002\n<ul class=\"wp-block-list\">\n<li><strong>\u7b2c\u4e00\u9636\u6bb5\uff1a\u5206\u5272 (<code>self.msk<\/code>)<\/strong>: \u9996\u5148\uff0c\u6a21\u578b\u4f7f\u7528\u4e86\u4e00\u4e2a <code>U2NETP<\/code> \u7f51\u7edc\uff08\u5b9a\u4e49\u5728 <code>seg.py<\/code> \u4e2d\uff09\u6765\u5206\u5272\u51fa\u56fe\u50cf\u4e2d\u7684\u6587\u6863\u533a\u57df\u3002<code>x = msk * x<\/code> 
\u8fd9\u884c\u4ee3\u7801\u5c06\u5206\u5272\u5f97\u5230\u7684\u8499\u7248\uff08mask\uff09\u5e94\u7528\u5230\u539f\u59cb\u56fe\u50cf\u4e0a\uff0c\u5176\u6548\u679c\u5c31\u662f<strong>\u53bb\u9664\u80cc\u666f<\/strong>\uff0c\u8ba9\u540e\u7eed\u7684\u7f51\u7edc\u53ef\u4ee5\u66f4\u4e13\u6ce8\u4e8e\u6587\u6863\u672c\u8eab\u3002\u8fd9\u662f\u4e00\u4e2a\u975e\u5e38\u806a\u660e\u7684\u8bbe\u8ba1\uff0c\u53ef\u4ee5\u5927\u5927\u63d0\u9ad8\u6821\u6b63\u6a21\u578b\u7684\u9c81\u68d2\u6027\u3002<\/li>\n\n\n\n<li><strong>\u7b2c\u4e8c\u9636\u6bb5\uff1a\u6821\u6b63 (<code>self.bm<\/code>)<\/strong>: \u63a5\u7740\uff0c\u88ab\u8499\u7248\u5904\u7406\u8fc7\u7684\u56fe\u50cf\u88ab\u9001\u5165 <code>DocScanner<\/code> \u6a21\u578b\uff08\u5b9a\u4e49\u5728 <code>model.py<\/code> \u4e2d\uff09\uff0c\u8fd9\u624d\u662f\u6267\u884c\u6587\u6863\u6821\u6b63\u3001\u9884\u6d4b\u53cd\u5411\u6620\u5c04 <code>bm<\/code> \u7684\u6838\u5fc3\u7f51\u7edc\u3002<\/li>\n<\/ul>\n<\/li>\n\n\n\n<li><strong>\u6a21\u578b\u7ec6\u8282<\/strong>:\n<ul class=\"wp-block-list\">\n<li><code>U2NETP<\/code>: \u4e00\u4e2a\u8f7b\u91cf\u7ea7\u7684\u663e\u8457\u6027\u7269\u4f53\u68c0\u6d4b\u7f51\u7edc\uff0c\u5728\u8fd9\u91cc\u88ab\u7528\u6765\u505a\u6587\u6863\u5206\u5272\u3002<\/li>\n\n\n\n<li><code>DocScanner<\/code>: \u9884\u6d4b\u6587\u6863\u626d\u66f2\u5f62\u53d8\u7684\u6838\u5fc3\u7f51\u7edc\u3002<\/li>\n<\/ul>\n<\/li>\n\n\n\n<li><strong>\u547d\u4ee4\u884c\u652f\u6301<\/strong>: \u8fd9\u4e2a\u811a\u672c\u8fd8\u53ef\u4ee5\u76f4\u63a5\u901a\u8fc7\u547d\u4ee4\u884c\u8fd0\u884c\uff0c\u6279\u91cf\u5904\u7406\u6574\u4e2a\u6587\u4ef6\u5939\u7684\u56fe\u7247\u3002<\/li>\n<\/ol>\n\n\n\n<p>\u6240\u4ee5\uff0c\u6574\u4e2a\u5904\u7406\u6d41\u7a0b\uff1a<\/p>\n\n\n\n<p><strong>\u8f93\u5165\u56fe\u50cf -&gt; <code>U2NETP<\/code> \u5206\u5272 -&gt; \u751f\u6210\u8499\u7248 -&gt; \u5e94\u7528\u8499\u7248\uff08\u53bb\u9664\u80cc\u666f\uff09 -&gt; <code>DocScanner<\/code> \u6821\u6b63 -&gt; \u9884\u6d4b\u626d\u66f2\u56fe (<code>bm<\/code>) 
-&gt; <code>grid_sample<\/code> \u91cd\u91c7\u6837 -&gt; \u8f93\u51fa\u6821\u6b63\u540e\u7684\u56fe\u50cf<\/strong><\/p>\n\n\n\n<p>\u8fd9\u4e2a\u201c\u5206\u5272-\u518d\u6821\u6b63\u201d\u7684\u4e24\u9636\u6bb5\u8bbe\u8ba1\u662f\u6574\u4e2a\u9879\u76ee\u7684\u67b6\u6784\u4eae\u70b9\u3002<\/p>\n\n\n\n<p>\u63a5\u4e0b\u6765\uff0c\u6df1\u5165\u4e86\u89e3 <code>DocScanner<\/code> \u6a21\u578b\u672c\u8eab\u7684\u7f51\u7edc\u7ed3\u6784\u3002\u5b83\u662f\u5728 <code>model.py<\/code> \u6587\u4ef6\u4e2d\u5b9a\u4e49\u7684\uff0c<br>\u5982\u4e0b\uff1a<\/p>\n\n\n\n<ol class=\"wp-block-list\">\n<li><strong>\u8fed\u4ee3\u4f18\u5316\u67b6\u6784<\/strong>: <code>DocScanner<\/code> \u6a21\u578b\u5e76\u975e\u4e00\u6b21\u6027\u751f\u6210\u7ed3\u679c\uff0c\u800c\u662f\u91c7\u7528\u4e86\u4e00\u4e2a\u8fed\u4ee3\u5faa\u73af\uff08<code>for itr in range(iters)<\/code>\uff09\u6765\u9010\u6b65\u4f18\u5316\u7ed3\u679c\u3002\u8fd9\u79cd\u6a21\u5f0f\u5728\u5149\u6d41\u4f30\u8ba1\u7b97\u6cd5\u4e2d\u975e\u5e38\u7ecf\u5178\uff0c\u4f8b\u5982\u5927\u540d\u9f0e\u9f0e\u7684 RAFT \u6a21\u578b\uff0c\u800c <code>DocScanner<\/code> \u7684\u67b6\u6784\u5f88\u660e\u663e\u53d7\u5230\u4e86 RAFT \u7684\u542f\u53d1\u3002\u5728\u6bcf\u4e00\u6b21\u8fed\u4ee3\u4e2d\uff0c\u6a21\u578b\u90fd\u4f1a\u5bf9\u9884\u6d4b\u7684\u201c\u5149\u6d41\u573a\u201d\uff08\u5373<code>delta_flow<\/code>\uff0c\u5728\u8fd9\u91cc\u4ee3\u8868\u4e86\u50cf\u7d20\u7684\u201c\u79fb\u52a8\u65b9\u5411\u201d\uff09\u8fdb\u884c\u4e00\u6b21\u4f18\u5316\u3002<\/li>\n\n\n\n<li><strong>\u6838\u5fc3\u7ec4\u4ef6<\/strong>:\n<ul class=\"wp-block-list\">\n<li><code>fnet (BasicEncoder)<\/code>: \u8fd9\u662f\u4e00\u4e2a\u7279\u5f81\u63d0\u53d6\u5668\u3002\u5b83\u8d1f\u8d23\u5c06\u8f93\u5165\u7684\u56fe\u50cf\u8f6c\u6362\u6210\u4e00\u4e2a\u9ad8\u7ef4\u7684\u7279\u5f81\u56fe\u8c31\uff08<code>fmap1<\/code>\uff09\u3002\u6211\u9700\u8981\u67e5\u770b <code>extractor.py<\/code> \u6765\u4e86\u89e3 <code>BasicEncoder<\/code> 
\u7684\u5177\u4f53\u7ed3\u6784\u3002<\/li>\n\n\n\n<li><code>update_block (BasicUpdateBlock)<\/code>: \u8fd9\u662f\u8fed\u4ee3\u8fc7\u7a0b\u7684\u6838\u5fc3\u3002\u5728\u6bcf\u4e00\u6b65\u4e2d\uff0c\u5b83\u63a5\u6536\u5f53\u524d\u7684\u7f51\u7edc\u72b6\u6001 (<code>net<\/code>)\u3001\u8f93\u5165\u7279\u5f81 (<code>inp<\/code>)\u3001\u6839\u636e\u5f53\u524d\u5149\u6d41\u573a\u201c\u626d\u66f2\u201d\u8fc7\u7684\u7279\u5f81 (<code>warpfea<\/code>) \u4ee5\u53ca\u5f53\u524d\u9884\u6d4b\u7684\u5149\u6d41\u573a (<code>flow<\/code>)\uff0c\u7136\u540e\u8ba1\u7b97\u51fa\u4e00\u4e2a\u7528\u4e8e\u4f18\u5316\u7684\u66f4\u65b0\u91cf <code>delta_flow<\/code>\u3002\u6211\u540c\u6837\u9700\u8981\u67e5\u770b <code>update.py<\/code> \u6765\u4e86\u89e3\u5b83\u7684\u5185\u90e8\u7ed3\u6784\u3002<\/li>\n\n\n\n<li><code>initialize_flow<\/code>: \u521d\u59cb\u5316\u5750\u6807\u7f51\u683c\uff0c\u4f5c\u4e3a\u8fed\u4ee3\u7684\u8d77\u70b9\u3002<\/li>\n\n\n\n<li><code>upsample_flow<\/code>: \u6a21\u578b\u7684\u6838\u5fc3\u8ba1\u7b97\u662f\u5728\u4e00\u4e2a\u8f83\u5c0f\u7684\u5206\u8fa8\u7387\u4e0a\uff08\u539f\u56fe\u76841\/8\uff09\u8fdb\u884c\u7684\u3002\u8fd9\u4e2a\u51fd\u6570\u8d1f\u8d23\u5c06\u9884\u6d4b\u51fa\u7684\u5149\u6d41\u573a\u4e0a\u91c7\u6837\u56de\u539f\u59cb\u56fe\u50cf\u7684\u5206\u8fa8\u7387\u3002<\/li>\n\n\n\n<li><code>bilinear_sampler<\/code>: <code>grid_sample<\/code> \u7684\u4e00\u4e2a\u5305\u88c5\u51fd\u6570\uff0c\u5b83\u6839\u636e\u5f53\u524d\u9884\u6d4b\u7684\u5149\u6d41\u573a\u6765\u201c\u626d\u66f2\u201d\u7279\u5f81\u56fe\uff0c\u5e76\u5c06\u7ed3\u679c\u7528\u4e8e\u4e0b\u4e00\u6b21\u8fed\u4ee3\u3002<\/li>\n<\/ul>\n<\/li>\n\n\n\n<li><strong>\u524d\u5411\u4f20\u64ad\u903b\u8f91<\/strong>:\n<ul class=\"wp-block-list\">\n<li>\u8f93\u5165\u56fe\u50cf\u9996\u5148\u901a\u8fc7 <code>fnet<\/code> \u63d0\u53d6\u7279\u5f81\u3002<\/li>\n\n\n\n<li>\u6a21\u578b\u521d\u59cb\u5316\u4e24\u4e2a\u5750\u6807\u7f51\u683c\uff1a<code>coords0<\/code> 
\u4f5c\u4e3a\u57fa\u51c6\u7f51\u683c\uff0c<code>coords1<\/code> \u4f5c\u4e3a\u5c06\u88ab\u6301\u7eed\u8fed\u4ee3\u4f18\u5316\u7684\u7f51\u683c\u3002\u8fd9\u4e24\u4e2a\u7f51\u683c\u7684\u5dee\u503c\uff08<code>coords1 - coords0<\/code>\uff09\u5c31\u4ee3\u8868\u4e86\u626d\u66f2\u7684\u5f62\u53d8\u573a\u3002<\/li>\n\n\n\n<li>\u8fdb\u5165\u6838\u5fc3\u7684\u8fed\u4ee3\u5faa\u73af\u3002<\/li>\n\n\n\n<li>\u5728\u5faa\u73af\u4e2d\uff1a\n<ul class=\"wp-block-list\">\n<li><code>update_block<\/code> \u9884\u6d4b\u51fa\u4e00\u4e2a <code>delta_flow<\/code> \u66f4\u65b0\u91cf\u3002<\/li>\n\n\n\n<li><code>coords1<\/code> \u52a0\u4e0a\u8fd9\u4e2a\u66f4\u65b0\u91cf\uff0c\u53d8\u5f97\u66f4\u7cbe\u786e\u3002<\/li>\n\n\n\n<li>\u5c06\u4f18\u5316\u540e\u7684\u5149\u6d41\u573a\u4e0a\u91c7\u6837\uff0c\u5f97\u5230\u6700\u7ec8\u53ef\u7528\u4e8e\u5168\u5c3a\u5bf8\u56fe\u50cf\u7684\u6620\u5c04 <code>bm_up<\/code>\u3002<\/li>\n\n\n\n<li>\u6839\u636e\u5f53\u524d\u7684 <code>coords1<\/code> \u6765\u201c\u626d\u66f2\u201d\u539f\u59cb\u7279\u5f81\u56fe\uff0c\u7528\u4e8e\u4e0b\u4e00\u6b21\u8fed\u4ee3\u3002<\/li>\n<\/ul>\n<\/li>\n\n\n\n<li>\u5faa\u73af\u7ed3\u675f\u540e\uff0c\u8fd4\u56de\u6700\u7ec8\u7684 <code>bm_up<\/code>\u3002<\/li>\n<\/ul>\n<\/li>\n<\/ol>\n\n\n\n<p>\u8fd9\u662f\u4e00\u4e2a\u76f8\u5f53\u590d\u6742\u7684\u67b6\u6784\u3002\u5b83\u7684\u672c\u8d28\u662f\u5728\u5b66\u4e60\u5982\u4f55\u201c\u79fb\u52a8\u201d\u626d\u66f2\u56fe\u50cf\u4e0a\u7684\u50cf\u7d20\u70b9\uff0c\u76f4\u5230\u8fd9\u4e9b\u70b9\u80fd\u6392\u5217\u6210\u4e00\u4e2a\u5b8c\u7f8e\u7684\u3001\u65e0\u626d\u66f2\u7684\u77e9\u5f62\u7f51\u683c\u3002\u6a21\u578b\u6700\u7ec8\u8fd4\u56de\u7684 <code>bm_up<\/code> \u5c31\u662f\u4e00\u4e2a\u5750\u6807\u6620\u5c04\u8868\uff0c\u5b83\u544a\u8bc9 <code>grid_sample<\/code> 
\u51fd\u6570\u5e94\u8be5\u4ece\u539f\u59cb\u626d\u66f2\u56fe\u50cf\u7684\u54ea\u4e2a\u5750\u6807\u53bb\u53d6\u50cf\u7d20\uff0c\u624d\u80fd\u62fc\u6210\u4e00\u5f20\u6821\u6b63\u597d\u7684\u56fe\u7247\u3002<\/p>\n\n\n\n<p><code>BasicEncoder<\/code> \u5c31\u662f\u4e00\u4e2a\u6807\u51c6\u7684\u5377\u79ef\u795e\u7ecf\u7f51\u7edc\uff08CNN\uff09\u7279\u5f81\u63d0\u53d6\u5668\uff0c\u5176\u67b6\u6784\u7c7b\u4f3c\u4e8e <strong>ResNet<\/strong>\uff1a<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u5b83\u7531\u4e00\u7cfb\u5217\u7684 <code>ResidualBlock<\/code>\uff08\u6b8b\u5dee\u5757\uff09\u5806\u53e0\u800c\u6210\u3002<\/li>\n\n\n\n<li>\u8f93\u5165\u662f3\u901a\u9053\u7684\u5f69\u8272\u56fe\u50cf\u3002<\/li>\n\n\n\n<li>\u7f51\u7edc\u901a\u8fc7\u591a\u4e2a\u5e26\u6709 <code>stride=2<\/code> \u7684\u5377\u79ef\u5c42\uff0c\u9010\u6b65\u5bf9\u56fe\u50cf\u8fdb\u884c\u4e0b\u91c7\u6837\u548c\u7279\u5f81\u63d0\u53d6\u3002<\/li>\n\n\n\n<li>\u5177\u4f53\u6765\u8bf4\uff0c\u4e00\u5f20 288&#215;288 \u7684\u8f93\u5165\u56fe\u7247\u4f1a\u7ecf\u5386\u4ee5\u4e0b\u8fc7\u7a0b\uff1a\n<ol class=\"wp-block-list\">\n<li><code>conv1<\/code>: \u6b65\u957f\u4e3a2\uff0c\u5c3a\u5bf8\u53d8\u4e3a 144&#215;144<\/li>\n\n\n\n<li><code>layer1<\/code>: \u6b65\u957f\u4e3a1\uff0c\u5c3a\u5bf8\u4fdd\u6301 144&#215;144<\/li>\n\n\n\n<li><code>layer2<\/code>: \u6b65\u957f\u4e3a2\uff0c\u5c3a\u5bf8\u53d8\u4e3a 72&#215;72<\/li>\n\n\n\n<li><code>layer3<\/code>: \u6b65\u957f\u4e3a2\uff0c\u5c3a\u5bf8\u53d8\u4e3a 36&#215;36<\/li>\n<\/ol>\n<\/li>\n\n\n\n<li>\u6700\u7ec8\u8f93\u51fa\u7684\u7279\u5f81\u56fe\u5728\u7a7a\u95f4\u4e0a\u662f\u8f93\u5165\u56fe\u50cf\u5c3a\u5bf8\u7684 <strong>1\/8<\/strong> (288 \/ 8 = 36)\uff0c\u8fd9\u4e0e\u6211\u4e4b\u524d\u7684\u63a8\u65ad\u5b8c\u5168\u543b\u5408\u3002<\/li>\n<\/ul>\n\n\n\n<p>\u603b\u7ed3\u4e00\u4e0b\uff0c\u7279\u5f81\u63d0\u53d6\u5668 <code>fnet<\/code> \u5c31\u662f\u4e00\u4e2a\u7ecf\u5178\u7684 ResNet-like \u7f16\u7801\u5668\u3002<\/p>\n\n\n\n<p><code>BasicUpdateBlock<\/code> 
\u7684\u4f5c\u7528\u53ef\u4ee5\u603b\u7ed3\u4e3a\uff1a<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li><strong>ConvGRU<\/strong>: \u8fd9\u662f\u4e00\u4e2a\u5e26\u6709\u5377\u79ef\u64cd\u4f5c\u7684\u95e8\u63a7\u5faa\u73af\u5355\u5143\uff08GRU\uff09\u3002\u5b83\u5728\u6574\u4e2a\u8fed\u4ee3\u8fc7\u7a0b\u4e2d\u626e\u6f14\u7740\u201c\u8bb0\u5fc6\u201d\u6216\u201c\u72b6\u6001\u201d (<code>net<\/code>) \u7684\u89d2\u8272\u3002GRU \u5355\u5143\u63a5\u6536\u4e0a\u4e00\u6b65\u7684\u72b6\u6001\u548c\u8fd9\u4e00\u6b65\u7684\u65b0\u8f93\u5165\uff0c\u7136\u540e\u751f\u6210\u66f4\u65b0\u540e\u7684\u72b6\u6001\u3002\u4fe1\u606f\u5c31\u662f\u8fd9\u6837\u5728\u8fed\u4ee3\u4e2d\u4f20\u9012\u7684\u3002<\/li>\n\n\n\n<li><strong>BasicMotionEncoder<\/strong>: \u8fd9\u4e2a\u6a21\u5757\u8d1f\u8d23\u5c06\u5f53\u524d\u9884\u6d4b\u7684\u5f62\u53d8\u573a\uff08flow\uff09\u548c\u539f\u59cb\u56fe\u50cf\u7279\u5f81\u8fdb\u884c\u7f16\u7801\uff0c\u878d\u5408\u6210\u201c\u8fd0\u52a8\u7279\u5f81\u201d\u3002<\/li>\n\n\n\n<li><strong>GRU \u7684\u8f93\u5165<\/strong>: GRU \u7684\u8f93\u5165 (<code>inp<\/code>) \u662f\u539f\u59cb\u56fe\u50cf\u7279\u5f81\u548c\u201c\u8fd0\u52a8\u7279\u5f81\u201d\u7684\u7ec4\u5408\u3002<\/li>\n\n\n\n<li><strong>FlowHead<\/strong>: \u4e00\u4e2a\u7b80\u5355\u7684\u5377\u79ef\u7f51\u7edc\uff0c\u5b83\u63a5\u6536 GRU \u66f4\u65b0\u540e\u7684\u72b6\u6001 <code>net<\/code>\uff0c\u5e76\u9884\u6d4b\u51fa\u5f53\u524d\u8fed\u4ee3\u9700\u8981\u4fee\u6b63\u7684 <code>delta_flow<\/code>\uff08\u5f62\u53d8\u589e\u91cf\uff09\u3002<\/li>\n\n\n\n<li><strong>Mask<\/strong>: \u5b83\u8fd8\u5e76\u884c\u5730\u9884\u6d4b\u4e86\u4e00\u4e2a <code>mask<\/code>\uff0c\u8fd9\u4e2a <code>mask<\/code> \u5728\u4e3b\u6a21\u578b\u4e2d\u7528\u4e8e\u6307\u5bfc\u5982\u4f55\u66f4\u667a\u80fd\u5730\u5c06\u4f4e\u5206\u8fa8\u7387\u7684\u5f62\u53d8\u573a\u4e0a\u91c7\u6837\u5230\u9ad8\u5206\u8fa8\u7387\u3002<\/li>\n<\/ul>\n\n\n\n<p><strong>\u6574\u4e2a\u7b97\u6cd5\u6d41\u7a0b\u5982\u4e0b\uff1a<\/strong><\/p>\n\n\n\n<ol 
class=\"wp-block-list\">\n<li><strong>\u8f93\u5165<\/strong>: \u4e00\u5f20\u626d\u66f2\u7684\u6587\u6863\u56fe\u7247\u3002<\/li>\n\n\n\n<li><strong>\u5206\u5272 (\u53ef\u9009\u4f46\u91cd\u8981)<\/strong>: <code>U2NETP<\/code> \u6a21\u578b\u9996\u5148\u5c06\u6587\u6863\u4ece\u80cc\u666f\u4e2d\u5206\u5272\u51fa\u6765\uff0c\u5e76\u5c06\u80cc\u666f\u50cf\u7d20\u7f6e\u96f6\u3002\u8fd9\u4e3a\u540e\u7eed\u5904\u7406\u63d0\u4f9b\u4e86\u4e00\u4e2a\u5e72\u51c0\u3001\u65e0\u5e72\u6270\u7684\u8f93\u5165\u3002<\/li>\n\n\n\n<li><strong>\u7279\u5f81\u63d0\u53d6<\/strong>: \u4e00\u4e2a\u7c7b ResNet \u7684 <code>BasicEncoder<\/code> (<code>fnet<\/code>) \u4ece\u6587\u6863\u56fe\u50cf\u4e2d\u63d0\u53d6\u4e00\u4e2a 1\/8 \u5206\u8fa8\u7387\u7684\u7279\u5f81\u56fe\u3002\u8fd9\u4e2a\u7279\u5f81\u56fe\u88ab\u4e00\u5206\u4e3a\u4e8c\uff1a<code>net<\/code>\uff08\u4f5c\u4e3a GRU \u7684\u521d\u59cb\u9690\u85cf\u72b6\u6001\uff09\u548c <code>inp<\/code>\uff08\u4f5c\u4e3a\u8f93\u5165\u7279\u5f81\uff09\u3002<\/li>\n\n\n\n<li><strong>\u8fed\u4ee3\u4f18\u5316 (\u6838\u5fc3\u5faa\u73af)<\/strong>: \u6a21\u578b\u4f1a\u8fdb\u884c\u56fa\u5b9a\u6b21\u6570\u7684\u8fed\u4ee3\uff08\u4f8b\u598212\u6b21\uff09\u3002\u5728\u6bcf\u4e00\u6b21\u8fed\u4ee3\u4e2d\uff1a<br>a. <strong>\u8fd0\u52a8\u7f16\u7801<\/strong>: \u5c06\u5f53\u524d\u7684\u5f62\u53d8\u573a\u4f30\u8ba1\u503c\u548c\u56fe\u50cf\u7279\u5f81\u7f16\u7801\u4e3a\u201c\u8fd0\u52a8\u7279\u5f81\u201d\u3002<br>b. <strong>GRU \u66f4\u65b0<\/strong>: <code>ConvGRU<\/code> \u5355\u5143\u5229\u7528\u201c\u8fd0\u52a8\u7279\u5f81\u201d\u548c\u8f93\u5165\u7279\u5f81\u6765\u66f4\u65b0\u5176\u9690\u85cf\u72b6\u6001 <code>net<\/code>\u3002\u8fd9\u662f\u4f18\u5316\u8fc7\u7a0b\u7684\u201c\u8bb0\u5fc6\u6838\u5fc3\u201d\u3002<br>c. 
<strong>\u589e\u91cf\u9884\u6d4b<\/strong>: <code>FlowHead<\/code> \u7f51\u7edc\u6839\u636e\u66f4\u65b0\u540e\u7684 GRU \u72b6\u6001\u9884\u6d4b\u51fa\u4e00\u4e2a <code>delta_flow<\/code>\uff08\u4e00\u4e2a\u5fae\u5c0f\u7684\u4fee\u6b63\u91cf\uff09\u3002<br>d. <strong>\u5f62\u53d8\u573a\u66f4\u65b0<\/strong>: \u5c06\u9884\u6d4b\u51fa\u7684 <code>delta_flow<\/code> \u53e0\u52a0\u5230\u603b\u7684\u5f62\u53d8\u573a\u4e0a\uff0c\u4f7f\u5176\u66f4\u63a5\u8fd1\u5b8c\u7f8e\u7ed3\u679c\u3002<br>e. <strong>\u7279\u5f81\u626d\u66f2<\/strong>: \u4f7f\u7528\u66f4\u65b0\u540e\u7684\u5f62\u53d8\u573a\u6765\u201c\u626d\u66f2\u201d\u7b2c3\u6b65\u4e2d\u63d0\u53d6\u7684\u539f\u59cb\u7279\u5f81\u56fe\uff0c\u7528\u4e8e\u4e0b\u4e00\u6b21\u8fed\u4ee3\u3002<\/li>\n\n\n\n<li><strong>\u4e0a\u91c7\u6837<\/strong>: \u6700\u7ec8\uff0c\u5728\u4f4e\u5206\u8fa8\u7387\u4e0a\u4f18\u5316\u597d\u7684\u5f62\u53d8\u573a\uff0c\u4f1a\u901a\u8fc7\u4e00\u4e2a\u5b66\u4e60\u5230\u7684\u4e0a\u91c7\u6837 <code>mask<\/code> \u88ab\u653e\u5927\u56de\u539f\u59cb\u56fe\u50cf\u7684\u5206\u8fa8\u7387\uff0c\u5f97\u5230\u6700\u7ec8\u7684\u5750\u6807\u6620\u5c04\u8868 <code>bm<\/code>\u3002<\/li>\n\n\n\n<li><strong>\u56fe\u50cf\u6821\u6b63<\/strong>: <code>F.grid_sample<\/code> \u51fd\u6570\u5229\u7528\u8fd9\u4e2a\u9ad8\u5206\u8fa8\u7387\u7684 <code>bm<\/code> \u6620\u5c04\u8868\uff0c\u4ece\u539f\u59cb\u7684\u3001\u626d\u66f2\u7684\u56fe\u50cf\u4e2d\u7cbe\u51c6\u5730\u62fe\u53d6\u50cf\u7d20\uff0c\u6700\u7ec8\u201c\u62fc\u201d\u6210\u4e00\u5f20\u5e73\u6574\u3001\u65e0\u7578\u53d8\u7684\u6587\u6863\u56fe\u7247\u3002<\/li>\n<\/ol>\n\n\n\n<p>DocScanner \u662f\u4e00\u4e2a\u975e\u5e38\u7cbe\u5999\u4e14\u5f3a\u5927\u7684\u7b97\u6cd5\uff0c\u5b83\u7684\u8bbe\u8ba1\u54f2\u5b66\u660e\u663e\u53d7\u5230\u4e86\u5149\u6d41\u9886\u57df\u9876\u5c16\u6a21\u578b RAFT 
\u7684\u542f\u53d1\uff0c\u5e76\u88ab\u521b\u9020\u6027\u5730\u5e94\u7528\u4e8e\u6587\u6863\u56fe\u50cf\u6821\u6b63\u4efb\u52a1\u3002<\/p>\n\n\n\n<p><\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u5728\u6570\u5b57\u5316\u65f6\u4ee3\uff0c\u6211\u4eec\u7ecf\u5e38\u9700\u8981\u7528\u624b\u673a&#46;&#46;&#46;<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[4],"tags":[],"class_list":["post-2537","post","type-post","status-publish","format-standard","hentry","category-4"],"_links":{"self":[{"href":"https:\/\/sanlangcode.com\/index.php\/wp-json\/wp\/v2\/posts\/2537","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/sanlangcode.com\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/sanlangcode.com\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/sanlangcode.com\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/sanlangcode.com\/index.php\/wp-json\/wp\/v2\/comments?post=2537"}],"version-history":[{"count":5,"href":"https:\/\/sanlangcode.com\/index.php\/wp-json\/wp\/v2\/posts\/2537\/revisions"}],"predecessor-version":[{"id":2542,"href":"https:\/\/sanlangcode.com\/index.php\/wp-json\/wp\/v2\/posts\/2537\/revisions\/2542"}],"wp:attachment":[{"href":"https:\/\/sanlangcode.com\/index.php\/wp-json\/wp\/v2\/media?parent=2537"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/sanlangcode.com\/index.php\/wp-json\/wp\/v2\/categories?post=2537"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/sanlangcode.com\/index.php\/wp-json\/wp\/v2\/tags?post=2537"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}