😎增加PaddleOCR图像识别(身份证图像提取)

This commit is contained in:
zuohuaijun 2024-10-14 12:43:02 +08:00
parent 4ae139112d
commit cd789ad983
6 changed files with 265 additions and 0 deletions

View File

@ -28,6 +28,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Admin.NET.Plugin.ApprovalFl
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Admin.NET.Plugin.K3Cloud", "Plugins\Admin.NET.Plugin.K3Cloud\Admin.NET.Plugin.K3Cloud.csproj", "{9EB9C39E-E14F-443E-9AA3-EE417ABCBC1D}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Admin.NET.Plugin.PaddleOCR", "Plugins\Admin.NET.Plugin.PaddleOCR\Admin.NET.Plugin.PaddleOCR.csproj", "{1B106C11-E5BF-44AB-A283-1E948A8BD8C2}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
@ -70,6 +72,10 @@ Global
{9EB9C39E-E14F-443E-9AA3-EE417ABCBC1D}.Debug|Any CPU.Build.0 = Debug|Any CPU
{9EB9C39E-E14F-443E-9AA3-EE417ABCBC1D}.Release|Any CPU.ActiveCfg = Release|Any CPU
{9EB9C39E-E14F-443E-9AA3-EE417ABCBC1D}.Release|Any CPU.Build.0 = Release|Any CPU
{1B106C11-E5BF-44AB-A283-1E948A8BD8C2}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{1B106C11-E5BF-44AB-A283-1E948A8BD8C2}.Debug|Any CPU.Build.0 = Debug|Any CPU
{1B106C11-E5BF-44AB-A283-1E948A8BD8C2}.Release|Any CPU.ActiveCfg = Release|Any CPU
{1B106C11-E5BF-44AB-A283-1E948A8BD8C2}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
@ -80,6 +86,7 @@ Global
{04AB2E76-DE8B-4EFD-9F48-F8D4C0993106} = {76F70D22-8D53-468E-A3B6-1704666A1D71}
{4124E31B-EA94-4EE3-9EC6-A565F1420AEA} = {76F70D22-8D53-468E-A3B6-1704666A1D71}
{9EB9C39E-E14F-443E-9AA3-EE417ABCBC1D} = {76F70D22-8D53-468E-A3B6-1704666A1D71}
{1B106C11-E5BF-44AB-A283-1E948A8BD8C2} = {76F70D22-8D53-468E-A3B6-1704666A1D71}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {5CD801D7-984A-4F5C-8FA2-211B7A5EA9F3}

View File

@ -0,0 +1,41 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Build definition for the Admin.NET PaddleOCR plugin (ID-card image recognition). -->
<PropertyGroup>
<!-- The plugin is built for both net6.0 and net8.0. -->
<TargetFrameworks>net6.0;net8.0</TargetFrameworks>
<!-- Compiler warning codes suppressed for this project. -->
<NoWarn>1701;1702;1591;8632</NoWarn>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>disable</Nullable>
<GenerateDocumentationFile>True</GenerateDocumentationFile>
<Copyright>Admin.NET</Copyright>
<Description>Admin.NET 通用权限开发平台</Description>
</PropertyGroup>
<ItemGroup>
<!-- Files under Configuration\ and wwwroot\ are copied to the build and publish output when newer, and are kept out of a single-file bundle. -->
<Content Include="Configuration\**\*">
<ExcludeFromSingleFile>true</ExcludeFromSingleFile>
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
<CopyToPublishDirectory>PreserveNewest</CopyToPublishDirectory>
</Content>
<Content Include="wwwroot\**\*">
<ExcludeFromSingleFile>true</ExcludeFromSingleFile>
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
<CopyToPublishDirectory>PreserveNewest</CopyToPublishDirectory>
</Content>
</ItemGroup>
<ItemGroup>
<!-- Everything under OcrModel\ is removed from the Compile, EmbeddedResource and None item lists. -->
<!-- NOTE(review): with these removals nothing here copies OcrModel\ to the output directory; confirm how custom model folders are deployed. -->
<Compile Remove="OcrModel\**" />
<EmbeddedResource Remove="OcrModel\**" />
<None Remove="OcrModel\**" />
</ItemGroup>
<ItemGroup>
<!-- NOTE(review): the runtime package name suggests Windows x64 only; confirm before deploying to other platforms. -->
<PackageReference Include="Paddle.Runtime.win_x64" Version="2.6.1.1" />
<PackageReference Include="PaddleOCRSharp" Version="4.4.0.1" />
</ItemGroup>
<ItemGroup>
<!-- The plugin builds against the Admin.NET core project. -->
<ProjectReference Include="..\..\Admin.NET.Core\Admin.NET.Core.csproj" />
</ItemGroup>
</Project>

View File

@ -0,0 +1,19 @@
// Admin.NET 项目的版权、商标、专利和其他相关权利均受相应法律法规的保护。使用本项目应遵守相关法律法规和许可证的要求。
//
// 本项目主要遵循 MIT 许可证和 Apache 许可证(版本 2.0)进行分发和使用。许可证位于源代码树根目录中的 LICENSE-MIT 和 LICENSE-APACHE 文件。
//
// 不得利用本项目从事危害国家安全、扰乱社会秩序、侵犯他人合法权益等法律法规禁止的活动!任何基于本项目二次开发而产生的一切法律纠纷和责任,我们不承担任何责任!
namespace Admin.NET.Plugin.PaddleOCR;
/// <summary>
/// Constants for the PaddleOCR image-recognition plugin.
/// </summary>
/// <remarks>
/// NOTE(review): the <c>[Const]</c> attribute is declared outside this file; presumably it
/// registers this class as a named constant group — confirm against its definition.
/// </remarks>
[Const("PaddleOCR 图像识别")]
public class ApplicationConst
{
/// <summary>
/// API group name used when describing this plugin's API endpoints.
/// </summary>
public const string GroupName = "PaddleOCR 图像识别";
}

View File

@ -0,0 +1,11 @@
// Admin.NET 项目的版权、商标、专利和其他相关权利均受相应法律法规的保护。使用本项目应遵守相关法律法规和许可证的要求。
//
// 本项目主要遵循 MIT 许可证和 Apache 许可证(版本 2.0)进行分发和使用。许可证位于源代码树根目录中的 LICENSE-MIT 和 LICENSE-APACHE 文件。
//
// 不得利用本项目从事危害国家安全、扰乱社会秩序、侵犯他人合法权益等法律法规禁止的活动!任何基于本项目二次开发而产生的一切法律纠纷和责任,我们不承担任何责任!
global using Admin.NET.Core;
global using Furion.DynamicApiController;
global using Microsoft.AspNetCore.Authorization;
global using Microsoft.AspNetCore.Mvc;
global using System.ComponentModel;

View File

@ -0,0 +1,92 @@
// Admin.NET 项目的版权、商标、专利和其他相关权利均受相应法律法规的保护。使用本项目应遵守相关法律法规和许可证的要求。
//
// 本项目主要遵循 MIT 许可证和 Apache 许可证(版本 2.0)进行分发和使用。许可证位于源代码树根目录中的 LICENSE-MIT 和 LICENSE-APACHE 文件。
//
// 不得利用本项目从事危害国家安全、扰乱社会秩序、侵犯他人合法权益等法律法规禁止的活动!任何基于本项目二次开发而产生的一切法律纠纷和责任,我们不承担任何责任!
using Furion.DependencyInjection;
using Microsoft.AspNetCore.Http;
using PaddleOCRSharp;
using System.ComponentModel.DataAnnotations;
namespace Admin.NET.Plugin.PaddleOCR.Service;
/// <summary>
/// PaddleOCR image-recognition service 🧩
/// </summary>
/// <remarks>
/// The OCR engine is created once in the constructor and reused for every request.
/// NOTE(review): the service is a singleton, so concurrent requests share one
/// <see cref="PaddleOCREngine"/>; confirm the engine tolerates concurrent
/// <c>DetectText</c> calls, otherwise serialize access.
/// </remarks>
[ApiDescriptionSettings(ApplicationConst.GroupName, Description = "PaddleOCR 图像识别")]
public class PaddleOCRService : IDynamicApiController, ISingleton
{
    private readonly PaddleOCREngine _engine;

    /// <summary>
    /// Creates the OCR engine with the bundled model and default parameters.
    /// </summary>
    public PaddleOCRService()
    {
        // A null config selects the bundled lightweight Chinese/English model (PP-OCRv4).
        OCRModelConfig config = null;

        //// Server Chinese/English model v2
        //OCRModelConfig config = new OCRModelConfig();
        //string modelPathroot = "absolute path of your model folder";
        //config.det_infer = modelPathroot + @"\ch_ppocr_server_v2.0_det_infer";
        //config.cls_infer = modelPathroot + @"\ch_ppocr_mobile_v2.0_cls_infer";
        //config.rec_infer = modelPathroot + @"\ch_ppocr_server_v2.0_rec_infer";
        //config.keys = modelPathroot + @"\ppocr_keys.txt";

        //// English and digits model v3
        //OCRModelConfig config = new OCRModelConfig();
        //string modelPathroot = "absolute path of your model folder";
        //config.det_infer = modelPathroot + @"\en_PP-OCRv3_det_infer";
        //config.cls_infer = modelPathroot + @"\ch_ppocr_mobile_v2.0_cls_infer";
        //config.rec_infer = modelPathroot + @"\en_PP-OCRv3_rec_infer";
        //config.keys = modelPathroot + @"\en_dict.txt";

        //// Chinese/English model V4
        //config = new OCRModelConfig();
        //string modelPathroot = AppContext.BaseDirectory + "OcrModel\\ch_PP-OCRv4";
        //config.det_infer = modelPathroot + @"\ch_PP-OCRv4_det_infer";
        //config.cls_infer = modelPathroot + @"\ch_ppocr_mobile_v2.0_cls_infer";
        //config.rec_infer = modelPathroot + @"\ch_PP-OCRv4_rec_infer";
        //config.keys = modelPathroot + @"\ppocr_keys.txt";

        //// Server Chinese/English model V4
        //config = new OCRModelConfig();
        //string modelPathroot = "absolute path of your model folder";
        //config.det_infer = modelPathroot + @"\ch_PP-OCRv4_det_server_infer";
        //config.cls_infer = modelPathroot + @"\ch_ppocr_mobile_v2.0_cls_infer";
        //config.rec_infer = modelPathroot + @"\ch_PP-OCRv4_rec_server_infer";
        //config.keys = modelPathroot + @"\ppocr_keys.txt";

        // Engine parameters (defaults).
        OCRParameter oCRParameter = new OCRParameter();
        // oCRParameter.use_gpu = true; // only takes effect with the GPU build of the inference library
        // oCRParameter.enable_mkldnn = false;

        // Initialize the OCR engine once; it is reused by every request.
        _engine = new PaddleOCREngine(config, oCRParameter);
    }

    /// <summary>
    /// Recognizes an ID card image 🔖
    /// </summary>
    /// <param name="file">Uploaded image of the ID card.</param>
    /// <returns>
    /// An object with <c>CardName</c>, <c>CardNo</c> and <c>CardAddress</c>; each is an
    /// empty string when the corresponding field could not be found in the image.
    /// </returns>
    // NOTE(review): this endpoint is anonymous and handles personal identity data;
    // confirm that unauthenticated access is intended.
    [AllowAnonymous]
    [DisplayName("识别身份证")]
    public async Task<dynamic> IDCardOCR([Required] IFormFile file)
    {
        using var memoryStream = new MemoryStream();
        await file.CopyToAsync(memoryStream);

        var ocrRes = _engine.DetectText(memoryStream.ToArray());

        // Fall back to an empty list so a missing OCR result yields empty fields
        // instead of a NullReferenceException.
        List<TextBlock> textBlocks = ocrRes?.TextBlocks ?? new List<TextBlock>();

        // The method is already async, so the value is returned directly rather
        // than being wrapped in Task.FromResult and awaited again.
        return new
        {
            CardName = TextBlockUtil.ReadIdCardName(textBlocks),
            CardNo = TextBlockUtil.ReadIdCardNo(textBlocks),
            CardAddress = TextBlockUtil.ReadIdCardAddress(textBlocks)
        };
    }
}

View File

@ -0,0 +1,95 @@
// Admin.NET 项目的版权、商标、专利和其他相关权利均受相应法律法规的保护。使用本项目应遵守相关法律法规和许可证的要求。
//
// 本项目主要遵循 MIT 许可证和 Apache 许可证(版本 2.0)进行分发和使用。许可证位于源代码树根目录中的 LICENSE-MIT 和 LICENSE-APACHE 文件。
//
// 不得利用本项目从事危害国家安全、扰乱社会秩序、侵犯他人合法权益等法律法规禁止的活动!任何基于本项目二次开发而产生的一切法律纠纷和责任,我们不承担任何责任!
using PaddleOCRSharp;
using System.Text;
using System.Text.RegularExpressions;
namespace Admin.NET.Plugin.PaddleOCR;
/// <summary>
/// Helpers that extract ID-card fields (name, number, address) from OCR text blocks.
/// </summary>
/// <remarks>
/// Every method removes spaces from a block's text before matching, tolerates a null
/// list, null items and null text, and returns an empty string when nothing matches.
/// </remarks>
public static class TextBlockUtil
{
    // "姓名" label followed by a 2-4 character Han name; group 1 is the name.
    private static readonly Regex NameWithFullLabel = new Regex(@"^姓名([\u4e00-\u9fa5]{2,4})$");

    // Label recognized only as "名" followed by a 2-4 character Han name.
    private static readonly Regex NameWithPartialLabel = new Regex(@"^名([\u4e00-\u9fa5]{2,4})$");

    // A block that is nothing but 2-4 Han characters.
    private static readonly Regex BareName = new Regex(@"^[\u4e00-\u9fa5]{2,4}$");

    // A standalone 18-character (17 digits + digit/X/x) or 15-digit ID number.
    // The look-arounds stop a match inside a longer run of digits.
    private static readonly Regex IdCardNumber = new Regex(@"(?<![0-9])(?:[0-9]{17}[0-9Xx]|[0-9]{15})(?![0-9Xx])");

    // Blocks containing any of these belong to other fields of the card.
    private static readonly string[] NonNameMarkers = { "性别", "民族", "住址", "公民身份证号码", "身份", "号码" };

    // Blocks that consist solely of a field label and therefore are not a name.
    private static readonly string[] LabelOnlyBlocks = { "姓名", "出生" };

    // A block containing any of these is treated as part of the address.
    private static readonly string[] AddressKeywords = { "省", "市", "县", "区", "镇", "乡", "村", "组", "室", "栋", "街道", "号" };

    /// <summary>
    /// Extracts the holder's name.
    /// </summary>
    /// <param name="textBlocks">OCR text blocks; may be null.</param>
    /// <returns>The name from the first matching block, or an empty string.</returns>
    public static string ReadIdCardName(List<TextBlock> textBlocks)
    {
        if (textBlocks == null)
        {
            return "";
        }

        foreach (var item in textBlocks)
        {
            var txt = Normalize(item);
            if (txt.Length == 0 || NonNameMarkers.Any(marker => txt.Contains(marker)))
            {
                continue;
            }

            // A label detected as its own block would otherwise pass the bare-name
            // pattern below and be returned as the name.
            if (LabelOnlyBlocks.Contains(txt))
            {
                continue;
            }

            // Capture groups strip exactly the label prefix. TrimStart removes a
            // character set, so it also ate leading '姓'/'名' characters of the name.
            var labeled = NameWithFullLabel.Match(txt);
            if (!labeled.Success)
            {
                labeled = NameWithPartialLabel.Match(txt);
            }

            if (labeled.Success)
            {
                return labeled.Groups[1].Value;
            }

            if (BareName.IsMatch(txt))
            {
                return txt;
            }
        }

        return "";
    }

    /// <summary>
    /// Extracts the ID card number.
    /// </summary>
    /// <param name="textBlocks">OCR text blocks; may be null.</param>
    /// <returns>
    /// The first 18- or 15-character ID number found, or an empty string. The number
    /// is also found when the block carries other text, such as the field label.
    /// </returns>
    public static string ReadIdCardNo(List<TextBlock> textBlocks)
    {
        if (textBlocks == null)
        {
            return "";
        }

        foreach (var item in textBlocks)
        {
            var match = IdCardNumber.Match(Normalize(item));
            if (match.Success)
            {
                return match.Value;
            }
        }

        return "";
    }

    /// <summary>
    /// Extracts the address by concatenating every block that contains an address keyword.
    /// </summary>
    /// <param name="textBlocks">OCR text blocks; may be null.</param>
    /// <returns>The concatenated address with the "住址" label removed, or an empty string.</returns>
    public static string ReadIdCardAddress(List<TextBlock> textBlocks)
    {
        if (textBlocks == null)
        {
            return "";
        }

        var sb = new StringBuilder();
        foreach (var item in textBlocks)
        {
            var txt = Normalize(item);

            // Skip the name line and the ID-number label line ("号码" would
            // otherwise match the "号" keyword).
            if (txt.Contains("姓名") || txt.Contains("号码"))
            {
                continue;
            }

            if (AddressKeywords.Any(keyword => txt.Contains(keyword)))
            {
                sb.Append(txt);
            }
        }

        return sb.Replace("住址", "").ToString();
    }

    // Returns the block text with spaces removed and trimmed; empty for a null block or null text.
    private static string Normalize(TextBlock block)
    {
        return block?.Text?.Replace(" ", "").Trim() ?? "";
    }
}