소스 검색

core-ai parseDoc api

deason 1 개월 전
부모
커밋
25f3d2d988

+ 12 - 0
src/main/java/com/qmth/ops/api/controller/ai/OcrController.java

@@ -4,6 +4,8 @@ import com.qmth.boot.api.annotation.Aac;
 import com.qmth.boot.core.ai.model.AiConstants;
 import com.qmth.boot.core.ai.model.ocr.ImageType;
 import com.qmth.boot.core.ai.model.ocr.OcrType;
+import com.qmth.boot.core.ai.model.ocr.ParseDocTask;
+import com.qmth.boot.core.ai.model.ocr.ParseDocTaskResult;
 import com.qmth.boot.tools.signature.SignatureType;
 import com.qmth.ops.api.security.AccessOrg;
 import com.qmth.ops.biz.service.OcrClientService;
@@ -29,4 +31,14 @@ public class OcrController {
         return ocrClientService.forImage(type, file.getBytes(), imageType);
     }
 
+    /**
+     * Accepts an uploaded document and hands it to the OCR client service as a parse task.
+     *
+     * @param accessOrg request attribute bound by the framework; not read inside this method
+     * @param file      multipart upload bound to the {@code file} request parameter
+     * @return the task object returned by {@code ocrClientService.parseDocTask}
+     * @throws Exception if reading the upload or the service call fails
+     */
+    @PostMapping(AiConstants.OCR_DOC_TASK_PATH)
+    public ParseDocTask parseDocTask(@RequestAttribute AccessOrg accessOrg, @RequestParam("file") MultipartFile file) throws Exception {
+        byte[] content = file.getBytes();
+        String originalName = file.getOriginalFilename();
+        return ocrClientService.parseDocTask(content, originalName);
+    }
+
+    /**
+     * Looks up a previously submitted parse task by its id.
+     *
+     * @param accessOrg request attribute bound by the framework; not read inside this method
+     * @param taskId    value of the {@code taskId} request parameter
+     * @return the result returned by {@code ocrClientService.parseDocTaskQuery}
+     * @throws Exception propagated from the service call
+     */
+    @PostMapping(AiConstants.OCR_DOC_TASK_QUERY_PATH)
+    public ParseDocTaskResult parseDocTaskQuery(@RequestAttribute AccessOrg accessOrg, @RequestParam("taskId") String taskId) throws Exception {
+        ParseDocTaskResult taskResult = ocrClientService.parseDocTaskQuery(taskId);
+        return taskResult;
+    }
+
 }

+ 128 - 0
src/main/java/com/qmth/ops/biz/ai/client/baidu/doc/BaiduParseDocClient.java

@@ -0,0 +1,128 @@
+package com.qmth.ops.biz.ai.client.baidu.doc;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.qmth.boot.core.ai.model.ocr.ParseDocTask;
+import com.qmth.boot.core.ai.model.ocr.ParseDocTaskResult;
+import com.qmth.boot.core.ai.model.ocr.ParseDocTaskStatus;
+import com.qmth.boot.core.exception.StatusException;
+import com.qmth.boot.tools.codec.CodecUtils;
+import com.qmth.ops.biz.ai.client.OcrApiConfig;
+import com.qmth.ops.biz.ai.client.baidu.BceV1Signer;
+import okhttp3.*;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.http.HttpHeaders;
+
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.TimeZone;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * Client for the Baidu document parsing endpoints: submits a document as a parse task and
+ * queries a task by id. Both calls are form-encoded POST requests signed with
+ * {@link BceV1Signer}.
+ */
+public class BaiduParseDocClient {
+
+    private static final Logger log = LoggerFactory.getLogger(BaiduParseDocClient.class);
+
+    /** Shared JSON mapper; building one per call is unnecessary work. */
+    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
+    /**
+     * Shared HTTP client. Creating a new OkHttpClient per request gives every call its own
+     * connection pool and dispatcher threads, so a single instance is reused instead.
+     */
+    private static final OkHttpClient HTTP_CLIENT = new OkHttpClient.Builder()
+            .readTimeout(60, TimeUnit.SECONDS)
+            .connectTimeout(60, TimeUnit.SECONDS)
+            .build();
+
+    private final OcrApiConfig config;
+
+    /**
+     * @param config supplies the base url, key and secret used to build and sign requests
+     */
+    public BaiduParseDocClient(OcrApiConfig config) {
+        this.config = config;
+    }
+
+    /**
+     * Submits a document for parsing.
+     *
+     * @param fileData raw document bytes; sent Base64-encoded as {@code file_data}
+     * @param fileName document name; sent url-encoded as {@code file_name}
+     * @return a {@link ParseDocTask} carrying the task id from the response
+     * @throws Exception a {@link StatusException} when the HTTP call fails, the response is not
+     *                   successful, or the payload does not report {@code error_code} 0 with a result
+     */
+    public ParseDocTask parseDocTask(byte[] fileData, String fileName) throws Exception {
+        FormBody.Builder formBuilder = new FormBody.Builder();
+        formBuilder.add("file_data", CodecUtils.toBase64(fileData));
+        formBuilder.add("file_name", CodecUtils.urlEncode(fileName));
+        String url = config.getUrl() + "/rest/2.0/brain/online/v2/parser/task";
+        Request request = this.buildRequest(url, formBuilder);
+
+        try (Response response = this.getHttpClient().newCall(request).execute()) {
+            ResponseBody respBody = response.body();
+            String respBodyStr = respBody != null ? respBody.string() : "";
+            log.info(respBodyStr);
+
+            if (response.isSuccessful()) {
+                BaiduParseDocTaskResp resp = OBJECT_MAPPER.readValue(respBodyStr, BaiduParseDocTaskResp.class);
+                // Guard against a missing error_code / result instead of failing with an NPE.
+                if (resp != null && isSuccessCode(resp.getErrorCode()) && resp.getResult() != null) {
+                    return new ParseDocTask(resp.getResult().getTaskId());
+                }
+            }
+
+            throw new StatusException(respBodyStr);
+        } catch (StatusException e) {
+            // Already carries the response body; rethrow without wrapping it a second time.
+            log.error("Baidu parse doc error. {}", e.getMessage());
+            throw e;
+        } catch (Exception e) {
+            // Pass the throwable as the last argument so the stack trace is logged as well.
+            log.error("Baidu parse doc error. {}", e.getMessage(), e);
+            throw new StatusException(e.getMessage(), e);
+        }
+    }
+
+    /**
+     * Queries the state of a parse task.
+     *
+     * @param taskId task id sent as {@code task_id}
+     * @return a result whose status is mapped through {@link ParseDocTaskStatus#find} (falling
+     *         back to {@code FAILED} when no match is found) and whose content is the
+     *         {@code markdown_url} value from the response
+     * @throws Exception a {@link StatusException} when the HTTP call fails, the response is not
+     *                   successful, or the payload does not report {@code error_code} 0 with a result
+     */
+    public ParseDocTaskResult parseDocTaskQuery(String taskId) throws Exception {
+        FormBody.Builder formBuilder = new FormBody.Builder();
+        formBuilder.add("task_id", taskId);
+        String url = config.getUrl() + "/rest/2.0/brain/online/v2/parser/task/query";
+        Request request = this.buildRequest(url, formBuilder);
+
+        try (Response response = this.getHttpClient().newCall(request).execute()) {
+            ResponseBody respBody = response.body();
+            String respBodyStr = respBody != null ? respBody.string() : "";
+            log.info(respBodyStr);
+
+            if (response.isSuccessful()) {
+                BaiduParseDocTaskQueryResp resp = OBJECT_MAPPER.readValue(respBodyStr, BaiduParseDocTaskQueryResp.class);
+                // Guard against a missing error_code / result instead of failing with an NPE.
+                if (resp != null && isSuccessCode(resp.getErrorCode()) && resp.getResult() != null) {
+                    BaiduParseDocTaskQueryResult respResult = resp.getResult();
+                    ParseDocTaskStatus status = ParseDocTaskStatus.find(respResult.getStatus());
+
+                    ParseDocTaskResult result = new ParseDocTaskResult();
+                    result.setStatus(status != null ? status : ParseDocTaskStatus.FAILED);
+                    result.setContent(respResult.getMarkdownUrl());
+                    return result;
+                }
+            }
+
+            throw new StatusException(respBodyStr);
+        } catch (StatusException e) {
+            // Already carries the response body; rethrow without wrapping it a second time.
+            log.error("Baidu parse doc query error. {}", e.getMessage());
+            throw e;
+        } catch (Exception e) {
+            // Pass the throwable as the last argument so the stack trace is logged as well.
+            log.error("Baidu parse doc query error. {}", e.getMessage(), e);
+            throw new StatusException(e.getMessage(), e);
+        }
+    }
+
+    /** Returns true only when the response carried an {@code error_code} equal to 0. */
+    private static boolean isSuccessCode(Integer errorCode) {
+        return errorCode != null && errorCode == 0;
+    }
+
+    /**
+     * Builds the signed POST request: sets the content type, host and BCE date headers, then
+     * adds the authorization header computed by {@link BceV1Signer#sign} over that request.
+     */
+    private Request buildRequest(String url, FormBody.Builder formBuilder) {
+        // SimpleDateFormat is not thread-safe, so a fresh instance is created per request.
+        SimpleDateFormat dateFormat = new SimpleDateFormat(BceV1Signer.DATE_FORMAT_PATTERN);
+        dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+
+        Request request = new Request.Builder()
+                .url(url)
+                .addHeader("Content-Type", "application/x-www-form-urlencoded")
+                .addHeader(HttpHeaders.HOST, HttpUrl.parse(url).host())
+                .addHeader(BceV1Signer.X_BCE_DATE, dateFormat.format(new Date()))
+                .post(formBuilder.build())
+                .build();
+
+        return request.newBuilder().addHeader(HttpHeaders.AUTHORIZATION,
+                BceV1Signer.sign(request, config.getKey(), config.getSecret())).build();
+    }
+
+    /** Returns the shared HTTP client (60 second connect and read timeouts). */
+    private OkHttpClient getHttpClient() {
+        return HTTP_CLIENT;
+    }
+
+    /**
+     * Manual smoke test: queries a fixed task id with placeholder credentials and prints the
+     * status and content. To submit a document instead, read a local file into a byte array
+     * and call {@code client.parseDocTask(fileData, fileName)}.
+     */
+    public static void main(String[] args) throws Exception {
+        OcrApiConfig config = new OcrApiConfig();
+        config.setUrl("https://aip.baidubce.com");
+        config.setKey("xxx");
+        config.setSecret("xxx");
+        config.setQps(10);
+        BaiduParseDocClient client = new BaiduParseDocClient(config);
+
+        ParseDocTaskResult result = client.parseDocTaskQuery("task-rmebOA853Uk592w9uTOLqV8Wtffb3HhR");
+        System.out.println(result.getStatus());
+        System.out.println(result.getContent());
+    }
+
+}

+ 53 - 0
src/main/java/com/qmth/ops/biz/ai/client/baidu/doc/BaiduParseDocTaskQueryResp.java

@@ -0,0 +1,53 @@
+package com.qmth.ops.biz.ai.client.baidu.doc;
+
+import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+/**
+ * JSON envelope of the task-query response. Properties not declared here are ignored
+ * during deserialization.
+ */
+@JsonIgnoreProperties(ignoreUnknown = true)
+public class BaiduParseDocTaskQueryResp {
+
+    /** Bound to the {@code log_id} JSON property. */
+    @JsonProperty("log_id")
+    private String logId;
+
+    /** Bound to the {@code error_code} JSON property; null when the property is absent. */
+    @JsonProperty("error_code")
+    private Integer errorCode;
+
+    /** Bound to the {@code error_msg} JSON property. */
+    @JsonProperty("error_msg")
+    private String errorMsg;
+
+    /** Bound to the {@code result} JSON property. */
+    @JsonProperty("result")
+    private BaiduParseDocTaskQueryResult result;
+
+    // ---- accessors ----
+
+    public String getLogId() {
+        return logId;
+    }
+
+    public void setLogId(String logId) {
+        this.logId = logId;
+    }
+
+    public Integer getErrorCode() {
+        return errorCode;
+    }
+
+    public void setErrorCode(Integer errorCode) {
+        this.errorCode = errorCode;
+    }
+
+    public String getErrorMsg() {
+        return errorMsg;
+    }
+
+    public void setErrorMsg(String errorMsg) {
+        this.errorMsg = errorMsg;
+    }
+
+    public BaiduParseDocTaskQueryResult getResult() {
+        return result;
+    }
+
+    public void setResult(BaiduParseDocTaskQueryResult result) {
+        this.result = result;
+    }
+
+}

+ 71 - 0
src/main/java/com/qmth/ops/biz/ai/client/baidu/doc/BaiduParseDocTaskQueryResult.java

@@ -0,0 +1,71 @@
+package com.qmth.ops.biz.ai.client.baidu.doc;
+
+import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+/**
+ * {@code result} payload of the task-query response. Properties not declared here are
+ * ignored during deserialization.
+ */
+@JsonIgnoreProperties(ignoreUnknown = true)
+public class BaiduParseDocTaskQueryResult {
+
+    /**
+     * Task status:
+     * pending: queued;
+     * processing: running;
+     * success: succeeded;
+     * failed: failed
+     */
+    @JsonProperty("status")
+    private String status;
+
+    /** Bound to the {@code task_id} JSON property. */
+    @JsonProperty("task_id")
+    private String taskId;
+
+    /** Bound to the {@code task_error} JSON property. */
+    @JsonProperty("task_error")
+    private String taskError;
+
+    /** Bound to the {@code markdown_url} JSON property. */
+    @JsonProperty("markdown_url")
+    private String markdownUrl;
+
+    /** Bound to the {@code parse_result_url} JSON property. */
+    @JsonProperty("parse_result_url")
+    private String parseResultUrl;
+
+    // ---- accessors, in field order ----
+
+    public String getStatus() {
+        return status;
+    }
+
+    public void setStatus(String status) {
+        this.status = status;
+    }
+
+    public String getTaskId() {
+        return taskId;
+    }
+
+    public void setTaskId(String taskId) {
+        this.taskId = taskId;
+    }
+
+    public String getTaskError() {
+        return taskError;
+    }
+
+    public void setTaskError(String taskError) {
+        this.taskError = taskError;
+    }
+
+    public String getMarkdownUrl() {
+        return markdownUrl;
+    }
+
+    public void setMarkdownUrl(String markdownUrl) {
+        this.markdownUrl = markdownUrl;
+    }
+
+    public String getParseResultUrl() {
+        return parseResultUrl;
+    }
+
+    public void setParseResultUrl(String parseResultUrl) {
+        this.parseResultUrl = parseResultUrl;
+    }
+
+}

+ 53 - 0
src/main/java/com/qmth/ops/biz/ai/client/baidu/doc/BaiduParseDocTaskResp.java

@@ -0,0 +1,53 @@
+package com.qmth.ops.biz.ai.client.baidu.doc;
+
+import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+/**
+ * JSON envelope of the task-submission response. Properties not declared here are ignored
+ * during deserialization.
+ */
+@JsonIgnoreProperties(ignoreUnknown = true)
+public class BaiduParseDocTaskResp {
+
+    /** Bound to the {@code log_id} JSON property. */
+    @JsonProperty("log_id")
+    private String logId;
+
+    /** Bound to the {@code error_code} JSON property; null when the property is absent. */
+    @JsonProperty("error_code")
+    private Integer errorCode;
+
+    /** Bound to the {@code error_msg} JSON property. */
+    @JsonProperty("error_msg")
+    private String errorMsg;
+
+    /** Bound to the {@code result} JSON property. */
+    @JsonProperty("result")
+    private BaiduParseDocTaskResult result;
+
+    // ---- accessors ----
+
+    public String getLogId() {
+        return logId;
+    }
+
+    public void setLogId(String logId) {
+        this.logId = logId;
+    }
+
+    public Integer getErrorCode() {
+        return errorCode;
+    }
+
+    public void setErrorCode(Integer errorCode) {
+        this.errorCode = errorCode;
+    }
+
+    public String getErrorMsg() {
+        return errorMsg;
+    }
+
+    public void setErrorMsg(String errorMsg) {
+        this.errorMsg = errorMsg;
+    }
+
+    public BaiduParseDocTaskResult getResult() {
+        return result;
+    }
+
+    public void setResult(BaiduParseDocTaskResult result) {
+        this.result = result;
+    }
+
+}

+ 20 - 0
src/main/java/com/qmth/ops/biz/ai/client/baidu/doc/BaiduParseDocTaskResult.java

@@ -0,0 +1,20 @@
+package com.qmth.ops.biz.ai.client.baidu.doc;
+
+import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+/**
+ * {@code result} payload of the task-submission response; only the task id is mapped and
+ * any other property is ignored during deserialization.
+ */
+@JsonIgnoreProperties(ignoreUnknown = true)
+public class BaiduParseDocTaskResult {
+
+    /** Bound to the {@code task_id} JSON property. */
+    @JsonProperty("task_id")
+    private String taskId;
+
+    /** @return the mapped task id, or null if none was set */
+    public String getTaskId() {
+        return taskId;
+    }
+
+    /** @param taskId value to store as the task id */
+    public void setTaskId(String taskId) {
+        this.taskId = taskId;
+    }
+
+}

+ 20 - 0
src/main/java/com/qmth/ops/biz/service/OcrClientService.java

@@ -2,8 +2,11 @@ package com.qmth.ops.biz.service;
 
 import com.qmth.boot.core.ai.model.ocr.ImageType;
 import com.qmth.boot.core.ai.model.ocr.OcrType;
+import com.qmth.boot.core.ai.model.ocr.ParseDocTask;
+import com.qmth.boot.core.ai.model.ocr.ParseDocTaskResult;
 import com.qmth.ops.biz.ai.client.OcrApiClient;
 import com.qmth.ops.biz.ai.client.OcrApiConfig;
+import com.qmth.ops.biz.ai.client.baidu.doc.BaiduParseDocClient;
 import com.qmth.ops.biz.ai.exception.OcrClientNotFound;
 import com.qmth.ops.biz.domain.OcrSupplier;
 import org.slf4j.Logger;
@@ -71,4 +74,21 @@ public class OcrClientService {
         return defaultClient.forImage(type, imageData, imageType);
     }
 
+    /**
+     * Submits a document through a {@link BaiduParseDocClient} built from the supplier config.
+     *
+     * @param fileData raw bytes of the document
+     * @param fileName name of the document, forwarded to the client unchanged
+     * @return the task returned by the client
+     * @throws Exception propagated from the client call
+     */
+    public ParseDocTask parseDocTask(byte[] fileData, String fileName) throws Exception {
+        // TODO (carried over from the original): the supplier id 3 is a hard-coded literal.
+        BaiduParseDocClient docClient = new BaiduParseDocClient(this.getApiConfig(3L));
+        return docClient.parseDocTask(fileData, fileName);
+    }
+
+    /**
+     * Queries a parse task through a {@link BaiduParseDocClient} built from the supplier config.
+     *
+     * @param taskId id of the task to look up
+     * @return the result returned by the client
+     * @throws Exception propagated from the client call
+     */
+    public ParseDocTaskResult parseDocTaskQuery(String taskId) throws Exception {
+        // TODO (carried over from the original): the supplier id 3 is a hard-coded literal.
+        BaiduParseDocClient docClient = new BaiduParseDocClient(this.getApiConfig(3L));
+        return docClient.parseDocTaskQuery(taskId);
+    }
+
+    /**
+     * Loads the supplier with the given id and wraps it in an {@link OcrApiConfig}.
+     * NOTE(review): the lookup result is passed on without a null check - confirm that
+     * {@code getById} cannot return null for the ids used here.
+     */
+    private OcrApiConfig getApiConfig(Long id) {
+        return new OcrApiConfig(ocrSupplierService.getById(id));
+    }
+
 }