|
@@ -0,0 +1,128 @@
|
|
|
|
+package com.qmth.ops.biz.ai.client.baidu.doc;
|
|
|
|
+
|
|
|
|
+import com.fasterxml.jackson.databind.ObjectMapper;
|
|
|
|
+import com.qmth.boot.core.ai.model.ocr.ParseDocTask;
|
|
|
|
+import com.qmth.boot.core.ai.model.ocr.ParseDocTaskResult;
|
|
|
|
+import com.qmth.boot.core.ai.model.ocr.ParseDocTaskStatus;
|
|
|
|
+import com.qmth.boot.core.exception.StatusException;
|
|
|
|
+import com.qmth.boot.tools.codec.CodecUtils;
|
|
|
|
+import com.qmth.ops.biz.ai.client.OcrApiConfig;
|
|
|
|
+import com.qmth.ops.biz.ai.client.baidu.BceV1Signer;
|
|
|
|
+import okhttp3.*;
|
|
|
|
+import org.slf4j.Logger;
|
|
|
|
+import org.slf4j.LoggerFactory;
|
|
|
|
+import org.springframework.http.HttpHeaders;
|
|
|
|
+
|
|
|
|
+import java.text.SimpleDateFormat;
|
|
|
|
+import java.util.Date;
|
|
|
|
+import java.util.TimeZone;
|
|
|
|
+import java.util.concurrent.TimeUnit;
|
|
|
|
+
|
|
|
|
+public class BaiduParseDocClient {
|
|
|
|
+
|
|
|
|
+ private static final Logger log = LoggerFactory.getLogger(BaiduParseDocClient.class);
|
|
|
|
+
|
|
|
|
+ private OcrApiConfig config;
|
|
|
|
+
|
|
|
|
+ public BaiduParseDocClient(OcrApiConfig config) {
|
|
|
|
+ this.config = config;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ public ParseDocTask parseDocTask(byte[] fileData, String fileName) throws Exception {
|
|
|
|
+ FormBody.Builder formBuilder = new FormBody.Builder();
|
|
|
|
+ formBuilder.add("file_data", CodecUtils.toBase64(fileData));
|
|
|
|
+ formBuilder.add("file_name", CodecUtils.urlEncode(fileName));
|
|
|
|
+ String url = config.getUrl() + "/rest/2.0/brain/online/v2/parser/task";
|
|
|
|
+ Request request = this.buildRequest(url, formBuilder);
|
|
|
|
+
|
|
|
|
+ try (Response response = this.getHttpClient().newCall(request).execute()) {
|
|
|
|
+ ResponseBody respBody = response.body();
|
|
|
|
+ String respBodyStr = respBody != null ? respBody.string() : "";
|
|
|
|
+ log.info(respBodyStr);
|
|
|
|
+
|
|
|
|
+ if (response.isSuccessful()) {
|
|
|
|
+ BaiduParseDocTaskResp resp = new ObjectMapper().readValue(respBodyStr, BaiduParseDocTaskResp.class);
|
|
|
|
+ if (resp.getErrorCode() == 0) {
|
|
|
|
+ return new ParseDocTask(resp.getResult().getTaskId());
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ throw new StatusException(respBodyStr);
|
|
|
|
+ } catch (Exception e) {
|
|
|
|
+ log.error("Baidu parse doc error. {}", e.getMessage());
|
|
|
|
+ throw new StatusException(e.getMessage(), e);
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ public ParseDocTaskResult parseDocTaskQuery(String taskId) throws Exception {
|
|
|
|
+ FormBody.Builder formBuilder = new FormBody.Builder();
|
|
|
|
+ formBuilder.add("task_id", taskId);
|
|
|
|
+ String url = config.getUrl() + "/rest/2.0/brain/online/v2/parser/task/query";
|
|
|
|
+ Request request = this.buildRequest(url, formBuilder);
|
|
|
|
+
|
|
|
|
+ try (Response response = this.getHttpClient().newCall(request).execute()) {
|
|
|
|
+ ResponseBody respBody = response.body();
|
|
|
|
+ String respBodyStr = respBody != null ? respBody.string() : "";
|
|
|
|
+ log.info(respBodyStr);
|
|
|
|
+
|
|
|
|
+ if (response.isSuccessful()) {
|
|
|
|
+ BaiduParseDocTaskQueryResp resp = new ObjectMapper().readValue(respBodyStr, BaiduParseDocTaskQueryResp.class);
|
|
|
|
+ if (resp.getErrorCode() == 0) {
|
|
|
|
+ BaiduParseDocTaskQueryResult respResult = resp.getResult();
|
|
|
|
+ ParseDocTaskStatus status = ParseDocTaskStatus.find(respResult.getStatus());
|
|
|
|
+
|
|
|
|
+ ParseDocTaskResult result = new ParseDocTaskResult();
|
|
|
|
+ result.setStatus(status != null ? status : ParseDocTaskStatus.FAILED);
|
|
|
|
+ result.setContent(respResult.getMarkdownUrl());
|
|
|
|
+ return result;
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ throw new StatusException(respBodyStr);
|
|
|
|
+ } catch (Exception e) {
|
|
|
|
+ log.error("Baidu parse doc query error. {}", e.getMessage());
|
|
|
|
+ throw new StatusException(e.getMessage(), e);
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ private Request buildRequest(String url, FormBody.Builder formBuilder) {
|
|
|
|
+ SimpleDateFormat dateFormat = new SimpleDateFormat(BceV1Signer.DATE_FORMAT_PATTERN);
|
|
|
|
+ dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
|
|
|
|
+
|
|
|
|
+ Request request = new Request.Builder()
|
|
|
|
+ .url(url)
|
|
|
|
+ .addHeader("Content-Type", "application/x-www-form-urlencoded")
|
|
|
|
+ .addHeader(HttpHeaders.HOST, HttpUrl.parse(url).host())
|
|
|
|
+ .addHeader(BceV1Signer.X_BCE_DATE, dateFormat.format(new Date()))
|
|
|
|
+ .post(formBuilder.build())
|
|
|
|
+ .build();
|
|
|
|
+
|
|
|
|
+ return request.newBuilder().addHeader(HttpHeaders.AUTHORIZATION,
|
|
|
|
+ BceV1Signer.sign(request, config.getKey(), config.getSecret())).build();
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ private OkHttpClient getHttpClient() {
|
|
|
|
+ return new OkHttpClient.Builder()
|
|
|
|
+ .readTimeout(60, TimeUnit.SECONDS)
|
|
|
|
+ .connectTimeout(60, TimeUnit.SECONDS)
|
|
|
|
+ .build();
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ public static void main(String[] args) throws Exception {
|
|
|
|
+ OcrApiConfig config = new OcrApiConfig();
|
|
|
|
+ config.setUrl("https://aip.baidubce.com");
|
|
|
|
+ config.setKey("xxx");
|
|
|
|
+ config.setSecret("xxx");
|
|
|
|
+ config.setQps(10);
|
|
|
|
+ BaiduParseDocClient client = new BaiduParseDocClient(config);
|
|
|
|
+
|
|
|
|
+ // File file = new File("D:\\home\\大纲.pdf");
|
|
|
|
+ // byte[] fileData = ByteArray.fromFile(file).value();
|
|
|
|
+ // System.out.println(client.parseDocTask(fileData, file.getName()).getTaskId());
|
|
|
|
+
|
|
|
|
+ ParseDocTaskResult result = client.parseDocTaskQuery("task-rmebOA853Uk592w9uTOLqV8Wtffb3HhR");
|
|
|
|
+ System.out.println(result.getStatus());
|
|
|
|
+ System.out.println(result.getContent());
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+}
|