Ver código fonte

core-ai 流式输出和思考模式

deason 1 mês atrás
pai
commit
e64ec3993f

+ 19 - 4
src/main/java/com/qmth/ops/api/controller/ai/LlmController.java

@@ -36,7 +36,9 @@ public class LlmController {
     @PostMapping(AiConstants.LLM_CHAT_ENDPOINT_PATH)
     public ChatEndpoint chatEndpoint(@RequestAttribute AccessOrg accessOrg,
                                      @RequestHeader(AiConstants.LLM_APP_TYPE) LlmAppType type,
-                                     @RequestBody Object param) throws Exception {
+                                     @RequestBody Object param,
+                                     @RequestParam(required = false) Boolean stream,
+                                     @RequestParam(required = false) Boolean enableThinking) throws Exception {
         LlmOrgConfig config = llmOrgConfigService.findByOrgAndAppType(accessOrg.getOrg().getId(), type);
         if (config == null || config.getLeftCount() <= 0) {
             throw new ForbiddenException(
@@ -47,6 +49,7 @@ public class LlmController {
             throw new NotFoundException(
                     "Chat prompt template not found for app_type=" + type + ", modelId=" + config.getModelId());
         }
+
         ChatRequest request = new ChatRequest();
         String systemMessage = FreemarkerUtil.getValue(llmPromptTemplate.getSystem(), param, null);
         String userMessage = FreemarkerUtil.getValue(llmPromptTemplate.getUser(), param, null);
@@ -56,7 +59,15 @@ public class LlmController {
         if (StringUtils.isNotBlank(userMessage)) {
             request.addMessage(ChatRole.user, userMessage);
         }
-        request.setStream(true);//todo
+
+        request.setStream(stream != null ? stream : false);
+        if (enableThinking == null || !enableThinking) {
+            request.setEnableThinking(false);
+        } else {
+            request.setEnableThinking(true);
+            // Thinking mode implies streaming: override the caller's stream flag to true.
+            request.setStream(true);
+        }
 
         return llmClientService.chatEndpoint(request, config.getModelId(), type);
     }
@@ -71,6 +82,7 @@ public class LlmController {
             throw new ForbiddenException(
                     "Chat api is disabled or exhausted for org=" + accessOrg.getOrg().getCode() + ", app_type=" + type);
         }
+
         ChatResult result = llmClientService.chat(request, config.getModelId(), type);
         llmOrgConfigService.consume(config);
         return result;
@@ -78,18 +90,20 @@ public class LlmController {
 
     @PostMapping(AiConstants.LLM_CHAT_TEMPLATE_PATH)
     public ChatResult chatTemplate(@RequestAttribute AccessOrg accessOrg,
-                                   @RequestHeader(AiConstants.LLM_APP_TYPE) LlmAppType type, @RequestBody Object param)
-            throws Exception {
+                                   @RequestHeader(AiConstants.LLM_APP_TYPE) LlmAppType type,
+                                   @RequestBody Object param) throws Exception {
         LlmOrgConfig config = llmOrgConfigService.findByOrgAndAppType(accessOrg.getOrg().getId(), type);
         if (config == null || config.getLeftCount() <= 0) {
             throw new ForbiddenException(
                     "Chat api is disabled or exhausted for org=" + accessOrg.getOrg().getCode() + ", app_type=" + type);
         }
+
         LlmPromptTemplate llmPromptTemplate = llmPromptTemplateService.findById(config.getPromptId());
         if (llmPromptTemplate == null) {
             throw new NotFoundException(
                     "Chat prompt template not found for app_type=" + type + ", modelId=" + config.getModelId());
         }
+
         ChatRequest request = new ChatRequest();
         String systemMessage = FreemarkerUtil.getValue(llmPromptTemplate.getSystem(), param, null);
         String userMessage = FreemarkerUtil.getValue(llmPromptTemplate.getUser(), param, null);
@@ -99,6 +113,7 @@ public class LlmController {
         if (StringUtils.isNotBlank(userMessage)) {
             request.addMessage(ChatRole.user, userMessage);
         }
+
         ChatResult result = llmClientService.chat(request, config.getModelId(), type);
         llmOrgConfigService.consume(config);
         return result;

+ 32 - 25
src/main/java/com/qmth/ops/biz/ai/client/aliyun/llm/AliyunChatClient.java

@@ -32,24 +32,25 @@ public class AliyunChatClient extends ChatApiClient {
     @Override
     public ChatEndpoint buildEndpoint(ChatRequest request, LlmAppType appType) {
         ChatEndpoint endpoint = new ChatEndpoint();
-        // endpoint.setUrl(getConfig().getUrl());//todo
-        endpoint.setUrl("https://dashscope.aliyuncs.com/compatible-mode/v1/chat/completions");
+        endpoint.setUrl(getConfig().getUrl());
         endpoint.setMethod("POST");
 
         Map<String, String> headers = new HashMap<>();
         headers.put(AUTH_HEADER_NAME, AUTH_HEADER_VALUE + getConfig().getSecret());
-        headers.put("X-DashScope-SSE", "enable");
+        if (request.isStream()) {
+            headers.put("X-DashScope-SSE", "enable");
+        }
         endpoint.setHeaders(headers);
 
-        request.setModel(getConfig().getModel());
+        this.buildRequest(request, appType);
 
-        byte[] json;
+        byte[] jsonBytes;
         try {
-            json = new ObjectMapper().writeValueAsBytes(request);
+            jsonBytes = new ObjectMapper().writeValueAsBytes(request);
         } catch (JsonProcessingException e) {
             throw new RuntimeException(e);
         }
-        endpoint.setRequestBody(json);
+        endpoint.setRequestBody(jsonBytes);
 
         return endpoint;
     }
@@ -57,26 +58,24 @@ public class AliyunChatClient extends ChatApiClient {
     @Override
     protected Headers buildHeader(Headers.Builder headerBuilder, LlmAppType appType) {
         headerBuilder.add(AUTH_HEADER_NAME, AUTH_HEADER_VALUE + getConfig().getSecret());
-        // if (appType == LlmAppType.AUTO_SCORE) {
-        //     headerBuilder.add("X-DashScope-DataInspection", "{\"input\":\"disable\", \"output\":\"disable\"}");
-        // }
         return headerBuilder.build();
     }
 
     @Override
     protected Object buildRequest(ChatRequest request, LlmAppType appType) {
-        AliyunChatRequest chatRequest = new AliyunChatRequest(request, getConfig().getModel());
+        request.setModel(getConfig().getModel());
         if (appType == LlmAppType.AUTO_SCORE) {
-            chatRequest.getParameters().put("top_p", 0.1);
+            request.setTopP(0.1f);
         } else if (appType == LlmAppType.AUTO_GENERATE_QUESTION) {
-            chatRequest.getParameters().put("top_p", 0.9);
+            request.setTopP(0.9f);
         }
-        return chatRequest;
+        request.setResultFormat("message");
+        return request;
     }
 
     @Override
     protected ChatResult buildResult(byte[] data, ObjectMapper mapper) throws IOException {
-        return mapper.readValue(data, AliyunChatResult.class).buildResult();
+        return mapper.readValue(data, ChatResult.class);
     }
 
     @Override
@@ -85,7 +84,7 @@ public class AliyunChatClient extends ChatApiClient {
         if (data != null) {
             try {
                 error = mapper.readValue(data, AliyunError.class);
-            } catch (Exception e) {
+            } catch (Exception ignore) {
             }
         }
         switch (statusCode) {
@@ -100,19 +99,27 @@ public class AliyunChatClient extends ChatApiClient {
 
     public static void main(String[] args) throws Exception {
         ChatApiConfig config = new ChatApiConfig();
-        config.setSupplier("aliyun");
-        config.setUrl("https://dashscope.aliyuncs.com/api/v1/services/aigc/text-generation/generation");
-        config.setSecret("");
-        config.setModel("qwen-turbo");
+        config.setSupplier("Aliyun");
+        config.setUrl("https://dashscope.aliyuncs.com/compatible-mode/v1/chat/completions");
+        config.setSecret("sk-xxx");
+        // config.setModel("qwen3-235b-a22b");
+        // config.setModel("qwen-plus");
+        // config.setModel("qwen-max");
+        // config.setModel("qwen-turbo");
+        // config.setModel("qwen-long");
+        config.setModel("deepseek-v3");
         config.setQpm(60);
         AliyunChatClient client = new AliyunChatClient(config);
         ChatRequest request = new ChatRequest();
         request.addMessage(ChatRole.user,
-                "作为高等数学科目的命题老师,请按照下列要求出1道单选试题\n" + "试题题干前用单独一行'【题干】'作为内容\n" + "试题答案前用单独一行'【答案】'作为内容\n"
-                        + "试题答案解析前用单独一行'【解析】'作为内容\n" + "试题包含4个选项,选项内容前用单独一行'【选项】'作为内容,且每个选项前用大写英文字母开头\n"
-                        + "请按照上述要求出1道高等数学的单选试题");
-        System.out.println(
-                new ObjectMapper().writeValueAsString(client.call(request, LlmAppType.AUTO_GENERATE_QUESTION)));
+                "作为小学数学科目的命题老师,请按照下列要求出1道单选试题,\n"
+                        + "试题题干前用单独一行【题干】作为内容,\n"
+                        + "试题答案前用单独一行【答案】作为内容,\n"
+                        + "试题答案解析前用单独一行【解析】作为内容,\n"
+                        + "试题包含4个选项,选项内容前用单独一行【选项】作为内容,且每个选项前用大写英文字母开头,\n"
+                        + "请按照上述要求出1道小学数学的单选试题。");
+        ChatResult chatResult = client.call(request, LlmAppType.AUTO_GENERATE_QUESTION);
+        System.out.println(new ObjectMapper().writeValueAsString(chatResult));
     }
 
 }

+ 26 - 7
src/main/java/com/qmth/ops/biz/ai/client/azure/llm/AzureChatClient.java

@@ -1,5 +1,6 @@
 package com.qmth.ops.biz.ai.client.azure.llm;
 
+import com.fasterxml.jackson.core.JsonProcessingException;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.qmth.boot.core.ai.model.llm.ChatRequest;
 import com.qmth.boot.core.ai.model.llm.ChatResult;
@@ -14,6 +15,8 @@ import okhttp3.Headers;
 
 import java.io.IOException;
 import java.nio.charset.StandardCharsets;
+import java.util.HashMap;
+import java.util.Map;
 
 public class AzureChatClient extends ChatApiClient {
 
@@ -29,7 +32,22 @@ public class AzureChatClient extends ChatApiClient {
     @Override
     public ChatEndpoint buildEndpoint(ChatRequest request, LlmAppType appType) {
         ChatEndpoint endpoint = new ChatEndpoint();
-        // todo
+        endpoint.setUrl(getConfig().getUrl().replace(MODEL_PLACEHOLDER, getConfig().getModel()));
+        endpoint.setMethod("POST");
+
+        Map<String, String> headers = new HashMap<>();
+        headers.put(AUTH_HEADER_NAME, getConfig().getSecret());
+        endpoint.setHeaders(headers);
+
+        request.setModel(getConfig().getModel());
+        byte[] jsonBytes;
+        try {
+            jsonBytes = new ObjectMapper().writeValueAsBytes(request);
+        } catch (JsonProcessingException e) {
+            throw new RuntimeException(e);
+        }
+        endpoint.setRequestBody(jsonBytes);
+
         return endpoint;
     }
 
@@ -52,12 +70,13 @@ public class AzureChatClient extends ChatApiClient {
     protected ChatResult handleError(byte[] data, int statusCode, ObjectMapper mapper) {
         String message = data != null ? new String(data, StandardCharsets.UTF_8) : null;
         switch (statusCode) {
-        case 400:
-            throw new ChatRequestError(message != null ? message : "chat request error");
-        case 404:
-            throw new NotFoundException(message != null ? message : "chat resource not found");
-        default:
-            throw new StatusException(message != null ? message : "chat model error");
+            case 400:
+                throw new ChatRequestError(message != null ? message : "chat request error");
+            case 404:
+                throw new NotFoundException(message != null ? message : "chat resource not found");
+            default:
+                throw new StatusException(message != null ? message : "chat model error");
         }
     }
+
 }

+ 3 - 0
src/main/java/com/qmth/ops/biz/service/LlmClientService.java

@@ -72,6 +72,9 @@ public class LlmClientService {
         if (client == null) {
             throw new ChatClientNotFound(modelId);
         }
+        // Regular call: streaming output and thinking mode are not supported here yet, so both are forced off.
+        request.setStream(false);
+        request.setEnableThinking(false);
         return client.call(request, appType);
     }