mirror of
https://github.com/zadam/trilium.git
synced 2025-11-01 02:45:54 +01:00
refactor(llm): streamline chat response handling by simplifying content accumulation and removing unnecessary thinking content processing
This commit is contained in:
@@ -858,92 +858,52 @@ async function streamMessage(req: Request, res: Response) {
|
||||
}
|
||||
}
|
||||
|
||||
// Create request parameters for the pipeline
|
||||
const requestParams = {
|
||||
chatNoteId: chatNoteId,
|
||||
content: enhancedContent,
|
||||
useAdvancedContext: useAdvancedContext === true,
|
||||
showThinking: showThinking === true,
|
||||
stream: true // Always stream for this endpoint
|
||||
};
|
||||
|
||||
// Create a fake request/response pair to pass to the handler
|
||||
const fakeReq = {
|
||||
...req,
|
||||
method: 'GET', // Set to GET to indicate streaming
|
||||
query: {
|
||||
stream: 'true', // Set stream param - don't use format: 'stream' to avoid confusion
|
||||
useAdvancedContext: String(useAdvancedContext === true),
|
||||
showThinking: String(showThinking === true)
|
||||
},
|
||||
params: {
|
||||
chatNoteId: chatNoteId
|
||||
},
|
||||
// Make sure the enhanced content is available to the handler
|
||||
body: {
|
||||
content: enhancedContent,
|
||||
useAdvancedContext: useAdvancedContext === true,
|
||||
showThinking: showThinking === true
|
||||
}
|
||||
} as unknown as Request;
|
||||
|
||||
// Log to verify correct parameters
|
||||
log.info(`WebSocket stream settings - useAdvancedContext=${useAdvancedContext === true}, in query=${fakeReq.query.useAdvancedContext}, in body=${fakeReq.body.useAdvancedContext}`);
|
||||
// Extra safety to ensure the parameters are passed correctly
|
||||
if (useAdvancedContext === true) {
|
||||
log.info(`Enhanced context IS enabled for this request`);
|
||||
} else {
|
||||
log.info(`Enhanced context is NOT enabled for this request`);
|
||||
}
|
||||
|
||||
// Process the request in the background
|
||||
Promise.resolve().then(async () => {
|
||||
try {
|
||||
await restChatService.handleSendMessage(fakeReq, res);
|
||||
} catch (error) {
|
||||
log.error(`Background message processing error: ${error}`);
|
||||
|
||||
// Import the WebSocket service
|
||||
const wsService = (await import('../../services/ws.js')).default;
|
||||
|
||||
// Define LLMStreamMessage interface
|
||||
interface LLMStreamMessage {
|
||||
type: 'llm-stream';
|
||||
chatNoteId: string;
|
||||
content?: string;
|
||||
thinking?: string;
|
||||
toolExecution?: any;
|
||||
done?: boolean;
|
||||
error?: string;
|
||||
raw?: unknown;
|
||||
}
|
||||
|
||||
// Send error to client via WebSocket
|
||||
wsService.sendMessageToAllClients({
|
||||
type: 'llm-stream',
|
||||
chatNoteId: chatNoteId,
|
||||
error: `Error processing message: ${error}`,
|
||||
done: true
|
||||
} as LLMStreamMessage);
|
||||
}
|
||||
});
|
||||
|
||||
// Import the WebSocket service
|
||||
// Import the WebSocket service to send immediate feedback
|
||||
const wsService = (await import('../../services/ws.js')).default;
|
||||
|
||||
// Let the client know streaming has started via WebSocket (helps client confirm connection is working)
|
||||
// Let the client know streaming has started
|
||||
wsService.sendMessageToAllClients({
|
||||
type: 'llm-stream',
|
||||
chatNoteId: chatNoteId,
|
||||
thinking: 'Initializing streaming LLM response...'
|
||||
thinking: showThinking ? 'Initializing streaming LLM response...' : undefined
|
||||
});
|
||||
|
||||
// Let the client know streaming has started via HTTP response
|
||||
return {
|
||||
success: true,
|
||||
message: 'Streaming started',
|
||||
chatNoteId: chatNoteId
|
||||
};
|
||||
// Process the streaming request directly
|
||||
try {
|
||||
const result = await restChatService.handleSendMessage({
|
||||
...req,
|
||||
method: 'GET', // Indicate streaming mode
|
||||
query: {
|
||||
...req.query,
|
||||
stream: 'true' // Add the required stream parameter
|
||||
},
|
||||
body: {
|
||||
content: enhancedContent,
|
||||
useAdvancedContext: useAdvancedContext === true,
|
||||
showThinking: showThinking === true
|
||||
},
|
||||
params: { chatNoteId }
|
||||
} as unknown as Request, res);
|
||||
|
||||
// Since we're streaming, the result will be null
|
||||
return {
|
||||
success: true,
|
||||
message: 'Streaming started',
|
||||
chatNoteId: chatNoteId
|
||||
};
|
||||
} catch (error) {
|
||||
log.error(`Error during streaming: ${error}`);
|
||||
|
||||
// Send error to client via WebSocket
|
||||
wsService.sendMessageToAllClients({
|
||||
type: 'llm-stream',
|
||||
chatNoteId: chatNoteId,
|
||||
error: `Error processing message: ${error}`,
|
||||
done: true
|
||||
});
|
||||
|
||||
throw error;
|
||||
}
|
||||
} catch (error: any) {
|
||||
log.error(`Error starting message stream: ${error.message}`);
|
||||
throw error;
|
||||
|
||||
@@ -231,21 +231,16 @@ class RestChatService {
|
||||
|
||||
if (data) {
|
||||
message.content = data;
|
||||
// Handle accumulation carefully - if this appears to be a complete response
|
||||
// (done=true and data is much longer than current accumulated), replace rather than append
|
||||
if (done && data.length > accumulatedContentRef.value.length && data.includes(accumulatedContentRef.value)) {
|
||||
// This looks like a complete final response that includes what we've accumulated
|
||||
accumulatedContentRef.value = data;
|
||||
} else {
|
||||
// Normal incremental accumulation
|
||||
accumulatedContentRef.value += data;
|
||||
}
|
||||
// Simple accumulation - just append the new data
|
||||
accumulatedContentRef.value += data;
|
||||
}
|
||||
|
||||
// Only include thinking if explicitly present in rawChunk
|
||||
if (rawChunk && 'thinking' in rawChunk && rawChunk.thinking) {
|
||||
message.thinking = rawChunk.thinking as string;
|
||||
}
|
||||
|
||||
// Only include tool execution if explicitly present in rawChunk
|
||||
if (rawChunk && 'toolExecution' in rawChunk && rawChunk.toolExecution) {
|
||||
const toolExec = rawChunk.toolExecution;
|
||||
message.toolExecution = {
|
||||
@@ -262,7 +257,7 @@ class RestChatService {
|
||||
// Send WebSocket message
|
||||
wsService.sendMessageToAllClients(message);
|
||||
|
||||
// Send SSE response
|
||||
// Send SSE response for compatibility
|
||||
const responseData: any = { content: data, done };
|
||||
if (rawChunk?.toolExecution) {
|
||||
responseData.toolExecution = rawChunk.toolExecution;
|
||||
|
||||
Reference in New Issue
Block a user