Mirror of https://github.com/zadam/trilium.git
	yes, this finally does set streaming to true
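This commit makes the `stream` flag explicit and consistent end to end: the chat panel now sends `format=stream` on the POST request and `stream=true` on the GET streaming URL, each pipeline stage logs the flag as it passes through, the completion stage pins it to a strict boolean before calling a provider, the Ollama provider always sets `stream` in its request body, and the REST service sets `stream: true` for every request while reserving the actual stream callback for GET.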
@@ -410,8 +410,14 @@ export default class LlmChatPanel extends BasicWidget {
      */
     private async handleDirectResponse(messageParams: any): Promise<boolean> {
         try {
-            // Send the message via POST request
-            const postResponse = await server.post<any>(`llm/sessions/${this.sessionId}/messages`, messageParams);
+            // Add format parameter to maintain consistency with the streaming GET request
+            const postParams = {
+                ...messageParams,
+                format: 'stream'  // Match the format parameter used in the GET streaming request
+            };
+
+            // Send the message via POST request with the updated params
+            const postResponse = await server.post<any>(`llm/sessions/${this.sessionId}/messages`, postParams);
 
             // If the POST request returned content directly, display it
             if (postResponse && postResponse.content) {

@@ -460,8 +466,8 @@ export default class LlmChatPanel extends BasicWidget {
         const useAdvancedContext = messageParams.useAdvancedContext;
         const showThinking = messageParams.showThinking;
 
-        // Set up streaming via EventSource
-        const streamUrl = `./api/llm/sessions/${this.sessionId}/messages?format=stream&useAdvancedContext=${useAdvancedContext}&showThinking=${showThinking}`;
+        // Set up streaming via EventSource - explicitly add stream=true parameter to ensure consistency
+        const streamUrl = `./api/llm/sessions/${this.sessionId}/messages?format=stream&stream=true&useAdvancedContext=${useAdvancedContext}&showThinking=${showThinking}`;
 
         return new Promise((resolve, reject) => {
             const source = new EventSource(streamUrl);
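For orientation, here is a condensed sketch (a hypothetical helper, not part of the commit) of how the two client requests line up after this change: both carry format=stream, and the EventSource URL additionally pins stream=true.

    // Sketch: build the matching POST params and GET streaming URL.
    // buildLlmRequests is illustrative; sessionId and the message
    // params mirror the panel code above.
    function buildLlmRequests(
        sessionId: string,
        messageParams: { useAdvancedContext: boolean; showThinking: boolean }
    ) {
        const postParams = { ...messageParams, format: 'stream' };
        const streamUrl = `./api/llm/sessions/${sessionId}/messages`
            + `?format=stream&stream=true`
            + `&useAdvancedContext=${messageParams.useAdvancedContext}`
            + `&showThinking=${messageParams.showThinking}`;
        return { postParams, streamUrl };
    }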
@@ -20,6 +20,18 @@ export interface StreamChunk {
     };
 }
 
+/**
+ * Options for chat completion requests
+ *
+ * Key properties:
+ * - stream: If true, the response will be streamed
+ * - model: Model name to use
+ * - provider: Provider to use (openai, anthropic, ollama, etc.)
+ * - enableTools: If true, enables tool support
+ *
+ * The stream option is particularly important and should be consistently handled
+ * throughout the pipeline. It should be explicitly set to true or false.
+ */
 export interface ChatCompletionOptions {
     model?: string;
     temperature?: number;
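As a usage sketch of the documented contract (the model name, temperature, and import path here are illustrative assumptions, not from the commit), a caller should pin stream to an explicit boolean rather than leaving it undefined:

    import type { ChatCompletionOptions } from './ai_interface.js';

    // stream is set explicitly, as the doc comment above requires,
    // so downstream pipeline stages never have to guess at undefined.
    const options: ChatCompletionOptions = {
        model: 'llama3',   // illustrative model name
        temperature: 0.7,
        stream: true       // always an explicit true or false
    };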
@@ -197,6 +197,13 @@ export class AIServiceManager implements IAIServiceManager {
     async generateChatCompletion(messages: Message[], options: ChatCompletionOptions = {}): Promise<ChatResponse> {
         this.ensureInitialized();
 
+        log.info(`[AIServiceManager] generateChatCompletion called with options: ${JSON.stringify({
+            model: options.model,
+            stream: options.stream,
+            enableTools: options.enableTools
+        })}`);
+        log.info(`[AIServiceManager] Stream option type: ${typeof options.stream}`);
+
         if (!messages || messages.length === 0) {
             throw new Error('No messages provided for chat completion');
         }

@@ -219,6 +226,7 @@ export class AIServiceManager implements IAIServiceManager {
             if (availableProviders.includes(providerName as ServiceProviders)) {
                 try {
                     const modifiedOptions = { ...options, model: modelName };
+                    log.info(`[AIServiceManager] Using provider ${providerName} from model prefix with modifiedOptions.stream: ${modifiedOptions.stream}`);
                     return await this.services[providerName as ServiceProviders].generateChatCompletion(messages, modifiedOptions);
                 } catch (error) {
                     log.error(`Error with specified provider ${providerName}: ${error}`);

@@ -232,6 +240,7 @@ export class AIServiceManager implements IAIServiceManager {
 
         for (const provider of sortedProviders) {
             try {
+                log.info(`[AIServiceManager] Trying provider ${provider} with options.stream: ${options.stream}`);
                 return await this.services[provider].generateChatCompletion(messages, options);
             } catch (error) {
                 log.error(`Error with provider ${provider}: ${error}`);
@@ -227,14 +227,36 @@ export class ChatPipeline {
             log.info(`Prepared ${preparedMessages.messages.length} messages for LLM, tools enabled: ${useTools}`);
 
             // Setup streaming handler if streaming is enabled and callback provided
-            const enableStreaming = this.config.enableStreaming &&
-                                  modelSelection.options.stream !== false &&
-                                  typeof streamCallback === 'function';
+            // Check if streaming should be enabled based on several conditions
+            const streamEnabledInConfig = this.config.enableStreaming;
+            const streamFormatRequested = input.format === 'stream';
+            const streamRequestedInOptions = modelSelection.options.stream === true;
+            const streamCallbackAvailable = typeof streamCallback === 'function';
 
-            if (enableStreaming) {
-                // Make sure stream is enabled in options
+            log.info(`[ChatPipeline] Request type info - Format: ${input.format || 'not specified'}, Options from pipelineInput: ${JSON.stringify({stream: input.options?.stream})}`);
+            log.info(`[ChatPipeline] Stream settings - config.enableStreaming: ${streamEnabledInConfig}, format parameter: ${input.format}, modelSelection.options.stream: ${modelSelection.options.stream}, streamCallback available: ${streamCallbackAvailable}`);
+
+            // IMPORTANT: Different behavior for GET vs POST requests:
+            // - For GET requests with streamCallback available: Always enable streaming
+            // - For POST requests: Use streaming options but don't actually stream (since we can't stream back to client)
+            if (streamCallbackAvailable) {
+                // If a stream callback is available (GET requests), we can stream the response
                 modelSelection.options.stream = true;
+                log.info(`[ChatPipeline] Stream callback available, setting stream=true for real-time streaming`);
+            } else {
+                // For POST requests, preserve the stream flag as-is from input options
+                // This ensures LLM request format is consistent across both GET and POST
+                if (streamRequestedInOptions) {
+                    log.info(`[ChatPipeline] No stream callback but stream requested in options, preserving stream=true`);
+                } else {
+                    log.info(`[ChatPipeline] No stream callback and no stream in options, setting stream=false`);
+                    modelSelection.options.stream = false;
+                }
             }
+
+            log.info(`[ChatPipeline] Final modelSelection.options.stream = ${modelSelection.options.stream}`);
+            log.info(`[ChatPipeline] Will actual streaming occur? ${streamCallbackAvailable && modelSelection.options.stream}`);
+
 
             // STAGE 5 & 6: Handle LLM completion and tool execution loop
             log.info(`========== STAGE 5: LLM COMPLETION ==========`);
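The branch above boils down to a small decision rule. A minimal sketch (a hypothetical helper, assuming the callback and option shapes used in the hunk):

    // Resolve the final stream flag the pipeline hands to the provider:
    // - a stream callback is present (GET/EventSource): always stream;
    // - no callback (POST): stream only if the caller explicitly asked.
    function resolveStreamFlag(
        streamCallback: ((data: string, done: boolean) => void) | undefined,
        requestedStream: boolean | undefined
    ): boolean {
        if (typeof streamCallback === 'function') {
            return true; // real-time streaming back to the client
        }
        return requestedStream === true; // preserve POST intent, default false
    }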
@@ -1,6 +1,6 @@
 import { BasePipelineStage } from '../pipeline_stage.js';
 import type { LLMCompletionInput } from '../interfaces.js';
-import type { ChatResponse } from '../../ai_interface.js';
+import type { ChatCompletionOptions, ChatResponse } from '../../ai_interface.js';
 import aiServiceManager from '../../ai_service_manager.js';
 import toolRegistry from '../../tools/tool_registry.js';
 import log from '../../../log.js';

@@ -19,8 +19,35 @@ export class LLMCompletionStage extends BasePipelineStage<LLMCompletionInput, {
     protected async process(input: LLMCompletionInput): Promise<{ response: ChatResponse }> {
         const { messages, options, provider } = input;
 
-        // Create a copy of options to avoid modifying the original
-        const updatedOptions = { ...options };
+        // Log input options, particularly focusing on the stream option
+        log.info(`[LLMCompletionStage] Input options: ${JSON.stringify({
+            model: options.model,
+            provider,
+            stream: options.stream,
+            enableTools: options.enableTools
+        })}`);
+        log.info(`[LLMCompletionStage] Stream option in input: ${options.stream}, type: ${typeof options.stream}`);
+
+        // Create a deep copy of options to avoid modifying the original
+        const updatedOptions: ChatCompletionOptions = JSON.parse(JSON.stringify(options));
+
+        // IMPORTANT: Ensure stream property is explicitly set to a boolean value
+        // This is critical to ensure consistent behavior across all providers
+        updatedOptions.stream = options.stream === true;
+
+        log.info(`[LLMCompletionStage] Explicitly set stream option to boolean: ${updatedOptions.stream}`);
+
+        // If this is a direct (non-stream) call to Ollama but has the stream flag,
+        // ensure we set additional metadata to maintain proper state
+        if (updatedOptions.stream && !provider && updatedOptions.providerMetadata?.provider === 'ollama') {
+            log.info(`[LLMCompletionStage] This is an Ollama request with stream=true, ensuring provider config is consistent`);
+        }
+
+        log.info(`[LLMCompletionStage] Copied options: ${JSON.stringify({
+            model: updatedOptions.model,
+            stream: updatedOptions.stream,
+            enableTools: updatedOptions.enableTools
+        })}`);
 
         // Check if tools should be enabled
         if (updatedOptions.enableTools !== false) {

@@ -48,15 +75,22 @@ export class LLMCompletionStage extends BasePipelineStage<LLMCompletionInput, {
         }
 
         log.info(`Generating LLM completion, provider: ${selectedProvider || 'auto'}, model: ${updatedOptions?.model || 'default'}`);
+        log.info(`[LLMCompletionStage] Options before service call: ${JSON.stringify({
+            model: updatedOptions.model,
+            stream: updatedOptions.stream,
+            enableTools: updatedOptions.enableTools
+        })}`);
 
         // If provider is specified (either explicit or from metadata), use that specific provider
         if (selectedProvider && aiServiceManager.isProviderAvailable(selectedProvider)) {
             const service = aiServiceManager.getService(selectedProvider);
+            log.info(`[LLMCompletionStage] Using specific service for ${selectedProvider}, stream option: ${updatedOptions.stream}`);
             const response = await service.generateChatCompletion(messages, updatedOptions);
             return { response };
         }
 
         // Otherwise use the service manager to select an available provider
+        log.info(`[LLMCompletionStage] Using auto-selected service, stream option: ${updatedOptions.stream}`);
         const response = await aiServiceManager.generateChatCompletion(messages, updatedOptions);
         return { response };
     }
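One detail worth noting about the stage above: JSON.parse(JSON.stringify(...)) drops function-valued and undefined-valued properties, so a stream left undefined would simply vanish from the copy; that is harmless here only because the very next line pins the flag to a strict boolean. A minimal sketch of that normalization, assuming the same ChatCompletionOptions type:

    import type { ChatCompletionOptions } from '../../ai_interface.js';

    function normalizeOptions(options: ChatCompletionOptions): ChatCompletionOptions {
        // JSON round-trip deep copy, as the stage does; undefined keys are
        // lost in the round trip, so stream must be re-pinned afterwards.
        const copy: ChatCompletionOptions = JSON.parse(JSON.stringify(options));
        copy.stream = options.stream === true; // strict boolean, never undefined
        return copy;
    }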
@@ -18,9 +18,21 @@ export class ModelSelectionStage extends BasePipelineStage<ModelSelectionInput,
     protected async process(input: ModelSelectionInput): Promise<{ options: ChatCompletionOptions }> {
         const { options: inputOptions, query, contentLength } = input;
 
+        // Log input options
+        log.info(`[ModelSelectionStage] Input options: ${JSON.stringify({
+            model: inputOptions?.model,
+            stream: inputOptions?.stream,
+            enableTools: inputOptions?.enableTools
+        })}`);
+        log.info(`[ModelSelectionStage] Stream option in input: ${inputOptions?.stream}, type: ${typeof inputOptions?.stream}`);
+
         // Start with provided options or create a new object
         const updatedOptions: ChatCompletionOptions = { ...(inputOptions || {}) };
+
+        // Preserve the stream option exactly as it was provided, including undefined state
+        // This is critical for ensuring the stream option propagates correctly down the pipeline
+        log.info(`[ModelSelectionStage] After copy, stream: ${updatedOptions.stream}, type: ${typeof updatedOptions.stream}`);
 
         // If model already specified, don't override it
         if (updatedOptions.model) {
             // Check if the model has a provider prefix, which indicates legacy format

@@ -36,6 +48,7 @@ export class ModelSelectionStage extends BasePipelineStage<ModelSelectionInput,
                 log.info(`Using explicitly specified model: ${updatedOptions.model}`);
             }
 
+            log.info(`[ModelSelectionStage] Returning early with stream: ${updatedOptions.stream}`);
             return { options: updatedOptions };
         }

@@ -151,6 +164,13 @@ export class ModelSelectionStage extends BasePipelineStage<ModelSelectionInput,
         this.addProviderMetadata(updatedOptions, defaultProvider, defaultModelName);
 
         log.info(`Selected model: ${defaultModelName} from provider: ${defaultProvider} for query complexity: ${queryComplexity}`);
+        log.info(`[ModelSelectionStage] Final options: ${JSON.stringify({
+            model: updatedOptions.model,
+            stream: updatedOptions.stream,
+            provider: defaultProvider,
+            enableTools: updatedOptions.enableTools
+        })}`);
+
         return { options: updatedOptions };
     }
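Taken together with the previous two stages, the division of labor is: ModelSelectionStage passes stream through untouched (including undefined), ChatPipeline decides its final value based on whether a stream callback exists, and LLMCompletionStage pins it to a strict boolean before any provider sees it.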
@@ -114,11 +114,21 @@ export class OllamaService extends BaseAIService {
                 messages: messagesToSend
             };
 
+            // Debug logging for stream option
+            log.info(`Stream option in providerOptions: ${providerOptions.stream}`);
+            log.info(`Stream option type: ${typeof providerOptions.stream}`);
 
-            log.info(`Stream: ${providerOptions.stream}`);
-            // Stream is a top-level option
-            if (providerOptions.stream !== undefined) {
-                requestBody.stream = providerOptions.stream;
+            // Stream is a top-level option - ALWAYS set it explicitly to ensure consistency
+            // This is critical for ensuring streaming works properly
+            requestBody.stream = providerOptions.stream === true;
+            log.info(`Set requestBody.stream to boolean: ${requestBody.stream}`);
+
+            // Log additional information about the streaming context
+            log.info(`Streaming context: Will stream to client: ${typeof opts.streamCallback === 'function'}`);
+
+            // If we have a streaming callback but the stream flag isn't set for some reason, warn about it
+            if (typeof opts.streamCallback === 'function' && !requestBody.stream) {
+                log.warn(`WARNING: Stream callback provided but stream=false in request. This may cause streaming issues.`);
             }
 
             // Add options object if provided
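Context for why the explicit boolean matters in this provider: Ollama's /api/chat endpoint streams by default when stream is omitted, so a dropped or undefined flag would silently turn a single-shot request into a stream. A sketch of the resulting body shape (the helper name and values are illustrative):

    // Always materialize stream as a concrete boolean in the request body.
    function buildOllamaChatBody(
        model: string,
        messages: Array<{ role: string; content: string }>,
        stream?: boolean
    ) {
        return {
            model,                   // e.g. 'llama3' (illustrative)
            messages,
            stream: stream === true  // never leave the key undefined
        };
    }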
@@ -542,7 +542,7 @@ export async function getOllamaOptions(
 
             // API parameters
             model: modelName,  // Clean model name without provider prefix
-            stream: opts.stream,
+            stream: opts.stream !== undefined ? opts.stream : true, // Default to true if not specified
             options: {
                 temperature: opts.temperature,
                 num_ctx: modelContextWindow,
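Note the asymmetry between the two Ollama-side changes: the service layer coerces an unset flag to false (=== true), while getOllamaOptions defaults an unset flag to true. The REST service change below makes the discrepancy moot by always supplying an explicit stream: true.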
@@ -458,7 +458,9 @@ class RestChatService {
                     temperature: session.metadata.temperature,
                     maxTokens: session.metadata.maxTokens,
                     model: session.metadata.model,
-                    stream: req.method === 'GET' ? true : undefined  // Explicitly set stream: true for GET requests
+                    // Always set stream to true for all request types to ensure consistency
+                    // This ensures the pipeline always knows streaming is supported, even for POST requests
+                    stream: true
                 },
                 streamCallback: req.method === 'GET' ? (data, done) => {
                     res.write(`data: ${JSON.stringify({ content: data, done })}\n\n`);
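Net effect on the API surface: options.stream is true for every chat request, and whether bytes are actually streamed is decided solely by the presence of streamCallback, which only GET (EventSource) requests receive; POST requests get a single aggregated response even though the flag is set.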