Streaming y UX de Chat en Tiempo Real
Las respuestas de LLMs pueden tardar varios segundos. El streaming resuelve este problema mostrando tokens en tiempo real, creando una experiencia fluida similar a ChatGPT. En esta lección aprenderás a implementar streaming end-to-end.
¿Por qué streaming?
| Sin streaming | Con streaming |
|---|---|
| Usuario espera 5-15 segundos | Primer token en <500ms |
| Pantalla en blanco durante la espera | Texto aparece progresivamente |
| Sensación de app lenta | Sensación de conversación natural |
| Timeout en respuestas largas | Sin límite práctico de longitud |
Streaming con OpenAI
import OpenAI from 'openai';
const openai = new OpenAI();
/**
 * Streams a chat completion from OpenAI, echoing every token to stdout
 * as it arrives, and resolves with the fully assembled reply.
 */
async function streamChat(userMessage: string) {
  const stream = await openai.chat.completions.create({
    model: 'gpt-4o',
    messages: [
      { role: 'system', content: 'Eres un asistente útil.' },
      { role: 'user', content: userMessage },
    ],
    stream: true,
  });

  // Collect tokens and join once at the end instead of repeated concatenation.
  const pieces: string[] = [];
  for await (const chunk of stream) {
    const token = chunk.choices[0]?.delta?.content ?? '';
    pieces.push(token);
    process.stdout.write(token); // show token-by-token progress
  }
  return pieces.join('');
}
Streaming con Anthropic
import Anthropic from '@anthropic-ai/sdk';
const anthropic = new Anthropic();
/**
 * Streams a Claude response, printing each text delta to stdout and
 * returning the complete text once the stream finishes.
 */
async function streamClaude(userMessage: string) {
  const stream = anthropic.messages.stream({
    model: 'claude-sonnet-4-20250514',
    max_tokens: 1024,
    messages: [{ role: 'user', content: userMessage }],
  });

  let text = '';
  for await (const event of stream) {
    // Only content_block_delta events of type text_delta carry tokens;
    // everything else (message_start, content_block_stop, …) is bookkeeping.
    if (event.type !== 'content_block_delta') continue;
    if (event.delta.type !== 'text_delta') continue;
    text += event.delta.text;
    process.stdout.write(event.delta.text);
  }
  return text;
}
Server-Sent Events (SSE) en el Backend
SSE es el protocolo estándar para enviar streaming del backend al frontend:
Express + SSE
import express from 'express';
import OpenAI from 'openai';
const app = express();
const openai = new OpenAI();
// POST /api/chat — proxies the OpenAI token stream to the client as Server-Sent Events.
app.post('/api/chat', async (req, res) => {
  const { messages } = req.body;
  // SSE headers: keep the connection open and disable caching/buffering.
  res.setHeader('Content-Type', 'text/event-stream');
  res.setHeader('Cache-Control', 'no-cache');
  res.setHeader('Connection', 'keep-alive');
  try {
    const stream = await openai.chat.completions.create({
      model: 'gpt-4o',
      messages,
      stream: true,
    });
    for await (const chunk of stream) {
      const content = chunk.choices[0]?.delta?.content;
      if (content) {
        // SSE wire format: "data: {json}\n\n" — the blank line ends each event.
        res.write(`data: ${JSON.stringify({ content })}\n\n`);
      }
    }
    // End-of-stream marker so the client knows when to stop reading.
    res.write(`data: ${JSON.stringify({ done: true })}\n\n`);
    res.end();
  } catch (error) {
    // Headers were already sent, so the error is reported inside the stream
    // itself rather than via an HTTP status code.
    res.write(`data: ${JSON.stringify({ error: 'Error del modelo' })}\n\n`);
    res.end();
  }
});
Frontend: Consumir streaming
Con EventSource (nativo)
// EventSource only supports GET requests (no POST body) — fine for demos,
// but use fetch + ReadableStream for real chat payloads.
const source = new EventSource('/api/chat?message=Hola');
source.onmessage = (event) => {
  const data = JSON.parse(event.data);
  if (data.done) {
    source.close(); // server signalled end of stream
    return;
  }
  appendToChat(data.content);
};
Con fetch + ReadableStream (recomendado)
/**
 * Sends the chat history to the backend and consumes the SSE response,
 * updating the UI as each token arrives. Resolves with the full reply.
 *
 * Fix over the naive version: an SSE frame ("data: {...}\n\n") can be split
 * across two network reads. Parsing each chunk in isolation makes
 * JSON.parse throw (or silently drop tokens) on the partial line. We keep
 * the trailing incomplete line in a buffer and carry it into the next read.
 */
async function streamChat(messages: Message[]): Promise<string> {
  const response = await fetch('/api/chat', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ messages }),
  });
  if (!response.ok || !response.body) {
    throw new Error(`Chat request failed: ${response.status}`);
  }

  const reader = response.body.getReader();
  const decoder = new TextDecoder();
  let buffer = '';       // carries an incomplete SSE line between reads
  let fullResponse = '';

  while (true) {
    const { done, value } = await reader.read();
    if (done) break;
    buffer += decoder.decode(value, { stream: true });

    // Only complete lines are parsed; the last element may be a partial
    // line, so it goes back into the buffer for the next iteration.
    const lines = buffer.split('\n');
    buffer = lines.pop() ?? '';

    for (const line of lines) {
      if (!line.startsWith('data: ')) continue;
      const data = JSON.parse(line.slice(6)); // strip the "data: " prefix
      if (data.done) return fullResponse;
      if (data.content) {
        fullResponse += data.content;
        updateUI(fullResponse); // live-update the chat view
      }
    }
  }
  return fullResponse;
}
React: Componente de Chat con Streaming
import { useState, useRef, useEffect } from 'react';
// One chat turn; role mirrors the OpenAI chat message format.
interface Message {
  role: 'user' | 'assistant';
  content: string;
}
// Chat UI with token-by-token streaming: posts the history to /api/chat and
// appends each SSE chunk to the last (assistant) message as it arrives.
function ChatUI() {
  const [messages, setMessages] = useState<Message[]>([]);
  const [input, setInput] = useState('');
  const [isStreaming, setIsStreaming] = useState(false);
  const messagesEndRef = useRef<HTMLDivElement>(null);

  // Auto-scroll to the latest message whenever the list changes.
  useEffect(() => {
    messagesEndRef.current?.scrollIntoView({ behavior: 'smooth' });
  }, [messages]);

  async function handleSubmit(e: React.FormEvent) {
    e.preventDefault();
    if (!input.trim() || isStreaming) return; // ignore empty input / double submits
    const userMessage: Message = { role: 'user', content: input };
    const newMessages = [...messages, userMessage];
    setMessages(newMessages);
    setInput('');
    setIsStreaming(true);
    // Append an empty assistant message; streamed tokens are appended to it below.
    setMessages(prev => [...prev, { role: 'assistant', content: '' }]);
    try {
      const response = await fetch('/api/chat', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ messages: newMessages }),
      });
      const reader = response.body!.getReader();
      const decoder = new TextDecoder();
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;
        // NOTE(review): an SSE frame can be split across reads; parsing each
        // chunk independently may break on a partial "data:" line — consider
        // buffering the trailing incomplete line between reads.
        const chunk = decoder.decode(value, { stream: true });
        const lines = chunk.split('\n').filter(l => l.startsWith('data: '));
        for (const line of lines) {
          const data = JSON.parse(line.slice(6)); // strip the "data: " prefix
          if (data.content) {
            // Immutably replace the last message with the new token appended,
            // so React re-renders the growing assistant reply.
            setMessages(prev => {
              const updated = [...prev];
              const last = updated[updated.length - 1];
              updated[updated.length - 1] = {
                ...last,
                content: last.content + data.content,
              };
              return updated;
            });
          }
        }
      }
    } catch (error) {
      console.error('Stream error:', error);
    } finally {
      setIsStreaming(false);
    }
  }

  return (
    <div className="flex flex-col h-screen max-w-3xl mx-auto">
      <div className="flex-1 overflow-y-auto p-4 space-y-4">
        {messages.map((msg, i) => (
          <div
            key={i}
            className={`p-3 rounded-lg ${
              msg.role === 'user'
                ? 'bg-blue-100 ml-auto max-w-[80%]'
                : 'bg-gray-100 mr-auto max-w-[80%]'
            }`}
          >
            {msg.content}
            {isStreaming && i === messages.length - 1 && msg.role === 'assistant' && (
              <span className="animate-pulse">▊</span>
            )}
          </div>
        ))}
        <div ref={messagesEndRef} />
      </div>
      <form onSubmit={handleSubmit} className="p-4 border-t">
        <div className="flex gap-2">
          <input
            type="text"
            value={input}
            onChange={(e) => setInput(e.target.value)}
            placeholder="Escribe tu mensaje..."
            className="flex-1 p-2 border rounded"
            disabled={isStreaming}
          />
          <button
            type="submit"
            disabled={isStreaming}
            className="px-4 py-2 bg-blue-500 text-white rounded disabled:opacity-50"
          >
            Enviar
          </button>
        </div>
      </form>
    </div>
  );
}
UX Patterns para AI
1. Indicador de "pensando"
// Shown while the request is in flight but no token has arrived yet
// (the placeholder assistant message is still empty).
{isStreaming && messages[messages.length - 1]?.content === '' && (
  <div className="flex items-center gap-2 text-gray-500">
    <div className="flex gap-1">
      <span className="w-2 h-2 bg-gray-400 rounded-full animate-bounce" />
      <span className="w-2 h-2 bg-gray-400 rounded-full animate-bounce [animation-delay:0.2s]" />
      <span className="w-2 h-2 bg-gray-400 rounded-full animate-bounce [animation-delay:0.4s]" />
    </div>
    <span>Pensando...</span>
  </div>
)}
2. Botón de "Detener generación"
// Keep the controller in a ref so handleStop can reach the in-flight request.
const abortControllerRef = useRef<AbortController | null>(null);

async function handleSubmit() {
  abortControllerRef.current = new AbortController();
  const response = await fetch('/api/chat', {
    method: 'POST',
    body: JSON.stringify({ messages }),
    signal: abortControllerRef.current.signal, // lets handleStop cancel the fetch
  });
  // ... streaming
}

// Abort the in-flight request and clear the streaming flag.
function handleStop() {
  abortControllerRef.current?.abort();
  setIsStreaming(false);
}

// In the UI:
{isStreaming && (
  <button onClick={handleStop} className="text-red-500">
    ⬛ Detener
  </button>
)}
3. Renderizado de Markdown
import ReactMarkdown from 'react-markdown';
import { Prism as SyntaxHighlighter } from 'react-syntax-highlighter';
// Renders an assistant message as markdown; fenced code blocks get syntax
// highlighting instead of plain <code> output.
function MessageContent({ content }: { content: string }) {
  return (
    <ReactMarkdown
      components={{
        // react-markdown passes the fence language as a "language-xxx" className.
        code({ className, children }) {
          const language = className?.replace('language-', '') || '';
          return (
            <SyntaxHighlighter language={language}>
              {String(children).replace(/\n$/, '')}
            </SyntaxHighlighter>
          );
        },
      }}
    >
      {content}
    </ReactMarkdown>
  );
}
Vercel AI SDK (Recomendado)
El Vercel AI SDK simplifica enormemente el streaming:
// Backend (Route Handler Next.js)
import { openai } from '@ai-sdk/openai';
import { streamText } from 'ai';
/**
 * Next.js Route Handler: forwards the chat history to GPT-4o and pipes the
 * model's token stream back to the client in the AI SDK's data-stream format.
 */
export async function POST(req: Request) {
  const body = await req.json();
  return streamText({
    model: openai('gpt-4o'),
    messages: body.messages,
  }).toDataStreamResponse();
}
// Frontend (React)
import { useChat } from 'ai/react';
// Minimal chat UI built on the Vercel AI SDK's useChat hook, which manages
// message state, input binding, streaming updates and the loading flag.
function Chat() {
  const { messages, input, handleInputChange, handleSubmit, isLoading } = useChat();
  return (
    <div>
      {messages.map(m => (
        <div key={m.id}>
          <strong>{m.role}:</strong> {m.content}
        </div>
      ))}
      <form onSubmit={handleSubmit}>
        <input value={input} onChange={handleInputChange} />
        <button type="submit" disabled={isLoading}>Enviar</button>
      </form>
    </div>
  );
}
El hook `useChat` maneja streaming, estado de mensajes, loading, errores y abort automáticamente.