Getting Streaming to work
LLMs are still pretty slow and sitting around waiting on them can be frustrating. Streaming back the response as it is ready is by far the best way to relieve your user of that frustration.
Here we will show you, by way of an example, what both the Capability (a Python FastAPI router) and the React code might look like in order to get streaming to work.
Capability / Backend
Example Prompt to build the Capability
Feel free to modify the prompt.
OpenAI as the LLM
from pydantic import BaseModel
from databutton_app import router
from fastapi.responses import StreamingResponse
import databutton as db
from openai import OpenAI
class ChatRequest(BaseModel):
    """Request body accepted by the ``/chat`` streaming endpoint."""

    # The user's prompt; it is sent to the model as the "user" message.
    query: str
class ChatResponse(BaseModel):
    """Response model carrying a single answer string.

    NOTE(review): the streaming endpoint shown alongside this model returns a
    plain-text ``StreamingResponse`` and does not reference this class.
    """

    # The complete answer text.
    answer: str
@router.post("/chat", tags=["stream"])
def chat(body: ChatRequest):
    """Stream an OpenAI chat completion for ``body.query`` as plain text.

    Args:
        body: Request payload; ``body.query`` is sent as the user message.

    Returns:
        A ``StreamingResponse`` (``text/plain``) that emits each text delta
        as soon as the model produces it.
    """
    # Retrieve the stored OpenAI API key
    openai_api_key = db.secrets.get("OPENAI_API_KEY")

    # Initialize the OpenAI client with the API key
    client = OpenAI(api_key=openai_api_key)

    def generate_responses():
        """Yield every non-empty text delta of the streamed completion."""
        response = client.chat.completions.create(
            model="gpt-4-0125-preview",
            messages=[
                {
                    "role": "system",
                    "content": "You are a helpful assistant, skilled in providing informative and engaging responses.",
                },
                {"role": "user", "content": body.query},
            ],
            stream=True,
        )
        for chunk in response:
            # The streaming API reference allows a chunk's ``choices`` list to
            # be empty (e.g. a usage-only chunk); indexing [0] on it would
            # raise IndexError and abort the stream mid-response.
            if not chunk.choices:
                continue
            content = chunk.choices[0].delta.content
            # Role-only and final chunks carry no text; skip them.
            if content:
                yield content

    # Return a streaming response
    return StreamingResponse(generate_responses(), media_type="text/plain")
Cohere as the LLM
from pydantic import BaseModel
from databutton_app import router
from fastapi.responses import StreamingResponse
import databutton as db
import cohere
class ChatRequest(BaseModel):
    """Request body accepted by the ``/cohere-stream`` endpoint."""

    # The user's prompt; it is passed to Cohere's chat stream as ``message``.
    message: str
@router.post("/cohere-stream", tags=["stream"])
def cohere_stream(body: ChatRequest):
    """Stream a Cohere chat reply to ``body.message`` as plain text.

    Args:
        body: Request payload; ``body.message`` is the prompt sent to Cohere.

    Returns:
        A ``StreamingResponse`` (``text/plain``) emitting the text of every
        "text-generation" event in the Cohere stream.
    """
    # Build the Cohere client from the stored secret.
    api_key = db.secrets.get("COHERE_API_KEY")
    co = cohere.Client(api_key)

    def generate_responses():
        """Yield the text of each text-generation event, skipping the rest."""
        events = co.chat_stream(message=body.message)
        yield from (
            event.text
            for event in events
            if event.event_type == "text-generation"
        )

    return StreamingResponse(generate_responses(), media_type="text/plain")
UI / Frontend
Simply include the appropriate hashtag in your prompt, and Databutton will manage the complexities of chunking and parsing behind the scenes.
import React, { useState } from "react";
import { CohereStreamChatRequest } from "types";
import {
Box,
Button,
Input,
VStack,
Text,
useStyleConfig,
Select,
useToast,
} from "@chakra-ui/react";
import brain from "brain";
import { Flex, Spacer } from "@chakra-ui/react";
/**
 * Chat page that streams LLM answers chunk by chunk.
 *
 * The user picks a model, types a prompt and presses "Send"; the reply from
 * the selected streaming endpoint (`brain.cohere_stream` or `brain.chat`) is
 * appended to the conversation as each chunk arrives.
 */
const App: React.FC = () => {
  const [input, setInput] = useState("");
  const [selectedModel, setSelectedModel] = useState<string>("");
  const [conversation, setConversation] = useState<
    { sender: "user" | "ai"; message: string }[]
  >([]);
  const toast = useToast();

  // Append streamed text to the trailing AI message, or start a new AI
  // message when the last entry belongs to the user.
  const accumulateChunks = (chunks: string[]) => {
    const message = chunks.join("");
    setConversation((prev) => {
      const last = prev[prev.length - 1];
      if (last && last.sender === "ai") {
        // Build a fresh entry rather than mutating `last`: state updaters
        // must be pure, and React StrictMode calls them twice in
        // development, which would otherwise append every chunk twice.
        return [
          ...prev.slice(0, -1),
          { ...last, message: last.message + message },
        ];
      }
      return [...prev, { sender: "ai" as const, message }];
    });
  };

  const handleInputChange = (e: React.ChangeEvent<HTMLInputElement>) => {
    setInput(e.target.value);
  };

  // Send the current prompt to the selected model and stream the answer
  // into the conversation.
  const handleSendClick = async () => {
    // Nothing to send for an empty or whitespace-only prompt.
    if (!input.trim()) {
      return;
    }
    // Capture the prompt so the input can be cleared right away; clearing
    // only after the stream ends would wipe text typed while streaming.
    const prompt = input;
    setConversation((prev) => [...prev, { sender: "user", message: prompt }]);
    setInput("");
    try {
      if (selectedModel === "cohere") {
        const request: CohereStreamChatRequest = { message: prompt };
        for await (const chunk of brain.cohere_stream(request)) {
          accumulateChunks([chunk]);
        }
      } else {
        // Any other selection (including none) falls back to the GPT endpoint.
        for await (const chunk of brain.chat({ query: prompt })) {
          accumulateChunks([chunk]);
        }
      }
    } catch (error) {
      // Surface the failure instead of leaving an unhandled rejection.
      toast({
        title: "Streaming failed.",
        description: error instanceof Error ? error.message : String(error),
        status: "error",
        duration: 5000,
        isClosable: true,
      });
    }
  };

  const handleClearChat = () => {
    setConversation([]);
    toast({
      title: "Chat cleared.",
      description: "The conversation history has been cleared.",
      status: "info",
      duration: 5000,
      isClosable: true,
    });
  };

  return (
    <VStack spacing={4} align="stretch">
      <Box as="header">
        <Text fontSize="2xl" fontWeight="bold">
          LLM Streaming
        </Text>
      </Box>
      <Flex align="center" gap="2">
        <Select
          placeholder="Select model"
          w="240px"
          value={selectedModel}
          onChange={(e) => setSelectedModel(e.target.value)}
        >
          <option value="gpt">GPT 4</option>
          <option value="cohere">Cohere</option>
        </Select>
        <Input
          placeholder="Enter your prompt"
          value={input}
          onChange={handleInputChange}
        />
        <Button
          onClick={handleSendClick}
          backgroundColor="#F1F0FE"
          borderColor="#E9E9FD"
          borderRadius="10px"
          width="auto"
          textAlign="center"
        >
          Send
        </Button>
        <Button
          onClick={handleClearChat}
          backgroundColor="#F1F0FE"
          borderColor="#E9E9FD"
          borderRadius="10px"
          width="auto"
          textAlign="center"
        >
          Clear
        </Button>
      </Flex>
      <VStack spacing={4}>
        {conversation.map((entry, index) => (
          <Flex
            key={`${entry.sender}-${index}`}
            width="100%"
            direction={entry.sender === "user" ? "row-reverse" : "row"}
          >
            {entry.sender === "user" ? <Spacer /> : null}
            <Box
              bg={entry.sender === "user" ? "gray.100" : "#E9E9FD"}
              boxShadow={entry.sender === "user" ? "md" : "inner"}
              border={
                entry.sender === "user"
                  ? "bg-gray-100"
                  : "#68d391"
              }
              p={3}
              borderRadius="lg"
            >
              <Text>{entry.message}</Text>
            </Box>
            {entry.sender === "ai" ? <Spacer /> : null}
          </Flex>
        ))}
      </VStack>
    </VStack>
  );
};

export default App;
The key integration steps that Databutton implements are as follows:
Importing the brain module:
import brain from "brain";
Adding the capability to the React frontend:
// Send the current prompt to the selected model and stream the answer into
// the conversation, one chunk at a time.
const handleSendClick = async () => {
  // Nothing to send for an empty or whitespace-only prompt.
  if (!input.trim()) {
    return;
  }
  // Capture the prompt so the input can be cleared right away; clearing only
  // after the stream ends would wipe text typed while streaming.
  const prompt = input;
  setConversation((prev) => [...prev, { sender: "user", message: prompt }]);
  setInput("");
  try {
    if (selectedModel === "cohere") {
      const request: CohereStreamChatRequest = { message: prompt };
      for await (const chunk of brain.cohere_stream(request)) {
        accumulateChunks([chunk]);
      }
    } else {
      // Any other selection (including none) falls back to the GPT endpoint.
      for await (const chunk of brain.chat({ query: prompt })) {
        accumulateChunks([chunk]);
      }
    }
  } catch (error) {
    // Surface the failure instead of leaving an unhandled rejection.
    toast({
      title: "Streaming failed.",
      description: error instanceof Error ? error.message : String(error),
      status: "error",
      duration: 5000,
      isClosable: true,
    });
  }
};
Our app now features live streaming of the Large Language Model (LLM)!
Last updated