🌊 Getting Streaming to Work

LLMs are still pretty slow, and sitting around waiting for them can be frustrating. Streaming the response back as it is generated is by far the best way to relieve your users of that frustration.

Here we will show what both the Capability (a Python FastAPI router) and the React frontend might look like in order to get streaming to work, using a complete example.

Capability / Backend

Example Prompt to build the Capability

Feel free to modify the prompt.

OpenAI as the LLM

from pydantic import BaseModel
from databutton_app import router
from fastapi.responses import StreamingResponse
import databutton as db
from openai import OpenAI

# Define the request model
class ChatRequest(BaseModel):
    query: str

# Note: the endpoint streams plain text chunks, so no response model is needed

@router.post("/chat", tags=["stream"])
def chat(body: ChatRequest):
    # Retrieve the stored OpenAI API key
    OPENAI_API_KEY = db.secrets.get("OPENAI_API_KEY")

    # Initialize the OpenAI client with the API key
    client = OpenAI(api_key=OPENAI_API_KEY)

    # Function to generate responses
    def generate_responses():
        response = client.chat.completions.create(
            model="gpt-4-0125-preview",
            messages=[
                {
                    "role": "system",
                    "content": "You are a helpful assistant, skilled in providing informative and engaging responses.",
                },
                {"role": "user", "content": body.query},
            ],
            stream=True,
        )
        for chunk in response:
            if chunk.choices[0].delta.content:
                yield chunk.choices[0].delta.content

    # Return a streaming response
    return StreamingResponse(generate_responses(), media_type="text/plain")
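
Since the Capability returns a plain text/plain stream, you can smoke-test it from any HTTP client that reads the response body incrementally, before wiring up any UI. Below is a minimal TypeScript sketch using the standard fetch streaming API; the localhost URL and the streamToConsole helper name are assumptions for local testing, not part of Databutton's API.

// Sketch: POST a JSON payload to a streaming endpoint and print each text
// chunk as it arrives. Works in Node 18+ and modern browsers.
async function streamToConsole(url: string, payload: unknown): Promise<void> {
  const res = await fetch(url, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(payload),
  });
  if (!res.ok || !res.body) {
    throw new Error(`Request failed with status ${res.status}`);
  }
  const reader = res.body.getReader();
  const decoder = new TextDecoder();
  while (true) {
    const { done, value } = await reader.read();
    if (done) break;
    // Decode incrementally so multi-byte characters that straddle
    // chunk boundaries are handled correctly
    console.log(decoder.decode(value, { stream: true }));
  }
}

streamToConsole("http://localhost:8000/chat", {
  query: "Explain streaming in one paragraph.",
}).catch(console.error);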

Cohere as the LLM

from pydantic import BaseModel
from databutton_app import router
from fastapi.responses import StreamingResponse
import databutton as db
import cohere

# Define the request model
class ChatRequest(BaseModel):
    message: str

@router.post("/cohere-stream", tags=["stream"])
def cohere_stream(body: ChatRequest):
    # Retrieve the stored Cohere API key
    COHERE_API_KEY = db.secrets.get("COHERE_API_KEY")

    # Initialize the Cohere client with the API key
    co = cohere.Client(COHERE_API_KEY)

    # Function to generate responses
    def generate_responses():
        stream = co.chat_stream(
            message=body.message
        )
        for event in stream:
            if event.event_type == "text-generation":
                yield event.text

    # Return a streaming response
    return StreamingResponse(generate_responses(), media_type="text/plain")
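
The Cohere Capability streams the same plain-text format, so the hypothetical streamToConsole helper sketched above works unchanged; only the path and the request body differ.

// Same helper as above, pointed at the Cohere endpoint
streamToConsole("http://localhost:8000/cohere-stream", {
  message: "Tell me something interesting about the ocean.",
}).catch(console.error);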

UI / Frontend

Simply include the appropriate hashtag in your prompt, and Databutton will manage the complexities of chunking and parsing behind the scenes.

import React, { useState } from "react";
import { CohereStreamChatRequest } from "types";
import {
  Box,
  Button,
  Flex,
  Input,
  Select,
  Spacer,
  Text,
  useToast,
  VStack,
} from "@chakra-ui/react";
import brain from "brain";

const App: React.FC = () => {
  const [input, setInput] = useState("");
  const [selectedModel, setSelectedModel] = useState<string>("");
  const [conversation, setConversation] = useState<
    { sender: "user" | "ai"; message: string }[]
  >([]);
  const toast = useToast();

  // Append incoming streamed chunks to the last AI message, or start a new one
  const accumulateChunks = (chunks: string[]) => {
    const message = chunks.join("");
    setConversation((prev) => {
      const newConversation = [...prev];
      if (
        newConversation.length > 0 &&
        newConversation[newConversation.length - 1].sender === "ai"
      ) {
        newConversation[newConversation.length - 1].message += message;
      } else {
        newConversation.push({ sender: "ai", message });
      }
      return newConversation;
    });
  };

  const handleInputChange = (e: React.ChangeEvent<HTMLInputElement>) => {
    setInput(e.target.value);
  };

  // Send the prompt to the selected Capability and stream the reply into the conversation
  const handleSendClick = async () => {
    setConversation((prev) => [...prev, { sender: "user", message: input }]);
    if (selectedModel === "cohere") {
      const request: CohereStreamChatRequest = { message: input };
      for await (const chunk of brain.cohere_stream(request)) {
        accumulateChunks([chunk]);
      }
    } else {
      for await (const chunk of brain.chat({ query: input })) {
        accumulateChunks([chunk]);
      }
    }
    setInput(""); // Clear input after sending
  };

  // Reset the conversation history and notify the user
  const handleClearChat = () => {
    setConversation([]);
    toast({
      title: "Chat cleared.",
      description: "The conversation history has been cleared.",
      status: "info",
      duration: 5000,
      isClosable: true,
    });
  };

  return (
    <VStack spacing={4} align="stretch">
      <Box as="header">
        <Text fontSize="2xl" fontWeight="bold">
          LLM Streaming 🌊{" "}
        </Text>
      </Box>
      <Flex align="center" gap="2">
        <Select
          placeholder="Select model"
          w="240px"
          value={selectedModel}
          onChange={(e) => setSelectedModel(e.target.value)}
        >
          <option value="gpt">GPT 4</option>
          <option value="cohere">Cohere</option>
        </Select>
        <Input
          placeholder="Enter your prompt"
          value={input}
          onChange={handleInputChange}
        />
        <Button
          onClick={handleSendClick}
          backgroundColor="#F1F0FE"
          borderColor="#E9E9FD"
          borderRadius="10px"
          width="auto"
          textAlign="center"
        >
          Send
        </Button>
        <Button
          onClick={handleClearChat}
          backgroundColor="#F1F0FE"
          borderColor="#E9E9FD"
          borderRadius="10px"
          width="auto"
          textAlign="center"
        >
          Clear
        </Button>
      </Flex>
      <VStack spacing={4}>
        {conversation.map((entry, index) => (
          <Flex
            key={`${entry.sender}-${index}`}
            width="100%"
            direction={entry.sender === "user" ? "row-reverse" : "row"}
          >
            {entry.sender === "user" ? <Spacer /> : null}
            <Box
              bg={entry.sender === "user" ? "gray.100" : "#E9E9FD"}
              boxShadow={entry.sender === "user" ? "md" : "inner"}
              borderWidth="1px"
              borderColor={entry.sender === "user" ? "gray.100" : "#68d391"}
              p={3}
              borderRadius="lg"
            >
              <Text>{entry.message}</Text>
            </Box>
            {entry.sender === "ai" ? <Spacer /> : null}
          </Flex>
        ))}
      </VStack>
    </VStack>
  );
};

export default App;

The key integration steps that Databutton implements are as follows:

Importing the brain module:

import brain from "brain";

Adding the Capability to the React frontend:

const handleSendClick = async () => {
    setConversation((prev) => [...prev, { sender: "user", message: input }]);
    if (selectedModel === "cohere") {
      const request: CohereStreamChatRequest = { message: input };
      for await (const chunk of brain.cohere_stream(request)) {
        accumulateChunks([chunk]);
      }
    } else {
      for await (const chunk of brain.chat({ query: input })) {
        accumulateChunks([chunk]);
      }
    }
    setInput(""); // Clear input after sending
  };
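
One thing this example glosses over is failure: if the stream errors out midway (network drop, rate limit), the await loop will throw and the user gets no feedback. Below is a hedged sketch of how you might harden handleSendClick, reusing only pieces already present in the component (the toast hook from handleClearChat); it is one way to do it, not part of the generated code.

const handleSendClick = async () => {
  setConversation((prev) => [...prev, { sender: "user", message: input }]);
  try {
    if (selectedModel === "cohere") {
      // Stream chunks from the Cohere Capability as they arrive
      for await (const chunk of brain.cohere_stream({ message: input })) {
        accumulateChunks([chunk]);
      }
    } else {
      // Default to the OpenAI-backed Capability
      for await (const chunk of brain.chat({ query: input })) {
        accumulateChunks([chunk]);
      }
    }
  } catch (err) {
    // Surface stream failures instead of silently truncating the reply
    toast({
      title: "Streaming failed.",
      description: err instanceof Error ? err.message : "Unknown error",
      status: "error",
      duration: 5000,
      isClosable: true,
    });
  } finally {
    setInput(""); // Clear the input whether or not the stream completed
  }
};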

Our app now features live streaming of the Large Language Model (LLM) response! 🌊
