Streaming allows you to receive responses from HelpingAI in real-time as they're generated, rather than waiting for the complete response. This creates a more interactive and responsive user experience, especially for longer responses.
When you set `stream: true` in your request, HelpingAI sends back Server-Sent Events (SSE) with partial response chunks. Each chunk contains a piece of the response as it's being generated.
Using Python with the requests library:

```python
import requests
import json

url = "https://api.helpingai.co/v1/chat/completions"
headers = {
    "Authorization": "Bearer YOUR_API_KEY",
    "Content-Type": "application/json"
}
data = {
    "model": "Dhanishtha-2.0-preview",
    "messages": [
        {"role": "user", "content": "Tell me a story about a brave knight"}
    ],
    "stream": True,
    "temperature": 0.8,
    "max_tokens": 500
}

response = requests.post(url, headers=headers, json=data, stream=True)

for line in response.iter_lines():
    if line:
        line = line.decode('utf-8')
        if line.startswith('data: '):
            data_str = line[6:]  # Remove 'data: ' prefix
            if data_str == '[DONE]':
                break
            try:
                chunk = json.loads(data_str)
                if chunk['choices'][0]['delta'].get('content'):
                    print(chunk['choices'][0]['delta']['content'], end='', flush=True)
            except json.JSONDecodeError:
                continue
```
Using the OpenAI Python SDK:

```python
from openai import OpenAI

client = OpenAI(
    base_url="https://api.helpingai.co/v1",
    api_key="YOUR_API_KEY"
)

stream = client.chat.completions.create(
    model="Dhanishtha-2.0-preview",
    messages=[
        {"role": "user", "content": "Tell me a story about a brave knight"}
    ],
    stream=True,
    temperature=0.8,
    max_tokens=500
)

for chunk in stream:
    if chunk.choices[0].delta.content is not None:
        print(chunk.choices[0].delta.content, end="", flush=True)
```
Using the HelpingAI Python SDK:

```python
from helpingai import HelpingAI

client = HelpingAI(api_key="YOUR_API_KEY")

stream = client.chat.completions.create(
    model="Dhanishtha-2.0-preview",
    messages=[
        {"role": "user", "content": "Tell me a story about a brave knight"}
    ],
    stream=True,
    temperature=0.8,
    max_tokens=500
)

for chunk in stream:
    if chunk.choices[0].delta.content is not None:
        print(chunk.choices[0].delta.content, end="", flush=True)
```
Using Node.js with axios:

```javascript
const axios = require('axios');

async function streamResponse() {
  const response = await axios.post(
    'https://api.helpingai.co/v1/chat/completions',
    {
      model: 'Dhanishtha-2.0-preview',
      messages: [
        {role: 'user', content: 'Tell me a story about a brave knight'}
      ],
      stream: true,
      temperature: 0.8,
      max_tokens: 500
    },
    {
      headers: {
        'Authorization': 'Bearer YOUR_API_KEY',
        'Content-Type': 'application/json'
      },
      responseType: 'stream'
    }
  );

  response.data.on('data', (chunk) => {
    const lines = chunk.toString().split('\n');
    for (const line of lines) {
      if (line.startsWith('data: ')) {
        const data = line.slice(6);
        if (data === '[DONE]') {
          return;
        }
        try {
          const parsed = JSON.parse(data);
          if (parsed.choices[0].delta.content) {
            process.stdout.write(parsed.choices[0].delta.content);
          }
        } catch (error) {
          // Skip invalid JSON
        }
      }
    }
  });
}

streamResponse();
```
Using the OpenAI Node.js SDK:

```javascript
import OpenAI from 'openai';

const openai = new OpenAI({
  baseURL: 'https://api.helpingai.co/v1',
  apiKey: 'YOUR_API_KEY'
});

async function main() {
  const stream = await openai.chat.completions.create({
    model: 'Dhanishtha-2.0-preview',
    messages: [
      {role: 'user', content: 'Tell me a story about a brave knight'}
    ],
    stream: true,
    temperature: 0.8,
    max_tokens: 500
  });

  for await (const chunk of stream) {
    if (chunk.choices[0]?.delta?.content) {
      process.stdout.write(chunk.choices[0].delta.content);
    }
  }
}

main();
```
Using the HelpingAI Node.js SDK:

```javascript
import { HelpingAI } from 'helpingai';

const client = new HelpingAI({
  apiKey: 'YOUR_API_KEY'
});

async function main() {
  const stream = await client.chat.completions.create({
    model: 'Dhanishtha-2.0-preview',
    messages: [
      {role: 'user', content: 'Tell me a story about a brave knight'}
    ],
    stream: true,
    temperature: 0.8,
    max_tokens: 500
  });

  for await (const chunk of stream) {
    if (chunk.choices[0]?.delta?.content) {
      process.stdout.write(chunk.choices[0].delta.content);
    }
  }
}

main();
```
Each streaming chunk follows this format:
```
data: {"id":"chatcmpl-abc123","object":"chat.completion.chunk","created":1677652288,"model":"Dhanishtha-2.0-preview","choices":[{"index":0,"delta":{"content":"Once"},"finish_reason":null}]}

data: {"id":"chatcmpl-abc123","object":"chat.completion.chunk","created":1677652288,"model":"Dhanishtha-2.0-preview","choices":[{"index":0,"delta":{"content":" upon"},"finish_reason":null}]}

data: {"id":"chatcmpl-abc123","object":"chat.completion.chunk","created":1677652288,"model":"Dhanishtha-2.0-preview","choices":[{"index":0,"delta":{"content":" a"},"finish_reason":null}]}

data: {"id":"chatcmpl-abc123","object":"chat.completion.chunk","created":1677652288,"model":"Dhanishtha-2.0-preview","choices":[{"index":0,"delta":{},"finish_reason":"stop"}]}

data: [DONE]
```
Chunk object fields:

| Field | Type | Description |
|---|---|---|
| id | string | Unique identifier for the completion |
| object | string | Always "chat.completion.chunk" |
| created | integer | Unix timestamp of when the chunk was created |
| model | string | Model used for the completion |
| choices | array | Array of choice objects |
Choice object fields:

| Field | Type | Description |
|---|---|---|
| index | integer | Choice index |
| delta | object | Content delta for this chunk |
| finish_reason | string | Reason the stream finished (null until the final chunk) |
Delta object fields:

| Field | Type | Description |
|---|---|---|
| content | string | Partial content for this chunk |
| role | string | Role of the message author (only present in the first chunk) |
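
The role arrives once in the first chunk and the content arrives piece by piece, so a client can rebuild the complete message by concatenating the deltas. Below is a minimal sketch of that, assuming `stream` is an iterator of chunk objects shaped like the tables above (an illustration, not an SDK helper):

```python
# Minimal sketch: reassemble the full assistant message from streamed deltas.
# Assumes `stream` is an iterator of chunk objects as described in the tables above.
role = None
parts = []

for chunk in stream:
    delta = chunk.choices[0].delta
    if delta.role is not None:       # role is only sent in the first chunk
        role = delta.role
    if delta.content is not None:    # partial content for this chunk
        parts.append(delta.content)

full_message = {"role": role, "content": "".join(parts)}
print(full_message)
```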
Streaming a conversational response as it's generated:

```python
from openai import OpenAI

client = OpenAI(
    base_url="https://api.helpingai.co/v1",
    api_key="YOUR_API_KEY"
)

stream = client.chat.completions.create(
    model="Dhanishtha-2.0-preview",
    messages=[
        {"role": "system", "content": "You are a compassionate counselor."},
        {"role": "user", "content": "I'm feeling really anxious about my job interview tomorrow."}
    ],
    stream=True,
    temperature=0.7,
    max_tokens=300
)

print("AI Response: ", end="")
for chunk in stream:
    if chunk.choices[0].delta.content is not None:
        print(chunk.choices[0].delta.content, end="", flush=True)
print()  # New line at the end
```
Streaming a step-by-step reasoning response, keeping the model's thinking visible with `hideThink`:

```python
from openai import OpenAI

client = OpenAI(
    base_url="https://api.helpingai.co/v1",
    api_key="YOUR_API_KEY"
)

stream = client.chat.completions.create(
    model="Dhanishtha-2.0-preview",
    messages=[
        {"role": "user", "content": "Solve this step by step: If a train travels 120 miles in 2 hours, what's its speed?"}
    ],
    stream=True,
    # hideThink is HelpingAI-specific, so pass it via extra_body when using the OpenAI SDK
    extra_body={"hideThink": False},  # Show the reasoning process
    temperature=0.3,
    max_tokens=400
)

for chunk in stream:
    if chunk.choices[0].delta.content is not None:
        print(chunk.choices[0].delta.content, end="", flush=True)
```
Wrap streaming calls in error handling so failures mid-stream are caught gracefully:

```python
from openai import OpenAI

client = OpenAI(
    base_url="https://api.helpingai.co/v1",
    api_key="YOUR_API_KEY"
)

try:
    stream = client.chat.completions.create(
        model="Dhanishtha-2.0-preview",
        messages=[
            {"role": "user", "content": "Hello!"}
        ],
        stream=True
    )
    for chunk in stream:
        if chunk.choices[0].delta.content is not None:
            print(chunk.choices[0].delta.content, end="", flush=True)
except Exception as e:
    print(f"Streaming error: {e}")
```
The same pattern in Node.js:

```javascript
import OpenAI from 'openai';

const openai = new OpenAI({
  baseURL: 'https://api.helpingai.co/v1',
  apiKey: 'YOUR_API_KEY'
});

async function streamWithErrorHandling() {
  try {
    const stream = await openai.chat.completions.create({
      model: 'Dhanishtha-2.0-preview',
      messages: [
        {role: 'user', content: 'Hello!'}
      ],
      stream: true
    });

    for await (const chunk of stream) {
      if (chunk.choices[0]?.delta?.content) {
        process.stdout.write(chunk.choices[0].delta.content);
      }
    }
  } catch (error) {
    console.error('Streaming error:', error);
  }
}

streamWithErrorHandling();
```
Always implement retry logic for network failures:
```python
import time
from openai import OpenAI

def stream_with_retry(client, messages, max_retries=3):
    for attempt in range(max_retries):
        try:
            stream = client.chat.completions.create(
                model="Dhanishtha-2.0-preview",
                messages=messages,
                stream=True
            )
            for chunk in stream:
                if chunk.choices[0].delta.content is not None:
                    print(chunk.choices[0].delta.content, end="", flush=True)
            break
        except Exception as e:
            if attempt < max_retries - 1:
                time.sleep(2 ** attempt)  # Exponential backoff
                continue
            else:
                raise e
```
For UI applications, consider buffering chunks:
```javascript
class StreamBuffer {
  constructor() {
    this.buffer = '';
    this.callbacks = [];
  }

  addChunk(content) {
    this.buffer += content;
    this.callbacks.forEach(callback => callback(this.buffer));
  }

  onUpdate(callback) {
    this.callbacks.push(callback);
  }
}

const buffer = new StreamBuffer();
buffer.onUpdate((content) => {
  document.getElementById('response').textContent = content;
});

// Use with streaming...
for await (const chunk of stream) {
  if (chunk.choices[0]?.delta?.content) {
    buffer.addChunk(chunk.choices[0].delta.content);
  }
}
```
Check why the stream ended:
```python
for chunk in stream:
    choice = chunk.choices[0]
    if choice.delta.content is not None:
        print(choice.delta.content, end="", flush=True)
    if choice.finish_reason:
        if choice.finish_reason == "stop":
            print("\n[Completed normally]")
        elif choice.finish_reason == "length":
            print("\n[Reached max tokens]")
        elif choice.finish_reason == "content_filter":
            print("\n[Content filtered]")
```