ASSISTANTS
Stream Mode
1. Using Stream Mode in Python
1.1 Stream Mode with the OpenAI Library
It is recommended to use the OpenAI library for stream mode in most scenarios.
from openai import OpenAI

# Client pointed at the SiliconFlow OpenAI-compatible endpoint.
client = OpenAI(
    base_url='https://api.ap.siliconflow.com/v1',
    api_key='your-api-key'
)

# Send a request with streaming output
response = client.chat.completions.create(
    model="deepseek-ai/DeepSeek-V2.5",
    messages=[
        {"role": "user", "content": "SiliconCloud public beta is online, giving each user 300 million tokens to unlock the innovative capabilities of open-source large models. What changes does this bring to the entire large model application field?"}
    ],
    stream=True  # Enable streaming output
)

# Gradually receive and process responses
for chunk in response:
    # Some keep-alive / usage chunks carry no choices at all.
    if not chunk.choices:
        continue
    delta = chunk.choices[0].delta
    if delta.content:
        print(delta.content, end="", flush=True)
    # `reasoning_content` is a vendor extension that only exists on
    # reasoning-capable models; getattr avoids an AttributeError when the
    # SDK's delta object does not define the field.
    if getattr(delta, "reasoning_content", None):
        print(delta.reasoning_content, end="", flush=True)
1.2 Stream Mode with the Requests Library
If you are calling the API with the requests library instead of the OpenAI SDK — for example when talking to the SiliconCloud API directly — you must enable stream mode in both the request payload and the request call itself.
import json

import requests

url = "https://api.ap.siliconflow.com/v1/chat/completions"

payload = {
    "model": "deepseek-ai/DeepSeek-V2.5",  # Replace with your model
    "messages": [
        {
            "role": "user",
            "content": "SiliconCloud public beta is online, giving each user 300 million tokens to unlock the innovative capabilities of open-source large models. What changes does this bring to the entire large model application field?"
        }
    ],
    "stream": True  # Set to stream mode here
}
headers = {
    "accept": "application/json",
    "content-type": "application/json",
    "authorization": "Bearer your-api-key"
}

# stream=True makes requests yield the body incrementally instead of
# buffering it; the `with` block guarantees the connection is released.
with requests.post(url, json=payload, headers=headers, stream=True) as response:
    # Print streaming return information
    if response.status_code == 200:
        full_content = ""
        full_reasoning_content = ""
        # Each server-sent event arrives as one line: "data: {...json...}".
        for chunk in response.iter_lines():
            if not chunk:
                continue  # skip SSE keep-alive blank lines
            # Strip only the leading "data: " marker. A blanket replace()
            # would also mangle any literal "data: " inside the model output.
            chunk_str = chunk.decode('utf-8').removeprefix('data: ')
            if chunk_str == "[DONE]":
                break  # server signals end of stream
            chunk_data = json.loads(chunk_str)
            delta = chunk_data['choices'][0].get('delta', {})
            content = delta.get('content', '')
            # `reasoning_content` is only present for reasoning-capable models.
            reasoning_content = delta.get('reasoning_content', '')
            if content:
                print(content, end="", flush=True)
                full_content += content
            if reasoning_content:
                print(reasoning_content, end="", flush=True)
                full_reasoning_content += reasoning_content
    else:
        print(f"Request failed, status code: {response.status_code}")