Streaming allows you to receive partial responses from the Perplexity API as they are generated, rather than waiting for the complete response. This is particularly useful for:
- **Real-time user experiences** - Display responses as they're generated
- **Long responses** - Start showing content immediately for lengthy analyses
- **Interactive applications** - Provide immediate feedback to users
Streaming is supported across all Perplexity models, including Sonar, Sonar Pro, and the reasoning models.
The following snippet streams responses from the Perplexity API using the `requests` library. Note that the raw server-sent events (SSE) must be parsed manually to extract the content, search results, and other metadata.
```python
import requests

# Set up the API endpoint and headers
url = "https://api.perplexity.ai/chat/completions"
headers = {
    "Authorization": "Bearer YOUR_API_KEY",
    "Content-Type": "application/json"
}

payload = {
    "model": "sonar-pro",
    "messages": [
        {"role": "user", "content": "Who are the top 5 tech influencers on X?"}
    ],
    "stream": True  # Enable streaming for real-time responses
}

response = requests.post(url, headers=headers, json=payload, stream=True)

# Process the streaming response (simplified example)
for line in response.iter_lines():
    if line:
        print(line.decode('utf-8'))
```
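If you prefer not to handle raw SSE lines yourself, an OpenAI-compatible client can also consume the stream. The sketch below assumes the OpenAI Python SDK pointed at `https://api.perplexity.ai` and an API key stored in a `PERPLEXITY_API_KEY` environment variable; adjust to your own setup.

```python
import os
from openai import OpenAI

# Sketch: Perplexity's chat completions endpoint is OpenAI-compatible,
# so the OpenAI SDK can be used with a custom base_url.
client = OpenAI(
    api_key=os.environ["PERPLEXITY_API_KEY"],
    base_url="https://api.perplexity.ai",
)

stream = client.chat.completions.create(
    model="sonar-pro",
    messages=[{"role": "user", "content": "Who are the top 5 tech influencers on X?"}],
    stream=True,
)

# Each chunk carries an incremental delta; print tokens as they arrive.
for chunk in stream:
    if chunk.choices and chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="", flush=True)
```

If you also need search results and usage metadata, parse the SSE stream yourself and collect those fields as they arrive, as in the next example.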
```python
import requests
import json

def stream_with_requests_metadata():
    url = "https://api.perplexity.ai/chat/completions"
    headers = {
        "Authorization": "Bearer YOUR_API_KEY",
        "Content-Type": "application/json"
    }
    payload = {
        "model": "sonar",
        "messages": [{"role": "user", "content": "Explain quantum computing"}],
        "stream": True
    }

    response = requests.post(url, headers=headers, json=payload, stream=True)

    content = ""
    metadata = {}

    for line in response.iter_lines():
        if line:
            line = line.decode('utf-8')
            if line.startswith('data: '):
                data_str = line[6:]
                if data_str == '[DONE]':
                    break
                try:
                    chunk = json.loads(data_str)

                    # Process content
                    if 'choices' in chunk and chunk['choices'][0]['delta'].get('content'):
                        content_piece = chunk['choices'][0]['delta']['content']
                        content += content_piece
                        print(content_piece, end='', flush=True)

                    # Collect metadata
                    for key in ['search_results', 'usage']:
                        if key in chunk:
                            metadata[key] = chunk[key]

                    # Check if streaming is complete
                    if chunk['choices'][0].get('finish_reason'):
                        print(f"\n\nMetadata: {metadata}")

                except json.JSONDecodeError:
                    continue

    return content, metadata

stream_with_requests_metadata()
```
Important: With streaming, search results may not be available until later in the response. If displaying search results immediately is critical to your user interface, consider using a non-streaming request instead.
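As a minimal sketch of the non-streaming alternative, the request below omits `"stream": True` so the complete response arrives in a single JSON body. It assumes search results are exposed under a top-level `search_results` key, mirroring the field collected in the streaming metadata example above.

```python
import requests

url = "https://api.perplexity.ai/chat/completions"
headers = {
    "Authorization": "Bearer YOUR_API_KEY",
    "Content-Type": "application/json"
}
payload = {
    "model": "sonar-pro",
    "messages": [{"role": "user", "content": "Who are the top 5 tech influencers on X?"}]
    # No "stream": True -- the full response arrives at once
}

response = requests.post(url, headers=headers, json=payload)
data = response.json()

# The complete answer is available immediately
print(data["choices"][0]["message"]["content"])

# Assumption: search results sit under a top-level "search_results" key,
# as in the streaming metadata example above
for result in data.get("search_results", []):
    print(result)
```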