Skip to main content

Installation

Install both the Netra SDK and Cerebras:
pip install netra-sdk cerebras-cloud-sdk

Usage

Initialize the Netra SDK to automatically trace all Cerebras operations:
import os

from netra import Netra
from cerebras.cloud.sdk import Cerebras

# Set up Netra tracing; all Cerebras SDK calls are instrumented automatically.
Netra.init(
    headers=f"x-api-key={os.environ.get('NETRA_API_KEY')}",
    trace_content=True
)

# Instantiate the Cerebras client — no extra wiring is needed for tracing.
client = Cerebras(api_key=os.environ.get('CEREBRAS_API_KEY'))

# Issue a standard chat completion request.
response = client.chat.completions.create(
    model="llama3.1-8b",
    messages=[{"role": "user", "content": "What is Cerebras?"}]
)
print(response.choices[0].message.content)

Chat Completions

Trace chat completions with decorators:
from netra import task, SpanWrapper
from cerebras.cloud.sdk import Cerebras

@task()
def chat_completion(client: Cerebras, prompt: str) -> str:
    """Run a single chat completion against Cerebras inside a Netra span.

    Args:
        client: An initialized Cerebras client.
        prompt: The user prompt to send.

    Returns:
        The assistant's reply text.
    """
    span = SpanWrapper("cerebras-chat", {
        "prompt": prompt,
        "model": "llama3.1-8b"
    }).start()
    # try/finally guarantees the span is closed even if the API call
    # raises; the original leaked the span on any exception.
    try:
        response = client.chat.completions.create(
            model="llama3.1-8b",
            messages=[{"role": "user", "content": prompt}]
        )

        answer = response.choices[0].message.content
        span.set_attribute("response", answer)
        span.set_attribute("tokens.completion", response.usage.completion_tokens)
        return answer
    finally:
        span.end()

Streaming Responses

Trace streaming completions:
from netra import task, SpanWrapper

@task()
def stream_completion(client: Cerebras, prompt: str) -> str:
    """Stream a chat completion, printing chunks as they arrive.

    Args:
        client: An initialized Cerebras client.
        prompt: The user prompt to send.

    Returns:
        The full concatenated response text.
    """
    stream_span = SpanWrapper("cerebras-stream").start()
    # try/finally guarantees the span is closed even if the request or
    # stream iteration raises; the original leaked the span on error.
    try:
        response = client.chat.completions.create(
            model="llama3.1-8b",
            messages=[{"role": "user", "content": prompt}],
            stream=True
        )

        full_response = ""
        for chunk in response:
            # Some chunks (e.g. role-only deltas) carry no content.
            if chunk.choices[0].delta.content:
                content = chunk.choices[0].delta.content
                print(content, end='', flush=True)
                full_response += content

        stream_span.set_attribute("response", full_response)
        return full_response
    finally:
        stream_span.end()

Multi-turn Conversations

Trace conversation history:
from netra import agent, SpanWrapper

@agent()
def conversation(client: Cerebras, messages: list[dict]) -> str:
    """Send a multi-turn conversation history to Cerebras inside a Netra span.

    Args:
        client: An initialized Cerebras client.
        messages: Full chat history as a list of {"role": ..., "content": ...} dicts.

    Returns:
        The assistant's reply text.
    """
    conv_span = SpanWrapper("cerebras-conversation", {
        "messages.count": len(messages)
    }).start()
    # try/finally guarantees the span is closed even if the API call
    # raises; the original leaked the span on any exception.
    try:
        response = client.chat.completions.create(
            model="llama3.1-8b",
            messages=messages
        )

        answer = response.choices[0].message.content
        conv_span.set_attribute("response", answer)
        return answer
    finally:
        conv_span.end()

Model Configuration

Trace with different models and settings:
from netra import workflow, SpanWrapper

@workflow()
def generate_with_config(client: Cerebras, prompt: str, temperature: float = 0.7) -> str:
    """Run a completion on the larger 70B model with tunable sampling settings.

    Args:
        client: An initialized Cerebras client.
        prompt: The user prompt to send.
        temperature: Sampling temperature (default 0.7).

    Returns:
        The assistant's reply text.
    """
    config_span = SpanWrapper("cerebras-configured", {
        "prompt": prompt,
        "temperature": temperature
    }).start()
    # try/finally guarantees the span is closed even if the API call
    # raises; the original leaked the span on any exception.
    try:
        response = client.chat.completions.create(
            model="llama3.1-70b",
            messages=[{"role": "user", "content": prompt}],
            temperature=temperature,
            max_tokens=1000
        )

        result = response.choices[0].message.content
        config_span.set_attribute("response", result)
        return result
    finally:
        config_span.end()

Configuration

Configure Cerebras instrumentation:
import os

from netra import Netra
from netra.instrumentation.instruments import InstrumentSet

# `import os` was missing: this snippet reads os.environ but, unlike the
# first example, never imported it, so it failed when run standalone.
# Restrict instrumentation to the Cerebras instrument set only.
Netra.init(
    headers=f"x-api-key={os.environ.get('NETRA_API_KEY')}",
    trace_content=True,
    instruments={InstrumentSet.CEREBRAS}
)

Next Steps

Last modified on February 3, 2026