Documentation Index
Fetch the complete documentation index at: https://docs.getnetra.ai/llms.txt
Use this file to discover all available pages before exploring further.
Installation
Install both the Netra SDK and Cerebras:
pip install netra-sdk cerebras-cloud-sdk
Usage
Initialize the Netra SDK to automatically trace all Cerebras operations:
from netra import Netra
from cerebras.cloud.sdk import Cerebras
import os
# Initialize Netra
# Must run before the Cerebras client is created so calls are instrumented.
# trace_content=True records prompts/responses on spans — disable if payloads
# are sensitive.
Netra.init(
headers=f"x-api-key={os.environ.get('NETRA_API_KEY')}",
trace_content=True
)
# Create Cerebras client - automatically traced
client = Cerebras(api_key=os.environ.get('CEREBRAS_API_KEY'))
# Use Cerebras as normal
response = client.chat.completions.create(
model="llama3.1-8b",
messages=[{"role": "user", "content": "What is Cerebras?"}]
)
print(response.choices[0].message.content)
Chat Completions
Trace chat completions with decorators:
from netra.decorators import task
from netra import SpanWrapper
from cerebras.cloud.sdk import Cerebras

@task()
def chat_completion(client: Cerebras, prompt: str) -> str:
    """Send a single chat prompt to Cerebras inside a manual Netra span.

    Args:
        client: An initialized Cerebras client.
        prompt: The user message to send.

    Returns:
        The model's reply text.
    """
    span = SpanWrapper("cerebras-chat", {
        "prompt": prompt,
        "model": "llama3.1-8b"
    }).start()
    # try/finally guarantees the span is closed even if the API call raises,
    # preventing an unterminated span from leaking.
    try:
        response = client.chat.completions.create(
            model="llama3.1-8b",
            messages=[{"role": "user", "content": prompt}]
        )
        answer = response.choices[0].message.content
        span.set_attribute("response", answer)
        span.set_attribute("tokens.completion", response.usage.completion_tokens)
        return answer
    finally:
        span.end()
Streaming Responses
Trace streaming completions:
from netra.decorators import task
from netra import SpanWrapper

@task()
def stream_completion(client: Cerebras, prompt: str):
    """Stream a chat completion, echoing chunks to stdout as they arrive.

    Args:
        client: An initialized Cerebras client.
        prompt: The user message to send.

    Returns:
        The fully accumulated response text.
    """
    stream_span = SpanWrapper("cerebras-stream").start()
    # try/finally guarantees the span is closed even if streaming fails
    # mid-response.
    try:
        response = client.chat.completions.create(
            model="llama3.1-8b",
            messages=[{"role": "user", "content": prompt}],
            stream=True
        )
        full_response = ""
        for chunk in response:
            if chunk.choices[0].delta.content:
                content = chunk.choices[0].delta.content
                # flush=True so partial output appears immediately.
                print(content, end='', flush=True)
                full_response += content
        stream_span.set_attribute("response", full_response)
        return full_response
    finally:
        stream_span.end()
Multi-turn Conversations
Trace conversation history:
from netra.decorators import agent
from netra import SpanWrapper

@agent()
def conversation(client: Cerebras, messages: list[dict]) -> str:
    """Run one turn of a multi-turn conversation and trace it.

    Args:
        client: An initialized Cerebras client.
        messages: Full conversation history as chat-format message dicts.

    Returns:
        The model's reply text for this turn.
    """
    conv_span = SpanWrapper("cerebras-conversation", {
        "messages.count": len(messages)
    }).start()
    # try/finally guarantees the span is closed even if the API call raises.
    try:
        response = client.chat.completions.create(
            model="llama3.1-8b",
            messages=messages
        )
        answer = response.choices[0].message.content
        conv_span.set_attribute("response", answer)
        return answer
    finally:
        conv_span.end()
Model Configuration
Trace with different models and settings:
from netra.decorators import workflow
from netra import SpanWrapper

@workflow()
def generate_with_config(client: Cerebras, prompt: str, temperature: float = 0.7) -> str:
    """Generate a completion with explicit model settings, traced via a span.

    Args:
        client: An initialized Cerebras client.
        prompt: The user message to send.
        temperature: Sampling temperature forwarded to the API (default 0.7).

    Returns:
        The model's reply text.
    """
    config_span = SpanWrapper("cerebras-configured", {
        "prompt": prompt,
        "temperature": temperature
    }).start()
    # try/finally guarantees the span is closed even if the API call raises.
    try:
        response = client.chat.completions.create(
            model="llama3.1-70b",
            messages=[{"role": "user", "content": prompt}],
            temperature=temperature,
            max_tokens=1000
        )
        result = response.choices[0].message.content
        config_span.set_attribute("response", result)
        return result
    finally:
        config_span.end()
Configuration
Configure Cerebras instrumentation:
import os

from netra import Netra
from netra.instrumentation.instruments import InstrumentSet

# `os` must be imported for the environment lookup below — the original
# snippet omitted it and would raise NameError when copied standalone.
Netra.init(
    headers=f"x-api-key={os.environ.get('NETRA_API_KEY')}",
    trace_content=True,
    # Restrict instrumentation to the Cerebras integration only.
    instruments={InstrumentSet.CEREBRAS}
)
Next Steps