Installation
Install the Cartesia SDK and the Netra SDK with pip:
pip install cartesia netra-sdk
Usage
Initialize Netra before using Cartesia:
import os
from netra import Netra
# Initialize Netra before any Cartesia call is made.
# NOTE(review): os.environ.get returns None when NETRA_API_KEY is unset, so the
# header would become the literal string "x-api-key=None" - confirm the
# variable is exported before running this.
Netra.init(
    app_name="cartesia-service",
    headers=f"x-api-key={os.environ.get('NETRA_API_KEY')}"
)
Examples
Text-to-Speech with Sonic Models
Track Cartesia TTS operations using Netra decorators:
from cartesia import Cartesia
from netra import task, workflow
import os
# Module-level Cartesia client used by the TTS functions in this example; the
# API key is read from the CARTESIA_API_KEY environment variable (None if unset).
client = Cartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
@task()
def generate_speech(text: str, voice_id: str) -> bytes:
    """Synthesize ``text`` with the Cartesia ``sonic-english`` model.

    Args:
        text: Transcript to be spoken.
        voice_id: Identifier of the Cartesia voice to use.

    Returns:
        The value returned by ``client.tts.bytes`` for a request asking for
        raw-container, ``pcm_f32le``-encoded audio at 44100 Hz.
    """
    voice_spec = {"mode": "id", "id": voice_id}
    audio_format = {
        "container": "raw",
        "encoding": "pcm_f32le",
        "sample_rate": 44100,
    }
    return client.tts.bytes(
        model_id="sonic-english",
        transcript=text,
        voice=voice_spec,
        output_format=audio_format,
    )
@task()
def stream_speech(text: str, voice_id: str):
    """Stream synthesized speech for ``text`` using Cartesia ``sonic-turbo``.

    This is a generator: the request is only issued once the caller starts
    iterating.

    Args:
        text: Transcript to be spoken.
        voice_id: Identifier of the Cartesia voice to use.

    Yields:
        Each chunk produced by ``client.tts.stream``, in order. The request
        asks for raw-container, ``pcm_f32le``-encoded audio at 44100 Hz.
    """
    stream = client.tts.stream(
        model_id="sonic-turbo",
        transcript=text,
        voice={"mode": "id", "id": voice_id},
        output_format={
            "container": "raw",
            "encoding": "pcm_f32le",
            "sample_rate": 44100,
        },
    )
    # Delegate instead of re-yielding by hand: if the consumer closes this
    # generator early, ``yield from`` forwards close() to ``stream`` when it
    # exposes one (presumably it does - verify against the Cartesia SDK).
    yield from stream
@workflow()
def process_text_batch(texts: list[str], voice_id: str) -> list[bytes]:
    """Convert every text in ``texts`` to speech with the same voice.

    Args:
        texts: Transcripts to synthesize, processed one after another.
        voice_id: Identifier of the Cartesia voice used for all of them.

    Returns:
        One ``generate_speech`` result per input text, in input order.
    """
    return [generate_speech(item, voice_id) for item in texts]
# Usage: synthesize a single sentence. The first argument is the transcript,
# the second is the voice ID passed through to Cartesia.
audio_data = generate_speech(
    "Hello, this is Cartesia Sonic speech synthesis.",
    "a0e99841-438c-4a64-b679-ae501e7d6091"
)
Speech-to-Text with Ink Models
Track Cartesia STT operations using Netra decorators:
from cartesia import Cartesia
from netra import task
import os
# Module-level Cartesia client used by the STT functions in this example; the
# API key is read from the CARTESIA_API_KEY environment variable (None if unset).
client = Cartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
@task()
def transcribe_audio(audio_path: str) -> str:
    """Transcribe an audio file with the Cartesia ``ink-whisper`` model.

    Args:
        audio_path: Path of the audio file; it is read fully into memory.

    Returns:
        The ``transcript`` attribute of the ``client.stt.transcribe`` response.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(audio_path, "rb") as source:
        payload = source.read()

    # The language is fixed to English in this example.
    result = client.stt.transcribe(
        model_id="ink-whisper",
        audio=payload,
        language="en",
    )
    return result.transcript
@task()
def transcribe_stream(audio_stream) -> str:
    """Transcribe streaming audio and return the concatenated final text.

    Args:
        audio_stream: Audio source handed unchanged to
            ``client.stt.stream_transcribe``.

    Returns:
        The transcripts of all results flagged ``is_final``, joined with a
        single space and stripped of leading/trailing whitespace. Returns an
        empty string when there are no final results.
    """
    stream = client.stt.stream_transcribe(
        model_id="ink-whisper",
        audio=audio_stream,
        language="en",
        interim_results=True,
    )
    # Interim results are requested but only final ones are kept here.
    # Collect and join once rather than growing a string with ``+=``.
    final_segments = [result.transcript for result in stream if result.is_final]
    return " ".join(final_segments).strip()
# Usage: transcribe a local audio file and keep the resulting text.
transcript = transcribe_audio("./audio/sample.wav")
Manual Span Creation with Action Tracking
For detailed control over tracing with both TTS and STT:
from cartesia import Cartesia
from netra import SpanWrapper, ActionModel, UsageModel
import os
import time
# Module-level Cartesia client used by both tracked functions in this example;
# the API key is read from the CARTESIA_API_KEY environment variable (None if unset).
client = Cartesia(api_key=os.environ.get("CARTESIA_API_KEY"))
def generate_speech_with_tracking(text: str, voice_id: str) -> bytes:
    """Generate speech and record the call on a manually managed Netra span.

    The span ``cartesia-tts`` receives the request attributes, one ``API``
    action describing the synthesis call, and a character-based usage record.

    Args:
        text: Transcript to be spoken.
        voice_id: Identifier of the Cartesia voice to use.

    Returns:
        The value returned by ``client.tts.bytes`` (raw container,
        ``pcm_f32le``, 44100 Hz requested).

    Raises:
        Exception: Any error raised while synthesizing or recording is set on
            the span (status code 2) and then re-raised unchanged.
    """
    # Values that are both sent to Cartesia and reported to Netra live in one
    # place so the request and its telemetry cannot drift apart.
    model_id = "sonic-turbo"
    sample_rate = 44100
    # NOTE(review): example rate taken from the original snippet - confirm
    # against current Cartesia pricing before relying on the reported cost.
    usd_per_character = 0.00001

    span = SpanWrapper("cartesia-tts")
    span.start()
    try:
        span.set_attribute("text_length", len(text))
        span.set_attribute("voice_id", voice_id)
        span.set_attribute("model", model_id)

        # Time only the API call so latency_ms reflects the synthesis itself.
        start_time = time.time_ns()
        response = client.tts.bytes(
            model_id=model_id,
            transcript=text,
            voice={"mode": "id", "id": voice_id},
            output_format={
                "container": "raw",
                "encoding": "pcm_f32le",
                "sample_rate": sample_rate,
            },
        )
        duration_ms = (time.time_ns() - start_time) / 1_000_000

        # Track the TTS API operation; metadata values are passed as strings.
        action = ActionModel(
            start_time=str(start_time),
            action="API",
            action_type="TTS_SYNTHESIS",
            metadata={
                "provider": "cartesia",
                "model": model_id,
                "voice_id": voice_id,
                "text_length": str(len(text)),
                "audio_size_bytes": str(len(response)),
                "sample_rate": str(sample_rate),
                "latency_ms": str(duration_ms),
            },
            success=True,
        )
        span.set_action([action])

        # Track usage, billed per input character.
        usage = UsageModel(
            model=model_id,
            usage_type="characters",
            units_used=len(text),
            cost_in_usd=len(text) * usd_per_character,
        )
        span.set_usage([usage])

        span.set_status({"code": 1, "message": "Success"})
        return response
    except Exception as e:
        span.set_error(e)
        span.set_status({"code": 2, "message": "Error"})
        raise
    finally:
        # End the span exactly once on every path (success or failure).
        span.end()
def transcribe_with_tracking(audio_path: str) -> str:
    """Transcribe an audio file and record the call on a manual Netra span.

    The span ``cartesia-stt`` receives the request attributes, one ``API``
    action describing the transcription call, and a usage record based on the
    audio duration reported by the response.

    Args:
        audio_path: Path of the audio file; it is read fully into memory.

    Returns:
        The ``transcript`` attribute of the ``client.stt.transcribe`` response.

    Raises:
        Exception: Any error raised while reading the file, transcribing, or
            recording is set on the span (status code 2) and re-raised unchanged.
    """
    model_id = "ink-whisper"
    language = "en"
    # Rate stated by the original snippet's comment ($0.13 per hour of audio).
    # The original multiplied seconds by 0.036, which is roughly 1000x too high
    # for that rate. NOTE(review): confirm against current Cartesia pricing.
    usd_per_hour = 0.13

    span = SpanWrapper("cartesia-stt")
    span.start()
    try:
        with open(audio_path, "rb") as audio_file:
            audio_data = audio_file.read()
        audio_size_bytes = len(audio_data)

        span.set_attribute("audio_file", audio_path)
        span.set_attribute("audio_size_bytes", audio_size_bytes)
        span.set_attribute("model", model_id)

        # Time only the API call so duration_ms excludes the local file read.
        start_time = time.time_ns()
        response = client.stt.transcribe(
            model_id=model_id,
            audio=audio_data,
            language=language,
        )
        duration_ms = (time.time_ns() - start_time) / 1_000_000
        transcript = response.transcript

        # Track the STT API operation; metadata values are passed as strings.
        action = ActionModel(
            start_time=str(start_time),
            action="API",
            action_type="STT_TRANSCRIPTION",
            metadata={
                "provider": "cartesia",
                "model": model_id,
                "audio_size_bytes": str(audio_size_bytes),
                "transcript_length": str(len(transcript)),
                "duration_ms": str(duration_ms),
                "language": language,
            },
            success=True,
        )
        span.set_action([action])

        # Track usage. The response may lack ``duration`` or carry None; both
        # are treated as 0 so the cost arithmetic cannot fail.
        # Assumes ``duration`` is in seconds (usage_type is "audio_seconds") -
        # TODO confirm against the Cartesia SDK response model.
        audio_duration = getattr(response, "duration", None) or 0
        usage = UsageModel(
            model=model_id,
            usage_type="audio_seconds",
            units_used=audio_duration,
            cost_in_usd=audio_duration * usd_per_hour / 3600,
        )
        span.set_usage([usage])

        span.set_attribute("transcript_length", len(transcript))
        span.set_status({"code": 1, "message": "Success"})
        return transcript
    except Exception as e:
        span.set_error(e)
        span.set_status({"code": 2, "message": "Error"})
        raise
    finally:
        # End the span exactly once on every path (success or failure).
        span.end()
# Usage: run one tracked synthesis, then one tracked transcription of a local
# audio file.
audio_data = generate_speech_with_tracking(
    "This is ultra-low latency speech synthesis.",
    "a0e99841-438c-4a64-b679-ae501e7d6091"
)
transcript = transcribe_with_tracking("./audio/sample.wav")
Next Steps
- Netra Documentation - Learn more about Netra’s observability features
- Cartesia API - Explore Cartesia’s ultra-low latency TTS and STT