Python SDK
The Python SDK provides synchronous and asynchronous clients for every webclaw endpoint. Built on httpx with typed dataclass responses.
Installation
pip
pip install webclaw
Note
Requires Python 3.9 or later. The only runtime dependency is httpx.
Configuration
Create a client with your API key. All other options are optional.
Basic
from webclaw import Webclaw
client = Webclaw("wc_your_api_key")
Options
| Parameter | Type | Default | Description |
|---|---|---|---|
| api_key | str | -- | Your webclaw API key (starts with wc_). |
| base_url | str | https://api.webclaw.io | Override for self-hosted instances. |
| timeout | float | 30.0 | Request timeout in seconds. |
All options
client = Webclaw(
"wc_your_api_key",
base_url="https://api.webclaw.io", # default
timeout=60.0, # seconds
)
Tip
Use a context manager to automatically close the underlying HTTP client when you're done.
Context manager
with Webclaw("wc_your_api_key") as client:
    result = client.scrape("https://example.com")
    print(result.markdown)
Async client
AsyncWebclaw mirrors every method on the sync client but returns awaitables.
Async usage
from webclaw import AsyncWebclaw
async with AsyncWebclaw("wc_your_api_key") as client:
    result = await client.scrape("https://example.com", formats=["markdown"])
    print(result.markdown)
Scrape
python
result = client.scrape(
"https://example.com",
formats=["markdown", "text", "llm"],
include_selectors=["article", ".content"],
exclude_selectors=["nav", "footer"],
only_main_content=True,
no_cache=True,
)
result.url # str
result.markdown # str | None
result.text # str | None
result.llm # str | None
result.metadata # dict
result.cache # CacheInfo | None
result.warning # str | None
Crawl
Starts an async crawl job. The returned handle lets you poll or wait.
python
job = client.crawl(
"https://example.com",
max_depth=3,
max_pages=100,
use_sitemap=True,
)
# Block until complete (polls every 2s, 5min timeout)
status = job.wait(interval=2.0, timeout=300.0)
print(status.status) # "completed" | "failed"
print(status.total) # total pages discovered
print(status.completed) # pages successfully crawled
print(status.errors) # pages that errored
for page in status.pages:
    print(page.url, len(page.markdown or ""))
Async crawl
job = await client.crawl("https://example.com", max_pages=50)
status = await job.wait()
Map
python
result = client.map("https://example.com")
print(result.count)
for url in result.urls:
    print(url)
Batch
python
result = client.batch(
["https://a.com", "https://b.com", "https://c.com"],
formats=["markdown"],
concurrency=5,
)
for item in result.results:
    if item.error:
        print(f"FAIL {item.url}: {item.error}")
    else:
        print(f"OK {item.url}: {len(item.markdown or '')} chars")
Extract
LLM-powered structured extraction. Pass a JSON schema or a plain-text prompt.
Schema-based
result = client.extract(
"https://example.com/pricing",
schema={
"type": "object",
"properties": {
"plans": {
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {"type": "string"},
"price": {"type": "string"},
},
},
}
},
},
)
print(result.data) # {"plans": [{"name": "Pro", "price": "$49/mo"}, ...]}
Prompt-based
result = client.extract(
"https://example.com",
prompt="Extract all pricing tiers with names and monthly prices",
)
print(result.data)
Summarize
python
result = client.summarize("https://example.com", max_sentences=3)
print(result.summary)
Brand
python
result = client.brand("https://example.com")
print(result.data) # {"colors": [...], "fonts": [...], "logo_url": "..."}
Error handling
All errors inherit from WebclawError. Catch specific subclasses for fine-grained control.
| Exception | HTTP status | When |
|---|---|---|
| AuthenticationError | 401 / 403 | Invalid or missing API key |
| NotFoundError | 404 | Resource not found |
| RateLimitError | 429 | Quota exceeded or too many requests |
| TimeoutError | -- | Request or crawl poll timed out |
python
from webclaw.errors import (
WebclawError,
AuthenticationError,
RateLimitError,
TimeoutError,
)
try:
    result = client.scrape("https://example.com")
except AuthenticationError:
    print("Check your API key")
except RateLimitError:
    print("Slow down or upgrade your plan")
except TimeoutError:
    print("Request took too long")
except WebclawError as e:
    print(f"API error ({e.status_code}): {e}")