This guide covers integrating the Sentience Python SDK with browser-use, an open-source framework for building AI agents that interact with web browsers.
SentienceContext is a token-optimized context middleware for browser-use that replaces full DOM + vision with a ranked, semantic geometry state — cutting agent token usage by ~50%.
The Sentience SDK enhances browser-use by providing:
The Sentience extension runs in the browser and provides a snapshot API through a Chrome extension that:
The Python SDK wraps this functionality with SentienceContext, which formats the snapshot data into a compact, LLM-optimized prompt block.
The following data was collected running the same task ("Find the top Show HN post") with browser-use, comparing baseline (vision + full DOM) against Sentience SDK integration.
Baseline (vision + full DOM):

| Metric | Value |
|---|---|
| DOM Limit | 40,000 chars |
| DOM State per Step | ~10,195 chars (~2,548 tokens) |
| Total per Step | ~12,667 chars (~3,166 tokens) |
| Total Tokens | 37,051 |
| Total Cost | $0.0096 |
| Agent Steps | 7 |
| Average Tokens/Step | 5,293 |

With Sentience SDK:

| Metric | Value |
|---|---|
| DOM Limit | 5,000 chars |
| DOM State per Step | ~5,000 chars (~1,250 tokens) |
| Total per Step | ~6,418 chars (~1,604 tokens) |
| Total Tokens | 14,143 |
| Total Cost | $0.0043 |
| Agent Steps | 4 |
| Average Tokens/Step | 3,536 |
| Metric | Improvement |
|---|---|
| Token Reduction | 62% fewer tokens (37,051 → 14,143) |
| Cost Reduction | 55% lower cost ($0.0096 → $0.0043) |
| Steps Required | 43% fewer steps (7 → 4) |
| Tokens per Step | 33% reduction (5,293 → 3,536) |
Key findings:
Install both packages together using the optional dependency:
```bash
pip install "sentienceapi[browser-use]"
```

Or install separately:

```bash
pip install sentienceapi browser-use
```

```python
from sentience import get_extension_dir
from sentience.backends import SentienceContext

print(f"Extension path: {get_extension_dir()}")
print("Installation successful!")
```

```python
import asyncio

from browser_use import BrowserSession, BrowserProfile
from sentience import get_extension_dir
from sentience.backends import SentienceContext


async def main():
    # 1. Setup browser with Sentience extension
    profile = BrowserProfile(
        args=[f"--load-extension={get_extension_dir()}"]
    )
    session = BrowserSession(browser_profile=profile)
    await session.start()

    # 2. Create context builder
    ctx = SentienceContext(max_elements=60)

    # 3. Navigate and build context
    await session.navigate("https://news.ycombinator.com")
    state = await ctx.build(session, goal="Find the first Show HN post")
    if state:
        print(f"Found {len(state.snapshot.elements)} elements")
        print(f"Prompt block:\n{state.prompt_block[:500]}...")

    await session.stop()


asyncio.run(main())
```

The primary class for building LLM-optimized DOM context from a browser session.
```python
from sentience.backends import SentienceContext, TopElementSelector

ctx = SentienceContext(
    sentience_api_key: str | None = None,  # API key for gateway mode
    use_api: bool | None = None,  # Force API vs extension mode
    max_elements: int = 60,  # Maximum elements to fetch
    show_overlay: bool = False,  # Show visual overlay on elements
    top_element_selector: TopElementSelector | None = None,  # Selection strategy
)
```

| Parameter | Type | Default | Description |
|---|---|---|---|
| `sentience_api_key` | `str \| None` | `None` | API key for Sentience gateway (cloud processing) |
| `use_api` | `bool \| None` | `None` | Force API mode (`True`) or extension mode (`False`). Auto-detected if `None` |
| `max_elements` | `int` | `60` | Maximum number of elements to fetch from snapshot |
| `show_overlay` | `bool` | `False` | Display visual overlay highlighting detected elements |
| `top_element_selector` | `TopElementSelector` | (defaults) | Configure element selection strategy |

`build()`

Builds context state from a browser session.
```python
state = await ctx.build(
    browser_session,  # browser-use BrowserSession
    goal: str | None = None,  # Task description for reranking
    wait_for_extension_ms: int = 5000,  # Extension load timeout
    retries: int = 2,  # Retry attempts on failure
    retry_delay_s: float = 1.0,  # Delay between retries
) -> SentienceContextState | None
```

Returns `SentienceContextState` on success, or `None` if the snapshot fails.
Configuration for the 3-way element selection strategy.
```python
from sentience.backends import TopElementSelector

selector = TopElementSelector(
    by_importance: int = 60,  # Top N by importance score
    from_dominant_group: int = 15,  # Top N from dominant group
    by_position: int = 10,  # Top N by page position (lowest Y)
)
```

Elements are selected using a 3-way merge with deduplication:

1. Top N by importance score (`by_importance`)
2. Top N from the dominant group (`from_dominant_group`)
3. Top N by page position (`by_position`, lowest `doc_y` values)

| Parameter | Type | Default | Description |
|---|---|---|---|
| `by_importance` | `int` | `60` | Number of top elements by importance score (descending) |
| `from_dominant_group` | `int` | `15` | Number of elements from the dominant group (for ordinal tasks) |
| `by_position` | `int` | `10` | Number of elements by position (top of page first) |
Dataclass returned by SentienceContext.build().
```python
@dataclass
class SentienceContextState:
    url: str  # Current page URL
    snapshot: Snapshot  # Full Sentience snapshot with all elements
    prompt_block: str  # Compact LLM-ready context block
```

| Attribute | Type | Description |
|---|---|---|
| `url` | `str` | The URL of the page when the snapshot was taken |
| `snapshot` | `Snapshot` | Full snapshot object containing all detected elements |
| `prompt_block` | `str` | Formatted string ready for LLM consumption |
Adapter that bridges browser-use's BrowserSession to Sentience's backend protocol.
```python
from sentience.backends import BrowserUseAdapter

adapter = BrowserUseAdapter(browser_session)
backend = await adapter.create_backend()
```

```python
from browser_use import BrowserSession, BrowserProfile
from sentience import get_extension_dir
from sentience.backends import BrowserUseAdapter, snapshot, click

# Setup
profile = BrowserProfile(args=[f"--load-extension={get_extension_dir()}"])
session = BrowserSession(browser_profile=profile)
await session.start()

# Create backend
adapter = BrowserUseAdapter(session)
backend = await adapter.create_backend()

# Use backend for snapshots and actions
snap = await snapshot(backend)
await click(backend, snap.elements[0].bbox)
```

Caches snapshots to reduce redundant calls during action loops.
```python
from sentience.backends import CachedSnapshot

cache = CachedSnapshot(
    backend,  # Backend instance
    max_age_ms: int = 2000,  # Cache TTL in milliseconds
)
```

| Method | Description |
|---|---|
| `await cache.get()` | Returns cached snapshot if valid, otherwise takes fresh snapshot |
| `cache.invalidate()` | Forces next `get()` to take a fresh snapshot |
```python
cache = CachedSnapshot(backend, max_age_ms=2000)

snap1 = await cache.get()  # Takes fresh snapshot
snap2 = await cache.get()  # Returns cached (if < 2s old)

await click(backend, element.bbox)
cache.invalidate()  # Force refresh after DOM-modifying action
snap3 = await cache.get()  # Takes fresh snapshot
```

Takes a snapshot of the current page, returning all detected elements.

```python
from sentience.backends import snapshot

snap = await snapshot(backend, options=None)
```

Returns a `Snapshot` object containing:

- `status`: "success" or "error"
- `url`: Current page URL
- `viewport`: Viewport dimensions
- `elements`: List of `Element` objects
- `dominant_group_key`: Key identifying the dominant repeating pattern

Clicks at the center of a bounding box.
```python
from sentience.backends import click

await click(backend, bbox)
```

| Parameter | Type | Description |
|---|---|---|
| `backend` | `BrowserBackendV0` | Backend instance |
| `bbox` | `BBox` | Bounding box of the target element |
Types text into the currently focused element.
```python
from sentience.backends import type_text

await type_text(backend, text, delay_ms=50)
```

| Parameter | Type | Default | Description |
|---|---|---|---|
| `backend` | `BrowserBackendV0` | — | Backend instance |
| `text` | `str` | — | Text to type |
| `delay_ms` | `int` | `50` | Delay between keystrokes |
Scrolls the page or a specific element.
```python
from sentience.backends import scroll

await scroll(backend, delta_y=500)  # Scroll down 500px
await scroll(backend, delta_y=-300)  # Scroll up 300px
await scroll(backend, delta_y=300, target=(400, 500))  # Scroll at position
```

| Parameter | Type | Default | Description |
|---|---|---|---|
| `backend` | `BrowserBackendV0` | — | Backend instance |
| `delta_y` | `int` | — | Vertical scroll amount (positive = down) |
| `target` | `tuple[int, int]` | `None` | Optional (x, y) position to scroll at |
Finds a single element matching a selector.
```python
from sentience import find

element = find(snapshot, 'role=textbox[name*="Search"]')
```

Returns `Element | None`.
| Pattern | Example | Description |
|---|---|---|
| `role=X` | `role=button` | Match by ARIA role |
| `[name="X"]` | `[name="Submit"]` | Exact name match |
| `[name*="X"]` | `[name*="Search"]` | Partial name match |
| `text=X` | `text=Click here` | Match by visible text |
Finds all elements matching a selector.
```python
from sentience import query

links = query(snapshot, 'role=link')
print(f"Found {len(links)} links")
```

Returns `list[Element]`.
All exceptions inherit from SentienceBackendError.
```python
from sentience.backends import (
    SentienceBackendError,
    ExtensionNotLoadedError,
    SnapshotError,
    ActionError,
)

try:
    snap = await snapshot(backend)
except ExtensionNotLoadedError as e:
    print(f"Extension not loaded. Fix: {e}")
except SnapshotError as e:
    print(f"Snapshot failed: {e}")
except ActionError as e:
    print(f"Action failed: {e}")
```

| Exception | Description |
|---|---|
| `ExtensionNotLoadedError` | Sentience extension not loaded in browser args |
| `SnapshotError` | `window.motiondocs.snapshot()` failed |
| `ActionError` | Click/type/scroll operation failed |
| `BackendEvalError` | JavaScript evaluation failed |
The prompt_block from SentienceContextState uses a compact pipe-delimited format:
```
Elements: ID|role|text|imp|is_primary|docYq|ord|DG|href
Rules: ordinal→DG=1 then ord asc; otherwise imp desc. Use click(ID)/input_text(ID,...).
1|link|Show HN: My Project|85|1|2|0|1|ycombinato
2|link|Ask HN: Best practices|80|0|3|1|1|ycombinato
3|button|Submit|75|1|1|-|0|
```
| Field | Description |
|---|---|
| `ID` | Element ID for actions (e.g., `click(ID)`) |
| `role` | Semantic role (button, link, textbox, etc.) |
| `text` | Truncated element text (max 30 chars) |
| `imp` | Importance score (0-100) |
| `is_primary` | 1 if primary element (e.g. search box), 0 otherwise |
| `docYq` | Quantized Y position (`doc_y / 200`) |
| `ord` | Ordinal rank within dominant group, or `-` |
| `DG` | 1 if in dominant group, 0 otherwise |
| `href` | Compressed href token (domain or path segment) |
- Ordinal tasks: filter to `DG=1`, sort by `ord` ascending
- Otherwise: sort by `imp` descending (highest importance first)
- Look for `is_primary=1` for main call-to-action buttons

`show_overlay=True` for Debugging

```python
# overlay to show bounding box with green border on top elements in Snapshot
# highlighted border automatically disappears after 5 seconds
ctx = SentienceContext(show_overlay=True)
```

This displays a visual overlay on detected elements, useful for verifying element detection.
`max_elements`

- `max_elements=40` is sufficient
- `max_elements=80-100`

```python
cache = CachedSnapshot(backend, max_age_ms=2000)
snap = await cache.get()

await click(backend, button.bbox)  # This may change the DOM
cache.invalidate()  # Force refresh
snap = await cache.get()  # Fresh snapshot
```

```python
state = await ctx.build(
    session,
    goal="Click the first search result",  # Helps with ordinal ranking
)
```

```python
state = await ctx.build(
    session,
    wait_for_extension_ms=5000,  # Wait up to 5 seconds
    retries=3,  # Retry on failure
)
```

```python
import asyncio

from browser_use import BrowserSession, BrowserProfile, Agent, ChatBrowserUse
from sentience import get_extension_dir, find
from sentience.backends import (
    SentienceContext,
    TopElementSelector,
    BrowserUseAdapter,
    snapshot,
    click,
    type_text,
)


async def main():
    # Setup browser with Sentience extension
    profile = BrowserProfile(
        args=[
            "--enable-extensions",
            f"--load-extension={get_extension_dir()}",
        ]
    )
    session = BrowserSession(browser_profile=profile)
    await session.start()

    # Navigate to target page
    await session.navigate("https://news.ycombinator.com")

    # Option A: High-level SentienceContext API
    ctx = SentienceContext(
        max_elements=60,
        show_overlay=True,  # Visual debugging
        top_element_selector=TopElementSelector(
            by_importance=60,
            from_dominant_group=15,
            by_position=10,
        ),
    )
    state = await ctx.build(
        session,
        goal="Find the first Show HN post",
        wait_for_extension_ms=5000,
    )
    if state:
        print(f"URL: {state.url}")
        print(f"Elements: {len(state.snapshot.elements)}")
        print(f"\nPrompt block:\n{state.prompt_block}")

    # Option B: Low-level API for direct control
    adapter = BrowserUseAdapter(session)
    backend = await adapter.create_backend()
    snap = await snapshot(backend)
    search_link = find(snap, 'role=link[name*="Show HN"]')
    if search_link:
        await click(backend, search_link.bbox)
        print(f"Clicked: {search_link.text}")

    await session.stop()


if __name__ == "__main__":
    asyncio.run(main())
```

See working examples of Sentience + browser-use integration:
Last updated: January 2026