Per-agent provider routing + 2-OR / 2-Ollama model mix

Routing fix:
- New provider_for_model(name): a model name containing '/' is
  treated as an OpenRouter slug, bare names (llama3.2:3b) as Ollama.
  Previously the global PROVIDER variable routed all calls, so a
  per-agent override 'llama3.2:3b' would have hit OpenRouter and 404'd.
- decide_tool now uses provider_for_model() so per-agent models
  route correctly regardless of global PROVIDER setting.
- New provider_for_agent() helper for callers that need the
  provider of a specific agent.

Live mix: Anchor + Flora on OpenRouter (claude-haiku, gpt-4o-mini);
Lovely + Spark on Ollama (llama3.2:3b, free local).

.env:
- Provider set to 'auto' (uses OpenRouter when key is set)
- Per-agent assignments documented in .env.example
- Cost estimate updated: 2 OR + 2 Ollama = ~$0.10-0.30/day for OR
  portion, $0 for Ollama portion

Tests: 100 passing (was 99). New test_provider_for_model() covers
the routing heuristic. Existing tests updated to pass model=...
explicitly so they don't depend on env-loaded .env overrides.
This commit is contained in:
Jeuners 2026-06-15 02:53:42 +02:00
parent eb41d4b196
commit e0d72021e4
3 changed files with 63 additions and 30 deletions

View file

@ -3,41 +3,36 @@
# .env is git-ignored; do not commit secrets.
# === LLM PROVIDER ===
# Default: Ollama (local, free, no token cost)
# Set to "openrouter" to use OpenRouter (requires API key below)
# "auto" picks OpenRouter if OPENROUTER_API_KEY is set, else Ollama
EMERGENCE_LLM_PROVIDER=ollama
# "auto" picks OpenRouter if key is set, else Ollama.
# "ollama" forces local-only. "openrouter" requires the key below.
EMERGENCE_LLM_PROVIDER=auto
# === OPENROUTER (used when EMERGENCE_LLM_PROVIDER is "openrouter" or "auto", or for per-agent org/model slugs) ===
# Get a key at https://openrouter.ai/keys
# Free models exist but tool-use support is limited; see README.
OPENROUTER_API_KEY=
# Default OpenRouter model (used if no per-agent override)
# Default OpenRouter model
EMERGENCE_OPENROUTER_MODEL=anthropic/claude-3.5-haiku
# === OLLAMA (default) ===
# Get Ollama at https://ollama.com — runs locally, no API key, no cost
# Pull a small fast model: ollama pull llama3.2:3b
# === OLLAMA (default, free, local) ===
# Get Ollama at https://ollama.com. Pull once:
# ollama pull llama3.2:3b
EMERGENCE_OLLAMA_URL=http://127.0.0.1:11434
EMERGENCE_OLLAMA_MODEL=llama3.2:3b
# Per-agent Ollama model overrides. With ~2 GB each, you can run all four
# on the same machine. Mix and match for Time-Dilation experiments.
# Examples:
# EMERGENCE_AGENT_ANCHOR_MODEL=llama3.2:3b
# EMERGENCE_AGENT_FLORA_MODEL=llama3.2:3b
# === PER-AGENT MODEL ASSIGNMENT ===
# Example: 2 cheap cloud models + 2 free local models.
# Uncomment and edit to assign models per agent.
# Cloud (cheap, good tool-use):
# EMERGENCE_AGENT_ANCHOR_MODEL=anthropic/claude-3.5-haiku
# EMERGENCE_AGENT_FLORA_MODEL=openai/gpt-4o-mini
# Local (free, requires Ollama running):
# EMERGENCE_AGENT_LOVELY_MODEL=llama3.2:3b
# EMERGENCE_AGENT_SPARK_MODEL=llama3.2:3b
# === BEHAVIOUR ===
# Master switch: 0 forces rule-based engine (no LLM calls at all)
EMERGENCE_LLM_ENABLED=1
EMERGENCE_LLM_TIMEOUT=30
# === TOKEN-SAVING TIPS ===
# - Default model llama3.2:3b is small (~2 GB) and fast
# - System prompt is already compact (~150 tokens)
# - max_tokens is capped at 256 — plenty for tool calls
# - Tool descriptions are short on purpose
# - For zero-cost operation, use Ollama only (this file)

View file

@ -83,6 +83,24 @@ def model_for_agent(agent_id: str) -> str:
return OPENROUTER_MODEL if PROVIDER == "openrouter" else OLLAMA_MODEL
def provider_for_model(model: str) -> str:
    """Guess which backend serves ``model`` from the shape of its name.

    OpenRouter slugs have the form ``org/model`` with exactly one ``/``
    (for example ``anthropic/claude-3.5-haiku``). Every other name is
    routed to Ollama: bare names such as ``llama3.2:3b``, ``gemma3`` or
    ``mistral``, and multi-segment references such as ``hf.co/user/repo``,
    which are not OpenRouter slugs.

    Args:
        model: Model identifier as configured for an agent or as a default.

    Returns:
        ``"openrouter"`` or ``"ollama"``.
    """
    # NOTE(review): a namespaced Ollama model ("user/model") has the same
    # shape as an OpenRouter slug and therefore still routes to OpenRouter;
    # confirm whether such models need to be supported.
    if model.count("/") == 1:
        return "openrouter"
    return "ollama"
def provider_for_agent(agent_id: str) -> str:
    """Return the provider that serves the model assigned to ``agent_id``.

    The agent's model is looked up with ``model_for_agent`` and classified
    by ``provider_for_model``. The result depends only on the shape of the
    model name; there is no fallback to the global ``PROVIDER`` here.

    Args:
        agent_id: Identifier of the agent whose provider is wanted.

    Returns:
        Whatever ``provider_for_model`` returns for the agent's model.
    """
    return provider_for_model(model_for_agent(agent_id))
def default_model() -> str:
    """Return the model used when no specific agent is given.

    Delegates to ``model_for_agent`` with the placeholder id ``"default"``.
    """
    fallback_model = model_for_agent("default")
    return fallback_model
@ -222,21 +240,27 @@ def decide_tool(messages, tools=None, agent_id=None, model=None, timeout=None):
"""
t0 = time.time()
model = model or (model_for_agent(agent_id) if agent_id else default_model())
# Per-agent provider: if the model name looks like an OpenRouter slug
# ('org/model'), route to OpenRouter regardless of the global PROVIDER.
provider = provider_for_model(model)
if provider == "openrouter" and not _openrouter_key():
return None, None, {"error": "OPENROUTER_API_KEY not set", "provider": provider,
"model": model, "latency_s": time.time() - t0}
try:
if PROVIDER == "openrouter":
if provider == "openrouter":
response = chat_openrouter(messages, tools or [], model, timeout or TIMEOUT)
else:
response = chat_ollama(messages, tools or [], model, timeout or TIMEOUT)
except Exception as e:
return None, None, {"error": str(e), "provider": PROVIDER, "model": model,
return None, None, {"error": str(e), "provider": provider, "model": model,
"latency_s": time.time() - t0}
latency = time.time() - t0
cost = None
if PROVIDER == "openrouter":
if provider == "openrouter":
cost = response.get("usage", {}).get("cost")
if PROVIDER == "openrouter":
if provider == "openrouter":
msg = response.get("choices", [{}])[0].get("message", {})
else:
msg = response.get("message", {})
@ -251,7 +275,9 @@ def decide_tool(messages, tools=None, agent_id=None, model=None, timeout=None):
args = json.loads(args)
except Exception:
args = {}
return name, args, {"provider": PROVIDER, "model": model,
return name, args, {"provider": provider, "model": model,
"latency_s": latency, "cost_usd": cost}
return None, None, {"provider": provider, "model": model,
"latency_s": latency, "cost_usd": cost}
return None, None, {"provider": PROVIDER, "model": model,
"latency_s": latency, "cost_usd": cost}

View file

@ -64,9 +64,10 @@ def test_decide_tool_parses_response(monkeypatch):
}
monkeypatch.setattr(llm, "PROVIDER", "ollama")
with mock.patch.object(llm, "chat_ollama", return_value=fake):
# pass model directly so provider_for_model picks ollama
name, args, meta = llm.decide_tool(
[{"role": "user", "content": "x"}], tools=[],
agent_id="anchor",
model="llama3.2:3b",
)
assert name == "go_to_place"
assert args == {"place": "library"}
@ -80,7 +81,7 @@ def test_decide_tool_handles_string_args(monkeypatch):
]}}
monkeypatch.setattr(llm, "PROVIDER", "ollama")
with mock.patch.object(llm, "chat_ollama", return_value=fake):
name, args, _ = llm.decide_tool([], tools=[], agent_id="anchor")
name, args, _ = llm.decide_tool([], tools=[], model="llama3.2:3b")
assert name == "idle"
assert args == {}
@ -90,7 +91,7 @@ def test_decide_tool_no_tool_call_returns_none(monkeypatch):
fake = {"message": {"content": "I think... no tool"}}
monkeypatch.setattr(llm, "PROVIDER", "ollama")
with mock.patch.object(llm, "chat_ollama", return_value=fake):
name, args, _ = llm.decide_tool([], tools=[], agent_id="anchor")
name, args, _ = llm.decide_tool([], tools=[], model="llama3.2:3b")
assert name is None
assert args is None
@ -104,14 +105,25 @@ def test_decide_tool_openrouter_response(monkeypatch):
"usage": {"total_tokens": 50, "cost": 0.0001},
}
monkeypatch.setattr(llm, "PROVIDER", "openrouter")
monkeypatch.setattr(llm, "_openrouter_key", lambda: "sk-or-test")
with mock.patch.object(llm, "chat_openrouter", return_value=fake):
name, args, meta = llm.decide_tool([], tools=[], agent_id="anchor")
name, args, meta = llm.decide_tool([], tools=[],
model="anthropic/claude-3.5-haiku")
assert name == "go_to_place"
assert args == {"place": "town_hall"}
assert meta["provider"] == "openrouter"
assert meta["cost_usd"] == 0.0001
def test_provider_for_model():
    """Slugs of the form org/model go to OpenRouter; bare names go to Ollama."""
    from engine import llm

    # Insertion order matches the order of the original assertions.
    expected_provider = {
        "anthropic/claude-3.5-haiku": "openrouter",
        "openai/gpt-4o-mini": "openrouter",
        "llama3.2:3b": "ollama",
        "gemma3": "ollama",
        "mistral": "ollama",
    }
    for model_name, provider in expected_provider.items():
        assert llm.provider_for_model(model_name) == provider
def test_per_agent_model_override(monkeypatch):
"""EMERGENCE_AGENT_<ID>_MODEL env var overrides the default."""
from engine import llm