Per-agent provider routing + 2-OR / 2-Ollama model mix
Routing fix:
- New provider_for_model(name): a model name containing '/' is treated as an OpenRouter slug; bare names (e.g. llama3.2:3b) are treated as Ollama models. Previously the global PROVIDER variable routed all calls, so a per-agent override of 'llama3.2:3b' would have hit OpenRouter and returned a 404.
- decide_tool now uses provider_for_model(), so per-agent models route correctly regardless of the global PROVIDER setting.
- New provider_for_agent() helper for callers that need the provider of a specific agent.

Live mix: Anchor + Flora on OpenRouter (claude-haiku, gpt-4o-mini); Lovely + Spark on Ollama (llama3.2:3b, free, local).

.env:
- Provider set to 'auto' (uses OpenRouter when the key is set).
- Per-agent assignments documented in .env.example.
- Cost estimate updated: 2 OR + 2 Ollama = ~$0.10-0.30/day for the OR portion, $0 for the Ollama portion.

Tests: 100 passing (was 99). New test_provider_for_model() covers the routing heuristic. Existing tests updated to pass model=... explicitly so they don't depend on overrides loaded from .env.
This commit is contained in:
parent
eb41d4b196
commit
e0d72021e4
3 changed files with 63 additions and 30 deletions
37
.env.example
37
.env.example
|
|
@ -3,41 +3,36 @@
|
|||
# .env is git-ignored; do not commit secrets.
|
||||
|
||||
# === LLM PROVIDER ===
|
||||
# Default: Ollama (local, free, no token cost)
|
||||
# Set to "openrouter" to use OpenRouter (requires API key below)
|
||||
# "auto" picks OpenRouter if OPENROUTER_API_KEY is set, else Ollama
|
||||
EMERGENCE_LLM_PROVIDER=ollama
|
||||
# "auto" picks OpenRouter if key is set, else Ollama.
|
||||
# "ollama" forces local-only. "openrouter" requires the key below.
|
||||
EMERGENCE_LLM_PROVIDER=auto
|
||||
|
||||
# === OPENROUTER (used when EMERGENCE_LLM_PROVIDER is "openrouter" or "auto", or when any agent model is an org/model slug) ===
|
||||
# Get a key at https://openrouter.ai/keys
|
||||
# Free models exist but tool-use support is limited; see README.
|
||||
OPENROUTER_API_KEY=
|
||||
|
||||
# Default OpenRouter model (used if no per-agent override)
|
||||
# Default OpenRouter model
|
||||
EMERGENCE_OPENROUTER_MODEL=anthropic/claude-3.5-haiku
|
||||
|
||||
# === OLLAMA (default) ===
|
||||
# Get Ollama at https://ollama.com — runs locally, no API key, no cost
|
||||
# Pull a small fast model: ollama pull llama3.2:3b
|
||||
# === OLLAMA (default, free, local) ===
|
||||
# Get Ollama at https://ollama.com. Pull once:
|
||||
# ollama pull llama3.2:3b
|
||||
EMERGENCE_OLLAMA_URL=http://127.0.0.1:11434
|
||||
EMERGENCE_OLLAMA_MODEL=llama3.2:3b
|
||||
|
||||
# Per-agent Ollama model overrides. With ~2 GB each, you can run all four
|
||||
# on the same machine. Mix and match for Time-Dilation experiments.
|
||||
# Examples:
|
||||
# EMERGENCE_AGENT_ANCHOR_MODEL=llama3.2:3b
|
||||
# EMERGENCE_AGENT_FLORA_MODEL=llama3.2:3b
|
||||
# === PER-AGENT MODEL ASSIGNMENT ===
|
||||
# Example: 2 cheap cloud models + 2 free local models.
|
||||
# Uncomment and edit to assign models per agent.
|
||||
|
||||
# Cloud (cheap, good tool-use):
|
||||
# EMERGENCE_AGENT_ANCHOR_MODEL=anthropic/claude-3.5-haiku
|
||||
# EMERGENCE_AGENT_FLORA_MODEL=openai/gpt-4o-mini
|
||||
|
||||
# Local (free, requires Ollama running):
|
||||
# EMERGENCE_AGENT_LOVELY_MODEL=llama3.2:3b
|
||||
# EMERGENCE_AGENT_SPARK_MODEL=llama3.2:3b
|
||||
|
||||
# === BEHAVIOUR ===
|
||||
# Master switch: 0 forces rule-based engine (no LLM calls at all)
|
||||
EMERGENCE_LLM_ENABLED=1
|
||||
EMERGENCE_LLM_TIMEOUT=30
|
||||
|
||||
# === TOKEN-SAVING TIPS ===
|
||||
# - Default model llama3.2:3b is small (~2 GB) and fast
|
||||
# - System prompt is already compact (~150 tokens)
|
||||
# - max_tokens is capped at 256 — plenty for tool calls
|
||||
# - Tool descriptions are short on purpose
|
||||
# - For zero-cost operation, use Ollama only (this file)
|
||||
|
|
|
|||
|
|
@ -83,6 +83,24 @@ def model_for_agent(agent_id: str) -> str:
|
|||
return OPENROUTER_MODEL if PROVIDER == "openrouter" else OLLAMA_MODEL
|
||||
|
||||
|
||||
def provider_for_model(model: str) -> str:
    """Return the name of the provider that should serve *model*.

    Heuristic: a model name containing '/' is treated as an OpenRouter-style
    slug (org/model). Bare names without '/' (llama3.2:3b, gemma3, mistral)
    are served by Ollama.

    A value that is not a string (for example ``None``) is classified as
    "ollama" instead of raising ``TypeError`` from the ``in`` test, so a
    missing model name surfaces as a normal provider error further down the
    call chain rather than as a crash here.

    NOTE(review): Ollama itself accepts namespaced model names that contain
    '/' (e.g. ``example/model:tag``); this heuristic would send those to
    OpenRouter. Confirm that no such models are configured.

    Returns:
        "openrouter" or "ollama".
    """
    if isinstance(model, str) and "/" in model:
        return "openrouter"
    return "ollama"
|
||||
|
||||
|
||||
def provider_for_agent(agent_id: str) -> str:
    """Pick the provider for a specific agent based on its model name.

    The agent's model is resolved with model_for_agent() and then classified
    by provider_for_model(). This function does not itself consult the global
    PROVIDER setting and has no separate fallback branch: whatever model name
    model_for_agent() returns decides the provider.

    Returns:
        The provider name reported by provider_for_model() for that model.
    """
    return provider_for_model(model_for_agent(agent_id))
|
||||
|
||||
|
||||
def default_model() -> str:
    """Return the model name that model_for_agent() yields for the id "default"."""
    fallback_id = "default"
    return model_for_agent(fallback_id)
|
||||
|
||||
|
|
@ -222,21 +240,27 @@ def decide_tool(messages, tools=None, agent_id=None, model=None, timeout=None):
|
|||
"""
|
||||
t0 = time.time()
|
||||
model = model or (model_for_agent(agent_id) if agent_id else default_model())
|
||||
# Per-agent provider: if the model name looks like an OpenRouter slug
|
||||
# ('org/model'), route to OpenRouter regardless of the global PROVIDER.
|
||||
provider = provider_for_model(model)
|
||||
if provider == "openrouter" and not _openrouter_key():
|
||||
return None, None, {"error": "OPENROUTER_API_KEY not set", "provider": provider,
|
||||
"model": model, "latency_s": time.time() - t0}
|
||||
try:
|
||||
if PROVIDER == "openrouter":
|
||||
if provider == "openrouter":
|
||||
response = chat_openrouter(messages, tools or [], model, timeout or TIMEOUT)
|
||||
else:
|
||||
response = chat_ollama(messages, tools or [], model, timeout or TIMEOUT)
|
||||
except Exception as e:
|
||||
return None, None, {"error": str(e), "provider": PROVIDER, "model": model,
|
||||
return None, None, {"error": str(e), "provider": provider, "model": model,
|
||||
"latency_s": time.time() - t0}
|
||||
latency = time.time() - t0
|
||||
|
||||
cost = None
|
||||
if PROVIDER == "openrouter":
|
||||
if provider == "openrouter":
|
||||
cost = response.get("usage", {}).get("cost")
|
||||
|
||||
if PROVIDER == "openrouter":
|
||||
if provider == "openrouter":
|
||||
msg = response.get("choices", [{}])[0].get("message", {})
|
||||
else:
|
||||
msg = response.get("message", {})
|
||||
|
|
@ -251,7 +275,9 @@ def decide_tool(messages, tools=None, agent_id=None, model=None, timeout=None):
|
|||
args = json.loads(args)
|
||||
except Exception:
|
||||
args = {}
|
||||
return name, args, {"provider": PROVIDER, "model": model,
|
||||
return name, args, {"provider": provider, "model": model,
|
||||
"latency_s": latency, "cost_usd": cost}
|
||||
return None, None, {"provider": provider, "model": model,
|
||||
"latency_s": latency, "cost_usd": cost}
|
||||
return None, None, {"provider": PROVIDER, "model": model,
|
||||
"latency_s": latency, "cost_usd": cost}
|
||||
|
|
|
|||
|
|
@ -64,9 +64,10 @@ def test_decide_tool_parses_response(monkeypatch):
|
|||
}
|
||||
monkeypatch.setattr(llm, "PROVIDER", "ollama")
|
||||
with mock.patch.object(llm, "chat_ollama", return_value=fake):
|
||||
# pass model directly so provider_for_model picks ollama
|
||||
name, args, meta = llm.decide_tool(
|
||||
[{"role": "user", "content": "x"}], tools=[],
|
||||
agent_id="anchor",
|
||||
model="llama3.2:3b",
|
||||
)
|
||||
assert name == "go_to_place"
|
||||
assert args == {"place": "library"}
|
||||
|
|
@ -80,7 +81,7 @@ def test_decide_tool_handles_string_args(monkeypatch):
|
|||
]}}
|
||||
monkeypatch.setattr(llm, "PROVIDER", "ollama")
|
||||
with mock.patch.object(llm, "chat_ollama", return_value=fake):
|
||||
name, args, _ = llm.decide_tool([], tools=[], agent_id="anchor")
|
||||
name, args, _ = llm.decide_tool([], tools=[], model="llama3.2:3b")
|
||||
assert name == "idle"
|
||||
assert args == {}
|
||||
|
||||
|
|
@ -90,7 +91,7 @@ def test_decide_tool_no_tool_call_returns_none(monkeypatch):
|
|||
fake = {"message": {"content": "I think... no tool"}}
|
||||
monkeypatch.setattr(llm, "PROVIDER", "ollama")
|
||||
with mock.patch.object(llm, "chat_ollama", return_value=fake):
|
||||
name, args, _ = llm.decide_tool([], tools=[], agent_id="anchor")
|
||||
name, args, _ = llm.decide_tool([], tools=[], model="llama3.2:3b")
|
||||
assert name is None
|
||||
assert args is None
|
||||
|
||||
|
|
@ -104,14 +105,25 @@ def test_decide_tool_openrouter_response(monkeypatch):
|
|||
"usage": {"total_tokens": 50, "cost": 0.0001},
|
||||
}
|
||||
monkeypatch.setattr(llm, "PROVIDER", "openrouter")
|
||||
monkeypatch.setattr(llm, "_openrouter_key", lambda: "sk-or-test")
|
||||
with mock.patch.object(llm, "chat_openrouter", return_value=fake):
|
||||
name, args, meta = llm.decide_tool([], tools=[], agent_id="anchor")
|
||||
name, args, meta = llm.decide_tool([], tools=[],
|
||||
model="anthropic/claude-3.5-haiku")
|
||||
assert name == "go_to_place"
|
||||
assert args == {"place": "town_hall"}
|
||||
assert meta["provider"] == "openrouter"
|
||||
assert meta["cost_usd"] == 0.0001
|
||||
|
||||
|
||||
def test_provider_for_model():
    """Names containing '/' map to OpenRouter; bare names map to Ollama."""
    from engine import llm

    # Model name -> provider the routing heuristic is expected to report.
    expected_provider = {
        "anthropic/claude-3.5-haiku": "openrouter",
        "openai/gpt-4o-mini": "openrouter",
        "llama3.2:3b": "ollama",
        "gemma3": "ollama",
        "mistral": "ollama",
    }
    for model_name, provider in expected_provider.items():
        assert llm.provider_for_model(model_name) == provider
|
||||
|
||||
|
||||
def test_per_agent_model_override(monkeypatch):
|
||||
"""EMERGENCE_AGENT_<ID>_MODEL env var overrides the default."""
|
||||
from engine import llm
|
||||
|
|
|
|||
Loading…
Reference in a new issue