Per-agent provider routing + 2-OR / 2-Ollama model mix

Routing fix: - New provider_for_model(name): a model name containing '/' is treated as an OpenRouter slug, bare names (llama3.2:3b) as Ollama. Previously the global PROVIDER variable routed all calls, so a per-agent override 'llama3.2:3b' would have hit OpenRouter and 404'd. - decide_tool now uses provider_for_model() so per-agent models route correctly regardless of global PROVIDER setting. - New provider_for_agent() helper for callers that need the provider of a specific agent. Live mix: Anchor + Flora on OpenRouter (claude-haiku, gpt-4o-mini); Lovely + Spark on Ollama (llama3.2:3b, free local). .env: - Provider set to 'auto' (uses OpenRouter when key is set) - Per-agent assignments documented in .env.example - Cost estimate updated: 2 OR + 2 Ollama = ~$0.10-0.30/day for OR portion, $0 for Ollama portion Tests: 100 passing (was 99). New test_provider_for_model() covers the routing heuristic. Existing tests updated to pass model=... explicitly so they don't depend on env-loaded .env overrides.
2026-06-15 02:53:42 +02:00 · 2026-06-15 02:53:42 +02:00 · e0d72021e4
commit e0d72021e4
parent eb41d4b196
3 changed files with 63 additions and 30 deletions
--- a/.env.example
+++ b/.env.example
@ -3,41 +3,36 @@
 # .env is git-ignored; do not commit secrets.

 # === LLM PROVIDER ===
-# Default: Ollama (local, free, no token cost)
-# Set to "openrouter" to use OpenRouter (requires API key below)
-# "auto" picks OpenRouter if OPENROUTER_API_KEY is set, else Ollama
-EMERGENCE_LLM_PROVIDER=ollama
+# "auto" picks OpenRouter if OPENROUTER_API_KEY is set, else Ollama.
+# "ollama" makes local the default; "openrouter" requires the key below. Per-agent 'org/model' names always use OpenRouter.
+EMERGENCE_LLM_PROVIDER=auto

 # === OPENROUTER (only if EMERGENCE_LLM_PROVIDER=openrouter) ===
 # Get a key at https://openrouter.ai/keys
 # Free models exist but tool-use support is limited; see README.
 OPENROUTER_API_KEY=

-# Default OpenRouter model (used if no per-agent override)
+# Default OpenRouter model
 EMERGENCE_OPENROUTER_MODEL=anthropic/claude-3.5-haiku

-# === OLLAMA (default) ===
-# Get Ollama at https://ollama.com — runs locally, no API key, no cost
-# Pull a small fast model:  ollama pull llama3.2:3b
+# === OLLAMA (default, free, local) ===
+# Get Ollama at https://ollama.com. Pull once:
+#   ollama pull llama3.2:3b
 EMERGENCE_OLLAMA_URL=http://127.0.0.1:11434
 EMERGENCE_OLLAMA_MODEL=llama3.2:3b

-# Per-agent Ollama model overrides. With ~2 GB each, you can run all four
-# on the same machine. Mix and match for Time-Dilation experiments.
-# Examples:
-# EMERGENCE_AGENT_ANCHOR_MODEL=llama3.2:3b
-# EMERGENCE_AGENT_FLORA_MODEL=llama3.2:3b
+# === PER-AGENT MODEL ASSIGNMENT ===
+# Example: 2 cheap cloud models + 2 free local models.
+# Uncomment and edit to assign models per agent.
+
+# Cloud (cheap, good tool-use):
+# EMERGENCE_AGENT_ANCHOR_MODEL=anthropic/claude-3.5-haiku
+# EMERGENCE_AGENT_FLORA_MODEL=openai/gpt-4o-mini
+
+# Local (free, requires Ollama running):
 # EMERGENCE_AGENT_LOVELY_MODEL=llama3.2:3b
 # EMERGENCE_AGENT_SPARK_MODEL=llama3.2:3b

 # === BEHAVIOUR ===
-# Master switch: 0 forces rule-based engine (no LLM calls at all)
 EMERGENCE_LLM_ENABLED=1
 EMERGENCE_LLM_TIMEOUT=30
-
-# === TOKEN-SAVING TIPS ===
-# - Default model llama3.2:3b is small (~2 GB) and fast
-# - System prompt is already compact (~150 tokens)
-# - max_tokens is capped at 256 — plenty for tool calls
-# - Tool descriptions are short on purpose
-# - For zero-cost operation, use Ollama only (this file)
--- a/engine/llm.py
+++ b/engine/llm.py
@ -83,6 +83,24 @@ def model_for_agent(agent_id: str) -> str:
    return OPENROUTER_MODEL if PROVIDER == "openrouter" else OLLAMA_MODEL


+def provider_for_model(model: str) -> str:
+    """Heuristic: a model name containing '/' is an OpenRouter-style slug
+    (org/model). Bare names without '/' (llama3.2:3b, gemma3, mistral) are
+    served by Ollama.
+    """
+    if "/" in model:
+        return "openrouter"
+    return "ollama"
+
+
+def provider_for_agent(agent_id: str) -> str:
+    """Pick the provider for a specific agent based on its model name.
+    Delegates to provider_for_model(); there is no fallback to the global PROVIDER.
+    """
+    model = model_for_agent(agent_id)
+    return provider_for_model(model)
+
+
 def default_model() -> str:
    return model_for_agent("default")

@ -222,21 +240,27 @@ def decide_tool(messages, tools=None, agent_id=None, model=None, timeout=None):
    """
    t0 = time.time()
    model = model or (model_for_agent(agent_id) if agent_id else default_model())
+    # Per-agent provider: if the model name looks like an OpenRouter slug
+    # ('org/model'), route to OpenRouter regardless of the global PROVIDER.
+    provider = provider_for_model(model)
+    if provider == "openrouter" and not _openrouter_key():
+        return None, None, {"error": "OPENROUTER_API_KEY not set", "provider": provider,
+                            "model": model, "latency_s": time.time() - t0}
    try:
-        if PROVIDER == "openrouter":
+        if provider == "openrouter":
            response = chat_openrouter(messages, tools or [], model, timeout or TIMEOUT)
        else:
            response = chat_ollama(messages, tools or [], model, timeout or TIMEOUT)
    except Exception as e:
-        return None, None, {"error": str(e), "provider": PROVIDER, "model": model,
+        return None, None, {"error": str(e), "provider": provider, "model": model,
                            "latency_s": time.time() - t0}
    latency = time.time() - t0

    cost = None
-    if PROVIDER == "openrouter":
+    if provider == "openrouter":
        cost = response.get("usage", {}).get("cost")

-    if PROVIDER == "openrouter":
+    if provider == "openrouter":
        msg = response.get("choices", [{}])[0].get("message", {})
    else:
        msg = response.get("message", {})
@ -251,7 +275,9 @@ def decide_tool(messages, tools=None, agent_id=None, model=None, timeout=None):
                args = json.loads(args)
            except Exception:
                args = {}
-        return name, args, {"provider": PROVIDER, "model": model,
+        return name, args, {"provider": provider, "model": model,
+                            "latency_s": latency, "cost_usd": cost}
+    return None, None, {"provider": provider, "model": model,
                        "latency_s": latency, "cost_usd": cost}
    return None, None, {"provider": PROVIDER, "model": model,
                        "latency_s": latency, "cost_usd": cost}
--- a/tests/test_llm.py
+++ b/tests/test_llm.py
@ -64,9 +64,10 @@ def test_decide_tool_parses_response(monkeypatch):
    }
    monkeypatch.setattr(llm, "PROVIDER", "ollama")
    with mock.patch.object(llm, "chat_ollama", return_value=fake):
+        # pass model directly so provider_for_model picks ollama
        name, args, meta = llm.decide_tool(
            [{"role": "user", "content": "x"}], tools=[],
-            agent_id="anchor",
+            model="llama3.2:3b",
        )
    assert name == "go_to_place"
    assert args == {"place": "library"}
@ -80,7 +81,7 @@ def test_decide_tool_handles_string_args(monkeypatch):
    ]}}
    monkeypatch.setattr(llm, "PROVIDER", "ollama")
    with mock.patch.object(llm, "chat_ollama", return_value=fake):
-        name, args, _ = llm.decide_tool([], tools=[], agent_id="anchor")
+        name, args, _ = llm.decide_tool([], tools=[], model="llama3.2:3b")
    assert name == "idle"
    assert args == {}

@ -90,7 +91,7 @@ def test_decide_tool_no_tool_call_returns_none(monkeypatch):
    fake = {"message": {"content": "I think... no tool"}}
    monkeypatch.setattr(llm, "PROVIDER", "ollama")
    with mock.patch.object(llm, "chat_ollama", return_value=fake):
-        name, args, _ = llm.decide_tool([], tools=[], agent_id="anchor")
+        name, args, _ = llm.decide_tool([], tools=[], model="llama3.2:3b")
    assert name is None
    assert args is None

@ -104,14 +105,25 @@ def test_decide_tool_openrouter_response(monkeypatch):
        "usage": {"total_tokens": 50, "cost": 0.0001},
    }
    monkeypatch.setattr(llm, "PROVIDER", "openrouter")
+    monkeypatch.setattr(llm, "_openrouter_key", lambda: "sk-or-test")
    with mock.patch.object(llm, "chat_openrouter", return_value=fake):
-        name, args, meta = llm.decide_tool([], tools=[], agent_id="anchor")
+        name, args, meta = llm.decide_tool([], tools=[],
+                                           model="anthropic/claude-3.5-haiku")
    assert name == "go_to_place"
    assert args == {"place": "town_hall"}
    assert meta["provider"] == "openrouter"
    assert meta["cost_usd"] == 0.0001


+def test_provider_for_model():
+    from engine import llm
+    assert llm.provider_for_model("anthropic/claude-3.5-haiku") == "openrouter"
+    assert llm.provider_for_model("openai/gpt-4o-mini") == "openrouter"
+    assert llm.provider_for_model("llama3.2:3b") == "ollama"
+    assert llm.provider_for_model("gemma3") == "ollama"
+    assert llm.provider_for_model("mistral") == "ollama"
+
+
 def test_per_agent_model_override(monkeypatch):
    """EMERGENCE_AGENT_<ID>_MODEL env var overrides the default."""
    from engine import llm