Route each LLM call by its model name instead of the global PROVIDER.

A model name containing '/' (an OpenRouter-style 'org/model' slug) is sent to
OpenRouter; a bare name (llama3.2:3b, gemma3, mistral) is sent to Ollama.
decide_tool() now reports the provider it actually used in its metadata and
returns an error dict early when an OpenRouter model is requested without
OPENROUTER_API_KEY. Tests pass `model=` explicitly so routing is deterministic.

diff --git a/.env.example b/.env.example
index e212cd8..011de2b 100644
--- a/.env.example
+++ b/.env.example
@@ -3,41 +3,36 @@
 # .env is git-ignored; do not commit secrets.
 
 # === LLM PROVIDER ===
-# Default: Ollama (local, free, no token cost)
-# Set to "openrouter" to use OpenRouter (requires API key below)
-# "auto" picks OpenRouter if OPENROUTER_API_KEY is set, else Ollama
-EMERGENCE_LLM_PROVIDER=ollama
+# "auto" picks OpenRouter if key is set, else Ollama.
+# "ollama" forces local-only. "openrouter" requires the key below.
+EMERGENCE_LLM_PROVIDER=auto
 
-# === OPENROUTER (only if EMERGENCE_LLM_PROVIDER=openrouter) ===
+# === OPENROUTER (used whenever a call is routed to OpenRouter) ===
 # Get a key at https://openrouter.ai/keys
 # Free models exist but tool-use support is limited; see README.
 OPENROUTER_API_KEY=
 
-# Default OpenRouter model (used if no per-agent override)
+# Default OpenRouter model
 EMERGENCE_OPENROUTER_MODEL=anthropic/claude-3.5-haiku
 
-# === OLLAMA (default) ===
-# Get Ollama at https://ollama.com — runs locally, no API key, no cost
-# Pull a small fast model: ollama pull llama3.2:3b
+# === OLLAMA (default, free, local) ===
+# Get Ollama at https://ollama.com. Pull once:
+#   ollama pull llama3.2:3b
 EMERGENCE_OLLAMA_URL=http://127.0.0.1:11434
 EMERGENCE_OLLAMA_MODEL=llama3.2:3b
 
-# Per-agent Ollama model overrides. With ~2 GB each, you can run all four
-# on the same machine. Mix and match for Time-Dilation experiments.
-# Examples:
-# EMERGENCE_AGENT_ANCHOR_MODEL=llama3.2:3b
-# EMERGENCE_AGENT_FLORA_MODEL=llama3.2:3b
+# === PER-AGENT MODEL ASSIGNMENT ===
+# Example: 2 cheap cloud models + 2 free local models.
+# Uncomment and edit to assign models per agent.
+
+# Cloud (cheap, good tool-use):
+# EMERGENCE_AGENT_ANCHOR_MODEL=anthropic/claude-3.5-haiku
+# EMERGENCE_AGENT_FLORA_MODEL=openai/gpt-4o-mini
+
+# Local (free, requires Ollama running):
 # EMERGENCE_AGENT_LOVELY_MODEL=llama3.2:3b
 # EMERGENCE_AGENT_SPARK_MODEL=llama3.2:3b
 
 # === BEHAVIOUR ===
-# Master switch: 0 forces rule-based engine (no LLM calls at all)
 EMERGENCE_LLM_ENABLED=1
 EMERGENCE_LLM_TIMEOUT=30
-
-# === TOKEN-SAVING TIPS ===
-# - Default model llama3.2:3b is small (~2 GB) and fast
-# - System prompt is already compact (~150 tokens)
-# - max_tokens is capped at 256 — plenty for tool calls
-# - Tool descriptions are short on purpose
-# - For zero-cost operation, use Ollama only (this file)
diff --git a/engine/llm.py b/engine/llm.py
index ae6529e..93e482f 100644
--- a/engine/llm.py
+++ b/engine/llm.py
@@ -83,6 +83,24 @@ def model_for_agent(agent_id: str) -> str:
     return OPENROUTER_MODEL if PROVIDER == "openrouter" else OLLAMA_MODEL
 
 
+def provider_for_model(model: str) -> str:
+    """Heuristic: a model name containing '/' is an OpenRouter-style slug
+    (org/model). Bare names without '/' (llama3.2:3b, gemma3, mistral) are
+    served by Ollama.
+    """
+    if "/" in model:
+        return "openrouter"
+    return "ollama"
+
+
+def provider_for_agent(agent_id: str) -> str:
+    """Pick the provider for a specific agent based on its model name.
+    The model comes from model_for_agent(); provider_for_model() classifies it.
+    """
+    model = model_for_agent(agent_id)
+    return provider_for_model(model)
+
+
 def default_model() -> str:
     return model_for_agent("default")
 
@@ -222,21 +240,27 @@ def decide_tool(messages, tools=None, agent_id=None, model=None, timeout=None):
     """
     t0 = time.time()
     model = model or (model_for_agent(agent_id) if agent_id else default_model())
+    # Per-agent provider: if the model name looks like an OpenRouter slug
+    # ('org/model'), route to OpenRouter regardless of the global PROVIDER.
+    provider = provider_for_model(model)
+    if provider == "openrouter" and not _openrouter_key():
+        return None, None, {"error": "OPENROUTER_API_KEY not set", "provider": provider,
+                            "model": model, "latency_s": time.time() - t0}
     try:
-        if PROVIDER == "openrouter":
+        if provider == "openrouter":
             response = chat_openrouter(messages, tools or [], model, timeout or TIMEOUT)
         else:
             response = chat_ollama(messages, tools or [], model, timeout or TIMEOUT)
     except Exception as e:
-        return None, None, {"error": str(e), "provider": PROVIDER, "model": model,
+        return None, None, {"error": str(e), "provider": provider, "model": model,
                             "latency_s": time.time() - t0}
     latency = time.time() - t0
 
     cost = None
-    if PROVIDER == "openrouter":
+    if provider == "openrouter":
         cost = response.get("usage", {}).get("cost")
 
-    if PROVIDER == "openrouter":
+    if provider == "openrouter":
         msg = response.get("choices", [{}])[0].get("message", {})
     else:
         msg = response.get("message", {})
@@ -251,8 +275,8 @@ def decide_tool(messages, tools=None, agent_id=None, model=None, timeout=None):
                 args = json.loads(args)
             except Exception:
                 args = {}
-        return name, args, {"provider": PROVIDER, "model": model,
+        return name, args, {"provider": provider, "model": model,
                             "latency_s": latency, "cost_usd": cost}
-    return None, None, {"provider": PROVIDER, "model": model,
+    return None, None, {"provider": provider, "model": model,
                         "latency_s": latency, "cost_usd": cost}
 
diff --git a/tests/test_llm.py b/tests/test_llm.py
index dda722d..00a7c56 100644
--- a/tests/test_llm.py
+++ b/tests/test_llm.py
@@ -64,9 +64,10 @@ def test_decide_tool_parses_response(monkeypatch):
     }
     monkeypatch.setattr(llm, "PROVIDER", "ollama")
     with mock.patch.object(llm, "chat_ollama", return_value=fake):
+        # pass model directly so provider_for_model picks ollama
         name, args, meta = llm.decide_tool(
             [{"role": "user", "content": "x"}], tools=[],
-            agent_id="anchor",
+            model="llama3.2:3b",
         )
     assert name == "go_to_place"
     assert args == {"place": "library"}
@@ -80,7 +81,7 @@ def test_decide_tool_handles_string_args(monkeypatch):
     ]}}
     monkeypatch.setattr(llm, "PROVIDER", "ollama")
     with mock.patch.object(llm, "chat_ollama", return_value=fake):
-        name, args, _ = llm.decide_tool([], tools=[], agent_id="anchor")
+        name, args, _ = llm.decide_tool([], tools=[], model="llama3.2:3b")
     assert name == "idle"
     assert args == {}
 
@@ -90,7 +91,7 @@ def test_decide_tool_no_tool_call_returns_none(monkeypatch):
     fake = {"message": {"content": "I think... no tool"}}
     monkeypatch.setattr(llm, "PROVIDER", "ollama")
     with mock.patch.object(llm, "chat_ollama", return_value=fake):
-        name, args, _ = llm.decide_tool([], tools=[], agent_id="anchor")
+        name, args, _ = llm.decide_tool([], tools=[], model="llama3.2:3b")
     assert name is None
     assert args is None
 
@@ -104,14 +105,25 @@ def test_decide_tool_openrouter_response(monkeypatch):
         "usage": {"total_tokens": 50, "cost": 0.0001},
     }
     monkeypatch.setattr(llm, "PROVIDER", "openrouter")
+    monkeypatch.setattr(llm, "_openrouter_key", lambda: "sk-or-test")
     with mock.patch.object(llm, "chat_openrouter", return_value=fake):
-        name, args, meta = llm.decide_tool([], tools=[], agent_id="anchor")
+        name, args, meta = llm.decide_tool([], tools=[],
+                                           model="anthropic/claude-3.5-haiku")
     assert name == "go_to_place"
     assert args == {"place": "town_hall"}
     assert meta["provider"] == "openrouter"
     assert meta["cost_usd"] == 0.0001
 
 
+def test_provider_for_model():
+    from engine import llm
+    assert llm.provider_for_model("anthropic/claude-3.5-haiku") == "openrouter"
+    assert llm.provider_for_model("openai/gpt-4o-mini") == "openrouter"
+    assert llm.provider_for_model("llama3.2:3b") == "ollama"
+    assert llm.provider_for_model("gemma3") == "ollama"
+    assert llm.provider_for_model("mistral") == "ollama"
+
+
 def test_per_agent_model_override(monkeypatch):
     """EMERGENCE_AGENT__MODEL env var overrides the default."""
     from engine import llm