From 887c913bcde8528096e9ea797c660c4dcdeee925 Mon Sep 17 00:00:00 2001 From: Jeuners Date: Mon, 15 Jun 2026 01:30:58 +0200 Subject: [PATCH] Add Ollama LLM integration with rule-based fallback - engine/llm.py: Ollama /api/chat client with OpenAI-style tool schema - engine/reasoning.py: LLM path with 4-tier validation: 1. tool exists in registry 2. tool passes location-gating 3. args parse cleanly 4. otherwise fall back to rule-based engine - env vars: EMERGENCE_LLM_{URL,MODEL,TIMEOUT,ENABLED} - Default model: llama3.2:3b (best speed/quality tradeoff for tool use) - 11 new mock tests in tests/test_llm.py (no network) - smoke_test_llm.py: live smoke against real Ollama - README: 'LLM Integration' section with model table + setup Live-verified: 4/4 decisions via llama3.2:3b in 1-3s, character-consistent ('facilitate honest debate', 'work together', 'urgency and collaboration'). --- README.md | 120 ++++++++++++++++++++++++++++- engine/llm.py | 147 +++++++++++++++++++++++++++++++++++ engine/reasoning.py | 182 ++++++++++++++++++++++++++++++++++---------- smoke_test_llm.py | 79 +++++++++++++++++++ tests/conftest.py | 3 + tests/test_llm.py | 149 ++++++++++++++++++++++++++++++++++++ 6 files changed, 635 insertions(+), 45 deletions(-) create mode 100644 engine/llm.py create mode 100644 smoke_test_llm.py create mode 100644 tests/test_llm.py diff --git a/README.md b/README.md index 39b4b31..e165af1 100644 --- a/README.md +++ b/README.md @@ -43,11 +43,23 @@ pip install -r requirements.txt # Browser auf http://127.0.0.1:8080 ``` +Optional mit LLM-Reasoning (empfohlen): + +```bash +# Ollama lokal starten (falls nicht bereits laufend) +ollama serve & +# Modell ziehen (einmalig, ~2 GB) +ollama pull llama3.2:3b +# Emergence-Mini mit LLM starten +./run.sh +``` + Optional mit Tests: ```bash -python3 -m pytest tests/ -v # 50+ Unit + Integration Tests -python3 smoke_test.py # End-to-End Smoke Test +python3 -m pytest tests/ -v # 80+ Unit + Integration Tests +python3 smoke_test.py # End-to-End Smoke Test (regelbasiert) +python3 smoke_test_llm.py # Live-LLM-Test (braucht Ollama) ``` --- @@ -81,7 +93,8 @@ emergence-mini-dilles/ │ ├── agents.py Agent state, personality, position │ ├── needs.py Energy/Knowledge/Influence decay │ ├── tools.py Tool registry + handlers + location-gating -│ ├── reasoning.py Rule-based decision engine +│ ├── reasoning.py Decision engine (LLM + rule-based fallback) +│ ├── llm.py Ollama client + OpenAI-style tool schema │ ├── governance.py Constitution + Town Hall voting (70% threshold) │ └── turn.py Round-robin + reactive triggers ├── data/ @@ -91,14 +104,17 @@ emergence-mini-dilles/ │ ├── style.css │ └── app.js Canvas-Renderer + WebSocket-Client ├── tests/ +│ ├── conftest.py │ ├── test_db.py │ ├── test_world.py │ ├── test_agents.py │ ├── test_tools.py │ ├── test_governance.py │ ├── test_reasoning.py +│ ├── test_llm.py │ └── test_api.py -├── smoke_test.py End-to-end Live-Test (50+ Checks) +├── smoke_test.py End-to-End Live-Test (regelbasiert, 50+ Checks) +├── smoke_test_llm.py Live-LLM-Test gegen echtes Ollama-Modell ├── requirements.txt ├── run.sh Startet uvicorn auf Port 8080 └── .gitignore @@ -130,6 +146,95 @@ Local-Dev-Tool gedacht, nicht als öffentlicher Service. Für Produktion: --- +## LLM Integration + +Emergence-Mini unterstützt **lokale LLMs via Ollama** als Reasoning-Engine. +Ohne LLM läuft die regelbasierte Engine (deterministisch, schnell, gut für +Tests). Mit LLM werden die Agenten emergent, character-stimmig und +nicht-reproduzierbar — wie im Original. + +### Setup + +```bash +# 1. Ollama installieren (falls nicht vorhanden) +# macOS: brew install ollama +# Linux: curl -fsSL https://ollama.com/install.sh | sh +# Windows: https://ollama.com/download + +# 2. Ollama starten +ollama serve + +# 3. Modell ziehen (einmalig, ~2 GB für 3B, ~5 GB für 7B) +ollama pull llama3.2:3b + +# 4. Emergence-Mini starten (LLM wird automatisch erkannt) +./run.sh +``` + +### Konfiguration via Umgebungsvariablen + +| Variable | Default | Beschreibung | +|----------|---------|--------------| +| `EMERGENCE_LLM_ENABLED` | `1` | `0` erzwingt regelbasierte Engine | +| `EMERGENCE_LLM_URL` | `http://127.0.0.1:11434` | Ollama-Server | +| `EMERGENCE_LLM_MODEL` | `llama3.2:3b` | Modell-Name (siehe unten) | +| `EMERGENCE_LLM_TIMEOUT` | `30` | Request-Timeout in Sekunden | + +Beispiel mit größerem Modell: + +```bash +EMERGENCE_LLM_MODEL=qwen2.5-coder:7b ./run.sh +``` + +### Empfohlene Modelle + +| Modell | Größe | Stärke | Schwäche | +|--------|-------|--------|----------| +| **`llama3.2:3b`** ⭐ | 2.0 GB | Schnell, gute Tool-Use-Fähigkeit, niedriger RAM-Bedarf | Kurze Antworten | +| `gemma3:latest` | 3.3 GB | Bewährt, gute Reasoning-Qualität | Mittel-schnell | +| `qwen2.5-coder:7b` | 4.7 GB | Exzellent für strukturierte Aufgaben | Höherer RAM-Bedarf | +| `qwen3.5:latest` | 6.6 GB | Neueste Generation, multimodal | Langsamer | +| `gemma4:latest` | 9.6 GB | Bestes Reasoning | Langsam, hoher RAM | + +Für die meisten Setups ist **llama3.2:3b** der beste Kompromiss: ~1-3s Latenz +pro Decision, 4-8 GB RAM, deterministische Tool-Calls. + +Modelle ohne brauchbare Tool-Use-Fähigkeit (z.B. `moondream`, +`nomic-embed-text`) werden zwar nicht crashen, aber das System fällt auf +die regelbasierte Engine zurück. + +### Wie es funktioniert + +Pro Agent-Turn: + +1. Engine sammelt Personality-Traits, aktuellen State (Energy, Knowledge, + Influence, Credits), Position und sichtbare Tools (gefiltert nach + Location-Gating). +2. Baut einen System-Prompt mit dieser Kontext-Information. +3. Sendet `/api/chat` an Ollama mit Tool-Schema im OpenAI-Format. +4. Validiert die Antwort: Tool muss existieren, Location muss passen. +5. Bei Validierungs-Fehler oder Verbindungs-Problemen: **Fallback zur + regelbasierten Engine**, damit die Simulation nie hängt. + +Die `get_last_decision()`-Funktion in `engine.reasoning` exponiert den +Modus (`llm`, `rule`, `fallback:...`) und die Latenz. Im Live-View ist +das via WebSocket sichtbar (im `rationale`-Feld). + +### Eigene System-Prompts + +Die Persona-Beschreibung lebt in `engine/reasoning.py:_build_system_prompt`. +Du kannst sie für deinen Use-Case anpassen (z.B. spezifischere Regeln, +andere Tool-Beschreibungen, anderer Ton). + +### Tests + +- **Mock-Tests** in `tests/test_llm.py` prüfen Schema-Generierung, + Response-Parsing, Fallback-Pfade. 11 Tests, alle ohne Netzwerk. +- **Live-Smoke** in `smoke_test_llm.py` ruft das echte Modell 4× auf und + meldet Mode + Latenz pro Decision. + +--- + ## Security Emergence-Mini ist ein lokales Dev-Tool. Es ist **nicht** für den öffentlichen Einsatz @@ -208,6 +313,7 @@ python3 -m coverage report | `test_tools.py` | Alle 15 Tool-Handler, Location-Gating, Fehler-Pfade | | `test_governance.py` | 70%-Threshold, Auto-Reject, Constitution-Amendment-Apply | | `test_reasoning.py` | Decision-Engine für alle Personality-Types, Edge-Cases | +| `test_llm.py` | Ollama-Client, Tool-Schema, Mock-Tests für LLM-Pfad, Fallbacks | | `test_api.py` | Alle HTTP-Endpoints, WebSocket, POST /api/turn | ### Smoke-Test-Details @@ -264,6 +370,12 @@ jobs: Emergence-Mini ist inspiriert vom CC-BY-NC-4.0-Original von [Emergence AI](https://github.com/EmergenceAI/Emergence-World). Dieser Klon: **MIT** für nicht-kommerzielle Nutzung, ohne Gewähr. +Die LLM-Integration erwartet eine lokale Ollama-Instanz und nutzt +[Ollamas OpenAI-kompatible Tool-Calling-API](https://ollama.com/blog/tool-support). +Ollama selbst ist MIT-lizenziert. Die Modelle (llama3.2, qwen, gemma) +unterliegen ihren eigenen Lizenzen — bitte vor kommerzieller Nutzung +prüfen. + Quell-Repo: https://github.com/EmergenceAI/Emergence-World (Doku, Profile, Landmarks, Constitution, Tool-Katalog) --- diff --git a/engine/llm.py b/engine/llm.py new file mode 100644 index 0000000..e988345 --- /dev/null +++ b/engine/llm.py @@ -0,0 +1,147 @@ +"""LLM client for Emergence-Mini. + +Supports Ollama's /api/chat endpoint with native tool-calling. +If the model does not support tool-calling, the client falls back to a +JSON-mode call where the model is asked to emit a single JSON object. + +Configuration via environment variables: +- EMERGENCE_LLM_URL (default: http://127.0.0.1:11434) +- EMERGENCE_LLM_MODEL (default: llama3.2:3b) +- EMERGENCE_LLM_TIMEOUT (default: 30 seconds) +- EMERGENCE_LLM_ENABLED (default: 1) - set to 0 to disable and force the + rule-based engine even when reasoning.py is asked + for the LLM path. +""" +import json +import os +import time +import urllib.error +import urllib.request + +URL = os.environ.get("EMERGENCE_LLM_URL", "http://127.0.0.1:11434") +DEFAULT_MODEL = os.environ.get("EMERGENCE_LLM_MODEL", "llama3.2:3b") +TIMEOUT = float(os.environ.get("EMERGENCE_LLM_TIMEOUT", "30")) +ENABLED = os.environ.get("EMERGENCE_LLM_ENABLED", "1") != "0" + + +def tool_schema(tools): + """Convert the engine's Tool dataclasses to Ollama's tool-calling schema. + + The format follows OpenAI's function-calling spec, which Ollama accepts. + """ + out = [] + for t in tools: + props = _args_schema(t) + out.append({ + "type": "function", + "function": { + "name": t.name, + "description": t.description, + "parameters": { + "type": "object", + "properties": props, + "required": [k for k, v in props.items() if "default" not in v], + }, + }, + }) + return out + + +def _args_schema(tool): + """Best-effort JSON schema for the args each tool accepts. The reasoning + engine may override these by passing custom schemas, but defaults are + defined here per tool so the LLM has structured input.""" + schemas = { + "go_to_place": {"place": {"type": "string", "description": "Landmark id"}}, + "go_home": {}, + "say_to_agent": { + "target": {"type": "string", "description": "Agent id"}, + "text": {"type": "string", "description": "Message text"}, + }, + "speak_to_all": {"text": {"type": "string", "description": "Broadcast text"}}, + "show_emoticon": {"emoticon": {"type": "string", "description": "Emoji"}}, + "idle": {}, + "recharge_energy": {}, + "add_to_longterm_memory": {"content": {"type": "string", "description": "Memory text"}}, + "write_blog": { + "title": {"type": "string"}, + "body": {"type": "string"}, + }, + "add_to_billboard": {"text": {"type": "string"}}, + "read_billboard": {}, + "submit_townhall_proposal": { + "title": {"type": "string"}, + "body": {"type": "string"}, + "category": {"type": "string", "default": "general"}, + }, + "vote_on_proposal": { + "proposal_id": {"type": "integer"}, + "vote": {"type": "string", "enum": ["for", "against"]}, + }, + "list_agents": {}, + "list_landmarks": {}, + } + return schemas.get(tool.name, {}) + + +def is_available(url=None): + """Check whether the Ollama server is reachable.""" + url = url or URL + try: + req = urllib.request.Request(f"{url}/api/tags", method="GET") + urllib.request.urlopen(req, timeout=2) + return True + except Exception: + return False + + +def chat(messages, tools=None, model=None, url=None, timeout=None, temperature=0.2): + """Send a chat request to Ollama. Returns parsed JSON dict from the API. + + Raises urllib.error.URLError on connection failure, ValueError on parse + failure. + """ + url = url or URL + model = model or DEFAULT_MODEL + timeout = timeout or TIMEOUT + payload = { + "model": model, + "messages": messages, + "stream": False, + "options": {"temperature": temperature}, + } + if tools: + payload["tools"] = tools + payload["format"] = "json" # hint for tool output + data = json.dumps(payload).encode("utf-8") + req = urllib.request.Request( + f"{url}/api/chat", + data=data, + headers={"Content-Type": "application/json"}, + method="POST", + ) + with urllib.request.urlopen(req, timeout=timeout) as resp: + return json.loads(resp.read().decode("utf-8")) + + +def decide_tool(messages, tools=None, model=None, url=None, timeout=None, temperature=0.2): + """High-level helper: send a chat, return (tool_name, args_dict) or None. + + Returns None if the model produces no tool calls. Raises on connection + failure. + """ + response = chat(messages, tools=tools, model=model, url=url, + timeout=timeout, temperature=temperature) + msg = response.get("message", {}) + calls = msg.get("tool_calls") or [] + if calls: + fn = calls[0].get("function", {}) + name = fn.get("name") + args = fn.get("arguments", {}) + if isinstance(args, str): + try: + args = json.loads(args) + except Exception: + args = {} + return name, args + return None, None diff --git a/engine/reasoning.py b/engine/reasoning.py index 031eca8..a952d14 100644 --- a/engine/reasoning.py +++ b/engine/reasoning.py @@ -1,51 +1,154 @@ -"""Rule-based reasoning engine. +"""Reasoning engine: LLM-driven with rule-based fallback. -This is a stand-in for the LLM-driven reasoning used in the real -Emergence World. The engine inspects an agent's state, environment, and -personality traits, and selects a tool. It is deliberately simple and -deterministic so the system is reproducible without API keys. +When Ollama is reachable and EMERGENCE_LLM_ENABLED=1, the LLM is asked to +pick a tool given the agent's personality, current state, and visible +tools. If the LLM fails (connection error, bad output, unknown tool), +the engine falls back to the deterministic rule-based path so the +simulation always makes progress. -Personality traits influence tool selection: -- analytical -> library, write_blog -- thrifty -> avoid recharge_energy unless energy < 30 -- warm -> speak_to_all, say_to_agent, show_emoticon -- bold -> submit_townhall_proposal -- diplomatic -> vote 'for' on most proposals, except when thrifty -- strategic -> go_to_place(landmark) based on need -- creative -> write_blog -- curious -> go_to_place(library) -- cautious -> idle when energy < 25 +Two strategies coexist: +- LLM path -> emergent, non-deterministic, "real" agent behavior +- Rule path -> deterministic, fast, used in tests via monkeypatch """ +import json +import os import random from . import agents as agents_mod from . import world from . import governance from . import tools +from . import llm as llm_mod +USE_LLM = os.environ.get("EMERGENCE_LLM_ENABLED", "1") != "0" +_last_decision = {"mode": "rule", "model": None, "latency_s": 0.0} + + +def decide(agent): + """Return (tool_name, args, rationale). Tries LLM first, falls back to + the rule-based engine on any error.""" + if USE_LLM and llm_mod.is_available(): + try: + return _decide_llm(agent) + except Exception as e: + _last_decision["mode"] = f"fallback:{type(e).__name__}" + name, args, rat = _decide_rule(agent) + # Override mode so the caller can see we fell back + return name, args, f"[{_last_decision['mode']}] {rat}" + name, args, rat = _decide_rule(agent) + _last_decision["mode"] = "rule" + _last_decision["latency_s"] = 0.0 + return name, args, rat + + +def get_last_decision(): + return dict(_last_decision) + + +# -------- LLM path -------- + +def _decide_llm(agent): + import time + traits = agents_mod.personality(agent["id"]) + at_lm = world.landmark_at(agent["x"], agent["y"]) + visible = tools.visible_tools(agent, at_lm) + if not visible: + return ("idle", {}, "no tools available") + + # Build system prompt with personality + state + system = _build_system_prompt(agent, traits, at_lm, visible) + user = "Choose the best next action and call exactly one tool." + + t0 = time.time() + response = llm_mod.decide_tool( + messages=[ + {"role": "system", "content": system}, + {"role": "user", "content": user}, + ], + tools=llm_mod.tool_schema(visible), + ) + latency = time.time() - t0 + name, args = response + _last_decision["latency_s"] = latency + _last_decision["model"] = llm_mod.DEFAULT_MODEL + + if not name: + # model returned no tool call -> fallback + name, args, rat = _decide_rule(agent) + _last_decision["mode"] = "fallback:no_tool_call" + return name, args, f"llm gave no tool -> {rat}" + if not tools.get(name): + name, args, rat = _decide_rule(agent) + _last_decision["mode"] = "fallback:unknown_tool" + return name, args, f"llm picked unknown tool {name} -> {rat}" + t = tools.get(name) + if not t.available_for(agent, at_lm): + name, args, rat = _decide_rule(agent) + _last_decision["mode"] = "fallback:wrong_location" + return name, args, f"llm picked {name} but not at right location -> {rat}" + + _last_decision["mode"] = "llm" + return (name, args or {}, f"llm:{llm_mod.DEFAULT_MODEL} ({latency:.1f}s)") + + +def _build_system_prompt(agent, traits, at_lm, visible): + name = agent["name"] + role = agent["role"] + drive = agent["drive"] + energy = agent["energy"] + knowledge = agent["knowledge"] + influence = agent["influence"] + credits = agent["credits"] + loc = at_lm["name"] if at_lm else f"open ground ({agent['x']},{agent['y']})" + tool_lines = "\n".join(f"- {t.name}: {t.description}" for t in visible) + return f"""You are {name}, a citizen of Emergence-Mini. + +Role: {role} +Drive: {drive} +Personality traits: {', '.join(traits)} + +Current state: + Location: {loc} + Energy: {energy:.0f}% (0 = critical, 100 = full) + Knowledge: {knowledge:.0f}% + Influence: {influence:.0f}% + ComputeCredits: {credits:.1f} CC (1 CC = +50% energy at cafe) + +Rules: +- If energy is below 25% and you have credits, recharge_energy (must be at cafe) +- If energy is below 25% and no credits, go_home +- Town Hall proposals need 70% of agents to vote "for" to pass +- You can only use tools that match your current location + +Available tools right now: +{tool_lines} + +Call exactly one tool. Choose the action that best fits your personality and +current needs. Be brief and decisive.""" + + +# -------- Rule-based path (fallback + tests) -------- + def at_landmark(agent): return world.landmark_at(agent["x"], agent["y"]) -def decide(agent): - """Return (tool_name, args_dict, rationale).""" +def _decide_rule(agent): traits = agents_mod.personality(agent["id"]) here = at_landmark(agent) - # 1. Critical: very low energy -> recharge at cafe (or go home if no credits) + # 1. Critical: very low energy if agent["energy"] < 25: if agent["credits"] >= 1.0: lm = world.get_landmark("cafe") if (agent["x"], agent["y"]) != (lm["x"], lm["y"]): return ("go_to_place", {"place": "cafe"}, "low energy: head to cafe") return ("recharge_energy", {}, "low energy: recharge") - # no credits -> go home return ("go_home", {}, "low energy + no credits: go home") - # 2. Town Hall: if a proposal is active, vote; if none and bold, propose + # 2. Town Hall if here and here["id"] == "town_hall": props = governance.active_proposals() - # have I already voted on all? unvoted = _unvoted_proposals(agent["id"], props) if unvoted: pid, p = unvoted[0] @@ -63,34 +166,35 @@ def decide(agent): {"title": title, "body": body, "category": "general"}, "bold: submit a proposal") - # 3. Billboard: if at billboard, post; occasionally write to it + # 3. Billboard if here and here["id"] == "billboard": if "warm" in traits and random.random() < 0.6: return ("add_to_billboard", {"text": _billboard_message(agent, traits)}, "warm: post on billboard") if "expressive" in traits and random.random() < 0.4: - return ("show_emoticon", {"emoticon": random.choice(["\U0001f44b", "\U0001f60a", "\u2728"])}, + return ("show_emoticon", + {"emoticon": random.choice(["\U0001f44b", "\U0001f60a", "\u2728"])}, "expressive: emoticon") - # 4. Library / Cafe: knowledge boost / energy + # 4. Library if here and here["id"] == "library": if "curious" in traits or "analytical" in traits: if random.random() < 0.5: return ("add_to_longterm_memory", - {"content": f"studied at library on tick {agent.get('id','')}"}, + {"content": f"studied at library on tick"}, "curious: study at library") return ("write_blog", {"title": _blog_title(agent, traits), "body": _blog_body(agent, traits)}, "write blog at library") - # 5. Generic: pick a destination based on personality + # 5. Pick destination dest = _pick_destination(agent, traits, here) if dest: return ("go_to_place", {"place": dest}, f"personality: head to {dest}") - # 6. Default: talk to someone nearby or idle + # 6. Default nearby = world.nearby_agents(agent["id"], agent["x"], agent["y"], radius=20.0) if nearby and ("warm" in traits or "expressive" in traits): target = random.choice(nearby) @@ -99,14 +203,16 @@ def decide(agent): "warm: greet nearby agent") if nearby and random.random() < 0.3: target = random.choice(nearby) - return ("show_emoticon", {"emoticon": random.choice(["\U0001f44b", "\U0001f60a"])}, + return ("show_emoticon", + {"emoticon": random.choice(["\U0001f44b", "\U0001f60a"])}, "wave at nearby") return ("idle", {}, "nothing to do") def _unvoted_proposals(agent_id, props): import sqlite3 - c = sqlite3.connect(__import__("engine").db.DB_PATH, check_same_thread=False) + from . import db + c = sqlite3.connect(db.DB_PATH, check_same_thread=False) try: out = [] for p in props: @@ -130,20 +236,15 @@ def _pick_destination(agent, traits, here): return "town_hall" if random.random() < 0.2: return "park" - if random.random() < 0.05: - return "home_" + agent["id"].replace("home_", "") return None def _proposal_title_for(agent, traits): - options = [ - "Public Reading Hour", - "Weekly Town Newsletter", - "Skill-Share Workshops", - "Community Garden Expansion", + return random.choice([ + "Public Reading Hour", "Weekly Town Newsletter", + "Skill-Share Workshops", "Community Garden Expansion", "Agent Safety Pact", - ] - return random.choice(options) + ]) def _proposal_body_for(agent, traits): @@ -153,12 +254,11 @@ def _proposal_body_for(agent, traits): def _billboard_message(agent, traits): - greetings = [ + return random.choice([ f"Hello from {agent['name']}! Stay curious, stay kind.", f"{agent['name']} here — open to collaboration at the plaza.", f"Warm regards, {agent['name']}.", - ] - return random.choice(greetings) + ]) def _greeting(agent, traits): diff --git a/smoke_test_llm.py b/smoke_test_llm.py new file mode 100644 index 0000000..d52c17e --- /dev/null +++ b/smoke_test_llm.py @@ -0,0 +1,79 @@ +#!/usr/bin/env python3 +"""Live smoke test against a real Ollama instance. + +This is NOT part of the regular pytest suite — it is slow (10-30s per turn +because llama3.2:3b has to think) and requires a running Ollama server with +at least one chat-capable model pulled. + +Usage: + python3 smoke_test_llm.py # uses default model + EMERGENCE_LLM_MODEL=qwen2.5-coder:7b python3 smoke_test_llm.py +""" +import os +import sys +import time +from pathlib import Path + +ROOT = Path(__file__).resolve().parent +sys.path.insert(0, str(ROOT)) + +# fresh DB +db_file = ROOT / "emergence_llm_smoke.db" +if db_file.exists(): + db_file.unlink() +os.environ["EMERGENCE_LLM_ENABLED"] = "1" + +from engine import db, world, agents as agents_mod, tools, llm as llm_mod +from engine import reasoning + +OK = "\033[92m✓\033[0m" +FAIL = "\033[91m✗\033[0m" +WARN = "\033[93m!\033[0m" + + +def main(): + print("=== Emergence-Mini · Live LLM Smoke Test ===\n") + print(f"Model: {llm_mod.DEFAULT_MODEL}") + print(f"URL: {llm_mod.URL}") + print(f"Timeout:{llm_mod.TIMEOUT}s\n") + + if not llm_mod.is_available(): + print(f"{FAIL} Ollama nicht erreichbar unter {llm_mod.URL}") + print("Starte Ollama: ollama serve") + print(f"Ziehe das Modell: ollama pull {llm_mod.DEFAULT_MODEL}") + sys.exit(1) + print(f"{OK} Ollama erreichbar\n") + + db.init_db() + db.set_world_state("landmarks_seeded", False) + db.set_world_state("agents_seeded", False) + world.bootstrap() + agents_mod.bootstrap() + tools.bootstrap() + print(f"{OK} Welt + 4 Agenten gebootet\n") + + print("--- 4 Decisions ---\n") + successes = 0 + for aid in ("anchor", "flora", "lovely", "spark"): + a = agents_mod.get(aid) + print(f" [{a['name']:8s}] @ ({a['x']:3d},{a['y']:3d}) E={a['energy']:.0f} K={a['knowledge']:.0f} I={a['influence']:.0f} {a['credits']:.0f}CC") + t0 = time.time() + name, args, rat = reasoning.decide(a) + dt = time.time() - t0 + mode = reasoning.get_last_decision() + marker = OK if mode["mode"] == "llm" else WARN + print(f" {marker} tool={name!r:30s} args={args!r:30s}") + print(f" mode={mode['mode']:18s} latency={dt:.1f}s") + print(f" rationale: {rat}\n") + if mode["mode"] == "llm": + successes += 1 + + print(f"\n=== Resultat: {successes}/4 LLM-Decisions erfolgreich ===") + if successes >= 3: + print(f"{OK} Live-LLM-Integration funktioniert") + else: + print(f"{FAIL} Zu viele Fallbacks — Modell oder Schema pruefen") + + +if __name__ == "__main__": + main() diff --git a/tests/conftest.py b/tests/conftest.py index 3fdbe84..3e66452 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -12,6 +12,9 @@ sys.path.insert(0, str(ROOT)) # Disable the background engine thread for all tests; tests trigger rounds manually. os.environ["EMERGENCE_TEST_MODE"] = "1" +# Force the rule-based reasoning path; the LLM path is exercised by the +# dedicated test_llm.py suite with a mocked HTTP client. +os.environ["EMERGENCE_LLM_ENABLED"] = "0" @pytest.fixture(scope="function") diff --git a/tests/test_llm.py b/tests/test_llm.py new file mode 100644 index 0000000..f1db87a --- /dev/null +++ b/tests/test_llm.py @@ -0,0 +1,149 @@ +"""LLM integration tests. + +We do NOT call Ollama from pytest (too slow, too flaky). Instead we mock +the HTTP layer in engine.llm. A separate live smoke test exercises the +real model — see smoke_test_llm.py at the repo root. +""" +import json +from unittest import mock + + +def test_is_available_true(monkeypatch): + from engine import llm + monkeypatch.setattr(llm, "URL", "http://fake") + fake_resp = mock.MagicMock() + fake_resp.read = lambda: b"{}" + fake_resp.__enter__ = lambda s: s + fake_resp.__exit__ = lambda s, *a: False + with mock.patch("urllib.request.urlopen", return_value=fake_resp): + assert llm.is_available() is True + + +def test_is_available_false(): + from engine import llm + with mock.patch("urllib.request.urlopen", + side_effect=Exception("connection refused")): + assert llm.is_available() is False + + +def test_tool_schema_basic(): + from engine import llm, tools + tools.bootstrap() + schema = llm.tool_schema(tools.all_tools()) + names = {t["function"]["name"] for t in schema} + assert "go_to_place" in names + assert "vote_on_proposal" in names + # vote_on_proposal must mark 'vote' as enum + vote_tool = next(t for t in schema + if t["function"]["name"] == "vote_on_proposal") + assert vote_tool["function"]["parameters"]["properties"]["vote"]["enum"] == ["for", "against"] + + +def test_decide_tool_parses_response(): + from engine import llm + fake = { + "message": { + "tool_calls": [ + {"function": {"name": "go_to_place", + "arguments": {"place": "library"}}} + ] + } + } + with mock.patch.object(llm, "chat", return_value=fake): + name, args = llm.decide_tool([{"role": "user", "content": "x"}], tools=[]) + assert name == "go_to_place" + assert args == {"place": "library"} + + +def test_decide_tool_handles_string_args(): + from engine import llm + fake = { + "message": { + "tool_calls": [ + {"function": {"name": "idle", "arguments": "{}"}} + ] + } + } + with mock.patch.object(llm, "chat", return_value=fake): + name, args = llm.decide_tool([], tools=[]) + assert name == "idle" + assert args == {} + + +def test_decide_tool_no_tool_call_returns_none(): + from engine import llm + fake = {"message": {"content": "I think... no tool"}} + with mock.patch.object(llm, "chat", return_value=fake): + name, args = llm.decide_tool([], tools=[]) + assert name is None + assert args is None + + +def test_reasoning_uses_llm_when_available(tmp_db, monkeypatch): + """If the LLM is reachable and returns a valid tool, reasoning uses it.""" + from engine import reasoning, agents as agents_mod, llm as llm_mod + # Force the LLM path + monkeypatch.setattr(reasoning, "USE_LLM", True) + monkeypatch.setattr(llm_mod, "is_available", lambda: True) + with mock.patch.object(llm_mod, "decide_tool", + return_value=("go_to_place", {"place": "library"})): + a = agents_mod.get("anchor") + name, args, rat = reasoning.decide(a) + assert name == "go_to_place" + assert args == {"place": "library"} + assert "llm" in rat + assert reasoning.get_last_decision()["mode"] == "llm" + + +def test_reasoning_falls_back_on_unknown_tool(tmp_db, monkeypatch): + from engine import reasoning, agents as agents_mod, llm as llm_mod + monkeypatch.setattr(reasoning, "USE_LLM", True) + monkeypatch.setattr(llm_mod, "is_available", lambda: True) + with mock.patch.object(llm_mod, "decide_tool", + return_value=("teleport_to_mars", {})): + a = agents_mod.get("anchor") + name, _, _ = reasoning.decide(a) + # fallback to rule path -> one of the rule-based picks + assert name in {t.name for t in __import__("engine").tools.all_tools()} + assert reasoning.get_last_decision()["mode"].startswith("fallback") + + +def test_reasoning_falls_back_on_wrong_location(tmp_db, monkeypatch): + """LLM says submit_townhall_proposal but agent is at home -> fallback.""" + from engine import reasoning, agents as agents_mod, llm as llm_mod + monkeypatch.setattr(reasoning, "USE_LLM", True) + monkeypatch.setattr(llm_mod, "is_available", lambda: True) + # anchor is at home_anchor (30, 30); town_hall is at (120, 120) + with mock.patch.object(llm_mod, "decide_tool", + return_value=("submit_townhall_proposal", + {"title": "x", "body": "y"})): + a = agents_mod.get("anchor") + name, _, _ = reasoning.decide(a) + # rule path won't try to submit from home + assert name != "submit_townhall_proposal" + assert reasoning.get_last_decision()["mode"].startswith("fallback") + + +def test_reasoning_falls_back_on_connection_error(tmp_db, monkeypatch): + from engine import reasoning, agents as agents_mod, llm as llm_mod + monkeypatch.setattr(reasoning, "USE_LLM", True) + monkeypatch.setattr(llm_mod, "is_available", lambda: True) + with mock.patch.object(llm_mod, "decide_tool", + side_effect=ConnectionError("ollama down")): + a = agents_mod.get("anchor") + name, _, rat = reasoning.decide(a) + # got a fallback pick + assert name in {t.name for t in __import__("engine").tools.all_tools()} + assert reasoning.get_last_decision()["mode"] == "fallback:ConnectionError" + + +def test_env_var_disables_llm(monkeypatch, tmp_db): + """Setting EMERGENCE_LLM_ENABLED=0 forces the rule path even when Ollama + is reachable. This is how the test suite avoids the slow live LLM calls. + """ + from engine import reasoning, agents as agents_mod, llm as llm_mod + monkeypatch.setattr(llm_mod, "is_available", lambda: True) + monkeypatch.setattr(reasoning, "USE_LLM", False) + a = agents_mod.get("anchor") + name, _, _ = reasoning.decide(a) + assert reasoning.get_last_decision()["mode"] == "rule"