Skills → Compact
s05 (187 LOC) → s06 (205 LOC)
LOC Delta
+18 lines
New Tools
1
compact
New Classes
0
New Functions
3
estimate_tokens, micro_compact, auto_compact
Skills
Load on Demand
187 LOC
5 tools: bash, read_file, write_file, edit_file, load_skill
planning
Compact
Three-Layer Compression
205 LOC
5 tools: bash, read_file, write_file, edit_file, compact
memory
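Before the full diff, here is a minimal, self-contained sketch (not part of the source below) of what Layer 1 pruning does to a message list. The message contents and the read_file name are hypothetical; the real micro_compact in s06 looks up the tool name via tool_use_id and keeps the last 3 results verbatim.

KEEP_RECENT = 1  # the real script keeps the last 3; 1 keeps this toy example short

messages = [
    {"role": "user", "content": [
        {"type": "tool_result", "tool_use_id": "t1", "content": "x" * 500}]},   # old, large result
    {"role": "user", "content": [
        {"type": "tool_result", "tool_use_id": "t2", "content": "README.md"}]},  # newest result
]

# Collect every tool_result, then blank out all but the most recent KEEP_RECENT.
results = [part for msg in messages if isinstance(msg["content"], list)
           for part in msg["content"] if part.get("type") == "tool_result"]
for part in results[:-KEEP_RECENT]:
    if isinstance(part["content"], str) and len(part["content"]) > 100:
        part["content"] = "[Previous: used read_file]"  # placeholder instead of 500 chars

print(messages[0]["content"][0]["content"])  # -> [Previous: used read_file]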
Source Code Diff
s05 (s05_skill_loading.py) -> s06 (s06_context_compact.py)
| 1 | 1 | #!/usr/bin/env python3 | |
| 2 | 2 | """ | |
| 3 | - | s05_skill_loading.py - Skills | |
| 3 | + | s06_context_compact.py - Compact | |
| 4 | 4 | ||
| 5 | - | Two-layer skill injection that avoids bloating the system prompt: | |
| 5 | + | Three-layer compression pipeline so the agent can work forever: | |
| 6 | 6 | ||
| 7 | - | Layer 1 (cheap): skill names in system prompt (~100 tokens/skill) | |
| 8 | - | Layer 2 (on demand): full skill body in tool_result | |
| 7 | + | Every turn: | |
| 8 | + | +------------------+ | |
| 9 | + | | Tool call result | | |
| 10 | + | +------------------+ | |
| 11 | + | | | |
| 12 | + | v | |
| 13 | + | [Layer 1: micro_compact] (silent, every turn) | |
| 14 | + | Replace tool_result content older than last 3 | |
| 15 | + | with "[Previous: used {tool_name}]" | |
| 16 | + | | | |
| 17 | + | v | |
| 18 | + | [Check: tokens > 50000?] | |
| 19 | + | | | | |
| 20 | + | no yes | |
| 21 | + | | | | |
| 22 | + | v v | |
| 23 | + | continue [Layer 2: auto_compact] | |
| 24 | + | Save full transcript to .transcripts/ | |
| 25 | + | Ask LLM to summarize conversation. | |
| 26 | + | Replace all messages with [summary]. | |
| 27 | + | | | |
| 28 | + | v | |
| 29 | + | [Layer 3: compact tool] | |
| 30 | + | Model calls compact -> immediate summarization. | |
| 31 | + | Same as auto, triggered manually. | |
| 9 | 32 | ||
| 10 | - | skills/ | |
| 11 | - | pdf/ | |
| 12 | - | SKILL.md <-- frontmatter (name, description) + body | |
| 13 | - | code-review/ | |
| 14 | - | SKILL.md | |
| 15 | - | ||
| 16 | - | System prompt: | |
| 17 | - | +--------------------------------------+ | |
| 18 | - | | You are a coding agent. | | |
| 19 | - | | Skills available: | | |
| 20 | - | | - pdf: Process PDF files... | <-- Layer 1: metadata only | |
| 21 | - | | - code-review: Review code... | | |
| 22 | - | +--------------------------------------+ | |
| 23 | - | ||
| 24 | - | When model calls load_skill("pdf"): | |
| 25 | - | +--------------------------------------+ | |
| 26 | - | | tool_result: | | |
| 27 | - | | <skill> | | |
| 28 | - | | Full PDF processing instructions | <-- Layer 2: full body | |
| 29 | - | | Step 1: ... | | |
| 30 | - | | Step 2: ... | | |
| 31 | - | | </skill> | | |
| 32 | - | +--------------------------------------+ | |
| 33 | - | ||
| 34 | - | Key insight: "Don't put everything in the system prompt. Load on demand." | |
| 33 | + | Key insight: "The agent can forget strategically and keep working forever." | |
| 35 | 34 | """ | |
| 36 | 35 | ||
| 36 | + | import json | |
| 37 | 37 | import os | |
| 38 | - | import re | |
| 39 | 38 | import subprocess | |
| 39 | + | import time | |
| 40 | 40 | from pathlib import Path | |
| 41 | 41 | ||
| 42 | 42 | from anthropic import Anthropic | |
| 43 | 43 | from dotenv import load_dotenv | |
| 44 | 44 | ||
| 45 | 45 | load_dotenv(override=True) | |
| 46 | 46 | ||
| 47 | 47 | if os.getenv("ANTHROPIC_BASE_URL"): | |
| 48 | 48 | os.environ.pop("ANTHROPIC_AUTH_TOKEN", None) | |
| 49 | 49 | ||
| 50 | 50 | WORKDIR = Path.cwd() | |
| 51 | 51 | client = Anthropic(base_url=os.getenv("ANTHROPIC_BASE_URL")) | |
| 52 | 52 | MODEL = os.environ["MODEL_ID"] | |
| 53 | - | SKILLS_DIR = WORKDIR / "skills" | |
| 54 | 53 | ||
| 54 | + | SYSTEM = f"You are a coding agent at {WORKDIR}. Use tools to solve tasks." | |
| 55 | 55 | ||
| 56 | - | # -- SkillLoader: scan skills/<name>/SKILL.md with YAML frontmatter -- | |
| 57 | - | class SkillLoader: | |
| 58 | - | def __init__(self, skills_dir: Path): | |
| 59 | - | self.skills_dir = skills_dir | |
| 60 | - | self.skills = {} | |
| 61 | - | self._load_all() | |
| 56 | + | THRESHOLD = 50000 | |
| 57 | + | TRANSCRIPT_DIR = WORKDIR / ".transcripts" | |
| 58 | + | KEEP_RECENT = 3 | |
| 62 | 59 | ||
| 63 | - | def _load_all(self): | |
| 64 | - | if not self.skills_dir.exists(): | |
| 65 | - | return | |
| 66 | - | for f in sorted(self.skills_dir.rglob("SKILL.md")): | |
| 67 | - | text = f.read_text() | |
| 68 | - | meta, body = self._parse_frontmatter(text) | |
| 69 | - | name = meta.get("name", f.parent.name) | |
| 70 | - | self.skills[name] = {"meta": meta, "body": body, "path": str(f)} | |
| 71 | 60 | ||
| 72 | - | def _parse_frontmatter(self, text: str) -> tuple: | |
| 73 | - | """Parse YAML frontmatter between --- delimiters.""" | |
| 74 | - | match = re.match(r"^---\n(.*?)\n---\n(.*)", text, re.DOTALL) | |
| 75 | - | if not match: | |
| 76 | - | return {}, text | |
| 77 | - | meta = {} | |
| 78 | - | for line in match.group(1).strip().splitlines(): | |
| 79 | - | if ":" in line: | |
| 80 | - | key, val = line.split(":", 1) | |
| 81 | - | meta[key.strip()] = val.strip() | |
| 82 | - | return meta, match.group(2).strip() | |
| 61 | + | def estimate_tokens(messages: list) -> int: | |
| 62 | + | """Rough token count: ~4 chars per token.""" | |
| 63 | + | return len(str(messages)) // 4 | |
| 83 | 64 | ||
| 84 | - | def get_descriptions(self) -> str: | |
| 85 | - | """Layer 1: short descriptions for the system prompt.""" | |
| 86 | - | if not self.skills: | |
| 87 | - | return "(no skills available)" | |
| 88 | - | lines = [] | |
| 89 | - | for name, skill in self.skills.items(): | |
| 90 | - | desc = skill["meta"].get("description", "No description") | |
| 91 | - | tags = skill["meta"].get("tags", "") | |
| 92 | - | line = f" - {name}: {desc}" | |
| 93 | - | if tags: | |
| 94 | - | line += f" [{tags}]" | |
| 95 | - | lines.append(line) | |
| 96 | - | return "\n".join(lines) | |
| 97 | 65 | ||
| 98 | - | def get_content(self, name: str) -> str: | |
| 99 | - | """Layer 2: full skill body returned in tool_result.""" | |
| 100 | - | skill = self.skills.get(name) | |
| 101 | - | if not skill: | |
| 102 | - | return f"Error: Unknown skill '{name}'. Available: {', '.join(self.skills.keys())}" | |
| 103 | - | return f"<skill name=\"{name}\">\n{skill['body']}\n</skill>" | |
| 66 | + | # -- Layer 1: micro_compact - replace old tool results with placeholders -- | |
| 67 | + | def micro_compact(messages: list) -> list: | |
| 68 | + | # Collect (msg_index, part_index, tool_result_dict) for all tool_result entries | |
| 69 | + | tool_results = [] | |
| 70 | + | for msg_idx, msg in enumerate(messages): | |
| 71 | + | if msg["role"] == "user" and isinstance(msg.get("content"), list): | |
| 72 | + | for part_idx, part in enumerate(msg["content"]): | |
| 73 | + | if isinstance(part, dict) and part.get("type") == "tool_result": | |
| 74 | + | tool_results.append((msg_idx, part_idx, part)) | |
| 75 | + | if len(tool_results) <= KEEP_RECENT: | |
| 76 | + | return messages | |
| 77 | + | # Find tool_name for each result by matching tool_use_id in prior assistant messages | |
| 78 | + | tool_name_map = {} | |
| 79 | + | for msg in messages: | |
| 80 | + | if msg["role"] == "assistant": | |
| 81 | + | content = msg.get("content", []) | |
| 82 | + | if isinstance(content, list): | |
| 83 | + | for block in content: | |
| 84 | + | if hasattr(block, "type") and block.type == "tool_use": | |
| 85 | + | tool_name_map[block.id] = block.name | |
| 86 | + | # Clear old results (keep last KEEP_RECENT) | |
| 87 | + | to_clear = tool_results[:-KEEP_RECENT] | |
| 88 | + | for _, _, result in to_clear: | |
| 89 | + | if isinstance(result.get("content"), str) and len(result["content"]) > 100: | |
| 90 | + | tool_id = result.get("tool_use_id", "") | |
| 91 | + | tool_name = tool_name_map.get(tool_id, "unknown") | |
| 92 | + | result["content"] = f"[Previous: used {tool_name}]" | |
| 93 | + | return messages | |
| 104 | 94 | ||
| 105 | 95 | ||
| 106 | - | SKILL_LOADER = SkillLoader(SKILLS_DIR) | |
| 96 | + | # -- Layer 2: auto_compact - save transcript, summarize, replace messages -- | |
| 97 | + | def auto_compact(messages: list) -> list: | |
| 98 | + | # Save full transcript to disk | |
| 99 | + | TRANSCRIPT_DIR.mkdir(exist_ok=True) | |
| 100 | + | transcript_path = TRANSCRIPT_DIR / f"transcript_{int(time.time())}.jsonl" | |
| 101 | + | with open(transcript_path, "w") as f: | |
| 102 | + | for msg in messages: | |
| 103 | + | f.write(json.dumps(msg, default=str) + "\n") | |
| 104 | + | print(f"[transcript saved: {transcript_path}]") | |
| 105 | + | # Ask LLM to summarize | |
| 106 | + | conversation_text = json.dumps(messages, default=str)[:80000] | |
| 107 | + | response = client.messages.create( | |
| 108 | + | model=MODEL, | |
| 109 | + | messages=[{"role": "user", "content": | |
| 110 | + | "Summarize this conversation for continuity. Include: " | |
| 111 | + | "1) What was accomplished, 2) Current state, 3) Key decisions made. " | |
| 112 | + | "Be concise but preserve critical details.\n\n" + conversation_text}], | |
| 113 | + | max_tokens=2000, | |
| 114 | + | ) | |
| 115 | + | summary = response.content[0].text | |
| 116 | + | # Replace all messages with compressed summary | |
| 117 | + | return [ | |
| 118 | + | {"role": "user", "content": f"[Conversation compressed. Transcript: {transcript_path}]\n\n{summary}"}, | |
| 119 | + | {"role": "assistant", "content": "Understood. I have the context from the summary. Continuing."}, | |
| 120 | + | ] | |
| 107 | 121 | ||
| 108 | - | # Layer 1: skill metadata injected into system prompt | |
| 109 | - | SYSTEM = f"""You are a coding agent at {WORKDIR}. | |
| 110 | - | Use load_skill to access specialized knowledge before tackling unfamiliar topics. | |
| 111 | 122 | ||
| 112 | - | Skills available: | |
| 113 | - | {SKILL_LOADER.get_descriptions()}""" | |
| 114 | - | ||
| 115 | - | ||
| 116 | 123 | # -- Tool implementations -- | |
| 117 | 124 | def safe_path(p: str) -> Path: | |
| 118 | 125 | path = (WORKDIR / p).resolve() | |
| 119 | 126 | if not path.is_relative_to(WORKDIR): | |
| 120 | 127 | raise ValueError(f"Path escapes workspace: {p}") | |
| 121 | 128 | return path | |
| 122 | 129 | ||
| 123 | 130 | def run_bash(command: str) -> str: | |
| 124 | 131 | dangerous = ["rm -rf /", "sudo", "shutdown", "reboot", "> /dev/"] | |
| 125 | 132 | if any(d in command for d in dangerous): | |
| 126 | 133 | return "Error: Dangerous command blocked" | |
| 127 | 134 | try: | |
| 128 | 135 | r = subprocess.run(command, shell=True, cwd=WORKDIR, | |
| 129 | 136 | capture_output=True, text=True, timeout=120) | |
| 130 | 137 | out = (r.stdout + r.stderr).strip() | |
| 131 | 138 | return out[:50000] if out else "(no output)" | |
| 132 | 139 | except subprocess.TimeoutExpired: | |
| 133 | 140 | return "Error: Timeout (120s)" | |
| 134 | 141 | ||
| 135 | 142 | def run_read(path: str, limit: int = None) -> str: | |
| 136 | 143 | try: | |
| 137 | 144 | lines = safe_path(path).read_text().splitlines() | |
| 138 | 145 | if limit and limit < len(lines): | |
| 139 | 146 | lines = lines[:limit] + [f"... ({len(lines) - limit} more)"] | |
| 140 | 147 | return "\n".join(lines)[:50000] | |
| 141 | 148 | except Exception as e: | |
| 142 | 149 | return f"Error: {e}" | |
| 143 | 150 | ||
| 144 | 151 | def run_write(path: str, content: str) -> str: | |
| 145 | 152 | try: | |
| 146 | 153 | fp = safe_path(path) | |
| 147 | 154 | fp.parent.mkdir(parents=True, exist_ok=True) | |
| 148 | 155 | fp.write_text(content) | |
| 149 | 156 | return f"Wrote {len(content)} bytes" | |
| 150 | 157 | except Exception as e: | |
| 151 | 158 | return f"Error: {e}" | |
| 152 | 159 | ||
| 153 | 160 | def run_edit(path: str, old_text: str, new_text: str) -> str: | |
| 154 | 161 | try: | |
| 155 | 162 | fp = safe_path(path) | |
| 156 | 163 | content = fp.read_text() | |
| 157 | 164 | if old_text not in content: | |
| 158 | 165 | return f"Error: Text not found in {path}" | |
| 159 | 166 | fp.write_text(content.replace(old_text, new_text, 1)) | |
| 160 | 167 | return f"Edited {path}" | |
| 161 | 168 | except Exception as e: | |
| 162 | 169 | return f"Error: {e}" | |
| 163 | 170 | ||
| 164 | 171 | ||
| 165 | 172 | TOOL_HANDLERS = { | |
| 166 | 173 | "bash": lambda **kw: run_bash(kw["command"]), | |
| 167 | 174 | "read_file": lambda **kw: run_read(kw["path"], kw.get("limit")), | |
| 168 | 175 | "write_file": lambda **kw: run_write(kw["path"], kw["content"]), | |
| 169 | 176 | "edit_file": lambda **kw: run_edit(kw["path"], kw["old_text"], kw["new_text"]), | |
| 170 | - | "load_skill": lambda **kw: SKILL_LOADER.get_content(kw["name"]), | |
| 177 | + | "compact": lambda **kw: "Manual compression requested.", | |
| 171 | 178 | } | |
| 172 | 179 | ||
| 173 | 180 | TOOLS = [ | |
| 174 | 181 | {"name": "bash", "description": "Run a shell command.", | |
| 175 | 182 | "input_schema": {"type": "object", "properties": {"command": {"type": "string"}}, "required": ["command"]}}, | |
| 176 | 183 | {"name": "read_file", "description": "Read file contents.", | |
| 177 | 184 | "input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "limit": {"type": "integer"}}, "required": ["path"]}}, | |
| 178 | 185 | {"name": "write_file", "description": "Write content to file.", | |
| 179 | 186 | "input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "content": {"type": "string"}}, "required": ["path", "content"]}}, | |
| 180 | 187 | {"name": "edit_file", "description": "Replace exact text in file.", | |
| 181 | 188 | "input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "old_text": {"type": "string"}, "new_text": {"type": "string"}}, "required": ["path", "old_text", "new_text"]}}, | |
| 182 | - | {"name": "load_skill", "description": "Load specialized knowledge by name.", | |
| 183 | - | "input_schema": {"type": "object", "properties": {"name": {"type": "string", "description": "Skill name to load"}}, "required": ["name"]}}, | |
| 189 | + | {"name": "compact", "description": "Trigger manual conversation compression.", | |
| 190 | + | "input_schema": {"type": "object", "properties": {"focus": {"type": "string", "description": "What to preserve in the summary"}}}}, | |
| 184 | 191 | ] | |
| 185 | 192 | ||
| 186 | 193 | ||
| 187 | 194 | def agent_loop(messages: list): | |
| 188 | 195 | while True: | |
| 196 | + | # Layer 1: micro_compact before each LLM call | |
| 197 | + | micro_compact(messages) | |
| 198 | + | # Layer 2: auto_compact if token estimate exceeds threshold | |
| 199 | + | if estimate_tokens(messages) > THRESHOLD: | |
| 200 | + | print("[auto_compact triggered]") | |
| 201 | + | messages[:] = auto_compact(messages) | |
| 189 | 202 | response = client.messages.create( | |
| 190 | 203 | model=MODEL, system=SYSTEM, messages=messages, | |
| 191 | 204 | tools=TOOLS, max_tokens=8000, | |
| 192 | 205 | ) | |
| 193 | 206 | messages.append({"role": "assistant", "content": response.content}) | |
| 194 | 207 | if response.stop_reason != "tool_use": | |
| 195 | 208 | return | |
| 196 | 209 | results = [] | |
| 210 | + | manual_compact = False | |
| 197 | 211 | for block in response.content: | |
| 198 | 212 | if block.type == "tool_use": | |
| 199 | - | handler = TOOL_HANDLERS.get(block.name) | |
| 200 | - | try: | |
| 201 | - | output = handler(**block.input) if handler else f"Unknown tool: {block.name}" | |
| 202 | - | except Exception as e: | |
| 203 | - | output = f"Error: {e}" | |
| 213 | + | if block.name == "compact": | |
| 214 | + | manual_compact = True | |
| 215 | + | output = "Compressing..." | |
| 216 | + | else: | |
| 217 | + | handler = TOOL_HANDLERS.get(block.name) | |
| 218 | + | try: | |
| 219 | + | output = handler(**block.input) if handler else f"Unknown tool: {block.name}" | |
| 220 | + | except Exception as e: | |
| 221 | + | output = f"Error: {e}" | |
| 204 | 222 | print(f"> {block.name}: {str(output)[:200]}") | |
| 205 | 223 | results.append({"type": "tool_result", "tool_use_id": block.id, "content": str(output)}) | |
| 206 | 224 | messages.append({"role": "user", "content": results}) | |
| 225 | + | # Layer 3: manual compact triggered by the compact tool | |
| 226 | + | if manual_compact: | |
| 227 | + | print("[manual compact]") | |
| 228 | + | messages[:] = auto_compact(messages) | |
| 207 | 229 | ||
| 208 | 230 | ||
| 209 | 231 | if __name__ == "__main__": | |
| 210 | 232 | history = [] | |
| 211 | 233 | while True: | |
| 212 | 234 | try: | |
| 213 | - | query = input("\033[36ms05 >> \033[0m") | |
| 235 | + | query = input("\033[36ms06 >> \033[0m") | |
| 214 | 236 | except (EOFError, KeyboardInterrupt): | |
| 215 | 237 | break | |
| 216 | 238 | if query.strip().lower() in ("q", "exit", ""): | |
| 217 | 239 | break | |
| 218 | 240 | history.append({"role": "user", "content": query}) | |
| 219 | 241 | agent_loop(history) | |
| 220 | 242 | response_content = history[-1]["content"] | |
| 221 | 243 | if isinstance(response_content, list): | |
| 222 | 244 | for block in response_content: | |
| 223 | 245 | if hasattr(block, "text"): | |
| 224 | 246 | print(block.text) | |
| 225 | 247 | print() |
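As a rough sanity check on the numbers above (the message content here is made up): with the ~4-characters-per-token estimate, the 50,000-token THRESHOLD is crossed once the serialized history reaches roughly 200,000 characters, which is when auto_compact would fire.

def estimate_tokens(messages: list) -> int:
    return len(str(messages)) // 4   # same heuristic as s06: ~4 chars per token

THRESHOLD = 50000
history = [{"role": "user", "content": "x" * 200_000}]   # hypothetical oversized history
print(estimate_tokens(history))              # just over 50,000 once dict overhead is counted
print(estimate_tokens(history) > THRESHOLD)  # True -> Layer 2 (auto_compact) would trigger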
