Skills → Compact
s05 (187 LOC) → s06 (205 LOC)
LOC Delta
+18 lines
New Tools
1
compact
New Classes
0
New Functions
3
estimate_tokens, micro_compact, auto_compact
Skills
Load on Demand
187 LOC
5 tools: bash, read_file, write_file, edit_file, load_skill
planning
Compact
Three-Layer Compression
205 LOC
5 tools: bash, read_file, write_file, edit_file, compact
memory
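Before the full diff, here is a minimal, self-contained sketch (not part of the source below) of what Layer 1 pruning does to a message list. The message contents and the read_file name are hypothetical; the real micro_compact in s06 looks up the tool name via tool_use_id and keeps the last 3 results verbatim.

KEEP_RECENT = 1  # the real script keeps the last 3; 1 keeps this toy example short

messages = [
    {"role": "user", "content": [
        {"type": "tool_result", "tool_use_id": "t1", "content": "x" * 500}]},   # old, large result
    {"role": "user", "content": [
        {"type": "tool_result", "tool_use_id": "t2", "content": "README.md"}]},  # newest result
]

# Collect every tool_result, then blank out all but the most recent KEEP_RECENT.
results = [part for msg in messages if isinstance(msg["content"], list)
           for part in msg["content"] if part.get("type") == "tool_result"]
for part in results[:-KEEP_RECENT]:
    if isinstance(part["content"], str) and len(part["content"]) > 100:
        part["content"] = "[Previous: used read_file]"  # placeholder instead of 500 chars

print(messages[0]["content"][0]["content"])  # -> [Previous: used read_file]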
Source Code Diff
s05 (s05_skill_loading.py) -> s06 (s06_context_compact.py)
| 1 | 1 | #!/usr/bin/env python3 | |
| 2 | 2 | """ | |
| 3 | - | s05_skill_loading.py - Skills | |
| 3 | + | s06_context_compact.py - Compact | |
| 4 | 4 | ||
| 5 | - | Two-layer skill injection that avoids bloating the system prompt: | |
| 5 | + | Three-layer compression pipeline so the agent can work forever: | |
| 6 | 6 | ||
| 7 | - | Layer 1 (cheap): skill names in system prompt (~100 tokens/skill) | |
| 8 | - | Layer 2 (on demand): full skill body in tool_result | |
| 7 | + | Every turn: | |
| 8 | + | +------------------+ | |
| 9 | + | | Tool call result | | |
| 10 | + | +------------------+ | |
| 11 | + | | | |
| 12 | + | v | |
| 13 | + | [Layer 1: micro_compact] (silent, every turn) | |
| 14 | + | Replace tool_result content older than last 3 | |
| 15 | + | with "[Previous: used {tool_name}]" | |
| 16 | + | | | |
| 17 | + | v | |
| 18 | + | [Check: tokens > 50000?] | |
| 19 | + | | | | |
| 20 | + | no yes | |
| 21 | + | | | | |
| 22 | + | v v | |
| 23 | + | continue [Layer 2: auto_compact] | |
| 24 | + | Save full transcript to .transcripts/ | |
| 25 | + | Ask LLM to summarize conversation. | |
| 26 | + | Replace all messages with [summary]. | |
| 27 | + | | | |
| 28 | + | v | |
| 29 | + | [Layer 3: compact tool] | |
| 30 | + | Model calls compact -> immediate summarization. | |
| 31 | + | Same as auto, triggered manually. | |
| 9 | 32 | ||
| 10 | - | skills/ | |
| 11 | - | pdf/ | |
| 12 | - | SKILL.md <-- frontmatter (name, description) + body | |
| 13 | - | code-review/ | |
| 14 | - | SKILL.md | |
| 15 | - | ||
| 16 | - | System prompt: | |
| 17 | - | +--------------------------------------+ | |
| 18 | - | | You are a coding agent. | | |
| 19 | - | | Skills available: | | |
| 20 | - | | - pdf: Process PDF files... | <-- Layer 1: metadata only | |
| 21 | - | | - code-review: Review code... | | |
| 22 | - | +--------------------------------------+ | |
| 23 | - | ||
| 24 | - | When model calls load_skill("pdf"): | |
| 25 | - | +--------------------------------------+ | |
| 26 | - | | tool_result: | | |
| 27 | - | | <skill> | | |
| 28 | - | | Full PDF processing instructions | <-- Layer 2: full body | |
| 29 | - | | Step 1: ... | | |
| 30 | - | | Step 2: ... | | |
| 31 | - | | </skill> | | |
| 32 | - | +--------------------------------------+ | |
| 33 | - | ||
| 34 | - | Key insight: "Don't put everything in the system prompt. Load on demand." | |
| 33 | + | Key insight: "The agent can forget strategically and keep working forever." | |
| 35 | 34 | """ | |
| 36 | 35 | ||
| 36 | + | import json | |
| 37 | 37 | import os | |
| 38 | - | import re | |
| 39 | 38 | import subprocess | |
| 39 | + | import time | |
| 40 | 40 | from pathlib import Path | |
| 41 | 41 | ||
| 42 | 42 | from anthropic import Anthropic | |
| 43 | 43 | from dotenv import load_dotenv | |
| 44 | 44 | ||
| 45 | 45 | load_dotenv(override=True) | |
| 46 | 46 | ||
| 47 | 47 | if os.getenv("ANTHROPIC_BASE_URL"): | |
| 48 | 48 | os.environ.pop("ANTHROPIC_AUTH_TOKEN", None) | |
| 49 | 49 | ||
| 50 | 50 | WORKDIR = Path.cwd() | |
| 51 | 51 | client = Anthropic(base_url=os.getenv("ANTHROPIC_BASE_URL")) | |
| 52 | 52 | MODEL = os.environ["MODEL_ID"] | |
| 53 | - | SKILLS_DIR = WORKDIR / "skills" | |
| 54 | 53 | ||
| 54 | + | SYSTEM = f"You are a coding agent at {WORKDIR}. Use tools to solve tasks." | |
| 55 | 55 | ||
| 56 | - | # -- SkillLoader: scan skills/<name>/SKILL.md with YAML frontmatter -- | |
| 57 | - | class SkillLoader: | |
| 58 | - | def __init__(self, skills_dir: Path): | |
| 59 | - | self.skills_dir = skills_dir | |
| 60 | - | self.skills = {} | |
| 61 | - | self._load_all() | |
| 56 | + | THRESHOLD = 50000 | |
| 57 | + | TRANSCRIPT_DIR = WORKDIR / ".transcripts" | |
| 58 | + | KEEP_RECENT = 3 | |
| 62 | 59 | ||
| 63 | - | def _load_all(self): | |
| 64 | - | if not self.skills_dir.exists(): | |
| 65 | - | return | |
| 66 | - | for f in sorted(self.skills_dir.rglob("SKILL.md")): | |
| 67 | - | text = f.read_text() | |
| 68 | - | meta, body = self._parse_frontmatter(text) | |
| 69 | - | name = meta.get("name", f.parent.name) | |
| 70 | - | self.skills[name] = {"meta": meta, "body": body, "path": str(f)} | |
| 71 | 60 | ||
| 72 | - | def _parse_frontmatter(self, text: str) -> tuple: | |
| 73 | - | """Parse YAML frontmatter between --- delimiters.""" | |
| 74 | - | match = re.match(r"^---\n(.*?)\n---\n(.*)", text, re.DOTALL) | |
| 75 | - | if not match: | |
| 76 | - | return {}, text | |
| 77 | - | meta = {} | |
| 78 | - | for line in match.group(1).strip().splitlines(): | |
| 79 | - | if ":" in line: | |
| 80 | - | key, val = line.split(":", 1) | |
| 81 | - | meta[key.strip()] = val.strip() | |
| 82 | - | return meta, match.group(2).strip() | |
| 61 | + | def estimate_tokens(messages: list) -> int: | |
| 62 | + | """Rough token count: ~4 chars per token.""" | |
| 63 | + | return len(str(messages)) // 4 | |
| 83 | 64 | ||
| 84 | - | def get_descriptions(self) -> str: | |
| 85 | - | """Layer 1: short descriptions for the system prompt.""" | |
| 86 | - | if not self.skills: | |
| 87 | - | return "(no skills available)" | |
| 88 | - | lines = [] | |
| 89 | - | for name, skill in self.skills.items(): | |
| 90 | - | desc = skill["meta"].get("description", "No description") | |
| 91 | - | tags = skill["meta"].get("tags", "") | |
| 92 | - | line = f" - {name}: {desc}" | |
| 93 | - | if tags: | |
| 94 | - | line += f" [{tags}]" | |
| 95 | - | lines.append(line) | |
| 96 | - | return "\n".join(lines) | |
| 97 | 65 | ||
| 98 | - | def get_content(self, name: str) -> str: | |
| 99 | - | """Layer 2: full skill body returned in tool_result.""" | |
| 100 | - | skill = self.skills.get(name) | |
| 101 | - | if not skill: | |
| 102 | - | return f"Error: Unknown skill '{name}'. Available: {', '.join(self.skills.keys())}" | |
| 103 | - | return f"<skill name=\"{name}\">\n{skill['body']}\n</skill>" | |
| 66 | + | # -- Layer 1: micro_compact - replace old tool results with placeholders -- | |
| 67 | + | def micro_compact(messages: list) -> list: | |
| 68 | + | # Collect (msg_index, part_index, tool_result_dict) for all tool_result entries | |
| 69 | + | tool_results = [] | |
| 70 | + | for msg_idx, msg in enumerate(messages): | |
| 71 | + | if msg["role"] == "user" and isinstance(msg.get("content"), list): | |
| 72 | + | for part_idx, part in enumerate(msg["content"]): | |
| 73 | + | if isinstance(part, dict) and part.get("type") == "tool_result": | |
| 74 | + | tool_results.append((msg_idx, part_idx, part)) | |
| 75 | + | if len(tool_results) <= KEEP_RECENT: | |
| 76 | + | return messages | |
| 77 | + | # Find tool_name for each result by matching tool_use_id in prior assistant messages | |
| 78 | + | tool_name_map = {} | |
| 79 | + | for msg in messages: | |
| 80 | + | if msg["role"] == "assistant": | |
| 81 | + | content = msg.get("content", []) | |
| 82 | + | if isinstance(content, list): | |
| 83 | + | for block in content: | |
| 84 | + | if hasattr(block, "type") and block.type == "tool_use": | |
| 85 | + | tool_name_map[block.id] = block.name | |
| 86 | + | # Clear old results (keep last KEEP_RECENT) | |
| 87 | + | to_clear = tool_results[:-KEEP_RECENT] | |
| 88 | + | for _, _, result in to_clear: | |
| 89 | + | if isinstance(result.get("content"), str) and len(result["content"]) > 100: | |
| 90 | + | tool_id = result.get("tool_use_id", "") | |
| 91 | + | tool_name = tool_name_map.get(tool_id, "unknown") | |
| 92 | + | result["content"] = f"[Previous: used {tool_name}]" | |
| 93 | + | return messages | |
| 104 | 94 | ||
| 105 | 95 | ||
| 106 | - | SKILL_LOADER = SkillLoader(SKILLS_DIR) | |
| 96 | + | # -- Layer 2: auto_compact - save transcript, summarize, replace messages -- | |
| 97 | + | def auto_compact(messages: list) -> list: | |
| 98 | + | # Save full transcript to disk | |
| 99 | + | TRANSCRIPT_DIR.mkdir(exist_ok=True) | |
| 100 | + | transcript_path = TRANSCRIPT_DIR / f"transcript_{int(time.time())}.jsonl" | |
| 101 | + | with open(transcript_path, "w") as f: | |
| 102 | + | for msg in messages: | |
| 103 | + | f.write(json.dumps(msg, default=str) + "\n") | |
| 104 | + | print(f"[transcript saved: {transcript_path}]") | |
| 105 | + | # Ask LLM to summarize | |
| 106 | + | conversation_text = json.dumps(messages, default=str)[:80000] | |
| 107 | + | response = client.messages.create( | |
| 108 | + | model=MODEL, | |
| 109 | + | messages=[{"role": "user", "content": | |
| 110 | + | "Summarize this conversation for continuity. Include: " | |
| 111 | + | "1) What was accomplished, 2) Current state, 3) Key decisions made. " | |
| 112 | + | "Be concise but preserve critical details.\n\n" + conversation_text}], | |
| 113 | + | max_tokens=2000, | |
| 114 | + | ) | |
| 115 | + | summary = response.content[0].text | |
| 116 | + | # Replace all messages with compressed summary | |
| 117 | + | return [ | |
| 118 | + | {"role": "user", "content": f"[Conversation compressed. Transcript: {transcript_path}]\n\n{summary}"}, | |
| 119 | + | {"role": "assistant", "content": "Understood. I have the context from the summary. Continuing."}, | |
| 120 | + | ] | |
| 107 | 121 | ||
| 108 | - | # Layer 1: skill metadata injected into system prompt | |
| 109 | - | SYSTEM = f"""You are a coding agent at {WORKDIR}. | |
| 110 | - | Use load_skill to access specialized knowledge before tackling unfamiliar topics. | |
| 111 | 122 | ||
| 112 | - | Skills available: | |
| 113 | - | {SKILL_LOADER.get_descriptions()}""" | |
| 114 | - | ||
| 115 | - | ||
| 116 | 123 | # -- Tool implementations -- | |
| 117 | 124 | def safe_path(p: str) -> Path: | |
| 118 | 125 | path = (WORKDIR / p).resolve() | |
| 119 | 126 | if not path.is_relative_to(WORKDIR): | |
| 120 | 127 | raise ValueError(f"Path escapes workspace: {p}") | |
| 121 | 128 | return path | |
| 122 | 129 | ||
| 123 | 130 | def run_bash(command: str) -> str: | |
| 124 | 131 | dangerous = ["rm -rf /", "sudo", "shutdown", "reboot", "> /dev/"] | |
| 125 | 132 | if any(d in command for d in dangerous): | |
| 126 | 133 | return "Error: Dangerous command blocked" | |
| 127 | 134 | try: | |
| 128 | 135 | r = subprocess.run(command, shell=True, cwd=WORKDIR, | |
| 129 | 136 | capture_output=True, text=True, timeout=120) | |
| 130 | 137 | out = (r.stdout + r.stderr).strip() | |
| 131 | 138 | return out[:50000] if out else "(no output)" | |
| 132 | 139 | except subprocess.TimeoutExpired: | |
| 133 | 140 | return "Error: Timeout (120s)" | |
| 134 | 141 | ||
| 135 | 142 | def run_read(path: str, limit: int = None) -> str: | |
| 136 | 143 | try: | |
| 137 | 144 | lines = safe_path(path).read_text().splitlines() | |
| 138 | 145 | if limit and limit < len(lines): | |
| 139 | 146 | lines = lines[:limit] + [f"... ({len(lines) - limit} more)"] | |
| 140 | 147 | return "\n".join(lines)[:50000] | |
| 141 | 148 | except Exception as e: | |
| 142 | 149 | return f"Error: {e}" | |
| 143 | 150 | ||
| 144 | 151 | def run_write(path: str, content: str) -> str: | |
| 145 | 152 | try: | |
| 146 | 153 | fp = safe_path(path) | |
| 147 | 154 | fp.parent.mkdir(parents=True, exist_ok=True) | |
| 148 | 155 | fp.write_text(content) | |
| 149 | 156 | return f"Wrote {len(content)} bytes" | |
| 150 | 157 | except Exception as e: | |
| 151 | 158 | return f"Error: {e}" | |
| 152 | 159 | ||
| 153 | 160 | def run_edit(path: str, old_text: str, new_text: str) -> str: | |
| 154 | 161 | try: | |
| 155 | 162 | fp = safe_path(path) | |
| 156 | 163 | content = fp.read_text() | |
| 157 | 164 | if old_text not in content: | |
| 158 | 165 | return f"Error: Text not found in {path}" | |
| 159 | 166 | fp.write_text(content.replace(old_text, new_text, 1)) | |
| 160 | 167 | return f"Edited {path}" | |
| 161 | 168 | except Exception as e: | |
| 162 | 169 | return f"Error: {e}" | |
| 163 | 170 | ||
| 164 | 171 | ||
| 165 | 172 | TOOL_HANDLERS = { | |
| 166 | 173 | "bash": lambda **kw: run_bash(kw["command"]), | |
| 167 | 174 | "read_file": lambda **kw: run_read(kw["path"], kw.get("limit")), | |
| 168 | 175 | "write_file": lambda **kw: run_write(kw["path"], kw["content"]), | |
| 169 | 176 | "edit_file": lambda **kw: run_edit(kw["path"], kw["old_text"], kw["new_text"]), | |
| 170 | - | "load_skill": lambda **kw: SKILL_LOADER.get_content(kw["name"]), | |
| 177 | + | "compact": lambda **kw: "Manual compression requested.", | |
| 171 | 178 | } | |
| 172 | 179 | ||
| 173 | 180 | TOOLS = [ | |
| 174 | 181 | {"name": "bash", "description": "Run a shell command.", | |
| 175 | 182 | "input_schema": {"type": "object", "properties": {"command": {"type": "string"}}, "required": ["command"]}}, | |
| 176 | 183 | {"name": "read_file", "description": "Read file contents.", | |
| 177 | 184 | "input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "limit": {"type": "integer"}}, "required": ["path"]}}, | |
| 178 | 185 | {"name": "write_file", "description": "Write content to file.", | |
| 179 | 186 | "input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "content": {"type": "string"}}, "required": ["path", "content"]}}, | |
| 180 | 187 | {"name": "edit_file", "description": "Replace exact text in file.", | |
| 181 | 188 | "input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "old_text": {"type": "string"}, "new_text": {"type": "string"}}, "required": ["path", "old_text", "new_text"]}}, | |
| 182 | - | {"name": "load_skill", "description": "Load specialized knowledge by name.", | |
| 183 | - | "input_schema": {"type": "object", "properties": {"name": {"type": "string", "description": "Skill name to load"}}, "required": ["name"]}}, | |
| 189 | + | {"name": "compact", "description": "Trigger manual conversation compression.", | |
| 190 | + | "input_schema": {"type": "object", "properties": {"focus": {"type": "string", "description": "What to preserve in the summary"}}}}, | |
| 184 | 191 | ] | |
| 185 | 192 | ||
| 186 | 193 | ||
| 187 | 194 | def agent_loop(messages: list): | |
| 188 | 195 | while True: | |
| 196 | + | # Layer 1: micro_compact before each LLM call | |
| 197 | + | micro_compact(messages) | |
| 198 | + | # Layer 2: auto_compact if token estimate exceeds threshold | |
| 199 | + | if estimate_tokens(messages) > THRESHOLD: | |
| 200 | + | print("[auto_compact triggered]") | |
| 201 | + | messages[:] = auto_compact(messages) | |
| 189 | 202 | response = client.messages.create( | |
| 190 | 203 | model=MODEL, system=SYSTEM, messages=messages, | |
| 191 | 204 | tools=TOOLS, max_tokens=8000, | |
| 192 | 205 | ) | |
| 193 | 206 | messages.append({"role": "assistant", "content": response.content}) | |
| 194 | 207 | if response.stop_reason != "tool_use": | |
| 195 | 208 | return | |
| 196 | 209 | results = [] | |
| 210 | + | manual_compact = False | |
| 197 | 211 | for block in response.content: | |
| 198 | 212 | if block.type == "tool_use": | |
| 199 | - | handler = TOOL_HANDLERS.get(block.name) | |
| 200 | - | try: | |
| 201 | - | output = handler(**block.input) if handler else f"Unknown tool: {block.name}" | |
| 202 | - | except Exception as e: | |
| 203 | - | output = f"Error: {e}" | |
| 213 | + | if block.name == "compact": | |
| 214 | + | manual_compact = True | |
| 215 | + | output = "Compressing..." | |
| 216 | + | else: | |
| 217 | + | handler = TOOL_HANDLERS.get(block.name) | |
| 218 | + | try: | |
| 219 | + | output = handler(**block.input) if handler else f"Unknown tool: {block.name}" | |
| 220 | + | except Exception as e: | |
| 221 | + | output = f"Error: {e}" | |
| 204 | 222 | print(f"> {block.name}: {str(output)[:200]}") | |
| 205 | 223 | results.append({"type": "tool_result", "tool_use_id": block.id, "content": str(output)}) | |
| 206 | 224 | messages.append({"role": "user", "content": results}) | |
| 225 | + | # Layer 3: manual compact triggered by the compact tool | |
| 226 | + | if manual_compact: | |
| 227 | + | print("[manual compact]") | |
| 228 | + | messages[:] = auto_compact(messages) | |
| 207 | 229 | ||
| 208 | 230 | ||
| 209 | 231 | if __name__ == "__main__": | |
| 210 | 232 | history = [] | |
| 211 | 233 | while True: | |
| 212 | 234 | try: | |
| 213 | - | query = input("\033[36ms05 >> \033[0m") | |
| 235 | + | query = input("\033[36ms06 >> \033[0m") | |
| 214 | 236 | except (EOFError, KeyboardInterrupt): | |
| 215 | 237 | break | |
| 216 | 238 | if query.strip().lower() in ("q", "exit", ""): | |
| 217 | 239 | break | |
| 218 | 240 | history.append({"role": "user", "content": query}) | |
| 219 | 241 | agent_loop(history) | |
| 220 | 242 | response_content = history[-1]["content"] | |
| 221 | 243 | if isinstance(response_content, list): | |
| 222 | 244 | for block in response_content: | |
| 223 | 245 | if hasattr(block, "text"): | |
| 224 | 246 | print(block.text) | |
| 225 | 247 | print() |
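As a rough sanity check on the numbers above (the message content here is made up): with the ~4-characters-per-token estimate, the 50,000-token THRESHOLD is crossed once the serialized history reaches roughly 200,000 characters, which is when auto_compact would fire.

def estimate_tokens(messages: list) -> int:
    return len(str(messages)) // 4   # same heuristic as s06: ~4 chars per token

THRESHOLD = 50000
history = [{"role": "user", "content": "x" * 200_000}]   # hypothetical oversized history
print(estimate_tokens(history))              # just over 50,000 once dict overhead is counted
print(estimate_tokens(history) > THRESHOLD)  # True -> Layer 2 (auto_compact) would trigger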
