codex-agents/convert.py
Cal Corum 83ee34b3ad Add .claude-plugin/marketplace.json for plugin discovery
Converter now generates the marketplace index file that Claude Code
needs to list available plugins from this repo.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-26 16:56:20 -05:00

338 lines
9.9 KiB
Python
Executable File

#!/usr/bin/env python3
"""Convert VoltAgent/awesome-codex-subagents TOML files into a Claude Code plugin marketplace.

Usage:
    python3 convert.py <input_dir> <output_dir> [--dry-run] [--verbose] [--manifest PATH]
"""
import argparse
import hashlib
import json
import re
import shutil
import sys
import tomllib
from datetime import datetime, timezone
from pathlib import Path
# --- Model and tool mapping ---

# Claude model alias selected for each upstream (model, reasoning effort) pair.
MODEL_MAP = {
    ("gpt-5.4", "high"): "opus",
    ("gpt-5.4", "medium"): "sonnet",
    ("gpt-5.3-codex-spark", "medium"): "sonnet",
    ("gpt-5.3-codex-spark", "high"): "sonnet",
}

# Alias used for any (model, effort) pair that has no entry in MODEL_MAP.
DEFAULT_MODEL = "sonnet"
# Tool allow/deny lists keyed by the upstream ``sandbox_mode`` value.
TOOLS_BY_SANDBOX = {
    # No file-modifying tools: Edit and Write are explicitly disallowed.
    "read-only": {
        "tools": ["Bash", "Glob", "Grep", "Read"],
        "disallowed": ["Edit", "Write"],
    },
    # Full tool set, nothing disallowed.
    "workspace-write": {
        "tools": ["Bash", "Glob", "Grep", "Read", "Edit", "Write"],
        "disallowed": [],
    },
}
def sha256_file(path: Path) -> str:
    """Return the hexadecimal SHA-256 digest of the bytes stored at *path*."""
    hasher = hashlib.sha256()
    hasher.update(path.read_bytes())
    return hasher.hexdigest()
def map_model(model: str, effort: str) -> str:
    """Translate an upstream model / reasoning-effort pair into a Claude model alias.

    Pairs that have no entry in ``MODEL_MAP`` resolve to ``DEFAULT_MODEL``.
    """
    key = (model, effort)
    if key in MODEL_MAP:
        return MODEL_MAP[key]
    return DEFAULT_MODEL
def map_tools(sandbox_mode: str) -> tuple[list[str], list[str]]:
    """Return the ``(allowed, disallowed)`` tool lists for a sandbox mode.

    Args:
        sandbox_mode: Upstream ``sandbox_mode`` value; any value without an
            entry in ``TOOLS_BY_SANDBOX`` is treated as ``"read-only"``.

    Returns:
        Two new lists: the allowed tool names and the disallowed tool names.
    """
    cfg = TOOLS_BY_SANDBOX.get(sandbox_mode, TOOLS_BY_SANDBOX["read-only"])
    # Hand out copies: the table is module-level state shared by every agent,
    # so a caller appending to its result must not alter later conversions.
    return list(cfg["tools"]), list(cfg["disallowed"])
def sanitize_instructions(text: str) -> str:
    """Rewrite "parent agent" as "orchestrating agent" and trim the text.

    The phrase is matched as whole words, in any letter case, and with any
    run of whitespace (including a line break) between the two words. The
    capitalisation of "parent" is carried over to "orchestrating"; the
    whitespace and the word "agent" are kept exactly as they were written.

    Args:
        text: Raw instruction text from the upstream agent definition.

    Returns:
        The rewritten text with leading and trailing whitespace removed.
    """

    def _rename(match: re.Match) -> str:
        word, gap, tail = match.group(1), match.group(2), match.group(3)
        if word.isupper():
            head = "ORCHESTRATING"
        elif word[0].isupper():
            # Sentence-initial or title-case usage.
            head = "Orchestrating"
        else:
            head = "orchestrating"
        return f"{head}{gap}{tail}"

    text = re.sub(r"\b(parent)(\s+)(agent)\b", _rename, text, flags=re.IGNORECASE)
    return text.strip()
def title_case_name(name: str) -> str:
    """Turn a hyphen-separated agent name into a title-cased heading."""
    spaced = " ".join(name.split("-"))
    return spaced.title()
def yaml_escape(s: str) -> str:
    """Quote *s* as a single-line, double-quoted YAML scalar for frontmatter.

    Backslashes and double quotes are backslash-escaped, and control
    characters (newline, carriage return, tab, ...) are written as escape
    sequences, so a multi-line description cannot spill onto further
    frontmatter lines. Non-ASCII characters are emitted unchanged.

    Args:
        s: The raw string value.

    Returns:
        The value wrapped in double quotes with escapes applied.
    """
    # A JSON string literal uses escapes that YAML double-quoted scalars also
    # accept, so the JSON encoder does the quoting and escaping here.
    return json.dumps(s, ensure_ascii=False)
def build_agent_md(data: dict, category: str) -> str:
    """Render the agent markdown document for one parsed TOML agent.

    The result is YAML frontmatter (name, description, model, tools, an
    optional ``disallowedTools`` line, ``permissionMode``), a title heading,
    the sanitized instructions, and a trailing ``codex-source`` HTML comment
    naming *category*.

    Args:
        data: Parsed TOML table; ``name`` is required, every other key is optional.
        category: Label written into the trailing source comment.

    Returns:
        The full markdown text, ending with a newline.
    """
    agent_name = data["name"]
    summary = data.get("description", "")
    upstream_model = data.get("model", "")
    reasoning = data.get("model_reasoning_effort", "medium")
    sandbox_mode = data.get("sandbox_mode", "read-only")
    # ``instructions.text`` is the fallback when ``developer_instructions`` is absent.
    fallback_text = data.get("instructions", {}).get("text", "")
    raw_instructions = data.get("developer_instructions", fallback_text)

    claude_model = map_model(upstream_model, reasoning)
    allowed, denied = map_tools(sandbox_mode)
    body_text = sanitize_instructions(raw_instructions)
    heading = title_case_name(agent_name)

    frontmatter = [
        "---",
        f"name: {agent_name}",
        f"description: {yaml_escape(summary)}",
        f"model: {claude_model}",
        f"tools: {', '.join(allowed)}",
    ]
    if denied:
        frontmatter.append(f"disallowedTools: {', '.join(denied)}")
    frontmatter += ["permissionMode: default", "---"]

    document = [
        *frontmatter,
        "",
        f"# {heading}",
        "",
        body_text,
        "",
        f"<!-- codex-source: {category} -->",
        "",  # final empty element yields the trailing newline after joining
    ]
    return "\n".join(document)
def build_plugin_json(data: dict) -> str:
    """Serialize the ``plugin.json`` metadata for one agent.

    Args:
        data: Parsed TOML table; ``name`` is required, ``description`` is optional.

    Returns:
        Two-space-indented JSON holding name, description and a fixed
        ``"1.0.0"`` version, terminated by a newline.
    """
    metadata = dict(
        name=data["name"],
        description=data.get("description", ""),
        version="1.0.0",
    )
    return f"{json.dumps(metadata, indent=2)}\n"
def convert_file(
    toml_path: Path,
    output_dir: Path,
    category: str,
    dry_run: bool,
    verbose: bool,
) -> tuple[str, str]:
    """Convert a single TOML file into a plugin directory under *output_dir*.

    Generates ``<output_dir>/<name>/agents/<name>.md`` and
    ``<output_dir>/<name>/.claude-plugin/plugin.json``. Nothing is written
    when both files already hold exactly the generated content, or when
    *dry_run* is set.

    Args:
        toml_path: Upstream agent definition to read.
        output_dir: Directory that holds one sub-directory per plugin.
        category: Label recorded in the generated markdown.
        dry_run: When true, report the status without touching the disk.
        verbose: When true, print a per-file status line.

    Returns:
        ``(name, status)`` where status is ``"added"``, ``"updated"`` or
        ``"unchanged"``.

    Raises:
        KeyError: If the TOML file has no ``name`` key.
        ValueError: If ``name`` is not a plain, single path component.
    """
    with open(toml_path, "rb") as f:
        data = tomllib.load(f)

    name = data["name"]
    # The name comes from a third-party file and becomes a directory and file
    # name; reject anything that could resolve outside output_dir.
    if name in ("", ".", "..") or Path(name).name != name:
        raise ValueError(f"unsafe agent name {name!r} in {toml_path}")

    plugin_dir = output_dir / name
    agent_dir = plugin_dir / "agents"
    meta_dir = plugin_dir / ".claude-plugin"
    agent_path = agent_dir / f"{name}.md"
    meta_path = meta_dir / "plugin.json"

    agent_content = build_agent_md(data, category)
    plugin_content = build_plugin_json(data)

    # TOML is UTF-8 by definition, so read and write the generated files as
    # UTF-8 rather than in whatever the platform's locale encoding is.
    existing_agent = (
        agent_path.read_text(encoding="utf-8") if agent_path.exists() else None
    )
    existing_plugin = (
        meta_path.read_text(encoding="utf-8") if meta_path.exists() else None
    )

    if existing_agent == agent_content and existing_plugin == plugin_content:
        if verbose:
            print(f" unchanged: {name}")
        return name, "unchanged"

    status = "updated" if agent_path.exists() else "added"

    if not dry_run:
        agent_dir.mkdir(parents=True, exist_ok=True)
        meta_dir.mkdir(parents=True, exist_ok=True)
        agent_path.write_text(agent_content, encoding="utf-8")
        meta_path.write_text(plugin_content, encoding="utf-8")

    if verbose:
        print(f" {status}: {name} (from {category})")
    return name, status
def detect_removed(
    manifest_path: Path,
    current_names: set[str],
    output_dir: Path,
    dry_run: bool,
    verbose: bool,
) -> list[str]:
    """Remove plugin directories for agents that are no longer present upstream.

    An agent counts as gone when it is a key of the manifest's ``agents``
    table but is missing from *current_names*.

    Args:
        manifest_path: Manifest JSON from the previous run; if the file does
            not exist, nothing is removed.
        current_names: Names of the agents found in this run.
        output_dir: Directory that holds one sub-directory per plugin.
        dry_run: When true, report what would be removed without deleting.
        verbose: When true, print a line per removed agent.

    Returns:
        Sorted names of the gone agents whose plugin directory existed.
    """
    removed: list[str] = []
    if not manifest_path.exists():
        return removed

    manifest = json.loads(manifest_path.read_text(encoding="utf-8"))
    previous_names = set(manifest.get("agents", {}))

    for name in sorted(previous_names - current_names):
        # Each name is joined onto output_dir and handed to rmtree. An empty
        # name would resolve to output_dir itself and "../x" to a sibling
        # directory, so only plain single path components are accepted.
        if name in ("", ".", "..") or Path(name).name != name:
            print(
                f" warning: refusing to remove unsafe manifest entry {name!r}",
                file=sys.stderr,
            )
            continue

        plugin_dir = output_dir / name
        if plugin_dir.exists():
            if not dry_run:
                shutil.rmtree(plugin_dir)
            if verbose:
                print(f" removed: {name}")
            removed.append(name)
    return removed
def main():
    """Run the converter from the command line.

    Steps, in order:

    1. Parse arguments and find every ``*.toml`` under ``input_dir``.
    2. Skip files whose SHA-256 matches the previous manifest entry and whose
       generated agent file and ``plugin.json`` both still exist.
    3. Convert the remaining files.
    4. Remove plugin directories for agents listed in the previous manifest
       that were not seen in this run.
    5. Write ``.claude-plugin/marketplace.json`` next to ``output_dir`` and
       then the manifest.

    With ``--dry-run`` nothing is written or deleted. The process exits with
    status 1 when the input directory is missing or contains no TOML files.
    """
    parser = argparse.ArgumentParser(
        description="Convert Codex agent TOMLs to Claude Code plugins"
    )
    parser.add_argument(
        "input_dir", type=Path, help="Path to upstream categories/ directory"
    )
    parser.add_argument(
        "output_dir", type=Path, help="Path to plugins/ output directory"
    )
    parser.add_argument(
        "--dry-run", action="store_true", help="Print actions without writing"
    )
    parser.add_argument("--verbose", action="store_true", help="Print per-file status")
    parser.add_argument(
        "--manifest", type=Path, default=None, help="Path to codex-manifest.json"
    )
    args = parser.parse_args()

    if not args.input_dir.is_dir():
        print(f"Error: input directory not found: {args.input_dir}", file=sys.stderr)
        sys.exit(1)

    manifest_path = args.manifest or args.output_dir.parent / "codex-manifest.json"

    toml_files = sorted(args.input_dir.rglob("*.toml"))
    if not toml_files:
        print("No .toml files found in input directory.", file=sys.stderr)
        sys.exit(1)

    # Build hash map and detect duplicates
    seen_names: dict[str, Path] = {}
    file_hashes: dict[str, str] = {}
    # Descriptions are captured during this single parse so the marketplace
    # index below does not have to re-read every TOML file.
    descriptions: dict[str, str] = {}
    to_convert: list[tuple[Path, str]] = []  # (path, category)

    # Load existing manifest for hash comparison
    old_manifest: dict = {}
    if manifest_path.exists():
        old_manifest = json.loads(manifest_path.read_text(encoding="utf-8"))
    old_agents = old_manifest.get("agents", {})

    for toml_path in toml_files:
        with open(toml_path, "rb") as f:
            data = tomllib.load(f)
        name = data["name"]
        category = toml_path.parent.name
        file_hash = sha256_file(toml_path)

        if name in seen_names:
            print(
                f" warning: duplicate name '{name}' in {toml_path} (already seen in {seen_names[name]}), skipping",
                file=sys.stderr,
            )
            continue

        seen_names[name] = toml_path
        file_hashes[name] = file_hash
        descriptions[name] = data.get("description", "")

        # Skip if hash unchanged
        old_entry = old_agents.get(name, {})
        if old_entry.get("sha256") == file_hash:
            # Verify that both generated files still exist; if either one is
            # missing, fall through so the conversion regenerates it.
            plugin_dir = args.output_dir / name
            agent_exists = (plugin_dir / "agents" / f"{name}.md").exists()
            meta_exists = (plugin_dir / ".claude-plugin" / "plugin.json").exists()
            if agent_exists and meta_exists:
                if args.verbose:
                    print(f" unchanged: {name} (hash match)")
                continue

        to_convert.append((toml_path, category))

    # Convert changed files
    counts = {"added": 0, "updated": 0, "unchanged": 0, "removed": 0}
    # Count hash-skipped as unchanged
    counts["unchanged"] = len(seen_names) - len(to_convert)

    for toml_path, category in to_convert:
        name, status = convert_file(
            toml_path, args.output_dir, category, args.dry_run, args.verbose
        )
        counts[status] += 1

    # Detect and remove deleted agents (reads the previous manifest, so this
    # must run before the manifest is rewritten below)
    removed = detect_removed(
        manifest_path,
        set(seen_names.keys()),
        args.output_dir,
        args.dry_run,
        args.verbose,
    )
    counts["removed"] = len(removed)

    # Build marketplace.json (at .claude-plugin/marketplace.json relative to repo root)
    marketplace_dir = args.output_dir.parent / ".claude-plugin"
    marketplace_path = marketplace_dir / "marketplace.json"
    marketplace_data = {
        "name": "codex-agents",
        "owner": {"name": "Cal"},
        "plugins": [
            {
                "name": name,
                "source": f"./plugins/{name}",
                "description": desc,
            }
            for name, desc in sorted(descriptions.items())
        ],
    }
    if not args.dry_run:
        marketplace_dir.mkdir(parents=True, exist_ok=True)
        marketplace_path.write_text(
            json.dumps(marketplace_data, indent=2) + "\n", encoding="utf-8"
        )

    # Write manifest
    manifest_data = {
        "synced_at": datetime.now(timezone.utc).isoformat(),
        "agent_count": len(seen_names),
        "agents": {
            name: {
                "source": str(path.relative_to(args.input_dir.parent)),
                "sha256": file_hashes[name],
            }
            for name, path in sorted(seen_names.items())
        },
    }
    if not args.dry_run:
        args.output_dir.mkdir(parents=True, exist_ok=True)
        # --manifest may point outside output_dir, so create its own parent too.
        manifest_path.parent.mkdir(parents=True, exist_ok=True)
        manifest_path.write_text(
            json.dumps(manifest_data, indent=2) + "\n", encoding="utf-8"
        )

    # Summary
    prefix = "[dry-run] " if args.dry_run else ""
    total = sum(counts.values())
    parts = [
        f"{counts[k]} {k}"
        for k in ("added", "updated", "unchanged", "removed")
        if counts[k]
    ]
    print(f"{prefix}Processed {total} agents: {', '.join(parts)}")
    if not args.dry_run:
        print(f"Manifest written to {manifest_path}")


if __name__ == "__main__":
    main()