refactor(server): extract amc_server package from monolithic script
Split the 860+ line bin/amc-server into a modular Python package:
amc_server/
__init__.py - Package marker
context.py - Shared constants (DATA_DIR, PORT, CLAUDE_PROJECTS_DIR, etc.)
handler.py - AMCHandler class using mixin composition
logging_utils.py - Structured logging setup with signal handlers
server.py - Main entry point (ThreadingHTTPServer)
mixins/
__init__.py - Mixin package marker
control.py - Session control (dismiss, respond via Zellij)
conversation.py - Conversation history parsing (Claude JSONL format)
discovery.py - Session discovery (Codex pane inspection, Zellij cache)
http.py - HTTP response helpers (CORS, JSON, static files)
parsing.py - Session state parsing and aggregation
state.py - Session state endpoint logic
The monolithic bin/amc-server becomes a thin launcher that just imports
and calls main(). This separation enables:
- Easier testing of individual components
- Better IDE support (proper Python package structure)
- Cleaner separation of concerns (discovery vs parsing vs control)
- ThreadingHTTPServer instead of single-threaded (handles concurrent requests)
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
268
amc_server/mixins/parsing.py
Normal file
268
amc_server/mixins/parsing.py
Normal file
@@ -0,0 +1,268 @@
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from amc_server.context import (
|
||||
CLAUDE_PROJECTS_DIR,
|
||||
CODEX_SESSIONS_DIR,
|
||||
_CONTEXT_CACHE_MAX,
|
||||
_codex_transcript_cache,
|
||||
_context_usage_cache,
|
||||
)
|
||||
from amc_server.logging_utils import LOGGER
|
||||
|
||||
|
||||
class SessionParsingMixin:
    """Parsing helpers for session transcripts and context/token usage."""

    def _get_claude_conversation_file(self, session_id, project_dir):
        """Resolve Claude conversation file path from session id + project dir.

        Returns a Path when the expected JSONL transcript exists, else None.
        """
        if not project_dir:
            return None

        # Claude stores transcripts under a directory name derived from the
        # project path: slashes become dashes, always with a leading dash.
        encoded = project_dir.replace("/", "-")
        if not encoded.startswith("-"):
            encoded = "-" + encoded

        candidate = CLAUDE_PROJECTS_DIR / encoded / f"{session_id}.jsonl"
        if candidate.exists():
            return candidate
        return None
|
||||
|
||||
def _find_codex_transcript_file(self, session_id):
    """Resolve Codex transcript path for a session id with lightweight caching.

    Caches both hits (path string) and misses (None). Fix: a cached path
    that no longer exists on disk is treated as stale — the entry is
    evicted and the sessions directory re-scanned — instead of permanently
    reporting the transcript as missing (the old code returned None forever
    once a cached file was rotated or removed).
    """
    if not session_id:
        return None

    if session_id in _codex_transcript_cache:
        cached = _codex_transcript_cache.get(session_id)
        if cached is None:
            # Negative result cached from a previous scan.
            return None
        path = Path(cached)
        if path.exists():
            return path
        # Stale entry (file rotated/removed); drop it and re-scan below.
        _codex_transcript_cache.pop(session_id, None)

    if not CODEX_SESSIONS_DIR.exists():
        _codex_transcript_cache[session_id] = None
        return None

    try:
        # First match wins; the glob embeds the session id in the filename.
        for jsonl_file in CODEX_SESSIONS_DIR.rglob(f"*{session_id}*.jsonl"):
            _codex_transcript_cache[session_id] = str(jsonl_file)
            return jsonl_file
    except OSError:
        # Directory scan failures degrade to a cached miss below.
        pass

    _codex_transcript_cache[session_id] = None
    return None
|
||||
|
||||
def _read_jsonl_tail_entries(self, file_path, max_lines=300, max_bytes=1024 * 1024):
|
||||
"""Read only the tail of a JSONL file and return parsed entries."""
|
||||
entries = []
|
||||
|
||||
try:
|
||||
with file_path.open("rb") as f:
|
||||
f.seek(0, os.SEEK_END)
|
||||
file_size = f.tell()
|
||||
if file_size <= 0:
|
||||
return entries
|
||||
|
||||
read_size = min(file_size, max_bytes)
|
||||
f.seek(file_size - read_size)
|
||||
chunk = f.read(read_size)
|
||||
except OSError:
|
||||
return entries
|
||||
|
||||
lines = chunk.splitlines()
|
||||
if file_size > read_size and lines:
|
||||
# First line may be partial because we started in the middle.
|
||||
lines = lines[1:]
|
||||
|
||||
for raw_line in lines[-max_lines:]:
|
||||
if not raw_line:
|
||||
continue
|
||||
try:
|
||||
entries.append(json.loads(raw_line.decode("utf-8", errors="replace")))
|
||||
except json.JSONDecodeError:
|
||||
continue
|
||||
|
||||
return entries
|
||||
|
||||
def _to_int(self, value):
|
||||
"""Best-effort integer conversion."""
|
||||
if isinstance(value, bool):
|
||||
return None
|
||||
if isinstance(value, int):
|
||||
return value
|
||||
if isinstance(value, float):
|
||||
return int(value)
|
||||
if isinstance(value, str):
|
||||
try:
|
||||
return int(value)
|
||||
except ValueError:
|
||||
return None
|
||||
return None
|
||||
|
||||
def _sum_optional_ints(self, values):
|
||||
"""Sum available ints, return None when no values are present."""
|
||||
present = [v for v in values if isinstance(v, int)]
|
||||
if not present:
|
||||
return None
|
||||
return sum(present)
|
||||
|
||||
def _as_dict(self, value):
|
||||
"""Normalize potentially-null JSON objects into dicts."""
|
||||
return value if isinstance(value, dict) else {}
|
||||
|
||||
def _parse_codex_context_usage_from_file(self, file_path):
    """Extract the latest Codex context usage snapshot from transcript tail.

    Walks the tail newest-first; the first ``token_count`` event carrying
    any usable numbers wins. Returns a usage dict or None.
    """
    tail = self._read_jsonl_tail_entries(file_path, max_lines=600, max_bytes=1024 * 1024)

    for record in reversed(tail):
        if not isinstance(record, dict) or record.get("type") != "event_msg":
            continue
        payload = self._as_dict(record.get("payload"))
        if payload.get("type") != "token_count":
            continue

        info = self._as_dict(payload.get("info"))
        last = self._as_dict(info.get("last_token_usage"))
        total = self._as_dict(info.get("total_token_usage"))

        in_tok = self._to_int(last.get("input_tokens"))
        out_tok = self._to_int(last.get("output_tokens"))
        cached_tok = self._to_int(last.get("cached_input_tokens"))
        current = self._to_int(last.get("total_tokens"))
        if current is None:
            # Fall back to summing whichever components are available.
            current = self._sum_optional_ints([in_tok, out_tok, cached_tok])

        session_total = self._to_int(total.get("total_tokens"))
        if current is None and session_total is None:
            # Snapshot carries no usable numbers; keep scanning older events.
            continue

        return {
            "window_tokens": self._to_int(info.get("model_context_window")),
            "current_tokens": current,
            "input_tokens": in_tok,
            "output_tokens": out_tok,
            "cached_input_tokens": cached_tok,
            "session_total_tokens": session_total,
            "updated_at": record.get("timestamp", ""),
        }

    return None
|
||||
|
||||
def _get_claude_context_window(self, model):
|
||||
"""Return context window size for Claude models."""
|
||||
if not model:
|
||||
return 200_000 # Default for unknown Claude models
|
||||
# All current Claude 3.5/4 models have 200K context
|
||||
# Legacy claude-3-opus/sonnet/haiku also 200K, claude-2 was 100K
|
||||
if "claude-2" in model:
|
||||
return 100_000
|
||||
return 200_000
|
||||
|
||||
def _parse_claude_context_usage_from_file(self, file_path):
    """Extract Claude usage with context window.

    Scans the transcript tail newest-first for an assistant message whose
    usage block yields a computable current-token total. Returns a usage
    dict or None.
    """
    tail = self._read_jsonl_tail_entries(file_path, max_lines=400, max_bytes=1024 * 1024)

    for record in reversed(tail):
        if not isinstance(record, dict) or record.get("type") != "assistant":
            continue

        message = self._as_dict(record.get("message"))
        usage = self._as_dict(message.get("usage"))
        if not usage:
            continue

        in_tok = self._to_int(usage.get("input_tokens"))
        out_tok = self._to_int(usage.get("output_tokens"))
        cache_read = self._to_int(usage.get("cache_read_input_tokens"))
        cache_write = self._to_int(usage.get("cache_creation_input_tokens"))

        # Cached = read + creation; current = everything the model processed.
        cached_tok = self._sum_optional_ints([cache_read, cache_write])
        current = self._sum_optional_ints([in_tok, out_tok, cache_read, cache_write])
        if current is None:
            continue

        model = message.get("model", "")
        return {
            "window_tokens": self._get_claude_context_window(model),
            "current_tokens": current,
            "input_tokens": in_tok,
            "output_tokens": out_tok,
            "cached_input_tokens": cached_tok,
            "session_total_tokens": None,  # Claude transcripts carry no session total.
            "updated_at": record.get("timestamp", ""),
            "model": model,
        }

    return None
|
||||
|
||||
def _get_cached_context_usage(self, file_path, parser):
    """Cache parsed usage by transcript mtime/size for poll efficiency.

    Returns the cached usage when the file is unchanged; otherwise invokes
    ``parser`` on the file, stores the result (even None), and returns it.
    Stat failures yield None without touching the cache.
    """
    try:
        st = file_path.stat()
    except OSError:
        return None

    cache_key = str(file_path)
    hit = _context_usage_cache.get(cache_key)
    if hit and hit.get("mtime_ns") == st.st_mtime_ns and hit.get("size") == st.st_size:
        return hit.get("usage")

    try:
        usage = parser(file_path)
    except Exception:
        # Parser failures are logged but never propagate to request handling.
        LOGGER.exception("Failed to parse context usage for %s", file_path)
        usage = None

    # Simple FIFO eviction: once full, drop the oldest fifth of the entries.
    if len(_context_usage_cache) >= _CONTEXT_CACHE_MAX:
        for stale_key in list(_context_usage_cache)[: _CONTEXT_CACHE_MAX // 5]:
            _context_usage_cache.pop(stale_key, None)

    _context_usage_cache[cache_key] = {
        "mtime_ns": st.st_mtime_ns,
        "size": st.st_size,
        "usage": usage,
    }
    return usage
|
||||
|
||||
def _get_context_usage_for_session(self, session_data):
    """Attach context/token usage info for both Codex and Claude sessions.

    Freshly parsed usage wins; the session's pre-existing ``context_usage``
    (possibly None) is the fallback whenever no transcript can be resolved
    or parsing yields nothing.
    """
    fallback = session_data.get("context_usage")
    agent = session_data.get("agent")

    if agent == "codex":
        raw_path = session_data.get("transcript_path", "")
        transcript = Path(raw_path) if raw_path else None
        if transcript is not None and not transcript.exists():
            transcript = None
        if transcript is None:
            # No usable recorded path; fall back to the cached directory scan.
            transcript = self._find_codex_transcript_file(session_data.get("session_id", ""))
        if transcript is None:
            return fallback
        parsed = self._get_cached_context_usage(transcript, self._parse_codex_context_usage_from_file)
        return parsed or fallback

    if agent == "claude":
        conv = self._get_claude_conversation_file(
            session_data.get("session_id", ""),
            session_data.get("project_dir", ""),
        )
        if conv is None:
            return fallback
        parsed = self._get_cached_context_usage(conv, self._parse_claude_context_usage_from_file)
        return parsed or fallback

    # Unknown agent type: nothing to parse.
    return fallback
|
||||
Reference in New Issue
Block a user