feat(server): rewrite Codex pane discovery using process inspection

Replace the zellij dump-layout approach with direct process inspection
for matching Codex sessions to Zellij panes. The old method couldn't
extract pane IDs from layout output, leaving them empty. The new
approach uses a three-step pipeline:

1. pgrep -x codex to find running Codex PIDs
2. ps eww to extract ZELLIJ_PANE_ID and ZELLIJ_SESSION_NAME from each
   process's inherited environment variables
3. lsof -a -p <pids> -d cwd to batch-resolve working directories

Session-to-pane matching then uses a two-tier strategy:
- Primary: lsof -t <session_file> to find which PID has the JSONL open
- Fallback: normalized CWD comparison

Also adds:
- _codex_pane_cache with 5-second TTL to avoid running pgrep/ps/lsof
  on every dashboard poll cycle
- _dismissed_codex_ids set to track Codex sessions the user has
  dismissed, preventing re-discovery on subsequent polls
- Clearer error message when session lacks pane info for input routing
This commit is contained in:
teernisse
2026-02-25 11:36:40 -05:00
parent c777ef05b6
commit e994c7a0e8

View File

@@ -45,6 +45,12 @@ STALE_STARTING_AGE = 3600 # 1 hour - sessions stuck in "starting" are orphans
# Cache for Zellij session list (avoid calling zellij on every request)
_zellij_cache = {"sessions": None, "expires": 0}
# Cache for Codex pane info (avoid running pgrep/ps/lsof on every request)
_codex_pane_cache = {"pid_info": {}, "cwd_map": {}, "expires": 0}
# Codex sessions dismissed during this server lifetime (prevents re-discovery)
_dismissed_codex_ids = set()
class AMCHandler(BaseHTTPRequestHandler):
def do_GET(self):
@@ -338,6 +344,8 @@ class AMCHandler(BaseHTTPRequestHandler):
"""Delete a session file (manual dismiss from dashboard)."""
safe_id = os.path.basename(session_id)
session_file = SESSIONS_DIR / f"{safe_id}.json"
# Track dismissed Codex sessions to prevent re-discovery
_dismissed_codex_ids.add(safe_id)
session_file.unlink(missing_ok=True)
response = json.dumps({"ok": True}).encode()
@@ -383,7 +391,7 @@ class AMCHandler(BaseHTTPRequestHandler):
zellij_pane = session.get("zellij_pane", "")
if not zellij_session or not zellij_pane:
self._json_error(400, "Session missing Zellij pane info - input not supported for auto-discovered Codex sessions")
self._json_error(400, "Session missing Zellij pane info - cannot send input without a pane target")
return
# Parse pane ID from "terminal_N" format
@@ -536,8 +544,8 @@ class AMCHandler(BaseHTTPRequestHandler):
if not CODEX_SESSIONS_DIR.exists():
return
# Get Zellij panes running codex with their cwds
codex_panes = self._get_codex_zellij_panes()
# Get Zellij pane info for running codex processes
pid_info, cwd_map = self._get_codex_pane_info()
# Only look at sessions modified in the last 10 minutes (active)
now = time.time()
@@ -556,6 +564,11 @@ class AMCHandler(BaseHTTPRequestHandler):
continue
session_id = match.group(1)
# Skip sessions the user has dismissed
if session_id in _dismissed_codex_ids:
continue
session_file = SESSIONS_DIR / f"{session_id}.json"
# Parse first line to get session metadata
@@ -572,16 +585,10 @@ class AMCHandler(BaseHTTPRequestHandler):
cwd = payload.get("cwd", "")
project = os.path.basename(cwd) if cwd else "Unknown"
# Try to find matching Zellij pane by cwd
zellij_session = ""
zellij_pane = ""
if cwd and codex_panes:
for pane_cwd, pane_info in codex_panes.items():
# Match by directory name (end of path)
if cwd.endswith(pane_cwd) or pane_cwd.endswith(os.path.basename(cwd)):
zellij_session = pane_info.get("session", "")
zellij_pane = pane_info.get("pane_id", "")
break
# Match session to Zellij pane (UUID match via lsof, CWD fallback)
zellij_session, zellij_pane = self._match_codex_session_to_pane(
jsonl_file, cwd, pid_info, cwd_map
)
# Determine status based on file age
file_age_minutes = (now - mtime) / 60
@@ -647,39 +654,119 @@ class AMCHandler(BaseHTTPRequestHandler):
except (OSError, json.JSONDecodeError):
continue
def _get_codex_zellij_panes(self):
"""Get Zellij panes running codex with their cwds."""
def _get_codex_pane_info(self):
"""Get Zellij pane info for running codex processes via process inspection.
Extracts ZELLIJ_PANE_ID from each codex process's inherited environment,
since zellij dump-layout doesn't provide pane IDs.
Results are cached for 5 seconds to avoid running pgrep/ps/lsof on
every dashboard poll.
Returns:
tuple: (pid_info, cwd_map)
pid_info: {pid_str: {"pane_id": str, "zellij_session": str}}
cwd_map: {cwd_path: {"session": str, "pane_id": str}}
"""
now = time.time()
if now < _codex_pane_cache["expires"]:
return _codex_pane_cache["pid_info"], _codex_pane_cache["cwd_map"]
pid_info = {}
cwd_map = {}
try:
# Step 1: Find codex process PIDs
result = subprocess.run(
["zellij", "action", "dump-layout"],
capture_output=True,
text=True,
timeout=2,
["pgrep", "-x", "codex"],
capture_output=True, text=True, timeout=2,
)
if result.returncode != 0:
return {}
pids = [p.strip() for p in result.stdout.strip().splitlines() if p.strip()] if result.returncode == 0 else []
# Parse layout to find codex panes
# Format: pane command="codex" cwd="projects/amc" ...
panes = {}
zellij_session = os.environ.get("ZELLIJ_SESSION_NAME", "")
# Step 2: Extract ZELLIJ env vars from each process
for pid in pids:
try:
env_result = subprocess.run(
["ps", "eww", "-o", "args=", "-p", pid],
capture_output=True, text=True, timeout=2,
)
if env_result.returncode != 0:
continue
for line in result.stdout.splitlines():
if 'command="codex"' in line:
# Extract cwd
cwd_match = re.search(r'cwd="([^"]+)"', line)
if cwd_match:
cwd = cwd_match.group(1)
# We don't have pane ID from dump-layout, but we can use focus
panes[cwd] = {
"session": zellij_session,
"pane_id": "", # Can't get from dump-layout
env_str = env_result.stdout
pane_match = re.search(r'ZELLIJ_PANE_ID=(\d+)', env_str)
session_match = re.search(r'ZELLIJ_SESSION_NAME=(\S+)', env_str)
if pane_match and session_match:
pid_info[pid] = {
"pane_id": pane_match.group(1),
"zellij_session": session_match.group(1),
}
except (subprocess.TimeoutExpired, Exception):
continue
return panes
# Step 3: Get CWDs via single batched lsof call
if pid_info:
pid_list = ",".join(pid_info.keys())
try:
cwd_result = subprocess.run(
["lsof", "-a", "-p", pid_list, "-d", "cwd", "-Fn"],
capture_output=True, text=True, timeout=3,
)
if cwd_result.returncode == 0:
current_pid = None
for line in cwd_result.stdout.splitlines():
if line.startswith("p"):
current_pid = line[1:]
elif line.startswith("n/") and current_pid and current_pid in pid_info:
cwd = line[1:]
info = pid_info[current_pid]
cwd_map[cwd] = {
"session": info["zellij_session"],
"pane_id": info["pane_id"],
}
except (subprocess.TimeoutExpired, Exception):
pass
except (subprocess.TimeoutExpired, FileNotFoundError, Exception):
return {}
pass
_codex_pane_cache["pid_info"] = pid_info
_codex_pane_cache["cwd_map"] = cwd_map
_codex_pane_cache["expires"] = now + 5 # Cache for 5 seconds
return pid_info, cwd_map
def _match_codex_session_to_pane(self, session_file, session_cwd, pid_info, cwd_map):
    """Match a Codex session file to a Zellij pane.

    Tries session-file-to-PID matching first (``lsof -t`` prints the PIDs
    that have the file open), then falls back to comparing normalized
    working directories.

    Args:
        session_file: Path of the Codex session file to look up.
        session_cwd: Working directory recorded for the session; may be empty.
        pid_info: {pid_str: {"pane_id": str, "zellij_session": str}}
        cwd_map: {cwd_path: {"session": str, "pane_id": str}}

    Returns:
        tuple: (zellij_session, pane_id) or ("", "")
    """
    # Precise match: which known codex process has this session file open?
    # With no known PIDs nothing lsof prints could match, so skip the
    # subprocess entirely instead of spawning it once per session file.
    if pid_info:
        try:
            result = subprocess.run(
                ["lsof", "-t", str(session_file)],
                capture_output=True, text=True, timeout=2,
            )
        except (subprocess.SubprocessError, OSError, ValueError):
            # Best effort: lsof missing, timed out, or produced unusable
            # output. Fall through to the CWD comparison below.
            result = None

        if result is not None and result.returncode == 0:
            for line in result.stdout.splitlines():
                info = pid_info.get(line.strip())
                if info is not None:
                    return info["zellij_session"], info["pane_id"]

    # Fall back to CWD match. An empty session cwd can never equal a
    # normalized pane cwd, so there is nothing to compare.
    if not session_cwd:
        return "", ""

    normalized_cwd = os.path.normpath(session_cwd)
    for pane_cwd, info in cwd_map.items():
        if os.path.normpath(pane_cwd) == normalized_cwd:
            return info["session"], info["pane_id"]

    return "", ""
def _json_error(self, code, message):
"""Send a JSON error response."""