From abbede923d47a85c7e8fa0569d4a70784bd3a4b2 Mon Sep 17 00:00:00 2001 From: teernisse Date: Fri, 6 Mar 2026 14:51:14 -0500 Subject: [PATCH] feat(server): filter system-injected messages from Claude conversations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add _is_system_injected() filter to conversation.py that drops user messages starting with known system-injected prefixes. These messages (hook outputs, system reminders, teammate notifications) appear in JSONL session logs as type: "user" with string content but are not human-typed input. Filtered prefixes: - <system-reminder> — Claude Code system context injection - <local-command-caveat> — local command hook output - <available-deferred-tools> — deferred tool discovery messages - <teammate-message — teammate notifications (no closing > because tag has attributes) This brings Claude parsing to parity with the Codex parser, which already filters system-injected content via SKIP_PREFIXES (line 222). The filter uses str.startswith(tuple) with lstrip() to handle leading whitespace. Applied at line 75 in the existing content-type guard chain. Affects both chat display and input history navigation — system noise is removed at the source so all consumers benefit. 
tests/test_conversation.py: - TestIsSystemInjected: 10 unit tests for the filter function covering each prefix, leading whitespace, normal messages, mid-string tags, empty strings, slash commands, and multiline content - TestClaudeSystemInjectedFiltering: 5 integration tests through the full parser verifying exclusion, sequential ID preservation after filtering, and all-system-message edge case Co-Authored-By: Claude Opus 4.6 --- amc_server/mixins/conversation.py | 17 ++++- tests/test_conversation.py | 106 +++++++++++++++++++++++++++++- 2 files changed, 121 insertions(+), 2 deletions(-) diff --git a/amc_server/mixins/conversation.py b/amc_server/mixins/conversation.py index b6e01e9..edf38ef 100644 --- a/amc_server/mixins/conversation.py +++ b/amc_server/mixins/conversation.py @@ -3,6 +3,21 @@ import os from amc_server.config import EVENTS_DIR +# Prefixes for system-injected content that appears as user messages +# but was not typed by the human (hook outputs, system reminders, etc.) +_SYSTEM_INJECTED_PREFIXES = ( + "", + "", + "", + "\nSome reminder text\n")) + + def test_local_command_caveat(self): + self.assertTrue(_is_system_injected("Caveat: The messages below...")) + + def test_available_deferred_tools(self): + self.assertTrue(_is_system_injected("\nAgent\nBash\n")) + + def test_teammate_message(self): + self.assertTrue(_is_system_injected('Review complete')) + + def test_leading_whitespace_stripped(self): + self.assertTrue(_is_system_injected(" \n content")) + + def test_normal_user_message(self): + self.assertFalse(_is_system_injected("Hello, Claude!")) + + def test_message_containing_tag_not_at_start(self): + self.assertFalse(_is_system_injected("Please check this thing")) + + def test_empty_string(self): + self.assertFalse(_is_system_injected("")) + + def test_slash_command(self): + self.assertFalse(_is_system_injected("/commit")) + + def test_multiline_user_message(self): + self.assertFalse(_is_system_injected("Fix this bug\n\nHere's the 
error:\nTypeError: foo is not a function")) + + +class TestClaudeSystemInjectedFiltering(unittest.TestCase): + """Integration tests: system-injected messages filtered from Claude conversation.""" + + def setUp(self): + self.handler = DummyConversationHandler() + + def _parse_with_messages(self, *user_contents): + """Helper: write JSONL with user messages, parse, return results.""" + with tempfile.NamedTemporaryFile(mode="w", suffix=".jsonl", delete=False) as f: + for content in user_contents: + f.write(json.dumps({ + "type": "user", + "timestamp": "2024-01-01T00:00:00Z", + "message": {"content": content} + }) + "\n") + path = Path(f.name) + + try: + with patch.object(self.handler, "_get_claude_conversation_file", return_value=path): + return self.handler._parse_claude_conversation("session123", "/project") + finally: + path.unlink() + + def test_system_reminder_excluded(self): + messages = self._parse_with_messages( + "real question", + "\nHook success\n", + ) + self.assertEqual(len(messages), 1) + self.assertEqual(messages[0]["content"], "real question") + + def test_local_command_caveat_excluded(self): + messages = self._parse_with_messages( + "Caveat: generated by local commands", + "what does this function do?", + ) + self.assertEqual(len(messages), 1) + self.assertEqual(messages[0]["content"], "what does this function do?") + + def test_teammate_message_excluded(self): + messages = self._parse_with_messages( + 'Task done', + "looks good, commit it", + ) + self.assertEqual(len(messages), 1) + self.assertEqual(messages[0]["content"], "looks good, commit it") + + def test_all_system_messages_excluded_preserves_ids(self): + """Message IDs should be sequential with no gaps from filtering.""" + messages = self._parse_with_messages( + "first real message", + "noise", + "\nAgent\n", + "second real message", + ) + self.assertEqual(len(messages), 2) + self.assertEqual(messages[0]["content"], "first real message") + self.assertEqual(messages[1]["content"], "second real 
message") + # IDs should be sequential (0, 1) not (0, 3) + self.assertTrue(messages[0]["id"].endswith("-0")) + self.assertTrue(messages[1]["id"].endswith("-1")) + + def test_only_system_messages_returns_empty(self): + messages = self._parse_with_messages( + "reminder", + "caveat", + ) + self.assertEqual(messages, []) + + class TestParseCodexConversation(unittest.TestCase): """Tests for _parse_codex_conversation edge cases."""