From 0e6520277875b286e0118f4bc3acfa99c2a32ab6 Mon Sep 17 00:00:00 2001
From: teernisse <teernisse@visiostack.com>
Date: Fri, 13 Feb 2026 14:08:25 -0500
Subject: [PATCH] feat(timeline): add DiscussionThread types and seed-phase
 discussion matching

Introduces the foundation for full discussion thread support in the
timeline pipeline. Adds three new domain types to timeline.rs:

- ThreadNote: individual note within a thread (id, author, body, timestamp)
- MatchedDiscussion: tracks discussions matched during seeding with their
  parent entity (issue or MR) for downstream collection
- DiscussionThread variant on TimelineEventType: carries a full thread of
  notes, sorted between NoteEvidence and CrossReferenced

Moves truncate_to_chars() from timeline_seed.rs to timeline.rs as pub(crate)
for reuse by the collect phase. Adds THREAD_NOTE_MAX_CHARS (2000) and
THREAD_MAX_NOTES (50) constants.

Upgrades the seed SQL in resolve_documents_to_entities() to resolve note
documents to their parent discussion via an additional LEFT JOIN chain
(notes -> discussions), using COALESCE to unify the entity resolution path
for both discussion and note source types. SeedResult gains a
matched_discussions field that captures deduplicated discussion matches.

Tests cover: discussion matching from discussion docs, note-to-parent
resolution, deduplication of same discussion across multiple docs, and
correct parent entity type (issue vs MR).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 src/core/timeline.rs            | 130 +++++++++++++++++++++++++++++++-
 src/core/timeline_seed.rs       |  81 ++++++++++++--------
 src/core/timeline_seed_tests.rs | 117 ++++++++++++++++++++++++----
 3 files changed, 278 insertions(+), 50 deletions(-)
diff --git a/src/core/timeline.rs b/src/core/timeline.rs
index bf89b26..4be0b50 100644
--- a/src/core/timeline.rs
+++ b/src/core/timeline.rs
@@ -49,6 +49,21 @@ impl Ord for TimelineEvent {
     }
 }
 
+/// Maximum characters per note body in a discussion thread.
+pub const THREAD_NOTE_MAX_CHARS: usize = 2000;
+
+/// Maximum notes per discussion thread before truncation.
+pub const THREAD_MAX_NOTES: usize = 50;
+
+/// A single note within a discussion thread.
+#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize)]
+pub struct ThreadNote {
+    pub note_id: i64,
+    pub author: Option<String>,
+    pub body: String,
+    pub created_at: i64,
+}
+
 /// Per spec Section 3.3. Serde tagged enum for JSON output.
 ///
 /// Variant declaration order defines the sort order within a timestamp+entity
@@ -78,11 +93,39 @@ pub enum TimelineEventType {
         snippet: String,
         discussion_id: Option<i64>,
     },
+    DiscussionThread {
+        discussion_id: i64,
+        notes: Vec<ThreadNote>,
+    },
     CrossReferenced {
         target: String,
     },
 }
 
+/// Truncate a string to at most `max_chars` characters on a safe UTF-8 boundary.
+pub(crate) fn truncate_to_chars(s: &str, max_chars: usize) -> String {
+    let char_count = s.chars().count();
+    if char_count <= max_chars {
+        return s.to_owned();
+    }
+
+    let byte_end = s
+        .char_indices()
+        .nth(max_chars)
+        .map(|(i, _)| i)
+        .unwrap_or(s.len());
+    s[..byte_end].to_owned()
+}
+
+/// A discussion matched during the seed phase, to be collected as a full thread.
+#[derive(Debug, Clone)]
+pub struct MatchedDiscussion {
+    pub discussion_id: i64,
+    pub entity_type: String,
+    pub entity_id: i64,
+    pub project_id: i64,
+}
+
 /// Internal entity reference used across pipeline stages.
 #[derive(Debug, Clone, Serialize)]
 pub struct EntityRef {
@@ -250,7 +293,7 @@ mod tests {
 
     #[test]
     fn test_timeline_event_type_variant_count() {
-        // Verify all 9 variants serialize without panic
+        // Verify all 10 variants serialize without panic
         let variants: Vec<TimelineEventType> = vec![
             TimelineEventType::Created,
             TimelineEventType::StateChanged {
@@ -274,13 +317,96 @@ mod tests {
                 snippet: "text".to_owned(),
                 discussion_id: None,
             },
+            TimelineEventType::DiscussionThread {
+                discussion_id: 1,
+                notes: vec![ThreadNote {
+                    note_id: 1,
+                    author: Some("alice".to_owned()),
+                    body: "hello".to_owned(),
+                    created_at: 1000,
+                }],
+            },
             TimelineEventType::CrossReferenced {
                 target: "!567".to_owned(),
             },
         ];
-        assert_eq!(variants.len(), 9);
+        assert_eq!(variants.len(), 10);
         for v in &variants {
             serde_json::to_value(v).unwrap();
         }
     }
+
+    #[test]
+    fn test_discussion_thread_serializes_tagged() {
+        let event_type = TimelineEventType::DiscussionThread {
+            discussion_id: 42,
+            notes: vec![
+                ThreadNote {
+                    note_id: 1,
+                    author: Some("alice".to_owned()),
+                    body: "first note".to_owned(),
+                    created_at: 1000,
+                },
+                ThreadNote {
+                    note_id: 2,
+                    author: Some("bob".to_owned()),
+                    body: "second note".to_owned(),
+                    created_at: 2000,
+                },
+            ],
+        };
+        let json = serde_json::to_value(&event_type).unwrap();
+        assert_eq!(json["kind"], "discussion_thread");
+        assert_eq!(json["discussion_id"], 42);
+        assert_eq!(json["notes"].as_array().unwrap().len(), 2);
+        assert_eq!(json["notes"][0]["note_id"], 1);
+        assert_eq!(json["notes"][0]["author"], "alice");
+        assert_eq!(json["notes"][0]["body"], "first note");
+        assert_eq!(json["notes"][1]["note_id"], 2);
+    }
+
+    #[test]
+    fn test_discussion_thread_sort_order() {
+        // DiscussionThread should sort after NoteEvidence, before CrossReferenced
+        let note_ev = TimelineEventType::NoteEvidence {
+            note_id: 1,
+            snippet: "a".to_owned(),
+            discussion_id: None,
+        };
+        let thread = TimelineEventType::DiscussionThread {
+            discussion_id: 1,
+            notes: vec![],
+        };
+        let cross_ref = TimelineEventType::CrossReferenced {
+            target: "!1".to_owned(),
+        };
+
+        assert!(note_ev < thread);
+        assert!(thread < cross_ref);
+    }
+
+    #[test]
+    fn test_thread_note_ord() {
+        let a = ThreadNote {
+            note_id: 1,
+            author: Some("alice".to_owned()),
+            body: "first".to_owned(),
+            created_at: 1000,
+        };
+        let b = ThreadNote {
+            note_id: 2,
+            author: Some("bob".to_owned()),
+            body: "second".to_owned(),
+            created_at: 2000,
+        };
+        // ThreadNote derives Ord — note_id is the first field, so ordering is by note_id
+        assert!(a < b);
+    }
+
+    #[test]
+    fn test_truncate_to_chars() {
+        assert_eq!(truncate_to_chars("hello", 200), "hello");
+        let long = "a".repeat(300);
+        assert_eq!(truncate_to_chars(&long, 200).chars().count(), 200);
+    }
 }
diff --git a/src/core/timeline_seed.rs b/src/core/timeline_seed.rs
index 1610211..b0bcbeb 100644
--- a/src/core/timeline_seed.rs
+++ b/src/core/timeline_seed.rs
@@ -4,7 +4,10 @@ use rusqlite::Connection;
 use tracing::debug;
 
 use crate::core::error::Result;
-use crate::core::timeline::{EntityRef, TimelineEvent, TimelineEventType, resolve_entity_ref};
+use crate::core::timeline::{
+    EntityRef, MatchedDiscussion, TimelineEvent, TimelineEventType, resolve_entity_ref,
+    truncate_to_chars,
+};
 use crate::embedding::ollama::OllamaClient;
 use crate::search::{FtsQueryMode, SearchFilters, SearchMode, search_hybrid, to_fts_query};
 
@@ -12,6 +15,8 @@ use crate::search::{FtsQueryMode, SearchFilters, SearchMode, search_hybrid, to_f
 pub struct SeedResult {
     pub seed_entities: Vec<EntityRef>,
     pub evidence_notes: Vec<TimelineEvent>,
+    /// Discussions matched during seeding, to be collected as full threads.
+    pub matched_discussions: Vec<MatchedDiscussion>,
     /// The search mode actually used (hybrid with fallback info).
     pub search_mode: String,
 }
@@ -38,6 +43,7 @@ pub async fn seed_timeline(
         return Ok(SeedResult {
             seed_entities: Vec::new(),
             evidence_notes: Vec::new(),
+            matched_discussions: Vec::new(),
             search_mode: "lexical".to_owned(),
         });
     }
@@ -76,7 +82,7 @@ pub async fn seed_timeline(
         debug!(warning = %w, "hybrid search warning during timeline seeding");
     }
 
-    let seed_entities = resolve_documents_to_entities(
+    let (seed_entities, matched_discussions) = resolve_documents_to_entities(
         conn,
         &hybrid_results
             .iter()
@@ -91,19 +97,21 @@ pub async fn seed_timeline(
     Ok(SeedResult {
         seed_entities,
         evidence_notes,
+        matched_discussions,
         search_mode,
     })
 }
 
-/// Resolve a list of document IDs to deduplicated entity refs.
-/// Discussion documents are resolved to their parent entity (issue or MR).
+/// Resolve a list of document IDs to deduplicated entity refs and matched discussions.
+/// Discussion and note documents are resolved to their parent entity (issue or MR).
+/// Returns (entities, matched_discussions).
 fn resolve_documents_to_entities(
     conn: &Connection,
     document_ids: &[i64],
     max_entities: usize,
-) -> Result<Vec<EntityRef>> {
+) -> Result<(Vec<EntityRef>, Vec<MatchedDiscussion>)> {
     if document_ids.is_empty() {
-        return Ok(Vec::new());
+        return Ok((Vec::new(), Vec::new()));
     }
 
     let placeholders: String = document_ids
@@ -114,9 +122,13 @@ fn resolve_documents_to_entities(
     let sql = format!(
         r"
         SELECT d.source_type, d.source_id, d.project_id,
-               disc.issue_id, disc.merge_request_id
+               COALESCE(disc.issue_id, note_disc.issue_id) AS issue_id,
+               COALESCE(disc.merge_request_id, note_disc.merge_request_id) AS mr_id,
+               COALESCE(disc.id, note_disc.id) AS discussion_id
         FROM documents d
         LEFT JOIN discussions disc ON disc.id = d.source_id AND d.source_type = 'discussion'
+        LEFT JOIN notes n ON n.id = d.source_id AND d.source_type = 'note'
+        LEFT JOIN discussions note_disc ON note_disc.id = n.discussion_id AND d.source_type = 'note'
         WHERE d.id IN ({placeholders})
         ORDER BY CASE d.id {order_clause} END
         ",
@@ -135,37 +147,55 @@ fn resolve_documents_to_entities(
         .collect();
     let rows = stmt.query_map(params.as_slice(), |row| {
         Ok((
-            row.get::<_, String>(0)?,
-            row.get::<_, i64>(1)?,
-            row.get::<_, i64>(2)?,
-            row.get::<_, Option<i64>>(3)?,
-            row.get::<_, Option<i64>>(4)?,
+            row.get::<_, String>(0)?,      // source_type
+            row.get::<_, i64>(1)?,         // source_id
+            row.get::<_, i64>(2)?,         // project_id
+            row.get::<_, Option<i64>>(3)?, // issue_id (coalesced)
+            row.get::<_, Option<i64>>(4)?, // mr_id (coalesced)
+            row.get::<_, Option<i64>>(5)?, // discussion_id (coalesced)
         ))
     })?;
 
-    let mut seen = HashSet::new();
+    let mut seen_entities = HashSet::new();
+    let mut seen_discussions = HashSet::new();
     let mut entities = Vec::new();
+    let mut matched_discussions = Vec::new();
 
     for row_result in rows {
-        let (source_type, source_id, proj_id, disc_issue_id, disc_mr_id) = row_result?;
+        let (source_type, source_id, proj_id, disc_issue_id, disc_mr_id, discussion_id) =
+            row_result?;
 
         let (entity_type, entity_id) = match source_type.as_str() {
             "issue" => ("issue".to_owned(), source_id),
             "merge_request" => ("merge_request".to_owned(), source_id),
-            "discussion" => {
+            "discussion" | "note" => {
                 if let Some(issue_id) = disc_issue_id {
                     ("issue".to_owned(), issue_id)
                 } else if let Some(mr_id) = disc_mr_id {
                     ("merge_request".to_owned(), mr_id)
                 } else {
-                    continue; // orphaned discussion
+                    continue; // orphaned discussion/note
                 }
             }
             _ => continue,
         };
 
+        // Capture matched discussion (deduplicated)
+        if let Some(disc_id) = discussion_id
+            && (source_type == "discussion" || source_type == "note")
+            && seen_discussions.insert(disc_id)
+        {
+            matched_discussions.push(MatchedDiscussion {
+                discussion_id: disc_id,
+                entity_type: entity_type.clone(),
+                entity_id,
+                project_id: proj_id,
+            });
+        }
+
+        // Entity dedup
         let key = (entity_type.clone(), entity_id);
-        if !seen.insert(key) {
+        if !seen_entities.insert(key) {
             continue;
         }
 
@@ -179,7 +209,7 @@ fn resolve_documents_to_entities(
         }
     }
 
-    Ok(entities)
+    Ok((entities, matched_discussions))
 }
 
 /// Find evidence notes: FTS5-matched discussion notes that provide context.
@@ -275,21 +305,6 @@ fn find_evidence_notes(
     Ok(events)
 }
 
-/// Truncate a string to at most `max_chars` characters on a safe UTF-8 boundary.
-fn truncate_to_chars(s: &str, max_chars: usize) -> String {
-    let char_count = s.chars().count();
-    if char_count <= max_chars {
-        return s.to_owned();
-    }
-
-    let byte_end = s
-        .char_indices()
-        .nth(max_chars)
-        .map(|(i, _)| i)
-        .unwrap_or(s.len());
-    s[..byte_end].to_owned()
-}
-
 #[cfg(test)]
 #[path = "timeline_seed_tests.rs"]
 mod tests;
diff --git a/src/core/timeline_seed_tests.rs b/src/core/timeline_seed_tests.rs
index 7e4e379..119fb2d 100644
--- a/src/core/timeline_seed_tests.rs
+++ b/src/core/timeline_seed_tests.rs
@@ -316,23 +316,110 @@ async fn test_seed_respects_project_filter() {
     assert_eq!(result.seed_entities[0].project_path, "group/project");
 }
 
-#[test]
-fn test_truncate_to_chars_short() {
-    assert_eq!(truncate_to_chars("hello", 200), "hello");
+// ─── Matched discussion tests ───────────────────────────────────────────────
+
+#[tokio::test]
+async fn test_seed_captures_matched_discussions_from_discussion_doc() {
+    let conn = setup_test_db();
+    let project_id = insert_test_project(&conn);
+    let issue_id = insert_test_issue(&conn, project_id, 1);
+    let disc_id = insert_discussion(&conn, project_id, Some(issue_id), None);
+    insert_document(
+        &conn,
+        "discussion",
+        disc_id,
+        project_id,
+        "deployment pipeline authentication",
+    );
+
+    let result = seed_timeline(&conn, None, "deployment", None, None, 50, 10)
+        .await
+        .unwrap();
+    assert_eq!(result.matched_discussions.len(), 1);
+    assert_eq!(result.matched_discussions[0].discussion_id, disc_id);
+    assert_eq!(result.matched_discussions[0].entity_type, "issue");
+    assert_eq!(result.matched_discussions[0].entity_id, issue_id);
 }
 
-#[test]
-fn test_truncate_to_chars_long() {
-    let long = "a".repeat(300);
-    let result = truncate_to_chars(&long, 200);
-    assert_eq!(result.chars().count(), 200);
+#[tokio::test]
+async fn test_seed_captures_matched_discussions_from_note_doc() {
+    let conn = setup_test_db();
+    let project_id = insert_test_project(&conn);
+    let issue_id = insert_test_issue(&conn, project_id, 1);
+    let disc_id = insert_discussion(&conn, project_id, Some(issue_id), None);
+    let note_id = insert_note(&conn, disc_id, project_id, "note about deployment", false);
+    insert_document(
+        &conn,
+        "note",
+        note_id,
+        project_id,
+        "deployment configuration details",
+    );
+
+    let result = seed_timeline(&conn, None, "deployment", None, None, 50, 10)
+        .await
+        .unwrap();
+    assert_eq!(
+        result.matched_discussions.len(),
+        1,
+        "Note doc should resolve to parent discussion"
+    );
+    assert_eq!(result.matched_discussions[0].discussion_id, disc_id);
+    assert_eq!(result.matched_discussions[0].entity_type, "issue");
 }
 
-#[test]
-fn test_truncate_to_chars_multibyte() {
-    let s = "\u{1F600}".repeat(300); // emoji
-    let result = truncate_to_chars(&s, 200);
-    assert_eq!(result.chars().count(), 200);
-    // Verify valid UTF-8
-    assert!(std::str::from_utf8(result.as_bytes()).is_ok());
+#[tokio::test]
+async fn test_seed_deduplicates_matched_discussions() {
+    let conn = setup_test_db();
+    let project_id = insert_test_project(&conn);
+    let issue_id = insert_test_issue(&conn, project_id, 1);
+    let disc_id = insert_discussion(&conn, project_id, Some(issue_id), None);
+
+    // Two docs referencing the same discussion
+    insert_document(
+        &conn,
+        "discussion",
+        disc_id,
+        project_id,
+        "deployment pipeline first doc",
+    );
+    let note_id = insert_note(&conn, disc_id, project_id, "deployment note", false);
+    insert_document(
+        &conn,
+        "note",
+        note_id,
+        project_id,
+        "deployment pipeline second doc",
+    );
+
+    let result = seed_timeline(&conn, None, "deployment", None, None, 50, 10)
+        .await
+        .unwrap();
+    assert_eq!(
+        result.matched_discussions.len(),
+        1,
+        "Same discussion_id from two docs should deduplicate"
+    );
+}
+
+#[tokio::test]
+async fn test_seed_matched_discussions_have_correct_parent_entity() {
+    let conn = setup_test_db();
+    let project_id = insert_test_project(&conn);
+    let mr_id = insert_test_mr(&conn, project_id, 99);
+    let disc_id = insert_discussion(&conn, project_id, None, Some(mr_id));
+    insert_document(
+        &conn,
+        "discussion",
+        disc_id,
+        project_id,
+        "deployment pipeline for merge request",
+    );
+
+    let result = seed_timeline(&conn, None, "deployment", None, None, 50, 10)
+        .await
+        .unwrap();
+    assert_eq!(result.matched_discussions.len(), 1);
+    assert_eq!(result.matched_discussions[0].entity_type, "merge_request");
+    assert_eq!(result.matched_discussions[0].entity_id, mr_id);
 }