feat(timeline): add DiscussionThread types and seed-phase discussion matching
Introduces the foundation for full discussion thread support in the timeline pipeline. Adds three new domain types to timeline.rs: - ThreadNote: individual note within a thread (id, author, body, timestamp) - MatchedDiscussion: tracks discussions matched during seeding with their parent entity (issue or MR) for downstream collection - DiscussionThread variant on TimelineEventType: carries a full thread of notes, sorted between NoteEvidence and CrossReferenced Moves truncate_to_chars() from timeline_seed.rs to timeline.rs as pub(crate) for reuse by the collect phase. Adds THREAD_NOTE_MAX_CHARS (2000) and THREAD_MAX_NOTES (50) constants. Upgrades the seed SQL in resolve_documents_to_entities() to resolve note documents to their parent discussion via an additional LEFT JOIN chain (notes -> discussions), using COALESCE to unify the entity resolution path for both discussion and note source types. SeedResult gains a matched_discussions field that captures deduplicated discussion matches. Tests cover: discussion matching from discussion docs, note-to-parent resolution, deduplication of same discussion across multiple docs, and correct parent entity type (issue vs MR). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -49,6 +49,21 @@ impl Ord for TimelineEvent {
|
||||
}
|
||||
}
|
||||
|
||||
/// Maximum characters per note body in a discussion thread.
|
||||
pub const THREAD_NOTE_MAX_CHARS: usize = 2000;
|
||||
|
||||
/// Maximum notes per discussion thread before truncation.
|
||||
pub const THREAD_MAX_NOTES: usize = 50;
|
||||
|
||||
/// A single note within a discussion thread.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize)]
|
||||
pub struct ThreadNote {
|
||||
pub note_id: i64,
|
||||
pub author: Option<String>,
|
||||
pub body: String,
|
||||
pub created_at: i64,
|
||||
}
|
||||
|
||||
/// Per spec Section 3.3. Serde tagged enum for JSON output.
|
||||
///
|
||||
/// Variant declaration order defines the sort order within a timestamp+entity
|
||||
@@ -78,11 +93,39 @@ pub enum TimelineEventType {
|
||||
snippet: String,
|
||||
discussion_id: Option<i64>,
|
||||
},
|
||||
DiscussionThread {
|
||||
discussion_id: i64,
|
||||
notes: Vec<ThreadNote>,
|
||||
},
|
||||
CrossReferenced {
|
||||
target: String,
|
||||
},
|
||||
}
|
||||
|
||||
/// Truncate a string to at most `max_chars` characters on a safe UTF-8 boundary.
|
||||
pub(crate) fn truncate_to_chars(s: &str, max_chars: usize) -> String {
|
||||
let char_count = s.chars().count();
|
||||
if char_count <= max_chars {
|
||||
return s.to_owned();
|
||||
}
|
||||
|
||||
let byte_end = s
|
||||
.char_indices()
|
||||
.nth(max_chars)
|
||||
.map(|(i, _)| i)
|
||||
.unwrap_or(s.len());
|
||||
s[..byte_end].to_owned()
|
||||
}
|
||||
|
||||
/// A discussion matched during the seed phase, to be collected as a full thread.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct MatchedDiscussion {
|
||||
pub discussion_id: i64,
|
||||
pub entity_type: String,
|
||||
pub entity_id: i64,
|
||||
pub project_id: i64,
|
||||
}
|
||||
|
||||
/// Internal entity reference used across pipeline stages.
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct EntityRef {
|
||||
@@ -250,7 +293,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_timeline_event_type_variant_count() {
|
||||
// Verify all 9 variants serialize without panic
|
||||
// Verify all 10 variants serialize without panic
|
||||
let variants: Vec<TimelineEventType> = vec![
|
||||
TimelineEventType::Created,
|
||||
TimelineEventType::StateChanged {
|
||||
@@ -274,13 +317,96 @@ mod tests {
|
||||
snippet: "text".to_owned(),
|
||||
discussion_id: None,
|
||||
},
|
||||
TimelineEventType::DiscussionThread {
|
||||
discussion_id: 1,
|
||||
notes: vec![ThreadNote {
|
||||
note_id: 1,
|
||||
author: Some("alice".to_owned()),
|
||||
body: "hello".to_owned(),
|
||||
created_at: 1000,
|
||||
}],
|
||||
},
|
||||
TimelineEventType::CrossReferenced {
|
||||
target: "!567".to_owned(),
|
||||
},
|
||||
];
|
||||
assert_eq!(variants.len(), 9);
|
||||
assert_eq!(variants.len(), 10);
|
||||
for v in &variants {
|
||||
serde_json::to_value(v).unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_discussion_thread_serializes_tagged() {
|
||||
let event_type = TimelineEventType::DiscussionThread {
|
||||
discussion_id: 42,
|
||||
notes: vec![
|
||||
ThreadNote {
|
||||
note_id: 1,
|
||||
author: Some("alice".to_owned()),
|
||||
body: "first note".to_owned(),
|
||||
created_at: 1000,
|
||||
},
|
||||
ThreadNote {
|
||||
note_id: 2,
|
||||
author: Some("bob".to_owned()),
|
||||
body: "second note".to_owned(),
|
||||
created_at: 2000,
|
||||
},
|
||||
],
|
||||
};
|
||||
let json = serde_json::to_value(&event_type).unwrap();
|
||||
assert_eq!(json["kind"], "discussion_thread");
|
||||
assert_eq!(json["discussion_id"], 42);
|
||||
assert_eq!(json["notes"].as_array().unwrap().len(), 2);
|
||||
assert_eq!(json["notes"][0]["note_id"], 1);
|
||||
assert_eq!(json["notes"][0]["author"], "alice");
|
||||
assert_eq!(json["notes"][0]["body"], "first note");
|
||||
assert_eq!(json["notes"][1]["note_id"], 2);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_discussion_thread_sort_order() {
|
||||
// DiscussionThread should sort after NoteEvidence, before CrossReferenced
|
||||
let note_ev = TimelineEventType::NoteEvidence {
|
||||
note_id: 1,
|
||||
snippet: "a".to_owned(),
|
||||
discussion_id: None,
|
||||
};
|
||||
let thread = TimelineEventType::DiscussionThread {
|
||||
discussion_id: 1,
|
||||
notes: vec![],
|
||||
};
|
||||
let cross_ref = TimelineEventType::CrossReferenced {
|
||||
target: "!1".to_owned(),
|
||||
};
|
||||
|
||||
assert!(note_ev < thread);
|
||||
assert!(thread < cross_ref);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_thread_note_ord() {
|
||||
let a = ThreadNote {
|
||||
note_id: 1,
|
||||
author: Some("alice".to_owned()),
|
||||
body: "first".to_owned(),
|
||||
created_at: 1000,
|
||||
};
|
||||
let b = ThreadNote {
|
||||
note_id: 2,
|
||||
author: Some("bob".to_owned()),
|
||||
body: "second".to_owned(),
|
||||
created_at: 2000,
|
||||
};
|
||||
// ThreadNote derives Ord — note_id is the first field, so ordering is by note_id
|
||||
assert!(a < b);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_truncate_to_chars() {
|
||||
assert_eq!(truncate_to_chars("hello", 200), "hello");
|
||||
let long = "a".repeat(300);
|
||||
assert_eq!(truncate_to_chars(&long, 200).chars().count(), 200);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,7 +4,10 @@ use rusqlite::Connection;
|
||||
use tracing::debug;
|
||||
|
||||
use crate::core::error::Result;
|
||||
use crate::core::timeline::{EntityRef, TimelineEvent, TimelineEventType, resolve_entity_ref};
|
||||
use crate::core::timeline::{
|
||||
EntityRef, MatchedDiscussion, TimelineEvent, TimelineEventType, resolve_entity_ref,
|
||||
truncate_to_chars,
|
||||
};
|
||||
use crate::embedding::ollama::OllamaClient;
|
||||
use crate::search::{FtsQueryMode, SearchFilters, SearchMode, search_hybrid, to_fts_query};
|
||||
|
||||
@@ -12,6 +15,8 @@ use crate::search::{FtsQueryMode, SearchFilters, SearchMode, search_hybrid, to_f
|
||||
pub struct SeedResult {
|
||||
pub seed_entities: Vec<EntityRef>,
|
||||
pub evidence_notes: Vec<TimelineEvent>,
|
||||
/// Discussions matched during seeding, to be collected as full threads.
|
||||
pub matched_discussions: Vec<MatchedDiscussion>,
|
||||
/// The search mode actually used (hybrid with fallback info).
|
||||
pub search_mode: String,
|
||||
}
|
||||
@@ -38,6 +43,7 @@ pub async fn seed_timeline(
|
||||
return Ok(SeedResult {
|
||||
seed_entities: Vec::new(),
|
||||
evidence_notes: Vec::new(),
|
||||
matched_discussions: Vec::new(),
|
||||
search_mode: "lexical".to_owned(),
|
||||
});
|
||||
}
|
||||
@@ -76,7 +82,7 @@ pub async fn seed_timeline(
|
||||
debug!(warning = %w, "hybrid search warning during timeline seeding");
|
||||
}
|
||||
|
||||
let seed_entities = resolve_documents_to_entities(
|
||||
let (seed_entities, matched_discussions) = resolve_documents_to_entities(
|
||||
conn,
|
||||
&hybrid_results
|
||||
.iter()
|
||||
@@ -91,19 +97,21 @@ pub async fn seed_timeline(
|
||||
Ok(SeedResult {
|
||||
seed_entities,
|
||||
evidence_notes,
|
||||
matched_discussions,
|
||||
search_mode,
|
||||
})
|
||||
}
|
||||
|
||||
/// Resolve a list of document IDs to deduplicated entity refs.
|
||||
/// Discussion documents are resolved to their parent entity (issue or MR).
|
||||
/// Resolve a list of document IDs to deduplicated entity refs and matched discussions.
|
||||
/// Discussion and note documents are resolved to their parent entity (issue or MR).
|
||||
/// Returns (entities, matched_discussions).
|
||||
fn resolve_documents_to_entities(
|
||||
conn: &Connection,
|
||||
document_ids: &[i64],
|
||||
max_entities: usize,
|
||||
) -> Result<Vec<EntityRef>> {
|
||||
) -> Result<(Vec<EntityRef>, Vec<MatchedDiscussion>)> {
|
||||
if document_ids.is_empty() {
|
||||
return Ok(Vec::new());
|
||||
return Ok((Vec::new(), Vec::new()));
|
||||
}
|
||||
|
||||
let placeholders: String = document_ids
|
||||
@@ -114,9 +122,13 @@ fn resolve_documents_to_entities(
|
||||
let sql = format!(
|
||||
r"
|
||||
SELECT d.source_type, d.source_id, d.project_id,
|
||||
disc.issue_id, disc.merge_request_id
|
||||
COALESCE(disc.issue_id, note_disc.issue_id) AS issue_id,
|
||||
COALESCE(disc.merge_request_id, note_disc.merge_request_id) AS mr_id,
|
||||
COALESCE(disc.id, note_disc.id) AS discussion_id
|
||||
FROM documents d
|
||||
LEFT JOIN discussions disc ON disc.id = d.source_id AND d.source_type = 'discussion'
|
||||
LEFT JOIN notes n ON n.id = d.source_id AND d.source_type = 'note'
|
||||
LEFT JOIN discussions note_disc ON note_disc.id = n.discussion_id AND d.source_type = 'note'
|
||||
WHERE d.id IN ({placeholders})
|
||||
ORDER BY CASE d.id {order_clause} END
|
||||
",
|
||||
@@ -135,37 +147,55 @@ fn resolve_documents_to_entities(
|
||||
.collect();
|
||||
let rows = stmt.query_map(params.as_slice(), |row| {
|
||||
Ok((
|
||||
row.get::<_, String>(0)?,
|
||||
row.get::<_, i64>(1)?,
|
||||
row.get::<_, i64>(2)?,
|
||||
row.get::<_, Option<i64>>(3)?,
|
||||
row.get::<_, Option<i64>>(4)?,
|
||||
row.get::<_, String>(0)?, // source_type
|
||||
row.get::<_, i64>(1)?, // source_id
|
||||
row.get::<_, i64>(2)?, // project_id
|
||||
row.get::<_, Option<i64>>(3)?, // issue_id (coalesced)
|
||||
row.get::<_, Option<i64>>(4)?, // mr_id (coalesced)
|
||||
row.get::<_, Option<i64>>(5)?, // discussion_id (coalesced)
|
||||
))
|
||||
})?;
|
||||
|
||||
let mut seen = HashSet::new();
|
||||
let mut seen_entities = HashSet::new();
|
||||
let mut seen_discussions = HashSet::new();
|
||||
let mut entities = Vec::new();
|
||||
let mut matched_discussions = Vec::new();
|
||||
|
||||
for row_result in rows {
|
||||
let (source_type, source_id, proj_id, disc_issue_id, disc_mr_id) = row_result?;
|
||||
let (source_type, source_id, proj_id, disc_issue_id, disc_mr_id, discussion_id) =
|
||||
row_result?;
|
||||
|
||||
let (entity_type, entity_id) = match source_type.as_str() {
|
||||
"issue" => ("issue".to_owned(), source_id),
|
||||
"merge_request" => ("merge_request".to_owned(), source_id),
|
||||
"discussion" => {
|
||||
"discussion" | "note" => {
|
||||
if let Some(issue_id) = disc_issue_id {
|
||||
("issue".to_owned(), issue_id)
|
||||
} else if let Some(mr_id) = disc_mr_id {
|
||||
("merge_request".to_owned(), mr_id)
|
||||
} else {
|
||||
continue; // orphaned discussion
|
||||
continue; // orphaned discussion/note
|
||||
}
|
||||
}
|
||||
_ => continue,
|
||||
};
|
||||
|
||||
// Capture matched discussion (deduplicated)
|
||||
if let Some(disc_id) = discussion_id
|
||||
&& (source_type == "discussion" || source_type == "note")
|
||||
&& seen_discussions.insert(disc_id)
|
||||
{
|
||||
matched_discussions.push(MatchedDiscussion {
|
||||
discussion_id: disc_id,
|
||||
entity_type: entity_type.clone(),
|
||||
entity_id,
|
||||
project_id: proj_id,
|
||||
});
|
||||
}
|
||||
|
||||
// Entity dedup
|
||||
let key = (entity_type.clone(), entity_id);
|
||||
if !seen.insert(key) {
|
||||
if !seen_entities.insert(key) {
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -179,7 +209,7 @@ fn resolve_documents_to_entities(
|
||||
}
|
||||
}
|
||||
|
||||
Ok(entities)
|
||||
Ok((entities, matched_discussions))
|
||||
}
|
||||
|
||||
/// Find evidence notes: FTS5-matched discussion notes that provide context.
|
||||
@@ -275,21 +305,6 @@ fn find_evidence_notes(
|
||||
Ok(events)
|
||||
}
|
||||
|
||||
/// Truncate a string to at most `max_chars` characters on a safe UTF-8 boundary.
|
||||
fn truncate_to_chars(s: &str, max_chars: usize) -> String {
|
||||
let char_count = s.chars().count();
|
||||
if char_count <= max_chars {
|
||||
return s.to_owned();
|
||||
}
|
||||
|
||||
let byte_end = s
|
||||
.char_indices()
|
||||
.nth(max_chars)
|
||||
.map(|(i, _)| i)
|
||||
.unwrap_or(s.len());
|
||||
s[..byte_end].to_owned()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[path = "timeline_seed_tests.rs"]
|
||||
mod tests;
|
||||
|
||||
@@ -316,23 +316,110 @@ async fn test_seed_respects_project_filter() {
|
||||
assert_eq!(result.seed_entities[0].project_path, "group/project");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_truncate_to_chars_short() {
|
||||
assert_eq!(truncate_to_chars("hello", 200), "hello");
|
||||
// ─── Matched discussion tests ───────────────────────────────────────────────
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_seed_captures_matched_discussions_from_discussion_doc() {
|
||||
let conn = setup_test_db();
|
||||
let project_id = insert_test_project(&conn);
|
||||
let issue_id = insert_test_issue(&conn, project_id, 1);
|
||||
let disc_id = insert_discussion(&conn, project_id, Some(issue_id), None);
|
||||
insert_document(
|
||||
&conn,
|
||||
"discussion",
|
||||
disc_id,
|
||||
project_id,
|
||||
"deployment pipeline authentication",
|
||||
);
|
||||
|
||||
let result = seed_timeline(&conn, None, "deployment", None, None, 50, 10)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(result.matched_discussions.len(), 1);
|
||||
assert_eq!(result.matched_discussions[0].discussion_id, disc_id);
|
||||
assert_eq!(result.matched_discussions[0].entity_type, "issue");
|
||||
assert_eq!(result.matched_discussions[0].entity_id, issue_id);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_truncate_to_chars_long() {
|
||||
let long = "a".repeat(300);
|
||||
let result = truncate_to_chars(&long, 200);
|
||||
assert_eq!(result.chars().count(), 200);
|
||||
#[tokio::test]
|
||||
async fn test_seed_captures_matched_discussions_from_note_doc() {
|
||||
let conn = setup_test_db();
|
||||
let project_id = insert_test_project(&conn);
|
||||
let issue_id = insert_test_issue(&conn, project_id, 1);
|
||||
let disc_id = insert_discussion(&conn, project_id, Some(issue_id), None);
|
||||
let note_id = insert_note(&conn, disc_id, project_id, "note about deployment", false);
|
||||
insert_document(
|
||||
&conn,
|
||||
"note",
|
||||
note_id,
|
||||
project_id,
|
||||
"deployment configuration details",
|
||||
);
|
||||
|
||||
let result = seed_timeline(&conn, None, "deployment", None, None, 50, 10)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
result.matched_discussions.len(),
|
||||
1,
|
||||
"Note doc should resolve to parent discussion"
|
||||
);
|
||||
assert_eq!(result.matched_discussions[0].discussion_id, disc_id);
|
||||
assert_eq!(result.matched_discussions[0].entity_type, "issue");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_truncate_to_chars_multibyte() {
|
||||
let s = "\u{1F600}".repeat(300); // emoji
|
||||
let result = truncate_to_chars(&s, 200);
|
||||
assert_eq!(result.chars().count(), 200);
|
||||
// Verify valid UTF-8
|
||||
assert!(std::str::from_utf8(result.as_bytes()).is_ok());
|
||||
#[tokio::test]
|
||||
async fn test_seed_deduplicates_matched_discussions() {
|
||||
let conn = setup_test_db();
|
||||
let project_id = insert_test_project(&conn);
|
||||
let issue_id = insert_test_issue(&conn, project_id, 1);
|
||||
let disc_id = insert_discussion(&conn, project_id, Some(issue_id), None);
|
||||
|
||||
// Two docs referencing the same discussion
|
||||
insert_document(
|
||||
&conn,
|
||||
"discussion",
|
||||
disc_id,
|
||||
project_id,
|
||||
"deployment pipeline first doc",
|
||||
);
|
||||
let note_id = insert_note(&conn, disc_id, project_id, "deployment note", false);
|
||||
insert_document(
|
||||
&conn,
|
||||
"note",
|
||||
note_id,
|
||||
project_id,
|
||||
"deployment pipeline second doc",
|
||||
);
|
||||
|
||||
let result = seed_timeline(&conn, None, "deployment", None, None, 50, 10)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
result.matched_discussions.len(),
|
||||
1,
|
||||
"Same discussion_id from two docs should deduplicate"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_seed_matched_discussions_have_correct_parent_entity() {
|
||||
let conn = setup_test_db();
|
||||
let project_id = insert_test_project(&conn);
|
||||
let mr_id = insert_test_mr(&conn, project_id, 99);
|
||||
let disc_id = insert_discussion(&conn, project_id, None, Some(mr_id));
|
||||
insert_document(
|
||||
&conn,
|
||||
"discussion",
|
||||
disc_id,
|
||||
project_id,
|
||||
"deployment pipeline for merge request",
|
||||
);
|
||||
|
||||
let result = seed_timeline(&conn, None, "deployment", None, None, 50, 10)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(result.matched_discussions.len(), 1);
|
||||
assert_eq!(result.matched_discussions[0].entity_type, "merge_request");
|
||||
assert_eq!(result.matched_discussions[0].entity_id, mr_id);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user