feat(timeline): add DiscussionThread types and seed-phase discussion matching

Introduces the foundation for full discussion thread support in the
timeline pipeline. Adds three new domain types to timeline.rs:

- ThreadNote: individual note within a thread (id, author, body, timestamp)
- MatchedDiscussion: tracks discussions matched during seeding with their
  parent entity (issue or MR) for downstream collection
- DiscussionThread variant on TimelineEventType: carries a full thread of
  notes, sorted between NoteEvidence and CrossReferenced

Moves truncate_to_chars() from timeline_seed.rs to timeline.rs as pub(crate)
for reuse by the collect phase. Adds THREAD_NOTE_MAX_CHARS (2000) and
THREAD_MAX_NOTES (50) constants.

Upgrades the seed SQL in resolve_documents_to_entities() to resolve note
documents to their parent discussion via an additional LEFT JOIN chain
(notes -> discussions), using COALESCE to unify the entity resolution path
for both discussion and note source types. SeedResult gains a
matched_discussions field that captures deduplicated discussion matches.

Tests cover: discussion matching from discussion docs, note-to-parent
resolution, deduplication of same discussion across multiple docs, and
correct parent entity type (issue vs MR).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
teernisse
2026-02-13 14:08:25 -05:00
parent f439c42b3d
commit 0e65202778
3 changed files with 278 additions and 50 deletions

View File

@@ -4,7 +4,10 @@ use rusqlite::Connection;
use tracing::debug;
use crate::core::error::Result;
use crate::core::timeline::{EntityRef, TimelineEvent, TimelineEventType, resolve_entity_ref};
use crate::core::timeline::{
EntityRef, MatchedDiscussion, TimelineEvent, TimelineEventType, resolve_entity_ref,
truncate_to_chars,
};
use crate::embedding::ollama::OllamaClient;
use crate::search::{FtsQueryMode, SearchFilters, SearchMode, search_hybrid, to_fts_query};
@@ -12,6 +15,8 @@ use crate::search::{FtsQueryMode, SearchFilters, SearchMode, search_hybrid, to_f
pub struct SeedResult {
pub seed_entities: Vec<EntityRef>,
pub evidence_notes: Vec<TimelineEvent>,
/// Discussions matched during seeding, to be collected as full threads.
pub matched_discussions: Vec<MatchedDiscussion>,
/// The search mode actually used (hybrid with fallback info).
pub search_mode: String,
}
@@ -38,6 +43,7 @@ pub async fn seed_timeline(
return Ok(SeedResult {
seed_entities: Vec::new(),
evidence_notes: Vec::new(),
matched_discussions: Vec::new(),
search_mode: "lexical".to_owned(),
});
}
@@ -76,7 +82,7 @@ pub async fn seed_timeline(
debug!(warning = %w, "hybrid search warning during timeline seeding");
}
let seed_entities = resolve_documents_to_entities(
let (seed_entities, matched_discussions) = resolve_documents_to_entities(
conn,
&hybrid_results
.iter()
@@ -91,19 +97,21 @@ pub async fn seed_timeline(
Ok(SeedResult {
seed_entities,
evidence_notes,
matched_discussions,
search_mode,
})
}
/// Resolve a list of document IDs to deduplicated entity refs.
/// Discussion documents are resolved to their parent entity (issue or MR).
/// Resolve a list of document IDs to deduplicated entity refs and matched discussions.
/// Discussion and note documents are resolved to their parent entity (issue or MR).
/// Returns (entities, matched_discussions).
fn resolve_documents_to_entities(
conn: &Connection,
document_ids: &[i64],
max_entities: usize,
) -> Result<Vec<EntityRef>> {
) -> Result<(Vec<EntityRef>, Vec<MatchedDiscussion>)> {
if document_ids.is_empty() {
return Ok(Vec::new());
return Ok((Vec::new(), Vec::new()));
}
let placeholders: String = document_ids
@@ -114,9 +122,13 @@ fn resolve_documents_to_entities(
let sql = format!(
r"
SELECT d.source_type, d.source_id, d.project_id,
disc.issue_id, disc.merge_request_id
COALESCE(disc.issue_id, note_disc.issue_id) AS issue_id,
COALESCE(disc.merge_request_id, note_disc.merge_request_id) AS mr_id,
COALESCE(disc.id, note_disc.id) AS discussion_id
FROM documents d
LEFT JOIN discussions disc ON disc.id = d.source_id AND d.source_type = 'discussion'
LEFT JOIN notes n ON n.id = d.source_id AND d.source_type = 'note'
LEFT JOIN discussions note_disc ON note_disc.id = n.discussion_id AND d.source_type = 'note'
WHERE d.id IN ({placeholders})
ORDER BY CASE d.id {order_clause} END
",
@@ -135,37 +147,55 @@ fn resolve_documents_to_entities(
.collect();
let rows = stmt.query_map(params.as_slice(), |row| {
Ok((
row.get::<_, String>(0)?,
row.get::<_, i64>(1)?,
row.get::<_, i64>(2)?,
row.get::<_, Option<i64>>(3)?,
row.get::<_, Option<i64>>(4)?,
row.get::<_, String>(0)?, // source_type
row.get::<_, i64>(1)?, // source_id
row.get::<_, i64>(2)?, // project_id
row.get::<_, Option<i64>>(3)?, // issue_id (coalesced)
row.get::<_, Option<i64>>(4)?, // mr_id (coalesced)
row.get::<_, Option<i64>>(5)?, // discussion_id (coalesced)
))
})?;
let mut seen = HashSet::new();
let mut seen_entities = HashSet::new();
let mut seen_discussions = HashSet::new();
let mut entities = Vec::new();
let mut matched_discussions = Vec::new();
for row_result in rows {
let (source_type, source_id, proj_id, disc_issue_id, disc_mr_id) = row_result?;
let (source_type, source_id, proj_id, disc_issue_id, disc_mr_id, discussion_id) =
row_result?;
let (entity_type, entity_id) = match source_type.as_str() {
"issue" => ("issue".to_owned(), source_id),
"merge_request" => ("merge_request".to_owned(), source_id),
"discussion" => {
"discussion" | "note" => {
if let Some(issue_id) = disc_issue_id {
("issue".to_owned(), issue_id)
} else if let Some(mr_id) = disc_mr_id {
("merge_request".to_owned(), mr_id)
} else {
continue; // orphaned discussion
continue; // orphaned discussion/note
}
}
_ => continue,
};
// Capture matched discussion (deduplicated)
if let Some(disc_id) = discussion_id
&& (source_type == "discussion" || source_type == "note")
&& seen_discussions.insert(disc_id)
{
matched_discussions.push(MatchedDiscussion {
discussion_id: disc_id,
entity_type: entity_type.clone(),
entity_id,
project_id: proj_id,
});
}
// Entity dedup
let key = (entity_type.clone(), entity_id);
if !seen.insert(key) {
if !seen_entities.insert(key) {
continue;
}
@@ -179,7 +209,7 @@ fn resolve_documents_to_entities(
}
}
Ok(entities)
Ok((entities, matched_discussions))
}
/// Find evidence notes: FTS5-matched discussion notes that provide context.
@@ -275,21 +305,6 @@ fn find_evidence_notes(
Ok(events)
}
/// Truncate a string to at most `max_chars` characters on a safe UTF-8 boundary.
///
/// Returns the input unchanged (as an owned `String`) when it already fits.
/// Truncation happens on `char` boundaries, so multi-byte UTF-8 sequences
/// are never split mid-sequence.
fn truncate_to_chars(s: &str, max_chars: usize) -> String {
    // Single pass: `nth(max_chars)` yields the byte offset of the first
    // character past the limit, or `None` when the string contains at most
    // `max_chars` characters. This replaces the previous two-pass form
    // (`chars().count()` guard + `char_indices().nth()`), whose
    // `unwrap_or(s.len())` fallback was unreachable after the count check.
    match s.char_indices().nth(max_chars) {
        Some((byte_end, _)) => s[..byte_end].to_owned(),
        None => s.to_owned(),
    }
}
#[cfg(test)]
#[path = "timeline_seed_tests.rs"]
mod tests;