fix(explain): address review findings — N+1 queries, duplicate decisions, silent errors
1. `fetch_open_threads`: replace the N+1 loop (2 queries per thread) with a single query that uses correlated subqueries for `note_count` and `started_by`. 2. `extract_key_decisions`: track consumed notes so the same note is not matched to multiple events, preventing duplicate decision entries. 3. `build_timeline_excerpt_from_pipeline`: log a `tracing::warn` on seed/collect failures instead of silently returning an empty timeline.
This commit is contained in:
@@ -1,70 +0,0 @@
|
||||
/// Number of chunk slots reserved per document: rowids are packed as
/// `document_id * CHUNK_ROWID_MULTIPLIER + chunk_index`, so a document
/// may have at most 1000 chunks (indices 0..999).
pub const CHUNK_ROWID_MULTIPLIER: i64 = 1000;
|
||||
|
||||
pub fn encode_rowid(document_id: i64, chunk_index: i64) -> i64 {
|
||||
assert!(
|
||||
(0..CHUNK_ROWID_MULTIPLIER).contains(&chunk_index),
|
||||
"chunk_index {chunk_index} out of range [0, {CHUNK_ROWID_MULTIPLIER})"
|
||||
);
|
||||
document_id
|
||||
.checked_mul(CHUNK_ROWID_MULTIPLIER)
|
||||
.and_then(|v| v.checked_add(chunk_index))
|
||||
.unwrap_or_else(|| {
|
||||
panic!("encode_rowid overflow: document_id={document_id}, chunk_index={chunk_index}")
|
||||
})
|
||||
}
|
||||
|
||||
/// Splits a packed rowid back into its `(document_id, chunk_index)` parts.
///
/// The inverse of [`encode_rowid`].
///
/// # Panics
/// Panics when `rowid` is negative (negative values would make the
/// division/modulo decomposition ambiguous).
pub fn decode_rowid(rowid: i64) -> (i64, i64) {
    assert!(
        rowid >= 0,
        "decode_rowid called with negative rowid: {rowid}"
    );
    (
        rowid / CHUNK_ROWID_MULTIPLIER,
        rowid % CHUNK_ROWID_MULTIPLIER,
    )
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_encode_single_chunk() {
        assert_eq!(encode_rowid(1, 0), 1000);
    }

    #[test]
    fn test_encode_multi_chunk() {
        assert_eq!(encode_rowid(1, 5), 1005);
    }

    #[test]
    fn test_encode_specific_values() {
        assert_eq!(encode_rowid(42, 0), 42000);
        assert_eq!(encode_rowid(42, 5), 42005);
    }

    #[test]
    fn test_decode_zero_chunk() {
        assert_eq!(decode_rowid(42000), (42, 0));
    }

    #[test]
    fn test_decode_roundtrip() {
        // Exhaustive cross-product of representative ids and indices,
        // including the boundary chunk index 999.
        let doc_ids = [0, 1, 42, 100, 999, 10000];
        let chunk_indices = [0, 1, 5, 99, 999];
        for &doc_id in &doc_ids {
            for &chunk_idx in &chunk_indices {
                let rowid = encode_rowid(doc_id, chunk_idx);
                let (decoded_doc, decoded_chunk) = decode_rowid(rowid);
                assert_eq!(
                    (decoded_doc, decoded_chunk),
                    (doc_id, chunk_idx),
                    "Roundtrip failed for doc_id={doc_id}, chunk_idx={chunk_idx}"
                );
            }
        }
    }

    #[test]
    fn test_multiplier_value() {
        assert_eq!(CHUNK_ROWID_MULTIPLIER, 1000);
    }
}
|
||||
@@ -1,107 +0,0 @@
|
||||
/// Upper bound, in bytes, on a single chunk's text.
pub const CHUNK_MAX_BYTES: usize = 1_500;

/// Expected embedding vector dimensionality.
/// NOTE(review): not referenced anywhere in this file — presumably consumed
/// by the embedding pipeline; confirm at call sites.
pub const EXPECTED_DIMS: usize = 768;

/// Overlap carried between consecutive chunks. Despite the `_CHARS` name,
/// `split_into_chunks` subtracts this from a byte offset, so it is
/// effectively a byte count.
pub const CHUNK_OVERLAP_CHARS: usize = 200;
|
||||
|
||||
/// Splits `content` into `(chunk_index, text)` pairs of at most
/// `CHUNK_MAX_BYTES` bytes each, preferring to break at paragraph,
/// then sentence, then word boundaries, with roughly
/// `CHUNK_OVERLAP_CHARS` bytes of overlap between consecutive chunks.
///
/// Returns an empty `Vec` for empty input; content that fits in one
/// chunk is returned whole as chunk 0.
pub fn split_into_chunks(content: &str) -> Vec<(usize, String)> {
    if content.is_empty() {
        return Vec::new();
    }

    // Fast path: whole content fits in a single chunk.
    if content.len() <= CHUNK_MAX_BYTES {
        return vec![(0, content.to_string())];
    }

    let mut chunks: Vec<(usize, String)> = Vec::new();
    let mut start = 0;
    let mut chunk_index = 0;

    while start < content.len() {
        let remaining = &content[start..];
        // Tail fits entirely — emit it as the final chunk and stop.
        if remaining.len() <= CHUNK_MAX_BYTES {
            chunks.push((chunk_index, remaining.to_string()));
            break;
        }

        // Candidate window: up to CHUNK_MAX_BYTES, rounded down so we
        // never slice through a multi-byte character.
        let end = floor_char_boundary(content, start + CHUNK_MAX_BYTES);
        let window = &content[start..end];

        // Prefer the most natural break available, falling back to a
        // hard cut at the window end. Each helper returns an offset
        // just past its delimiter, so it is a valid char boundary.
        let split_at = find_paragraph_break(window)
            .or_else(|| find_sentence_break(window))
            .or_else(|| find_word_break(window))
            .unwrap_or(window.len());

        let chunk_text = &content[start..start + split_at];
        chunks.push((chunk_index, chunk_text.to_string()));

        // Step forward by the chunk size minus the overlap; `.max(1)`
        // keeps the loop advancing even for degenerate tiny splits.
        let advance = if split_at > CHUNK_OVERLAP_CHARS {
            split_at - CHUNK_OVERLAP_CHARS
        } else {
            split_at
        }
        .max(1);
        let old_start = start;
        start += advance;
        // Ensure start lands on a char boundary after overlap subtraction
        start = floor_char_boundary(content, start);
        // Guarantee forward progress: multi-byte chars can cause
        // floor_char_boundary to round back to old_start
        if start <= old_start {
            start = old_start
                + content[old_start..]
                    .chars()
                    .next()
                    .map_or(1, |c| c.len_utf8());
        }
        chunk_index += 1;
    }

    chunks
}
|
||||
|
||||
fn find_paragraph_break(window: &str) -> Option<usize> {
|
||||
let search_start = floor_char_boundary(window, window.len() * 2 / 3);
|
||||
window[search_start..]
|
||||
.rfind("\n\n")
|
||||
.map(|pos| search_start + pos + 2)
|
||||
.or_else(|| window[..search_start].rfind("\n\n").map(|pos| pos + 2))
|
||||
}
|
||||
|
||||
fn find_sentence_break(window: &str) -> Option<usize> {
|
||||
let search_start = floor_char_boundary(window, window.len() / 2);
|
||||
for pat in &[". ", "? ", "! "] {
|
||||
if let Some(pos) = window[search_start..].rfind(pat) {
|
||||
return Some(search_start + pos + pat.len());
|
||||
}
|
||||
}
|
||||
for pat in &[". ", "? ", "! "] {
|
||||
if let Some(pos) = window[..search_start].rfind(pat) {
|
||||
return Some(pos + pat.len());
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn find_word_break(window: &str) -> Option<usize> {
|
||||
let search_start = floor_char_boundary(window, window.len() / 2);
|
||||
window[search_start..]
|
||||
.rfind(' ')
|
||||
.map(|pos| search_start + pos + 1)
|
||||
.or_else(|| window[..search_start].rfind(' ').map(|pos| pos + 1))
|
||||
}
|
||||
|
||||
/// Rounds `idx` down to the nearest UTF-8 char boundary in `s`,
/// clamping to `s.len()` when `idx` is past the end. Offset 0 is
/// always a boundary, so the search cannot fail.
fn floor_char_boundary(s: &str, idx: usize) -> usize {
    if idx >= s.len() {
        s.len()
    } else {
        (0..=idx)
            .rev()
            .find(|&i| s.is_char_boundary(i))
            .unwrap_or(0)
    }
}
|
||||
|
||||
// Unit tests for the chunking logic live in a sibling file to keep this
// module focused; compiled only under `cfg(test)`.
#[cfg(test)]
#[path = "chunking_tests.rs"]
mod tests;
|
||||
Reference in New Issue
Block a user