3 Commits

Author SHA1 Message Date
teernisse
94c8613420 feat(bd-226s): implement time-decay expert scoring model
Replace flat-weight expertise scoring with exponential half-life decay,
split reviewer signals (participated vs assigned-only), dual-path rename
awareness, and new CLI flags (--as-of, --explain-score, --include-bots,
--all-history).

Changes:
- ScoringConfig: 8 new fields with validation (config.rs)
- half_life_decay() and normalize_query_path() pure functions (who.rs)
- CTE-based SQL with dual-path matching, mr_activity, reviewer_participation (who.rs)
- Rust-side decay aggregation with deterministic f64 ordering (who.rs)
- Path resolution probes check old_path columns (who.rs)
- Migration 026: 5 new indexes for dual-path and reviewer participation
- Default --since changed from 6m to 24m
- 31 new tests (example-based + invariant), 621 total who tests passing
- Autocorrect registry updated with new flags

Closes: bd-226s, bd-2w1p, bd-1soz, bd-18dn, bd-2ao4, bd-2yu5, bd-1b50,
bd-1hoq, bd-1h3f, bd-13q8, bd-11mg, bd-1vti, bd-1j5o
2026-02-12 15:44:55 -05:00
teernisse
ad4dd6e855 release: v0.7.0 2026-02-12 13:31:57 -05:00
teernisse
83cd16c918 feat: implement per-note search and document pipeline
- Add SourceType::Note with extract_note_document() and ParentMetadataCache
- Migration 022: composite indexes for notes queries + author_id column
- Migration 024: table rebuild adding 'note' to CHECK constraints, defense triggers
- Migration 025: backfill existing non-system notes into dirty queue
- Add lore notes CLI command with 17 filter options (author, path, resolution, etc.)
- Support table/json/jsonl/csv output formats with field selection
- Wire note dirty tracking through discussion and MR discussion ingestion
- Fix test_migration_024_preserves_existing_data off-by-one (tested wrong migration)
- Fix upsert_document_inner returning false for label/path-only changes
2026-02-12 13:31:24 -05:00
27 changed files with 8479 additions and 338 deletions

File diff suppressed because one or more lines are too long

View File

@@ -1 +1 @@
bd-2kop
bd-226s

2
Cargo.lock generated
View File

@@ -1106,7 +1106,7 @@ checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897"
[[package]]
name = "lore"
version = "0.6.2"
version = "0.7.0"
dependencies = [
"async-stream",
"chrono",

View File

@@ -1,6 +1,6 @@
[package]
name = "lore"
version = "0.6.2"
version = "0.7.0"
edition = "2024"
description = "Gitlore - Local GitLab data management with semantic search"
authors = ["Taylor Eernisse"]

View File

@@ -0,0 +1,21 @@
-- Migration 022: Composite query indexes for notes + author_id column
-- Optimizes author-scoped and project-scoped date-range queries on notes.
-- Adds discussion JOIN indexes and immutable author identity column.
-- Composite index for author-scoped queries (who command, notes --author)
-- Partial on is_system = 0: the index only covers rows a query that also
-- filters on is_system = 0 can match, keeping it small.
CREATE INDEX IF NOT EXISTS idx_notes_user_created
ON notes(project_id, author_username COLLATE NOCASE, created_at DESC, id DESC)
WHERE is_system = 0;
-- Composite index for project-scoped date-range queries
CREATE INDEX IF NOT EXISTS idx_notes_project_created
ON notes(project_id, created_at DESC, id DESC)
WHERE is_system = 0;
-- Discussion JOIN indexes
CREATE INDEX IF NOT EXISTS idx_discussions_issue_id ON discussions(issue_id);
CREATE INDEX IF NOT EXISTS idx_discussions_mr_id ON discussions(merge_request_id);
-- Immutable author identity column (GitLab numeric user ID)
-- Nullable: rows existing at migration time keep NULL here.
-- NOTE(review): presumably ingestion populates author_id going forward —
-- confirm the backfill/ingest path.
ALTER TABLE notes ADD COLUMN author_id INTEGER;
CREATE INDEX IF NOT EXISTS idx_notes_author_id ON notes(author_id) WHERE author_id IS NOT NULL;

View File

@@ -0,0 +1,153 @@
-- Migration 024: Add 'note' source_type to documents and dirty_sources
-- SQLite does not support ALTER CONSTRAINT, so we use the table-rebuild pattern.
-- ============================================================
-- 1. Rebuild dirty_sources with updated CHECK constraint
-- ============================================================
CREATE TABLE dirty_sources_new (
source_type TEXT NOT NULL CHECK (source_type IN ('issue','merge_request','discussion','note')),
source_id INTEGER NOT NULL,
queued_at INTEGER NOT NULL,
attempt_count INTEGER NOT NULL DEFAULT 0,
last_attempt_at INTEGER,
last_error TEXT,
next_attempt_at INTEGER,
PRIMARY KEY(source_type, source_id)
);
-- Positional copy: relies on the old dirty_sources having these exact
-- columns in this exact order -- TODO(review): confirm vs. prior schema.
INSERT INTO dirty_sources_new SELECT * FROM dirty_sources;
DROP TABLE dirty_sources;
ALTER TABLE dirty_sources_new RENAME TO dirty_sources;
CREATE INDEX idx_dirty_sources_next_attempt ON dirty_sources(next_attempt_at);
-- ============================================================
-- 2. Rebuild documents with updated CHECK constraint
-- ============================================================
-- 2a. Backup junction table data
-- (TEMP tables are connection-local; step 4 also drops them explicitly.)
CREATE TEMP TABLE _doc_labels_backup AS SELECT * FROM document_labels;
CREATE TEMP TABLE _doc_paths_backup AS SELECT * FROM document_paths;
-- 2b. Drop all triggers that reference documents
DROP TRIGGER IF EXISTS documents_ai;
DROP TRIGGER IF EXISTS documents_ad;
DROP TRIGGER IF EXISTS documents_au;
DROP TRIGGER IF EXISTS documents_embeddings_ad;
-- 2c. Drop junction tables (they have FK references to documents)
DROP TABLE IF EXISTS document_labels;
DROP TABLE IF EXISTS document_paths;
-- 2d. Create new documents table with 'note' in CHECK constraint
CREATE TABLE documents_new (
id INTEGER PRIMARY KEY,
source_type TEXT NOT NULL CHECK (source_type IN ('issue','merge_request','discussion','note')),
source_id INTEGER NOT NULL,
project_id INTEGER NOT NULL REFERENCES projects(id),
author_username TEXT,
label_names TEXT,
created_at INTEGER,
updated_at INTEGER,
url TEXT,
title TEXT,
content_text TEXT NOT NULL,
content_hash TEXT NOT NULL,
labels_hash TEXT NOT NULL DEFAULT '',
paths_hash TEXT NOT NULL DEFAULT '',
is_truncated INTEGER NOT NULL DEFAULT 0,
truncated_reason TEXT CHECK (
truncated_reason IN (
'token_limit_middle_drop','single_note_oversized','first_last_oversized',
'hard_cap_oversized'
)
OR truncated_reason IS NULL
),
UNIQUE(source_type, source_id)
);
-- 2e. Copy all existing data
-- (positional SELECT *: documents_new must keep the same column order)
INSERT INTO documents_new SELECT * FROM documents;
-- 2f. Swap tables
DROP TABLE documents;
ALTER TABLE documents_new RENAME TO documents;
-- 2g. Recreate all indexes on documents
CREATE INDEX idx_documents_project_updated ON documents(project_id, updated_at);
CREATE INDEX idx_documents_author ON documents(author_username);
CREATE INDEX idx_documents_source ON documents(source_type, source_id);
CREATE INDEX idx_documents_hash ON documents(content_hash);
-- 2h. Recreate junction tables
-- WITHOUT ROWID: the composite PK is the whole row, so clustering on the
-- PK avoids a redundant rowid B-tree.
CREATE TABLE document_labels (
document_id INTEGER NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
label_name TEXT NOT NULL,
PRIMARY KEY(document_id, label_name)
) WITHOUT ROWID;
CREATE INDEX idx_document_labels_label ON document_labels(label_name);
CREATE TABLE document_paths (
document_id INTEGER NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
path TEXT NOT NULL,
PRIMARY KEY(document_id, path)
) WITHOUT ROWID;
CREATE INDEX idx_document_paths_path ON document_paths(path);
-- 2i. Restore junction table data from backups
-- (document ids were preserved by the positional copy in 2e, so the
-- FK references remain valid)
INSERT INTO document_labels SELECT * FROM _doc_labels_backup;
INSERT INTO document_paths SELECT * FROM _doc_paths_backup;
-- 2j. Recreate FTS triggers (from migration 008)
-- External-content FTS pattern: a deletion is recorded by inserting a
-- special ('delete', rowid, ...) row before the replacement is inserted.
CREATE TRIGGER documents_ai AFTER INSERT ON documents BEGIN
INSERT INTO documents_fts(rowid, title, content_text)
VALUES (new.id, COALESCE(new.title, ''), new.content_text);
END;
CREATE TRIGGER documents_ad AFTER DELETE ON documents BEGIN
INSERT INTO documents_fts(documents_fts, rowid, title, content_text)
VALUES('delete', old.id, COALESCE(old.title, ''), old.content_text);
END;
-- IS NOT (null-safe inequality) so a title change to/from NULL still fires.
CREATE TRIGGER documents_au AFTER UPDATE ON documents
WHEN old.title IS NOT new.title OR old.content_text != new.content_text
BEGIN
INSERT INTO documents_fts(documents_fts, rowid, title, content_text)
VALUES('delete', old.id, COALESCE(old.title, ''), old.content_text);
INSERT INTO documents_fts(rowid, title, content_text)
VALUES (new.id, COALESCE(new.title, ''), new.content_text);
END;
-- 2k. Recreate embeddings cleanup trigger (from migration 009)
-- Embedding rowids are partitioned per document: document N owns the
-- half-open range [N*1000, (N+1)*1000).
CREATE TRIGGER documents_embeddings_ad AFTER DELETE ON documents BEGIN
DELETE FROM embeddings
WHERE rowid >= old.id * 1000
AND rowid < (old.id + 1) * 1000;
END;
-- 2l. Rebuild FTS index to ensure consistency after table swap
INSERT INTO documents_fts(documents_fts) VALUES('rebuild');
-- ============================================================
-- 3. Defense triggers: clean up documents when notes are
-- deleted or flipped to system notes
-- ============================================================
CREATE TRIGGER notes_ad_cleanup AFTER DELETE ON notes
WHEN old.is_system = 0
BEGIN
DELETE FROM documents WHERE source_type = 'note' AND source_id = old.id;
END;
CREATE TRIGGER notes_au_system_cleanup AFTER UPDATE OF is_system ON notes
WHEN NEW.is_system = 1 AND OLD.is_system = 0
BEGIN
DELETE FROM documents WHERE source_type = 'note' AND source_id = OLD.id;
END;
-- ============================================================
-- 4. Drop temp backup tables
-- ============================================================
DROP TABLE IF EXISTS _doc_labels_backup;
DROP TABLE IF EXISTS _doc_paths_backup;

View File

@@ -0,0 +1,8 @@
-- Backfill existing non-system notes into dirty queue for document generation.
-- Only seeds notes that don't already have documents and aren't already queued.
-- queued_at is epoch milliseconds (strftime('%s') yields whole seconds).
INSERT INTO dirty_sources (source_type, source_id, queued_at)
SELECT 'note', n.id, CAST(strftime('%s', 'now') AS INTEGER) * 1000
FROM notes n
-- Anti-join: the d.id IS NULL predicate below keeps only notes that have
-- no document row yet.
LEFT JOIN documents d ON d.source_type = 'note' AND d.source_id = n.id
WHERE n.is_system = 0 AND d.id IS NULL
ON CONFLICT(source_type, source_id) DO NOTHING;

View File

@@ -0,0 +1,20 @@
-- Indexes for time-decay expert scoring: dual-path matching and reviewer participation.
-- The notes indexes are partial on non-system DiffNotes so they cover only
-- rows the scoring queries can match -- NOTE(review): verify predicates
-- against the who.rs SQL.
CREATE INDEX IF NOT EXISTS idx_notes_old_path_author
ON notes(position_old_path, author_username, created_at)
WHERE note_type = 'DiffNote' AND is_system = 0 AND position_old_path IS NOT NULL;
-- Old-path side of dual-path (rename-aware) file matching.
CREATE INDEX IF NOT EXISTS idx_mfc_old_path_project_mr
ON mr_file_changes(old_path, project_id, merge_request_id)
WHERE old_path IS NOT NULL;
-- New-path side is not partial -- NOTE(review): presumably new_path is
-- always set on mr_file_changes rows; confirm.
CREATE INDEX IF NOT EXISTS idx_mfc_new_path_project_mr
ON mr_file_changes(new_path, project_id, merge_request_id);
-- Reviewer participation: DiffNotes looked up by discussion and author.
CREATE INDEX IF NOT EXISTS idx_notes_diffnote_discussion_author
ON notes(discussion_id, author_username, created_at)
WHERE note_type = 'DiffNote' AND is_system = 0;
CREATE INDEX IF NOT EXISTS idx_notes_old_path_project_created
ON notes(position_old_path, project_id, created_at)
WHERE note_type = 'DiffNote' AND is_system = 0 AND position_old_path IS NOT NULL;

View File

@@ -4,7 +4,7 @@ title: ""
status: iterating
iteration: 6
target_iterations: 8
beads_revision: 1
beads_revision: 2
related_plans: []
created: 2026-02-08
updated: 2026-02-12

View File

@@ -183,9 +183,38 @@ const COMMAND_FLAGS: &[(&str, &[&str])] = &[
"--fields",
"--detail",
"--no-detail",
"--as-of",
"--explain-score",
"--include-bots",
"--all-history",
],
),
("drift", &["--threshold", "--project"]),
(
"notes",
&[
"--limit",
"--fields",
"--format",
"--author",
"--note-type",
"--contains",
"--note-id",
"--gitlab-note-id",
"--discussion-id",
"--include-system",
"--for-issue",
"--for-mr",
"--project",
"--since",
"--until",
"--path",
"--resolution",
"--sort",
"--asc",
"--open",
],
),
(
"init",
&[

View File

@@ -39,6 +39,7 @@ pub fn run_generate_docs(
result.seeded += seed_dirty(&conn, SourceType::Issue, project_filter)?;
result.seeded += seed_dirty(&conn, SourceType::MergeRequest, project_filter)?;
result.seeded += seed_dirty(&conn, SourceType::Discussion, project_filter)?;
result.seeded += seed_dirty_notes(&conn, project_filter)?;
}
let regen =
@@ -67,6 +68,10 @@ fn seed_dirty(
SourceType::Issue => "issues",
SourceType::MergeRequest => "merge_requests",
SourceType::Discussion => "discussions",
SourceType::Note => {
// NOTE-2E will implement seed_dirty_notes separately (needs is_system filter)
unreachable!("Note seeding handled by seed_dirty_notes, not seed_dirty")
}
};
let type_str = source_type.as_str();
let now = chrono::Utc::now().timestamp_millis();
@@ -125,6 +130,55 @@ fn seed_dirty(
Ok(total_seeded)
}
/// Seed every non-system note into the dirty queue (full mode), walking the
/// notes table in id order in chunks of `FULL_MODE_CHUNK_SIZE`.
///
/// Returns the number of rows actually inserted; notes already present in
/// `dirty_sources` are skipped via `ON CONFLICT DO NOTHING` and not counted.
fn seed_dirty_notes(conn: &Connection, project_filter: Option<&str>) -> Result<usize> {
    let now = chrono::Utc::now().timestamp_millis();
    // Resolve the project once, not on every loop iteration.
    let project_id = match project_filter {
        Some(project) => Some(resolve_project(conn, project)?),
        None => None,
    };
    let mut total_seeded: usize = 0;
    let mut last_id: i64 = 0;
    loop {
        // Determine the chunk's upper id bound by scanning the *same* row set
        // the INSERT below targets. Driving the loop off this scan (rather
        // than off the insert's change count) fixes two bugs:
        //  1. a chunk whose notes were all already queued (e.g. after the
        //     migration 025 backfill) made execute() report 0 changes, which
        //     broke the loop before later notes were ever seeded;
        //  2. with a project filter, the previous bound scan ignored
        //     project_id and could disagree with the filtered INSERT about
        //     which notes a chunk contained.
        let max_id: Option<i64> = if let Some(pid) = project_id {
            conn.query_row(
                "SELECT MAX(id) FROM (SELECT id FROM notes \
                 WHERE id > ?1 AND project_id = ?2 AND is_system = 0 ORDER BY id LIMIT ?3)",
                rusqlite::params![last_id, pid, FULL_MODE_CHUNK_SIZE],
                |row| row.get(0),
            )?
        } else {
            conn.query_row(
                "SELECT MAX(id) FROM (SELECT id FROM notes \
                 WHERE id > ?1 AND is_system = 0 ORDER BY id LIMIT ?2)",
                rusqlite::params![last_id, FULL_MODE_CHUNK_SIZE],
                |row| row.get(0),
            )?
        };
        // MAX() over an empty set yields NULL: no candidate notes remain.
        let Some(max_id) = max_id else { break };
        // Insert exactly the scanned id window (id > last_id AND id <= max_id)
        // so the chunk contents cannot drift from the bound computed above.
        let inserted = if let Some(pid) = project_id {
            conn.execute(
                "INSERT INTO dirty_sources (source_type, source_id, queued_at, attempt_count, last_attempt_at, last_error, next_attempt_at) \
                 SELECT 'note', id, ?1, 0, NULL, NULL, NULL \
                 FROM notes WHERE id > ?2 AND id <= ?3 AND project_id = ?4 AND is_system = 0 \
                 ON CONFLICT(source_type, source_id) DO NOTHING",
                rusqlite::params![now, last_id, max_id, pid],
            )?
        } else {
            conn.execute(
                "INSERT INTO dirty_sources (source_type, source_id, queued_at, attempt_count, last_attempt_at, last_error, next_attempt_at) \
                 SELECT 'note', id, ?1, 0, NULL, NULL, NULL \
                 FROM notes WHERE id > ?2 AND id <= ?3 AND is_system = 0 \
                 ON CONFLICT(source_type, source_id) DO NOTHING",
                rusqlite::params![now, last_id, max_id],
            )?
        };
        total_seeded += inserted;
        last_id = max_id;
    }
    info!(
        source_type = "note",
        seeded = total_seeded,
        "Seeded dirty_sources"
    );
    Ok(total_seeded)
}
pub fn print_generate_docs(result: &GenerateDocsResult) {
let mode = if result.full_mode {
"full"
@@ -186,3 +240,81 @@ pub fn print_generate_docs_json(result: &GenerateDocsResult, elapsed_ms: u64) {
};
println!("{}", serde_json::to_string(&output).unwrap());
}
#[cfg(test)]
mod tests {
    use std::path::Path;

    use crate::core::db::{create_connection, run_migrations};

    use super::*;

    /// Count the note entries currently sitting in the dirty queue.
    fn dirty_note_count(conn: &Connection) -> i64 {
        conn.query_row(
            "SELECT COUNT(*) FROM dirty_sources WHERE source_type = 'note'",
            [],
            |row| row.get(0),
        )
        .unwrap()
    }

    /// Fresh in-memory database seeded with one project, one issue, and one
    /// discussion so note inserts satisfy their foreign keys.
    fn setup_db() -> Connection {
        let conn = create_connection(Path::new(":memory:")).unwrap();
        run_migrations(&conn).unwrap();
        let fixture_rows = [
            "INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url) VALUES (1, 100, 'group/project', 'https://gitlab.com/group/project')",
            "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, created_at, updated_at, last_seen_at) VALUES (1, 10, 1, 1, 'Test', 'opened', 1000, 2000, 3000)",
            "INSERT INTO discussions (id, gitlab_discussion_id, project_id, issue_id, noteable_type, last_seen_at) VALUES (1, 'disc_1', 1, 1, 'Issue', 3000)",
        ];
        for sql in fixture_rows {
            conn.execute(sql, []).unwrap();
        }
        conn
    }

    /// Insert a note on discussion 1 / project 1, authored by "alice".
    fn add_note(conn: &Connection, id: i64, gitlab_id: i64, is_system: bool) {
        conn.execute(
            "INSERT INTO notes (id, gitlab_id, discussion_id, project_id, author_username, body, created_at, updated_at, last_seen_at, is_system) VALUES (?1, ?2, 1, 1, 'alice', 'note body', 1000, 2000, 3000, ?3)",
            rusqlite::params![id, gitlab_id, is_system as i32],
        )
        .unwrap();
    }

    #[test]
    fn test_full_seed_includes_notes() {
        let conn = setup_db();
        for (id, gitlab_id) in [(1, 101), (2, 102), (3, 103)] {
            add_note(&conn, id, gitlab_id, false);
        }
        add_note(&conn, 4, 104, true); // system note — should be excluded
        assert_eq!(seed_dirty_notes(&conn, None).unwrap(), 3);
        assert_eq!(dirty_note_count(&conn), 3);
    }

    #[test]
    fn test_note_document_count_stable_after_second_generate_docs_full() {
        let conn = setup_db();
        add_note(&conn, 1, 101, false);
        add_note(&conn, 2, 102, false);
        assert_eq!(seed_dirty_notes(&conn, None).unwrap(), 2);
        // Second run should be idempotent (ON CONFLICT DO NOTHING)
        assert_eq!(seed_dirty_notes(&conn, None).unwrap(), 0);
        assert_eq!(dirty_note_count(&conn), 2);
    }
}

File diff suppressed because it is too large Load Diff

View File

@@ -30,8 +30,10 @@ pub use ingest::{
};
pub use init::{InitInputs, InitOptions, InitResult, run_init};
pub use list::{
ListFilters, MrListFilters, open_issue_in_browser, open_mr_in_browser, print_list_issues,
print_list_issues_json, print_list_mrs, print_list_mrs_json, run_list_issues, run_list_mrs,
ListFilters, MrListFilters, NoteListFilters, open_issue_in_browser, open_mr_in_browser,
print_list_issues, print_list_issues_json, print_list_mrs, print_list_mrs_json,
print_list_notes, print_list_notes_csv, print_list_notes_json, print_list_notes_jsonl,
query_notes, run_list_issues, run_list_mrs,
};
pub use search::{
SearchCliFilters, SearchResponse, print_search_results, print_search_results_json, run_search,

View File

@@ -334,6 +334,7 @@ pub fn print_search_results(response: &SearchResponse) {
"issue" => "Issue",
"merge_request" => "MR",
"discussion" => "Discussion",
"note" => "Note",
_ => &result.source_type,
};

File diff suppressed because it is too large Load Diff

View File

@@ -112,6 +112,9 @@ pub enum Commands {
/// List or show merge requests
Mrs(MrsArgs),
/// List notes from discussions
Notes(NotesArgs),
/// Ingest data from GitLab
Ingest(IngestArgs),
@@ -489,6 +492,113 @@ pub struct MrsArgs {
pub no_open: bool,
}
// Arguments for `lore notes`.
// NOTE: clap derive renders the /// doc comments below as the CLI help text,
// so any edit to them changes `lore notes --help` output at runtime.
#[derive(Parser)]
#[command(after_help = "\x1b[1mExamples:\x1b[0m
lore notes # List 50 most recent notes
lore notes --author alice --since 7d # Notes by alice in last 7 days
lore notes --for-issue 42 -p group/repo # Notes on issue #42
lore notes --path src/ --resolution unresolved # Unresolved diff notes in src/")]
pub struct NotesArgs {
    /// Maximum results
    #[arg(
        short = 'n',
        long = "limit",
        default_value = "50",
        help_heading = "Output"
    )]
    pub limit: usize,
    /// Select output fields (comma-separated, or 'minimal' preset: id,author_username,body,created_at_iso)
    #[arg(long, help_heading = "Output", value_delimiter = ',')]
    pub fields: Option<Vec<String>>,
    /// Output format (table, json, jsonl, csv)
    #[arg(
        long,
        default_value = "table",
        value_parser = ["table", "json", "jsonl", "csv"],
        help_heading = "Output"
    )]
    pub format: String,
    /// Filter by author username
    #[arg(short = 'a', long, help_heading = "Filters")]
    pub author: Option<String>,
    /// Filter by note type (DiffNote, DiscussionNote)
    // NOTE(review): no value_parser here, unlike --format/--resolution, so
    // arbitrary strings pass through to the query — presumably intentional to
    // allow other GitLab note types; confirm.
    #[arg(long, help_heading = "Filters")]
    pub note_type: Option<String>,
    /// Filter by body text (substring match)
    #[arg(long, help_heading = "Filters")]
    pub contains: Option<String>,
    /// Filter by internal note ID
    #[arg(long, help_heading = "Filters")]
    pub note_id: Option<i64>,
    /// Filter by GitLab note ID
    #[arg(long, help_heading = "Filters")]
    pub gitlab_note_id: Option<i64>,
    /// Filter by discussion ID
    #[arg(long, help_heading = "Filters")]
    pub discussion_id: Option<String>,
    /// Include system notes (excluded by default)
    #[arg(long, help_heading = "Filters")]
    pub include_system: bool,
    // for_issue and for_mr are mutually exclusive; the conflict is declared
    // on both fields so clap reports it no matter which is parsed first.
    /// Filter to notes on a specific issue IID (requires --project or default_project)
    #[arg(long, conflicts_with = "for_mr", help_heading = "Filters")]
    pub for_issue: Option<i64>,
    /// Filter to notes on a specific MR IID (requires --project or default_project)
    #[arg(long, conflicts_with = "for_issue", help_heading = "Filters")]
    pub for_mr: Option<i64>,
    /// Filter by project path
    #[arg(short = 'p', long, help_heading = "Filters")]
    pub project: Option<String>,
    /// Filter by time (7d, 2w, 1m, or YYYY-MM-DD)
    #[arg(long, help_heading = "Filters")]
    pub since: Option<String>,
    /// Filter until date (YYYY-MM-DD, inclusive end-of-day)
    #[arg(long, help_heading = "Filters")]
    pub until: Option<String>,
    /// Filter by file path (exact match or prefix with trailing /)
    #[arg(long, help_heading = "Filters")]
    pub path: Option<String>,
    /// Filter by resolution status (any, unresolved, resolved)
    #[arg(
        long,
        value_parser = ["any", "unresolved", "resolved"],
        help_heading = "Filters"
    )]
    pub resolution: Option<String>,
    /// Sort field (created, updated)
    #[arg(
        long,
        value_parser = ["created", "updated"],
        default_value = "created",
        help_heading = "Sorting"
    )]
    pub sort: String,
    /// Sort ascending (default: descending)
    #[arg(long, help_heading = "Sorting")]
    pub asc: bool,
    /// Open first matching item in browser
    #[arg(long, help_heading = "Actions")]
    pub open: bool,
}
#[derive(Parser)]
pub struct IngestArgs {
/// Entity to ingest (issues, mrs). Omit to ingest everything
@@ -556,8 +666,8 @@ pub struct SearchArgs {
#[arg(long, default_value = "hybrid", value_parser = ["lexical", "hybrid", "semantic"], help_heading = "Mode")]
pub mode: String,
/// Filter by source type (issue, mr, discussion)
#[arg(long = "type", value_name = "TYPE", value_parser = ["issue", "mr", "discussion"], help_heading = "Filters")]
/// Filter by source type (issue, mr, discussion, note)
#[arg(long = "type", value_name = "TYPE", value_parser = ["issue", "mr", "discussion", "note"], help_heading = "Filters")]
pub source_type: Option<String>,
/// Filter by author username
@@ -800,6 +910,26 @@ pub struct WhoArgs {
#[arg(long = "no-detail", hide = true, overrides_with = "detail")]
pub no_detail: bool,
/// Score as if "now" is this date (ISO 8601 or duration like 30d). Expert mode only.
#[arg(long = "as-of", help_heading = "Scoring")]
pub as_of: Option<String>,
/// Show per-component score breakdown in output. Expert mode only.
#[arg(long = "explain-score", help_heading = "Scoring")]
pub explain_score: bool,
/// Include bot users in results (normally excluded via scoring.excluded_usernames).
#[arg(long = "include-bots", help_heading = "Scoring")]
pub include_bots: bool,
/// Remove the default time window (query all history). Conflicts with --since.
#[arg(
long = "all-history",
help_heading = "Filters",
conflicts_with = "since"
)]
pub all_history: bool,
}
#[derive(Parser)]

View File

@@ -64,6 +64,10 @@ pub fn expand_fields_preset(fields: &[String], entity: &str) -> Vec<String> {
.iter()
.map(|s| (*s).to_string())
.collect(),
"notes" => ["id", "author_username", "body", "created_at_iso"]
.iter()
.map(|s| (*s).to_string())
.collect(),
_ => fields.to_vec(),
}
} else {
@@ -82,3 +86,25 @@ pub fn strip_schemas(commands: &mut serde_json::Value) {
}
}
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_expand_fields_preset_notes() {
        // The "minimal" preset expands to the four core note columns.
        let requested = vec!["minimal".to_string()];
        assert_eq!(
            expand_fields_preset(&requested, "notes"),
            ["id", "author_username", "body", "created_at_iso"]
        );
    }

    #[test]
    fn test_expand_fields_preset_passthrough() {
        // An explicit field list is returned unchanged.
        let requested: Vec<String> = ["id", "body"].map(str::to_string).to_vec();
        assert_eq!(expand_fields_preset(&requested, "notes"), ["id", "body"]);
    }
}

View File

@@ -164,6 +164,38 @@ pub struct ScoringConfig {
/// Bonus points per individual inline review comment (DiffNote).
#[serde(rename = "noteBonus")]
pub note_bonus: i64,
/// Points per MR where the user was assigned as a reviewer.
#[serde(rename = "reviewerAssignmentWeight")]
pub reviewer_assignment_weight: i64,
/// Half-life in days for author contribution decay.
#[serde(rename = "authorHalfLifeDays")]
pub author_half_life_days: u32,
/// Half-life in days for reviewer contribution decay.
#[serde(rename = "reviewerHalfLifeDays")]
pub reviewer_half_life_days: u32,
/// Half-life in days for reviewer assignment decay.
#[serde(rename = "reviewerAssignmentHalfLifeDays")]
pub reviewer_assignment_half_life_days: u32,
/// Half-life in days for note/comment contribution decay.
#[serde(rename = "noteHalfLifeDays")]
pub note_half_life_days: u32,
/// Multiplier applied to scores from closed (not merged) MRs.
#[serde(rename = "closedMrMultiplier")]
pub closed_mr_multiplier: f64,
/// Minimum character count for a review note to earn note_bonus.
#[serde(rename = "reviewerMinNoteChars")]
pub reviewer_min_note_chars: u32,
/// Usernames excluded from expert/scoring results.
#[serde(rename = "excludedUsernames")]
pub excluded_usernames: Vec<String>,
}
impl Default for ScoringConfig {
@@ -172,6 +204,14 @@ impl Default for ScoringConfig {
author_weight: 25,
reviewer_weight: 10,
note_bonus: 1,
reviewer_assignment_weight: 3,
author_half_life_days: 180,
reviewer_half_life_days: 90,
reviewer_assignment_half_life_days: 45,
note_half_life_days: 45,
closed_mr_multiplier: 0.5,
reviewer_min_note_chars: 20,
excluded_usernames: vec![],
}
}
}
@@ -287,6 +327,55 @@ fn validate_scoring(scoring: &ScoringConfig) -> Result<()> {
details: "scoring.noteBonus must be >= 0".to_string(),
});
}
if scoring.reviewer_assignment_weight < 0 {
return Err(LoreError::ConfigInvalid {
details: "scoring.reviewerAssignmentWeight must be >= 0".to_string(),
});
}
if scoring.author_half_life_days == 0 || scoring.author_half_life_days > 3650 {
return Err(LoreError::ConfigInvalid {
details: "scoring.authorHalfLifeDays must be in 1..=3650".to_string(),
});
}
if scoring.reviewer_half_life_days == 0 || scoring.reviewer_half_life_days > 3650 {
return Err(LoreError::ConfigInvalid {
details: "scoring.reviewerHalfLifeDays must be in 1..=3650".to_string(),
});
}
if scoring.reviewer_assignment_half_life_days == 0
|| scoring.reviewer_assignment_half_life_days > 3650
{
return Err(LoreError::ConfigInvalid {
details: "scoring.reviewerAssignmentHalfLifeDays must be in 1..=3650".to_string(),
});
}
if scoring.note_half_life_days == 0 || scoring.note_half_life_days > 3650 {
return Err(LoreError::ConfigInvalid {
details: "scoring.noteHalfLifeDays must be in 1..=3650".to_string(),
});
}
if !scoring.closed_mr_multiplier.is_finite()
|| scoring.closed_mr_multiplier <= 0.0
|| scoring.closed_mr_multiplier > 1.0
{
return Err(LoreError::ConfigInvalid {
details: "scoring.closedMrMultiplier must be finite and in (0.0, 1.0]".to_string(),
});
}
if scoring.reviewer_min_note_chars > 4096 {
return Err(LoreError::ConfigInvalid {
details: "scoring.reviewerMinNoteChars must be <= 4096".to_string(),
});
}
if scoring
.excluded_usernames
.iter()
.any(|u| u.trim().is_empty())
{
return Err(LoreError::ConfigInvalid {
details: "scoring.excludedUsernames entries must be non-empty".to_string(),
});
}
Ok(())
}
@@ -561,4 +650,140 @@ mod tests {
"set default_project should be present: {json}"
);
}
#[test]
fn test_config_validation_rejects_zero_half_life() {
let scoring = ScoringConfig {
author_half_life_days: 0,
..Default::default()
};
let err = validate_scoring(&scoring).unwrap_err();
let msg = err.to_string();
assert!(
msg.contains("authorHalfLifeDays"),
"unexpected error: {msg}"
);
}
#[test]
fn test_config_validation_rejects_absurd_half_life() {
let scoring = ScoringConfig {
author_half_life_days: 5000,
..Default::default()
};
let err = validate_scoring(&scoring).unwrap_err();
let msg = err.to_string();
assert!(
msg.contains("authorHalfLifeDays"),
"unexpected error: {msg}"
);
}
#[test]
fn test_config_validation_rejects_nan_multiplier() {
let scoring = ScoringConfig {
closed_mr_multiplier: f64::NAN,
..Default::default()
};
let err = validate_scoring(&scoring).unwrap_err();
let msg = err.to_string();
assert!(
msg.contains("closedMrMultiplier"),
"unexpected error: {msg}"
);
}
#[test]
fn test_config_validation_rejects_zero_multiplier() {
let scoring = ScoringConfig {
closed_mr_multiplier: 0.0,
..Default::default()
};
let err = validate_scoring(&scoring).unwrap_err();
let msg = err.to_string();
assert!(
msg.contains("closedMrMultiplier"),
"unexpected error: {msg}"
);
}
#[test]
fn test_config_validation_rejects_negative_reviewer_assignment_weight() {
let scoring = ScoringConfig {
reviewer_assignment_weight: -1,
..Default::default()
};
let err = validate_scoring(&scoring).unwrap_err();
let msg = err.to_string();
assert!(
msg.contains("reviewerAssignmentWeight"),
"unexpected error: {msg}"
);
}
#[test]
fn test_config_validation_rejects_oversized_min_note_chars() {
let scoring = ScoringConfig {
reviewer_min_note_chars: 5000,
..Default::default()
};
let err = validate_scoring(&scoring).unwrap_err();
let msg = err.to_string();
assert!(
msg.contains("reviewerMinNoteChars"),
"unexpected error: {msg}"
);
}
#[test]
fn test_config_validation_rejects_empty_excluded_username() {
let scoring = ScoringConfig {
excluded_usernames: vec!["valid".to_string(), " ".to_string()],
..Default::default()
};
let err = validate_scoring(&scoring).unwrap_err();
let msg = err.to_string();
assert!(msg.contains("excludedUsernames"), "unexpected error: {msg}");
}
#[test]
fn test_config_validation_accepts_valid_new_fields() {
let scoring = ScoringConfig {
author_half_life_days: 365,
reviewer_half_life_days: 180,
reviewer_assignment_half_life_days: 90,
note_half_life_days: 60,
closed_mr_multiplier: 0.5,
reviewer_min_note_chars: 20,
reviewer_assignment_weight: 3,
excluded_usernames: vec!["bot-user".to_string()],
..Default::default()
};
validate_scoring(&scoring).unwrap();
}
#[test]
fn test_config_validation_accepts_boundary_half_life() {
// 1 and 3650 are both valid boundaries
let scoring_min = ScoringConfig {
author_half_life_days: 1,
..Default::default()
};
validate_scoring(&scoring_min).unwrap();
let scoring_max = ScoringConfig {
author_half_life_days: 3650,
..Default::default()
};
validate_scoring(&scoring_max).unwrap();
}
#[test]
fn test_config_validation_accepts_multiplier_at_one() {
let scoring = ScoringConfig {
closed_mr_multiplier: 1.0,
..Default::default()
};
validate_scoring(&scoring).unwrap();
}
}

View File

@@ -69,10 +69,26 @@ const MIGRATIONS: &[(&str, &str)] = &[
"021",
include_str!("../../migrations/021_work_item_status.sql"),
),
(
"022",
include_str!("../../migrations/022_notes_query_index.sql"),
),
(
"023",
include_str!("../../migrations/023_issue_detail_fields.sql"),
),
(
"024",
include_str!("../../migrations/024_note_documents.sql"),
),
(
"025",
include_str!("../../migrations/025_note_dirty_backfill.sql"),
),
(
"026",
include_str!("../../migrations/026_scoring_indexes.sql"),
),
];
pub fn create_connection(db_path: &Path) -> Result<Connection> {
@@ -316,3 +332,639 @@ pub fn get_schema_version(conn: &Connection) -> i32 {
)
.unwrap_or(0)
}
#[cfg(test)]
mod tests {
use super::*;
/// Open an in-memory database and apply every registered migration.
fn setup_migrated_db() -> Connection {
    let conn = create_connection(Path::new(":memory:")).unwrap();
    run_migrations(&conn).unwrap();
    conn
}

/// True if an index named `index_name` exists in the schema.
fn index_exists(conn: &Connection, index_name: &str) -> bool {
    conn.query_row(
        "SELECT COUNT(*) > 0 FROM sqlite_master WHERE type='index' AND name=?1",
        [index_name],
        |row| row.get(0),
    )
    .unwrap_or(false)
}

/// True if `table` has a column named `column` (via PRAGMA table_info).
fn column_exists(conn: &Connection, table: &str, column: &str) -> bool {
    // Table names cannot be bound as parameters in a PRAGMA; callers pass
    // trusted literals only.
    let sql = format!("PRAGMA table_info({table})");
    let mut stmt = conn.prepare(&sql).unwrap();
    // Stream the column names instead of collecting them into a Vec that is
    // scanned once and thrown away (clippy: needless_collect).
    stmt.query_map([], |row| row.get::<_, String>(1))
        .unwrap()
        .filter_map(|r| r.ok())
        .any(|name| name == column)
}
#[test]
fn test_migration_022_indexes_exist() {
    let conn = setup_migrated_db();
    // New indexes from migration 022, plus the discussion JOIN indexes.
    // (idx_discussions_issue_id is new; idx_discussions_mr_id already
    // existed from migration 006 but IF NOT EXISTS makes it safe.)
    let expected_indexes = [
        "idx_notes_user_created",
        "idx_notes_project_created",
        "idx_notes_author_id",
        "idx_discussions_issue_id",
        "idx_discussions_mr_id",
    ];
    for name in expected_indexes {
        assert!(index_exists(&conn, name), "{name} should exist");
    }
    // author_id column on notes
    assert!(
        column_exists(&conn, "notes", "author_id"),
        "notes.author_id column should exist"
    );
}
// -- Helper: insert a minimal project for FK satisfaction --
fn insert_test_project(conn: &Connection) -> i64 {
conn.execute(
"INSERT INTO projects (gitlab_project_id, path_with_namespace, web_url) \
VALUES (1000, 'test/project', 'https://example.com/test/project')",
[],
)
.unwrap();
conn.last_insert_rowid()
}
// -- Helper: insert a minimal issue owned by `project_id`; returns its rowid --
fn insert_test_issue(conn: &Connection, project_id: i64) -> i64 {
    let sql = "INSERT INTO issues (gitlab_id, project_id, iid, state, author_username, \
         created_at, updated_at, last_seen_at) \
         VALUES (100, ?1, 1, 'opened', 'alice', 1000, 1000, 1000)";
    conn.execute(sql, [project_id]).unwrap();
    conn.last_insert_rowid()
}
// -- Helper: insert a minimal Issue-type discussion; returns its rowid --
fn insert_test_discussion(conn: &Connection, project_id: i64, issue_id: i64) -> i64 {
    let sql = "INSERT INTO discussions (gitlab_discussion_id, project_id, issue_id, \
         noteable_type, last_seen_at) \
         VALUES ('disc-001', ?1, ?2, 'Issue', 1000)";
    conn.execute(sql, rusqlite::params![project_id, issue_id])
        .unwrap();
    conn.last_insert_rowid()
}
// -- Helper: insert a minimal note; `is_system` drives the cleanup-trigger
// tests below. Returns the note's rowid. --
//
// NOTE(review): dropped the stale #[allow(clippy::too_many_arguments)];
// with 5 parameters the lint (threshold 7) can never fire here.
fn insert_test_note(
    conn: &Connection,
    gitlab_id: i64,
    discussion_id: i64,
    project_id: i64,
    is_system: bool,
) -> i64 {
    conn.execute(
        "INSERT INTO notes (gitlab_id, discussion_id, project_id, is_system, \
         author_username, body, created_at, updated_at, last_seen_at) \
         VALUES (?1, ?2, ?3, ?4, 'alice', 'note body', 1000, 1000, 1000)",
        rusqlite::params![gitlab_id, discussion_id, project_id, is_system as i32],
    )
    .unwrap();
    conn.last_insert_rowid()
}
// -- Helper: insert a document row with fixed test content/hash; returns its rowid --
fn insert_test_document(
    conn: &Connection,
    source_type: &str,
    source_id: i64,
    project_id: i64,
) -> i64 {
    let sql = "INSERT INTO documents (source_type, source_id, project_id, content_text, content_hash) \
         VALUES (?1, ?2, ?3, 'test content', 'hash123')";
    conn.execute(sql, rusqlite::params![source_type, source_id, project_id])
        .unwrap();
    conn.last_insert_rowid()
}
#[test]
fn test_migration_024_allows_note_source_type() {
    let conn = setup_migrated_db();
    let pid = insert_test_project(&conn);
    // After migration 024 the rebuilt CHECK constraints accept 'note' in
    // both documents and dirty_sources.
    let doc_insert = conn.execute(
        "INSERT INTO documents (source_type, source_id, project_id, content_text, content_hash) \
         VALUES ('note', 1, ?1, 'note content', 'hash-note')",
        [pid],
    );
    doc_insert.expect("INSERT with source_type='note' into documents should succeed");
    let dirty_insert = conn.execute(
        "INSERT INTO dirty_sources (source_type, source_id, queued_at) \
         VALUES ('note', 1, 1000)",
        [],
    );
    dirty_insert.expect("INSERT with source_type='note' into dirty_sources should succeed");
}
// Regression guard for the table-rebuild migration: every pre-existing
// documents / junction / dirty_sources row must survive the rebuild intact.
#[test]
fn test_migration_024_preserves_existing_data() {
    // Run migrations up to 023 only, insert data, then apply 024
    // Migration 024 is at index 23 (0-based). Use hardcoded index so adding
    // later migrations doesn't silently shift what this test exercises.
    let conn = create_connection(Path::new(":memory:")).unwrap();
    // Apply migrations 001-023 (indices 0..23)
    run_migrations_up_to(&conn, 23);
    let pid = insert_test_project(&conn);
    // Insert a document with existing source_type
    conn.execute(
        "INSERT INTO documents (source_type, source_id, project_id, content_text, content_hash, title) \
         VALUES ('issue', 1, ?1, 'issue content', 'hash-issue', 'Test Issue')",
        [pid],
    )
    .unwrap();
    let doc_id: i64 = conn.last_insert_rowid();
    // Insert junction data
    conn.execute(
        "INSERT INTO document_labels (document_id, label_name) VALUES (?1, 'bug')",
        [doc_id],
    )
    .unwrap();
    conn.execute(
        "INSERT INTO document_paths (document_id, path) VALUES (?1, 'src/main.rs')",
        [doc_id],
    )
    .unwrap();
    // Insert dirty_sources row
    conn.execute(
        "INSERT INTO dirty_sources (source_type, source_id, queued_at) VALUES ('issue', 1, 1000)",
        [],
    )
    .unwrap();
    // Now apply migration 024 (index 23) — the table-rebuild migration
    run_single_migration(&conn, 23);
    // Verify document still exists with correct data
    let (st, content, title): (String, String, String) = conn
        .query_row(
            "SELECT source_type, content_text, title FROM documents WHERE id = ?1",
            [doc_id],
            |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)),
        )
        .unwrap();
    assert_eq!(st, "issue");
    assert_eq!(content, "issue content");
    assert_eq!(title, "Test Issue");
    // Verify junction data preserved
    let label_count: i64 = conn
        .query_row(
            "SELECT COUNT(*) FROM document_labels WHERE document_id = ?1",
            [doc_id],
            |row| row.get(0),
        )
        .unwrap();
    assert_eq!(label_count, 1);
    let path_count: i64 = conn
        .query_row(
            "SELECT COUNT(*) FROM document_paths WHERE document_id = ?1",
            [doc_id],
            |row| row.get(0),
        )
        .unwrap();
    assert_eq!(path_count, 1);
    // Verify dirty_sources preserved
    let dirty_count: i64 = conn
        .query_row("SELECT COUNT(*) FROM dirty_sources", [], |row| row.get(0))
        .unwrap();
    assert_eq!(dirty_count, 1);
}
// Migration 024 rebuilds the documents table; prove the FTS sync triggers
// (insert / update / delete) still fire against the rebuilt table.
#[test]
fn test_migration_024_fts_triggers_intact() {
    let conn = setup_migrated_db();
    let pid = insert_test_project(&conn);
    // Insert a document after migration — FTS trigger should fire
    // (the helper writes content_text = 'test content', hence MATCH 'test').
    let doc_id = insert_test_document(&conn, "note", 1, pid);
    // Verify FTS entry exists
    let fts_count: i64 = conn
        .query_row(
            "SELECT COUNT(*) FROM documents_fts WHERE documents_fts MATCH 'test'",
            [],
            |row| row.get(0),
        )
        .unwrap();
    assert!(fts_count > 0, "FTS trigger should have created an entry");
    // Verify update trigger works
    conn.execute(
        "UPDATE documents SET content_text = 'updated content' WHERE id = ?1",
        [doc_id],
    )
    .unwrap();
    let fts_updated: i64 = conn
        .query_row(
            "SELECT COUNT(*) FROM documents_fts WHERE documents_fts MATCH 'updated'",
            [],
            |row| row.get(0),
        )
        .unwrap();
    assert!(
        fts_updated > 0,
        "FTS update trigger should reflect new content"
    );
    // Verify delete trigger works
    conn.execute("DELETE FROM documents WHERE id = ?1", [doc_id])
        .unwrap();
    let fts_after_delete: i64 = conn
        .query_row(
            "SELECT COUNT(*) FROM documents_fts WHERE documents_fts MATCH 'updated'",
            [],
            |row| row.get(0),
        )
        .unwrap();
    assert_eq!(
        fts_after_delete, 0,
        "FTS delete trigger should remove the entry"
    );
}
#[test]
fn test_migration_024_row_counts_preserved() {
    // A fully migrated fresh DB has both rebuilt tables present, queryable,
    // and empty.
    let conn = setup_migrated_db();
    let count_rows = |table: &str| -> i64 {
        let sql = format!("SELECT COUNT(*) FROM {}", table);
        conn.query_row(&sql, [], |row| row.get(0)).unwrap()
    };
    assert_eq!(count_rows("documents"), 0, "Fresh DB should have 0 documents");
    assert_eq!(
        count_rows("dirty_sources"),
        0,
        "Fresh DB should have 0 dirty_sources"
    );
}
#[test]
fn test_migration_024_integrity_checks_pass() {
    let conn = setup_migrated_db();
    // PRAGMA integrity_check returns the literal string "ok" when clean.
    let integrity = conn
        .query_row("PRAGMA integrity_check", [], |row| row.get::<_, String>(0))
        .unwrap();
    assert_eq!(integrity, "ok", "Database integrity check should pass");
    // pragma_foreign_key_check yields one row per violation, so a clean
    // schema counts zero.
    let fk_violations = conn
        .query_row("SELECT COUNT(*) FROM pragma_foreign_key_check", [], |row| {
            row.get::<_, i64>(0)
        })
        .unwrap();
    assert_eq!(fk_violations, 0, "No foreign key violations should exist");
}
#[test]
fn test_migration_024_note_delete_trigger_cleans_document() {
    let conn = setup_migrated_db();
    let pid = insert_test_project(&conn);
    let issue_id = insert_test_issue(&conn, pid);
    let disc_id = insert_test_discussion(&conn, pid, issue_id);
    let note_id = insert_test_note(&conn, 200, disc_id, pid, false);
    // Create a document for this note
    insert_test_document(&conn, "note", note_id, pid);
    // Count documents currently attached to this note.
    let docs_for_note = || -> i64 {
        conn.query_row(
            "SELECT COUNT(*) FROM documents WHERE source_type = 'note' AND source_id = ?1",
            [note_id],
            |row| row.get(0),
        )
        .unwrap()
    };
    assert_eq!(docs_for_note(), 1);
    // Delete the note — trigger should remove the document
    conn.execute("DELETE FROM notes WHERE id = ?1", [note_id])
        .unwrap();
    assert_eq!(
        docs_for_note(),
        0,
        "notes_ad_cleanup trigger should delete the document"
    );
}
#[test]
fn test_migration_024_note_system_flip_trigger_cleans_document() {
    let conn = setup_migrated_db();
    let pid = insert_test_project(&conn);
    let issue_id = insert_test_issue(&conn, pid);
    let disc_id = insert_test_discussion(&conn, pid, issue_id);
    let note_id = insert_test_note(&conn, 201, disc_id, pid, false);
    // Create a document for this note
    insert_test_document(&conn, "note", note_id, pid);
    // Count documents currently attached to this note.
    let docs_for_note = || -> i64 {
        conn.query_row(
            "SELECT COUNT(*) FROM documents WHERE source_type = 'note' AND source_id = ?1",
            [note_id],
            |row| row.get(0),
        )
        .unwrap()
    };
    assert_eq!(docs_for_note(), 1);
    // Flip is_system from 0 to 1 — trigger should remove the document
    conn.execute("UPDATE notes SET is_system = 1 WHERE id = ?1", [note_id])
        .unwrap();
    assert_eq!(
        docs_for_note(),
        0,
        "notes_au_system_cleanup trigger should delete the document"
    );
}
#[test]
fn test_migration_024_system_note_delete_trigger_does_not_fire() {
    let conn = setup_migrated_db();
    let pid = insert_test_project(&conn);
    let issue_id = insert_test_issue(&conn, pid);
    let disc_id = insert_test_discussion(&conn, pid, issue_id);
    // Insert a system note (is_system = true)
    let note_id = insert_test_note(&conn, 202, disc_id, pid, true);
    // Manually insert a document (shouldn't exist for system notes in practice,
    // but we test the trigger guard)
    insert_test_document(&conn, "note", note_id, pid);
    // Count documents currently attached to this note.
    let docs_for_note = || -> i64 {
        conn.query_row(
            "SELECT COUNT(*) FROM documents WHERE source_type = 'note' AND source_id = ?1",
            [note_id],
            |row| row.get(0),
        )
        .unwrap()
    };
    assert_eq!(docs_for_note(), 1);
    // Delete system note — trigger has WHEN old.is_system = 0 so it should NOT fire
    conn.execute("DELETE FROM notes WHERE id = ?1", [note_id])
        .unwrap();
    assert_eq!(
        docs_for_note(),
        1,
        "notes_ad_cleanup trigger should NOT fire for system notes"
    );
}
/// Apply only the first `up_to` entries of MIGRATIONS — i.e. schema versions
/// 1..=up_to (so the VERSION is inclusive, while `up_to` itself is an
/// exclusive 0-based slice bound). Example: `run_migrations_up_to(&conn, 23)`
/// leaves the DB at version 023 with migration 024 (index 23) still pending.
fn run_migrations_up_to(conn: &Connection, up_to: usize) {
    // schema_version is normally created by run_migrations(); create it here
    // because this helper bypasses that entry point.
    conn.execute_batch(
        "CREATE TABLE IF NOT EXISTS schema_version ( \
         version INTEGER PRIMARY KEY, applied_at INTEGER NOT NULL, description TEXT);",
    )
    .unwrap();
    for (version_str, sql) in &MIGRATIONS[..up_to] {
        let version: i32 = version_str.parse().unwrap();
        conn.execute_batch(sql).unwrap();
        // Record each applied migration the same way the real runner does.
        conn.execute(
            "INSERT OR REPLACE INTO schema_version (version, applied_at, description) \
             VALUES (?1, strftime('%s', 'now') * 1000, ?2)",
            rusqlite::params![version, version_str],
        )
        .unwrap();
    }
}
/// Apply exactly one migration, addressed by its 0-based position in
/// MIGRATIONS, and record it in schema_version.
fn run_single_migration(conn: &Connection, index: usize) {
    let (version_str, sql) = MIGRATIONS[index];
    let version = version_str.parse::<i32>().unwrap();
    conn.execute_batch(sql).unwrap();
    let record_sql = "INSERT OR REPLACE INTO schema_version (version, applied_at, description) \
         VALUES (?1, strftime('%s', 'now') * 1000, ?2)";
    conn.execute(record_sql, rusqlite::params![version, version_str])
        .unwrap();
}
#[test]
fn test_migration_025_backfills_existing_notes() {
    let conn = create_connection(Path::new(":memory:")).unwrap();
    // Run all migrations through 024 (index 0..24)
    run_migrations_up_to(&conn, 24);
    let pid = insert_test_project(&conn);
    let issue_id = insert_test_issue(&conn, pid);
    let disc_id = insert_test_discussion(&conn, pid, issue_id);
    // Insert 5 non-system notes
    for i in 1..=5 {
        insert_test_note(&conn, 300 + i, disc_id, pid, false);
    }
    // Insert 2 system notes
    for i in 1..=2 {
        insert_test_note(&conn, 400 + i, disc_id, pid, true);
    }
    // Run migration 025 (index 24) — the backfill migration
    run_single_migration(&conn, 24);
    let dirty_count: i64 = conn
        .query_row(
            "SELECT COUNT(*) FROM dirty_sources WHERE source_type = 'note'",
            [],
            |row| row.get(0),
        )
        .unwrap();
    assert_eq!(
        dirty_count, 5,
        "Migration 025 should backfill 5 non-system notes"
    );
    // FIX(naming): this binding was previously called `system_note_ids`,
    // but it actually holds the ids that WERE queued (all non-system notes).
    // Renamed so the assertion below reads correctly.
    let queued_note_ids: Vec<i64> = {
        let mut stmt = conn
            .prepare(
                "SELECT source_id FROM dirty_sources WHERE source_type = 'note' ORDER BY source_id",
            )
            .unwrap();
        stmt.query_map([], |row| row.get(0))
            .unwrap()
            .collect::<std::result::Result<Vec<_>, _>>()
            .unwrap()
    };
    // Every system note must be absent from the dirty queue.
    let all_system_note_ids: Vec<i64> = {
        let mut stmt = conn
            .prepare("SELECT id FROM notes WHERE is_system = 1 ORDER BY id")
            .unwrap();
        stmt.query_map([], |row| row.get(0))
            .unwrap()
            .collect::<std::result::Result<Vec<_>, _>>()
            .unwrap()
    };
    for sys_id in &all_system_note_ids {
        assert!(
            !queued_note_ids.contains(sys_id),
            "System note id {} should not be in dirty_sources",
            sys_id
        );
    }
}
#[test]
fn test_migration_025_idempotent_with_existing_documents() {
    let conn = create_connection(Path::new(":memory:")).unwrap();
    run_migrations_up_to(&conn, 24);
    let pid = insert_test_project(&conn);
    let issue_id = insert_test_issue(&conn, pid);
    let disc_id = insert_test_discussion(&conn, pid, issue_id);
    // Three non-system notes; two already have documents, so only the third
    // is missing from the index.
    let note_ids: Vec<i64> = (1..=3)
        .map(|i| insert_test_note(&conn, 500 + i, disc_id, pid, false))
        .collect();
    insert_test_document(&conn, "note", note_ids[0], pid);
    insert_test_document(&conn, "note", note_ids[1], pid);
    // Run migration 025 (the backfill)
    run_single_migration(&conn, 24);
    let count_sql = "SELECT COUNT(*) FROM dirty_sources WHERE source_type = 'note'";
    let dirty_count: i64 = conn.query_row(count_sql, [], |row| row.get(0)).unwrap();
    assert_eq!(
        dirty_count, 1,
        "Only the note without a document should be backfilled"
    );
    // And it must be exactly the document-less note.
    let queued_id: i64 = conn
        .query_row(
            "SELECT source_id FROM dirty_sources WHERE source_type = 'note'",
            [],
            |row| row.get(0),
        )
        .unwrap();
    assert_eq!(queued_id, note_ids[2]);
}
#[test]
fn test_migration_025_skips_notes_already_in_dirty_queue() {
    let conn = create_connection(Path::new(":memory:")).unwrap();
    run_migrations_up_to(&conn, 24);
    let pid = insert_test_project(&conn);
    let issue_id = insert_test_issue(&conn, pid);
    let disc_id = insert_test_discussion(&conn, pid, issue_id);
    // Three non-system notes
    let note_ids: Vec<i64> = (1..=3)
        .map(|i| insert_test_note(&conn, 600 + i, disc_id, pid, false))
        .collect();
    // Pre-queue the first note with a recognizable queued_at timestamp.
    conn.execute(
        "INSERT INTO dirty_sources (source_type, source_id, queued_at) VALUES ('note', ?1, 999)",
        [note_ids[0]],
    )
    .unwrap();
    // Run migration 025 (the backfill)
    run_single_migration(&conn, 24);
    let dirty_count: i64 = conn
        .query_row(
            "SELECT COUNT(*) FROM dirty_sources WHERE source_type = 'note'",
            [],
            |row| row.get(0),
        )
        .unwrap();
    assert_eq!(
        dirty_count, 3,
        "All 3 notes should be in dirty_sources (1 pre-existing + 2 new)"
    );
    // The pre-existing entry must keep its original queued_at.
    let original_queued_at: i64 = conn
        .query_row(
            "SELECT queued_at FROM dirty_sources WHERE source_type = 'note' AND source_id = ?1",
            [note_ids[0]],
            |row| row.get(0),
        )
        .unwrap();
    assert_eq!(
        original_queued_at, 999,
        "ON CONFLICT DO NOTHING should preserve the original queued_at"
    );
}
}

View File

@@ -2,13 +2,14 @@ use chrono::DateTime;
use rusqlite::Connection;
use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};
use std::collections::BTreeSet;
use std::collections::{BTreeSet, HashMap};
use std::fmt::Write as _;
use super::truncation::{
MAX_DISCUSSION_BYTES, NoteContent, truncate_discussion, truncate_hard_cap,
};
use crate::core::error::Result;
use crate::core::time::ms_to_iso;
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
@@ -16,6 +17,7 @@ pub enum SourceType {
Issue,
MergeRequest,
Discussion,
Note,
}
impl SourceType {
@@ -24,6 +26,7 @@ impl SourceType {
Self::Issue => "issue",
Self::MergeRequest => "merge_request",
Self::Discussion => "discussion",
Self::Note => "note",
}
}
@@ -32,6 +35,7 @@ impl SourceType {
"issue" | "issues" => Some(Self::Issue),
"mr" | "mrs" | "merge_request" | "merge_requests" => Some(Self::MergeRequest),
"discussion" | "discussions" => Some(Self::Discussion),
"note" | "notes" => Some(Self::Note),
_ => None,
}
}
@@ -515,6 +519,521 @@ pub fn extract_discussion_document(
}))
}
pub fn extract_note_document(conn: &Connection, note_id: i64) -> Result<Option<DocumentData>> {
let row = conn.query_row(
"SELECT n.id, n.gitlab_id, n.author_username, n.body, n.note_type, n.is_system,
n.created_at, n.updated_at, n.position_new_path, n.position_new_line,
n.position_old_path, n.position_old_line, n.resolvable, n.resolved, n.resolved_by,
d.noteable_type, d.issue_id, d.merge_request_id,
p.path_with_namespace, p.id AS project_id
FROM notes n
JOIN discussions d ON n.discussion_id = d.id
JOIN projects p ON n.project_id = p.id
WHERE n.id = ?1",
rusqlite::params![note_id],
|row| {
Ok((
row.get::<_, i64>(0)?,
row.get::<_, i64>(1)?,
row.get::<_, Option<String>>(2)?,
row.get::<_, Option<String>>(3)?,
row.get::<_, Option<String>>(4)?,
row.get::<_, bool>(5)?,
row.get::<_, i64>(6)?,
row.get::<_, i64>(7)?,
row.get::<_, Option<String>>(8)?,
row.get::<_, Option<i64>>(9)?,
row.get::<_, Option<String>>(10)?,
row.get::<_, Option<i64>>(11)?,
row.get::<_, bool>(12)?,
row.get::<_, bool>(13)?,
row.get::<_, Option<String>>(14)?,
row.get::<_, String>(15)?,
row.get::<_, Option<i64>>(16)?,
row.get::<_, Option<i64>>(17)?,
row.get::<_, String>(18)?,
row.get::<_, i64>(19)?,
))
},
);
let (
_id,
gitlab_id,
author_username,
body,
note_type,
is_system,
created_at,
updated_at,
position_new_path,
position_new_line,
position_old_path,
_position_old_line,
resolvable,
resolved,
_resolved_by,
noteable_type,
issue_id,
merge_request_id,
path_with_namespace,
project_id,
) = match row {
Ok(r) => r,
Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None),
Err(e) => return Err(e.into()),
};
if is_system {
return Ok(None);
}
let (parent_iid, parent_title, parent_web_url, parent_type_label, labels) =
match noteable_type.as_str() {
"Issue" => {
let parent_id = match issue_id {
Some(pid) => pid,
None => return Ok(None),
};
let parent = conn.query_row(
"SELECT i.iid, i.title, i.web_url FROM issues i WHERE i.id = ?1",
rusqlite::params![parent_id],
|row| {
Ok((
row.get::<_, i64>(0)?,
row.get::<_, Option<String>>(1)?,
row.get::<_, Option<String>>(2)?,
))
},
);
let (iid, title, web_url) = match parent {
Ok(r) => r,
Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None),
Err(e) => return Err(e.into()),
};
let mut label_stmt = conn.prepare_cached(
"SELECT l.name FROM issue_labels il
JOIN labels l ON l.id = il.label_id
WHERE il.issue_id = ?1
ORDER BY l.name",
)?;
let labels: Vec<String> = label_stmt
.query_map(rusqlite::params![parent_id], |row| row.get(0))?
.collect::<std::result::Result<Vec<_>, _>>()?;
(iid, title, web_url, "Issue", labels)
}
"MergeRequest" => {
let parent_id = match merge_request_id {
Some(pid) => pid,
None => return Ok(None),
};
let parent = conn.query_row(
"SELECT m.iid, m.title, m.web_url FROM merge_requests m WHERE m.id = ?1",
rusqlite::params![parent_id],
|row| {
Ok((
row.get::<_, i64>(0)?,
row.get::<_, Option<String>>(1)?,
row.get::<_, Option<String>>(2)?,
))
},
);
let (iid, title, web_url) = match parent {
Ok(r) => r,
Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None),
Err(e) => return Err(e.into()),
};
let mut label_stmt = conn.prepare_cached(
"SELECT l.name FROM mr_labels ml
JOIN labels l ON l.id = ml.label_id
WHERE ml.merge_request_id = ?1
ORDER BY l.name",
)?;
let labels: Vec<String> = label_stmt
.query_map(rusqlite::params![parent_id], |row| row.get(0))?
.collect::<std::result::Result<Vec<_>, _>>()?;
(iid, title, web_url, "MergeRequest", labels)
}
_ => return Ok(None),
};
build_note_document(
note_id,
gitlab_id,
author_username,
body,
note_type,
created_at,
updated_at,
position_new_path,
position_new_line,
position_old_path,
resolvable,
resolved,
parent_iid,
parent_title.as_deref(),
parent_web_url.as_deref(),
&labels,
parent_type_label,
&path_with_namespace,
project_id,
)
}
/// Metadata of a note's parent (issue or merge request) needed to render the
/// note's document header and inherit labels.
pub struct ParentMetadata {
    /// Project-local iid of the parent (rendered as `#iid` / `!iid`).
    pub iid: i64,
    /// Parent title; note documents fall back to "(untitled)" when absent.
    pub title: Option<String>,
    /// Parent web URL; note URLs are derived as `<url>#note_<gitlab_id>`.
    pub web_url: Option<String>,
    /// Parent's label names (sorted by name by the fetch query).
    pub labels: Vec<String>,
    /// `path_with_namespace` of the owning project, as passed by the caller.
    pub project_path: String,
}
/// Memoizes parent-metadata lookups for batch note extraction, keyed by
/// `(noteable_type, parent_id)`. Negative results are cached as `None` so a
/// missing parent is only queried once.
pub struct ParentMetadataCache {
    // (noteable_type, parent_id) -> fetched metadata; None records a miss.
    cache: HashMap<(String, i64), Option<ParentMetadata>>,
}
impl Default for ParentMetadataCache {
    /// Equivalent to [`ParentMetadataCache::new`]: an empty cache.
    fn default() -> Self {
        Self::new()
    }
}
impl ParentMetadataCache {
    /// Create an empty cache.
    pub fn new() -> Self {
        Self {
            cache: HashMap::new(),
        }
    }

    /// Return the (possibly cached) parent metadata for
    /// `(noteable_type, parent_id)`, fetching and memoizing it on first use.
    ///
    /// Negative lookups are cached too: an unresolvable parent is stored as
    /// `None` and never re-queried. A fetch error is propagated and nothing
    /// is inserted in that case, matching the previous behavior.
    pub fn get_or_fetch(
        &mut self,
        conn: &Connection,
        noteable_type: &str,
        parent_id: i64,
        project_path: &str,
    ) -> Result<Option<&ParentMetadata>> {
        use std::collections::hash_map::Entry;
        // Entry API: a single hash lookup instead of the previous
        // contains_key + insert + get triple probe (clippy::map_entry),
        // and no second key clone.
        let slot = match self.cache.entry((noteable_type.to_string(), parent_id)) {
            Entry::Occupied(occupied) => occupied.into_mut(),
            Entry::Vacant(vacant) => {
                let meta = fetch_parent_metadata(conn, noteable_type, parent_id, project_path)?;
                vacant.insert(meta)
            }
        };
        Ok(slot.as_ref())
    }
}
/// Load iid/title/web_url and labels for a note's parent row.
///
/// Returns `Ok(None)` for unknown noteable types and for parents whose row
/// no longer exists; database errors other than "no rows" are propagated.
///
/// The Issue and MergeRequest branches were structurally identical apart
/// from their SQL text, so the queries are now selected up front and a
/// single fetch path does the work (the literals are unchanged).
fn fetch_parent_metadata(
    conn: &Connection,
    noteable_type: &str,
    parent_id: i64,
    project_path: &str,
) -> Result<Option<ParentMetadata>> {
    let (parent_sql, labels_sql) = match noteable_type {
        "Issue" => (
            "SELECT i.iid, i.title, i.web_url FROM issues i WHERE i.id = ?1",
            "SELECT l.name FROM issue_labels il
                 JOIN labels l ON l.id = il.label_id
                 WHERE il.issue_id = ?1
                 ORDER BY l.name",
        ),
        "MergeRequest" => (
            "SELECT m.iid, m.title, m.web_url FROM merge_requests m WHERE m.id = ?1",
            "SELECT l.name FROM mr_labels ml
                 JOIN labels l ON l.id = ml.label_id
                 WHERE ml.merge_request_id = ?1
                 ORDER BY l.name",
        ),
        // Other noteable types (e.g. commits) are not indexed.
        _ => return Ok(None),
    };
    let parent = conn.query_row(parent_sql, rusqlite::params![parent_id], |row| {
        Ok((
            row.get::<_, i64>(0)?,
            row.get::<_, Option<String>>(1)?,
            row.get::<_, Option<String>>(2)?,
        ))
    });
    let (iid, title, web_url) = match parent {
        Ok(r) => r,
        // Parent row vanished: a cacheable negative result, not an error.
        Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None),
        Err(e) => return Err(e.into()),
    };
    let mut label_stmt = conn.prepare_cached(labels_sql)?;
    let labels: Vec<String> = label_stmt
        .query_map(rusqlite::params![parent_id], |row| row.get(0))?
        .collect::<std::result::Result<Vec<_>, _>>()?;
    Ok(Some(ParentMetadata {
        iid,
        title,
        web_url,
        labels,
        project_path: project_path.to_string(),
    }))
}
/// Batch-friendly note extraction: parent (issue/MR) metadata is memoized in
/// `cache`, so extracting many notes under the same parent performs only one
/// parent + labels lookup.
///
/// Returns `Ok(None)` for missing notes, system notes, discussions without a
/// parent id, unknown noteable types, or unresolvable parents.
pub fn extract_note_document_cached(
    conn: &Connection,
    note_id: i64,
    cache: &mut ParentMetadataCache,
) -> Result<Option<DocumentData>> {
    // One row joining the note to its discussion (for parent routing) and
    // its project (for path_with_namespace / project_id).
    let row = conn.query_row(
        "SELECT n.id, n.gitlab_id, n.author_username, n.body, n.note_type, n.is_system,
                n.created_at, n.updated_at, n.position_new_path, n.position_new_line,
                n.position_old_path, n.position_old_line, n.resolvable, n.resolved, n.resolved_by,
                d.noteable_type, d.issue_id, d.merge_request_id,
                p.path_with_namespace, p.id AS project_id
         FROM notes n
         JOIN discussions d ON n.discussion_id = d.id
         JOIN projects p ON n.project_id = p.id
         WHERE n.id = ?1",
        rusqlite::params![note_id],
        |row| {
            Ok((
                row.get::<_, i64>(0)?,
                row.get::<_, i64>(1)?,
                row.get::<_, Option<String>>(2)?,
                row.get::<_, Option<String>>(3)?,
                row.get::<_, Option<String>>(4)?,
                row.get::<_, bool>(5)?,
                row.get::<_, i64>(6)?,
                row.get::<_, i64>(7)?,
                row.get::<_, Option<String>>(8)?,
                row.get::<_, Option<i64>>(9)?,
                row.get::<_, Option<String>>(10)?,
                row.get::<_, Option<i64>>(11)?,
                row.get::<_, bool>(12)?,
                row.get::<_, bool>(13)?,
                row.get::<_, Option<String>>(14)?,
                row.get::<_, String>(15)?,
                row.get::<_, Option<i64>>(16)?,
                row.get::<_, Option<i64>>(17)?,
                row.get::<_, String>(18)?,
                row.get::<_, i64>(19)?,
            ))
        },
    );
    // Tuple order mirrors the SELECT column order above exactly.
    let (
        _id,
        gitlab_id,
        author_username,
        body,
        note_type,
        is_system,
        created_at,
        updated_at,
        position_new_path,
        position_new_line,
        position_old_path,
        _position_old_line,
        resolvable,
        resolved,
        _resolved_by,
        noteable_type,
        issue_id,
        merge_request_id,
        path_with_namespace,
        project_id,
    ) = match row {
        Ok(r) => r,
        // No such note: not an error, just nothing to index.
        Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None),
        Err(e) => return Err(e.into()),
    };
    // System notes ("assigned to ...", etc.) are never indexed.
    if is_system {
        return Ok(None);
    }
    // Route to the parent id by noteable type; a missing id (orphaned
    // discussion) or an unknown type means there is nothing to attach to.
    let parent_id = match noteable_type.as_str() {
        "Issue" => match issue_id {
            Some(pid) => pid,
            None => return Ok(None),
        },
        "MergeRequest" => match merge_request_id {
            Some(pid) => pid,
            None => return Ok(None),
        },
        _ => return Ok(None),
    };
    let parent = cache.get_or_fetch(conn, &noteable_type, parent_id, &path_with_namespace)?;
    let parent = match parent {
        Some(p) => p,
        None => return Ok(None),
    };
    let parent_iid = parent.iid;
    let parent_title = parent.title.as_deref();
    let parent_web_url = parent.web_url.as_deref();
    // NOTE(review): cloned out of the cache entry; borrowing &parent.labels
    // would avoid the copy — kept as-is pending a borrow-check review.
    let labels = parent.labels.clone();
    let parent_type_label = noteable_type.as_str();
    build_note_document(
        note_id,
        gitlab_id,
        author_username,
        body,
        note_type,
        created_at,
        updated_at,
        position_new_path,
        position_new_line,
        position_old_path,
        resolvable,
        resolved,
        parent_iid,
        parent_title,
        parent_web_url,
        &labels,
        parent_type_label,
        &path_with_namespace,
        project_id,
    )
}
/// Render a note into a [`DocumentData`] ready for indexing.
///
/// The content is a line-oriented `key: value` header block followed by the
/// raw note body. Line order is part of the document format (content hashes
/// and tests depend on it). `truncate_hard_cap` enforces the absolute size
/// ceiling and the content hash is computed over the possibly-truncated text.
///
/// Returns `Ok(Some(_))` unconditionally; the `Result<Option<..>>` shape
/// matches its callers' signatures.
#[allow(clippy::too_many_arguments)]
fn build_note_document(
    note_id: i64,
    gitlab_id: i64,
    author_username: Option<String>,
    body: Option<String>,
    note_type: Option<String>,
    created_at: i64,
    updated_at: i64,
    position_new_path: Option<String>,
    position_new_line: Option<i64>,
    position_old_path: Option<String>,
    resolvable: bool,
    resolved: bool,
    parent_iid: i64,
    parent_title: Option<&str>,
    parent_web_url: Option<&str>,
    labels: &[String],
    parent_type_label: &str,
    path_with_namespace: &str,
    project_id: i64,
) -> Result<Option<DocumentData>> {
    // Collect both sides of a potential rename; BTreeSet dedupes and keeps
    // the paths sorted. Empty strings are dropped.
    let mut path_set = BTreeSet::new();
    if let Some(ref p) = position_old_path
        && !p.is_empty()
    {
        path_set.insert(p.clone());
    }
    if let Some(ref p) = position_new_path
        && !p.is_empty()
    {
        path_set.insert(p.clone());
    }
    let paths: Vec<String> = path_set.into_iter().collect();
    // Deep-link to the note through the parent's web URL anchor.
    let url = parent_web_url.map(|wu| format!("{}#note_{}", wu, gitlab_id));
    let display_title = parent_title.unwrap_or("(untitled)");
    let display_note_type = note_type.as_deref().unwrap_or("Note");
    let display_author = author_username.as_deref().unwrap_or("unknown");
    // Issues render as '#iid', everything else (merge requests) as '!iid'.
    let parent_prefix = if parent_type_label == "Issue" {
        format!("Issue #{}", parent_iid)
    } else {
        format!("MR !{}", parent_iid)
    };
    let title = format!(
        "Note by @{} on {}: {}",
        display_author, parent_prefix, display_title
    );
    let labels_csv = labels.join(", ");
    let mut content = String::new();
    // Header block: write errors into a String are impossible, hence `let _`.
    let _ = writeln!(content, "[[Note]]");
    let _ = writeln!(content, "source_type: note");
    let _ = writeln!(content, "note_gitlab_id: {}", gitlab_id);
    let _ = writeln!(content, "project: {}", path_with_namespace);
    let _ = writeln!(content, "parent_type: {}", parent_type_label);
    let _ = writeln!(content, "parent_iid: {}", parent_iid);
    let _ = writeln!(content, "parent_title: {}", display_title);
    let _ = writeln!(content, "note_type: {}", display_note_type);
    let _ = writeln!(content, "author: @{}", display_author);
    let _ = writeln!(content, "created_at: {}", ms_to_iso(created_at));
    // 'resolved:' only appears for resolvable notes.
    if resolvable {
        let _ = writeln!(content, "resolved: {}", resolved);
    }
    // Diff-anchored notes carry their new-side file position (old_path is
    // indexed via `paths` above but not rendered here).
    if display_note_type == "DiffNote"
        && let Some(ref p) = position_new_path
    {
        if let Some(line) = position_new_line {
            let _ = writeln!(content, "path: {}:{}", p, line);
        } else {
            let _ = writeln!(content, "path: {}", p);
        }
    }
    if !labels.is_empty() {
        let _ = writeln!(content, "labels: {}", labels_csv);
    }
    if let Some(ref u) = url {
        let _ = writeln!(content, "url: {}", u);
    }
    content.push_str("\n--- Body ---\n\n");
    content.push_str(body.as_deref().unwrap_or(""));
    // Separate hashes let the upsert detect label-/path-only changes cheaply.
    let labels_hash = compute_list_hash(labels);
    let paths_hash = compute_list_hash(&paths);
    // Enforce the size cap before hashing the final content.
    let hard_cap = truncate_hard_cap(&content);
    let content_hash = compute_content_hash(&hard_cap.content);
    Ok(Some(DocumentData {
        source_type: SourceType::Note,
        source_id: note_id,
        project_id,
        author_username,
        labels: labels.to_vec(),
        paths,
        labels_hash,
        paths_hash,
        created_at,
        updated_at,
        url,
        title: Some(title),
        content_text: hard_cap.content,
        content_hash,
        is_truncated: hard_cap.is_truncated,
        truncated_reason: hard_cap.reason.map(|r| r.as_str().to_string()),
    }))
}
#[cfg(test)]
mod tests {
use super::*;
@@ -545,6 +1064,26 @@ mod tests {
assert_eq!(SourceType::parse("ISSUE"), Some(SourceType::Issue));
}
#[test]
fn test_source_type_parse_note() {
    // "note" (singular) maps to the Note variant.
    assert_eq!(Some(SourceType::Note), SourceType::parse("note"));
}
#[test]
fn test_source_type_note_as_str() {
    // Canonical storage string for the Note variant.
    assert_eq!("note", SourceType::Note.as_str());
}
#[test]
fn test_source_type_note_display() {
    // Display goes through the same canonical string.
    assert_eq!(SourceType::Note.to_string(), "note");
}
#[test]
fn test_source_type_parse_notes_alias() {
    // The plural form is accepted as an alias.
    assert_eq!(Some(SourceType::Note), SourceType::parse("notes"));
}
#[test]
fn test_source_type_as_str() {
assert_eq!(SourceType::Issue.as_str(), "issue");
@@ -1449,4 +1988,354 @@ mod tests {
let result = extract_discussion_document(&conn, 1).unwrap();
assert!(result.is_none());
}
// Test helper: insert a note with full position/type/resolution columns
// (unlike the minimal insert_note). `created_at` is reused for updated_at
// and last_seen_at (the ?6 placeholder appears three times); booleans are
// stored as 0/1 integers. project_id is fixed to 1.
#[allow(clippy::too_many_arguments)]
fn insert_note_with_type(
    conn: &Connection,
    id: i64,
    gitlab_id: i64,
    discussion_id: i64,
    author: Option<&str>,
    body: Option<&str>,
    created_at: i64,
    is_system: bool,
    old_path: Option<&str>,
    new_path: Option<&str>,
    old_line: Option<i64>,
    new_line: Option<i64>,
    note_type: Option<&str>,
    resolvable: bool,
    resolved: bool,
) {
    conn.execute(
        "INSERT INTO notes (id, gitlab_id, discussion_id, project_id, author_username, body, created_at, updated_at, last_seen_at, is_system, position_old_path, position_new_path, position_old_line, position_new_line, note_type, resolvable, resolved) VALUES (?1, ?2, ?3, 1, ?4, ?5, ?6, ?6, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14)",
        rusqlite::params![id, gitlab_id, discussion_id, author, body, created_at, is_system as i32, old_path, new_path, old_line, new_line, note_type, resolvable as i32, resolved as i32],
    ).unwrap();
}
#[test]
fn test_note_document_basic_format() {
    let conn = setup_discussion_test_db();
    insert_issue(
        &conn,
        1,
        42,
        Some("Fix login bug"),
        Some("desc"),
        "opened",
        Some("johndoe"),
        Some("https://gitlab.example.com/group/project-one/-/issues/42"),
    );
    insert_discussion(&conn, 1, "Issue", Some(1), None);
    insert_note(
        &conn,
        1,
        12345,
        1,
        Some("alice"),
        Some("This looks like a race condition"),
        1710460800000,
        false,
        None,
        None,
    );
    let doc = extract_note_document(&conn, 1).unwrap().unwrap();
    // Identity fields.
    assert_eq!(doc.source_type, SourceType::Note);
    assert_eq!(doc.source_id, 1);
    assert_eq!(doc.project_id, 1);
    assert_eq!(doc.author_username, Some("alice".to_string()));
    // Every header line plus the body must appear in the rendered text.
    let expected_fragments = [
        "[[Note]]",
        "source_type: note",
        "note_gitlab_id: 12345",
        "project: group/project-one",
        "parent_type: Issue",
        "parent_iid: 42",
        "parent_title: Fix login bug",
        "author: @alice",
        "--- Body ---",
        "This looks like a race condition",
    ];
    for fragment in expected_fragments {
        assert!(
            doc.content_text.contains(fragment),
            "content_text missing {:?}",
            fragment
        );
    }
    assert_eq!(
        doc.title,
        Some("Note by @alice on Issue #42: Fix login bug".to_string())
    );
    assert_eq!(
        doc.url,
        Some("https://gitlab.example.com/group/project-one/-/issues/42#note_12345".to_string())
    );
}
#[test]
fn test_note_document_diffnote_with_path() {
    let conn = setup_discussion_test_db();
    insert_issue(
        &conn,
        1,
        10,
        Some("Refactor auth"),
        Some("desc"),
        "opened",
        None,
        Some("https://gitlab.example.com/group/project-one/-/issues/10"),
    );
    insert_discussion(&conn, 1, "Issue", Some(1), None);
    // DiffNote anchored at src/auth.rs:25, renamed from src/old_auth.rs:10,
    // resolvable but not yet resolved.
    insert_note_with_type(
        &conn,
        1,
        555,
        1,
        Some("bob"),
        Some("Unused variable here"),
        1000,
        false,
        Some("src/old_auth.rs"),
        Some("src/auth.rs"),
        Some(10),
        Some(25),
        Some("DiffNote"),
        true,
        false,
    );
    let doc = extract_note_document(&conn, 1).unwrap().unwrap();
    for fragment in [
        "note_type: DiffNote",
        "path: src/auth.rs:25",
        "resolved: false",
    ] {
        assert!(doc.content_text.contains(fragment), "missing {:?}", fragment);
    }
    // Both sides of the rename are indexed, sorted lexicographically.
    assert_eq!(doc.paths, vec!["src/auth.rs", "src/old_auth.rs"]);
}
#[test]
fn test_note_document_inherits_parent_labels() {
    let conn = setup_discussion_test_db();
    insert_issue(&conn, 1, 10, Some("Test"), Some("desc"), "opened", None, None);
    // The parent issue carries two labels; the note document must inherit
    // them all.
    insert_label(&conn, 1, "backend");
    insert_label(&conn, 2, "api");
    link_issue_label(&conn, 1, 1);
    link_issue_label(&conn, 1, 2);
    insert_discussion(&conn, 1, "Issue", Some(1), None);
    insert_note(
        &conn,
        1,
        100,
        1,
        Some("alice"),
        Some("Note body"),
        1000,
        false,
        None,
        None,
    );
    let doc = extract_note_document(&conn, 1).unwrap().unwrap();
    // Labels come back sorted by name.
    assert_eq!(doc.labels, vec!["api", "backend"]);
    assert!(doc.content_text.contains("labels: api, backend"));
}
#[test]
fn test_note_document_mr_parent() {
    // A note under a merge-request discussion reports MergeRequest parent
    // metadata and uses the "MR !iid" form in its title.
    let conn = setup_discussion_test_db();
    insert_mr(
        &conn,
        1,
        456,
        Some("JWT Auth"),
        Some("desc"),
        Some("opened"),
        Some("johndoe"),
        Some("feature/jwt"),
        Some("main"),
        Some("https://gitlab.example.com/group/project-one/-/merge_requests/456"),
    );
    insert_discussion(&conn, 1, "MergeRequest", None, Some(1));
    insert_note(
        &conn,
        1,
        200,
        1,
        Some("reviewer"),
        Some("Needs tests"),
        1000,
        false,
        None,
        None,
    );
    let document = extract_note_document(&conn, 1).unwrap().unwrap();
    assert!(document.content_text.contains("parent_type: MergeRequest"));
    assert!(document.content_text.contains("parent_iid: 456"));
    let expected_title = "Note by @reviewer on MR !456: JWT Auth".to_string();
    assert_eq!(document.title, Some(expected_title));
}
#[test]
fn test_note_document_system_note_returns_none() {
    // System notes (is_system = true) are never turned into documents.
    let conn = setup_discussion_test_db();
    insert_issue(
        &conn,
        1,
        10,
        Some("Test"),
        Some("desc"),
        "opened",
        None,
        None,
    );
    insert_discussion(&conn, 1, "Issue", Some(1), None);
    insert_note(
        &conn,
        1,
        100,
        1,
        Some("bot"),
        Some("assigned to @alice"),
        1000,
        true,
        None,
        None,
    );
    assert!(extract_note_document(&conn, 1).unwrap().is_none());
}
#[test]
fn test_note_document_not_found() {
    // Extracting a nonexistent note id yields Ok(None) rather than an error.
    let conn = setup_discussion_test_db();
    assert!(extract_note_document(&conn, 999).unwrap().is_none());
}
#[test]
fn test_note_document_orphaned_discussion() {
    // A discussion with neither an issue nor an MR parent yields no document.
    let conn = setup_discussion_test_db();
    insert_discussion(&conn, 1, "Issue", None, None);
    insert_note(
        &conn,
        1,
        100,
        1,
        Some("alice"),
        Some("Comment"),
        1000,
        false,
        None,
        None,
    );
    assert!(extract_note_document(&conn, 1).unwrap().is_none());
}
#[test]
fn test_note_document_hash_deterministic() {
    // Two extractions of the same note must agree on all three hashes, and
    // the content hash is 64 characters long.
    let conn = setup_discussion_test_db();
    insert_issue(
        &conn,
        1,
        10,
        Some("Test"),
        Some("desc"),
        "opened",
        None,
        None,
    );
    insert_discussion(&conn, 1, "Issue", Some(1), None);
    insert_note(
        &conn,
        1,
        100,
        1,
        Some("alice"),
        Some("Comment"),
        1000,
        false,
        None,
        None,
    );
    let first = extract_note_document(&conn, 1).unwrap().unwrap();
    let second = extract_note_document(&conn, 1).unwrap().unwrap();
    assert_eq!(first.content_hash, second.content_hash);
    assert_eq!(first.labels_hash, second.labels_hash);
    assert_eq!(first.paths_hash, second.paths_hash);
    assert_eq!(first.content_hash.len(), 64);
}
#[test]
fn test_note_document_empty_body() {
    // An empty-string body still renders the body separator and does not
    // count as truncated.
    let conn = setup_discussion_test_db();
    insert_issue(
        &conn,
        1,
        10,
        Some("Test"),
        Some("desc"),
        "opened",
        None,
        None,
    );
    insert_discussion(&conn, 1, "Issue", Some(1), None);
    insert_note(
        &conn,
        1,
        100,
        1,
        Some("alice"),
        Some(""),
        1000,
        false,
        None,
        None,
    );
    let document = extract_note_document(&conn, 1).unwrap().unwrap();
    assert!(document.content_text.contains("--- Body ---\n\n"));
    assert!(!document.is_truncated);
}
#[test]
fn test_note_document_null_body() {
    // A NULL body renders like an empty one: the document text terminates
    // right after the body separator.
    let conn = setup_discussion_test_db();
    insert_issue(
        &conn,
        1,
        10,
        Some("Test"),
        Some("desc"),
        "opened",
        None,
        None,
    );
    insert_discussion(&conn, 1, "Issue", Some(1), None);
    insert_note(
        &conn,
        1,
        100,
        1,
        Some("alice"),
        None,
        1000,
        false,
        None,
        None,
    );
    let doc = extract_note_document(&conn, 1).unwrap().unwrap();
    // `ends_with` on this needle subsumes the former `contains` assertion for
    // the same needle, so a single check is sufficient.
    assert!(doc.content_text.ends_with("--- Body ---\n\n"));
}
}

View File

@@ -3,8 +3,9 @@ mod regenerator;
mod truncation;
pub use extractor::{
DocumentData, SourceType, compute_content_hash, compute_list_hash, extract_discussion_document,
extract_issue_document, extract_mr_document,
DocumentData, ParentMetadataCache, SourceType, compute_content_hash, compute_list_hash,
extract_discussion_document, extract_issue_document, extract_mr_document,
extract_note_document, extract_note_document_cached,
};
pub use regenerator::{RegenerateResult, regenerate_dirty_documents};
pub use truncation::{

View File

@@ -4,8 +4,8 @@ use tracing::{debug, instrument, warn};
use crate::core::error::Result;
use crate::documents::{
DocumentData, SourceType, extract_discussion_document, extract_issue_document,
extract_mr_document,
DocumentData, ParentMetadataCache, SourceType, extract_discussion_document,
extract_issue_document, extract_mr_document, extract_note_document_cached,
};
use crate::ingestion::dirty_tracker::{clear_dirty, get_dirty_sources, record_dirty_error};
@@ -27,6 +27,7 @@ pub fn regenerate_dirty_documents(
let mut result = RegenerateResult::default();
let mut estimated_total: usize = 0;
let mut cache = ParentMetadataCache::new();
loop {
let dirty = get_dirty_sources(conn)?;
@@ -41,7 +42,7 @@ pub fn regenerate_dirty_documents(
estimated_total = estimated_total.max(processed_so_far + remaining);
for (source_type, source_id) in &dirty {
match regenerate_one(conn, *source_type, *source_id) {
match regenerate_one(conn, *source_type, *source_id, &mut cache) {
Ok(changed) => {
if changed {
result.regenerated += 1;
@@ -83,11 +84,17 @@ pub fn regenerate_dirty_documents(
Ok(result)
}
fn regenerate_one(conn: &Connection, source_type: SourceType, source_id: i64) -> Result<bool> {
fn regenerate_one(
conn: &Connection,
source_type: SourceType,
source_id: i64,
cache: &mut ParentMetadataCache,
) -> Result<bool> {
let doc = match source_type {
SourceType::Issue => extract_issue_document(conn, source_id)?,
SourceType::MergeRequest => extract_mr_document(conn, source_id)?,
SourceType::Discussion => extract_discussion_document(conn, source_id)?,
SourceType::Note => extract_note_document_cached(conn, source_id, cache)?,
};
let Some(doc) = doc else {
@@ -122,11 +129,7 @@ fn upsert_document_inner(conn: &Connection, doc: &DocumentData) -> Result<bool>
)
.optional()?;
let content_changed = match &existing {
Some((_, old_content_hash, _, _)) => old_content_hash != &doc.content_hash,
None => true,
};
// Fast path: if all three hashes match, nothing changed at all.
if let Some((_, ref old_content_hash, ref old_labels_hash, ref old_paths_hash)) = existing
&& old_content_hash == &doc.content_hash
&& old_labels_hash == &doc.labels_hash
@@ -134,6 +137,7 @@ fn upsert_document_inner(conn: &Connection, doc: &DocumentData) -> Result<bool>
{
return Ok(false);
}
// Past this point at least one hash differs, so the document will be updated.
let labels_json = serde_json::to_string(&doc.labels).unwrap_or_else(|_| "[]".to_string());
@@ -243,7 +247,8 @@ fn upsert_document_inner(conn: &Connection, doc: &DocumentData) -> Result<bool>
}
}
Ok(content_changed)
// We passed the triple-hash fast path, so at least one hash differs.
Ok(true)
}
fn delete_document(conn: &Connection, source_type: SourceType, source_id: i64) -> Result<()> {
@@ -473,4 +478,316 @@ mod tests {
.unwrap();
assert_eq!(label_count, 1);
}
// Extends the base regenerator test schema (setup_db) with the tables the
// note-document extraction path reads: merge_requests (+ mr_labels),
// discussions, and notes. The column sets mirror the production schema
// closely enough for extract_note_document* to run against them.
fn setup_note_db() -> Connection {
    let conn = setup_db();
    // NOTE: the DDL below must stay in sync with the real migrations; tests
    // here exercise queries, not migration behavior.
    conn.execute_batch(
        "
        CREATE TABLE merge_requests (
            id INTEGER PRIMARY KEY,
            gitlab_id INTEGER UNIQUE NOT NULL,
            project_id INTEGER NOT NULL REFERENCES projects(id),
            iid INTEGER NOT NULL,
            title TEXT,
            description TEXT,
            state TEXT,
            draft INTEGER NOT NULL DEFAULT 0,
            author_username TEXT,
            source_branch TEXT,
            target_branch TEXT,
            head_sha TEXT,
            references_short TEXT,
            references_full TEXT,
            detailed_merge_status TEXT,
            merge_user_username TEXT,
            created_at INTEGER,
            updated_at INTEGER,
            merged_at INTEGER,
            closed_at INTEGER,
            last_seen_at INTEGER NOT NULL,
            discussions_synced_for_updated_at INTEGER,
            discussions_sync_last_attempt_at INTEGER,
            discussions_sync_attempts INTEGER DEFAULT 0,
            discussions_sync_last_error TEXT,
            resource_events_synced_for_updated_at INTEGER,
            web_url TEXT,
            raw_payload_id INTEGER
        );
        CREATE TABLE mr_labels (
            merge_request_id INTEGER REFERENCES merge_requests(id),
            label_id INTEGER REFERENCES labels(id),
            PRIMARY KEY(merge_request_id, label_id)
        );
        CREATE TABLE discussions (
            id INTEGER PRIMARY KEY,
            gitlab_discussion_id TEXT NOT NULL,
            project_id INTEGER NOT NULL REFERENCES projects(id),
            issue_id INTEGER REFERENCES issues(id),
            merge_request_id INTEGER,
            noteable_type TEXT NOT NULL,
            individual_note INTEGER NOT NULL DEFAULT 0,
            first_note_at INTEGER,
            last_note_at INTEGER,
            last_seen_at INTEGER NOT NULL,
            resolvable INTEGER NOT NULL DEFAULT 0,
            resolved INTEGER NOT NULL DEFAULT 0
        );
        CREATE TABLE notes (
            id INTEGER PRIMARY KEY,
            gitlab_id INTEGER UNIQUE NOT NULL,
            discussion_id INTEGER NOT NULL REFERENCES discussions(id),
            project_id INTEGER NOT NULL REFERENCES projects(id),
            note_type TEXT,
            is_system INTEGER NOT NULL DEFAULT 0,
            author_username TEXT,
            body TEXT,
            created_at INTEGER NOT NULL,
            updated_at INTEGER NOT NULL,
            last_seen_at INTEGER NOT NULL,
            position INTEGER,
            resolvable INTEGER NOT NULL DEFAULT 0,
            resolved INTEGER NOT NULL DEFAULT 0,
            resolved_by TEXT,
            resolved_at INTEGER,
            position_old_path TEXT,
            position_new_path TEXT,
            position_old_line INTEGER,
            position_new_line INTEGER,
            raw_payload_id INTEGER
        );
        ",
    )
    .unwrap();
    conn
}
#[test]
fn test_regenerate_note_document() {
    // Draining a dirty non-system note produces a 'note' document row whose
    // text carries the note header and author.
    let conn = setup_note_db();
    conn.execute(
        "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, author_username, created_at, updated_at, last_seen_at, web_url) VALUES (1, 10, 1, 42, 'Test Issue', 'opened', 'alice', 1000, 2000, 3000, 'https://example.com/issues/42')",
        [],
    ).unwrap();
    conn.execute(
        "INSERT INTO discussions (id, gitlab_discussion_id, project_id, issue_id, noteable_type, last_seen_at) VALUES (1, 'disc_1', 1, 1, 'Issue', 3000)",
        [],
    ).unwrap();
    conn.execute(
        "INSERT INTO notes (id, gitlab_id, discussion_id, project_id, author_username, body, created_at, updated_at, last_seen_at, is_system) VALUES (1, 100, 1, 1, 'bob', 'This is a note', 1000, 2000, 3000, 0)",
        [],
    ).unwrap();
    mark_dirty(&conn, SourceType::Note, 1).unwrap();
    let outcome = regenerate_dirty_documents(&conn, None).unwrap();
    assert_eq!(
        (outcome.regenerated, outcome.unchanged, outcome.errored),
        (1, 0, 0)
    );
    let row: (String, String) = conn
        .query_row(
            "SELECT source_type, content_text FROM documents WHERE source_id = 1",
            [],
            |r| Ok((r.get(0)?, r.get(1)?)),
        )
        .unwrap();
    assert_eq!(row.0, "note");
    assert!(row.1.contains("[[Note]]"));
    assert!(row.1.contains("author: @bob"));
}
// A dirty system note must remove its previously generated document:
// extraction yields None for system notes and the regenerator treats that
// as a deletion (which still counts toward `regenerated`).
#[test]
fn test_regenerate_note_system_note_deletes() {
    let conn = setup_note_db();
    conn.execute(
        "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, created_at, updated_at, last_seen_at) VALUES (1, 10, 1, 42, 'Test', 'opened', 1000, 2000, 3000)",
        [],
    ).unwrap();
    conn.execute(
        "INSERT INTO discussions (id, gitlab_discussion_id, project_id, issue_id, noteable_type, last_seen_at) VALUES (1, 'disc_1', 1, 1, 'Issue', 3000)",
        [],
    ).unwrap();
    // is_system = 1: this note must never be indexed.
    conn.execute(
        "INSERT INTO notes (id, gitlab_id, discussion_id, project_id, author_username, body, created_at, updated_at, last_seen_at, is_system) VALUES (1, 100, 1, 1, 'bot', 'assigned to @alice', 1000, 2000, 3000, 1)",
        [],
    ).unwrap();
    // Pre-insert a document for this note (simulating a previously-generated doc)
    conn.execute(
        "INSERT INTO documents (source_type, source_id, project_id, content_text, content_hash) VALUES ('note', 1, 1, 'old content', 'oldhash')",
        [],
    ).unwrap();
    mark_dirty(&conn, SourceType::Note, 1).unwrap();
    let result = regenerate_dirty_documents(&conn, None).unwrap();
    assert_eq!(result.regenerated, 1);
    // The stale pre-inserted document must be gone after the drain.
    let count: i64 = conn
        .query_row(
            "SELECT COUNT(*) FROM documents WHERE source_type = 'note'",
            [],
            |r| r.get(0),
        )
        .unwrap();
    assert_eq!(count, 0);
}
#[test]
fn test_regenerate_note_unchanged() {
    // Re-queueing an unmodified note must be reported as unchanged, not
    // regenerated, on the second drain.
    let conn = setup_note_db();
    conn.execute(
        "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, created_at, updated_at, last_seen_at, web_url) VALUES (1, 10, 1, 42, 'Test', 'opened', 1000, 2000, 3000, 'https://example.com/issues/42')",
        [],
    ).unwrap();
    conn.execute(
        "INSERT INTO discussions (id, gitlab_discussion_id, project_id, issue_id, noteable_type, last_seen_at) VALUES (1, 'disc_1', 1, 1, 'Issue', 3000)",
        [],
    ).unwrap();
    conn.execute(
        "INSERT INTO notes (id, gitlab_id, discussion_id, project_id, author_username, body, created_at, updated_at, last_seen_at, is_system) VALUES (1, 100, 1, 1, 'bob', 'Some note', 1000, 2000, 3000, 0)",
        [],
    ).unwrap();
    mark_dirty(&conn, SourceType::Note, 1).unwrap();
    let first = regenerate_dirty_documents(&conn, None).unwrap();
    assert_eq!(first.regenerated, 1);
    mark_dirty(&conn, SourceType::Note, 1).unwrap();
    let second = regenerate_dirty_documents(&conn, None).unwrap();
    assert_eq!(second.unchanged, 1);
    assert_eq!(second.regenerated, 0);
}
#[test]
fn test_note_regeneration_batch_uses_cache() {
    // Ten dirty notes under a single parent issue regenerate successfully in
    // one drain (the batch path shares the parent-metadata cache).
    let conn = setup_note_db();
    conn.execute(
        "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, author_username, created_at, updated_at, last_seen_at, web_url) VALUES (1, 10, 1, 42, 'Shared Issue', 'opened', 'alice', 1000, 2000, 3000, 'https://example.com/issues/42')",
        [],
    ).unwrap();
    conn.execute(
        "INSERT INTO discussions (id, gitlab_discussion_id, project_id, issue_id, noteable_type, last_seen_at) VALUES (1, 'disc_1', 1, 1, 'Issue', 3000)",
        [],
    ).unwrap();
    for note_id in 1..=10 {
        conn.execute(
            "INSERT INTO notes (id, gitlab_id, discussion_id, project_id, author_username, body, created_at, updated_at, last_seen_at, is_system) VALUES (?1, ?2, 1, 1, 'bob', ?3, 1000, 2000, 3000, 0)",
            rusqlite::params![note_id, note_id * 100, format!("Note body {}", note_id)],
        )
        .unwrap();
        mark_dirty(&conn, SourceType::Note, note_id).unwrap();
    }
    let outcome = regenerate_dirty_documents(&conn, None).unwrap();
    assert_eq!(outcome.regenerated, 10);
    assert_eq!(outcome.errored, 0);
    let note_docs: i64 = conn
        .query_row(
            "SELECT COUNT(*) FROM documents WHERE source_type = 'note'",
            [],
            |r| r.get(0),
        )
        .unwrap();
    assert_eq!(note_docs, 10);
}
// The cached extraction path must produce output identical to the direct
// (uncached) path for every derived field, including all hashes — otherwise
// batch regeneration and one-off extraction would disagree.
#[test]
fn test_note_regeneration_cache_consistent_with_direct_extraction() {
    let conn = setup_note_db();
    conn.execute(
        "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, author_username, created_at, updated_at, last_seen_at, web_url) VALUES (1, 10, 1, 42, 'Consistency Check', 'opened', 'alice', 1000, 2000, 3000, 'https://example.com/issues/42')",
        [],
    ).unwrap();
    // A label on the parent exercises the inherited-labels part of the cache.
    conn.execute(
        "INSERT INTO labels (id, project_id, name) VALUES (1, 1, 'backend')",
        [],
    )
    .unwrap();
    conn.execute(
        "INSERT INTO issue_labels (issue_id, label_id) VALUES (1, 1)",
        [],
    )
    .unwrap();
    conn.execute(
        "INSERT INTO discussions (id, gitlab_discussion_id, project_id, issue_id, noteable_type, last_seen_at) VALUES (1, 'disc_1', 1, 1, 'Issue', 3000)",
        [],
    ).unwrap();
    conn.execute(
        "INSERT INTO notes (id, gitlab_id, discussion_id, project_id, author_username, body, created_at, updated_at, last_seen_at, is_system) VALUES (1, 100, 1, 1, 'bob', 'Some content', 1000, 2000, 3000, 0)",
        [],
    ).unwrap();
    use crate::documents::extract_note_document;
    let direct = extract_note_document(&conn, 1).unwrap().unwrap();
    let mut cache = ParentMetadataCache::new();
    let cached = extract_note_document_cached(&conn, 1, &mut cache)
        .unwrap()
        .unwrap();
    // Every field that feeds indexing or change detection must match exactly.
    assert_eq!(direct.content_text, cached.content_text);
    assert_eq!(direct.content_hash, cached.content_hash);
    assert_eq!(direct.labels, cached.labels);
    assert_eq!(direct.labels_hash, cached.labels_hash);
    assert_eq!(direct.paths_hash, cached.paths_hash);
    assert_eq!(direct.title, cached.title);
    assert_eq!(direct.url, cached.url);
    assert_eq!(direct.author_username, cached.author_username);
}
// Two notes under two different parent issues, regenerated in the same batch,
// must each render their own parent's metadata — i.e. the shared parent cache
// must not leak one parent's iid/title into the other note's document.
#[test]
fn test_note_regeneration_cache_invalidates_across_parents() {
    let conn = setup_note_db();
    conn.execute(
        "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, created_at, updated_at, last_seen_at, web_url) VALUES (1, 10, 1, 42, 'Issue Alpha', 'opened', 1000, 2000, 3000, 'https://example.com/issues/42')",
        [],
    ).unwrap();
    conn.execute(
        "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, created_at, updated_at, last_seen_at, web_url) VALUES (2, 20, 1, 99, 'Issue Beta', 'opened', 1000, 2000, 3000, 'https://example.com/issues/99')",
        [],
    ).unwrap();
    conn.execute(
        "INSERT INTO discussions (id, gitlab_discussion_id, project_id, issue_id, noteable_type, last_seen_at) VALUES (1, 'disc_1', 1, 1, 'Issue', 3000)",
        [],
    ).unwrap();
    conn.execute(
        "INSERT INTO discussions (id, gitlab_discussion_id, project_id, issue_id, noteable_type, last_seen_at) VALUES (2, 'disc_2', 1, 2, 'Issue', 3000)",
        [],
    ).unwrap();
    conn.execute(
        "INSERT INTO notes (id, gitlab_id, discussion_id, project_id, author_username, body, created_at, updated_at, last_seen_at, is_system) VALUES (1, 100, 1, 1, 'bob', 'Alpha note', 1000, 2000, 3000, 0)",
        [],
    ).unwrap();
    conn.execute(
        "INSERT INTO notes (id, gitlab_id, discussion_id, project_id, author_username, body, created_at, updated_at, last_seen_at, is_system) VALUES (2, 200, 2, 1, 'alice', 'Beta note', 1000, 2000, 3000, 0)",
        [],
    ).unwrap();
    mark_dirty(&conn, SourceType::Note, 1).unwrap();
    mark_dirty(&conn, SourceType::Note, 2).unwrap();
    let result = regenerate_dirty_documents(&conn, None).unwrap();
    assert_eq!(result.regenerated, 2);
    assert_eq!(result.errored, 0);
    let alpha_content: String = conn
        .query_row(
            "SELECT content_text FROM documents WHERE source_type = 'note' AND source_id = 1",
            [],
            |r| r.get(0),
        )
        .unwrap();
    let beta_content: String = conn
        .query_row(
            "SELECT content_text FROM documents WHERE source_type = 'note' AND source_id = 2",
            [],
            |r| r.get(0),
        )
        .unwrap();
    // Each document carries its own parent's iid and title, not the other's.
    assert!(alpha_content.contains("parent_iid: 42"));
    assert!(alpha_content.contains("parent_title: Issue Alpha"));
    assert!(beta_content.contains("parent_iid: 99"));
    assert!(beta_content.contains("parent_title: Issue Beta"));
}
}

View File

@@ -30,6 +30,7 @@ pub struct NormalizedNote {
pub project_id: i64,
pub note_type: Option<String>,
pub is_system: bool,
pub author_id: Option<i64>,
pub author_username: String,
pub body: String,
pub created_at: i64,
@@ -160,6 +161,7 @@ fn transform_single_note(
project_id: local_project_id,
note_type: note.note_type.clone(),
is_system: note.system,
author_id: Some(note.author.id),
author_username: note.author.username.clone(),
body: note.body.clone(),
created_at: parse_timestamp(&note.created_at),
@@ -265,6 +267,7 @@ fn transform_single_note_strict(
project_id: local_project_id,
note_type: note.note_type.clone(),
is_system: note.system,
author_id: Some(note.author.id),
author_username: note.author.username.clone(),
body: note.body.clone(),
created_at,

View File

@@ -131,7 +131,7 @@ mod tests {
let conn = Connection::open_in_memory().unwrap();
conn.execute_batch("
CREATE TABLE dirty_sources (
source_type TEXT NOT NULL CHECK (source_type IN ('issue','merge_request','discussion')),
source_type TEXT NOT NULL CHECK (source_type IN ('issue','merge_request','discussion','note')),
source_id INTEGER NOT NULL,
queued_at INTEGER NOT NULL,
attempt_count INTEGER NOT NULL DEFAULT 0,
@@ -258,6 +258,21 @@ mod tests {
assert_eq!(count, 0);
}
#[test]
fn test_mark_dirty_note_type() {
let conn = setup_db();
mark_dirty(&conn, SourceType::Note, 42).unwrap();
let results = get_dirty_sources(&conn).unwrap();
assert_eq!(results.len(), 1);
assert_eq!(results[0].0, SourceType::Note);
assert_eq!(results[0].1, 42);
clear_dirty(&conn, SourceType::Note, 42).unwrap();
let results = get_dirty_sources(&conn).unwrap();
assert!(results.is_empty());
}
#[test]
fn test_drain_loop() {
let conn = setup_db();

View File

@@ -1,17 +1,26 @@
use futures::StreamExt;
use rusqlite::Connection;
use rusqlite::{Connection, params};
use tracing::{debug, warn};
use crate::Config;
use crate::core::error::Result;
use crate::core::payloads::{StorePayloadOptions, store_payload};
use crate::core::time::now_ms;
use crate::documents::SourceType;
use crate::gitlab::GitLabClient;
use crate::gitlab::transformers::{NoteableRef, transform_discussion, transform_notes};
use crate::gitlab::transformers::{
NormalizedNote, NoteableRef, transform_discussion, transform_notes,
};
use crate::ingestion::dirty_tracker;
use super::issues::IssueForDiscussionSync;
#[derive(Debug)]
pub struct NoteUpsertOutcome {
pub local_note_id: i64,
pub changed_semantics: bool,
}
#[derive(Debug, Default)]
pub struct IngestDiscussionsResult {
pub discussions_fetched: usize,
@@ -80,6 +89,8 @@ async fn ingest_discussions_for_issue(
let mut seen_discussion_ids: Vec<String> = Vec::new();
let mut pagination_error: Option<crate::core::error::LoreError> = None;
let run_seen_at = now_ms();
while let Some(disc_result) = discussions_stream.next().await {
let gitlab_discussion = match disc_result {
Ok(d) => d,
@@ -126,17 +137,28 @@ async fn ingest_discussions_for_issue(
dirty_tracker::mark_dirty_tx(&tx, SourceType::Discussion, local_discussion_id)?;
// Mark child note documents dirty (they inherit parent metadata)
tx.execute(
"INSERT INTO dirty_sources (source_type, source_id, queued_at)
SELECT 'note', n.id, ?1
FROM notes n
WHERE n.discussion_id = ?2 AND n.is_system = 0
ON CONFLICT(source_type, source_id) DO UPDATE SET queued_at = excluded.queued_at, attempt_count = 0",
params![now_ms(), local_discussion_id],
)?;
let notes = transform_notes(&gitlab_discussion, local_project_id);
let notes_count = notes.len();
tx.execute(
"DELETE FROM notes WHERE discussion_id = ?",
[local_discussion_id],
)?;
for note in notes {
insert_note(&tx, local_discussion_id, &note, None)?;
let outcome =
upsert_note_for_issue(&tx, local_discussion_id, &note, run_seen_at, None)?;
if !note.is_system && outcome.changed_semantics {
dirty_tracker::mark_dirty_tx(&tx, SourceType::Note, outcome.local_note_id)?;
}
}
sweep_stale_issue_notes(&tx, local_discussion_id, run_seen_at)?;
tx.commit()?;
@@ -198,38 +220,182 @@ fn upsert_discussion(
Ok(())
}
fn insert_note(
fn upsert_note_for_issue(
conn: &Connection,
discussion_id: i64,
note: &crate::gitlab::transformers::NormalizedNote,
note: &NormalizedNote,
last_seen_at: i64,
payload_id: Option<i64>,
) -> Result<()> {
) -> Result<NoteUpsertOutcome> {
// Pre-read for semantic change detection
let existing = conn
.query_row(
"SELECT id, body, note_type, resolved, resolved_by,
position_old_path, position_new_path, position_old_line, position_new_line,
position_type, position_line_range_start, position_line_range_end,
position_base_sha, position_start_sha, position_head_sha
FROM notes WHERE gitlab_id = ?",
params![note.gitlab_id],
|row| {
Ok((
row.get::<_, i64>(0)?,
row.get::<_, String>(1)?,
row.get::<_, Option<String>>(2)?,
row.get::<_, bool>(3)?,
row.get::<_, Option<String>>(4)?,
row.get::<_, Option<String>>(5)?,
row.get::<_, Option<String>>(6)?,
row.get::<_, Option<i32>>(7)?,
row.get::<_, Option<i32>>(8)?,
row.get::<_, Option<String>>(9)?,
row.get::<_, Option<i32>>(10)?,
row.get::<_, Option<i32>>(11)?,
row.get::<_, Option<String>>(12)?,
row.get::<_, Option<String>>(13)?,
row.get::<_, Option<String>>(14)?,
))
},
)
.ok();
let changed_semantics = match &existing {
Some((
_id,
body,
note_type,
resolved,
resolved_by,
pos_old_path,
pos_new_path,
pos_old_line,
pos_new_line,
pos_type,
pos_range_start,
pos_range_end,
pos_base_sha,
pos_start_sha,
pos_head_sha,
)) => {
*body != note.body
|| *note_type != note.note_type
|| *resolved != note.resolved
|| *resolved_by != note.resolved_by
|| *pos_old_path != note.position_old_path
|| *pos_new_path != note.position_new_path
|| *pos_old_line != note.position_old_line
|| *pos_new_line != note.position_new_line
|| *pos_type != note.position_type
|| *pos_range_start != note.position_line_range_start
|| *pos_range_end != note.position_line_range_end
|| *pos_base_sha != note.position_base_sha
|| *pos_start_sha != note.position_start_sha
|| *pos_head_sha != note.position_head_sha
}
None => true,
};
conn.execute(
"INSERT INTO notes (
gitlab_id, discussion_id, project_id, note_type, is_system,
author_username, body, created_at, updated_at, last_seen_at,
position, resolvable, resolved, resolved_by, resolved_at, raw_payload_id
) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15, ?16)",
(
author_id, author_username, body, created_at, updated_at, last_seen_at,
position, resolvable, resolved, resolved_by, resolved_at,
position_old_path, position_new_path, position_old_line, position_new_line,
position_type, position_line_range_start, position_line_range_end,
position_base_sha, position_start_sha, position_head_sha,
raw_payload_id
) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15, ?16, ?17, ?18, ?19, ?20, ?21, ?22, ?23, ?24, ?25, ?26, ?27)
ON CONFLICT(gitlab_id) DO UPDATE SET
body = excluded.body,
note_type = excluded.note_type,
author_id = excluded.author_id,
updated_at = excluded.updated_at,
last_seen_at = excluded.last_seen_at,
resolvable = excluded.resolvable,
resolved = excluded.resolved,
resolved_by = excluded.resolved_by,
resolved_at = excluded.resolved_at,
position_old_path = excluded.position_old_path,
position_new_path = excluded.position_new_path,
position_old_line = excluded.position_old_line,
position_new_line = excluded.position_new_line,
position_type = excluded.position_type,
position_line_range_start = excluded.position_line_range_start,
position_line_range_end = excluded.position_line_range_end,
position_base_sha = excluded.position_base_sha,
position_start_sha = excluded.position_start_sha,
position_head_sha = excluded.position_head_sha,
raw_payload_id = COALESCE(excluded.raw_payload_id, raw_payload_id)",
params![
note.gitlab_id,
discussion_id,
note.project_id,
&note.note_type,
note.is_system,
note.author_id,
&note.author_username,
&note.body,
note.created_at,
note.updated_at,
note.last_seen_at,
last_seen_at,
note.position,
note.resolvable,
note.resolved,
&note.resolved_by,
note.resolved_at,
&note.position_old_path,
&note.position_new_path,
note.position_old_line,
note.position_new_line,
&note.position_type,
note.position_line_range_start,
note.position_line_range_end,
&note.position_base_sha,
&note.position_start_sha,
&note.position_head_sha,
payload_id,
),
],
)?;
Ok(())
let local_note_id: i64 = conn.query_row(
"SELECT id FROM notes WHERE gitlab_id = ?",
params![note.gitlab_id],
|row| row.get(0),
)?;
Ok(NoteUpsertOutcome {
local_note_id,
changed_semantics,
})
}
/// Deletes notes in `discussion_id` whose `last_seen_at` predates the current
/// sync pass (`last_seen_at < last_seen_at` argument), i.e. notes that were
/// not re-observed, together with their derived rows.
///
/// Deletion order matters: derived `documents` and `dirty_sources` rows are
/// removed first (while the stale note ids are still queryable), then the
/// notes themselves, so no orphaned document or queue entries survive.
/// Returns the number of notes deleted.
fn sweep_stale_issue_notes(
    conn: &Connection,
    discussion_id: i64,
    last_seen_at: i64,
) -> Result<usize> {
    // Step 1: Delete note documents for stale notes. The is_system = 0 filter
    // mirrors ingestion, which only indexes non-system notes.
    conn.execute(
        "DELETE FROM documents WHERE source_type = 'note' AND source_id IN
         (SELECT id FROM notes WHERE discussion_id = ?1 AND last_seen_at < ?2 AND is_system = 0)",
        params![discussion_id, last_seen_at],
    )?;
    // Step 2: Delete dirty_sources entries for stale notes so the regenerator
    // never tries to process a note that no longer exists.
    conn.execute(
        "DELETE FROM dirty_sources WHERE source_type = 'note' AND source_id IN
         (SELECT id FROM notes WHERE discussion_id = ?1 AND last_seen_at < ?2 AND is_system = 0)",
        params![discussion_id, last_seen_at],
    )?;
    // Step 3: Delete the stale notes themselves (system notes included).
    let deleted = conn.execute(
        "DELETE FROM notes WHERE discussion_id = ?1 AND last_seen_at < ?2",
        params![discussion_id, last_seen_at],
    )?;
    if deleted > 0 {
        debug!(discussion_id, deleted, "Swept stale issue notes");
    }
    Ok(deleted)
}
fn remove_stale_discussions(
@@ -303,6 +469,9 @@ fn update_issue_sync_timestamp(conn: &Connection, issue_id: i64, updated_at: i64
#[cfg(test)]
mod tests {
use super::*;
use crate::core::db::{create_connection, run_migrations};
use crate::gitlab::transformers::NormalizedNote;
use std::path::Path;
#[test]
fn result_default_has_zero_counts() {
@@ -311,4 +480,462 @@ mod tests {
assert_eq!(result.discussions_upserted, 0);
assert_eq!(result.notes_upserted, 0);
}
// Builds an in-memory database with the full migration set applied, seeded
// with one project, one issue (gitlab_id 100), and one discussion ('disc-1')
// for the note-upsert tests to attach notes to.
fn setup() -> Connection {
    let conn = create_connection(Path::new(":memory:")).unwrap();
    run_migrations(&conn).unwrap();
    conn.execute(
        "INSERT INTO projects (gitlab_project_id, path_with_namespace, web_url) \
         VALUES (1, 'group/repo', 'https://gitlab.com/group/repo')",
        [],
    )
    .unwrap();
    conn.execute(
        "INSERT INTO issues (gitlab_id, iid, project_id, title, state, author_username, created_at, updated_at, last_seen_at) \
         VALUES (100, 1, 1, 'Test Issue', 'opened', 'testuser', 1000, 2000, 3000)",
        [],
    )
    .unwrap();
    conn.execute(
        "INSERT INTO discussions (gitlab_discussion_id, project_id, issue_id, noteable_type, individual_note, last_seen_at, resolvable, resolved) \
         VALUES ('disc-1', 1, 1, 'Issue', 0, 3000, 0, 0)",
        [],
    )
    .unwrap();
    conn
}
// setup() inserts exactly one discussion; return its local rowid.
fn get_discussion_id(conn: &Connection) -> i64 {
    let query = "SELECT id FROM discussions LIMIT 1";
    conn.query_row(query, [], |row| row.get(0)).unwrap()
}
// Builds a NormalizedNote with the given semantic fields. Everything not
// covered by a parameter defaults to None/false/0 (author_id, position data,
// resolvable, resolved_at, ...); last_seen_at mirrors updated_at and
// is_system is always false.
#[allow(clippy::too_many_arguments)]
fn make_note(
    gitlab_id: i64,
    project_id: i64,
    body: &str,
    note_type: Option<&str>,
    created_at: i64,
    updated_at: i64,
    resolved: bool,
    resolved_by: Option<&str>,
) -> NormalizedNote {
    NormalizedNote {
        gitlab_id,
        project_id,
        note_type: note_type.map(String::from),
        is_system: false,
        author_id: None,
        author_username: "testuser".to_string(),
        body: body.to_string(),
        created_at,
        updated_at,
        // Mirrors updated_at; individual tests override via the upsert's
        // last_seen_at argument instead.
        last_seen_at: updated_at,
        position: 0,
        resolvable: false,
        resolved,
        resolved_by: resolved_by.map(String::from),
        resolved_at: None,
        position_old_path: None,
        position_new_path: None,
        position_old_line: None,
        position_new_line: None,
        position_type: None,
        position_line_range_start: None,
        position_line_range_end: None,
        position_base_sha: None,
        position_start_sha: None,
        position_head_sha: None,
    }
}
#[test]
fn test_issue_note_upsert_stable_id() {
    // Re-upserting the same gitlab_ids must resolve to the same local rowids.
    let conn = setup();
    let disc_id = get_discussion_id(&conn);
    let last_seen_at = 5000;
    let note1 = make_note(1001, 1, "First note", None, 1000, 2000, false, None);
    let note2 = make_note(1002, 1, "Second note", None, 1000, 2000, false, None);
    let first_pass = (
        upsert_note_for_issue(&conn, disc_id, &note1, last_seen_at, None)
            .unwrap()
            .local_note_id,
        upsert_note_for_issue(&conn, disc_id, &note2, last_seen_at, None)
            .unwrap()
            .local_note_id,
    );
    // Re-sync the same gitlab_ids with a later timestamp.
    let second_pass = (
        upsert_note_for_issue(&conn, disc_id, &note1, last_seen_at + 1, None)
            .unwrap()
            .local_note_id,
        upsert_note_for_issue(&conn, disc_id, &note2, last_seen_at + 1, None)
            .unwrap()
            .local_note_id,
    );
    assert_eq!(first_pass, second_pass);
}
#[test]
fn test_issue_note_upsert_detects_body_change() {
    // An edited body on re-sync must be reported as a semantic change.
    let conn = setup();
    let disc_id = get_discussion_id(&conn);
    let note = make_note(2001, 1, "Original body", None, 1000, 2000, false, None);
    upsert_note_for_issue(&conn, disc_id, &note, 5000, None).unwrap();
    // make_note already sets updated_at from its sixth argument (3000 here),
    // so the previous explicit `changed.updated_at = 3000;` mutation (and the
    // `mut` binding it required) was redundant.
    let changed = make_note(2001, 1, "Updated body", None, 1000, 3000, false, None);
    let outcome = upsert_note_for_issue(&conn, disc_id, &changed, 5001, None).unwrap();
    assert!(outcome.changed_semantics);
}
#[test]
fn test_issue_note_upsert_unchanged_returns_false() {
    // A byte-identical re-sync must not report a semantic change.
    let conn = setup();
    let disc_id = get_discussion_id(&conn);
    let note = make_note(3001, 1, "Same body", None, 1000, 2000, false, None);
    upsert_note_for_issue(&conn, disc_id, &note, 5000, None).unwrap();
    let resynced = upsert_note_for_issue(&conn, disc_id, &note, 5001, None).unwrap();
    assert!(!resynced.changed_semantics);
}
#[test]
fn test_issue_note_upsert_updated_at_only_does_not_mark_semantic_change() {
    // updated_at is not part of the semantic-change comparison, so a
    // timestamp-only refresh must not flag a change.
    let conn = setup();
    let disc_id = get_discussion_id(&conn);
    let note = make_note(4001, 1, "Body stays", None, 1000, 2000, false, None);
    upsert_note_for_issue(&conn, disc_id, &note, 5000, None).unwrap();
    // make_note already sets updated_at from its sixth argument (9999 here),
    // so the previous explicit `same.updated_at = 9999;` mutation (and the
    // `mut` binding it required) was redundant.
    let same = make_note(4001, 1, "Body stays", None, 1000, 9999, false, None);
    let outcome = upsert_note_for_issue(&conn, disc_id, &same, 5001, None).unwrap();
    assert!(!outcome.changed_semantics);
}
#[test]
fn test_issue_note_sweep_removes_stale() {
    // Only the note that was not re-observed at the sweep cutoff is deleted.
    let conn = setup();
    let disc_id = get_discussion_id(&conn);
    let kept = make_note(5001, 1, "Keep me", None, 1000, 2000, false, None);
    let stale = make_note(5002, 1, "Stale me", None, 1000, 2000, false, None);
    upsert_note_for_issue(&conn, disc_id, &kept, 5000, None).unwrap();
    upsert_note_for_issue(&conn, disc_id, &stale, 5000, None).unwrap();
    // Only `kept` is re-observed with the newer timestamp; `stale` keeps
    // last_seen_at = 5000 and falls below the 6000 cutoff.
    upsert_note_for_issue(&conn, disc_id, &kept, 6000, None).unwrap();
    let swept = sweep_stale_issue_notes(&conn, disc_id, 6000).unwrap();
    assert_eq!(swept, 1);
    let remaining: i64 = conn
        .query_row(
            "SELECT COUNT(*) FROM notes WHERE discussion_id = ?",
            [disc_id],
            |row| row.get(0),
        )
        .unwrap();
    assert_eq!(remaining, 1);
}
#[test]
fn test_issue_note_upsert_returns_local_id() {
    // The outcome's local_note_id must match the rowid actually stored.
    let conn = setup();
    let disc_id = get_discussion_id(&conn);
    let note = make_note(6001, 1, "Check my ID", None, 1000, 2000, false, None);
    let outcome = upsert_note_for_issue(&conn, disc_id, &note, 5000, None).unwrap();
    let db_id: i64 = conn
        .query_row(
            "SELECT id FROM notes WHERE gitlab_id = ?",
            [6001_i64],
            |row| row.get(0),
        )
        .unwrap();
    assert_eq!(outcome.local_note_id, db_id);
}
#[test]
fn test_issue_note_upsert_captures_author_id() {
    // author_id supplied on the note must be persisted to the notes table.
    let conn = setup();
    let discussion = get_discussion_id(&conn);
    let mut note = make_note(7001, 1, "With author", None, 1000, 2000, false, None);
    note.author_id = Some(12345);
    upsert_note_for_issue(&conn, discussion, &note, 5000, None).unwrap();
    let persisted: Option<i64> = conn
        .query_row(
            "SELECT author_id FROM notes WHERE gitlab_id = ?",
            [7001_i64],
            |r| r.get(0),
        )
        .unwrap();
    assert_eq!(persisted, Some(12345));
}
#[test]
fn test_note_upsert_author_id_nullable() {
    // A note without an author id stores NULL rather than a sentinel value.
    let conn = setup();
    let discussion = get_discussion_id(&conn);
    // make_note leaves author_id as None by default.
    let note = make_note(7002, 1, "No author id", None, 1000, 2000, false, None);
    upsert_note_for_issue(&conn, discussion, &note, 5000, None).unwrap();
    let persisted: Option<i64> = conn
        .query_row(
            "SELECT author_id FROM notes WHERE gitlab_id = ?",
            [7002_i64],
            |r| r.get(0),
        )
        .unwrap();
    assert_eq!(persisted, None);
}
#[test]
fn test_note_author_id_survives_username_change() {
    // Re-syncing with a renamed author must keep the stable numeric author_id.
    let conn = setup();
    let discussion = get_discussion_id(&conn);
    let mut first = make_note(7003, 1, "Original body", None, 1000, 2000, false, None);
    first.author_id = Some(99999);
    first.author_username = "oldname".to_string();
    upsert_note_for_issue(&conn, discussion, &first, 5000, None).unwrap();
    // Same note returns with a new username and body but the same author_id.
    let mut second = make_note(7003, 1, "Updated body", None, 1000, 3000, false, None);
    second.author_id = Some(99999);
    second.author_username = "newname".to_string();
    upsert_note_for_issue(&conn, discussion, &second, 5001, None).unwrap();
    let persisted: Option<i64> = conn
        .query_row(
            "SELECT author_id FROM notes WHERE gitlab_id = ?",
            [7003_i64],
            |r| r.get(0),
        )
        .unwrap();
    assert_eq!(persisted, Some(99999));
}
/// Test helper: seed a minimal `documents` row pointing at the given note.
fn insert_note_document(conn: &Connection, note_local_id: i64) {
    const SQL: &str = "INSERT INTO documents (source_type, source_id, project_id, content_text, content_hash) \
         VALUES ('note', ?1, 1, 'note content', 'hash123')";
    conn.execute(SQL, [note_local_id]).unwrap();
}
/// Test helper: queue the given note in `dirty_sources` as if awaiting re-index.
fn insert_note_dirty_source(conn: &Connection, note_local_id: i64) {
    const SQL: &str = "INSERT INTO dirty_sources (source_type, source_id, queued_at) \
         VALUES ('note', ?1, 1000)";
    conn.execute(SQL, [note_local_id]).unwrap();
}
/// Test helper: number of `documents` rows attached to the given note.
fn count_note_documents(conn: &Connection, note_local_id: i64) -> i64 {
    let sql = "SELECT COUNT(*) FROM documents WHERE source_type = 'note' AND source_id = ?";
    conn.query_row(sql, [note_local_id], |row| row.get(0)).unwrap()
}
/// Test helper: number of `dirty_sources` entries queued for the given note.
fn count_note_dirty_sources(conn: &Connection, note_local_id: i64) -> i64 {
    let sql = "SELECT COUNT(*) FROM dirty_sources WHERE source_type = 'note' AND source_id = ?";
    conn.query_row(sql, [note_local_id], |row| row.get(0)).unwrap()
}
#[test]
fn test_issue_note_sweep_deletes_note_documents_immediately() {
    // Sweeping a stale note must purge its document and dirty-queue rows,
    // while documents of still-live notes survive untouched.
    let conn = setup();
    let discussion = get_discussion_id(&conn);
    let kept_a = make_note(9001, 1, "Keep me", None, 1000, 2000, false, None);
    let kept_b = make_note(9002, 1, "Keep me too", None, 1000, 2000, false, None);
    let stale = make_note(9003, 1, "Stale me", None, 1000, 2000, false, None);
    let out_a = upsert_note_for_issue(&conn, discussion, &kept_a, 5000, None).unwrap();
    let out_b = upsert_note_for_issue(&conn, discussion, &kept_b, 5000, None).unwrap();
    let out_stale = upsert_note_for_issue(&conn, discussion, &stale, 5000, None).unwrap();
    // Each note gets a document; only the stale one is queued dirty.
    for out in [&out_a, &out_b, &out_stale] {
        insert_note_document(&conn, out.local_note_id);
    }
    insert_note_dirty_source(&conn, out_stale.local_note_id);
    // The newer run observes only the two kept notes.
    upsert_note_for_issue(&conn, discussion, &kept_a, 6000, None).unwrap();
    upsert_note_for_issue(&conn, discussion, &kept_b, 6000, None).unwrap();
    sweep_stale_issue_notes(&conn, discussion, 6000).unwrap();
    // Stale note's document and dirty entry are gone; kept documents remain.
    assert_eq!(count_note_documents(&conn, out_stale.local_note_id), 0);
    assert_eq!(count_note_dirty_sources(&conn, out_stale.local_note_id), 0);
    assert_eq!(count_note_documents(&conn, out_a.local_note_id), 1);
    assert_eq!(count_note_documents(&conn, out_b.local_note_id), 1);
}
#[test]
fn test_sweep_deletion_handles_note_without_document() {
    // Sweeping must not fail when a stale note never had a document row.
    let conn = setup();
    let discussion = get_discussion_id(&conn);
    let note = make_note(9004, 1, "No doc", None, 1000, 2000, false, None);
    upsert_note_for_issue(&conn, discussion, &note, 5000, None).unwrap();
    let removed = sweep_stale_issue_notes(&conn, discussion, 6000).unwrap();
    assert_eq!(removed, 1);
}
#[test]
fn test_set_based_deletion_atomicity() {
    // All three deletes (documents, dirty_sources, notes) must land together
    // when the sweep runs inside a single transaction.
    let conn = setup();
    let discussion = get_discussion_id(&conn);
    let note = make_note(9005, 1, "Stale with deps", None, 1000, 2000, false, None);
    let out = upsert_note_for_issue(&conn, discussion, &note, 5000, None).unwrap();
    insert_note_document(&conn, out.local_note_id);
    insert_note_dirty_source(&conn, out.local_note_id);
    // Sanity check: both dependent rows exist before the sweep.
    assert_eq!(count_note_documents(&conn, out.local_note_id), 1);
    assert_eq!(count_note_dirty_sources(&conn, out.local_note_id), 1);
    // Production code invokes the sweep from within ingest_discussions_for_issue's
    // transaction; mirror that here by wrapping it explicitly.
    let tx = conn.unchecked_transaction().unwrap();
    sweep_stale_issue_notes(&tx, discussion, 6000).unwrap();
    tx.commit().unwrap();
    assert_eq!(count_note_documents(&conn, out.local_note_id), 0);
    assert_eq!(count_note_dirty_sources(&conn, out.local_note_id), 0);
    let survivors: i64 = conn
        .query_row(
            "SELECT COUNT(*) FROM notes WHERE gitlab_id = ?",
            [9005_i64],
            |row| row.get(0),
        )
        .unwrap();
    assert_eq!(survivors, 0);
}
/// Test helper: total 'note' entries currently queued in `dirty_sources`.
fn count_dirty_notes(conn: &Connection) -> i64 {
    let sql = "SELECT COUNT(*) FROM dirty_sources WHERE source_type = 'note'";
    conn.query_row(sql, [], |row| row.get(0)).unwrap()
}
#[test]
fn test_parent_title_change_marks_notes_dirty() {
    // Changing the parent issue's title re-queues its user notes for
    // re-indexing; system notes are excluded from the propagation.
    let conn = setup();
    let discussion = get_discussion_id(&conn);
    let user_a = make_note(10001, 1, "User note 1", None, 1000, 2000, false, None);
    let user_b = make_note(10002, 1, "User note 2", None, 1000, 2000, false, None);
    let mut system = make_note(10003, 1, "System note", None, 1000, 2000, false, None);
    system.is_system = true;
    let out_a = upsert_note_for_issue(&conn, discussion, &user_a, 5000, None).unwrap();
    let out_b = upsert_note_for_issue(&conn, discussion, &user_b, 5000, None).unwrap();
    upsert_note_for_issue(&conn, discussion, &system, 5000, None).unwrap();
    // Drop dirty rows produced by the individual upserts above.
    conn.execute("DELETE FROM dirty_sources WHERE source_type = 'note'", [])
        .unwrap();
    assert_eq!(count_dirty_notes(&conn), 0);
    // Simulate a parent title change, then run the same propagation query
    // the ingestion code uses.
    conn.execute("UPDATE issues SET title = 'Changed Title' WHERE id = 1", [])
        .unwrap();
    conn.execute(
        "INSERT INTO dirty_sources (source_type, source_id, queued_at)
SELECT 'note', n.id, ?1
FROM notes n
WHERE n.discussion_id = ?2 AND n.is_system = 0
ON CONFLICT(source_type, source_id) DO UPDATE SET queued_at = excluded.queued_at, attempt_count = 0",
        params![now_ms(), discussion],
    )
    .unwrap();
    // Both user notes are dirty; the system note is not.
    assert_eq!(count_dirty_notes(&conn), 2);
    assert_eq!(count_note_dirty_sources(&conn, out_a.local_note_id), 1);
    assert_eq!(count_note_dirty_sources(&conn, out_b.local_note_id), 1);
}
#[test]
fn test_parent_label_change_marks_notes_dirty() {
    // A metadata change on the parent issue (labels live in issue metadata)
    // must re-queue its user notes through the same propagation query.
    let conn = setup();
    let discussion = get_discussion_id(&conn);
    let note = make_note(11001, 1, "User note", None, 1000, 2000, false, None);
    let out = upsert_note_for_issue(&conn, discussion, &note, 5000, None).unwrap();
    // Start from a clean dirty queue.
    conn.execute("DELETE FROM dirty_sources WHERE source_type = 'note'", [])
        .unwrap();
    // Simulate the label change bumping the parent issue's updated_at.
    conn.execute("UPDATE issues SET updated_at = 9999 WHERE id = 1", [])
        .unwrap();
    conn.execute(
        "INSERT INTO dirty_sources (source_type, source_id, queued_at)
SELECT 'note', n.id, ?1
FROM notes n
WHERE n.discussion_id = ?2 AND n.is_system = 0
ON CONFLICT(source_type, source_id) DO UPDATE SET queued_at = excluded.queued_at, attempt_count = 0",
        params![now_ms(), discussion],
    )
    .unwrap();
    assert_eq!(count_dirty_notes(&conn), 1);
    assert_eq!(count_note_dirty_sources(&conn, out.local_note_id), 1);
}
}

View File

@@ -14,6 +14,7 @@ use crate::gitlab::transformers::{
};
use crate::gitlab::types::GitLabDiscussion;
use crate::ingestion::dirty_tracker;
use crate::ingestion::discussions::NoteUpsertOutcome;
use super::merge_requests::MrForDiscussionSync;
@@ -161,6 +162,16 @@ pub fn write_prefetched_mr_discussions(
dirty_tracker::mark_dirty_tx(&tx, SourceType::Discussion, local_discussion_id)?;
// Mark child note documents dirty (they inherit parent metadata)
tx.execute(
"INSERT INTO dirty_sources (source_type, source_id, queued_at)
SELECT 'note', n.id, ?1
FROM notes n
WHERE n.discussion_id = ?2 AND n.is_system = 0
ON CONFLICT(source_type, source_id) DO UPDATE SET queued_at = excluded.queued_at, attempt_count = 0",
params![now_ms(), local_discussion_id],
)?;
for note in &disc.notes {
let should_store_payload = !note.is_system
|| note.position_new_path.is_some()
@@ -187,7 +198,11 @@ pub fn write_prefetched_mr_discussions(
None
};
let outcome =
upsert_note(&tx, local_discussion_id, note, run_seen_at, note_payload_id)?;
if !note.is_system && outcome.changed_semantics {
dirty_tracker::mark_dirty_tx(&tx, SourceType::Note, outcome.local_note_id)?;
}
}
tx.commit()?;
@@ -361,6 +376,16 @@ async fn ingest_discussions_for_mr(
dirty_tracker::mark_dirty_tx(&tx, SourceType::Discussion, local_discussion_id)?;
// Mark child note documents dirty (they inherit parent metadata)
tx.execute(
"INSERT INTO dirty_sources (source_type, source_id, queued_at)
SELECT 'note', n.id, ?1
FROM notes n
WHERE n.discussion_id = ?2 AND n.is_system = 0
ON CONFLICT(source_type, source_id) DO UPDATE SET queued_at = excluded.queued_at, attempt_count = 0",
params![now_ms(), local_discussion_id],
)?;
for note in &notes {
let should_store_payload = !note.is_system
|| note.position_new_path.is_some()
@@ -390,7 +415,11 @@ async fn ingest_discussions_for_mr(
None
};
let outcome =
upsert_note(&tx, local_discussion_id, note, run_seen_at, note_payload_id)?;
if !note.is_system && outcome.changed_semantics {
dirty_tracker::mark_dirty_tx(&tx, SourceType::Note, outcome.local_note_id)?;
}
}
tx.commit()?;
@@ -473,19 +502,87 @@ fn upsert_note(
note: &NormalizedNote,
last_seen_at: i64,
payload_id: Option<i64>,
) -> Result<()> {
) -> Result<NoteUpsertOutcome> {
// Pre-read for semantic change detection
let existing = conn
.query_row(
"SELECT id, body, note_type, resolved, resolved_by,
position_old_path, position_new_path, position_old_line, position_new_line,
position_type, position_line_range_start, position_line_range_end,
position_base_sha, position_start_sha, position_head_sha
FROM notes WHERE gitlab_id = ?",
params![note.gitlab_id],
|row| {
Ok((
row.get::<_, i64>(0)?,
row.get::<_, String>(1)?,
row.get::<_, Option<String>>(2)?,
row.get::<_, bool>(3)?,
row.get::<_, Option<String>>(4)?,
row.get::<_, Option<String>>(5)?,
row.get::<_, Option<String>>(6)?,
row.get::<_, Option<i32>>(7)?,
row.get::<_, Option<i32>>(8)?,
row.get::<_, Option<String>>(9)?,
row.get::<_, Option<i32>>(10)?,
row.get::<_, Option<i32>>(11)?,
row.get::<_, Option<String>>(12)?,
row.get::<_, Option<String>>(13)?,
row.get::<_, Option<String>>(14)?,
))
},
)
.ok();
let changed_semantics = match &existing {
Some((
_id,
body,
note_type,
resolved,
resolved_by,
pos_old_path,
pos_new_path,
pos_old_line,
pos_new_line,
pos_type,
pos_range_start,
pos_range_end,
pos_base_sha,
pos_start_sha,
pos_head_sha,
)) => {
*body != note.body
|| *note_type != note.note_type
|| *resolved != note.resolved
|| *resolved_by != note.resolved_by
|| *pos_old_path != note.position_old_path
|| *pos_new_path != note.position_new_path
|| *pos_old_line != note.position_old_line
|| *pos_new_line != note.position_new_line
|| *pos_type != note.position_type
|| *pos_range_start != note.position_line_range_start
|| *pos_range_end != note.position_line_range_end
|| *pos_base_sha != note.position_base_sha
|| *pos_start_sha != note.position_start_sha
|| *pos_head_sha != note.position_head_sha
}
None => true,
};
conn.execute(
"INSERT INTO notes (
gitlab_id, discussion_id, project_id, note_type, is_system,
author_username, body, created_at, updated_at, last_seen_at,
author_id, author_username, body, created_at, updated_at, last_seen_at,
position, resolvable, resolved, resolved_by, resolved_at,
position_old_path, position_new_path, position_old_line, position_new_line,
position_type, position_line_range_start, position_line_range_end,
position_base_sha, position_start_sha, position_head_sha,
raw_payload_id
) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15, ?16, ?17, ?18, ?19, ?20, ?21, ?22, ?23, ?24, ?25, ?26)
) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15, ?16, ?17, ?18, ?19, ?20, ?21, ?22, ?23, ?24, ?25, ?26, ?27)
ON CONFLICT(gitlab_id) DO UPDATE SET
note_type = excluded.note_type,
author_id = excluded.author_id,
body = excluded.body,
updated_at = excluded.updated_at,
last_seen_at = excluded.last_seen_at,
@@ -510,6 +607,7 @@ fn upsert_note(
note.project_id,
&note.note_type,
note.is_system,
note.author_id,
&note.author_username,
&note.body,
note.created_at,
@@ -533,7 +631,17 @@ fn upsert_note(
payload_id,
],
)?;
Ok(())
let local_note_id: i64 = conn.query_row(
"SELECT id FROM notes WHERE gitlab_id = ?",
params![note.gitlab_id],
|row| row.get(0),
)?;
Ok(NoteUpsertOutcome {
local_note_id,
changed_semantics,
})
}
fn sweep_stale_discussions(conn: &Connection, local_mr_id: i64, run_seen_at: i64) -> Result<usize> {
@@ -554,13 +662,36 @@ fn sweep_stale_notes(
local_mr_id: i64,
run_seen_at: i64,
) -> Result<usize> {
// Step 1: Delete note documents for stale notes
conn.execute(
"DELETE FROM documents WHERE source_type = 'note' AND source_id IN
(SELECT id FROM notes
WHERE project_id = ?1
AND discussion_id IN (SELECT id FROM discussions WHERE merge_request_id = ?2)
AND last_seen_at < ?3
AND is_system = 0)",
params![local_project_id, local_mr_id, run_seen_at],
)?;
// Step 2: Delete dirty_sources entries for stale notes
conn.execute(
"DELETE FROM dirty_sources WHERE source_type = 'note' AND source_id IN
(SELECT id FROM notes
WHERE project_id = ?1
AND discussion_id IN (SELECT id FROM discussions WHERE merge_request_id = ?2)
AND last_seen_at < ?3
AND is_system = 0)",
params![local_project_id, local_mr_id, run_seen_at],
)?;
// Step 3: Delete the stale notes themselves
let deleted = conn.execute(
"DELETE FROM notes
WHERE project_id = ?
WHERE project_id = ?1
AND discussion_id IN (
SELECT id FROM discussions WHERE merge_request_id = ?
SELECT id FROM discussions WHERE merge_request_id = ?2
)
AND last_seen_at < ?",
AND last_seen_at < ?3",
params![local_project_id, local_mr_id, run_seen_at],
)?;
if deleted > 0 {
@@ -604,6 +735,8 @@ fn clear_sync_health_error(conn: &Connection, local_mr_id: i64) -> Result<()> {
#[cfg(test)]
mod tests {
use super::*;
use crate::core::db::{create_connection, run_migrations};
use std::path::Path;
#[test]
fn result_default_has_zero_counts() {
@@ -621,4 +754,153 @@ mod tests {
let result = IngestMrDiscussionsResult::default();
assert!(!result.pagination_succeeded);
}
/// Build an in-memory database seeded with one project, one merge request,
/// and one MR discussion for the note-upsert/sweep tests below.
fn setup_mr() -> Connection {
    let conn = create_connection(Path::new(":memory:")).unwrap();
    run_migrations(&conn).unwrap();
    let seed_statements = [
        "INSERT INTO projects (gitlab_project_id, path_with_namespace, web_url) \
         VALUES (1, 'group/repo', 'https://gitlab.com/group/repo')",
        "INSERT INTO merge_requests (gitlab_id, iid, project_id, title, state, \
         author_username, source_branch, target_branch, created_at, updated_at, last_seen_at) \
         VALUES (200, 1, 1, 'Test MR', 'opened', 'testuser', 'feat', 'main', 1000, 2000, 3000)",
        "INSERT INTO discussions (gitlab_discussion_id, project_id, merge_request_id, noteable_type, \
         individual_note, last_seen_at, resolvable, resolved) \
         VALUES ('mr-disc-1', 1, 1, 'MergeRequest', 0, 3000, 0, 0)",
    ];
    for sql in seed_statements {
        conn.execute(sql, []).unwrap();
    }
    conn
}
/// Local id of the single discussion row seeded by `setup_mr`.
fn get_mr_discussion_id(conn: &Connection) -> i64 {
    conn.query_row("SELECT id FROM discussions LIMIT 1", [], |r| r.get(0))
        .unwrap()
}
/// Build a non-system `NormalizedNote` fixture for MR-discussion tests.
///
/// Only the fields these tests exercise are parameterized; `author_id`,
/// `resolved_at`, and all diff-position fields default to `None`, and
/// `last_seen_at` mirrors `updated_at`.
#[allow(clippy::too_many_arguments)]
fn make_mr_note(
    gitlab_id: i64,
    project_id: i64,
    body: &str,
    note_type: Option<&str>,
    created_at: i64,
    updated_at: i64,
    resolved: bool,
    resolved_by: Option<&str>,
) -> NormalizedNote {
    NormalizedNote {
        gitlab_id,
        project_id,
        note_type: note_type.map(String::from),
        is_system: false,
        author_id: None,
        author_username: "testuser".to_string(),
        body: body.to_string(),
        created_at,
        updated_at,
        last_seen_at: updated_at,
        position: 0,
        resolvable: false,
        resolved,
        resolved_by: resolved_by.map(String::from),
        resolved_at: None,
        position_old_path: None,
        position_new_path: None,
        position_old_line: None,
        position_new_line: None,
        position_type: None,
        position_line_range_start: None,
        position_line_range_end: None,
        position_base_sha: None,
        position_start_sha: None,
        position_head_sha: None,
    }
}
#[test]
fn test_mr_note_upsert_captures_author_id() {
    // author_id set on an MR note must be persisted by upsert_note.
    let conn = setup_mr();
    let discussion = get_mr_discussion_id(&conn);
    let mut note = make_mr_note(8001, 1, "MR note", None, 1000, 2000, false, None);
    note.author_id = Some(12345);
    upsert_note(&conn, discussion, &note, 5000, None).unwrap();
    let persisted: Option<i64> = conn
        .query_row(
            "SELECT author_id FROM notes WHERE gitlab_id = ?",
            [8001_i64],
            |r| r.get(0),
        )
        .unwrap();
    assert_eq!(persisted, Some(12345));
}
/// Test helper: seed a minimal `documents` row pointing at the given note.
fn insert_note_document(conn: &Connection, note_local_id: i64) {
    const SQL: &str = "INSERT INTO documents (source_type, source_id, project_id, content_text, content_hash) \
         VALUES ('note', ?1, 1, 'note content', 'hash123')";
    conn.execute(SQL, [note_local_id]).unwrap();
}
/// Test helper: number of `documents` rows attached to the given note.
fn count_note_documents(conn: &Connection, note_local_id: i64) -> i64 {
    let sql = "SELECT COUNT(*) FROM documents WHERE source_type = 'note' AND source_id = ?";
    conn.query_row(sql, [note_local_id], |row| row.get(0)).unwrap()
}
#[test]
fn test_mr_note_sweep_deletes_note_documents_immediately() {
    // Sweeping stale MR notes must also delete their documents, leaving
    // the documents of still-live notes intact.
    let conn = setup_mr();
    let discussion = get_mr_discussion_id(&conn);
    let local_project_id = 1;
    let local_mr_id = 1;
    let kept_a = make_mr_note(8101, 1, "Keep", None, 1000, 2000, false, None);
    let kept_b = make_mr_note(8102, 1, "Keep too", None, 1000, 2000, false, None);
    let stale = make_mr_note(8103, 1, "Stale", None, 1000, 2000, false, None);
    let out_a = upsert_note(&conn, discussion, &kept_a, 5000, None).unwrap();
    let out_b = upsert_note(&conn, discussion, &kept_b, 5000, None).unwrap();
    let out_stale = upsert_note(&conn, discussion, &stale, 5000, None).unwrap();
    // Every note gets a document row.
    for out in [&out_a, &out_b, &out_stale] {
        insert_note_document(&conn, out.local_note_id);
    }
    // The newer run observes only the two kept notes.
    upsert_note(&conn, discussion, &kept_a, 6000, None).unwrap();
    upsert_note(&conn, discussion, &kept_b, 6000, None).unwrap();
    sweep_stale_notes(&conn, local_project_id, local_mr_id, 6000).unwrap();
    assert_eq!(count_note_documents(&conn, out_stale.local_note_id), 0);
    assert_eq!(count_note_documents(&conn, out_a.local_note_id), 1);
    assert_eq!(count_note_documents(&conn, out_b.local_note_id), 1);
}
}

View File

@@ -11,23 +11,25 @@ use lore::Config;
use lore::cli::autocorrect::{self, CorrectionResult};
use lore::cli::commands::{
IngestDisplay, InitInputs, InitOptions, InitResult, ListFilters, MrListFilters,
SearchCliFilters, SyncOptions, TimelineParams, open_issue_in_browser, open_mr_in_browser,
print_count, print_count_json, print_doctor_results, print_drift_human, print_drift_json,
print_dry_run_preview, print_dry_run_preview_json, print_embed, print_embed_json,
print_event_count, print_event_count_json, print_generate_docs, print_generate_docs_json,
print_ingest_summary, print_ingest_summary_json, print_list_issues, print_list_issues_json,
print_list_mrs, print_list_mrs_json, print_search_results, print_search_results_json,
print_show_issue, print_show_issue_json, print_show_mr, print_show_mr_json, print_stats,
print_stats_json, print_sync, print_sync_json, print_sync_status, print_sync_status_json,
print_timeline, print_timeline_json_with_meta, print_who_human, print_who_json, run_auth_test,
run_count, run_count_events, run_doctor, run_drift, run_embed, run_generate_docs, run_ingest,
run_ingest_dry_run, run_init, run_list_issues, run_list_mrs, run_search, run_show_issue,
run_show_mr, run_stats, run_sync, run_sync_status, run_timeline, run_who,
NoteListFilters, SearchCliFilters, SyncOptions, TimelineParams, open_issue_in_browser,
open_mr_in_browser, print_count, print_count_json, print_doctor_results, print_drift_human,
print_drift_json, print_dry_run_preview, print_dry_run_preview_json, print_embed,
print_embed_json, print_event_count, print_event_count_json, print_generate_docs,
print_generate_docs_json, print_ingest_summary, print_ingest_summary_json, print_list_issues,
print_list_issues_json, print_list_mrs, print_list_mrs_json, print_list_notes,
print_list_notes_csv, print_list_notes_json, print_list_notes_jsonl, print_search_results,
print_search_results_json, print_show_issue, print_show_issue_json, print_show_mr,
print_show_mr_json, print_stats, print_stats_json, print_sync, print_sync_json,
print_sync_status, print_sync_status_json, print_timeline, print_timeline_json_with_meta,
print_who_human, print_who_json, query_notes, run_auth_test, run_count, run_count_events,
run_doctor, run_drift, run_embed, run_generate_docs, run_ingest, run_ingest_dry_run, run_init,
run_list_issues, run_list_mrs, run_search, run_show_issue, run_show_mr, run_stats, run_sync,
run_sync_status, run_timeline, run_who,
};
use lore::cli::robot::{RobotMeta, strip_schemas};
use lore::cli::{
Cli, Commands, CountArgs, EmbedArgs, GenerateDocsArgs, IngestArgs, IssuesArgs, MrsArgs,
SearchArgs, StatsArgs, SyncArgs, TimelineArgs, WhoArgs,
NotesArgs, SearchArgs, StatsArgs, SyncArgs, TimelineArgs, WhoArgs,
};
use lore::core::db::{
LATEST_SCHEMA_VERSION, create_connection, get_schema_version, run_migrations,
@@ -173,6 +175,7 @@ async fn main() {
}
Some(Commands::Issues(args)) => handle_issues(cli.config.as_deref(), args, robot_mode),
Some(Commands::Mrs(args)) => handle_mrs(cli.config.as_deref(), args, robot_mode),
Some(Commands::Notes(args)) => handle_notes(cli.config.as_deref(), args, robot_mode),
Some(Commands::Search(args)) => {
handle_search(cli.config.as_deref(), args, robot_mode).await
}
@@ -801,6 +804,59 @@ fn handle_mrs(
Ok(())
}
/// CLI entry point for `lore notes`: translate parsed args into
/// `NoteListFilters`, run the query, and print in the requested format.
fn handle_notes(
    config_override: Option<&str>,
    args: NotesArgs,
    robot_mode: bool,
) -> Result<(), Box<dyn std::error::Error>> {
    // Timed so the JSON output can report elapsed_ms in its meta block.
    let start = std::time::Instant::now();
    let config = Config::load(config_override)?;
    let db_path = get_db_path(config.storage.db_path.as_deref());
    let conn = create_connection(&db_path)?;
    let filters = NoteListFilters {
        limit: args.limit,
        project: args.project,
        author: args.author,
        note_type: args.note_type,
        include_system: args.include_system,
        for_issue_iid: args.for_issue,
        for_mr_iid: args.for_mr,
        note_id: args.note_id,
        gitlab_note_id: args.gitlab_note_id,
        discussion_id: args.discussion_id,
        since: args.since,
        until: args.until,
        path: args.path,
        contains: args.contains,
        resolution: args.resolution,
        sort: args.sort,
        // Descending is the default; --asc flips the sort direction.
        order: (if args.asc { "asc" } else { "desc" }).to_string(),
    };
    let result = query_notes(&conn, &filters, &config)?;
    // Robot mode upgrades the default table output to JSON.
    let format = if robot_mode && args.format == "table" {
        "json"
    } else {
        args.format.as_str()
    };
    match format {
        "json" => print_list_notes_json(
            &result,
            start.elapsed().as_millis() as u64,
            args.fields.as_deref(),
        ),
        "jsonl" => print_list_notes_jsonl(&result),
        "csv" => print_list_notes_csv(&result),
        _ => print_list_notes(&result),
    }
    Ok(())
}
async fn handle_ingest(
config_override: Option<&str>,
args: IngestArgs,
@@ -2317,6 +2373,17 @@ fn handle_robot_docs(robot_mode: bool, brief: bool) -> Result<(), Box<dyn std::e
"active_minimal": ["entity_type", "iid", "title", "participants"]
}
},
"notes": {
"description": "List notes from discussions with rich filtering",
"flags": ["--limit/-n <N>", "--author/-a <username>", "--note-type <type>", "--contains <text>", "--for-issue <iid>", "--for-mr <iid>", "-p/--project <path>", "--since <period>", "--until <period>", "--path <filepath>", "--resolution <any|unresolved|resolved>", "--sort <created|updated>", "--asc", "--include-system", "--note-id <id>", "--gitlab-note-id <id>", "--discussion-id <id>", "--format <table|json|jsonl|csv>", "--fields <list|minimal>", "--open"],
"robot_flags": ["--format json", "--fields minimal"],
"example": "lore --robot notes --author jdefting --since 1y --format json --fields minimal",
"response_schema": {
"ok": "bool",
"data": {"notes": "[NoteListRowJson]", "total_count": "int", "showing": "int"},
"meta": {"elapsed_ms": "int"}
}
},
"robot-docs": {
"description": "This command (agent self-discovery manifest)",
"flags": ["--brief"],
@@ -2338,6 +2405,7 @@ fn handle_robot_docs(robot_mode: bool, brief: bool) -> Result<(), Box<dyn std::e
"search: FTS5 + vector hybrid search across all entities",
"who: Expert/workload/reviews analysis per file path or person",
"timeline: Chronological event reconstruction across entities",
"notes: Rich note listing with author, type, resolution, path, and discussion filters",
"stats: Database statistics with document/note/discussion counts",
"count: Entity counts with state breakdowns",
"embed: Generate vector embeddings for semantic search via Ollama"