feat: implement per-note search and document pipeline
- Add SourceType::Note with extract_note_document() and ParentMetadataCache - Migration 022: composite indexes for notes queries + author_id column - Migration 024: table rebuild adding 'note' to CHECK constraints, defense triggers - Migration 025: backfill existing non-system notes into dirty queue - Add lore notes CLI command with 17 filter options (author, path, resolution, etc.) - Support table/json/jsonl/csv output formats with field selection - Wire note dirty tracking through discussion and MR discussion ingestion - Fix test_migration_024_preserves_existing_data off-by-one (tested wrong migration) - Fix upsert_document_inner returning false for label/path-only changes
This commit is contained in:
@@ -39,6 +39,7 @@ pub fn run_generate_docs(
|
||||
result.seeded += seed_dirty(&conn, SourceType::Issue, project_filter)?;
|
||||
result.seeded += seed_dirty(&conn, SourceType::MergeRequest, project_filter)?;
|
||||
result.seeded += seed_dirty(&conn, SourceType::Discussion, project_filter)?;
|
||||
result.seeded += seed_dirty_notes(&conn, project_filter)?;
|
||||
}
|
||||
|
||||
let regen =
|
||||
@@ -67,6 +68,10 @@ fn seed_dirty(
|
||||
SourceType::Issue => "issues",
|
||||
SourceType::MergeRequest => "merge_requests",
|
||||
SourceType::Discussion => "discussions",
|
||||
SourceType::Note => {
|
||||
// NOTE-2E will implement seed_dirty_notes separately (needs is_system filter)
|
||||
unreachable!("Note seeding handled by seed_dirty_notes, not seed_dirty")
|
||||
}
|
||||
};
|
||||
let type_str = source_type.as_str();
|
||||
let now = chrono::Utc::now().timestamp_millis();
|
||||
@@ -125,6 +130,55 @@ fn seed_dirty(
|
||||
Ok(total_seeded)
|
||||
}
|
||||
|
||||
fn seed_dirty_notes(conn: &Connection, project_filter: Option<&str>) -> Result<usize> {
|
||||
let now = chrono::Utc::now().timestamp_millis();
|
||||
let mut total_seeded: usize = 0;
|
||||
let mut last_id: i64 = 0;
|
||||
|
||||
loop {
|
||||
let inserted = if let Some(project) = project_filter {
|
||||
let project_id = resolve_project(conn, project)?;
|
||||
|
||||
conn.execute(
|
||||
"INSERT INTO dirty_sources (source_type, source_id, queued_at, attempt_count, last_attempt_at, last_error, next_attempt_at)
|
||||
SELECT 'note', id, ?1, 0, NULL, NULL, NULL
|
||||
FROM notes WHERE id > ?2 AND project_id = ?3 AND is_system = 0 ORDER BY id LIMIT ?4
|
||||
ON CONFLICT(source_type, source_id) DO NOTHING",
|
||||
rusqlite::params![now, last_id, project_id, FULL_MODE_CHUNK_SIZE],
|
||||
)?
|
||||
} else {
|
||||
conn.execute(
|
||||
"INSERT INTO dirty_sources (source_type, source_id, queued_at, attempt_count, last_attempt_at, last_error, next_attempt_at)
|
||||
SELECT 'note', id, ?1, 0, NULL, NULL, NULL
|
||||
FROM notes WHERE id > ?2 AND is_system = 0 ORDER BY id LIMIT ?3
|
||||
ON CONFLICT(source_type, source_id) DO NOTHING",
|
||||
rusqlite::params![now, last_id, FULL_MODE_CHUNK_SIZE],
|
||||
)?
|
||||
};
|
||||
|
||||
if inserted == 0 {
|
||||
break;
|
||||
}
|
||||
|
||||
let max_id: i64 = conn.query_row(
|
||||
"SELECT MAX(id) FROM (SELECT id FROM notes WHERE id > ?1 AND is_system = 0 ORDER BY id LIMIT ?2)",
|
||||
rusqlite::params![last_id, FULL_MODE_CHUNK_SIZE],
|
||||
|row| row.get(0),
|
||||
)?;
|
||||
|
||||
total_seeded += inserted;
|
||||
last_id = max_id;
|
||||
}
|
||||
|
||||
info!(
|
||||
source_type = "note",
|
||||
seeded = total_seeded,
|
||||
"Seeded dirty_sources"
|
||||
);
|
||||
|
||||
Ok(total_seeded)
|
||||
}
|
||||
|
||||
pub fn print_generate_docs(result: &GenerateDocsResult) {
|
||||
let mode = if result.full_mode {
|
||||
"full"
|
||||
@@ -186,3 +240,81 @@ pub fn print_generate_docs_json(result: &GenerateDocsResult, elapsed_ms: u64) {
|
||||
};
|
||||
println!("{}", serde_json::to_string(&output).unwrap());
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::path::Path;
|
||||
|
||||
use crate::core::db::{create_connection, run_migrations};
|
||||
|
||||
use super::*;
|
||||
|
||||
fn setup_db() -> Connection {
|
||||
let conn = create_connection(Path::new(":memory:")).unwrap();
|
||||
run_migrations(&conn).unwrap();
|
||||
conn.execute(
|
||||
"INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url) VALUES (1, 100, 'group/project', 'https://gitlab.com/group/project')",
|
||||
[],
|
||||
).unwrap();
|
||||
conn.execute(
|
||||
"INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, created_at, updated_at, last_seen_at) VALUES (1, 10, 1, 1, 'Test', 'opened', 1000, 2000, 3000)",
|
||||
[],
|
||||
).unwrap();
|
||||
conn.execute(
|
||||
"INSERT INTO discussions (id, gitlab_discussion_id, project_id, issue_id, noteable_type, last_seen_at) VALUES (1, 'disc_1', 1, 1, 'Issue', 3000)",
|
||||
[],
|
||||
).unwrap();
|
||||
conn
|
||||
}
|
||||
|
||||
fn insert_note(conn: &Connection, id: i64, gitlab_id: i64, is_system: bool) {
|
||||
conn.execute(
|
||||
"INSERT INTO notes (id, gitlab_id, discussion_id, project_id, author_username, body, created_at, updated_at, last_seen_at, is_system) VALUES (?1, ?2, 1, 1, 'alice', 'note body', 1000, 2000, 3000, ?3)",
|
||||
rusqlite::params![id, gitlab_id, is_system as i32],
|
||||
).unwrap();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_full_seed_includes_notes() {
|
||||
let conn = setup_db();
|
||||
insert_note(&conn, 1, 101, false);
|
||||
insert_note(&conn, 2, 102, false);
|
||||
insert_note(&conn, 3, 103, false);
|
||||
insert_note(&conn, 4, 104, true); // system note — should be excluded
|
||||
|
||||
let seeded = seed_dirty_notes(&conn, None).unwrap();
|
||||
assert_eq!(seeded, 3);
|
||||
|
||||
let count: i64 = conn
|
||||
.query_row(
|
||||
"SELECT COUNT(*) FROM dirty_sources WHERE source_type = 'note'",
|
||||
[],
|
||||
|row| row.get(0),
|
||||
)
|
||||
.unwrap();
|
||||
assert_eq!(count, 3);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_note_document_count_stable_after_second_generate_docs_full() {
|
||||
let conn = setup_db();
|
||||
insert_note(&conn, 1, 101, false);
|
||||
insert_note(&conn, 2, 102, false);
|
||||
|
||||
let first = seed_dirty_notes(&conn, None).unwrap();
|
||||
assert_eq!(first, 2);
|
||||
|
||||
// Second run should be idempotent (ON CONFLICT DO NOTHING)
|
||||
let second = seed_dirty_notes(&conn, None).unwrap();
|
||||
assert_eq!(second, 0);
|
||||
|
||||
let count: i64 = conn
|
||||
.query_row(
|
||||
"SELECT COUNT(*) FROM dirty_sources WHERE source_type = 'note'",
|
||||
[],
|
||||
|row| row.get(0),
|
||||
)
|
||||
.unwrap();
|
||||
assert_eq!(count, 2);
|
||||
}
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -30,8 +30,10 @@ pub use ingest::{
|
||||
};
|
||||
pub use init::{InitInputs, InitOptions, InitResult, run_init};
|
||||
pub use list::{
|
||||
ListFilters, MrListFilters, open_issue_in_browser, open_mr_in_browser, print_list_issues,
|
||||
print_list_issues_json, print_list_mrs, print_list_mrs_json, run_list_issues, run_list_mrs,
|
||||
ListFilters, MrListFilters, NoteListFilters, open_issue_in_browser, open_mr_in_browser,
|
||||
print_list_issues, print_list_issues_json, print_list_mrs, print_list_mrs_json,
|
||||
print_list_notes, print_list_notes_csv, print_list_notes_json, print_list_notes_jsonl,
|
||||
query_notes, run_list_issues, run_list_mrs,
|
||||
};
|
||||
pub use search::{
|
||||
SearchCliFilters, SearchResponse, print_search_results, print_search_results_json, run_search,
|
||||
|
||||
@@ -334,6 +334,7 @@ pub fn print_search_results(response: &SearchResponse) {
|
||||
"issue" => "Issue",
|
||||
"merge_request" => "MR",
|
||||
"discussion" => "Discussion",
|
||||
"note" => "Note",
|
||||
_ => &result.source_type,
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user