feat: implement per-note search and document pipeline
- Add SourceType::Note with extract_note_document() and ParentMetadataCache
- Migration 022: composite indexes for notes queries + author_id column
- Migration 024: table rebuild adding 'note' to CHECK constraints, defense triggers
- Migration 025: backfill existing non-system notes into dirty queue
- Add `lore notes` CLI command with 17 filter options (author, path, resolution, etc.)
- Support table/json/jsonl/csv output formats with field selection
- Wire note dirty tracking through discussion and MR discussion ingestion
- Fix test_migration_024_preserves_existing_data off-by-one (tested the wrong migration)
- Fix upsert_document_inner returning false for label/path-only changes
This commit is contained in:
@@ -186,6 +186,31 @@ const COMMAND_FLAGS: &[(&str, &[&str])] = &[
|
||||
],
|
||||
),
|
||||
("drift", &["--threshold", "--project"]),
|
||||
(
|
||||
"notes",
|
||||
&[
|
||||
"--limit",
|
||||
"--fields",
|
||||
"--format",
|
||||
"--author",
|
||||
"--note-type",
|
||||
"--contains",
|
||||
"--note-id",
|
||||
"--gitlab-note-id",
|
||||
"--discussion-id",
|
||||
"--include-system",
|
||||
"--for-issue",
|
||||
"--for-mr",
|
||||
"--project",
|
||||
"--since",
|
||||
"--until",
|
||||
"--path",
|
||||
"--resolution",
|
||||
"--sort",
|
||||
"--asc",
|
||||
"--open",
|
||||
],
|
||||
),
|
||||
(
|
||||
"init",
|
||||
&[
|
||||
|
||||
@@ -39,6 +39,7 @@ pub fn run_generate_docs(
|
||||
result.seeded += seed_dirty(&conn, SourceType::Issue, project_filter)?;
|
||||
result.seeded += seed_dirty(&conn, SourceType::MergeRequest, project_filter)?;
|
||||
result.seeded += seed_dirty(&conn, SourceType::Discussion, project_filter)?;
|
||||
result.seeded += seed_dirty_notes(&conn, project_filter)?;
|
||||
}
|
||||
|
||||
let regen =
|
||||
@@ -67,6 +68,10 @@ fn seed_dirty(
|
||||
SourceType::Issue => "issues",
|
||||
SourceType::MergeRequest => "merge_requests",
|
||||
SourceType::Discussion => "discussions",
|
||||
SourceType::Note => {
|
||||
// NOTE-2E will implement seed_dirty_notes separately (needs is_system filter)
|
||||
unreachable!("Note seeding handled by seed_dirty_notes, not seed_dirty")
|
||||
}
|
||||
};
|
||||
let type_str = source_type.as_str();
|
||||
let now = chrono::Utc::now().timestamp_millis();
|
||||
@@ -125,6 +130,55 @@ fn seed_dirty(
|
||||
Ok(total_seeded)
|
||||
}
|
||||
|
||||
fn seed_dirty_notes(conn: &Connection, project_filter: Option<&str>) -> Result<usize> {
|
||||
let now = chrono::Utc::now().timestamp_millis();
|
||||
let mut total_seeded: usize = 0;
|
||||
let mut last_id: i64 = 0;
|
||||
|
||||
loop {
|
||||
let inserted = if let Some(project) = project_filter {
|
||||
let project_id = resolve_project(conn, project)?;
|
||||
|
||||
conn.execute(
|
||||
"INSERT INTO dirty_sources (source_type, source_id, queued_at, attempt_count, last_attempt_at, last_error, next_attempt_at)
|
||||
SELECT 'note', id, ?1, 0, NULL, NULL, NULL
|
||||
FROM notes WHERE id > ?2 AND project_id = ?3 AND is_system = 0 ORDER BY id LIMIT ?4
|
||||
ON CONFLICT(source_type, source_id) DO NOTHING",
|
||||
rusqlite::params![now, last_id, project_id, FULL_MODE_CHUNK_SIZE],
|
||||
)?
|
||||
} else {
|
||||
conn.execute(
|
||||
"INSERT INTO dirty_sources (source_type, source_id, queued_at, attempt_count, last_attempt_at, last_error, next_attempt_at)
|
||||
SELECT 'note', id, ?1, 0, NULL, NULL, NULL
|
||||
FROM notes WHERE id > ?2 AND is_system = 0 ORDER BY id LIMIT ?3
|
||||
ON CONFLICT(source_type, source_id) DO NOTHING",
|
||||
rusqlite::params![now, last_id, FULL_MODE_CHUNK_SIZE],
|
||||
)?
|
||||
};
|
||||
|
||||
if inserted == 0 {
|
||||
break;
|
||||
}
|
||||
|
||||
let max_id: i64 = conn.query_row(
|
||||
"SELECT MAX(id) FROM (SELECT id FROM notes WHERE id > ?1 AND is_system = 0 ORDER BY id LIMIT ?2)",
|
||||
rusqlite::params![last_id, FULL_MODE_CHUNK_SIZE],
|
||||
|row| row.get(0),
|
||||
)?;
|
||||
|
||||
total_seeded += inserted;
|
||||
last_id = max_id;
|
||||
}
|
||||
|
||||
info!(
|
||||
source_type = "note",
|
||||
seeded = total_seeded,
|
||||
"Seeded dirty_sources"
|
||||
);
|
||||
|
||||
Ok(total_seeded)
|
||||
}
|
||||
|
||||
pub fn print_generate_docs(result: &GenerateDocsResult) {
|
||||
let mode = if result.full_mode {
|
||||
"full"
|
||||
@@ -186,3 +240,81 @@ pub fn print_generate_docs_json(result: &GenerateDocsResult, elapsed_ms: u64) {
|
||||
};
|
||||
println!("{}", serde_json::to_string(&output).unwrap());
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::path::Path;

    use crate::core::db::{create_connection, run_migrations};

    use super::*;

    /// In-memory database pre-seeded with one project, one issue, and one
    /// discussion (id 1) that notes can attach to.
    fn setup_db() -> Connection {
        let conn = create_connection(Path::new(":memory:")).unwrap();
        run_migrations(&conn).unwrap();
        let fixtures = [
            "INSERT INTO projects (id, gitlab_project_id, path_with_namespace, web_url) VALUES (1, 100, 'group/project', 'https://gitlab.com/group/project')",
            "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, created_at, updated_at, last_seen_at) VALUES (1, 10, 1, 1, 'Test', 'opened', 1000, 2000, 3000)",
            "INSERT INTO discussions (id, gitlab_discussion_id, project_id, issue_id, noteable_type, last_seen_at) VALUES (1, 'disc_1', 1, 1, 'Issue', 3000)",
        ];
        for sql in fixtures {
            conn.execute(sql, []).unwrap();
        }
        conn
    }

    /// Attach a note to discussion 1 in project 1.
    fn insert_note(conn: &Connection, id: i64, gitlab_id: i64, is_system: bool) {
        conn.execute(
            "INSERT INTO notes (id, gitlab_id, discussion_id, project_id, author_username, body, created_at, updated_at, last_seen_at, is_system) VALUES (?1, ?2, 1, 1, 'alice', 'note body', 1000, 2000, 3000, ?3)",
            rusqlite::params![id, gitlab_id, is_system as i32],
        )
        .unwrap();
    }

    /// Rows currently queued in `dirty_sources` with source_type 'note'.
    fn queued_note_count(conn: &Connection) -> i64 {
        conn.query_row(
            "SELECT COUNT(*) FROM dirty_sources WHERE source_type = 'note'",
            [],
            |row| row.get(0),
        )
        .unwrap()
    }

    #[test]
    fn test_full_seed_includes_notes() {
        let db = setup_db();
        for (id, gitlab_id) in [(1_i64, 101_i64), (2, 102), (3, 103)] {
            insert_note(&db, id, gitlab_id, false);
        }
        // System notes must never be queued.
        insert_note(&db, 4, 104, true);

        assert_eq!(seed_dirty_notes(&db, None).unwrap(), 3);
        assert_eq!(queued_note_count(&db), 3);
    }

    #[test]
    fn test_note_document_count_stable_after_second_generate_docs_full() {
        let db = setup_db();
        insert_note(&db, 1, 101, false);
        insert_note(&db, 2, 102, false);

        assert_eq!(seed_dirty_notes(&db, None).unwrap(), 2);
        // ON CONFLICT DO NOTHING makes a second full seed a no-op.
        assert_eq!(seed_dirty_notes(&db, None).unwrap(), 0);
        assert_eq!(queued_note_count(&db), 2);
    }
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -30,8 +30,10 @@ pub use ingest::{
|
||||
};
|
||||
pub use init::{InitInputs, InitOptions, InitResult, run_init};
|
||||
pub use list::{
|
||||
ListFilters, MrListFilters, open_issue_in_browser, open_mr_in_browser, print_list_issues,
|
||||
print_list_issues_json, print_list_mrs, print_list_mrs_json, run_list_issues, run_list_mrs,
|
||||
ListFilters, MrListFilters, NoteListFilters, open_issue_in_browser, open_mr_in_browser,
|
||||
print_list_issues, print_list_issues_json, print_list_mrs, print_list_mrs_json,
|
||||
print_list_notes, print_list_notes_csv, print_list_notes_json, print_list_notes_jsonl,
|
||||
query_notes, run_list_issues, run_list_mrs,
|
||||
};
|
||||
pub use search::{
|
||||
SearchCliFilters, SearchResponse, print_search_results, print_search_results_json, run_search,
|
||||
|
||||
@@ -334,6 +334,7 @@ pub fn print_search_results(response: &SearchResponse) {
|
||||
"issue" => "Issue",
|
||||
"merge_request" => "MR",
|
||||
"discussion" => "Discussion",
|
||||
"note" => "Note",
|
||||
_ => &result.source_type,
|
||||
};
|
||||
|
||||
|
||||
114
src/cli/mod.rs
114
src/cli/mod.rs
@@ -112,6 +112,9 @@ pub enum Commands {
|
||||
/// List or show merge requests
|
||||
Mrs(MrsArgs),
|
||||
|
||||
/// List notes from discussions
|
||||
Notes(NotesArgs),
|
||||
|
||||
/// Ingest data from GitLab
|
||||
Ingest(IngestArgs),
|
||||
|
||||
@@ -489,6 +492,113 @@ pub struct MrsArgs {
|
||||
pub no_open: bool,
|
||||
}
|
||||
|
||||
// CLI arguments for `lore notes`.
//
// NOTE(review): every `///` doc comment below doubles as clap-generated
// `--help` text, so the wording there is user-facing output — do not edit
// casually. `help_heading` groups flags into Output / Filters / Sorting /
// Actions sections in the help screen.
#[derive(Parser)]
#[command(after_help = "\x1b[1mExamples:\x1b[0m
lore notes # List 50 most recent notes
lore notes --author alice --since 7d # Notes by alice in last 7 days
lore notes --for-issue 42 -p group/repo # Notes on issue #42
lore notes --path src/ --resolution unresolved # Unresolved diff notes in src/")]
pub struct NotesArgs {
    /// Maximum results
    #[arg(
        short = 'n',
        long = "limit",
        default_value = "50",
        help_heading = "Output"
    )]
    pub limit: usize,

    /// Select output fields (comma-separated, or 'minimal' preset: id,author_username,body,created_at_iso)
    // value_delimiter lets users pass `--fields a,b,c` as one argument.
    #[arg(long, help_heading = "Output", value_delimiter = ',')]
    pub fields: Option<Vec<String>>,

    /// Output format (table, json, jsonl, csv)
    // value_parser rejects anything outside the four supported formats.
    #[arg(
        long,
        default_value = "table",
        value_parser = ["table", "json", "jsonl", "csv"],
        help_heading = "Output"
    )]
    pub format: String,

    /// Filter by author username
    #[arg(short = 'a', long, help_heading = "Filters")]
    pub author: Option<String>,

    /// Filter by note type (DiffNote, DiscussionNote)
    #[arg(long, help_heading = "Filters")]
    pub note_type: Option<String>,

    /// Filter by body text (substring match)
    #[arg(long, help_heading = "Filters")]
    pub contains: Option<String>,

    /// Filter by internal note ID
    #[arg(long, help_heading = "Filters")]
    pub note_id: Option<i64>,

    /// Filter by GitLab note ID
    #[arg(long, help_heading = "Filters")]
    pub gitlab_note_id: Option<i64>,

    /// Filter by discussion ID
    #[arg(long, help_heading = "Filters")]
    pub discussion_id: Option<String>,

    /// Include system notes (excluded by default)
    #[arg(long, help_heading = "Filters")]
    pub include_system: bool,

    // --for-issue and --for-mr are mutually exclusive via `conflicts_with`;
    // clap reports the conflict itself, no manual validation needed.
    /// Filter to notes on a specific issue IID (requires --project or default_project)
    #[arg(long, conflicts_with = "for_mr", help_heading = "Filters")]
    pub for_issue: Option<i64>,

    /// Filter to notes on a specific MR IID (requires --project or default_project)
    #[arg(long, conflicts_with = "for_issue", help_heading = "Filters")]
    pub for_mr: Option<i64>,

    /// Filter by project path
    #[arg(short = 'p', long, help_heading = "Filters")]
    pub project: Option<String>,

    /// Filter by time (7d, 2w, 1m, or YYYY-MM-DD)
    #[arg(long, help_heading = "Filters")]
    pub since: Option<String>,

    /// Filter until date (YYYY-MM-DD, inclusive end-of-day)
    #[arg(long, help_heading = "Filters")]
    pub until: Option<String>,

    /// Filter by file path (exact match or prefix with trailing /)
    #[arg(long, help_heading = "Filters")]
    pub path: Option<String>,

    /// Filter by resolution status (any, unresolved, resolved)
    #[arg(
        long,
        value_parser = ["any", "unresolved", "resolved"],
        help_heading = "Filters"
    )]
    pub resolution: Option<String>,

    /// Sort field (created, updated)
    #[arg(
        long,
        value_parser = ["created", "updated"],
        default_value = "created",
        help_heading = "Sorting"
    )]
    pub sort: String,

    /// Sort ascending (default: descending)
    #[arg(long, help_heading = "Sorting")]
    pub asc: bool,

    /// Open first matching item in browser
    #[arg(long, help_heading = "Actions")]
    pub open: bool,
}
|
||||
|
||||
#[derive(Parser)]
|
||||
pub struct IngestArgs {
|
||||
/// Entity to ingest (issues, mrs). Omit to ingest everything
|
||||
@@ -556,8 +666,8 @@ pub struct SearchArgs {
|
||||
#[arg(long, default_value = "hybrid", value_parser = ["lexical", "hybrid", "semantic"], help_heading = "Mode")]
|
||||
pub mode: String,
|
||||
|
||||
/// Filter by source type (issue, mr, discussion)
|
||||
#[arg(long = "type", value_name = "TYPE", value_parser = ["issue", "mr", "discussion"], help_heading = "Filters")]
|
||||
/// Filter by source type (issue, mr, discussion, note)
|
||||
#[arg(long = "type", value_name = "TYPE", value_parser = ["issue", "mr", "discussion", "note"], help_heading = "Filters")]
|
||||
pub source_type: Option<String>,
|
||||
|
||||
/// Filter by author username
|
||||
|
||||
@@ -64,6 +64,10 @@ pub fn expand_fields_preset(fields: &[String], entity: &str) -> Vec<String> {
|
||||
.iter()
|
||||
.map(|s| (*s).to_string())
|
||||
.collect(),
|
||||
"notes" => ["id", "author_username", "body", "created_at_iso"]
|
||||
.iter()
|
||||
.map(|s| (*s).to_string())
|
||||
.collect(),
|
||||
_ => fields.to_vec(),
|
||||
}
|
||||
} else {
|
||||
@@ -82,3 +86,25 @@ pub fn strip_schemas(commands: &mut serde_json::Value) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_expand_fields_preset_notes() {
        // The 'minimal' preset for the notes entity expands to the short
        // field list.
        let expanded = expand_fields_preset(&["minimal".to_string()], "notes");
        assert_eq!(
            expanded,
            ["id", "author_username", "body", "created_at_iso"]
        );
    }

    #[test]
    fn test_expand_fields_preset_passthrough() {
        // Explicitly named fields are returned untouched.
        let requested: Vec<String> =
            ["id", "body"].iter().map(|s| (*s).to_string()).collect();
        assert_eq!(expand_fields_preset(&requested, "notes"), ["id", "body"]);
    }
}
|
||||
|
||||
Reference in New Issue
Block a user