diff --git a/tests/perf_benchmark.rs b/tests/perf_benchmark.rs new file mode 100644 index 0000000..b212f9b --- /dev/null +++ b/tests/perf_benchmark.rs @@ -0,0 +1,353 @@ +//! Performance benchmarks for optimization verification. +//! Run with: cargo test --test perf_benchmark -- --nocapture + +use rusqlite::Connection; +use std::time::Instant; + +fn setup_db() -> Connection { + let conn = Connection::open_in_memory().unwrap(); + conn.execute_batch( + " + PRAGMA journal_mode = WAL; + PRAGMA synchronous = NORMAL; + + CREATE TABLE projects ( + id INTEGER PRIMARY KEY, + gitlab_project_id INTEGER UNIQUE NOT NULL, + path_with_namespace TEXT NOT NULL, + default_branch TEXT, + web_url TEXT, + created_at INTEGER, + updated_at INTEGER, + raw_payload_id INTEGER + ); + INSERT INTO projects (id, gitlab_project_id, path_with_namespace) + VALUES (1, 100, 'group/project'); + + CREATE TABLE issues ( + id INTEGER PRIMARY KEY, + gitlab_id INTEGER UNIQUE NOT NULL, + project_id INTEGER NOT NULL REFERENCES projects(id), + iid INTEGER NOT NULL, + title TEXT, + description TEXT, + state TEXT NOT NULL, + author_username TEXT, + created_at INTEGER NOT NULL, + updated_at INTEGER NOT NULL, + last_seen_at INTEGER NOT NULL, + discussions_synced_for_updated_at INTEGER, + resource_events_synced_for_updated_at INTEGER, + web_url TEXT, + raw_payload_id INTEGER + ); + CREATE TABLE labels ( + id INTEGER PRIMARY KEY, + gitlab_id INTEGER, + project_id INTEGER NOT NULL REFERENCES projects(id), + name TEXT NOT NULL, + color TEXT, + description TEXT + ); + CREATE TABLE issue_labels ( + issue_id INTEGER NOT NULL REFERENCES issues(id), + label_id INTEGER NOT NULL REFERENCES labels(id), + PRIMARY KEY(issue_id, label_id) + ); + + CREATE TABLE documents ( + id INTEGER PRIMARY KEY, + source_type TEXT NOT NULL, + source_id INTEGER NOT NULL, + project_id INTEGER NOT NULL, + author_username TEXT, + label_names TEXT, + created_at INTEGER, + updated_at INTEGER, + url TEXT, + title TEXT, + content_text TEXT NOT NULL, + content_hash TEXT NOT NULL, + labels_hash TEXT NOT NULL DEFAULT '', + paths_hash TEXT NOT NULL DEFAULT '', + is_truncated INTEGER NOT NULL DEFAULT 0, + truncated_reason TEXT, + UNIQUE(source_type, source_id) + ); + CREATE TABLE document_labels ( + document_id INTEGER NOT NULL REFERENCES documents(id) ON DELETE CASCADE, + label_name TEXT NOT NULL, + PRIMARY KEY(document_id, label_name) + ); + CREATE TABLE document_paths ( + document_id INTEGER NOT NULL REFERENCES documents(id) ON DELETE CASCADE, + path TEXT NOT NULL, + PRIMARY KEY(document_id, path) + ); + CREATE TABLE dirty_sources ( + source_type TEXT NOT NULL, + source_id INTEGER NOT NULL, + queued_at INTEGER NOT NULL, + attempt_count INTEGER NOT NULL DEFAULT 0, + last_attempt_at INTEGER, + last_error TEXT, + next_attempt_at INTEGER, + PRIMARY KEY(source_type, source_id) + ); + CREATE INDEX idx_dirty_sources_next_attempt ON dirty_sources(next_attempt_at); + ", + ) + .unwrap(); + conn +} + +/// Simulate the OLD approach: individual INSERT per label +fn insert_labels_individual(conn: &Connection, doc_id: i64, labels: &[&str]) { + conn.execute( + "DELETE FROM document_labels WHERE document_id = ?1", + [doc_id], + ) + .unwrap(); + for label in labels { + conn.execute( + "INSERT INTO document_labels (document_id, label_name) VALUES (?1, ?2)", + rusqlite::params![doc_id, label], + ) + .unwrap(); + } +} + +/// Simulate the NEW approach: batch INSERT +fn insert_labels_batch(conn: &Connection, doc_id: i64, labels: &[&str]) { + conn.execute( + "DELETE FROM document_labels WHERE document_id = ?1", + [doc_id], + ) + .unwrap(); + if !labels.is_empty() { + let placeholders: String = labels + .iter() + .enumerate() + .map(|(i, _)| format!("(?1, ?{})", i + 2)) + .collect::>() + .join(", "); + let sql = format!( + "INSERT INTO document_labels (document_id, label_name) VALUES {}", + placeholders + ); + let mut params: Vec> = vec![Box::new(doc_id)]; + for label in labels { + params.push(Box::new(*label)); + } + let param_refs: Vec<&dyn rusqlite::types::ToSql> = + params.iter().map(|p| p.as_ref()).collect(); + conn.execute(&sql, param_refs.as_slice()).unwrap(); + } +} + +/// Simulate OLD string building: format! + push_str +fn build_content_old(iid: i64, title: &str, project: &str, labels: &str, state: &str, author: &str, url: &str) -> String { + let mut content = format!("[[Issue]] #{}: {}\nProject: {}\n", iid, title, project); + content.push_str(&format!("URL: {}\n", url)); + content.push_str(&format!("Labels: {}\n", labels)); + content.push_str(&format!("State: {}\n", state)); + content.push_str(&format!("Author: @{}\n", author)); + content +} + +/// Simulate NEW string building: writeln! directly +fn build_content_new(iid: i64, title: &str, project: &str, labels: &str, state: &str, author: &str, url: &str) -> String { + use std::fmt::Write as _; + let mut content = format!("[[Issue]] #{}: {}\nProject: {}\n", iid, title, project); + let _ = writeln!(content, "URL: {}", url); + let _ = writeln!(content, "Labels: {}", labels); + let _ = writeln!(content, "State: {}", state); + let _ = writeln!(content, "Author: @{}", author); + content +} + +const LABEL_SETS: &[&[&str]] = &[ + &["bug", "critical", "backend", "needs-review", "p1"], + &["feature", "frontend", "design", "ux"], + &["bug", "database", "performance"], + &["docs", "api"], + &["infrastructure", "ci-cd", "devops", "monitoring", "alerting", "sre"], +]; + +#[test] +fn bench_label_insert_individual_vs_batch() { + let conn = setup_db(); + + // Create a document to attach labels to + conn.execute( + "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, created_at, updated_at, last_seen_at) + VALUES (1, 10, 1, 42, 'Test Issue', 'opened', 1000, 2000, 3000)", + [], + ).unwrap(); + conn.execute( + "INSERT INTO documents (id, source_type, source_id, project_id, content_text, content_hash, labels_hash, paths_hash) + VALUES (1, 'issue', 1, 1, 'test content', 'hash1', 'lhash1', 'phash1')", + [], + ).unwrap(); + + let iterations = 5000; + + // Warm up + for labels in LABEL_SETS { + insert_labels_individual(&conn, 1, labels); + insert_labels_batch(&conn, 1, labels); + } + + // Benchmark INDIVIDUAL inserts + let start = Instant::now(); + for i in 0..iterations { + let labels = LABEL_SETS[i % LABEL_SETS.len()]; + insert_labels_individual(&conn, 1, labels); + } + let individual_elapsed = start.elapsed(); + + // Benchmark BATCH inserts + let start = Instant::now(); + for i in 0..iterations { + let labels = LABEL_SETS[i % LABEL_SETS.len()]; + insert_labels_batch(&conn, 1, labels); + } + let batch_elapsed = start.elapsed(); + + let speedup = individual_elapsed.as_nanos() as f64 / batch_elapsed.as_nanos() as f64; + + println!("\n=== Label INSERT Benchmark ({} iterations) ===", iterations); + println!("Individual INSERTs: {:?}", individual_elapsed); + println!("Batch INSERT: {:?}", batch_elapsed); + println!("Speedup: {:.2}x", speedup); + println!(); + + // Verify correctness: both approaches produce same result + insert_labels_individual(&conn, 1, &["a", "b", "c"]); + let individual_labels: Vec = conn + .prepare("SELECT label_name FROM document_labels WHERE document_id = 1 ORDER BY label_name") + .unwrap() + .query_map([], |row| row.get(0)) + .unwrap() + .collect::, _>>() + .unwrap(); + + insert_labels_batch(&conn, 1, &["a", "b", "c"]); + let batch_labels: Vec = conn + .prepare("SELECT label_name FROM document_labels WHERE document_id = 1 ORDER BY label_name") + .unwrap() + .query_map([], |row| row.get(0)) + .unwrap() + .collect::, _>>() + .unwrap(); + + assert_eq!(individual_labels, batch_labels, "Both approaches must produce identical results"); +} + +#[test] +fn bench_string_building_old_vs_new() { + let iterations = 50_000; + + // Warm up + for _ in 0..100 { + let _ = build_content_old(42, "Fix authentication bug in login flow", "mygroup/myproject", "[\"bug\",\"auth\",\"critical\"]", "opened", "alice", "https://gitlab.example.com/mygroup/myproject/-/issues/42"); + let _ = build_content_new(42, "Fix authentication bug in login flow", "mygroup/myproject", "[\"bug\",\"auth\",\"critical\"]", "opened", "alice", "https://gitlab.example.com/mygroup/myproject/-/issues/42"); + } + + // Benchmark OLD + let start = Instant::now(); + for i in 0..iterations { + let s = build_content_old( + i as i64, + "Fix authentication bug in login flow with extended description", + "mygroup/myproject", + "[\"bug\",\"auth\",\"critical\",\"backend\",\"needs-review\"]", + "opened", + "alice", + "https://gitlab.example.com/mygroup/myproject/-/issues/42", + ); + std::hint::black_box(s); + } + let old_elapsed = start.elapsed(); + + // Benchmark NEW + let start = Instant::now(); + for i in 0..iterations { + let s = build_content_new( + i as i64, + "Fix authentication bug in login flow with extended description", + "mygroup/myproject", + "[\"bug\",\"auth\",\"critical\",\"backend\",\"needs-review\"]", + "opened", + "alice", + "https://gitlab.example.com/mygroup/myproject/-/issues/42", + ); + std::hint::black_box(s); + } + let new_elapsed = start.elapsed(); + + let speedup = old_elapsed.as_nanos() as f64 / new_elapsed.as_nanos() as f64; + + println!("\n=== String Building Benchmark ({} iterations) ===", iterations); + println!("format!+push_str: {:?}", old_elapsed); + println!("writeln!: {:?}", new_elapsed); + println!("Speedup: {:.2}x", speedup); + println!(); + + // Verify correctness: both produce identical output + let old = build_content_old(42, "Test", "group/proj", "[\"bug\"]", "opened", "alice", "https://example.com"); + let new = build_content_new(42, "Test", "group/proj", "[\"bug\"]", "opened", "alice", "https://example.com"); + assert_eq!(old, new, "Both approaches must produce identical strings"); +} + +#[test] +fn bench_prepare_vs_prepare_cached() { + let conn = setup_db(); + + // Seed some documents + for i in 1..=100 { + conn.execute( + "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, created_at, updated_at, last_seen_at) + VALUES (?1, ?2, 1, ?1, 'Test', 'opened', 1000, 2000, 3000)", + rusqlite::params![i, i * 10], + ).unwrap(); + conn.execute( + "INSERT INTO documents (source_type, source_id, project_id, content_text, content_hash, labels_hash, paths_hash) + VALUES ('issue', ?1, 1, 'content', ?2, 'lh', 'ph')", + rusqlite::params![i, format!("hash_{}", i)], + ).unwrap(); + } + + let iterations = 10_000; + let sql = "SELECT content_hash FROM documents WHERE source_type = ?1 AND source_id = ?2"; + + // Benchmark prepare (uncached) + let start = Instant::now(); + for i in 0..iterations { + let source_id = (i % 100) + 1; + let mut stmt = conn.prepare(sql).unwrap(); + let _hash: Option = stmt + .query_row(rusqlite::params!["issue", source_id as i64], |row| row.get(0)) + .ok(); + } + let uncached_elapsed = start.elapsed(); + + // Benchmark prepare_cached + let start = Instant::now(); + for i in 0..iterations { + let source_id = (i % 100) + 1; + let mut stmt = conn.prepare_cached(sql).unwrap(); + let _hash: Option = stmt + .query_row(rusqlite::params!["issue", source_id as i64], |row| row.get(0)) + .ok(); + } + let cached_elapsed = start.elapsed(); + + let speedup = uncached_elapsed.as_nanos() as f64 / cached_elapsed.as_nanos() as f64; + + println!("\n=== prepare vs prepare_cached Benchmark ({} iterations) ===", iterations); + println!("prepare(): {:?}", uncached_elapsed); + println!("prepare_cached(): {:?}", cached_elapsed); + println!("Speedup: {:.2}x", speedup); + println!(); +}