425 lines
13 KiB
Rust
425 lines
13 KiB
Rust
//! Performance benchmarks for optimization verification.
|
|
//! Run with: cargo test --test perf_benchmark -- --nocapture
|
|
|
|
use rusqlite::Connection;
|
|
use std::time::Instant;
|
|
|
|
fn setup_db() -> Connection {
|
|
let conn = Connection::open_in_memory().unwrap();
|
|
conn.execute_batch(
|
|
"
|
|
PRAGMA journal_mode = WAL;
|
|
PRAGMA synchronous = NORMAL;
|
|
|
|
CREATE TABLE projects (
|
|
id INTEGER PRIMARY KEY,
|
|
gitlab_project_id INTEGER UNIQUE NOT NULL,
|
|
path_with_namespace TEXT NOT NULL,
|
|
default_branch TEXT,
|
|
web_url TEXT,
|
|
created_at INTEGER,
|
|
updated_at INTEGER,
|
|
raw_payload_id INTEGER
|
|
);
|
|
INSERT INTO projects (id, gitlab_project_id, path_with_namespace)
|
|
VALUES (1, 100, 'group/project');
|
|
|
|
CREATE TABLE issues (
|
|
id INTEGER PRIMARY KEY,
|
|
gitlab_id INTEGER UNIQUE NOT NULL,
|
|
project_id INTEGER NOT NULL REFERENCES projects(id),
|
|
iid INTEGER NOT NULL,
|
|
title TEXT,
|
|
description TEXT,
|
|
state TEXT NOT NULL,
|
|
author_username TEXT,
|
|
created_at INTEGER NOT NULL,
|
|
updated_at INTEGER NOT NULL,
|
|
last_seen_at INTEGER NOT NULL,
|
|
discussions_synced_for_updated_at INTEGER,
|
|
resource_events_synced_for_updated_at INTEGER,
|
|
web_url TEXT,
|
|
raw_payload_id INTEGER
|
|
);
|
|
CREATE TABLE labels (
|
|
id INTEGER PRIMARY KEY,
|
|
gitlab_id INTEGER,
|
|
project_id INTEGER NOT NULL REFERENCES projects(id),
|
|
name TEXT NOT NULL,
|
|
color TEXT,
|
|
description TEXT
|
|
);
|
|
CREATE TABLE issue_labels (
|
|
issue_id INTEGER NOT NULL REFERENCES issues(id),
|
|
label_id INTEGER NOT NULL REFERENCES labels(id),
|
|
PRIMARY KEY(issue_id, label_id)
|
|
);
|
|
|
|
CREATE TABLE documents (
|
|
id INTEGER PRIMARY KEY,
|
|
source_type TEXT NOT NULL,
|
|
source_id INTEGER NOT NULL,
|
|
project_id INTEGER NOT NULL,
|
|
author_username TEXT,
|
|
label_names TEXT,
|
|
created_at INTEGER,
|
|
updated_at INTEGER,
|
|
url TEXT,
|
|
title TEXT,
|
|
content_text TEXT NOT NULL,
|
|
content_hash TEXT NOT NULL,
|
|
labels_hash TEXT NOT NULL DEFAULT '',
|
|
paths_hash TEXT NOT NULL DEFAULT '',
|
|
is_truncated INTEGER NOT NULL DEFAULT 0,
|
|
truncated_reason TEXT,
|
|
UNIQUE(source_type, source_id)
|
|
);
|
|
CREATE TABLE document_labels (
|
|
document_id INTEGER NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
|
|
label_name TEXT NOT NULL,
|
|
PRIMARY KEY(document_id, label_name)
|
|
);
|
|
CREATE TABLE document_paths (
|
|
document_id INTEGER NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
|
|
path TEXT NOT NULL,
|
|
PRIMARY KEY(document_id, path)
|
|
);
|
|
CREATE TABLE dirty_sources (
|
|
source_type TEXT NOT NULL,
|
|
source_id INTEGER NOT NULL,
|
|
queued_at INTEGER NOT NULL,
|
|
attempt_count INTEGER NOT NULL DEFAULT 0,
|
|
last_attempt_at INTEGER,
|
|
last_error TEXT,
|
|
next_attempt_at INTEGER,
|
|
PRIMARY KEY(source_type, source_id)
|
|
);
|
|
CREATE INDEX idx_dirty_sources_next_attempt ON dirty_sources(next_attempt_at);
|
|
",
|
|
)
|
|
.unwrap();
|
|
conn
|
|
}
|
|
|
|
/// Simulate the OLD approach: individual INSERT per label
|
|
fn insert_labels_individual(conn: &Connection, doc_id: i64, labels: &[&str]) {
|
|
conn.execute(
|
|
"DELETE FROM document_labels WHERE document_id = ?1",
|
|
[doc_id],
|
|
)
|
|
.unwrap();
|
|
for label in labels {
|
|
conn.execute(
|
|
"INSERT INTO document_labels (document_id, label_name) VALUES (?1, ?2)",
|
|
rusqlite::params![doc_id, label],
|
|
)
|
|
.unwrap();
|
|
}
|
|
}
|
|
|
|
/// Simulate the NEW approach: batch INSERT
|
|
fn insert_labels_batch(conn: &Connection, doc_id: i64, labels: &[&str]) {
|
|
conn.execute(
|
|
"DELETE FROM document_labels WHERE document_id = ?1",
|
|
[doc_id],
|
|
)
|
|
.unwrap();
|
|
if !labels.is_empty() {
|
|
let placeholders: String = labels
|
|
.iter()
|
|
.enumerate()
|
|
.map(|(i, _)| format!("(?1, ?{})", i + 2))
|
|
.collect::<Vec<_>>()
|
|
.join(", ");
|
|
let sql = format!(
|
|
"INSERT INTO document_labels (document_id, label_name) VALUES {}",
|
|
placeholders
|
|
);
|
|
let mut params: Vec<Box<dyn rusqlite::types::ToSql>> = vec![Box::new(doc_id)];
|
|
for label in labels {
|
|
params.push(Box::new(*label));
|
|
}
|
|
let param_refs: Vec<&dyn rusqlite::types::ToSql> =
|
|
params.iter().map(|p| p.as_ref()).collect();
|
|
conn.execute(&sql, param_refs.as_slice()).unwrap();
|
|
}
|
|
}
|
|
|
|
/// Simulate OLD string building: format! + push_str
|
|
/// Simulate OLD string building: a `format!` per line, each appended via
/// `push_str` (one temporary String allocation per appended line).
///
/// The style here is the point — this is the baseline the benchmark
/// compares against `build_content_new`, so it must stay format!+push_str.
/// Both builders must produce byte-identical output (asserted in the test).
fn build_content_old(
    iid: i64,
    title: &str,
    project: &str,
    labels: &str,
    state: &str,
    author: &str,
    url: &str,
) -> String {
    // Header lines: issue marker, title, project path.
    let mut content = format!("[[Issue]] #{}: {}\nProject: {}\n", iid, title, project);
    // Each push_str below allocates a throwaway String via format! first.
    content.push_str(&format!("URL: {}\n", url));
    content.push_str(&format!("Labels: {}\n", labels));
    content.push_str(&format!("State: {}\n", state));
    content.push_str(&format!("Author: @{}\n", author));
    content
}
|
|
|
|
/// Simulate NEW string building: writeln! directly
|
|
/// Simulate NEW string building: `writeln!` straight into the buffer, so no
/// intermediate String is allocated per line. Must emit output byte-identical
/// to `build_content_old`.
fn build_content_new(
    iid: i64,
    title: &str,
    project: &str,
    labels: &str,
    state: &str,
    author: &str,
    url: &str,
) -> String {
    use std::fmt::Write as _;
    let mut buf = format!("[[Issue]] #{iid}: {title}\nProject: {project}\n");
    // fmt::Write into a String cannot fail, so the Results are discarded.
    let _ = writeln!(buf, "URL: {url}");
    let _ = writeln!(buf, "Labels: {labels}");
    let _ = writeln!(buf, "State: {state}");
    let _ = writeln!(buf, "Author: @{author}");
    buf
}
|
|
|
|
/// Representative label sets of varying cardinality (2–6 labels) that the
/// insert benchmarks cycle through, so both candidates see a mix of sizes.
const LABEL_SETS: &[&[&str]] = &[
    &["bug", "critical", "backend", "needs-review", "p1"],
    &["feature", "frontend", "design", "ux"],
    &["bug", "database", "performance"],
    &["docs", "api"],
    &[
        "infrastructure",
        "ci-cd",
        "devops",
        "monitoring",
        "alerting",
        "sre",
    ],
];
|
|
|
|
#[test]
fn bench_label_insert_individual_vs_batch() {
    let conn = setup_db();

    // Seed one issue plus one document row for the labels to reference.
    conn.execute(
        "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, created_at, updated_at, last_seen_at)
         VALUES (1, 10, 1, 42, 'Test Issue', 'opened', 1000, 2000, 3000)",
        [],
    )
    .unwrap();
    conn.execute(
        "INSERT INTO documents (id, source_type, source_id, project_id, content_text, content_hash, labels_hash, paths_hash)
         VALUES (1, 'issue', 1, 1, 'test content', 'hash1', 'lhash1', 'phash1')",
        [],
    )
    .unwrap();

    let iterations = 5000;

    // Warm-up pass so neither candidate pays first-run costs.
    for labels in LABEL_SETS {
        insert_labels_individual(&conn, 1, labels);
        insert_labels_batch(&conn, 1, labels);
    }

    // Time one candidate over `iterations` passes, cycling the label sets.
    let run = |insert: &dyn Fn(&Connection, i64, &[&str])| {
        let t0 = Instant::now();
        for i in 0..iterations {
            insert(&conn, 1, LABEL_SETS[i % LABEL_SETS.len()]);
        }
        t0.elapsed()
    };

    let individual_elapsed = run(&insert_labels_individual);
    let batch_elapsed = run(&insert_labels_batch);

    let speedup = individual_elapsed.as_nanos() as f64 / batch_elapsed.as_nanos() as f64;

    println!(
        "\n=== Label INSERT Benchmark ({} iterations) ===",
        iterations
    );
    println!("Individual INSERTs: {:?}", individual_elapsed);
    println!("Batch INSERT: {:?}", batch_elapsed);
    println!("Speedup: {:.2}x", speedup);
    println!();

    // Verify correctness: both approaches must leave the same rows behind.
    let fetch_sorted = || -> Vec<String> {
        conn.prepare("SELECT label_name FROM document_labels WHERE document_id = 1 ORDER BY label_name")
            .unwrap()
            .query_map([], |row| row.get(0))
            .unwrap()
            .collect::<Result<Vec<_>, _>>()
            .unwrap()
    };

    insert_labels_individual(&conn, 1, &["a", "b", "c"]);
    let individual_labels = fetch_sorted();

    insert_labels_batch(&conn, 1, &["a", "b", "c"]);
    let batch_labels = fetch_sorted();

    assert_eq!(
        individual_labels, batch_labels,
        "Both approaches must produce identical results"
    );
}
|
|
|
|
#[test]
fn bench_string_building_old_vs_new() {
    let iterations = 50_000;

    // Fixed inputs used by the timed loops below.
    const TITLE: &str = "Fix authentication bug in login flow with extended description";
    const PROJECT: &str = "mygroup/myproject";
    const LABELS: &str = "[\"bug\",\"auth\",\"critical\",\"backend\",\"needs-review\"]";
    const URL: &str = "https://gitlab.example.com/mygroup/myproject/-/issues/42";

    // Warm up both builders with a shorter input.
    for _ in 0..100 {
        let _ = build_content_old(
            42,
            "Fix authentication bug in login flow",
            PROJECT,
            "[\"bug\",\"auth\",\"critical\"]",
            "opened",
            "alice",
            URL,
        );
        let _ = build_content_new(
            42,
            "Fix authentication bug in login flow",
            PROJECT,
            "[\"bug\",\"auth\",\"critical\"]",
            "opened",
            "alice",
            URL,
        );
    }

    // Time one builder; black_box keeps the optimizer from discarding output.
    let bench = |build: fn(i64, &str, &str, &str, &str, &str, &str) -> String| {
        let t0 = Instant::now();
        for i in 0..iterations {
            let s = build(i as i64, TITLE, PROJECT, LABELS, "opened", "alice", URL);
            std::hint::black_box(s);
        }
        t0.elapsed()
    };

    let old_elapsed = bench(build_content_old);
    let new_elapsed = bench(build_content_new);

    let speedup = old_elapsed.as_nanos() as f64 / new_elapsed.as_nanos() as f64;

    println!(
        "\n=== String Building Benchmark ({} iterations) ===",
        iterations
    );
    println!("format!+push_str: {:?}", old_elapsed);
    println!("writeln!: {:?}", new_elapsed);
    println!("Speedup: {:.2}x", speedup);
    println!();

    // Verify correctness: both builders must produce identical output.
    let old = build_content_old(
        42,
        "Test",
        "group/proj",
        "[\"bug\"]",
        "opened",
        "alice",
        "https://example.com",
    );
    let new = build_content_new(
        42,
        "Test",
        "group/proj",
        "[\"bug\"]",
        "opened",
        "alice",
        "https://example.com",
    );
    assert_eq!(old, new, "Both approaches must produce identical strings");
}
|
|
|
|
#[test]
fn bench_prepare_vs_prepare_cached() {
    let conn = setup_db();

    // Seed 100 issue-backed documents to query against.
    for i in 1..=100 {
        conn.execute(
            "INSERT INTO issues (id, gitlab_id, project_id, iid, title, state, created_at, updated_at, last_seen_at)
             VALUES (?1, ?2, 1, ?1, 'Test', 'opened', 1000, 2000, 3000)",
            rusqlite::params![i, i * 10],
        )
        .unwrap();
        conn.execute(
            "INSERT INTO documents (source_type, source_id, project_id, content_text, content_hash, labels_hash, paths_hash)
             VALUES ('issue', ?1, 1, 'content', ?2, 'lh', 'ph')",
            rusqlite::params![i, format!("hash_{}", i)],
        )
        .unwrap();
    }

    let iterations = 10_000;
    let sql = "SELECT content_hash FROM documents WHERE source_type = ?1 AND source_id = ?2";

    // Uncached: the SQL text is re-parsed into a fresh Statement every pass.
    let t0 = Instant::now();
    for i in 0..iterations {
        let sid = ((i % 100) + 1) as i64;
        let _hash: Option<String> = conn
            .prepare(sql)
            .unwrap()
            .query_row(rusqlite::params!["issue", sid], |row| row.get(0))
            .ok();
    }
    let uncached_elapsed = t0.elapsed();

    // Cached: after the first pass the compiled statement is reused.
    let t0 = Instant::now();
    for i in 0..iterations {
        let sid = ((i % 100) + 1) as i64;
        let _hash: Option<String> = conn
            .prepare_cached(sql)
            .unwrap()
            .query_row(rusqlite::params!["issue", sid], |row| row.get(0))
            .ok();
    }
    let cached_elapsed = t0.elapsed();

    let speedup = uncached_elapsed.as_nanos() as f64 / cached_elapsed.as_nanos() as f64;

    println!(
        "\n=== prepare vs prepare_cached Benchmark ({} iterations) ===",
        iterations
    );
    println!("prepare(): {:?}", uncached_elapsed);
    println!("prepare_cached(): {:?}", cached_elapsed);
    println!("Speedup: {:.2}x", speedup);
    println!();
}
|