bugfix: DB column and size issues
This commit is contained in:
@@ -194,7 +194,7 @@ fn find_issue(conn: &Connection, iid: i64, project_filter: Option<&str>) -> Resu
|
||||
i.due_date, i.milestone_title,
|
||||
(SELECT COUNT(*) FROM notes n
|
||||
JOIN discussions d ON n.discussion_id = d.id
|
||||
WHERE d.noteable_type = 'Issue' AND d.noteable_id = i.id AND n.is_system = 0) AS user_notes_count,
|
||||
WHERE d.noteable_type = 'Issue' AND d.issue_id = i.id AND n.is_system = 0) AS user_notes_count,
|
||||
i.status_name, i.status_category, i.status_color,
|
||||
i.status_icon_name, i.status_synced_at
|
||||
FROM issues i
|
||||
@@ -210,7 +210,7 @@ fn find_issue(conn: &Connection, iid: i64, project_filter: Option<&str>) -> Resu
|
||||
i.due_date, i.milestone_title,
|
||||
(SELECT COUNT(*) FROM notes n
|
||||
JOIN discussions d ON n.discussion_id = d.id
|
||||
WHERE d.noteable_type = 'Issue' AND d.noteable_id = i.id AND n.is_system = 0) AS user_notes_count,
|
||||
WHERE d.noteable_type = 'Issue' AND d.issue_id = i.id AND n.is_system = 0) AS user_notes_count,
|
||||
i.status_name, i.status_category, i.status_color,
|
||||
i.status_icon_name, i.status_synced_at
|
||||
FROM issues i
|
||||
|
||||
@@ -40,6 +40,17 @@ fn max_chunks_per_document(conn: &Connection) -> Result<i64> {
|
||||
.unwrap_or(1))
|
||||
}
|
||||
|
||||
/// sqlite-vec hard limit for the KNN `k` parameter.
const SQLITE_VEC_KNN_MAX: usize = 4_096;

/// Compute the KNN `k` value from the requested limit and the max chunks per
/// document.
///
/// The per-result multiplier is `max_chunks * 3 / 2 + 1`, clamped to the range
/// `8..=200`. A zero chunk count is treated as one and a negative count is
/// replaced by its magnitude. The result is guaranteed to never exceed
/// [`SQLITE_VEC_KNN_MAX`]; a `limit` of zero yields zero.
///
/// All arithmetic saturates, so extreme inputs (for example `i64::MIN` or
/// `usize::MAX`) cannot overflow.
fn compute_knn_k(limit: usize, max_chunks_per_doc: i64) -> usize {
    // Stay in `u64` until after the clamp so the magnitude is never truncated.
    let max_chunks = max_chunks_per_doc.unsigned_abs().max(1);
    let multiplier = (max_chunks.saturating_mul(3) / 2)
        .saturating_add(1)
        .clamp(8, 200);
    // Lossless cast: `multiplier` is at most 200 here.
    let multiplier = multiplier as usize;
    limit.saturating_mul(multiplier).min(SQLITE_VEC_KNN_MAX)
}
|
||||
|
||||
pub fn search_vector(
|
||||
conn: &Connection,
|
||||
query_embedding: &[f32],
|
||||
@@ -55,8 +66,7 @@ pub fn search_vector(
|
||||
.collect();
|
||||
|
||||
let max_chunks = max_chunks_per_document(conn)?.max(1);
|
||||
let multiplier = ((max_chunks.unsigned_abs() as usize * 3 / 2) + 1).clamp(8, 200);
|
||||
let k = (limit * multiplier).min(10_000);
|
||||
let k = compute_knn_k(limit, max_chunks);
|
||||
|
||||
let mut stmt = conn.prepare(
|
||||
"SELECT rowid, distance
|
||||
@@ -124,6 +134,49 @@ mod tests {
|
||||
assert_eq!(results.len(), 2);
|
||||
}
|
||||
|
||||
/// Sweeps a grid of limits and chunk counts and checks that the computed `k`
/// stays within `SQLITE_VEC_KNN_MAX` for every combination.
#[test]
fn test_knn_k_never_exceeds_sqlite_vec_limit() {
    const LIMITS: [usize; 9] = [1, 10, 50, 100, 500, 1000, 1500, 2000, 5000];
    const CHUNK_COUNTS: [i64; 9] = [1, 2, 5, 10, 50, 100, 200, 500, 1000];

    for &limit in &LIMITS {
        for &max_chunks in &CHUNK_COUNTS {
            let k = compute_knn_k(limit, max_chunks);
            let within_bound = k <= SQLITE_VEC_KNN_MAX;
            assert!(
                within_bound,
                "k={k} exceeded limit for limit={limit}, max_chunks={max_chunks}"
            );
        }
    }
}
|
||||
|
||||
/// Regression test for a limit of 1500 with a single chunk per document.
///
/// With one chunk the multiplier sits at its floor of 8, so the raw product
/// is 1500 * 8 = 12000. That is above the sqlite-vec limit, so the result
/// must be clamped to exactly `SQLITE_VEC_KNN_MAX` rather than merely being
/// "not larger".
#[test]
fn test_knn_k_reproduces_original_bug_scenario() {
    let k = compute_knn_k(1500, 1);
    assert!(k <= SQLITE_VEC_KNN_MAX, "k={k} exceeded 4096 at RECALL_CAP with 1 chunk");
    assert_eq!(
        k, SQLITE_VEC_KNN_MAX,
        "k={k} should be clamped to exactly the sqlite-vec limit"
    );
}
|
||||
|
||||
/// One chunk per document gives a raw multiplier of 2, which is raised to
/// the floor of 8; a limit of 10 therefore yields 80.
#[test]
fn test_knn_k_small_limit_uses_minimum_multiplier() {
    let expected: usize = 80;
    assert_eq!(compute_knn_k(10, 1), expected);
}
|
||||
|
||||
/// 200 chunks per document gives a raw multiplier of 301, which is capped at
/// 200; a limit of 10 therefore yields 2000.
#[test]
fn test_knn_k_high_chunks_caps_multiplier() {
    let expected: usize = 2000;
    assert_eq!(compute_knn_k(10, 200), expected);
}
|
||||
|
||||
/// A chunk count of zero is raised to one before the multiplier is derived,
/// so the result matches the single-chunk case: 10 * 8 = 80.
#[test]
fn test_knn_k_zero_max_chunks_treated_as_one() {
    let expected: usize = 80;
    assert_eq!(compute_knn_k(10, 0), expected);
}
|
||||
|
||||
/// A negative chunk count must behave exactly like its absolute value.
///
/// A magnitude of 50 is used so the multiplier (50 * 3 / 2 + 1 = 76) falls
/// strictly inside the `8..=200` clamp range. With a small magnitude such as
/// 5 the multiplier sits at the floor of 8, which is indistinguishable from
/// the input being treated as 1, so the test would not prove anything.
#[test]
fn test_knn_k_negative_max_chunks_uses_absolute() {
    let k = compute_knn_k(10, -50);
    assert_eq!(k, compute_knn_k(10, 50));
    assert_eq!(k, 760);
}
|
||||
|
||||
fn search_vector_dedup(rows: Vec<(i64, f64)>, limit: usize) -> Vec<VectorResult> {
|
||||
let mut best: HashMap<i64, f64> = HashMap::new();
|
||||
for (rowid, distance) in rows {
|
||||
|
||||
Reference in New Issue
Block a user