Wave 4: Full CLI command implementations - fetch, list, show, search, tags, aliases, doctor, cache lifecycle (bd-16o, bd-3km, bd-1dj, bd-acf, bd-3bl, bd-30a, bd-2s6, bd-1d4)

This commit is contained in:
teernisse
2026-02-12 12:54:10 -05:00
parent 9b29490f5f
commit faa6281790
13 changed files with 5157 additions and 110 deletions

634
src/core/search.rs Normal file
View File

@@ -0,0 +1,634 @@
use serde::Serialize;
use super::indexer::method_rank;
use super::spec::SpecIndex;
// ---------------------------------------------------------------------------
// Public types
// ---------------------------------------------------------------------------
/// One ranked hit produced by `SearchEngine::search`.
#[derive(Debug, Clone, Serialize)]
pub struct SearchResult {
/// Whether this hit is an endpoint or a schema.
pub result_type: SearchResultType,
/// Endpoint path for endpoint hits, schema name for schema hits.
pub name: String,
/// The endpoint's method; `None` for schema hits.
pub method: Option<String>,
/// The endpoint's summary, if it has one; always `None` for schema hits.
pub summary: Option<String>,
/// 1-based position in the returned list, assigned after sorting and
/// applying the limit.
pub rank: usize,
/// Summed field weights multiplied by the term-coverage boost, scaled by
/// 100 and rounded to an integer.
pub score: u32,
/// One entry for every (term, field) pair that matched.
pub matches: Vec<Match>,
}
/// Kind of item a `SearchResult` refers to. Serialized in `snake_case`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum SearchResultType {
/// An endpoint from the spec index.
Endpoint,
/// A schema from the spec index.
Schema,
}
impl SearchResultType {
fn ordinal(self) -> u8 {
match self {
Self::Endpoint => 0,
Self::Schema => 1,
}
}
}
/// A single place where a search term was found.
#[derive(Debug, Clone, Serialize)]
pub struct Match {
/// Name of the field that matched: `"path"`, `"summary"`, `"description"`
/// or `"schema_name"`.
pub field: String,
/// Excerpt of the field's text around the matched term.
pub snippet: String,
}
/// Options controlling which fields a search inspects and how terms are
/// compared.
#[derive(Debug, Clone)]
pub struct SearchOptions {
    /// Match terms against endpoint paths. Endpoint summaries are searched
    /// whenever this or `search_descriptions` is enabled.
    pub search_paths: bool,
    /// Match terms against endpoint descriptions.
    pub search_descriptions: bool,
    /// Match terms against schema names.
    pub search_schemas: bool,
    /// Compare text exactly as written instead of lowercasing both sides.
    pub case_sensitive: bool,
    /// Treat the whole query as one term instead of splitting on whitespace.
    pub exact: bool,
    /// Maximum number of results to return.
    pub limit: usize,
}

impl Default for SearchOptions {
    /// Search everything, case-insensitively, with whitespace-split terms,
    /// returning at most 20 results.
    fn default() -> Self {
        SearchOptions {
            limit: 20,
            exact: false,
            case_sensitive: false,
            search_schemas: true,
            search_descriptions: true,
            search_paths: true,
        }
    }
}
// ---------------------------------------------------------------------------
// Field weights
// ---------------------------------------------------------------------------
/// Score added each time a term matches an endpoint's path.
const WEIGHT_PATH: f64 = 10.0;
/// Score added each time a term matches an endpoint's summary.
const WEIGHT_SUMMARY: f64 = 5.0;
/// Score added each time a term matches an endpoint's description.
const WEIGHT_DESCRIPTION: f64 = 2.0;
/// Score added each time a term matches a schema's name.
const WEIGHT_SCHEMA_NAME: f64 = 8.0;
// ---------------------------------------------------------------------------
// Search engine
// ---------------------------------------------------------------------------
/// Runs text searches over a borrowed `SpecIndex`.
pub struct SearchEngine<'a> {
/// The index whose endpoints and schemas are searched.
index: &'a SpecIndex,
}
impl<'a> SearchEngine<'a> {
pub fn new(index: &'a SpecIndex) -> Self {
Self { index }
}
pub fn search(&self, query: &str, opts: &SearchOptions) -> Vec<SearchResult> {
let query = query.trim();
if query.is_empty() {
return Vec::new();
}
let terms = tokenize(query, opts.exact);
let total_terms = terms.len();
let mut results: Vec<SearchResult> = Vec::new();
// Search endpoints
if opts.search_paths || opts.search_descriptions {
for ep in &self.index.endpoints {
let mut raw_score: f64 = 0.0;
let mut matched_terms: usize = 0;
let mut matches: Vec<Match> = Vec::new();
for term in &terms {
let mut term_matched = false;
if opts.search_paths && contains_term(&ep.path, term, opts.case_sensitive) {
raw_score += WEIGHT_PATH;
matches.push(Match {
field: "path".into(),
snippet: safe_snippet(&ep.path, term, opts.case_sensitive),
});
term_matched = true;
}
if (opts.search_descriptions || opts.search_paths)
&& let Some(ref summary) = ep.summary
&& contains_term(summary, term, opts.case_sensitive)
{
raw_score += WEIGHT_SUMMARY;
matches.push(Match {
field: "summary".into(),
snippet: safe_snippet(summary, term, opts.case_sensitive),
});
term_matched = true;
}
if opts.search_descriptions
&& let Some(ref desc) = ep.description
&& contains_term(desc, term, opts.case_sensitive)
{
raw_score += WEIGHT_DESCRIPTION;
matches.push(Match {
field: "description".into(),
snippet: safe_snippet(desc, term, opts.case_sensitive),
});
term_matched = true;
}
if term_matched {
matched_terms += 1;
}
}
if raw_score > 0.0 {
let coverage_boost = 1.0 + (matched_terms as f64 / total_terms.max(1) as f64);
let final_score = raw_score * coverage_boost;
let quantized = (final_score * 100.0).round() as u32;
results.push(SearchResult {
result_type: SearchResultType::Endpoint,
name: ep.path.clone(),
method: Some(ep.method.clone()),
summary: ep.summary.clone(),
rank: 0, // assigned after sort
score: quantized,
matches,
});
}
}
}
// Search schemas
if opts.search_schemas {
for schema in &self.index.schemas {
let mut raw_score: f64 = 0.0;
let mut matched_terms: usize = 0;
let mut matches: Vec<Match> = Vec::new();
for term in &terms {
if contains_term(&schema.name, term, opts.case_sensitive) {
raw_score += WEIGHT_SCHEMA_NAME;
matches.push(Match {
field: "schema_name".into(),
snippet: safe_snippet(&schema.name, term, opts.case_sensitive),
});
matched_terms += 1;
}
}
if raw_score > 0.0 {
let coverage_boost = 1.0 + (matched_terms as f64 / total_terms.max(1) as f64);
let final_score = raw_score * coverage_boost;
let quantized = (final_score * 100.0).round() as u32;
results.push(SearchResult {
result_type: SearchResultType::Schema,
name: schema.name.clone(),
method: None,
summary: None,
rank: 0,
score: quantized,
matches,
});
}
}
}
// Deterministic sort: score DESC, type ordinal ASC, name ASC, method_rank ASC
results.sort_by(|a, b| {
b.score
.cmp(&a.score)
.then_with(|| a.result_type.ordinal().cmp(&b.result_type.ordinal()))
.then_with(|| a.name.cmp(&b.name))
.then_with(|| {
let a_rank = a.method.as_deref().map(method_rank).unwrap_or(u8::MAX);
let b_rank = b.method.as_deref().map(method_rank).unwrap_or(u8::MAX);
a_rank.cmp(&b_rank)
})
});
// Assign 1-based ranks and apply limit
results.truncate(opts.limit);
for (i, result) in results.iter_mut().enumerate() {
result.rank = i + 1;
}
results
}
}
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
/// Split `query` into search terms.
///
/// In exact mode the whole query is returned as a single term; otherwise it
/// is split on whitespace.
fn tokenize(query: &str, exact: bool) -> Vec<String> {
    if !exact {
        return query.split_whitespace().map(str::to_owned).collect();
    }
    vec![query.to_owned()]
}
/// Report whether `needle` occurs in `haystack`.
///
/// When `case_sensitive` is false both strings are lowercased before the
/// comparison.
fn contains_term(haystack: &str, needle: &str, case_sensitive: bool) -> bool {
    if !case_sensitive {
        let folded_needle = needle.to_lowercase();
        return haystack.to_lowercase().contains(folded_needle.as_str());
    }
    haystack.contains(needle)
}
/// Build a Unicode-safe snippet around the first occurrence of `needle` in
/// `haystack`.
///
/// The snippet is at most 50 characters long and is positioned so the match
/// sits roughly in the middle. `"..."` is prepended and/or appended when text
/// was cut off on that side. If `needle` does not occur, the first 50
/// characters of `haystack` are returned without ellipses.
///
/// When `case_sensitive` is false the search runs on lowercased copies of
/// both strings, and the snippet is still taken from the original `haystack`.
fn safe_snippet(haystack: &str, needle: &str, case_sensitive: bool) -> String {
    const WINDOW: usize = 50;

    let found_at = if case_sensitive {
        haystack.find(needle)
    } else {
        haystack.to_lowercase().find(&needle.to_lowercase())
    };
    let Some(byte_pos) = found_at else {
        return haystack.chars().take(WINDOW).collect();
    };

    // Translate the match offset into a char index of the ORIGINAL haystack.
    let char_start = if case_sensitive {
        haystack[..byte_pos].chars().count()
    } else {
        // `byte_pos` is an offset into the lowercased text. Lowercasing can
        // change a character's UTF-8 length (e.g. U+212A KELVIN SIGN -> 'k'),
        // so it must not be used to slice `haystack` directly. Instead, count
        // the original characters whose lowercase form ends at or before the
        // match offset.
        let mut lowered_len = 0usize;
        haystack
            .chars()
            .take_while(|c| {
                lowered_len += c.to_lowercase().map(char::len_utf8).sum::<usize>();
                lowered_len <= byte_pos
            })
            .count()
    };

    let total_chars = haystack.chars().count();

    // Centre the window around the match: half of whatever room the needle
    // leaves goes to the left of it.
    let context_budget = WINDOW.saturating_sub(needle.chars().count());
    let snippet_start = char_start.saturating_sub(context_budget / 2);
    let snippet_end = (snippet_start + WINDOW).min(total_chars);

    let prefix = if snippet_start > 0 { "..." } else { "" };
    let suffix = if snippet_end < total_chars { "..." } else { "" };
    let snippet_body: String = haystack
        .chars()
        .skip(snippet_start)
        .take(snippet_end - snippet_start)
        .collect();

    format!("{prefix}{snippet_body}{suffix}")
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
#[cfg(test)]
mod tests {
use super::*;
use crate::core::spec::{
IndexInfo, IndexedEndpoint, IndexedParam, IndexedSchema, IndexedTag, SpecIndex,
};
/// Build a small in-memory Petstore index used as the fixture for the search
/// tests: four endpoints (`GET /pets`, `POST /pets`, `GET /pets/{petId}`,
/// `GET /store/inventory`), three schemas (`Pet`, `Error`, `PetList`) and two
/// tags.
fn petstore_index() -> SpecIndex {
SpecIndex {
index_version: 1,
generation: 1,
content_hash: "sha256:test".into(),
openapi: "3.0.3".into(),
info: IndexInfo {
title: "Petstore".into(),
version: "1.0.0".into(),
},
endpoints: vec![
IndexedEndpoint {
path: "/pets".into(),
method: "GET".into(),
summary: Some("List all pets".into()),
description: Some("Returns a list of pets from the store".into()),
operation_id: Some("listPets".into()),
tags: vec!["pets".into()],
deprecated: false,
parameters: vec![IndexedParam {
name: "limit".into(),
location: "query".into(),
required: false,
description: Some("Max items".into()),
}],
request_body_required: false,
request_body_content_types: vec![],
security_schemes: vec![],
security_required: false,
operation_ptr: "/paths/~1pets/get".into(),
},
IndexedEndpoint {
path: "/pets".into(),
method: "POST".into(),
summary: Some("Create a pet".into()),
description: None,
operation_id: Some("createPet".into()),
tags: vec!["pets".into()],
deprecated: false,
parameters: vec![],
request_body_required: true,
request_body_content_types: vec!["application/json".into()],
security_schemes: vec![],
security_required: false,
operation_ptr: "/paths/~1pets/post".into(),
},
IndexedEndpoint {
path: "/pets/{petId}".into(),
method: "GET".into(),
summary: Some("Info for a specific pet".into()),
description: Some("Detailed information about a single pet".into()),
operation_id: Some("showPetById".into()),
tags: vec!["pets".into()],
deprecated: false,
parameters: vec![IndexedParam {
name: "petId".into(),
location: "path".into(),
required: true,
description: Some("The id of the pet".into()),
}],
request_body_required: false,
request_body_content_types: vec![],
security_schemes: vec![],
security_required: false,
operation_ptr: "/paths/~1pets~1{petId}/get".into(),
},
IndexedEndpoint {
path: "/store/inventory".into(),
method: "GET".into(),
summary: Some("Returns store inventory".into()),
description: None,
operation_id: Some("getInventory".into()),
tags: vec!["store".into()],
deprecated: false,
parameters: vec![],
request_body_required: false,
request_body_content_types: vec![],
security_schemes: vec![],
security_required: false,
operation_ptr: "/paths/~1store~1inventory/get".into(),
},
],
schemas: vec![
IndexedSchema {
name: "Pet".into(),
schema_ptr: "/components/schemas/Pet".into(),
},
IndexedSchema {
name: "Error".into(),
schema_ptr: "/components/schemas/Error".into(),
},
IndexedSchema {
name: "PetList".into(),
schema_ptr: "/components/schemas/PetList".into(),
},
],
tags: vec![
IndexedTag {
name: "pets".into(),
description: Some("Pet operations".into()),
endpoint_count: 3,
},
IndexedTag {
name: "store".into(),
description: Some("Store operations".into()),
endpoint_count: 1,
},
],
}
}
/// A default search for "pet" returns results, every result carries a
/// snippet containing "pet", and ranks run 1, 2, 3, ...
#[test]
fn test_search_basic() {
let index = petstore_index();
let engine = SearchEngine::new(&index);
let opts = SearchOptions::default();
let results = engine.search("pet", &opts);
assert!(
!results.is_empty(),
"should find 'pet' in petstore endpoints"
);
// All results should mention pet somewhere
for r in &results {
let has_pet = r
.matches
.iter()
.any(|m| m.snippet.to_lowercase().contains("pet"));
assert!(has_pet, "result {:?} should match 'pet'", r.name);
}
// Ranks should be sequential 1-based
for (i, r) in results.iter().enumerate() {
assert_eq!(r.rank, i + 1, "rank should be 1-based sequential");
}
}
/// Running the same query twice yields the same scores, ranks, names and
/// methods in the same order.
#[test]
fn test_search_scores_deterministic() {
let index = petstore_index();
let engine = SearchEngine::new(&index);
let opts = SearchOptions::default();
let run1 = engine.search("pet", &opts);
let run2 = engine.search("pet", &opts);
assert_eq!(run1.len(), run2.len());
for (a, b) in run1.iter().zip(run2.iter()) {
assert_eq!(a.score, b.score, "scores should be identical across runs");
assert_eq!(a.rank, b.rank, "ranks should be identical across runs");
assert_eq!(a.name, b.name, "names should be identical across runs");
assert_eq!(
a.method, b.method,
"methods should be identical across runs"
);
}
}
/// Exact mode treats "list all" as one phrase: it still finds a result, and
/// never returns more results than the whitespace-split search.
#[test]
fn test_search_exact_mode() {
let index = petstore_index();
let engine = SearchEngine::new(&index);
// "list all" as two tokens: should match broadly
let loose_opts = SearchOptions {
exact: false,
..SearchOptions::default()
};
let loose = engine.search("list all", &loose_opts);
// "list all" as exact phrase: only matches if that exact phrase appears
let exact_opts = SearchOptions {
exact: true,
..SearchOptions::default()
};
let exact = engine.search("list all", &exact_opts);
// Exact should be a subset of (or equal to) loose results
assert!(
exact.len() <= loose.len(),
"exact mode should return fewer or equal results"
);
// The exact match should find "List all pets" summary
assert!(
!exact.is_empty(),
"exact 'list all' should match 'List all pets'"
);
}
/// A case-sensitive search for "PET" returns strictly fewer results than the
/// case-insensitive one.
#[test]
fn test_search_case_sensitive() {
let index = petstore_index();
let engine = SearchEngine::new(&index);
// Case-insensitive (default): "PET" matches "pet", "/pets", etc.
let insensitive = SearchOptions {
case_sensitive: false,
..SearchOptions::default()
};
let results_insensitive = engine.search("PET", &insensitive);
// Case-sensitive: "PET" should NOT match lowercase "pet" or "/pets"
let sensitive = SearchOptions {
case_sensitive: true,
..SearchOptions::default()
};
let results_sensitive = engine.search("PET", &sensitive);
assert!(
results_sensitive.len() < results_insensitive.len(),
"case-sensitive 'PET' should match fewer results than case-insensitive"
);
}
/// `safe_snippet` handles a haystack containing multi-byte characters (emoji)
/// without panicking and keeps the search term in the snippet.
#[test]
fn test_safe_snippet_unicode() {
// Emoji and multi-byte characters
let haystack = "Hello \u{1F600} world of pets and \u{1F431} cats everywhere";
let snippet = safe_snippet(haystack, "pets", false);
assert!(
snippet.contains("pets"),
"snippet should contain the search term"
);
// Must not panic on multi-byte boundaries
}
/// For a match in the middle of a long haystack the snippet contains the
/// needle, carries ellipsis markers, and its body is at most 50 characters.
#[test]
fn test_safe_snippet_truncation() {
let long = "a".repeat(200);
let haystack = format!("{long}needle{long}");
let snippet = safe_snippet(&haystack, "needle", false);
assert!(snippet.contains("needle"));
assert!(
snippet.contains("..."),
"should have ellipsis for truncation"
);
// Snippet should be around 50 chars + ellipsis markers
let body_len = snippet.replace("...", "").chars().count();
assert!(body_len <= 50, "snippet body should be at most 50 chars");
}
/// Empty and whitespace-only queries return no results.
#[test]
fn test_empty_query_returns_empty() {
let index = petstore_index();
let engine = SearchEngine::new(&index);
let opts = SearchOptions::default();
assert!(engine.search("", &opts).is_empty());
assert!(engine.search(" ", &opts).is_empty());
}
/// With `limit: 2` the search never returns more than two results.
#[test]
fn test_search_limit() {
let index = petstore_index();
let engine = SearchEngine::new(&index);
let opts = SearchOptions {
limit: 2,
..SearchOptions::default()
};
let results = engine.search("pet", &opts);
assert!(results.len() <= 2, "should respect limit");
}
/// With path and description search disabled, only schema results come back.
#[test]
fn test_search_schemas_only() {
let index = petstore_index();
let engine = SearchEngine::new(&index);
let opts = SearchOptions {
search_paths: false,
search_descriptions: false,
search_schemas: true,
..SearchOptions::default()
};
let results = engine.search("Pet", &opts);
assert!(!results.is_empty());
for r in &results {
assert_eq!(
r.result_type,
SearchResultType::Schema,
"should only return schemas"
);
}
}
/// With only path search enabled, every result is an endpoint.
#[test]
fn test_search_paths_only() {
let index = petstore_index();
let engine = SearchEngine::new(&index);
let opts = SearchOptions {
search_paths: true,
search_descriptions: false,
search_schemas: false,
..SearchOptions::default()
};
let results = engine.search("store", &opts);
assert!(!results.is_empty());
for r in &results {
assert_eq!(
r.result_type,
SearchResultType::Endpoint,
"should only return endpoints"
);
}
}
/// An item matching every query term must outrank items matching only some.
///
/// For the query "pets list", `GET /pets` is the only fixture entry whose
/// fields contain both terms (its summary is "List all pets"), so it must be
/// ranked first with a strictly higher score than the runner-up.
#[test]
fn test_multi_term_coverage_boost() {
    let index = petstore_index();
    let engine = SearchEngine::new(&index);
    let opts = SearchOptions::default();

    // "pets list" has two terms; an endpoint matching both gets full coverage.
    let results = engine.search("pets list", &opts);
    assert!(
        results.len() >= 2,
        "fixture should yield both full and partial matches"
    );

    let top = &results[0];
    assert_eq!(top.name, "/pets", "full-coverage endpoint should rank first");
    assert_eq!(top.method.as_deref(), Some("GET"));
    assert!(
        top.score > results[1].score,
        "matching both terms should score strictly higher than matching one"
    );

    // The whole list must be ordered by score, descending.
    assert!(
        results.windows(2).all(|pair| pair[0].score >= pair[1].score),
        "results should be sorted by score descending"
    );
}
/// A query that appears nowhere in the fixture returns no results.
#[test]
fn test_no_match_returns_empty() {
let index = petstore_index();
let engine = SearchEngine::new(&index);
let opts = SearchOptions::default();
let results = engine.search("zzzznotfound", &opts);
assert!(
results.is_empty(),
"gibberish query should return no results"
);
}
}