Wave 4: Full CLI command implementations - fetch, list, show, search, tags, aliases, doctor, cache lifecycle (bd-16o, bd-3km, bd-1dj, bd-acf, bd-3bl, bd-30a, bd-2s6, bd-1d4)

This commit is contained in:
teernisse
2026-02-12 12:54:10 -05:00
parent 9b29490f5f
commit faa6281790
13 changed files with 5157 additions and 110 deletions

View File

@@ -236,10 +236,7 @@ impl CacheManager {
/// Validates that index_version, generation, and index_hash all match
/// between meta and the on-disk index. Returns `AliasNotFound` if
/// meta.json is missing, `CacheIntegrity` on any mismatch.
pub fn load_index(
&self,
alias: &str,
) -> Result<(SpecIndex, CacheMetadata), SwaggerCliError> {
pub fn load_index(&self, alias: &str) -> Result<(SpecIndex, CacheMetadata), SwaggerCliError> {
validate_alias(alias)?;
let dir = self.alias_dir(alias);
@@ -248,24 +245,16 @@ impl CacheManager {
if e.kind() == std::io::ErrorKind::NotFound {
SwaggerCliError::AliasNotFound(alias.to_string())
} else {
SwaggerCliError::Cache(format!(
"Failed to read {}: {e}",
meta_path.display()
))
SwaggerCliError::Cache(format!("Failed to read {}: {e}", meta_path.display()))
}
})?;
let meta: CacheMetadata = serde_json::from_slice(&meta_bytes).map_err(|e| {
SwaggerCliError::CacheIntegrity(format!(
"Corrupt meta.json for alias '{alias}': {e}"
))
SwaggerCliError::CacheIntegrity(format!("Corrupt meta.json for alias '{alias}': {e}"))
})?;
let index_path = dir.join("index.json");
let index_bytes = fs::read(&index_path).map_err(|e| {
SwaggerCliError::Cache(format!(
"Failed to read {}: {e}",
index_path.display()
))
SwaggerCliError::Cache(format!("Failed to read {}: {e}", index_path.display()))
})?;
let actual_hash = compute_hash(&index_bytes);
@@ -277,9 +266,7 @@ impl CacheManager {
}
let index: SpecIndex = serde_json::from_slice(&index_bytes).map_err(|e| {
SwaggerCliError::CacheIntegrity(format!(
"Corrupt index.json for alias '{alias}': {e}"
))
SwaggerCliError::CacheIntegrity(format!("Corrupt index.json for alias '{alias}': {e}"))
})?;
if meta.index_version != index.index_version {
@@ -317,10 +304,7 @@ impl CacheManager {
) -> Result<serde_json::Value, SwaggerCliError> {
let raw_path = self.alias_dir(alias).join("raw.json");
let raw_bytes = fs::read(&raw_path).map_err(|e| {
SwaggerCliError::Cache(format!(
"Failed to read {}: {e}",
raw_path.display()
))
SwaggerCliError::Cache(format!("Failed to read {}: {e}", raw_path.display()))
})?;
let actual_hash = compute_hash(&raw_bytes);
@@ -331,13 +315,9 @@ impl CacheManager {
)));
}
let value: serde_json::Value =
serde_json::from_slice(&raw_bytes).map_err(|e| {
SwaggerCliError::Cache(format!(
"Failed to parse raw.json for '{}': {e}",
alias
))
})?;
let value: serde_json::Value = serde_json::from_slice(&raw_bytes).map_err(|e| {
SwaggerCliError::Cache(format!("Failed to parse raw.json for '{}': {e}", alias))
})?;
Ok(value)
}
@@ -697,14 +677,12 @@ mod tests {
manager
.write_cache(
"api1", b"src1", b"{}", &index, None, "1.0", "API 1", "json",
None, None, None,
"api1", b"src1", b"{}", &index, None, "1.0", "API 1", "json", None, None, None,
)
.unwrap();
manager
.write_cache(
"api2", b"src2", b"{}", &index, None, "2.0", "API 2", "yaml",
None, None, None,
"api2", b"src2", b"{}", &index, None, "2.0", "API 2", "yaml", None, None, None,
)
.unwrap();

View File

@@ -72,19 +72,18 @@ fn is_blocked_mapped_v4(v6: &std::net::Ipv6Addr) -> bool {
// ---------------------------------------------------------------------------
fn validate_url(url: &str, allow_insecure_http: bool) -> Result<Url, SwaggerCliError> {
let parsed = Url::parse(url).map_err(|e| {
SwaggerCliError::InvalidSpec(format!("invalid URL '{url}': {e}"))
})?;
let parsed = Url::parse(url)
.map_err(|e| SwaggerCliError::InvalidSpec(format!("invalid URL '{url}': {e}")))?;
match parsed.scheme() {
"https" => Ok(parsed),
"http" if allow_insecure_http => Ok(parsed),
"http" => Err(SwaggerCliError::PolicyBlocked(
format!("HTTP is not allowed for '{url}'. Use --allow-insecure-http to override."),
)),
other => Err(SwaggerCliError::InvalidSpec(
format!("unsupported scheme '{other}' in URL '{url}'"),
)),
"http" => Err(SwaggerCliError::PolicyBlocked(format!(
"HTTP is not allowed for '{url}'. Use --allow-insecure-http to override."
))),
other => Err(SwaggerCliError::InvalidSpec(format!(
"unsupported scheme '{other}' in URL '{url}'"
))),
}
}
@@ -105,16 +104,16 @@ async fn resolve_and_check(
let addrs: Vec<_> = match lookup_host(&addr).await {
Ok(iter) => iter.collect(),
Err(e) => {
return Err(SwaggerCliError::InvalidSpec(
format!("DNS resolution failed for '{host}': {e}"),
));
return Err(SwaggerCliError::InvalidSpec(format!(
"DNS resolution failed for '{host}': {e}"
)));
}
};
if addrs.is_empty() {
return Err(SwaggerCliError::InvalidSpec(
format!("DNS resolution returned no addresses for '{host}'"),
));
return Err(SwaggerCliError::InvalidSpec(format!(
"DNS resolution returned no addresses for '{host}'"
)));
}
for socket_addr in &addrs {
@@ -178,9 +177,9 @@ impl AsyncHttpClient {
pub async fn fetch_spec(&self, url: &str) -> Result<FetchResult, SwaggerCliError> {
let parsed = validate_url(url, self.allow_insecure_http)?;
let host = parsed.host_str().ok_or_else(|| {
SwaggerCliError::InvalidSpec(format!("URL '{url}' has no host"))
})?;
let host = parsed
.host_str()
.ok_or_else(|| SwaggerCliError::InvalidSpec(format!("URL '{url}' has no host")))?;
let port = parsed.port_or_known_default().unwrap_or(443);
resolve_and_check(host, port, &self.allowed_private_hosts).await?;
@@ -215,11 +214,7 @@ impl AsyncHttpClient {
attempts += 1;
if attempts > self.max_retries {
return Err(SwaggerCliError::Network(
client
.get(url)
.send()
.await
.unwrap_err(),
client.get(url).send().await.unwrap_err(),
));
}
let delay = self.retry_delay(&response, attempts);
@@ -370,7 +365,9 @@ mod tests {
#[test]
fn test_ssrf_blocks_loopback() {
assert!(is_ip_blocked(&IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1))));
assert!(is_ip_blocked(&IpAddr::V4(Ipv4Addr::new(127, 255, 255, 254))));
assert!(is_ip_blocked(&IpAddr::V4(Ipv4Addr::new(
127, 255, 255, 254
))));
assert!(is_ip_blocked(&IpAddr::V6(Ipv6Addr::LOCALHOST)));
}
@@ -392,7 +389,9 @@ mod tests {
#[test]
fn test_ssrf_blocks_link_local() {
// IPv4 link-local (169.254.x.x) -- includes the AWS metadata endpoint
assert!(is_ip_blocked(&IpAddr::V4(Ipv4Addr::new(169, 254, 169, 254))));
assert!(is_ip_blocked(&IpAddr::V4(Ipv4Addr::new(
169, 254, 169, 254
))));
assert!(is_ip_blocked(&IpAddr::V4(Ipv4Addr::new(169, 254, 0, 1))));
// IPv6 link-local (fe80::/10)
@@ -441,10 +440,7 @@ mod tests {
fn test_url_allows_https() {
let result = validate_url("https://example.com/spec.json", false);
assert!(result.is_ok());
assert_eq!(
result.unwrap().as_str(),
"https://example.com/spec.json"
);
assert_eq!(result.unwrap().as_str(), "https://example.com/spec.json");
}
#[test]
@@ -457,7 +453,10 @@ mod tests {
fn test_url_rejects_unsupported_scheme() {
let result = validate_url("ftp://example.com/spec.json", false);
assert!(result.is_err());
assert!(matches!(result.unwrap_err(), SwaggerCliError::InvalidSpec(_)));
assert!(matches!(
result.unwrap_err(),
SwaggerCliError::InvalidSpec(_)
));
}
#[test]
@@ -505,8 +504,7 @@ mod tests {
#[tokio::test]
async fn test_resolve_and_check_skips_allowed_host() {
let result =
resolve_and_check("localhost", 80, &["localhost".into()]).await;
let result = resolve_and_check("localhost", 80, &["localhost".into()]).await;
assert!(result.is_ok());
}
}

View File

@@ -2,4 +2,6 @@ pub mod cache;
pub mod config;
pub mod http;
pub mod indexer;
pub mod refs;
pub mod search;
pub mod spec;

295
src/core/refs.rs Normal file
View File

@@ -0,0 +1,295 @@
use std::collections::HashSet;
use serde_json::Value;
/// Resolve a JSON Pointer (RFC 6901) against a root value.
///
/// Each reference token is unescaped with `~1` -> `/` first and `~0` -> `~`
/// second (the order required by the RFC, so that `~01` decodes to `~1`).
///
/// Array elements are addressed by canonical decimal indices only: `0`, or
/// digits without a leading zero. Forms such as `+1` or `01` are rejected even
/// though `usize::from_str` would accept them.
///
/// Returns `None` if the pointer is empty, does not start with `/`, uses a
/// non-canonical array index, or names a path that does not exist in `root`.
pub fn resolve_json_pointer<'a>(root: &'a Value, pointer: &str) -> Option<&'a Value> {
    if pointer.is_empty() {
        return None;
    }

    let stripped = pointer.strip_prefix('/')?;
    let mut current = root;

    for token in stripped.split('/') {
        let unescaped = token.replace("~1", "/").replace("~0", "~");
        match current {
            Value::Object(map) => {
                current = map.get(&unescaped)?;
            }
            Value::Array(arr) => {
                // RFC 6901 section 4: an array index is "0" or a digit string
                // without a leading zero; a sign is never allowed.
                let non_canonical = unescaped.starts_with('+')
                    || (unescaped.len() > 1 && unescaped.starts_with('0'));
                if non_canonical {
                    return None;
                }
                let idx: usize = unescaped.parse().ok()?;
                current = arr.get(idx)?;
            }
            // Scalars have no children, so any remaining token cannot resolve.
            _ => return None,
        }
    }

    Some(current)
}
/// Inline every `$ref` found anywhere inside `value`, using `root` as the
/// document that internal references point into.
///
/// - A ref starting with `#/` is internal: the part after `#` is resolved as a
///   JSON pointer against `root`, and the target is spliced in and expanded in turn.
/// - Any other ref string is treated as external and replaced with
///   `{"$external_ref": "<ref>"}`.
/// - An internal ref that is already recorded as visited, or that is reached
///   once `max_depth` nested expansions have happened, is replaced with
///   `{"$circular_ref": "<ref>"}`.
/// - An internal ref whose pointer does not resolve is left untouched.
pub fn expand_refs(value: &mut Value, root: &Value, max_depth: u32) {
    // Start at nesting depth 0 with nothing visited yet.
    expand_recursive(value, root, max_depth, 0, &mut HashSet::new());
}
/// Recursive worker behind `expand_refs`.
///
/// `depth` counts how many `$ref` expansions enclose `value`; it only grows
/// when a reference is followed, not when descending into plain objects or
/// arrays. `visited` holds the refs that are currently being expanded on the
/// path from the top-level value down to `value`.
///
/// A ref is replaced with `{"$circular_ref": ...}` when it is already on that
/// path or when `depth` has reached `max_depth`. A ref is removed from
/// `visited` again once its subtree has been expanded, so the same schema can
/// be referenced from several sibling positions and is inlined each time.
fn expand_recursive(
    value: &mut Value,
    root: &Value,
    max_depth: u32,
    depth: u32,
    visited: &mut HashSet<String>,
) {
    if let Some(ref_str) = extract_ref_if_present(value) {
        if !ref_str.starts_with("#/") {
            *value = serde_json::json!({ "$external_ref": ref_str });
            return;
        }

        if depth >= max_depth || visited.contains(&ref_str) {
            *value = serde_json::json!({ "$circular_ref": ref_str });
            return;
        }

        let pointer = &ref_str[1..]; // strip leading '#'
        if let Some(resolved) = resolve_json_pointer(root, pointer) {
            let mut expanded = resolved.clone();

            // Track the ref only while its own subtree is being expanded.
            // Leaving it in the set afterwards would make every later,
            // unrelated use of the same schema look circular.
            visited.insert(ref_str.clone());
            expand_recursive(&mut expanded, root, max_depth, depth + 1, visited);
            visited.remove(&ref_str);

            *value = expanded;
        }
        // If the pointer doesn't resolve, leave the $ref as-is (broken ref).
        return;
    }

    match value {
        Value::Object(map) => {
            for val in map.values_mut() {
                expand_recursive(val, root, max_depth, depth, visited);
            }
        }
        Value::Array(arr) => {
            for item in arr.iter_mut() {
                expand_recursive(item, root, max_depth, depth, visited);
            }
        }
        _ => {}
    }
}
fn extract_ref_if_present(value: &Value) -> Option<String> {
let map = value.as_object()?;
let ref_val = map.get("$ref")?;
Some(ref_val.as_str()?.to_string())
}
// Unit tests for JSON-pointer resolution and `$ref` expansion.
#[cfg(test)]
mod tests {
use super::*;
use serde_json::json;
// Covers the happy path plus empty pointer, missing path, missing leading
// slash, both escape sequences, and array indexing.
#[test]
fn test_resolve_json_pointer() {
let root = json!({
"components": {
"schemas": {
"Pet": {
"type": "object",
"properties": {
"name": { "type": "string" }
}
}
}
}
});
let result = resolve_json_pointer(&root, "/components/schemas/Pet");
assert!(result.is_some());
let pet = result.unwrap();
assert_eq!(pet["type"], "object");
assert_eq!(pet["properties"]["name"]["type"], "string");
// Empty pointer
assert!(resolve_json_pointer(&root, "").is_none());
// Missing path
assert!(resolve_json_pointer(&root, "/components/schemas/Dog").is_none());
// No leading slash
assert!(resolve_json_pointer(&root, "components").is_none());
// Escaped path segments: ~1 -> /
let root_with_slash = json!({
"paths": {
"/pets/{petId}": {
"get": { "summary": "Get pet" }
}
}
});
let result = resolve_json_pointer(&root_with_slash, "/paths/~1pets~1{petId}/get");
assert!(result.is_some());
assert_eq!(result.unwrap()["summary"], "Get pet");
// Escaped: ~0 -> ~
let root_with_tilde = json!({
"x~y": "found"
});
let result = resolve_json_pointer(&root_with_tilde, "/x~0y");
assert!(result.is_some());
assert_eq!(result.unwrap(), "found");
// Array indexing
let root_with_array = json!({
"items": ["a", "b", "c"]
});
let result = resolve_json_pointer(&root_with_array, "/items/1");
assert!(result.is_some());
assert_eq!(result.unwrap(), "b");
}
// A single internal ref is replaced by the content it points to.
#[test]
fn test_expand_basic_ref() {
let root = json!({
"components": {
"schemas": {
"Pet": {
"type": "object",
"properties": {
"name": { "type": "string" }
}
}
}
}
});
let mut value = json!({
"schema": { "$ref": "#/components/schemas/Pet" }
});
expand_refs(&mut value, &root, 10);
assert_eq!(value["schema"]["type"], "object");
assert_eq!(value["schema"]["properties"]["name"]["type"], "string");
// $ref key should be gone (replaced with inlined content)
assert!(value["schema"]["$ref"].is_null());
}
// A schema that references itself is expanded once; the inner self-reference
// becomes a `$circular_ref` marker.
#[test]
fn test_expand_circular_ref() {
let root = json!({
"components": {
"schemas": {
"Node": {
"type": "object",
"properties": {
"child": { "$ref": "#/components/schemas/Node" }
}
}
}
}
});
let mut value = json!({
"schema": { "$ref": "#/components/schemas/Node" }
});
expand_refs(&mut value, &root, 5);
// The first expansion should succeed
assert_eq!(value["schema"]["type"], "object");
// The recursive child ref should be replaced with $circular_ref
let child = &value["schema"]["properties"]["child"];
assert_eq!(child["$circular_ref"], "#/components/schemas/Node");
}
// Refs that do not start with `#/` are never resolved, only marked.
#[test]
fn test_expand_external_ref() {
let root = json!({});
let mut value = json!({
"schema": { "$ref": "https://example.com/schemas/Pet.json" }
});
expand_refs(&mut value, &root, 5);
assert_eq!(
value["schema"]["$external_ref"],
"https://example.com/schemas/Pet.json"
);
}
// Hitting the depth limit uses the same `$circular_ref` marker as a true cycle.
#[test]
fn test_expand_max_depth() {
let root = json!({
"components": {
"schemas": {
"A": {
"nested": { "$ref": "#/components/schemas/B" }
},
"B": {
"value": "deep"
}
}
}
});
// With max_depth=1, the first ref resolves but nested ref hits depth limit
let mut value = json!({ "$ref": "#/components/schemas/A" });
expand_refs(&mut value, &root, 1);
// A should be expanded
assert!(value.get("nested").is_some());
// B ref should be left as $circular_ref due to max_depth
assert_eq!(value["nested"]["$circular_ref"], "#/components/schemas/B");
}
// Refs inside arrays are expanded; non-ref array items are left alone.
#[test]
fn test_expand_array_refs() {
let root = json!({
"components": {
"schemas": {
"Tag": { "type": "string" }
}
}
});
let mut value = json!({
"items": [
{ "$ref": "#/components/schemas/Tag" },
{ "type": "integer" }
]
});
expand_refs(&mut value, &root, 5);
assert_eq!(value["items"][0]["type"], "string");
assert_eq!(value["items"][1]["type"], "integer");
}
// An internal ref whose target does not exist stays in place unchanged.
#[test]
fn test_expand_broken_ref_left_as_is() {
let root = json!({});
let mut value = json!({
"schema": { "$ref": "#/components/schemas/Missing" }
});
let original = value.clone();
expand_refs(&mut value, &root, 5);
// Broken internal ref left untouched
assert_eq!(value, original);
}
}

634
src/core/search.rs Normal file
View File

@@ -0,0 +1,634 @@
use serde::Serialize;
use super::indexer::method_rank;
use super::spec::SpecIndex;
// ---------------------------------------------------------------------------
// Public types
// ---------------------------------------------------------------------------
/// One ranked hit produced by `SearchEngine::search`.
#[derive(Debug, Clone, Serialize)]
pub struct SearchResult {
/// Whether this hit is an endpoint or a schema.
pub result_type: SearchResultType,
/// Endpoint path for endpoint hits, schema name for schema hits.
pub name: String,
/// HTTP method for endpoint hits; `None` for schema hits.
pub method: Option<String>,
/// Endpoint summary, when the endpoint has one; `None` for schema hits.
pub summary: Option<String>,
/// 1-based position in the sorted, truncated result list.
pub rank: usize,
/// Weighted, coverage-boosted score multiplied by 100 and rounded.
pub score: u32,
/// Every field/term match that contributed to the score.
pub matches: Vec<Match>,
}
/// Kind of item a `SearchResult` refers to; serialized in snake_case.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum SearchResultType {
/// A path + method operation from the index.
Endpoint,
/// A named schema from the index.
Schema,
}
impl SearchResultType {
    /// Tie-break key used when sorting results with equal scores:
    /// endpoints (0) are ordered before schemas (1).
    fn ordinal(self) -> u8 {
        match self {
            Self::Schema => 1,
            Self::Endpoint => 0,
        }
    }
}
/// A single term match inside one field of an endpoint or schema.
#[derive(Debug, Clone, Serialize)]
pub struct Match {
/// Name of the field that matched: `path`, `summary`, `description`, or `schema_name`.
pub field: String,
/// Excerpt of the field text around the match, as built by `safe_snippet`.
pub snippet: String,
}
/// Switches that control what `SearchEngine::search` looks at and how it matches.
#[derive(Debug, Clone)]
pub struct SearchOptions {
/// Match terms against endpoint paths. Endpoint summaries are searched when
/// either this or `search_descriptions` is set.
pub search_paths: bool,
/// Match terms against endpoint descriptions (and summaries, see above).
pub search_descriptions: bool,
/// Match terms against schema names.
pub search_schemas: bool,
/// When false, both the field text and the term are lowercased before comparing.
pub case_sensitive: bool,
/// When true, the whole query is one term instead of being split on whitespace.
pub exact: bool,
/// Maximum number of results returned.
pub limit: usize,
}
impl Default for SearchOptions {
    /// Defaults: every search scope enabled, case-insensitive matching,
    /// whitespace-tokenized (non-exact) queries, and at most 20 results.
    fn default() -> Self {
        Self {
            limit: 20,
            exact: false,
            case_sensitive: false,
            search_schemas: true,
            search_descriptions: true,
            search_paths: true,
        }
    }
}
// ---------------------------------------------------------------------------
// Field weights
// ---------------------------------------------------------------------------
// Raw score added each time a query term matches the corresponding field.
// Path and schema-name matches count most, descriptions least.
const WEIGHT_PATH: f64 = 10.0;
const WEIGHT_SUMMARY: f64 = 5.0;
const WEIGHT_DESCRIPTION: f64 = 2.0;
const WEIGHT_SCHEMA_NAME: f64 = 8.0;
// ---------------------------------------------------------------------------
// Search engine
// ---------------------------------------------------------------------------
/// Text search over the endpoints and schemas of a borrowed `SpecIndex`.
pub struct SearchEngine<'a> {
// Index being searched; the engine only reads from it.
index: &'a SpecIndex,
}
impl<'a> SearchEngine<'a> {
pub fn new(index: &'a SpecIndex) -> Self {
Self { index }
}
pub fn search(&self, query: &str, opts: &SearchOptions) -> Vec<SearchResult> {
let query = query.trim();
if query.is_empty() {
return Vec::new();
}
let terms = tokenize(query, opts.exact);
let total_terms = terms.len();
let mut results: Vec<SearchResult> = Vec::new();
// Search endpoints
if opts.search_paths || opts.search_descriptions {
for ep in &self.index.endpoints {
let mut raw_score: f64 = 0.0;
let mut matched_terms: usize = 0;
let mut matches: Vec<Match> = Vec::new();
for term in &terms {
let mut term_matched = false;
if opts.search_paths && contains_term(&ep.path, term, opts.case_sensitive) {
raw_score += WEIGHT_PATH;
matches.push(Match {
field: "path".into(),
snippet: safe_snippet(&ep.path, term, opts.case_sensitive),
});
term_matched = true;
}
if (opts.search_descriptions || opts.search_paths)
&& let Some(ref summary) = ep.summary
&& contains_term(summary, term, opts.case_sensitive)
{
raw_score += WEIGHT_SUMMARY;
matches.push(Match {
field: "summary".into(),
snippet: safe_snippet(summary, term, opts.case_sensitive),
});
term_matched = true;
}
if opts.search_descriptions
&& let Some(ref desc) = ep.description
&& contains_term(desc, term, opts.case_sensitive)
{
raw_score += WEIGHT_DESCRIPTION;
matches.push(Match {
field: "description".into(),
snippet: safe_snippet(desc, term, opts.case_sensitive),
});
term_matched = true;
}
if term_matched {
matched_terms += 1;
}
}
if raw_score > 0.0 {
let coverage_boost = 1.0 + (matched_terms as f64 / total_terms.max(1) as f64);
let final_score = raw_score * coverage_boost;
let quantized = (final_score * 100.0).round() as u32;
results.push(SearchResult {
result_type: SearchResultType::Endpoint,
name: ep.path.clone(),
method: Some(ep.method.clone()),
summary: ep.summary.clone(),
rank: 0, // assigned after sort
score: quantized,
matches,
});
}
}
}
// Search schemas
if opts.search_schemas {
for schema in &self.index.schemas {
let mut raw_score: f64 = 0.0;
let mut matched_terms: usize = 0;
let mut matches: Vec<Match> = Vec::new();
for term in &terms {
if contains_term(&schema.name, term, opts.case_sensitive) {
raw_score += WEIGHT_SCHEMA_NAME;
matches.push(Match {
field: "schema_name".into(),
snippet: safe_snippet(&schema.name, term, opts.case_sensitive),
});
matched_terms += 1;
}
}
if raw_score > 0.0 {
let coverage_boost = 1.0 + (matched_terms as f64 / total_terms.max(1) as f64);
let final_score = raw_score * coverage_boost;
let quantized = (final_score * 100.0).round() as u32;
results.push(SearchResult {
result_type: SearchResultType::Schema,
name: schema.name.clone(),
method: None,
summary: None,
rank: 0,
score: quantized,
matches,
});
}
}
}
// Deterministic sort: score DESC, type ordinal ASC, name ASC, method_rank ASC
results.sort_by(|a, b| {
b.score
.cmp(&a.score)
.then_with(|| a.result_type.ordinal().cmp(&b.result_type.ordinal()))
.then_with(|| a.name.cmp(&b.name))
.then_with(|| {
let a_rank = a.method.as_deref().map(method_rank).unwrap_or(u8::MAX);
let b_rank = b.method.as_deref().map(method_rank).unwrap_or(u8::MAX);
a_rank.cmp(&b_rank)
})
});
// Assign 1-based ranks and apply limit
results.truncate(opts.limit);
for (i, result) in results.iter_mut().enumerate() {
result.rank = i + 1;
}
results
}
}
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
/// Splits `query` into search terms.
///
/// In exact mode the whole query is returned as a single term, unchanged.
/// Otherwise it is split on Unicode whitespace, and empty pieces are dropped.
fn tokenize(query: &str, exact: bool) -> Vec<String> {
    if !exact {
        return query.split_whitespace().map(str::to_owned).collect();
    }
    vec![query.to_owned()]
}
/// Reports whether `needle` occurs as a substring of `haystack`.
///
/// When `case_sensitive` is false, both strings are lowercased before the
/// comparison.
fn contains_term(haystack: &str, needle: &str, case_sensitive: bool) -> bool {
    if !case_sensitive {
        let folded_needle = needle.to_lowercase();
        return haystack.to_lowercase().contains(folded_needle.as_str());
    }
    haystack.contains(needle)
}
/// Build a Unicode-safe snippet around the first occurrence of `needle` in
/// `haystack`. The context window is 50 characters. Ellipses are added when
/// the snippet is truncated.
///
/// With `case_sensitive == false` the match is located in the lowercased
/// haystack. Lowercasing can change a character's UTF-8 length, so the byte
/// offset of the match is mapped back to a character index of the *original*
/// haystack rather than being used to slice it directly.
///
/// If `needle` does not occur, the first 50 characters of `haystack` are
/// returned without ellipses.
fn safe_snippet(haystack: &str, needle: &str, case_sensitive: bool) -> String {
    const WINDOW: usize = 50;

    // Character index in `haystack` at which the match starts.
    let match_start = if case_sensitive {
        haystack
            .find(needle)
            .map(|pos| haystack[..pos].chars().count())
    } else {
        let lowered = haystack.to_lowercase();
        lowered.find(&needle.to_lowercase()).map(|pos| {
            // Walk the original characters, accumulating the byte length of
            // each one's lowercase form, until the running total passes `pos`.
            // That character is the one the match starts in.
            let mut consumed = 0usize;
            haystack
                .chars()
                .position(|ch| {
                    consumed += ch.to_lowercase().map(char::len_utf8).sum::<usize>();
                    consumed > pos
                })
                .unwrap_or(0)
        })
    };

    let char_start = match match_start {
        Some(idx) => idx,
        None => return haystack.chars().take(WINDOW).collect(),
    };

    let haystack_chars: Vec<char> = haystack.chars().collect();
    let total_chars = haystack_chars.len();

    // Centre the window around the match.
    let context_budget = WINDOW.saturating_sub(needle.chars().count());
    let left_context = context_budget / 2;
    let snippet_start = char_start.saturating_sub(left_context);
    let snippet_end = (snippet_start + WINDOW).min(total_chars);

    let prefix = if snippet_start > 0 { "..." } else { "" };
    let suffix = if snippet_end < total_chars { "..." } else { "" };

    let snippet_body: String = haystack_chars[snippet_start..snippet_end].iter().collect();
    format!("{prefix}{snippet_body}{suffix}")
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
// Unit tests for the search engine and its snippet helper.
#[cfg(test)]
mod tests {
use super::*;
use crate::core::spec::{
IndexInfo, IndexedEndpoint, IndexedParam, IndexedSchema, IndexedTag, SpecIndex,
};
// Small Petstore-style fixture: four endpoints (three under /pets, one under
// /store), three schemas, and two tags.
fn petstore_index() -> SpecIndex {
SpecIndex {
index_version: 1,
generation: 1,
content_hash: "sha256:test".into(),
openapi: "3.0.3".into(),
info: IndexInfo {
title: "Petstore".into(),
version: "1.0.0".into(),
},
endpoints: vec![
IndexedEndpoint {
path: "/pets".into(),
method: "GET".into(),
summary: Some("List all pets".into()),
description: Some("Returns a list of pets from the store".into()),
operation_id: Some("listPets".into()),
tags: vec!["pets".into()],
deprecated: false,
parameters: vec![IndexedParam {
name: "limit".into(),
location: "query".into(),
required: false,
description: Some("Max items".into()),
}],
request_body_required: false,
request_body_content_types: vec![],
security_schemes: vec![],
security_required: false,
operation_ptr: "/paths/~1pets/get".into(),
},
IndexedEndpoint {
path: "/pets".into(),
method: "POST".into(),
summary: Some("Create a pet".into()),
description: None,
operation_id: Some("createPet".into()),
tags: vec!["pets".into()],
deprecated: false,
parameters: vec![],
request_body_required: true,
request_body_content_types: vec!["application/json".into()],
security_schemes: vec![],
security_required: false,
operation_ptr: "/paths/~1pets/post".into(),
},
IndexedEndpoint {
path: "/pets/{petId}".into(),
method: "GET".into(),
summary: Some("Info for a specific pet".into()),
description: Some("Detailed information about a single pet".into()),
operation_id: Some("showPetById".into()),
tags: vec!["pets".into()],
deprecated: false,
parameters: vec![IndexedParam {
name: "petId".into(),
location: "path".into(),
required: true,
description: Some("The id of the pet".into()),
}],
request_body_required: false,
request_body_content_types: vec![],
security_schemes: vec![],
security_required: false,
operation_ptr: "/paths/~1pets~1{petId}/get".into(),
},
IndexedEndpoint {
path: "/store/inventory".into(),
method: "GET".into(),
summary: Some("Returns store inventory".into()),
description: None,
operation_id: Some("getInventory".into()),
tags: vec!["store".into()],
deprecated: false,
parameters: vec![],
request_body_required: false,
request_body_content_types: vec![],
security_schemes: vec![],
security_required: false,
operation_ptr: "/paths/~1store~1inventory/get".into(),
},
],
schemas: vec![
IndexedSchema {
name: "Pet".into(),
schema_ptr: "/components/schemas/Pet".into(),
},
IndexedSchema {
name: "Error".into(),
schema_ptr: "/components/schemas/Error".into(),
},
IndexedSchema {
name: "PetList".into(),
schema_ptr: "/components/schemas/PetList".into(),
},
],
tags: vec![
IndexedTag {
name: "pets".into(),
description: Some("Pet operations".into()),
endpoint_count: 3,
},
IndexedTag {
name: "store".into(),
description: Some("Store operations".into()),
endpoint_count: 1,
},
],
}
}
// A plain single-term query returns hits whose snippets contain the term,
// with sequential 1-based ranks.
#[test]
fn test_search_basic() {
let index = petstore_index();
let engine = SearchEngine::new(&index);
let opts = SearchOptions::default();
let results = engine.search("pet", &opts);
assert!(
!results.is_empty(),
"should find 'pet' in petstore endpoints"
);
// All results should mention pet somewhere
for r in &results {
let has_pet = r
.matches
.iter()
.any(|m| m.snippet.to_lowercase().contains("pet"));
assert!(has_pet, "result {:?} should match 'pet'", r.name);
}
// Ranks should be sequential 1-based
for (i, r) in results.iter().enumerate() {
assert_eq!(r.rank, i + 1, "rank should be 1-based sequential");
}
}
// Running the same query twice yields identical scores, ranks and ordering.
#[test]
fn test_search_scores_deterministic() {
let index = petstore_index();
let engine = SearchEngine::new(&index);
let opts = SearchOptions::default();
let run1 = engine.search("pet", &opts);
let run2 = engine.search("pet", &opts);
assert_eq!(run1.len(), run2.len());
for (a, b) in run1.iter().zip(run2.iter()) {
assert_eq!(a.score, b.score, "scores should be identical across runs");
assert_eq!(a.rank, b.rank, "ranks should be identical across runs");
assert_eq!(a.name, b.name, "names should be identical across runs");
assert_eq!(
a.method, b.method,
"methods should be identical across runs"
);
}
}
// Exact mode treats the query as one phrase, so it can only narrow the result set.
#[test]
fn test_search_exact_mode() {
let index = petstore_index();
let engine = SearchEngine::new(&index);
// "list all" as two tokens: should match broadly
let loose_opts = SearchOptions {
exact: false,
..SearchOptions::default()
};
let loose = engine.search("list all", &loose_opts);
// "list all" as exact phrase: only matches if that exact phrase appears
let exact_opts = SearchOptions {
exact: true,
..SearchOptions::default()
};
let exact = engine.search("list all", &exact_opts);
// Exact should be a subset of (or equal to) loose results
assert!(
exact.len() <= loose.len(),
"exact mode should return fewer or equal results"
);
// The exact match should find "List all pets" summary
assert!(
!exact.is_empty(),
"exact 'list all' should match 'List all pets'"
);
}
// Case-sensitive matching must return strictly fewer hits for an all-caps query.
#[test]
fn test_search_case_sensitive() {
let index = petstore_index();
let engine = SearchEngine::new(&index);
// Case-insensitive (default): "PET" matches "pet", "/pets", etc.
let insensitive = SearchOptions {
case_sensitive: false,
..SearchOptions::default()
};
let results_insensitive = engine.search("PET", &insensitive);
// Case-sensitive: "PET" should NOT match lowercase "pet" or "/pets"
let sensitive = SearchOptions {
case_sensitive: true,
..SearchOptions::default()
};
let results_sensitive = engine.search("PET", &sensitive);
assert!(
results_sensitive.len() < results_insensitive.len(),
"case-sensitive 'PET' should match fewer results than case-insensitive"
);
}
// Snippet building must cope with multi-byte characters before the match.
#[test]
fn test_safe_snippet_unicode() {
// Emoji and multi-byte characters
let haystack = "Hello \u{1F600} world of pets and \u{1F431} cats everywhere";
let snippet = safe_snippet(haystack, "pets", false);
assert!(
snippet.contains("pets"),
"snippet should contain the search term"
);
// Must not panic on multi-byte boundaries
}
// A match deep inside a long string is cut to the window and marked with ellipses.
#[test]
fn test_safe_snippet_truncation() {
let long = "a".repeat(200);
let haystack = format!("{long}needle{long}");
let snippet = safe_snippet(&haystack, "needle", false);
assert!(snippet.contains("needle"));
assert!(
snippet.contains("..."),
"should have ellipsis for truncation"
);
// Snippet should be around 50 chars + ellipsis markers
let body_len = snippet.replace("...", "").chars().count();
assert!(body_len <= 50, "snippet body should be at most 50 chars");
}
// Empty and whitespace-only queries short-circuit to no results.
#[test]
fn test_empty_query_returns_empty() {
let index = petstore_index();
let engine = SearchEngine::new(&index);
let opts = SearchOptions::default();
assert!(engine.search("", &opts).is_empty());
assert!(engine.search(" ", &opts).is_empty());
}
// `limit` caps the number of returned results.
#[test]
fn test_search_limit() {
let index = petstore_index();
let engine = SearchEngine::new(&index);
let opts = SearchOptions {
limit: 2,
..SearchOptions::default()
};
let results = engine.search("pet", &opts);
assert!(results.len() <= 2, "should respect limit");
}
// With only `search_schemas` enabled, no endpoint may appear in the results.
#[test]
fn test_search_schemas_only() {
let index = petstore_index();
let engine = SearchEngine::new(&index);
let opts = SearchOptions {
search_paths: false,
search_descriptions: false,
search_schemas: true,
..SearchOptions::default()
};
let results = engine.search("Pet", &opts);
assert!(!results.is_empty());
for r in &results {
assert_eq!(
r.result_type,
SearchResultType::Schema,
"should only return schemas"
);
}
}
// With only `search_paths` enabled, no schema may appear in the results.
#[test]
fn test_search_paths_only() {
let index = petstore_index();
let engine = SearchEngine::new(&index);
let opts = SearchOptions {
search_paths: true,
search_descriptions: false,
search_schemas: false,
..SearchOptions::default()
};
let results = engine.search("store", &opts);
assert!(!results.is_empty());
for r in &results {
assert_eq!(
r.result_type,
SearchResultType::Endpoint,
"should only return endpoints"
);
}
}
// Multi-term queries must still come back sorted by score, highest first.
#[test]
fn test_multi_term_coverage_boost() {
let index = petstore_index();
let engine = SearchEngine::new(&index);
let opts = SearchOptions::default();
// "pets list" has two terms; an endpoint matching both gets higher coverage
let results = engine.search("pets list", &opts);
if results.len() >= 2 {
// The first result should have a higher score due to more term matches
assert!(
results[0].score >= results[1].score,
"results should be sorted by score descending"
);
}
}
// A term that appears nowhere in the fixture produces an empty result list.
#[test]
fn test_no_match_returns_empty() {
let index = petstore_index();
let engine = SearchEngine::new(&index);
let opts = SearchOptions::default();
let results = engine.search("zzzznotfound", &opts);
assert!(
results.is_empty(),
"gibberish query should return no results"
);
}
}