use std::collections::HashMap;

use crate::core::spec::{
    IndexInfo, IndexedEndpoint, IndexedParam, IndexedSchema, IndexedTag, SpecIndex,
};
use crate::errors::SwaggerCliError;

/// Serialization format of a raw spec document.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum Format {
    Json,
    Yaml,
}

/// Detect whether raw bytes are JSON or YAML.
///
/// Priority: content-type header > file extension > content sniffing.
///
/// Both hints are compared case-insensitively. A hint that names neither
/// format is ignored and detection falls through to the next step. When no
/// hint decides, the first non-whitespace byte is inspected: `{` or `[` means
/// JSON, anything else (including empty input) is treated as YAML.
pub fn detect_format(
    bytes: &[u8],
    filename_hint: Option<&str>,
    content_type_hint: Option<&str>,
) -> Format {
    if let Some(ct) = content_type_hint {
        let ct_lower = ct.to_ascii_lowercase();
        if ct_lower.contains("json") {
            return Format::Json;
        }
        if ct_lower.contains("yaml") || ct_lower.contains("yml") {
            return Format::Yaml;
        }
    }

    if let Some(name) = filename_hint {
        let name_lower = name.to_ascii_lowercase();
        if name_lower.ends_with(".json") {
            return Format::Json;
        }
        if name_lower.ends_with(".yaml") || name_lower.ends_with(".yml") {
            return Format::Yaml;
        }
    }

    // Content sniffing: check the first non-whitespace byte. Valid JSON
    // documents start with '{' or '['. This avoids a full JSON parse just
    // to detect format — a ~300x speedup for the common case.
    let first_meaningful = bytes.iter().find(|b| !b.is_ascii_whitespace());
    match first_meaningful {
        Some(b'{') | Some(b'[') => Format::Json,
        _ => Format::Yaml,
    }
}

/// Normalize raw bytes to canonical JSON, returning both the bytes and parsed value.
///
/// For JSON input: parses once and returns the original bytes + parsed value.
/// For YAML input: parses YAML into a Value, serializes to JSON bytes.
///
/// This eliminates the common double-parse pattern where callers would
/// call `normalize_to_json()` then immediately `serde_json::from_slice()`.
pub fn normalize_to_json( bytes: &[u8], format: Format, ) -> Result<(Vec, serde_json::Value), SwaggerCliError> { match format { Format::Json => { let value: serde_json::Value = serde_json::from_slice(bytes)?; Ok((bytes.to_vec(), value)) } Format::Yaml => { let value: serde_json::Value = serde_yaml::from_slice(bytes) .map_err(|e| SwaggerCliError::InvalidSpec(format!("YAML parse error: {e}")))?; let json_bytes = serde_json::to_vec(&value)?; Ok((json_bytes, value)) } } } /// Build a `SpecIndex` from a parsed JSON OpenAPI document. pub fn build_index( raw_json: &serde_json::Value, content_hash: &str, generation: u64, ) -> Result { let openapi = raw_json .get("openapi") .and_then(|v| v.as_str()) .unwrap_or("") .to_string(); let info_obj = raw_json.get("info"); let title = info_obj .and_then(|i| i.get("title")) .and_then(|v| v.as_str()) .unwrap_or("") .to_string(); let version = info_obj .and_then(|i| i.get("version")) .and_then(|v| v.as_str()) .unwrap_or("") .to_string(); // Root-level security schemes (names only). let root_security = extract_security_scheme_names(raw_json.get("security")); let mut endpoints = Vec::new(); let mut tag_counts: HashMap = HashMap::new(); if let Some(paths) = raw_json.get("paths").and_then(|p| p.as_object()) { for (path, path_item) in paths { let path_obj = match path_item.as_object() { Some(o) => o, None => continue, }; // Path-level parameters apply to all operations under this path. 
let path_params = path_obj .get("parameters") .and_then(|v| v.as_array()) .map(|arr| extract_params(arr)) .unwrap_or_default(); for (method, operation) in path_obj { if !is_http_method(method) { continue; } let op = match operation.as_object() { Some(o) => o, None => continue, }; let method_upper = method.to_ascii_uppercase(); let path_encoded = json_pointer_encode(path); let method_lower = method.to_ascii_lowercase(); let operation_ptr = format!("/paths/{path_encoded}/{method_lower}"); // Merge path-level + operation-level parameters (operation wins on conflict). let op_params = op .get("parameters") .and_then(|v| v.as_array()) .map(|arr| extract_params(arr)) .unwrap_or_default(); let parameters = merge_params(&path_params, &op_params); let tags: Vec = op .get("tags") .and_then(|v| v.as_array()) .map(|arr| { arr.iter() .filter_map(|t| t.as_str().map(String::from)) .collect() }) .unwrap_or_default(); for tag in &tags { *tag_counts.entry(tag.clone()).or_insert(0) += 1; } let deprecated = op .get("deprecated") .and_then(|v| v.as_bool()) .unwrap_or(false); let summary = op.get("summary").and_then(|v| v.as_str()).map(String::from); let description = op .get("description") .and_then(|v| v.as_str()) .map(String::from); let operation_id = op .get("operationId") .and_then(|v| v.as_str()) .map(String::from); let (request_body_required, request_body_content_types) = extract_request_body(op.get("requestBody")); // Security: operation-level overrides root. An explicit empty array // means "no auth required". 
let (security_schemes, security_required) = if let Some(op_sec) = op.get("security") { let schemes = extract_security_scheme_names(Some(op_sec)); let required = !schemes.is_empty(); (schemes, required) } else { let required = !root_security.is_empty(); (root_security.clone(), required) }; if !resolve_pointer(raw_json, &operation_ptr) { return Err(SwaggerCliError::InvalidSpec(format!( "JSON pointer does not resolve: {operation_ptr}" ))); } endpoints.push(IndexedEndpoint { path: path.clone(), method: method_upper, summary, description, operation_id, tags, deprecated, parameters, request_body_required, request_body_content_types, security_schemes, security_required, operation_ptr, }); } } } // Sort endpoints: path ASC then method rank ASC. endpoints.sort_by(|a, b| { a.path .cmp(&b.path) .then_with(|| method_rank(&a.method).cmp(&method_rank(&b.method))) }); // Schemas from components.schemas. let mut schemas: Vec = Vec::new(); if let Some(components_schemas) = raw_json .pointer("/components/schemas") .and_then(|v| v.as_object()) { for name in components_schemas.keys() { let schema_ptr = format!("/components/schemas/{}", json_pointer_encode(name)); if !resolve_pointer(raw_json, &schema_ptr) { return Err(SwaggerCliError::InvalidSpec(format!( "JSON pointer does not resolve: {schema_ptr}" ))); } schemas.push(IndexedSchema { name: name.clone(), schema_ptr, }); } } schemas.sort_by(|a, b| a.name.cmp(&b.name)); // Collect tag descriptions from the top-level `tags` array (if present). 
let mut tag_descriptions: HashMap> = HashMap::new(); if let Some(tags_arr) = raw_json.get("tags").and_then(|v| v.as_array()) { for tag_obj in tags_arr { if let Some(name) = tag_obj.get("name").and_then(|v| v.as_str()) { let desc = tag_obj .get("description") .and_then(|v| v.as_str()) .map(String::from); tag_descriptions.insert(name.to_string(), desc); } } } let mut tags: Vec = tag_counts .into_iter() .map(|(name, count)| { let description = tag_descriptions.get(&name).cloned().unwrap_or(None); IndexedTag { name, description, endpoint_count: count, } }) .collect(); tags.sort_by(|a, b| a.name.cmp(&b.name)); Ok(SpecIndex { index_version: 1, generation, content_hash: content_hash.to_string(), openapi, info: IndexInfo { title, version }, endpoints, schemas, tags, }) } /// Return the sort rank for an HTTP method. pub fn method_rank(method: &str) -> u8 { match method.to_ascii_uppercase().as_str() { "GET" => 0, "POST" => 1, "PUT" => 2, "PATCH" => 3, "DELETE" => 4, "OPTIONS" => 5, "HEAD" => 6, "TRACE" => 7, _ => 99, } } /// RFC 6901 JSON pointer encoding for a single segment: `~` -> `~0`, `/` -> `~1`. pub fn json_pointer_encode(segment: &str) -> String { segment.replace('~', "~0").replace('/', "~1") } /// Check whether a JSON pointer resolves within `value`. 
pub fn resolve_pointer(value: &serde_json::Value, pointer: &str) -> bool { value.pointer(pointer).is_some() } // --------------------------------------------------------------------------- // Private helpers // --------------------------------------------------------------------------- fn is_http_method(key: &str) -> bool { matches!( key.to_ascii_lowercase().as_str(), "get" | "post" | "put" | "patch" | "delete" | "options" | "head" | "trace" ) } fn extract_params(arr: &[serde_json::Value]) -> Vec { arr.iter() .filter_map(|p| { let name = p.get("name")?.as_str()?.to_string(); let location = p.get("in")?.as_str()?.to_string(); let required = p.get("required").and_then(|v| v.as_bool()).unwrap_or(false); let description = p .get("description") .and_then(|v| v.as_str()) .map(String::from); Some(IndexedParam { name, location, required, description, }) }) .collect() } /// Merge path-level and operation-level parameters. Operation params override /// path params with the same (name, location) pair. 
fn merge_params(path_params: &[IndexedParam], op_params: &[IndexedParam]) -> Vec { let mut merged: Vec = path_params.to_vec(); for op_p in op_params { if let Some(existing) = merged .iter_mut() .find(|p| p.name == op_p.name && p.location == op_p.location) { *existing = op_p.clone(); } else { merged.push(op_p.clone()); } } merged } fn extract_request_body(rb: Option<&serde_json::Value>) -> (bool, Vec) { let Some(rb) = rb else { return (false, Vec::new()); }; let required = rb .get("required") .and_then(|v| v.as_bool()) .unwrap_or(false); let content_types = rb .get("content") .and_then(|v| v.as_object()) .map(|obj| obj.keys().cloned().collect()) .unwrap_or_default(); (required, content_types) } fn extract_security_scheme_names(security: Option<&serde_json::Value>) -> Vec { let Some(arr) = security.and_then(|v| v.as_array()) else { return Vec::new(); }; let mut names: Vec = Vec::new(); for item in arr { if let Some(obj) = item.as_object() { for key in obj.keys() { if !names.contains(key) { names.push(key.clone()); } } } } names.sort(); names } #[cfg(test)] mod tests { use super::*; #[test] fn test_detect_format_json() { let bytes = b"{}"; assert_eq!( detect_format(bytes, None, Some("application/json")), Format::Json, ); assert_eq!(detect_format(bytes, Some("spec.json"), None), Format::Json,); } #[test] fn test_detect_format_yaml() { let bytes = b"openapi: '3.0.0'"; assert_eq!( detect_format(bytes, None, Some("application/x-yaml")), Format::Yaml, ); assert_eq!(detect_format(bytes, Some("spec.yaml"), None), Format::Yaml,); assert_eq!(detect_format(bytes, Some("spec.yml"), None), Format::Yaml,); } #[test] fn test_detect_format_sniffing() { // Valid JSON -> detected as JSON even without hints. let json_bytes = br#"{"openapi":"3.0.0"}"#; assert_eq!(detect_format(json_bytes, None, None), Format::Json); // Invalid JSON but valid YAML -> falls back to YAML. 
let yaml_bytes = b"openapi: '3.0.0'\ninfo:\n title: Test"; assert_eq!(detect_format(yaml_bytes, None, None), Format::Yaml); } #[test] fn test_yaml_normalization_roundtrip() { let yaml = br#" openapi: "3.0.0" info: title: Test API version: "1.0" paths: {} "#; let (json_bytes, parsed) = normalize_to_json(yaml, Format::Yaml).unwrap(); // Verify the bytes are also valid JSON let _: serde_json::Value = serde_json::from_slice(&json_bytes).unwrap(); assert_eq!(parsed["openapi"], "3.0.0"); assert_eq!(parsed["info"]["title"], "Test API"); } #[test] fn test_json_pointer_encoding() { assert_eq!(json_pointer_encode("/pet/{petId}"), "~1pet~1{petId}"); assert_eq!(json_pointer_encode("simple"), "simple"); assert_eq!(json_pointer_encode("a~b/c"), "a~0b~1c"); } #[test] fn test_method_rank_ordering() { assert_eq!(method_rank("GET"), 0); assert_eq!(method_rank("POST"), 1); assert_eq!(method_rank("PUT"), 2); assert_eq!(method_rank("PATCH"), 3); assert_eq!(method_rank("DELETE"), 4); assert_eq!(method_rank("OPTIONS"), 5); assert_eq!(method_rank("HEAD"), 6); assert_eq!(method_rank("TRACE"), 7); assert_eq!(method_rank("CUSTOM"), 99); // Case-insensitive. 
assert_eq!(method_rank("get"), 0); assert_eq!(method_rank("Post"), 1); } #[test] fn test_build_index_basic() { let spec: serde_json::Value = serde_json::json!({ "openapi": "3.0.3", "info": { "title": "Pet Store", "version": "1.0.0" }, "paths": { "/pets": { "get": { "operationId": "listPets", "summary": "List all pets", "tags": ["pets"], "parameters": [ { "name": "limit", "in": "query", "required": false } ], "responses": { "200": { "description": "OK" } } }, "post": { "operationId": "createPet", "summary": "Create a pet", "tags": ["pets"], "requestBody": { "required": true, "content": { "application/json": {} } }, "responses": { "201": { "description": "Created" } } } }, "/pets/{petId}": { "get": { "operationId": "showPetById", "summary": "Get a pet", "tags": ["pets"], "parameters": [ { "name": "petId", "in": "path", "required": true } ], "responses": { "200": { "description": "OK" } } } } }, "components": { "schemas": { "Pet": { "type": "object" }, "Error": { "type": "object" } } } }); let index = build_index(&spec, "sha256:abc", 42).unwrap(); assert_eq!(index.index_version, 1); assert_eq!(index.generation, 42); assert_eq!(index.content_hash, "sha256:abc"); assert_eq!(index.openapi, "3.0.3"); assert_eq!(index.info.title, "Pet Store"); assert_eq!(index.info.version, "1.0.0"); // 3 endpoints total. assert_eq!(index.endpoints.len(), 3); // Sorted: /pets GET < /pets POST < /pets/{petId} GET. assert_eq!(index.endpoints[0].path, "/pets"); assert_eq!(index.endpoints[0].method, "GET"); assert_eq!(index.endpoints[1].path, "/pets"); assert_eq!(index.endpoints[1].method, "POST"); assert_eq!(index.endpoints[2].path, "/pets/{petId}"); // POST /pets has request body. assert!(index.endpoints[1].request_body_required); assert_eq!( index.endpoints[1].request_body_content_types, vec!["application/json"] ); // Schemas sorted: Error < Pet. 
assert_eq!(index.schemas.len(), 2); assert_eq!(index.schemas[0].name, "Error"); assert_eq!(index.schemas[1].name, "Pet"); // Single tag with count 3. assert_eq!(index.tags.len(), 1); assert_eq!(index.tags[0].name, "pets"); assert_eq!(index.tags[0].endpoint_count, 3); // Verify pointers resolve. for ep in &index.endpoints { assert!( resolve_pointer(&spec, &ep.operation_ptr), "Pointer should resolve: {}", ep.operation_ptr, ); } for schema in &index.schemas { assert!( resolve_pointer(&spec, &schema.schema_ptr), "Pointer should resolve: {}", schema.schema_ptr, ); } } #[test] fn test_security_inheritance() { let spec: serde_json::Value = serde_json::json!({ "openapi": "3.0.3", "info": { "title": "Auth Test", "version": "1.0.0" }, "security": [{ "api_key": [] }], "paths": { "/secured": { "get": { "summary": "Inherits root security", "responses": { "200": { "description": "OK" } } } }, "/public": { "get": { "summary": "Explicitly no auth", "security": [], "responses": { "200": { "description": "OK" } } } }, "/custom": { "get": { "summary": "Custom auth", "security": [{ "bearer": [] }], "responses": { "200": { "description": "OK" } } } } } }); let index = build_index(&spec, "sha256:test", 1).unwrap(); // /custom -> custom security. let custom = index .endpoints .iter() .find(|e| e.path == "/custom") .unwrap(); assert_eq!(custom.security_schemes, vec!["bearer"]); assert!(custom.security_required); // /public -> empty security array means no auth. let public = index .endpoints .iter() .find(|e| e.path == "/public") .unwrap(); assert!(public.security_schemes.is_empty()); assert!(!public.security_required); // /secured -> inherits root security. 
let secured = index .endpoints .iter() .find(|e| e.path == "/secured") .unwrap(); assert_eq!(secured.security_schemes, vec!["api_key"]); assert!(secured.security_required); } #[test] fn test_resolve_pointer_valid_and_invalid() { let val: serde_json::Value = serde_json::json!({ "a": { "b": { "c": 1 } } }); assert!(resolve_pointer(&val, "/a/b/c")); assert!(resolve_pointer(&val, "/a/b")); assert!(!resolve_pointer(&val, "/a/b/d")); assert!(!resolve_pointer(&val, "/x")); } #[test] fn test_build_index_from_fixture() { let fixture = include_str!("../../tests/fixtures/petstore.json"); let spec: serde_json::Value = serde_json::from_str(fixture).unwrap(); let index = build_index(&spec, "sha256:fixture", 1).unwrap(); assert_eq!(index.openapi, "3.0.3"); assert_eq!(index.info.title, "Petstore"); assert!(!index.endpoints.is_empty()); assert!(!index.schemas.is_empty()); // Verify sort order: endpoints sorted by path then method rank. for window in index.endpoints.windows(2) { let ordering = window[0] .path .cmp(&window[1].path) .then_with(|| method_rank(&window[0].method).cmp(&method_rank(&window[1].method))); assert!( ordering.is_le(), "Endpoints not sorted: {} {} > {} {}", window[0].path, window[0].method, window[1].path, window[1].method, ); } } }