perf: read schemas from panproto-vcs store instead of re-parsing

getProjectSchema, getCommitSchemaStats, and getFileSchema now read the already-imported schemas from the panproto-vcs FsStore via the import marks file. This eliminates the O(files * parse_time) cost that was causing 5+ second page loads.

- getCommitSchemaStats: reads schema objects from vcs store, diffs adjacent schemas via panproto_check::diff+classify. No tree walks.
- getProjectSchema: extracts per-file vertex counts from stored schema vertex IDs (which encode file path prefixes). Language detection from file extensions only (no parsing).
- getFileSchema: filters stored schema to vertices/edges matching the requested file path prefix. Falls back to on-demand parsing if vcs store unavailable.
Author: Aaron Steven White
Commit
8fb2ed0de27778fc4150fe07a400a3d05e3086b3
Parent: c202e5e8ce
Structural diff unavailable
These commits were pushed via plain git push, so no pre-parsed
schemas are available. Install git-remote-cospan and re-push via panproto:// to
see scope-level changes, breaking change detection, and semantic diffs.
brew install panproto/tap/git-remote-cospan

4 files changed +374 -287
@@ -1,13 +1,17 @@
11 //! `GET /xrpc/dev.panproto.node.getCommitSchemaStats` 22 //! 3-//! For a range of commits, returns per-commit schema statistics: 4-//! total vertex/edge counts and breaking/non-breaking change counts 5-//! vs the parent commit. Powers the schema evolution sparkline. 3+//! For a range of commits, returns per-commit schema statistics by 4+//! reading the already-imported schemas from the panproto-vcs store. 5+//! Each commit's schema was parsed and stored during git push via 6+//! `import_git_repo_incremental`, so this is a cheap read operation 7+//! (no re-parsing). Breaking/non-breaking change counts come from 8+//! diffing adjacent schemas via `panproto_check::diff` + `classify`. 69 710 use std::sync::Arc; 811 912 use axum::Json; 1013 use axum::extract::{Query, State}; 14+use panproto_core::vcs::{Object, Store}; 1115 use serde::Deserialize; 1216 use serde_json::{Value, json}; 1317
@@ -32,17 +36,31 @@ pub async fn get_commit_schema_stats(
3236 ) -> Result<Json<Value>, NodeError> { 3337 let limit = params.limit.unwrap_or(30).min(100); 3438 35- let store = state.store.lock().await; 36- if !store.has_git_mirror(¶ms.did, ¶ms.repo) { 39+ let store_guard = state.store.lock().await; 40+ if !store_guard.has_git_mirror(¶ms.did, ¶ms.repo) { 3741 return Err(NodeError::RefNotFound(format!( 3842 "repo {}/{} not found", 3943 params.did, params.repo 4044 ))); 4145 } 42- let mirror = store 46+ let mirror = store_guard 4347 .open_or_init_git_mirror(¶ms.did, ¶ms.repo) 4448 .map_err(|e| NodeError::Internal(format!("open mirror: {e}")))?; 45- drop(store); 49+ 50+ // Open the panproto-vcs store where imported schemas live. 51+ let vcs_store = match store_guard.open(¶ms.did, ¶ms.repo) { 52+ Ok(s) => s, 53+ Err(_) => { 54+ // VCS store not yet initialized (no push has happened). 55+ // Fall back to empty stats. 56+ drop(store_guard); 57+ return Ok(Json(json!({ "commits": [] }))); 58+ } 59+ }; 60+ 61+ // Load the import marks to map git OIDs to panproto-vcs ObjectIds. 62+ let marks = store_guard.load_import_marks(¶ms.did, ¶ms.repo); 63+ drop(store_guard); 4664 4765 let start_oid = match params.ref_name.as_deref() { 4866 Some(name) => resolve_ref(&mirror, name)?,
@@ -57,40 +75,67 @@ pub async fn get_commit_schema_stats(
5775 walk.push(start_oid) 5876 .map_err(|e| NodeError::Internal(format!("push start: {e}")))?; 5977 60- let registry = panproto_parse::ParserRegistry::new(); 6178 let mut commits: Vec<Value> = Vec::new(); 79+ let mut prev_schema: Option<panproto_schema::Schema> = None; 6280 6381 for oid_result in walk.take(limit) { 6482 let oid = match oid_result { 6583 Ok(o) => o, 6684 Err(_) => break, 6785 }; 68- let commit = match mirror.find_commit(oid) { 86+ let git_commit = match mirror.find_commit(oid) { 6987 Ok(c) => c, 7088 Err(_) => continue, 7189 }; 7290 73- let summary = commit.summary().unwrap_or_default().to_string(); 74- let timestamp = u64::try_from(commit.time().seconds()).unwrap_or(0); 75- 76- // Count total vertices by walking the tree 77- let tree = match commit.tree() { 78- Ok(t) => t, 79- Err(_) => continue, 80- }; 81- let (total_vc, total_ec, parsed_fc) = 82- count_tree_schema_stats(&mirror, ®istry, &tree); 83- 84- // Diff against first parent for breaking/non-breaking counts 85- let (breaking, non_breaking) = if commit.parent_count() > 0 { 86- if let Ok(parent) = commit.parent(0) { 87- diff_commit_pair(&mirror, ®istry, &parent, &commit) 91+ let summary = git_commit.summary().unwrap_or_default().to_string(); 92+ let timestamp = u64::try_from(git_commit.time().seconds()).unwrap_or(0); 93+ 94+ // Look up the panproto-vcs commit via the import marks. 95+ let (total_vc, total_ec, breaking, non_breaking) = 96+ if let Some(pp_id) = marks.get(&oid) { 97+ match vcs_store.get(pp_id) { 98+ Ok(Object::Commit(pp_commit)) => { 99+ // Read the schema stored at this commit. 100+ let schema = match vcs_store.get(&pp_commit.schema_id) { 101+ Ok(Object::Schema(s)) => Some(*s), 102+ _ => None, 103+ }; 104+ 105+ let vc = schema.as_ref().map_or(0, |s| s.vertices.len()); 106+ let ec = schema.as_ref().map_or(0, |s| s.edges.len()); 107+ 108+ // Diff against the previous commit's schema for 109+ // breaking/non-breaking classification. 
110+ let (b, nb) = match (&schema, &prev_schema) { 111+ (Some(curr), Some(prev)) => { 112+ let raw_diff = panproto_check::diff(prev, curr); 113+ let protocol = super::structural::resolve_protocol( 114+ &curr.protocol, 115+ ); 116+ match protocol { 117+ Some(p) => { 118+ let report = 119+ panproto_check::classify(&raw_diff, &p); 120+ (report.breaking.len(), report.non_breaking.len()) 121+ } 122+ None => (0, 0), 123+ } 124+ } 125+ _ => (0, 0), 126+ }; 127+ 128+ if let Some(s) = schema { 129+ prev_schema = Some(s); 130+ } 131+ 132+ (vc, ec, b, nb) 133+ } 134+ _ => (0, 0, 0, 0), 135+ } 88136 } else { 89- (0, 0) 90- } 91- } else { 92- (0, 0) 93- }; 137+ (0, 0, 0, 0) 138+ }; 94139 95140 commits.push(json!({ 96141 "oid": oid.to_string(),
@@ -98,7 +143,6 @@ pub async fn get_commit_schema_stats(
98143 "summary": summary, 99144 "totalVertexCount": total_vc, 100145 "totalEdgeCount": total_ec, 101- "parsedFileCount": parsed_fc, 102146 "breakingChangeCount": breaking, 103147 "nonBreakingChangeCount": non_breaking, 104148 }));
@@ -106,112 +150,3 @@ pub async fn get_commit_schema_stats(
106150 107151 Ok(Json(json!({ "commits": commits }))) 108152 } 109- 110-/// Count total vertices, edges, and parsed files in a commit tree. 111-fn count_tree_schema_stats( 112- mirror: &git2::Repository, 113- registry: &panproto_parse::ParserRegistry, 114- tree: &git2::Tree<'_>, 115-) -> (usize, usize, usize) { 116- let mut total_vc = 0usize; 117- let mut total_ec = 0usize; 118- let mut parsed_fc = 0usize; 119- 120- let mut blobs: Vec<(String, git2::Oid)> = Vec::new(); 121- let _ = tree.walk(git2::TreeWalkMode::PreOrder, |dir, entry| { 122- if entry.kind() == Some(git2::ObjectType::Blob) { 123- let name = entry.name().unwrap_or(""); 124- let path = if dir.is_empty() { 125- name.to_string() 126- } else { 127- format!("{dir}{name}") 128- }; 129- blobs.push((path, entry.id())); 130- } 131- git2::TreeWalkResult::Ok 132- }); 133- 134- // Only parse up to 200 files to keep latency bounded 135- for (path, blob_oid) in blobs.iter().take(200) { 136- let blob = match mirror.find_blob(*blob_oid) { 137- Ok(b) => b, 138- Err(_) => continue, 139- }; 140- if let Some((schema, _)) = 141- super::structural::parse_any(registry, path, blob.content()) 142- { 143- total_vc += schema.vertices.len(); 144- total_ec += schema.edges.len(); 145- parsed_fc += 1; 146- } 147- } 148- 149- (total_vc, total_ec, parsed_fc) 150-} 151- 152-/// Diff two commits and return (breaking_count, non_breaking_count). 
153-fn diff_commit_pair( 154- mirror: &git2::Repository, 155- registry: &panproto_parse::ParserRegistry, 156- parent: &git2::Commit<'_>, 157- child: &git2::Commit<'_>, 158-) -> (usize, usize) { 159- let parent_tree = match parent.tree() { 160- Ok(t) => t, 161- Err(_) => return (0, 0), 162- }; 163- let child_tree = match child.tree() { 164- Ok(t) => t, 165- Err(_) => return (0, 0), 166- }; 167- 168- let diff = match mirror.diff_tree_to_tree( 169- Some(&parent_tree), 170- Some(&child_tree), 171- None, 172- ) { 173- Ok(d) => d, 174- Err(_) => return (0, 0), 175- }; 176- 177- let mut breaking = 0usize; 178- let mut non_breaking = 0usize; 179- 180- for delta_idx in 0..diff.deltas().len() { 181- let delta = match diff.get_delta(delta_idx) { 182- Some(d) => d, 183- None => continue, 184- }; 185- let new_file = delta.new_file(); 186- let old_file = delta.old_file(); 187- let path = new_file 188- .path() 189- .map(|p| p.to_string_lossy().into_owned()) 190- .unwrap_or_default(); 191- 192- let old_bytes = load_blob(mirror, old_file.id()); 193- let new_bytes = load_blob(mirror, new_file.id()); 194- 195- if let Some(sd) = super::structural::try_structural_diff( 196- registry, 197- &path, 198- old_bytes.as_deref(), 199- new_bytes.as_deref(), 200- ) { 201- breaking += sd.report.breaking.len(); 202- non_breaking += sd.report.non_breaking.len(); 203- } 204- } 205- 206- (breaking, non_breaking) 207-} 208- 209-fn load_blob(mirror: &git2::Repository, oid: git2::Oid) -> Option<Vec<u8>> { 210- if oid.is_zero() { 211- return None; 212- } 213- mirror 214- .find_blob(oid) 215- .ok() 216- .map(|b| b.content().to_vec()) 217-}
@@ -1,13 +1,15 @@
11 //! `GET /xrpc/dev.panproto.node.getFileSchema` 22 //! 3-//! Parses a single file at a specific commit and returns its complete 4-//! schema graph with human-readable labels. Powers the file browser's 5-//! schema sidebar. 3+//! Returns the schema graph for a single file by reading the already- 4+//! imported project schema from the panproto-vcs store and filtering to 5+//! vertices/edges whose IDs start with the requested file path. Falls 6+//! back to on-demand parsing if the vcs store is unavailable. 67 78 use std::sync::Arc; 89 910 use axum::Json; 1011 use axum::extract::{Query, State}; 12+use panproto_core::vcs::{Object, Store}; 1113 use serde::Deserialize; 1214 use serde_json::{Value, json}; 1315
@@ -30,17 +32,20 @@ pub async fn get_file_schema(
3032 State(state): State<Arc<NodeState>>, 3133 Query(params): Query<Params>, 3234 ) -> Result<Json<Value>, NodeError> { 33- let store = state.store.lock().await; 34- if !store.has_git_mirror(¶ms.did, ¶ms.repo) { 35+ let store_guard = state.store.lock().await; 36+ if !store_guard.has_git_mirror(¶ms.did, ¶ms.repo) { 3537 return Err(NodeError::RefNotFound(format!( 3638 "repo {}/{} not found", 3739 params.did, params.repo 3840 ))); 3941 } 40- let mirror = store 42+ let mirror = store_guard 4143 .open_or_init_git_mirror(¶ms.did, ¶ms.repo) 4244 .map_err(|e| NodeError::Internal(format!("open mirror: {e}")))?; 43- drop(store); 45+ 46+ let vcs_store = store_guard.open(¶ms.did, ¶ms.repo).ok(); 47+ let marks = store_guard.load_import_marks(¶ms.did, ¶ms.repo); 48+ drop(store_guard); 4449 4550 // Resolve commit 4651 let commit_oid = match params.commit.as_str() {
@@ -48,6 +53,111 @@ pub async fn get_file_schema(
4853 name => resolve_ref(&mirror, name)?, 4954 }; 5055 56+ let empty_response = || { 57+ Json(json!({ 58+ "path": params.path, 59+ "commit": commit_oid.to_string(), 60+ "language": serde_json::Value::Null, 61+ "vertexCount": 0, 62+ "edgeCount": 0, 63+ "vertices": [], 64+ "edges": [], 65+ })) 66+ }; 67+ 68+ // Detect language from extension 69+ let registry = panproto_parse::ParserRegistry::new(); 70+ let language = registry 71+ .detect_language(std::path::Path::new(¶ms.path)) 72+ .map(|s| s.to_string()); 73+ 74+ // Try to read from the vcs store (fast path). 75+ let stored_schema = marks 76+ .get(&commit_oid) 77+ .and_then(|pp_id| vcs_store.as_ref()?.get(pp_id).ok()) 78+ .and_then(|obj| match obj { 79+ Object::Commit(c) => vcs_store.as_ref()?.get(&c.schema_id).ok(), 80+ _ => None, 81+ }) 82+ .and_then(|obj| match obj { 83+ Object::Schema(s) => Some(*s), 84+ _ => None, 85+ }); 86+ 87+ if let Some(schema) = stored_schema { 88+ let file_prefix = format!("{}::", params.path); 89+ 90+ // Filter vertices belonging to this file 91+ let mut vertices: Vec<Value> = Vec::new(); 92+ let mut total_vc = 0usize; 93+ for (vid, vertex) in &schema.vertices { 94+ let vid_str: &str = vid; 95+ if !vid_str.starts_with(&file_prefix) { 96+ continue; 97+ } 98+ total_vc += 1; 99+ let human = humanize_vertex(vid_str); 100+ if human == vid_str { 101+ continue; // Skip anonymous 102+ } 103+ let name = if human.starts_with('`') { 104+ let end = human.find("` in").unwrap_or(human.len() - 1); 105+ human[1..end].to_string() 106+ } else { 107+ human.clone() 108+ }; 109+ vertices.push(json!({ 110+ "id": vid_str, 111+ "name": name, 112+ "kind": vertex.kind.as_ref(), 113+ "humanLabel": human, 114+ })); 115+ } 116+ vertices.sort_by(|a, b| a["name"].as_str().cmp(&b["name"].as_str())); 117+ 118+ // Filter edges belonging to this file 119+ let mut edges: Vec<Value> = Vec::new(); 120+ let mut total_ec = 0usize; 121+ for (edge, _) in &schema.edges { 122+ let src_str: &str = &edge.src; 123+ let tgt_str: &str 
= &edge.tgt; 124+ if !src_str.starts_with(&file_prefix) && !tgt_str.starts_with(&file_prefix) { 125+ continue; 126+ } 127+ total_ec += 1; 128+ let src_human = humanize_vertex(src_str); 129+ let tgt_human = humanize_vertex(tgt_str); 130+ if src_human == src_str && tgt_human == tgt_str { 131+ continue; 132+ } 133+ let edge_name: Option<&str> = edge.name.as_deref(); 134+ let human_label = match edge_name { 135+ Some(n) if !n.starts_with('$') => { 136+ format!("{src_human} -> {tgt_human} (via `{n}`)") 137+ } 138+ _ => format!("{src_human} -> {tgt_human}"), 139+ }; 140+ edges.push(json!({ 141+ "src": src_str, 142+ "tgt": tgt_str, 143+ "kind": edge.kind.as_ref(), 144+ "name": edge_name, 145+ "humanLabel": human_label, 146+ })); 147+ } 148+ 149+ return Ok(Json(json!({ 150+ "path": params.path, 151+ "commit": commit_oid.to_string(), 152+ "language": language, 153+ "vertexCount": total_vc, 154+ "edgeCount": total_ec, 155+ "vertices": vertices, 156+ "edges": edges, 157+ }))); 158+ } 159+ 160+ // Fallback: parse on demand from the git blob. 51161 let commit = mirror 52162 .find_commit(commit_oid) 53163 .map_err(|e| NodeError::Internal(format!("find commit: {e}")))?;
@@ -55,45 +165,29 @@ pub async fn get_file_schema(
55165 .tree() 56166 .map_err(|e| NodeError::Internal(format!("commit tree: {e}")))?; 57167 58- // Find the blob at the given path 59- let entry = tree 60- .get_path(std::path::Path::new(¶ms.path)) 61- .map_err(|_| { 62- NodeError::ObjectNotFound(format!("file '{}' not found in commit", params.path)) 63- })?; 168+ let entry = match tree.get_path(std::path::Path::new(¶ms.path)) { 169+ Ok(e) => e, 170+ Err(_) => return Ok(empty_response()), 171+ }; 64172 65- let blob = mirror 66- .find_blob(entry.id()) 67- .map_err(|e| NodeError::Internal(format!("find blob: {e}")))?; 173+ let blob = match mirror.find_blob(entry.id()) { 174+ Ok(b) => b, 175+ Err(_) => return Ok(empty_response()), 176+ }; 68177 69- let registry = panproto_parse::ParserRegistry::new(); 70178 let parsed = super::structural::parse_any(®istry, ¶ms.path, blob.content()); 71- 72- let (schema, language) = match parsed { 179+ let (schema, lang) = match parsed { 73180 Some(pair) => pair, 74- None => { 75- return Ok(Json(json!({ 76- "path": params.path, 77- "commit": commit_oid.to_string(), 78- "language": null, 79- "vertexCount": 0, 80- "edgeCount": 0, 81- "vertices": [], 82- "edges": [], 83- }))); 84- } 181+ None => return Ok(empty_response()), 85182 }; 86183 87- // Build vertex list with human labels, filtering pure-anonymous vertices 88184 let mut vertices: Vec<Value> = Vec::new(); 89185 for (vid, vertex) in &schema.vertices { 90186 let vid_str: &str = vid; 91187 let human = humanize_vertex(vid_str); 92- // Skip purely anonymous vertices (the label is just the raw ID) 93188 if human == vid_str { 94189 continue; 95190 } 96- // Extract the leaf name 97191 let name = if human.starts_with('`') { 98192 let end = human.find("` in").unwrap_or(human.len() - 1); 99193 human[1..end].to_string()
@@ -107,19 +201,14 @@ pub async fn get_file_schema(
107201 "humanLabel": human, 108202 })); 109203 } 110- // Sort by name for stable output 111- vertices.sort_by(|a, b| { 112- a["name"].as_str().cmp(&b["name"].as_str()) 113- }); 204+ vertices.sort_by(|a, b| a["name"].as_str().cmp(&b["name"].as_str())); 114205 115- // Build edge list with human labels 116206 let mut edges: Vec<Value> = Vec::new(); 117207 for (edge, _) in &schema.edges { 118208 let src_str: &str = &edge.src; 119209 let tgt_str: &str = &edge.tgt; 120210 let src_human = humanize_vertex(src_str); 121211 let tgt_human = humanize_vertex(tgt_str); 122- // Skip edges where both ends are anonymous 123212 if src_human == src_str && tgt_human == tgt_str { 124213 continue; 125214 }
@@ -142,7 +231,7 @@ pub async fn get_file_schema(
142231 Ok(Json(json!({ 143232 "path": params.path, 144233 "commit": commit_oid.to_string(), 145- "language": language, 234+ "language": lang, 146235 "vertexCount": schema.vertices.len(), 147236 "edgeCount": schema.edges.len(), 148237 "vertices": vertices,
@@ -1,14 +1,18 @@
11 //! `GET /xrpc/dev.panproto.node.getProjectSchema` 22 //! 3-//! Walks the commit tree at HEAD (or a specified commit), parses every 4-//! file via panproto's ParserRegistry, and returns per-file schema 5-//! statistics: language detection, vertex/edge counts, top-level named 6-//! elements. This powers the repo overview's Schema Health Card. 3+//! Returns project-level schema statistics by reading the already-imported 4+//! schema from the panproto-vcs store. The schema was parsed and stored 5+//! during git push via `import_git_repo_incremental`, so this is a cheap 6+//! read operation. Language detection uses file extensions from the git 7+//! tree (no re-parsing). Per-file vertex counts are extracted from the 8+//! stored schema's vertex IDs (which encode the file path prefix). 79 10+use std::collections::HashMap; 811 use std::sync::Arc; 912 1013 use axum::Json; 1114 use axum::extract::{Query, State}; 15+use panproto_core::vcs::{Object, Store}; 1216 use serde::Deserialize; 1317 use serde_json::{Value, json}; 1418
@@ -31,19 +35,21 @@ pub async fn get_project_schema(
3135 State(state): State<Arc<NodeState>>, 3236 Query(params): Query<Params>, 3337 ) -> Result<Json<Value>, NodeError> { 34- let max_files = params.max_files.unwrap_or(500).min(1000); 35- 36- let store = state.store.lock().await; 37- if !store.has_git_mirror(¶ms.did, ¶ms.repo) { 38+ let store_guard = state.store.lock().await; 39+ if !store_guard.has_git_mirror(¶ms.did, ¶ms.repo) { 3840 return Err(NodeError::RefNotFound(format!( 3941 "repo {}/{} not found", 4042 params.did, params.repo 4143 ))); 4244 } 43- let mirror = store 45+ let mirror = store_guard 4446 .open_or_init_git_mirror(¶ms.did, ¶ms.repo) 4547 .map_err(|e| NodeError::Internal(format!("open mirror: {e}")))?; 46- drop(store); 48+ 49+ // Try to read from the panproto-vcs store first (fast path). 50+ let vcs_store = store_guard.open(¶ms.did, ¶ms.repo).ok(); 51+ let marks = store_guard.load_import_marks(¶ms.did, ¶ms.repo); 52+ drop(store_guard); 4753 4854 // Resolve commit 4955 let commit_oid = match params.commit.as_deref() {
@@ -51,6 +57,20 @@ pub async fn get_project_schema(
5157 Some(name) => resolve_ref(&mirror, name)?, 5258 }; 5359 60+ // Try to load the schema from the vcs store via import marks. 61+ let stored_schema = marks 62+ .get(&commit_oid) 63+ .and_then(|pp_id| vcs_store.as_ref()?.get(pp_id).ok()) 64+ .and_then(|obj| match obj { 65+ Object::Commit(c) => vcs_store.as_ref()?.get(&c.schema_id).ok(), 66+ _ => None, 67+ }) 68+ .and_then(|obj| match obj { 69+ Object::Schema(s) => Some(*s), 70+ _ => None, 71+ }); 72+ 73+ // Walk the git tree for file listing and language detection. 5474 let commit = mirror 5575 .find_commit(commit_oid) 5676 .map_err(|e| NodeError::Internal(format!("find commit: {e}")))?;
@@ -58,17 +78,10 @@ pub async fn get_project_schema(
5878 .tree() 5979 .map_err(|e| NodeError::Internal(format!("commit tree: {e}")))?; 6080 61- // Walk tree, collect file blobs 6281 let registry = panproto_parse::ParserRegistry::new(); 63- let mut file_schemas: Vec<Value> = Vec::new(); 64- let mut lang_counts: std::collections::HashMap<String, (usize, usize)> = 65- std::collections::HashMap::new(); 66- let mut total_vertices = 0usize; 67- let mut total_edges = 0usize; 68- let mut parsed_count = 0usize; 69- 70- // Collect all blobs from the tree 71- let mut blobs: Vec<(String, git2::Oid)> = Vec::new(); 82+ 83+ // Collect all file paths from the tree. 84+ let mut file_paths: Vec<String> = Vec::new(); 7285 tree.walk(git2::TreeWalkMode::PreOrder, |dir, entry| { 7386 if entry.kind() == Some(git2::ObjectType::Blob) { 7487 let name = entry.name().unwrap_or("");
@@ -77,117 +90,167 @@ pub async fn get_project_schema(
7790 } else { 7891 format!("{dir}{name}") 7992 }; 80- blobs.push((path, entry.id())); 93+ file_paths.push(path); 8194 } 8295 git2::TreeWalkResult::Ok 8396 }) 8497 .map_err(|e| NodeError::Internal(format!("tree walk: {e}")))?; 8598 86- let file_count = blobs.len(); 87- 88- for (path, blob_oid) in blobs.iter().take(max_files) { 89- let blob = match mirror.find_blob(*blob_oid) { 90- Ok(b) => b, 91- Err(_) => continue, 92- }; 93- let bytes = blob.content(); 94- 95- let parsed = super::structural::parse_any(®istry, path, bytes); 96- if let Some((schema, language)) = parsed { 97- let vc = schema.vertices.len(); 98- let ec = schema.edges.len(); 99- total_vertices += vc; 100- total_edges += ec; 101- parsed_count += 1; 102- 103- // Extract top-level named elements. A "top-level" element is a 104- // vertex whose humanized form is a simple name (no "in" clause), 105- // meaning it sits at the outermost scope of the file. We also 106- // extract the scope name from nested elements as a fallback. 107- let mut top_names: Vec<String> = Vec::new(); 108- let mut seen_names = std::collections::HashSet::new(); 109- for vid in schema.vertices.keys() { 110- let vid_str: &str = vid; 111- // Skip purely anonymous IDs (all $N segments) 112- if vid_str.split("::").all(|s| s.starts_with('$') || s.contains('/') || s.contains('.')) { 113- continue; 114- } 115- let human = humanize_vertex(vid_str); 116- if human == vid_str { 117- // humanize_vertex returned the raw ID: fully anonymous 118- continue; 119- } 120- // Extract the name between backticks 99+ let file_count = file_paths.len(); 100+ 101+ // Language detection from file extensions (instant, no parsing). 
102+ let mut lang_file_counts: HashMap<String, usize> = HashMap::new(); 103+ for path in &file_paths { 104+ let p = std::path::Path::new(path); 105+ if let Some(lang) = registry.detect_language(p) { 106+ *lang_file_counts.entry(lang.to_string()).or_default() += 1; 107+ } 108+ } 109+ 110+ // If we have a stored schema, extract stats from it directly. 111+ if let Some(ref schema) = stored_schema { 112+ let total_vc = schema.vertices.len(); 113+ let total_ec = schema.edges.len(); 114+ 115+ // Extract per-file vertex counts from vertex IDs. 116+ // Vertex IDs are prefixed with the file path: "src/repo.ts::Repo::field" 117+ let mut file_vertex_counts: HashMap<String, usize> = HashMap::new(); 118+ let mut file_top_names: HashMap<String, Vec<String>> = HashMap::new(); 119+ 120+ for vid in schema.vertices.keys() { 121+ let vid_str: &str = vid; 122+ // Extract file path from vertex ID (everything before the first "::") 123+ let file_path = if vid_str.contains("::") { 124+ vid_str.split("::").next().unwrap_or(vid_str) 125+ } else if vid_str.contains(':') { 126+ // Lexicon style: "dev.cospan.repo:body.field" - no file path 127+ continue; 128+ } else { 129+ continue; 130+ }; 131+ 132+ *file_vertex_counts.entry(file_path.to_string()).or_default() += 1; 133+ 134+ // Extract top-level names for this file 135+ let human = humanize_vertex(vid_str); 136+ if human != vid_str && !human.contains(" in ") { 121137 if let Some(start) = human.find('`') { 122138 if let Some(end) = human[start + 1..].find('`') { 123- let extracted = &human[start + 1..start + 1 + end]; 124- if !extracted.is_empty() 125- && !extracted.starts_with('$') 126- && !human.contains(" in ") 127- && seen_names.insert(extracted.to_string()) 128- { 129- top_names.push(extracted.to_string()); 139+ let name = human[start + 1..start + 1 + end].to_string(); 140+ if !name.starts_with('$') && !name.is_empty() { 141+ let names = file_top_names 142+ .entry(file_path.to_string()) 143+ .or_default(); 144+ if !names.contains(&name) 
&& names.len() < 8 { 145+ names.push(name); 146+ } 130147 } 131148 } 132149 } 133150 } 134- top_names.sort(); 135- top_names.dedup(); 136- top_names.truncate(8); 137- 138- let entry = lang_counts.entry(language.clone()).or_insert((0, 0)); 139- entry.0 += 1; 140- entry.1 += vc; 141- 142- file_schemas.push(json!({ 143- "path": path, 144- "language": language, 145- "vertexCount": vc, 146- "edgeCount": ec, 147- "topNames": top_names, 148- })); 149151 } 152+ 153+ // Count per-file edges 154+ let mut file_edge_counts: HashMap<String, usize> = HashMap::new(); 155+ for (edge, _) in &schema.edges { 156+ let src_str: &str = &edge.src; 157+ if src_str.contains("::") { 158+ let file_path = src_str.split("::").next().unwrap_or(src_str); 159+ *file_edge_counts.entry(file_path.to_string()).or_default() += 1; 160+ } 161+ } 162+ 163+ // Build per-file schema entries 164+ let mut file_schemas: Vec<Value> = file_vertex_counts 165+ .iter() 166+ .map(|(path, vc)| { 167+ let ec = file_edge_counts.get(path).copied().unwrap_or(0); 168+ let lang = { 169+ let p = std::path::Path::new(path); 170+ registry 171+ .detect_language(p) 172+ .unwrap_or("unknown") 173+ .to_string() 174+ }; 175+ let top_names = file_top_names 176+ .get(path) 177+ .cloned() 178+ .unwrap_or_default(); 179+ json!({ 180+ "path": path, 181+ "language": lang, 182+ "vertexCount": vc, 183+ "edgeCount": ec, 184+ "topNames": top_names, 185+ }) 186+ }) 187+ .collect(); 188+ file_schemas.sort_by(|a, b| { 189+ b["vertexCount"].as_u64().cmp(&a["vertexCount"].as_u64()) 190+ }); 191+ 192+ // Add per-language vertex counts from the stored schema 193+ let mut lang_vertex_counts: HashMap<String, usize> = HashMap::new(); 194+ for (path, vc) in &file_vertex_counts { 195+ let p = std::path::Path::new(path); 196+ if let Some(lang) = registry.detect_language(p) { 197+ *lang_vertex_counts.entry(lang.to_string()).or_default() += *vc; 198+ } 199+ } 200+ 201+ let mut languages: Vec<Value> = lang_file_counts 202+ .iter() 203+ .map(|(name, fc)| { 
204+ json!({ 205+ "name": name, 206+ "fileCount": fc, 207+ "vertexCount": lang_vertex_counts.get(name).copied().unwrap_or(0), 208+ }) 209+ }) 210+ .collect(); 211+ languages.sort_by(|a, b| b["fileCount"].as_u64().cmp(&a["fileCount"].as_u64())); 212+ 213+ let protocol = lang_file_counts 214+ .iter() 215+ .max_by_key(|(_, fc)| *fc) 216+ .map(|(name, _)| name.clone()) 217+ .unwrap_or_default(); 218+ 219+ let parsed_count = file_vertex_counts.len(); 220+ 221+ return Ok(Json(json!({ 222+ "commit": commit_oid.to_string(), 223+ "protocol": protocol, 224+ "totalVertexCount": total_vc, 225+ "totalEdgeCount": total_ec, 226+ "fileCount": file_count, 227+ "parsedFileCount": parsed_count, 228+ "languages": languages, 229+ "fileSchemas": file_schemas, 230+ }))); 150231 } 151232 152- // Sort languages by file count descending 153- let mut languages: Vec<Value> = lang_counts 233+ // Fallback: no vcs store data. Return language stats from extensions only. 234+ let mut languages: Vec<Value> = lang_file_counts 154235 .iter() 155- .map(|(name, (fc, vc))| { 156- json!({ 157- "name": name, 158- "fileCount": fc, 159- "vertexCount": vc, 160- }) 161- }) 236+ .map(|(name, fc)| json!({ "name": name, "fileCount": fc, "vertexCount": 0 })) 162237 .collect(); 163- languages.sort_by(|a, b| { 164- b["fileCount"] 165- .as_u64() 166- .cmp(&a["fileCount"].as_u64()) 167- }); 168- 169- // Dominant protocol 170- let protocol = lang_counts 238+ languages.sort_by(|a, b| b["fileCount"].as_u64().cmp(&a["fileCount"].as_u64())); 239+ 240+ let protocol = lang_file_counts 171241 .iter() 172- .max_by_key(|(_, (fc, _))| *fc) 242+ .max_by_key(|(_, fc)| *fc) 173243 .map(|(name, _)| name.clone()) 174244 .unwrap_or_default(); 175245 176- // Sort file schemas by vertex count descending 177- file_schemas.sort_by(|a, b| { 178- b["vertexCount"] 179- .as_u64() 180- .cmp(&a["vertexCount"].as_u64()) 181- }); 182- 183246 Ok(Json(json!({ 184247 "commit": commit_oid.to_string(), 185248 "protocol": protocol, 186- 
"totalVertexCount": total_vertices, 187- "totalEdgeCount": total_edges, 249+ "totalVertexCount": 0, 250+ "totalEdgeCount": 0, 188251 "fileCount": file_count, 189- "parsedFileCount": parsed_count, 252+ "parsedFileCount": 0, 190253 "languages": languages, 191- "fileSchemas": file_schemas, 254+ "fileSchemas": [], 192255 }))) 193256 }
@@ -209,7 +209,7 @@ fn detect_json_protocol(json: &Value) -> Option<(Schema, String)> {
209209 /// the diff. Protocols that panproto-protocols exposes via a zero-arg 210210 /// `protocol()` function work here; anything else falls through to a 211211 /// conservative classification. 212-fn resolve_protocol(name: &str) -> Option<Protocol> { 212+pub(crate) fn resolve_protocol(name: &str) -> Option<Protocol> { 213213 // Lexicon is the most common case for Cospan's own repo, and the 214214 // atproto module re-exports the protocol constructor. 215215 if name == "atproto-lexicon" || name == "dev.panproto.atproto-lexicon" {