perf: read schemas from panproto-vcs store instead of re-parsing

getProjectSchema, getCommitSchemaStats, and getFileSchema now read the already-imported schemas from the panproto-vcs FsStore via the import marks file. This eliminates the O(files * parse_time) cost that was causing 5+ second page loads.

- getCommitSchemaStats: reads schema objects from vcs store, diffs adjacent schemas via panproto_check::diff+classify. No tree walks.
- getProjectSchema: extracts per-file vertex counts from stored schema vertex IDs (which encode file path prefixes). Language detection from file extensions only (no parsing).
- getFileSchema: filters stored schema to vertices/edges matching the requested file path prefix. Falls back to on-demand parsing if vcs store unavailable.
Author: Aaron Steven White
Commit
8fb2ed0de27778fc4150fe07a400a3d05e3086b3
Parent: c202e5e8ce
Structural diff unavailable
These commits were pushed via plain git push, so no pre-parsed
schemas are available. Install git-remote-cospan and re-push via panproto:// to
see scope-level changes, breaking change detection, and semantic diffs.
brew install panproto/tap/git-remote-cospan

4 files changed +374 -287
@@ -1,13 +1,17 @@
11 //! `GET /xrpc/dev.panproto.node.getCommitSchemaStats` 22 //! 3-//! For a range of commits, returns per-commit schema statistics: 4-//! total vertex/edge counts and breaking/non-breaking change counts 5-//! vs the parent commit. Powers the schema evolution sparkline. 3+//! For a range of commits, returns per-commit schema statistics by 4+//! reading the already-imported schemas from the panproto-vcs store. 5+//! Each commit's schema was parsed and stored during git push via 6+//! `import_git_repo_incremental`, so this is a cheap read operation 7+//! (no re-parsing). Breaking/non-breaking change counts come from 8+//! diffing adjacent schemas via `panproto_check::diff` + `classify`. 69 710 use std::sync::Arc; 811 912 use axum::Json; 1013 use axum::extract::{Query, State}; 14+use panproto_core::vcs::{Object, Store}; 1115 use serde::Deserialize; 1216 use serde_json::{Value, json}; 1317
@@ -32,17 +36,31 @@ pub async fn get_commit_schema_stats(
3236 ) -> Result<Json<Value>, NodeError> { 3337 let limit = params.limit.unwrap_or(30).min(100); 3438 35- let store = state.store.lock().await; 36- if !store.has_git_mirror(¶ms.did, ¶ms.repo) { 39+ let store_guard = state.store.lock().await; 40+ if !store_guard.has_git_mirror(¶ms.did, ¶ms.repo) { 3741 return Err(NodeError::RefNotFound(format!( 3842 "repo {}/{} not found", 3943 params.did, params.repo 4044 ))); 4145 } 42- let mirror = store 46+ let mirror = store_guard 4347 .open_or_init_git_mirror(¶ms.did, ¶ms.repo) 4448 .map_err(|e| NodeError::Internal(format!("open mirror: {e}")))?; 45- drop(store); 49+ 50+ // Open the panproto-vcs store where imported schemas live. 51+ let vcs_store = match store_guard.open(¶ms.did, ¶ms.repo) { 52+ Ok(s) => s, 53+ Err(_) => { 54+ // VCS store not yet initialized (no push has happened). 55+ // Fall back to empty stats. 56+ drop(store_guard); 57+ return Ok(Json(json!({ "commits": [] }))); 58+ } 59+ }; 60+ 61+ // Load the import marks to map git OIDs to panproto-vcs ObjectIds. 62+ let marks = store_guard.load_import_marks(¶ms.did, ¶ms.repo); 63+ drop(store_guard); 4664 4765 let start_oid = match params.ref_name.as_deref() { 4866 Some(name) => resolve_ref(&mirror, name)?,
@@ -57,40 +75,67 @@ pub async fn get_commit_schema_stats(
5775 walk.push(start_oid) 5876 .map_err(|e| NodeError::Internal(format!("push start: {e}")))?; 5977 60- let registry = panproto_parse::ParserRegistry::new(); 6178 let mut commits: Vec<Value> = Vec::new(); 79+ let mut prev_schema: Option<panproto_schema::Schema> = None; 6280 6381 for oid_result in walk.take(limit) { 6482 let oid = match oid_result { 6583 Ok(o) => o, 6684 Err(_) => break, 6785 }; 68- let commit = match mirror.find_commit(oid) { 86+ let git_commit = match mirror.find_commit(oid) { 6987 Ok(c) => c, 7088 Err(_) => continue, 7189 }; 7290 73- let summary = commit.summary().unwrap_or_default().to_string(); 74- let timestamp = u64::try_from(commit.time().seconds()).unwrap_or(0); 75- 76- // Count total vertices by walking the tree 77- let tree = match commit.tree() { 78- Ok(t) => t, 79- Err(_) => continue, 80- }; 81- let (total_vc, total_ec, parsed_fc) = 82- count_tree_schema_stats(&mirror, ®istry, &tree); 83- 84- // Diff against first parent for breaking/non-breaking counts 85- let (breaking, non_breaking) = if commit.parent_count() > 0 { 86- if let Ok(parent) = commit.parent(0) { 87- diff_commit_pair(&mirror, ®istry, &parent, &commit) 91+ let summary = git_commit.summary().unwrap_or_default().to_string(); 92+ let timestamp = u64::try_from(git_commit.time().seconds()).unwrap_or(0); 93+ 94+ // Look up the panproto-vcs commit via the import marks. 95+ let (total_vc, total_ec, breaking, non_breaking) = 96+ if let Some(pp_id) = marks.get(&oid) { 97+ match vcs_store.get(pp_id) { 98+ Ok(Object::Commit(pp_commit)) => { 99+ // Read the schema stored at this commit. 100+ let schema = match vcs_store.get(&pp_commit.schema_id) { 101+ Ok(Object::Schema(s)) => Some(*s), 102+ _ => None, 103+ }; 104+ 105+ let vc = schema.as_ref().map_or(0, |s| s.vertices.len()); 106+ let ec = schema.as_ref().map_or(0, |s| s.edges.len()); 107+ 108+ // Diff against the previous commit's schema for 109+ // breaking/non-breaking classification. 
110+ let (b, nb) = match (&schema, &prev_schema) { 111+ (Some(curr), Some(prev)) => { 112+ let raw_diff = panproto_check::diff(prev, curr); 113+ let protocol = super::structural::resolve_protocol( 114+ &curr.protocol, 115+ ); 116+ match protocol { 117+ Some(p) => { 118+ let report = 119+ panproto_check::classify(&raw_diff, &p); 120+ (report.breaking.len(), report.non_breaking.len()) 121+ } 122+ None => (0, 0), 123+ } 124+ } 125+ _ => (0, 0), 126+ }; 127+ 128+ if let Some(s) = schema { 129+ prev_schema = Some(s); 130+ } 131+ 132+ (vc, ec, b, nb) 133+ } 134+ _ => (0, 0, 0, 0), 135+ } 88136 } else { 89- (0, 0) 90- } 91- } else { 92- (0, 0) 93- }; 137+ (0, 0, 0, 0) 138+ }; 94139 95140 commits.push(json!({ 96141 "oid": oid.to_string(),
@@ -98,7 +143,6 @@ pub async fn get_commit_schema_stats(
98143 "summary": summary, 99144 "totalVertexCount": total_vc, 100145 "totalEdgeCount": total_ec, 101- "parsedFileCount": parsed_fc, 102146 "breakingChangeCount": breaking, 103147 "nonBreakingChangeCount": non_breaking, 104148 }));
@@ -106,112 +150,3 @@ pub async fn get_commit_schema_stats(
106150 107151 Ok(Json(json!({ "commits": commits }))) 108152 } 109- 110-/// Count total vertices, edges, and parsed files in a commit tree. 111-fn count_tree_schema_stats( 112- mirror: &git2::Repository, 113- registry: &panproto_parse::ParserRegistry, 114- tree: &git2::Tree<'_>, 115-) -> (usize, usize, usize) { 116- let mut total_vc = 0usize; 117- let mut total_ec = 0usize; 118- let mut parsed_fc = 0usize; 119- 120- let mut blobs: Vec<(String, git2::Oid)> = Vec::new(); 121- let _ = tree.walk(git2::TreeWalkMode::PreOrder, |dir, entry| { 122- if entry.kind() == Some(git2::ObjectType::Blob) { 123- let name = entry.name().unwrap_or(""); 124- let path = if dir.is_empty() { 125- name.to_string() 126- } else { 127- format!("{dir}{name}") 128- }; 129- blobs.push((path, entry.id())); 130- } 131- git2::TreeWalkResult::Ok 132- }); 133- 134- // Only parse up to 200 files to keep latency bounded 135- for (path, blob_oid) in blobs.iter().take(200) { 136- let blob = match mirror.find_blob(*blob_oid) { 137- Ok(b) => b, 138- Err(_) => continue, 139- }; 140- if let Some((schema, _)) = 141- super::structural::parse_any(registry, path, blob.content()) 142- { 143- total_vc += schema.vertices.len(); 144- total_ec += schema.edges.len(); 145- parsed_fc += 1; 146- } 147- } 148- 149- (total_vc, total_ec, parsed_fc) 150-} 151- 152-/// Diff two commits and return (breaking_count, non_breaking_count). 
153-fn diff_commit_pair( 154- mirror: &git2::Repository, 155- registry: &panproto_parse::ParserRegistry, 156- parent: &git2::Commit<'_>, 157- child: &git2::Commit<'_>, 158-) -> (usize, usize) { 159- let parent_tree = match parent.tree() { 160- Ok(t) => t, 161- Err(_) => return (0, 0), 162- }; 163- let child_tree = match child.tree() { 164- Ok(t) => t, 165- Err(_) => return (0, 0), 166- }; 167- 168- let diff = match mirror.diff_tree_to_tree( 169- Some(&parent_tree), 170- Some(&child_tree), 171- None, 172- ) { 173- Ok(d) => d, 174- Err(_) => return (0, 0), 175- }; 176- 177- let mut breaking = 0usize; 178- let mut non_breaking = 0usize; 179- 180- for delta_idx in 0..diff.deltas().len() { 181- let delta = match diff.get_delta(delta_idx) { 182- Some(d) => d, 183- None => continue, 184- }; 185- let new_file = delta.new_file(); 186- let old_file = delta.old_file(); 187- let path = new_file 188- .path() 189- .map(|p| p.to_string_lossy().into_owned()) 190- .unwrap_or_default(); 191- 192- let old_bytes = load_blob(mirror, old_file.id()); 193- let new_bytes = load_blob(mirror, new_file.id()); 194- 195- if let Some(sd) = super::structural::try_structural_diff( 196- registry, 197- &path, 198- old_bytes.as_deref(), 199- new_bytes.as_deref(), 200- ) { 201- breaking += sd.report.breaking.len(); 202- non_breaking += sd.report.non_breaking.len(); 203- } 204- } 205- 206- (breaking, non_breaking) 207-} 208- 209-fn load_blob(mirror: &git2::Repository, oid: git2::Oid) -> Option<Vec<u8>> { 210- if oid.is_zero() { 211- return None; 212- } 213- mirror 214- .find_blob(oid) 215- .ok() 216- .map(|b| b.content().to_vec()) 217-}
@@ -1,13 +1,15 @@
11 //! `GET /xrpc/dev.panproto.node.getFileSchema` 22 //! 3-//! Parses a single file at a specific commit and returns its complete 4-//! schema graph with human-readable labels. Powers the file browser's 5-//! schema sidebar. 3+//! Returns the schema graph for a single file by reading the already- 4+//! imported project schema from the panproto-vcs store and filtering to 5+//! vertices/edges whose IDs start with the requested file path. Falls 6+//! back to on-demand parsing if the vcs store is unavailable. 67 78 use std::sync::Arc; 89 910 use axum::Json; 1011 use axum::extract::{Query, State}; 12+use panproto_core::vcs::{Object, Store}; 1113 use serde::Deserialize; 1214 use serde_json::{Value, json}; 1315
@@ -30,17 +32,20 @@ pub async fn get_file_schema(
3032 State(state): State<Arc<NodeState>>, 3133 Query(params): Query<Params>, 3234 ) -> Result<Json<Value>, NodeError> { 33- let store = state.store.lock().await; 34- if !store.has_git_mirror(¶ms.did, ¶ms.repo) { 35+ let store_guard = state.store.lock().await; 36+ if !store_guard.has_git_mirror(¶ms.did, ¶ms.repo) { 3537 return Err(NodeError::RefNotFound(format!( 3638 "repo {}/{} not found", 3739 params.did, params.repo 3840 ))); 3941 } 40- let mirror = store 42+ let mirror = store_guard 4143 .open_or_init_git_mirror(¶ms.did, ¶ms.repo) 4244 .map_err(|e| NodeError::Internal(format!("open mirror: {e}")))?; 43- drop(store); 45+ 46+ let vcs_store = store_guard.open(¶ms.did, ¶ms.repo).ok(); 47+ let marks = store_guard.load_import_marks(¶ms.did, ¶ms.repo); 48+ drop(store_guard); 4449 4550 // Resolve commit 4651 let commit_oid = match params.commit.as_str() {
@@ -48,6 +53,111 @@ pub async fn get_file_schema(
4853 name => resolve_ref(&mirror, name)?, 4954 }; 5055 56+ let empty_response = || { 57+ Json(json!({ 58+ "path": params.path, 59+ "commit": commit_oid.to_string(), 60+ "language": serde_json::Value::Null, 61+ "vertexCount": 0, 62+ "edgeCount": 0, 63+ "vertices": [], 64+ "edges": [], 65+ })) 66+ }; 67+ 68+ // Detect language from extension 69+ let registry = panproto_parse::ParserRegistry::new(); 70+ let language = registry 71+ .detect_language(std::path::Path::new(¶ms.path)) 72+ .map(|s| s.to_string()); 73+ 74+ // Try to read from the vcs store (fast path). 75+ let stored_schema = marks 76+ .get(&commit_oid) 77+ .and_then(|pp_id| vcs_store.as_ref()?.get(pp_id).ok()) 78+ .and_then(|obj| match obj { 79+ Object::Commit(c) => vcs_store.as_ref()?.get(&c.schema_id).ok(), 80+ _ => None, 81+ }) 82+ .and_then(|obj| match obj { 83+ Object::Schema(s) => Some(*s), 84+ _ => None, 85+ }); 86+ 87+ if let Some(schema) = stored_schema { 88+ let file_prefix = format!("{}::", params.path); 89+ 90+ // Filter vertices belonging to this file 91+ let mut vertices: Vec<Value> = Vec::new(); 92+ let mut total_vc = 0usize; 93+ for (vid, vertex) in &schema.vertices { 94+ let vid_str: &str = vid; 95+ if !vid_str.starts_with(&file_prefix) { 96+ continue; 97+ } 98+ total_vc += 1; 99+ let human = humanize_vertex(vid_str); 100+ if human == vid_str { 101+ continue; // Skip anonymous 102+ } 103+ let name = if human.starts_with('`') { 104+ let end = human.find("` in").unwrap_or(human.len() - 1); 105+ human[1..end].to_string() 106+ } else { 107+ human.clone() 108+ }; 109+ vertices.push(json!({ 110+ "id": vid_str, 111+ "name": name, 112+ "kind": vertex.kind.as_ref(), 113+ "humanLabel": human, 114+ })); 115+ } 116+ vertices.sort_by(|a, b| a["name"].as_str().cmp(&b["name"].as_str())); 117+ 118+ // Filter edges belonging to this file 119+ let mut edges: Vec<Value> = Vec::new(); 120+ let mut total_ec = 0usize; 121+ for (edge, _) in &schema.edges { 122+ let src_str: &str = &edge.src; 123+ let tgt_str: &str 
= &edge.tgt; 124+ if !src_str.starts_with(&file_prefix) && !tgt_str.starts_with(&file_prefix) { 125+ continue; 126+ } 127+ total_ec += 1; 128+ let src_human = humanize_vertex(src_str); 129+ let tgt_human = humanize_vertex(tgt_str); 130+ if src_human == src_str && tgt_human == tgt_str { 131+ continue; 132+ } 133+ let edge_name: Option<&str> = edge.name.as_deref(); 134+ let human_label = match edge_name { 135+ Some(n) if !n.starts_with('$') => { 136+ format!("{src_human} -> {tgt_human} (via `{n}`)") 137+ } 138+ _ => format!("{src_human} -> {tgt_human}"), 139+ }; 140+ edges.push(json!({ 141+ "src": src_str, 142+ "tgt": tgt_str, 143+ "kind": edge.kind.as_ref(), 144+ "name": edge_name, 145+ "humanLabel": human_label, 146+ })); 147+ } 148+ 149+ return Ok(Json(json!({ 150+ "path": params.path, 151+ "commit": commit_oid.to_string(), 152+ "language": language, 153+ "vertexCount": total_vc, 154+ "edgeCount": total_ec, 155+ "vertices": vertices, 156+ "edges": edges, 157+ }))); 158+ } 159+ 160+ // Fallback: parse on demand from the git blob. 51161 let commit = mirror 52162 .find_commit(commit_oid) 53163 .map_err(|e| NodeError::Internal(format!("find commit: {e}")))?;
@@ -55,45 +165,29 @@ pub async fn get_file_schema(
55165 .tree() 56166 .map_err(|e| NodeError::Internal(format!("commit tree: {e}")))?; 57167 58- // Find the blob at the given path 59- let entry = tree 60- .get_path(std::path::Path::new(¶ms.path)) 61- .map_err(|_| { 62- NodeError::ObjectNotFound(format!("file '{}' not found in commit", params.path)) 63- })?; 168+ let entry = match tree.get_path(std::path::Path::new(¶ms.path)) { 169+ Ok(e) => e, 170+ Err(_) => return Ok(empty_response()), 171+ }; 64172 65- let blob = mirror 66- .find_blob(entry.id()) 67- .map_err(|e| NodeError::Internal(format!("find blob: {e}")))?; 173+ let blob = match mirror.find_blob(entry.id()) { 174+ Ok(b) => b, 175+ Err(_) => return Ok(empty_response()), 176+ }; 68177 69- let registry = panproto_parse::ParserRegistry::new(); 70178 let parsed = super::structural::parse_any(®istry, ¶ms.path, blob.content()); 71- 72- let (schema, language) = match parsed { 179+ let (schema, lang) = match parsed { 73180 Some(pair) => pair, 74- None => { 75- return Ok(Json(json!({ 76- "path": params.path, 77- "commit": commit_oid.to_string(), 78- "language": null, 79- "vertexCount": 0, 80- "edgeCount": 0, 81- "vertices": [], 82- "edges": [], 83- }))); 84- } 181+ None => return Ok(empty_response()), 85182 }; 86183 87- // Build vertex list with human labels, filtering pure-anonymous vertices 88184 let mut vertices: Vec<Value> = Vec::new(); 89185 for (vid, vertex) in &schema.vertices { 90186 let vid_str: &str = vid; 91187 let human = humanize_vertex(vid_str); 92- // Skip purely anonymous vertices (the label is just the raw ID) 93188 if human == vid_str { 94189 continue; 95190 } 96- // Extract the leaf name 97191 let name = if human.starts_with('`') { 98192 let end = human.find("` in").unwrap_or(human.len() - 1); 99193 human[1..end].to_string()
@@ -107,19 +201,14 @@ pub async fn get_file_schema(
107201 "humanLabel": human, 108202 })); 109203 } 110- // Sort by name for stable output 111- vertices.sort_by(|a, b| { 112- a["name"].as_str().cmp(&b["name"].as_str()) 113- }); 204+ vertices.sort_by(|a, b| a["name"].as_str().cmp(&b["name"].as_str())); 114205 115- // Build edge list with human labels 116206 let mut edges: Vec<Value> = Vec::new(); 117207 for (edge, _) in &schema.edges { 118208 let src_str: &str = &edge.src; 119209 let tgt_str: &str = &edge.tgt; 120210 let src_human = humanize_vertex(src_str); 121211 let tgt_human = humanize_vertex(tgt_str); 122- // Skip edges where both ends are anonymous 123212 if src_human == src_str && tgt_human == tgt_str { 124213 continue; 125214 }
@@ -142,7 +231,7 @@ pub async fn get_file_schema(
142231 Ok(Json(json!({ 143232 "path": params.path, 144233 "commit": commit_oid.to_string(), 145- "language": language, 234+ "language": lang, 146235 "vertexCount": schema.vertices.len(), 147236 "edgeCount": schema.edges.len(), 148237 "vertices": vertices,
@@ -1,14 +1,18 @@
11 //! `GET /xrpc/dev.panproto.node.getProjectSchema` 22 //! 3-//! Walks the commit tree at HEAD (or a specified commit), parses every 4-//! file via panproto's ParserRegistry, and returns per-file schema 5-//! statistics: language detection, vertex/edge counts, top-level named 6-//! elements. This powers the repo overview's Schema Health Card. 3+//! Returns project-level schema statistics by reading the already-imported 4+//! schema from the panproto-vcs store. The schema was parsed and stored 5+//! during git push via `import_git_repo_incremental`, so this is a cheap 6+//! read operation. Language detection uses file extensions from the git 7+//! tree (no re-parsing). Per-file vertex counts are extracted from the 8+//! stored schema's vertex IDs (which encode the file path prefix). 79 10+use std::collections::HashMap; 811 use std::sync::Arc; 912 1013 use axum::Json; 1114 use axum::extract::{Query, State}; 15+use panproto_core::vcs::{Object, Store}; 1216 use serde::Deserialize; 1317 use serde_json::{Value, json}; 1418
@@ -31,19 +35,21 @@ pub async fn get_project_schema(
3135 State(state): State<Arc<NodeState>>, 3236 Query(params): Query<Params>, 3337 ) -> Result<Json<Value>, NodeError> { 34- let max_files = params.max_files.unwrap_or(500).min(1000); 35- 36- let store = state.store.lock().await; 37- if !store.has_git_mirror(¶ms.did, ¶ms.repo) { 38+ let store_guard = state.store.lock().await; 39+ if !store_guard.has_git_mirror(¶ms.did, ¶ms.repo) { 3840 return Err(NodeError::RefNotFound(format!( 3941 "repo {}/{} not found", 4042 params.did, params.repo 4143 ))); 4244 } 43- let mirror = store 45+ let mirror = store_guard 4446 .open_or_init_git_mirror(¶ms.did, ¶ms.repo) 4547 .map_err(|e| NodeError::Internal(format!("open mirror: {e}")))?; 46- drop(store); 48+ 49+ // Try to read from the panproto-vcs store first (fast path). 50+ let vcs_store = store_guard.open(¶ms.did, ¶ms.repo).ok(); 51+ let marks = store_guard.load_import_marks(¶ms.did, ¶ms.repo); 52+ drop(store_guard); 4753 4854 // Resolve commit 4955 let commit_oid = match params.commit.as_deref() {
@@ -51,6 +57,20 @@ pub async fn get_project_schema(
5157 Some(name) => resolve_ref(&mirror, name)?, 5258 }; 5359 60+ // Try to load the schema from the vcs store via import marks. 61+ let stored_schema = marks 62+ .get(&commit_oid) 63+ .and_then(|pp_id| vcs_store.as_ref()?.get(pp_id).ok()) 64+ .and_then(|obj| match obj { 65+ Object::Commit(c) => vcs_store.as_ref()?.get(&c.schema_id).ok(), 66+ _ => None, 67+ }) 68+ .and_then(|obj| match obj { 69+ Object::Schema(s) => Some(*s), 70+ _ => None, 71+ }); 72+ 73+ // Walk the git tree for file listing and language detection. 5474 let commit = mirror 5575 .find_commit(commit_oid) 5676 .map_err(|e| NodeError::Internal(format!("find commit: {e}")))?;
@@ -58,17 +78,10 @@ pub async fn get_project_schema(
5878 .tree() 5979 .map_err(|e| NodeError::Internal(format!("commit tree: {e}")))?; 6080 61- // Walk tree, collect file blobs 6281 let registry = panproto_parse::ParserRegistry::new(); 63- let mut file_schemas: Vec<Value> = Vec::new(); 64- let mut lang_counts: std::collections::HashMap<String, (usize, usize)> = 65- std::collections::HashMap::new(); 66- let mut total_vertices = 0usize; 67- let mut total_edges = 0usize; 68- let mut parsed_count = 0usize; 69- 70- // Collect all blobs from the tree 71- let mut blobs: Vec<(String, git2::Oid)> = Vec::new(); 82+ 83+ // Collect all file paths from the tree. 84+ let mut file_paths: Vec<String> = Vec::new(); 7285 tree.walk(git2::TreeWalkMode::PreOrder, |dir, entry| { 7386 if entry.kind() == Some(git2::ObjectType::Blob) { 7487 let name = entry.name().unwrap_or("");
@@ -77,117 +90,167 @@ pub async fn get_project_schema(
7790 } else { 7891 format!("{dir}{name}") 7992 }; 80- blobs.push((path, entry.id())); 93+ file_paths.push(path); 8194 } 8295 git2::TreeWalkResult::Ok 8396 }) 8497 .map_err(|e| NodeError::Internal(format!("tree walk: {e}")))?; 8598 86- let file_count = blobs.len(); 87- 88- for (path, blob_oid) in blobs.iter().take(max_files) { 89- let blob = match mirror.find_blob(*blob_oid) { 90- Ok(b) => b, 91- Err(_) => continue, 92- }; 93- let bytes = blob.content(); 94- 95- let parsed = super::structural::parse_any(®istry, path, bytes); 96- if let Some((schema, language)) = parsed { 97- let vc = schema.vertices.len(); 98- let ec = schema.edges.len(); 99- total_vertices += vc; 100- total_edges += ec; 101- parsed_count += 1; 102- 103- // Extract top-level named elements. A "top-level" element is a 104- // vertex whose humanized form is a simple name (no "in" clause), 105- // meaning it sits at the outermost scope of the file. We also 106- // extract the scope name from nested elements as a fallback. 107- let mut top_names: Vec<String> = Vec::new(); 108- let mut seen_names = std::collections::HashSet::new(); 109- for vid in schema.vertices.keys() { 110- let vid_str: &str = vid; 111- // Skip purely anonymous IDs (all $N segments) 112- if vid_str.split("::").all(|s| s.starts_with('$') || s.contains('/') || s.contains('.')) { 113- continue; 114- } 115- let human = humanize_vertex(vid_str); 116- if human == vid_str { 117- // humanize_vertex returned the raw ID: fully anonymous 118- continue; 119- } 120- // Extract the name between backticks 99+ let file_count = file_paths.len(); 100+ 101+ // Language detection from file extensions (instant, no parsing). 
102+ let mut lang_file_counts: HashMap<String, usize> = HashMap::new(); 103+ for path in &file_paths { 104+ let p = std::path::Path::new(path); 105+ if let Some(lang) = registry.detect_language(p) { 106+ *lang_file_counts.entry(lang.to_string()).or_default() += 1; 107+ } 108+ } 109+ 110+ // If we have a stored schema, extract stats from it directly. 111+ if let Some(ref schema) = stored_schema { 112+ let total_vc = schema.vertices.len(); 113+ let total_ec = schema.edges.len(); 114+ 115+ // Extract per-file vertex counts from vertex IDs. 116+ // Vertex IDs are prefixed with the file path: "src/repo.ts::Repo::field" 117+ let mut file_vertex_counts: HashMap<String, usize> = HashMap::new(); 118+ let mut file_top_names: HashMap<String, Vec<String>> = HashMap::new(); 119+ 120+ for vid in schema.vertices.keys() { 121+ let vid_str: &str = vid; 122+ // Extract file path from vertex ID (everything before the first "::") 123+ let file_path = if vid_str.contains("::") { 124+ vid_str.split("::").next().unwrap_or(vid_str) 125+ } else if vid_str.contains(':') { 126+ // Lexicon style: "dev.cospan.repo:body.field" - no file path 127+ continue; 128+ } else { 129+ continue; 130+ }; 131+ 132+ *file_vertex_counts.entry(file_path.to_string()).or_default() += 1; 133+ 134+ // Extract top-level names for this file 135+ let human = humanize_vertex(vid_str); 136+ if human != vid_str && !human.contains(" in ") { 121137 if let Some(start) = human.find('`') { 122138 if let Some(end) = human[start + 1..].find('`') { 123- let extracted = &human[start + 1..start + 1 + end]; 124- if !extracted.is_empty() 125- && !extracted.starts_with('$') 126- && !human.contains(" in ") 127- && seen_names.insert(extracted.to_string()) 128- { 129- top_names.push(extracted.to_string()); 139+ let name = human[start + 1..start + 1 + end].to_string(); 140+ if !name.starts_with('$') && !name.is_empty() { 141+ let names = file_top_names 142+ .entry(file_path.to_string()) 143+ .or_default(); 144+ if !names.contains(&name) 
&& names.len() < 8 { 145+ names.push(name); 146+ } 130147 } 131148 } 132149 } 133150 } 134- top_names.sort(); 135- top_names.dedup(); 136- top_names.truncate(8); 137- 138- let entry = lang_counts.entry(language.clone()).or_insert((0, 0)); 139- entry.0 += 1; 140- entry.1 += vc; 141- 142- file_schemas.push(json!({ 143- "path": path, 144- "language": language, 145- "vertexCount": vc, 146- "edgeCount": ec, 147- "topNames": top_names, 148- })); 149151 } 152+ 153+ // Count per-file edges 154+ let mut file_edge_counts: HashMap<String, usize> = HashMap::new(); 155+ for (edge, _) in &schema.edges { 156+ let src_str: &str = &edge.src; 157+ if src_str.contains("::") { 158+ let file_path = src_str.split("::").next().unwrap_or(src_str); 159+ *file_edge_counts.entry(file_path.to_string()).or_default() += 1; 160+ } 161+ } 162+ 163+ // Build per-file schema entries 164+ let mut file_schemas: Vec<Value> = file_vertex_counts 165+ .iter() 166+ .map(|(path, vc)| { 167+ let ec = file_edge_counts.get(path).copied().unwrap_or(0); 168+ let lang = { 169+ let p = std::path::Path::new(path); 170+ registry 171+ .detect_language(p) 172+ .unwrap_or("unknown") 173+ .to_string() 174+ }; 175+ let top_names = file_top_names 176+ .get(path) 177+ .cloned() 178+ .unwrap_or_default(); 179+ json!({ 180+ "path": path, 181+ "language": lang, 182+ "vertexCount": vc, 183+ "edgeCount": ec, 184+ "topNames": top_names, 185+ }) 186+ }) 187+ .collect(); 188+ file_schemas.sort_by(|a, b| { 189+ b["vertexCount"].as_u64().cmp(&a["vertexCount"].as_u64()) 190+ }); 191+ 192+ // Add per-language vertex counts from the stored schema 193+ let mut lang_vertex_counts: HashMap<String, usize> = HashMap::new(); 194+ for (path, vc) in &file_vertex_counts { 195+ let p = std::path::Path::new(path); 196+ if let Some(lang) = registry.detect_language(p) { 197+ *lang_vertex_counts.entry(lang.to_string()).or_default() += *vc; 198+ } 199+ } 200+ 201+ let mut languages: Vec<Value> = lang_file_counts 202+ .iter() 203+ .map(|(name, fc)| { 
204+ json!({ 205+ "name": name, 206+ "fileCount": fc, 207+ "vertexCount": lang_vertex_counts.get(name).copied().unwrap_or(0), 208+ }) 209+ }) 210+ .collect(); 211+ languages.sort_by(|a, b| b["fileCount"].as_u64().cmp(&a["fileCount"].as_u64())); 212+ 213+ let protocol = lang_file_counts 214+ .iter() 215+ .max_by_key(|(_, fc)| *fc) 216+ .map(|(name, _)| name.clone()) 217+ .unwrap_or_default(); 218+ 219+ let parsed_count = file_vertex_counts.len(); 220+ 221+ return Ok(Json(json!({ 222+ "commit": commit_oid.to_string(), 223+ "protocol": protocol, 224+ "totalVertexCount": total_vc, 225+ "totalEdgeCount": total_ec, 226+ "fileCount": file_count, 227+ "parsedFileCount": parsed_count, 228+ "languages": languages, 229+ "fileSchemas": file_schemas, 230+ }))); 150231 } 151232 152- // Sort languages by file count descending 153- let mut languages: Vec<Value> = lang_counts 233+ // Fallback: no vcs store data. Return language stats from extensions only. 234+ let mut languages: Vec<Value> = lang_file_counts 154235 .iter() 155- .map(|(name, (fc, vc))| { 156- json!({ 157- "name": name, 158- "fileCount": fc, 159- "vertexCount": vc, 160- }) 161- }) 236+ .map(|(name, fc)| json!({ "name": name, "fileCount": fc, "vertexCount": 0 })) 162237 .collect(); 163- languages.sort_by(|a, b| { 164- b["fileCount"] 165- .as_u64() 166- .cmp(&a["fileCount"].as_u64()) 167- }); 168- 169- // Dominant protocol 170- let protocol = lang_counts 238+ languages.sort_by(|a, b| b["fileCount"].as_u64().cmp(&a["fileCount"].as_u64())); 239+ 240+ let protocol = lang_file_counts 171241 .iter() 172- .max_by_key(|(_, (fc, _))| *fc) 242+ .max_by_key(|(_, fc)| *fc) 173243 .map(|(name, _)| name.clone()) 174244 .unwrap_or_default(); 175245 176- // Sort file schemas by vertex count descending 177- file_schemas.sort_by(|a, b| { 178- b["vertexCount"] 179- .as_u64() 180- .cmp(&a["vertexCount"].as_u64()) 181- }); 182- 183246 Ok(Json(json!({ 184247 "commit": commit_oid.to_string(), 185248 "protocol": protocol, 186- 
"totalVertexCount": total_vertices, 187- "totalEdgeCount": total_edges, 249+ "totalVertexCount": 0, 250+ "totalEdgeCount": 0, 188251 "fileCount": file_count, 189- "parsedFileCount": parsed_count, 252+ "parsedFileCount": 0, 190253 "languages": languages, 191- "fileSchemas": file_schemas, 254+ "fileSchemas": [], 192255 }))) 193256 }
@@ -209,7 +209,7 @@ fn detect_json_protocol(json: &Value) -> Option<(Schema, String)> {
209209 /// the diff. Protocols that panproto-protocols exposes via a zero-arg 210210 /// `protocol()` function work here; anything else falls through to a 211211 /// conservative classification. 212-fn resolve_protocol(name: &str) -> Option<Protocol> { 212+pub(crate) fn resolve_protocol(name: &str) -> Option<Protocol> { 213213 // Lexicon is the most common case for Cospan's own repo, and the 214214 // atproto module re-exports the protocol constructor. 215215 if name == "atproto-lexicon" || name == "dev.panproto.atproto-lexicon" {