fix: remove server-side schema import, add on-demand fallback parsing

Server-side import via import_git_repo_incremental in receive_pack was parsing every file in every commit through tree-sitter on a 2GB server, taking 30+ minutes and blocking all requests. This is the wrong architecture: panproto's git-remote-cospan does client-side parsing and pushes pre-parsed Schema objects via XRPC.

Changes:
- receive_pack: remove background import_git_repo_incremental call. Raw git push now only updates the git mirror. Schema data requires pushing via git-remote-cospan (panproto:// URL).
- getProjectSchema: add on-demand fallback that parses up to 50 files from HEAD when vcs store has no data. Gives immediate schema stats while the user transitions to git-remote-cospan.

Tracked: panproto/panproto#28 (distribute git-remote-cospan binary)

Author: Aaron Steven White
Commit c043cb60aeebc5a9cbc7185af76d2a495e6b2de4
Parent: 75f860c20d
Structural diff unavailable

These commits were pushed via plain git push, so no pre-parsed schemas are available. Install git-remote-cospan and re-push via panproto:// to see scope-level changes, breaking change detection, and semantic diffs.

brew install panproto/tap/git-remote-cospan
2 files changed +88 -72
@@ -214,68 +214,26 @@ pub async fn git_receive_pack(
214214 
215215     drop(store_guard);
216216 
217-    // 5. Import into panproto-vcs asynchronously using incremental
218-    //    import. The marks file tracks which git OIDs have already been
219-    //    translated, so repeated pushes only import new commits.
217+    // 5. Schema import is NOT done server-side. The correct flow is:
218+    //
219+    //    a) User installs git-remote-cospan (from panproto)
220+    //    b) User pushes via: git push panproto://did/repo main
221+    //    c) git-remote-cospan parses files LOCALLY via panproto
222+    //    d) Pre-parsed Schema + Commit objects are sent via XRPC
223+    //    e) Node stores them directly (zero parsing)
224+    //
225+    //    Raw git push (this handler) only updates the git mirror.
226+    //    Schema data becomes available when pushed via git-remote-cospan
227+    //    or when the on-demand parser in getProjectSchema runs.
228+    //
229+    //    See: https://github.com/panproto/panproto/issues/28
220230     if !import_tasks.is_empty() {
221-        let store_clone = state.store.clone();
222-        let did_clone = did.clone();
223-        let repo_clone = repo.clone();
224-        tokio::task::spawn_blocking(move || {
225-            // Open the stores under the lock, load marks, then DROP the
226-            // lock before the expensive import.
227-            let (mirror, mut vcs_store, known) = {
228-                let store_guard = store_clone.blocking_lock();
229-                let mirror = match store_guard.open_or_init_git_mirror(&did_clone, &repo_clone) {
230-                    Ok(m) => m,
231-                    Err(e) => {
232-                        tracing::error!(error = %e, "background import: open mirror failed");
233-                        return;
234-                    }
235-                };
236-                let vcs_store = match store_guard.open_or_init(&did_clone, &repo_clone) {
237-                    Ok(s) => s,
238-                    Err(e) => {
239-                        tracing::error!(error = %e, "background import: open vcs store failed");
240-                        return;
241-                    }
242-                };
243-                let known = store_guard.load_import_marks(&did_clone, &repo_clone);
244-                (mirror, vcs_store, known)
245-                // store_guard dropped here: lock released
246-            };
247-            for (new_oid, refname) in &import_tasks {
248-                match panproto_git::import_git_repo_incremental(
249-                    &mirror,
250-                    &mut vcs_store,
251-                    new_oid,
252-                    &known,
253-                ) {
254-                    Ok(result) => {
255-                        let _ = panproto_vcs::Store::set_ref(&mut vcs_store, refname, result.head_id);
256-                        // Persist the new OID mappings for future incremental imports.
257-                        let store_guard = store_clone.blocking_lock();
258-                        store_guard.save_import_marks(
259-                            &did_clone,
260-                            &repo_clone,
261-                            &result.oid_map,
262-                        );
263-                        drop(store_guard);
264-                        tracing::info!(
265-                            did = %did_clone, repo = %repo_clone, %refname,
266-                            commits = result.commit_count,
267-                            "background: incrementally imported git commits into panproto-vcs"
268-                        );
269-                    }
270-                    Err(e) => {
271-                        tracing::warn!(
272-                            did = %did_clone, repo = %repo_clone, %refname, error = %e,
273-                            "background: panproto-vcs import failed"
274-                        );
275-                    }
276-                }
277-            }
278-        });
231+        tracing::info!(
232+            %did, %repo,
233+            refs = import_tasks.len(),
234+            "git push received; git mirror updated. For schema analysis, \
235+             push via git-remote-cospan (panproto:// URL) to send pre-parsed schemas."
236+        );
279237     }
280238 
281239     let full_response = format!("{}{}0000", pkt_line("unpack ok\n"), response);
@@ -80,8 +80,8 @@ pub async fn get_project_schema(
8080 
8181     let registry = panproto_parse::ParserRegistry::new();
8282 
83-    // Collect all file paths from the tree.
84-    let mut file_paths: Vec<String> = Vec::new();
83+    // Collect all file paths and blob OIDs from the tree.
84+    let mut file_entries: Vec<(String, git2::Oid)> = Vec::new();
8585     tree.walk(git2::TreeWalkMode::PreOrder, |dir, entry| {
8686         if entry.kind() == Some(git2::ObjectType::Blob) {
8787             let name = entry.name().unwrap_or("");
@@ -90,17 +90,17 @@ pub async fn get_project_schema(
9090             } else {
9191                 format!("{dir}{name}")
9292             };
93-            file_paths.push(path);
93+            file_entries.push((path, entry.id()));
9494         }
9595         git2::TreeWalkResult::Ok
9696     })
9797     .map_err(|e| NodeError::Internal(format!("tree walk: {e}")))?;
9898 
99-    let file_count = file_paths.len();
99+    let file_count = file_entries.len();
100100 
101101     // Language detection from file extensions (instant, no parsing).
102102     let mut lang_file_counts: HashMap<String, usize> = HashMap::new();
103-    for path in &file_paths {
103+    for (path, _) in &file_entries {
104104         let p = std::path::Path::new(path);
105105         if let Some(lang) = registry.detect_language(p) {
106106             *lang_file_counts.entry(lang.to_string()).or_default() += 1;
@@ -230,10 +230,68 @@ pub async fn get_project_schema(
230230         })));
231231     }
232232 
233-    // Fallback: no vcs store data. Return language stats from extensions only.
233+    // Fallback: no vcs store data. Parse a subset of files on demand
234+    // to give users immediate schema data until they switch to
235+    // git-remote-cospan (there is no server-side background import).
236+    // Cap at 50 files to keep latency under ~3 seconds.
237+    let on_demand_limit = 50;
238+    let mut total_vc = 0usize;
239+    let mut total_ec = 0usize;
240+    let mut parsed_count = 0usize;
241+    let mut lang_vertex_counts: HashMap<String, usize> = HashMap::new();
242+    let mut file_schemas: Vec<Value> = Vec::new();
243+
244+    for (path, blob_oid) in file_entries.iter().take(on_demand_limit) {
245+        let blob = match mirror.find_blob(*blob_oid) {
246+            Ok(b) => b,
247+            Err(_) => continue,
248+        };
249+        if let Some((schema, language)) = super::structural::parse_any(&registry, path, blob.content()) {
250+            let vc = schema.vertices.len();
251+            let ec = schema.edges.len();
252+            total_vc += vc;
253+            total_ec += ec;
254+            parsed_count += 1;
255+
256+            *lang_vertex_counts.entry(language.clone()).or_default() += vc;
257+
258+            let mut top_names: Vec<String> = Vec::new();
259+            let mut seen = std::collections::HashSet::new();
260+            for vid in schema.vertices.keys() {
261+                let vid_str: &str = vid;
262+                let human = humanize_vertex(vid_str);
263+                if human != vid_str && !human.contains(" in ") {
264+                    if let Some(start) = human.find('`') {
265+                        if let Some(end) = human[start + 1..].find('`') {
266+                            let name = &human[start + 1..start + 1 + end];
267+                            if !name.is_empty() && !name.starts_with('$') && seen.insert(name.to_string()) {
268+                                top_names.push(name.to_string());
269+                            }
270+                        }
271+                    }
272+                }
273+            }
274+            top_names.truncate(8);
275+
276+            file_schemas.push(json!({
277+                "path": path,
278+                "language": language,
279+                "vertexCount": vc,
280+                "edgeCount": ec,
281+                "topNames": top_names,
282+            }));
283+        }
284+    }
285+
286+    file_schemas.sort_by(|a, b| b["vertexCount"].as_u64().cmp(&a["vertexCount"].as_u64()));
287+
234288     let mut languages: Vec<Value> = lang_file_counts
235289         .iter()
236-        .map(|(name, fc)| json!({ "name": name, "fileCount": fc, "vertexCount": 0 }))
290+        .map(|(name, fc)| json!({
291+            "name": name,
292+            "fileCount": fc,
293+            "vertexCount": lang_vertex_counts.get(name.as_str()).copied().unwrap_or(0),
294+        }))
237295         .collect();
238296     languages.sort_by(|a, b| b["fileCount"].as_u64().cmp(&a["fileCount"].as_u64()));
239297 
@@ -246,11 +304,11 @@ pub async fn get_project_schema(
246304     Ok(Json(json!({
247305         "commit": commit_oid.to_string(),
248306         "protocol": protocol,
249-        "totalVertexCount": 0,
250-        "totalEdgeCount": 0,
307+        "totalVertexCount": total_vc,
308+        "totalEdgeCount": total_ec,
251309         "fileCount": file_count,
252-        "parsedFileCount": 0,
310+        "parsedFileCount": parsed_count,
253311         "languages": languages,
254-        "fileSchemas": [],
312+        "fileSchemas": file_schemas,
255313     })))
256314 }
cospan · schematic version control on atproto · built on AT Protocol