Skip to content

Commit bc6b908

Browse files
committed
chore(test): dump_csharp debug tool for extractor inspection
Standalone binary that runs the C# extractor over one file and prints the resulting defs / type_assigns / calls / Field nodes with their parent_class and return_type. Useful when iterating on producer-side gRPC detection — being able to point the extractor at a real source file and read structured output is how a few of the C# 12 primary-ctor edge cases got found. Built via `make -f Makefile.cbm dump-csharp`. Not wired into the main test suite or CI.
1 parent 93fbfbf commit bc6b908

2 files changed

Lines changed: 134 additions & 0 deletions

File tree

Makefile.cbm

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -504,6 +504,20 @@ $(BUILD_DIR)/codebase-memory-mcp: $(MAIN_SRC) $(PROD_SRCS) $(EXTRACTION_SRCS) $(
504504
cbm: $(BUILD_DIR)/codebase-memory-mcp
505505
@echo "Built: $(BUILD_DIR)/codebase-memory-mcp"
506506

507+
# Standalone debug tool: dump C# extraction results for one file.
# Build with `make -f Makefile.cbm dump-csharp`.
#
# The sources are listed as prerequisites (not only in the recipe) so that
# editing tests/dump_csharp.c or any extractor source triggers a rebuild.
DUMP_CSHARP_SRCS = tests/dump_csharp.c \
    $(FOUNDATION_SRCS) \
    $(SIMHASH_SRCS) $(SEMANTIC_SRCS) \
    src/pipeline/worker_pool.c \
    $(EXTRACTION_SRCS) $(AC_LZ4_SRCS) $(ZSTD_SRCS)

$(BUILD_DIR)/dump_csharp: $(DUMP_CSHARP_SRCS) $(OBJS_VENDORED_PROD) | $(BUILD_DIR)
	$(CC) $(CFLAGS_PROD) -o $@ \
		$(DUMP_CSHARP_SRCS) \
		$(OBJS_VENDORED_PROD) \
		$(LDFLAGS)

# Convenience alias; phony so a stray file named `dump-csharp` cannot mask it.
.PHONY: dump-csharp
dump-csharp: $(BUILD_DIR)/dump_csharp
	@echo "Built: $(BUILD_DIR)/dump_csharp"
520+
507521
# ── Build with embedded UI (requires Node.js) ───────────────────
508522

509523
# Swap embedded_stub.c for the generated embedded_assets.c

tests/dump_csharp.c

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
/* dump_csharp.c — Standalone inspector for C# extraction.
2+
* Reads a file path, runs cbm_extract_file, prints calls/type_assigns/defs.
3+
*/
4+
#include "cbm.h"
5+
#include "tree_sitter/api.h"
6+
#include <stdio.h>
7+
#include <stdlib.h>
8+
#include <string.h>
9+
10+
extern const TSLanguage *tree_sitter_c_sharp(void);
11+
12+
/* Recursively print the syntax tree rooted at `n`, one node per line.
 *
 * n         - node to print.
 * depth     - current nesting level; each level indents the output by two
 *             spaces.
 * src       - source text the node byte offsets index into.
 * max_depth - nodes deeper than this are not printed (and not descended into).
 *
 * Each line shows the node type followed by up to 60 bytes of its source
 * text in brackets, with newlines flattened to spaces. When a child occupies
 * a named field of its parent, a ".field:" line is printed before the child.
 */
static void walk(TSNode n, int depth, const char *src, int max_depth) {
    enum { PREVIEW_MAX = 60 };
    char snippet[64];

    if (depth > max_depth) {
        return;
    }

    const char *type_name = ts_node_type(n);
    uint32_t start = ts_node_start_byte(n);
    uint32_t end = ts_node_end_byte(n);

    /* Clamp the preview so it always fits in `snippet` with its terminator. */
    int shown = (int)(end - start);
    if (shown > PREVIEW_MAX) {
        shown = PREVIEW_MAX;
    }
    memcpy(snippet, src + start, shown);
    snippet[shown] = 0;

    /* Keep every node on a single output line. */
    for (char *c = snippet; c < snippet + shown; c++) {
        if (*c == '\n') {
            *c = ' ';
        }
    }
    printf("%*s%s [%.*s]\n", depth * 2, "", type_name, shown, snippet);

    uint32_t child_count = ts_node_child_count(n);
    uint32_t idx = 0;
    while (idx < child_count) {
        TSNode child = ts_node_child(n, idx);
        const char *field = ts_node_field_name_for_child(n, idx);
        if (field != NULL) {
            printf("%*s.%s:\n", (depth + 1) * 2, "", field);
        }
        walk(child, depth + 1, src, max_depth);
        idx++;
    }
}
32+
33+
/* Read the whole file at `path` into a freshly allocated, NUL-terminated
 * buffer.
 *
 * path    - file to read; opened in binary mode.
 * len_out - receives the number of bytes read, excluding the terminator.
 *           Written only on success.
 *
 * Returns the buffer, which the caller releases with free(). Returns NULL
 * when the file cannot be opened, sized or read in full, when its size does
 * not fit in an int, or when allocation fails. The stream is closed on every
 * path.
 */
static char *slurp(const char *path, int *len_out) {
    /* Largest value an int can hold, computed without needing <limits.h>. */
    const long max_len = (long)(~0u >> 1);
    char *buf = NULL;
    long n = -1;

    FILE *f = fopen(path, "rb");
    if (!f) {
        return NULL;
    }

    if (fseek(f, 0, SEEK_END) == 0) {
        n = ftell(f);
    }
    /* ftell reports failure as -1; using that as a size would under-allocate
     * and then write the terminator before the start of the buffer. */
    if (n < 0 || n > max_len || fseek(f, 0, SEEK_SET) != 0) {
        goto out;
    }

    buf = malloc((size_t)n + 1);
    if (!buf) {
        goto out;
    }

    /* A short read would leave uninitialised bytes in the result. */
    if (fread(buf, 1, (size_t)n, f) != (size_t)n) {
        free(buf);
        buf = NULL;
        goto out;
    }

    buf[n] = 0;
    *len_out = (int)n;

out:
    fclose(f);
    return buf;
}
46+
47+
int main(int argc, char **argv) {
48+
if (argc < 2) {
49+
fprintf(stderr, "usage: %s <file.cs>\n", argv[0]);
50+
return 1;
51+
}
52+
if (cbm_init() != 0) {
53+
fprintf(stderr, "cbm_init failed\n");
54+
return 1;
55+
}
56+
int n = 0;
57+
char *src = slurp(argv[1], &n);
58+
if (!src) {
59+
fprintf(stderr, "cannot read %s\n", argv[1]);
60+
return 1;
61+
}
62+
CBMFileResult *r = cbm_extract_file(src, n, CBM_LANG_CSHARP, "test", argv[1], 0, NULL, NULL);
63+
if (!r) { fprintf(stderr, "extract returned NULL\n"); return 1; }
64+
if (r->has_error) {
65+
fprintf(stderr, "parse error: %s\n", r->error_msg ? r->error_msg : "?");
66+
}
67+
68+
printf("=== DEFS (%d) ===\n", r->defs.count);
69+
for (int i = 0; i < r->defs.count; i++) {
70+
const CBMDefinition *d = &r->defs.items[i];
71+
printf(" [%s] %s qn=%s parent=%s rt=%s",
72+
d->label ? d->label : "?", d->name ? d->name : "?",
73+
d->qualified_name ? d->qualified_name : "",
74+
d->parent_class ? d->parent_class : "",
75+
d->return_type ? d->return_type : "");
76+
printf(" params=");
77+
if (d->param_names) {
78+
printf("[");
79+
for (int j = 0; d->param_names[j]; j++) {
80+
printf("%s%s:%s", j ? "," : "", d->param_names[j],
81+
d->param_types && d->param_types[j] ? d->param_types[j] : "?");
82+
}
83+
printf("]");
84+
} else {
85+
printf("NULL");
86+
}
87+
printf(" sig=%s\n", d->signature ? d->signature : "NULL");
88+
}
89+
90+
printf("=== CALLS (%d) ===\n", r->calls.count);
91+
for (int i = 0; i < r->calls.count; i++) {
92+
const CBMCall *c = &r->calls.items[i];
93+
printf(" callee=%s enc=%s\n",
94+
c->callee_name ? c->callee_name : "?",
95+
c->enclosing_func_qn ? c->enclosing_func_qn : "");
96+
}
97+
98+
printf("=== TYPE_ASSIGNS (%d) ===\n", r->type_assigns.count);
99+
for (int i = 0; i < r->type_assigns.count; i++) {
100+
const CBMTypeAssign *t = &r->type_assigns.items[i];
101+
printf(" var=%s type=%s enc=%s\n",
102+
t->var_name ? t->var_name : "?",
103+
t->type_name ? t->type_name : "?",
104+
t->enclosing_func_qn ? t->enclosing_func_qn : "");
105+
}
106+
107+
if (argc >= 3 && strcmp(argv[2], "--ast") == 0) {
108+
printf("\n=== AST ===\n");
109+
TSParser *p = ts_parser_new();
110+
ts_parser_set_language(p, tree_sitter_c_sharp());
111+
TSTree *tree = ts_parser_parse_string(p, NULL, src, n);
112+
walk(ts_tree_root_node(tree), 0, src, 8);
113+
ts_tree_delete(tree);
114+
ts_parser_delete(p);
115+
}
116+
117+
cbm_free_result(r);
118+
free(src);
119+
return 0;
120+
}

0 commit comments

Comments
 (0)