@@ -22,6 +22,19 @@ def __init__(self):
22
22
23
23
def _extract_metadata (self , summary : Dict [str , Any ], tree : Dict [str , Any ]) -> Dict [str , Any ]:
24
24
"""Extract metadata from repository summary and tree"""
25
+ # Handle case where summary might be a string
26
+ if isinstance (summary , str ):
27
+ return {
28
+ "repo_name" : "Unknown" ,
29
+ "description" : "" ,
30
+ "language" : "" ,
31
+ "topics" : [],
32
+ "stars" : 0 ,
33
+ "forks" : 0 ,
34
+ "last_updated" : "" ,
35
+ "file_count" : len (tree ) if tree else 0
36
+ }
37
+
25
38
return {
26
39
"repo_name" : summary .get ("name" , "" ),
27
40
"description" : summary .get ("description" , "" ),
@@ -48,18 +61,34 @@ def process_repo(self, repo_path: str | Path) -> Tuple[List[Dict[str, Any]], str
48
61
# Ingest repository
49
62
summary , tree , content = ingest (str (repo_path ))
50
63
51
- # Print repository information
52
- print ("\n Repository Summary:" )
53
- print (json .dumps (summary , indent = 2 ))
54
- print ("\n File Tree:" )
55
- print (json .dumps (tree , indent = 2 ))
64
+ # Print formatted repository information
65
+ if isinstance (summary , dict ):
66
+ repo_name = summary .get ("name" , "Unknown" )
67
+ file_count = len (tree ) if tree else 0
68
+ token_count = sum (len (str (c ).split ()) for c in content .values ()) * 1.3 # Rough estimate
69
+
70
+ print ("\n Repository Information:" )
71
+ print ("-" * 50 )
72
+ print (f"📦 Repository: { repo_name } " )
73
+ print (f"📄 Files analyzed: { file_count } " )
74
+ print (f"🔤 Estimated tokens: { int (token_count ):,} " )
75
+ else :
76
+ print ("\n Repository Information:" )
77
+ print ("-" * 50 )
78
+ print (f"📦 Repository: { repo_path } " )
79
+ print (f"📄 Files analyzed: { len (tree ) if tree else 0 } " )
80
+ print (f"🔤 Estimated tokens: { int (sum (len (str (c ).split ()) for c in content .values ()) * 1.3 ):,} " )
56
81
57
82
# Extract metadata
58
83
metadata = self ._extract_metadata (summary , tree )
59
84
60
85
# Process content into chunks
61
86
processed_chunks = []
62
87
for file_path , file_content in content .items ():
88
+ # Skip if content is not a string
89
+ if not isinstance (file_content , str ):
90
+ continue
91
+
63
92
chunk = {
64
93
"text" : file_content ,
65
94
"metadata" : {
0 commit comments