@@ -108,6 +108,12 @@ def generate_edges_with_criteria(
108108 edge_stats ['criteria_used' ] = [k for k , v in criteria_config .items () if v ]
109109 edge_stats ['combination_logic_applied' ] = True
110110
111+ # Debug information
112+ print (f"Generated { len (all_edges )} total edges" )
113+ print (f"Final edges after combination logic: { len (final_edges )} " )
114+ print (f"Edges in graph: { total_edges } " )
115+ print (f"Edge stats: { edge_stats } " )
116+
111117 return G , edge_stats
112118
113119 def _get_repos_for_topics (self , topics : List [str ]) -> List [Dict ]:
@@ -118,12 +124,17 @@ def _get_repos_for_topics(self, topics: List[str]) -> List[Dict]:
118124 topics_lower = [t .lower () for t in topics ]
119125 placeholders = "," .join (["?" ] * len (topics_lower ))
120126
127+ # Create a more flexible search pattern using OR conditions
128+ conditions = []
129+ for topic in topics_lower :
130+ conditions .append (f"LOWER(t.topics) LIKE '%{ topic } %'" )
131+
121132 query = f"""
122133 WITH matching_repos AS (
123134 SELECT DISTINCT r.nameWithOwner
124135 FROM repos r
125136 JOIN repo_topics t ON r.nameWithOwner = t.repo
126- WHERE LOWER(t.topic) IN ( { placeholders } )
137+ WHERE ( { " OR " . join ( conditions ) } )
127138 ),
128139 repo_data AS (
129140 SELECT
@@ -136,22 +147,19 @@ def _get_repos_for_topics(self, topics: List[str]) -> List[Dict]:
136147 r.pullRequests,
137148 r.issues,
138149 r.primaryLanguage,
139- r.createdAt ,
150+ r.createdAt_year ,
140151 r.license,
141152 r.bigquery_contributors,
142153 r.bigquery_stargazers,
143- GROUP_CONCAT(t.topic, '|') AS topics
154+ t. topics
144155 FROM repos r
145156 JOIN repo_topics t ON r.nameWithOwner = t.repo
146157 JOIN matching_repos mr ON r.nameWithOwner = mr.nameWithOwner
147- GROUP BY r.nameWithOwner, r.stars, r.forks, r.watchers, r.isArchived,
148- r.languageCount, r.pullRequests, r.issues, r.primaryLanguage,
149- r.createdAt, r.license, r.bigquery_contributors, r.bigquery_stargazers
150158 )
151159 SELECT * FROM repo_data
152160 """
153161
154- result = self .con .execute (query , topics_lower ).fetchall ()
162+ result = self .con .execute (query ).fetchall ()
155163
156164 columns = [
157165 "nameWithOwner" , "stars" , "forks" , "watchers" , "isArchived" ,
@@ -188,18 +196,27 @@ def _extract_year(self, date_val) -> int:
188196 if not date_val :
189197 return 0
190198 try :
191- if isinstance (date_val , str ):
199+ if isinstance (date_val , int ):
200+ return date_val
201+ elif isinstance (date_val , str ):
192202 date = datetime .strptime (date_val .split ('T' )[0 ], "%Y-%m-%d" )
203+ return date .year
193204 else :
194- date = date_val
195- return date .year
205+ return date_val .year
196206 except (ValueError , TypeError ):
197207 return 0
198208
199209 def _format_list_data (self , data ) -> str :
200210 """Format list data as comma-separated string."""
201- if data and isinstance (data , list ):
211+ if not data :
212+ return ""
213+ if isinstance (data , list ):
202214 return "," .join (data )
215+ elif isinstance (data , str ):
216+ # Handle string representation of list
217+ if data .startswith ('[' ) and data .endswith (']' ):
218+ # Remove brackets and split by comma
219+ return data [1 :- 1 ]
203220 return ""
204221
205222 def _generate_topic_based_edges (self , G : nx .Graph , repos : List [Dict ]) -> List [Tuple ]:
@@ -424,6 +441,18 @@ def save_graph_with_edges(self, G: nx.Graph, output_path: str):
424441 G .graph ['has_edges' ] = True
425442 G .graph ['edge_generation_criteria' ] = 'Multiple criteria combination'
426443
444+ # Ensure edge attributes are properly set
445+ for u , v , data in G .edges (data = True ):
446+ # Convert complex data structures to strings for GEXF compatibility
447+ if 'shared_topics' in data and isinstance (data ['shared_topics' ], list ):
448+ data ['shared_topics' ] = '|' .join (data ['shared_topics' ])
449+ if 'shared_contributors' in data and isinstance (data ['shared_contributors' ], list ):
450+ data ['shared_contributors' ] = '|' .join (data ['shared_contributors' ])
451+ if 'shared_stargazers' in data and isinstance (data ['shared_stargazers' ], list ):
452+ data ['shared_stargazers' ] = '|' .join (data ['shared_stargazers' ])
453+ if 'criteria_satisfied' in data and isinstance (data ['criteria_satisfied' ], list ):
454+ data ['criteria_satisfied' ] = '|' .join (data ['criteria_satisfied' ])
455+
427456 # Write to GEXF file
428457 nx .write_gexf (G , output_path )
429458 return output_path
0 commit comments