2121from datadog import api , initialize
2222
2323from ddev .cli .application import Application
24+ from ddev .utils .toml import load_toml_file
2425
2526METRIC_VERSION = 2
2627
@@ -108,8 +109,9 @@ def get_valid_versions(repo_path: Path | str) -> set[str]:
108109 """
109110 resolved_path = os .path .join (repo_path , os .path .join (repo_path , ".deps" , "resolved" ))
110111 versions = []
112+ pattern = re .compile (r"\d+\.\d+" )
111113 for file in os .listdir (resolved_path ):
112- match = re .search (r"\d+\.\d+" , file )
114+ match = pattern .search (file )
113115 if match :
114116 versions .append (match .group ())
115117 return set (versions )
@@ -119,7 +121,40 @@ def is_correct_dependency(platform: str, version: str, name: str) -> bool:
119121 return platform in name and version in name
120122
121123
122- def is_valid_integration (path : str , included_folder : str , ignored_files : set [str ], git_ignore : list [str ]) -> bool :
124+ def is_valid_integration_file (
125+ path : str ,
126+ repo_path : str ,
127+ ignored_files : set [str ] | None = None ,
128+ included_folder : str | None = None ,
129+ git_ignore : list [str ] | None = None ,
130+ ) -> bool :
131+ """
132+ Check if a file would be packaged with an integration.
133+
134+ Used to estimate integration package size by excluding:
135+ - Hidden files (starting with ".")
136+ - Files outside "datadog_checks"
137+ - Helper/test-only packages (e.g. datadog_checks_dev)
138+ - Files ignored by .gitignore
139+
140+ Args:
141+ path (str): File path to check.
142+ repo_path (str): Repository root, for loading .gitignore rules.
143+
144+ Returns:
145+ bool: True if the file would be packaged, False otherwise.
146+ """
147+ if ignored_files is None :
148+ ignored_files = {
149+ "datadog_checks_dev" ,
150+ "datadog_checks_tests_helper" ,
151+ }
152+
153+ if included_folder is None :
154+ included_folder = "datadog_checks" + os .sep
155+
156+ if git_ignore is None :
157+ git_ignore = get_gitignore_files (repo_path )
123158 # It is not an integration
124159 if path .startswith ("." ):
125160 return False
@@ -166,29 +201,31 @@ def compress(file_path: str) -> int:
166201 return compressed_size
167202
168203
169- def get_files (repo_path : str | Path , compressed : bool ) -> list [FileDataEntry ]:
204+ def get_files (repo_path : str | Path , compressed : bool , py_version : str ) -> list [FileDataEntry ]:
170205 """
171206 Calculates integration file sizes and versions from a repository.
207+ Only takes into account integrations with a valid version looking at the pyproject.toml file
208+ The pyproject.toml file should have a classifier with this format:
209+ classifiers = [
210+ ...
211+ "Programming Language :: Python :: 3.12",
212+ ...
213+ ]
172214 """
173- ignored_files = {"datadog_checks_dev" , "datadog_checks_tests_helper" }
174- git_ignore = get_gitignore_files (repo_path )
175- included_folder = "datadog_checks" + os .sep
176-
177215 integration_sizes : dict [str , int ] = {}
178216 integration_versions : dict [str , str ] = {}
217+ py_major_version = py_version .split ("." )[0 ]
179218
180219 for root , _ , files in os .walk (repo_path ):
220+ integration_name = str (os .path .relpath (root , repo_path ).split (os .sep )[0 ])
221+
222+ if not check_python_version (str (repo_path ), integration_name , py_major_version ):
223+ continue
181224 for file in files :
182225 file_path = os .path .join (root , file )
183226 relative_path = os .path .relpath (file_path , repo_path )
184-
185- if not is_valid_integration (relative_path , included_folder , ignored_files , git_ignore ):
227+ if not is_valid_integration_file (relative_path , str (repo_path )):
186228 continue
187- path = Path (relative_path )
188- parts = path .parts
189-
190- integration_name = parts [0 ]
191-
192229 size = compress (file_path ) if compressed else os .path .getsize (file_path )
193230 integration_sizes [integration_name ] = integration_sizes .get (integration_name , 0 ) + size
194231
@@ -208,6 +245,23 @@ def get_files(repo_path: str | Path, compressed: bool) -> list[FileDataEntry]:
208245 ]
209246
210247
def check_python_version(repo_path: str, integration_name: str, py_major_version: str) -> bool:
    """
    Return True if an integration declares support for the given Python major version.

    Reads ``<repo_path>/<integration_name>/pyproject.toml`` and scans the
    ``project.classifiers`` list for trove classifiers of the form
    ``"Programming Language :: Python :: <major>[.<minor>]"``.

    Args:
        repo_path (str): Repository root containing the integration folders.
        integration_name (str): Name of the integration folder to inspect.
        py_major_version (str): Python major version to look for (e.g. "3").

    Returns:
        bool: True if any Python classifier's major version matches
        ``py_major_version``; False when the pyproject.toml is missing, has no
        ``project.classifiers`` section, or declares no matching version.
    """
    pyproject_path = os.path.join(repo_path, integration_name, "pyproject.toml")
    if not os.path.exists(pyproject_path):
        return False
    pyproject = load_toml_file(pyproject_path)
    if "project" not in pyproject or "classifiers" not in pyproject["project"]:
        return False
    pattern = re.compile(r"Programming Language :: Python :: (\d+)")
    # Accept ANY matching classifier instead of keeping only the last match:
    # a project listing several majors (e.g. both 2 and 3) previously depended
    # on classifier ordering to be counted for the requested version.
    return any(
        (match := pattern.match(classifier)) is not None and match.group(1) == py_major_version
        for classifier in pyproject["project"]["classifiers"]
    )
263+
264+
211265def extract_version_from_about_py (path : str ) -> str :
212266 """
213267 Extracts the __version__ string from a given __about__.py file.
@@ -248,8 +302,9 @@ def get_dependencies_list(file_path: str) -> tuple[list[str], list[str], list[st
248302 versions = []
249303 with open (file_path , "r" , encoding = "utf-8" ) as file :
250304 file_content = file .read ()
305+ pattern = re .compile (r"([\w\-\d\.]+) @ (https?://[^\s#]+)" )
251306 for line in file_content .splitlines ():
252- match = re .search (r"([\w\-\d\.]+) @ (https?://[^\s#]+)" , line )
307+ match = pattern .search (line )
253308 if not match :
254309 raise WrongDependencyFormat ("The dependency format 'name @ link' is no longer supported." )
255310 name = match .group (1 )
@@ -327,43 +382,43 @@ def get_dependencies_sizes(
327382
328383
329384def is_excluded_from_wheel (path : str ) -> bool :
330- '''
385+ """
331386 These files are excluded from the wheel in the agent build:
332387 https://github.com/DataDog/datadog-agent/blob/main/omnibus/config/software/datadog-agent-integrations-py3.rb
333388 In order to have more accurate results, this files are excluded when computing the size of the dependencies while
334389 the wheels still include them.
335- '''
390+ """
336391 excluded_test_paths = [
337392 os .path .normpath (path )
338393 for path in [
339- ' idlelib/idle_test' ,
340- ' bs4/tests' ,
341- ' Cryptodome/SelfTest' ,
342- ' gssapi/tests' ,
343- ' keystoneauth1/tests' ,
344- ' openstack/tests' ,
345- ' os_service_types/tests' ,
346- ' pbr/tests' ,
347- ' pkg_resources/tests' ,
348- ' psutil/tests' ,
349- ' securesystemslib/_vendor/ed25519/test_data' ,
350- ' setuptools/_distutils/tests' ,
351- ' setuptools/tests' ,
352- ' simplejson/tests' ,
353- ' stevedore/tests' ,
354- ' supervisor/tests' ,
355- ' test' , # cm-client
356- ' vertica_python/tests' ,
357- ' websocket/tests' ,
394+ " idlelib/idle_test" ,
395+ " bs4/tests" ,
396+ " Cryptodome/SelfTest" ,
397+ " gssapi/tests" ,
398+ " keystoneauth1/tests" ,
399+ " openstack/tests" ,
400+ " os_service_types/tests" ,
401+ " pbr/tests" ,
402+ " pkg_resources/tests" ,
403+ " psutil/tests" ,
404+ " securesystemslib/_vendor/ed25519/test_data" ,
405+ " setuptools/_distutils/tests" ,
406+ " setuptools/tests" ,
407+ " simplejson/tests" ,
408+ " stevedore/tests" ,
409+ " supervisor/tests" ,
410+ " test" , # cm-client
411+ " vertica_python/tests" ,
412+ " websocket/tests" ,
358413 ]
359414 ]
360415
361416 type_annot_libraries = [
362- ' krb5' ,
363- ' Cryptodome' ,
364- ' ddtrace' ,
365- ' pyVmomi' ,
366- ' gssapi' ,
417+ " krb5" ,
418+ " Cryptodome" ,
419+ " ddtrace" ,
420+ " pyVmomi" ,
421+ " gssapi" ,
367422 ]
368423 rel_path = Path (path ).as_posix ()
369424
@@ -377,7 +432,7 @@ def is_excluded_from_wheel(path: str) -> bool:
377432 if path_parts :
378433 dependency_name = path_parts [0 ]
379434 if dependency_name in type_annot_libraries :
380- if path .endswith (' .pyi' ) or os .path .basename (path ) == ' py.typed' :
435+ if path .endswith (" .pyi" ) or os .path .basename (path ) == " py.typed" :
381436 return True
382437
383438 return False
@@ -830,14 +885,14 @@ def send_metrics_to_dd(
830885 ],
831886 }
832887 )
833- key_count = (item [' Platform' ], item [' Python_Version' ])
888+ key_count = (item [" Platform" ], item [" Python_Version" ])
834889 if key_count not in n_integrations :
835890 n_integrations [key_count ] = 0
836891 if key_count not in n_dependencies :
837892 n_dependencies [key_count ] = 0
838- if item [' Type' ] == ' Integration' :
893+ if item [" Type" ] == " Integration" :
839894 n_integrations [key_count ] += 1
840- elif item [' Type' ] == ' Dependency' :
895+ elif item [" Type" ] == " Dependency" :
841896 n_dependencies [key_count ] += 1
842897
843898 for (platform , py_version ), count in n_integrations .items ():
@@ -919,8 +974,8 @@ def get_last_commit_timestamp() -> int:
919974
920975def get_last_commit_data () -> tuple [str , list [str ], list [str ]]:
921976 result = subprocess .run (["git" , "log" , "-1" , "--format=%s" ], capture_output = True , text = True , check = True )
922- ticket_pattern = r' \b(?:DBMON|SAASINT|AGENT|AI)-\d+\b'
923- pr_pattern = r' #(\d+)'
977+ ticket_pattern = r" \b(?:DBMON|SAASINT|AGENT|AI)-\d+\b"
978+ pr_pattern = r" #(\d+)"
924979
925980 message = result .stdout .strip ()
926981 tickets = re .findall (ticket_pattern , message )
0 commit comments