Skip to content

Commit 9ae1c0c

Browse files
Merge pull request #4752 from linuxfoundation/unicron-4712-notify-cla-managers
Investigate 4712 case
2 parents c7e6b65 + b5be8bb commit 9ae1c0c

File tree

4 files changed

+207
-1
lines changed

4 files changed

+207
-1
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -255,3 +255,5 @@ cover.out
255255

256256
# Local env vars
257257
.env
258+
src.txt
259+
src.txt.*

utils/dump_repo.py

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
#!/usr/bin/env python3
2+
import argparse, os, sys, pathlib, io, subprocess, shutil
3+
4+
# Directory names that the os.walk() fallback never descends into.
DEFAULT_EXCLUDE_DIRS = {
    ".git", ".hg", ".svn",
    ".idea", ".vscode",
    "dist", "build", "out", "target",
    ".next", ".nuxt", ".tox", "__pycache__",
}

# Glob patterns for files that are never dumped (minified/generated files,
# archives, images, fonts, media, binaries and secrets).
DEFAULT_EXCLUDE_GLOBS = {
    "*.min.js", "*.map", "*.lock",
    "*.jar", "*.zip", "*.gz", "*.tgz", "*.bz2", "*.7z",
    "*.png", "*.jpg", "*.jpeg", "*.gif", "*.webp", "*.ico",
    "*.pdf",
    "*.woff", "*.woff2", "*.ttf", "*.otf",
    "*.mp4", "*.mov", "*.avi", "*.mp3", "*.flac", "*.wav",
    "*.iso", "*.bin", "*.secret",
}

# Lower-case file suffixes that count as source text.
DEFAULT_INCLUDE_EXTS = {
    ".go", ".ts", ".tsx", ".js", ".jsx",
    ".json", ".yml", ".yaml", ".toml", ".ini", ".env",
    ".md", ".txt",
    ".proto", ".graphql", ".sql",
    ".py", ".rs", ".java",
    ".c", ".h", ".cpp", ".hpp", ".cc", ".m", ".mm",
    ".rb", ".php", ".pl",
    ".sh", ".bash", ".zsh", ".fish", ".ps1", ".bat",
    ".dockerfile", ".gradle", ".properties",
}

# Lower-case file names that are kept even though their suffix is not listed above.
ALSO_ALLOW_NAME = {"dockerfile", "makefile", "makefile.win"}
14+
15+
def is_text_file(path: str, max_probe=65536) -> bool:
    """Return True when the start of the file at *path* looks like UTF-8 text.

    At most ``max_probe`` bytes are read. The file is rejected when that probe
    contains a NUL byte or is not valid UTF-8, and also when the file cannot
    be opened or read.

    Args:
        path: Path of the file to inspect.
        max_probe: Maximum number of bytes read from the start of the file.

    Returns:
        True if the probed bytes are NUL-free valid UTF-8, otherwise False.
    """
    import codecs  # local import: only this function needs it

    try:
        with open(path, "rb") as f:
            chunk = f.read(max_probe)
    except (OSError, ValueError):
        # Unreadable file or unusable path: treat as "not text" instead of failing.
        return False

    if b"\x00" in chunk:
        return False

    # A probe that filled the whole buffer may end in the middle of a
    # multi-byte UTF-8 sequence. Only demand a complete final character when
    # the read was short, i.e. the probe reached the end of the file.
    reached_eof = len(chunk) < max_probe
    decoder = codecs.getincrementaldecoder("utf-8")(errors="strict")
    try:
        decoder.decode(chunk, final=reached_eof)
    except UnicodeDecodeError:
        return False
    return True
25+
26+
def should_keep_by_ext(p: pathlib.Path, include_exts) -> bool:
    """Decide whether *p* passes the extension allow-list.

    A path is kept when ``include_exts`` is empty/None, when its lower-cased
    suffix is in ``include_exts``, or when its lower-cased file name is one of
    the names in ALSO_ALLOW_NAME.
    """
    # An empty allow-list disables the filter entirely.
    if not include_exts:
        return True
    if p.suffix.lower() in include_exts:
        return True
    # Last chance: specially allowed file names.
    return p.name.lower() in ALSO_ALLOW_NAME
31+
32+
def should_exclude_by_glob(rel: str, globs=None) -> bool:
    """Return True when the relative path *rel* matches an exclusion glob.

    Matching ignores case, so "APP.MIN.JS" is excluded just like "app.min.js".
    This keeps the filter consistent with should_keep_by_ext, which
    lower-cases the suffix before testing it.

    Args:
        rel: Path relative to the repo root (POSIX separators).
        globs: Optional iterable of glob patterns; defaults to
            DEFAULT_EXCLUDE_GLOBS when None.

    Returns:
        True if any pattern matches, otherwise False.
    """
    from pathlib import PurePath

    patterns = DEFAULT_EXCLUDE_GLOBS if globs is None else globs
    # Lower-case both sides: PurePath.match is case-sensitive on POSIX.
    candidate = PurePath(rel.lower())
    return any(candidate.match(pattern.lower()) for pattern in patterns)
39+
40+
def git_available() -> bool:
    """Report whether a ``git`` executable can be located via shutil.which."""
    git_path = shutil.which("git")
    return git_path is not None
42+
43+
def iter_files_git(repo_root: pathlib.Path):
    """
    Yield repo files not ignored by .gitignore/.git/info/exclude/global ignores.
    Uses: git ls-files -z --cached --others --exclude-standard

    Each path is yielded at most once: while a merge is unresolved, git lists
    an unmerged path once per index stage, which would otherwise put the same
    file into the dump several times. Entries that are not regular files on
    disk are skipped.

    Raises:
        subprocess.CalledProcessError: if git exits with a non-zero status.
    """
    cmd = ["git", "-C", str(repo_root), "ls-files", "-z", "--cached", "--others", "--exclude-standard", "--"]
    out = subprocess.check_output(cmd)
    seen = set()
    # -z makes git separate entries with NUL bytes and leave names unquoted.
    for rel_b in out.split(b"\x00"):
        if not rel_b or rel_b in seen:
            continue
        seen.add(rel_b)
        rel = rel_b.decode("utf-8", errors="replace")
        p = repo_root / rel
        if p.is_file():
            yield p
57+
58+
def iter_files_walk(repo_root: pathlib.Path):
    """
    Fallback: walk the tree and filter manually (does NOT perfectly mirror .gitignore).

    Directories named in DEFAULT_EXCLUDE_DIRS are not descended into, and files
    matching should_exclude_by_glob are skipped. Directories and files are
    visited in sorted name order so that repeated runs over the same tree
    produce the same dump regardless of the order the filesystem returns.
    """
    for root, dirs, files in os.walk(repo_root):
        # Assign in place: os.walk only honors pruning/reordering done on this very list.
        dirs[:] = sorted(d for d in dirs if d not in DEFAULT_EXCLUDE_DIRS)
        base = pathlib.Path(root)
        for name in sorted(files):
            p = base / name
            rel = p.relative_to(repo_root).as_posix()
            if should_exclude_by_glob(rel):
                continue
            yield p
71+
72+
def iter_repo_files(repo_root: pathlib.Path, use_git: bool):
    """
    Yield the candidate files of the repo, choosing the listing strategy.

    With use_git=True, git is always used and a failing git command raises
    subprocess.CalledProcessError to the caller. With use_git=False, git is
    still preferred when repo_root contains ".git" and a git binary is
    available, but a failing git command falls back to the plain directory
    walk. Without that fallback, a caller that retries with use_git=False
    after a git failure would just hit the same failure again.
    """
    if use_git:
        yield from iter_files_git(repo_root)
        return

    if (repo_root / ".git").exists() and git_available():
        try:
            # iter_files_git runs git before yielding anything, so a failure
            # here cannot leave a half-produced listing behind.
            yield from iter_files_git(repo_root)
            return
        except subprocess.CalledProcessError:
            print("Warning: 'git ls-files' failed; using directory walk instead (may include gitignored files).", file=sys.stderr)

    yield from iter_files_walk(repo_root)
81+
82+
def write_repo(repo_root: pathlib.Path, out_prefix: pathlib.Path, max_mb: float, force_git: bool):
    """
    Dump the repo's text source files into one or more size-limited output files.

    Every file that passes the glob, extension and text checks is written as a
    "File: <relative path>" line, a "Contents:" line, the file text with
    trailing whitespace stripped, and a blank line. The first output file is
    out_prefix itself. Once adding a block would push a non-empty chunk past
    max_mb megabytes, writing continues in "<out_prefix>.part2", ".part3", ...
    A single block larger than the limit still goes into a chunk of its own.

    Args:
        repo_root: Root directory of the repository to dump.
        out_prefix: Path of the first output file.
        max_mb: Maximum size of each output file in MiB (UTF-8 encoded bytes).
        force_git: Passed to iter_repo_files as use_git.

    Returns:
        Tuple (number of output files written, number of source files included).
    """
    repo_root = repo_root.resolve()
    max_bytes = int(max_mb * (1024**2))
    # Resolved location of the first output file, so the dump never includes itself.
    # Later chunks need no such check: a name ending in ".partN" cannot pass the
    # DEFAULT_INCLUDE_EXTS filter.
    first_out = out_prefix.resolve()

    def open_chunk(idx):
        # Chunk 1 is out_prefix itself; later chunks append ".part<idx>" to its name.
        path = out_prefix if idx == 1 else out_prefix.with_name(f"{out_prefix.name}.part{idx}")
        return io.open(path, "w", encoding="utf-8", newline="\n")

    chunk_idx = 1
    bytes_in_chunk = 0
    count = 0
    fh = open_chunk(chunk_idx)
    try:
        for p in iter_repo_files(repo_root, force_git):
            # Cheap name comparison first; resolve() only for potential matches.
            if p.name == first_out.name and p.resolve() == first_out:
                continue

            rel = p.relative_to(repo_root).as_posix()
            if should_exclude_by_glob(rel):
                continue
            if not should_keep_by_ext(p, DEFAULT_INCLUDE_EXTS):
                continue
            if not is_text_file(str(p)):
                continue

            try:
                with io.open(p, "r", encoding="utf-8") as rf:
                    content = rf.read()
            except UnicodeDecodeError:
                # is_text_file only probes the start of the file, so invalid
                # bytes can appear later; latin-1 decodes any byte sequence.
                with io.open(p, "r", encoding="latin-1") as rf:
                    content = rf.read()

            block = f"File: {rel}\nContents:\n" + content.rstrip() + "\n\n"
            block_bytes = len(block.encode("utf-8"))

            # Roll over to a new chunk only if the current one already holds data.
            if bytes_in_chunk > 0 and bytes_in_chunk + block_bytes > max_bytes:
                fh.close()
                chunk_idx += 1
                fh = open_chunk(chunk_idx)
                bytes_in_chunk = 0

            fh.write(block)
            bytes_in_chunk += block_bytes
            count += 1
    finally:
        # Close the current chunk even when listing or reading raises.
        fh.close()

    return chunk_idx, count
128+
129+
def main():
    """
    Command-line entry point: parse arguments and dump the repo.

    When --git-mode is requested but no git executable is on PATH, or when the
    git listing fails with a non-zero exit status, a warning is printed to
    stderr and the dump is retried without forcing git.

    Returns:
        0 on success (used as the process exit status).
    """
    ap = argparse.ArgumentParser(description="Dump repo sources to 'File: ...\\nContents:\\n...' format, honoring .gitignore.")
    ap.add_argument("--repo", default=".", help="Path to repo root (default: .)")
    ap.add_argument("--out", default="src.txt", help="Output filename/prefix")
    ap.add_argument("--max-mb", type=float, default=100.0, help="Max size per output file in MB (default: 100MB)")
    ap.add_argument("--git-mode", action="store_true", help="Force using 'git ls-files' (best accuracy for .gitignore).")
    args = ap.parse_args()

    repo_root = pathlib.Path(args.repo)
    out_prefix = pathlib.Path(args.out)

    force_git = args.git_mode
    if force_git and shutil.which("git") is None:
        # Without this check the git subprocess would raise FileNotFoundError,
        # which the CalledProcessError handler below does not cover.
        print("Warning: --git-mode requested but 'git' was not found on PATH; falling back to walk() (may include gitignored files).", file=sys.stderr)
        force_git = False

    try:
        chunks, files = write_repo(repo_root, out_prefix, args.max_mb, force_git)
    except subprocess.CalledProcessError:
        print("Warning: Git mode failed; falling back to walk() (may include gitignored files).", file=sys.stderr)
        chunks, files = write_repo(repo_root, out_prefix, args.max_mb, force_git=False)

    print(f"Wrote {chunks} file(s); included {files} text source files. Upload the first file and any '.partN' files too.")
    return 0
147+
148+
# Run main() only when executed as a script; its return value becomes the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
150+

utils/get_company_project_cla_managers.sh

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,13 @@
11
#!/bin/bash
22
# API_URL=https://[xyz].ngrok-free.app (defaults to localhost:5000)
3-
# API_URL=https://api.lfcla.dev.platform.linuxfoundation.org
3+
# API_URL=https://api-gw.platform.linuxfoundation.org/cla-service
4+
# API_URL=https://api-gw.dev.platform.linuxfoundation.org/cla-service
45
# DEBUG='' ./utils/get_company_project_cla_managers.sh f7c7ac9c-4dbf-4104-ab3f-6b38a26d82dc a09P000000DsCE5IAN
56
# Note: To run manually see cla-backend-go/auth/authorizer.go:SecurityAuth() and update accordingly 'LG:'
7+
# STAGE=prod ./utils/scan.sh projects-cla-groups project_name 'Cloud Native Computing Foundation (CNCF)'
8+
# STAGE=prod ./utils/scan.sh companies company_name 'Red Hat, Inc.'
9+
# ./get_oauth_token_prod.sh
10+
# DEBUG='' API_URL=https://api-gw.platform.linuxfoundation.org/cla-service ./utils/get_company_project_cla_managers.sh '86fda258-9866-45e3-b4fe-50502ac0957c' 'a0941000002wBz4AAE'
611

712
if [ -z "$TOKEN" ]
813
then
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
#!/bin/bash
# Example POST request to the CLA service endpoint /v4/notify-cla-managers.
#
# Environment:
#   TOKEN   - bearer token; read from ./auth0.token.secret when not set
#   XACL    - value of the X-ACL header; read from ./x-acl.secret when not set
#   API_URL - base URL of the service; defaults to http://localhost:5001
#
# API_URL=https://api-gw.platform.linuxfoundation.org/cla-service
# API_URL=https://api-gw.dev.platform.linuxfoundation.org/cla-service
# STAGE=prod ./utils/scan.sh projects project_id d8cead54-92b7-48c5-a2c8-b1e295e8f7f1
# STAGE=prod ./utils/scan.sh projects-cla-groups project_name 'Cloud Native Computing Foundation (CNCF)'
# STAGE=prod ./utils/scan.sh companies company_name 'Red Hat, Inc.'
# STAGE=prod ./utils/scan.sh users lf_username lgryglicki
# API_URL=https://api-gw.platform.linuxfoundation.org/cla-service ./utils/notify_cla_managers_example_post.sh

# Fall back to the secret file only when it is readable, so a missing file
# produces the clear message below instead of a raw 'cat' error.
if [ -z "$TOKEN" ] && [ -r ./auth0.token.secret ]
then
  TOKEN="$(cat ./auth0.token.secret)"
fi

if [ -z "$TOKEN" ]
then
  echo "$0: TOKEN not specified and unable to obtain one" >&2
  exit 1
fi

if [ -z "$XACL" ] && [ -r ./x-acl.secret ]
then
  XACL="$(cat ./x-acl.secret)"
fi

if [ -z "$XACL" ]
then
  echo "$0: XACL not specified and unable to obtain one" >&2
  exit 2
fi

if [ -z "$API_URL" ]
then
  export API_URL="http://localhost:5001"
fi

# NOTE(review): the e-mail values below look like redacted placeholders;
# substitute real recipient addresses before running.
data='{
"companyName": "Cloud Native Computing Foundation (CNCF)",
"claGroupID": "d8cead54-92b7-48c5-a2c8-b1e295e8f7f1",
"userID": "2c895887-d33a-11ef-9205-4e2baeedbda2",
"list": [
{ "email": "[email protected]", "name": "Lukasz Gryglicki 1" },
{ "email": "[email protected]", "name": "Lukasz Gryglicki 2" },
{ "email": "[email protected]", "name": "Lukasz Gryglicki 3" }
]
}'

curl -s -XPOST -H "X-ACL: ${XACL}" -H "Authorization: Bearer ${TOKEN}" -H "Content-Type: application/json" "${API_URL}/v4/notify-cla-managers" -d "$data" | jq -r '.'

0 commit comments

Comments
 (0)