@@ -23,6 +23,21 @@ def _dirlist(fs, path) -> list:
23
23
24
24
return [f .name for f in listing ]
25
25
26
def _has_new_structure(fs, base_dir, user, years):
    """Report whether a user's area uses the split data/MC year layout.

    The new layout keeps ``data_{year}`` and ``mc_{year}`` directories directly
    under ``base_dir / user``; the old layout has a bare ``{year}`` directory.

    Returns True as soon as a ``data_{year}`` or ``mc_{year}`` entry is found
    for any of ``years``. Returns False otherwise, including when listing the
    user directory raises FileNotFoundError.
    """
    user_dir = base_dir / user
    try:
        entries = _dirlist(fs, user_dir)
    except FileNotFoundError:
        # No listing for this user: report the old layout.
        return False

    # One matching data_/mc_ directory for any requested year is enough.
    return any(
        f"data_{year}" in entries or f"mc_{year}" in entries
        for year in years
    )
40
+
26
41
27
42
def xrootd_index_private_nano (
28
43
base_dir : str ,
@@ -39,21 +54,21 @@ def xrootd_index_private_nano(
39
54
Can specify specific users, years, samples, and subsamples to search for;
40
55
otherwise, it will search for all by default.
41
56
42
- Files are organized as :
57
+ Supports both old and new directory structures :
43
58
59
+ Old structure:
44
60
MC:
45
61
......redirector.......|...............base_dir....................|..user.|year|sample|
46
62
root://cmseos.fnal.gov//store/user/lpcdihiggsboost/NanoAOD_v12_ParT/rkansal/2022/HHbbtt/
47
- ....................................subsample.......................................|
48
- GluGlutoHHto2B2Tau_kl-1p00_kt-1p00_c2-0p00_LHEweights_TuneCP5_13p6TeV_powheg-pythia8/
49
- .............................f1...........................|.....f2......|.f3.|......
50
- GluGlutoHHto2B2Tau_kl-1p00_kt-1p00_c2-0p00_TuneCP5_13p6TeV/241028_235514/000*/*.root
63
+
64
+ New structure:
65
+ MC:
66
+ ......redirector.......|...............base_dir....................|..user.|year|sample|
67
+ root://cmseos.fnal.gov//store/user/lpcdihiggsboost/NanoAOD_v12_ParT/rkansal/mc_2022/HHbbtt/
51
68
52
69
Data:
53
70
......redirector.......|...............base_dir....................|..user.|year|sample|
54
- root://cmseos.fnal.gov//store/user/lpcdihiggsboost/NanoAOD_v12_ParT/rkansal/2022/Tau/
55
- .f1|..subsample.|.....f2......|.f3.|......
56
- Tau/Tau_Run2022D/241114_222843/000*/*.root
71
+ root://cmseos.fnal.gov//store/user/lpcdihiggsboost/NanoAOD_v12_ParT/rkansal/data_2022/Tau/
57
72
"""
58
73
fs = client .FileSystem (redirector )
59
74
base_dir = Path (base_dir )
@@ -66,65 +81,127 @@ def xrootd_index_private_nano(
66
81
67
82
for user in users :
68
83
print (f"\t { user } " )
84
+
85
+ # Check version
86
+ use_new_structure = _has_new_structure (fs , base_dir , user , years )
87
+ print (f"\t \t Using { 'new' if use_new_structure else 'old' } directory structure" )
88
+
69
89
for year in years :
70
90
print (f"\t \t { year } " )
71
91
if year not in files :
72
92
files [year ] = {}
73
93
74
- ypath = base_dir / user / year
75
- tsamples = _dirlist (fs , ypath ) if samples is None else samples
76
- for sample in tsamples :
77
- if sample not in files [year ]:
78
- files [year ][sample ] = {}
79
- elif overwrite_sample :
80
- warnings .warn (f"Overwriting existing sample { sample } " , stacklevel = 2 )
81
- files [year ][sample ] = {}
82
-
83
- print (f"\t \t \t { sample } " )
84
- spath = ypath / sample
85
-
86
- is_data = sample in hh_vars .DATA_SAMPLES
87
-
88
- tsubsamples = _dirlist (fs , spath ) if subsamples is None else subsamples
89
- for subsample in tsubsamples :
90
- subsample_name = subsample .split ("_TuneCP5" )[0 ].split ("_LHEweights" )[0 ]
91
- if not is_data :
92
- if subsample_name in files [year ][sample ]:
93
- warnings .warn (
94
- f"Duplicate subsample found! { subsample_name } " , stacklevel = 2
95
- )
96
-
97
- print (f"\t \t \t \t { subsample_name } " )
98
-
99
- sspath = spath / subsample
100
- for f1 in _dirlist (fs , sspath ):
101
- # For Data files, f1 is the subsample name
102
- if is_data :
103
- if f1 in files [year ][sample ]:
104
- warnings .warn (f"Duplicate subsample found! { f1 } " , stacklevel = 2 )
105
-
106
- print (f"\t \t \t \t { f1 } " )
107
-
108
- f1path = sspath / f1
109
- for f2 in _dirlist (fs , f1path ):
110
- f2path = f1path / f2
111
- tfiles = []
112
- for f3 in _dirlist (fs , f2path ):
113
- f3path = f2path / f3
114
- tfiles += [
115
- f"{ redirector } { f3path !s} /{ f } "
116
- for f in _dirlist (fs , f3path )
117
- if f .endswith (".root" )
118
- ]
119
-
120
- if is_data :
121
- files [year ][sample ][f1 ] = tfiles
94
+ if use_new_structure :
95
+ # New structure: separate data_{year} and mc_{year} directories
96
+ for is_data in (True , False ):
97
+ if is_data :
98
+ ypath = base_dir / user / f"data_{ year } "
99
+ else :
100
+ ypath = base_dir / user / f"mc_{ year } "
101
+
102
+ tsamples = _dirlist (fs , ypath ) if samples is None else samples
103
+
104
+ for sample in tsamples :
105
+ if sample not in files [year ]:
106
+ files [year ][sample ] = {}
107
+ elif overwrite_sample :
108
+ warnings .warn (f"Overwriting existing sample { sample } " , stacklevel = 2 )
109
+ files [year ][sample ] = {}
110
+
111
+ print (f"\t \t \t { sample } " )
112
+ spath = ypath / sample
113
+
114
+ tsubsamples = _dirlist (fs , spath ) if subsamples is None else subsamples
115
+ for subsample in tsubsamples :
116
+ subsample_name = subsample .split ("_TuneCP5" )[0 ].split ("_LHEweights" )[0 ]
117
+ if not is_data :
118
+ if subsample_name in files [year ][sample ]:
119
+ warnings .warn (
120
+ f"Duplicate subsample found! { subsample_name } " , stacklevel = 2
121
+ )
122
+
123
+ print (f"\t \t \t \t { subsample_name } " )
124
+
125
+ sspath = spath / subsample
126
+ for f1 in _dirlist (fs , sspath ):
127
+ # For Data files, f1 is the subsample name
128
+ if is_data :
129
+ if f1 in files [year ][sample ]:
130
+ warnings .warn (f"Duplicate subsample found! { f1 } " , stacklevel = 2 )
131
+ print (f"\t \t \t \t { f1 } " )
132
+
133
+ f1path = sspath / f1
134
+ for f2 in _dirlist (fs , f1path ):
135
+ f2path = f1path / f2
136
+ tfiles = []
137
+ f2_contents = _dirlist (fs , f2path )
138
+ root_files = [f for f in f2_contents if f .endswith (".root" )]
139
+ if root_files :
140
+ tfiles += [f"{ redirector } { f2path !s} /{ f } " for f in root_files ]
141
+
142
+ if is_data :
143
+ files [year ][sample ][f1 ] = tfiles
144
+ print (f"\t \t \t \t \t { len (tfiles )} files" )
145
+
146
+ if not is_data :
147
+ files [year ][sample ][subsample_name ] = tfiles
148
+ print (f"\t \t \t \t \t { len (tfiles )} files" )
149
+ else :
150
+ # Old structure: single year directory
151
+ ypath = base_dir / user / year
152
+ tsamples = _dirlist (fs , ypath ) if samples is None else samples
153
+ for sample in tsamples :
154
+ if sample not in files [year ]:
155
+ files [year ][sample ] = {}
156
+ elif overwrite_sample :
157
+ warnings .warn (f"Overwriting existing sample { sample } " , stacklevel = 2 )
158
+ files [year ][sample ] = {}
159
+
160
+ print (f"\t \t \t { sample } " )
161
+ spath = ypath / sample
162
+
163
+ is_data = sample in hh_vars .DATA_SAMPLES
164
+
165
+ tsubsamples = _dirlist (fs , spath ) if subsamples is None else subsamples
166
+ for subsample in tsubsamples :
167
+ subsample_name = subsample .split ("_TuneCP5" )[0 ].split ("_LHEweights" )[0 ]
168
+ if not is_data :
169
+ if subsample_name in files [year ][sample ]:
170
+ warnings .warn (
171
+ f"Duplicate subsample found! { subsample_name } " , stacklevel = 2
172
+ )
173
+
174
+ print (f"\t \t \t \t { subsample_name } " )
175
+
176
+ sspath = spath / subsample
177
+ for f1 in _dirlist (fs , sspath ):
178
+ # For Data files, f1 is the subsample name
179
+ if is_data :
180
+ if f1 in files [year ][sample ]:
181
+ warnings .warn (f"Duplicate subsample found! { f1 } " , stacklevel = 2 )
182
+
183
+ print (f"\t \t \t \t { f1 } " )
184
+
185
+ f1path = sspath / f1
186
+ for f2 in _dirlist (fs , f1path ):
187
+ f2path = f1path / f2
188
+ tfiles = []
189
+ for f3 in _dirlist (fs , f2path ):
190
+ f3path = f2path / f3
191
+ tfiles += [
192
+ f"{ redirector } { f3path !s} /{ f } "
193
+ for f in _dirlist (fs , f3path )
194
+ if f .endswith (".root" )
195
+ ]
196
+
197
+ if is_data :
198
+ files [year ][sample ][f1 ] = tfiles
199
+ print (f"\t \t \t \t \t { len (tfiles )} files" )
200
+
201
+ if not is_data :
202
+ files [year ][sample ][subsample_name ] = tfiles
122
203
print (f"\t \t \t \t \t { len (tfiles )} files" )
123
204
124
- if not is_data :
125
- files [year ][sample ][subsample_name ] = tfiles
126
- print (f"\t \t \t \t \t { len (tfiles )} files" )
127
-
128
205
return files
129
206
130
207
0 commit comments