Skip to content

Commit 486fc59

Browse files
authored
Merge pull request #570 from marcocanto/os-folders
added object_storage_list_folders.py to examples
2 parents 4656fea + cfc3324 commit 486fc59

File tree

2 files changed

+368
-1
lines changed

2 files changed

+368
-1
lines changed

examples/object_storage/README.md

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -157,4 +157,29 @@ optional arguments:
157157
-sn SOURCE_NAMESPACE Source Namespace (Default current connection)
158158
-textrem TEXT_REMOVE text remove prefix (can be used to remove folder)
159159
-textadd TEXT_APPEND text append prefix (can be used to add folder)
160-
```
160+
```
161+
162+
## object_storage_list_folders
163+
List the total size and object count of every folder in a bucket
164+
165+
```
166+
usage: object_storage_list_folders.py [-h] [-t CONFIG_PROFILE] [-p PROXY]
167+
[-ip] [-dt] [-c CONFIG_FILE]
168+
[-sb SOURCE_BUCKET] [-sp SOURCE_PREFIX]
169+
[-sn SOURCE_NAMESPACE]
170+
[-sr SOURCE_REGION]
171+
[-f FILE]
172+
173+
optional arguments:
174+
-h, --help show this help message and exit
175+
-t CONFIG_PROFILE Config file section to use (tenancy profile)
176+
-p PROXY Set Proxy (i.e. www-proxy-server.com:80)
177+
-ip Use Instance Principals for Authentication
178+
-dt Use Delegation Token for Authentication
179+
-c CONFIG_FILE Config File (default=~/.oci/config)
180+
-sb SOURCE_BUCKET Source Bucket Name
181+
-sp SOURCE_PREFIX Source Prefix Include
182+
-sr SOURCE_REGION Source Region
183+
-sn SOURCE_NAMESPACE Source Namespace (Default current connection)
184+
-f FILE Output to file (as csv)
185+
```
Lines changed: 342 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,342 @@
1+
# coding: utf-8
2+
# Copyright (c) 2016, 2023, Oracle and/or its affiliates. All rights reserved.
3+
# This software is dual-licensed to you under the Universal Permissive License (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl or Apache License 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose either license.
4+
5+
##########################################################################
6+
# object_storage_list_folders.py
7+
#
8+
# @author: Marco Canto and Adi Zohar
9+
#
10+
# Supports Python 3
11+
#
12+
# DISCLAIMER – This is not an official Oracle application. It is not supported by Oracle Support and should NOT be used for utilization calculation purposes.
13+
##########################################################################
14+
# Info:
15+
# count objects and file size of folders within buckets with option to filter by prefix and write to file
16+
#
17+
##########################################################################
18+
# Application Command line parameters
19+
#
20+
# -c config - Config file to use (default=~/.oci/config)
21+
# -t profile - Profile in config file, DEFAULT as default
22+
# -p proxy - Set Proxy (i.e. www-proxy-server.com:80)
23+
# -ip - Use Instance Principals for Authentication
24+
# -dt - Use Instance Principals with delegation token for cloud shell
25+
# -f - write to file
26+
# -sb source_bucket
27+
# -sp source_prefix_include
28+
# -sr source_region
29+
# -sn source_namespace
30+
##########################################################################
31+
import oci
32+
import argparse
33+
import datetime
34+
import sys
35+
import os
36+
import re
37+
38+
from collections import defaultdict
39+
40+
##########################################################################
41+
# Pre Main
42+
##########################################################################
43+
44+
# Get Command Line Parser
45+
# Command line definition. Every option is optional for argparse itself;
# string options default to "" so later code can test them for truthiness.
parser = argparse.ArgumentParser()

# --- authentication / connection ---
parser.add_argument(
    "-t",
    dest="config_profile",
    default="",
    help="Config file section to use (tenancy profile)",
)
parser.add_argument(
    "-p",
    dest="proxy",
    default="",
    help="Set Proxy (i.e. www-proxy-server.com:80) ",
)
parser.add_argument(
    "-ip",
    dest="is_instance_principals",
    action="store_true",  # store_true already defaults to False
    help="Use Instance Principals for Authentication",
)
parser.add_argument(
    "-dt",
    dest="is_delegation_token",
    action="store_true",
    help="Use Delegation Token for Authentication",
)
parser.add_argument(
    "-c",
    dest="config_file",
    default="",
    help="Config File (default=~/.oci/config)",
)

# --- what to list ---
parser.add_argument(
    "-sb",
    dest="source_bucket",
    default="",
    help="Source Bucket Name",
)
parser.add_argument(
    "-sp",
    dest="source_prefix",
    default="",
    help="Source Prefix Include",
)
parser.add_argument(
    "-sr",
    dest="source_region",
    default="",
    help="Source Region",
)
parser.add_argument(
    "-sn",
    dest="source_namespace",
    default="",
    help="Source Namespace (Default current connection)",
)

# --- output ---
# FileType("w") makes argparse open the given path for writing while parsing.
parser.add_argument(
    "-f",
    dest="file",
    type=argparse.FileType("w"),
    help="Output to file (as csv)",
)
86+
87+
# Parse the command line once; the functions below read their options from
# this module-level namespace.
cmd = parser.parse_args()

# No options given: show the usage text and stop. sys.argv always contains the
# script name as its first entry, so "no options" means fewer than two entries.
if len(sys.argv) < 2:
    parser.print_help()
    raise SystemExit

# The source bucket is the only mandatory option.
if not cmd.source_bucket:
    print("Source bucket parameter is required !!!\n")
    parser.print_help()
    raise SystemExit

# Update Variables based on the parameters
# (fall back to the OCI SDK defaults when an option was left empty)
config_file = cmd.config_file if cmd.config_file else oci.config.DEFAULT_LOCATION
config_profile = (
    cmd.config_profile if cmd.config_profile else oci.config.DEFAULT_PROFILE
)
103+
104+
105+
##########################################################################
106+
# Create signer for Authentication
107+
# Input - config_file, config_profile and is_instance_principals and is_delegation_token
108+
# Output - config and signer objects
109+
##########################################################################
110+
def create_signer(
    config_file, config_profile, is_instance_principals, is_delegation_token
):
    """Build the OCI config dictionary and request signer for the chosen auth mode.

    The modes are tried in this order: instance principals, delegation token,
    then the regular config file.

    Args:
        config_file: Path of the OCI config file; an empty value selects
            ``oci.config.DEFAULT_LOCATION``. Only used for config file auth.
        config_profile: Profile (section) name; an empty value selects
            ``oci.config.DEFAULT_PROFILE``. Only used for config file auth.
        is_instance_principals: When true, sign with instance principals.
        is_delegation_token: When true (and instance principals is false),
            sign with the delegation token referenced by the config named in
            the OCI_CONFIG_FILE / OCI_CONFIG_PROFILE environment variables.

    Returns:
        A ``(config, signer)`` tuple.

    Raises:
        SystemExit: If the instance principals signer cannot be created, if the
            delegation token environment variables are missing, or if a
            KeyError occurs while setting up delegation token auth.
    """
    # -----------------------------
    # Instance Principals
    # -----------------------------
    if is_instance_principals:
        try:
            ip_signer = oci.auth.signers.InstancePrincipalsSecurityTokenSigner()
            ip_config = {"region": ip_signer.region, "tenancy": ip_signer.tenancy_id}
            return ip_config, ip_signer
        except Exception:
            print_header("Error obtaining instance principals certificate, aborting")
            raise SystemExit

    # -----------------------------
    # Delegation Token
    # -----------------------------
    if is_delegation_token:
        try:
            # The config file and profile come from the environment in this mode.
            env_file = os.environ.get("OCI_CONFIG_FILE")
            env_profile = os.environ.get("OCI_CONFIG_PROFILE")

            if env_file is None or env_profile is None:
                print(
                    "*** OCI_CONFIG_FILE and OCI_CONFIG_PROFILE env variables not found, abort. ***"
                )
                print("")
                raise SystemExit

            token_config = oci.config.from_file(env_file, env_profile)
            token_path = token_config["delegation_token_file"]

            with open(token_path, "r") as token_handle:
                token_text = token_handle.read().strip()

            token_signer = oci.auth.signers.InstancePrincipalsDelegationTokenSigner(
                delegation_token=token_text
            )
            return token_config, token_signer

        except KeyError:
            print("* Key Error obtaining delegation_token_file")
            raise SystemExit

    # -----------------------------
    # config file authentication
    # -----------------------------
    file_config = oci.config.from_file(
        config_file or oci.config.DEFAULT_LOCATION,
        config_profile or oci.config.DEFAULT_PROFILE,
    )
    file_signer = oci.signer.Signer(
        tenancy=file_config["tenancy"],
        user=file_config["user"],
        fingerprint=file_config["fingerprint"],
        private_key_file_location=file_config.get("key_file"),
        pass_phrase=oci.config.get_config_value_or_default(file_config, "pass_phrase"),
        private_key_content=file_config.get("key_content"),
    )
    return file_config, file_signer
177+
178+
179+
##############################################################################
180+
# get time
181+
##############################################################################
182+
def get_time(full=False):
    """Return the current time (``datetime.datetime.now()``) as text.

    Args:
        full: When true the result includes the date ("YYYY-MM-DD HH:MM:SS");
            otherwise only the time of day ("HH:MM:SS") is returned.

    Returns:
        The formatted timestamp string.
    """
    pattern = "%Y-%m-%d %H:%M:%S" if full else "%H:%M:%S"
    return datetime.datetime.now().strftime(pattern)
187+
188+
189+
##########################################################################
190+
# Print header centered
191+
##########################################################################
192+
def print_header(name):
    """Print *name* centered inside a 90-column box of '#' characters.

    The output is an empty line, a full-width border, the title line padded
    with spaces between two '#' characters, and a closing border.

    Args:
        name: Title text to display.
    """
    width = 90
    border = "#" * width
    title_line = "#" + name.center(width - 2, " ") + "#"

    print("")
    print(border)
    print(title_line)
    print(border)
198+
199+
200+
##########################################################################
201+
# Print Info
202+
##########################################################################
203+
def print_command_info(source_namespace):
    """Print the run banner: start time, command line and source settings.

    The bucket and prefix are read from the module-level ``cmd`` namespace.

    Args:
        source_namespace: Object Storage namespace string to display.
    """
    print_header("Running List/Count Objects")
    print("Written By Adi Zohar, June 2020")
    print("Starts at :" + get_time(full=True))

    # Echo the options exactly as given, without the script name.
    command_line = " ".join(sys.argv[1:])
    print("Command Line : " + command_line)

    print("Source Namespace : " + source_namespace)
    print("Source Bucket : " + cmd.source_bucket)
    print("Source Prefix : " + cmd.source_prefix)
211+
212+
213+
##############################################################################
214+
# Count Objects
215+
##############################################################################
216+
def _folder_paths(object_name):
    """Return every cumulative folder path that contains *object_name*.

    For "a/b/c.txt" the result is ["a/", "a/b/"]; a name without "/" gives [].
    """
    paths = []
    current_path = ""
    # Each match is one non-empty path segment that is followed by "/".
    for segment in re.findall(r"([^/]+)/", object_name):
        current_path += segment + "/"
        paths.append(current_path)
    return paths


def main():
    """List a bucket and report the size and object count of every folder.

    Options are read from the module-level ``cmd`` namespace. All pages
    returned by ``list_objects`` are accumulated first, then one table sorted
    by size (largest first) is produced: printed to the console, or written to
    ``<-f value>.csv`` when ``-f`` was given. Totals over all listed objects
    are printed at the end (and appended to the CSV file).

    Raises:
        SystemExit: If the Object Storage client cannot be created or the
            namespace cannot be fetched.
    """
    import csv  # local import: only this function needs it

    source_bucket = cmd.source_bucket
    source_prefix = cmd.source_prefix
    source_namespace = cmd.source_namespace

    # get signer
    config, signer = create_signer(
        cmd.config_file,
        cmd.config_profile,
        cmd.is_instance_principals,
        cmd.is_delegation_token,
    )

    # if region is specified
    if cmd.source_region:
        config["region"] = cmd.source_region

    try:
        # connect and fetch namespace
        print("\nConnecting to Object Storage Service...")
        object_storage_client = oci.object_storage.ObjectStorageClient(
            config, signer=signer
        )
        if cmd.proxy:
            object_storage_client.base_client.session.proxies = {"https": cmd.proxy}

        # retrieve namespace from object storage
        if not source_namespace:
            source_namespace = object_storage_client.get_namespace(
                retry_strategy=oci.retry.DEFAULT_RETRY_STRATEGY
            ).data

    except Exception as e:
        print("\nError connecting to Object Storage - " + str(e))
        raise SystemExit

    print("Success.")

    # print information
    print_command_info(source_namespace)
    print_header("Start Processing...")

    # if output to file
    csv_path = None
    if cmd.file:
        csv_path = cmd.file.name + ".csv"
        # argparse already opened the path given to -f; only its name is
        # used here, so release that handle (never close the console stream).
        if cmd.file is not sys.stdout:
            cmd.file.close()
        print("Writing to file..." + csv_path)

    csv_file = None
    try:
        csv_writer = None
        if csv_path:
            # Open before listing so an unwritable path fails immediately.
            csv_file = open(csv_path, "w", newline="")
            # csv.writer quotes folder names that contain commas or quotes.
            csv_writer = csv.writer(csv_file, lineterminator="\n")
            csv_writer.writerow(["Folder Name", "Size (KB)", "File Count"])

        # start processing
        count = 0
        size = 0
        # Accumulated across ALL pages so a folder that spans several pages
        # is reported once with its complete totals.
        folders = defaultdict(float)  # folder path -> size in KB
        file_count = defaultdict(int)  # folder path -> number of objects
        next_starts_with = None

        while True:
            response = object_storage_client.list_objects(
                source_namespace,
                source_bucket,
                start=next_starts_with,
                prefix=source_prefix,
                fields="size,timeCreated,timeModified,storageTier",
                retry_strategy=oci.retry.DEFAULT_RETRY_STRATEGY,
            )

            for object_file in response.data.objects:
                count += 1
                size += object_file.size

                # Names ending in "/" count toward the grand totals only,
                # not toward any folder.
                if object_file.name.endswith("/"):
                    continue

                # Credit the object to each of its parent folders.
                for folder_path in _folder_paths(object_file.name):
                    folders[folder_path] += object_file.size / 1024
                    file_count[folder_path] += 1

            next_starts_with = response.data.next_start_with
            if not next_starts_with:
                break

        # report, largest folder first
        ranked = sorted(folders.items(), key=lambda item: item[1], reverse=True)

        if csv_writer is not None:
            for folder, f_size in ranked:
                csv_writer.writerow(
                    [folder, "{:.2f}".format(round(f_size, 2)), file_count[folder]]
                )
        else:
            format_row = "{:<50} | {:>10} | {:>7}"
            print(format_row.format("Folder Name", "Size (KB)", "Count"))
            print("-" * 80)
            for folder, f_size in ranked:
                print(
                    format_row.format(
                        folder, "{:.2f}".format(round(f_size, 2)), file_count[folder]
                    )
                )

        # final output
        print_header("Completed")
        print("Completed at : " + get_time(True))
        print("Total Files : " + str("{:20,.0f}".format(count)).rjust(20))
        print("Total Size : " + str("{:20,.0f}".format(size)).rjust(20))

        if csv_file is not None:
            csv_file.write(
                "Total Files : "
                + str("{:10,.0f}".format(count)).rjust(10)
                + " Size : "
                + str("{:20,.0f}".format(size)).rjust(20)
            )
    finally:
        # Close the CSV file even when listing fails part-way through.
        if csv_file is not None:
            csv_file.close()
336+
337+
338+
##############################################################################
339+
# Execute
340+
##############################################################################
341+
if __name__ == "__main__":
342+
main()

0 commit comments

Comments
 (0)