Skip to content

Commit dd52098

Browse files
authored
Merge pull request Azure#12495 from aghodsi/feature/watchlist_auto_update
Azure Function to update watchlist from a network restricted Azure Blob Storage
2 parents b184251 + adf22b2 commit dd52098

File tree

4 files changed

+242
-0
lines changed

4 files changed

+242
-0
lines changed
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
# Watchlist from Storage Azure Function
2+
3+
This Azure Function reads a CSV file from Azure Blob Storage and updates a Microsoft Sentinel Watchlist on a schedule.
4+
5+
## Features
6+
- Timer-triggered Azure Function (runs daily at 2:00 AM UTC)
7+
- Reads a CSV file from Azure Blob Storage using Managed Identities
8+
- Updates a Microsoft Sentinel Watchlist with the file contents or, alternatively, uploads the data to a custom table
9+
- Supports batching for large files
10+
11+
## Prerequisites
12+
- Python 3.8+
13+
- Azure Subscription
14+
- Microsoft Sentinel enabled in your Log Analytics workspace
15+
- Required Azure roles:
16+
- **Storage Blob Data Reader** on the storage account
17+
- **Microsoft Sentinel Contributor** on the Log Analytics workspace
18+
19+
## Environment Variables
20+
Set these in your `local.settings.json` for local development or as Application Settings in Azure:
21+
22+
| Name | Description |
23+
|------------------------|---------------------------------------------|
24+
| WATCHLIST_NAME | Name of the Sentinel watchlist |
25+
| AZURE_SUBSCRIPTION_ID | Azure subscription ID |
26+
| RESOURCE_GROUP_NAME | Resource group containing the workspace |
27+
| WORKSPACE_NAME | Log Analytics workspace name |
28+
| FILE_NAME | Name of the CSV file in Blob Storage |
29+
| STORAGE_ACCOUNT_NAME | Name of the storage account |
30+
| STORAGE_CONTAINER_NAME | Name of the blob container |
31+
| WATCHLIST_PROVIDER | Provider name for the watchlist |
32+
| WATCHLIST_SEARCH_KEY | Search key for the watchlist |
33+
| WATCHLIST_DESCRIPTION | Description of the watchlist |
34+
35+
## Local Development
36+
1. Install dependencies:
37+
```sh
38+
pip install -r requirements.txt
39+
```
40+
2. Start Azurite (for local blob storage emulation) or use a real Azure Storage account.
41+
3. Update `local.settings.json` with your environment variables.
42+
4. Run the function locally:
43+
```sh
44+
func start
45+
```
46+
47+
## Deployment
48+
Deploy to Azure using the Azure Functions extension for VS Code or Azure CLI.
49+
50+
## Notes
51+
- The function uses `DefaultAzureCredential` for authentication. Ensure your environment is authenticated (e.g., `az login`).
52+
- For large files, the function batches updates to avoid API limits.
53+
54+
## License
55+
MIT License
Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
import logging
2+
import azure.functions as func
3+
# Import DefaultAzureCredential from the identity package; it is used for authentication
4+
from azure.identity import DefaultAzureCredential
5+
# Import SecurityInsights, the client used to write to the Watchlist API
6+
from azure.mgmt.securityinsight import SecurityInsights
7+
# Import the Watchlist model that allows us to create a watchlist
8+
from azure.mgmt.securityinsight.models import Watchlist
9+
# Import the service client to read from our storage account
10+
from azure.storage.blob import BlobServiceClient
11+
# Import the log ingestion client to write to the Log Analytics API for creating custom logs
12+
from azure.monitor.ingestion import LogsIngestionClient
13+
import csv
14+
from io import StringIO
15+
# Import os to read environment variables
16+
import os
17+
# Import HttpResponseError to handle errors from the Log Ingestion API
18+
from azure.core.exceptions import HttpResponseError
19+
20+
21+
# Function app object; the timer trigger below is registered on it through its decorator.
app = func.FunctionApp()
22+
23+
# Module-level credential, created once at import time and passed to every client this module builds.
credential = DefaultAzureCredential()
24+
25+
def _iter_csv_batches(parsed_rows, max_lines):
    """
    Split parsed CSV rows into CSV strings that each start with the header row.

    :param parsed_rows: List of rows (each a list of strings); the first row is the header
    :param max_lines: Maximum number of lines per chunk, header line included
    :return: Generator yielding one CSV string per chunk
    """
    if not parsed_rows:
        return

    header, data_rows = parsed_rows[0], parsed_rows[1:]
    # One line of every chunk is reserved for the repeated header.
    rows_per_chunk = max(max_lines - 1, 1)

    for start in range(0, len(data_rows), rows_per_chunk):
        buffer = StringIO()
        # csv.writer re-quotes fields that contain commas, quotes or newlines;
        # a plain ",".join() would silently corrupt such rows.
        writer = csv.writer(buffer, lineterminator="\n")
        writer.writerow(header)
        writer.writerows(data_rows[start:start + rows_per_chunk])
        yield buffer.getvalue()


@app.timer_trigger(schedule="0 0 2 * * *", arg_name="myTimer", run_on_startup=False,
                   use_monitor=False)
def update_watchlist(myTimer: func.TimerRequest) -> None:
    """
    Timer-triggered entry point: read the CSV from Blob Storage and push it to the Sentinel watchlist.

    All settings are read from environment variables. When the storage reader
    reports that the file needs batching, the CSV is re-serialized into chunks
    of at most 800 lines (header included) and every chunk is uploaded with its
    own copy of the header; otherwise the file content is uploaded as-is.

    :param myTimer: Timer request supplied by the Functions runtime (not used by the body)
    """
    # Initialize variables from environment variables
    watchlist_name = os.getenv("WATCHLIST_NAME")
    subscription_id = os.getenv("AZURE_SUBSCRIPTION_ID")
    resource_group_name = os.getenv("RESOURCE_GROUP_NAME")
    workspace_name = os.getenv("WORKSPACE_NAME")
    file_name = os.getenv("FILE_NAME")
    storage_account_name = os.getenv("STORAGE_ACCOUNT_NAME")
    container_name = os.getenv("STORAGE_CONTAINER_NAME")
    provider = os.getenv("WATCHLIST_PROVIDER")
    search_key = os.getenv("WATCHLIST_SEARCH_KEY")
    description = os.getenv("WATCHLIST_DESCRIPTION")

    # If you want to use a custom table, please set the following variables in your environment
    # dce_url = os.getenv("DCE_URL")
    # rule_id = os.getenv("DCR_RULE_ID")
    # stream_name = os.getenv("STREAM_NAME")

    # Read watchlist items from storage
    watchlist_content, needsBatching = read_watchlist_from_storage(file_name, storage_account_name, container_name, credential)

    if needsBatching:
        batch_size = 800
        parsed_data = parse_csv_string(watchlist_content)
        # Every data row lands in exactly one chunk; the previous slice
        # parsed_data[i+1:i+batch_size] skipped the rows at index 800, 1600, ...
        for csv_content in _iter_csv_batches(parsed_data, batch_size):
            update_watchlist_sentinel(watchlist_name, csv_content, description, resource_group_name, workspace_name, search_key, provider, credential, subscription_id)
            # update_custom_table(csv_content, dce_url, rule_id, stream_name, credential)
    else:
        # Update the watchlist in Microsoft Sentinel
        update_watchlist_sentinel(watchlist_name, watchlist_content, description, resource_group_name, workspace_name, search_key, provider, credential, subscription_id)
        # update_custom_table(watchlist_content, dce_url, rule_id, stream_name, credential)
64+
65+
66+
def read_watchlist_from_storage(file_name, storage_account_name, container_name, credential):
    """
    Read a CSV blob from a storage account container.

    :param file_name: Name of the file to read; must end with ".csv"
    :param storage_account_name: Name of the storage account
    :param container_name: Name of the container in which the file is stored
    :param credential: Azure credentials for authentication
    :return: Tuple ``(file_content, needsBatching)``: the blob decoded as UTF-8 text,
        and a flag that is True when the blob is at least 3984588 bytes
    :raises ValueError: If file_name is missing or does not end with ".csv"
    """
    # A missing setting arrives here as None; report it as the same ValueError
    # instead of failing with AttributeError on .endswith().
    if not file_name or not file_name.endswith('.csv'):
        raise ValueError("File must be a CSV")

    blob_service_client = BlobServiceClient(account_url=f"https://{storage_account_name}.blob.core.windows.net", credential=credential)
    blob_client = blob_service_client.get_blob_client(container=container_name, blob=file_name)

    raw_content = blob_client.download_blob().readall()
    file_content = raw_content.decode('utf-8')

    logging.info("Read from Storage account:%s", file_content[:100])

    # TODO: Check if file exists and is not empty

    # The downloaded payload length is the blob size, so no second
    # get_blob_properties() round-trip is needed to decide on batching.
    needsBatching = len(raw_content) >= 3984588

    return file_content, needsBatching
95+
96+
97+
def parse_csv_string(csv_string):
    """
    Parse CSV text into a list of rows.

    :param csv_string: CSV data as a string
    :return: List of rows, each row being a list of field strings (header row included)
    """
    return list(csv.reader(StringIO(csv_string)))
103+
104+
def update_watchlist_sentinel(watchlist_name, watchlist_items, description, resource_group_name, workspace_name, search_key, provider, credential, subscription_id):
    """
    Create or update a watchlist in Microsoft Sentinel from CSV text.

    :param watchlist_name: Name of the watchlist; used as display name, alias and resource name
    :param watchlist_items: CSV text assigned to the watchlist's raw content
    :param description: Description stored on the watchlist
    :param resource_group_name: Name of the resource group where the workspace is located
    :param workspace_name: Name of the workspace where the watchlist is located
    :param search_key: Key to search for in the watchlist items
    :param provider: Provider name stored on the watchlist
    :param credential: Azure credentials for authentication
    :param subscription_id: Azure subscription ID
    """
    sentinel_client = SecurityInsights(credential, subscription_id)

    # Field values are collected first and then applied to a blank model.
    properties = {
        "display_name": watchlist_name,
        "items_search_key": search_key,
        "provider": provider,
        "source": "Local file",
        "raw_content": watchlist_items,
        "number_of_lines_to_skip": 0,
        "content_type": "text/csv",
        "description": description,
        "watchlist_alias": watchlist_name,
    }
    watchlist_model = Watchlist()
    for attribute, value in properties.items():
        setattr(watchlist_model, attribute, value)

    sentinel_client.watchlists.create_or_update(resource_group_name, workspace_name, watchlist_name, watchlist_model)
129+
130+
131+
def csv_to_json(csv_string):
    """
    Convert CSV text into JSON-serializable records.

    The first CSV row supplies the keys; every following row becomes one dict.

    :param csv_string: CSV data as a string
    :return: List of dicts, one per data row, keyed by the header fields
    """
    # Read through StringIO (as parse_csv_string does) rather than
    # str.splitlines(): splitlines() strips the newline inside a quoted
    # multi-line field and also breaks rows on characters such as form feed.
    return list(csv.DictReader(StringIO(csv_string)))
141+
142+
def update_custom_table(csv_data, dce, rule_id, stream_name, credential):
    """
    Upload CSV data to a table in Microsoft Sentinel. Required permissions: Monitoring Metrics Publisher for identity.

    An HttpResponseError raised by the upload is logged and not re-raised.

    :param csv_data: CSV data as a string
    :param dce: Data collection endpoint URL
    :param rule_id: Rule ID for the DCR
    :param stream_name: Name of the stream to upload to in your DCR
    :param credential: Azure credentials for authentication
    :return: None
    """
    # The context manager closes the client once the upload attempt is over.
    with LogsIngestionClient(
        endpoint=dce, credential=credential, logging_enable=True
    ) as client:
        try:
            client.upload(rule_id=rule_id, stream_name=stream_name, logs=csv_to_json(csv_data))
        except HttpResponseError as e:
            # Report through logging, like the rest of this module, not print().
            logging.error("Upload failed: %s", e)
        else:
            logging.info("Upload succeeded")
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
{
2+
"version": "2.0",
3+
"logging": {
4+
"applicationInsights": {
5+
"samplingSettings": {
6+
"isEnabled": true,
7+
"excludedTypes": "Request"
8+
}
9+
}
10+
},
11+
"extensionBundle": {
12+
"id": "Microsoft.Azure.Functions.ExtensionBundle",
13+
"version": "[4.*, 5.0.0)"
14+
}
15+
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# DO NOT include azure-functions-worker in this file
2+
# The Python Worker is managed by Azure Functions platform
3+
# Manually managing azure-functions-worker may cause unexpected issues
4+
5+
azure-functions
6+
7+
azure.mgmt.securityinsight
8+
azure.storage.blob
9+
azure.identity
10+
azure.monitor.ingestion

0 commit comments

Comments
 (0)