Skip to content

Commit b51f59b

Browse files
myadlaayefimov-1
authored and committed
Add synthetic data generation files and changes
Gemini AI used
1 parent 3f61b09 commit b51f59b

File tree

4 files changed

+200
-0
lines changed

4 files changed

+200
-0
lines changed
Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
import logging
2+
import argparse
3+
from datetime import datetime, timezone, timedelta
4+
from pathlib import Path
5+
from typing import Union
6+
from jinja2 import Template
7+
8+
# --- Configure logging with a default level that can be changed ---
# basicConfig() configures the root logger once at import time; main()
# may later raise the level to DEBUG via the --debug flag.
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s',
datefmt='%Y-%m-%d %H:%M:%S'
)
# NOTE(review): getLogger() with no name returns the *root* logger, so
# records from third-party libraries share this handler and level —
# presumably intentional for a small CLI script; confirm.
logger = logging.getLogger()
15+
16+
def _format_timestamp(epoch_seconds: float) -> str:
17+
"""
18+
Converts an epoch timestamp into a human-readable UTC string.
19+
20+
Args:
21+
epoch_seconds (float): The timestamp in seconds since the epoch.
22+
23+
Returns:
24+
str: The formatted datetime string (e.g., "2023-10-26T14:30:00 UTC").
25+
"""
26+
try:
27+
dt_object = datetime.fromtimestamp(epoch_seconds, tz=timezone.utc)
28+
return dt_object.strftime("%Y-%m-%dT%H:%M:%S %Z")
29+
except (ValueError, TypeError):
30+
logger.warning(f"Invalid epoch value provided: {epoch_seconds}")
31+
return "INVALID_TIMESTAMP"
32+
33+
def generate_loki_data(
    template_path: Path,
    output_path: Path,
    start_time: datetime,
    end_time: datetime,
    time_step_seconds: int
):
    """
    Generates synthetic Loki log data by first preparing a data list
    and then rendering it with a single template.

    Args:
        template_path (Path): Path to the main log template file.
        output_path (Path): Path for the generated output JSON file.
        start_time (datetime): The start time for data generation.
        end_time (datetime): The end time for data generation.
        time_step_seconds (int): The duration of each log entry in seconds.

    Raises:
        ValueError: If time_step_seconds is not positive.
        FileNotFoundError: If the template file does not exist.
        OSError: If the output file cannot be written.
    """
    # BUG FIX: a step of 0 previously surfaced as an opaque
    # "range() arg 3 must not be zero" error, and a negative step
    # silently produced an empty payload. Fail fast with a clear message.
    if time_step_seconds <= 0:
        raise ValueError(
            f"time_step_seconds must be a positive integer, got {time_step_seconds}"
        )

    # --- Step 1: Generate the data structure first ---
    logger.info(
        f"Generating data from {start_time.strftime('%Y-%m-%d')} to "
        f"{end_time.strftime('%Y-%m-%d')} with a {time_step_seconds}s step."
    )
    start_epoch = int(start_time.timestamp())
    end_epoch = int(end_time.timestamp())
    logger.debug(f"Time range in epoch seconds: {start_epoch} to {end_epoch}")

    log_data_list = []  # This list will hold all our data points

    # Loop through the time range and generate data points. A trailing
    # partial step (shorter than time_step_seconds) is intentionally skipped.
    for current_epoch in range(start_epoch, end_epoch, time_step_seconds):
        end_of_step_epoch = current_epoch + time_step_seconds - 1

        # Prepare replacement values (Loki expects nanosecond timestamps).
        nanoseconds = int(current_epoch * 1_000_000_000)
        start_str = _format_timestamp(current_epoch)
        end_str = _format_timestamp(end_of_step_epoch)

        logger.debug(f"Processing epoch: {current_epoch} -> nanoseconds: {nanoseconds}")

        # Create a dictionary for this time step and add it to the list
        log_data_list.append({
            "nanoseconds": nanoseconds,
            "start_time": start_str,
            "end_time": end_str
        })

    logger.info(f"Generated {len(log_data_list)} data points to be rendered.")

    # --- Step 2: Load template and render ---
    try:
        logger.info(f"Loading main template from: {template_path}")
        template_content = template_path.read_text()
        template = Template(template_content, trim_blocks=True, lstrip_blocks=True)
    except FileNotFoundError as e:
        logger.error(f"Error loading template file: {e}. Aborting.")
        raise  # Re-raise the exception to be caught in main()

    # --- Render the template in one pass with all the data ---
    logger.info("Rendering final output...")
    # The template expects a variable named 'log_data'
    final_output = template.render(log_data=log_data_list)

    # --- Step 3: Write the final string to the file ---
    try:
        output_path.write_text(final_output)
        logger.info(f"Successfully generated synthetic data to '{output_path}'")
    except OSError as e:
        # BUG FIX: write errors were previously logged and swallowed, so a
        # missing/partial output file still looked like success. Log for
        # context, then propagate so callers see the failure.
        logger.error(f"Failed to write to output file '{output_path}': {e}")
        raise
107+
108+
def main():
    """Main entry point for the script.

    Parses CLI arguments, computes the generation time window, and runs
    the generator. Exits with status 1 on failure so callers that check
    the return code (e.g. the Ansible task using
    `failed_when: script_output.rc != 0`) can detect errors.
    """
    parser = argparse.ArgumentParser(
        description="Generate synthetic Loki log data from a single main template.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    # --- Required File Path Arguments ---
    parser.add_argument("-o", "--output", required=True, help="Path to the output file.")
    # --- Only one template argument is needed now ---
    parser.add_argument("--template", required=True, help="Path to the main log template file (e.g., loki_main.tmpl).")

    # --- Optional Generation Arguments ---
    parser.add_argument("--days", type=int, default=30, help="How many days of data to generate, ending today.")
    parser.add_argument("--step", type=int, default=300, help="Time step in seconds for each log entry.")

    # --- Optional Utility Arguments ---
    parser.add_argument("--debug", action="store_true", help="Enable debug level logging for verbose output.")

    args = parser.parse_args()

    if args.debug:
        logger.setLevel(logging.DEBUG)
        logger.debug("Debug mode enabled.")

    # Define the time range for data generation (timezone-aware UTC).
    end_time_utc = datetime.now(timezone.utc)
    start_time_utc = end_time_utc - timedelta(days=args.days)
    logger.debug(f"Time range calculated: {start_time_utc} to {end_time_utc}")

    # Run the generator
    try:
        generate_loki_data(
            template_path=Path(args.template),
            output_path=Path(args.output),
            start_time=start_time_utc,
            end_time=end_time_utc,
            time_step_seconds=args.step
        )
    except FileNotFoundError:
        logger.error("Process aborted because the template file was not found.")
        # BUG FIX: the script previously exited 0 here, masking the
        # failure from CI callers that inspect the return code.
        raise SystemExit(1)
    except Exception as e:
        logger.critical(f"A critical, unhandled error stopped the script: {e}")
        raise SystemExit(1)


if __name__ == "__main__":
    main()
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
{#- Loki push-API payload template.
    Context: `log_data` — a list of dicts with keys `nanoseconds`,
    `start_time`, `end_time`, one entry per time step.  Each entry
    expands to TWO stream values (an image.size record and an instance
    record) sharing the same nanosecond timestamp.
    Rendered with trim_blocks=True and lstrip_blocks=True, so block and
    comment tags contribute no whitespace to the JSON output. #}
{"streams": [{ "stream": { "service": "cloudkitty" }, "values": [
{%- for item in log_data %}
[
"{{ item.nanoseconds }}",
"{\"start\": \"{{ item.start_time }}\", \"end\": \"{{ item.end_time }}\", \"type\": \"image.size\", \"unit\": \"MiB\", \"description\": null, \"qty\": 20.6875, \"price\": 0.0206875, \"groupby\": {\"id\": \"cd65d30f-8b94-4fa3-95dc-e3b429f479b2\", \"project_id\": \"0030775de80e4d84a4fd0d73e0a1b3a7\", \"user_id\": null, \"week_of_the_year\": \"37\", \"day_of_the_year\": \"258\", \"month\": \"9\", \"year\": \"2025\"}, \"metadata\": {\"container_format\": \"bare\", \"disk_format\": \"qcow2\"}}"
],
[
"{{ item.nanoseconds }}",
"{\"start\": \"{{ item.start_time }}\", \"end\": \"{{ item.end_time }}\", \"type\": \"instance\", \"unit\": \"instance\", \"description\": null, \"qty\": 1.0, \"price\": 0.3, \"groupby\": {\"id\": \"de168c31-ed44-4a1a-a079-51bd238a91d6\", \"project_id\": \"9cf5bcfc61a24682acc448af2d062ad2\", \"user_id\": \"c29ab6e886354bbd88ee9899e62d1d40\", \"week_of_the_year\": \"37\", \"day_of_the_year\": \"258\", \"month\": \"9\", \"year\": \"2025\"}, \"metadata\": {\"flavor_name\": \"m1.tiny\", \"flavor_id\": \"1\", \"vcpus\": \"\"}}"
]
{#- This logic adds a comma after every pair, *except* for the very last one. #}
{%- if not loop.last -%}
,
{%- endif -%}
{%- endfor %}
]}]}
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# Generate the synthetic Loki/CloudKitty data file (unless one already
# exists), then verify the generator actually produced a plausible file.
- name: TEST Check for preexisting output file
  ansible.builtin.stat:
    path: "{{ ck_json_db_file_out }}"
  register: file_preexists

# Run the Python generator only when no prior output file is present,
# making the play idempotent across reruns. rc drives both changed and
# failed status (the script is expected to exit nonzero on failure).
- name: TEST Generate Synthetic Data
  ansible.builtin.command: python3 {{ ck_py_script }} --template {{ ck_data_template }} -o {{ ck_json_db_file_out }} --days {{ ck_days }} --step {{ ck_step }}
  register: script_output
  when: file_preexists.stat.exists is false
  changed_when: script_output.rc == 0
  failed_when: script_output.rc != 0

# Fail when the output file is missing or implausibly small (< 20 bytes),
# which would indicate the generator wrote an empty/garbage document.
- name: TEST Validate that CK output file
  ansible.builtin.stat:
    path: "{{ ck_json_db_file_out }}"
  register: json_file_gen
  changed_when: false
  failed_when:
    - (json_file_gen.stat.exists is false or json_file_gen.stat.size | int < 20)
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,15 @@
11
---
- name: "Validate Chargeback Feature"
  ansible.builtin.include_tasks: "chargeback_tests.yml"

# Paths to the synthetic-data generator script, its Jinja2 template and
# the JSON output consumed by the chargeback validation tasks.
- name: "Define Synthetic Data Variables"
  ansible.builtin.set_fact:
    ck_py_script: "{{ ansible_user_dir }}/{{ zuul.projects['github.com/infrawatch/feature-verification-tests'].src_dir }}/roles/telemetry_chargeback/files/gen_synth_loki_data.py"
    ck_data_template: "{{ ansible_user_dir }}/{{ zuul.projects['github.com/infrawatch/feature-verification-tests'].src_dir }}/roles/telemetry_chargeback/files/loki_data_templ.j2"
    # BUG FIX: fall back to the conventional ci-framework data dir when
    # cifmw_basedir is undefined, so the role also works outside a full
    # ci-framework run (restores the intent of the previously
    # commented-out line instead of hard-failing on an undefined var).
    ck_json_db_file_out: "{{ cifmw_basedir | default(ansible_user_dir ~ '/ci-framework-data') }}/tests/feature-verification-tests/loki_synth_data.json"
    ck_days: 30
    ck_step: 300

- name: "Generate Synthetic Data"
  ansible.builtin.include_tasks: "gen_synth_loki_data.yml"

0 commit comments

Comments
 (0)