
Commit 798ba49

Merge pull request #86 from bellanov/feat/issue-85-examples-and-coverage
feat: refactor examples and increase code coverage
2 parents 9173fad + 6274290 commit 798ba49


42 files changed: +2937 additions, -1446 deletions

.flake8

Lines changed: 0 additions & 5 deletions
This file was deleted.

README.md

Lines changed: 48 additions & 3 deletions
@@ -1,8 +1,8 @@
 # SerenadeFlow
 
-*SerenadeFlow* is a powerful and flexible ETL (Extract, Transform, Load) pipeline framework designed to streamline data processing from both local and remote sources.
+**SerenadeFlow** is a powerful and flexible *ETL (Extract, Transform, Load)* pipeline framework designed to streamline data processing from both **local** and **remote** sources.
 
-It Extracts data from diverse sources, Transforms it through customizable and reusable operations, and Loads it into your desired destination with minimal configuration.
+It *Extracts* data from diverse sources, *Transforms* it through customizable and reusable operations, and *Loads* it into your desired destination with minimal configuration.
 
 Built to be the Swiss Army Knife of ETL solutions, SerenadeFlow offers a simple yet extensible architecture that makes data movement and transformation intuitive—whether you're a data engineer, analyst, or developer.

@@ -12,7 +12,37 @@ The project is configured to operate in _Python >= 3.8_ enviornments.
 
 ## Quickstart
 
-Coming Soon.
+The best way to get started with SerenadeFlow is to explore the examples. Each example is a self-contained recipe that demonstrates a specific use case:
+
+### Basic ETL Pipeline
+
+```bash
+python3 examples/basic_etl_pipeline.py --data-dir ./data --output-prefix output
+```
+
+This example demonstrates the core ETL workflow: extracting data from local JSON files, transforming it, and loading it to output files.
+
+### Remote Data Extraction
+
+```bash
+python3 examples/quickstart.py
+```
+
+This example shows how to extract data from a remote JSON API endpoint.
+
+### Using Plugins
+
+See the [Examples Documentation](examples/README.md) for more recipes and use cases.
+
+## Examples and Recipes
+
+SerenadeFlow is built around **examples** (recipes) that demonstrate how to use the framework. Each example is a self-contained script showing a specific ETL pattern:
+
+- **Basic Examples**: Core ETL workflows (`basic_etl_pipeline.py`, `hello_world.py`, `quickstart.py`)
+- **Cloud Integrations**: Examples for GCS, Firestore, and other cloud services
+- **Plugin Examples**: Demonstrations of using community plugins
+
+See the [Examples Documentation](examples/README.md) for a complete guide to available examples and how to create your own.
 
 ## Data Source Configuration

@@ -129,3 +159,18 @@ pipeline.load(transformed, output_prefix="fantasyace")
 ### Contributing Plugins
 
 See `serenade_flow/community/PLUGIN_TEMPLATE.md` for how to document and contribute your own plugins.
+
+## Architecture
+
+SerenadeFlow follows a plugin-based architecture where:
+
+- **Core Utilities**: The `pipeline` module provides basic utilities for data processing
+- **Plugins**: Extensible components for specific data sources and transformations
+- **Examples**: Self-contained recipes that demonstrate complete ETL workflows
+
+This architecture allows you to:
+- Use existing examples as templates for your own pipelines
+- Extend functionality through plugins
+- Build custom ETL workflows by combining plugins and utilities
+
+For new projects, we recommend starting with an example that matches your use case and customizing it as needed.

examples/README.md

Lines changed: 44 additions & 0 deletions
@@ -0,0 +1,44 @@
+# SerenadeFlow Examples
+
+Examples are recipes that demonstrate how to use SerenadeFlow with different plugins, data sources, and configurations.
+
+## Example Structure
+
+Each example should be:
+- Self-contained and runnable independently
+- Configurable via command-line arguments
+- Testable
+
+## Example Categories
+
+### Basic Examples
+- `hello_world.py`: Simple introduction
+- `quickstart.py`: Remote data extraction
+- `basic_etl_pipeline.py`: Core ETL pipeline
+- `sports_odds_processing.py`: Sports betting data processing
+
+### Cloud Integrations
+- `cloud_integrations/gcs/`: Google Cloud Storage integration examples
+- `cloud_integrations/Firestore/`: Firestore integration examples
+
+## Running Examples
+
+Most examples can be run directly:
+
+```bash
+python3 examples/basic_etl_pipeline.py
+```
+
+Some examples accept command-line arguments:
+
+```bash
+python3 examples/cloud_integrations/Firestore/fantasyace_cf_example.py --sport-key americanfootball_nfl --limit 10
+```
+
+## Contributing Examples
+
+When contributing a new example:
+1. Place it in the appropriate directory
+2. Add command-line argument support where useful
+3. Add tests in `tests/test_examples.py`
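The contributing guidelines above ask for command-line argument support in new examples. A minimal argparse skeleton for that, sketched here; the flag names (`--sport-key`, `--limit`) mirror the Firestore example's CLI shown above and are assumptions about that script, not its actual source:

```python
# Minimal argparse skeleton for a new example. Flag names are assumptions
# modeled on the fantasyace_cf_example.py invocation in this README.
import argparse


def build_parser():
    parser = argparse.ArgumentParser(description="Example SerenadeFlow recipe")
    parser.add_argument("--sport-key", default="americanfootball_nfl",
                        help="Sport identifier to process")
    parser.add_argument("--limit", type=int, default=10,
                        help="Maximum number of records to fetch")
    return parser


if __name__ == "__main__":
    args = build_parser().parse_args()
    print(f"Processing up to {args.limit} records for {args.sport_key}")
```

Keeping the parser in a `build_parser()` function (rather than parsing at import time) is what makes the "add tests in `tests/test_examples.py`" step straightforward.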

examples/cloud/aws/s3.py

Lines changed: 23 additions & 0 deletions
@@ -0,0 +1,23 @@
+import boto3
+
+
+def get_public_bucket_keys(bucket_name):
+    """
+    Lists the keys (object names) in a public S3 bucket.
+    """
+    s3 = boto3.client("s3")
+    keys = []
+    paginator = s3.get_paginator("list_objects_v2")
+    pages = paginator.paginate(Bucket=bucket_name)
+    for page in pages:
+        if "Contents" in page:
+            for obj in page["Contents"]:
+                keys.append(obj["Key"])
+    return keys
+
+
+# Example usage:
+public_bucket_name = "your-public-bucket-name"  # Replace with your bucket name
+object_keys = get_public_bucket_keys(public_bucket_name)
+for key in object_keys:
+    print(key)
File renamed without changes.
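The key-collection loop in `s3.py` above can be factored into a pure helper that is unit-testable without AWS credentials or a live bucket. This is an editorial sketch, not part of the commit; it assumes only the page shape (`{"Contents": [{"Key": ...}]}`) that `list_objects_v2` pagination yields:

```python
# Sketch: the key-collection loop from get_public_bucket_keys, factored into
# a pure function so it can be exercised with fake pages instead of boto3.
from typing import Any, Dict, Iterable, List


def collect_keys(pages: Iterable[Dict[str, Any]]) -> List[str]:
    """Flatten list_objects_v2-style pages into a list of object keys.

    A page may omit "Contents" entirely (boto3 does this for empty result
    pages), so .get() with a default list keeps the loop safe.
    """
    keys: List[str] = []
    for page in pages:
        for obj in page.get("Contents", []):
            keys.append(obj["Key"])
    return keys
```

Note also that for a genuinely public bucket, the default `boto3.client("s3")` still looks for credentials; anonymous access requires an unsigned config (`Config(signature_version=botocore.UNSIGNED)`).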

examples/cloud_integrations/gcs/gcs_batch_processing_example.py renamed to examples/cloud/gcs/gcs_batch_processing_example.py

Lines changed: 53 additions & 47 deletions
@@ -5,18 +5,20 @@
 for the organized bucket structure.
 """
 
-from serenade_flow import pipeline
+import statistics
 import time
-from typing import Dict, List, Any
+from collections import defaultdict
 from dataclasses import dataclass, field
 from datetime import datetime
-from collections import defaultdict
-import statistics
+from typing import Any, Dict, List
+
+from serenade_flow import pipeline
 
 
 @dataclass
 class ProcessingMetrics:
     """Comprehensive metrics for batch processing operations."""
+
     total_files: int
     successful_files: int
     failed_files: int
@@ -31,6 +33,7 @@ class ProcessingMetrics:
 @dataclass
 class DataQualityReport:
     """Data quality assessment for extracted data."""
+
     file_path: str
     record_count: int
     column_count: int
@@ -55,27 +58,30 @@ def _extract_dataframe(self, data: Dict[str, Any]) -> Any:
         if not data:
             return None
         for key, value in data.items():
-            if hasattr(value, 'shape') and value.shape[0] > 0:
+            if hasattr(value, "shape") and value.shape[0] > 0:
                 return value
         return None
 
     def _analyze_data_types(self, df: Any) -> Dict[str, str]:
         """Analyze data types of DataFrame columns."""
         data_types = {}
         for col in df.columns:
-            if df[col].dtype == 'object':
-                data_types[col] = 'string'
-            elif 'int' in str(df[col].dtype):
-                data_types[col] = 'integer'
-            elif 'float' in str(df[col].dtype):
-                data_types[col] = 'float'
+            if df[col].dtype == "object":
+                data_types[col] = "string"
+            elif "int" in str(df[col].dtype):
+                data_types[col] = "integer"
+            elif "float" in str(df[col].dtype):
+                data_types[col] = "float"
             else:
                 data_types[col] = str(df[col].dtype)
         return data_types
 
     def _calculate_quality_score(
-        self, record_count: int, column_count: int,
-        missing_values: int, duplicate_records: int
+        self,
+        record_count: int,
+        column_count: int,
+        missing_values: int,
+        duplicate_records: int,
     ) -> tuple[float, List[str]]:
         """Calculate quality score and identify issues."""
         quality_score = 100.0
@@ -113,7 +119,7 @@ def assess_data_quality(
                 duplicate_records=0,
                 data_types={},
                 quality_score=0.0,
-                issues=["No valid DataFrame found"]
+                issues=["No valid DataFrame found"],
             )
 
         record_count = len(df)
@@ -133,31 +139,33 @@ def assess_data_quality(
             duplicate_records=duplicate_records,
             data_types=data_types,
             quality_score=quality_score,
-            issues=issues
+            issues=issues,
         )
 
     def process_file_with_analytics(self, file_path: str) -> Dict[str, Any]:
         """Process a single file with comprehensive analytics."""
         start_time = time.time()
         try:
            print(f"🔍 Processing: {file_path}")
-            pipeline.configure({
-                "data_source": "remote",
-                "data_source_path": self.bucket_url + file_path,
-                "data_format": "json"
-            })
+            pipeline.configure(
+                {
+                    "data_source": "remote",
+                    "data_source_path": self.bucket_url + file_path,
+                    "data_format": "json",
+                }
+            )
             data = pipeline.extract()
             quality_report = self.assess_data_quality(data, file_path)
             self.quality_reports.append(quality_report)
             processing_time = time.time() - start_time
             result = {
-                'success': True,
-                'file_path': file_path,
-                'processing_time': processing_time,
-                'record_count': quality_report.record_count,
-                'quality_score': quality_report.quality_score,
-                'data': data,
-                'quality_report': quality_report
+                "success": True,
+                "file_path": file_path,
+                "processing_time": processing_time,
+                "record_count": quality_report.record_count,
+                "quality_score": quality_report.quality_score,
+                "data": data,
+                "quality_report": quality_report,
             }
             print(
                 f" ✅ {quality_report.record_count} records, "
@@ -171,12 +179,12 @@ def process_file_with_analytics(self, file_path: str) -> Dict[str, Any]:
             processing_time = time.time() - start_time
             print(f" ❌ Error: {str(e)}")
             return {
-                'success': False,
-                'file_path': file_path,
-                'processing_time': processing_time,
-                'error': str(e),
-                'record_count': 0,
-                'quality_score': 0
+                "success": False,
+                "file_path": file_path,
+                "processing_time": processing_time,
+                "error": str(e),
+                "record_count": 0,
+                "quality_score": 0,
             }
 
     def process_batch_with_analytics(self, file_paths: List[str]) -> ProcessingMetrics:
@@ -194,10 +202,10 @@ def process_batch_with_analytics(self, file_paths: List[str]) -> ProcessingMetri
             if i < len(file_paths) - 1:
                 time.sleep(0.5)
         total_time = time.time() - start_time
-        successful_files = sum(1 for r in results if r['success'])
+        successful_files = sum(1 for r in results if r["success"])
         failed_files = len(results) - successful_files
-        total_records = sum(r['record_count'] for r in results if r['success'])
-        quality_scores = [r['quality_score'] for r in results if r['success']]
+        total_records = sum(r["record_count"] for r in results if r["success"])
+        quality_scores = [r["quality_score"] for r in results if r["success"]]
         avg_quality = statistics.mean(quality_scores) if quality_scores else 0
         throughput = total_records / total_time if total_time > 0 else 0
         metrics = ProcessingMetrics(
@@ -211,7 +219,7 @@ def process_batch_with_analytics(self, file_paths: List[str]) -> ProcessingMetri
             ),
             data_quality_score=avg_quality,
             error_rate=failed_files / len(file_paths) * 100,
-            throughput=throughput
+            throughput=throughput,
         )
         self._print_analytics_summary(metrics, results)
         return metrics
@@ -235,16 +243,14 @@ def _print_analytics_summary(self, metrics: ProcessingMetrics, results: List[Dic
         print(f" • Throughput: {metrics.throughput:.1f} records/sec")
         print("\n🔍 Quality Metrics:")
         print(f" • Avg Quality Score: {metrics.data_quality_score:.1f}/100")
-        quality_scores = [r['quality_score'] for r in results if r['success']]
+        quality_scores = [r["quality_score"] for r in results if r["success"]]
         if quality_scores:
             print(
                 f" • Quality Range: {min(quality_scores):.1f} - "
                 f"{max(quality_scores):.1f}"
             )
             if len(quality_scores) > 1:
-                print(
-                    f" • Quality Std Dev: {statistics.stdev(quality_scores):.1f}"
-                )
+                print(f" • Quality Std Dev: {statistics.stdev(quality_scores):.1f}")
         if self.quality_reports:
             data_types = defaultdict(int)
             for report in self.quality_reports:
@@ -273,13 +279,14 @@ def get_all_odds_files_by_format():
         "decimal": [
             "odds/decimal/event_0089bc8773d8ce4ce20f9df90723cac9.json",
             # ... add more as needed ...
-        ]
+        ],
     }
 
 
 # --- REPLACE CATEGORIES with new odds file discovery ---
 odds_files_by_format = get_all_odds_files_by_format()
 
+
 # Flatten for batch processing, tagging each with its format
 def get_tagged_odds_file_list():
     tagged = []
@@ -293,14 +300,13 @@ def demonstrate_batch_processing():
     """Demonstrate batch processing with analytics for both odds formats."""
     print("\n🎯 Batch Processing with SerenadeFlow (American & Decimal Odds)")
     print("=" * 70)
-    processor = BatchProcessor(
-        bucket_url=BUCKET_BASE_URL,
-        max_concurrent=3
-    )
+    processor = BatchProcessor(bucket_url=BUCKET_BASE_URL, max_concurrent=3)
     tagged_files = get_tagged_odds_file_list()
     all_metrics = {}
     for odds_format in odds_files_by_format:
-        format_files = [f["file_path"] for f in tagged_files if f["odds_format"] == odds_format]
+        format_files = [
+            f["file_path"] for f in tagged_files if f["odds_format"] == odds_format
+        ]
         print(f"\n🎯 Processing {odds_format.upper()} Odds Format")
         print("-" * 50)
         metrics = processor.process_batch_with_analytics(format_files)
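The `process_batch_with_analytics` diff above derives success counts, error rate, throughput, and an average quality score from per-file result dicts. The same aggregation, reduced to a pure function for clarity; this is an editorial sketch using the field names from the diff, not code from the commit:

```python
# Sketch of the aggregate metrics computed in process_batch_with_analytics,
# reduced to a pure function over per-file result dicts. Field names match
# the diff; ProcessingMetrics itself is not reproduced here.
import statistics
from typing import Any, Dict, List


def summarize(results: List[Dict[str, Any]], total_time: float) -> Dict[str, float]:
    """Derive success/error/throughput/quality aggregates from batch results."""
    successful = [r for r in results if r["success"]]
    failed = len(results) - len(successful)
    total_records = sum(r["record_count"] for r in successful)
    quality_scores = [r["quality_score"] for r in successful]
    return {
        "successful_files": len(successful),
        "failed_files": failed,
        "total_records": total_records,
        "data_quality_score": (
            statistics.mean(quality_scores) if quality_scores else 0
        ),
        "error_rate": failed / len(results) * 100 if results else 0,
        "throughput": total_records / total_time if total_time > 0 else 0,
    }
```

The `if quality_scores`/`if total_time > 0` guards mirror the diff's own handling of empty batches and zero elapsed time.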
