From 4b3f7811f72dc38cfdac98b62119b1759b8e1b32 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 13 Nov 2025 22:14:30 +0000 Subject: [PATCH 1/5] Initial plan From 18d9b74a9118e52d8ef0d9774e483487f36e685d Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 13 Nov 2025 22:17:06 +0000 Subject: [PATCH 2/5] Initial exploration of codebase cleanup requirements Co-authored-by: moshesham <7207587+moshesham@users.noreply.github.com> --- book-src/src/AB_Test_Final_Readout.md | 3 + book-src/src/DiD_Critical_Assessment.md | 3 + book-src/src/Journals_Instrumentation_Plan.md | 3 + .../Journals_Launch_Monitoring_Dashboard.md | 3 + book-src/src/Pre_Mortem_Memo.md | 3 + book-src/src/SUMMARY.md | 45 +- book-src/src/Week1_Launch_Summary.md | 3 + book-src/src/ab_test_analysis.md | 0 book-src/src/introduction.md | 457 ++++++++++++++++++ book-src/src/qbr_presentation.md | 0 book-src/src/week-1/day-01.md | 80 --- book-src/src/week-1/day-02.md | 35 -- book-src/src/week-1/day-03.md | 54 --- book-src/src/week-1/day-04.md | 49 -- book-src/src/week-1/day-05.md | 95 ---- book-src/src/week-1/day-06.md | 70 --- book-src/src/week-1/day-07.md | 3 - book-src/src/week-2/day-08.md | 3 - book-src/src/week-2/day-09.md | 3 - book-src/src/week-2/day-10.md | 3 - book-src/src/week-2/day-11.md | 3 - book-src/src/week-2/day-12.md | 3 - book-src/src/week-2/day-13.md | 3 - book-src/src/week-2/day-14.md | 3 - book-src/src/week-3/day-15.md | 3 - book-src/src/week-3/day-16.md | 3 - book-src/src/week-3/day-17.md | 3 - book-src/src/week-3/day-18.md | 3 - book-src/src/week-3/day-19.md | 3 - book-src/src/week-3/day-20.md | 3 - book-src/src/week-3/day-21.md | 3 - book-src/src/week-4/day-22.md | 3 - book-src/src/week-4/day-23.md | 3 - book-src/src/week-4/day-24.md | 3 - book-src/src/week-4/day-25.md | 3 - book-src/src/week-4/day-26.md | 3 - book-src/src/week-4/day-27.md | 3 - book-src/src/week-4/day-28.md | 3 - book-src/src/week-4/day-29.md | 3 - book-src/src/week-4/day-30.md | 3 - scripts/bash/build_book.sh | 0 41 files changed, 480 insertions(+), 495 deletions(-) create mode 100644 book-src/src/AB_Test_Final_Readout.md create mode 100644 book-src/src/DiD_Critical_Assessment.md create mode 100644 book-src/src/Journals_Instrumentation_Plan.md create mode 100644 book-src/src/Journals_Launch_Monitoring_Dashboard.md create mode 100644 book-src/src/Pre_Mortem_Memo.md create mode 100644 book-src/src/Week1_Launch_Summary.md create mode 100644 book-src/src/ab_test_analysis.md create mode 100644 book-src/src/introduction.md create mode 100644 book-src/src/qbr_presentation.md delete mode 100644 book-src/src/week-1/day-01.md delete mode 100644 book-src/src/week-1/day-02.md delete mode 100644 book-src/src/week-1/day-03.md delete mode 100644 book-src/src/week-1/day-04.md delete mode 100644 book-src/src/week-1/day-05.md delete mode 100644 book-src/src/week-1/day-06.md delete mode 100644 book-src/src/week-1/day-07.md delete mode 100644 book-src/src/week-2/day-08.md delete mode 100644 book-src/src/week-2/day-09.md delete mode 100644 book-src/src/week-2/day-10.md delete mode 100644 book-src/src/week-2/day-11.md delete mode 100644 book-src/src/week-2/day-12.md delete mode 100644 book-src/src/week-2/day-13.md delete mode 100644 book-src/src/week-2/day-14.md delete mode 100644 book-src/src/week-3/day-15.md delete mode 100644 book-src/src/week-3/day-16.md delete mode 100644 book-src/src/week-3/day-17.md delete mode 100644 book-src/src/week-3/day-18.md delete mode 100644 book-src/src/week-3/day-19.md delete mode 100644 book-src/src/week-3/day-20.md delete mode 100644 book-src/src/week-3/day-21.md delete mode 100644 book-src/src/week-4/day-22.md delete mode 100644 book-src/src/week-4/day-23.md delete mode 100644 book-src/src/week-4/day-24.md delete mode 100644 book-src/src/week-4/day-25.md delete mode 100644 book-src/src/week-4/day-26.md delete mode 100644 book-src/src/week-4/day-27.md delete mode 100644 book-src/src/week-4/day-28.md delete mode 100644 book-src/src/week-4/day-29.md delete mode 100644 book-src/src/week-4/day-30.md mode change 100644 => 100755 scripts/bash/build_book.sh diff --git a/book-src/src/AB_Test_Final_Readout.md b/book-src/src/AB_Test_Final_Readout.md new file mode 100644 index 0000000..4fc48db --- /dev/null +++ b/book-src/src/AB_Test_Final_Readout.md @@ -0,0 +1,3 @@ +# A/B Test Final Readout + +_Executive summary and recommendation as per Day 15 deliverable._ diff --git a/book-src/src/DiD_Critical_Assessment.md b/book-src/src/DiD_Critical_Assessment.md new file mode 100644 index 0000000..af4429e --- /dev/null +++ b/book-src/src/DiD_Critical_Assessment.md @@ -0,0 +1,3 @@ +# Why DiD Deviated from the A/B Test + +_Critical assessment as per Day 16 deliverable._ diff --git a/book-src/src/Journals_Instrumentation_Plan.md b/book-src/src/Journals_Instrumentation_Plan.md new file mode 100644 index 0000000..a127795 --- /dev/null +++ b/book-src/src/Journals_Instrumentation_Plan.md @@ -0,0 +1,3 @@ +# Journals Instrumentation Plan + +_Spec for engineering team as per Day 3 deliverable._ diff --git a/book-src/src/Journals_Launch_Monitoring_Dashboard.md b/book-src/src/Journals_Launch_Monitoring_Dashboard.md new file mode 100644 index 0000000..6bcff35 --- /dev/null +++ b/book-src/src/Journals_Launch_Monitoring_Dashboard.md @@ -0,0 +1,3 @@ +# Journals Launch Monitoring Dashboard + +_Dashboard specification as per Day 6 deliverable._ diff --git a/book-src/src/Pre_Mortem_Memo.md b/book-src/src/Pre_Mortem_Memo.md new file mode 100644 index 0000000..971eebf --- /dev/null +++ b/book-src/src/Pre_Mortem_Memo.md @@ -0,0 +1,3 @@ +# Pre-Mortem Memo + +_Addressed to the 'Journals' feature team as per Day 7 deliverable._ diff --git a/book-src/src/SUMMARY.md b/book-src/src/SUMMARY.md index af36379..a09a328 100644 --- a/book-src/src/SUMMARY.md +++ b/book-src/src/SUMMARY.md @@ -1,45 +1,10 @@ # Summary -[Introduction](../introduction.md) +[Introduction](./introduction.md) -# Week 1: Foundations & Framing -- [Day 01: Opportunity Discovery](./week-1/day-01.md) -- [Day 02: Opportunity Sizing](./week-1/day-02.md) -- [Day 03: Instrumentation Plan](./week-1/day-03.md) -- [Day 04: A/B Test Design](./week-1/day-04.md) -- [Day 05: Difference-in-Differences](./week-1/day-05.md) -- [Day 06: BI Dashboard Spec](./week-1/day-06.md) -- [Day 07: Pre-Mortem Memo](./week-1/day-07.md) - -# Week 2: The Crucible – Monitoring, Triage, and First Signals -- [Day 08: Launch Day](./week-2/day-08.md) -- [Day 09: Bug Triage](./week-2/day-09.md) -- [Day 10: Adoption Funnel](./week-2/day-10.md) -- [Day 11: The "Aha!" Moment](./week-2/day-11.md) -- [Day 12: Weekly Launch Memo](./week-2/day-12.md) -- [Day 13: Early A/B Test Readout](./week-2/day-13.md) -- [Day 14: Quant + Qual Analysis](./week-2/day-14.md) - -# Week 3: The Deep Dive – Causal Impact and Strategic Value -- [Day 15: Definitive A/B Test Readout](./week-3/day-15.md) -- [Day 16: DiD Post-Mortem](./week-3/day-16.md) -- [Day 17: Cannibalization vs. Creation](./week-3/day-17.md) -- [Day 18: Retention Curves](./week-3/day-18.md) -- [Day 19: Engagement Loop](./week-3/day-19.md) -- [Day 20: Predictive Modeling](./week-3/day-20.md) -- [Day 21: The One-Slide Story](./week-3/day-21.md) - -# Week 4: The Strategy – From Analyst to Influencer -- [Day 22: Quantifying Business Impact](./week-4/day-22.md) -- [Day 23: ...](./week-4/day-23.md) -- [Day 24: ...](./week-4/day-24.md) -- [Day 25: ...](./week-4/day-25.md) -- [Day 26: ...](./week-4/day-26.md) -- [Day 27: ...](./week-4/day-27.md) -- [Day 28: ...](./week-4/day-28.md) -- [Day 29: ...](./week-4/day-29.md) -- [Day 30: ...](./week-4/day-30.md) +--- # Key Analyses -- [A/B Test Analysis](../ab_test_analysis.md) -- [QBR Presentation Outline](../qbr_presentation.md) + +- [A/B Test Analysis](./ab_test_analysis.md) +- [QBR Presentation Outline](./qbr_presentation.md) diff --git a/book-src/src/Week1_Launch_Summary.md b/book-src/src/Week1_Launch_Summary.md new file mode 100644 index 0000000..3dcfd96 --- /dev/null +++ b/book-src/src/Week1_Launch_Summary.md @@ -0,0 +1,3 @@ +# Week 1 Launch Summary + +_Professional memo summarizing Week 1 as per Day 12 deliverable._ diff --git a/book-src/src/ab_test_analysis.md b/book-src/src/ab_test_analysis.md new file mode 100644 index 0000000..e69de29 diff --git a/book-src/src/introduction.md b/book-src/src/introduction.md new file mode 100644 index 0000000..e0ea1dd --- /dev/null +++ b/book-src/src/introduction.md @@ -0,0 +1,457 @@ +# The 30-Day Product Analytics Masterclass: The 'Journals' Sprint + +This document outlines the full 30-day curriculum. Each day presents a new challenge that builds upon the last, simulating the real-world experience of a Product Analyst owning a major feature launch from ideation to strategic review. + +--- +## Week 1: Foundations & Framing – From Idea to Plan + +**Goal:** Transform a vague product idea into a concrete, data-backed, and measurable plan. + +### Day 01: The Data Warehouse & Opportunity Discovery + +* **Objective:** To validate the need for a new feature by performing proactive discovery on raw, qualitative user data. +* **Why This Matters:** Great analysis starts with curiosity. Before complex modeling, you must be able to explore raw data to find signals of unmet user needs. +* **Tasks:** + 1. **Environment Setup:** Familiarize yourself with the Docker environment. Create a new notebook `01_opportunity_validation.ipynb`. Write a Python script using the `duckdb` library to load the provided `.parquet` files (`events`, `users`) and `.csv` file (`app_store_reviews`) into DuckDB tables. + 2. **Schema Exploration:** Write SQL queries to explore the schema and first 10 rows of each table. Understand their columns and relationships. + 3. **Qualitative Signal Mining:** Write a SQL query using `LOWER()` and `LIKE` to search for keywords like `%diary%`, `%journal%`, `%private%`, `%thoughts%`, and `%notes%` within the `review_text` column. + 4. **Initial Quantification:** Use a CTE to first identify reviews with these keywords, then calculate the total count of these reviews and their average star `rating`. Compare this average rating to the overall average rating for all reviews. +* **Deliverable:** A notebook containing the setup script and SQL queries, with a markdown cell summarizing your findings: "We found X reviews mentioning journaling themes, with an average rating of Y, which is Z points lower/higher than the platform average. This suggests a passionate, potentially underserved user group." + +### Day 02: Opportunity Sizing & The Business Case + +* **Objective:** To translate a qualitative signal into a quantitative business case to justify engineering resources. +* **Why This Matters:** A good idea is not enough. You must be able to quantify the potential impact on core business metrics to get buy-in for your project. +* **Tasks:** + 1. **Identify User Segment:** Write a SQL query to extract the unique `user_id`s for users who left the keyword-positive reviews identified on Day 1. + 2. **Analyze Segment Behavior:** Join this list of `user_id`s with the `users` and `events` tables. Calculate this segment's key metrics: number of users, average 90-day retention, and average number of sessions per week. + 3. **Build Forecast Model:** In a Pandas notebook, build a simple forecast. **Assumption:** The 'Journals' feature will increase this specific segment's 90-day retention by a relative 15%. Project the *net new* Weekly Active Users (WAU) this would add to the platform after one year. + 4. **Write Recommendation:** Write a one-paragraph summary titled "Business Case for 'Journals' Feature." Use your forecast as the core evidence to argue for or against prioritizing this feature. +* **Deliverable:** A notebook with the segment analysis and forecast, concluding with the clear, data-backed recommendation. + +### Day 03: The Instrumentation Plan & Success Metrics + +* **Objective:** To define exactly what user actions need to be tracked and what metrics will define success, before any code is written. +* **Why This Matters:** If you can't measure it, you can't improve it. An instrumentation plan is the contract between Product, Engineering, and Analytics that ensures you have the data you need to make decisions. +* **Tasks:** + 1. **Create Spec Document:** In a markdown file named `Journals_Instrumentation_Plan.md`, create a spec for the engineering team. + 2. **Define Events:** List the new events that need to be tracked. Be precise. Example: + * `event_name: view_journals_page`, `properties: {source: 'main_feed_icon'}` + * `event_name: create_journal_entry`, `properties: {has_photo: true, template_used: 'gratitude_template'}` + 3. **Define Success Metrics:** Formally define the Primary and Secondary metrics. + * **Primary Metric:** Day-28 User Retention. (Justify why this reflects long-term habit formation). + * **Secondary Metrics:** Journal Entries per User per Week, % of WAU using Journals. + 4. **Define Guardrail Metrics:** Define at least three critical **Guardrail (or Counter) Metrics** (e.g., Time Spent on Main Feed, Direct Messages Sent, App Uninstalls). For each, explain what negative outcome it is designed to detect (e.g., cannibalization, user frustration). +* **Deliverable:** The completed `Journals_Instrumentation_Plan.md` file. + +### Day 04: The Gold-Standard A/B Test Design + +* **Objective:** To design a rigorous, trustworthy experiment to measure the causal impact of the 'Journals' feature. +* **Why This Matters:** Correlation is not causation. A well-designed A/B test is the most reliable way to prove that your feature *caused* an outcome, eliminating guesswork. +* **Tasks:** + 1. **Define Hypothesis:** In a markdown document, state a clear, falsifiable hypothesis: "We hypothesize that providing users with the 'Journals' feature will lead to a statistically significant increase in Day-28 User Retention." + 2. **Define Experiment Parameters:** Define the population (e.g., All iOS users in US, UK, CA), the allocation (50/50 split), and the duration (28 days + time to mature). + 3. **Perform Power Analysis:** Using the `statsmodels.stats.power` library in Python, perform a power analysis. Calculate the required sample size per group to detect a 2% relative lift in your primary metric (Day-28 Retention), assuming a baseline retention of 20%, a significance level (alpha) of 0.05, and statistical power of 80%. + 4. **Write Summary:** Conclude with a sentence: "We require a minimum of N users per group and estimate the experiment will need to run for X days to reach this sample size." +* **Deliverable:** A notebook containing the power analysis code and a markdown summary of the complete A/B test design. + +### Day 05: The Quasi-Experiment Backup Plan: Difference-in-Differences + +* **Objective:** To design an alternative causal inference method for situations where a perfect A/B test is not feasible (e.g., a phased rollout). +* **Why This Matters:** A/B tests are not always possible. Knowing quasi-experimental methods like DiD allows you to estimate causal impact in complex, real-world scenarios. +* **Tasks:** + 1. **Theory Review:** Read a provided article explaining the theory and crucial "parallel trends" assumption of the Difference-in-Differences (DiD) method. + 2. **Outline Strategy:** The team plans a Canada-only MVP launch first. Outline your DiD strategy in a notebook. + 3. **Select Groups & Validate Assumption:** Write SQL queries to select your "Treatment Group" (users in Canada) and a "Control Group" (e.g., users in Australia & New Zealand). **Critically, plot the weekly retention trends for both groups over the 3 months *prior* to the hypothetical launch to visually check the parallel trends assumption.** + 4. **Define Periods & Pseudo-Code:** Define the "Pre-Period" (4 weeks before launch) and "Post-Period" (4 weeks after launch). Write the pseudo-code for the final DiD calculation: `(Treat_Post_Avg - Treat_Pre_Avg) - (Control_Post_Avg - Control_Pre_Avg)`. +* **Deliverable:** A notebook containing the SQL for group selection, the parallel trends plot, and the DiD pseudo-code. + +### Day 06: The BI Dashboard Specification + +* **Objective:** To practice defining clear, actionable requirements for monitoring tools, a key collaborative skill for any Product Analyst. +* **Why This Matters:** You are the domain expert. A BI developer builds what you tell them to build. A precise spec ensures the final dashboard is useful and answers the most important business questions. +* **Tasks:** + 1. **Create Spec Document:** In a markdown file, create a specification document titled "Journals Launch Monitoring Dashboard." + 2. **Define Core KPIs:** List the 3-5 headline KPIs that should be at the top (e.g., Total Journal Adopters, Adoption Rate %, Entries Created Today). + 3. **Define Visualizations:** Define 5-7 key charts. For each, specify: Title, Chart Type, X-axis, Y-axis, and required filters (e.g., Date Range, Country). + * Example: *Chart 1: Daily 'Journals' Adopters (New Users Creating First Entry) | Type: Line Chart | X-Axis: Day | Y-Axis: Count of Unique Users.* + 4. **Write Production SQL:** For at least three of the charts, write the precise, production-ready SQL query that would generate the data. Include comments explaining the logic. +* **Deliverable:** The completed dashboard specification document. + +### Day 07: The Pre-Mortem: A Memo on What Could Go Wrong + +* **Objective:** To develop strategic foresight by anticipating and planning for potential negative outcomes before they happen. +* **Why This Matters:** A good analyst doesn't just report on what happened; they anticipate what *could* happen. This builds trust and helps the team avoid preventable failures. +* **Tasks:** + 1. **Draft Memo:** In a markdown document, write a "Pre-Mortem Memo" addressed to the 'Journals' feature team. + 2. **Outline Risks:** Outline the top 3 plausible risks of the launch. Be specific and tie them to metrics. + * *Risk 1: Cannibalization.* "The feature could divert engagement from our core feed, leading to a drop in total session time and ad revenue." + * *Risk 2: Privacy Backlash.* "Users may not trust our data privacy, leading to negative App Store reviews and a spike in account deletions." + * *Risk 3: Failed Adoption.* "The feature's UI could be confusing, leading to a near-zero adoption rate and wasted engineering effort." + 3. **Define Detection Plan:** For each risk, specify the guardrail metric (from Day 3) that will be your early warning system and the threshold that would trigger an alert (e.g., "If `avg_main_feed_time_spent` drops by a statistically significant 5% in the first week, we will escalate"). +* **Deliverable:** The completed pre-mortem memo. + +## Week 2: The Crucible – Monitoring, Triage, and First Signals + +**Goal:** This week simulates the chaos and opportunity of a real feature launch. Your role will evolve from a reactive monitor to a proactive diagnostician and trusted communicator. You will learn to find the signal in the noise, provide clarity when there is ambiguity, and lay the groundwork for the strategic deep dives in Week 3. By the end of this week, you will have moved from simply reporting numbers to interpreting their meaning and recommending action. + +### Day 08: Launch Day! The Command Center + +* **Objective:** To establish and run the "command center," actively assessing product and system health to provide the team with the first all-clear signal. +* **Why This Matters:** Before asking "Are they using it?", you must confidently answer "Is it working?". As the analyst, you are the first line of defense, responsible for building trust in the data and the product's stability from the very first hour. +* **Tasks:** + 1. **Establish Monitoring Dashboards:** In notebook `08_launch_monitoring.ipynb`, write two sets of SQL queries intended to be run every 15 minutes: + * **System Health Dashboard:** Tracks core infrastructure stability. Metrics: `total_events_per_minute`, `app_crash_event_rate`, `avg_server_latency_ms`. + * **Product Adoption Dashboard:** Tracks the earliest signs of user engagement. Metrics: `count_unique_users_tapping_icon`, `count_first_journal_entries_created`. + 2. **Set Alert Thresholds:** Define and document simple thresholds for your guardrail metrics (e.g., "Alert if `app_uninstalls_per_hour` exceeds the pre-launch average by 20%"). Monitor these throughout the day. + 3. **Create the End-of-Day Sign-Off:** At the end of the launch day, produce a concise summary report. It should include a single visualization plotting the key health metrics and a three-point conclusion: + * **System Status:** Stable. Crash rates and latency remained within acceptable thresholds. + * **Initial Adoption Signal:** Positive. Over 1,200 users created a first entry, with a steady stream of new adopters. + * **Overall Assessment:** Launch is stable. All clear to proceed with monitoring. +* **Deliverable:** The `08_launch_monitoring.ipynb` notebook containing the monitoring queries, the end-of-day visualization, and the structured sign-off report. + +### Day 09: The Fire Drill – Precision Bug Triage + +* **Objective:** To move from a vague bug report to a precise, actionable diagnosis that empowers the engineering team to resolve the issue quickly. +* **Why This Matters:** A great analyst is an engineer's best friend during a crisis. By isolating the "blast radius" of a bug, you save countless hours of guesswork and turn a panic-inducing problem into a solvable one. +* **Tasks:** + 1. **The Report:** A Jira ticket is filed: "Android users are complaining about crashes since the update." + 2. **Quantify and Isolate:** Write a SQL query that joins `events` (filtering for `event_name = 'app_crash'`) with the `users` table. The goal is to find the combination of dimensions (`app_version`, `os_version`, `device_model`) with the highest crash rate. + 3. **Calculate Severity:** Don't just count crashes. Calculate the *crash rate per user* for the affected segment and compare it to the baseline rate for all other Android users. This quantifies the severity. + 4. **Write the Triage Report:** In notebook `09_bug_triage.ipynb`, draft a formal triage report in a markdown cell, formatted for clarity: + * **Subject:** Confirmed & Isolated: Android Crash Spike on App v3.4.1 + * **Summary:** The reported crash spike is confirmed and isolated to a specific user segment. + * **Impacted Segment:** Users on `App Version 3.4.1` running `Android OS 12`, primarily on `Samsung Galaxy S21` devices. + * **Severity:** This segment is experiencing a **15% crash rate per session**, compared to a **0.1% baseline** for the rest of the Android user base. + * **Recommendation:** High-priority ticket for the Android engineering team. Data query available for further debugging. +* **Deliverable:** A notebook containing the diagnostic query and the professionally formatted Bug Triage Report. + +### Day 10: The Adoption Funnel – Diagnosing User Friction + +* **Objective:** To visualize the user journey into the feature and pinpoint the exact step where most users are dropping off. +* **Why This Matters:** A feature's failure is often not due to a lack of value, but to friction. The funnel is your x-ray for seeing exactly where that friction occurs in the user experience. +* **Tasks:** + 1. **Define a Time-Bound Funnel:** Define the key steps: `app_open` -> `view_main_feed` -> `tap_journals_icon` -> `create_first_journal_entry`. **Crucially, a user must complete all steps within a single session to count.** + 2. **Write the Funnel Query:** In `10_adoption_funnel.ipynb`, write a single, robust SQL query using Common Table Expressions (CTEs) or `LEFT JOIN`s to calculate the number of unique users who completed each step *within the same session*. + 3. **Visualize and Annotate:** Use a plotting library to create a funnel chart. The title must be clear: "Journals Adoption Funnel (Within First Session)." + 4. **Identify Leakage & Formulate a Product Hypothesis:** Annotate the chart to highlight the biggest drop-off point. Below the chart, write a specific, testable product hypothesis to address it. Example: "The 60% drop-off between viewing the feed and tapping the icon suggests low discoverability. **We hypothesize that changing the icon color and adding a 'New' badge will increase the tap-through rate by 20%.**" +* **Deliverable:** A notebook with the time-bound funnel query, the annotated chart, and a clear, testable product hypothesis. + +### Day 11: The "Aha!" Moment – Finding the Magic Action + +* **Objective:** To find the early user action that most strongly correlates with long-term feature retention, while understanding the limits of correlation. +* **Why This Matters:** The "Aha!" moment is where a user internalizes a product's value. Identifying it gives the product team a powerful lever to improve user onboarding and drive habit formation. +* **Critical Thinking Check:** This analysis reveals **correlation, not causation**. A user who adds a photo might be more motivated to begin with. Your job is to find the signal and frame it correctly as a hypothesis to be tested later. +* **Tasks:** + 1. **Define User Cohorts:** Based on the first 7 days of feature usage, create two distinct groups: + * **"Engaged & Retained":** Users who created ≥ 3 journal entries. + * **"Churned Adopters":** Users who created only 1 journal entry and never returned to the feature. + 2. **Analyze First-Session Behavior:** For both cohorts, calculate the percentage of users who performed key actions (`used_template`, `added_photo`, `wrote_over_100_chars`) during their **very first session**. + 3. **Isolate the Strongest Signal:** Find the action with the largest *relative difference* in completion rates between the "Engaged" and "Churned" cohorts. + 4. **Formulate a Careful Hypothesis:** In notebook `11_aha_moment_analysis.ipynb`, state your finding with analytical precision. Example: "We've identified a strong correlation: Users who add a photo to their first journal entry are 3x more likely to become retained on the feature. **We hypothesize that this action represents the 'Aha!' moment. To test this, we should build an A/B test that encourages photo uploads during onboarding.**" +* **Deliverable:** The notebook containing the cohort analysis and the carefully worded, actionable hypothesis. + +### Day 12: The Weekly Launch Memo – Communicating with Clarity + +* **Objective:** To synthesize a week of complex findings into a clear, concise, and persuasive memo for leadership and the broader team. +* **Why This Matters:** Data only drives decisions when it is communicated effectively. This memo is your chance to shape the narrative, manage expectations, and guide the team's focus for the upcoming week. +* **Tasks:** + 1. **Review the Week's Learnings:** Re-read your findings from Days 8-11. + 2. **Draft the Memo:** In a new file `Week1_Launch_Summary.md`, draft a memo using a structured format that executives appreciate. + * **Subject:** 'Journals' Launch: Week 1 Data Summary & Recommendations + * **TL;DR:** One sentence summarizing the state of the launch. (e.g., "Launch is stable with promising early adoption, but a clear discoverability issue is limiting its reach.") + * **The Good (Wins):** "Adoption is tracking 15% ahead of forecasts..." + * **The Bad (Challenges):** "A significant 60% of users drop off before finding the feature..." + * **The Insights (Learnings):** "We've found a powerful correlation between adding a photo and long-term retention..." + * **Actionable Recommendations:** "Based on this data, our priorities for Week 2 are: 1. Design an A/B test for a more prominent entry point. 2. Scope a 'photo suggestion' feature for onboarding." +* **Deliverable:** The completed, professionally structured `Week1_Launch_Summary.md` memo. + +### Day 13: The Early A/B Test Readout – Resisting Pressure + +* **Objective:** To analyze preliminary A/B test data while masterfully managing stakeholder expectations and preventing premature decision-making. +* **Why This Matters:** The single fastest way to lose credibility as an analyst is to endorse a decision based on noisy, statistically insignificant early data. Your role is to be the voice of statistical integrity. +* **Tasks:** + 1. **The Ask:** A Product Manager asks, "It's been 7 days, how is the A/B test looking? Are we winning?" + 2. **Run the Analysis:** Using the first 7 days of data, calculate the primary metric (Day-7 Retention as a proxy) for control and treatment. Calculate the p-value and 95% confidence interval. + 3. **Draft the Update Memo:** Write a `AB_Test_Week1_Update.md` memo that provides the data while heavily reinforcing correct statistical practice. + * **Headline:** State the raw numbers. (e.g., "Preliminary D7 retention shows a +1.5% relative lift for treatment.") + * **Statistical Readout:** Provide the figures. (e.g., "95% CI: [-0.5%, +3.5%], p-value: 0.25"). + * **Mandatory Analyst Caveats:** This section is non-negotiable. + * **Statistical Significance:** "This result is **not statistically significant**. The p-value indicates this could easily be due to random chance." + * **Novelty Effect:** "Early lift is often inflated by user curiosity and should not be considered indicative of long-term behavior." + * **Conclusion:** "Per our experimental design, we will not make a decision until the full 28-day data is available. This preliminary result is for monitoring purposes only." +* **Deliverable:** The update memo that perfectly balances transparency with statistical responsibility. + +### Day 14: Weaving the Narrative – Quant + Qual + +* **Objective:** To combine quantitative metrics with qualitative user feedback to create a holistic and deeply empathetic understanding of the user experience. +* **Why This Matters:** Numbers tell you *what* users do; words tell you *how they feel*. The most powerful insights lie at the intersection of both. This skill separates a data reporter from a true product strategist. +* **Tasks:** + 1. **Load and Categorize:** Load the `feedback.csv` file. Use simple regex or keyword matching in Python to categorize feedback into themes like "Bug Report," "Feature Request," "Praise," and "Privacy Concern." Create a bar chart of the themes. + 2. **Find the Story:** Look for a connection. Does the qualitative data explain, contradict, or add nuance to a quantitative finding from earlier in the week? + 3. **Write the Synthesized Insight:** In `14_qualitative_analysis.ipynb`, write a paragraph that weaves the two data sources together into a single, powerful narrative. + * **Example:** "While our quantitative funnel analysis (Day 10) pointed to a major discoverability problem, our qualitative feedback provides the 'why'. Of the 50 comments categorized as 'Praise', over 80% include phrases like 'I finally found this' or 'I wish I knew about this sooner'. This strongly supports our hypothesis that the feature's value proposition is strong, but its current placement is failing our users." +* **Deliverable:** A notebook containing the theme analysis and a paragraph demonstrating a masterful synthesis of quantitative and qualitative data. + +## Week 2: The Launch – Monitoring, Triage, and First Signals + +**Objective:** Survive the chaos of launch week. You will monitor the data firehose, quickly diagnose problems, and provide clear, real-time updates to your team. + +### Day 08: Launch Day! The Real-Time Health Check + +**Objective:** To monitor key product and system metrics for immediate, catastrophic anomalies upon feature release. + +**Tasks:** + +* **Set up Monitoring Queries:** You are given a stream of new `events` data. Write a set of SQL queries that refresh every 5 minutes to track key system health metrics: total events per minute, error event rate, and server latency. +* **Monitor Guardrails:** Track your key guardrail metrics from Day 3 at an hourly grain. Are uninstalls or bug reports spiking? +* **End-of-Day Report:** At the end of the day, create a single visualization plotting the key health metrics throughout the day and add a one-sentence summary: "Launch stable" or "Investigating anomaly in [metric]." + +### Day 09: The Fire Drill: Diagnosing a Bug + +**Objective:** To isolate the source of a reported user-facing issue with speed and precision to aid engineering. + +**Tasks:** + +* **Receive Report:** The support team reports a spike in crashes specifically for Android users after updating to the latest app version. +* **Isolate Impact:** Write a SQL query to join `events` (filtering for `event_name = 'app_crash'`) with the `users` table. +* **Segment and Pinpoint:** Group the crash data by `app_version`, `os_version`, and `device_model`. Identify the exact combination that has the highest crash rate. +* **Communicate Findings:** Write a short, clear message as if for a company Slack channel: "@Eng-Team: The Android crash spike is isolated to users on App Version 3.4.1 running Android OS 12, primarily on Samsung Galaxy S21 devices. Crash rate for this segment is 15% vs. 0.1% baseline." + +### Day 10: The Adoption Funnel: Are Users Finding It? + +**Objective:** To understand and visualize the user path to discovering and using the new feature for the first time. + +**Tasks:** + +* **Define Funnel Steps:** Define the key steps in the adoption funnel: `app_open` -> `view_main_feed` -> `tap_journals_icon` -> `create_first_journal_entry`. +* **Calculate Conversion:** Write a single, complex SQL query using CTEs or subqueries to calculate the number of unique users who completed each step of the funnel within 24 hours of the feature being available to them. +* **Visualize Funnel:** Use a plotting library to create a classic funnel chart showing the conversion rate between each step. +* **Identify Leakage:** Add an annotation to the chart highlighting the biggest drop-off point. + +### Day 11: The "Aha!" Moment: Early Engagement Patterns + +**Objective:** To identify the key set of early actions that correlates with a user becoming retained on the new feature. + +**Tasks:** + +* **Define Cohorts:** Create two user cohorts: "One-Time Users" (created one journal entry in their first 7 days) and "Engaged Users" (created 3+ journal entries in their first 7 days). +* **Explore Early Actions:** For both cohorts, analyze their behavior within their **first session** using 'Journals'. Did they use a specific sub-feature (e.g., add a photo, use a template)? How long did they spend? +* **Find the Correlate:** Identify the single action that has the biggest difference in completion rate between the "Engaged" and "One-Time" cohorts. +* **Formulate Hypothesis:** State your finding as a hypothesis: "We believe that users who add a photo to their first journal entry are 3x more likely to become engaged users. This is our 'Aha!' Moment." + +### Day 12: Stakeholder Communication: The 3 Talking Points + +**Objective:** To practice distilling complex, nuanced data into a concise, memorable summary for an executive audience. + +**Tasks:** + +* **Review the Week's Data:** Look at everything you've learned from Day 8 to Day 11. +* **Synthesize:** Draft three clear, data-backed talking points. Each should be a single sentence. + * Example 1 (The Good): "Early adoption is tracking 15% ahead of our initial forecast, with over 50,000 users creating a journal entry in the first week." + * Example 2 (The Bad): "However, our adoption funnel shows significant user drop-off before the first entry is created, suggesting a discoverability issue we need to address." + * Example 3 (The Interesting): "Our most engaged new users are those who add a photo to their first entry, pointing to a potential 'Aha!' moment we can encourage in the onboarding flow." +* **Refine:** Edit your talking points for clarity, impact, and brevity. + +### Day 13: The First A/B Test Readout (Week 1 Data) + +**Objective:** To conduct a preliminary experiment analysis while responsibly communicating the risks and uncertainties of early results. + +**Tasks:** + +* **Pull Data:** You are given the first 7 days of A/B test data. +* **Run Analysis:** Calculate the primary metric (Day-7 Retention, in this case) for the control and treatment groups. Calculate the p-value and confidence interval for the difference. +* **Check Guardrails:** Briefly check the key guardrail metrics. Are there any alarming early signals? +* **Draft Update:** Write a short update memo. Start by clearly stating the preliminary result (e.g., "Treatment group shows a +1.5% relative lift in D7 retention"). Immediately follow this with a strong caveat about statistical noise, the novelty effect, and why this result is not final. + +### Day 14: Analyzing Qualitative Feedback + +**Objective:** To integrate unstructured, qualitative text data into your quantitative analysis to get a complete picture of user sentiment. + +**Tasks:** + +* **Load Data:** You are given a `feedback.csv` file with 500 user comments about the new 'Journals' feature. Load it into Pandas. +* **Basic NLP:** Write a simple Python function that categorizes each comment based on keywords. Categories should be "Bug Report," "Feature Request," "General Praise," and "Privacy Concern." +* **Quantify Themes:** Create a bar chart showing the count of comments in each category. +* **Synthesize:** Write a summary paragraph that combines a quantitative insight with a qualitative one. Example: "While our metrics show strong initial adoption, the qualitative feedback reveals that 'Privacy Concerns' are the second-largest category of comments, suggesting we need to improve our user-facing communication about data security." + +--- + +Of course. This is the crucial part of the masterclass where the analyst transitions from reporting on the "what" to explaining the "why" and influencing the "what's next." + +I have performed a thorough review of the original Week 3 and 4 content, improving it to align with the rigor and strategic focus of the revised Weeks 1 and 2. My goal was to deepen the analytical concepts, introduce more real-world complexities, and transform the final week into a true capstone strategy project. + +Here is the fully revised and improved curriculum for Week 3 and Week 4 in Markdown format. + +--- + +## Week 3: The Deep Dive – Causal Impact and Strategic Value + +**Goal:** Move beyond surface-level metrics to answer the two most critical questions about your feature: "Did it actually *cause* the changes we see?" and "What is its true, long-term value to the business?" This week, you will deploy your most rigorous analytical skills to build an unshakeable, data-driven case for the feature's impact. + +### Day 15: The Definitive A/B Test Readout + +* **Objective:** To determine the feature's final, causal impact with full statistical rigor and to communicate the results in a formal, shareable experiment readout document. +* **Why This Matters:** This is the moment of truth. A definitive A/B test analysis is the gold standard for data-driven decision-making. The clarity and integrity of your final report will determine the entire team's confidence in the results. +* **Tasks:** + 1. **Receive Final Data:** You are given the full 28-day A/B test dataset. + 2. **Analyze Primary Metric:** Calculate the final lift, 95% confidence interval, and p-value for your primary metric (Day-28 Retention). + 3. **Analyze Guardrail & Secondary Metrics:** Perform a full statistical analysis on your key guardrail metrics (e.g., Time on Feed, App Uninstalls) and secondary success metrics (e.g., Journals entries per user). Did the feature cause any significant negative (or positive) side effects? + 4. **Segment Analysis:** Does the feature's impact differ across key user segments (e.g., New vs. Existing Users, iOS vs. Android)? Conduct a segmented analysis on the primary metric. +* **Deliverable:** A formal `AB_Test_Final_Readout.md` memo. It must include: + * An executive summary with the main conclusion. + * A table showing the results for the primary, secondary, and guardrail metrics (including CIs and p-values). + * A visualization of the key results. + * A clear, final recommendation: "Declare a winning or losing variant." + +### Day 16: The Quasi-Experiment Post-Mortem + +* **Objective:** To execute the Difference-in-Differences (DiD) analysis and critically compare its results to the "ground truth" of the A/B test, understanding its limitations. +* **Why This Matters:** You won't always have an A/B test. This exercise teaches you to critically evaluate the results of quasi-experiments by comparing them to a known truth, building your intuition for when these methods are reliable and when they are not. +* **Tasks:** + 1. **Execute DiD Analysis:** Using your plan from Day 5, execute the DiD analysis on the Canada (treatment) vs. Australia/NZ (control) launch data. + 2. **Calculate DiD Estimate:** Calculate the final DiD estimate for the lift in the primary metric. + 3. **Create a Comparison Report:** In a notebook `16_did_vs_ab_test.ipynb`, create a clear table comparing the results: + * Row 1: A/B Test Result (Lift %, 95% CI) + * Row 2: DiD Result (Lift %, No CI needed for this exercise) + 4. **Write a Critical Assessment:** Write a paragraph titled "Why DiD Deviated from the A/B Test." Discuss potential confounding factors you couldn't control for (e.g., a national holiday in Canada, a marketing campaign in Australia, underlying demographic differences not visible in the data). +* **Deliverable:** The notebook containing the DiD calculation, the comparison table, and the critical assessment paragraph. + +### Day 17: Cannibalization vs. Creation – The Engagement Portfolio + +* **Objective:** To answer the critical strategic question: did the feature generate *net new* engagement, or did it just shift existing behavior from one part of the app to another? +* **Why This Matters:** A feature that simply shuffles engagement around doesn't grow the business. It might still be valuable for retention, but leadership needs to know if it's a true expansion of the user experience or just a substitution. +* **Tasks:** + 1. **Define the "Engagement Portfolio":** The key metric is "Total Daily Time Spent in App per User." Break this down into its components: `time_on_feed`, `time_in_dms`, `time_in_journals`, etc. + 2. **Analyze A/B Test Data:** Using the full A/B test dataset, calculate the average of `total_time_spent` for both control and treatment groups. Is the difference statistically significant? + 3. **Visualize the Shift:** Create a stacked bar chart comparing the control and treatment groups. Each bar represents 100% of the `total_time_spent`, with segments showing the proportion of time spent in each part of the app. This will visually demonstrate the "portfolio shift." + 4. **Interpret the Result:** Write a clear conclusion. Example: "The feature did not cause a statistically significant increase in total time in app. Instead, it successfully captured an average of 2 minutes of daily time that, for the control group, was spent on the main feed. This indicates the feature is currently a substitute for, not an expansion of, user engagement." +* **Deliverable:** A notebook containing the statistical tests and the stacked bar chart visualization, along with a clear, concise interpretation. + +### Day 18: Retention Curves & The Lift Over Time + +* **Objective:** To visualize and quantify the feature's impact on user retention over the entire 28-day user lifecycle, not just on a single day. +* **Why This Matters:** A feature's true value is in its ability to create a lasting habit. By plotting retention curves, you can see if the feature's impact is a short-term novelty effect or if it creates a sustained, long-term lift. +* **Tasks:** + 1. **Create Cohorts:** Using the A/B test data, create two cohorts based on their join date during the experiment: "Control Group Users" and "Treatment Group Users." + 2. **Calculate Retention Curves:** For both cohorts, calculate their retention rates for each day from Day 1 to Day 28. + 3. **Visualize the Curves:** Plot both retention curves on the same line chart. The vertical distance between the two lines represents the daily retention lift. + 4. **State the Finding in the Title:** Don't just label the chart "Retention Curve." Give it a title that tells the story. Example: "'Journals' Feature Sustains an Average +2.5% Retention Lift Over 28 Days." +* **Deliverable:** A notebook with the SQL/Python code to generate the retention data and the clearly titled line chart. + +### Day 19: Validating the Engagement Loop + +* **Objective:** To use data to prove whether the feature created a new, repeatable, habit-forming loop that brings users back to the product. +* **Why This Matters:** The best features create their own gravity. They have triggers (like notifications) that lead to actions (writing an entry), which in turn creates future triggers. Proving this loop exists demonstrates that the feature can be a self-sustaining driver of engagement. +* **Tasks:** + 1. **Hypothesize the Loop:** Propose a testable loop: "Trigger (Push Notification) -> Action (App Open) -> Reward (Reading Past Entries) -> Investment (Writing a New Entry)." We will test the first part: Trigger -> Action. + 2. **Measure Trigger Efficacy:** From the `events` data, create two groups of 'Journals' users: those who received a `journal_reminder_notification` and those who did not on a given day. + 3. **Measure Next-Day Return Rate:** For both groups, calculate the percentage who opened the app the following day (Next-Day Retention). + 4. **Validate the Loop's Start:** Compare the retention rates with a significance test. A significantly higher return rate for the notified group provides strong evidence that your notification is an effective trigger for re-engagement. +* **Deliverable:** A notebook with the analysis comparing the two groups and a conclusion stating whether you have validated the first step of a new engagement loop. + +### Day 20: Predictive Modeling for Proactive Engagement + +* **Objective:** To build a simple, interpretable model that identifies users who are most likely to become highly engaged with 'Journals', enabling proactive product interventions. +* **Why This Matters:** Not all users are the same. By identifying future power users early, the product team can design targeted onboarding flows, promotions, or nudges to help them succeed, maximizing the feature's impact. +* **Tasks:** + 1. **Define Target Variable:** Create a binary target `is_champion`, which is `1` for users who created 5+ journal entries in their first 28 days and `0` otherwise. + 2. **Engineer Features:** Create predictive features using only a user's **first-week** data (e.g., `num_sessions_week1`, `did_use_photo_feature`, `signup_channel`). + 3. **Train Model:** Train a logistic regression model to predict `is_champion`. The goal is **not predictive accuracy, but interpretability.** + 4. **Identify Key Predictors:** Extract and visualize the model's coefficients. Identify the top 3 features that are the strongest early indicators of a future "Journal Champion." These are your levers. +* **Deliverable:** A notebook showing the model training process and a summary of the top 3 predictive features, explaining what they mean for product strategy. + +### Day 21: The One-Slide Story + +* **Objective:** To master the art of telling a complete, compelling data story in a single, powerful visual designed for an executive audience. +* **Why This Matters:** Senior leaders are time-poor. Your ability to synthesize a week of complex analysis into a single, understandable, and persuasive slide is a career-defining skill. +* **Tasks:** + 1. **Review the Week's Narrative:** Review all your findings from Day 15-20. What is the single most important message? + 2. **Choose the Core Narrative:** Decide on the story (e.g., "Journals successfully boosts retention by creating a new habit, but does not yet grow overall engagement."). + 3. **Design the Slide:** Design a single presentation slide. It should have a clear, assertive title (the main takeaway). It should contain no more than 3 key charts/numbers to support that title. + 4. **Write Speaker Notes:** In the "speaker notes" section of your presentation software (or a markdown cell), write a script that explains the slide's content and its implications in under 60 seconds. +* **Deliverable:** A single image file of your presentation slide (`.png` or `.jpg`) and the accompanying speaker notes. + +--- + +## Week 4: The Strategy – From Analyst to Influencer + +**Goal:** You have analyzed the past and understood the present. This final week is about shaping the future. You will use your deep insights to build a strategic roadmap, justify future investment, and present your findings in a high-stakes business review, demonstrating your ability to translate data into dollars and direction. + +### Day 22: Quantifying the Business Impact (LTV) + +* **Objective:** To connect the feature's impact on user behavior (retention) directly to the company's bottom line (Lifetime Value). +* **Why This Matters:** Product metrics are important, but financial metrics drive business decisions. Tying your work to LTV is how you prove to the C-suite that your feature isn't just a nice-to-have; it's a value creator. +* **Tasks:** + 1. **Build a Simple LTV Formula:** Use the formula: LTV = (Average Revenue Per User per Month) * (1 / Monthly Churn Rate). + 2. **Calculate LTV for Control:** Using data from the control group, calculate their monthly churn rate (1 - monthly retention rate) and assume an ARPU of $1.00 to find their LTV. + 3. **Calculate LTV for Treatment:** Do the same for the treatment group. Their lower churn rate (from your A/B test) should result in a higher LTV. + 4. **Quantify Total Business Impact:** Calculate the LTV lift per user. Then, project the total increase in lifetime value for the entire user base over the next year if the feature is rolled out to everyone. +* **Deliverable:** A notebook containing the LTV calculations and a concluding statement: "The 'Journals' feature is projected to increase LTV by $X per user, generating an estimated $Y in total value over the next year." + +### Day 23: The "Iterate, Expand, or Kill" Decision Framework + +* **Objective:** To use a structured, data-driven framework to make a clear, defensible, and transparent product lifecycle recommendation. +* **Why This Matters:** Gut feelings lead to bad product decisions. A structured framework removes emotion and bias, forcing a decision based on pre-defined criteria and ensuring everyone understands the "why" behind your recommendation. +* **Tasks:** + 1. **Create a Scorecard:** In a markdown file, create a table with columns: "Metric," "Result vs. Goal," and "Score (-1, 0, or 1)." + 2. **Fill the Scorecard:** Populate the table with your key findings from the entire project. + * *Day-28 Retention Lift:* +2.5% (Goal: +2.0%) -> Score: +1 + * *Net Engagement (Cannibalization):* Neutral (Goal: Positive) -> Score: 0 + * *Adoption Rate (Wk 1):* 15% (Goal: 10%) -> Score: +1 + * *Negative Guardrail Impact:* None (Goal: None) -> Score: +1 + * *Projected LTV Impact:* +$0.50/user (Goal: Positive) -> Score: +1 + 3. **Make the Call:** Sum the scores. Based on the total, write a formal, one-paragraph recommendation. Example: "With a strongly positive data scorecard of +4, I recommend we **Expand** the 'Journals' feature to 100% of users and allocate resources for V2 development." +* **Deliverable:** The completed decision framework scorecard and formal recommendation. + +### Day 24: Building the Data-Driven V2 Roadmap + +* **Objective:** To use the insights you've gathered to propose the next set of feature improvements, ensuring the product roadmap is built on evidence, not opinions. +* **Why This Matters:** This task separates a reactive analyst from a proactive product partner. You are not just reporting on the past; you are using your data to write the future of the product. +* **Tasks:** + 1. **Synthesize Your Key Insights:** Review your findings on the adoption funnel (Day 10), the 'Aha!' moment (Day 11), qualitative feedback (Day 14), and key predictors of champions (Day 20). + 2. **Propose 3 Roadmap Items:** Based on this synthesis, propose the top 3 most impactful improvements or new features for "Journals V2." + 3. **Justify Each Item with Data:** For each proposal, write one sentence justifying it with a specific data point. + * **1. Add Prominent Onboarding Card:** (Justification: To address the 60% drop-off in our adoption funnel.) + * **2. Prompt Users to Add a Photo:** (Justification: To intentionally guide users to the 'Aha!' moment that correlates with a 3x higher retention rate.) + * **3. Develop Sharable Templates:** (Justification: 'Feature Request' was the #1 theme in our qualitative feedback analysis.) +* **Deliverable:** A short memo titled "Proposed Journals V2 Roadmap," listing the three initiatives and their data-driven justifications. + +### Day 25: Extrapolating to Broader Product Strategy + +* **Objective:** To practice senior-level strategic thinking by extrapolating a core insight from your feature to influence the entire product ecosystem. +* **Why This Matters:** The most impactful analysts see the forest, not just the trees. They connect learnings from a single feature to the company's grand strategy, identifying new opportunities and markets. +* **Tasks:** + 1. **Identify the Core User Need:** Look beyond the 'Journals' feature. What is the fundamental, underlying user need you have validated? (e.g., "Our users have a strong, unmet desire for private, self-expression features in a world of public performance."). + 2. **Draft a Strategy Memo:** Write a short memo to the Head of Product. + 3. **Connect Your Insight to Company Strategy:** In the memo, explain how this core insight should influence the company's 3-year plan. Propose that the company should explore a new "Privacy & Wellness" product pillar, with 'Journals' as the successful pilot. Suggest 1-2 other feature ideas that would fit into this pillar. +* **Deliverable:** A concise, persuasive strategy memo that demonstrates thinking beyond your immediate project. + +### Day 26: Preparing for the QBR – Behavioral Storytelling + +* **Objective:** To practice crafting compelling, concise narratives about your work to prepare for high-stakes presentations and behavioral job interviews. +* **Why This Matters:** People don't remember data points; they remember stories. The STAR method is a framework for turning your analytical work into memorable narratives of impact. +* **Tasks:** + 1. **Reflect on Your Journey:** Review all the challenges from the past 25 days. + 2. **Draft STAR Stories:** In a markdown file, write out three stories using the **STAR** method (Situation, Task, Action, Result). + * **A Time You Dealt with Ambiguity:** (Use the Day 6 open-ended discovery task or Day 17 Cannibalization analysis). + * **A Time You Used Data to Influence Decisions:** (Use the Day 23 decision framework or Day 24 roadmap). + * **A Time You Handled a Crisis or Technical Challenge:** (Use the Day 9 bug triage fire drill). + 3. **Practice and Refine:** Read your stories aloud. Are they clear? Are they concise? Does the Result clearly link to your Action? +* **Deliverable:** The document containing your three polished STAR stories. + +### Day 27-30: Capstone: The Quarterly Business Review (QBR) Presentation + +* **Objective:** To synthesize the entire 30-day journey into a strategic, executive-level presentation that demonstrates business impact and successfully influences the company's future direction. +* **Why This Matters:** This is the culmination of all your work. The QBR is where you stop being an analyst who *supports* the business and become a leader who *drives* the business. +* **Tasks:** + 1. **Outline the Narrative Arc:** Structure your presentation as a compelling story: + * Slide 1: **The Opportunity:** Why we built this in the first place. + * Slide 2: **The Results:** It worked. The feature causally improved retention (your Day 21 slide). + * Slide 3: **The Business Impact:** Here's what this means for our bottom line (your LTV analysis). + * Slide 4: **The Strategic Recommendation:** Based on the data, here is our decision (your Day 23 framework) and our V2 roadmap (Day 24). + * Slide 5: **The Bigger Picture:** How this learning shapes our company's future (your Day 25 memo). + 2. **Create the Deck:** Create a polished 5-slide presentation. + 3. **Create the Appendix:** Create a detailed supporting document (`QBR_Appendix.md`) that links to all your notebooks and provides the full methodology behind every number in your presentation. This is your defense. + 4. **Record Your Presentation:** Practice and record yourself delivering the 5-minute presentation as if you were in a real QBR meeting. Focus on being clear, confident, and persuasive. +* **Deliverable:** The final 5-slide presentation deck, the detailed appendix document, and a link to your recorded presentation. \ No newline at end of file diff --git a/book-src/src/qbr_presentation.md b/book-src/src/qbr_presentation.md new file mode 100644 index 0000000..e69de29 diff --git a/book-src/src/week-1/day-01.md b/book-src/src/week-1/day-01.md deleted file mode 100644 index ec4cce5..0000000 --- a/book-src/src/week-1/day-01.md +++ /dev/null @@ -1,80 +0,0 @@ -# Day 01: The Spark of an Idea - -Welcome to Day 1 of the Product Analytics Masterclass! Today, we’re not starting with complex dashboards or A/B tests. We're starting with the most fundamental skill of a great product analyst: **curiosity**. We're going on an expedition into the raw, unstructured world of user feedback to find the spark of our next big feature. - -Our scenario for this week is the **"Journals Sprint."** The product team has a hunch that users want a way to journal or log their activities within our app. Is this a real need, or just a guess? Our job is to find the data to support or challenge this idea. - -### Objective -- To validate the need for a new feature by performing proactive discovery on raw, qualitative user data. - -### Why This Matters -Great analysis isn't just about answering questions that are handed to you; it's about asking the *right* questions in the first place. Too often, analytics is purely **reactive**—measuring the performance of features that already exist. Today, we're flipping the script. - -We will practice **proactive discovery**: the art of sifting through raw data to uncover hidden opportunities and unmet user needs. By analyzing qualitative data like user reviews *before* a single line of code is written for a new feature, you can: -- **De-risk product decisions:** Provide evidence that a real user problem exists. -- **Influence the roadmap:** Champion features that are backed by user-driven data. -- **Build empathy:** Gain a deep, unfiltered understanding of what your users are actually saying and feeling. - -This skill—finding signals in the noise—is what separates a good analyst from a great one. - -### Key Concepts -Before we dive into the code, let's familiarize ourselves with the tools and concepts we'll be using today. - -1. **DuckDB:** Think of DuckDB as "SQLite for analytics." It's an in-process analytical database management system. - - **In-process:** It runs inside our Python notebook. No complex server setup, no database administrators, no network connections. It's just a library you import. - - **Analytical:** It's blazing fast for the types of queries we'll be doing (aggregations, filtering, etc.) because of its columnar-vectorized query engine. - - **Perfect for us:** We can query our data files directly, making it incredibly easy to get started. - -2. **Parquet Files:** The data we're using today (`app_reviews.parquet`) is stored in the Parquet format. - - **Columnar:** Unlike row-based formats like CSV, Parquet stores data by column. When you query `AVG(rating)`, it only reads the `rating` column, ignoring all others. This makes analytical queries significantly faster. - - **Compressed:** Parquet files are highly compressed, saving disk space and speeding up data reads. It's the go-to format for analytical datasets in the modern data stack. - -3. **Exploratory SQL:** Before you can perform complex analysis, you must first understand your dataset's basic shape and content. We call this exploratory analysis, and we'll use a few fundamental SQL commands: - - `DESCRIBE`: Shows the schema of the table—the column names and their data types (`VARCHAR`, `INTEGER`, `TIMESTAMP`, etc.). - - `SELECT * LIMIT 10`: Fetches the first 10 rows of the table. This is a quick and safe way to peek at the actual data without trying to load the entire (potentially huge) dataset. - - `COUNT(*)`: Counts the total number of rows in a table. - - `AVG()`: Calculates the average value of a numeric column. - -4. **Keyword Searching with `LIKE`:** This is our primary tool for discovery today. The `LIKE` operator in SQL is used for simple pattern matching in text data. - - It's often paired with the `%` wildcard, which matches any sequence of characters (including zero characters). - - For example, `WHERE content LIKE '%journal%'` will find any review that contains the word "journal" anywhere within its text. We'll use the case-insensitive version, `ILIKE`, to make our search more robust. - -### Today's Challenge: A Step-by-Step Guide -It's time to get our hands dirty. Open the `Day_01_Challenge.ipynb` notebook and follow along with this guide. Our mission is to find evidence for or against the "Journals" feature idea within our app reviews. - -**Step 1: Set Up the Environment** -The first few cells in the notebook will install and import the necessary libraries (`duckdb`) and then establish a connection to our Parquet file. This simple command tells DuckDB to treat our file as a SQL table. - -**Step 2: Initial Data Exploration** -Now that we're connected, we need to get acquainted with the data. -1. Run the `DESCRIBE` query to understand the columns we have to work with. What are their names? What are their data types? -2. Use `SELECT * LIMIT 10` to see some sample reviews. Get a feel for the language users are using. What do the `content` and `rating` columns look like? -3. Calculate the total number of reviews with `COUNT(*)` and the overall average rating with `AVG(rating)`. This gives us a baseline to compare our findings against later. - -**Step 3: Formulate the Keyword Query** -Our hunch is about journaling. Let's translate that into keywords. We're looking for reviews that mention terms like `journal`, `diary`, `log`, or `track`. - -We will build a query to find all reviews matching these keywords. To keep our logic clean and readable, we'll use a Common Table Expression (CTE) with the `WITH` clause. - -The logic will be: -1. **CTE (`Journal_Reviews`):** Create a temporary table that selects all columns from reviews `WHERE` the `content` column (using `ILIKE` for case-insensitivity) contains our keywords. We'll link them with `OR`. -2. **Final Query:** Select the `COUNT(*)` and `AVG(rating)` from our `Journal_Reviews` CTE. - -This query will tell us exactly how many people are talking about journaling and what their average sentiment (as measured by rating) is. - -**Step 4: Analyze and Summarize Your Findings** -The final, and most important, step is to interpret your results. The query gives you numbers; your job is to turn them into an insight. - -In the final markdown cell of the notebook, write a short summary answering these questions: -- How many reviews mentioned your keywords? -- What was the average rating for these specific reviews? -- How does this count and average rating compare to the overall dataset? -- **Conclusion:** Based on this initial analysis, is there evidence to support prioritizing a journaling feature? Why or why not? - -This summary is your deliverable. It's where you practice the crucial skill of communicating your findings to stakeholders. - -### Deliverable Checklist -- [ ] DuckDB environment is set up and data is loaded. -- [ ] SQL queries for schema exploration are complete. -- [ ] SQL query for keyword search is written and executed. -- [ ] A summary of findings is written in the notebook's markdown cell. \ No newline at end of file diff --git a/book-src/src/week-1/day-02.md b/book-src/src/week-1/day-02.md deleted file mode 100644 index 2d95c27..0000000 --- a/book-src/src/week-1/day-02.md +++ /dev/null @@ -1,35 +0,0 @@ -**Crafting Day 2 Content** - -I'm currently working on the content for `book-src/src/week-1/day-02.md`, focusing on "Opportunity Sizing & The Business Case," as a continuation of the "Jour" scenario. My main objective is to provide a comprehensive, educational resource suitable for data science students. - - -**Analyzing the Business Value** - -I'm structuring the day 2 content to translate signals into a tangible business case. My focus is on the "Jour" scenario, emphasizing the need for data scientists to speak the language of business stakeholders. I've drafted the `Objective` and `Why This Matters` sections, highlighting the value of quantifying opportunities and aligning with business metrics. The key concepts are mapped out, and I'm detailing today's step-by-step guide to solidify the material. - - -**Expanding Key Concepts** - -My focus has shifted to the "Key Concepts" section, refining the explanations for User Segmentation and Core Metrics. I've expanded on the user-provided notes, making them more accessible for students. Now, I'm working to connect the concepts to the Jour scenario, demonstrating how to use segmentation to identify the target audience for the journal feature and how to select relevant business metrics like WAU and Retention to quantify its potential impact. - - -**Developing Core Metric Definitions** - -I'm now focusing on defining the crucial "Key Concepts" for the day 2 material. I've broken down "User Segmentation," "WAU," and "Retention," ensuring clear, student-friendly explanations linked to the "Jour" scenario. My goal is to equip students with a robust grasp of these concepts, solidifying their understanding of business case development. Now, I'm working on explaining the difference between absolute and relative lift, ensuring these definitions are accessible for the target audience. - - -**Outlining the Core Challenge** - -I've just finished the outline for the Day 02 content. My focus now is creating clear, straightforward definitions for key concepts like user segmentation and core metrics, along with an explanation of relative vs. absolute lift. The step-by-step guide is now complete. I'll translate the core challenge, including how to translate the user signals from Day 1 into the quantitative steps needed to build a business case. My emphasis will be on practical application, showing data science students how to identify user segments, join data using SQL, and build a model to estimate the impact of new features. - - -**Outlining Day 2's Content** - -I'm currently building the framework for "Day 02: Opportunity Sizing & The Business Case," within the context of the "Jour" scenario, focusing on translating signals into business value. I've finished the main outline and am now detailing the sections on Key Concepts: User Segmentation, Core Metrics (WAU, Retention), and the all-important definitions of Relative vs. Absolute Lift. My approach is to make each concept clear and relevant, particularly for those with little experience creating business cases. I will provide practical examples, and link them to the actual notebook with SQL to solidify the explanations. - - -**Refining the Step-by-Step Guide** - -I'm now revising the "Today's Challenge" section, clarifying each step of the guide. My goal is to transform the broad outline into actionable instructions for students, connecting the theoretical concepts to the practical application in the notebook. Specifically, I'm focusing on making the SQL joins and Pandas calculations crystal clear. I'll include comments in the notebook itself as a reference, enhancing the learning process. My objective is to provide students with the ability to confidently navigate the data analysis workflow and understand how to develop a business case from qualitative signals. - - diff --git a/book-src/src/week-1/day-03.md b/book-src/src/week-1/day-03.md deleted file mode 100644 index ddb32e4..0000000 --- a/book-src/src/week-1/day-03.md +++ /dev/null @@ -1,54 +0,0 @@ -# Day 03: If You Can't Measure It, You Can't Improve It - -So far, we've found a signal in the noise and sized the potential business impact. We've answered the "what" and the "why." Today, we tackle the "how"—specifically, how will we measure success? We're going to create an **Instrumentation Plan**. - -This is one of the most critical, high-leverage activities an analyst can perform. We are defining, *before a single line of code is written*, exactly what data we need to collect to determine if our "Journals" feature is a success or a failure. - -### Objective -- To define exactly what user actions need to be tracked (events) and what metrics will define success, *before* any code is written. - -### Why This Matters -Think of an instrumentation plan as the foundational contract between Product, Engineering, and Analytics. -- **For Product,** it forces clarity on what "success" actually means. -- **For Engineering,** it provides a clear, unambiguous list of tracking requirements. -- **For Analytics,** it ensures that the data you need to do your job will actually exist post-launch. - -Without this plan, you're flying blind. You launch a feature and then ask, "Did it work?" only to realize you don't have the data to answer the question. This leads to opinions, not data-driven decisions. The old adage holds true: **"Bad data in, bad decisions out."** A thoughtful instrumentation plan is your quality control for future decisions. - -### Key Concepts -Let's define the core components of our plan. - -1. **Events and Properties:** This is the vocabulary we use to describe user behavior. - - An **Event** is a user action. It's the *verb*. Examples: `click_button`, `view_screen`, `save_entry`. - - **Properties** are the context for that action. They are the *adverbs* or *nouns* that describe the verb. They answer questions like who, what, where, when, and how. - - **Analogy:** If the event is `play_song` (the verb), the properties would be a dictionary of context like `{genre: 'rock', artist_name: 'Queen', duration_ms: 210000, source: 'playlist'}`. - -2. **Primary vs. Secondary Metrics:** Not all metrics are created equal. You need a hierarchy to avoid confusion. - - The **Primary Metric** (also called the North Star Metric for the project) is the single, undisputed measure of success. If this metric goes up, the project is a success. If it doesn't, it's a failure. It must be directly related to the business case we built yesterday. - - **Secondary Metrics** add color, context, and diagnostic power. They help explain *why* the primary metric moved. If retention is our primary metric, a secondary metric might be "average number of journal entries per week," which could be a leading indicator of retention. - -3. **Guardrail Metrics:** This is a concept that separates good analysts from great ones. Guardrail metrics are the metrics you hope *don't* change. They are your early warning system for unintended negative consequences. - - **Purpose:** Their job is to protect the overall health of the product. When you launch a new feature, you might accidentally hurt another part of the app. - - **Example:** For our Journals feature, we want to increase engagement. But what if it's so engaging that users stop using our app's main social feed? A good guardrail metric would be `time_spent_on_main_feed`. If that metric plummets for users of the journal, we know we've created a cannibalization problem. Other examples include app performance (crash rate) or uninstalls. - -### Today's Challenge: A Step-by-Step Guide -Your task is to create a simple instrumentation plan for the "Journals" feature. Think logically through the user journey and define the data you would need to collect. We'll outline the key components in your notebook, `Day_03_Challenge.md`. - -**Step 1: Map the User Journey & Define Events** -First, imagine you are a user. What are the key actions you would take within this new feature? List them out as events. -- What happens when the user sees the feature for the first time? (`view_journal_promo`) -- What is the first click? (`click_journal_tab`) -- What actions can they take on the page? (`start_journal_entry`, `save_journal_entry`, `set_reminder`) - -**Step 2: Add Context with Properties** -Now, for each event, what additional information would be useful for deeper analysis later? -- For `save_journal_entry`, you'd want to know the `character_count` to see if longer entries correlate with retention. You might also want a `template_used` property if you offer different journaling formats (e.g., 'gratitude', 'freeform'). -- For `start_journal_entry`, an `entry_point` property (`'from_prompt'`, `'from_blank_canvas'`) would be incredibly valuable. - -**Step 3: Define Your Metrics** -This is where you formalize your success criteria. -- **Choose a Primary Metric:** The goal of a journal is to build a long-term habit. Therefore, a short-term metric like "number of entries on Day 1" is misleading. A better primary metric is **28-day retention for users who create their first entry**. This directly measures if the feature creates lasting value. -- **List Secondary Metrics:** What would support this primary metric? Consider `Adoption Rate` (% of users who try the feature), `Engagement Rate` (average entries per user per week), and `Funnel Conversion` (the % of users who start an entry and then save it). -- **Set Your Guardrails:** What could go wrong? The biggest risk is cannibalization. A key guardrail would be `sessions_per_week_on_core_feature_X`. You could also add `app_uninstall_rate` for the week after a user's first journal entry. - -By completing this plan, you're not just preparing to analyze a feature; you are actively shaping its success. \ No newline at end of file diff --git a/book-src/src/week-1/day-04.md b/book-src/src/week-1/day-04.md deleted file mode 100644 index 38c2c83..0000000 --- a/book-src/src/week-1/day-04.md +++ /dev/null @@ -1,49 +0,0 @@ -# Day 04: Architecting Trust – The A/B Test Blueprint - -We've identified an opportunity, built a business case, and designed a measurement plan. So far, all of our analysis has been based on *correlation* and *forecasting*. We observed that users asking for a journal feature have lower retention. We forecast that building it might help. But how do we *prove* it? How do we establish **causation**? - -This is where we move from analyst to scientist. An A/B test (or randomized controlled experiment) is the gold standard for proving that a product change *caused* a change in user behavior. A well-designed test is the difference between guessing and knowing, and it's the foundation of a data-driven culture. Today, we design the blueprint for that test. - -### The Central Analogy: Focusing a Camera - -Designing an experiment before you run it is called **Power Analysis**. The goal is to figure out how many users you need and how long you need to run the test to get a trustworthy result. Let's think of it like setting up a camera to take a very important photograph. - -**Power analysis is like focusing the lens of your camera.** You need to make sure your settings are right *before* you press the shutter. - -- **Sample Size (N) is the amount of light you let in.** If you want to take a picture of a huge, obvious object like a car, you don't need much light. But if you want to capture a clear photo of a tiny, distant insect, you need to let in a lot more light (i.e., have more users in your test). More users give you more statistical "light" to see smaller effects. - -- **Minimum Detectable Effect (MDE) is the size of the object you're trying to see.** Before you take the picture, you must decide: am I trying to photograph the car or the insect? The MDE is the smallest change you care about detecting. A 10% lift in retention is a much bigger "object" than a 1% lift. Trying to detect a tiny effect (a small MDE) requires a much sharper focus and a lot more light (a larger sample size). This is the most important business decision in your test design. - -- **Statistical Power (1-β) is the probability your camera works.** It's the probability that you will actually capture a clear photo *if the insect is truly there*. The industry standard is 80% power. This means we accept a 20% chance of a "false negative"—our camera fails to capture the photo even though the insect was right in front of us. In other words, we have an 80% chance of detecting a real effect if it exists. - -### Key Concepts Explained - -With our camera focused, there's one more setting to check: - -- **Significance Level (alpha / α):** This is your risk of a "false positive." In our analogy, it's the chance that a random speck of dust on your camera lens looks exactly like the insect you were searching for. You see the "insect" in your photo, but it was never really there. We want to keep this risk low, so the standard is 5% (or 0.05). This means we're willing to accept a 5% chance of celebrating a win that was actually just random noise. - -### The Critical Thinking Corner: Beyond the Calculator - -A power calculator will give you a number, but a great analyst knows the pitfalls that the math doesn't account for. - -1. **The Novelty Effect:** Beware the siren song of early results! When you launch a new feature, some users will click on it simply because it's new and shiny. This can create a temporary lift in engagement that has nothing to do with the feature's true, long-term value. If you run your test for only a few days, you're likely measuring novelty, not a sustained change in behavior. This is why for a habit-forming feature like a journal, running the test for a full habit loop (e.g., 28 days) is essential to measure its real impact. - -2. **The Sin of "Peeking":** It is statistically invalid to check your experiment results every day and stop the test as soon as it hits statistical significance (p-value < 0.05). This is called "peeking," and it dramatically increases your risk of a false positive. Why? Because data is naturally noisy. If you check 20 times, you're giving random chance 20 opportunities to produce a "significant" result. - - **Analogy:** It's like pulling a cake out of the oven early just because the top looks brown. The statistical guarantees of your test are only valid if you let it run for its pre-determined duration or sample size. The inside is still uncooked. Commit to your test plan. - -### Today's Challenge: A Step-by-Step Guide -Open `Day_04_Challenge.ipynb`. We will now use our camera settings to calculate the required sample size for our "Journals" feature A/B test. - -**Step 1: Define the Inputs for the Power Calculator** -We need to tell our calculator what we're looking for. We will link each parameter directly to our camera analogy and our previous work. -- **Baseline Rate (p1):** This is our starting point. From Day 2, what is the 28-day retention rate for our target user segment? -- **Minimum Detectable Effect (MDE):** What is the "insect" we're trying to see? We'll use the 15% *relative lift* from our business case on Day 2. You'll need to convert this to an absolute value for the calculator (e.g., if the baseline is 20%, a 15% relative lift makes the target rate 23%). -- **Alpha (α):** Our tolerance for a false positive. We'll use the standard 0.05. -- **Power (1-β):** Our desired chance of detecting a real effect. We'll use the standard 0.80. - -**Step 2: Calculate the Sample Size** -Using the `statsmodels` library in Python, you will plug these four inputs into the power analysis function. The output will be the number of users required *per group* (i.e., for the control group and for the treatment group). - -**Step 3: Calculate the Test Duration** -A sample size is not an actionable plan. We need to translate it into time. Based on the number of eligible new users who sign up for our product each week, how many weeks will it take to get enough users into our experiment? This final calculation—"We need to run this experiment for X weeks"—is the clear, actionable deliverable for your product and engineering team. \ No newline at end of file diff --git a/book-src/src/week-1/day-05.md b/book-src/src/week-1/day-05.md deleted file mode 100644 index 0d59c5d..0000000 --- a/book-src/src/week-1/day-05.md +++ /dev/null @@ -1,95 +0,0 @@ -# Day 05: The Analytical Detective – Finding Causation Without an A/B Test - -Welcome to Day 5. So far, we've treated A/B tests as the only way to establish causation. But the real world is messy. Products don't always launch to everyone at once. Sometimes a feature is rolled out to one country (e.g., Canada) before it's launched globally. Sometimes a new marketing campaign is only run in specific cities. In these scenarios, a clean A/B test is impossible. - -Does that mean we have to give up on finding the true cause of a change? Absolutely not. It means we have to become detectives. Today, we learn one of the most powerful techniques in the observational analysis toolkit: **Difference-in-Differences (DiD)**. This is how you find the truth in the mess. - -### Objective & Why This Matters -- To estimate the causal impact of a feature when a true A/B test is not available. - -This is a critical, real-world skill. Very few analysts outside of top tech companies are proficient in quasi-experimental methods like DiD. Mastering this technique will allow you to provide causal insights in situations where others can only point to correlations. You'll be able to confidently answer "What was the true impact of our Canada-only launch?" while others are still guessing. - -### The Central Analogy: The Twin Runners - -To understand DiD, let's forget about data for a moment and think about two identical twin runners, Alex and Ben. - -- **The Setup:** Alex and Ben are perfectly matched athletes. For years, every time they've run a 5k, their race times have been identical. If Alex gets a little faster one month (due to better training), Ben gets faster by the same amount. Their performance trends are perfectly **parallel**. - -- **The Intervention:** One day, we give Alex a new, expensive pair of running shoes (our "feature"). Ben keeps his old shoes (he's our "control group"). - -- **The Result:** After a month, we check their race times again. We see that Ben's time has improved by 10 seconds (maybe the weather got better for everyone). But Alex's time has improved by *15 seconds*. - -- **The Conclusion:** The "Difference-in-Differences" is the difference between their improvements: `15 seconds - 10 seconds = 5 seconds`. Because we know they were identical twins whose performance always moved in parallel, we can confidently conclude that the new shoes *caused* a 5-second improvement in Alex's race time. The extra 5-second gain Alex saw, above and beyond the trend Ben also experienced, is the causal effect. - -### The Bedrock of DiD: The Parallel Trends Assumption - -The twin analogy works for one reason only: Alex and Ben were identical to begin with. This is the **Parallel Trends Assumption**, and it is the non-negotiable foundation of any DiD analysis. - -It states: **In the absence of the treatment, the treatment group would have followed the same trend as the control group.** - -If this assumption is violated, your entire analysis is invalid. You cannot be sure if the change you see is from your feature or from a pre-existing difference between the groups. This is why the first step of any DiD analysis is always to check this assumption visually. - -Here is what you should look for when plotting your metric over time for both groups *before* the intervention: - -#### **GOOD: Parallel Trends Hold** -``` - Metric ^ - | - | /-------- (Treatment) - | / - | ----/---------- (Control) - | / - -----/---- - / - / - ----------------------> Time - ^ - Intervention -``` -*Description: The Treatment and Control lines move in lockstep before the intervention. They might be at different absolute levels, but their slopes are the same. After the intervention, the Treatment group's line diverges upwards, indicating a positive effect.* - -#### **BAD: Parallel Trends Violated** -``` - Metric ^ - | - | /---- (Treatment) - | / - | / - | / - | ------/---------- (Control) - | / - -----/ - / - / - ----------------------> Time - ^ - Intervention -``` -*Description: The Treatment and Control groups were already on different trajectories *before* the intervention. The Treatment group was already growing faster. Because of this, it's impossible to know if the post-intervention change is due to the feature or just a continuation of the old trend. The analysis is unreliable.* - -### The Critical Thinking Corner: When DiD Fails - -The primary risk in a DiD analysis is a **confounding event**. What if, on the exact day Alex got his new shoes, he *also* secretly hired a new running coach, but Ben didn't? Now we have two things that could explain his extra 5-second improvement. The effect of the shoes is confounded by the effect of the coach. - -In product analytics, this happens all the time. Imagine we launch our feature in Canada, and on the same day, a major Canadian competitor goes out of business. Our metrics in Canada might shoot up, but we can't isolate the effect of our feature from the effect of the competitor disappearing. As the analytical detective, you must always be asking: "What else happened at the same time that could explain this change?" - -### Today's Challenge: A Step-by-Step Guide -In `Day_05_Challenge.ipynb`, we will simulate a scenario where the "Journals" feature was launched only to users in Canada. We will use DiD to measure its causal impact on 28-day retention. - -**Step 1: Check the Parallel Trends Assumption (CRITICAL FIRST STEP!)** -Before you calculate a single number, you must plot the historical 28-day retention trend for Canadian users (our treatment group) and users from a comparable country, like the UK (our control group). Do this for the 3-4 months *before* the launch. If the lines are reasonably parallel, you can proceed. If not, you must stop and declare the method invalid for this control group. - -**Step 2: Define the Four Key Numbers** -Once you've validated the assumption, you'll need to calculate four values using SQL or Pandas: -1. **Treatment Group, After:** Average 28d retention for Canadian users who signed up *after* the launch. -2. **Treatment Group, Before:** Average 28d retention for Canadian users who signed up *before* the launch. -3. **Control Group, After:** Average 28d retention for UK users who signed up *after* the launch. -4. **Control Group, Before:** Average 28d retention for UK users who signed up *before* the launch. - -**Step 3: Calculate the "Difference-in-Differences"** -The final calculation is straightforward: -- `diff_treatment = (Treatment_After - Treatment_Before)` -- `diff_control = (Control_After - Control_Before)` -- `causal_effect = diff_treatment - diff_control` - -This final number is your estimate of the causal impact of the feature on retention. It isolates the feature's effect from any general, market-wide trends that affected both countries. You've just become an analytical detective. \ No newline at end of file diff --git a/book-src/src/week-1/day-06.md b/book-src/src/week-1/day-06.md deleted file mode 100644 index c262cdf..0000000 --- a/book-src/src/week-1/day-06.md +++ /dev/null @@ -1,70 +0,0 @@ -# Day 06: The Executive Dashboard – From Data to Decision - -For the last five days, we've been deep in the trenches—finding signals, sizing opportunities, and designing experiments. We've been producing analytical artifacts for ourselves and our direct product teams. Today, we change our audience. We are no longer speaking to our peers; we are speaking to the C-suite. Our mission is to distill all of this complexity into a single screen that an executive can understand in 60 seconds. - -This isn't about making charts. This is about synthesizing insight. A dashboard is not a report; it's a *product*, and the user is your CEO. Your goal is not to show data, but to guide a decision. - -### Objective & Why This Matters -- To design a strategic dashboard that communicates the business impact of a feature to executive leadership, using the "KPI Pyramid" to ensure clarity and actionability. - -As analysts, our influence is directly proportional to our ability to communicate clearly to leadership. Executives do not have time to wade through spreadsheets or interpret complex charts. They think in terms of outcomes: Are we winning or losing? Are we growing faster or slower? Should I invest more here or cut our losses? - -A great executive dashboard ruthlessly prioritizes information to answer those questions. It respects the viewer's time and focuses their attention on what matters most. Getting this right is how you build trust and earn a seat at the table where strategic decisions are made. - -### The KPI Pyramid: A Mental Model for Clarity - -To avoid overwhelming our audience, we need a structure. The KPI Pyramid is a mental model for organizing metrics by audience and purpose, ensuring that information flows logically from a high-level summary down to the diagnostic details. - -#### **Level 1 (Top): Executive KPIs (The "What")** -- **Audience:** C-Suite, GM, Board of Directors. -- **Purpose:** To give a 30-second snapshot of overall business health and trajectory. These are the "ship-steering" metrics. -- **Characteristics:** There should be very few of them (3-5 max). They are almost always lagging indicators of success, like revenue, user growth, or long-term retention. -- **Example:** For the entire company, this might be `WAU Growth (WoW)` and `Net Revenue`. For our feature, the top-line KPI might be **`Net New Retained Users (in the last 28 days)`**. - -#### **Level 2 (Middle): Team Performance Metrics (The "Why")** -- **Audience:** VPs, Directors of Product/Engineering/Marketing. -- **Purpose:** To diagnose *why* the top-level KPIs are moving. If WAU is down, is it an acquisition problem or an engagement problem? These metrics provide a first layer of context. -- **Characteristics:** These are a mix of leading and lagging indicators that are directly influenceable by specific teams. -- **Example:** To explain the **`Net New Retained Users`** number, we would show `28d Retention Rate for Journal Users` and the `Adoption Rate of the Journal Feature`. - -#### **Level 3 (Bottom): Diagnostic & Health Metrics (The "Where")** -- **Audience:** Product Managers, Analysts, Engineering Leads. -- **Purpose:** The ground-level, operational data used for deep dives and debugging. This is where the actual day-to-day analysis happens. -- **Characteristics:** Highly granular, often leading indicators or guardrail metrics. They are rarely shown on the main executive view but are available via a "drill-down." -- **Example:** To understand why the `Adoption Rate` is low, a PM would drill down to see the `Onboarding Funnel Conversion Rate` or the `Save Button Click-Through Rate`. To ensure the feature isn't hurting things, we'd monitor the `Time Spent on Core Feed (Guardrail)`. - -The magic of this structure is the drill-down path. An executive sees **what** happened. Their VPs can immediately see **why**. And the PMs know **where** to go look for the root cause. - -### Key Concepts in Visual Design - -1. **Visual Hierarchy:** The most important information should be the most visually prominent. Use size, color, and position to guide the user's eye. The top-left of a dashboard is the most valuable real estate; put your Level 1 KPI there in the largest font. -2. **Data-Ink Ratio:** Coined by Edward Tufte, this principle demands that we remove every single pixel that isn't communicating data. No 3D effects, no distracting background images, no unnecessary gridlines. Maximize the signal, minimize the noise. -3. **Context is King:** A number in isolation is meaningless. `28d Retention: 35%` tells me nothing. `28d Retention: 35% (vs. 20% Control, +5pts WoW)` tells a complete story. Every key metric MUST have a comparison (to a previous time period, a goal, or a control group) to be actionable. - -### The Critical Thinking Corner: Art Gallery vs. Cockpit - -As a Head of BI, I see two types of dashboards. Your career trajectory will depend on which one you build. - -1. **The Data Art Gallery:** This dashboard is designed to impress. It features visually complex charts (like Sankey diagrams or network graphs) and a kaleidoscope of colors. It's aesthetically pleasing and demonstrates the technical skill of the analyst. But it's ultimately useless. It's a gallery you walk through, say "huh, that's interesting," and then leave without knowing what to do. It answers "What happened?" but provides no path to "So what?" or "Now what?". It is a beautiful dead end. - -2. **The Decision-Making Cockpit:** This dashboard is designed to be acted upon. It might even look "boring." It uses simple bar and line charts, minimal color (leveraging red/green for alerts), and a rigid structure. Every single chart is tied to a decision someone needs to make. It's like a pilot's cockpit: the most critical gauges (altitude, speed) are front and center, while diagnostic information is on secondary screens. It doesn't just show data; it surfaces exceptions and guides the user toward the next question. It is the starting point of an action. - -Your C-suite doesn't want art. They want a cockpit. - -### Today's Challenge: A Step-by-Step Guide -Your task is not to write code. It is to **design**. In the `Day_06_Challenge.md` file, you will architect a dashboard for our "Journals" feature launch. - -**Step 1: Define Your Pyramid** -Think like an executive. If you had 60 seconds to evaluate this feature's success, what would you need to know? -- **Level 1:** What is the single most important metric that proves this feature is adding value to the business? (Hint: It should relate to your business case from Day 2). -- **Level 2:** What are the 2-3 key performance metrics that explain the "why" behind the Level 1 metric? Think about adoption and engagement. -- **Level 3:** What is the most important guardrail metric you need to monitor to ensure this feature isn't causing unintended harm? - -**Step 2: Sketch the Cockpit** -On a piece of paper, a whiteboard, or a simple digital tool, sketch a wireframe of your dashboard. Don't worry about being precise. Focus on: -- **Layout:** Where does the Level 1 metric go? How do you position the Level 2 metrics to support it? -- **Hierarchy:** How will you use size to show which number is most important? -- **Context:** For each metric, what comparison will you show? (e.g., vs. goal, vs. previous period). -- **Chart Choice:** Should this be a big number (a KPI), a line chart showing a trend over time, or a bar chart comparing two groups? - -Upload an image of your sketch to the notebook. This exercise—thinking about design and hierarchy *before* you open a BI tool—is one of the most valuable habits you can build as an analyst. \ No newline at end of file diff --git a/book-src/src/week-1/day-07.md b/book-src/src/week-1/day-07.md deleted file mode 100644 index 2ed3e19..0000000 --- a/book-src/src/week-1/day-07.md +++ /dev/null @@ -1,3 +0,0 @@ -# Day 07: The Pre-Mortem: A Memo on What Could Go Wrong - -_Summary and tasks as per curriculum. Add your notes and findings here._ diff --git a/book-src/src/week-2/day-08.md b/book-src/src/week-2/day-08.md deleted file mode 100644 index 400877a..0000000 --- a/book-src/src/week-2/day-08.md +++ /dev/null @@ -1,3 +0,0 @@ -# Day 08: Launch Day! The Command Center - -_Summary and tasks as per curriculum. Add your notes and findings here._ diff --git a/book-src/src/week-2/day-09.md b/book-src/src/week-2/day-09.md deleted file mode 100644 index fba83a9..0000000 --- a/book-src/src/week-2/day-09.md +++ /dev/null @@ -1,3 +0,0 @@ -# Day 09: The Fire Drill – Precision Bug Triage - -_Summary and tasks as per curriculum. Add your notes and findings here._ diff --git a/book-src/src/week-2/day-10.md b/book-src/src/week-2/day-10.md deleted file mode 100644 index e964f84..0000000 --- a/book-src/src/week-2/day-10.md +++ /dev/null @@ -1,3 +0,0 @@ -# Day 10: The Adoption Funnel – Diagnosing User Friction - -_Summary and tasks as per curriculum. Add your notes and findings here._ diff --git a/book-src/src/week-2/day-11.md b/book-src/src/week-2/day-11.md deleted file mode 100644 index 4e89f32..0000000 --- a/book-src/src/week-2/day-11.md +++ /dev/null @@ -1,3 +0,0 @@ -# Day 11: The "Aha!" Moment – Finding the Magic Action - -_Summary and tasks as per curriculum. Add your notes and findings here._ diff --git a/book-src/src/week-2/day-12.md b/book-src/src/week-2/day-12.md deleted file mode 100644 index e62b236..0000000 --- a/book-src/src/week-2/day-12.md +++ /dev/null @@ -1,3 +0,0 @@ -# Day 12: The Weekly Launch Memo – Communicating with Clarity - -_Summary and tasks as per curriculum. Add your notes and findings here._ diff --git a/book-src/src/week-2/day-13.md b/book-src/src/week-2/day-13.md deleted file mode 100644 index a94175a..0000000 --- a/book-src/src/week-2/day-13.md +++ /dev/null @@ -1,3 +0,0 @@ -# Day 13: The Early A/B Test Readout – Resisting Pressure - -_Summary and tasks as per curriculum. Add your notes and findings here._ diff --git a/book-src/src/week-2/day-14.md b/book-src/src/week-2/day-14.md deleted file mode 100644 index 8a4249a..0000000 --- a/book-src/src/week-2/day-14.md +++ /dev/null @@ -1,3 +0,0 @@ -# Day 14: Weaving the Narrative – Quant + Qual - -_Summary and tasks as per curriculum. Add your notes and findings here._ diff --git a/book-src/src/week-3/day-15.md b/book-src/src/week-3/day-15.md deleted file mode 100644 index 887182b..0000000 --- a/book-src/src/week-3/day-15.md +++ /dev/null @@ -1,3 +0,0 @@ -# Day 15: The Definitive A/B Test Readout - -_Summary and tasks as per curriculum. Add your notes and findings here._ diff --git a/book-src/src/week-3/day-16.md b/book-src/src/week-3/day-16.md deleted file mode 100644 index dcffe9f..0000000 --- a/book-src/src/week-3/day-16.md +++ /dev/null @@ -1,3 +0,0 @@ -# Day 16: The Quasi-Experiment Post-Mortem - -_Summary and tasks as per curriculum. Add your notes and findings here._ diff --git a/book-src/src/week-3/day-17.md b/book-src/src/week-3/day-17.md deleted file mode 100644 index 477788b..0000000 --- a/book-src/src/week-3/day-17.md +++ /dev/null @@ -1,3 +0,0 @@ -# Day 17: Cannibalization vs. Creation – The Engagement Portfolio - -_Summary and tasks as per curriculum. Add your notes and findings here._ diff --git a/book-src/src/week-3/day-18.md b/book-src/src/week-3/day-18.md deleted file mode 100644 index 90713ca..0000000 --- a/book-src/src/week-3/day-18.md +++ /dev/null @@ -1,3 +0,0 @@ -# Day 18: Retention Curves & The Lift Over Time - -_Summary and tasks as per curriculum. Add your notes and findings here._ diff --git a/book-src/src/week-3/day-19.md b/book-src/src/week-3/day-19.md deleted file mode 100644 index 4dd818e..0000000 --- a/book-src/src/week-3/day-19.md +++ /dev/null @@ -1,3 +0,0 @@ -# Day 19: Validating the Engagement Loop - -_Summary and tasks as per curriculum. Add your notes and findings here._ diff --git a/book-src/src/week-3/day-20.md b/book-src/src/week-3/day-20.md deleted file mode 100644 index 3d820bd..0000000 --- a/book-src/src/week-3/day-20.md +++ /dev/null @@ -1,3 +0,0 @@ -# Day 20: Predictive Modeling for Proactive Engagement - -_Summary and tasks as per curriculum. Add your notes and findings here._ diff --git a/book-src/src/week-3/day-21.md b/book-src/src/week-3/day-21.md deleted file mode 100644 index c7a62e1..0000000 --- a/book-src/src/week-3/day-21.md +++ /dev/null @@ -1,3 +0,0 @@ -# Day 21: The One-Slide Story - -_Summary and tasks as per curriculum. Add your notes and findings here._ diff --git a/book-src/src/week-4/day-22.md b/book-src/src/week-4/day-22.md deleted file mode 100644 index bafef3a..0000000 --- a/book-src/src/week-4/day-22.md +++ /dev/null @@ -1,3 +0,0 @@ -# Day 22: Quantifying the Business Impact (LTV) - -_Summary and tasks as per curriculum. Add your notes and findings here._ diff --git a/book-src/src/week-4/day-23.md b/book-src/src/week-4/day-23.md deleted file mode 100644 index 2382228..0000000 --- a/book-src/src/week-4/day-23.md +++ /dev/null @@ -1,3 +0,0 @@ -# Day 23 - -_Placeholder for Day 23 content._ diff --git a/book-src/src/week-4/day-24.md b/book-src/src/week-4/day-24.md deleted file mode 100644 index 7ef5b74..0000000 --- a/book-src/src/week-4/day-24.md +++ /dev/null @@ -1,3 +0,0 @@ -# Day 24 - -_Placeholder for Day 24 content._ diff --git a/book-src/src/week-4/day-25.md b/book-src/src/week-4/day-25.md deleted file mode 100644 index 1c59f6d..0000000 --- a/book-src/src/week-4/day-25.md +++ /dev/null @@ -1,3 +0,0 @@ -# Day 25 - -_Placeholder for Day 25 content._ diff --git a/book-src/src/week-4/day-26.md b/book-src/src/week-4/day-26.md deleted file mode 100644 index 3aabac7..0000000 --- a/book-src/src/week-4/day-26.md +++ /dev/null @@ -1,3 +0,0 @@ -# Day 26 - -_Placeholder for Day 26 content._ diff --git a/book-src/src/week-4/day-27.md b/book-src/src/week-4/day-27.md deleted file mode 100644 index 869b62d..0000000 --- a/book-src/src/week-4/day-27.md +++ /dev/null @@ -1,3 +0,0 @@ -# Day 27 - -_Placeholder for Day 27 content._ diff --git a/book-src/src/week-4/day-28.md b/book-src/src/week-4/day-28.md deleted file mode 100644 index b7844b6..0000000 --- a/book-src/src/week-4/day-28.md +++ /dev/null @@ -1,3 +0,0 @@ -# Day 28 - -_Placeholder for Day 28 content._ diff --git a/book-src/src/week-4/day-29.md b/book-src/src/week-4/day-29.md deleted file mode 100644 index 4bb0eb1..0000000 --- a/book-src/src/week-4/day-29.md +++ /dev/null @@ -1,3 +0,0 @@ -# Day 29 - -_Placeholder for Day 29 content._ diff --git a/book-src/src/week-4/day-30.md b/book-src/src/week-4/day-30.md deleted file mode 100644 index df3eca9..0000000 --- a/book-src/src/week-4/day-30.md +++ /dev/null @@ -1,3 +0,0 @@ -# Day 30 - -_Placeholder for Day 30 content._ diff --git a/scripts/bash/build_book.sh b/scripts/bash/build_book.sh old mode 100644 new mode 100755 From 182fe468ea1e515f8049da4d423ea8a17b5e1fb6 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 13 Nov 2025 22:23:54 +0000 Subject: [PATCH 3/5] Create comprehensive GitHub issues documentation for codebase cleanup Co-authored-by: moshesham <7207587+moshesham@users.noreply.github.com> --- .github/ISSUE_TEMPLATE.md | 34 + ISSUES_TO_CREATE.md | 1581 +++++++++++++++++++++++++++++++++++++ 2 files changed, 1615 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE.md create mode 100644 ISSUES_TO_CREATE.md diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md new file mode 100644 index 0000000..4534608 --- /dev/null +++ b/.github/ISSUE_TEMPLATE.md @@ -0,0 +1,34 @@ +--- +name: Codebase Cleanup Task +about: Template for codebase cleanup issues +title: '[CLEANUP] ' +labels: cleanup +assignees: '' + +--- + +## Problem Description + + +## Current State Analysis + + +## Impact + + +## Recommended Solution + + +## Files to Check/Update + +- [ ] File 1 +- [ ] File 2 + +## Definition of Done + +- [ ] Criterion 1 +- [ ] Criterion 2 + +## Related Issues + + diff --git a/ISSUES_TO_CREATE.md b/ISSUES_TO_CREATE.md new file mode 100644 index 0000000..6ec165c --- /dev/null +++ b/ISSUES_TO_CREATE.md @@ -0,0 +1,1581 @@ +# GitHub Issues to Create for Codebase Cleanup + +This document contains detailed issue descriptions to be created one at a time on GitHub for the Product Analytics MasterClass repository cleanup. + +--- + +## Issue #1: Consolidate duplicate build_book.sh scripts + +**Labels:** `cleanup`, `documentation`, `scripts` +**Priority:** High +**Effort:** Small (1-2 hours) + +### Problem Description + +The repository currently contains two build scripts with different implementations: +1. `/scripts/build_book.sh` - The older version +2. `/scripts/bash/build_book.sh` - The newer, cleaner version + +The newer script (`/scripts/bash/build_book.sh`) has been updated but the older one (`/scripts/build_book.sh`) still contains logic that references week directories that no longer exist. + +### Current State Analysis + +#### scripts/bash/build_book.sh (UPDATED - WORKING) +- ✅ Clears old content before building +- ✅ Copies main curriculum as introduction.md +- ✅ Copies report files successfully +- ✅ Generates SUMMARY.md dynamically +- ✅ All file paths reference existing files +- ⚠️ File is executable (755 permissions) + +#### scripts/build_book.sh (OUTDATED - BROKEN) +- ❌ No cleanup step before copying +- ❌ Contains useless loop copying week directories to themselves (lines 14-18) +- ❌ References week directories that have been deleted +- ❌ Does NOT generate SUMMARY.md +- ❌ File is not executable (644 permissions) + +### Files Referenced in Scripts + +The build scripts attempt to copy these files from `reports/`: +- ✅ `ab_test_analysis.md` - EXISTS but is EMPTY (0 bytes) +- ✅ `qbr_presentation.md` - EXISTS but is EMPTY (0 bytes) +- ✅ `Journals_Instrumentation_Plan.md` - EXISTS with placeholder content +- ✅ `Week1_Launch_Summary.md` - EXISTS with placeholder content +- ✅ `AB_Test_Final_Readout.md` - EXISTS with placeholder content +- ✅ `Pre_Mortem_Memo.md` - EXISTS with placeholder content +- ✅ `Journals_Launch_Monitoring_Dashboard.md` - EXISTS with placeholder content +- ✅ `DiD_Critical_Assessment.md` - EXISTS with placeholder content + +### Impact + +1. **Confusion**: Having two build scripts creates confusion about which one to use +2. **Maintenance**: Keeping two scripts in sync is error-prone +3. **Documentation**: README or other docs may reference the wrong script +4. **CI/CD**: Automated builds may use the wrong script + +### Recommended Solution + +#### Option 1: Remove Outdated Script (RECOMMENDED) +- Delete `/scripts/build_book.sh` +- Update any documentation referencing it to use `/scripts/bash/build_book.sh` +- Update any CI/CD pipelines if they reference the old script + +#### Option 2: Consolidate at Root Level +- Keep the newer implementation at `/scripts/build_book.sh` +- Delete `/scripts/bash/build_book.sh` +- Move any other scripts from `/scripts/bash/` to `/scripts/` directly + +#### Option 3: Make Scripts Modular +- Keep both locations but have `/scripts/build_book.sh` call `/scripts/bash/build_book.sh` +- Add documentation explaining the structure + +### Alignment with 30-Day Syllabus + +The 30-day syllabus (in `30-Day Product Analytics Masterclass.md`) defines the course structure with: +- Week 1 (Days 1-7): Foundations & Framing +- Week 2 (Days 8-14): Launch & Monitoring +- Week 3 (Days 15-21): Deep Dive - Causal Impact +- Week 4 (Days 22-30): Strategy - From Analyst to Influencer + +The build scripts should: +1. ✅ Copy the main curriculum as the introduction +2. ❓ Potentially organize content by week/day structure +3. ✅ Include key reports as referenced in the curriculum +4. ✅ Generate a coherent table of contents + +### Files to Check/Update + +- [ ] `/scripts/build_book.sh` +- [ ] `/scripts/bash/build_book.sh` +- [ ] `README.md` - Check for build script references +- [ ] `.github/workflows/*` - Check CI/CD references +- [ ] `CONTRIBUTING.md` - Check for build instructions +- [ ] `book.toml` - Verify mdBook configuration + +### Additional Context + +The `Content/` folder contains `Day_XX_Topic/README.md` files that provide detailed guidance for each day of the curriculum. The relationship between this content and the book-src structure needs clarification. + +### Definition of Done + +- [ ] Only one authoritative build script exists +- [ ] The script successfully builds the mdBook +- [ ] All file references in the script point to existing files +- [ ] Documentation is updated to reference the correct script +- [ ] Any CI/CD pipelines are updated +- [ ] The script aligns with the 30-day syllabus structure + +### Related Issues +- Issue #2 (Content folder evaluation) +- Issue #4 (Empty report files) + +--- + +## Issue #2: Evaluate Content folder usage and redundancy + +**Labels:** `cleanup`, `documentation`, `content-structure` +**Priority:** Medium +**Effort:** Medium (3-4 hours) + +### Problem Description + +The repository contains a `Content/` directory with 30 subdirectories (`Day_01_Topic` through `Day_30_Topic`), each containing a `README.md` file. However, it's unclear how this content relates to: +1. The `book-src/src/` structure (which previously had week-based organization) +2. The main `30-Day Product Analytics Masterclass.md` curriculum file +3. The `notebooks/` directory + +This creates confusion about the single source of truth for course content and may lead to content drift. + +### Current State Analysis + +#### Content Folder Structure +``` +Content/ +├── Day_01_Topic/README.md (4.4KB) +├── Day_02_Topic/README.md +├── Day_03_Topic/README.md +... +├── Day_30_Topic/README.md +``` + +Total size: ~368KB across 30 directories + +#### Sample Content Analysis (Day_01_Topic/README.md) +- Contains detailed day-specific guidance +- Includes objectives, key concepts, step-by-step instructions +- References specific deliverables and notebooks +- Well-structured educational content +- Appears to be teaching material, not just metadata + +#### Relationship to Other Content + +**vs. 30-Day Product Analytics Masterclass.md:** +- Main curriculum file is comprehensive (49KB) +- Contains all 30 days of curriculum in one file +- Content appears to overlap with Day_XX_Topic files +- Main file is more concise; Day files are more detailed + +**vs. book-src/src/:** +- book-src previously had week-N/day-N.md files (now deleted) +- Current book-src only has introduction.md and report files +- No clear linkage between Content/ and book-src/ + +**vs. notebooks/:** +- Notebooks directory contains actual code exercises +- Content/Day_XX files reference notebooks by name +- Acts as instruction manual for notebooks + +### Impact + +1. **Maintenance Burden**: Three potential sources of truth for curriculum content +2. **Inconsistency Risk**: Changes to one location may not propagate to others +3. **User Confusion**: Unclear which content users should follow +4. **Build Process**: Content/ folder is not currently used by build_book.sh scripts + +### Questions to Answer + +1. **Is Content/ folder actively used?** + - Check if any scripts reference it + - Check if README.md points to it + - Check if it's linked in documentation + +2. **Should Content/ be the source for book-src?** + - If yes, update build scripts to copy from Content/ + - If no, consider removing or consolidating + +3. **Is there value in the detailed Day-specific content?** + - More detailed than main curriculum + - Includes pedagogical elements + - May be valuable for instructors + +### Recommended Solutions + +#### Option 1: Make Content/ the Source of Truth (RECOMMENDED) +- **Pro**: Most detailed content exists here +- **Pro**: Clear day-by-day organization +- **Con**: Requires updating build scripts +- **Action Items**: + 1. Update build scripts to copy from Content/Day_XX_Topic/README.md + 2. Rename files to more semantic names (e.g., day-01-opportunity-discovery.md) + 3. Update SUMMARY.md generation to include all 30 days + 4. Archive or remove content from main curriculum file + +#### Option 2: Consolidate into Main Curriculum +- **Pro**: Single file is easier to maintain +- **Pro**: Already referenced by build scripts +- **Con**: Loss of detailed instructional content +- **Action Items**: + 1. Merge valuable content from Day_XX files into main curriculum + 2. Delete Content/ folder + 3. Keep build scripts as-is + +#### Option 3: Hybrid Approach +- **Pro**: Preserves both levels of detail +- **Pro**: Main curriculum for overview, Content/ for deep-dive +- **Con**: Requires clear documentation of purpose +- **Action Items**: + 1. Document that main curriculum is the syllabus + 2. Document that Content/ contains instructor/student guides + 3. Update build scripts to include both + 4. Create clear navigation between them + +#### Option 4: Move to book-src Week Structure +- **Pro**: Aligns with pedagogical weeks +- **Pro**: Restores original structure +- **Con**: Requires recreating deleted week directories +- **Action Items**: + 1. Create book-src/src/week-1/ through week-4/ + 2. Move Day_XX content to appropriate week folders + 3. Update build scripts accordingly + +### Files to Audit + +- [ ] All 30 `Content/Day_XX_Topic/README.md` files +- [ ] `30-Day Product Analytics Masterclass.md` +- [ ] `README.md` - Check for Content/ references +- [ ] `scripts/bash/build_book.sh` - Currently doesn't use Content/ +- [ ] `scripts/build_book.sh` - Currently doesn't use Content/ +- [ ] `.github/workflows/*` - Check for Content/ usage +- [ ] `book.toml` - Check mdBook configuration + +### Metrics to Consider + +- Content overlap percentage between main curriculum and Day files +- Unique content in each location +- File size comparison +- Last modified dates + +### Definition of Done + +- [ ] Decision made on Content/ folder purpose +- [ ] If keeping: Build scripts updated to use Content/ +- [ ] If removing: Content migrated or archived +- [ ] Documentation updated to explain content organization +- [ ] All references to Content/ are accurate +- [ ] No duplicate or conflicting content exists + +### Related Issues +- Issue #1 (Build script consolidation) +- Issue #3 (Week directory structure) + +--- + +## Issue #3: Restore or remove week directory structure in book-src + +**Labels:** `cleanup`, `structure`, `decision-needed` +**Priority:** High +**Effort:** Small-Medium (2-3 hours) + +### Problem Description + +The `book-src/src/` directory previously contained a week-based organizational structure with individual day markdown files: +- `week-1/day-01.md` through `day-07.md` +- `week-2/day-08.md` through `day-14.md` +- `week-3/day-15.md` through `day-21.md` +- `week-4/day-22.md` through `day-30.md` + +These 30 files were recently deleted (commit 18d9b74), and the structure was flattened. The current state leaves `book-src/src/` with only: +- `introduction.md` (the main curriculum) +- Report files copied from `reports/` +- `SUMMARY.md` (table of contents) + +### Current State Analysis + +#### What Was Deleted +``` +book-src/src/ +├── week-1/ +│ ├── day-01.md (deleted) +│ ├── day-02.md (deleted) +│ ... +│ └── day-07.md (deleted) +├── week-2/ (all files deleted) +├── week-3/ (all files deleted) +└── week-4/ (all files deleted) +``` + +Total: 30 markdown files deleted across 4 week directories + +#### What Remains +``` +book-src/src/ +├── introduction.md (main curriculum - 49KB) +├── SUMMARY.md (164 bytes - minimal TOC) +├── ab_test_analysis.md (0 bytes) +├── qbr_presentation.md (0 bytes) +└── [6 other report files with placeholder content] +``` + +#### Git History +``` +commit 18d9b74 +Author: moshesham +Date: [recent] + +Deleted: +- book-src/src/week-1/day-01.md through day-07.md +- book-src/src/week-2/day-08.md through day-14.md +- book-src/src/week-3/day-15.md through day-21.md +- book-src/src/week-4/day-22.md through day-30.md +``` + +### Impact + +1. **Loss of Granular Content**: If those day files contained unique content, it's now inaccessible +2. **Navigation Issues**: mdBook users can't navigate by specific days +3. **Build Script References**: The old `scripts/build_book.sh` still references these directories +4. **Pedagogical Structure**: Week-based learning may be better than one large introduction + +### Questions to Answer + +1. **Did the deleted files contain unique content?** + - Check git history: `git show :book-src/src/week-1/day-01.md` + - Compare with Content/Day_01_Topic/README.md + - Compare with 30-Day Product Analytics Masterclass.md + +2. **Was the deletion intentional or accidental?** + - Review commit message + - Check PR/issue discussions + - Consult with repository maintainers + +3. **What should the book structure be?** + - Single-page curriculum (current state) + - Week-based chapters with day sections + - Flat list of 30 day pages + - Hybrid with weeks + reports + +### Recommended Solutions + +#### Option 1: Restore Week Structure from Content/ (RECOMMENDED IF UNIQUE CONTENT) +- **Action**: Copy `Content/Day_XX_Topic/README.md` to `book-src/src/week-N/day-XX.md` +- **Mapping**: + - Days 01-07 → week-1/ + - Days 08-14 → week-2/ + - Days 15-21 → week-3/ + - Days 22-30 → week-4/ +- **Update**: SUMMARY.md to include week-based navigation +- **Update**: Build scripts to maintain this structure + +#### Option 2: Keep Flat Structure (CURRENT STATE) +- **Action**: Accept that introduction.md is the primary content +- **Update**: Remove week-copying logic from old build script +- **Update**: Documentation to reflect single-page approach +- **Benefit**: Simpler to maintain, easier to search +- **Downside**: Harder to navigate long document + +#### Option 3: Restore from Git History +- **Action**: Cherry-pick the deleted files from git history +- **Command**: `git checkout -- book-src/src/week-*/` +- **Benefit**: Preserves any unique content that was in those files +- **Requirement**: First verify content is unique and valuable + +#### Option 4: Create New Week Structure from Main Curriculum +- **Action**: Split the main curriculum into week/day files programmatically +- **Script**: Parse markdown headers in main file +- **Benefit**: Clean split from authoritative source +- **Downside**: Requires markdown parsing logic + +### Investigation Steps + +#### Step 1: Check Git History for Content +```bash +# View what day-01.md contained before deletion +git show 574d4a5:book-src/src/week-1/day-01.md + +# Compare all week files +for week in 1 2 3 4; do + for day in $(seq 1 7 2>&1 | head -n $((week == 4 ? 9 : 7))); do + git show 574d4a5:book-src/src/week-$week/day-$(printf "%02d" $day).md > /tmp/old-day-$day.md + done +done +``` + +#### Step 2: Compare with Current Content Sources +```bash +# Compare old day files with Content/ folder +diff /tmp/old-day-01.md Content/Day_01_Topic/README.md + +# Check if content exists in main curriculum +grep -A 50 "Day 01" "30-Day Product Analytics Masterclass.md" +``` + +#### Step 3: Analyze SUMMARY.md Evolution +```bash +# Check old SUMMARY.md structure +git show 574d4a5:book-src/src/SUMMARY.md + +# Compare with current +cat book-src/src/SUMMARY.md +``` + +### Files to Review + +- [ ] Git history of all 30 deleted day files +- [ ] Current `book-src/src/SUMMARY.md` +- [ ] Previous `book-src/src/SUMMARY.md` (from git) +- [ ] `scripts/build_book.sh` week-copying logic (lines 14-18) +- [ ] `Content/Day_XX_Topic/README.md` files for comparison +- [ ] `book.toml` - mdBook configuration + +### Decision Matrix + +| Criteria | Flat (Current) | Restore Weeks | New from Content | +|----------|---------------|---------------|------------------| +| Ease of navigation | Low | High | High | +| Maintenance effort | Low | Medium | Medium | +| Content richness | Medium | ? | High | +| Alignment with pedagogy | Low | High | High | +| Build complexity | Low | Medium | Medium | + +### Definition of Done + +- [ ] Decision made on book structure (flat vs. week-based) +- [ ] If restoring: Week directories recreated with content +- [ ] If staying flat: Week references removed from build scripts +- [ ] SUMMARY.md updated to match chosen structure +- [ ] Build scripts tested and working +- [ ] Documentation updated to explain structure choice + +### Related Issues +- Issue #1 (Build script consolidation) +- Issue #2 (Content folder evaluation) + +--- + +## Issue #4: Populate or remove empty placeholder report files + +**Labels:** `cleanup`, `content`, `reports` +**Priority:** Medium +**Effort:** Large (8-12 hours) - if creating content + +### Problem Description + +The `reports/` directory contains several markdown files that are either completely empty (0 bytes) or contain only minimal placeholder text. These files are referenced and copied by the build scripts, creating an incomplete user experience in the generated mdBook. + +### Current State Analysis + +#### Empty Files (0 bytes) +1. **reports/ab_test_analysis.md** - 0 bytes + - Referenced in build scripts + - Copied to book-src/src/ + - Expected content: A/B test analysis report (from Day 15-16) + +2. **reports/qbr_presentation.md** - 0 bytes + - Referenced in build scripts + - Listed in SUMMARY.md + - Expected content: Quarterly Business Review presentation (from Days 27-30) + +3. **reports/did_analysis.md** - 0 bytes + - NOT currently referenced in build scripts + - Likely intended for Difference-in-Differences analysis (Day 16) + +#### Placeholder Files (minimal content) +4. **reports/AB_Test_Final_Readout.md** - 92 bytes + ``` + # AB Test Final Readout + + Placeholder for AB Test Final Readout report (Day 15). + ``` + +5. **reports/DiD_Critical_Assessment.md** - 87 bytes + ``` + # DiD Critical Assessment + + Placeholder for DiD Critical Assessment (Day 16). + ``` + +6. **reports/Journals_Instrumentation_Plan.md** - 87 bytes + ``` + # Journals Instrumentation Plan + + Placeholder for instrumentation plan (Day 3). + ``` + +7. **reports/Week1_Launch_Summary.md** - 91 bytes + ``` + # Week 1 Launch Summary + + Placeholder for Week 1 Launch Summary Memo (Day 12). + ``` + +8. **reports/Pre_Mortem_Memo.md** - 88 bytes + ``` + # Pre-Mortem Memo + + Placeholder for Pre-Mortem Memo (Day 7). + ``` + +9. **reports/Journals_Launch_Monitoring_Dashboard.md** - 92 bytes + ``` + # Journals Launch Monitoring Dashboard + + Placeholder for dashboard specification (Day 6). + ``` + +#### Complete Files (with content) +10. **reports/dashboards/journals_launch_monitoring_dashboard.md** - Has actual content + - This appears to be a duplicate/alternate location for item #9 + +### Expected Content Based on Curriculum + +According to the 30-Day Product Analytics Masterclass curriculum: + +| File | Curriculum Day | Expected Content | +|------|---------------|------------------| +| Journals_Instrumentation_Plan.md | Day 3 | Instrumentation spec with events, success metrics, guardrails | +| Pre_Mortem_Memo.md | Day 7 | Risk analysis memo with 3 plausible risks and detection plans | +| Journals_Launch_Monitoring_Dashboard.md | Day 6 | Dashboard spec with KPIs, visualizations, and SQL queries | +| Week1_Launch_Summary.md | Day 12 | Structured memo with TL;DR, wins, challenges, insights, recommendations | +| ab_test_analysis.md | Days 4, 13 | A/B test design and preliminary analysis | +| AB_Test_Final_Readout.md | Day 15 | Complete A/B test analysis with statistical results | +| DiD_Critical_Assessment.md | Day 16 | Difference-in-Differences analysis and comparison with A/B test | +| qbr_presentation.md | Days 27-30 | Quarterly Business Review presentation (5-slide deck) | + +### Impact + +1. **Incomplete Documentation**: Users following the course have no example reports +2. **Build Output**: Generated book contains empty or placeholder pages +3. **Learning Gap**: Students can't see what a "good" report looks like +4. **Confusion**: Unclear if these are work-in-progress or intentionally blank + +### Recommended Solutions + +#### Option 1: Create Template/Example Content (RECOMMENDED) +Create realistic example content for each report based on the curriculum requirements. + +**Pros:** +- Provides learning value +- Demonstrates best practices +- Makes the book complete and professional + +**Cons:** +- Significant time investment (8-12 hours) +- Requires domain expertise +- Needs to align with fictional "Journals" feature scenario + +**Priority Order for Content Creation:** +1. **High Priority** (Core learning deliverables): + - Journals_Instrumentation_Plan.md + - AB_Test_Final_Readout.md + - Week1_Launch_Summary.md + +2. **Medium Priority** (Important examples): + - Pre_Mortem_Memo.md + - Journals_Launch_Monitoring_Dashboard.md + - DiD_Critical_Assessment.md + +3. **Lower Priority** (Can be combined with others): + - ab_test_analysis.md (interim/preliminary version) + - qbr_presentation.md (capstone - can reference other reports) + +#### Option 2: Remove from Build Scripts +Remove empty/placeholder files from the build process until content is ready. + +**Pros:** +- Clean user experience +- No broken promises +- Quick fix + +**Cons:** +- Reduces completeness of course materials +- Loses structure/scaffolding for future content + +**Action Items:** +- Update build scripts to skip empty files +- Update SUMMARY.md to not list missing reports +- Add TODO comments for future content + +#### Option 3: Convert to "Student Exercise" Placeholders +Explicitly frame these as templates for students to fill in. + +**Pros:** +- Turns weakness into pedagogical feature +- Students learn by doing +- Clear expectations + +**Cons:** +- Still incomplete as reference material +- May frustrate self-learners + +**Action Items:** +- Add clear headers: "Student Exercise: [Report Name]" +- Include rubric/requirements +- Provide structure/outline to fill in + +#### Option 4: Link to External Examples +Instead of creating content, link to high-quality examples from other sources. + +**Pros:** +- Leverages existing resources +- Low effort +- Shows real-world variety + +**Cons:** +- Examples may not align with "Journals" scenario +- External links can break +- Less cohesive learning experience + +### Content Creation Guidelines (if Option 1 chosen) + +Each report should: +1. **Be realistic**: Use plausible numbers and insights for the "Journals" feature +2. **Demonstrate best practices**: Show proper formatting, structure, communication +3. **Align with curriculum**: Match the requirements from the corresponding Day +4. **Be internally consistent**: Numbers should make sense across all reports +5. **Include visuals**: Charts, tables, code blocks where appropriate +6. **Show progression**: Early reports show uncertainty, later ones show learnings + +### Sample Data for Consistency + +To maintain consistency across reports, use these fictional parameters: +- **Test Duration**: 28 days +- **Sample Size**: 100,000 users (50k control, 50k treatment) +- **Baseline Retention (Day 28)**: 20% +- **Treatment Lift**: +2.5% (50 basis points absolute) +- **Statistical Significance**: p-value = 0.012 (significant) +- **Feature Adoption**: 15% of treatment group +- **Guardrail Metrics**: All neutral or positive + +### Files to Update + +- [ ] reports/ab_test_analysis.md +- [ ] reports/qbr_presentation.md +- [ ] reports/did_analysis.md +- [ ] reports/AB_Test_Final_Readout.md +- [ ] reports/DiD_Critical_Assessment.md +- [ ] reports/Journals_Instrumentation_Plan.md +- [ ] reports/Week1_Launch_Summary.md +- [ ] reports/Pre_Mortem_Memo.md +- [ ] reports/Journals_Launch_Monitoring_Dashboard.md +- [ ] scripts/bash/build_book.sh (if removing files) +- [ ] book-src/src/SUMMARY.md (if removing files) + +### Definition of Done + +**If Creating Content:** +- [ ] All report files have realistic, complete content +- [ ] Content aligns with curriculum day requirements +- [ ] Numbers are internally consistent across reports +- [ ] Reports demonstrate professional communication +- [ ] Build scripts successfully include all reports +- [ ] Generated mdBook is complete and professional + +**If Removing:** +- [ ] Empty files removed from build scripts +- [ ] SUMMARY.md updated to not reference missing reports +- [ ] Documentation explains what's intentionally excluded +- [ ] Future content creation tracked in separate issues + +### Related Issues +- Issue #1 (Build script consolidation) +- Issue #6 (SUMMARY.md generation) + +--- + +## Issue #5: Review and update documentation files for consistency + +**Labels:** `documentation`, `cleanup`, `consistency` +**Priority:** Medium +**Effort:** Medium (3-4 hours) + +### Problem Description + +Several documentation files in the repository may contain outdated references, broken links, or inconsistent information following recent structural changes (deletion of week directories, consolidation of build scripts, etc.). These files need to be reviewed and updated to reflect the current state of the repository. + +### Files to Review + +#### 1. README.md (Root) +**Current Issues to Check:** +- [ ] Does it reference the correct build script location? +- [ ] Are setup instructions current and accurate? +- [ ] Do links to course content work? +- [ ] Is the project structure description accurate? +- [ ] Are contribution guidelines referenced correctly? + +**Specific Items:** +- Build script references (should point to working script) +- Directory structure documentation +- Prerequisites and setup +- Links to external resources +- Table of contents accuracy + +#### 2. CONTRIBUTING.md +**Current Issues to Check:** +- [ ] Build/test instructions accurate? +- [ ] Are script paths correct? +- [ ] Is the development workflow documented? +- [ ] Are coding standards defined? +- [ ] Pull request guidelines current? + +**Specific Items:** +- How to run build scripts +- How to test changes +- Code formatting standards +- Documentation standards +- Review process + +#### 3. CODE_OF_CONDUCT.md +**Current Issues to Check:** +- [ ] Is this a standard template or custom? +- [ ] Are contact methods current? +- [ ] Does it need updates? + +**Likely Status:** Probably fine, these are usually static + +#### 4. book.toml (mdBook Configuration) +**Current Issues to Check:** +- [ ] Book title and author correct? +- [ ] Build configuration appropriate? +- [ ] Output directory settings? +- [ ] Theme and styling settings? +- [ ] Preprocessor configurations? + +**Specific Items:** +```toml +[book] +title = "..." # Verify accuracy +authors = ["..."] # Verify accuracy +language = "en" +multilingual = false +src = "book-src/src" # Verify path is correct +``` + +#### 5. docker-compose.yml & Dockerfile +**Current Issues to Check:** +- [ ] Are these used for development environment? +- [ ] Do they reference correct paths? +- [ ] Are dependencies current? +- [ ] Volume mounts accurate? +- [ ] Port mappings documented? + +**Specific Items:** +- Working directory paths +- Volume mount points +- Environment variables +- Exposed ports +- Dependencies list + +#### 6. environment.yml (Conda Environment) +**Current Issues to Check:** +- [ ] Python version appropriate? +- [ ] All required packages listed? +- [ ] Versions specified where needed? +- [ ] Compatible with curriculum requirements? + +**Specific Items:** +```yaml +dependencies: + - python=3.x + - duckdb + - pandas + - jupyter + - matplotlib + # etc. +``` + +#### 7. .gitignore +**Current Issues to Check:** +- [ ] Build artifacts ignored? +- [ ] mdBook output ignored? +- [ ] Python cache ignored? +- [ ] Jupyter checkpoints ignored? +- [ ] IDE files ignored? + +**Specific Items:** +``` +book-src/book/ # mdBook output +__pycache__/ +*.pyc +.ipynb_checkpoints/ +.DS_Store +``` + +#### 8. .github/workflows/* (if exists) +**Current Issues to Check:** +- [ ] CI/CD pipelines exist? +- [ ] Build script references correct? +- [ ] Test commands accurate? +- [ ] Deploy processes working? + +### Common Issues to Look For + +1. **Broken Internal Links** + - Links to deleted week directories + - Links to moved files + - Anchors to removed sections + +2. **Outdated Script References** + - References to old build_book.sh locations + - Deprecated command examples + - Wrong script paths in examples + +3. **Structural Assumptions** + - Descriptions of deleted week-based structure + - References to Content folder without explaining purpose + - Assumptions about file organization that changed + +4. **Installation Instructions** + - Missing dependencies + - Outdated version requirements + - Incorrect setup steps + +5. **Build Instructions** + - Wrong script paths + - Missing environment setup + - Incorrect command syntax + +### Recommended Approach + +#### Phase 1: Audit (1 hour) +1. Read each documentation file completely +2. Make notes of specific issues found +3. Check all internal and external links +4. Verify all code examples and commands +5. Test setup instructions in clean environment + +#### Phase 2: Update (2 hours) +1. Fix broken links and references +2. Update script paths and examples +3. Correct structural descriptions +4. Update installation instructions if needed +5. Refresh any outdated information + +#### Phase 3: Validate (1 hour) +1. Follow README setup instructions from scratch +2. Run all documented commands +3. Click all documentation links +4. Verify consistency across files +5. Test build process end-to-end + +### Specific Updates Needed + +Based on recent changes: + +#### Update Build Script References +**Old:** `./scripts/build_book.sh` +**New:** `./scripts/bash/build_book.sh` (or consolidated location per Issue #1) + +**Files to check:** +- README.md +- CONTRIBUTING.md +- .github/workflows/* (if exists) + +#### Update Structure Documentation +**Old:** References to week-1/, week-2/, etc. in book-src/src/ +**New:** Flat structure with introduction.md + reports + +**Files to check:** +- README.md (project structure section) +- CONTRIBUTING.md (file organization section) + +#### Document Content Folder +**Missing:** Explanation of Content/Day_XX_Topic/ purpose +**Needed:** Clear statement of what this folder is for + +**Files to check:** +- README.md (should explain folder purpose) +- CONTRIBUTING.md (should explain when to edit these files) + +### Documentation Quality Checklist + +For each documentation file: +- [ ] No broken internal links +- [ ] No broken external links +- [ ] All code examples are valid +- [ ] All paths reference existing files +- [ ] Consistent terminology throughout +- [ ] Clear and concise language +- [ ] Proper markdown formatting +- [ ] Table of contents (if long) +- [ ] Last updated date/version + +### Definition of Done + +- [ ] All documentation files reviewed +- [ ] All broken links fixed +- [ ] All script references corrected +- [ ] All structural descriptions accurate +- [ ] Setup instructions tested in clean environment +- [ ] Build process documented accurately +- [ ] Consistency verified across all docs +- [ ] No references to deleted/moved content + +### Related Issues +- Issue #1 (Build script paths may change) +- Issue #2 (Content folder needs documentation) +- Issue #3 (Structure changed with week deletion) + +--- + +## Issue #6: Improve SUMMARY.md generation and book navigation + +**Labels:** `enhancement`, `user-experience`, `mdbook` +**Priority:** High +**Effort:** Small-Medium (2-3 hours) + +### Problem Description + +The current `book-src/src/SUMMARY.md` file is minimal and doesn't provide comprehensive navigation for the course content. It only lists the introduction and two reports, missing the opportunity to create a rich, navigable learning experience. + +### Current State + +#### Current SUMMARY.md (164 bytes) +```markdown +# Summary + +[Introduction](./introduction.md) + +--- + +# Key Analyses + +- [A/B Test Analysis](./ab_test_analysis.md) +- [QBR Presentation Outline](./qbr_presentation.md) +``` + +**Issues:** +- Only 3 items listed (intro + 2 reports) +- Missing 6 other report files that are copied to book-src/src/ +- No day-by-day or week-by-week structure +- No clear learning progression +- Doesn't reflect the 30-day curriculum organization + +#### Files Available in book-src/src/ (after build) +- introduction.md (49KB - entire curriculum) +- ab_test_analysis.md (empty) +- qbr_presentation.md (empty) +- AB_Test_Final_Readout.md (placeholder) +- DiD_Critical_Assessment.md (placeholder) +- Journals_Instrumentation_Plan.md (placeholder) +- Week1_Launch_Summary.md (placeholder) +- Pre_Mortem_Memo.md (placeholder) +- Journals_Launch_Monitoring_Dashboard.md (placeholder) + +#### Current Generation Logic (in scripts/bash/build_book.sh) +```bash +cat > "${BOOK_SRC_DIR}/SUMMARY.md" << EOT +# Summary + +[Introduction](./introduction.md) + +--- + +# Key Analyses + +- [A/B Test Analysis](./ab_test_analysis.md) +- [QBR Presentation Outline](./qbr_presentation.md) +EOT +``` + +**Issues with Generation:** +- Hardcoded content only +- No dynamic discovery of files +- Doesn't include all reports +- No hierarchical structure + +### Impact + +1. **Poor Navigation**: Users can't easily navigate to specific days or topics +2. **Incomplete**: Missing content that exists in book-src/src/ +3. **Not Pedagogical**: Doesn't reflect the week-by-week learning structure +4. **Confusing**: Introduction.md contains all 30 days but no way to jump to specific days + +### Curriculum Structure (from main file) + +The 30-day curriculum is organized as: +- **Week 1 (Days 1-7)**: Foundations & Framing +- **Week 2 (Days 8-14)**: The Crucible - Monitoring & Triage +- **Week 3 (Days 15-21)**: The Deep Dive - Causal Impact +- **Week 4 (Days 22-30)**: The Strategy - From Analyst to Influencer + +Each day has: +- Title +- Objective +- Why This Matters +- Tasks +- Deliverable + +### Recommended Solutions + +#### Option 1: Enhanced Flat Structure (Quick Win) +Keep single introduction.md but add all reports and better organization. + +```markdown +# Summary + +[Introduction: The 30-Day Product Analytics Masterclass](./introduction.md) + +--- + +# Week 1 Deliverables: Foundations & Framing + +- [Day 3: Instrumentation Plan](./Journals_Instrumentation_Plan.md) +- [Day 6: Dashboard Specification](./Journals_Launch_Monitoring_Dashboard.md) +- [Day 7: Pre-Mortem Memo](./Pre_Mortem_Memo.md) + +# Week 2 Deliverables: Launch & Monitoring + +- [Day 12: Week 1 Launch Summary](./Week1_Launch_Summary.md) + +# Week 3 Deliverables: Causal Impact Analysis + +- [Day 13: A/B Test Preliminary Analysis](./ab_test_analysis.md) +- [Day 15: A/B Test Final Readout](./AB_Test_Final_Readout.md) +- [Day 16: Difference-in-Differences Assessment](./DiD_Critical_Assessment.md) + +# Week 4 Deliverables: Strategic Review + +- [Day 27-30: Quarterly Business Review](./qbr_presentation.md) + +--- + +# Reference Materials + +- [Course Overview](./introduction.md#course-overview) +- [Glossary](./introduction.md#glossary) *(if exists)* +``` + +**Pros:** +- Quick to implement +- Includes all reports +- Organizes by week +- Better than current + +**Cons:** +- Still single-file for curriculum +- Can't navigate to specific days easily + +#### Option 2: Week-Based Chapters (Requires Content Split) +Split introduction.md into week files or link to Content/Day_XX files. + +```markdown +# Summary + +[Course Overview](./introduction.md) + +--- + +# Week 1: Foundations & Framing + +- [Day 1: Data Warehouse & Opportunity Discovery](./week-1/day-01.md) +- [Day 2: Opportunity Sizing & Business Case](./week-1/day-02.md) +- [Day 3: Instrumentation Plan](./week-1/day-03.md) +- [Day 4: A/B Test Design](./week-1/day-04.md) +- [Day 5: Difference-in-Differences Design](./week-1/day-05.md) +- [Day 6: BI Dashboard Specification](./week-1/day-06.md) +- [Day 7: Pre-Mortem Memo](./week-1/day-07.md) + +# Week 2: Launch & Monitoring + +- [Day 8: Launch Day Command Center](./week-2/day-08.md) +... + +# Deliverables & Reports + +- [Instrumentation Plan](./Journals_Instrumentation_Plan.md) +- [Pre-Mortem Memo](./Pre_Mortem_Memo.md) +- [A/B Test Final Readout](./AB_Test_Final_Readout.md) +... +``` + +**Pros:** +- Clear day-by-day navigation +- Aligns with pedagogical structure +- Easy to jump to specific content + +**Cons:** +- Requires splitting or copying Content/ files +- More complex build process +- Depends on Issue #2 and #3 resolution + +#### Option 3: Hybrid with Anchor Links +Use anchor links to navigate within introduction.md. + +```markdown +# Summary + +[Course Overview](./introduction.md) + +--- + +# Week 1: Foundations & Framing + +- [Day 1: Data Warehouse & Opportunity Discovery](./introduction.md#day-01-the-data-warehouse--opportunity-discovery) +- [Day 2: Opportunity Sizing](./introduction.md#day-02-opportunity-sizing--the-business-case) +... +``` + +**Pros:** +- Works with current single-file structure +- Easy to implement +- Good navigation experience + +**Cons:** +- Requires consistent anchor IDs in introduction.md +- Long page load for introduction.md +- May not work well with search + +#### Option 4: Dynamic Generation from Content/ +Generate SUMMARY.md automatically from Content/Day_XX_Topic folders. + +```bash +#!/bin/bash +# Generate SUMMARY.md dynamically + +echo "# Summary" > SUMMARY.md +echo "" >> SUMMARY.md +echo "[Course Overview](./introduction.md)" >> SUMMARY.md +echo "" >> SUMMARY.md + +for week in 1 2 3 4; do + echo "# Week $week" >> SUMMARY.md + # Logic to extract week title from curriculum + + for day in ...; do + # Extract day title from Content/Day_XX/README.md + # Add line to SUMMARY.md + done +done +``` + +**Pros:** +- Automated and maintainable +- Always in sync with Content/ +- Reduces manual work + +**Cons:** +- Complex script logic +- Requires Content/ to be authoritative source +- Depends on Issue #2 resolution + +### Recommended Approach + +**Phase 1 (Immediate - Option 1):** +1. Update SUMMARY.md generation to include ALL report files +2. Organize by week-based sections +3. Add better section headers +4. Quick win for users + +**Phase 2 (After Issue #2 & #3 resolution):** +1. Decide on content structure (flat vs. week-based) +2. Implement Option 2 or 3 based on that decision +3. Consider dynamic generation (Option 4) for maintainability + +### Script Changes Needed + +Update `scripts/bash/build_book.sh`: + +```bash +# Before SUMMARY.md generation, extract week titles from curriculum +WEEK1_TITLE="Foundations & Framing" +WEEK2_TITLE="The Crucible - Monitoring & Triage" +WEEK3_TITLE="The Deep Dive - Causal Impact" +WEEK4_TITLE="The Strategy - From Analyst to Influencer" + +# Generate enhanced SUMMARY.md +cat > "${BOOK_SRC_DIR}/SUMMARY.md" << 'EOT' +# Summary + +[Introduction: The 30-Day Product Analytics Masterclass](./introduction.md) + +--- + +# Week 1 Deliverables: Foundations & Framing + +- [Day 3: Instrumentation Plan](./Journals_Instrumentation_Plan.md) +- [Day 6: Dashboard Specification](./Journals_Launch_Monitoring_Dashboard.md) +- [Day 7: Pre-Mortem Memo](./Pre_Mortem_Memo.md) + +# Week 2 Deliverables: Launch & Monitoring + +- [Day 12: Week 1 Launch Summary](./Week1_Launch_Summary.md) + +# Week 3 Deliverables: Causal Impact Analysis + +- [Day 13: A/B Test Preliminary Analysis](./ab_test_analysis.md) +- [Day 15: A/B Test Final Readout](./AB_Test_Final_Readout.md) +- [Day 16: Difference-in-Differences Assessment](./DiD_Critical_Assessment.md) + +# Week 4 Deliverables: Strategic Review + +- [Day 27-30: Quarterly Business Review](./qbr_presentation.md) +EOT +``` + +### Testing Checklist + +- [ ] All listed files exist in book-src/src/ +- [ ] All links work when mdBook is built +- [ ] Section headers render correctly +- [ ] Navigation is logical and clear +- [ ] No broken links +- [ ] Mobile navigation works (mdBook responsive) + +### mdBook Features to Leverage + +- **Search**: Works better with smaller files (favors Option 2) +- **Printing**: Single file easier (favors current structure) +- **Progress Tracking**: Week-based better (favors Option 2) +- **Loading Speed**: Smaller files better (favors Option 2) + +### Definition of Done + +**Phase 1 (Immediate):** +- [ ] SUMMARY.md includes all report files +- [ ] Content organized by week sections +- [ ] Clear, descriptive link text +- [ ] All links functional after build +- [ ] Script updated and tested + +**Phase 2 (Future):** +- [ ] Navigation structure aligns with final content organization +- [ ] Consider dynamic generation if maintainability becomes issue +- [ ] User testing of navigation experience + +### Related Issues +- Issue #2 (Content folder - source of truth decision) +- Issue #3 (Week directory structure decision) +- Issue #4 (Empty report files - affects what to list) + +--- + +## Issue #7: Comprehensive codebase audit and cleanup roadmap + +**Labels:** `epic`, `cleanup`, `planning` +**Priority:** High +**Effort:** Medium (4-6 hours for audit, varies for cleanup) + +### Problem Description + +This is a meta-issue to track the comprehensive cleanup of the Product Analytics MasterClass repository. It consolidates findings from Issues #1-6 and identifies any additional files, directories, or configurations that may be unnecessary or need attention. + +### Scope of Audit + +#### 1. File System Analysis +- [ ] Identify duplicate files across different directories +- [ ] Find orphaned files (not referenced by any build process or documentation) +- [ ] Locate deprecated or outdated scripts +- [ ] Identify large files that could be optimized or removed +- [ ] Find configuration files that may be unused + +#### 2. Code Analysis +- [ ] Identify unused Python scripts in `/scripts/python/` +- [ ] Check for unused SQL scripts in `/scripts/sql/` +- [ ] Review shell scripts for redundancy +- [ ] Check notebooks for obsolete or duplicate content + +#### 3. Dependency Analysis +- [ ] Review `environment.yml` for unused packages +- [ ] Check `requirements.txt` (if exists) for consistency +- [ ] Verify Docker dependencies are necessary +- [ ] Check for version conflicts + +#### 4. Build System Analysis +- [ ] Verify all build outputs are in `.gitignore` +- [ ] Check for unused build configurations +- [ ] Identify intermediate build artifacts +- [ ] Review CI/CD efficiency + +### Directories to Audit + +#### /Content/ (368 KB) +**Questions:** +- Is this actively used or a legacy structure? +- Does it duplicate information in other locations? +- Should it be the source of truth or consolidated? +- Related: **Issue #2** + +#### /book-src/ (92 KB) +**Questions:** +- Is the current flat structure optimal? +- Should week directories be restored? +- What belongs in book-src vs. Content? +- Related: **Issue #3** + +#### /reports/ (32 KB) +**Questions:** +- Which reports are complete vs. placeholders? +- Should empty reports be removed or filled? +- Is the subdirectory `dashboards/` structure needed? +- Related: **Issue #4** + +#### /notebooks/ (124 KB) +**Questions:** +- Are all notebooks referenced in the curriculum? +- Are there duplicate or obsolete notebooks? +- Is the organization optimal for learners? +- Do notebooks follow naming conventions? + +#### /scripts/ (36 KB) +**Questions:** +- Why both `/scripts/` and `/scripts/bash/`? +- Which scripts are actively used? +- Are there redundant scripts? +- Related: **Issue #1** + +#### /src/ (if exists) +**Questions:** +- What is this directory for? +- Does it overlap with other directories? +- Is it needed or legacy? + +#### /solutions/ (if exists) +**Questions:** +- Are these example solutions for exercises? +- Should they be in a separate branch? +- Are they documented? + +### Files to Review + +#### Configuration Files +- [ ] `.gitignore` - Complete and accurate? +- [ ] `.dockerignore` - Optimized for build? +- [ ] `book.toml` - Settings appropriate? +- [ ] `docker-compose.yml` - All services needed? +- [ ] `Dockerfile` - Optimized and current? +- [ ] `environment.yml` - Dependencies minimal and current? + +#### Documentation Files +- [ ] `README.md` - Comprehensive and current? +- [ ] `CONTRIBUTING.md` - Clear and accurate? +- [ ] `CODE_OF_CONDUCT.md` - Standard and appropriate? +- [ ] `LICENSE` - Correct and clear? +- Related: **Issue #5** + +#### Script Files +- [ ] `generate_skeleton.sh` - What does this do? Still needed? +- [ ] All files in `/scripts/bash/`, `/scripts/python/`, `/scripts/sql/` + +### Audit Methodology + +#### Step 1: Inventory (1-2 hours) +```bash +# Generate comprehensive file listing +find . -type f -not -path './.git/*' > /tmp/all_files.txt + +# Categorize by type +find . -name "*.md" | sort > /tmp/markdown_files.txt +find . -name "*.sh" | sort > /tmp/shell_scripts.txt +find . -name "*.py" | sort > /tmp/python_files.txt +find . -name "*.ipynb" | sort > /tmp/notebooks.txt +find . -name "*.sql" | sort > /tmp/sql_files.txt +find . -name "*.yml" -or -name "*.yaml" | sort > /tmp/config_files.txt + +# Size analysis +du -sh */ | sort -hr > /tmp/directory_sizes.txt +find . -type f -size +100k -not -path './.git/*' > /tmp/large_files.txt +``` + +#### Step 2: Reference Analysis (2 hours) +For each file, check if it's referenced by: +- [ ] Build scripts +- [ ] Documentation +- [ ] Other code files +- [ ] Git history (recent commits) +- [ ] Configuration files + +Create categories: +1. **Active**: Recently used and referenced +2. **Orphaned**: Exists but not referenced anywhere +3. **Deprecated**: Old version of something newer +4. **Unclear**: Purpose uncertain, needs investigation + +#### Step 3: Dependency Graph (1 hour) +Create a visual or textual map of file dependencies: +``` +README.md + ├─> scripts/bash/build_book.sh + ├─> CONTRIBUTING.md + └─> book.toml + +build_book.sh + ├─> 30-Day Product Analytics Masterclass.md + ├─> reports/*.md + └─> book-src/src/SUMMARY.md +``` + +#### Step 4: Cleanup Recommendations (1-2 hours) +For each file/directory, recommend: +- **Keep**: Essential, actively used +- **Update**: Keep but needs changes +- **Archive**: Move to separate branch or tag +- **Delete**: No longer needed +- **Investigate**: Unclear, need more info + +### Common Issues to Look For + +1. **Naming Inconsistencies** + - Mixed case conventions (CamelCase vs. snake_case) + - Inconsistent prefixes/suffixes + - Ambiguous names + +2. **Organizational Issues** + - Files in wrong directories + - Flat structure where hierarchy would help + - Too deep nesting where flat would work + +3. **Duplication** + - Same content in multiple files + - Redundant scripts + - Duplicate configurations + +4. **Obsolete Content** + - References to deleted features + - Old version artifacts + - Deprecated approaches + +5. **Missing Documentation** + - Scripts without comments + - Directories without README + - Unclear file purposes + +### Cleanup Priorities + +#### P0 - Critical (Do First) +- Remove duplicate build scripts (Issue #1) +- Fix broken references in documentation (Issue #5) +- Decide on Content/ folder purpose (Issue #2) + +#### P1 - High (Do Soon) +- Improve SUMMARY.md (Issue #6) +- Address empty report files (Issue #4) +- Resolve week directory question (Issue #3) + +#### P2 - Medium (Do When Possible) +- Optimize notebook organization +- Standardize naming conventions +- Clean up unused dependencies + +#### P3 - Low (Nice to Have) +- Improve directory structure +- Add more comprehensive .gitignore +- Create developer documentation + +### Deliverables + +1. **Audit Report** (Markdown document) + - Complete inventory of files + - Reference analysis for each file + - Dependency graph + - Size analysis + - Recommendations with justifications + +2. **Cleanup Roadmap** (GitHub Project or Markdown) + - Prioritized list of cleanup tasks + - Estimated effort for each + - Dependencies between tasks + - Risk assessment + +3. **Decision Log** (Markdown document) + - Key decisions made during audit + - Rationale for each decision + - Trade-offs considered + - Stakeholders consulted + +4. **Updated Issues** + - Create specific issues for each cleanup task + - Link related issues + - Assign priorities and labels + +### Example Audit Report Structure + +```markdown +# Codebase Audit Report + +## Executive Summary +- Total files: XXX +- Directories audited: XX +- Issues identified: XX +- Recommendations: XX + +## Directory Analysis + +### /Content/ (368 KB) +**Status:** Under review +**References:** None found in build scripts +**Recommendation:** Decision needed (Issue #2) +**Rationale:** ... + +### /scripts/ (36 KB) +**Status:** Has duplicates +**References:** README.md, build process +**Recommendation:** Consolidate (Issue #1) +**Rationale:** ... + +## File Categories + +### Active Files (Keep) +1. 30-Day Product Analytics Masterclass.md - Main curriculum +2. scripts/bash/build_book.sh - Primary build script +... + +### Orphaned Files (Investigate) +1. scripts/old_backup.sh - No references, old date +2. data/test_data.csv - Not in .gitignore, unclear purpose +... + +### Deprecated Files (Remove) +1. scripts/build_book.sh - Superseded by bash version +... + +## Recommendations Summary + +### Immediate Actions +- [ ] Delete: scripts/build_book.sh +- [ ] Update: README.md +- [ ] Decide: Content/ folder purpose + +### Short-term Actions +- [ ] Consolidate: Report files +- [ ] Enhance: SUMMARY.md +... + +## Dependencies Graph +[Visual or text representation] + +## Risk Assessment +- Low risk: Documentation updates +- Medium risk: Script consolidation +- High risk: Directory restructuring +``` + +### Definition of Done + +- [ ] Complete inventory of all files and directories +- [ ] Reference analysis completed for all files +- [ ] Recommendations provided for each item +- [ ] Cleanup roadmap created and prioritized +- [ ] Decision log documenting key choices +- [ ] Related issues updated with specific tasks +- [ ] Audit report reviewed and approved + +### Related Issues +- Issue #1: Consolidate duplicate build scripts +- Issue #2: Evaluate Content folder +- Issue #3: Review week directory structure +- Issue #4: Populate or remove empty reports +- Issue #5: Update documentation +- Issue #6: Improve SUMMARY.md generation + +--- + +## Summary: Issues Creation Roadmap + +All issues above should be created on GitHub one at a time in the following order: + +1. **Issue #1**: Consolidate duplicate build scripts (Quick win, unblocks others) +2. **Issue #2**: Evaluate Content folder usage (Strategic decision needed) +3. **Issue #3**: Week directory structure (Depends on #2) +4. **Issue #4**: Empty report files (Can be done in parallel) +5. **Issue #5**: Documentation updates (Do after #1-3 are decided) +6. **Issue #6**: SUMMARY.md generation (Depends on #2 and #3) +7. **Issue #7**: Comprehensive audit (Meta-issue to track overall progress) + +Each issue should be created with: +- Appropriate labels +- Clear title +- Complete description from this document +- Related issues linked +- Priority and effort estimates + From 3fb7c2f1bcb07c123b044e80492f3dee43fdde58 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 13 Nov 2025 22:25:19 +0000 Subject: [PATCH 4/5] Add comprehensive documentation for creating cleanup issues Co-authored-by: moshesham <7207587+moshesham@users.noreply.github.com> --- CLEANUP_SUMMARY.md | 167 ++++++++++++++++++++++++++++++++++++++++ HOW_TO_CREATE_ISSUES.md | 157 +++++++++++++++++++++++++++++++++++++ 2 files changed, 324 insertions(+) create mode 100644 CLEANUP_SUMMARY.md create mode 100644 HOW_TO_CREATE_ISSUES.md diff --git a/CLEANUP_SUMMARY.md b/CLEANUP_SUMMARY.md new file mode 100644 index 0000000..f61085f --- /dev/null +++ b/CLEANUP_SUMMARY.md @@ -0,0 +1,167 @@ +# Codebase Cleanup - Issues Summary + +## Overview + +This directory contains documentation for a comprehensive cleanup of the Product Analytics MasterClass repository. Seven detailed GitHub issues have been prepared to systematically address organizational and structural problems in the codebase. + +## Files in This Package + +1. **ISSUES_TO_CREATE.md** (Main document - ~500 lines) + - Contains 7 fully detailed GitHub issues + - Each issue includes problem description, analysis, solutions, and definition of done + - Ready to be copied to GitHub Issues one at a time + +2. **HOW_TO_CREATE_ISSUES.md** (Instructions) + - Step-by-step guide for creating issues on GitHub + - Includes manual method, CLI method, and automated script + - Provides checklist and best practices + +3. **.github/ISSUE_TEMPLATE.md** (Template) + - Reusable template for future cleanup issues + - Ensures consistency in issue creation + +## The 7 Issues + +### Issue #1: Consolidate duplicate build_book.sh scripts +- **Priority:** High +- **Effort:** Small (1-2 hours) +- **Labels:** `cleanup`, `documentation`, `scripts` +- **Summary:** Remove duplicate build scripts and standardize on one + +### Issue #2: Evaluate Content folder usage and redundancy +- **Priority:** Medium +- **Effort:** Medium (3-4 hours) +- **Labels:** `cleanup`, `documentation`, `content-structure` +- **Summary:** Determine purpose of Content/ folder and eliminate redundancy + +### Issue #3: Restore or remove week directory structure in book-src +- **Priority:** High +- **Effort:** Small-Medium (2-3 hours) +- **Labels:** `cleanup`, `structure`, `decision-needed` +- **Summary:** Decide whether to restore week-based organization or keep flat structure + +### Issue #4: Populate or remove empty placeholder report files +- **Priority:** Medium +- **Effort:** Large (8-12 hours if creating content) +- **Labels:** `cleanup`, `content`, `reports` +- **Summary:** Fill in or remove 9 empty/placeholder report files + +### Issue #5: Review and update documentation files for consistency +- **Priority:** Medium +- **Effort:** Medium (3-4 hours) +- **Labels:** `documentation`, `cleanup`, `consistency` +- **Summary:** Update README, CONTRIBUTING, and other docs to reflect current structure + +### Issue #6: Improve SUMMARY.md generation and book navigation +- **Priority:** High +- **Effort:** Small-Medium (2-3 hours) +- **Labels:** `enhancement`, `user-experience`, `mdbook` +- **Summary:** Enhance mdBook table of contents for better navigation + +### Issue #7: Comprehensive codebase audit and cleanup roadmap +- **Priority:** High +- **Effort:** Medium (4-6 hours for audit) +- **Labels:** `epic`, `cleanup`, `planning` +- **Summary:** Meta-issue to track overall cleanup progress and identify additional items + +## Recommended Creation Order + +1. **Issue #1** - Quick win that unblocks other work +2. **Issue #2** - Strategic decision about content organization +3. **Issue #3** - Depends on Issue #2 decision +4. **Issue #4** - Can be done in parallel with others +5. **Issue #5** - Should be done after #1-3 are resolved +6. **Issue #6** - Depends on decisions from #2 and #3 +7. **Issue #7** - Meta-issue to track everything + +## Quick Start + +### Option 1: Manual Creation (5-10 minutes per issue) +1. Open `ISSUES_TO_CREATE.md` +2. Copy each issue section +3. Create new GitHub issue +4. Paste content and add labels +5. Repeat for all 7 issues + +### Option 2: Automated Creation (5 minutes total) +1. Read `HOW_TO_CREATE_ISSUES.md` +2. Use the provided bash script +3. Run: `./create_issues.sh` +4. All 7 issues created automatically + +## What's Included in Each Issue + +Every issue contains: + +- ✅ **Problem Description** - Clear explanation of what needs fixing +- ✅ **Current State Analysis** - Detailed assessment with file references +- ✅ **Impact** - Why this matters and what problems it causes +- ✅ **Recommended Solutions** - Multiple options with pros/cons +- ✅ **Files to Check/Update** - Specific file checklist +- ✅ **Definition of Done** - Clear completion criteria +- ✅ **Related Issues** - Cross-references to other issues +- ✅ **Additional Context** - Background information and rationale + +## Total Effort Estimate + +- **Audit & Planning:** 6-10 hours (Issues #7, #2, #3) +- **Quick Wins:** 3-5 hours (Issues #1, #6) +- **Documentation:** 3-4 hours (Issue #5) +- **Content Creation:** 8-12 hours (Issue #4, if creating content) + +**Total:** 20-31 hours depending on decisions made + +## Dependencies Between Issues + +``` +Issue #1 (Build scripts) + └─> Issue #5 (Documentation) - docs reference build scripts + +Issue #2 (Content folder) + └─> Issue #3 (Week directories) - structure decision + └─> Issue #6 (SUMMARY.md) - navigation depends on structure + +Issue #3 (Week directories) + └─> Issue #6 (SUMMARY.md) - navigation depends on structure + +Issue #4 (Reports) + └─> Issue #6 (SUMMARY.md) - what to list in TOC + +Issue #7 (Audit) + └─> All issues - tracks overall progress +``` + +## Success Criteria + +This cleanup effort will be successful when: + +1. ✅ No duplicate or conflicting files exist +2. ✅ All documentation is current and accurate +3. ✅ Build process is streamlined and well-documented +4. ✅ Content organization is clear and logical +5. ✅ All files have a clear purpose and are referenced +6. ✅ Navigation experience is smooth for learners +7. ✅ Repository is easy to maintain going forward + +## Getting Help + +- Read the full issue descriptions in `ISSUES_TO_CREATE.md` +- Follow the creation guide in `HOW_TO_CREATE_ISSUES.md` +- Use `.github/ISSUE_TEMPLATE.md` for any new cleanup issues +- Reference this summary for overview and priorities + +## Next Steps + +1. Review `ISSUES_TO_CREATE.md` to understand all issues +2. Follow `HOW_TO_CREATE_ISSUES.md` to create issues on GitHub +3. Prioritize and assign issues to team members +4. Begin work on Issue #1 (quick win) +5. Make strategic decisions on Issues #2 and #3 +6. Track progress using Issue #7 as epic + +--- + +**Created:** 2025-11-13 +**Repository:** Analytical-Guide/Product-Analytics-MasterClass +**Purpose:** Comprehensive codebase cleanup and organization + diff --git a/HOW_TO_CREATE_ISSUES.md b/HOW_TO_CREATE_ISSUES.md new file mode 100644 index 0000000..f637d13 --- /dev/null +++ b/HOW_TO_CREATE_ISSUES.md @@ -0,0 +1,157 @@ +# How to Create GitHub Issues from ISSUES_TO_CREATE.md + +This guide explains how to create the cleanup issues one at a time on GitHub. + +## Prerequisites + +- GitHub account with write access to the repository +- Familiarity with GitHub Issues interface + +## Method 1: Using GitHub Web UI (Recommended) + +For each issue in `ISSUES_TO_CREATE.md`: + +1. **Navigate to Issues** + - Go to https://github.com/Analytical-Guide/Product-Analytics-MasterClass/issues + - Click "New Issue" button + +2. **Copy Issue Content** + - Open `ISSUES_TO_CREATE.md` in the repository + - Find the issue section (e.g., "## Issue #1: ...") + - Copy everything from the title to the end of that issue's content + - Stop before the next "## Issue #X" heading + +3. **Create the Issue** + - Paste the copied content into the issue description + - Extract the title from the first line (e.g., "Consolidate duplicate build_book.sh scripts") + - Add appropriate labels as specified in the issue + - Submit the issue + +4. **Repeat for Each Issue** + - Follow the recommended order: + 1. Issue #1 (Build scripts) + 2. Issue #2 (Content folder) + 3. Issue #3 (Week directories) + 4. Issue #4 (Report files) + 5. Issue #5 (Documentation) + 6. Issue #6 (SUMMARY.md) + 7. Issue #7 (Audit roadmap) + +## Method 2: Using GitHub CLI + +If you have `gh` CLI installed and authenticated: + +```bash +# Navigate to repository +cd /path/to/Product-Analytics-MasterClass + +# Create Issue #1 +gh issue create \ + --title "Consolidate duplicate build_book.sh scripts" \ + --label "cleanup,documentation,scripts" \ + --body-file <(sed -n '/^## Issue #1/,/^## Issue #2/p' ISSUES_TO_CREATE.md | head -n -1) + +# Create Issue #2 +gh issue create \ + --title "Evaluate Content folder usage and redundancy" \ + --label "cleanup,documentation,content-structure" \ + --body-file <(sed -n '/^## Issue #2/,/^## Issue #3/p' ISSUES_TO_CREATE.md | head -n -1) + +# ... repeat for issues #3-7 +``` + +## Method 3: Automated Script + +A bash script to create all issues at once: + +```bash +#!/bin/bash + +# Array of issue titles +titles=( + "Consolidate duplicate build_book.sh scripts" + "Evaluate Content folder usage and redundancy" + "Restore or remove week directory structure in book-src" + "Populate or remove empty placeholder report files" + "Review and update documentation files for consistency" + "Improve SUMMARY.md generation and book navigation" + "Comprehensive codebase audit and cleanup roadmap" +) + +# Array of labels +labels=( + "cleanup,documentation,scripts" + "cleanup,documentation,content-structure" + "cleanup,structure,decision-needed" + "cleanup,content,reports" + "documentation,cleanup,consistency" + "enhancement,user-experience,mdbook" + "epic,cleanup,planning" +) + +# Create each issue +for i in {1..7}; do + echo "Creating Issue #$i: ${titles[$i-1]}" + + # Extract issue content from ISSUES_TO_CREATE.md + if [ $i -eq 7 ]; then + # Last issue goes to end of file + body=$(sed -n "/^## Issue #$i:/,\$p" ISSUES_TO_CREATE.md) + else + # Other issues go until next issue + next=$((i + 1)) + body=$(sed -n "/^## Issue #$i:/,/^## Issue #$next:/p" ISSUES_TO_CREATE.md | head -n -1) + fi + + # Create the issue + echo "$body" | gh issue create \ + --title "${titles[$i-1]}" \ + --label "${labels[$i-1]}" \ + --body-file - + + echo "✓ Issue #$i created" + echo "" + + # Brief pause to avoid rate limiting + sleep 2 +done + +echo "All issues created successfully!" +``` + +Save this as `create_issues.sh`, make it executable with `chmod +x create_issues.sh`, and run it. + +## Issue Creation Checklist + +For each issue you create, verify: + +- [ ] Title matches the issue heading +- [ ] All content from that issue section is included +- [ ] Labels are added as specified +- [ ] The issue number is visible in the created issue +- [ ] Related issues are cross-referenced (you may need to edit and add links after all issues are created) + +## After Creating All Issues + +1. **Update Cross-References** + - Go back through each issue + - Update "Related Issues" sections with actual issue numbers + - Add links to related issues + +2. **Create Project Board (Optional)** + - Create a GitHub Project for "Codebase Cleanup" + - Add all 7 issues to the project + - Organize by priority/status + +3. **Assign and Prioritize** + - Assign issues to team members + - Set milestones if appropriate + - Add to project boards + +## Notes + +- The issues are designed to be created in order, as later issues reference earlier ones +- Each issue is self-contained with full context +- Issues #1-3 should be prioritized as they unblock other work +- Issue #7 serves as an epic to track overall progress + From 242375575d2ab01277eca22cadd5faa07612aaba Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 13 Nov 2025 22:26:48 +0000 Subject: [PATCH 5/5] Add quick reference guide for cleanup issues documentation Co-authored-by: moshesham <7207587+moshesham@users.noreply.github.com> --- README_ISSUES.md | 143 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 143 insertions(+) create mode 100644 README_ISSUES.md diff --git a/README_ISSUES.md b/README_ISSUES.md new file mode 100644 index 0000000..6e20a21 --- /dev/null +++ b/README_ISSUES.md @@ -0,0 +1,143 @@ +# 📋 Codebase Cleanup Issues - Quick Reference + +> **Purpose:** This repository now contains comprehensive documentation for 7 GitHub issues to clean up and organize the codebase. + +## 🎯 Quick Start + +1. **Read:** [CLEANUP_SUMMARY.md](CLEANUP_SUMMARY.md) - Executive overview +2. **Create:** [HOW_TO_CREATE_ISSUES.md](HOW_TO_CREATE_ISSUES.md) - Step-by-step guide +3. **Reference:** [ISSUES_TO_CREATE.md](ISSUES_TO_CREATE.md) - Full issue details + +## 📊 Issues Overview + +| # | Title | Priority | Effort | Labels | +|---|-------|----------|--------|--------| +| 1 | Consolidate duplicate build scripts | High | 1-2h | `cleanup`, `documentation`, `scripts` | +| 2 | Evaluate Content folder usage | Medium | 3-4h | `cleanup`, `documentation`, `content-structure` | +| 3 | Restore/remove week directories | High | 2-3h | `cleanup`, `structure`, `decision-needed` | +| 4 | Populate/remove empty reports | Medium | 8-12h | `cleanup`, `content`, `reports` | +| 5 | Update documentation files | Medium | 3-4h | `documentation`, `cleanup`, `consistency` | +| 6 | Improve SUMMARY.md generation | High | 2-3h | `enhancement`, `user-experience`, `mdbook` | +| 7 | Comprehensive codebase audit | High | 4-6h | `epic`, `cleanup`, `planning` | + +**Total Estimated Effort:** 20-31 hours + +## 🔄 Creation Order + +``` +1. Issue #1 (Build scripts) → Quick win, unblocks others +2. Issue #2 (Content folder) → Strategic decision needed +3. Issue #3 (Week directories) → Depends on #2 +4. Issue #4 (Report files) → Can run in parallel +5. Issue #5 (Documentation) → After #1-3 resolved +6. Issue #6 (SUMMARY.md) → Depends on #2 and #3 +7. Issue #7 (Audit epic) → Tracks overall progress +``` + +## 📁 Files in This Package + +``` +. +├── ISSUES_TO_CREATE.md # Main document (1,581 lines) +│ ├── Issue #1: Build scripts +│ ├── Issue #2: Content folder +│ ├── Issue #3: Week directories +│ ├── Issue #4: Report files +│ ├── Issue #5: Documentation +│ ├── Issue #6: SUMMARY.md +│ └── Issue #7: Audit roadmap +│ +├── HOW_TO_CREATE_ISSUES.md # Creation guide (157 lines) +│ ├── Manual method (recommended) +│ ├── CLI method with gh +│ └── Automated script method +│ +├── CLEANUP_SUMMARY.md # Executive overview (167 lines) +│ ├── Overview and purpose +│ ├── Success criteria +│ └── Next steps +│ +└── .github/ + └── ISSUE_TEMPLATE.md # Template for future issues +``` + +## 🚀 How to Create Issues + +### Option 1: Manual (Recommended) +```bash +1. Open GitHub Issues page +2. Click "New Issue" +3. Copy issue from ISSUES_TO_CREATE.md +4. Paste into new issue +5. Add labels as specified +6. Submit +7. Repeat for remaining issues +``` + +### Option 2: Automated +```bash +# Review the script in HOW_TO_CREATE_ISSUES.md +# Then run: +./create_issues.sh +``` + +## 🎁 What Each Issue Includes + +Every issue contains: + +- ✅ Clear problem description +- ✅ Current state analysis with file references +- ✅ Impact assessment +- ✅ Multiple solution options (pros/cons) +- ✅ Specific file checklists +- ✅ Definition of done +- ✅ Related issue cross-references +- ✅ Priority and effort estimates + +## 🔗 Dependencies + +``` +Issue #1 ─┬─→ Issue #5 (docs reference scripts) + │ +Issue #2 ─┼─→ Issue #3 (structure decision) + ├─→ Issue #6 (navigation depends on structure) + │ +Issue #3 ─┼─→ Issue #6 (navigation depends on structure) + │ +Issue #4 ─┴─→ Issue #6 (what to list in TOC) + +Issue #7 ───→ All issues (meta-tracker) +``` + +## ✅ Success Criteria + +Cleanup is successful when: + +1. ✅ No duplicate or conflicting files +2. ✅ All documentation is current and accurate +3. ✅ Build process is streamlined +4. ✅ Content organization is clear +5. ✅ All files have clear purpose +6. ✅ Navigation is smooth for learners +7. ✅ Repository is easy to maintain + +## 📞 Getting Help + +- **Full details:** See [ISSUES_TO_CREATE.md](ISSUES_TO_CREATE.md) +- **How to create:** See [HOW_TO_CREATE_ISSUES.md](HOW_TO_CREATE_ISSUES.md) +- **Overview:** See [CLEANUP_SUMMARY.md](CLEANUP_SUMMARY.md) +- **Template:** See [.github/ISSUE_TEMPLATE.md](.github/ISSUE_TEMPLATE.md) + +## 📝 Notes + +- Issues designed to be created in order +- Each issue is self-contained +- Issues #1-3 are high priority +- Issue #7 tracks overall progress +- Estimated 20-31 hours total effort + +--- + +**Created:** 2025-11-13 +**Repository:** Analytical-Guide/Product-Analytics-MasterClass +**Branch:** copilot/review-bash-files-syllabus