S02-26-Equipo-48--EquineLead/src/data-science/scripts/demo_simulation.py at dev · No-Country-simulation/S02-26-Equipo-48--EquineLead · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import random
import pandas as pd
from datetime import datetime, timedelta, timezone
from model.lead_scoring import LeadScoring
import matplotlib.ticker as ticker

# Configuración de Matplotlib para visualización gráfica
import matplotlib
matplotlib.use("TkAgg") # Backend necesario para entornos Linux con interfaz gráfica
import matplotlib.pyplot as plt

# --- SIMULATION SETTINGS ---
# Number of synthetic leads to generate; change this to resize the run.
num_leads = 100

# Lead segments the generator draws from.
user_types = ["B2B", "B2C"]

# Interaction type codes used when building random interactions.
interaction_types = [
    1,  # visit
    2,  # click
    3,  # sign-up
    4,  # inquiry
    5,  # contact
]

# Collects one result row (a dict) per simulated lead.
leads_data = []

print(f"🚀 Generando simulación de {num_leads} leads...")

# Build randomized lead profiles to exercise the scoring model.
for _ in range(num_leads):
    user_type = random.choice(user_types)
    user_budget = random.randint(100, 60000)  # budget drawn from [100, 60000]

    # Each lead gets 1 to 4 random interactions. The weights skew the draw
    # towards visits and clicks over direct contacts, and every interaction
    # is dated between 1 and 400 days before now (UTC).
    interactions = [
        {
            "type": random.choices(interaction_types, weights=[4, 3, 2, 1, 1])[0],
            "date": datetime.now(timezone.utc) - timedelta(days=random.randint(1, 400)),
        }
        for _ in range(random.randint(1, 4))
    ]

    # --- SCORING ENGINE ---
    # The score is computed before the classification is requested, in the
    # same order as the calls were originally made.
    scorer = LeadScoring(user_type, user_budget, interactions)
    score = scorer.calculate_score()
    classification = scorer.get_classification()

    # Flatten the interaction codes into a readable "1, 3, 2" style string.
    interaction_codes = [str(entry["type"]) for entry in interactions]

    # One row per lead, kept for the Pandas analysis that follows.
    record = {
        "UserType": user_type,
        "Budget": user_budget,
        "Interactions": len(interactions),
        "InteractionDetail": ", ".join(interaction_codes),
        "Score": score,
        "Classification": classification,
    }
    leads_data.append(record)

# Tabular view of all simulated leads.
df = pd.DataFrame(leads_data)

# --- CONSOLE OUTPUT ---
# Preview of the first ten generated leads.
print("\n📊 Muestra de los primeros 10 leads generados:")
preview_rows = df.head(10)
print(preview_rows)

# Number of leads per classification label.
print("\n📈 Resumen estadístico de la clasificación:")
leads_per_class = df["Classification"].value_counts()
print(leads_per_class)

# --- CHARTS ---

# 1. Histogram: score distribution
# PURPOSE: show how often each score range occurs.
hist_fig, hist_ax = plt.subplots(figsize=(8, 5))
hist_ax.hist(df['Score'], bins=10, color='skyblue', edgecolor='black')
hist_ax.set_title(f'Distribución de Score de Leads (Simulación {num_leads} muestras)')
hist_ax.set_xlabel('Puntaje (Score)')
hist_ax.set_ylabel('Cantidad de Leads')
# Lead counts are whole numbers, so keep the Y ticks on integers.
hist_ax.yaxis.set_major_locator(ticker.MaxNLocator(integer=True))
hist_ax.grid(axis='y', linestyle='--', alpha=0.7)
plt.show()

# 2. Bar chart: Cold / Warm / Hot comparison
# PURPOSE: show how many leads fall into each classification.
plt.figure(figsize=(6, 4))
classification_counts = df['Classification'].value_counts()

# value_counts() orders labels by descending frequency, so a positional colour
# list would attach colours to whichever label happens to be most common.
# Pin both the bar order and the colour to the label itself instead.
# NOTE(review): assumes get_classification() yields "Cold" / "Warm" / "Hot"
# (matched case-insensitively) — confirm against LeadScoring. Labels outside
# that set are placed after the known ones, keep their frequency order, and
# are drawn in grey.
category_colors = {'cold': 'red', 'warm': 'orange', 'hot': 'green'}
category_rank = {name: position for position, name in enumerate(category_colors)}

labelled = [
    (label, str(label).strip().lower())
    for label in classification_counts.index
]
# sorted() is stable, so unknown labels keep their relative order at the end.
labelled.sort(key=lambda pair: category_rank.get(pair[1], len(category_rank)))

classification_counts = classification_counts.reindex(
    [label for label, _ in labelled]
)
bar_colors = [category_colors.get(key, 'gray') for _, key in labelled]

classification_counts.plot(kind='bar', color=bar_colors)
plt.title(f'Leads por Clasificación Final (n={num_leads})')
plt.xlabel('Categoría')
plt.ylabel('Cantidad')
# Lead counts are whole numbers, so keep the Y ticks on integers.
plt.gca().yaxis.set_major_locator(ticker.MaxNLocator(integer=True))
plt.xticks(rotation=0)
plt.show()

# 3. Boxplot: score distribution per classification
# PURPOSE: check whether each category covers a well-separated score range.
# DataFrame.boxplot(by=...) opens a figure of its own unless it is handed an
# Axes, which would leave an empty extra window and ignore the requested size.
# Create the Axes up front and pass it in explicitly.
box_fig, box_ax = plt.subplots(figsize=(8, 5))
df.boxplot(column='Score', by='Classification', patch_artist=True,
           boxprops=dict(facecolor="lightblue"), ax=box_ax)
box_ax.set_title(f'Distribución de Scores por Categoría (n={num_leads})')
# Clear the automatic "Boxplot grouped by ..." super-title added by pandas.
box_fig.suptitle("")
box_ax.set_xlabel('Clasificación')
box_ax.set_ylabel('Puntaje (Score)')
plt.show()