MasterChef/app.py at master · jordantete/MasterChef · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
import re
import streamlit as st
import pandas as pd
from collections import Counter
import plotly.express as px

@st.cache_data
def load_data():
    """Load ``recipes.csv`` and derive the columns the rest of the app relies on.

    Steps:
      1. Coerce ``recipe_id`` to numbers, drop rows whose ID is not numeric,
         and cast the remaining IDs to ``int``.
      2. Keep only the first row for each (normalised) ``recipe_id``.
      3. Add ``parsed_ingredients`` via ``parsed_ingredients_data``.
      4. Add ``num_ingredients`` (count of non-empty parsed names) and the
         boolean ``contains_fish`` / ``contains_meat`` flags.

    Returns:
        pandas.DataFrame: the cleaned recipes with the derived columns added.
    """
    df_recipes = pd.read_csv('recipes.csv')

    # Normalise the IDs *before* de-duplicating: raw values that differ only
    # textually (e.g. "12" and "12.0") must collapse to a single recipe.
    recipe_ids = pd.to_numeric(df_recipes['recipe_id'], errors='coerce')
    has_valid_id = recipe_ids.notna()
    # .copy() so the column assignment below targets an independent frame
    # rather than a slice of the raw CSV data.
    df_recipes = df_recipes[has_valid_id].copy()
    df_recipes['recipe_id'] = recipe_ids[has_valid_id].astype(int)
    df_recipes = df_recipes.drop_duplicates(subset='recipe_id', keep='first').copy()

    df_recipes = parsed_ingredients_data(df=df_recipes)
    # Empty names are not real ingredients, so they are excluded from the count.
    df_recipes['num_ingredients'] = df_recipes['parsed_ingredients'].apply(
        lambda names: sum(1 for name in names if name)
    )

    fish_keywords = ['poisson', 'saumon', 'merlu', 'cabillaud', 'truite', 'thon', 'bar', 'maquereau', 'dorade', 'sole', 'églefin', 'sardine', 'morue', 'turbot', 'lieu']
    meat_keywords = ['viande', 'poulet', 'boeuf', 'volaille', 'steak', 'entrecôte', 'porc', 'cochon', 'dinde', 'saucisse', 'lapin']
    df_recipes['contains_fish'] = df_recipes['parsed_ingredients'].apply(lambda x: contains_keywords(x, fish_keywords))
    df_recipes['contains_meat'] = df_recipes['parsed_ingredients'].apply(lambda x: contains_keywords(x, meat_keywords))
    return df_recipes

def remove_text_inside_parentheses(text):
    """Return *text* with every ``(...)`` group removed.

    A group runs from an opening parenthesis to the first closing one that
    follows it; an opening parenthesis with no closing partner is left as is.
    Whitespace around a removed group is not collapsed.
    """
    parenthesised_group = re.compile(r'\([^)]*\)')
    return parenthesised_group.sub('', text)

def parsed_ingredients_data(df):
    """Add a ``parsed_ingredients`` column holding each recipe's ingredient names.

    Each ``recipe_ingredients`` cell is split on newlines. For every line the
    parenthesised parts are removed, then only the text before the first
    ``-``, ``:`` or ``,`` is kept, lower-cased and stripped.

    Lines that reduce to an empty name are skipped, and a cell that is not a
    string (pandas yields NaN for a missing value) produces an empty set
    instead of raising.

    Args:
        df: DataFrame with a ``recipe_ingredients`` column. It is modified in
            place (the new column is added to it).

    Returns:
        The same DataFrame, with ``parsed_ingredients`` set to a ``set`` of
        names for every row.
    """
    def _ingredient_names(raw_ingredients):
        # A missing cell is a float NaN, which has no .split().
        if not isinstance(raw_ingredients, str):
            return set()

        names = set()
        for line in raw_ingredients.split('\n'):
            line = remove_text_inside_parentheses(line)
            # NOTE(review): splitting on a bare '-' also truncates hyphenated
            # names; presumably the separator is meant to introduce a
            # quantity -- confirm against the CSV format.
            name = line.split('-')[0].split(':')[0].split(',')[0].lower().strip()
            if name:
                names.add(name)
        return names

    df['parsed_ingredients'] = df['recipe_ingredients'].apply(_ingredient_names)
    return df

def filter_recipes_ingredients(st, df):
    """Render an ingredient picker and the recipes containing every picked ingredient.

    Args:
        st: object exposing ``multiselect``, ``dataframe`` and ``markdown``
            (the Streamlit module in this app).
        df: recipes DataFrame with ``recipe_id``, ``recipe_name``,
            ``parsed_ingredients`` and ``num_ingredients`` columns.

    Shows the matching recipes, a "nothing found" message, or a prompt to
    pick ingredients when the selection is empty.
    """
    # Offer every distinct ingredient name found across the recipes, sorted.
    ingredient_pool = set()
    for recipe_ingredients in df['parsed_ingredients'].tolist():
        ingredient_pool.update(recipe_ingredients)
    selected_ingredients = st.multiselect('Sélectionnez les ingrédients:', sorted(ingredient_pool))

    def has_every_selected(ingredients):
        return all(item in ingredients for item in selected_ingredients)

    filtered_recipes = df[df['parsed_ingredients'].apply(has_every_selected)]

    if not selected_ingredients:
        st.markdown(f"Veuillez sélectionner des ingrédients pour afficher les recettes correspondantes")
    elif filtered_recipes.empty:
        ingredients_str = ", ".join(f"**{ingredient}**" for ingredient in selected_ingredients)
        st.markdown(f"Aucune recette trouvée contenant les ingrédients: {ingredients_str}.")
    else:
        shown_columns = ['recipe_id', 'recipe_name', 'parsed_ingredients', 'num_ingredients']
        st.dataframe(filtered_recipes[shown_columns], use_container_width=True)

@st.cache_data
def plot_ingredients_repartitions(st, df, nbr_of_most_used_ingredients: int = 25):
    """Draw a bar chart of the most frequently used ingredients.

    Args:
        st: object exposing ``plotly_chart`` (the Streamlit module in this app).
        df: recipes DataFrame with a ``parsed_ingredients`` column of
            iterables of ingredient strings.
        nbr_of_most_used_ingredients: how many of the top ingredients to plot.
    """
    # One count per (recipe, ingredient) occurrence; only the text before the
    # first comma of each ingredient is used as its name.
    ingredient_counts = Counter(
        ingredient.split(',')[0]
        for recipe_ingredients in df["parsed_ingredients"]
        for ingredient in recipe_ingredients
    )

    frequency_table = pd.DataFrame.from_dict(ingredient_counts, orient='index', columns=['frequency'])
    frequency_table = frequency_table.sort_values(by='frequency', ascending=False)
    most_used_ingredients = frequency_table.head(nbr_of_most_used_ingredients)

    fig = px.bar(
        most_used_ingredients,
        x=most_used_ingredients.index,
        y='frequency',
        labels={'x': 'Ingredient', 'frequency': 'Frequency'},
    )
    fig.update_layout(
        xaxis_tickangle=-45,
        xaxis_title='Ingredient',
        yaxis_title='Frequency',
        plot_bgcolor='white',
    )
    st.plotly_chart(fig, use_container_width=True)

def contains_keywords(ingredients_set, keywords):
    """Tell whether any keyword appears inside any ingredient name.

    Matching is by substring. The keywords are lower-cased before comparing;
    the ingredient names are compared as given.

    Args:
        ingredients_set: iterable of ingredient strings.
        keywords: iterable of keyword strings.

    Returns:
        bool: True as soon as one keyword is found in one ingredient.
    """
    lowered_keywords = [keyword.lower() for keyword in keywords]
    for ingredient in ingredients_set:
        for needle in lowered_keywords:
            if needle in ingredient:
                return True
    return False

def calculate_shared_ingredients(ingredients_set, central_ingredients):
    """Return how many distinct ingredients the two collections have in common.

    Both arguments may be any iterable of hashable items (set, list, tuple);
    duplicates are counted once.

    NOTE: a function with this same name is defined again further down in
    this file, and that later definition is the one bound to the name once
    the module has loaded. This one accepts the same inputs so the result
    does not depend on which definition is in effect.
    """
    return len(set(ingredients_set).intersection(central_ingredients))

def display_fish_meat_distributions(st, df):
    """Draw a pie chart comparing the number of fish recipes and meat recipes.

    Args:
        st: object exposing ``plotly_chart`` (the Streamlit module in this app).
        df: recipes DataFrame with ``contains_fish`` and ``contains_meat``
            columns; each column is summed to get its slice value.
    """
    slice_values = {
        'Recettes de poisson': df['contains_fish'].sum(),
        'Recettes de viande': df['contains_meat'].sum(),
    }
    fig = px.pie(names=list(slice_values.keys()), values=list(slice_values.values()))
    st.plotly_chart(fig, use_container_width=True)

def random_recipe_for_category(st, df, category):
    """Show a button that, when clicked, displays one random recipe of a category.

    Args:
        st: object exposing ``button``, ``markdown`` and ``write`` (the
            Streamlit module in this app).
        df: recipes DataFrame with ``contains_fish``, ``contains_meat``,
            ``recipe_id``, ``recipe_name``, ``parsed_ingredients`` and
            ``recipe_reproduction_steps`` columns.
        category: ``'Poisson'`` selects fish recipes; any other value selects
            meat recipes.
    """
    if not st.button('Afficher une recette aléatoire'):
        return

    category_column = 'contains_fish' if category == 'Poisson' else 'contains_meat'
    filtered_recipes = df[df[category_column]]

    if filtered_recipes.empty:
        st.write("Aucune recette trouvée dans cette catégorie.")
        return

    recipe = filtered_recipes.sample().iloc[0]
    random_recipe_id = recipe['recipe_id']
    random_recipe_url = f"https://www.quitoque.fr/recette/{random_recipe_id}"
    # Plain Markdown is enough for a heading with a link. HTML rendering is
    # deliberately left off so that markup inside a recipe name coming from
    # the CSV is escaped instead of being rendered.
    st.markdown(f"### {recipe['recipe_name']} ([{random_recipe_id}]({random_recipe_url}))")
    # Sets have no stable order; sort for a reproducible display and skip
    # empty names so no stray separators appear.
    ingredients_string = ", ".join(sorted(name for name in recipe['parsed_ingredients'] if name))
    st.markdown(f"**Ingrédients :** {ingredients_string}")
    st.markdown("**Etapes de préparation :**")
    st.markdown(f"{recipe['recipe_reproduction_steps']}")

def calculate_shared_ingredients(ingredients_list, central_ingredients):
    """Count the distinct items present in both ingredient collections.

    Each argument may be any iterable of hashable items; repeated items are
    only counted once.
    """
    shared = set(ingredients_list).intersection(central_ingredients)
    return len(shared)

def find_similar_recipes(df, central_recipe_id, min_shared_ingredients):
    """Return the recipes sharing enough ingredients with a reference recipe.

    Args:
        df: recipes DataFrame with ``recipe_id`` and ``parsed_ingredients``
            columns. It is left unmodified.
        central_recipe_id: ``recipe_id`` of the reference recipe.
        min_shared_ingredients: minimum number of ingredients a recipe must
            share with the reference recipe to be returned.

    Returns:
        A new DataFrame holding every recipe other than the reference one
        whose ``shared_ingredients_count`` is at least
        ``min_shared_ingredients``, sorted by that count in descending order.
        The ``shared_ingredients_count`` column is included.

    Raises:
        IndexError: if no row has ``recipe_id == central_recipe_id``.
    """
    is_central = df['recipe_id'] == central_recipe_id
    if not is_central.any():
        raise IndexError(f"recipe_id {central_recipe_id!r} not found")
    central_ingredients = df.loc[is_central, 'parsed_ingredients'].iloc[0]

    # Work on a copy so the caller's DataFrame does not silently gain a
    # column; the reference recipe itself is never a candidate.
    candidates = df[~is_central].copy()
    candidates['shared_ingredients_count'] = candidates['parsed_ingredients'].apply(
        lambda ingredients: calculate_shared_ingredients(ingredients, central_ingredients)
    )

    shares_enough = candidates['shared_ingredients_count'] >= min_shared_ingredients
    return candidates[shares_enough].sort_values(by='shared_ingredients_count', ascending=False)

def plot_recipe_complexity_histogram(df):
    """Draw a histogram of the ``num_ingredients`` column of *df*.

    The chart is rendered through the module-level ``st`` rather than a
    passed-in handle.
    """
    histogram = px.histogram(df, x='num_ingredients', nbins=50)
    # Axis titles are user-facing and therefore in French.
    histogram.update_xaxes(title="Nombre d'ingrédients")
    histogram.update_yaxes(title="Nombre de recettes")
    st.plotly_chart(histogram, use_container_width=True)

def main():
    """Build the whole Streamlit page, top to bottom.

    Sections, in rendering order: page header and introduction, ingredient
    filter, recipes with the fewest ingredients, random recipe by category,
    similar-recipe search, data visualisations, and an optional dump of the
    full recipe table.
    """
    # --- Page header -------------------------------------------------------
    st.set_page_config(page_title="Master Chef", page_icon="👨‍🍳", layout="wide", initial_sidebar_state="auto")
    st.image("logo.png", width=150)
    st.title('Des recettes simples et savoureuses à la portée de tous! 😋')
    bmc_link = "https://www.buymeacoffee.com/pownedj"
    # Clickable "Buy Me A Coffee" badge: a Markdown image wrapped in a link.
    st.markdown(f"[![Buy Me A Coffee](https://www.buymeacoffee.com/assets/img/custom_images/orange_img.png)]({bmc_link})", unsafe_allow_html=True)
    st.markdown("""
    Explorez notre collection de recettes et découvrez des plats délicieux à préparer.
    Que vous cherchiez des **recettes rapides à réaliser en moins de 30 minutes** ou des **idées pour intégrer des ingrédients spécifiques** à votre cuisine,
    notre guide est là pour vous inspirer.
    Trouvez votre prochain coup de cœur culinaire dès aujourd'hui !
    """)
    st.markdown("---")

    # Cleaned recipes plus the derived columns used by every section below.
    df_recipes = load_data()

    # --- Filter recipes by selected ingredients ----------------------------
    st.markdown(f"## Trouver des recettes utilisant certains ingrédients")
    st.markdown("**Les recettes contenant tous les ingrédients sélectionnés seront affichées ici ci-dessous.**")
    filter_recipes_ingredients(st=st, df=df_recipes)

    # --- The N recipes with the fewest ingredients (N chosen with a slider) -
    st.markdown(f"## Recettes utilisant le moins d'ingrédients")
    num_recipes = st.slider('Nombre de recettes à afficher', min_value=1, max_value=30, value=5)
    sorted_recipes = df_recipes.sort_values(by='num_ingredients', ascending=True)
    sorted_recipes_by_number_of_ingredients = sorted_recipes.iloc[:num_recipes]
    columns_to_display = ['recipe_id', 'recipe_name', 'parsed_ingredients', 'num_ingredients']
    st.dataframe(sorted_recipes_by_number_of_ingredients[columns_to_display], use_container_width=True)

    # --- Random recipe from the chosen category ----------------------------
    st.markdown("---")
    st.markdown(f"## Choisir une recette")
    recipe_category = st.selectbox('Choisissez une catégorie de recette :', ['Poisson', 'Viande'])
    random_recipe_for_category(st=st, df=df_recipes, category=recipe_category)
    st.markdown("---")

    # --- Recipes similar to a chosen "central" recipe ----------------------
    st.markdown(f"## Trouver des recettes similaires")
    # Options are formatted as "<recipe_id> - <recipe_name>".
    recipe_options = df_recipes.apply(lambda x: f"{x['recipe_id']} - {x['recipe_name']}", axis=1).tolist()
    selected_recipe = st.selectbox('Sélectionnez votre recette de départ :', recipe_options)
    # Recover the ID from the option label: it is the text before the first ' - '.
    central_recipe_id = int(selected_recipe.split(' - ')[0])
    min_shared_ingredients = st.number_input('Nombre minimum d\'ingrédients en commun :', min_value=1, value=1, step=1)

    if st.button('Trouver des recettes similaires'):
        # Show the reference recipe first, then the matches (if any).
        central_recipe = df_recipes[df_recipes['recipe_id'] == central_recipe_id]
        st.dataframe(central_recipe[['recipe_id', 'recipe_name', 'parsed_ingredients', 'num_ingredients']], use_container_width=True)
        similar_recipes = find_similar_recipes(df_recipes, central_recipe_id, min_shared_ingredients)
        if not similar_recipes.empty:
            st.dataframe(similar_recipes[['recipe_id', 'recipe_name', 'parsed_ingredients', 'num_ingredients']], use_container_width=True)
        else:
            st.write("Aucune recette similaire trouvée.")
    st.markdown("---")

    # --- Data visualisations -----------------------------------------------
    st.markdown(f"# Visualisation des données")
    st.markdown(f"## 1. Répartition du nombre d'ingrédients par recette")
    plot_recipe_complexity_histogram(df_recipes)

    st.markdown(f"## 2. Répartitions des ingrédients les plus utilisés dans les recettes")
    num_most_used_ingredients = st.number_input('Nombre d’ingrédients les plus utilisés à afficher', min_value=1, value=20, step=1)
    plot_ingredients_repartitions(st=st, df=df_recipes, nbr_of_most_used_ingredients=num_most_used_ingredients)

    st.markdown(f"## 3. Répartitions des recettes intégrant de la viande et du poisson")
    display_fish_meat_distributions(st=st, df=df_recipes)
    st.markdown("---")

    # --- Full recipe table, shown only on request --------------------------
    if st.button("Afficher toutes les recettes"):
        columns_to_display = ['recipe_id', 'recipe_name', 'parsed_ingredients', 'num_ingredients']
        st.dataframe(df_recipes[columns_to_display], use_container_width=True)

    # TODO: build a grocery list from the selected recipes.

# Build the page only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()