recodehive
diff --git a/‎Web_app/Home_Page.py
Lines changed: 27 additions & 0 deletions b/‎Web_app/Home_Page.py
Lines changed: 27 additions & 0 deletions
diff --git a/‎Web_app/README.md
Lines changed: 51 additions & 11 deletions b/‎Web_app/README.md
Lines changed: 51 additions & 11 deletions
diff --git a/‎Web_app/Scarper.py
Lines changed: 93 additions & 0 deletions b/‎Web_app/Scarper.py
Lines changed: 93 additions & 0 deletions
@@ -0,0 +1,27 @@
+import streamlit as st
+
+# Configure the page title and icon for the landing page.
+st.set_page_config(
+    page_title="Home Page",
+    page_icon="👋",
+)
+
+# Top-level heading of the landing page (rendered as a Markdown H1).
+st.write("# Welcome to Movie Review Analysis and Recommendation System 👋")
+
+# Success-styled message displayed in the sidebar.
+st.sidebar.success("Select above part.")
+
+# Static introduction and feature list for the landing page, written in Markdown.
+st.markdown(
+    """
+    ### Introduction
+    The IMDb Movie Review Analysis and Recommendation System is a comprehensive 
+    tool designed to analyze movie reviews and provide personalized movie recommendations. 
+    It leverages natural language processing (NLP) techniques and machine learning 
+    algorithms to deliver insightful analysis and effective recommendations based on user preferences.
+
+    ### Features
+    1. **Sentiment Analysis** : Analyzes the sentiment of movie reviews (positive, negative).
+    2. **Personalized Recommendations** : Recommends movies based on content filtering.
+    
+    **👈 Select the part from the sidebar** 
+    
+"""
+)
@@ -1,13 +1,18 @@
-<h1 align="center">IMDb Movie Review Analysis and Recommendation System</h1>
-<blockquote align="center">Analyzing movie reviews and providing recommendations using Python and Streamlit. 🎬💻</blockquote>
-<p align="center">For new data generation and <b>sentiment analysis</b>, we have written a Python script to fetch📊 data from IMDb, analyze sentiments, and provide movie recommendations, all converted into an interactive web app using Streamlit. 🌐📈</p>
+# IMDb Movie Review Analysis and Recommendation System :film_projector:
+Analyzing movie reviews and providing recommendations using Python and Streamlit. 🎬💻 This web app has two parts :sunglasses::  
+1. We have created a movie review analysis part.
+2. We have created a movie recommendation part.
+
+<p align="center">
+To generate new data for <b>sentiment analysis</b> and the <b>recommendation system</b>, we have written separate Python scripts that fetch📊 data from IMDb, analyze sentiments, and provide movie recommendations, all converted into an interactive web app using Streamlit. 🌐📈</p>
+
 
 ## Features
 
 - **Scraping Movie Reviews**: Collects user reviews from IMDb using BeautifulSoup.
-- **Customizable Scraper**: Target specific movies and the number of pages to scrape.
+- **Customizable Scraper**: Collects movie descriptions from IMDb using Selenium.
 - **Sentiment Analysis**: Uses Support Vector Machine (SVM) to classify reviews as positive or negative.
-- **Recommendations**: Recommends top movies based on positive reviews.
+- **Recommendations**: Recommends top movies based on the content of movies the user has previously watched.
 - **CSV Output**: Saves the scraped data into a CSV file for further analysis.
 
 ## Installation
@@ -19,27 +24,62 @@
    pip install requests
    pip install pandas
    pip install scikit-learn
-
+   pip install selenium
+   ```
 ## Usage
 
+### For Sentiment Analysis Part 
+
 1. **Run the scraping script** to collect movie reviews and save them into a CSV file. Open and execute the Jupyter notebook:
 
    ```bash
    jupyter notebook notebooks/movie_review_imdb_scrapping.ipynb
-
+   ```
 2. **Navigate to the Web_app directory:**
    ```bash
    cd Web_app
+   ```
+
+### For Content-Based Movie Recommendation Part 
+
+1. **Run the scraping script** to collect movie descriptions and save them into a CSV file. Open and execute the Python script.
+   
+   ***Note: you have to download ChromeDriver and set its path in Scarper.py, where the driver path (`DRIVER_PATH`) is defined.***
 
-3. **Run the Streamlit app:**
    ```bash
-   streamlit run app.py
+   python -u "Scarper.py"
+   ```
+2. **Run the similarity-model generation script** to build the similarity model that the web app uses. Open and execute the Jupyter notebook:
+
+     ***Note: you must run model.ipynb, as this will download similarity.pkl, which is the model used by the Streamlit web app.***
+
+   ```bash
+   jupyter notebook notebooks/model.ipynb
+   ```
+
+### For HomePage
+1. **Navigate to the Web_app directory:**
+   ```bash
+   cd Web_app
+   ```
+
+2. **Run the Streamlit app:**
+   ```bash
+   streamlit run Home_Page.py
+   ```
+### Home Page
+
+![Home_Page](https://github.com/Shraman-jain/Scrape-ML/assets/60072287/dbbafd78-e6c2-4469-b55f-d7e555f382ae "Home Page")
+
+### Sentiment Analysis Part
 
-4. **Upload a CSV file** containing the reviews when prompted by the app.
+![Movie_review](https://github.com/Shraman-jain/Scrape-ML/assets/60072287/dd449b6f-680c-4b00-bc45-6662bc82e48c "Sentiment Analysis")
 
+### Content-Based Movie Recommendation Part
 
 
+![Recommendation](https://github.com/Shraman-jain/Scrape-ML/assets/60072287/90599178-3d63-4a4a-8879-68408b2cc235 "Content-Based Movie Recommendation Part")
 
 
 
- 
+ 
@@ -0,0 +1,93 @@
+from selenium.webdriver.common.by import By
+from selenium.webdriver.common.keys import Keys
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
+import time
+import csv
+import re
+from bs4 import BeautifulSoup
+from selenium.webdriver.chrome.options import Options
+from selenium import webdriver
+
+DRIVER_PATH = 'E:/chromedriver-win64/chromedriver'
+# Initialize the Chrome driver
+
+
+options = webdriver.ChromeOptions()
+options.add_argument('--no-sandbox')
+options.add_argument('--disable-dev-shm-usage')
+driver = webdriver.Chrome(options=options,executable_path=DRIVER_PATH)
+
+# Navigate to the URL
+driver.get('https://www.imdb.com/search/title/?title_type=tv_series,feature,tv_movie,tv_episode,tv_miniseries,tv_special&release_date=2000-01-01,2024-12-31')
+
+driver.set_script_timeout(10000)
+def load_more_results():
+    try:
+        load_more_button = WebDriverWait(driver, 10).until(
+            EC.element_to_be_clickable((By.XPATH, '//button[contains(@class, "ipc-see-more__button")]'))
+        )
+        driver.execute_script("arguments[0].scrollIntoView(true);", load_more_button)
+        driver.execute_script("arguments[0].click();", load_more_button)
+        time.sleep(2) 
+        return True
+    except Exception as e:
+        print(f"Error: {e}")
+        return False
+def save_to_csv(movies, filename='movies.csv'):
+    keys = movies[0].keys()
+    with open(filename, 'a', newline='', encoding='utf-8') as output_file:
+        dict_writer = csv.DictWriter(output_file, fieldnames=keys)
+        dict_writer.writeheader()
+        dict_writer.writerows(movies)
+
+
+all_movies=[] 
+cnt=0
+while(cnt<300):
+    cnt+=1   
+    print(cnt)
+    if not load_more_results():
+            break
+    
+movie_elements = driver.find_element(By.XPATH, "/html/body/div[2]/main/div[2]/div[3]/section/section/div/section/section/div[2]/div/section/div[2]/div[2]/ul")
+print("movie_list")
+
+html_content = movie_elements.get_attribute('outerHTML')
+print("html movie_list")
+soup = BeautifulSoup(html_content, 'html.parser')
+
+lst= soup.find_all("li", class_="ipc-metadata-list-summary-item")
+print("list")
+for i in lst:
+    org_title= i.find("h3",class_="ipc-title__text").text
+    try:
+        title=re.sub(r'\d+\.\s*', '', org_title)
+    except:
+        title="NA"
+    try:
+        year = i.find("span", class_="sc-b189961a-8 kLaxqf dli-title-metadata-item").text
+        
+    except:
+        year="NA"
+    try:
+        rating = i.find("span", class_='ipc-rating-star ipc-rating-star--base ipc-rating-star--imdb ratingGroup--imdb-rating').text.split()[0]
+    except:
+        rating="NA"
+    try:
+        description = i.find("div", class_='ipc-html-content-inner-div').text
+    except:
+        description = "NA"
+    all_movies.append({
+        'title': title,
+        'type':"Tv-Series",
+        'year': year,
+        'rating': rating,
+        'description': description
+    })
+   
+print("saving started")
+if all_movies:
+    save_to_csv(all_movies)   
+print("completed")
+driver.quit()