Skip to content

Commit eef0d14

Browse files
committed
update streamlit app
1 parent d693cb2 commit eef0d14

File tree

10 files changed

+1039
-956
lines changed

10 files changed

+1039
-956
lines changed

README.md

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -97,8 +97,7 @@ if archived_tweets:
9797

9898
A prototype written in Python with the Streamlit framework and hosted on Streamlit Cloud.
9999

100-
> [!NOTE]
101-
> Starting from version 1.0, the web app will not receive all updates from the official package. To access all features, prefer the package via PyPI.
100+
Important: Starting from version 1.0, the web app will no longer receive all updates from the official package. To access all features, prefer using the package from PyPI.
102101

103102
## Documentation
104103

app/app.py

Lines changed: 81 additions & 127 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import base64
21
from datetime import datetime, timedelta
32

43
import streamlit as st
@@ -8,17 +7,32 @@
87
from waybacktweets.api.parse import TweetsParser
98
from waybacktweets.api.request import WaybackTweets
109
from waybacktweets.api.visualize import HTMLTweetsVisualizer
11-
from waybacktweets.config import FIELD_OPTIONS, config
10+
from waybacktweets.config import config
1211

1312
# ------ Initial Settings ------ #
1413

1514
PAGE_ICON = "assets/parthenon.png"
1615
TITLE = "assets/waybacktweets.png"
17-
DOWNLOAD = "assets/download.svg"
16+
FIELD_OPTIONS = [
17+
"archived_urlkey",
18+
"archived_timestamp",
19+
"parsed_archived_timestamp",
20+
"archived_tweet_url",
21+
"parsed_archived_tweet_url",
22+
"original_tweet_url",
23+
"parsed_tweet_url",
24+
"available_tweet_text",
25+
"available_tweet_is_RT",
26+
"available_tweet_info",
27+
"archived_mimetype",
28+
"archived_statuscode",
29+
"archived_digest",
30+
"archived_length",
31+
]
1832

1933
collapse = None
2034
matchtype = None
21-
start_date = datetime.now() - timedelta(days=365)
35+
start_date = datetime.now() - timedelta(days=30 * 6)
2236
end_date = datetime.now()
2337
min_date = datetime(2006, 1, 1)
2438

@@ -34,13 +48,9 @@
3448
layout="centered",
3549
menu_items={
3650
"About": f"""
37-
[![License](https://img.shields.io/github/license/claromes/waybacktweets)](https://github.com/claromes/waybacktweets/blob/main/LICENSE.md)
51+
© 2023-{end_date.year} [Claromes](https://claromes.com). Licensed under the [GPL-3.0](https://raw.githubusercontent.com/claromes/waybacktweets/refs/heads/main/LICENSE.md). Icon by The Doodle Library. Title font by Google, licensed under the Open Font License (OFL).
3852
39-
The application is a prototype hosted on Streamlit Cloud, serving as an alternative to the command line tool.
40-
41-
© 2023 - {end_date.year}, [Claromes](https://claromes.com)
42-
43-
---
53+
---
4454
""", # noqa: E501
4555
"Report a bug": "https://github.com/claromes/waybacktweets/issues",
4656
},
@@ -55,7 +65,7 @@
5565
st.session_state.count = False
5666

5767
if "archived_timestamp_filter" not in st.session_state:
58-
st.session_state.archived_timestamp_filter = None
68+
st.session_state.archived_timestamp_filter = (start_date, end_date)
5969

6070
if "username_value" not in st.session_state:
6171
st.session_state.username_value = ""
@@ -97,7 +107,7 @@
97107
"""
98108
)
99109

100-
# ------ Requestings ------ #
110+
# ------ Functions ------ #
101111

102112

103113
@st.cache_data(ttl=600, show_spinner=False)
@@ -107,7 +117,6 @@ def wayback_tweets(
107117
timestamp_from,
108118
timestamp_to,
109119
limit,
110-
offset,
111120
matchtype,
112121
):
113122
response = WaybackTweets(
@@ -116,21 +125,22 @@ def wayback_tweets(
116125
timestamp_from,
117126
timestamp_to,
118127
limit,
119-
offset,
120128
matchtype,
121129
)
122130
archived_tweets = response.get()
123131

124132
return archived_tweets
125133

126134

135+
@st.cache_data(ttl=600, show_spinner=False)
127136
def tweets_parser(archived_tweets, username, field_options):
128137
parser = TweetsParser(archived_tweets, username, field_options)
129138
parsed_tweets = parser.parse()
130139

131140
return parsed_tweets
132141

133142

143+
@st.cache_data(ttl=600, show_spinner=False)
134144
def tweets_exporter(parsed_tweets, username, field_options):
135145
exporter = TweetsExporter(parsed_tweets, username, field_options)
136146

@@ -164,75 +174,57 @@ def scroll_page():
164174
st.session_state.update_component += 1
165175
scroll_page()
166176

167-
# ------ User Interface Settings ------ #
168-
169-
st.image(TITLE, use_column_width="never")
170-
st.caption(
171-
"[![GitHub release (latest by date including pre-releases)](https://img.shields.io/github/v/release/claromes/waybacktweets?include_prereleases)](https://github.com/claromes/waybacktweets/releases)" # noqa: E501
172-
)
173-
st.write(
174-
"Retrieves archived tweets CDX data in HTML (for easy viewing of the tweets), CSV, and JSON formats." # noqa: E501
175-
)
177+
# ------ UI Settings ------ #
176178

179+
st.image(TITLE, use_container_width="never")
177180
st.write(
178-
"For better performance, use the CLI version, available on [PyPI](https://pypi.org/project/waybacktweets)." # noqa: E501
181+
"Retrieves archived tweets CDX data in HTML, CSV, and JSON formats." # noqa: E501
179182
)
180183

181184
st.write(
182-
"To access the legacy version of Wayback Tweets, [click here](https://waybacktweets-legacy.streamlit.app)." # noqa: E501
185+
"This application is a prototype based on the Python package and does not include all available features. To explore the package, including CLI and Module usage, visit the [GitHub repository](https://github.com/claromes/waybacktweets)." # noqa: E501
183186
)
184187

185188
st.divider()
186189

187190
# -- Filters -- #
188191

189192
username = st.text_input(
190-
"Username *",
193+
"Username",
191194
value=st.session_state.username_value,
192195
key="username",
193196
placeholder="Without @",
194197
)
195198

196-
with st.expander("Filtering", expanded=st.session_state.expanded_value):
197-
198-
col1, col2 = st.columns(2)
199-
200-
with col1:
201-
limit = st.number_input(
202-
"Limit",
203-
value=500,
204-
max_value=500,
205-
key="limit",
206-
help="Query result limits. A maximum of 500 tweets per search to enhance the tool's performance", # noqa: E501
207-
)
208-
209-
with col2:
210-
offset = st.text_input(
211-
"Offset",
212-
key="offset",
213-
help="Enables efficient pagination. For instance, after retrieving an initial batch of 500 tweets, setting an offset of 500 fetches the next batch from 501 to 1000", # noqa: E501
214-
)
199+
st.session_state.archived_timestamp_filter = st.date_input(
200+
"Tweets saved between",
201+
(start_date, end_date),
202+
min_date,
203+
end_date,
204+
format="YYYY/MM/DD",
205+
help="Using the `from` and `to` filters. Format: YYYY/MM/DD",
206+
)
207+
st.caption(
208+
":gray[Note: Large date ranges may take longer to process and exceed the app's resource limits. Use smaller ranges for faster results.]" # noqa: E501
209+
)
215210

216-
st.session_state.archived_timestamp_filter = st.date_input(
217-
"Tweets saved between",
218-
None,
219-
min_date,
220-
end_date,
221-
format="YYYY/MM/DD",
222-
help="Using the `from` and `to` filters. Format: YYYY/MM/DD",
223-
)
211+
limit = st.text_input(
212+
"Limit",
213+
key="limit",
214+
help="Query result limits",
215+
)
224216

225-
unique = st.checkbox(
226-
"Only unique Wayback Machine URLs",
227-
key="unique",
228-
help="Filtering by the collapse option using the `urlkey` field and the URL Match Scope `prefix`", # noqa: E501
229-
)
230-
st.caption(
231-
":orange[note: according to the official documentation of the Wayback CDX Server API, the query to retrieve unique URLs may be slow at the moment.]" # noqa: E501
232-
)
217+
unique = st.checkbox(
218+
"Only unique Wayback Machine URLs",
219+
key="unique",
220+
help="Filtering by the collapse option using the `urlkey` field and the URL Match Scope `prefix`", # noqa: E501
221+
)
222+
st.caption(
223+
":gray[Note: As noted in the official Wayback CDX Server API documentation, retrieving unique URLs may experience slow performance at this time.]" # noqa: E501
224+
)
233225

234226

235-
query = st.button("Query", type="primary", use_container_width=True)
227+
query = st.button("Go", type="primary", use_container_width=True)
236228

237229
if st.query_params.username == "":
238230
st.query_params.clear()
@@ -248,34 +240,22 @@ def scroll_page():
248240
collapse = "urlkey"
249241
matchtype = "prefix"
250242

251-
archived_timestamp_from = None
252-
archived_timestamp_to = None
253-
254-
if st.session_state.archived_timestamp_filter:
255-
archived_timestamp_from = st.session_state.archived_timestamp_filter[0]
256-
archived_timestamp_to = st.session_state.archived_timestamp_filter[1]
257-
258243
try:
259-
with st.spinner(
260-
f"Retrieving the archived tweets of @{st.session_state.current_username}..."
261-
):
244+
with st.spinner(f"Retrieving @{st.session_state.current_username}..."):
262245
wayback_tweets = wayback_tweets(
263246
st.session_state.current_username,
264247
collapse,
265-
archived_timestamp_from,
266-
archived_timestamp_to,
248+
st.session_state.archived_timestamp_filter[0],
249+
st.session_state.archived_timestamp_filter[1],
267250
limit,
268-
offset,
269251
matchtype,
270252
)
271253

272254
if not wayback_tweets:
273255
st.error("No data was saved due to an empty response.")
274256
st.stop()
275257

276-
with st.spinner(
277-
f"Parsing the archived tweets of @{st.session_state.current_username}"
278-
):
258+
with st.spinner(f"Parsing @{st.session_state.current_username}..."):
279259
parsed_tweets = tweets_parser(
280260
wayback_tweets, st.session_state.current_username, FIELD_OPTIONS
281261
)
@@ -292,73 +272,47 @@ def scroll_page():
292272
# -- Rendering -- #
293273

294274
st.session_state.count = len(df)
295-
st.write(f"**{st.session_state.count} URLs have been captured**")
275+
st.caption(f"{st.session_state.count} URLs have been captured.")
296276

297277
tab1, tab2, tab3 = st.tabs(["HTML", "CSV", "JSON"])
298278

299279
# -- HTML -- #
300280
with tab1:
301-
st.write(
302-
f"Visualize tweets more efficiently through iframe tags. Download the @{st.session_state.current_username}'s archived tweets in HTML." # noqa: E501
281+
st.download_button(
282+
label=f"Download @{st.session_state.current_username} in HTML",
283+
data=html_content,
284+
file_name=f"{file_name}.html",
285+
mime="text/html",
286+
icon=":material/download:",
303287
)
304288

305-
col5, col6 = st.columns([1, 18])
306-
307-
with col5:
308-
st.image(DOWNLOAD, width=22)
309-
310-
with col6:
311-
b64_html = base64.b64encode(html_content.encode()).decode()
312-
href_html = f"data:text/html;base64,{b64_html}"
313-
314-
st.markdown(
315-
f'<a href="{href_html}" download="{file_name}.html" title="Download {file_name}.html">{file_name}.html</a>', # noqa: E501
316-
unsafe_allow_html=True,
317-
)
289+
st.caption("Note: The iframes are best viewed in Firefox.")
318290

319291
# -- CSV -- #
320292
with tab2:
321-
st.write(
322-
"Check the data returned in the dataframe below and download the file."
293+
st.download_button(
294+
label=f"Download @{st.session_state.current_username} in CSV",
295+
data=csv_data,
296+
file_name=f"{file_name}.csv",
297+
mime="text/csv",
298+
icon=":material/download:",
323299
)
324300

325-
col7, col8 = st.columns([1, 18])
326-
327-
with col7:
328-
st.image(DOWNLOAD, width=22)
329-
330-
with col8:
331-
b64_csv = base64.b64encode(csv_data.encode()).decode()
332-
href_csv = f"data:file/csv;base64,{b64_csv}"
333-
334-
st.markdown(
335-
f'<a href="{href_csv}" download="{file_name}.csv" title="Download {file_name}.csv">{file_name}.csv</a>', # noqa: E501
336-
unsafe_allow_html=True,
337-
)
338-
301+
st.caption("Preview:")
339302
st.dataframe(df, use_container_width=True)
340303

341304
# -- JSON -- #
342305
with tab3:
343-
st.write(
344-
"Check the data returned in JSON format below and download the file."
306+
st.download_button(
307+
label=f"Download @{st.session_state.current_username} in JSON",
308+
data=json_data,
309+
file_name=f"{file_name}.json",
310+
mime="application/json",
311+
icon=":material/download:",
345312
)
346313

347-
col9, col10 = st.columns([1, 18])
348-
349-
with col9:
350-
st.image(DOWNLOAD, width=22)
351-
352-
with col10:
353-
b64_json = base64.b64encode(json_data.encode()).decode()
354-
href_json = f"data:file/json;base64,{b64_json}"
355-
356-
st.markdown(
357-
f'<a href="{href_json}" download="{file_name}.json" title="Download {file_name}.json">{file_name}.json</a>', # noqa: E501
358-
unsafe_allow_html=True,
359-
)
360-
361-
st.json(json_data, expanded=False)
314+
st.caption("Preview:")
315+
st.json(json_data, expanded=1)
362316
except TypeError as e:
363317
st.error(
364318
f"""

app/requirements.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
streamlit==1.36.0
2-
waybacktweets==1.0a5
1+
streamlit==1.45.0
2+
waybacktweets

assets/download.svg

Lines changed: 0 additions & 9 deletions
This file was deleted.

docs/conf.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,12 @@
3434
html_css_files = ["css/custom.css"]
3535
html_context = {
3636
"project_links": [
37-
ProjectLink("PyPI Releases", "https://pypi.org/project/waybacktweets/"),
37+
ProjectLink("PyPI", "https://pypi.org/project/waybacktweets/"),
3838
ProjectLink("Source Code", "https://github.com/claromes/waybacktweets/"),
39+
ProjectLink(
40+
"License",
41+
"https://raw.githubusercontent.com/claromes/waybacktweets/refs/heads/main/LICENSE.md", # noqa: E501
42+
),
3943
ProjectLink(
4044
"Issue Tracker", "https://github.com/claromes/waybacktweets/issues/"
4145
),

0 commit comments

Comments (0)