Commit 0fc7f21

Cleanup streamlit (#273)
* move producer creation outside producing call
* make topic with 1 partition
* teardown instructions
* rename files to something more classically pythony
* make cv-able
* mv sr out of data handler
* subscribe rather than assign
* rename import
* sql formatting
* comments
* note NYSE hrs

Co-authored-by: Cerchie <[email protected]>
1 parent ee0ae20 · commit 0fc7f21

3 files changed: +47 −30 lines changed

flink-streamlit/README.md

Lines changed: 18 additions & 5 deletions
````diff
@@ -8,6 +8,8 @@ In this project you'll produce stock trade events from the [Alpaca API markets](
 
 <img width="718" alt="graph of the 4 technologies" src="https://github.com/Cerchie/alpaca-kafka-flink-streamlit/assets/54046179/7600d717-69bc-46c5-8679-d8d65b9ce810">
 
+Note: Although it may change in the future, at the time this README was written, the NYSE is open from 9:30 to 4 EST. If you run this application outside of those hours, you may not see data coming through.
+
 
 ## Step 1: Get set up in Confluent Cloud
 
@@ -150,7 +152,8 @@ In the cell of the new workspace, you can start running SQL statements. Copy and
 ```sql
 CREATE TABLE tumble_interval_SPY
 (`symbol` STRING, `window_start` STRING,`window_end` STRING,`price` DOUBLE, PRIMARY KEY (`symbol`) NOT ENFORCED)
-WITH ('value.format' = 'json-registry');
+DISTRIBUTED BY (symbol) INTO 1 BUCKETS
+WITH ('value.format' = 'json-registry');
 ```
 - Click 'Run'.
 
@@ -187,11 +190,15 @@ Generate a key using the widget you'll find on the right of the screen on the ho
 
 ## Step 3: Get started running the app
 
-`git clone https://github.com/Cerchie/finnhub.git && cd finnhub`
+```
+git clone https://github.com/Cerchie/finnhub.git && cd finnhub
+```
 
 then
 
-`pip install -r requirements.txt`
+```
+pip install -r requirements.txt
+```
 
 Now, create a file in the root directory named `.streamlit/secrets.toml` (that initial `.` is part of the convention.)
 
@@ -210,6 +217,12 @@ Note that the `:` is necessary for `BASIC_AUTH_USER_INFO`.
 
 You'll need a [Streamlit account](https://streamlit.io/) as well for the [secrets to be in the environment](https://docs.streamlit.io/streamlit-community-cloud/deploy-your-app/secrets-management).
 
-Now, run `streamlit run alpacaviz.py` in your root dir in order to run the app.
+Now, run
+```streamlit run app.py```
+in your root dir in order to run the app.
+
+To deploy on Streamlit yourself, follow the [instructions here](https://docs.streamlit.io/streamlit-community-cloud/deploy-your-app) and make sure to [include the secrets](https://docs.streamlit.io/streamlit-community-cloud/deploy-your-app/secrets-management) in your settings.
+
+## Step 4: Teardown in Confluent Cloud
 
-To deploy on Streamlit yourself, follow the [instructions here](https://docs.streamlit.io/streamlit-community-cloud/deploy-your-app) and make sure to [include the secrets](https://docs.streamlit.io/streamlit-community-cloud/deploy-your-app/secrets-management) in your settings.
+To avoid wasting resources after following this exercise, you can teardown your environment in Confluent Cloud. To do that, navigate to your environment's page and click 'Delete' at the lower right-hand side. This will delete your environment and its associated resources.
````
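As a bridge between the README's `.streamlit/secrets.toml` step and the code below, here is a minimal sketch of how the app reads those values through Streamlit's `st.secrets`. It assumes only the three key names visible in this commit's `kafkaproducer.py` diff; the real secrets file may contain additional entries.

```python
# Minimal sketch: reading .streamlit/secrets.toml values via Streamlit's st.secrets.
# Only the key names visible in this commit are shown; the real file may hold more.
import streamlit as st

sasl_password = st.secrets["SASL_PASSWORD"]      # Kafka cluster API secret
sr_url = st.secrets["SR_URL"]                    # Schema Registry endpoint
sr_auth = st.secrets["BASIC_AUTH_USER_INFO"]     # "key:secret" -- the ':' is required

st.write("Secrets loaded:", all([sasl_password, sr_url, sr_auth]))
```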

flink-streamlit/alpacaviz.py renamed to flink-streamlit/app.py

Lines changed: 3 additions & 7 deletions
```diff
@@ -1,11 +1,9 @@
 import asyncio
 import json
-import random
-import string
 import pandas as pd
 import streamlit as st
 from confluent_kafka import Consumer, TopicPartition
-from setupsocket import on_select
+from kafkaproducer import on_select
 import altair as alt
 
 
@@ -46,10 +44,8 @@ async def display_quotes(component):
     window_history = []
     topic_name = option
 
-    # starting from a specific partition here, it may be different depending on the topic so try a few out or just start from the beginning with the auto.offset.reset config
-    partition = TopicPartition(f"tumble_interval_{topic_name}", 0, 7)
-    consumer.assign([partition])
-    consumer.seek(partition)
+    topic_name = f"tumble_interval_{option}"
+    consumer.subscribe(topic_name)
 
     while True:
         try:
```
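For readers skimming this change, here is a minimal, self-contained sketch of the subscribe-based pattern the new code moves to, as opposed to pinning a partition and offset with `assign()`/`seek()`. The broker address, group id, and offset policy are placeholders rather than values from this repo, and note that `confluent_kafka`'s `Consumer.subscribe()` expects a list of topic names.

```python
# Minimal sketch of the subscribe-based consumer pattern (placeholder config values).
from confluent_kafka import Consumer

consumer = Consumer(
    {
        "bootstrap.servers": "localhost:9092",  # placeholder broker
        "group.id": "streamlit-demo",           # placeholder consumer group
        "auto.offset.reset": "earliest",        # start from the beginning instead of seeking an offset
    }
)

# subscribe() takes a list of topics and lets the group coordinator assign partitions,
# unlike assign()/seek(), which pins the consumer to one partition and offset.
consumer.subscribe(["tumble_interval_SPY"])

try:
    while True:
        msg = consumer.poll(1.0)
        if msg is None:
            continue
        if msg.error():
            print(f"Consumer error: {msg.error()}")
            continue
        print(msg.key(), msg.value())
finally:
    consumer.close()
```

With `subscribe()`, partition assignment and the starting position are handled by the consumer group and `auto.offset.reset`, which is why the hard-coded partition/offset lines removed above are no longer needed.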

flink-streamlit/setupsocket.py renamed to flink-streamlit/kafkaproducer.py

Lines changed: 26 additions & 18 deletions
```diff
@@ -11,6 +11,7 @@
 
 # set up kafka client
 print("Setting up Kafka client")
+
 config_dict = {
     "bootstrap.servers": "pkc-921jm.us-east-2.aws.confluent.cloud:9092",
     "sasl.mechanisms": "PLAIN",
@@ -20,8 +21,20 @@
     "sasl.password": st.secrets["SASL_PASSWORD"],
 }
 
+
 client_config = config_dict
 
+# setting up the producer
+producer = Producer(client_config)
+
+srconfig = {
+    "url": st.secrets["SR_URL"],
+    "basic.auth.user.info": st.secrets["BASIC_AUTH_USER_INFO"],
+}
+
+# setting up the schema registry connection
+schema_registry_client = SchemaRegistryClient(srconfig)
+
 # schema for producer matching one in SPY topic in Confluent Cloud
 schema_str = """{
 "$id": "http://example.com/myURI.schema.json",
@@ -47,13 +60,6 @@
 }"""
 
 
-def delivery_report(err, event):
-    if err is not None:
-        print(f'Delivery failed on reading for {event.key().decode("utf8")}: {err}')
-    else:
-        print(f"delivered new event from producer")
-
-
 def serialize_custom_data(custom_data, ctx):
     return {
         "bid_timestamp": str(custom_data.timestamp),
@@ -62,20 +68,22 @@ def serialize_custom_data(custom_data, ctx):
     }
 
 
-async def quote_data_handler(stockname, data):
-    # this will run when `wss_client.subscribe_quotes(fn, stockname)` is called
+# setting up the JSON serializer
+json_serializer = JSONSerializer(
+    schema_str, schema_registry_client, serialize_custom_data
+)
 
-    producer = Producer(client_config)
-    srconfig = {
-        "url": st.secrets["SR_URL"],
-        "basic.auth.user.info": st.secrets["BASIC_AUTH_USER_INFO"],
-    }
 
-    schema_registry_client = SchemaRegistryClient(srconfig)
+def delivery_report(err, event):
+    if err is not None:
+        print(f'Delivery failed on reading for {event.key().decode("utf8")}: {err}')
+    else:
+        print(f"delivered new event from producer")
+
+
+async def quote_data_handler(stockname, data):
+    # this will run when `wss_client.subscribe_quotes(fn, stockname)` is called
 
-    json_serializer = JSONSerializer(
-        schema_str, schema_registry_client, serialize_custom_data
-    )
     producer.produce(
         topic=stockname,
         key=stockname,
```
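The hunk above ends mid-call, so as orientation only, here is a hedged sketch of how a module-level producer, Schema Registry client, and `JSONSerializer` are typically wired to a `produce()` call with `confluent-kafka`. The broker and Schema Registry addresses, the schema, the `Quote` dataclass, and the `send_quote` helper are illustrative stand-ins, not this repo's code.

```python
# Illustrative sketch only: module-level producer + JSON Schema serializer,
# with placeholder config, schema, and data class (not the repo's exact code).
from dataclasses import dataclass

from confluent_kafka import Producer
from confluent_kafka.schema_registry import SchemaRegistryClient
from confluent_kafka.schema_registry.json_schema import JSONSerializer
from confluent_kafka.serialization import MessageField, SerializationContext

schema_str = """{
  "$schema": "http://json-schema.org/draft-07/schema#",
  "title": "Quote",
  "type": "object",
  "properties": {
    "bid_timestamp": {"type": "string"},
    "price": {"type": "number"},
    "symbol": {"type": "string"}
  }
}"""


@dataclass
class Quote:  # placeholder for the incoming quote object
    timestamp: str
    price: float
    symbol: str


def quote_to_dict(quote, ctx):
    # shape the object into a dict matching the JSON schema above
    return {"bid_timestamp": quote.timestamp, "price": quote.price, "symbol": quote.symbol}


def delivery_report(err, event):
    if err is not None:
        print(f"Delivery failed for {event.key()}: {err}")
    else:
        print("delivered new event from producer")


# Created once at module level and reused for every quote.
producer = Producer({"bootstrap.servers": "localhost:9092"})                 # placeholder broker
schema_registry_client = SchemaRegistryClient({"url": "http://localhost:8081"})  # placeholder SR
json_serializer = JSONSerializer(schema_str, schema_registry_client, quote_to_dict)


def send_quote(quote: Quote) -> None:
    producer.produce(
        topic=quote.symbol,
        key=quote.symbol,
        value=json_serializer(
            quote, SerializationContext(quote.symbol, MessageField.VALUE)
        ),
        on_delivery=delivery_report,
    )
    producer.poll(0)  # serve delivery callbacks
```

Creating the `Producer` and `SchemaRegistryClient` once at import time, instead of inside `quote_data_handler`, avoids re-establishing connections for every incoming quote, which is the point of the "move producer creation outside producing call" item in the commit message.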

0 commit comments