47 changes: 47 additions & 0 deletions KafkaConsumer.py
@@ -0,0 +1,47 @@
import json

import pandas as pd
import tensorflow as tf
from kafka import KafkaConsumer
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Load the pre-trained model
model = tf.keras.models.load_model(r'C:\Users\Sharm\Desktop\sbi\fraud_detection_model.h5')

consumer = KafkaConsumer(
    'hello_world',
    bootstrap_servers='localhost:9092',
    value_deserializer=lambda x: json.loads(x.decode('utf-8')),
    auto_offset_reset='latest',
    enable_auto_commit=True,
    auto_commit_interval_ms=500,
)

columns = ['merchant', 'category', 'amt', 'city', 'state', 'lat', 'long', 'city_pop', 'job',
           'merch_lat', 'merch_long', 'hour_of_day', 'day_of_week', 'age']
numerical_columns = ['amt', 'lat', 'long', 'city_pop', 'merch_lat', 'merch_long', 'age']
categorical_columns = ['merchant', 'category', 'city', 'state', 'job']

label_encoders = {}

try:
    for i, message in enumerate(consumer, start=1):
        data = message.value
        df = pd.DataFrame([data], columns=columns)

        # NOTE: fitting encoders and a scaler on a single message is a demo
        # shortcut; in practice, fit them once on the training data and call
        # transform() here instead.
        for col in categorical_columns:
            le = LabelEncoder()
            df[col] = le.fit_transform(df[col])
            label_encoders[col] = le

        scaler = StandardScaler()
        df[numerical_columns] = scaler.fit_transform(df[numerical_columns])

        # The model outputs a fraud probability; threshold it at 0.5
        prediction = model.predict(df)
        fraud_label = 'Yes' if prediction[0][0] > 0.5 else 'No'
        df['Fraud'] = fraud_label

        print("Received Message {}: {}".format(i, data))
        print("Prediction: {}".format(fraud_label))
finally:
    consumer.close()
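A caveat on the consumer above: it refits its `LabelEncoder` and `StandardScaler` on every incoming message, which collapses each single-row batch to constants (a lone value always encodes to 0 and scales to 0). A minimal sketch of the intended pattern, fitting the preprocessing once on training data and only transforming at inference; the training rows and column names here are illustrative, not the real dataset:

```python
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Illustrative training sample; in the real pipeline this would be the
# training split used to build fraud_detection_model.h5
train = pd.DataFrame({
    'merchant': ['A', 'B', 'A'],
    'amt': [10.0, 250.0, 40.0],
})

# Fit once, up front
label_encoders = {'merchant': LabelEncoder().fit(train['merchant'])}
scaler = StandardScaler().fit(train[['amt']])

def preprocess(row):
    """Apply the already-fitted preprocessing to one incoming record."""
    df = pd.DataFrame([row])
    for col, le in label_encoders.items():
        df[col] = le.transform(df[col])
    df[['amt']] = scaler.transform(df[['amt']])
    return df

out = preprocess({'merchant': 'B', 'amt': 250.0})
```

In the consumer loop, `preprocess(message.value)` would then replace the per-message `fit_transform` calls, so streamed rows are encoded consistently with the training data.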

19 changes: 19 additions & 0 deletions KafkaProducer.py
@@ -0,0 +1,19 @@
import csv
import json
import time

from kafka import KafkaProducer

producer = KafkaProducer(bootstrap_servers='localhost:9092')
csv_file = r'C:\Users\Sharm\Desktop\sbi\X_test.csv'

with open(csv_file, 'r') as file:
    csv_reader = csv.reader(file)
    next(csv_reader)  # skip the header row
    for i, row in enumerate(csv_reader, start=1):
        # Send the row as a JSON-encoded Kafka message
        json_data = json.dumps(row)
        producer.send('hello_world', value=json_data.encode('utf-8'))
        print(f"Message {i} sent: {json_data}")
        # Short delay for readability
        time.sleep(0.1)

producer.flush()
producer.close()
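One caveat on the message format: the producer sends each row as a bare JSON array, so the consumer must assume the CSV column order matches its hard-coded `columns` list. A possible alternative, sketched here with an in-memory sample (the merchant value and two-column layout are made up for illustration), is to send column-keyed objects built by `csv.DictReader`:

```python
import csv
import io
import json

# Illustrative two-column sample standing in for X_test.csv
sample_csv = "merchant,amt\nfraud_Kirlin and Sons,4.97\n"

reader = csv.DictReader(io.StringIO(sample_csv))
messages = [json.dumps(row).encode('utf-8') for row in reader]
```

Each element of `messages` is ready to pass as `producer.send('hello_world', value=...)`, and the consumer could then build its DataFrame directly from the decoded dict without relying on column order.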

97 changes: 81 additions & 16 deletions README.md
@@ -1,26 +1,91 @@
# Pitch-to-SBI-Hackathon

## Submission Instruction:
1. Fork this repository
2. Create a folder with your Team Name
3. Upload all the code and necessary files in the created folder
4. Upload a **README.md** file in your folder with the information mentioned below.
5. Generate a Pull Request with your Team Name. (Example: submission-XYZ_team)
## README.md:

#### Team Name - Uncertainitycoders
#### Problem Statement - Real-Time High Scale Financial Fraud Risk Management
#### Team Leader Email - [email protected]

### Data Dictionary

| Field | Description |
| --- | --- |
| trans_date_trans_time | Transaction Date and Time |
| merchant | Merchant Name |
| category | Category of Merchant |
| amt | Amount of Transaction |
| city | City of Credit Card Holder |
| state | State of Credit Card Holder |
| lat | Latitude Location of Purchase |
| long | Longitude Location of Purchase |
| city_pop | Credit Card Holder's City Population |
| job | Job of Credit Card Holder |
| dob | Date of Birth of Credit Card Holder |
| trans_num | Transaction Number |
| merch_lat | Latitude Location of Merchant |
| merch_long | Longitude Location of Merchant |
| is_fraud | Whether Transaction is Fraud (1) or Not (0) |

## A Brief of the Prototype:
This section must include UML Diagrams and prototype description

We have deployed Kafka, a highly scalable, open-source transaction-data streamer used by large corporations such as Netflix to channel enormous traffic. Our pipeline converts the incoming JSON data into a preprocessed format that our model then predicts on. Our ANN model shows an accuracy of 99.6% and has been tuned to reduce the number of false positives.

Anomaly detectors are not as scalable and can only predict based on the patterns learned from their training data. In the modern world, fraud threats evolve over time, so there is a need for models that also learn from incoming data, picking up new threats and evolving with them; hence ANN models are well suited to this application. The training data and ANN model are as close to real-world conditions as possible, with identity taken into consideration. Kafka, the open-source, highly scalable real-time data-streaming network, has been utilised for transport.

## Tech Stack:
List Down all technologies used to Build the prototype

Apache Kafka (kafka-python client), TensorFlow/Keras, scikit-learn, pandas

-----------------------------------------------------------------------------------------------------------------------------------------------------

![image](https://github.com/sharma-kshitij-ks/Pitch-to-SBI-Hackathon/assets/124446613/ca662326-dff4-4166-a400-13d99bd04c4d)

--------------------------------------------------------------------------------------------------------------------------------------------------------
## Step-by-Step Code Execution Instructions:
This Section must contain a set of instructions required to clone and run the prototype so that it can be tested and deeply analyzed
---------------------------------------------------
Execute the code in Uncertainitycoders.ipynb after loading the custom dataset into the notebook
------------------------------------------------------
Set up Kafka --

Create two folders on the F drive:
kafka_logs/zookeeper
kafka_logs/server_logs

change the zookeeper.properties:
------------------------------------------------------
dataDir=F:/kafka_logs/zookeeper
maxClientCnxns=1

This property limits the number of active connections from a host, specified by IP address, to a single ZooKeeper server.

change the server.properties:
----------------------------------------------------
uncomment listeners
log.dirs=F:/kafka_logs/server_logs
zookeeper.connect=localhost:2181
zookeeper.connection.timeout.ms=60000
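Taken together, the edited lines of server.properties would look roughly like this (the listener value shown is Kafka's stock default once uncommented; adjust it if your broker should bind to a different address):

```properties
# uncommented stock listener
listeners=PLAINTEXT://:9092
log.dirs=F:/kafka_logs/server_logs
zookeeper.connect=localhost:2181
zookeeper.connection.timeout.ms=60000
```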

Start Zookeeper:
---------------------------------------
F:/kafka_2.12-3.2.0/bin/windows/zookeeper-server-start.bat F:/kafka_2.12-3.2.0/config/zookeeper.properties

Start Kafka-server:
-----------------------------------------
F:/kafka_2.12-3.2.0/bin/windows/kafka-server-start.bat F:/kafka_2.12-3.2.0/config/server.properties

Create topic:
------------------------------------
F:/kafka_2.12-3.2.0/bin/windows/kafka-topics.bat --create --topic hello_world --bootstrap-server localhost:9092 --replication-factor 1 --partitions 1

Start Producer:
--------------------------------------
F:/kafka_2.12-3.2.0/bin/windows/kafka-console-producer.bat --topic hello_world --bootstrap-server localhost:9092

Start Consumer:
-------------------------------------
F:/kafka_2.12-3.2.0/bin/windows/kafka-console-consumer.bat --topic hello_world --from-beginning --bootstrap-server localhost:9092

kafka-python installation:
--------------------------------------------------
pip install kafka-python
(To learn more about this client, see: https://pypi.org/project/kafka-python/)


## What I Learned:
Write about the biggest learning you had while developing the prototype
The world of real-time, high-scale, accurate data-processing and streaming networks.
570 changes: 570 additions & 0 deletions Uncertainitycoders.ipynb

Large diffs are not rendered by default.

Binary file added Uncertainitycoders.pdf
Binary file not shown.