# sp500_cot_sentiment_analysis.py
# Downloads CFTC COT financial-futures reports, keeps the E-MINI S&P 500 rows,
# and plots dealer / leveraged-money positioning as a share of open interest.
# Import dependencies
import pandas as pd
import matplotlib.pyplot as plt
import datetime
import zipfile
import urllib.request
import shutil
import os
def download_and_extract_cot_file(url, file_name, timeout=60):
    """
    Download a ZIP archive from the given URL and extract it.

    The archive is first saved to ``file_name`` (and left on disk), then every
    member is extracted into the current working directory.

    Args:
        url: Location of the ZIP archive to fetch.
        file_name: Local path under which the downloaded archive is stored.
        timeout: Seconds to wait on blocking network operations, so that a
            stalled connection cannot hang the script indefinitely.

    Returns:
        The list of member names contained in the archive, in archive order.

    Raises:
        urllib.error.URLError: If the URL cannot be opened.
        zipfile.BadZipFile: If the downloaded file is not a valid ZIP archive.
    """
    # The request carries an explicit browser-like User-Agent header.
    request = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    with urllib.request.urlopen(request, timeout=timeout) as response, \
            open(file_name, 'wb') as out_file:
        # Stream the body to disk instead of holding it all in memory.
        shutil.copyfileobj(response, out_file)
    with zipfile.ZipFile(file_name) as zf:
        zf.extractall()
        return zf.namelist()
# Create an empty list to store one data frame per year
frames = []
this_year = datetime.datetime.now().year
# Loop through the five previous years plus the current year
for year in range(this_year - 5, this_year + 1):
    # Download and extract COT files for the year
    download_and_extract_cot_file(
        f'https://www.cftc.gov/files/dea/history/fut_fin_xls_{year}.zip', f'{year}.zip'
    )
    # Rename the extracted file (the script expects every archive to contain
    # 'FinFutYY.xls'). os.replace overwrites a copy left by a previous run,
    # whereas os.rename fails on Windows when the target already exists.
    os.replace('FinFutYY.xls', f'{year}.xls')
    # Read the data from the renamed file
    data = pd.read_excel(f'{year}.xls')
    # Select relevant columns
    data = data[
        [
            'Market_and_Exchange_Names',
            'Report_Date_as_MM_DD_YYYY',
            'Pct_of_OI_Dealer_Long_All',
            'Pct_of_OI_Dealer_Short_All',
            'Pct_of_OI_Lev_Money_Long_All',
            'Pct_of_OI_Lev_Money_Short_All',
        ]
    ]
    # Set the index as the report date and convert to datetime format
    data = data.set_index('Report_Date_as_MM_DD_YYYY')
    data.index = pd.to_datetime(data.index)
    # Reverse the row order of the data frame
    data = data.iloc[::-1]
    # Select only the data for the E-MINI S&P 500 - CHICAGO MERCANTILE EXCHANGE
    data = data.loc[data['Market_and_Exchange_Names'] == 'E-MINI S&P 500 - CHICAGO MERCANTILE EXCHANGE']
    # Append the data frame to the frames list
    frames.append(data)
# Combine the per-year frames and persist the result as a CSV file
pd.concat(frames).to_csv('COT_sp500_data.csv')
# Load the CSV back, using its first column as the index
df = pd.read_csv('COT_sp500_data.csv', index_col=0)
# The index comes back as plain text, so parse it into datetime values again
df.index = pd.to_datetime(df.index)
# Short aliases for the four percent-of-open-interest columns
dealer_long_percent = df['Pct_of_OI_Dealer_Long_All']
dealer_short_percent = df['Pct_of_OI_Dealer_Short_All']
lev_long_percent = df['Pct_of_OI_Lev_Money_Long_All']
lev_short_percent = df['Pct_of_OI_Lev_Money_Short_All']
# Line Chart: one line per series against the date index, drawn in this order
for series, series_label in (
    (dealer_long_percent, 'Dealer Long'),
    (lev_long_percent, 'Leveraged Long'),
    (dealer_short_percent, 'Dealer Short'),
    (lev_short_percent, 'Leveraged Short'),
):
    plt.plot(df.index, series, label=series_label)
# Axis labels, title and legend
plt.xlabel('Date')
plt.ylabel('Percentage')
plt.title('Net Positions - Line Chart')
plt.legend()
plt.tight_layout()
plt.show()
# Box Plot
# Columns and display labels are listed once, in matching order, so every box,
# tick label and annotation refers to the same series.
box_columns = [
    'Pct_of_OI_Dealer_Long_All',
    'Pct_of_OI_Dealer_Short_All',
    'Pct_of_OI_Lev_Money_Long_All',
    'Pct_of_OI_Lev_Money_Short_All',
]
box_labels = ['Dealer Long', 'Dealer Short', 'Leveraged Money Long', 'Leveraged Money Short']
# NOTE(review): Matplotlib 3.9 renamed ``labels`` to ``tick_labels``; the
# original keyword is kept for older installs - confirm the target version.
boxplot = plt.boxplot(
    [df[column] for column in box_columns],
    labels=box_labels,
    patch_artist=True,
)
# Value in the last row of each series, shown as the "current" reading
current_values = [df[column].iloc[-1] for column in box_columns]
# Colour each box and annotate it with the current value
for i, box in enumerate(boxplot['boxes']):
    box.set_facecolor('lightblue')
    # Boxes are placed at x = 1, 2, ... by default, hence the i + 1 offset
    plt.text(i + 1, current_values[i], f"{current_values[i]:.2f}", ha='center', va='bottom')
plt.title('Distribution of Open Interest by Category')
plt.ylabel('Percentage')
plt.grid(True)
plt.show()