Skip to content

Commit d9ce28c

Browse files
committed
add file
1 parent 1ad5d4c commit d9ce28c

File tree

1 file changed

+326
-0
lines changed

1 file changed

+326
-0
lines changed

seoul_pops_eda.ipynb

Lines changed: 326 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,326 @@
1+
{
2+
"nbformat": 4,
3+
"nbformat_minor": 0,
4+
"metadata": {
5+
"colab": {
6+
"private_outputs": true,
7+
"provenance": []
8+
},
9+
"kernelspec": {
10+
"name": "python3",
11+
"display_name": "Python 3"
12+
},
13+
"language_info": {
14+
"name": "python"
15+
}
16+
},
17+
"cells": [
18+
{
19+
"cell_type": "markdown",
20+
"source": [
21+
"## 데이터 로드\n",
22+
"* https://data.seoul.go.kr/dataVisual/seoul/seoulLivingPopulation.do\n",
23+
"* https://data.seoul.go.kr/dataList/OA-14991/S/1/datasetView.do?tab=A"
24+
],
25+
"metadata": {
26+
"id": "HE3dPwwKZfgP"
27+
}
28+
},
29+
{
30+
"cell_type": "code",
31+
"metadata": {
32+
"id": "e7603863"
33+
},
34+
"source": [
35+
"import pandas as pd\n",
36+
"import matplotlib.pyplot as plt\n",
37+
"import seaborn as sns\n",
38+
"import csv\n",
39+
"\n",
40+
"df = pd.read_csv('/content/LOCAL_PEOPLE_20251123.csv', encoding='cp949', engine='python', on_bad_lines='skip')\n",
41+
"df"
42+
],
43+
"execution_count": null,
44+
"outputs": []
45+
},
46+
{
47+
"cell_type": "code",
48+
"metadata": {
49+
"id": "96f2454d"
50+
},
51+
"source": [
52+
"df.info()"
53+
],
54+
"execution_count": null,
55+
"outputs": []
56+
},
57+
{
58+
"cell_type": "code",
59+
"metadata": {
60+
"id": "03773dc1"
61+
},
62+
"source": [
63+
"numerical_cols = df.columns.drop(['기준일ID', '시간대구분', '행정동코드'])\n",
64+
"\n",
65+
"for col in numerical_cols:\n",
66+
" df[col] = pd.to_numeric(df[col], errors='coerce')\n",
67+
"\n",
68+
"df.info()"
69+
],
70+
"execution_count": null,
71+
"outputs": []
72+
},
73+
{
74+
"cell_type": "code",
75+
"metadata": {
76+
"id": "ce009a01"
77+
},
78+
"source": [
79+
"df.describe()"
80+
],
81+
"execution_count": null,
82+
"outputs": []
83+
},
84+
{
85+
"cell_type": "markdown",
86+
"source": [
87+
"## 깔끔한 데이터 만들기"
88+
],
89+
"metadata": {
90+
"id": "qqKxfZZ7Zcvt"
91+
}
92+
},
93+
{
94+
"cell_type": "code",
95+
"metadata": {
96+
"id": "732a3899"
97+
},
98+
"source": [
99+
"id_vars = ['기준일ID', '시간대구분', '행정동코드']\n",
100+
"value_vars = [col for col in df.columns if col not in id_vars]\n",
101+
"\n",
102+
"df_tidy = pd.melt(df, id_vars=id_vars, value_vars=value_vars, var_name='변수', value_name='생활인구수')\n",
103+
"df_tidy.head()"
104+
],
105+
"execution_count": null,
106+
"outputs": []
107+
},
108+
{
109+
"cell_type": "code",
110+
"metadata": {
111+
"id": "02e7ba68"
112+
},
113+
"source": [
114+
"df_tidy['성별'] = df_tidy['변수'].apply(lambda x: '남' if '남자' in x else ('여' if '여자' in x else 'Total'))\n",
115+
"df_tidy['연령대'] = df_tidy['변수'].apply(lambda x: x.replace('남자', '').replace('여자', '').replace('생활인구수', '') if '생활인구수' in x else x)\n",
116+
"df_tidy.loc[df_tidy['연령대'] == '총', '연령대'] = '총합'\n",
117+
"\n",
118+
"df_tidy = df_tidy.drop(columns=['변수'])\n",
119+
"\n",
120+
"df_tidy.head()"
121+
],
122+
"execution_count": null,
123+
"outputs": []
124+
},
125+
{
126+
"cell_type": "code",
127+
"metadata": {
128+
"id": "8b34e4c9"
129+
},
130+
"source": [
131+
"df_tidy['생활인구구분'] = df_tidy['성별'].apply(lambda x: '총생활인구수' if x == 'Total' else 'Age/Gender Specific Population')\n",
132+
"\n",
133+
"print(\"'생활인구구분' column created.\")\n",
134+
"\n",
135+
"print(df_tidy.info())\n",
136+
"df_tidy.describe()"
137+
],
138+
"execution_count": null,
139+
"outputs": []
140+
},
141+
{
142+
"cell_type": "code",
143+
"source": [
144+
"df_tidy.describe(include='object')"
145+
],
146+
"metadata": {
147+
"id": "lPGByrYZKsj3"
148+
},
149+
"execution_count": null,
150+
"outputs": []
151+
},
152+
{
153+
"cell_type": "code",
154+
"source": [
155+
"!pip install -Uq koreanize-matplotlib\n",
156+
"import koreanize_matplotlib"
157+
],
158+
"metadata": {
159+
"id": "pPBYjOgWKJXm"
160+
},
161+
"execution_count": null,
162+
"outputs": []
163+
},
164+
{
165+
"cell_type": "code",
166+
"source": [
167+
"df_tidy.hist();"
168+
],
169+
"metadata": {
170+
"id": "Qj2RCievGVvC"
171+
},
172+
"execution_count": null,
173+
"outputs": []
174+
},
175+
{
176+
"cell_type": "code",
177+
"metadata": {
178+
"id": "205e846f"
179+
},
180+
"source": [
181+
"mapping_df = pd.read_excel('/content/행정동코드_매핑정보_20241218.xlsx', skiprows=[0])\n",
182+
"display(mapping_df.head())\n",
183+
"mapping_df.info()"
184+
],
185+
"execution_count": null,
186+
"outputs": []
187+
},
188+
{
189+
"cell_type": "code",
190+
"source": [
191+
"H_DNG_CD = mapping_df[['H_DNG_CD', 'H_DNG_NM']].set_index('H_DNG_CD')['H_DNG_NM'].to_dict()\n",
192+
"H_DNG_CD"
193+
],
194+
"metadata": {
195+
"id": "tRjcSDfwiDnd"
196+
},
197+
"execution_count": null,
198+
"outputs": []
199+
},
200+
{
201+
"cell_type": "code",
202+
"source": [
203+
"df_tidy['행정동코드'] = df_tidy['행정동코드'].astype(int)\n",
204+
"df_tidy['행정동명'] = df_tidy['행정동코드'].map(H_DNG_CD)\n",
205+
"df_tidy.head()"
206+
],
207+
"metadata": {
208+
"id": "ORasNYZ6RMVT"
209+
},
210+
"execution_count": null,
211+
"outputs": []
212+
},
213+
{
214+
"cell_type": "code",
215+
"source": [
216+
"df_tidy['행정동명'].unique()"
217+
],
218+
"metadata": {
219+
"id": "KMDKMoIyYvb2"
220+
},
221+
"execution_count": null,
222+
"outputs": []
223+
},
224+
{
225+
"cell_type": "code",
226+
"source": [
227+
"df_tidy['행정동명'].value_counts()"
228+
],
229+
"metadata": {
230+
"id": "jq650eAURkqk"
231+
},
232+
"execution_count": null,
233+
"outputs": []
234+
},
235+
{
236+
"cell_type": "code",
237+
"source": [
238+
"df_tidy['연령대'].value_counts()"
239+
],
240+
"metadata": {
241+
"id": "x2_4Y9Dpi6vE"
242+
},
243+
"execution_count": null,
244+
"outputs": []
245+
},
246+
{
247+
"cell_type": "code",
248+
"source": [
249+
"hour_age = pd.crosstab(df_tidy['시간대구분'], df_tidy['연령대'], values=df_tidy['생활인구수'], aggfunc='sum')\n",
250+
"hour_age"
251+
],
252+
"metadata": {
253+
"id": "pfSU0QN_kgne"
254+
},
255+
"execution_count": null,
256+
"outputs": []
257+
},
258+
{
259+
"cell_type": "code",
260+
"source": [
261+
"plt.figure(figsize=(20, 10))\n",
262+
"sns.heatmap(hour_age, annot=True, cmap='Blues')"
263+
],
264+
"metadata": {
265+
"id": "wBQ8oCsqlKlJ"
266+
},
267+
"execution_count": null,
268+
"outputs": []
269+
},
270+
{
271+
"cell_type": "markdown",
272+
"source": [
273+
"## 특정 행정동 분석"
274+
],
275+
"metadata": {
276+
"id": "kIUxYheYkezM"
277+
}
278+
},
279+
{
280+
"cell_type": "code",
281+
"metadata": {
282+
"id": "291334ed"
283+
},
284+
"source": [
285+
"df_filtered = df_tidy[df_tidy['행정동명'].isin(['을지로동', '명동', '목1동'])]\n",
286+
"df_filtered = df_filtered[df_filtered['연령대'] != '총합']\n",
287+
"df_filtered.head()"
288+
],
289+
"execution_count": null,
290+
"outputs": []
291+
},
292+
{
293+
"cell_type": "code",
294+
"source": [
295+
"plt.figure(figsize=(20, 5))\n",
296+
"sns.pointplot(data=df_filtered, x='연령대', y='생활인구수', hue='행정동명')"
297+
],
298+
"metadata": {
299+
"id": "F1opIei5gre5"
300+
},
301+
"execution_count": null,
302+
"outputs": []
303+
},
304+
{
305+
"cell_type": "code",
306+
"source": [
307+
"plt.figure(figsize=(20, 5))\n",
308+
"sns.pointplot(data=df_filtered, x='시간대구분', y='생활인구수', hue='행정동명')"
309+
],
310+
"metadata": {
311+
"id": "p2jgBth7hoBw"
312+
},
313+
"execution_count": null,
314+
"outputs": []
315+
},
316+
{
317+
"cell_type": "code",
318+
"source": [],
319+
"metadata": {
320+
"id": "zBi5KXMMlCtI"
321+
},
322+
"execution_count": null,
323+
"outputs": []
324+
}
325+
]
326+
}

0 commit comments

Comments
 (0)