1+ {
2+ "nbformat" : 4 ,
3+ "nbformat_minor" : 0 ,
4+ "metadata" : {
5+ "colab" : {
6+ "private_outputs" : true ,
7+ "provenance" : []
8+ },
9+ "kernelspec" : {
10+ "name" : " python3" ,
11+ "display_name" : " Python 3"
12+ },
13+ "language_info" : {
14+ "name" : " python"
15+ }
16+ },
17+ "cells" : [
18+ {
19+ "cell_type" : " markdown" ,
20+ "source" : [
21+ " ## 데이터 로드\n " ,
22+ " * https://data.seoul.go.kr/dataVisual/seoul/seoulLivingPopulation.do\n " ,
23+ " * https://data.seoul.go.kr/dataList/OA-14991/S/1/datasetView.do?tab=A"
24+ ],
25+ "metadata" : {
26+ "id" : " HE3dPwwKZfgP"
27+ }
28+ },
29+ {
30+ "cell_type" : " code" ,
31+ "metadata" : {
32+ "id" : " e7603863"
33+ },
34+ "source" : [
35+ " import pandas as pd\n " ,
36+ " import matplotlib.pyplot as plt\n " ,
37+ " import seaborn as sns\n " ,
38+ " import csv\n " ,
39+ " \n " ,
40+ " df = pd.read_csv('/content/LOCAL_PEOPLE_20251123.csv', encoding='cp949', engine='python', on_bad_lines='skip')\n " ,
41+ " df"
42+ ],
43+ "execution_count" : null ,
44+ "outputs" : []
45+ },
46+ {
47+ "cell_type" : " code" ,
48+ "metadata" : {
49+ "id" : " 96f2454d"
50+ },
51+ "source" : [
52+ " df.info()"
53+ ],
54+ "execution_count" : null ,
55+ "outputs" : []
56+ },
57+ {
58+ "cell_type" : " code" ,
59+ "metadata" : {
60+ "id" : " 03773dc1"
61+ },
62+ "source" : [
63+ " # Treat every column except the three identifier columns as a measurement column.\n " ,
64+ " numerical_cols = df.columns.drop(['기준일ID', '시간대구분', '행정동코드'])\n " ,
65+ " # Coerce all measurement columns in one column-wise pass; values that fail to parse become NaN.\n " ,
66+ " df[numerical_cols] = df[numerical_cols].apply(pd.to_numeric, errors='coerce')\n " ,
67+ " \n " ,
68+ " df.info()"
69+ ],
70+ "execution_count" : null ,
71+ "outputs" : []
72+ },
73+ {
74+ "cell_type" : " code" ,
75+ "metadata" : {
76+ "id" : " ce009a01"
77+ },
78+ "source" : [
79+ " df.describe()"
80+ ],
81+ "execution_count" : null ,
82+ "outputs" : []
83+ },
84+ {
85+ "cell_type" : " markdown" ,
86+ "source" : [
87+ " ## 깔끔한 데이터 만들기"
88+ ],
89+ "metadata" : {
90+ "id" : " qqKxfZZ7Zcvt"
91+ }
92+ },
93+ {
94+ "cell_type" : " code" ,
95+ "metadata" : {
96+ "id" : " 732a3899"
97+ },
98+ "source" : [
99+ " id_vars = ['기준일ID', '시간대구분', '행정동코드']\n " ,
100+ " value_vars = df.columns[~df.columns.isin(id_vars)].tolist()\n " ,
101+ " # Reshape wide to long: each non-identifier column label goes into '변수' and its cell value into '생활인구수'.\n " ,
102+ " df_tidy = df.melt(id_vars=id_vars, value_vars=value_vars, var_name='변수', value_name='생활인구수')\n " ,
103+ " df_tidy.head()"
104+ ],
105+ "execution_count" : null ,
106+ "outputs" : []
107+ },
108+ {
109+ "cell_type" : " code" ,
110+ "metadata" : {
111+ "id" : " 02e7ba68"
112+ },
113+ "source" : [
114+ " source_col = df_tidy['변수']  # melted column labels; both derived columns are computed from them\n " ,
115+ " df_tidy['성별'] = source_col.map(lambda name: '남' if '남자' in name else '여' if '여자' in name else 'Total')\n " ,
116+ " # Strip the gender and suffix tokens only from labels containing '생활인구수'; a bare '총' remainder is renamed to '총합'.\n " ,
117+ " df_tidy['연령대'] = source_col.map(lambda name: name.replace('남자', '').replace('여자', '').replace('생활인구수', '') if '생활인구수' in name else name).replace('총', '총합')\n " ,
118+ " df_tidy = df_tidy.drop(columns='변수')\n " ,
119+ " \n " ,
120+ " df_tidy.head()"
121+ ],
122+ "execution_count" : null ,
123+ "outputs" : []
124+ },
125+ {
126+ "cell_type" : " code" ,
127+ "metadata" : {
128+ "id" : " 8b34e4c9"
129+ },
130+ "source" : [
131+ " # Rows whose '성별' is 'Total' are labelled as the overall total; every other row as age/gender specific.\n " ,
132+ " df_tidy['생활인구구분'] = df_tidy['성별'].apply(lambda x: '총생활인구수' if x == 'Total' else 'Age/Gender Specific Population')\n " ,
133+ " print(\" '생활인구구분' column created.\" )\n " ,
134+ " # DataFrame.info() writes its report itself and returns None, so wrapping it in print() would emit a stray None.\n " ,
135+ " df_tidy.info()\n " ,
136+ " df_tidy.describe()"
137+ ],
138+ "execution_count" : null ,
139+ "outputs" : []
140+ },
141+ {
142+ "cell_type" : " code" ,
143+ "source" : [
144+ " df_tidy.describe(include='object')"
145+ ],
146+ "metadata" : {
147+ "id" : " lPGByrYZKsj3"
148+ },
149+ "execution_count" : null ,
150+ "outputs" : []
151+ },
152+ {
153+ "cell_type" : " code" ,
154+ "source" : [
155+ " %pip install -Uq koreanize-matplotlib\n " ,
156+ " import koreanize_matplotlib  # imported only for its import-time effect (presumably Korean font setup for matplotlib - confirm)"
157+ ],
158+ "metadata" : {
159+ "id" : " pPBYjOgWKJXm"
160+ },
161+ "execution_count" : null ,
162+ "outputs" : []
163+ },
164+ {
165+ "cell_type" : " code" ,
166+ "source" : [
167+ " df_tidy.hist();"
168+ ],
169+ "metadata" : {
170+ "id" : " Qj2RCievGVvC"
171+ },
172+ "execution_count" : null ,
173+ "outputs" : []
174+ },
175+ {
176+ "cell_type" : " code" ,
177+ "metadata" : {
178+ "id" : " 205e846f"
179+ },
180+ "source" : [
181+ " mapping_df = pd.read_excel('/content/행정동코드_매핑정보_20241218.xlsx', skiprows=[0])\n " ,
182+ " display(mapping_df.head())\n " ,
183+ " mapping_df.info()"
184+ ],
185+ "execution_count" : null ,
186+ "outputs" : []
187+ },
188+ {
189+ "cell_type" : " code" ,
190+ "source" : [
191+ " H_DNG_CD = mapping_df.set_index('H_DNG_CD')['H_DNG_NM'].to_dict()  # lookup table: H_DNG_CD value -> H_DNG_NM value\n " ,
192+ " H_DNG_CD"
193+ ],
194+ "metadata" : {
195+ "id" : " tRjcSDfwiDnd"
196+ },
197+ "execution_count" : null ,
198+ "outputs" : []
199+ },
200+ {
201+ "cell_type" : " code" ,
202+ "source" : [
203+ " # Cast the code column to int first, then look each code up in H_DNG_CD to attach the name column.\n " ,
204+ " df_tidy = df_tidy.assign(행정동코드=lambda d: d['행정동코드'].astype(int), 행정동명=lambda d: d['행정동코드'].map(H_DNG_CD))\n " ,
205+ " df_tidy.head()"
206+ ],
207+ "metadata" : {
208+ "id" : " ORasNYZ6RMVT"
209+ },
210+ "execution_count" : null ,
211+ "outputs" : []
212+ },
213+ {
214+ "cell_type" : " code" ,
215+ "source" : [
216+ " df_tidy['행정동명'].unique()"
217+ ],
218+ "metadata" : {
219+ "id" : " KMDKMoIyYvb2"
220+ },
221+ "execution_count" : null ,
222+ "outputs" : []
223+ },
224+ {
225+ "cell_type" : " code" ,
226+ "source" : [
227+ " df_tidy['행정동명'].value_counts()"
228+ ],
229+ "metadata" : {
230+ "id" : " jq650eAURkqk"
231+ },
232+ "execution_count" : null ,
233+ "outputs" : []
234+ },
235+ {
236+ "cell_type" : " code" ,
237+ "source" : [
238+ " df_tidy['연령대'].value_counts()"
239+ ],
240+ "metadata" : {
241+ "id" : " x2_4Y9Dpi6vE"
242+ },
243+ "execution_count" : null ,
244+ "outputs" : []
245+ },
246+ {
247+ "cell_type" : " code" ,
248+ "source" : [
249+ " hour_age = pd.crosstab(df_tidy['시간대구분'], df_tidy['연령대'], values=df_tidy['생활인구수'], aggfunc='sum')\n " ,
250+ " hour_age"
251+ ],
252+ "metadata" : {
253+ "id" : " pfSU0QN_kgne"
254+ },
255+ "execution_count" : null ,
256+ "outputs" : []
257+ },
258+ {
259+ "cell_type" : " code" ,
260+ "source" : [
261+ " plt.figure(figsize=(20, 10))  # below: the '총합' total column is left out so it does not swamp the colour scale, and fmt replaces the default '.2g' scientific notation\n " ,
262+ " sns.heatmap(hour_age.drop(columns='총합', errors='ignore'), annot=True, fmt=',.0f', cmap='Blues');"
263+ ],
264+ "metadata" : {
265+ "id" : " wBQ8oCsqlKlJ"
266+ },
267+ "execution_count" : null ,
268+ "outputs" : []
269+ },
270+ {
271+ "cell_type" : " markdown" ,
272+ "source" : [
273+ " ## 특정 행정동 분석"
274+ ],
275+ "metadata" : {
276+ "id" : " kIUxYheYkezM"
277+ }
278+ },
279+ {
280+ "cell_type" : " code" ,
281+ "metadata" : {
282+ "id" : " 291334ed"
283+ },
284+ "source" : [
285+ " target_dongs = ['을지로동', '명동', '목1동']  # districts to compare\n " ,
286+ " df_filtered = df_tidy.loc[df_tidy['행정동명'].isin(target_dongs) & df_tidy['연령대'].ne('총합')]  # '총합' rows are excluded\n " ,
287+ " df_filtered.head()"
288+ ],
289+ "execution_count" : null ,
290+ "outputs" : []
291+ },
292+ {
293+ "cell_type" : " code" ,
294+ "source" : [
295+ " fig, ax = plt.subplots(figsize=(20, 5))  # explicit figure/axes pair instead of the implicit current figure\n " ,
296+ " sns.pointplot(data=df_filtered, x='연령대', y='생활인구수', hue='행정동명', ax=ax)"
297+ ],
298+ "metadata" : {
299+ "id" : " F1opIei5gre5"
300+ },
301+ "execution_count" : null ,
302+ "outputs" : []
303+ },
304+ {
305+ "cell_type" : " code" ,
306+ "source" : [
307+ " fig, ax = plt.subplots(figsize=(20, 5))  # explicit figure/axes pair instead of the implicit current figure\n " ,
308+ " sns.pointplot(data=df_filtered, x='시간대구분', y='생활인구수', hue='행정동명', ax=ax)"
309+ ],
310+ "metadata" : {
311+ "id" : " p2jgBth7hoBw"
312+ },
313+ "execution_count" : null ,
314+ "outputs" : []
315+ },
316+ {
317+ "cell_type" : " code" ,
318+ "source" : [],
319+ "metadata" : {
320+ "id" : " zBi5KXMMlCtI"
321+ },
322+ "execution_count" : null ,
323+ "outputs" : []
324+ }
325+ ]
326+ }
0 commit comments