Skip to content

Commit 692ea91

Browse files
committed
Change encoding to include BOM in read_csv in initial settings cell.
1 parent da393cc commit 692ea91

File tree

4 files changed

+260
-258
lines changed

4 files changed

+260
-258
lines changed

docker/doc/answer/ans_preprocess_knock_Python.html

Lines changed: 107 additions & 107 deletions
Original file line numberDiff line numberDiff line change
@@ -7599,12 +7599,12 @@ <h2 id="%E3%81%AF%E3%81%98%E3%82%81%E3%81%AB">はじめに<a class="anchor-link"
75997599
<span class="s1">'street'</span><span class="p">:</span> <span class="nb">str</span>
76007600
<span class="p">}</span>
76017601

7602-
<span class="n">df_customer</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="s2">"../data/customer.csv"</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="s1">'utf-8-sig'</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">dtype</span><span class="p">)</span>
7603-
<span class="n">df_category</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="s2">"../data/category.csv"</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="s1">'utf-8-sig'</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">dtype</span><span class="p">)</span>
7604-
<span class="n">df_product</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="s2">"../data/product.csv"</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="s1">'utf-8-sig'</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">dtype</span><span class="p">)</span>
7605-
<span class="n">df_receipt</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="s2">"../data/receipt.csv"</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="s1">'utf-8-sig'</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">dtype</span><span class="p">)</span>
7606-
<span class="n">df_store</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="s2">"../data/store.csv"</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="s1">'utf-8-sig'</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">dtype</span><span class="p">)</span>
7607-
<span class="n">df_geocode</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="s2">"../data/geocode.csv"</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="s1">'utf-8-sig'</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">dtype</span><span class="p">)</span>
7602+
<span class="n">df_customer</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="s2">"../data/customer.csv"</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="s2">"utf-8-sig"</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">dtype</span><span class="p">)</span>
7603+
<span class="n">df_category</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="s2">"../data/category.csv"</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="s2">"utf-8-sig"</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">dtype</span><span class="p">)</span>
7604+
<span class="n">df_product</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="s2">"../data/product.csv"</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="s2">"utf-8-sig"</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">dtype</span><span class="p">)</span>
7605+
<span class="n">df_receipt</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="s2">"../data/receipt.csv"</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="s2">"utf-8-sig"</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">dtype</span><span class="p">)</span>
7606+
<span class="n">df_store</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="s2">"../data/store.csv"</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="s2">"utf-8-sig"</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">dtype</span><span class="p">)</span>
7607+
<span class="n">df_geocode</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="s2">"../data/geocode.csv"</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="s2">"utf-8-sig"</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">dtype</span><span class="p">)</span>
76087608
</pre></div>
76097609
</div>
76107610
</div>
@@ -20738,143 +20738,143 @@ <h1 id="%E6%BC%94%E7%BF%92%E5%95%8F%E9%A1%8C">演習問題<a class="anchor-link"
2073820738
</thead>
2073920739
<tbody>
2074020740
<tr>
20741-
<th>11370</th>
20742-
<td>CS008313000032</td>
20743-
<td>吹越 めぐみ</td>
20744-
<td>1</td>
20745-
<td>女性</td>
20746-
<td>1985-07-18</td>
20741+
<th>20008</th>
20742+
<td>CS032302000021</td>
20743+
<td>綾瀬 有起哉</td>
20744+
<td>0</td>
20745+
<td>男性</td>
20746+
<td>1986-03-06</td>
2074720747
<td>33</td>
20748-
<td>157-0067</td>
20749-
<td>東京都世田谷区喜多見**********</td>
20750-
<td>S13008</td>
20751-
<td>20150304</td>
20752-
<td>0-00000000-0</td>
20753-
</tr>
20754-
<tr>
20755-
<th>15527</th>
20756-
<td>CS032311000021</td>
20757-
<td>生瀬 美菜</td>
20758-
<td>1</td>
20759-
<td>女性</td>
20760-
<td>1986-09-04</td>
20761-
<td>32</td>
20762-
<td>212-0054</td>
20763-
<td>神奈川県川崎市幸区小倉**********</td>
20748+
<td>212-0058</td>
20749+
<td>神奈川県川崎市幸区鹿島田**********</td>
2076420750
<td>S13032</td>
20765-
<td>20150328</td>
20751+
<td>20151111</td>
2076620752
<td>0-00000000-0</td>
2076720753
</tr>
2076820754
<tr>
20769-
<th>18316</th>
20770-
<td>CS024503000006</td>
20771-
<td>天野 裕次郎</td>
20772-
<td>0</td>
20773-
<td>男性</td>
20774-
<td>1962-12-10</td>
20755+
<th>19665</th>
20756+
<td>CS001513000244</td>
20757+
<td>馬場 慢太郎</td>
20758+
<td>9</td>
20759+
<td>不明</td>
20760+
<td>1962-06-20</td>
2077520761
<td>56</td>
20776-
<td>214-0036</td>
20777-
<td>神奈川県川崎市多摩区南生田**********</td>
20778-
<td>S14024</td>
20779-
<td>20150910</td>
20780-
<td>0-00000000-0</td>
20762+
<td>144-0045</td>
20763+
<td>東京都大田区南六郷**********</td>
20764+
<td>S13001</td>
20765+
<td>20160727</td>
20766+
<td>B-20090105-1</td>
2078120767
</tr>
2078220768
<tr>
20783-
<th>12511</th>
20784-
<td>CS014513000201</td>
20785-
<td>篠崎 沙耶</td>
20769+
<th>3097</th>
20770+
<td>CS034415000122</td>
20771+
<td>梅沢 朝陽</td>
2078620772
<td>1</td>
2078720773
<td>女性</td>
20788-
<td>1962-07-03</td>
20789-
<td>56</td>
20790-
<td>263-0021</td>
20791-
<td>千葉県千葉市稲毛区轟町**********</td>
20792-
<td>S12014</td>
20793-
<td>20150529</td>
20794-
<td>6-20091116-9</td>
20774+
<td>1973-11-13</td>
20775+
<td>45</td>
20776+
<td>216-0001</td>
20777+
<td>神奈川県川崎市宮前区野川**********</td>
20778+
<td>S14034</td>
20779+
<td>20150406</td>
20780+
<td>D-20100616-D</td>
2079520781
</tr>
2079620782
<tr>
20797-
<th>17863</th>
20798-
<td>CS010315000119</td>
20799-
<td>佐久間 美帆</td>
20783+
<th>3760</th>
20784+
<td>CS003311000038</td>
20785+
<td>木下 さとみ</td>
2080020786
<td>1</td>
2080120787
<td>女性</td>
20802-
<td>1986-06-18</td>
20803-
<td>32</td>
20804-
<td>222-0021</td>
20805-
<td>神奈川県横浜市港北区篠原北**********</td>
20806-
<td>S14010</td>
20807-
<td>20150608</td>
20788+
<td>1971-07-01</td>
20789+
<td>47</td>
20790+
<td>251-0052</td>
20791+
<td>神奈川県藤沢市藤沢**********</td>
20792+
<td>S13003</td>
20793+
<td>20151226</td>
20794+
<td>3-20080919-2</td>
20795+
</tr>
20796+
<tr>
20797+
<th>15090</th>
20798+
<td>CS019503000044</td>
20799+
<td>山田 隆之介</td>
20800+
<td>0</td>
20801+
<td>男性</td>
20802+
<td>1966-11-28</td>
20803+
<td>52</td>
20804+
<td>176-0003</td>
20805+
<td>東京都練馬区羽沢**********</td>
20806+
<td>S13019</td>
20807+
<td>20150603</td>
2080820808
<td>0-00000000-0</td>
2080920809
</tr>
2081020810
<tr>
20811-
<th>11548</th>
20812-
<td>CS023315000060</td>
20813-
<td>倉本 未華子</td>
20811+
<th>18593</th>
20812+
<td>CS032615000087</td>
20813+
<td>城戸 彩</td>
2081420814
<td>1</td>
2081520815
<td>女性</td>
20816-
<td>1986-10-26</td>
20817-
<td>32</td>
20818-
<td>212-0026</td>
20819-
<td>神奈川県川崎市幸区紺屋町**********</td>
20820-
<td>S14023</td>
20821-
<td>20150523</td>
20822-
<td>0-00000000-0</td>
20816+
<td>1951-09-26</td>
20817+
<td>67</td>
20818+
<td>144-0054</td>
20819+
<td>東京都大田区新蒲田**********</td>
20820+
<td>S13032</td>
20821+
<td>20150131</td>
20822+
<td>7-20091214-6</td>
2082320823
</tr>
2082420824
<tr>
20825-
<th>962</th>
20826-
<td>CS015812000009</td>
20827-
<td>西谷 薫</td>
20825+
<th>15149</th>
20826+
<td>CS002415000820</td>
20827+
<td>柳田 知世</td>
2082820828
<td>1</td>
2082920829
<td>女性</td>
20830-
<td>1936-09-06</td>
20831-
<td>82</td>
20832-
<td>135-0016</td>
20833-
<td>東京都江東区東陽**********</td>
20834-
<td>S13015</td>
20835-
<td>20150522</td>
20836-
<td>0-00000000-0</td>
20830+
<td>1969-09-07</td>
20831+
<td>49</td>
20832+
<td>185-0011</td>
20833+
<td>東京都国分寺市本多**********</td>
20834+
<td>S13002</td>
20835+
<td>20170620</td>
20836+
<td>4-20100515-7</td>
2083720837
</tr>
2083820838
<tr>
20839-
<th>7220</th>
20840-
<td>CS001511000100</td>
20841-
<td>玉田 彩華</td>
20839+
<th>16527</th>
20840+
<td>CS039312000024</td>
20841+
<td>奥山 麻緒</td>
2084220842
<td>1</td>
2084320843
<td>女性</td>
20844-
<td>1961-10-14</td>
20845-
<td>57</td>
20846-
<td>210-0811</td>
20847-
<td>神奈川県川崎市川崎区大師河原**********</td>
20848-
<td>S13001</td>
20849-
<td>20161028</td>
20844+
<td>1980-02-19</td>
20845+
<td>39</td>
20846+
<td>168-0073</td>
20847+
<td>東京都杉並区下高井戸**********</td>
20848+
<td>S13039</td>
20849+
<td>20150606</td>
2085020850
<td>0-00000000-0</td>
2085120851
</tr>
2085220852
<tr>
20853-
<th>18578</th>
20854-
<td>CS040215000036</td>
20855-
<td>岡本 陽子</td>
20853+
<th>4217</th>
20854+
<td>CS002413000657</td>
20855+
<td>柳田 真悠子</td>
2085620856
<td>1</td>
2085720857
<td>女性</td>
20858-
<td>1994-11-17</td>
20859-
<td>24</td>
20860-
<td>226-0026</td>
20861-
<td>神奈川県横浜市緑区長津田町**********</td>
20862-
<td>S14040</td>
20863-
<td>20150904</td>
20864-
<td>A-20101022-6</td>
20858+
<td>1971-05-03</td>
20859+
<td>47</td>
20860+
<td>185-0022</td>
20861+
<td>東京都国分寺市東元町**********</td>
20862+
<td>S13002</td>
20863+
<td>20161223</td>
20864+
<td>4-20090216-1</td>
2086520865
</tr>
2086620866
<tr>
20867-
<th>14441</th>
20868-
<td>CS003113000020</td>
20869-
<td>河野 恵子</td>
20867+
<th>3133</th>
20868+
<td>CS032412000227</td>
20869+
<td>藤島 芽以</td>
2087020870
<td>1</td>
2087120871
<td>女性</td>
20872-
<td>2000-03-31</td>
20873-
<td>19</td>
20874-
<td>182-0022</td>
20875-
<td>東京都調布市国領町**********</td>
20876-
<td>S13003</td>
20877-
<td>20170617</td>
20872+
<td>1971-01-28</td>
20873+
<td>48</td>
20874+
<td>212-0004</td>
20875+
<td>神奈川県川崎市幸区小向西町**********</td>
20876+
<td>S13032</td>
20877+
<td>20170228</td>
2087820878
<td>0-00000000-0</td>
2087920879
</tr>
2088020880
</tbody>

docker/doc/preprocess_knock_Python.html

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7599,12 +7599,12 @@ <h2 id="%E3%81%AF%E3%81%98%E3%82%81%E3%81%AB">はじめに<a class="anchor-link"
75997599
<span class="s1">'street'</span><span class="p">:</span> <span class="nb">str</span>
76007600
<span class="p">}</span>
76017601

7602-
<span class="n">df_customer</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="s2">"../data/customer.csv"</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="s1">'utf-8-sig'</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">dtype</span><span class="p">)</span>
7603-
<span class="n">df_category</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="s2">"../data/category.csv"</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="s1">'utf-8-sig'</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">dtype</span><span class="p">)</span>
7604-
<span class="n">df_product</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="s2">"../data/product.csv"</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="s1">'utf-8-sig'</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">dtype</span><span class="p">)</span>
7605-
<span class="n">df_receipt</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="s2">"../data/receipt.csv"</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="s1">'utf-8-sig'</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">dtype</span><span class="p">)</span>
7606-
<span class="n">df_store</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="s2">"../data/store.csv"</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="s1">'utf-8-sig'</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">dtype</span><span class="p">)</span>
7607-
<span class="n">df_geocode</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="s2">"../data/geocode.csv"</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="s1">'utf-8-sig'</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">dtype</span><span class="p">)</span>
7602+
<span class="n">df_customer</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="s2">"../data/customer.csv"</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="s2">"utf-8-sig"</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">dtype</span><span class="p">)</span>
7603+
<span class="n">df_category</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="s2">"../data/category.csv"</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="s2">"utf-8-sig"</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">dtype</span><span class="p">)</span>
7604+
<span class="n">df_product</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="s2">"../data/product.csv"</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="s2">"utf-8-sig"</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">dtype</span><span class="p">)</span>
7605+
<span class="n">df_receipt</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="s2">"../data/receipt.csv"</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="s2">"utf-8-sig"</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">dtype</span><span class="p">)</span>
7606+
<span class="n">df_store</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="s2">"../data/store.csv"</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="s2">"utf-8-sig"</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">dtype</span><span class="p">)</span>
7607+
<span class="n">df_geocode</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="s2">"../data/geocode.csv"</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="s2">"utf-8-sig"</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">dtype</span><span class="p">)</span>
76087608
</pre></div>
76097609
</div>
76107610
</div>

0 commit comments

Comments
 (0)