diff --git a/lib/datasets.rb b/lib/datasets.rb
index 9aa2256d..29bd6324 100644
--- a/lib/datasets.rb
+++ b/lib/datasets.rb
@@ -2,6 +2,7 @@
 
 require_relative "datasets/adult"
 require_relative "datasets/afinn"
+require_relative "datasets/ames-housing"
 require_relative "datasets/aozora-bunko"
 require_relative "datasets/california-housing"
 require_relative "datasets/cifar"
diff --git a/lib/datasets/ames-housing.rb b/lib/datasets/ames-housing.rb
new file mode 100644
index 00000000..8b8cfc85
--- /dev/null
+++ b/lib/datasets/ames-housing.rb
@@ -0,0 +1,156 @@
+require "csv"
+
+require_relative "dataset"
+
+module Datasets
+  # Ames Housing data set: one record per residential property sold in
+  # Ames, IA (see the metadata description for the source paper).
+  class AmesHousing < Dataset
+    # One row of the data file. Members must stay in column order because
+    # each record is built positionally from a parsed row.
+    Record = Struct.new(:order,
+                        :pid,
+                        :ms_sub_class,
+                        :ms_zoning,
+                        :lot_frontage,
+                        :lot_area,
+                        :street,
+                        :alley,
+                        :lot_shape,
+                        :land_contour,
+                        :utilities,
+                        :lot_config,
+                        :land_slope,
+                        :neighborhood,
+                        :condition_1,
+                        :condition_2,
+                        :bldg_type,
+                        :house_style,
+                        :overall_qual,
+                        :overall_cond,
+                        :year_built,
+                        :year_remod_add,
+                        :roof_style,
+                        :roof_matl,
+                        :exterior_1st,
+                        :exterior_2nd,
+                        :mas_vnr_type,
+                        :mas_vnr_area,
+                        :exter_qual,
+                        :exter_cond,
+                        :foundation,
+                        :bsmt_qual,
+                        :bsmt_cond,
+                        :bsmt_exposure,
+                        :bsmt_fin_type_1,
+                        :bsmt_fin_sf_1,
+                        :bsmt_fin_type_2,
+                        :bsmt_fin_sf_2,
+                        :bsmt_unf_sf,
+                        :total_bsmt_sf,
+                        :heating,
+                        :heating_qc,
+                        :central_air,
+                        :electrical,
+                        :first_flr_sf,
+                        :second_flr_sf,
+                        :low_qual_fin_sf,
+                        :gr_liv_area,
+                        :bsmt_full_bath,
+                        :bsmt_half_bath,
+                        :full_bath,
+                        :half_bath,
+                        :bedroom_abv_gr,
+                        :kitchen_abv_gr,
+                        :kitchen_qual,
+                        :tot_rms_abv_grd,
+                        :functional,
+                        :fireplaces,
+                        :fireplace_qu,
+                        :garage_type,
+                        :garage_yr_blt,
+                        :garage_finish,
+                        :garage_cars,
+                        :garage_area,
+                        :garage_qual,
+                        :garage_cond,
+                        :paved_drive,
+                        :wood_deck_sf,
+                        :open_porch_sf,
+                        :enclosed_porch,
+                        :three_ssn_porch,
+                        :screen_porch,
+                        :pool_area,
+                        :pool_qc,
+                        :fence,
+                        :misc_feature,
+                        :misc_val,
+                        :mo_sold,
+                        :yr_sold,
+                        :sale_type,
+                        :sale_condition,
+                        :sale_price)
+
+    def initialize
+      super()
+      @metadata.id = "ames-housing"
+      @metadata.name = "Ames Housing"
+      @metadata.url = "http://jse.amstat.org/v19n3/decock/DataDocumentation.txt"
+      @metadata.licenses = ["Unknown"]
+      @metadata.description = <<-DESCRIPTION
+Data set contains information from the Ames Assessor’s Office
+used in computing assessed values for individual residential
+properties sold in Ames, IA from 2006 to 2010.
+De Cock, D.,
+"Ames, Iowa: Alternative to the Boston Housing Data as an
+End of Semester Regression Project",
+Journal of Statistics Education, 19(3) (2011) 1-15.
+Available from http://jse.amstat.org/v19n3/decock.pdf.
+      DESCRIPTION
+    end
+
+    # Yields one Record per data row, or returns an Enumerator when no
+    # block is given.
+    def each
+      return to_enum(__method__) unless block_given?
+
+      open_data do |csv|
+        csv.each do |row|
+          # Skip rows without a leading field, such as blank lines.
+          next if row[0].nil?
+          yield(Record.new(*row.fields))
+        end
+      end
+    end
+
+    private
+    # Fetches AmesHousing.txt into the cache directory through #download
+    # and yields a CSV reader for it. The file is tab-separated and starts
+    # with a header row, which is consumed rather than yielded.
+    def open_data
+      data_path = cache_dir_path + "AmesHousing.txt"
+      data_url = "http://jse.amstat.org/v19n3/decock/AmesHousing.txt"
+      download(data_path, data_url)
+      CSV.open(data_path,
+               col_sep: "\t",
+               headers: true,
+               converters: [lambda {|field| parse_field(field)}]) do |csv|
+        yield(csv)
+      end
+    end
+
+    # Converts a numeric-looking field to a number and leaves other text
+    # as is. Integers are always read as base 10: the stock :numeric
+    # converter goes through Integer(), which treats a leading zero
+    # ("020") as an octal prefix.
+    def parse_field(field)
+      Integer(field, 10)
+    rescue ArgumentError
+      begin
+        Float(field)
+      rescue ArgumentError
+        field
+      end
+    end
+  end
+end
diff --git a/test/test-ames-housing.rb b/test/test-ames-housing.rb
new file mode 100644
index 00000000..3ad517a9
--- /dev/null
+++ b/test/test-ames-housing.rb
@@ -0,0 +1,207 @@
+class AmesHousingTest < Test::Unit::TestCase
+  def setup
+    @dataset = Datasets::AmesHousing.new
+  end
+
+  def record(*args)
+    Datasets::AmesHousing::Record.new(*args)
+  end
+
+  test("#each") do
+    records = @dataset.each.to_a
+    assert_equal([
+                   2930,
+                   {
+                     order: 1,
+                     # Leading zeros are omitted on purpose: Ruby reads a
+                     # literal such as 020 as octal.
+                     pid: 526301100,
+                     ms_sub_class: 20,
+                     ms_zoning: "RL",
+                     lot_frontage: 141,
+                     lot_area: 31770,
+                     street: "Pave",
+                     alley: "NA",
+                     lot_shape: "IR1",
+                     land_contour: "Lvl",
+                     utilities: "AllPub",
+                     lot_config: "Corner",
+                     land_slope: "Gtl",
+                     neighborhood: "NAmes",
+                     condition_1: "Norm",
+                     condition_2: "Norm",
+                     bldg_type: "1Fam",
+                     house_style: "1Story",
+                     overall_qual: 6,
+                     overall_cond: 5,
+                     year_built: 1960,
+                     year_remod_add: 1960,
+                     roof_style: "Hip",
+                     roof_matl: "CompShg",
+                     exterior_1st: "BrkFace",
+                     exterior_2nd: "Plywood",
+                     mas_vnr_type: "Stone",
+                     mas_vnr_area: 112,
+                     exter_qual: "TA",
+                     exter_cond: "TA",
+                     foundation: "CBlock",
+                     bsmt_qual: "TA",
+                     bsmt_cond: "Gd",
+                     bsmt_exposure: "Gd",
+                     bsmt_fin_type_1: "BLQ",
+                     bsmt_fin_sf_1: 639,
+                     bsmt_fin_type_2: "Unf",
+                     bsmt_fin_sf_2: 0,
+                     bsmt_unf_sf: 441,
+                     total_bsmt_sf: 1080,
+                     heating: "GasA",
+                     heating_qc: "Fa",
+                     central_air: "Y",
+                     electrical: "SBrkr",
+                     first_flr_sf: 1656,
+                     second_flr_sf: 0,
+                     low_qual_fin_sf: 0,
+                     gr_liv_area: 1656,
+                     bsmt_full_bath: 1,
+                     bsmt_half_bath: 0,
+                     full_bath: 1,
+                     half_bath: 0,
+                     bedroom_abv_gr: 3,
+                     kitchen_abv_gr: 1,
+                     kitchen_qual: "TA",
+                     tot_rms_abv_grd: 7,
+                     functional: "Typ",
+                     fireplaces: 2,
+                     fireplace_qu: "Gd",
+                     garage_type: "Attchd",
+                     garage_yr_blt: 1960,
+                     garage_finish: "Fin",
+                     garage_cars: 2,
+                     garage_area: 528,
+                     garage_qual: "TA",
+                     garage_cond: "TA",
+                     paved_drive: "P",
+                     wood_deck_sf: 210,
+                     open_porch_sf: 62,
+                     enclosed_porch: 0,
+                     three_ssn_porch: 0,
+                     screen_porch: 0,
+                     pool_area: 0,
+                     pool_qc: "NA",
+                     fence: "NA",
+                     misc_feature: "NA",
+                     misc_val: 0,
+                     mo_sold: 5,
+                     yr_sold: 2010,
+                     sale_type: "WD",
+                     sale_condition: "Normal",
+                     sale_price: 215000
+                   },
+                   {
+                     order: 2930,
+                     pid: 924151050,
+                     ms_sub_class: 60,
+                     ms_zoning: "RL",
+                     lot_frontage: 74,
+                     lot_area: 9627,
+                     street: "Pave",
+                     alley: "NA",
+                     lot_shape: "Reg",
+                     land_contour: "Lvl",
+                     utilities: "AllPub",
+                     lot_config: "Inside",
+                     land_slope: "Mod",
+                     neighborhood: "Mitchel",
+                     condition_1: "Norm",
+                     condition_2: "Norm",
+                     bldg_type: "1Fam",
+                     house_style: "2Story",
+                     overall_qual: 7,
+                     overall_cond: 5,
+                     year_built: 1993,
+                     year_remod_add: 1994,
+                     roof_style: "Gable",
+                     roof_matl: "CompShg",
+                     exterior_1st: "HdBoard",
+                     exterior_2nd: "HdBoard",
+                     mas_vnr_type: "BrkFace",
+                     mas_vnr_area: 94,
+                     exter_qual: "TA",
+                     exter_cond: "TA",
+                     foundation: "PConc",
+                     bsmt_qual: "Gd",
+                     bsmt_cond: "TA",
+                     bsmt_exposure: "Av",
+                     bsmt_fin_type_1: "LwQ",
+                     bsmt_fin_sf_1: 758,
+                     bsmt_fin_type_2: "Unf",
+                     bsmt_fin_sf_2: 0,
+                     bsmt_unf_sf: 238,
+                     total_bsmt_sf: 996,
+                     heating: "GasA",
+                     heating_qc: "Ex",
+                     central_air: "Y",
+                     electrical: "SBrkr",
+                     first_flr_sf: 996,
+                     second_flr_sf: 1004,
+                     low_qual_fin_sf: 0,
+                     gr_liv_area: 2000,
+                     bsmt_full_bath: 0,
+                     bsmt_half_bath: 0,
+                     full_bath: 2,
+                     half_bath: 1,
+                     bedroom_abv_gr: 3,
+                     kitchen_abv_gr: 1,
+                     kitchen_qual: "TA",
+                     tot_rms_abv_grd: 9,
+                     functional: "Typ",
+                     fireplaces: 1,
+                     fireplace_qu: "TA",
+                     garage_type: "Attchd",
+                     garage_yr_blt: 1993,
+                     garage_finish: "Fin",
+                     garage_cars: 3,
+                     garage_area: 650,
+                     garage_qual: "TA",
+                     garage_cond: "TA",
+                     paved_drive: "Y",
+                     wood_deck_sf: 190,
+                     open_porch_sf: 48,
+                     enclosed_porch: 0,
+                     three_ssn_porch: 0,
+                     screen_porch: 0,
+                     pool_area: 0,
+                     pool_qc: "NA",
+                     fence: "NA",
+                     misc_feature: "NA",
+                     misc_val: 0,
+                     mo_sold: 11,
+                     yr_sold: 2006,
+                     sale_type: "WD",
+                     sale_condition: "Normal",
+                     sale_price: 188000
+                   },
+                 ],
+                 [
+                   records.size,
+                   records[0].to_h,
+                   records[-1].to_h
+                 ])
+  end
+
+  sub_test_case("#metadata") do
+    test("#description") do
+      description = @dataset.metadata.description
+      assert_equal(<<-DESCRIPTION, description)
+Data set contains information from the Ames Assessor’s Office
+used in computing assessed values for individual residential
+properties sold in Ames, IA from 2006 to 2010.
+De Cock, D.,
+"Ames, Iowa: Alternative to the Boston Housing Data as an
+End of Semester Regression Project",
+Journal of Statistics Education, 19(3) (2011) 1-15.
+Available from http://jse.amstat.org/v19n3/decock.pdf.
+      DESCRIPTION
+    end
+  end
+end