|
47 | 47 | # pop_df.head() |
48 | 48 | # - |
49 | 49 |
|
50 | | -sample_df.head() |
| 50 | +sample_df.tail(10) |
51 | 51 |
|
52 | 52 |
|
53 | 53 | # ## Standard run of rate model |
|
146 | 146 |
|
147 | 147 | pop_df.iloc[0, 0] = np.nan |
148 | 148 | mod2 = ratemodel(pop_df, sample_df, id_nr="id") |
| 149 | + |
| 150 | + |
| 151 | +# ## Tests with data with 1 observation |
| 152 | + |
| 153 | +# + |
| 154 | +pop1 = pd.read_csv("../tests/data/pop_data_1obs.csv") |
| 155 | +sample1 = pd.read_csv("../tests/data/sample_data_1obs.csv") |
| 156 | + |
| 157 | +pop1["country"] = 1 |
| 158 | +sample1["country"] = 1 |
| 159 | +# - |
| 160 | + |
| 161 | +sample1.tail(10) |
| 162 | + |
| 163 | +mod1 = ratemodel(pop1, sample1, id_nr="id") |
| 164 | +mod1.fit( |
| 165 | + x_var="employees", |
| 166 | + y_var="job_vacancies", |
| 167 | + strata_var="industry", |
| 168 | + control_extremes=True, |
| 169 | +) |
| 170 | + |
| 171 | +mod1 = ratemodel(pop1, sample1, id_nr="id") |
| 172 | +mod1.fit( |
| 173 | + x_var="employees", |
| 174 | + y_var="job_vacancies", |
| 175 | + strata_var="industry", |
| 176 | + exclude=[5, 9], |
| 177 | + control_extremes=False, |
| 178 | +) |
| 179 | + |
| 180 | +mod1 = ratemodel(pop1, sample1, id_nr="id") |
| 181 | +mod1.fit(x_var="employees", y_var="job_vacancies", strata_var="industry") |
| 182 | +mod1.fit( |
| 183 | + x_var="employees", |
| 184 | + y_var="job_vacancies", |
| 185 | + strata_var="industry", |
| 186 | + exclude=[9855, 9912], |
| 187 | + control_extremes=False, |
| 188 | +) |
| 189 | + |
| 190 | + |
| 191 | +# ## Test when all x = 0 in one stratum |
| 192 | + |
| 193 | +# + |
| 194 | +pop1 = pd.read_csv("../tests/data/pop_data_1obs.csv") |
| 195 | +sample1 = pd.read_csv("../tests/data/sample_data_1obs.csv") |
| 196 | + |
| 197 | +new_rows = pd.DataFrame( |
| 198 | + { |
| 199 | + "id": [10002, 10003], |
| 200 | + "employees": [50, 50], |
| 201 | + "employees_f": [20, 25], |
| 202 | + "employees_m": [30, 25], |
| 203 | + "turnover": [0, 0], |
| 204 | + "size": ["mid", "mid"], |
| 205 | + "industry": ["G", "G"], |
| 206 | + } |
| 207 | +) |
| 208 | +pop1 = pd.concat([pop1, new_rows], ignore_index=True) |
| 209 | + |
| 210 | +new_rows2 = pd.DataFrame( |
| 211 | + { |
| 212 | + "id": [10002, 10003], |
| 213 | + "employees": [50, 50], |
| 214 | + "employees_f": [20, 25], |
| 215 | + "employees_m": [30, 25], |
| 216 | + "turnover": [0, 0], |
| 217 | + "size": ["mid", "mid"], |
| 218 | + "industry": ["G", "G"], |
| 219 | + "job_vacancies": [35, 45], |
| 220 | + "sick_days": [70, 65], |
| 221 | + "sick_days_f": [35, 25], |
| 222 | + "sick_days_m": [35, 40], |
| 223 | + } |
| 224 | +) |
| 225 | +sample1 = pd.concat([sample1, new_rows2], ignore_index=True) |
| 226 | +sample1.loc[sample1.id == 10001, "turnover"] = 0 |
| 227 | +# - |
| 228 | + |
| 229 | +mod1 = ratemodel(pop1, sample1, id_nr="id") |
| 230 | +mod1.fit( |
| 231 | + x_var="turnover", |
| 232 | + y_var="job_vacancies", |
| 233 | + strata_var="industry", |
| 234 | + control_extremes=True, |
| 235 | +) |
| 236 | +mod1.get_obs["G"] |
| 237 | + |
| 238 | +# ## Test when one x=0 |
| 239 | + |
| 240 | +# + |
| 241 | +pop1 = pd.read_csv("../tests/data/pop_data_1obs.csv") |
| 242 | +sample1 = pd.read_csv("../tests/data/sample_data_1obs.csv") |
| 243 | + |
| 244 | +new_rows = pd.DataFrame( |
| 245 | + { |
| 246 | + "id": [10005, 10006], |
| 247 | + "employees": [50, 50], |
| 248 | + "employees_f": [20, 25], |
| 249 | + "employees_m": [30, 25], |
| 250 | + "turnover": [1000, 0], |
| 251 | + "size": ["mid", "mid"], |
| 252 | + "industry": ["G", "G"], |
| 253 | + } |
| 254 | +) |
| 255 | +pop1 = pd.concat([pop1, new_rows], ignore_index=True) |
| 256 | + |
| 257 | +new_rows2 = pd.DataFrame( |
| 258 | + { |
| 259 | + "id": [10005, 10006], |
| 260 | + "employees": [50, 50], |
| 261 | + "employees_f": [20, 25], |
| 262 | + "employees_m": [30, 25], |
| 263 | + "turnover": [1000, 0], |
| 264 | + "size": ["mid", "mid"], |
| 265 | + "industry": ["G", "G"], |
| 266 | + "job_vacancies": [35, 45], |
| 267 | + "sick_days": [70, 65], |
| 268 | + "sick_days_f": [35, 25], |
| 269 | + "sick_days_m": [35, 40], |
| 270 | + } |
| 271 | +) |
| 272 | +sample1 = pd.concat([sample1, new_rows2], ignore_index=True) |
| 273 | + |
| 274 | +# - |
| 275 | + |
| 276 | +mod1 = ratemodel(pop1, sample1, id_nr="id") |
| 277 | +mod1.fit( |
| 278 | + x_var="turnover", |
| 279 | + y_var="job_vacancies", |
| 280 | + strata_var="industry", |
| 281 | + control_extremes=True, |
| 282 | +) |
| 283 | +mod1.get_obs["G"] |
| 284 | + |
| 285 | +# ## Test when one x = 0 in the population |
| 286 | + |
| 287 | +# + |
| 288 | +pop1 = pd.read_csv("../tests/data/pop_data_1obs.csv") |
| 289 | +sample1 = pd.read_csv("../tests/data/sample_data_1obs.csv") |
| 290 | + |
| 291 | + |
| 292 | +# - |
| 293 | + |
| 294 | +pop1.head(10) |
| 295 | + |
| 296 | +mod1 = ratemodel(pop1, sample1, id_nr="id") |
| 297 | +mod1.fit( |
| 298 | + x_var="employees", |
| 299 | + y_var="job_vacancies", |
| 300 | + strata_var="industry", |
| 301 | + control_extremes=False, |
| 302 | +) |
| 303 | +imp = mod1.get_imputed() |
| 304 | +imp["job_vacancies_imputed"][0] == 0 |
| 305 | + |
| 306 | + |
| 307 | +mod1.get_extremes(threshold_type="rstud", rbound=15) |
| 308 | + |
| 309 | +# ## Test when one x is negative |
| 310 | + |
| 311 | +# + |
| 312 | +pop1 = pd.read_csv("../tests/data/pop_data_1obs.csv") |
| 313 | +sample1 = pd.read_csv("../tests/data/sample_data_1obs.csv") |
| 314 | + |
| 315 | +sample1.loc[sample1.id == 5, "turnover"] = -10 |
| 316 | +pop1.loc[pop1.id == 5, "turnover"] = -10 |
| 317 | +# - |
| 318 | + |
| 319 | +mod1 = ratemodel(pop1, sample1, id_nr="id") |
| 320 | +mod1.fit( |
| 321 | + x_var="turnover", |
| 322 | + y_var="job_vacancies", |
| 323 | + strata_var="industry", |
| 324 | + control_extremes=True, |
| 325 | +) |
| 326 | + |
| 327 | +# ## Test when there is one y=0 |
| 328 | + |
| 329 | +pop1 = pd.read_csv("../tests/data/pop_data_1obs.csv") |
| 330 | +sample1 = pd.read_csv("../tests/data/sample_data_1obs.csv") |
| 331 | +sample1.iloc[0, 7] = 0 |
| 332 | + |
| 333 | +mod1 = ratemodel(pop1, sample1, id_nr="id") |
| 334 | +mod1.fit( |
| 335 | + x_var="employees", |
| 336 | + y_var="job_vacancies", |
| 337 | + strata_var="industry", |
| 338 | + control_extremes=True, |
| 339 | +) |
| 340 | + |
| 341 | + |
| 342 | +# ## Test when all y = 0 in one stratum |
| 343 | + |
| 344 | +# + |
| 345 | +pop1 = pd.read_csv("../tests/data/pop_data_1obs.csv") |
| 346 | +sample1 = pd.read_csv("../tests/data/sample_data_1obs.csv") |
| 347 | + |
| 348 | +new_rows = pd.DataFrame( |
| 349 | + { |
| 350 | + "id": [10006, 10007], |
| 351 | + "employees": [50, 50], |
| 352 | + "employees_f": [20, 25], |
| 353 | + "employees_m": [30, 25], |
| 354 | + "turnover": [15000, 15000], |
| 355 | + "size": ["mid", "mid"], |
| 356 | + "industry": ["G", "G"], |
| 357 | + } |
| 358 | +) |
| 359 | +pop1 = pd.concat([pop1, new_rows], ignore_index=True) |
| 360 | + |
| 361 | +new_rows2 = pd.DataFrame( |
| 362 | + { |
| 363 | + "id": [10006, 10007], |
| 364 | + "employees": [50, 50], |
| 365 | + "employees_f": [20, 25], |
| 366 | + "employees_m": [30, 25], |
| 367 | + "turnover": [15000, 15000], |
| 368 | + "size": ["mid", "mid"], |
| 369 | + "industry": ["G", "G"], |
| 370 | + "job_vacancies": [0, 0], |
| 371 | + "sick_days": [70, 65], |
| 372 | + "sick_days_f": [35, 25], |
| 373 | + "sick_days_m": [35, 40], |
| 374 | + } |
| 375 | +) |
| 376 | +sample1 = pd.concat([sample1, new_rows2], ignore_index=True) |
| 377 | +sample1.loc[sample1.id == 10001, "job_vacancies"] = 0 |
| 378 | +sample1 = sample1.loc[sample1.industry == "G"] |
| 379 | +pop1 = pop1.loc[pop1.industry == "G"] |
| 380 | +# - |
| 381 | + |
| 382 | +mod1 = ratemodel(pop1, sample1, id_nr="id") |
| 383 | +mod1.fit( |
| 384 | + x_var="employees", |
| 385 | + y_var="job_vacancies", |
| 386 | + strata_var="industry", |
| 387 | + control_extremes=True, |
| 388 | +) |
| 389 | + |
| 390 | + |
| 391 | +mod1.get_estimates() |
| 392 | + |
| 393 | +mod1.get_obs |
| 394 | + |
| 395 | +# ## Test when y = 0 for all but one observation in a stratum |
| 396 | + |
| 397 | +# + |
| 398 | +pop1 = pd.read_csv("../tests/data/pop_data_1obs.csv") |
| 399 | +sample1 = pd.read_csv("../tests/data/sample_data_1obs.csv") |
| 400 | + |
| 401 | +new_rows = pd.DataFrame( |
| 402 | + { |
| 403 | + "id": [10006, 10007], |
| 404 | + "employees": [50, 50], |
| 405 | + "employees_f": [20, 25], |
| 406 | + "employees_m": [30, 25], |
| 407 | + "turnover": [15000, 15000], |
| 408 | + "size": ["mid", "mid"], |
| 409 | + "industry": ["G", "G"], |
| 410 | + } |
| 411 | +) |
| 412 | +pop1 = pd.concat([pop1, new_rows], ignore_index=True) |
| 413 | + |
| 414 | +new_rows2 = pd.DataFrame( |
| 415 | + { |
| 416 | + "id": [10006, 10007], |
| 417 | + "employees": [50, 50], |
| 418 | + "employees_f": [20, 25], |
| 419 | + "employees_m": [30, 25], |
| 420 | + "turnover": [15000, 15000], |
| 421 | + "size": ["mid", "mid"], |
| 422 | + "industry": ["G", "G"], |
| 423 | + "job_vacancies": [0, 0], |
| 424 | + "sick_days": [70, 65], |
| 425 | + "sick_days_f": [35, 25], |
| 426 | + "sick_days_m": [35, 40], |
| 427 | + } |
| 428 | +) |
| 429 | +sample1 = pd.concat([sample1, new_rows2], ignore_index=True) |
| 430 | + |
| 431 | +# - |
| 432 | + |
| 433 | +mod1 = ratemodel(pop1, sample1, id_nr="id") |
| 434 | +mod1.fit( |
| 435 | + x_var="employees", |
| 436 | + y_var="job_vacancies", |
| 437 | + strata_var="industry", |
| 438 | + control_extremes=True, |
| 439 | +) |
| 440 | + |
| 441 | +mod1.get_obs["G"] |
| 442 | + |
| 443 | + |
| 444 | +# ## Test with only 2 observations in one stratum |
| 445 | + |
| 446 | +# + |
| 447 | +pop1 = pd.read_csv("../tests/data/pop_data_1obs.csv") |
| 448 | +sample1 = pd.read_csv("../tests/data/sample_data_1obs.csv") |
| 449 | + |
| 450 | +new_rows = pd.DataFrame( |
| 451 | + { |
| 452 | + "id": [10006], |
| 453 | + "employees": [45], |
| 454 | + "employees_f": [20], |
| 455 | + "employees_m": [30], |
| 456 | + "turnover": [15000], |
| 457 | + "size": ["mid"], |
| 458 | + "industry": ["G"], |
| 459 | + } |
| 460 | +) |
| 461 | +pop1 = pd.concat([pop1, new_rows], ignore_index=True) |
| 462 | + |
| 463 | +new_rows2 = pd.DataFrame( |
| 464 | + { |
| 465 | + "id": [10006], |
| 466 | + "employees": [45], |
| 467 | + "employees_f": [20], |
| 468 | + "employees_m": [30], |
| 469 | + "turnover": [15000], |
| 470 | + "size": ["mid"], |
| 471 | + "industry": ["G"], |
| 472 | + "job_vacancies": [0], |
| 473 | + "sick_days": [70], |
| 474 | + "sick_days_f": [35], |
| 475 | + "sick_days_m": [35], |
| 476 | + } |
| 477 | +) |
| 478 | +sample1 = pd.concat([sample1, new_rows2], ignore_index=True) |
| 479 | + |
| 480 | +mod1 = ratemodel(pop1, sample1, id_nr="id") |
| 481 | +mod1.fit( |
| 482 | + x_var="employees", |
| 483 | + y_var="job_vacancies", |
| 484 | + strata_var="industry", |
| 485 | + control_extremes=True, |
| 486 | +) |
| 487 | +# - |
| 488 | + |
| 489 | +mod1.get_obs["G"] |
0 commit comments