Skip to content

Commit 42e7518

Browse files
merveenoyanosansevierobeurkinger
authored
Tabular Classification Task data (#332)
* Tabular Classification task page (#302) Co-authored-by: Omar Sanseviero <[email protected]> Co-authored-by: Thibault Goehringer <[email protected]>
1 parent e0459a6 commit 42e7518

File tree

5 files changed

+144
-28
lines changed

5 files changed

+144
-28
lines changed

js/src/lib/components/InferenceWidget/shared/WidgetTableInput/WidgetTableInput.svelte

Lines changed: 29 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -62,37 +62,39 @@
6262
</script>
6363

6464
<div class="overflow-auto" bind:this={tableContainerEl}>
65-
<table class="table-question-answering">
66-
<thead>
67-
<tr>
68-
{#each table[0] as header, x}
69-
<th
70-
contenteditable={canAddCol && !isLoading}
71-
class="border-2 border-gray-100 h-6"
72-
on:keydown={onKeyDown}
73-
on:input={(e) => editCell(e, [x, 0])}
74-
>
75-
{header}
76-
</th>
77-
{/each}
78-
</tr>
79-
</thead>
80-
<tbody>
81-
{#each table.slice(1) as row, y}
82-
<tr class={highlighted[`${y}`] ?? "bg-white"}>
83-
{#each row as cell, x}
84-
<td
85-
class={(highlighted[`${y}-${x}`] ?? "border-gray-100") +
86-
" border-2 h-6"}
87-
contenteditable={!isLoading}
65+
{#if table.length > 1}
66+
<table class="table-question-answering">
67+
<thead>
68+
<tr>
69+
{#each table[0] as header, x}
70+
<th
71+
contenteditable={canAddCol && !isLoading}
72+
class="border-2 border-gray-100 h-6"
8873
on:keydown={onKeyDown}
89-
on:input={(e) => editCell(e, [x, y + 1])}>{cell}</td
74+
on:input={(e) => editCell(e, [x, 0])}
9075
>
76+
{header}
77+
</th>
9178
{/each}
9279
</tr>
93-
{/each}
94-
</tbody>
95-
</table>
80+
</thead>
81+
<tbody>
82+
{#each table.slice(1) as row, y}
83+
<tr class={highlighted[`${y}`] ?? "bg-white"}>
84+
{#each row as cell, x}
85+
<td
86+
class={(highlighted[`${y}-${x}`] ?? "border-gray-100") +
87+
" border-2 h-6"}
88+
contenteditable={!isLoading}
89+
on:keydown={onKeyDown}
90+
on:input={(e) => editCell(e, [x, y + 1])}>{cell}</td
91+
>
92+
{/each}
93+
</tr>
94+
{/each}
95+
</tbody>
96+
</table>
97+
{/if}
9698
</div>
9799

98100
<div class="flex mb-1 flex-wrap">

tasks/src/Types.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,9 @@ export type TaskDemoEntry = {
1818
} | {
1919
filename: string;
2020
type: "img";
21+
} | {
22+
table: string[][];
23+
type: "tabular";
2124
} | {
2225
content: string;
2326
label: string;
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
## About the Task
2+
3+
Tabular classification is the task of assigning a label or class given a limited number of attributes. For example, the input can be data related to a customer (such as the customer's balance or how long they have been a customer) and the output can be whether the customer will churn from the service or not.
4+
There are three types of categorical variables:
5+
6+
- Binary variables: Variables that can take two values, like yes or no, open or closed. The task of predicting binary variables is called binary classification.
7+
- Ordinal variables: Variables with a ranking relationship, e.g., good, insignificant, and bad product reviews. The task of predicting ordinal variables is called ordinal classification.
8+
- Nominal variables: Variables with no ranking relationship among them, e.g., predicting an animal from its weight and height, where categories are cat, dog, or bird. The task of predicting nominal variables is called multinomial classification.
9+
10+
## Use Cases
11+
12+
### Fraud Detection
13+
Tabular classification models can be used in detecting fraudulent credit card transactions, where the features could be the amount of the transaction and the account balance, and the target to predict could be whether the transaction is fraudulent or not. This is an example of binary classification.
14+
15+
### Churn Prediction
16+
Tabular classification models can be used in predicting customer churn in telecommunication. An example dataset for the task is hosted [here](https://huggingface.co/datasets/scikit-learn/churn-prediction).
17+
18+
## Model Hosting and Inference
19+
20+
You can use the [skops](https://skops.readthedocs.io/en/stable/) library to share, explore, and use `scikit-learn` models on the Hugging Face Hub. `skops` models have widgets to try the models in the browser and have descriptive reports (also known as model cards) in their repositories. You can pull a `scikit-learn` model like below using `skops`:
21+
22+
23+
```python
24+
from pathlib import Path

import joblib
from skops import hub_utils

# `target_path` and `sample` are placeholders: set `target_path` to the local
# directory the repository should be downloaded into, and `sample` to the rows
# you want predictions for.
hub_utils.download(repo_id="user-name/my-awesome-model", dst=target_path)

# Load the pickled estimator from the downloaded repository and run inference.
model = joblib.load(Path(target_path) / "model.pkl")
model.predict(sample)
30+
```
31+
32+
33+
## Useful Resources
34+
35+
- Check out the [scikit-learn organization](https://huggingface.co/scikit-learn) to learn more about different algorithms used for this task.
36+
- [Skops documentation](https://skops.readthedocs.io/en/latest/)
37+
- [Skops announcement blog](https://huggingface.co/blog/skops)
38+
39+
### Training your own model in just a few seconds
40+
41+
We have built a [baseline trainer](https://huggingface.co/spaces/scikit-learn/baseline-trainer) application to which you can drag and drop your dataset. It will train a baseline and push it to your Hugging Face Hub profile with a model card containing information about the model.
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
import type { TaskData } from "../Types";
2+
3+
import { PIPELINE_DATA } from "../../../js/src/lib/interfaces/Types";
4+
import { TASKS_MODEL_LIBRARIES } from "../const";
5+
6+
/**
 * Task-page data for the "tabular-classification" task: example datasets,
 * a demo input/output table pair, evaluation metrics, and a showcased model.
 *
 * NOTE(review): the metric descriptions, `widgetModels`, and `youtubeId` are
 * empty strings here — presumably placeholders to be filled in later; confirm.
 */
const taskData: TaskData = {
	datasets: [
		{
			description: "Binary classification dataset based on a census on income.",
			id: "scikit-learn/adult-census-income",
		},
		{
			description: "Multi-class dataset on iris flower species.",
			id: "scikit-learn/iris",
		},
	],
	demo: {
		inputs: [
			{
				// First row holds the column headers; the remaining rows are samples.
				table: [
					["Glucose", "Blood Pressure", "Skin Thickness", "Insulin", "BMI"],
					["148", "72", "35", "0", "33.6"],
					["150", "50", "30", "0", "35.1"],
					["141", "60", "29", "1", "39.2"],
				],
				type: "tabular",
			},
		],
		outputs: [
			{
				// One predicted label per input sample row, under a single header.
				table: [["Diabetes"], ["1"], ["1"], ["0"]],
				type: "tabular",
			},
		],
	},
	id: "tabular-classification",
	label: PIPELINE_DATA["tabular-classification"].name,
	libraries: TASKS_MODEL_LIBRARIES["tabular-classification"],
	metrics: [
		{
			description: "",
			id: "accuracy",
		},
		{
			description: "",
			id: "recall",
		},
		{
			description: "",
			id: "precision",
		},
		{
			description: "",
			id: "f1",
		},
	],
	models: [
		{
			description: "Breast cancer prediction model based on decision trees.",
			id: "scikit-learn/cancer-prediction-trees",
		},
	],
	summary:
		"Tabular classification is the task of classifying a target category (a group) based on a set of attributes.",
	widgetModels: [""],
	youtubeId: "",
};
68+
69+
export default taskData;

tasks/src/tasksData.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import objectDetection from "./object-detection/data";
1111
import questionAnswering from "./question-answering/data";
1212
import sentenceSimilarity from "./sentence-similarity/data";
1313
import summarization from "./summarization/data";
14+
import tabularClassification from "./tabular-classification/data"
1415
import textToSpeech from "./text-to-speech/data";
1516
import tokenClassification from "./token-classification/data";
1617
import translation from "./translation/data";
@@ -43,7 +44,7 @@ export const TASKS_DATA: Record<
4344
"summarization": summarization,
4445
"table-question-answering": undefined,
4546
"table-to-text": undefined,
46-
"tabular-classification": undefined,
47+
"tabular-classification": tabularClassification,
4748
"tabular-regression": undefined,
4849
"tabular-to-text": undefined,
4950
"text-classification": textClassification,

0 commit comments

Comments
 (0)