Skip to content

Commit 756c1c9

Browse files
authored
🤖 Autoupdate: Epoch (#5594)
1 parent 2af0857 commit 756c1c9

File tree

13 files changed

+24
-24
lines changed

13 files changed

+24
-24
lines changed

etl/steps/data/garden/artificial_intelligence/2025-03-12/epoch.meta.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ tables:
4848
zeroDay: '1949-01-01'
4949
yearIsDay: true
5050

51-
training_dataset_size__gradients:
51+
training_dataset_size__total:
5252
title: Training dataset size
5353
unit: 'unique datapoints'
5454
description_short: The number of unique data points used to train the model. Each domain has a specific data point unit; for example, for vision it is images, for language it is words, and for games it is timesteps. This means systems can only be compared directly within the same domain.

etl/steps/data/garden/artificial_intelligence/2025-03-12/epoch.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ def simplify_entry(entry):
112112
tb["training_computation_petaflop"] = tb["training_compute__flop"] / 1e15
113113

114114
# Convert training dataset size to numeric
115-
tb["training_dataset_size__gradients"] = pd.to_numeric(tb["training_dataset_size__gradients"], errors="coerce")
115+
tb["training_dataset_size__total"] = pd.to_numeric(tb["training_dataset_size__total"], errors="coerce")
116116

117117
# Convert publication date to datetime objects
118118
tb["publication_date"] = pd.to_datetime(tb["publication_date"])
@@ -142,7 +142,7 @@ def simplify_entry(entry):
142142
)
143143
tb = tb.format(["days_since_1949", "model"])
144144
# Copy origins metadata to the publication date and training dataset size columns
145-
for col in ["publication_date", "training_dataset_size__gradients"]:
145+
for col in ["publication_date", "training_dataset_size__total"]:
146146
tb[col].metadata.origins = tb["domain"].metadata.origins
147147

148148
#

etl/steps/data/garden/artificial_intelligence/2025-03-12/epoch_aggregates_affiliation.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ def run() -> None:
3030
unused_columns = [
3131
"days_since_1949",
3232
"parameters",
33-
"training_dataset_size__gradients",
33+
"training_dataset_size__total",
3434
"domain",
3535
"training_computation_petaflop",
3636
]

etl/steps/data/garden/artificial_intelligence/2025-03-12/epoch_aggregates_domain.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ def run() -> None:
4040
"organization_categorization",
4141
"parameters",
4242
"training_compute__flop",
43-
"training_dataset_size__gradients",
43+
"training_dataset_size__total",
4444
"notability_criteria",
4545
]
4646
# Drop the unused columns

etl/steps/data/garden/artificial_intelligence/2025-03-12/epoch_regressions.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ def run_regression(tb):
9191
f"{DL_ERA_START}–{int(END_DATE)}": np.array([DL_ERA_START, END_DATE]),
9292
}
9393

94-
metrics = ["training_computation_petaflop", "parameters", "training_dataset_size__gradients"]
94+
metrics = ["training_computation_petaflop", "parameters", "training_dataset_size__total"]
9595
new_tables = []
9696

9797
for metric in metrics:

etl/steps/data/garden/artificial_intelligence/2026-01-30/frontiermath.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
# Pre-compile regex patterns for performance
1212
PATTERNS = {
1313
"date": re.compile(r"(\d{4})-?(\d{2})-?(\d{2})"),
14-
"context_size": re.compile(r"\d+K$"),
14+
"context_size": re.compile(r"(\d+K|max)$"),
1515
"claude_numeric": re.compile(r"claude-(\d+)-(\d+)-(\w+)", re.IGNORECASE),
1616
"claude_variant": re.compile(r"claude-(\w+)-(\d+(?:\.\d+)?)", re.IGNORECASE),
1717
"gpt_version": re.compile(r"gpt-([\d.]+)(?:-(\w+))?", re.IGNORECASE),

etl/steps/data/grapher/artificial_intelligence/2025-03-12/epoch.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ def run() -> None:
2828
# For visualization purposes I am adding the rows with the maximum values of compute, data, and parameters in each year to the table as a separate "model". I don't want to do this in garden as it'd affect other datasets that depend on this one.
2929
columns = {
3030
"training_computation_petaflop": "compute",
31-
"training_dataset_size__gradients": "data",
31+
"training_dataset_size__total": "data",
3232
"parameters": "parameters",
3333
}
3434
# Find maximum values for a given column (compute, data, params) per year, label them, and add summary rows.

etl/steps/data/meadow/artificial_intelligence/2025-03-12/epoch.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ def run() -> None:
3636
"Publication date",
3737
"Parameters",
3838
"Training compute (FLOP)",
39-
"Training dataset size (gradients)",
39+
"Training dataset size (total)",
4040
"Notability criteria",
4141
]
4242

etl/steps/data/meadow/artificial_intelligence/2025-10-10/epoch_gpus.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ def run() -> None:
2828
"FP32 (single precision) performance (FLOP/s)",
2929
"FP16 (half precision) performance (FLOP/s)",
3030
"INT8 performance (OP/s)",
31-
"Memory size per board (Byte)",
31+
"Memory (bytes)",
3232
"Memory bandwidth (byte/s)",
3333
"TDP (W)",
3434
"Process size (nm)",

snapshots/artificial_intelligence/2025-03-12/epoch.csv.dvc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,13 +30,13 @@ meta:
3030
# Files
3131
url_main: https://epoch.ai/mlinputs/visualization
3232
url_download: https://epoch.ai/data/epochdb/notable_ai_models.csv
33-
date_accessed: '2026-01-26'
33+
date_accessed: '2026-02-27'
3434

3535
# License
3636
license:
3737
name: CC BY 4.0
3838
url: https://creativecommons.org/licenses/by/4.0/
3939
outs:
40-
- md5: 1149d0b75df9f65144cdbe1f881353e1
41-
size: 2101444
40+
- md5: e387c13e7a6e35d505ced9a0a1d968f2
41+
size: 2107224
4242
path: epoch.csv

0 commit comments

Comments
 (0)