diff --git a/.gitignore b/.gitignore
index 93fc9385d..f3d47be72 100644
--- a/.gitignore
+++ b/.gitignore
@@ -183,6 +183,8 @@ cython_debug/
# Datajoint related files
dj_local_conf.json
+datajoint.json
+.secrets/
*.env
!.vscode/launch.json
# pixi environments
diff --git a/docs/mkdocs.yaml b/docs/mkdocs.yaml
index db2ea16f9..3aef00bad 100644
--- a/docs/mkdocs.yaml
+++ b/docs/mkdocs.yaml
@@ -7,6 +7,8 @@ repo_name: datajoint/datajoint-python
nav:
- Home: index.md
- Contributing: develop.md
+ - How-To Guides:
+ - Deferred Schema Activation: how-to/deferred-schema-activation.md
- Architecture:
- architecture/index.md
- SQL Transpilation: architecture/transpilation.md
diff --git a/docs/src/archive/tutorials/dj-top.ipynb b/docs/src/archive/tutorials/dj-top.ipynb
deleted file mode 100644
index 5920a9f25..000000000
--- a/docs/src/archive/tutorials/dj-top.ipynb
+++ /dev/null
@@ -1,1015 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Using the dj.Top restriction"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "First you will need to [install](../../getting-started/#installation) and [connect](../../getting-started/#connection) to a DataJoint [data pipeline](https://docs.datajoint.com/core/datajoint-python/latest/concepts/data-pipelines/#what-is-a-data-pipeline).\n",
- "\n",
- "Now let's start by importing the `datajoint` client."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "[2024-12-20 11:10:20,120][INFO]: Connecting root@127.0.0.1:3306\n",
- "[2024-12-20 11:10:20,259][INFO]: Connected root@127.0.0.1:3306\n"
- ]
- }
- ],
- "source": [
- "import datajoint as dj\n",
- "\n",
- "dj.config[\"database.host\"] = \"127.0.0.1\"\n",
- "schema = dj.Schema(\"university\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Table Definition"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [],
- "source": [
- "@schema\n",
- "class Student(dj.Manual):\n",
- " definition = \"\"\"\n",
- " student_id : int unsigned # university-wide ID number\n",
- " ---\n",
- " first_name : varchar(40)\n",
- " last_name : varchar(40)\n",
- " sex : enum('F', 'M', 'U')\n",
- " date_of_birth : date\n",
- " home_address : varchar(120) # mailing street address\n",
- " home_city : varchar(60) # mailing address\n",
- " home_state : char(2) # US state acronym: e.g. OH\n",
- " home_zip : char(10) # zipcode e.g. 93979-4979\n",
- " home_phone : varchar(20) # e.g. 414.657.6883x0881\n",
- " \"\"\""
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [],
- "source": [
- "@schema\n",
- "class Department(dj.Manual):\n",
- " definition = \"\"\"\n",
- " dept : varchar(6) # abbreviated department name, e.g. BIOL\n",
- " ---\n",
- " dept_name : varchar(200) # full department name\n",
- " dept_address : varchar(200) # mailing address\n",
- " dept_phone : varchar(20)\n",
- " \"\"\""
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {},
- "outputs": [],
- "source": [
- "@schema\n",
- "class StudentMajor(dj.Manual):\n",
- " definition = \"\"\"\n",
- " -> Student\n",
- " ---\n",
- " -> Department\n",
- " declare_date : date # when student declared her major\n",
- " \"\"\""
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {},
- "outputs": [],
- "source": [
- "@schema\n",
- "class Course(dj.Manual):\n",
- " definition = \"\"\"\n",
- " -> Department\n",
- " course : int unsigned # course number, e.g. 1010\n",
- " ---\n",
- " course_name : varchar(200) # e.g. \"Neurobiology of Sensation and Movement.\"\n",
- " credits : decimal(3,1) # number of credits earned by completing the course\n",
- " \"\"\"\n",
- "\n",
- "\n",
- "@schema\n",
- "class Term(dj.Manual):\n",
- " definition = \"\"\"\n",
- " term_year : year\n",
- " term : enum('Spring', 'Summer', 'Fall')\n",
- " \"\"\"\n",
- "\n",
- "\n",
- "@schema\n",
- "class Section(dj.Manual):\n",
- " definition = \"\"\"\n",
- " -> Course\n",
- " -> Term\n",
- " section : char(1)\n",
- " ---\n",
- " auditorium : varchar(12)\n",
- " \"\"\"\n",
- "\n",
- "\n",
- "@schema\n",
- "class CurrentTerm(dj.Manual):\n",
- " definition = \"\"\"\n",
- " -> Term\n",
- " \"\"\"\n",
- "\n",
- "\n",
- "@schema\n",
- "class Enroll(dj.Manual):\n",
- " definition = \"\"\"\n",
- " -> Student\n",
- " -> Section\n",
- " \"\"\"\n",
- "\n",
- "\n",
- "@schema\n",
- "class LetterGrade(dj.Lookup):\n",
- " definition = \"\"\"\n",
- " grade : char(2)\n",
- " ---\n",
- " points : decimal(3,2)\n",
- " \"\"\"\n",
- " contents = [\n",
- " [\"A\", 4.00],\n",
- " [\"A-\", 3.67],\n",
- " [\"B+\", 3.33],\n",
- " [\"B\", 3.00],\n",
- " [\"B-\", 2.67],\n",
- " [\"C+\", 2.33],\n",
- " [\"C\", 2.00],\n",
- " [\"C-\", 1.67],\n",
- " [\"D+\", 1.33],\n",
- " [\"D\", 1.00],\n",
- " [\"F\", 0.00],\n",
- " ]\n",
- "\n",
- "\n",
- "@schema\n",
- "class Grade(dj.Manual):\n",
- " definition = \"\"\"\n",
- " -> Enroll\n",
- " ---\n",
- " -> LetterGrade\n",
- " \"\"\""
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Insert"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {},
- "outputs": [],
- "source": [
- "from tqdm import tqdm\n",
- "import faker\n",
- "import random\n",
- "import datetime\n",
- "\n",
- "fake = faker.Faker()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "metadata": {},
- "outputs": [],
- "source": [
- "def yield_students():\n",
- " fake_name = {\"F\": fake.name_female, \"M\": fake.name_male}\n",
- " while True: # ignore invalid values\n",
- " try:\n",
- " sex = random.choice((\"F\", \"M\"))\n",
- " first_name, last_name = fake_name[sex]().split(\" \")[:2]\n",
- " street_address, city = fake.address().split(\"\\n\")\n",
- " city, state = city.split(\", \")\n",
- " state, zipcode = state.split(\" \")\n",
- " except ValueError:\n",
- " continue\n",
- " else:\n",
- " yield dict(\n",
- " first_name=first_name,\n",
- " last_name=last_name,\n",
- " sex=sex,\n",
- " home_address=street_address,\n",
- " home_city=city,\n",
- " home_state=state,\n",
- " home_zip=zipcode,\n",
- " date_of_birth=str(fake.date_time_between(start_date=\"-35y\", end_date=\"-15y\").date()),\n",
- " home_phone=fake.phone_number()[:20],\n",
- " )"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "metadata": {},
- "outputs": [],
- "source": [
- "Student.insert(dict(k, student_id=i) for i, k in zip(range(100, 300), yield_students()))\n",
- "\n",
- "Department.insert(\n",
- " dict(\n",
- " dept=dept,\n",
- " dept_name=name,\n",
- " dept_address=fake.address(),\n",
- " dept_phone=fake.phone_number()[:20],\n",
- " )\n",
- " for dept, name in [\n",
- " [\"CS\", \"Computer Science\"],\n",
- " [\"BIOL\", \"Life Sciences\"],\n",
- " [\"PHYS\", \"Physics\"],\n",
- " [\"MATH\", \"Mathematics\"],\n",
- " ]\n",
- ")\n",
- "\n",
- "StudentMajor.insert(\n",
- " {**s, **d, \"declare_date\": fake.date_between(start_date=datetime.date(1999, 1, 1))}\n",
- " for s, d in zip(Student.fetch(\"KEY\"), random.choices(Department.fetch(\"KEY\"), k=len(Student())))\n",
- " if random.random() < 0.75\n",
- ")\n",
- "\n",
- "# from https://www.utah.edu/\n",
- "Course.insert(\n",
- " [\n",
- " [\"BIOL\", 1006, \"World of Dinosaurs\", 3],\n",
- " [\"BIOL\", 1010, \"Biology in the 21st Century\", 3],\n",
- " [\"BIOL\", 1030, \"Human Biology\", 3],\n",
- " [\"BIOL\", 1210, \"Principles of Biology\", 4],\n",
- " [\"BIOL\", 2010, \"Evolution & Diversity of Life\", 3],\n",
- " [\"BIOL\", 2020, \"Principles of Cell Biology\", 3],\n",
- " [\"BIOL\", 2021, \"Principles of Cell Science\", 4],\n",
- " [\"BIOL\", 2030, \"Principles of Genetics\", 3],\n",
- " [\"BIOL\", 2210, \"Human Genetics\", 3],\n",
- " [\"BIOL\", 2325, \"Human Anatomy\", 4],\n",
- " [\"BIOL\", 2330, \"Plants & Society\", 3],\n",
- " [\"BIOL\", 2355, \"Field Botany\", 2],\n",
- " [\"BIOL\", 2420, \"Human Physiology\", 4],\n",
- " [\"PHYS\", 2040, \"Classcal Theoretical Physics II\", 4],\n",
- " [\"PHYS\", 2060, \"Quantum Mechanics\", 3],\n",
- " [\"PHYS\", 2100, \"General Relativity and Cosmology\", 3],\n",
- " [\"PHYS\", 2140, \"Statistical Mechanics\", 4],\n",
- " [\"PHYS\", 2210, \"Physics for Scientists and Engineers I\", 4],\n",
- " [\"PHYS\", 2220, \"Physics for Scientists and Engineers II\", 4],\n",
- " [\"PHYS\", 3210, \"Physics for Scientists I (Honors)\", 4],\n",
- " [\"PHYS\", 3220, \"Physics for Scientists II (Honors)\", 4],\n",
- " [\"MATH\", 1250, \"Calculus for AP Students I\", 4],\n",
- " [\"MATH\", 1260, \"Calculus for AP Students II\", 4],\n",
- " [\"MATH\", 1210, \"Calculus I\", 4],\n",
- " [\"MATH\", 1220, \"Calculus II\", 4],\n",
- " [\"MATH\", 2210, \"Calculus III\", 3],\n",
- " [\"MATH\", 2270, \"Linear Algebra\", 4],\n",
- " [\"MATH\", 2280, \"Introduction to Differential Equations\", 4],\n",
- " [\"MATH\", 3210, \"Foundations of Analysis I\", 4],\n",
- " [\"MATH\", 3220, \"Foundations of Analysis II\", 4],\n",
- " [\"CS\", 1030, \"Foundations of Computer Science\", 3],\n",
- " [\"CS\", 1410, \"Introduction to Object-Oriented Programming\", 4],\n",
- " [\"CS\", 2420, \"Introduction to Algorithms & Data Structures\", 4],\n",
- " [\"CS\", 2100, \"Discrete Structures\", 3],\n",
- " [\"CS\", 3500, \"Software Practice\", 4],\n",
- " [\"CS\", 3505, \"Software Practice II\", 3],\n",
- " [\"CS\", 3810, \"Computer Organization\", 4],\n",
- " [\"CS\", 4400, \"Computer Systems\", 4],\n",
- " [\"CS\", 4150, \"Algorithms\", 3],\n",
- " [\"CS\", 3100, \"Models of Computation\", 3],\n",
- " [\"CS\", 3200, \"Introduction to Scientific Computing\", 3],\n",
- " [\"CS\", 4000, \"Senior Capstone Project - Design Phase\", 3],\n",
- " [\"CS\", 4500, \"Senior Capstone Project\", 3],\n",
- " [\"CS\", 4940, \"Undergraduate Research\", 3],\n",
- " [\"CS\", 4970, \"Computer Science Bachelors Thesis\", 3],\n",
- " ]\n",
- ")\n",
- "\n",
- "Term.insert(dict(term_year=year, term=term) for year in range(1999, 2019) for term in [\"Spring\", \"Summer\", \"Fall\"])\n",
- "\n",
- "Term().fetch(order_by=(\"term_year DESC\", \"term DESC\"), as_dict=True, limit=1)[0]\n",
- "\n",
- "CurrentTerm().insert1({**Term().fetch(order_by=(\"term_year DESC\", \"term DESC\"), as_dict=True, limit=1)[0]})\n",
- "\n",
- "\n",
- "def make_section(prob):\n",
- " for c in (Course * Term).proj():\n",
- " for sec in \"abcd\":\n",
- " if random.random() < prob:\n",
- " break\n",
- " yield {\n",
- " **c,\n",
- " \"section\": sec,\n",
- " \"auditorium\": random.choice(\"ABCDEF\") + str(random.randint(1, 100)),\n",
- " }\n",
- "\n",
- "\n",
- "Section.insert(make_section(0.5))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 9,
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "100%|██████████| 200/200 [00:27<00:00, 7.17it/s]\n"
- ]
- }
- ],
- "source": [
- "# Enrollment\n",
- "terms = Term().fetch(\"KEY\")\n",
- "quit_prob = 0.1\n",
- "for student in tqdm(Student.fetch(\"KEY\")):\n",
- " start_term = random.randrange(len(terms))\n",
- " for term in terms[start_term:]:\n",
- " if random.random() < quit_prob:\n",
- " break\n",
- " else:\n",
- " sections = ((Section & term) - (Course & (Enroll & student))).fetch(\"KEY\")\n",
- " if sections:\n",
- " Enroll.insert(\n",
- " {**student, **section} for section in random.sample(sections, random.randrange(min(5, len(sections))))\n",
- " )\n",
- "\n",
- "# assign random grades\n",
- "grades = LetterGrade.fetch(\"grade\")\n",
- "\n",
- "grade_keys = Enroll.fetch(\"KEY\")\n",
- "random.shuffle(grade_keys)\n",
- "grade_keys = grade_keys[: len(grade_keys) * 9 // 10]\n",
- "\n",
- "Grade.insert({**key, \"grade\": grade} for key, grade in zip(grade_keys, random.choices(grades, k=len(grade_keys))))"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# dj.Top Restriction"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 29,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " \n",
- " \n",
- " \n",
- "
\n",
- "
\n",
- " | | | | | | | | |
\n",
- " | 100 | \n",
- "MATH | \n",
- "2280 | \n",
- "2018 | \n",
- "Fall | \n",
- "a | \n",
- "A- | \n",
- "3.67 |
| 191 | \n",
- "MATH | \n",
- "2210 | \n",
- "2018 | \n",
- "Spring | \n",
- "b | \n",
- "A | \n",
- "4.00 |
| 211 | \n",
- "CS | \n",
- "2100 | \n",
- "2018 | \n",
- "Fall | \n",
- "a | \n",
- "A | \n",
- "4.00 |
| 273 | \n",
- "PHYS | \n",
- "2100 | \n",
- "2018 | \n",
- "Spring | \n",
- "a | \n",
- "A | \n",
- "4.00 |
| 282 | \n",
- "BIOL | \n",
- "2021 | \n",
- "2018 | \n",
- "Spring | \n",
- "d | \n",
- "A | \n",
- "4.00 |
\n",
- "
\n",
- " \n",
- "
Total: 5
\n",
- " "
- ],
- "text/plain": [
- "*student_id *dept *course *term_year *term *section *grade points \n",
- "+------------+ +------+ +--------+ +-----------+ +--------+ +---------+ +-------+ +--------+\n",
- "100 MATH 2280 2018 Fall a A- 3.67 \n",
- "191 MATH 2210 2018 Spring b A 4.00 \n",
- "211 CS 2100 2018 Fall a A 4.00 \n",
- "273 PHYS 2100 2018 Spring a A 4.00 \n",
- "282 BIOL 2021 2018 Spring d A 4.00 \n",
- " (Total: 5)"
- ]
- },
- "execution_count": 29,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "(Grade * LetterGrade) & \"term_year='2018'\" & dj.Top(limit=5, order_by=\"points DESC\", offset=5)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 35,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "\"SELECT `grade`,`student_id`,`dept`,`course`,`term_year`,`term`,`section`,`points` FROM `university`.`#letter_grade` NATURAL JOIN `university`.`grade` WHERE ( (term_year='2018')) ORDER BY `points` DESC LIMIT 10\""
- ]
- },
- "execution_count": 35,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "((LetterGrade * Grade) & \"term_year='2018'\" & dj.Top(limit=10, order_by=\"points DESC\", offset=0)).make_sql()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 44,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "\"SELECT `student_id`,`dept`,`course`,`term_year`,`term`,`section`,`grade`,`points` FROM `university`.`grade` NATURAL JOIN `university`.`#letter_grade` WHERE ( (term_year='2018')) ORDER BY `points` DESC LIMIT 20\""
- ]
- },
- "execution_count": 44,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "((Grade * LetterGrade) & \"term_year='2018'\" & dj.Top(limit=20, order_by=\"points DESC\", offset=0)).make_sql()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 47,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- "
\n",
- " | | | | | | | | |
\n",
- " | 100 | \n",
- "CS | \n",
- "3200 | \n",
- "2018 | \n",
- "Fall | \n",
- "c | \n",
- "A | \n",
- "4.00 |
| 100 | \n",
- "MATH | \n",
- "2280 | \n",
- "2018 | \n",
- "Fall | \n",
- "a | \n",
- "A- | \n",
- "3.67 |
| 100 | \n",
- "PHYS | \n",
- "2210 | \n",
- "2018 | \n",
- "Spring | \n",
- "d | \n",
- "A | \n",
- "4.00 |
| 122 | \n",
- "CS | \n",
- "1030 | \n",
- "2018 | \n",
- "Fall | \n",
- "c | \n",
- "B+ | \n",
- "3.33 |
| 131 | \n",
- "BIOL | \n",
- "2030 | \n",
- "2018 | \n",
- "Spring | \n",
- "a | \n",
- "A | \n",
- "4.00 |
| 131 | \n",
- "CS | \n",
- "3200 | \n",
- "2018 | \n",
- "Fall | \n",
- "b | \n",
- "B+ | \n",
- "3.33 |
| 136 | \n",
- "BIOL | \n",
- "2210 | \n",
- "2018 | \n",
- "Spring | \n",
- "c | \n",
- "B+ | \n",
- "3.33 |
| 136 | \n",
- "MATH | \n",
- "2210 | \n",
- "2018 | \n",
- "Fall | \n",
- "b | \n",
- "B+ | \n",
- "3.33 |
| 141 | \n",
- "BIOL | \n",
- "2010 | \n",
- "2018 | \n",
- "Summer | \n",
- "c | \n",
- "B+ | \n",
- "3.33 |
| 141 | \n",
- "CS | \n",
- "2420 | \n",
- "2018 | \n",
- "Fall | \n",
- "b | \n",
- "A | \n",
- "4.00 |
| 141 | \n",
- "CS | \n",
- "3200 | \n",
- "2018 | \n",
- "Fall | \n",
- "b | \n",
- "A- | \n",
- "3.67 |
| 182 | \n",
- "CS | \n",
- "1410 | \n",
- "2018 | \n",
- "Summer | \n",
- "c | \n",
- "A- | \n",
- "3.67 |
\n",
- "
\n",
- "
...
\n",
- "
Total: 20
\n",
- " "
- ],
- "text/plain": [
- "*student_id *dept *course *term_year *term *section *grade points \n",
- "+------------+ +------+ +--------+ +-----------+ +--------+ +---------+ +-------+ +--------+\n",
- "100 CS 3200 2018 Fall c A 4.00 \n",
- "100 MATH 2280 2018 Fall a A- 3.67 \n",
- "100 PHYS 2210 2018 Spring d A 4.00 \n",
- "122 CS 1030 2018 Fall c B+ 3.33 \n",
- "131 BIOL 2030 2018 Spring a A 4.00 \n",
- "131 CS 3200 2018 Fall b B+ 3.33 \n",
- "136 BIOL 2210 2018 Spring c B+ 3.33 \n",
- "136 MATH 2210 2018 Fall b B+ 3.33 \n",
- "141 BIOL 2010 2018 Summer c B+ 3.33 \n",
- "141 CS 2420 2018 Fall b A 4.00 \n",
- "141 CS 3200 2018 Fall b A- 3.67 \n",
- "182 CS 1410 2018 Summer c A- 3.67 \n",
- " ...\n",
- " (Total: 20)"
- ]
- },
- "execution_count": 47,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "(Grade * LetterGrade) & \"term_year='2018'\" & dj.Top(limit=20, order_by=\"points DESC\", offset=0)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 41,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- "
\n",
- " | | | | | | | | |
\n",
- " | A | \n",
- "100 | \n",
- "CS | \n",
- "3200 | \n",
- "2018 | \n",
- "Fall | \n",
- "c | \n",
- "4.00 |
| A | \n",
- "100 | \n",
- "PHYS | \n",
- "2210 | \n",
- "2018 | \n",
- "Spring | \n",
- "d | \n",
- "4.00 |
| A | \n",
- "131 | \n",
- "BIOL | \n",
- "2030 | \n",
- "2018 | \n",
- "Spring | \n",
- "a | \n",
- "4.00 |
| A | \n",
- "141 | \n",
- "CS | \n",
- "2420 | \n",
- "2018 | \n",
- "Fall | \n",
- "b | \n",
- "4.00 |
| A | \n",
- "186 | \n",
- "PHYS | \n",
- "2210 | \n",
- "2018 | \n",
- "Spring | \n",
- "a | \n",
- "4.00 |
| A | \n",
- "191 | \n",
- "MATH | \n",
- "2210 | \n",
- "2018 | \n",
- "Spring | \n",
- "b | \n",
- "4.00 |
| A | \n",
- "211 | \n",
- "CS | \n",
- "2100 | \n",
- "2018 | \n",
- "Fall | \n",
- "a | \n",
- "4.00 |
| A | \n",
- "273 | \n",
- "PHYS | \n",
- "2100 | \n",
- "2018 | \n",
- "Spring | \n",
- "a | \n",
- "4.00 |
| A | \n",
- "282 | \n",
- "BIOL | \n",
- "2021 | \n",
- "2018 | \n",
- "Spring | \n",
- "d | \n",
- "4.00 |
| A- | \n",
- "100 | \n",
- "MATH | \n",
- "2280 | \n",
- "2018 | \n",
- "Fall | \n",
- "a | \n",
- "3.67 |
| A- | \n",
- "141 | \n",
- "CS | \n",
- "3200 | \n",
- "2018 | \n",
- "Fall | \n",
- "b | \n",
- "3.67 |
| A- | \n",
- "182 | \n",
- "CS | \n",
- "1410 | \n",
- "2018 | \n",
- "Summer | \n",
- "c | \n",
- "3.67 |
\n",
- "
\n",
- "
...
\n",
- "
Total: 20
\n",
- " "
- ],
- "text/plain": [
- "*grade *student_id *dept *course *term_year *term *section points \n",
- "+-------+ +------------+ +------+ +--------+ +-----------+ +--------+ +---------+ +--------+\n",
- "A 100 CS 3200 2018 Fall c 4.00 \n",
- "A 100 PHYS 2210 2018 Spring d 4.00 \n",
- "A 131 BIOL 2030 2018 Spring a 4.00 \n",
- "A 141 CS 2420 2018 Fall b 4.00 \n",
- "A 186 PHYS 2210 2018 Spring a 4.00 \n",
- "A 191 MATH 2210 2018 Spring b 4.00 \n",
- "A 211 CS 2100 2018 Fall a 4.00 \n",
- "A 273 PHYS 2100 2018 Spring a 4.00 \n",
- "A 282 BIOL 2021 2018 Spring d 4.00 \n",
- "A- 100 MATH 2280 2018 Fall a 3.67 \n",
- "A- 141 CS 3200 2018 Fall b 3.67 \n",
- "A- 182 CS 1410 2018 Summer c 3.67 \n",
- " ...\n",
- " (Total: 20)"
- ]
- },
- "execution_count": 41,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "(LetterGrade * Grade) & \"term_year='2018'\" & dj.Top(limit=20, order_by=\"points DESC\", offset=0)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "elements",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.11.8"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/docs/src/archive/tutorials/json.ipynb b/docs/src/archive/tutorials/json.ipynb
deleted file mode 100644
index 9c5feebf6..000000000
--- a/docs/src/archive/tutorials/json.ipynb
+++ /dev/null
@@ -1,1080 +0,0 @@
-{
- "cells": [
- {
- "attachments": {},
- "cell_type": "markdown",
- "id": "7fe24127-c0d0-4ff8-96b4-6ab0d9307e73",
- "metadata": {},
- "source": [
- "# Using the json type"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "62450023",
- "metadata": {},
- "source": [
- "> ⚠️ Note the following before using the `json` type\n",
- "> - Supported only for MySQL >= 8.0 when [JSON_VALUE](https://dev.mysql.com/doc/refman/8.0/en/json-search-functions.html#function_json-value) introduced.\n",
- "> - Equivalent Percona is fully-compatible.\n",
- "> - MariaDB is not supported since [JSON_VALUE](https://mariadb.com/kb/en/json_value/#syntax) does not allow type specification like MySQL's.\n",
- "> - Not yet supported in DataJoint MATLAB"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "id": "67cf93d2",
- "metadata": {},
- "source": [
- "First you will need to [install](../../getting-started/#installation) and [connect](../../getting-started/#connection) to a DataJoint [data pipeline](https://docs.datajoint.com/core/datajoint-python/latest/concepts/data-pipelines/#what-is-a-data-pipeline).\n",
- "\n",
- "Now let's start by importing the `datajoint` client."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 1,
- "id": "bc0b6f54-8f11-45f4-bf8d-e1058ee0056f",
- "metadata": {},
- "outputs": [],
- "source": [
- "import datajoint as dj"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "3544cab9-f2db-458a-9431-939bea5affc5",
- "metadata": {},
- "source": [
- "## Table Definition"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "a2998c71",
- "metadata": {},
- "source": [
- "For this exercise, let's imagine we work for an awesome company that is organizing a fun RC car race across various teams in the company. Let's see which team has the fastest car! 🏎️\n",
- "\n",
- "This establishes 2 important entities: a `Team` and a `Car`. Normally the entities are mapped to their own dedicated table, however, let's assume that `Team` is well-structured but `Car` is less structured than we'd prefer. In other words, the structure for what makes up a *car* is varying too much between entries (perhaps because users of the pipeline haven't agreed yet on the definition? 🤷).\n",
- "\n",
- "This would make it a good use-case to keep `Team` as a table but make `Car` a `json` type defined within the `Team` table.\n",
- "\n",
- "Let's begin."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "id": "dc318298-b819-4f06-abbd-7bb7544dd431",
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "[2023-02-12 00:14:33,027][INFO]: Connecting root@fakeservices.datajoint.io:3306\n",
- "[2023-02-12 00:14:33,039][INFO]: Connected root@fakeservices.datajoint.io:3306\n"
- ]
- }
- ],
- "source": [
- "schema = dj.Schema(f\"{dj.config['database.user']}_json\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "id": "4aaf96db-85d9-4e94-a4c3-3558f4cc6671",
- "metadata": {},
- "outputs": [],
- "source": [
- "@schema\n",
- "class Team(dj.Lookup):\n",
- " definition = \"\"\"\n",
- " # A team within a company\n",
- " name: varchar(40) # team name\n",
- " ---\n",
- " car=null: json # A car belonging to a team (null to allow registering first but specifying car later)\n",
- " \n",
- " unique index(car.length:decimal(4, 1)) # Add an index if this key is frequently accessed\n",
- " \"\"\""
- ]
- },
- {
- "cell_type": "markdown",
- "id": "640bf7a7-9e07-4953-9c8a-304e55c467f8",
- "metadata": {},
- "source": [
- "## Insert"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "7081e577",
- "metadata": {},
- "source": [
- "Let's suppose that engineering is first up to register their car."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "id": "30f0d62e",
- "metadata": {},
- "outputs": [],
- "source": [
- "Team.insert1(\n",
- " {\n",
- " \"name\": \"engineering\",\n",
- " \"car\": {\n",
- " \"name\": \"Rever\",\n",
- " \"length\": 20.5,\n",
- " \"inspected\": True,\n",
- " \"tire_pressure\": [32, 31, 33, 34],\n",
- " \"headlights\": [\n",
- " {\n",
- " \"side\": \"left\",\n",
- " \"hyper_white\": None,\n",
- " },\n",
- " {\n",
- " \"side\": \"right\",\n",
- " \"hyper_white\": None,\n",
- " },\n",
- " ],\n",
- " },\n",
- " }\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "ee5e4dcf",
- "metadata": {},
- "source": [
- "Next, business and marketing teams are up and register their cars.\n",
- "\n",
- "A few points to notice below:\n",
- "- The person signing up on behalf of marketing does not know the specifics of the car during registration but another team member will be updating this soon before the race.\n",
- "- Notice how the `business` and `engineering` teams appear to specify the same property but refer to it as `safety_inspected` and `inspected` respectfully."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "id": "b532e16c",
- "metadata": {},
- "outputs": [],
- "source": [
- "Team.insert(\n",
- " [\n",
- " {\n",
- " \"name\": \"marketing\",\n",
- " \"car\": None,\n",
- " },\n",
- " {\n",
- " \"name\": \"business\",\n",
- " \"car\": {\n",
- " \"name\": \"Chaching\",\n",
- " \"length\": 100,\n",
- " \"safety_inspected\": False,\n",
- " \"tire_pressure\": [34, 30, 27, 32],\n",
- " \"headlights\": [\n",
- " {\n",
- " \"side\": \"left\",\n",
- " \"hyper_white\": True,\n",
- " },\n",
- " {\n",
- " \"side\": \"right\",\n",
- " \"hyper_white\": True,\n",
- " },\n",
- " ],\n",
- " },\n",
- " },\n",
- " ]\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "57365de7",
- "metadata": {},
- "source": [
- "We can preview the table data much like normal but notice how the value of `car` behaves like other BLOB-like attributes."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "id": "0e3b517c",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " \n",
- " \n",
- " A team within a company\n",
- " \n",
- "
\n",
- " | | |
\n",
- " | marketing | \n",
- "=BLOB= |
| engineering | \n",
- "=BLOB= |
| business | \n",
- "=BLOB= |
\n",
- "
\n",
- " \n",
- "
Total: 3
\n",
- " "
- ],
- "text/plain": [
- "*name car \n",
- "+------------+ +--------+\n",
- "marketing =BLOB= \n",
- "engineering =BLOB= \n",
- "business =BLOB= \n",
- " (Total: 3)"
- ]
- },
- "execution_count": 6,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "Team()"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "c95cbbee-4ef7-4870-ad42-a60345a3644f",
- "metadata": {},
- "source": [
- "## Restriction"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "8b454996",
- "metadata": {},
- "source": [
- "Now let's see what kinds of queries we can form to demostrate how we can query this pipeline."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "id": "81efda24",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " \n",
- " \n",
- " A team within a company\n",
- " \n",
- "
\n",
- " | | |
\n",
- " | business | \n",
- "=BLOB= |
\n",
- "
\n",
- " \n",
- "
Total: 1
\n",
- " "
- ],
- "text/plain": [
- "*name car \n",
- "+----------+ +--------+\n",
- "business =BLOB= \n",
- " (Total: 1)"
- ]
- },
- "execution_count": 7,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Which team has a `car` equal to 100 inches long?\n",
- "Team & {\"car.length\": 100}"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "id": "fd7b855d",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " \n",
- " \n",
- " A team within a company\n",
- " \n",
- "
\n",
- " | | |
\n",
- " | engineering | \n",
- "=BLOB= |
\n",
- "
\n",
- " \n",
- "
Total: 1
\n",
- " "
- ],
- "text/plain": [
- "*name car \n",
- "+------------+ +--------+\n",
- "engineering =BLOB= \n",
- " (Total: 1)"
- ]
- },
- "execution_count": 8,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Which team has a `car` less than 50 inches long?\n",
- "Team & \"car->>'$.length' < 50\""
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 9,
- "id": "b76ebb75",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " \n",
- " \n",
- " A team within a company\n",
- " \n",
- "
\n",
- " | | |
\n",
- " | engineering | \n",
- "=BLOB= |
\n",
- "
\n",
- " \n",
- "
Total: 1
\n",
- " "
- ],
- "text/plain": [
- "*name car \n",
- "+------------+ +--------+\n",
- "engineering =BLOB= \n",
- " (Total: 1)"
- ]
- },
- "execution_count": 9,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Any team that has had their car inspected?\n",
- "Team & [{\"car.inspected:unsigned\": True}, {\"car.safety_inspected:unsigned\": True}]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 10,
- "id": "b787784c",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " \n",
- " \n",
- " A team within a company\n",
- " \n",
- "
\n",
- " | | |
\n",
- " | engineering | \n",
- "=BLOB= |
| marketing | \n",
- "=BLOB= |
\n",
- "
\n",
- " \n",
- "
Total: 2
\n",
- " "
- ],
- "text/plain": [
- "*name car \n",
- "+------------+ +--------+\n",
- "engineering =BLOB= \n",
- "marketing =BLOB= \n",
- " (Total: 2)"
- ]
- },
- "execution_count": 10,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Which teams do not have hyper white lights for their first head light?\n",
- "Team & {\"car.headlights[0].hyper_white\": None}"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "5bcf0b5d",
- "metadata": {},
- "source": [
- "Notice that the previous query will satisfy the `None` check if it experiences any of the following scenarious:\n",
- "- if entire record missing (`marketing` satisfies this)\n",
- "- JSON key is missing\n",
- "- JSON value is set to JSON `null` (`engineering` satisfies this)"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "id": "bcf1682e-a0c7-4c2f-826b-0aec9052a694",
- "metadata": {},
- "source": [
- "## Projection"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "id": "daea110e",
- "metadata": {},
- "source": [
- "Projections can be quite useful with the `json` type since we can extract out just what we need. This allows greater query flexibility but more importantly, for us to be able to fetch only what is pertinent."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 11,
- "id": "8fb8334a",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- "
\n",
- " | | | |
\n",
- " | business | \n",
- "Chaching | \n",
- "100 |
| engineering | \n",
- "Rever | \n",
- "20.5 |
| marketing | \n",
- "None | \n",
- "None |
\n",
- "
\n",
- " \n",
- "
Total: 3
\n",
- " "
- ],
- "text/plain": [
- "*name car_name car_length \n",
- "+------------+ +----------+ +------------+\n",
- "business Chaching 100 \n",
- "engineering Rever 20.5 \n",
- "marketing None None \n",
- " (Total: 3)"
- ]
- },
- "execution_count": 11,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Only interested in the car names and the length but let the type be inferred\n",
- "q_untyped = Team.proj(\n",
- " car_name=\"car.name\",\n",
- " car_length=\"car.length\",\n",
- ")\n",
- "q_untyped"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 12,
- "id": "bb5f0448",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "[{'name': 'business', 'car_name': 'Chaching', 'car_length': '100'},\n",
- " {'name': 'engineering', 'car_name': 'Rever', 'car_length': '20.5'},\n",
- " {'name': 'marketing', 'car_name': None, 'car_length': None}]"
- ]
- },
- "execution_count": 12,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "q_untyped.fetch(as_dict=True)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 13,
- "id": "a307dfd7",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- "
\n",
- " | | | |
\n",
- " | business | \n",
- "Chaching | \n",
- "100.0 |
| engineering | \n",
- "Rever | \n",
- "20.5 |
| marketing | \n",
- "None | \n",
- "None |
\n",
- "
\n",
- " \n",
- "
Total: 3
\n",
- " "
- ],
- "text/plain": [
- "*name car_name car_length \n",
- "+------------+ +----------+ +------------+\n",
- "business Chaching 100.0 \n",
- "engineering Rever 20.5 \n",
- "marketing None None \n",
- " (Total: 3)"
- ]
- },
- "execution_count": 13,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Nevermind, I'll specify the type explicitly\n",
- "q_typed = Team.proj(\n",
- " car_name=\"car.name\",\n",
- " car_length=\"car.length:float\",\n",
- ")\n",
- "q_typed"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 14,
- "id": "8a93dbf9",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "[{'name': 'business', 'car_name': 'Chaching', 'car_length': 100.0},\n",
- " {'name': 'engineering', 'car_name': 'Rever', 'car_length': 20.5},\n",
- " {'name': 'marketing', 'car_name': None, 'car_length': None}]"
- ]
- },
- "execution_count": 14,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "q_typed.fetch(as_dict=True)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "62dd0239-fa70-4369-81eb-3d46c5053fee",
- "metadata": {},
- "source": [
- "## Describe"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "id": "73d9df01",
- "metadata": {},
- "source": [
- "Lastly, the `.describe()` function on the `Team` table can help us generate the table's definition. This is useful if we are connected directly to the pipeline without the original source."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 16,
- "id": "0e739932",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "# A team within a company\n",
- "name : varchar(40) # team name\n",
- "---\n",
- "car=null : json # A car belonging to a team (null to allow registering first but specifying car later)\n",
- "UNIQUE INDEX ((json_value(`car`, _utf8mb4'$.length' returning decimal(4, 1))))\n",
- "\n"
- ]
- }
- ],
- "source": [
- "rebuilt_definition = Team.describe()\n",
- "print(rebuilt_definition)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "be1070d5-765b-4bc2-92de-8a6ffd885984",
- "metadata": {},
- "source": [
- "## Cleanup"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "id": "cb959927",
- "metadata": {},
- "source": [
- "Finally, let's clean up what we created in this tutorial."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 17,
- "id": "d9cc28a3-3ffd-4126-b7e9-bc6365040b93",
- "metadata": {},
- "outputs": [],
- "source": [
- "schema.drop()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "68ad4340",
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "all_purposes",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.9.18"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/docs/src/how-to/deferred-schema-activation.md b/docs/src/how-to/deferred-schema-activation.md
new file mode 100644
index 000000000..900d3be13
--- /dev/null
+++ b/docs/src/how-to/deferred-schema-activation.md
@@ -0,0 +1,151 @@
+# Deferred Schema Activation
+
+Define table classes without an immediate database connection, then activate
+the schema later when ready to connect.
+
+## When to use deferred activation
+
+Deferred schema activation is useful when you want to:
+
+- Define reusable table modules that work with different databases
+- Write testable code where the database connection is injected at runtime
+- Deploy the same pipeline to multiple environments (development, staging,
+ production)
+- Import table definitions without triggering database connections
+
+## Define tables without a database connection
+
+Create a schema object without providing a schema name:
+
+```python
+import datajoint as dj
+
+# Create schema without activation
+schema = dj.Schema()
+
+@schema
+class Subject(dj.Manual):
+ definition = """
+ subject_id : int
+ ---
+ subject_name : varchar(64)
+ """
+
+@schema
+class Session(dj.Manual):
+ definition = """
+ -> Subject
+ session_date : date
+ ---
+ session_notes : varchar(256)
+ """
+```
+
+The `@schema` decorator queues table classes for later declaration. No database
+connection is made until you call `activate()`.
+
+## Check activation status
+
+To check whether a schema has been activated:
+
+```python
+schema.is_activated() # Returns False before activation
+```
+
+## Activate the schema
+
+When ready to connect, call `activate()` with the database schema name:
+
+```python
+schema.activate('my_project')
+```
+
+This:
+
+1. Connects to the database (using `dj.conn()` by default)
+2. Creates the schema if it doesn't exist
+3. Declares all queued tables in the order they were decorated
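+
+After the call returns, the schema reports as active:
+
+```python
+assert schema.is_activated()  # the queued tables have been declared
+```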
+
+## Activate with a specific connection
+
+To use a specific database connection:
+
+```python
+connection = dj.conn(
+ host='production-server.example.com',
+ user='pipeline_user',
+ password='secret'
+)
+
+schema.activate('my_project', connection=connection)
+```
+
+## Activate with options
+
+Control schema and table creation behavior:
+
+```python
+# Connect to existing schema only (don't create if missing)
+schema.activate('my_project', create_schema=False)
+
+# Don't create tables automatically
+schema.activate('my_project', create_tables=False)
+```
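+
+The two options can be combined. For example, a minimal sketch for a
+locked-down deployment, assuming the schema and its tables were provisioned
+ahead of time:
+
+```python
+# Assumes 'my_project' and all of its tables already exist on the server
+schema.activate('my_project', create_schema=False, create_tables=False)
+```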
+
+## Example: environment-based activation
+
+```python
+# pipeline/tables.py
+import datajoint as dj
+
+schema = dj.Schema()
+
+@schema
+class Experiment(dj.Manual):
+ definition = """
+ experiment_id : int
+ ---
+ experiment_date : date
+ """
+
+# pipeline/activate.py
+import os
+from pipeline.tables import schema
+
+env = os.environ.get('ENVIRONMENT', 'development')
+
+schema_names = {
+ 'development': 'dev_experiments',
+ 'staging': 'staging_experiments',
+ 'production': 'prod_experiments',
+}
+
+schema.activate(schema_names[env])
+```
+
+## Example: test fixtures
+
+```python
+import pytest
+import datajoint as dj
+from mypackage.tables import schema, Subject, Session
+
+@pytest.fixture
+def test_schema(db_credentials):
+ """Activate schema with test database."""
+ schema.activate(
+ 'test_pipeline',
+ connection=dj.conn(**db_credentials)
+ )
+ yield schema
+ schema.drop() # Clean up after tests
+```
+
+## Restrictions
+
+- A schema can only be activated once. Attempting to activate it for a
+  different database raises `DataJointError` (see the sketch after this list).
+- Calling `activate()` without a schema name on an unactivated schema raises
+ `DataJointError`.
+- Part tables should not be decorated directly; they are processed automatically
+ with their master table.
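+
+For example, a minimal sketch of the single-activation rule (the schema names
+are illustrative):
+
+```python
+import datajoint as dj
+
+schema = dj.Schema()
+schema.activate('my_project')         # first activation succeeds
+try:
+    schema.activate('other_project')  # different database name
+except dj.DataJointError as err:
+    print(f"Re-activation refused: {err}")
+```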
diff --git a/src/datajoint/objectref.py b/src/datajoint/objectref.py
index 9a049b2cf..d318fe57e 100644
--- a/src/datajoint/objectref.py
+++ b/src/datajoint/objectref.py
@@ -44,7 +44,7 @@ class ObjectRef:
ext: File extension as tooling hint (e.g., ".dat", ".zarr") or None.
This is a conventional suffix for tooling, not a content-type declaration.
is_dir: True if stored content is a directory/key-prefix (e.g., Zarr store)
- timestamp: ISO 8601 upload timestamp
+ timestamp: ISO 8601 upload timestamp, or None if not recorded
mime_type: MIME type (files only, auto-detected from extension)
item_count: Number of files (folders only), or None if not computed
"""
@@ -54,7 +54,7 @@ class ObjectRef:
hash: str | None
ext: str | None
is_dir: bool
- timestamp: datetime
+ timestamp: datetime | None
url: str | None = None
store: str | None = None
mime_type: str | None = None
@@ -128,32 +128,6 @@ def to_json(self) -> dict:
data["item_count"] = self.item_count
return data
- def to_dict(self) -> dict:
- """
- Return the raw JSON metadata as a dictionary.
-
- This is useful for inspecting the stored metadata without triggering
- any storage backend operations. The returned dict matches the JSON
- structure stored in the database.
-
- Returns
- -------
- dict
- Dict containing the object metadata:
-
- - path: Relative storage path within the store
- - url: Full URI (e.g., 's3://bucket/path') (optional)
- - store: Store name (optional, None for default store)
- - size: File/folder size in bytes (or None)
- - hash: Content hash (or None)
- - ext: File extension (or None)
- - is_dir: True if folder
- - timestamp: Upload timestamp
- - mime_type: MIME type (files only, optional)
- - item_count: Number of files (folders only, optional)
- """
- return self.to_json()
-
def _ensure_backend(self):
"""Ensure storage backend is available for I/O operations."""
if self._backend is None:
diff --git a/src/datajoint/storage.py b/src/datajoint/storage.py
index 6dacbd7ec..846228137 100644
--- a/src/datajoint/storage.py
+++ b/src/datajoint/storage.py
@@ -24,13 +24,13 @@
# Characters safe for use in filenames and URLs
TOKEN_ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"
-# Supported remote URL protocols for copy insert
-REMOTE_PROTOCOLS = ("s3://", "gs://", "gcs://", "az://", "abfs://", "http://", "https://")
+# Supported URL protocols
+URL_PROTOCOLS = ("file://", "s3://", "gs://", "gcs://", "az://", "abfs://", "http://", "https://")
-def is_remote_url(path: str) -> bool:
+def is_url(path: str) -> bool:
"""
- Check if a path is a remote URL.
+ Check if a path is a URL.
Parameters
----------
@@ -40,21 +40,57 @@ def is_remote_url(path: str) -> bool:
Returns
-------
bool
- True if path starts with a supported remote protocol.
+ True if path starts with a supported URL protocol.
"""
- if not isinstance(path, str):
- return False
- return path.lower().startswith(REMOTE_PROTOCOLS)
+ return path.lower().startswith(URL_PROTOCOLS)
-def parse_remote_url(url: str) -> tuple[str, str]:
+def normalize_to_url(path: str) -> str:
"""
- Parse a remote URL into protocol and path.
+ Normalize a path to URL form.
+
+ Converts local filesystem paths to file:// URLs. URLs are returned unchanged.
+
+ Parameters
+ ----------
+ path : str
+ Path string (local path or URL).
+
+ Returns
+ -------
+ str
+ URL form of the path.
+
+ Examples
+ --------
+ >>> normalize_to_url("/data/file.dat")
+ 'file:///data/file.dat'
+ >>> normalize_to_url("s3://bucket/key")
+ 's3://bucket/key'
+ >>> normalize_to_url("file:///already/url")
+ 'file:///already/url'
+ """
+ if is_url(path):
+ return path
+ # Convert local path to file:// URL
+ # Ensure absolute path and proper format
+ abs_path = str(Path(path).resolve())
+ # Handle Windows paths (C:\...) vs Unix paths (/...)
+ if abs_path.startswith("/"):
+ return f"file://{abs_path}"
+ else:
+ # Windows: file:///C:/path
+ return f"file:///{abs_path.replace(chr(92), '/')}"
+
+
+def parse_url(url: str) -> tuple[str, str]:
+ """
+ Parse a URL into protocol and path.
Parameters
----------
url : str
- Remote URL (e.g., ``'s3://bucket/path/file.dat'``).
+ URL (e.g., ``'s3://bucket/path/file.dat'`` or ``'file:///path/to/file'``).
Returns
-------
@@ -65,11 +101,19 @@ def parse_remote_url(url: str) -> tuple[str, str]:
------
DataJointError
If URL protocol is not supported.
+
+ Examples
+ --------
+ >>> parse_url("s3://bucket/key/file.dat")
+ ('s3', 'bucket/key/file.dat')
+ >>> parse_url("file:///data/file.dat")
+ ('file', '/data/file.dat')
"""
url_lower = url.lower()
# Map URL schemes to fsspec protocols
protocol_map = {
+ "file://": "file",
"s3://": "s3",
"gs://": "gcs",
"gcs://": "gcs",
@@ -84,7 +128,7 @@ def parse_remote_url(url: str) -> tuple[str, str]:
path = url[len(prefix) :]
return protocol, path
- raise errors.DataJointError(f"Unsupported remote URL protocol: {url}")
+ raise errors.DataJointError(f"Unsupported URL protocol: {url}")
def generate_token(length: int = 8) -> str:
@@ -358,6 +402,53 @@ def _full_path(self, path: str | PurePosixPath) -> str:
return str(Path(location) / path)
return path
+ def get_url(self, path: str | PurePosixPath) -> str:
+ """
+ Get the full URL for a path in storage.
+
+ Returns a consistent URL representation for any storage backend,
+ including file:// URLs for local filesystem.
+
+ Parameters
+ ----------
+ path : str or PurePosixPath
+ Relative path within the storage location.
+
+ Returns
+ -------
+ str
+ Full URL (e.g., 's3://bucket/path' or 'file:///data/path').
+
+ Examples
+ --------
+ >>> backend = StorageBackend({"protocol": "file", "location": "/data"})
+ >>> backend.get_url("schema/table/file.dat")
+ 'file:///data/schema/table/file.dat'
+
+ >>> backend = StorageBackend({"protocol": "s3", "bucket": "mybucket", ...})
+ >>> backend.get_url("schema/table/file.dat")
+ 's3://mybucket/schema/table/file.dat'
+ """
+ full_path = self._full_path(path)
+
+ if self.protocol == "file":
+ # Ensure absolute path for file:// URL
+ abs_path = str(Path(full_path).resolve())
+ if abs_path.startswith("/"):
+ return f"file://{abs_path}"
+ else:
+ # Windows path
+ return f"file:///{abs_path.replace(chr(92), '/')}"
+ elif self.protocol == "s3":
+ return f"s3://{full_path}"
+ elif self.protocol == "gcs":
+ return f"gs://{full_path}"
+ elif self.protocol == "azure":
+ return f"az://{full_path}"
+ else:
+ # Fallback: use protocol prefix
+ return f"{self.protocol}://{full_path}"
+
def put_file(self, local_path: str | Path, remote_path: str | PurePosixPath, metadata: dict | None = None) -> None:
"""
Upload a file from local filesystem to storage.
@@ -674,7 +765,7 @@ def copy_from_url(self, source_url: str, dest_path: str | PurePosixPath) -> int:
int
Size of copied file in bytes.
"""
- protocol, source_path = parse_remote_url(source_url)
+ protocol, source_path = parse_url(source_url)
full_dest = self._full_path(dest_path)
logger.debug(f"copy_from_url: {protocol}://{source_path} -> {self.protocol}:{full_dest}")
@@ -774,8 +865,8 @@ def source_is_directory(self, source: str) -> bool:
bool
True if source is a directory.
"""
- if is_remote_url(source):
- protocol, path = parse_remote_url(source)
+ if is_url(source):
+ protocol, path = parse_url(source)
source_fs = fsspec.filesystem(protocol)
return source_fs.isdir(path)
else:
@@ -795,8 +886,8 @@ def source_exists(self, source: str) -> bool:
bool
True if source exists.
"""
- if is_remote_url(source):
- protocol, path = parse_remote_url(source)
+ if is_url(source):
+ protocol, path = parse_url(source)
source_fs = fsspec.filesystem(protocol)
return source_fs.exists(path)
else:
@@ -817,8 +908,8 @@ def get_source_size(self, source: str) -> int | None:
Size in bytes, or None if directory or cannot determine.
"""
try:
- if is_remote_url(source):
- protocol, path = parse_remote_url(source)
+ if is_url(source):
+ protocol, path = parse_url(source)
source_fs = fsspec.filesystem(protocol)
if source_fs.isdir(path):
return None
diff --git a/src/datajoint/types.py b/src/datajoint/types.py
index 72cefee3c..c8f6c7039 100644
--- a/src/datajoint/types.py
+++ b/src/datajoint/types.py
@@ -9,22 +9,16 @@
from __future__ import annotations
-from typing import Any, TypeAlias
+from typing import TYPE_CHECKING, Any, TypeAlias
# Primary key types
PrimaryKey: TypeAlias = dict[str, Any]
"""A dictionary mapping attribute names to values that uniquely identify an entity."""
-PrimaryKeyList: TypeAlias = list[dict[str, Any]]
-"""A list of primary key dictionaries."""
-
# Row/record types
Row: TypeAlias = dict[str, Any]
"""A single row/record as a dictionary mapping attribute names to values."""
-RowList: TypeAlias = list[dict[str, Any]]
-"""A list of rows/records."""
-
# Attribute types
AttributeName: TypeAlias = str
"""Name of a table attribute/column."""
@@ -47,7 +41,7 @@
"""Mapping of child_attr -> (parent_table, parent_attr) for foreign keys."""
# Restriction types
-Restriction: TypeAlias = str | dict[str, Any] | bool | "QueryExpression" | list | None
+Restriction: TypeAlias = str | dict[str, Any] | bool | "QueryExpression" | list[Any] | None
"""Valid restriction types for query operations."""
# Fetch result types
@@ -56,5 +50,5 @@
# For avoiding circular imports
-if False: # TYPE_CHECKING equivalent that's always False
+if TYPE_CHECKING:
from .expression import QueryExpression
diff --git a/tests/unit/test_storage_urls.py b/tests/unit/test_storage_urls.py
new file mode 100644
index 000000000..649d695b2
--- /dev/null
+++ b/tests/unit/test_storage_urls.py
@@ -0,0 +1,121 @@
+"""Unit tests for storage URL functions."""
+
+import pytest
+
+from datajoint.errors import DataJointError
+from datajoint.storage import (
+ URL_PROTOCOLS,
+ is_url,
+ normalize_to_url,
+ parse_url,
+)
+
+
+class TestURLProtocols:
+ """Test URL protocol constants."""
+
+ def test_url_protocols_includes_file(self):
+ """URL_PROTOCOLS should include file://."""
+ assert "file://" in URL_PROTOCOLS
+
+ def test_url_protocols_includes_s3(self):
+ """URL_PROTOCOLS should include s3://."""
+ assert "s3://" in URL_PROTOCOLS
+
+ def test_url_protocols_includes_cloud_providers(self):
+ """URL_PROTOCOLS should include major cloud providers."""
+ assert "gs://" in URL_PROTOCOLS
+ assert "az://" in URL_PROTOCOLS
+
+
+class TestIsUrl:
+ """Test is_url function."""
+
+ def test_s3_url(self):
+ assert is_url("s3://bucket/key")
+
+ def test_gs_url(self):
+ assert is_url("gs://bucket/key")
+
+ def test_file_url(self):
+ assert is_url("file:///path/to/file")
+
+ def test_http_url(self):
+ assert is_url("http://example.com/file")
+
+ def test_https_url(self):
+ assert is_url("https://example.com/file")
+
+ def test_local_path_not_url(self):
+ assert not is_url("/path/to/file")
+
+ def test_relative_path_not_url(self):
+ assert not is_url("relative/path/file.dat")
+
+ def test_case_insensitive(self):
+ assert is_url("S3://bucket/key")
+ assert is_url("FILE:///path")
+
+
+class TestNormalizeToUrl:
+ """Test normalize_to_url function."""
+
+ def test_local_path_to_file_url(self):
+ url = normalize_to_url("/data/file.dat")
+ assert url.startswith("file://")
+ assert "data/file.dat" in url
+
+ def test_s3_url_unchanged(self):
+ url = "s3://bucket/key/file.dat"
+ assert normalize_to_url(url) == url
+
+ def test_file_url_unchanged(self):
+ url = "file:///data/file.dat"
+ assert normalize_to_url(url) == url
+
+ def test_relative_path_becomes_absolute(self):
+ url = normalize_to_url("relative/path.dat")
+ assert url.startswith("file://")
+ # Should be absolute (contain full path)
+ assert "/" in url[7:] # After "file://"
+
+
+class TestParseUrl:
+ """Test parse_url function."""
+
+ def test_parse_s3(self):
+ protocol, path = parse_url("s3://bucket/key/file.dat")
+ assert protocol == "s3"
+ assert path == "bucket/key/file.dat"
+
+ def test_parse_gs(self):
+ protocol, path = parse_url("gs://bucket/key")
+ assert protocol == "gcs"
+ assert path == "bucket/key"
+
+ def test_parse_gcs(self):
+ protocol, path = parse_url("gcs://bucket/key")
+ assert protocol == "gcs"
+ assert path == "bucket/key"
+
+ def test_parse_file(self):
+ protocol, path = parse_url("file:///data/file.dat")
+ assert protocol == "file"
+ assert path == "/data/file.dat"
+
+ def test_parse_http(self):
+ protocol, path = parse_url("http://example.com/file")
+ assert protocol == "http"
+ assert path == "example.com/file"
+
+ def test_parse_https(self):
+ protocol, path = parse_url("https://example.com/file")
+ assert protocol == "https"
+ assert path == "example.com/file"
+
+ def test_unsupported_protocol_raises(self):
+ with pytest.raises(DataJointError, match="Unsupported URL protocol"):
+ parse_url("ftp://example.com/file")
+
+ def test_local_path_raises(self):
+ with pytest.raises(DataJointError, match="Unsupported URL protocol"):
+ parse_url("/local/path")