Commit 1fbc248

BWMac and BryanFauble authored
[SYNPY-1571] Adds Dataset Model & Introduces Composition Model for Table/View-like Classes (#1175)
* add and remove item logic
* fixes docstrings
* adds default column logic
* update default column handling
* adds ViewOperator and improved default column handling
* updates Dataset
* addresses comments
* adds integration test outline
* adds WIP demo script for testing
* [SYNPY-1575] Support waiting for eventually consistent view after changes are made (#1176)
* fix table test docstring
* initial composition approach
* updates type hint and bit mask handling
* update to `Base` naming
* updates to allow none bit masks/view types
* adds decorators to base classes
* adds delete
* updates demo
* leave=None for tqdm bar
* adds snapshot functionality
* adds ViewUpdateMixin
* adds reimplementation example
* patch for flipped eventual consistency check
* support downloading query results to CSV instead of returning as DataFrame
* updates async_to_sync so that we don't have to use protocols
* adds sync Dataset interface
* updates demo script
* [SYNPY-1571] Migrate table to new mixin structure (#1179)
* pull functions out into new mixin model for table/views
* updates demo script
* hide upsert method from showing in view type entities and finish sync side of Table
* patch isort flagged issue
* update ref
* updates docstrings
* pre-commit fix
* adds Dataset docs pages
* updates mkdocs.yml
* Update synapseclient/models/mixins/table_components.py (review suggestions; co-authored by BryanFauble)
* fixes dataset docstrings
* adds dataset creation tests
* fixes dataset.to_synapse_request
* splits off dataset protocol
* adds integration tests
* updates docs for methods
* adds isSearchEnabled to Dataset
* start of table_component unit tests
* isort pre-commit
* adds mixin unit tests
* adds dataset unit tests
* pre-commit fix
* cleans up unit test script
* adds demo to doc page
* removes unused imports
* try new ubuntu version
* bump gh runner version
* fix docs order
* adds missing example imports + async running
* removes redundant query method definitions
* adds dataset tutorial
* pre-commit
* try latest OSes
* updates pytest-xdist version
* Revert "updates pytest-xdist version" (reverts commit 24a6524)
* bumps pytest and pytest-asyncio versions
* revert OSes
* test pytest_asyncio fixture
* bump dependencies version cache
* disable failing test
* make syn fixture asyncio safe
* debug failing test
* fixes failing unit test
* lower xdist concurrency
* adds Dataset tutorial to mkdocs
* fixes misaligned numbers
* adds missing comment
* try concurrency n=6
* revert to 4
* patch async scope
* adds async to failing tests

Co-authored-by: BryanFauble <[email protected]>
1 parent: b254f2a · commit: 1fbc248

35 files changed: +8,326 additions, −3,207 deletions

.github/workflows/build.yml

Lines changed: 6 additions & 6 deletions
```diff
@@ -50,7 +50,7 @@ jobs:

     strategy:
       matrix:
-        os: [ubuntu-20.04, macos-13, windows-2022]
+        os: [ubuntu-22.04, macos-13, windows-2022]

         # if changing the below change the run-integration-tests versions and the check-deploy versions
         # Make sure that we are running the integration tests on the first and last versions of the matrix
@@ -83,7 +83,7 @@ jobs:
           path: |
             ${{ steps.get-dependencies.outputs.site_packages_loc }}
             ${{ steps.get-dependencies.outputs.site_bin_dir }}
-          key: ${{ runner.os }}-${{ matrix.python }}-build-${{ env.cache-name }}-${{ hashFiles('setup.py') }}-v20
+          key: ${{ runner.os }}-${{ matrix.python }}-build-${{ env.cache-name }}-${{ hashFiles('setup.py') }}-v21

       - name: Install py-dependencies
         if: steps.cache-dependencies.outputs.cache-hit != 'true'
@@ -212,7 +212,7 @@ jobs:
       - name: Upload coverage report
         id: upload_coverage_report
         uses: actions/upload-artifact@v4
-        if: ${{ contains(fromJSON('["3.13"]'), matrix.python) && contains(fromJSON('["ubuntu-20.04"]'), matrix.os)}}
+        if: ${{ contains(fromJSON('["3.13"]'), matrix.python) && contains(fromJSON('["ubuntu-22.04"]'), matrix.os)}}
         with:
           name: coverage-report
           path: coverage.xml
@@ -221,7 +221,7 @@ jobs:
     needs: [test]
     if: ${{ always() && !cancelled()}}
     name: SonarCloud
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-22.04
     steps:
       - uses: actions/checkout@v4
         with:
@@ -256,7 +256,7 @@ jobs:
   package:
     needs: [test,pre-commit]

-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-22.04

     if: github.event_name == 'release'

@@ -404,7 +404,7 @@ jobs:

     strategy:
       matrix:
-        os: [ubuntu-20.04, macos-13, windows-2022]
+        os: [ubuntu-24.04, macos-13, windows-2022]

         # python versions should be consistent with the strategy matrix and the runs-integration-tests versions
         python: ['3.9', '3.10', '3.11', '3.12', '3.13']
```

Pipfile.lock

Lines changed: 967 additions & 692 deletions
Generated file; diff not rendered.
docs/reference/experimental/async/dataset.md

Lines changed: 31 additions & 0 deletions

New file:

# Dataset

Contained within this file are experimental interfaces for working with the Synapse Python
Client. Unless otherwise noted these interfaces are subject to change at any time. Use
at your own risk.

## API reference

::: synapseclient.models.Dataset
    options:
        inherited_members: true
        members:
        - add_item_async
        - remove_item_async
        - store_async
        - get_async
        - delete_async
        - update_rows_async
        - snapshot_async
        - query_async
        - query_part_mask_async
        - add_column
        - delete_column
        - reorder_column
        - rename_column
        - get_permissions
        - get_acl
        - set_permissions
---
::: synapseclient.models.EntityRef
---

docs/reference/experimental/async/table.md

Lines changed: 1 addition & 1 deletion
```diff
@@ -46,4 +46,4 @@ at your own risk.
 ::: synapseclient.models.UploadToTableRequest
 ::: synapseclient.models.TableUpdateTransaction
 ::: synapseclient.models.CsvTableDescriptor
-::: synapseclient.models.mixins.table_operator.csv_to_pandas_df
+::: synapseclient.models.mixins.table_components.csv_to_pandas_df
```
docs/reference/experimental/sync/dataset.md

Lines changed: 41 additions & 0 deletions

New file:

# Dataset

Contained within this file are experimental interfaces for working with the Synapse Python
Client. Unless otherwise noted these interfaces are subject to change at any time. Use
at your own risk.

## Example Script:

<details class="quote">
<summary>Working with Synapse datasets</summary>

```python
{!docs/scripts/object_orientated_programming_poc/oop_poc_dataset.py!}
```
</details>

## API reference

::: synapseclient.models.Dataset
    options:
        inherited_members: true
        members:
        - add_item
        - remove_item
        - store
        - get
        - delete
        - update_rows
        - snapshot
        - query
        - query_part_mask
        - add_column
        - delete_column
        - reorder_column
        - rename_column
        - get_permissions
        - get_acl
        - set_permissions
---
::: synapseclient.models.EntityRef
---
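The async and sync reference pages list the same Dataset members, with the blocking variants (`store`, `get`, ...) corresponding to `*_async` coroutines; the commit notes that `async_to_sync` was updated so protocols are no longer needed. The following is a rough, hypothetical sketch of that pattern only, not the client's actual implementation (the class name `ExampleDataset` and the decorator body are invented for illustration):

```python
import asyncio


def async_to_sync(cls):
    """Sketch: for each *_async coroutine method on the class, generate a
    blocking counterpart without the "_async" suffix. Hypothetical
    simplification of the async-to-sync pattern, not synapseclient's code."""
    for name in list(vars(cls)):
        attr = getattr(cls, name)
        if name.endswith("_async") and asyncio.iscoroutinefunction(attr):
            sync_name = name[: -len("_async")]
            if not hasattr(cls, sync_name):
                def make(async_name):
                    def sync_method(self, *args, **kwargs):
                        # Run the coroutine to completion on a fresh event loop.
                        return asyncio.run(getattr(self, async_name)(*args, **kwargs))
                    return sync_method
                setattr(cls, sync_name, make(name))
    return cls


@async_to_sync
class ExampleDataset:
    async def get_async(self):
        return "dataset"


print(ExampleDataset().get())  # the generated blocking wrapper
```

One benefit of generating the sync surface this way is that the two method sets cannot drift apart: only the `*_async` implementations carry logic.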

docs/reference/experimental/sync/table.md

Lines changed: 1 addition & 1 deletion
```diff
@@ -57,4 +57,4 @@ at your own risk.
 ::: synapseclient.models.UploadToTableRequest
 ::: synapseclient.models.TableUpdateTransaction
 ::: synapseclient.models.CsvTableDescriptor
-::: synapseclient.models.mixins.table_operator.csv_to_pandas_df
+::: synapseclient.models.mixins.table_components.csv_to_pandas_df
```

docs/tutorials/python/dataset.md

Lines changed: 125 additions & 2 deletions
The placeholder page ("# Datasets in Synapse" with an under-construction image) is replaced by the following tutorial:

# Datasets

Datasets in Synapse are a way to organize, annotate, and publish sets of files for others to use. Datasets behave similarly to Tables and FileViews, but provide some default behavior that makes it easy to put a group of files together.

This tutorial will walk through the basics of working with datasets using the Synapse Python client.

## Tutorial Purpose

In this tutorial, you will:

1. Create a dataset
2. Add files to the dataset
3. Query the dataset
4. Add a custom column to the dataset
5. Save a snapshot of the dataset

## Prerequisites

* This tutorial assumes that you have a project in Synapse with one or more files in it. To test all of the ways to add files to a dataset, you will need at least 3 files in your project. A structure like this is recommended:

```
Project
├── File 1
├── File 2
├── Folder 1
│   ├── File 4
│   ├── ...
```

* Pandas must be installed as shown in the [installation documentation](../installation.md)

## 1. Get the ID of your Synapse project

Let's get started by authenticating with Synapse and retrieving the ID of your project.

```python
{!docs/tutorials/python/tutorial_scripts/dataset.py!lines=17-23}
```

## 2. Create your Dataset

Next, we will create the dataset. We will use the project ID to tell Synapse where the dataset should be created. After this step, we will have a Dataset object with all of the information needed to start building the dataset.

```python
{!docs/tutorials/python/tutorial_scripts/dataset.py!lines=27-28}
```

Because we haven't added any files to the dataset yet, it will be empty, but if you view the dataset's schema in the UI, you will notice that datasets come with default columns that help to describe each file added to the dataset.

![Dataset Default Schema](./tutorial_screenshots/dataset_default_schema.png)

## 3. Add files to the dataset

Let's add some files to the dataset now. There are three ways to add files to a dataset:

1. Add an Entity Reference to a file with its ID and version
```python
{!docs/tutorials/python/tutorial_scripts/dataset.py!lines=32-34}
```
2. Add a File with its ID and version
```python
{!docs/tutorials/python/tutorial_scripts/dataset.py!lines=36-38}
```
3. Add a Folder. When adding a folder, all child files inside of the folder are added to the dataset recursively.
```python
{!docs/tutorials/python/tutorial_scripts/dataset.py!lines=40-42}
```

Whenever we make changes to the dataset, we need to call the `store()` method to save the changes to Synapse.

```python
{!docs/tutorials/python/tutorial_scripts/dataset.py!lines=44}
```

And now we are able to see our dataset with all of the files that we added to it.

![Dataset with Files](./tutorial_screenshots/dataset_with_files.png)

## 4. Retrieve the dataset

Now that we have a dataset with some files in it, we can retrieve it from Synapse the next time we need to use it.

```python
{!docs/tutorials/python/tutorial_scripts/dataset.py!lines=48-50}
```

## 5. Query the dataset

With files in the dataset, we can query it to find files that match certain criteria.

```python
{!docs/tutorials/python/tutorial_scripts/dataset.py!lines=54-57}
```

## 6. Add a custom column to the dataset

We can also add a custom column to the dataset. This allows us to annotate files in the dataset with additional information.

```python
{!docs/tutorials/python/tutorial_scripts/dataset.py!lines=61-67}
```

Our custom column isn't all that useful empty, so let's update the dataset with some values.

```python
{!docs/tutorials/python/tutorial_scripts/dataset.py!lines=70-78}
```

## 7. Save a snapshot of the dataset

Finally, let's save a snapshot of the dataset. This creates a read-only version of the dataset that captures its current state and can be referenced later.

```python
{!docs/tutorials/python/tutorial_scripts/dataset.py!lines=82-86}
```

## Source Code for this Tutorial

<details class="quote">
<summary>Click to show me</summary>

```python
{!docs/tutorials/python/tutorial_scripts/dataset.py!}
```
</details>

## References

- [Dataset](../../reference/experimental/sync/dataset.md)
- [Column][synapseclient.models.Column]
- [syn.login][synapseclient.Synapse.login]
- [Project](../../reference/experimental/sync/project.md)
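Step 6's `update_rows` call matches existing dataset rows against the supplied DataFrame using the `primary_keys` you pass. As a rough, library-free illustration of that matching idea (a hypothetical simplification using plain pandas, not the client's actual implementation):

```python
import pandas as pd

# Current state of the dataset's annotation column (illustrative data).
existing = pd.DataFrame(
    {
        "id": ["syn1", "syn2", "syn3"],
        "my_annotation": [None, None, None],
    }
)
# The update frame: one row, keyed on "id", as in the tutorial.
updates = pd.DataFrame({"id": ["syn2"], "my_annotation": ["excellent data"]})

# Left-merge on the primary key, then prefer the updated value where present.
merged = existing.merge(updates, on="id", how="left", suffixes=("", "_new"))
merged["my_annotation"] = merged["my_annotation_new"].combine_first(
    merged["my_annotation"]
)
merged = merged.drop(columns=["my_annotation_new"])
print(merged)
```

Only the row whose `id` matches receives the new annotation; all other rows are left untouched, which is the behavior the `primary_keys` argument is designed for.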
docs/tutorials/python/tutorial_screenshots/

Two binary image files added (161 KB and 332 KB): the screenshots referenced by the dataset tutorial.
docs/tutorials/python/tutorial_scripts/dataset.py

Lines changed: 86 additions & 0 deletions

New file:

```python
"""Here is where you'll find the code for the dataset tutorial."""

import pandas as pd

from synapseclient import Synapse
from synapseclient.models import (
    Column,
    ColumnType,
    Dataset,
    EntityRef,
    File,
    Folder,
    Project,
)

# First, let's get the project that we want to create the dataset in
syn = Synapse()
syn.login()

project = Project(name="My Testing Project").get()  # Replace with your project name
project_id = project.id
print(project_id)

# Next, let's create the dataset. We'll use the project id as the parent id.
# To begin, the dataset will be empty, but if you view the dataset's schema in the UI,
# you will notice that datasets come with default columns.
my_new_dataset = Dataset(parent_id=project_id, name="My New Dataset").store()
print(f"My Dataset's ID is {my_new_dataset.id}")

# Now, let's add some files to the dataset. There are three ways to add files to a dataset:
# 1. Add an Entity Reference to a file with its ID and version
my_new_dataset.add_item(
    EntityRef(id="syn51790029", version=1)
)  # Replace with the ID of the file you want to add
# 2. Add a File with its ID and version
my_new_dataset.add_item(
    File(id="syn51790028", version_label=1)
)  # Replace with the ID of the file you want to add
# 3. Add a Folder. In this case, all child files of the folder are added to the dataset recursively.
my_new_dataset.add_item(
    Folder(id="syn64893446")
)  # Replace with the ID of the folder you want to add
# Our changes won't be persisted to Synapse until we call the store() method.
my_new_dataset.store()

# Now that our Dataset with all of our files has been created, the next time
# we want to use it, we can retrieve it from Synapse.
my_retrieved_dataset = Dataset(id=my_new_dataset.id).get()
print(f"My Dataset's ID is {my_retrieved_dataset.id}")
print(len(my_retrieved_dataset.items))

# If you want to query your dataset for files that match certain criteria, you can do so
# using the query method.
rows = Dataset.query(
    query=f"SELECT * FROM {my_retrieved_dataset.id} WHERE name like '%test%'"
)
print(rows)

# In addition to the default columns, you may want to annotate items in your dataset using
# custom columns.
my_retrieved_dataset.add_column(
    column=Column(
        name="my_annotation",
        column_type=ColumnType.STRING,
    )
)
my_retrieved_dataset.store()

# Now that our custom column has been added, we can update the dataset with new values.
modified_data = pd.DataFrame(
    {
        "id": "syn51790028",  # The ID of one of our Files
        "my_annotation": ["excellent data"],
    }
)
my_retrieved_dataset.update_rows(
    values=modified_data, primary_keys=["id"], dry_run=False
)

# Finally, let's save a snapshot of the dataset.
snapshot_info = my_retrieved_dataset.snapshot(
    comment="My first snapshot",
    label="My first snapshot",
)
print(snapshot_info)
```
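As the script shows, dataset queries are table-style SQL strings keyed on the dataset's Synapse ID. A tiny helper for composing such query strings can keep f-string plumbing out of tutorial code; the function below is a hypothetical convenience for illustration, not part of synapseclient:

```python
def dataset_query(dataset_id: str, where: str = "") -> str:
    """Compose a table-style SQL query against a Synapse dataset.

    Hypothetical helper: builds the same kind of string passed to
    Dataset.query() in the tutorial script above.
    """
    query = f"SELECT * FROM {dataset_id}"
    if where:
        query += f" WHERE {where}"
    return query


print(dataset_query("syn123"))
# → SELECT * FROM syn123
print(dataset_query("syn123", "name like '%test%'"))
# → SELECT * FROM syn123 WHERE name like '%test%'
```

The composed string would then be passed as the `query` argument, exactly as the script does inline.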
