address comments

nitbharambe · nitbharambe · commit c5ef4020658f · 2024-10-22T14:11:26.000+02:00
Signed-off-by: Nitish Bharambe &lt;nitish.bharambe@alliander.com&gt;
diff --git a/docs/examples/arrow_example.ipynb b/docs/examples/arrow_example.ipynb
@@ -21,7 +21,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -44,16 +44,6 @@
     "import numpy as np"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": 16,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# A constant showing error message\n",
-    "ZERO_COPY_ERROR_MSG = \"Zero-copy conversion requested, but the data types do not match.\""
-   ]
-  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -93,7 +83,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [
     {
@@ -142,17 +132,17 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-------node combined asym scehma-------\n",
+      "-------node asym scehma-------\n",
       "id: int32\n",
       "u_rated: double\n",
-      "-------asym load combined asym scehma-------\n",
+      "-------asym load scehma-------\n",
       "id: int32\n",
       "node: int32\n",
       "status: int8\n",
@@ -179,9 +169,9 @@
     "    return pa.schema(schemas)\n",
     "\n",
     "\n",
-    "print(\"-------node combined asym scehma-------\")\n",
+    "print(\"-------node asym scehma-------\")\n",
     "print(pgm_schema(DatasetType.input, ComponentType.node))\n",
-    "print(\"-------asym load combined asym scehma-------\")\n",
+    "print(\"-------asym load scehma-------\")\n",
     "print(pgm_schema(DatasetType.input, ComponentType.asym_load))"
    ]
   },
@@ -198,7 +188,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
@@ -212,7 +202,7 @@
        "u_rated: [10500,10500,10500]"
       ]
      },
-     "execution_count": 19,
+     "execution_count": 4,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -272,13 +262,14 @@
     "Similar approach be adopted by the user to convert to row based data.\n",
     "\n",
     "```{note}\n",
-    "The option of `zero_copy_only` in the function below is added in this demo to verify no copies are made. Its usage is not mandatory to do zero copy conversion.\n",
+    "The `zero_copy_only` option and the dtype assertion in the function below are added in this demo to verify that no copies are made.\n",
+    "Their usage is not mandatory for zero-copy conversion.\n",
     "```"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [
     {
@@ -287,27 +278,24 @@
        "{'id': array([1, 2, 3]), 'u_rated': array([10500., 10500., 10500.])}"
       ]
      },
-     "execution_count": 20,
+     "execution_count": 5,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "def arrow_to_numpy(\n",
-    "    data: pa.RecordBatch, dataset_type: DatasetType, component_type: ComponentType, zero_copy_only: bool = False\n",
-    ") -> np.ndarray:\n",
+    "def arrow_to_numpy(data: pa.RecordBatch, dataset_type: DatasetType, component_type: ComponentType) -> np.ndarray:\n",
     "    \"\"\"Convert Arrow data to NumPy data.\"\"\"\n",
     "    result = {}\n",
     "    result_dtype = power_grid_meta_data[dataset_type][component_type].dtype\n",
     "    for name, column in zip(data.column_names, data.columns):\n",
-    "        column_data = column.to_numpy(zero_copy_only=zero_copy_only)\n",
-    "        if zero_copy_only and column_data.dtype != result_dtype[name]:\n",
-    "            raise ValueError(ZERO_COPY_ERROR_MSG)\n",
+    "        column_data = column.to_numpy(zero_copy_only=True)\n",
+    "        assert column_data.dtype == result_dtype[name]\n",
     "        result[name] = column_data.astype(dtype=result_dtype[name], copy=False)\n",
     "    return result\n",
     "\n",
     "\n",
-    "node_input = arrow_to_numpy(nodes, DatasetType.input, ComponentType.node, zero_copy_only=True)\n",
+    "node_input = arrow_to_numpy(nodes, DatasetType.input, ComponentType.node)\n",
     "line_input = arrow_to_numpy(lines, DatasetType.input, ComponentType.line)\n",
     "source_input = arrow_to_numpy(sources, DatasetType.input, ComponentType.source)\n",
     "sym_load_input = arrow_to_numpy(sym_loads, DatasetType.input, ComponentType.sym_load)\n",
@@ -324,7 +312,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -338,7 +326,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 22,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -361,7 +349,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -473,7 +461,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [
     {
@@ -497,7 +485,7 @@
        "q: [-3299418.661306348,-0.5000000701801947,-1.4999998507078594]"
       ]
      },
-     "execution_count": 24,
+     "execution_count": 9,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -536,7 +524,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 25,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [
     {
@@ -560,7 +548,7 @@
        "q_specified: [[0.5,1500,0.1],[1.5,2.5,1500]]"
       ]
      },
-     "execution_count": 25,
+     "execution_count": 10,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -584,7 +572,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 26,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [
     {
@@ -600,15 +588,13 @@
        "        [1.5e+00, 2.5e+00, 1.5e+03]])}"
       ]
      },
-     "execution_count": 26,
+     "execution_count": 11,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "def arrow_to_numpy_asym(\n",
-    "    data: pa.RecordBatch, dataset_type: DatasetType, component_type: ComponentType, zero_copy_only: bool = False\n",
-    ") -> np.ndarray:\n",
+    "def arrow_to_numpy_asym(data: pa.RecordBatch, dataset_type: DatasetType, component_type: ComponentType) -> np.ndarray:\n",
     "    \"\"\"Convert asymmetric Arrow data to NumPy data.\n",
     "\n",
     "    This function is similar to the arrow_to_numpy function, but also supports asymmetric data.\"\"\"\n",
@@ -621,17 +607,15 @@
     "        dtype = result_dtype[name]\n",
     "\n",
     "        if len(dtype.shape) == 0:\n",
-    "            column_data = data.column(name).to_numpy(zero_copy_only=zero_copy_only)\n",
+    "            column_data = data.column(name).to_numpy(zero_copy_only=True)\n",
     "        else:\n",
-    "            column_data = data.column(name).flatten().to_numpy(zero_copy_only=zero_copy_only).reshape(-1, 3)\n",
-    "\n",
-    "        if zero_copy_only and column_data.dtype.base != dtype.base:\n",
-    "            raise ValueError(ZERO_COPY_ERROR_MSG)\n",
+    "            column_data = data.column(name).flatten().to_numpy(zero_copy_only=True).reshape(-1, 3)\n",
+    "        assert column_data.dtype.base == dtype.base\n",
     "        result[name] = column_data.astype(dtype=dtype.base, copy=False)\n",
     "    return result\n",
     "\n",
     "\n",
-    "asym_load_input = arrow_to_numpy_asym(asym_loads, DatasetType.input, ComponentType.asym_load, zero_copy_only=True)\n",
+    "asym_load_input = arrow_to_numpy_asym(asym_loads, DatasetType.input, ComponentType.asym_load)\n",
     "\n",
     "asym_load_input"
    ]
@@ -645,7 +629,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 27,
+   "execution_count": 12,
    "metadata": {},
    "outputs": [
     {
@@ -704,9 +688,8 @@
        "2 -0.004338 -2.098733  2.090057"
       ]
      },
-     "execution_count": 27,
      "metadata": {},
-     "output_type": "execute_result"
+     "output_type": "display_data"
     }
    ],
    "source": [
@@ -728,7 +711,7 @@
     ")\n",
     "\n",
     "# use pandas to display the results, but beware the data types\n",
-    "pd.DataFrame(asym_result[ComponentType.node][\"u_angle\"])"
+    "display(pd.DataFrame(asym_result[ComponentType.node][\"u_angle\"]))"
    ]
   },
   {
@@ -740,7 +723,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 28,
+   "execution_count": 13,
    "metadata": {},
    "outputs": [
     {
@@ -769,7 +752,7 @@
        "q: [[-1099806.4185888197,-1098301.0302391076,-1098302.79423175],[-0.499999998516201,-1499.9999999095232,-0.10000001915949493],[-1.5000000216889147,-2.50000006806065,-1500.0000000385737]]"
       ]
      },
-     "execution_count": 28,
+     "execution_count": 13,
      "metadata": {},
      "output_type": "execute_result"
     }