Skip to content

Commit bdc1429

Browse files
authored
Merge pull request #646 from DagsHub/bug/pandas3-string-handling
Bug: uploading metadata from a dataframe fails if pandas>=3
2 parents edc57e9 + 83ebb07 commit bdc1429

File tree

1 file changed: +22 additions, -22 deletions

dagshub/data_engine/model/datasource.py

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22
import datetime
33
import json
44
import logging
5-
import tempfile
65
import os.path
6+
import tempfile
77
import threading
88
import time
99
import uuid
@@ -12,65 +12,65 @@
1212
from dataclasses import dataclass, field
1313
from os import PathLike
1414
from pathlib import Path
15-
from typing import Any, Dict, List, Optional, TYPE_CHECKING, Union, Set, ContextManager, Tuple, Literal, Callable
16-
15+
from typing import TYPE_CHECKING, Any, Callable, ContextManager, Dict, List, Literal, Optional, Set, Tuple, Union
1716

1817
import rich.progress
19-
from dataclasses_json import config, LetterCase, DataClassJsonMixin
18+
from dataclasses_json import DataClassJsonMixin, LetterCase, config
2019
from pathvalidate import sanitize_filepath
2120

2221
import dagshub.common.config
2322
from dagshub.common import rich_console
2423
from dagshub.common.analytics import send_analytics_event
2524
from dagshub.common.environment import is_mlflow_installed
26-
from dagshub.common.helpers import prompt_user, http_request, log_message
25+
from dagshub.common.helpers import http_request, log_message, prompt_user
2726
from dagshub.common.rich_util import get_rich_progress
2827
from dagshub.common.util import (
28+
deprecated,
29+
exclude_if_none,
2930
lazy_load,
3031
multi_urljoin,
3132
to_timestamp,
32-
exclude_if_none,
33-
deprecated,
3433
)
35-
from dagshub.data_engine.annotation.importer import AnnotationImporter, AnnotationType, AnnotationLocation
34+
from dagshub.data_engine.annotation.importer import AnnotationImporter, AnnotationLocation, AnnotationType
3635
from dagshub.data_engine.client.models import (
37-
PreprocessingStatus,
36+
DatasetResult,
3837
MetadataFieldSchema,
38+
PreprocessingStatus,
3939
ScanOption,
40-
DatasetResult,
4140
)
4241
from dagshub.data_engine.dtypes import MetadataFieldType
4342
from dagshub.data_engine.model.datapoint import Datapoint
43+
from dagshub.data_engine.model.datasource_state import DatasourceState
4444
from dagshub.data_engine.model.errors import (
45-
WrongOperatorError,
46-
WrongOrderError,
4745
DatasetFieldComparisonError,
48-
FieldNotFoundError,
4946
DatasetNotFoundError,
47+
FieldNotFoundError,
48+
WrongOperatorError,
49+
WrongOrderError,
5050
)
5151
from dagshub.data_engine.model.metadata import (
52-
validate_uploading_metadata,
53-
run_preupload_transforms,
5452
precalculate_metadata_info,
53+
run_preupload_transforms,
54+
validate_uploading_metadata,
5555
)
56-
from dagshub.data_engine.model.metadata.transforms import DatasourceFieldInfo, _add_metadata
5756
from dagshub.data_engine.model.metadata.dtypes import DatapointMetadataUpdateEntry
57+
from dagshub.data_engine.model.metadata.transforms import DatasourceFieldInfo, _add_metadata
5858
from dagshub.data_engine.model.metadata_field_builder import MetadataFieldBuilder
5959
from dagshub.data_engine.model.query import QueryFilterTree
6060
from dagshub.data_engine.model.schema_util import (
6161
default_metadata_type_value,
6262
)
63-
from dagshub.data_engine.model.datasource_state import DatasourceState
6463

6564
if TYPE_CHECKING:
66-
from dagshub.data_engine.model.query_result import QueryResult
65+
import cloudpickle
6766
import fiftyone as fo
68-
import pandas
6967
import mlflow
7068
import mlflow.entities
71-
import cloudpickle
72-
import ngrok
7369
import mlflow.exceptions as mlflow_exceptions
70+
import ngrok
71+
import pandas
72+
73+
from dagshub.data_engine.model.query_result import QueryResult
7474
else:
7575
plugin_server_module = lazy_load("dagshub.data_engine.voxel_plugin_server.server")
7676
fo = lazy_load("fiftyone")
@@ -663,7 +663,7 @@ def _df_to_metadata(
663663
path_column = df.columns[path_column]
664664

665665
# objects are actually mixed and not guaranteed to be string, but this should cover most use cases
666-
if df.dtypes[path_column] != "object":
666+
if not pandas.api.types.is_string_dtype(df.dtypes[path_column]):
667667
raise ValueError(f"Path column {path_column} must contain strings")
668668

669669
field_info = self._generate_metadata_cache_info()

0 commit comments

Comments (0)