Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 20 additions & 11 deletions docs/user_guide/01_Reading_data.md
Original file line number Diff line number Diff line change
Expand Up @@ -139,27 +139,36 @@ shp = utilities.read_file(input="/path/to/boxes_shapefile.shp")
shp.head()
```

If your shapefile does not include an `image_path` column, you must provide the raster path via `img_path`:
##### Reading Shapefiles Without `image_path` or `label` Columns

Many GIS shapefiles do not include `image_path` or `label` columns. You can provide these values directly to `read_file`:

```python
from deepforest import utilities

# Shapefile doesn't have image_path or label columns
shp = utilities.read_file(
input="/path/to/boxes_shapefile.shp",
image_path="/path/to/OSBS_029.tif"
input="/path/to/annotations.shp",
image_path="my_raster.tif", # Required if shapefile has no image_path column
label="Tree", # Optional, defaults to "Unknown"
root_dir="/path/to/images/" # Required when using image_path argument
)
```

If your shapefile also lacks a `label` column, you can assign one for all rows:
**Arguments:**

```python
from deepforest import utilities
| Argument | Required? | Description |
|----------|-----------|-------------|
| `image_path` | **Required** if shapefile lacks `image_path` column | The image file path (relative to `root_dir`) that all annotations belong to |
| `label` | Optional | The label for all annotations. Defaults to `"Unknown"` if not provided |
| `root_dir` | **Required** when using `image_path` argument | Directory where image files are located |

shp = utilities.read_file(
input="/path/to/boxes_shapefile.shp",
image_path="/path/to/OSBS_029.tif",
label="Tree"
)
This assigns the same `image_path` and `label` to all annotations in the file. Use this when all annotations belong to a single image and share the same label.

**Note:** A warning will be shown when `image_path` is provided but the shapefile doesn't have an `image_path` column:

```
UserWarning: You have passed an image_path argument, but the shapefile does not contain an image_path column. All annotations will be assigned to my_raster.tif. Make sure all annotations in the shapefile relate to this image.
```

Example output:
Expand Down
13 changes: 10 additions & 3 deletions src/deepforest/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,13 @@ def __assign_image_path__(gdf, image_path: str) -> str:
)
gdf["image_path"] = image_path
else:
warnings.warn(
f"You have passed an image_path argument, but the shapefile does not contain an image_path column. "
f"All annotations will be assigned to {image_path}. "
f"Make sure all annotations in the shapefile relate to this image.",
UserWarning,
stacklevel=2,
)
gdf["image_path"] = image_path

return gdf
Expand Down Expand Up @@ -492,9 +499,8 @@ def __check_and_assign_label__(
):
if label is None:
if "label" not in df.columns:
raise ValueError(
"No label specified and no label column found in dataframe, please specify label in label argument: read_file(input=df, label='YourLabel', ...)"
)
# Default to "Unknown" if label is not provided and not in dataframe
df["label"] = "Unknown"
else:
if "label" in df.columns:
existing_labels = df.label.unique()
Expand All @@ -508,6 +514,7 @@ def __check_and_assign_label__(
f"Label {existing_labels[0]} found in dataframe, overriding and assigning {label} to all rows!",
stacklevel=2,
)
df["label"] = label
else:
df["label"] = label

Expand Down
113 changes: 113 additions & 0 deletions tests/test_utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -770,3 +770,116 @@ def test_read_file_column_names():
assert "image_path" in result.columns
assert "label" in result.columns
assert hasattr(result, "root_dir")


@pytest.fixture
def sample_shapefile_gdf():
    """Return a two-row GeoDataFrame of box geometries for shapefile tests.

    Two points (CRS EPSG:32617) are buffered by 0.5 and the bounds of each
    buffer are turned into a box, which replaces the point as the row
    geometry. The frame carries only a ``geometry`` column, so callers can
    add ``image_path`` / ``label`` columns as each test requires.
    """
    points = [
        geometry.Point(404211.9 + 10, 3285102 + 20),
        geometry.Point(404211.9 + 20, 3285102 + 20),
    ]
    frame = pd.DataFrame({"geometry": points})
    gdf = gpd.GeoDataFrame(frame, geometry="geometry", crs="EPSG:32617")

    # Each bounds row has four values, passed positionally to geometry.box
    # in the same order the rows provide them.
    buffered_bounds = gdf.geometry.buffer(0.5).bounds.values
    gdf["geometry"] = [geometry.box(*row) for row in buffered_bounds]
    return gdf


def test_read_file_shapefile_with_image_path_argument(tmp_path, sample_shapefile_gdf):
    """Test reading a shapefile without image_path column by passing image_path argument.

    This tests the fix for issue #997: the supplied ``image_path`` must be
    assigned to every annotation, and a ``UserWarning`` must be emitted to
    tell the user that all rows were attached to that single image.
    """
    # Create shapefile with a label column but without an image_path column
    gdf = sample_shapefile_gdf.copy()
    gdf["label"] = "Tree"
    shp_path = tmp_path / "no_image_path.shp"
    gdf.to_file(str(shp_path))

    # Split the sample raster path into the file name and its directory,
    # which are passed to read_file as image_path and root_dir respectively.
    image_path_full = get_data("OSBS_029.tif")
    image_path = os.path.basename(image_path_full)
    root_dir = os.path.dirname(image_path_full)

    with pytest.warns(UserWarning, match="You have passed an image_path argument"):
        result = utilities.read_file(
            input=str(shp_path),
            image_path=image_path,
            root_dir=root_dir,
        )

    assert result.shape[0] == 2
    assert "image_path" in result.columns
    # Check every row, not only the first: the argument applies to all annotations.
    assert (result["image_path"] == image_path).all()


def test_read_file_shapefile_with_label_argument(tmp_path, sample_shapefile_gdf):
    """Test reading a shapefile without label column by passing label argument.

    This tests the fix for issue #997: the supplied ``label`` must be
    assigned to every annotation when the shapefile has no label column.
    """
    # Create shapefile with an image_path column but without a label column
    gdf = sample_shapefile_gdf.copy()
    image_path_full = get_data("OSBS_029.tif")
    gdf["image_path"] = os.path.basename(image_path_full)
    shp_path = tmp_path / "no_label.shp"
    gdf.to_file(str(shp_path))

    # Read with label argument - no warning expected when shapefile has no label column
    root_dir = os.path.dirname(image_path_full)
    result = utilities.read_file(
        input=str(shp_path),
        label="CustomTree",
        root_dir=root_dir,
    )

    assert result.shape[0] == 2
    assert "label" in result.columns
    # Check every row, not only the first: the argument applies to all annotations.
    assert (result["label"] == "CustomTree").all()


def test_read_file_shapefile_with_image_path_and_label_arguments(tmp_path, sample_shapefile_gdf):
    """Test reading a shapefile without image_path and label columns.

    This tests the fix for issue #997 where users can pass both arguments:
    both values must be assigned to every annotation, and the image_path
    ``UserWarning`` must still be emitted.
    """
    # Create shapefile without image_path and label columns
    gdf = sample_shapefile_gdf.copy()
    shp_path = tmp_path / "no_image_path_no_label.shp"
    gdf.to_file(str(shp_path))

    # Split the sample raster path into the file name and its directory,
    # which are passed to read_file as image_path and root_dir respectively.
    image_path_full = get_data("OSBS_029.tif")
    image_path = os.path.basename(image_path_full)
    root_dir = os.path.dirname(image_path_full)

    with pytest.warns(UserWarning, match="You have passed an image_path argument"):
        result = utilities.read_file(
            input=str(shp_path),
            image_path=image_path,
            label="Tree",
            root_dir=root_dir,
        )

    assert result.shape[0] == 2
    assert "image_path" in result.columns
    assert "label" in result.columns
    # Check every row, not only the first: both arguments apply to all annotations.
    assert (result["image_path"] == image_path).all()
    assert (result["label"] == "Tree").all()


def test_read_file_shapefile_without_image_path_raises_error(tmp_path):
    """Check that read_file rejects a shapefile that has no image_path.

    Pins the expected behaviour when neither an image_path column nor an
    image_path argument is available: a ValueError is raised.
    """
    # Two labelled points, deliberately written without an image_path column.
    frame = pd.DataFrame(
        {
            "geometry": [geometry.Point(10, 20), geometry.Point(20, 40)],
            "label": ["Tree", "Tree"],
        }
    )
    target = tmp_path / "no_image_path.shp"
    gpd.GeoDataFrame(frame, geometry="geometry").to_file(str(target))

    # No image_path argument is supplied, so reading must fail.
    with pytest.raises(ValueError, match="No image_path column found"):
        utilities.read_file(input=str(target))