Skip to content

Commit 60ec1c5

Browse files
Bulk Import Management Command (#271)
* bulk import management command * Update bats_ai/core/utils/guano_utils.py Co-authored-by: Michael Nagler <[email protected]> * Update bats_ai/core/management/commands/importRecordings.py Co-authored-by: Michael Nagler <[email protected]> * Update bats_ai/core/management/commands/importRecordings.py Co-authored-by: Michael Nagler <[email protected]> * adding comments and linting --------- Co-authored-by: Michael Nagler <[email protected]>
1 parent 508b305 commit 60ec1c5

File tree

5 files changed

+453
-55
lines changed

5 files changed

+453
-55
lines changed

README.md

Lines changed: 53 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,56 @@ but allows developers to run Python code on their native system.
7272
4. `npm run dev`
7373
6. When finished, run `docker compose stop`
7474

75+
## Importing Recordings
76+
77+
The `importRecordings` management command allows you to bulk import WAV files from a
78+
directory. It will:
79+
80+
- Recursively search for all `.wav` and `.WAV` files in the specified directory
81+
- Extract GUANO metadata from each file (with filename fallback if metadata is missing)
82+
- Create Recording objects with the extracted metadata
83+
- Generate spectrograms synchronously for each recording
84+
- Log progress to the terminal
85+
86+
### Usage
87+
88+
**Basic usage with Docker Compose (with bind mount):**
89+
90+
```bash
91+
docker compose run --rm -v /path/to/wav/files:/data django ./manage.py importRecordings /data
92+
```
93+
94+
**With options:**
95+
96+
```bash
97+
docker compose run --rm -v /path/to/wav/files:/data django ./manage.py importRecordings /data \
98+
--owner username \
99+
--public \
100+
--limit 10
101+
```
102+
103+
**Options:**
104+
105+
- `directory` (required): Path to directory containing WAV files
106+
- `--owner USERNAME`: Username of the owner for the recordings (defaults to first superuser)
107+
- `--public`: Make imported recordings public
108+
- `--limit N`: Limit the number of WAV files to import (useful for testing)
109+
110+
**Example with bind mount:**
111+
112+
```bash
113+
docker compose run --rm \
114+
-v /media/bryon.lewis/Elements/BATSAI/training_files:/data \
115+
django ./manage.py importRecordings /data --limit 5
116+
```
117+
118+
This will:
119+
120+
1. Mount your host directory `/media/bryon.lewis/Elements/BATSAI/training_files` to `/data` in the container
121+
2. Import only the first 5 WAV files found
122+
3. Use the first superuser as the owner
123+
4. Create private recordings (unless `--public` is specified)
124+
75125
## Testing
76126

77127
### Initial Setup for Testing
@@ -91,9 +141,9 @@ Individual test environments may be selectively run.
91141
This also allows additional options to be added.
92142
Useful sub-commands include:
93143

94-
* `uv run tox -e lint`: Run only the style checks
95-
* `uv run tox -e type`: Run only the type checks
96-
* `uv run tox -e test`: Run only the pytest-driven tests
144+
- `uv run tox -e lint`: Run only the style checks
145+
- `uv run tox -e type`: Run only the type checks
146+
- `uv run tox -e test`: Run only the pytest-driven tests
97147

98148
To automatically reformat all code to comply with
99149
some (but not all) of the style checks, run `uv run tox -e format`.
Lines changed: 206 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,206 @@
1+
import logging
2+
from pathlib import Path
3+
4+
from django.contrib.auth.models import User
5+
from django.contrib.gis.geos import Point
6+
from django.core.files import File
7+
from django.core.management.base import BaseCommand
8+
from django.utils import timezone
9+
10+
from bats_ai.core.models import Recording
11+
from bats_ai.core.utils.guano_utils import extract_guano_metadata
12+
from bats_ai.tasks.tasks import recording_compute_spectrogram
13+
14+
logger = logging.getLogger(__name__)
15+
16+
17+
class Command(BaseCommand):
    """Bulk-import WAV recordings from a directory tree.

    For every ``.wav`` file found (recursively, case-insensitively) under the
    given directory, the command extracts GUANO metadata, creates a
    ``Recording`` owned by the selected user, and then computes its
    spectrogram synchronously. Progress and a final success/failure count are
    written to stdout; a failure on one file does not stop the run.
    """

    help = 'Import WAV files from a directory, extract GUANO metadata, and create recordings'

    def add_arguments(self, parser):
        """Register the positional ``directory`` and the optional flags."""
        parser.add_argument(
            'directory',
            type=str,
            help='Directory path containing WAV files to import',
        )
        parser.add_argument(
            '--owner',
            type=str,
            help='Username of the owner for the recordings (defaults to first superuser)',
        )
        parser.add_argument(
            '--public',
            action='store_true',
            help='Make imported recordings public',
        )
        parser.add_argument(
            '--limit',
            type=int,
            help='Limit the number of WAV files to import (useful for testing)',
        )

    def handle(self, *args, **options):
        """Validate the arguments, then import each WAV file in turn.

        Validation problems (missing directory, unknown owner, no WAV files)
        are reported on stdout and end the command early without raising.
        """
        # 'Agg' is matplotlib's non-interactive backend; it is selected before
        # any spectrogram work starts.
        # NOTE(review): presumably needed because spectrogram generation plots
        # via matplotlib and this command runs headless -- confirm.
        import matplotlib

        matplotlib.use('Agg')

        directory_path = Path(options['directory'])
        is_public = options.get('public', False)
        limit = options.get('limit')

        # Validate directory
        if not directory_path.exists():
            self.stdout.write(self.style.ERROR(f'Directory does not exist: {directory_path}'))
            return

        if not directory_path.is_dir():
            self.stdout.write(self.style.ERROR(f'Path is not a directory: {directory_path}'))
            return

        owner = self._resolve_owner(options.get('owner'))
        if owner is None:
            # _resolve_owner has already reported the reason.
            return

        # Sorted so that the processing order -- and therefore the subset
        # selected by --limit -- is reproducible; rglob() itself yields
        # entries in filesystem-dependent order.
        wav_files = sorted(directory_path.rglob('*.wav', case_sensitive=False))

        if not wav_files:
            self.stdout.write(
                self.style.WARNING(f'No WAV files found in directory: {directory_path}')
            )
            return

        # Apply limit if specified (a missing, zero or negative limit means "all")
        total_files = len(wav_files)
        if limit and limit > 0:
            wav_files = wav_files[:limit]
            self.stdout.write(
                self.style.SUCCESS(
                    f'Found {total_files} WAV file(s), importing first {len(wav_files)}'
                )
            )
        else:
            self.stdout.write(self.style.SUCCESS(f'Found {len(wav_files)} WAV file(s) to import'))

        # Process each file; one bad file must not abort the whole batch.
        successful = 0
        failed = 0
        selected = len(wav_files)

        for idx, wav_file in enumerate(wav_files, 1):
            self.stdout.write(f'\n[{idx}/{selected}] Processing: {wav_file.name}')

            try:
                self._import_file(wav_file, owner, is_public)
            except Exception as e:
                failed += 1
                self.stdout.write(
                    self.style.ERROR(f' ✗ Failed to import {wav_file.name}: {str(e)}')
                )
                # Single place where the traceback is logged for a failed file.
                logger.exception('Error importing file')
            else:
                successful += 1
                self.stdout.write(
                    self.style.SUCCESS(f' ✓ Successfully imported: {wav_file.name}')
                )

        # Summary
        self.stdout.write('\n' + '=' * 60)
        self.stdout.write(
            self.style.SUCCESS(f'Import complete: {successful} successful, {failed} failed')
        )

    def _resolve_owner(self, owner_username):
        """Return the owning user, or ``None`` after reporting why none was found.

        With a username, that exact user is looked up. Without one, the first
        superuser is used and a warning naming the chosen account is printed.
        """
        if owner_username:
            try:
                return User.objects.get(username=owner_username)
            except User.DoesNotExist:
                self.stdout.write(self.style.ERROR(f'User not found: {owner_username}'))
                return None

        # Default to first superuser
        owner = User.objects.filter(is_superuser=True).first()
        if not owner:
            self.stdout.write(
                self.style.ERROR(
                    'No superuser found. Please specify --owner or create a superuser.'
                )
            )
            return None

        self.stdout.write(self.style.WARNING(f'Using default owner: {owner.username}'))
        return owner

    def _recorded_date_time(self, wav_file, metadata):
        """Return ``(date, time)`` for the recording.

        Uses ``nabat_activation_start_time`` from the metadata when present;
        otherwise falls back to the file's modification time (in the current
        Django timezone) and prints a warning.
        """
        start = metadata.get('nabat_activation_start_time')
        if start:
            return start.date(), start.time()

        # Local import: use the stdlib class directly rather than relying on
        # ``django.utils.timezone`` happening to re-export ``datetime``.
        from datetime import datetime

        mtime = datetime.fromtimestamp(
            wav_file.stat().st_mtime, tz=timezone.get_current_timezone()
        )
        self.stdout.write(
            self.style.WARNING(
                ' No activation start time in metadata, using file modification time'
            )
        )
        return mtime.date(), mtime.time()

    @staticmethod
    def _location_point(metadata):
        """Return a ``Point(longitude, latitude)`` or ``None`` if either is missing.

        A coordinate counts as missing only when it is ``None`` or an empty
        string, so a legitimate value of ``0`` (equator / prime meridian) is
        kept instead of being discarded by a truthiness test.
        """
        latitude = metadata.get('nabat_latitude')
        longitude = metadata.get('nabat_longitude')
        if latitude in (None, '') or longitude in (None, ''):
            return None
        return Point(longitude, latitude)

    def _import_file(self, wav_file, owner, is_public):
        """Create a ``Recording`` for ``wav_file`` and compute its spectrogram.

        Any exception propagates to the caller, which counts the file as
        failed and logs the traceback.
        """
        # Extract GUANO metadata
        self.stdout.write(' Extracting GUANO metadata...')
        metadata = extract_guano_metadata(wav_file, check_filename=True)

        recorded_date, recorded_time = self._recorded_date_time(wav_file, metadata)
        point = self._location_point(metadata)

        # Get grid cell ID; an unparseable value is treated as absent.
        grts_cell_id = None
        if metadata.get('nabat_grid_cell_grts_id'):
            try:
                grts_cell_id = int(metadata['nabat_grid_cell_grts_id'])
            except (ValueError, TypeError):
                grts_cell_id = None

        # Convert species list to a comma-separated string if present
        species_list_str = None
        if metadata.get('nabat_species_list'):
            species_list_str = ','.join(metadata['nabat_species_list'])

        # Create recording; the file handle must stay open until save() has
        # consumed it, hence save() inside the ``with`` block.
        self.stdout.write(' Creating recording...')
        with open(wav_file, 'rb') as f:
            recording = Recording(
                name=wav_file.name,
                owner=owner,
                audio_file=File(f, name=wav_file.name),
                recorded_date=recorded_date,
                recorded_time=recorded_time,
                equipment=None,  # Not in GUANO metadata
                grts_cell_id=grts_cell_id,
                recording_location=point,
                public=is_public,
                comments=metadata.get('nabat_comments'),
                detector=metadata.get('nabat_detector_type'),
                software=metadata.get('nabat_software_type'),
                site_name=metadata.get('nabat_site_name'),
                species_list=species_list_str,
                unusual_occurrences=metadata.get('nabat_unusual_occurrences'),
            )
            recording.save()

        self.stdout.write(self.style.SUCCESS(f' Created recording ID: {recording.pk}'))

        # Generate spectrogram synchronously
        # NOTE(review): if this step fails the Recording saved above is kept,
        # yet the file is reported as failed; re-running the import would then
        # create a second Recording for the same file -- confirm this is intended.
        self.stdout.write(' Generating spectrogram...')
        try:
            result = recording_compute_spectrogram(recording.pk)
        except Exception as e:
            self.stdout.write(
                self.style.ERROR(f' Failed to generate spectrogram: {str(e)}')
            )
            # Record which recording was affected; the traceback itself is
            # logged once by the caller, so it is not duplicated here.
            logger.error('Error generating spectrogram for recording %s', recording.pk)
            raise

        self.stdout.write(
            self.style.SUCCESS(
                f' Spectrogram generated (ID: {result.get("spectrogram_id")})'
            )
        )

bats_ai/core/utils/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)