Skip to content

Commit d6476e1

Browse files
committed
bulk import managment command
1 parent 508b305 commit d6476e1

File tree

5 files changed

+449
-55
lines changed

5 files changed

+449
-55
lines changed

README.md

Lines changed: 53 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,56 @@ but allows developers to run Python code on their native system.
7272
4. `npm run dev`
7373
6. When finished, run `docker compose stop`
7474

75+
## Importing Recordings
76+
77+
The `importRecordings` management command allows you to bulk import WAV files from a
78+
directory. It will:
79+
80+
- Recursively search for all `.wav` and `.WAV` files in the specified directory
81+
- Extract GUANO metadata from each file (with filename fallback if metadata is missing)
82+
- Create Recording objects with the extracted metadata
83+
- Generate spectrograms synchronously for each recording
84+
- Log progress to the terminal
85+
86+
### Usage
87+
88+
**Basic usage with Docker Compose (with bind mount):**
89+
90+
```bash
91+
docker compose run --rm -v /path/to/wav/files:/data django ./manage.py importRecordings /data
92+
```
93+
94+
**With options:**
95+
96+
```bash
97+
docker compose run --rm -v /path/to/wav/files:/data django ./manage.py importRecordings /data \
98+
--owner username \
99+
--public \
100+
--limit 10
101+
```
102+
103+
**Options:**
104+
105+
- `directory` (required): Path to directory containing WAV files
106+
- `--owner USERNAME`: Username of the owner for the recordings (defaults to first superuser)
107+
- `--public`: Make imported recordings public
108+
- `--limit N`: Limit the number of WAV files to import (useful for testing)
109+
110+
**Example with bind mount:**
111+
112+
```bash
113+
docker compose run --rm \
114+
-v /media/bryon.lewis/Elements/BATSAI/training_files:/data \
115+
django ./manage.py importRecordings /data --limit 5
116+
```
117+
118+
This will:
119+
120+
1. Mount your host directory `/media/bryon.lewis/Elements/BATSAI/training_files` to `/data` in the container
121+
2. Import only the first 5 WAV files found
122+
3. Use the first superuser as the owner
123+
4. Create private recordings (unless `--public` is specified)
124+
75125
## Testing
76126

77127
### Initial Setup for Testing
@@ -91,9 +141,9 @@ Individual test environments may be selectively run.
91141
This also allows additional options to be added.
92142
Useful sub-commands include:
93143

94-
* `uv run tox -e lint`: Run only the style checks
95-
* `uv run tox -e type`: Run only the type checks
96-
* `uv run tox -e test`: Run only the pytest-driven tests
144+
- `uv run tox -e lint`: Run only the style checks
145+
- `uv run tox -e type`: Run only the type checks
146+
- `uv run tox -e test`: Run only the pytest-driven tests
97147

98148
To automatically reformat all code to comply with
99149
some (but not all) of the style checks, run `uv run tox -e format`.
Lines changed: 202 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,202 @@
1+
from datetime import datetime
import logging
from pathlib import Path

from django.contrib.auth.models import User
from django.contrib.gis.geos import Point
from django.core.files import File
from django.core.management.base import BaseCommand, CommandError
from django.utils import timezone

from bats_ai.core.models import Recording
from bats_ai.core.utils.guano_utils import extract_guano_metadata
from bats_ai.tasks.tasks import recording_compute_spectrogram
13+
14+
logger = logging.getLogger(__name__)
15+
16+
17+
class Command(BaseCommand):
    """Bulk-import WAV recordings found under a directory.

    For every WAV file the command extracts GUANO metadata, creates a
    ``Recording`` owned by the selected user and then computes its
    spectrogram synchronously. Per-file progress and a final
    success/failure summary are written to stdout.
    """

    help = 'Import WAV files from a directory, extract GUANO metadata, and create recordings'

    def add_arguments(self, parser):
        """Register the positional ``directory`` argument and the optional flags."""
        parser.add_argument(
            'directory',
            type=str,
            help='Directory path containing WAV files to import',
        )
        parser.add_argument(
            '--owner',
            type=str,
            help='Username of the owner for the recordings (defaults to first superuser)',
        )
        parser.add_argument(
            '--public',
            action='store_true',
            help='Make imported recordings public',
        )
        parser.add_argument(
            '--limit',
            type=int,
            help='Limit the number of WAV files to import (useful for testing)',
        )

    def handle(self, *args, **options):
        """Validate the arguments, then import every WAV file that was found.

        Raises:
            CommandError: if the directory is missing or is not a directory,
                if ``--owner`` names an unknown user, or if no owner was given
                and no superuser exists. Raising (instead of printing and
                returning) makes the command exit with a non-zero status so
                that calling scripts can detect the failure.
        """
        directory_path = Path(options['directory'])
        is_public = options.get('public', False)
        limit = options.get('limit')

        if not directory_path.exists():
            raise CommandError(f'Directory does not exist: {directory_path}')
        if not directory_path.is_dir():
            raise CommandError(f'Path is not a directory: {directory_path}')

        owner = self._resolve_owner(options.get('owner'))

        wav_files = self._find_wav_files(directory_path)
        if not wav_files:
            self.stdout.write(
                self.style.WARNING(f'No WAV files found in directory: {directory_path}')
            )
            return

        # A missing, zero or negative limit means "import everything".
        total_files = len(wav_files)
        if limit and limit > 0:
            wav_files = wav_files[:limit]
            self.stdout.write(
                self.style.SUCCESS(
                    f'Found {total_files} WAV file(s), importing first {len(wav_files)}'
                )
            )
        else:
            self.stdout.write(self.style.SUCCESS(f'Found {len(wav_files)} WAV file(s) to import'))

        successful = 0
        failed = 0

        for idx, wav_file in enumerate(wav_files, 1):
            self.stdout.write(f'\n[{idx}/{len(wav_files)}] Processing: {wav_file.name}')
            try:
                self._import_file(wav_file, owner, is_public)
            except Exception as e:
                # Top-level boundary for one file: report it, log the
                # traceback and carry on with the remaining files.
                failed += 1
                self.stdout.write(
                    self.style.ERROR(f' ✗ Failed to import {wav_file.name}: {e}')
                )
                logger.exception('Error importing file')
            else:
                successful += 1
                self.stdout.write(self.style.SUCCESS(f' ✓ Successfully imported: {wav_file.name}'))

        # Summary
        self.stdout.write('\n' + '=' * 60)
        self.stdout.write(
            self.style.SUCCESS(f'Import complete: {successful} successful, {failed} failed')
        )

    def _resolve_owner(self, owner_username):
        """Return the user named by ``--owner``, or the first superuser if none was given."""
        if owner_username:
            try:
                return User.objects.get(username=owner_username)
            except User.DoesNotExist as exc:
                raise CommandError(f'User not found: {owner_username}') from exc

        owner = User.objects.filter(is_superuser=True).first()
        if not owner:
            raise CommandError('No superuser found. Please specify --owner or create a superuser.')
        self.stdout.write(self.style.WARNING(f'Using default owner: {owner.username}'))
        return owner

    @staticmethod
    def _find_wav_files(directory_path):
        """Return all WAV files below ``directory_path``, sorted by path.

        The suffix is compared case-insensitively so ``.wav``, ``.WAV`` and
        mixed-case variants are all found, and each file is listed exactly
        once even on case-insensitive filesystems. Sorting makes the subset
        selected by ``--limit`` reproducible between runs.
        """
        return sorted(
            path
            for path in directory_path.rglob('*')
            if path.suffix.lower() == '.wav' and path.is_file()
        )

    @staticmethod
    def _has_value(value):
        """Tell whether a metadata value is present; ``0`` and ``0.0`` count as present."""
        return value is not None and value != ''

    def _recorded_date_and_time(self, wav_file, metadata):
        """Return ``(date, time)`` from the metadata, falling back to the file mtime."""
        start = metadata.get('nabat_activation_start_time')
        if start:
            return start.date(), start.time()

        # Use file modification time as fallback
        mtime = datetime.fromtimestamp(
            wav_file.stat().st_mtime, tz=timezone.get_current_timezone()
        )
        self.stdout.write(
            self.style.WARNING(
                ' No activation start time in metadata, using file modification time'
            )
        )
        return mtime.date(), mtime.time()

    def _import_file(self, wav_file, owner, is_public):
        """Create a ``Recording`` for one WAV file and compute its spectrogram.

        Any exception raised while reading metadata or saving the recording
        propagates to the caller. A spectrogram failure is reported and
        logged here but does not propagate, so the recording still counts as
        imported.
        """
        self.stdout.write(' Extracting GUANO metadata...')
        metadata = extract_guano_metadata(wav_file, check_filename=True)

        recorded_date, recorded_time = self._recorded_date_and_time(wav_file, metadata)

        # Build the location only when both coordinates are present. The
        # check is explicit because 0.0 is a valid coordinate but is falsy.
        point = None
        latitude = metadata.get('nabat_latitude')
        longitude = metadata.get('nabat_longitude')
        if self._has_value(latitude) and self._has_value(longitude):
            point = Point(longitude, latitude)

        # Grid cell ID: keep None when it is missing or not an integer.
        grts_cell_id = None
        if metadata.get('nabat_grid_cell_grts_id'):
            try:
                grts_cell_id = int(metadata['nabat_grid_cell_grts_id'])
            except (ValueError, TypeError):
                pass

        # Convert species list to a comma-separated string if present
        species_list_str = None
        if metadata.get('nabat_species_list'):
            species_list_str = ','.join(metadata['nabat_species_list'])

        self.stdout.write(' Creating recording...')
        # The file must stay open until save() has stored its content.
        with open(wav_file, 'rb') as f:
            recording = Recording(
                name=wav_file.name,
                owner=owner,
                audio_file=File(f, name=wav_file.name),
                recorded_date=recorded_date,
                recorded_time=recorded_time,
                equipment=None,  # Not in GUANO metadata
                grts_cell_id=grts_cell_id,
                recording_location=point,
                public=is_public,
                comments=metadata.get('nabat_comments'),
                detector=metadata.get('nabat_detector_type'),
                software=metadata.get('nabat_software_type'),
                site_name=metadata.get('nabat_site_name'),
                species_list=species_list_str,
                unusual_occurrences=metadata.get('nabat_unusual_occurrences'),
            )
            recording.save()

        self.stdout.write(self.style.SUCCESS(f' Created recording ID: {recording.pk}'))

        # Generate spectrogram synchronously (direct call to the task function).
        self.stdout.write(' Generating spectrogram...')
        try:
            result = recording_compute_spectrogram(recording.pk)
            self.stdout.write(
                self.style.SUCCESS(
                    f' Spectrogram generated (ID: {result.get("spectrogram_id")})'
                )
            )
        except Exception as e:
            self.stdout.write(
                self.style.ERROR(f' Failed to generate spectrogram: {e}')
            )
            logger.exception('Error generating spectrogram')

bats_ai/core/utils/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)