ace-step · zCoder0 · Oct 28, 2025 · Oct 28, 2025 · Oct 28, 2025 · Oct 28, 2025
diff --git a/.gitignore b/.gitignore
@@ -142,6 +142,17 @@ ENV/
 env.bak/
 venv.bak/
 
+# Datasets
+datasets/
+*.csv
+*.mp3
+*.wav
+*_prompt.txt
+*lyrics.txt
+*(1).txt
+*lyric.txt
+*.zip
+
 # Spyder project settings
 .spyderproject
 .spyproject

diff --git a/README.md b/README.md
@@ -30,8 +30,17 @@ ACE-Step bridges this gap by integrating diffusion-based generation with Sana’
 Rather than building yet another end-to-end text-to-music pipeline, our vision is to establish a foundation model for music AI: a fast, general-purpose, efficient yet flexible architecture that makes it easy to train sub-tasks on top of it. This paves the way for developing powerful tools that seamlessly integrate into the creative workflows of music artists, producers, and content creators. In short, we aim to build the Stable Diffusion moment for music.
 
 
+
+
+
+
+
+
+
 ## 📢 News and Updates
 
+
+
 - 📃 2025.06.02: Released [ACE-Step Technical Report (PDF)](https://arxiv.org/abs/2506.00045).
 
 - 🎮 2025.05.14: Add `Stable Audio Open Small` sampler `pingpong`. Use SDE to achieve better music consistency and quality, including lyric alignment and style alignment. Use a better method to re-implement `Audio2Audio`

diff --git a/acestep/text2music_dataset.py b/acestep/text2music_dataset.py
@@ -56,9 +56,11 @@ def is_silent_audio(audio_tensor, silence_threshold=0.95):
     "ja": 5412,
     "hu": 5753,
     "ko": 6152,
-    "hi": 6680,
+
 }
 
+SUPPORT_LANGUAGES ['ta']=7000
+
 # Regex pattern for structure markers like [Verse], [Chorus], etc.
 structure_pattern = re.compile(r"\[.*?\]")
 
@@ -469,6 +471,7 @@ def process(self, item):
         random.shuffle(prompt)
         prompt = ", ".join(prompt)
 
+
         # Handle recaption data if available
         recaption = item.get("recaption", {})
         valid_recaption = []
@@ -694,3 +697,4 @@ def __getitem__(self, idx):
             print(v.shape, v.min(), v.max())
         else:
             print(k, v)
+
diff --git a/chkpts/lora_data/dataset_info.json b/chkpts/lora_data/dataset_info.json
@@ -0,0 +1,26 @@
+{
+  "total_samples": 50,
+  "original_samples": 357,
+  "tokenizer_vocab_size": 2000,
+  "special_tokens": {
+    "pad_token_id": 0,
+    "unk_token_id": 1,
+    "bos_token_id": 2,
+    "eos_token_id": 3,
+    "sep_token_id": 4,
+    "cls_token_id": 5,
+    "mask_token_id": 6,
+    "music_token_id": 7,
+    "lyric_token_id": 8,
+    "chorus_token_id": 9,
+    "verse_token_id": 10,
+    "bridge_token_id": 11,
+    "intro_token_id": 12,
+    "outro_token_id": 13,
+    "instrumental_token_id": 14
+  },
+  "audio_codebooks": 8,
+  "sample_rate": 16000,
+  "success_rate": "50/50",
+  "note": "Using dummy audio tokens for testing - replace with real EnCodec tokens for training"
+}
diff --git a/chkpts/tokenizer_ace/tamil_corpus_ace.txt b/chkpts/tokenizer_ace/tamil_corpus_ace.txt
diff --git a/chkpts/tokenizer_ace/tamil_tokenizer.model b/chkpts/tokenizer_ace/tamil_tokenizer.model
diff --git a/chkpts/tokenizer_ace/tamil_tokenizer_ace.model b/chkpts/tokenizer_ace/tamil_tokenizer_ace.model