diff --git a/README.md b/README.md index 042a0cb..6225f9b 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,7 @@ This application implements a pipeline that can be used to create audio datasets for the generation of stem continuations of music audio files. The code uses [Dask](https://www.dask.org/) in order to scale the dataset processing on a cluster of virtual machines in the cloud. The application is configured to run on AWS EC2 and to use S3 as storage. The audio files are encoded using Meta's [Encodec](https://github.com/facebookresearch/encodec) into a discrete, compressed, tokenized representation. Finally, the last step uploads the dataset to [ClearML](https://clear.ml) to be used for training and/or inference. The dataset generation pipeline is comprised of several steps: +- **Stem**. Creates drums, bass, guitar and other stems starting from MP3 files using [Demucs](https://github.com/adefossez/demucs) - **Uncompress**. The application expects to find the stem files for a single music file (in .wav format) in a compressed zip archive. Each stem should have a predefined name in order to be identified as a guitar, bass, drum, etc. - **Convert to ogg**. Conversion of wav files to the Ogg Opus audio format. - **Merge**. Several different assortments of stems are generated. 
diff --git a/poetry.lock b/poetry.lock index 07c0e26..8c4282b 100644 --- a/poetry.lock +++ b/poetry.lock @@ -190,6 +190,16 @@ files = [ [package.dependencies] frozenlist = ">=1.1.0" +[[package]] +name = "antlr4-python3-runtime" +version = "4.9.3" +description = "ANTLR 4.9.3 runtime for Python 3.7" +optional = false +python-versions = "*" +files = [ + {file = "antlr4-python3-runtime-4.9.3.tar.gz", hash = "sha256:f224469b4168294902bb1efa80a8bf7855f24c99aef99cbefc1bcd3cce77881b"}, +] + [[package]] name = "anyio" version = "4.6.2.post1" @@ -985,6 +995,30 @@ files = [ {file = "decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330"}, ] +[[package]] +name = "demucs" +version = "4.0.1" +description = "Music source separation in the waveform domain." +optional = false +python-versions = ">=3.8.0" +files = [ + {file = "demucs-4.0.1.tar.gz", hash = "sha256:e45a5a788bae79767c37bbf6e69aae03862ddcca05550fb79b926346a177d713"}, +] + +[package.dependencies] +dora-search = "*" +einops = "*" +julius = ">=0.2.3" +lameenc = ">=1.2" +openunmix = "*" +pyyaml = "*" +torch = ">=1.8.1" +torchaudio = ">=0.8" +tqdm = "*" + +[package.extras] +dev = ["diffq (>=0.2.1)", "dora-search (>=0.1.12)", "einops", "flake8", "hydra-colorlog (>=1.1)", "hydra-core (>=1.1)", "julius (>=0.2.3)", "lameenc (>=1.2)", "museval", "mypy", "openunmix", "pyyaml", "soundfile (>=0.10.3)", "submitit", "torch (>=1.8.1)", "torchaudio (>=0.8)", "tqdm", "treetable"] + [[package]] name = "deprecated" version = "1.2.15" @@ -1041,6 +1075,26 @@ tornado = ">=6.2.0" urllib3 = ">=1.26.5" zict = ">=3.0.0" +[[package]] +name = "dora-search" +version = "0.1.12" +description = "Easy grid searches for ML." 
+optional = false +python-versions = ">=3.7.0" +files = [ + {file = "dora_search-0.1.12.tar.gz", hash = "sha256:2956fd2c4c7e4b9a4830e83f0d4cf961be45cfba1a2f0570281e91d15ac516fb"}, +] + +[package.dependencies] +omegaconf = "*" +retrying = "*" +submitit = "*" +torch = "*" +treetable = "*" + +[package.extras] +dev = ["coverage", "flake8", "hiplot", "hydra-core", "hydra_colorlog", "mypy", "pdoc3", "pytest", "pytorch_lightning"] + [[package]] name = "einops" version = "0.8.0" @@ -1781,6 +1835,22 @@ files = [ [package.dependencies] referencing = ">=0.31.0" +[[package]] +name = "julius" +version = "0.2.7" +description = "Nice DSP sweets: resampling, FFT Convolutions. All with PyTorch, differentiable and with CUDA support." +optional = false +python-versions = ">=3.6.0" +files = [ + {file = "julius-0.2.7.tar.gz", hash = "sha256:3c0f5f5306d7d6016fcc95196b274cae6f07e2c9596eed314e4e7641554fbb08"}, +] + +[package.dependencies] +torch = ">=1.7.0" + +[package.extras] +dev = ["coverage", "flake8", "mypy", "onnxruntime", "pdoc3", "resampy (==0.2.2)"] + [[package]] name = "jupyterlab-widgets" version = "3.0.13" @@ -1792,6 +1862,84 @@ files = [ {file = "jupyterlab_widgets-3.0.13.tar.gz", hash = "sha256:a2966d385328c1942b683a8cd96b89b8dd82c8b8f81dda902bb2bc06d46f5bed"}, ] +[[package]] +name = "lameenc" +version = "1.7.0" +description = "LAME encoding bindings" +optional = false +python-versions = "*" +files = [ + {file = "lameenc-1.7.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:666dae5544b82b7c7e2d45ee82b0b74a3b5d46c62f79df603a30f4b633c78556"}, + {file = "lameenc-1.7.0-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:73ba7b02685d9c17a3b083622b85c859f9db35254d4ea9ee8305d4a8d321876b"}, + {file = "lameenc-1.7.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:367c152dd1dc54c2d6c42837c1c149b0173eb4225e4782f88b1a391c95ff97af"}, + {file = 
"lameenc-1.7.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7b34c529d719bf303ace7ec169965c516b0b94922837cf67f60a73931f1f4580"}, + {file = "lameenc-1.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:ed1f49416e9531a49f462923ca0d2ae14baf9ceea0384cba2c5f9f1090f0df31"}, + {file = "lameenc-1.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3274cade2d3f00cc49748b968660e1297eaca2d15911deb566cc378ffa4e7d8b"}, + {file = "lameenc-1.7.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_24_i686.whl", hash = "sha256:790c250086dd38b26860ccb5574ad0f2c625b52bb9f2a367e07e50ffbf3de832"}, + {file = "lameenc-1.7.0-cp310-cp310-win32.whl", hash = "sha256:8b712902e02ea03e9a5272ee008062b08d99129e7a00a369b90848a9bfab050f"}, + {file = "lameenc-1.7.0-cp310-cp310-win_amd64.whl", hash = "sha256:c8fd147d1faa904cb0b8ba3b038ab795daebec46147b2b9e3891dbb427084f5c"}, + {file = "lameenc-1.7.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:5a5b322d82dfdf6132c563e1fa352487c82a0bb1e93dff8ef634c867c94d82ad"}, + {file = "lameenc-1.7.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7a582a1f356c83be526ba97ce8465553cc55524992325ab1641b3ee0428e6453"}, + {file = "lameenc-1.7.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:e3399b72c8adb965405aeb8ccc614db9b94e5842426289166c4fbf2588cf1e74"}, + {file = "lameenc-1.7.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:93eedf217db3832ead5ea28ad89b2f4dd3d79e3a7d5641d09dbeeeffe6283768"}, + {file = "lameenc-1.7.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:63a2ce2d63706d766ad1686fbc5e12c807c027b7ccfc97de9b9a81f58216e770"}, + {file = "lameenc-1.7.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash 
= "sha256:c2cb91af4b25b3d650781e681032c469fd38979b180eddfbf5a55322372d71b9"}, + {file = "lameenc-1.7.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_24_i686.whl", hash = "sha256:e67f5e985d940f9f8037823e6a629a18acbfe02d0fcd68a8fc113ed46a555aa5"}, + {file = "lameenc-1.7.0-cp311-cp311-win32.whl", hash = "sha256:c835438c0e8b8d680e871095cec143abf655d071b1bb60ccf7a8e08245acd877"}, + {file = "lameenc-1.7.0-cp311-cp311-win_amd64.whl", hash = "sha256:93f426a565e666227303257db67287f33836872dee62d982d521d8972467df46"}, + {file = "lameenc-1.7.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:ff9b6e1fed60a1e1f54861b51628a5c99a4ebc8be76462b8995c7cae99722ec5"}, + {file = "lameenc-1.7.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a14a5f449cc32158ac2fd557a21b6a35dd1cb077e0372ede69572740f9b67905"}, + {file = "lameenc-1.7.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b425fec9430a0aa55877d8787f82b47ff2c5d2a36663a4ca562103d7bbaeb23a"}, + {file = "lameenc-1.7.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:002742147c99e819be4feb8ab05da08ff57f0f76480746ddc83f6a2c09d2639d"}, + {file = "lameenc-1.7.0-cp312-cp312-win32.whl", hash = "sha256:bbabad75799e62638c0e55537e50ceba81456600ce04fb604c006c1b7399e8c1"}, + {file = "lameenc-1.7.0-cp312-cp312-win_amd64.whl", hash = "sha256:0faf238be8044ee706d5f410e19ca1ae38fafa75dedcdc69ed764964d01c8d8b"}, + {file = "lameenc-1.7.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:c194dd436ed1894e325f3cb8baebf76cfa5ee867810c3fb8af2a76a62e219ac1"}, + {file = "lameenc-1.7.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:eec093a6673c7eebeebc269d7735ea09ec9cfda696ff92959b33bc1117b26954"}, + {file = "lameenc-1.7.0-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = 
"sha256:2bed2133410206f61780c412e49c218bf40179016de0284f61ed5bead3cce95a"}, + {file = "lameenc-1.7.0-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:96e54919790e2b4e3519f1696ac19a5efb864c0621e7a2baf89ecfa95908e044"}, + {file = "lameenc-1.7.0-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_24_i686.whl", hash = "sha256:26799cc2dac7d972f354489f33a5ab99d5ca4791edd1b0b3bd87e89bfa22f34f"}, + {file = "lameenc-1.7.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:62ec507686326a6baec8b997070e96328ba8ff03abdc688e36ab47114883a223"}, + {file = "lameenc-1.7.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1e00c8771be3e002a539fd534f8df063a2dafc2d679803a7a4dfd7886ffa4304"}, + {file = "lameenc-1.7.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:be3cf0820f278996680a48d163f781e85431704c0551d90780b165798f2ea1c5"}, + {file = "lameenc-1.7.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8bc05b160603665e570f2d8dd6a70dc6e337cb59dbd5e9a72044afb92c629096"}, + {file = "lameenc-1.7.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_24_i686.whl", hash = "sha256:0d6c195df3f22e39dd2e2963858b0f6a73db8b993ca4f1996c5c147d386c6151"}, + {file = "lameenc-1.7.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:2b18015e33d9530f47bdefc1a0dc91e4a8b94a3b0ff5fbdcac82b64b170388e8"}, + {file = "lameenc-1.7.0-cp38-cp38-macosx_11_0_x86_64.whl", hash = "sha256:d63cd0f1b52b7719b212d9fe8c1fdeb8aa8618b9f943e628001981c235ae8722"}, + {file = "lameenc-1.7.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:1ecd67a8270075885b5b6f3cdf0c4bb3fb0a4589a679e55298f2606ee2fcb5ce"}, + {file = "lameenc-1.7.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:fbbe822069bfadb4608c11a578f027f9c8ba01182332cd538a8ee55cf5d3bf23"}, + {file = "lameenc-1.7.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:110b781e7916849eb430411180ac8c3821ffc7e0d15a18b3f26129e1aff2ce8f"}, + {file = "lameenc-1.7.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f84bdcc3daa0b7635ba2a41d995b321b5d00b6637c89886f88e1d88e356c9baa"}, + {file = "lameenc-1.7.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_24_i686.whl", hash = "sha256:e6c6a934754ed32a7680aaaf15f7fd20e29575239b93a23ee86dafa9b0143608"}, + {file = "lameenc-1.7.0-cp38-cp38-win32.whl", hash = "sha256:4193472c4ec50397b234284551b52af23dd6f998634e0bd75e2caad5bb85ace7"}, + {file = "lameenc-1.7.0-cp38-cp38-win_amd64.whl", hash = "sha256:ff836d4964cc2f7197a14dfc8e2d3787f5e37f1d809ed914995a95ea24abce8f"}, + {file = "lameenc-1.7.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c4dc7b13e871f9d4667be161823082f87c05252b36c1f1e3e0088765f0e81b5b"}, + {file = "lameenc-1.7.0-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:6667cfe1ab6c15960cfe5fdc6ffdc6c1818d1385da143fcb0692218500b60b1e"}, + {file = "lameenc-1.7.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:d46fea0110812e89a4add722bc18e20e8146843395f9b4b9fad9f98a6ccf327e"}, + {file = "lameenc-1.7.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3f8e056d48f07bd17eecca11643c59aaf2804f6c1f822e4ab5b2bf5a313ef1a4"}, + {file = "lameenc-1.7.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:4926c2b50b384c7640c78757b89a7faaccc6a6fcde5fd9016b554f946522e157"}, + {file = "lameenc-1.7.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2d89e63c88e5a224299b310ac9430c2c2e16ca4f2b479e65baf4f17c92928e2a"}, + {file = 
"lameenc-1.7.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_24_i686.whl", hash = "sha256:6cec7e369067d54528b98023fb75c26cd539bbc9292e73fe58516b6964b35db5"}, + {file = "lameenc-1.7.0-cp39-cp39-win32.whl", hash = "sha256:53f1dc2574052354baecf07ba8d2da71dd9aab310c3efc8e029dae13be22a29f"}, + {file = "lameenc-1.7.0-cp39-cp39-win_amd64.whl", hash = "sha256:c3589924788e26afd55a1faa3947ba70167d9ba9901d42ba2b372922999e0ea6"}, + {file = "lameenc-1.7.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2f51808bd3f2da74ce586ef3e3bcae8b77a4a59e9c476a46681d7c052c1c2211"}, + {file = "lameenc-1.7.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c74611af25522db4bcca9221254151ed6133847aa355595152d343d430dd888b"}, + {file = "lameenc-1.7.0-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:5bf4632fed752eca67c93d8ce89098cec8a58706d995151f27eec493771c4856"}, + {file = "lameenc-1.7.0-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:29c9cc3c9012cf52640c071fc519f4b55c40e87b4e07f43ff80e939e6f6baae4"}, + {file = "lameenc-1.7.0-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:a2d88a1e32246f6071ad18dc42c6a9a2af2d8a600de561a887d4dc3d335b2f3d"}, + {file = "lameenc-1.7.0-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9edec7fef66d0e1bd25690780ef697c3bea15f4f614e04de3fd930d558c43833"}, + {file = "lameenc-1.7.0-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_24_i686.whl", hash = "sha256:6b7e0d28c3982eabf9fd8c51f19593c4c1684bb0e261482284a3fd56134bc302"}, + {file = "lameenc-1.7.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = 
"sha256:a84335effaa1f56202a359b9fb2524e4e486e3c37c72a66647626220b22e34bb"}, + {file = "lameenc-1.7.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d089d5c2ae6430531e2dad49d9d4bfa06ff5e6ee9a2b24f1e1ccb5c942be200b"}, + {file = "lameenc-1.7.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:02b7ab137f1fb624020210ae3c2f671a9006ec30d2bec704a55aaebb3850ab3d"}, + {file = "lameenc-1.7.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c76a21488ba35b1dd1d37d8b0671646cb52ca92f7d9b2bb8234bf106ba57db08"}, + {file = "lameenc-1.7.0-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_24_i686.whl", hash = "sha256:a46642b88d9461fd1377f2fcae387503d80753b954b2547ace91e5ba5fff11af"}, + {file = "lameenc-1.7.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:0d0aa76ab3642c1025d8a47f0df4986147ae03f10d83c4b5ff34e8ddd1b62c45"}, + {file = "lameenc-1.7.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c103b3a021a000de7822249d91008e1adfcb70e7bdffa3483c5794045dc051dc"}, + {file = "lameenc-1.7.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:74fafaa009697a39b4dfae0d6763ad666ccda75386d68befd943decf46610722"}, + {file = "lameenc-1.7.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d3fe077c20dd521fd261ae5dfccab80d94eace7b7474035f1584a08eca1baa7f"}, + {file = "lameenc-1.7.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_24_i686.whl", hash = "sha256:0bbd88b127bafce6da8ac8b03d5a87791d32b928dc61533e4752a935808fc851"}, +] + [[package]] name = "lazy-loader" version = "0.4" @@ -2649,6 +2797,44 @@ files = [ {file = "nvidia_nvtx_cu12-12.4.127-py3-none-win_amd64.whl", hash = 
"sha256:641dccaaa1139f3ffb0d3164b4b84f9d253397e38246a4f2f36728b48566d485"}, ] +[[package]] +name = "omegaconf" +version = "2.3.0" +description = "A flexible configuration library" +optional = false +python-versions = ">=3.6" +files = [ + {file = "omegaconf-2.3.0-py3-none-any.whl", hash = "sha256:7b4df175cdb08ba400f45cae3bdcae7ba8365db4d165fc65fd04b050ab63b46b"}, + {file = "omegaconf-2.3.0.tar.gz", hash = "sha256:d5d4b6d29955cc50ad50c46dc269bcd92c6e00f5f90d23ab5fee7bfca4ba4cc7"}, +] + +[package.dependencies] +antlr4-python3-runtime = "==4.9.*" +PyYAML = ">=5.1.0" + +[[package]] +name = "openunmix" +version = "1.3.0" +description = "PyTorch-based music source separation toolkit" +optional = false +python-versions = ">=3.9" +files = [ + {file = "openunmix-1.3.0-py3-none-any.whl", hash = "sha256:e893ae22c5b8001a6107022499c2587b70d5c2e4777cc7c9ed6272b68a69534e"}, + {file = "openunmix-1.3.0.tar.gz", hash = "sha256:cc9245ce728700f5d0b72c67f01be4162777e617cdc47f9b035963afac180fc8"}, +] + +[package.dependencies] +numpy = "*" +torch = ">=1.9.0" +torchaudio = ">=0.9.0" +tqdm = "*" + +[package.extras] +asteroid = ["asteroid-filterbanks (>=0.3.2)"] +evaluation = ["musdb (>=0.4.0)", "museval (>=0.4.0)"] +stempeg = ["stempeg"] +tests = ["asteroid-filterbanks (>=0.3.2)", "musdb (>=0.4.0)", "museval (>=0.4.0)", "onnx", "pytest", "stempeg", "tqdm"] + [[package]] name = "orderedmultidict" version = "1.0.1" @@ -3638,6 +3824,20 @@ urllib3 = ">=1.21.1,<3" socks = ["PySocks (>=1.5.6,!=1.5.7)"] use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] +[[package]] +name = "retrying" +version = "1.3.4" +description = "Retrying" +optional = false +python-versions = "*" +files = [ + {file = "retrying-1.3.4-py3-none-any.whl", hash = "sha256:8cc4d43cb8e1125e0ff3344e9de678fefd85db3b750b81b2240dc0183af37b35"}, + {file = "retrying-1.3.4.tar.gz", hash = "sha256:345da8c5765bd982b1d1915deb9102fd3d1f7ad16bd84a9700b85f64d24e8f3e"}, +] + +[package.dependencies] +six = ">=1.7.0" + [[package]] name = "rich" version 
= "13.9.4" @@ -4169,6 +4369,24 @@ pure-eval = "*" [package.extras] tests = ["cython", "littleutils", "pygments", "pytest", "typeguard"] +[[package]] +name = "submitit" +version = "1.5.2" +description = "\"Python 3.8+ toolbox for submitting jobs to Slurm" +optional = false +python-versions = ">=3.8" +files = [ + {file = "submitit-1.5.2-py3-none-any.whl", hash = "sha256:c6d5867fbcc78588d0ded3338436903f8db9fdb759f80e9639e6025a9ea32ade"}, + {file = "submitit-1.5.2.tar.gz", hash = "sha256:36a8a54ad4e10171111e7618eefe28fe819f931a89c9cd1f6d2770900c013f12"}, +] + +[package.dependencies] +cloudpickle = ">=1.2.1" +typing_extensions = ">=3.7.4.2" + +[package.extras] +dev = ["black (==23.3.0)", "coverage[toml] (>=5.1)", "flit (>=3.5.1)", "isort (==5.11.5)", "mypy (>=1.4.1)", "pre-commit (>=1.15.2)", "pylint (>=3.0.0)", "pytest (>=7.4.2)", "pytest-asyncio (>=0.15.0)", "pytest-cov (>=4.1.0)"] + [[package]] name = "sympy" version = "1.13.1" @@ -4607,6 +4825,16 @@ torchhub = ["filelock", "huggingface-hub (>=0.23.2,<1.0)", "importlib-metadata", video = ["av (==9.2.0)"] vision = ["Pillow (>=10.0.1,<=15.0)"] +[[package]] +name = "treetable" +version = "0.2.5" +description = "Helper to pretty print an ascii table with atree-like structure" +optional = false +python-versions = ">=3.6.0" +files = [ + {file = "treetable-0.2.5.tar.gz", hash = "sha256:29c95b797a8ecff4bb894cb7b103e39a78c905ab78a88a9a247de30c87743a2f"}, +] + [[package]] name = "triton" version = "3.1.0" @@ -4974,4 +5202,4 @@ test = [] [metadata] lock-version = "2.0" python-versions = ">=3.10,<3.11" -content-hash = "df08720341e8b90792b6d0a551aa828d689ab0a17f6376b604412a188b357e76" +content-hash = "1037bb91cfaf13e803b7901c0a7506067cc8a023acb26452de175a445c4a1103" diff --git a/pyproject.toml b/pyproject.toml index 1581460..83a37d9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,6 +45,7 @@ torch = "^2.5.1" torchaudio = "^2.5.1" torchvision = "^0.20.1" accelerate = "^1.1.1" +demucs = "^4.0.1" 
[tool.poetry.dev-dependencies] flake8 = "^7.1.1" diff --git a/src/stem_continuation_dataset_generator/constants.py b/src/stem_continuation_dataset_generator/constants.py index 7f786f9..4d8c8dd 100644 --- a/src/stem_continuation_dataset_generator/constants.py +++ b/src/stem_continuation_dataset_generator/constants.py @@ -8,6 +8,10 @@ DASK_CLUSTER_NAME = 'stem-continuation-dataset-generator-cluster' +def get_whole_tracks_files_path(): + return os.path.join(STORAGE_BUCKET_NAME, 'whole-tracks') + + def get_original_files_path(): return os.path.join(STORAGE_BUCKET_NAME, 'original') diff --git a/src/stem_continuation_dataset_generator/pipeline.py b/src/stem_continuation_dataset_generator/pipeline.py index b6cb26c..dcd77f6 100644 --- a/src/stem_continuation_dataset_generator/pipeline.py +++ b/src/stem_continuation_dataset_generator/pipeline.py @@ -1,9 +1,11 @@ -from stem_continuation_dataset_generator.constants import DATASET_TAGS, get_augmented_files_path, get_distorted_files_path, get_encoded_files_path, get_merged_files_path, get_original_files_path, get_split_files_path +from stem_continuation_dataset_generator.constants import DATASET_TAGS, get_augmented_files_path, get_distorted_files_path, get_encoded_files_path, get_merged_files_path +from stem_continuation_dataset_generator.constants import get_original_files_path, get_split_files_path, get_whole_tracks_files_path from stem_continuation_dataset_generator.steps.augment import augment_all from stem_continuation_dataset_generator.steps.convert_to_ogg import convert_to_ogg from stem_continuation_dataset_generator.steps.encode import encode_all from stem_continuation_dataset_generator.steps.merge import assort_and_merge_all from stem_continuation_dataset_generator.steps.split import split_all +from stem_continuation_dataset_generator.steps.stem import stem_all from stem_continuation_dataset_generator.steps.uncompress import uncompress_files from stem_continuation_dataset_generator.steps.upload import upload from 
stem_continuation_dataset_generator.steps.distort import distort_all @@ -33,6 +35,7 @@ def dataset_creation_pipeline(stem_name: str): tags = DATASET_TAGS + [f'stem-{stem_name}'] + stem_all(get_whole_tracks_files_path(), get_original_files_path()) assort_and_merge_all(get_original_files_path(), get_merged_files_path(stem_name), stem_name) augment_all(get_merged_files_path(stem_name), get_augmented_files_path(stem_name)) distort_all(get_augmented_files_path(stem_name), get_distorted_files_path(stem_name)) diff --git a/src/stem_continuation_dataset_generator/steps/merge.py b/src/stem_continuation_dataset_generator/steps/merge.py index c13c315..f44100e 100644 --- a/src/stem_continuation_dataset_generator/steps/merge.py +++ b/src/stem_continuation_dataset_generator/steps/merge.py @@ -3,7 +3,6 @@ import os import random from typing import FrozenSet, List, Optional, Tuple, cast, Set -import librosa from pydub import AudioSegment from dask.distributed import progress, Client from s3fs.core import S3FileSystem @@ -11,6 +10,7 @@ from stem_continuation_dataset_generator.cluster import get_client from stem_continuation_dataset_generator.constants import DEFAULT_STEM_NAME, get_merged_files_path, get_original_files_path from stem_continuation_dataset_generator.utils.constants import get_random_seed +from stem_continuation_dataset_generator.utils.utils import is_mostly_silent STEM_NAMES = ['guitar', 'drum', 'bass', 'perc', 'fx', 'vocals', 'piano', 'synth', 'winds', 'strings'] BASIC_STEM_NAMES = ['guitar', 'drum', 'bass', 'perc', 'gtr', 'drm', 'piano'] @@ -133,22 +133,17 @@ def create_stems_assortments(other_stems: List[StemFile], current_stem_file: str return [(current_stem_file, assortment) for assortment in assortments] -def is_mostly_silent(fs: S3FileSystem, file_path: str) -> bool: - with fs.open(file_path, 'rb') as file: - - audio, sr = librosa.load(file) # type: ignore - no_of_samples = audio.shape[-1] - splits = librosa.effects.split(audio, top_db=60) - non_silent_samples 
= sum([end - start for (start, end) in splits]) - return non_silent_samples / no_of_samples < MIN_PERCENTAGE_OF_AUDIO_IN_NON_SILENT_FILES - - def get_stem(file_path: str, silent: bool) -> StemFile: return StemFile(file_path=file_path, is_mostly_silent=silent) +def is_remote_file_mostly_silent(fs: S3FileSystem, file_path: str): + with fs.open(file_path, 'rb') as file: + return is_mostly_silent(cast(io.TextIOWrapper, file), MIN_PERCENTAGE_OF_AUDIO_IN_NON_SILENT_FILES) + + def get_stems(fs: S3FileSystem, paths: List[str]) -> List[StemFile]: - return [get_stem(path, is_mostly_silent(fs, path)) for path in paths] + return [get_stem(path, is_remote_file_mostly_silent(fs, path)) for path in paths] def assort(fs: S3FileSystem, directory: str, stem_name: str) -> List[List[Tuple[str, FrozenSet[str]]]]: diff --git a/src/stem_continuation_dataset_generator/steps/stem.py b/src/stem_continuation_dataset_generator/steps/stem.py new file mode 100644 index 0000000..a005398 --- /dev/null +++ b/src/stem_continuation_dataset_generator/steps/stem.py @@ -0,0 +1,90 @@ +import glob +import os +import shlex +import tempfile +from typing import List, Tuple, cast + +from distributed import Client, progress +import demucs.separate +from s3fs.core import S3FileSystem + +from stem_continuation_dataset_generator.cluster import get_client +from stem_continuation_dataset_generator.constants import get_original_files_path, get_whole_tracks_files_path +from stem_continuation_dataset_generator.steps.convert_to_ogg import convert_to_ogg +from stem_continuation_dataset_generator.utils.utils import is_mostly_silent + + +RUN_LOCALLY = False +PERCENTAGE_OF_NON_SILENT_AUDIO_FILE = 0.25 +EXCLUDED_STEMS = ['piano', 'vocals'] # Piano and vocals stems produced by Demucs are low quality + + +def get_whole_track_files(fs: S3FileSystem, dir: str) -> List[str]: + return cast(List[str], fs.glob(os.path.join(dir, '**/*.mp3'))) + + +def stem_file(output_directory: str, file_path: str) -> tuple[str, list[tuple[str, 
str]]]: + """ + Separates an audio file into its individual tracks using the Demucs model. + + This function takes an audio file as input, separates it into its individual tracks using the Demucs model, + and returns the directory where the separated tracks are stored along with a list of tuples containing the + instrument name of each track and its corresponding file path. + + Args: + output_directory (str): The directory where Demucs writes the separated tracks. + file_path (str): The path to the audio file to be separated. + + Returns: + tuple[str, list[tuple[str, str]]]: A tuple containing the directory path where the separated tracks are stored, + and a list of tuples where each tuple contains the instrument name of a track and its file path. + """ + demucs.separate.main(shlex.split(f'-n htdemucs_6s --clip-mode clamp --out "{output_directory}" "{file_path}"')) + return (output_directory, [(os.path.splitext(os.path.basename(filename))[0], filename) for filename in glob.glob(os.path.join(output_directory, '**', '*.wav'), recursive=True)]) + + +def stem(params: Tuple[S3FileSystem, str, str, str, str]): + fs, file_path, artist, source_directory, base_output_directory = params + + basename = os.path.basename(file_path) + song_name = basename.replace('.mp3', '') + output_directory = os.path.join(base_output_directory, artist, song_name) + + with tempfile.TemporaryDirectory() as local_directory: + local_path = os.path.join(local_directory, basename) + fs.download(file_path, local_path) + stem_file(local_directory, local_path) + os.remove(local_path) + convert_to_ogg(local_directory) + ogg_files = glob.glob(os.path.join(local_directory, '**/*.ogg'), recursive=True) + for ogg_file in ogg_files: + if os.path.basename(ogg_file).split('.')[0] not in EXCLUDED_STEMS: + with open(ogg_file, 'rb') as file: + if not is_mostly_silent(file, PERCENTAGE_OF_NON_SILENT_AUDIO_FILE): + print(ogg_file) + fs.upload(ogg_file, os.path.join(output_directory, os.path.basename(ogg_file))) + + +def stem_all(source_directory: str, output_directory: str): + + fs = 
S3FileSystem() + files = get_whole_track_files(fs, source_directory) + files_with_artist = [(file_path, os.path.dirname(file_path).split(os.path.sep)[-1]) for file_path in files] + + client = cast( + Client, + get_client( + RUN_LOCALLY, + ), + ) + + params_list: List[Tuple[S3FileSystem, str, str, str, str]] = [(fs, file_path, artist, source_directory, output_directory) for file_path, artist in files_with_artist] + + print('Stemming audio tracks') + futures = client.map(stem, params_list, retries=2) + progress(futures) + + return output_directory + + +if __name__ == '__main__': + stem_all(get_whole_tracks_files_path(), get_original_files_path()) \ No newline at end of file diff --git a/src/stem_continuation_dataset_generator/utils/utils.py b/src/stem_continuation_dataset_generator/utils/utils.py index 2d128c5..7c507f1 100644 --- a/src/stem_continuation_dataset_generator/utils/utils.py +++ b/src/stem_continuation_dataset_generator/utils/utils.py @@ -1,5 +1,8 @@ +import io from clearml import Dataset import numpy as np +import librosa +from typing import Union from stem_continuation_dataset_generator.constants import CLEARML_DATASET_NAME from stem_continuation_dataset_generator.utils.constants import get_clearml_project_name @@ -40,3 +43,10 @@ def convert_audio_to_float_32(audio_data: np.ndarray) -> np.ndarray: raw_data = audio_data / max_32bit return raw_data.astype(np.float32) + +def is_mostly_silent(file: Union[io.TextIOWrapper, io.BufferedReader], percentage_non_silent: float) -> bool: + audio, sr = librosa.load(file) # type: ignore + no_of_samples = audio.shape[-1] + splits = librosa.effects.split(audio, top_db=60) + non_silent_samples = sum([end - start for (start, end) in splits]) + return non_silent_samples / no_of_samples < percentage_non_silent \ No newline at end of file