Skip to content

Commit 7d4b31b

Browse files
authored
Merge pull request #2 from pablodz/dev
Dev
2 parents 29ebb13 + c90b8ed commit 7d4b31b

File tree

11 files changed

+308
-12
lines changed

11 files changed

+308
-12
lines changed

README.md

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,55 @@
44

55
SoX is a great tool, but it's not easy to use. SoPro is a next-generation sound processing tool that is easy to use and easy to extend. For now, only audio files can be converted to other formats, but more features, such as video processing, will be added in the future.
66

7+
```
8+
┌─────────────────┐
9+
raw data ───────►│ ├────────► returns raw data in other formats
10+
│ │
11+
│ │
12+
websocket ───────►│ ├────────► returns raw data in other formats
13+
│ │
14+
│ SOPRO-CORE │
15+
chunked data───────►│ ├────────► returns chunked processed data
16+
│ │
17+
│ │
18+
gRPC ───────►│ ├────────► returns grpc chunked data
19+
│ │
20+
└─────────────────┘
21+
22+
Examples:
23+
24+
- ulaw -> wav pcm
25+
- ulaw -> wav pcm normalized (on the fly)
26+
```
27+
28+
Plugins:
29+
- Connectivity to Python neural-network inference APIs over gRPC/HTTP, with caching of inference results
30+
31+
## Installation
32+
33+
```bash
34+
go get -v github.com/pablodz/sopro
35+
```
36+
737
## Methods planned to be implemented
838

939
- [x] Chunked
1040
- [x] Full memory
1141
- [ ] Batch
1242
- [ ] Streaming
1343

44+
## Examples
45+
46+
Check [./examples](./examples/) folder
47+
1448
## Roadmap
1549

50+
- [ ] CLI (sox-friendly)
51+
- [ ] GUI (in another repo)
52+
- [ ] Microservice (in another repo)
53+
- [ ] HTTP
54+
- [ ] Websocket
55+
- [ ] gRPC
1656
- [x] Audio file conversion
1757
- [ ] Format conversion [Work in progress...](docs/format_table.md)
1858
- [ ] Bitrate conversion

docs/format_table.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ blockquote {
3737
| mogg | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | + | | | | | | | | | | | | | | | | |
3838
| ulaw | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | + | | | | | | | | | | | + | | | | |
3939
| opus | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | + | | | | | | | | | | | | | | |
40-
| pcm | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | + | | | | | | | | | | | | | |
40+
| pcm | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | + | | | | | | | | | + | | | | |
4141
| ra | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | + | | | | | | | | | | | | |
4242
| rm | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | + | | | | | | | | | | | |
4343
| raw | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | + | | | | | | | | | | |

examples/ulaw2wav_logpcm/main.go

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
package main
2+
3+
import (
4+
"os"
5+
6+
"github.com/pablodz/sopro/pkg/audioconfig"
7+
"github.com/pablodz/sopro/pkg/cpuarch"
8+
"github.com/pablodz/sopro/pkg/encoding"
9+
"github.com/pablodz/sopro/pkg/fileformat"
10+
"github.com/pablodz/sopro/pkg/method"
11+
"github.com/pablodz/sopro/pkg/transcoder"
12+
)
13+
14+
func main() {
15+
16+
// Open the input file
17+
in, err := os.Open("./internal/samples/recording.ulaw")
18+
if err != nil {
19+
panic(err)
20+
}
21+
defer in.Close()
22+
23+
// Create the output file
24+
out, err := os.Create("./internal/samples/result_sample_ulaw_mono_8000_le_logpcm.wav")
25+
if err != nil {
26+
panic(err)
27+
}
28+
defer out.Close()
29+
30+
// create a transcoder
31+
t := &transcoder.Transcoder{
32+
Method: method.BIT_TABLE,
33+
SourceConfigs: transcoder.TranscoderAudioConfig{
34+
Endianness: cpuarch.LITTLE_ENDIAN,
35+
},
36+
TargetConfigs: transcoder.TranscoderAudioConfig{
37+
Endianness: cpuarch.LITTLE_ENDIAN,
38+
},
39+
SizeBuffer: 1024,
40+
Verbose: true,
41+
}
42+
43+
// Transcode the file
44+
err = t.Mulaw2Wav(
45+
&transcoder.AudioFileIn{
46+
Data: in,
47+
AudioFileGeneral: transcoder.AudioFileGeneral{
48+
Format: fileformat.AUDIO_MULAW,
49+
Config: audioconfig.MulawConfig{
50+
BitDepth: 8,
51+
Channels: 1,
52+
Encoding: encoding.SPACE_LOGARITHMIC, // ulaw is logarithmic
53+
SampleRate: 8000,
54+
},
55+
},
56+
},
57+
&transcoder.AudioFileOut{
58+
Data: out,
59+
AudioFileGeneral: transcoder.AudioFileGeneral{
60+
Format: fileformat.AUDIO_WAV,
61+
Config: audioconfig.WavConfig{
62+
BitDepth: 8,
63+
Channels: 1,
64+
Encoding: encoding.SPACE_LOGARITHMIC,
65+
SampleRate: 8000,
66+
},
67+
},
68+
},
69+
)
70+
71+
if err != nil {
72+
panic(err)
73+
}
74+
75+
}
Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ func main() {
2121
defer in.Close()
2222

2323
// Create the output file
24-
out, err := os.Create("./internal/samples/result_sample_ulaw_mono_8000_be.wav")
24+
out, err := os.Create("./internal/samples/result_sample_ulaw_mono_8000_le_lpcm.wav")
2525
if err != nil {
2626
panic(err)
2727
}
@@ -36,8 +36,8 @@ func main() {
3636
TargetConfigs: transcoder.TranscoderAudioConfig{
3737
Endianness: cpuarch.LITTLE_ENDIAN,
3838
},
39-
SizeBufferToProcess: 1024,
40-
Verbose: true,
39+
SizeBuffer: 1024,
40+
Verbose: true,
4141
}
4242

4343
// Transcode the file

pkg/audioconfig/wav.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,14 @@ type WavConfig struct {
1313
Encoding int // the encoding format (e.g. "PCM", "IEEE_FLOAT")
1414
}
1515

16+
// https://www-mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/WAVE.html
17+
const (
18+
WAVE_FORMAT_PCM = 0x0001
19+
WAVE_FORMAT_IEEE_FLOAT = 0x0003
20+
WAVE_FORMAT_ALAW = 0x0006
21+
WAVE_FORMAT_MULAW = 0x0007
22+
)
23+
1624
func PrintWavHeaders(headersWav []byte) {
1725
if len(headersWav) != 44 {
1826
log.Println("[ERROR] Headers are not 44 bytes long")

pkg/encoding/encoding.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ const (
77
)
88

99
var ENCODINGS = map[int]string{
10+
NOT_FILLED: "Not filled",
1011
SPACE_LINEAR: "Linear",
1112
SPACE_LOGARITHMIC: "Logarithmic",
1213
}

pkg/transcoder/models_transcoder.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ package transcoder
33
type Transcoder struct {
44
Method int // the method of transcoding (e.g. 1, 2, 3, etc.)
55
MethodAdvancedConfigs interface{} // the specific configuration options for the transcoding method
6-
SizeBufferToProcess int // the size of the buffer to read from the input file. Default is 1024
6+
SizeBuffer int // the size of the buffer to read from the input file. Default is 1024
77
SourceConfigs TranscoderAudioConfig // the source configuration
88
TargetConfigs TranscoderAudioConfig // the target configuration
99
BitDepth int // the bit depth (e.g. 8, 16, 24) Needs to be equal for source and target

pkg/transcoder/mulaw2wav.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ func init() {
3131
// Transcode an ulaw file to a wav file (large files supported)
3232
// https://raw.githubusercontent.com/corkami/pics/master/binary/WAV.png
3333
// http://www.topherlee.com/software/pcm-tut-wavformat.html
34-
func mulaw2Wav(in *AudioFileIn, out *AudioFileOut, transcoder *Transcoder) (err error) {
34+
func mulaw2WavLpcm(in *AudioFileIn, out *AudioFileOut, transcoder *Transcoder) (err error) {
3535

3636
// read all the file
3737
if transcoder.Verbose {
@@ -74,7 +74,7 @@ func mulaw2Wav(in *AudioFileIn, out *AudioFileOut, transcoder *Transcoder) (err
7474
'W', 'A', 'V', 'E', // Format
7575
'f', 'm', 't', ' ', // Sub-chunk 1 ID
7676
16, 0, 0, 0, // Sub-chunk 1 size
77-
1, 0, // Audio format (PCM)
77+
audioconfig.WAVE_FORMAT_PCM, 0, // Audio format (PCM)
7878
byte(channels), 0, // Number of channels
7979
byte(sampleRate & 0xFF), // sample rate (low)
8080
byte(sampleRate >> 8 & 0xFF), // sample rate (mid)

pkg/transcoder/mulaw2wavlogpcm.go

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
package transcoder
2+
3+
import (
4+
"bufio"
5+
"fmt"
6+
"io"
7+
"log"
8+
"os"
9+
10+
"github.com/pablodz/sopro/pkg/audioconfig"
11+
"github.com/pablodz/sopro/pkg/cpuarch"
12+
"github.com/pablodz/sopro/pkg/encoding"
13+
"golang.org/x/term"
14+
)
15+
16+
func init() {
17+
18+
err := error(nil)
19+
WIDTH_TERMINAL, HEIGHT_TERMINAL, err = term.GetSize(0)
20+
if err != nil {
21+
log.Fatal(err)
22+
}
23+
}
24+
25+
// TODO: split functions for different sizes of files
26+
// Transcode an ulaw file to a wav file (large files supported)
27+
// https://raw.githubusercontent.com/corkami/pics/master/binary/WAV.png
28+
// http://www.topherlee.com/software/pcm-tut-wavformat.html
29+
func mulaw2WavLogpcm(in *AudioFileIn, out *AudioFileOut, transcoder *Transcoder) (err error) {
30+
31+
// read all the file
32+
if transcoder.Verbose {
33+
graphIn(in)
34+
}
35+
36+
// Get the WAV file configuration
37+
channels := out.Config.(audioconfig.WavConfig).Channels
38+
sampleRate := out.Config.(audioconfig.WavConfig).SampleRate
39+
bitsPerSample := out.Config.(audioconfig.WavConfig).BitDepth
40+
transcoder.SourceConfigs.Encoding = in.Config.(audioconfig.MulawConfig).Encoding
41+
transcoder.TargetConfigs.Encoding = out.Config.(audioconfig.WavConfig).Encoding
42+
transcoder.BitDepth = bitsPerSample
43+
44+
if transcoder.SourceConfigs.Endianness == cpuarch.NOT_FILLED && transcoder.TargetConfigs.Endianness == cpuarch.NOT_FILLED {
45+
transcoder.SourceConfigs.Endianness = cpuarch.LITTLE_ENDIAN // replace with cpuarch.GetEndianess()
46+
transcoder.TargetConfigs.Endianness = cpuarch.LITTLE_ENDIAN
47+
}
48+
49+
transcoder.Println(
50+
"\n[Format] ", in.Format, "=>", out.Format,
51+
"\n[Encoding] ", encoding.ENCODINGS[in.Config.(audioconfig.MulawConfig).Encoding], "=>", encoding.ENCODINGS[out.Config.(audioconfig.WavConfig).Encoding],
52+
"\n[Channels] ", in.Config.(audioconfig.MulawConfig).Channels, "=>", channels,
53+
"\n[SampleRate] ", in.Config.(audioconfig.MulawConfig).SampleRate, "=>", sampleRate, "kHz",
54+
"\n[BitDepth] ", in.Config.(audioconfig.MulawConfig).BitDepth, "=>", bitsPerSample, "bytes",
55+
"\n[Transcoder][Source][Encoding]", encoding.ENCODINGS[transcoder.SourceConfigs.Encoding],
56+
"\n[Transcoder][Target][Encoding]", encoding.ENCODINGS[transcoder.TargetConfigs.Encoding],
57+
"\n[Transcoder][BitDepth] ", transcoder.BitDepth,
58+
"\n[Transcoder][Endianness] ", cpuarch.ENDIANESSES[cpuarch.GetEndianess()],
59+
)
60+
61+
// Create a buffered reader and writer
62+
in.Reader = bufio.NewReader(in.Data)
63+
out.Writer = bufio.NewWriter(out.Data)
64+
out.Length = 0
65+
66+
headersWav := []byte{
67+
'R', 'I', 'F', 'F', // Chunk ID
68+
0, 0, 0, 0, // Chunk size
69+
'W', 'A', 'V', 'E', // Format
70+
'f', 'm', 't', ' ', // Sub-chunk 1 ID
71+
16, 0, 0, 0, // Sub-chunk 1 size
72+
audioconfig.WAVE_FORMAT_MULAW, 0, // Audio format (1 = PCM)
73+
byte(channels), 0, // Number of channels
74+
byte(sampleRate & 0xFF), // sample rate (low)
75+
byte(sampleRate >> 8 & 0xFF), // sample rate (mid)
76+
byte(sampleRate >> 16 & 0xFF), // sample rate (high)
77+
byte(sampleRate >> 24 & 0xFF), // sample rate (high)
78+
byte(sampleRate * channels * (bitsPerSample / 8) & 0xFF), // byte rate (low)
79+
byte(sampleRate * channels * (bitsPerSample / 8) >> 8 & 0xFF), // byte rate (mid)
80+
byte(sampleRate * channels * (bitsPerSample / 8) >> 16 & 0xFF), // byte rate (high)
81+
byte(sampleRate * channels * (bitsPerSample / 8) >> 24 & 0xFF), // byte rate (high)
82+
byte(channels * (bitsPerSample / 8)), 0, // block align
83+
byte(bitsPerSample), 0, // bits per sample
84+
'd', 'a', 't', 'a',
85+
0, 0, 0, 0,
86+
}
87+
out.Writer.Write(headersWav)
88+
out.Length += len(headersWav)
89+
90+
if transcoder.Verbose {
91+
audioconfig.PrintWavHeaders(headersWav)
92+
}
93+
94+
// Copy the data from the input file to the output file in chunks
95+
if err = TranscodeBytes(in, out, transcoder); err != nil {
96+
return fmt.Errorf("error converting bytes: %v", err)
97+
}
98+
99+
// Flush the output file
100+
if err := out.Writer.Flush(); err != nil {
101+
return fmt.Errorf("error flushing output file: %v", err)
102+
}
103+
transcoder.Println("Wrote", out.Length, "bytes to output file")
104+
105+
// Update the file size and data size fields
106+
fileFixer := out.Data.(*os.File)
107+
r, err := fileFixer.Seek(4, io.SeekStart)
108+
if err != nil {
109+
return fmt.Errorf("error seeking file: %v", err)
110+
}
111+
transcoder.Println("Seeked to:", r)
112+
fileSize := []byte{
113+
byte((out.Length - 8) & 0xff),
114+
byte((out.Length - 8) >> 8 & 0xff),
115+
byte((out.Length - 8) >> 16 & 0xff),
116+
byte((out.Length - 8) >> 24 & 0xff),
117+
}
118+
n, err := fileFixer.Write(fileSize)
119+
if err != nil {
120+
return fmt.Errorf("error writing file size: %v", err)
121+
}
122+
transcoder.Println("File size:", fmt.Sprintf("% 02x", fileSize), "bytes written:", n)
123+
dataSize := []byte{
124+
byte((out.Length - 44) & 0xff),
125+
byte((out.Length - 44) >> 8 & 0xff),
126+
byte((out.Length - 44) >> 16 & 0xff),
127+
byte((out.Length - 44) >> 24 & 0xff),
128+
}
129+
r, err = fileFixer.Seek(40, io.SeekStart)
130+
if err != nil {
131+
return fmt.Errorf("[2]error seeking file: %v", err)
132+
}
133+
transcoder.Println("Seeked to:", r)
134+
n, err = fileFixer.Write(dataSize)
135+
if err != nil {
136+
return fmt.Errorf("error writing data size: %v", err)
137+
}
138+
transcoder.Println("Data size:", fmt.Sprintf("% 02x", dataSize), "bytes written:", n)
139+
140+
if transcoder.Verbose {
141+
graphOut(in, out)
142+
}
143+
144+
return nil
145+
146+
}

pkg/transcoder/router.go

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,31 @@
11
package transcoder
22

3+
import (
4+
"fmt"
5+
6+
"github.com/pablodz/sopro/pkg/audioconfig"
7+
"github.com/pablodz/sopro/pkg/encoding"
8+
)
9+
10+
const ErrUnsupportedConversion = "unsupported conversion"
11+
312
func (t *Transcoder) Mulaw2Wav(in *AudioFileIn, out *AudioFileOut) error {
4-
return mulaw2Wav(in, out, t)
13+
14+
inSpace := in.Config.(audioconfig.MulawConfig).Encoding
15+
outSpace := out.Config.(audioconfig.WavConfig).Encoding
16+
17+
switch {
18+
case inSpace == encoding.SPACE_LOGARITHMIC && outSpace == encoding.SPACE_LINEAR:
19+
return mulaw2WavLpcm(in, out, t)
20+
case inSpace == encoding.SPACE_LOGARITHMIC && outSpace == encoding.SPACE_LOGARITHMIC:
21+
return mulaw2WavLogpcm(in, out, t)
22+
default:
23+
return fmt.Errorf(
24+
"%s: %s -> %s",
25+
ErrUnsupportedConversion,
26+
encoding.ENCODINGS[inSpace],
27+
encoding.ENCODINGS[outSpace],
28+
)
29+
30+
}
531
}

0 commit comments

Comments
 (0)