Skip to content

Commit 0e5d3d5

Browse files
authored
Merge pull request #80 from edgeware/file-encode-modes
Change/refactor: Replace file.EncodeVerbatim with more options
2 parents 7374d00 + 18d92e2 commit 0e5d3d5

File tree

6 files changed

+111
-113
lines changed

6 files changed

+111
-113
lines changed

README.md

Lines changed: 11 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ In the non-fragmented files, the members Ftyp, Moov, and Mdat are used.
2525
A fragmented `mp4.File` file can be a single init segment, one or more media segments, or a
2626
combination of both like a CMAF track which renders into a playable one-track asset.
2727

28-
The following high-level structures are used:
28+
For fragmented files, the following high-level structures are used:
2929

3030
* `InitSegment` contains an `ftyp` and `moov` box and provides the metadata for a fragmented file.
3131
It corresponds to a CMAF header
@@ -39,7 +39,7 @@ The typical child boxes are exported so that one can write paths such as
3939

4040
to access the (only) trun box in a fragment.
4141

42-
The codec currently supported are AVC/H.264 and AAC.
42+
The codecs currently supported are AVC/H.264, HEVC/H.265 and AAC.
4343

4444
## Usage for creating new fragmented files
4545

@@ -104,18 +104,15 @@ adds a box to its slice of children boxes `Children`, but also sets a specific
104104
member reference such as `Tfdt` to point to that box. If `Children` is manipulated
105105
directly, that link will not be valid.
106106

107-
## Automatic settings of values on Fragment.Encode
108-
It is possible to optimize the `TrunBox` size by writing default values in `TfhdBox`.
109-
To do this, one must analyze if, for example, the duration of all samples is the same.
110-
This is done by the method `TrafBox.OptimizeTfhdTrun` which changes its children `Tfhd`and `Trun`.
111-
Since this will change the size of boxes, it is important that this function is called
112-
before `Encode` on any parent box of `Traf`. In particular, this method is called automatically
113-
when running `Fragment.Encode`.
114-
115-
Another value which is automatically set by `Moof.Encode` is `MoofBox.Traf.Trun.DataOffset`.
116-
This value is the address of the first media sample relative to the start of the `MoofBox` start.
117-
It therefore depends on the size of `MoofBox` and is unknown until all values are in place so that
118-
it can be calculated. It is set to `MoofBox.Size()+8`.
107+
## Encoding modes and optimizations
108+
For fragmented files, one can choose to either encode all boxes in a file, or only encode
109+
the ones which are included in the init and media segments. The attribute that controls this
110+
is called `FragEncMode`.
111+
Another attribute `EncOptimize` controls possible optimizations of the file encoding process.
112+
Currently there is only one possible optimization called `OptimizeTrun`.
113+
It can reduce the size of the `TrunBox` by finding and writing default
114+
values in the `TfhdBox` and omitting the corresponding values from the `TrunBox`.
115+
Note that this may change the size of all ancestor boxes of `trun`.
119116

120117
## Sample Number Offset
121118
Following the ISOBMFF standard, sample numbers and other numbers start at 1 (one-based).

examples/multitrack/multitrack_test.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,8 @@ func TestGetMultiTrackSamples(t *testing.T) {
8080
}
8181

8282
var buf bytes.Buffer
83-
parsedMp4.EncodeVerbatim = true
83+
parsedMp4.FragEncMode = mp4.EncModeBoxTree
84+
parsedMp4.EncOptimize = mp4.OptimizeNone
8485
err = parsedMp4.Encode(&buf)
8586
if err != nil {
8687
t.Error(err)

mp4/file.go

Lines changed: 67 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"fmt"
55
"io"
66
"os"
7+
"strings"
78
)
89

910
// File - an MPEG-4 file asset
@@ -22,22 +23,49 @@ import (
2223
// To Encode the same data as Decoded, this flag must therefore be set.
2324
// In all cases, Children contain all top-level boxes
2425
type File struct {
25-
Ftyp *FtypBox
26-
Moov *MoovBox
27-
Mdat *MdatBox // Only used for non-fragmented files
28-
Init *InitSegment // Init data (ftyp + moov for fragmented file)
29-
Sidx *SidxBox // SidxBox for a DASH OnDemand file
30-
Segments []*MediaSegment // Media segments
31-
Children []Box // All top-level boxes in order
32-
EncodeVerbatim bool // Set to encode box by box without fragment optimizations
33-
isFragmented bool
26+
Ftyp *FtypBox
27+
Moov *MoovBox
28+
Mdat *MdatBox // Only used for non-fragmented files
29+
Init *InitSegment // Init data (ftyp + moov for fragmented file)
30+
Sidx *SidxBox // SidxBox for a DASH OnDemand file
31+
Segments []*MediaSegment // Media segments
32+
Children []Box // All top-level boxes in order
33+
FragEncMode EncFragFileMode // Determine how fragmented files are encoded
34+
EncOptimize EncOptimize // Bit field with optimizations being done at encoding
35+
isFragmented bool
36+
}
37+
38+
type EncFragFileMode byte
39+
40+
const (
41+
EncModeSegment = EncFragFileMode(0) // Only encode boxes that are part of Init and MediaSegments
42+
EncModeBoxTree = EncFragFileMode(1) // Encode all boxes in file tree
43+
)
44+
45+
type EncOptimize uint32
46+
47+
const (
48+
OptimizeNone = EncOptimize(0)
49+
OptimizeTrun = EncOptimize(1 << 0)
50+
)
51+
52+
func (eo EncOptimize) String() string {
53+
var optList []string
54+
msg := "OptimizeNone"
55+
if eo&OptimizeTrun != 0 {
56+
optList = append(optList, "OptimizeTrun")
57+
}
58+
if len(optList) > 0 {
59+
msg = strings.Join(optList, " | ")
60+
}
61+
return msg
3462
}
3563

3664
// NewFile - create MP4 file
3765
func NewFile() *File {
3866
return &File{
39-
Children: []Box{},
40-
Segments: []*MediaSegment{},
67+
FragEncMode: EncModeSegment,
68+
EncOptimize: OptimizeNone,
4169
}
4270
}
4371

@@ -205,35 +233,41 @@ func (f *File) DumpWithSampleData(w io.Writer, specificBoxLevels string) error {
205233
// Encode - encode a file to a Writer
206234
// Fragmented files are encoded based on InitSegment and MediaSegments, unless FragEncMode is EncModeBoxTree.
207235
func (f *File) Encode(w io.Writer) error {
208-
if f.isFragmented && !f.EncodeVerbatim {
209-
if f.Init != nil {
210-
err := f.Init.Encode(w)
211-
if err != nil {
212-
return err
213-
}
214-
}
215-
if f.Sidx != nil {
216-
err := f.Sidx.Encode(w)
217-
if err != nil {
218-
return err
236+
if f.isFragmented {
237+
switch f.FragEncMode {
238+
case EncModeSegment:
239+
if f.Init != nil {
240+
err := f.Init.Encode(w)
241+
if err != nil {
242+
return err
243+
}
219244
}
220-
}
221-
if !f.EncodeVerbatim {
222-
for _, seg := range f.Segments {
223-
err := seg.Encode(w)
245+
if f.Sidx != nil {
246+
err := f.Sidx.Encode(w)
224247
if err != nil {
225248
return err
226249
}
227250
}
228-
return nil
229-
}
230-
// Fragmented and Verbatim. Don't optimize trun
231-
for _, seg := range f.Segments {
232-
err := seg.EncodeVerbatim(w)
233-
if err != nil {
234-
return err
251+
if f.EncOptimize&OptimizeTrun != 0 {
252+
for _, seg := range f.Segments {
253+
seg.EncOptimize = f.EncOptimize
254+
err := seg.Encode(w)
255+
if err != nil {
256+
return err
257+
}
258+
}
259+
}
260+
case EncModeBoxTree:
261+
for _, b := range f.Children {
262+
err := b.Encode(w)
263+
if err != nil {
264+
return err
265+
}
235266
}
267+
default:
268+
return fmt.Errorf("Unknown FragEncMode=%d", f.FragEncMode)
236269
}
270+
return nil
237271
}
238272
// Progressive file
239273
for _, b := range f.Children {

mp4/fragment.go

Lines changed: 11 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,12 @@ import (
99

1010
// Fragment - MP4 Fragment ([prft] + moof + mdat)
1111
type Fragment struct {
12-
Prft *PrftBox
13-
Moof *MoofBox
14-
Mdat *MdatBox
15-
Children []Box // All top-level boxes in order
16-
nextTrunNr uint32 // To handle multi-trun cases
12+
Prft *PrftBox
13+
Moof *MoofBox
14+
Mdat *MdatBox
15+
Children []Box // All top-level boxes in order
16+
nextTrunNr uint32 // To handle multi-trun cases
17+
EncOptimize EncOptimize // Bit field with optimizations being done at encoding
1718
}
1819

1920
// NewFragment - New empty one-track MP4 Fragment
@@ -206,9 +207,11 @@ func (f *Fragment) Encode(w io.Writer) error {
206207
return fmt.Errorf("moof not set in fragment")
207208
}
208209
traf := f.Moof.Traf
209-
err := traf.OptimizeTfhdTrun()
210-
if err != nil {
211-
return err
210+
if f.EncOptimize&OptimizeTrun != 0 {
211+
err := traf.OptimizeTfhdTrun()
212+
if err != nil {
213+
return err
214+
}
212215
}
213216
if f.Mdat == nil {
214217
return fmt.Errorf("mdat not set in fragment")
@@ -253,21 +256,3 @@ func (f *Fragment) SetTrunDataOffsets() {
253256
dataOffset += trun.SizeOfData()
254257
}
255258
}
256-
257-
// EncodeVerbatim - write fragment without trun optimization via writer
258-
func (f *Fragment) EncodeVerbatim(w io.Writer) error {
259-
if f.Moof == nil {
260-
return fmt.Errorf("moof not set in fragment")
261-
}
262-
if f.Mdat == nil {
263-
return fmt.Errorf("mdat not set in fragment")
264-
}
265-
f.SetTrunDataOffsets()
266-
for _, b := range f.Children {
267-
err := b.Encode(w)
268-
if err != nil {
269-
return err
270-
}
271-
}
272-
return nil
273-
}

mp4/mediasegment.go

Lines changed: 8 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,18 @@ import (
66

77
// MediaSegment - MP4 Media Segment
88
type MediaSegment struct {
9-
Styp *StypBox
10-
Sidx *SidxBox // Sidx for a segment
11-
Fragments []*Fragment
9+
Styp *StypBox
10+
Sidx *SidxBox // Sidx for a segment
11+
Fragments []*Fragment
12+
EncOptimize EncOptimize
1213
}
1314

1415
// NewMediaSegment - New empty MediaSegment
1516
func NewMediaSegment() *MediaSegment {
1617
return &MediaSegment{
17-
Styp: CreateStyp(),
18-
Fragments: []*Fragment{},
18+
Styp: CreateStyp(),
19+
Fragments: []*Fragment{},
20+
EncOptimize: OptimizeNone,
1921
}
2022
}
2123

@@ -44,6 +46,7 @@ func (s *MediaSegment) Encode(w io.Writer) error {
4446
}
4547
}
4648
for _, f := range s.Fragments {
49+
f.EncOptimize = s.EncOptimize
4750
err := f.Encode(w)
4851
if err != nil {
4952
return err
@@ -113,26 +116,3 @@ func (s *MediaSegment) Fragmentify(timescale uint64, trex *TrexBox, duration uin
113116
}
114117
return outFragments, nil
115118
}
116-
117-
// EncodeVerbatim - Write MediaSegment via writer with verbatim Fragments
118-
func (s *MediaSegment) EncodeVerbatim(w io.Writer) error {
119-
if s.Styp != nil {
120-
err := s.Styp.Encode(w)
121-
if err != nil {
122-
return err
123-
}
124-
}
125-
if s.Sidx != nil {
126-
err := s.Sidx.Encode(w)
127-
if err != nil {
128-
return err
129-
}
130-
}
131-
for _, f := range s.Fragments {
132-
err := f.EncodeVerbatim(w)
133-
if err != nil {
134-
return err
135-
}
136-
}
137-
return nil
138-
}

mp4/mediasegment_test.go

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,8 @@ func TestMediaSegmentFragmentation(t *testing.T) {
3535
}
3636

3737
var bufInSeg bytes.Buffer
38-
f.EncodeVerbatim = true
38+
f.EncOptimize = OptimizeNone // Avoid trun optimization
39+
f.FragEncMode = EncModeBoxTree
3940
err = f.Encode(&bufInSeg)
4041
if err != nil {
4142
t.Error(err)
@@ -71,6 +72,7 @@ func TestMediaSegmentFragmentation(t *testing.T) {
7172

7273
var bufFrag bytes.Buffer
7374
fragmentedSegment := NewMediaSegment()
75+
fragmentedSegment.EncOptimize = OptimizeTrun
7476
fragmentedSegment.Styp = f.Segments[0].Styp
7577
fragmentedSegment.Fragments = fragments
7678

@@ -102,7 +104,6 @@ func TestMediaSegmentFragmentation(t *testing.T) {
102104
}
103105

104106
func TestDoubleDecodeEncodeOptimize(t *testing.T) {
105-
encodeVerbatim := false
106107
inFile := "testdata/1.m4s"
107108

108109
fd, err := os.Open(inFile)
@@ -111,17 +112,17 @@ func TestDoubleDecodeEncodeOptimize(t *testing.T) {
111112
}
112113
defer fd.Close()
113114

114-
enc1 := decodeEncode(t, fd, encodeVerbatim)
115+
enc1 := decodeEncode(t, fd, OptimizeTrun)
115116
buf1 := bytes.NewBuffer(enc1)
116-
enc2 := decodeEncode(t, buf1, encodeVerbatim)
117+
enc2 := decodeEncode(t, buf1, OptimizeTrun)
117118
diff := deep.Equal(enc2, enc1)
118119
if diff != nil {
119120
t.Errorf("Second write gives diff %s", diff)
120121
}
121122
}
122123

123124
func TestDoubleDecodeEncodeNoOptimize(t *testing.T) {
124-
encodeVerbatim := true
125+
125126
inFile := "testdata/1.m4s"
126127

127128
fd, err := os.Open(inFile)
@@ -130,23 +131,23 @@ func TestDoubleDecodeEncodeNoOptimize(t *testing.T) {
130131
}
131132
defer fd.Close()
132133

133-
enc1 := decodeEncode(t, fd, encodeVerbatim)
134+
enc1 := decodeEncode(t, fd, OptimizeNone)
134135
buf1 := bytes.NewBuffer(enc1)
135-
enc2 := decodeEncode(t, buf1, encodeVerbatim)
136+
enc2 := decodeEncode(t, buf1, OptimizeNone)
136137
diff := deep.Equal(enc2, enc1)
137138
if diff != nil {
138139
t.Errorf("Second write gives diff %s", diff)
139140
}
140141
}
141142

142-
func decodeEncode(t *testing.T, r io.Reader, encodeVerbatim bool) []byte {
143+
func decodeEncode(t *testing.T, r io.Reader, optimize EncOptimize) []byte {
143144
f, err := DecodeFile(r)
144145
if err != nil {
145146
t.Error(err)
146147
}
147148

148149
buf := bytes.Buffer{}
149-
f.EncodeVerbatim = encodeVerbatim
150+
f.EncOptimize = optimize
150151
err = f.Encode(&buf)
151152
if err != nil {
152153
t.Error(err)
@@ -170,7 +171,7 @@ func TestMoofEncrypted(t *testing.T) {
170171
}
171172

172173
var bufOut bytes.Buffer
173-
f.EncodeVerbatim = true
174+
f.FragEncMode = EncModeBoxTree
174175
err = f.Encode(&bufOut)
175176
if err != nil {
176177
t.Error(err)
@@ -206,7 +207,7 @@ func BenchmarkDecodeEncode(b *testing.B) {
206207
buf := bytes.NewBuffer(raw)
207208
f, _ := DecodeFile(buf)
208209
var bufInSeg bytes.Buffer
209-
f.EncodeVerbatim = true
210+
f.FragEncMode = EncModeBoxTree
210211
_ = f.Encode(&bufInSeg)
211212
}
212213
}

0 commit comments

Comments
 (0)