Skip to content

Commit 22c35e0

Browse files
committed
image/cas: Implement Engine.Put
This is a bit awkward. For writing a tar entry, we need to know both the name and size of the file ahead of time. The implementation in this commit accomplishes that by reading the Put content into a buffer, hashing and sizing the buffer, and then calling WriteTarEntryByName to create the entry. With a filesystem-backed CAS engine, we could avoid the buffer by writing the file to a temporary location with rolling hash and size tracking and then renaming the temporary file to the appropriate path. WriteTarEntryByName itself has awkward buffering to avoid dropping anything onto disk. It reads through its current file and writes the new tar into a buffer, and then writes that buffer back over its current file. There are a few issues with this: * It's a lot more work than you need if you're just appending a new entry to the end of the tarball. But writing the whole file into a buffer means we don't have to worry about the trailing blocks that mark the end of the tarball; that's all handled transparently for us by the Go implementation. And this implementation doesn't have to be performant (folks should not be using tarballs to back write-heavy engines). * It could leave you with a corrupted tarball if the caller dies mid-overwrite. Again, I expect folks will only ever write to a tarball when building a tarball for publishing. If the caller dies, you can just start over. Folks looking for a more reliable implementation should use a filesystem-backed engine. * It could leave you with dangling bytes at the end of the tarball. I couldn't find a Go invocation to truncate the file. Go does have an ftruncate(2) wrapper [1], but it doesn't seem to be exposed at the io.Reader/io.Writer/... level. So if you write a shorter file with the same name as the original, you may end up with some dangling bytes. 
cas.Engine.Put protects against excessive writes with a Get guard; after hashing the new data, Put tries to Get it from the tarball and only writes a new entry if it can't find an existing entry. This also protects the CAS engine from the dangling-bytes issue. The 0666 file modes and 0777 directory modes rely on the caller's umask to appropriately limit user/group/other permissions for the tarball itself and any content extracted to the filesystem from the tarball. The trailing slash manipulation (stripping before comparison and injecting before creation) is based on part of libarchive's description of old-style archives [2]: name Pathname, stored as a null-terminated string. Early tar implementations only stored regular files (including hardlinks to those files). One common early convention used a trailing "/" character to indicate a directory name, allowing directory permissions and owner information to be archived and restored. and POSIX ustar archives [3]: name, prefix ... The standard does not require a trailing / character on directory names, though most implementations still include this for compatibility reasons. [1]: https://golang.org/pkg/syscall/#Ftruncate [2]: https://github.com/libarchive/libarchive/wiki/ManPageTar5#old-style-archive-format [3]: https://github.com/libarchive/libarchive/wiki/ManPageTar5#posix-ustar-archives Signed-off-by: W. Trevor King <[email protected]>
1 parent d3b867c commit 22c35e0

File tree

5 files changed

+252
-3
lines changed

5 files changed

+252
-3
lines changed

cmd/oci-cas/main.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ func main() {
2828
}
2929

3030
cmd.AddCommand(newGetCmd())
31+
cmd.AddCommand(newPutCmd())
3132

3233
err := cmd.Execute()
3334
if err != nil {

cmd/oci-cas/put.go

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
// Copyright 2016 The Linux Foundation
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package main
16+
17+
import (
18+
"fmt"
19+
"os"
20+
21+
"github.com/opencontainers/image-tools/image/cas/layout"
22+
"github.com/spf13/cobra"
23+
"golang.org/x/net/context"
24+
)
25+
26+
// putCmd carries the parsed command-line state for the "put"
// subcommand.
type putCmd struct {
	// path is the single positional argument; it is handed to
	// layout.NewEngine to open the store.
	path string
}
29+
30+
func newPutCmd() *cobra.Command {
31+
state := &putCmd{}
32+
33+
return &cobra.Command{
34+
Use: "put PATH",
35+
Short: "Write a blob to the store",
36+
Long: "Read a blob from stdin, write it to the store, and print the digest to stdout.",
37+
Run: state.Run,
38+
}
39+
}
40+
41+
func (state *putCmd) Run(cmd *cobra.Command, args []string) {
42+
if len(args) != 1 {
43+
if err := cmd.Usage(); err != nil {
44+
fmt.Fprintln(os.Stderr, err)
45+
}
46+
os.Exit(1)
47+
}
48+
49+
state.path = args[0]
50+
51+
err := state.run()
52+
if err != nil {
53+
fmt.Fprintln(os.Stderr, err)
54+
os.Exit(1)
55+
}
56+
57+
os.Exit(0)
58+
}
59+
60+
func (state *putCmd) run() (err error) {
61+
ctx := context.Background()
62+
63+
engine, err := layout.NewEngine(ctx, state.path)
64+
if err != nil {
65+
return err
66+
}
67+
defer engine.Close()
68+
69+
digest, err := engine.Put(ctx, os.Stdin)
70+
if err != nil {
71+
return err
72+
}
73+
74+
n, err := fmt.Fprintln(os.Stdout, digest)
75+
if err != nil {
76+
return err
77+
}
78+
if n < len(digest) {
79+
return fmt.Errorf("wrote %d of %d bytes", n, len(digest))
80+
}
81+
82+
return nil
83+
}

image/cas/layout/main.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ import (
2828
// NewEngine instantiates an engine with the appropriate backend (tar,
2929
// HTTP, ...).
3030
func NewEngine(ctx context.Context, path string) (engine cas.Engine, err error) {
31-
file, err := os.Open(path)
31+
file, err := os.OpenFile(path, os.O_RDWR, 0)
3232
if err != nil {
3333
return nil, err
3434
}

image/cas/layout/tar.go

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,14 @@
1515
package layout
1616

1717
import (
18+
"bytes"
19+
"crypto/sha256"
20+
"encoding/hex"
1821
"errors"
1922
"fmt"
2023
"io"
2124
"io/ioutil"
25+
"os"
2226
"strings"
2327

2428
"github.com/opencontainers/image-tools/image/cas"
@@ -47,8 +51,30 @@ func NewTarEngine(ctx context.Context, file ReadWriteSeekCloser) (eng cas.Engine
4751

4852
// Put adds a new blob to the store.
4953
func (engine *TarEngine) Put(ctx context.Context, reader io.Reader) (digest string, err error) {
50-
// FIXME
51-
return "", errors.New("TarEngine.Put is not supported yet")
54+
data, err := ioutil.ReadAll(reader)
55+
if err != nil {
56+
return "", err
57+
}
58+
59+
size := int64(len(data))
60+
hash := sha256.Sum256(data)
61+
hexHash := hex.EncodeToString(hash[:])
62+
algorithm := "sha256"
63+
digest = fmt.Sprintf("%s:%s", algorithm, hexHash)
64+
65+
_, err = engine.Get(ctx, digest)
66+
if err == os.ErrNotExist {
67+
targetName := fmt.Sprintf("./blobs/%s/%s", algorithm, hexHash)
68+
reader = bytes.NewReader(data)
69+
err = layout.WriteTarEntryByName(ctx, engine.file, targetName, reader, &size)
70+
if err != nil {
71+
return "", err
72+
}
73+
} else if err != nil {
74+
return "", err
75+
}
76+
77+
return digest, nil
5278
}
5379

5480
// Get returns a reader for retrieving a blob from the store.

image/layout/tar.go

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,15 @@ package layout
1616

1717
import (
1818
"archive/tar"
19+
"bytes"
1920
"encoding/json"
2021
"errors"
2122
"fmt"
2223
"io"
24+
"io/ioutil"
2325
"os"
26+
"strings"
27+
"time"
2428

2529
"golang.org/x/net/context"
2630
)
@@ -56,6 +60,141 @@ func TarEntryByName(ctx context.Context, reader io.ReadSeeker, name string) (hea
5660
}
5761
}
5862

63+
// WriteTarEntryByName reads content from reader into an entry at name
64+
// in the tarball at file, replacing a previous entry with that name
65+
// (if any). The current implementation avoids writing a temporary
66+
// file to disk, but risks leaving a corrupted tarball if the program
67+
// crashes mid-write.
68+
//
69+
// To add an entry to a tarball (with Go's interface) you need to know
70+
// the size ahead of time. If you set the size argument,
71+
// WriteTarEntryByName will use that size in the entry header (and
72+
// Go's implementation will check to make sure it matches the length
73+
// of content read from reader). If unset, WriteTarEntryByName will
74+
// copy reader into a local buffer, measure its size, and then write
75+
// the entry header and content.
76+
func WriteTarEntryByName(ctx context.Context, file io.ReadWriteSeeker, name string, reader io.Reader, size *int64) (err error) {
77+
var buffer bytes.Buffer
78+
tarWriter := tar.NewWriter(&buffer)
79+
80+
components := strings.Split(name, "/")
81+
if components[0] != "." {
82+
return fmt.Errorf("tar name entry does not start with './': %q", name)
83+
}
84+
85+
var parents []string
86+
for i := 2; i < len(components); i++ {
87+
parents = append(parents, strings.Join(components[:i], "/"))
88+
}
89+
90+
_, err = file.Seek(0, os.SEEK_SET)
91+
if err != nil {
92+
return err
93+
}
94+
95+
tarReader := tar.NewReader(file)
96+
found := false
97+
for {
98+
select {
99+
case <-ctx.Done():
100+
return ctx.Err()
101+
default:
102+
}
103+
104+
var header *tar.Header
105+
header, err = tarReader.Next()
106+
if err == io.EOF {
107+
break
108+
} else if err != nil {
109+
return err
110+
}
111+
112+
dirName := strings.TrimRight(header.Name, "/")
113+
for i, parent := range parents {
114+
if dirName == parent {
115+
parents = append(parents[:i], parents[i+1:]...)
116+
break
117+
}
118+
}
119+
120+
if header.Name == name {
121+
found = true
122+
err = writeTarEntry(ctx, tarWriter, name, reader, size)
123+
} else {
124+
err = tarWriter.WriteHeader(header)
125+
if err != nil {
126+
return err
127+
}
128+
_, err = io.Copy(tarWriter, tarReader)
129+
}
130+
if err != nil {
131+
return err
132+
}
133+
}
134+
135+
if !found {
136+
now := time.Now()
137+
for _, parent := range parents {
138+
header := &tar.Header{
139+
Name: parent + "/",
140+
Mode: 0777,
141+
ModTime: now,
142+
Typeflag: tar.TypeDir,
143+
}
144+
err = tarWriter.WriteHeader(header)
145+
if err != nil {
146+
return err
147+
}
148+
}
149+
err = writeTarEntry(ctx, tarWriter, name, reader, size)
150+
if err != nil {
151+
return err
152+
}
153+
}
154+
155+
err = tarWriter.Close()
156+
if err != nil {
157+
return err
158+
}
159+
160+
_, err = file.Seek(0, os.SEEK_SET)
161+
if err != nil {
162+
return err
163+
}
164+
// FIXME: truncate file
165+
166+
_, err = buffer.WriteTo(file)
167+
return err
168+
}
169+
170+
// writeTarEntry appends a regular-file entry called name (mode 0666,
// modification time now) to writer and fills it with the content of
// reader.
//
// If size is nil, reader is first drained into memory to learn the
// entry size. If size is set, it is used for the header as-is and the
// content must be exactly that long: content that is too long fails
// inside the tar writer, and content that is too short is reported
// here, with the entry name, instead of surfacing later from the next
// WriteHeader or Close call on writer.
func writeTarEntry(ctx context.Context, writer *tar.Writer, name string, reader io.Reader, size *int64) (err error) {
	if size == nil {
		var data []byte
		data, err = ioutil.ReadAll(reader)
		if err != nil {
			return err
		}
		reader = bytes.NewReader(data)
		measured := int64(len(data))
		size = &measured
	}

	header := &tar.Header{
		Name:     name,
		Mode:     0666,
		Size:     *size,
		ModTime:  time.Now(),
		Typeflag: tar.TypeReg,
	}
	err = writer.WriteHeader(header)
	if err != nil {
		return err
	}

	copied, err := io.Copy(writer, reader)
	if err != nil {
		return err
	}
	if copied != *size {
		// The header promised more bytes than reader delivered.
		return fmt.Errorf("tar entry %q: wrote %d of %d bytes", name, copied, *size)
	}
	return nil
}
197+
59198
// CheckTarVersion walks a tarball pointed to by reader and returns an
60199
// error if oci-layout is missing or has unrecognized content.
61200
func CheckTarVersion(ctx context.Context, reader io.ReadSeeker) (err error) {

0 commit comments

Comments
 (0)