Skip to content
This repository was archived by the owner on Dec 21, 2023. It is now read-only.

Commit cf86bc5

Browse files
Hoyt KoepkeHoyt Koepke
andauthored
Added user-controlled sframe and sarray pickling functionality. (#3327)
Added user-controlled SFrame pickling functionality. Because the SFrame data structure is inherently disk-backed, pickling is enabled by having the user specify a directory in which to save the serialized artifacts. Pickling saves a copy of the SFrame under a UUID-based name in this directory, and unpickling loads it back by that name. Co-authored-by: Hoyt Koepke <[email protected]>
1 parent 72255a0 commit cf86bc5

File tree

5 files changed

+120
-1
lines changed

5 files changed

+120
-1
lines changed

src/python/turicreate/_cython/cy_sframe.pyx

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,9 @@ from .cy_model cimport create_model_from_proxy
3232
from .cy_cpp_utils cimport str_to_cpp, cpp_to_str
3333
from .cy_cpp_utils cimport to_vector_of_strings, from_vector_of_strings
3434
from .cy_cpp_utils cimport to_nested_vectors_of_strings, dict_to_string_string_map
35+
36+
from ..data_structures.serialization import _safe_serialization_directory
37+
import os
3538

3639
cdef create_proxy_wrapper_from_existing_proxy(const unity_sframe_base_ptr& proxy):
3740
if proxy.get() == NULL:
@@ -53,6 +56,9 @@ cdef pydict_from_gl_error_map(gl_error_map& d):
5356
inc(it)
5457
return ret
5558

59+
60+
61+
5662
cdef class UnitySFrameProxy:
5763

5864
def __cinit__(self, do_not_allocate=None):
@@ -375,3 +381,23 @@ cdef class UnitySFrameProxy:
375381
cpdef delete_on_close(self):
376382
with nogil:
377383
self.thisptr.delete_on_close()
384+
385+
386+
def __reduce__(self):
    """
    Pickle support for the proxy.

    Saves this SFrame under a freshly generated UUID name inside the
    configured serialization directory and returns the
    ``(callable, args)`` pair that pickle stores.  Only the UUID string
    goes into the pickle; the directory itself is looked up again when
    unpickling.
    """
    import uuid

    # Looked up first so that nothing is written when no directory has
    # been configured (the helper raises in that case).
    target_root = _safe_serialization_directory()
    handle = str(uuid.uuid4())
    destination = os.path.join(target_root, handle)

    self.save(destination)

    return (_UnitySFrame_unpickler, (handle,))
394+
395+
def _UnitySFrame_unpickler(sframe_id):
    """
    Rebuild a UnitySFrameProxy from the id stored in a pickle.

    ``sframe_id`` is the name of the saved SFrame inside the currently
    configured serialization directory; it is the value emitted by
    ``UnitySFrameProxy.__reduce__``.
    """
    restored = UnitySFrameProxy()

    # The directory is resolved at load time, so it may differ from the
    # one that was active when the object was pickled.
    source_root = _safe_serialization_directory()
    index_path = os.path.join(source_root, sframe_id)

    restored.load_from_sframe_index(index_path)
    return restored
403+

src/python/turicreate/data_structures/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,12 @@
1212
from __future__ import division as _
1313
from __future__ import absolute_import as _
1414

15-
__all__ = ["sframe", "sarray", "sgraph", "sketch", "image"]
15+
__all__ = ["sframe", "sarray", "sgraph", "sketch", "image", "serialization"]
1616

1717
from . import image
1818
from . import sframe
1919
from . import sarray
2020
from . import sgraph
2121
from . import sketch
22+
from . import serialization
23+
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
2+
# -*- coding: utf-8 -*-
3+
# Copyright © 2020 Apple Inc. All rights reserved.
4+
#
5+
# Use of this source code is governed by a BSD-3-clause license that can
6+
# be found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause
7+
8+
# SFrames require a disk-backed file or directory to work with. This directory
9+
# has to be present to allow for serialization or deserialization.
10+
__serialization_directory = None
11+
12+
13+
def enable_sframe_serialization(serialization_directory):
    """
    Enables pickling of SFrames through the use of a user-set directory to
    store the objects.  This directory must be set through this method for
    deserialization to work.  It may be a different directory for
    serialization and deserialization.

    When an SFrame is pickled, a copy of the SFrame is saved in this
    directory and a reference handle to a randomly generated subdirectory is
    saved in the pickle.  As long as that reference handle is present in the
    set directory, then deserialization should work.

    Note that the pickle files themselves do not contain the data -- both the
    directory contents and the pickle need to be present for deserialization
    to work.

    Parameters
    ----------
    serialization_directory : str or None
        Path of the directory to use.  ``~`` is expanded and the path is made
        absolute.  The directory is created if it does not exist.  Passing
        None disables SFrame pickling.

    Raises
    ------
    ValueError
        If the path exists but is not a directory.  The previously
        configured directory (if any) is left unchanged.
    """

    import os

    global __serialization_directory

    if serialization_directory is None:
        __serialization_directory = None
        return

    # Validate into a local first: the module-level setting must only be
    # updated once the directory is known to be usable, so that a failed
    # call does not leave an invalid path configured.
    directory = os.path.abspath(os.path.expanduser(serialization_directory))

    # Make sure the directory exists.
    if not os.path.exists(directory):

        # Attempt to create it
        os.makedirs(directory)

    # Is it a directory?
    elif not os.path.isdir(directory):
        raise ValueError("%s is not a directory." % directory)

    __serialization_directory = directory
47+
48+
49+
50+
def get_serialization_directory():
    """
    Returns the directory currently configured for SFrame serialization,
    or None if no directory has been set.
    """
    # Read-only access to the module-level setting; no `global`
    # declaration is needed just to read it.
    return __serialization_directory
57+
58+
def _safe_serialization_directory():
    """
    Returns the configured serialization directory, raising
    pickle.PickleError if none has been set.
    """
    from pickle import PickleError

    configured = __serialization_directory

    if configured is not None:
        return configured

    raise PickleError("Serialization directory not set to enable pickling or unpickling. "
                      "Set using turicreate.data_structures.serialization.enable_sframe_serialization().")

src/python/turicreate/test/test_sframe.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -642,6 +642,29 @@ def test_save_to_csv(self):
642642
f.close()
643643
os.unlink(f.name)
644644

645+
def test_pickling(self):
    """
    Pickling an SFrame must fail while no serialization directory is set,
    and must round-trip once one has been enabled.
    """
    import pickle
    from ..data_structures import serialization

    X = generate_random_sframe(100, "ncc")

    # The serialization directory is process-wide state: remember what was
    # configured so it can be restored however this test exits.
    previous_directory = serialization.get_serialization_directory()

    with util.TempDirectory() as f:
        try:
            # Make the "not enabled" precondition explicit rather than
            # relying on no earlier test having enabled serialization.
            serialization.enable_sframe_serialization(None)
            self.assertRaises(pickle.PickleError, pickle.dumps, X)

            serialization.enable_sframe_serialization(f)

            s = pickle.dumps(X)
            Y = pickle.loads(s)

            _assert_sframe_equal(X, Y)
        finally:
            # Always undo the global change; otherwise a failed assertion
            # would leave later tests pointing at a directory that is
            # removed when the `with` block exits.
            serialization.enable_sframe_serialization(previous_directory)
666+
667+
645668
def test_save_to_json(self):
646669
f = tempfile.NamedTemporaryFile(suffix=".json", delete=False)
647670
sf = SFrame(data=self.dataframe, format="dataframe")

src/visualization/client/Turi Create Visualization.xcodeproj/project.pbxproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -651,6 +651,7 @@
651651
GCC_WARN_UNUSED_FUNCTION = YES;
652652
GCC_WARN_UNUSED_VARIABLE = YES;
653653
MTL_ENABLE_DEBUG_INFO = NO;
654+
ONLY_ACTIVE_ARCH = YES;
654655
SDKROOT = macosx;
655656
SWIFT_OPTIMIZATION_LEVEL = "-Owholemodule";
656657
};

0 commit comments

Comments
 (0)