Skip to content

Commit 3e59987

Browse files
author
tobyroseman
committed
Update to version 1.6
1 parent f54302f commit 3e59987

File tree

7 files changed

+33
-24
lines changed

7 files changed

+33
-24
lines changed

oss_local_scripts/conda_requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,5 +16,6 @@ scikit-learn==0.16.1
1616
scipy==0.15.1
1717
six==1.9.0
1818
tornado==4.1
19+
wheel==0.24.0
1920
statsmodels
2021
PIL

oss_src/sframe/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,6 @@ make_copy_target(_local_sys_util_
5555
pylambda
5656
)
5757
add_dependencies(spark_unity _local_sys_util_)
58-
file(DOWNLOAD http://s3-us-west-2.amazonaws.com/glbin-engine/spark_unity_0.1.jar ${CMAKE_CURRENT_BINARY_DIR}/spark_unity.jar
59-
EXPECTED_MD5 f5e0653648f0b474cee57bd134d9ee83)
58+
file(DOWNLOAD http://s3-us-west-2.amazonaws.com/glbin-engine/spark_unity_0.3.jar ${CMAKE_CURRENT_BINARY_DIR}/spark_unity.jar
59+
EXPECTED_MD5 b6261c8614da3da1c89dce64fed09420)
6060

oss_src/sframe/spark_unity.cpp

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -625,12 +625,8 @@ int concat_main(std::string & _output_directory, std::string & _prefix) {
625625

626626
}
627627

628-
auto first_sframe_ptr = std::make_shared<sframe>(list_filenames[0]);
629-
sframe append_sframe;
630-
append_sframe.open_for_write(first_sframe_ptr->column_names(),first_sframe_ptr->column_types(), "", 1, false);
631-
append_sframe.close();
632-
633-
for(int index=0;index<list_filenames.size();index++) {
628+
sframe append_sframe(list_filenames[0]);
629+
for(int index=1;index<list_filenames.size();index++) {
634630
auto sframe_ptr = std::make_shared<sframe>(list_filenames[index]);
635631
append_sframe = append_sframe.append(*sframe_ptr);
636632
}

oss_src/unity/python/doc/source/graphlab.data_structures.connectors.rst

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,6 @@ Spark RDD
4040

4141
SFrame.from_rdd
4242
SFrame.to_rdd
43-
SFrame.to_schema_rdd
4443

4544
SQL Database
4645
----------------

oss_src/unity/python/sframe/data_structures/image.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,7 @@ def show(self):
223223
Displays the image. Requires PIL/Pillow.
224224
225225
Alternatively, you can create an :class:`graphlab.SArray` of this image
226-
and use :func:`graphlab.SArray.show()`
226+
and use :py:func:`graphlab.SArray.show()`
227227
228228
See Also
229229
--------

oss_src/unity/python/sframe/data_structures/sframe.py

Lines changed: 25 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1697,10 +1697,11 @@ def to_spark_dataframe(self,sc,sql,number_of_partitions=4):
16971697
16981698
>>> from pyspark import SparkContext, SQLContext
16991699
>>> from graphlab import SFrame
1700+
>>> from pyspark.sql import SQLContext
17001701
>>> sc = SparkContext('local')
1701-
>>> sqlc = SQLContext(sc)
1702+
>>> sql = SQLContext(sc)
17021703
>>> sf = SFrame({'x': [1,2,3], 'y': ['fish', 'chips', 'salad']})
1703-
>>> df = sf.to_spark_dataframe(sc, sqlc)
1704+
>>> df = sf.to_spark_dataframe(sc, sql)
17041705
>>> df.show()
17051706
x y
17061707
1 fish
@@ -1902,15 +1903,28 @@ def from_rdd(cls, rdd, cur_sc):
19021903
df, tmp_loc, finalSFramePrefix)
19031904
else:
19041905
if encoding == 'utf8':
1905-
finalSFrameFilename = graphlab_util_ref.toSFrame(
1906-
rdd._jrdd.rdd(),tmp_loc, finalSFramePrefix)
1907-
else:
1908-
# Prep the additional arguments to feed into the pySparkToSFrame function in Java
1909-
# that will call the spark_unity binary which does the actual encoding
1910-
additiona_args = os.path.join(" --encoding=%s " % encoding +\
1911-
" --type=rdd ")
1912-
finalSFrameFilename = graphlab_util_ref.pySparkToSFrame(
1913-
rdd._jrdd, tmp_loc, finalSFramePrefix, additiona_args)
1906+
## TODO: This is a temporary solution. Here we are completely bypassing
1907+
## toSFrame() codepath when encoding is 'utf8'. This is because Spark 1.5 raises an error
1908+
## caused by a closure-cleaning issue with deeply nested functions.
1909+
1910+
def f(iterator):
1911+
for obj in iterator:
1912+
yield obj.encode("utf-8")
1913+
1914+
rdd = rdd.mapPartitions(f)
1915+
encoding = "batch"
1916+
if(rdd._jrdd_deserializer.__class__.__name__ == 'PickleSerializer'):
1917+
encoding = "pickle"
1918+
1919+
#finalSFrameFilename = graphlab_util_ref.toSFrame(
1920+
# rdd._jrdd.rdd(),tmp_loc, finalSFramePrefix)
1921+
#else:
1922+
# Prep the additional arguments to feed into the pySparkToSFrame function in Java
1923+
# that will call the spark_unity binary which does the actual encoding
1924+
additiona_args = os.path.join(" --encoding=%s " % encoding +\
1925+
" --type=rdd ")
1926+
finalSFrameFilename = graphlab_util_ref.pySparkToSFrame(
1927+
rdd._jrdd, tmp_loc, finalSFramePrefix, additiona_args)
19141928

19151929
# Load and return the sframe
19161930
sf = SFrame()

oss_src/unity/python/sframe/toolkits/_model.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -427,10 +427,9 @@ def show(self, view=None, model_type='base'):
427427
view : str, optional
428428
The name of the Model view to show. Can be one of:
429429
430-
- 'Summary': The summary description of a Model.
430+
- Summary: Shows statistics of the training process, such as the size of the data and the time cost. The summary also shows the parameters and settings for the model training process, if available.
431+
- Evaluation: Shows the precision-recall plot as a line chart. A tooltip is provided for pointwise analysis: it shows the precision and recall values at whichever cutoff value the mouse points to.
431432
432-
- 'Evaluation': A visual representation of the evaluation results for
433-
a Model.
434433
435434
Returns
436435
-------

0 commit comments

Comments
 (0)