Skip to content

Commit cc02644

Browse files
authored
Merge pull request #562 from s22s/feature/pyspark-3.1
Python binding updates for PySpark 3.1
2 parents 10a7fa3 + b9d2344 commit cc02644

File tree

32 files changed

+519
-349
lines changed

32 files changed

+519
-349
lines changed

.circleci/.dockerignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
*
2+
!requirements-conda.txt
3+
!fix-permissions

.circleci/Dockerfile

Lines changed: 19 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,63 +1,35 @@
1-
FROM circleci/openjdk:8-jdk
2-
3-
ENV MINICONDA_VERSION=4.8.2 \
4-
MINICONDA_MD5=87e77f097f6ebb5127c77662dfc3165e \
5-
CONDA_VERSION=4.8.2 \
6-
CONDA_DIR=/opt/conda \
7-
PYTHON_VERSION=3.7.7
1+
FROM circleci/openjdk:11-jdk
2+
#LABEL org.opencontainers.image.source=https://github.com/locationtech/rasterframes
83

94
USER root
105

11-
ENV PATH=$CONDA_DIR/bin:$PATH
12-
13-
# circleci is 3434
14-
COPY --chown=3434:3434 fix-permissions /tmp
15-
6+
# See: https://docs.conda.io/projects/conda/en/latest/user-guide/install/rpm-debian.html
167
RUN \
17-
apt-get update && \
18-
apt-get install -yq --no-install-recommends \
19-
sudo \
20-
wget \
21-
bzip2 \
22-
file \
23-
libtinfo5 \
24-
ca-certificates \
25-
gettext-base \
26-
locales && \
27-
apt-get clean && \
28-
rm -rf /var/lib/apt/lists/*
8+
curl -s https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
9+
install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
10+
gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
11+
echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list
2912

3013
RUN \
31-
cd /tmp && \
32-
mkdir -p $CONDA_DIR && \
33-
wget --quiet https://repo.continuum.io/miniconda/Miniconda3-py37_${MINICONDA_VERSION}-Linux-x86_64.sh && \
34-
echo "${MINICONDA_MD5} *Miniconda3-py37_${MINICONDA_VERSION}-Linux-x86_64.sh" | md5sum -c - && \
35-
/bin/bash Miniconda3-py37_${MINICONDA_VERSION}-Linux-x86_64.sh -f -b -p $CONDA_DIR && \
36-
rm Miniconda3-py37_${MINICONDA_VERSION}-Linux-x86_64.sh && \
37-
conda config --system --set auto_update_conda false && \
38-
conda config --system --set show_channel_urls true && \
39-
conda config --system --set channel_priority strict && \
40-
if [ ! $PYTHON_VERSION = 'default' ]; then conda install --yes python=$PYTHON_VERSION; fi && \
41-
conda list python | grep '^python ' | tr -s ' ' | cut -d '.' -f 1,2 | sed 's/$/.*/' >> $CONDA_DIR/conda-meta/pinned && \
42-
conda install --quiet --yes conda && \
43-
conda install --quiet --yes pip && \
44-
pip config set global.progress_bar off && \
45-
echo "$CONDA_DIR/lib" > /etc/ld.so.conf.d/conda.conf && \
46-
conda clean --all --force-pkgs-dirs --yes --quiet && \
47-
sh /tmp/fix-permissions $CONDA_DIR 2> /dev/null
14+
apt-get update && \
15+
apt-get install -yq --no-install-recommends conda && \
16+
apt-get clean && \
17+
rm -rf /var/lib/apt/lists/*
4818

49-
COPY requirements-conda.txt /tmp/
19+
ENV CONDA_DIR=/opt/conda
20+
ENV PATH=$CONDA_DIR/bin:$PATH
5021

22+
COPY requirements-conda.txt fix-permissions /tmp
5123
RUN \
52-
conda install --channel conda-forge --no-channel-priority --freeze-installed \
53-
--file /tmp/requirements-conda.txt && \
24+
conda install --quiet --yes --channel=conda-forge --file=/tmp/requirements-conda.txt && \
25+
echo "$CONDA_DIR/lib" > /etc/ld.so.conf.d/conda.conf && \
26+
ldconfig && \
5427
conda clean --all --force-pkgs-dirs --yes --quiet && \
55-
sh /tmp/fix-permissions $CONDA_DIR 2> /dev/null && \
56-
ldconfig 2> /dev/null
28+
sh /tmp/fix-permissions $CONDA_DIR
29+
5730

5831
# Work-around for pyproj issue https://github.com/pyproj4/pyproj/issues/415
5932
ENV PROJ_LIB=/opt/conda/share/proj
6033

6134
USER 3434
62-
6335
WORKDIR /home/circleci

.circleci/Makefile

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,27 @@
1-
IMAGE_NAME=miniconda-gdal
2-
VERSION=latest
3-
HOST=docker.pkg.github.com
4-
REPO=${HOST}/locationtech/rasterframes
5-
FULL_NAME=${REPO}/${IMAGE_NAME}:${VERSION}
1+
IMAGE_NAME=circleci-openjdk-conda-gdal
2+
SHA=$(shell git log -n1 --format=format:"%H" | cut -c 1-7)
3+
VERSION?=$(SHA)
4+
HOST=docker.io
5+
REPO=$(HOST)/s22s
6+
FULL_NAME=$(REPO)/$(IMAGE_NAME):$(VERSION)
67

7-
all: build login push
8+
.DEFAULT_GOAL := help
9+
help:
10+
# http://marmelab.com/blog/2016/02/29/auto-documented-makefile.html
11+
@echo "Usage: make [target]"
12+
@echo "Targets: "
13+
@grep -E '^[a-zA-Z0-9_%/-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\t\033[36m%-20s\033[0m %s\n", $$1, $$2}'
814

9-
build:
15+
all: build push ## Build and then push image
16+
17+
build: ## Build the docker image
1018
docker build . -t ${FULL_NAME}
1119

12-
login:
13-
docker login ${HOST}
20+
login: ## Login to the docker registry
21+
docker login
1422

15-
push:
23+
push: login ## Push docker image to registry
1624
docker push ${FULL_NAME}
1725

18-
shell: build
26+
run: build ## Build image and launch shell
1927
docker run --rm -it ${FULL_NAME} bash

.circleci/config.yml

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,10 @@ orbs:
66
executors:
77
default:
88
docker:
9-
- image: s22s/miniconda-gdal:latest
9+
- image: s22s/circleci-openjdk-conda-gdal:b8e30ee
1010
working_directory: ~/repo
1111
environment:
12-
SBT_VERSION: 1.3.8
13-
SBT_OPTS: -Xmx768m
12+
SBT_OPTS: "-Xms64m -Xmx1536m -Djava.awt.headless=true -Dsun.io.serialization.extendedDebugInfo=true"
1413
commands:
1514
setup:
1615
description: Setup for sbt build
@@ -24,8 +23,7 @@ orbs:
2423
steps:
2524
- run:
2625
name: "Compile Scala via sbt"
27-
command: |-
28-
sbt -v -batch compile test:compile it:compile
26+
command: sbt -v -batch compile test:compile it:compile
2927

3028
python:
3129
commands:
@@ -60,6 +58,7 @@ orbs:
6058
mkdir -p /tmp/core_dumps
6159
ls -lh /tmp
6260
cp core.* *.hs /tmp/core_dumps 2> /dev/null || true
61+
cp core/* /tmp/core_dumps/ 2> /dev/null || true
6362
cp -r /tmp/hsperfdata* /tmp/*.hprof /tmp/core_dumps 2> /dev/null || true
6463
when: on_fail
6564

@@ -125,24 +124,23 @@ jobs:
125124

126125
- run:
127126
name: "Scala Tests: core"
128-
command: sbt -batch core/test
127+
command: sbt -v -batch core/test
129128

130129
- run:
131130
name: "Scala Tests: datasource"
132-
command: sbt -batch datasource/test
131+
command: sbt -v -batch datasource/test
133132

134133
- run:
135134
name: "Scala Tests: experimental"
136-
command: sbt -batch experimental/test
135+
command: sbt -v -batch experimental/test
137136

138137
- run:
139138
name: "Create PyRasterFrames package"
140-
command: |-
141-
sbt -v -batch pyrasterframes/package
139+
command: sbt -v -batch pyrasterframes/package
142140

143141
- run:
144142
name: "Python Tests"
145-
command: sbt -batch pyrasterframes/test
143+
command: sbt -v -batch pyrasterframes/test
146144

147145
- rasterframes/save-artifacts
148146
- rasterframes/save-cache
@@ -249,4 +247,4 @@ workflows:
249247
- test
250248
- it
251249
- it-no-gdal
252-
- docs
250+
- docs

.circleci/fix-permissions

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,4 @@
11
#!/usr/bin/env bash
2-
# set permissions on a directory
3-
# after any installation, if a directory needs to be (human) user-writable,
4-
# run this script on it.
5-
# It will make everything in the directory owned by the group $NB_GID
6-
# and writable by that group.
7-
# Deployments that want to set a specific user id can preserve permissions
8-
# by adding the `--group-add users` line to `docker run`.
9-
10-
# uses find to avoid touching files that already have the right permissions,
11-
# which would cause massive image explosion
12-
13-
# right permissions are:
14-
# group=$NB_GID
15-
# AND permissions include group rwX (directory-execute)
16-
# AND directories have setuid,setgid bits set
172

183
set -e
194

.circleci/requirements-conda.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1-
gdal==2.4.4
1+
python==3.8
2+
gdal==3.1.2
23
libspatialindex
4+
rasterio[s3]
35
rtree

.sbtopts

Lines changed: 0 additions & 1 deletion
This file was deleted.

.scalafmt.conf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
version = 3.0.3
1+
version = 3.0.4
22
runner.dialect = scala212
33
indent.main = 2
44
indent.significant = 2

build.sbt

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@
1919
*
2020
*/
2121

22+
// Leave me and my custom keys alone!
23+
Global / lintUnusedKeysOnLoad := false
24+
2225
addCommandAlias("makeSite", "docs/makeSite")
2326
addCommandAlias("previewSite", "docs/previewSite")
2427
addCommandAlias("ghpagesPushSite", "docs/ghpagesPushSite")
@@ -52,6 +55,10 @@ lazy val core = project
5255
libraryDependencies ++= Seq(
5356
`slf4j-api`,
5457
shapeless,
58+
circe("core").value,
59+
circe("generic").value,
60+
circe("parser").value,
61+
circe("generic-extras").value,
5562
frameless excludeAll ExclusionRule("com.github.mpilquist", "simulacrum"),
5663
`jts-core`,
5764
`spray-json`,
@@ -152,14 +159,14 @@ lazy val docs = project
152159
.dependsOn(core, datasource, pyrasterframes)
153160
.enablePlugins(SiteScaladocPlugin, ParadoxPlugin, ParadoxMaterialThemePlugin, GhpagesPlugin, ScalaUnidocPlugin)
154161
.settings(
155-
apiURL := Some(url("http://rasterframes.io/latest/api")),
162+
apiURL := Some(url("https://rasterframes.io/latest/api")),
156163
autoAPIMappings := true,
157164
ghpagesNoJekyll := true,
158165
ScalaUnidoc / siteSubdirName := "latest/api",
159166
paradox / siteSubdirName := ".",
160167
paradoxProperties ++= Map(
161168
"version" -> version.value,
162-
"scaladoc.org.apache.spark.sql.rf" -> "http://rasterframes.io/latest",
169+
"scaladoc.org.apache.spark.sql.rf" -> "https://rasterframes.io/latest",
163170
"github.base_url" -> ""
164171
),
165172
paradoxNavigationExpandDepth := Some(3),

core/src/main/scala/org/apache/spark/sql/rf/TileUDT.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ class TileUDT extends UserDefinedType[Tile] {
4545
def userClass: Class[Tile] = classOf[Tile]
4646

4747
def sqlType: StructType = StructType(Seq(
48-
StructField("cell_type", StringType, false),
48+
StructField("cellType", StringType, false),
4949
StructField("cols", IntegerType, false),
5050
StructField("rows", IntegerType, false),
5151
StructField("cells", BinaryType, true),

0 commit comments

Comments
 (0)