Skip to content

Commit 9cab2d6

Browse files
tylerjthomas9 and Tyler Thomas authored
Switch from PyCall.jl to PythonCall.jl (#15)
* Initial conversion from PyCall to PythonCall
* Remove PyCall references
* run format check
* fix __init__ (do not return cv)
* bump julia version in docs to v1.6
* commit @erikphanson's suggestions
* fix to_pandas
* fix to_pandas

Co-authored-by: Tyler Thomas <[email protected]>
1 parent 7999f91 commit 9cab2d6

File tree

14 files changed

+66
-111
lines changed

14 files changed

+66
-111
lines changed

.github/workflows/docs.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ jobs:
2121
- uses: actions/checkout@v2
2222
- uses: julia-actions/setup-julia@latest
2323
with:
24-
version: 1.5 # earliest supported version
24+
version: 1.6 # earliest supported version
2525
- uses: actions/cache@v2
2626
with:
2727
path: ~/.julia/artifacts

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,3 +25,4 @@ Manifest.toml
2525

2626
# Custom.
2727
catboost_info
28+
.CondaPkg*

CondaPkg.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
[deps.catboost]
2+
channel = "conda-forge"
3+
version = "=1.1"

Project.toml

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,21 @@
11
name = "CatBoost"
22
uuid = "e2e10f9a-a85d-4fa9-b6b2-639a32100a12"
33
authors = ["Beacon Biosignals, Inc."]
4-
version = "0.1.2"
4+
version = "0.2.0"
55

66
[deps]
7-
Conda = "8f4d0f93-b110-5947-807f-2305c1781a2d"
87
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
98
OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
10-
PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0"
9+
PythonCall = "6099a3de-0909-46bc-b1f4-468b9a2dfc0d"
1110
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
1211

1312
[compat]
1413
Aqua = "0.5"
15-
Conda = "1.5"
1614
DataFrames = "0.22, 1"
1715
OrderedCollections = "1.4"
18-
PyCall = "1.9"
16+
PythonCall = "0.9"
1917
Tables = "1.4"
20-
julia = "1.5"
18+
julia = "1.6"
2119

2220
[extras]
2321
Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"

README.md

Lines changed: 3 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -10,28 +10,16 @@
1010

1111
Julia interface to [CatBoost](https://catboost.ai/).
1212

13-
## Setting up PyCall
14-
15-
Please follow the PyCall guidelines described in [PyCall.jl](https://github.com/JuliaPy/PyCall.jl).
16-
17-
We highly recommend using a Julia-specific Python environment to handle dependencies. We recommend that users follow the build instructions in [Conda.jl](https://github.com/JuliaPy/Conda.jl).
18-
19-
If users have installed [miniconda](https://docs.conda.io/en/latest/miniconda.html) on their local machine, we recommend checking out the Julia-specific Python environment (which is usually located at `$HOME/.julia/conda/3`) and installing `catboost` there with `pip`:
20-
21-
```
22-
pip install catboost
23-
```
24-
2513
## Example
2614

2715
```julia
2816
module Regression
2917

3018
using CatBoost
using PythonCall
3119

32-
train_data = [[1, 4, 5, 6], [4, 5, 6, 7], [30, 40, 50, 60]]
33-
eval_data = [[2, 4, 6, 8], [1, 4, 50, 60]]
34-
train_labels = [10, 20, 30]
20+
train_data = PyList([[1, 4, 5, 6], [4, 5, 6, 7], [30, 40, 50, 60]])
21+
eval_data = PyList([[2, 4, 6, 8], [1, 4, 50, 60]])
22+
train_labels = PyList([10, 20, 30])
3523

3624
# Initialize CatBoostRegressor
3725
model = CatBoostRegressor(iterations = 2, learning_rate = 1, depth = 2)

deps/build.jl

Lines changed: 0 additions & 6 deletions
This file was deleted.

examples/binary.jl

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
11
module Binary
22

3-
using CatBoost, DataFrames
3+
using CatBoost
4+
using DataFrames
5+
using PythonCall
46

57
# Initialize data
6-
cat_features = [0, 1]
8+
cat_features = pylist([0, 1])
79
train_data = DataFrame([["a", "a", "c"], ["b", "b", "d"], [1, 4, 30], [4, 5, 40],
810
[5, 6, 50], [6, 7, 60]], :auto)
9-
train_labels = [1, 1, -1]
11+
train_labels = pylist([1, 1, -1])
1012
eval_data = DataFrame([["a", "a"], ["b", "d"], [2, 1], [4, 4], [6, 50], [8, 60]], :auto)
1113

1214
# Initialize CatBoostClassifier

examples/cross_validation.jl

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,18 +3,19 @@ module CrossValidation
33

44
using CatBoost
55
using DataFrames
6+
using PythonCall
67

7-
cv_data = [["France", 1924, 44], ["USA", 1932, 37], ["Switzerland", 1928, 25],
8-
["Norway", 1952, 30], ["Japan", 1972, 35], ["Mexico", 1968, 112]]
8+
cv_data = pylist([["France", 1924, 44], ["USA", 1932, 37], ["Switzerland", 1928, 25],
9+
["Norway", 1952, 30], ["Japan", 1972, 35], ["Mexico", 1968, 112]])
910

10-
labels = [1, 1, 0, 0, 0, 1]
11+
labels = pylist([1, 1, 0, 0, 0, 1])
1112

12-
cat_features = [0]
13+
cat_features = pylist([0])
1314

1415
cv_dataset = Pool(; data=cv_data, label=labels, cat_features=cat_features)
1516

16-
params = Dict("iterations" => 100, "depth" => 2, "loss_function" => "Logloss",
17-
"verbose" => false)
17+
params = PyDict(Dict("iterations" => 100, "depth" => 2, "loss_function" => "Logloss",
18+
"verbose" => false))
1819

1920
scores = cv(cv_dataset; fold_count=2, params)
2021

examples/learning_to_rank.jl

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,11 @@ module LearningToRank
22

33
using CatBoost
44
using DataFrames
5+
using PythonCall
56

67
train, test = load_dataset(:msrank_10k)
8+
train.sort_values(2; inplace=true)
9+
test.sort_values(2; inplace=true)
710
x_train = train.drop([0, 1]; axis=1).values
811
y_train = train[1].values
912
queries_train = train[2].values
@@ -13,8 +16,8 @@ y_test = test[1].values
1316
queries_test = test[2].values
1417

1518
# Important dims.
16-
num_documents, num_features = size(x_train)
17-
num_queries = size(unique(queries_train))[1]
19+
num_documents, num_features = size(pyconvert(Array, x_train))
20+
# Number of distinct query (group) ids. `unique` is required: taking the length of
# the raw column would only repeat the document count reported above.
num_queries = length(unique(pyconvert(Array, queries_train)))
1821
println("Data dims: $((num_documents, num_features))")
1922
println("Num queries: $(num_queries)")
2023

@@ -28,10 +31,10 @@ train = Pool(; data=x_train, label=y_train, group_id=queries_train)
2831
test = Pool(; data=x_test, label=y_test, group_id=queries_test)
2932

3033
# small number of iterations to not slow down CI too much
31-
default_parameters = Dict("iterations" => 10, "loss_function" => "RMSE",
32-
"custom_metric" => ["MAP:top=10", "PrecisionAt:top=10",
33-
"RecallAt:top=10"], "verbose" => false,
34-
"random_seed" => 314159)
34+
default_parameters = PyDict(Dict("iterations" => 10, "loss_function" => "RMSE",
35+
"custom_metric" => ["MAP:top=10", "PrecisionAt:top=10",
36+
"RecallAt:top=10"], "verbose" => false,
37+
"random_seed" => 314159))
3538

3639
function fit_model(params, train_pool, test_pool)
3740
model = catboost.CatBoost(params)

examples/multiclass.jl

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,18 @@
11
module MultiClass
22

33
using CatBoost
4+
using PythonCall
45

5-
train_data = [["summer", 1924, 44], ["summer", 1932, 37], ["winter", 1980, 37],
6-
["summer", 2012, 204]]
6+
train_data = PyList([["summer", 1924, 44], ["summer", 1932, 37], ["winter", 1980, 37],
7+
["summer", 2012, 204]])
78

8-
eval_data = [["winter", 1996, 197], ["winter", 1968, 37], ["summer", 2002, 77],
9-
["summer", 1948, 59]]
9+
eval_data = PyList([["winter", 1996, 197], ["winter", 1968, 37], ["summer", 2002, 77],
10+
["summer", 1948, 59]])
1011

11-
cat_features = [0]
12+
cat_features = PyList([0])
1213

13-
train_label = ["France", "USA", "USA", "UK"]
14-
eval_label = ["USA", "France", "USA", "UK"]
14+
train_label = PyList(["France", "USA", "USA", "UK"])
15+
eval_label = PyList(["USA", "France", "USA", "UK"])
1516

1617
train_dataset = Pool(; data=train_data, label=train_label, cat_features=cat_features)
1718

0 commit comments

Comments
 (0)