Skip to content

Commit aa84180

Browse files
authored
Merge pull request #77 from alibaba/gpu
Gpu
2 parents f5985d2 + 02393a5 commit aa84180

File tree

13 files changed

+78
-10
lines changed

13 files changed

+78
-10
lines changed

flink-ml-examples/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@
5555
</dependency>
5656
<dependency>
5757
<groupId>org.apache.flink</groupId>
58-
<artifactId>flink-table-planner-blink_2.11</artifactId>
58+
<artifactId>flink-table-planner-blink_${scala.major.version}</artifactId>
5959
<scope>test</scope>
6060
</dependency>
6161
</dependencies>

flink-ml-framework/pom.xml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,10 @@
130130
<groupId>org.apache.curator</groupId>
131131
<artifactId>curator-test</artifactId>
132132
</exclusion>
133+
<exclusion>
134+
<artifactId>commons-cli</artifactId>
135+
<groupId>commons-cli</groupId>
136+
</exclusion>
133137
</exclusions>
134138
</dependency>
135139
<dependency>
@@ -140,6 +144,10 @@
140144
<groupId>io.netty</groupId>
141145
<artifactId>netty-all</artifactId>
142146
</exclusion>
147+
<exclusion>
148+
<artifactId>commons-cli</artifactId>
149+
<groupId>commons-cli</groupId>
150+
</exclusion>
143151
</exclusions>
144152
</dependency>
145153
<dependency>

flink-ml-framework/src/main/java/com/alibaba/flink/ml/util/MLConstants.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,4 +100,6 @@ public class MLConstants {
100100
public static final String FAILOVER_RESTART_INDIVIDUAL_STRATEGY = "individual";
101101
public static final String FAILOVER_STRATEGY_DEFAULT = FAILOVER_RESTART_ALL_STRATEGY;
102102
public static final String PYTHON_VERSION = "python.version";
103+
104+
public static final String GPU_INFO = "gpu_info";
103105
}

flink-ml-lib/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@
5757
</dependency>
5858
<dependency>
5959
<groupId>org.apache.flink</groupId>
60-
<artifactId>flink-table-planner-blink_2.11</artifactId>
60+
<artifactId>flink-table-planner-blink_${scala.major.version}</artifactId>
6161
<scope>provided</scope>
6262
</dependency>
6363
<dependency>

flink-ml-operator/src/main/java/com/alibaba/flink/ml/operator/ops/MLMapFunction.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ public MLMapFunction(ExecutionMode mode, BaseRole role, MLConfig config, TypeInf
7373
* @throws Exception
7474
*/
7575
public void open(RuntimeContext runtimeContext) throws Exception {
76+
ResourcesUtils.parseGpuInfo(runtimeContext, config);
7677
mlContext = new MLContext(mode, config, role.name(), runtimeContext.getIndexOfThisSubtask(),
7778
config.getEnvPath(), null);
7879
PythonFileUtil.preparePythonFilesForExec(runtimeContext, mlContext);

flink-ml-operator/src/main/java/com/alibaba/flink/ml/operator/ops/ResourcesUtils.java

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
package com.alibaba.flink.ml.operator.ops;
2+
3+
import org.apache.flink.api.common.externalresource.ExternalResourceInfo;
4+
import org.apache.flink.api.common.functions.RuntimeContext;
5+
6+
import com.alibaba.flink.ml.cluster.MLConfig;
7+
import com.alibaba.flink.ml.util.MLConstants;
8+
9+
import java.util.ArrayList;
10+
import java.util.Collections;
11+
import java.util.List;
12+
import java.util.Set;
13+
14+
public class ResourcesUtils {
15+
16+
public static void parseGpuInfo(RuntimeContext runtimeContext, MLConfig mlConfig) {
17+
Set<ExternalResourceInfo> gpuInfo = runtimeContext.getExternalResourceInfos("gpu");
18+
if (gpuInfo != null && gpuInfo.size() >0) {
19+
List<String> indexList = new ArrayList<>();
20+
for (ExternalResourceInfo gpu : gpuInfo) {
21+
if (gpu.getProperty("index").isPresent()) {
22+
indexList.add(gpu.getProperty("index").get());
23+
}
24+
}
25+
Collections.sort(indexList);
26+
String gpuStr = String.join(",", indexList);
27+
mlConfig.getProperties().put(MLConstants.GPU_INFO, gpuStr);
28+
}else {
29+
mlConfig.getProperties().put(MLConstants.GPU_INFO, "");
30+
}
31+
}
32+
}

flink-ml-operator/src/main/java/com/alibaba/flink/ml/operator/ops/inputformat/MLInputFormat.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import com.alibaba.flink.ml.cluster.rpc.NodeServer;
2626
import com.alibaba.flink.ml.data.DataExchange;
2727
import com.alibaba.flink.ml.operator.hook.FlinkOpHookManager;
28+
import com.alibaba.flink.ml.operator.ops.ResourcesUtils;
2829
import com.alibaba.flink.ml.operator.util.ColumnInfos;
2930
import com.alibaba.flink.ml.operator.util.PythonFileUtil;
3031
import com.alibaba.flink.ml.cluster.role.AMRole;
@@ -34,7 +35,6 @@
3435
import org.apache.flink.api.common.io.statistics.BaseStatistics;
3536
import org.apache.flink.api.common.typeinfo.TypeInformation;
3637
import org.apache.flink.configuration.Configuration;
37-
import org.apache.flink.core.io.InputSplit;
3838
import org.apache.flink.core.io.InputSplitAssigner;
3939
import org.slf4j.Logger;
4040
import org.slf4j.LoggerFactory;
@@ -122,6 +122,7 @@ public InputSplitAssigner getInputSplitAssigner(MLInputSplit[] inputSplits) {
122122
*/
123123
@Override
124124
public void open(MLInputSplit split) throws IOException {
125+
ResourcesUtils.parseGpuInfo(getRuntimeContext(), mlConfig);
125126
mlContext = new MLContext(mode, mlConfig, role.name(), split.getSplitNumber(),
126127
mlConfig.getEnvPath(), ColumnInfos.dummy().getNameToTypeMap());
127128

flink-ml-pytorch/pom.xml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@
3636
</dependency>
3737
<dependency>
3838
<groupId>org.apache.flink</groupId>
39-
<artifactId>flink-table-api-java-bridge_2.11</artifactId>
39+
<artifactId>flink-table-api-java-bridge_${scala.major.version}</artifactId>
4040
</dependency>
4141

4242
<dependency>
@@ -64,7 +64,7 @@
6464
</dependency>
6565
<dependency>
6666
<groupId>org.apache.flink</groupId>
67-
<artifactId>flink-table-planner-blink_2.11</artifactId>
67+
<artifactId>flink-table-planner-blink_${scala.major.version}</artifactId>
6868
<scope>test</scope>
6969
</dependency>
7070
</dependencies>

flink-ml-tensorflow/pom.xml

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,16 +47,21 @@
4747
<dependency>
4848
<groupId>org.tensorflow</groupId>
4949
<artifactId>tensorflow-hadoop</artifactId>
50+
<exclusions>
51+
<exclusion>
52+
<artifactId>commons-cli</artifactId>
53+
<groupId>commons-cli</groupId>
54+
</exclusion>
55+
</exclusions>
5056
</dependency>
51-
5257
<dependency>
5358
<groupId>org.apache.flink</groupId>
5459
<artifactId>flink-table-api-java</artifactId>
5560
<scope>provided</scope>
5661
</dependency>
5762
<dependency>
5863
<groupId>org.apache.flink</groupId>
59-
<artifactId>flink-table-api-java-bridge_2.11</artifactId>
64+
<artifactId>flink-table-api-java-bridge_${scala.major.version}</artifactId>
6065
</dependency>
6166

6267
<dependency>
@@ -77,7 +82,7 @@
7782
</dependency>
7883
<dependency>
7984
<groupId>org.apache.flink</groupId>
80-
<artifactId>flink-table-planner-blink_2.11</artifactId>
85+
<artifactId>flink-table-planner-blink_${scala.major.version}</artifactId>
8186
<scope>test</scope>
8287
</dependency>
8388
</dependencies>

flink-ml-tensorflow/python/flink_ml_tensorflow/tensorflow_on_flink_mlconf.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,4 +98,6 @@ class MLCONSTANTS(object):
9898
FAILOVER_RESTART_INDIVIDUAL_STRATEGY = str(ml_constants.FAILOVER_RESTART_INDIVIDUAL_STRATEGY)
9999
FAILOVER_STRATEGY_DEFAULT = str(ml_constants.FAILOVER_STRATEGY_DEFAULT)
100100

101-
PYTHON_VERSION = str(ml_constants.PYTHON_VERSION)
101+
PYTHON_VERSION = str(ml_constants.PYTHON_VERSION)
102+
103+
GPU_INFO = str(ml_constants.GPU_INFO)

0 commit comments

Comments
 (0)