Skip to content

Commit 37123ff

Browse files
committed
update FG document
1 parent d7f0d77 commit 37123ff

File tree

10 files changed

+97
-110
lines changed

10 files changed

+97
-110
lines changed

docker/Dockerfile_paitf115

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
FROM dsw-registry.cn-shanghai.cr.aliyuncs.com/pai/tensorflow-training:1.15PAI-cpu-py36-ubuntu18.04
2+
3+
COPY docker/sources_18.04.list /etc/apt/sources.list
4+
5+
# necessary for later commands to take effect
6+
RUN md5sum /etc/apt/sources.list
7+
8+
RUN apt-get update
9+
RUN apt-get install apt-utils inetutils-ping wget curl telnet vim strace libpq-dev curl libsasl2-dev gcc g++ unzip openjdk-8-jdk -y
10+
11+
RUN mkdir /EasyRec
12+
COPY requirements /EasyRec/requirements
13+
COPY requirements.txt /EasyRec/
14+
COPY easy_rec /EasyRec/easy_rec/
15+
COPY setup.cfg /EasyRec/
16+
COPY setup.py /EasyRec/
17+
COPY MANIFEST.in /EasyRec/
18+
COPY README.md /EasyRec/
19+
COPY scripts /EasyRec/scripts
20+
21+
RUN curl "http://easyrec.oss-cn-beijing.aliyuncs.com/tools/odpscmd_public_0.45.0.zip" -o /EasyRec/odpscmd_public.zip
22+
RUN mkdir /usr/local/odps_clt/ && cd /usr/local/odps_clt/ && unzip /EasyRec/odpscmd_public.zip
23+
RUN ln -s /usr/local/odps_clt/bin/odpscmd /usr/local/bin/odpscmd
24+
RUN pip3 install --upgrade pip
25+
RUN pip3 install pystack-debugger idna kafka-python -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
26+
RUN pip3 install -r /EasyRec/requirements/runtime.txt
27+
RUN pip3 install -r /EasyRec/requirements/extra.txt -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
28+
RUN pip3 install http://easyrec.oss-cn-beijing.aliyuncs.com/3rdparty/graphlearn-1.1.0-cp36-cp36m-linux_x86_64.whl -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
29+
RUN pip3 install http://easyrec.oss-cn-beijing.aliyuncs.com/releases/pai_automl-0.0.1rc1-py3-none-any.whl -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
30+
RUN pip3 install https://easyrec.oss-cn-beijing.aliyuncs.com/3rdparty/common_io-0.4.2%2Btunnel-py2.py3-none-any.whl -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
31+
RUN pip3 install tensorflow_probability==0.8 -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
32+
RUN cd /EasyRec && pip install .
33+
RUN rm -rf /EasyRec
34+
RUN python -c "import easy_rec; easy_rec.help(); import pyhive; import datahub; import kafka"
35+
36+
COPY docker/hadoop_env.sh /opt/hadoop_env.sh

docker/Dockerfile_tf211

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
FROM dsw-registry.cn-shanghai.cr.aliyuncs.com/pai/tensorflow:2.11-cpu-py39-ubuntu20.04-1
2+
COPY docker/sources_20.04.list /etc/apt/sources.list
3+
4+
# necessary for later commands to take effect
5+
RUN md5sum /etc/apt/sources.list
6+
7+
RUN apt-get update
8+
RUN apt-get install apt-utils inetutils-ping wget curl telnet vim strace libpq-dev curl libsasl2-dev gcc g++ unzip openjdk-8-jdk -y
9+
10+
RUN mkdir /EasyRec
11+
COPY requirements /EasyRec/requirements
12+
COPY requirements.txt /EasyRec/
13+
COPY easy_rec /EasyRec/easy_rec/
14+
COPY setup.cfg /EasyRec/
15+
COPY setup.py /EasyRec/
16+
COPY MANIFEST.in /EasyRec/
17+
COPY README.md /EasyRec/
18+
COPY scripts /EasyRec/scripts
19+
20+
RUN curl "http://easyrec.oss-cn-beijing.aliyuncs.com/tools/odpscmd_public_0.45.0.zip" -o /EasyRec/odpscmd_public.zip
21+
RUN mkdir /usr/local/odps_clt/ && cd /usr/local/odps_clt/ && unzip /EasyRec/odpscmd_public.zip
22+
RUN ln -s /usr/local/odps_clt/bin/odpscmd /usr/local/bin/odpscmd
23+
RUN python -m pip install --upgrade pip
24+
RUN pip3 install pystack-debugger idna kafka-python -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
25+
RUN pip3 install -r /EasyRec/requirements/runtime.txt -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
26+
RUN pip3 install -r /EasyRec/requirements/extra.txt -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
27+
RUN pip3 install https://tzrec.oss-cn-beijing.aliyuncs.com/third_party/graphlearn-1.2.0-cp39-cp39-linux_x86_64.whl -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
28+
# RUN pip3 install http://easyrec.oss-cn-beijing.aliyuncs.com/releases/pai_automl-0.0.1rc1-py3-none-any.whl -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
29+
RUN pip3 install tensorflow_probability==0.19.0 -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
30+
#RUN pip3 install encodings
31+
RUN pip3 install https://easyrec.oss-cn-beijing.aliyuncs.com/3rdparty/common_io-0.4.2%2Btunnel-py2.py3-none-any.whl -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
32+
RUN cd /EasyRec && pip install .
33+
RUN rm -rf /EasyRec
34+
# RUN python -c "import easy_rec; easy_rec.help(); import pyhive; import datahub; import kafka"
35+
36+
COPY docker/hadoop_env.sh /opt/hadoop_env.sh

docs/images/other/fg.svg

Lines changed: 1 addition & 1 deletion
Loading

docs/source/feature/fg.md

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
是一套把原始输入转换为模型所需输入(特征)的数据变换过程,用来保证离线、在线样本生成结果的一致性。
55
特征生成也可以理解为特征变换,对单个特征或者多个特征做变换。我们提供了各种类型的FG算子来完成各种特征变换操作。
66

7-
特征生成只关注同时需要在离线和在线样本生成过程中的变换操作。如果某个变换操作只需要作用在离线阶段,则不需要定义为FG的操作。
7+
特征生成只关注同时需要在离线和在线样本生成过程中的变换操作。如果某个变换操作只需要作用在离线阶段,则不需要定义为FG的操作。
88

99
FG模块在推荐系统架构中的位置如下图所示:
1010

@@ -55,21 +55,21 @@ FG模块在推荐系统架构中的位置如下图所示:
5555

5656
FG支持的特征变换算子与EasyRec支持的特征(`Feature Column`)之间没有严格的对应关系,大致可以参考如下表格:
5757

58-
| FG 算子 | EasyRec Feature Column |
59-
|:-------------|:------------------------------------|
60-
| id_feature | IdFeature 或 TagFeature |
61-
| raw_feature | RawFeature |
62-
| expr_feature | RawFeature |
63-
| combo_feature | IdFeature 或 TagFeature |
64-
| lookup_feature | RawFeature 或 IdFeature 或 TagFeature |
65-
| match_feature | RawFeature 或 IdFeature 或 TagFeature |
66-
| overlap_feature | RawFeature |
67-
| sequence_feature | SequenceFeature 或 TagFeature |
68-
| bm25_feature | RawFeature |
69-
| kv_dot_product | RawFeature |
70-
| tokenize_feature | TagFeature |
71-
| text_normalizer | IdFeature |
72-
| regex_replace_feature | IdFeature |
58+
| FG 算子 | EasyRec Feature Column |
59+
| :-------------------- | :---------------------------------- |
60+
| id_feature | IdFeature 或 TagFeature |
61+
| raw_feature | RawFeature |
62+
| expr_feature | RawFeature |
63+
| combo_feature | IdFeature 或 TagFeature |
64+
| lookup_feature | RawFeature 或 IdFeature 或 TagFeature |
65+
| match_feature | RawFeature 或 IdFeature 或 TagFeature |
66+
| overlap_feature | RawFeature |
67+
| sequence_feature | SequenceFeature 或 TagFeature |
68+
| bm25_feature | RawFeature |
69+
| kv_dot_product | RawFeature |
70+
| tokenize_feature | TagFeature |
71+
| text_normalizer | IdFeature |
72+
| regex_replace_feature | IdFeature |
7373

7474
备注:**FG的执行结果输出给EasyRec模型,两者之间是串联的关系**
7575

@@ -122,8 +122,8 @@ pai -name easy_rec_ext
122122
如果不是, 可以通过-Dedit_config_json='{"export_config.multi_placeholder":true}' 进行修改
123123
124124
- 如果有设置feature_config.features.max_partitions, 请加入下面的命令重置:
125-
- -Dedit_config_json='{"feature_config.features\[:\].max_partitions":1}'进行修改, 可以获得更好的性能
126125
126+
- -Dedit_config_json='{"feature_config.features\[:\].max_partitions":1}'进行修改, 可以获得更好的性能
127127
128128
#### 特征筛选
129129

docs/source/quick_start/local_tutorial.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ docker exec -it <CONTAINER_ID> bash
6060

6161
可选镜像:
6262

63-
- mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/easyrec/easyrec:py36-paitf1.12-0.8.5 [只能跑在DLC环境]
63+
- mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/easyrec/easyrec:py36-paitf1.12-0.8.5 \[只能跑在DLC环境\]
6464
- mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/easyrec/easyrec:py36-paitf1.15-0.8.5
6565
- mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/easyrec/easyrec:py36-tf1.15.5-0.8.5
6666
- mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/easyrec/easyrec:py36-tf1.15.5-gpu-0.8.5

easy_rec/python/compat/early_stopping.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,9 @@
2121
import os
2222
import threading
2323
import time
24-
from distutils.version import LooseVersion
2524

2625
import tensorflow as tf
26+
from distutils.version import LooseVersion
2727
from tensorflow.python.framework import dtypes
2828
from tensorflow.python.framework import ops
2929
from tensorflow.python.ops import init_ops

easy_rec/python/input/odps_input_v3.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,9 @@ def __init__(self,
3232
task_num, check_mode, pipeline_config)
3333
self._num_epoch = 0
3434
if common_io is None:
35-
logging.error("""please install common_io pip install
36-
https://easyrec.oss-cn-beijing.aliyuncs.com/3rdparty/common_io-0.4.2%2Btunnel-py2.py3-none-any.whl"""
35+
logging.error('''
36+
please install common_io pip install
37+
https://easyrec.oss-cn-beijing.aliyuncs.com/3rdparty/common_io-0.4.2%2Btunnel-py2.py3-none-any.whl'''
3738
)
3839
sys.exit(1)
3940

easy_rec/python/test/odps_input_v3_test.py

Lines changed: 0 additions & 86 deletions
This file was deleted.

easy_rec/python/test/train_eval_test.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,11 @@
77
import threading
88
import time
99
import unittest
10-
from distutils.version import LooseVersion
1110

1211
import numpy as np
1312
import six
1413
import tensorflow as tf
14+
from distutils.version import LooseVersion
1515
from tensorflow.python.platform import gfile
1616

1717
from easy_rec.python.main import predict

setup.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ multi_line_output = 7
1010
force_single_line = true
1111
known_standard_library = setuptools
1212
known_first_party = easy_rec
13-
known_third_party = absl,common_io,docutils,eas_prediction,faiss,future,google,graphlearn,kafka,matplotlib,numpy,oss2,pai,pandas,psutil,scipy,six,sklearn,sparse_operation_kit,sphinx_markdown_tables,sphinx_rtd_theme,tensorflow,tensorflow_probability,yaml
13+
known_third_party = absl,common_io,distutils,docutils,eas_prediction,faiss,future,google,graphlearn,kafka,matplotlib,numpy,oss2,pai,pandas,psutil,scipy,six,sklearn,sparse_operation_kit,sphinx_markdown_tables,sphinx_rtd_theme,tensorflow,tensorflow_probability,yaml
1414
no_lines_before = LOCALFOLDER
1515
default_section = THIRDPARTY
1616
skip = easy_rec/python/protos

0 commit comments

Comments
 (0)