Skip to content

Commit a496187

Browse files
authored
Add Python Interfaces (#12)
* add python interface * del python deps * add python README.md * add python README.md * add python demo link into python readme * support python3 * update readme * update readme * modify PYTHON_INCLUDE in Makefile * update code annotation * modify raw_input inferface to support python3 && update Makefile * modify topical word embeddings model * update the path of news model * update the path of news model * update slda_infer code annotation
1 parent 3485a09 commit a496187

20 files changed

+957
-12
lines changed

Makefile

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,18 @@ ifndef DEPS_PATH
66
DEPS_PATH = $(shell pwd)/third_party
77
endif
88

9+
# Python build settings. Each of PYTHON_PATH, PYTHON_VERSION and PYTHON_INCLUDE
# is only given a default here; a value that is already set is left untouched.
# Simple assignment (:=) is used so the shell commands run once, not on every
# expansion of the variable.
#
# PYTHON_PATH: installation prefix (sys.prefix) reported by the `python` on PATH.
ifndef PYTHON_PATH
10+
PYTHON_PATH := $(shell python -c"import sys; print(sys.prefix)")
11+
endif
12+
# Entries of $(PYTHON_PATH)/include whose name contains "python". More than one
# may be present, and both -I$(PYTHON_INCLUDE) and -l$(PYTHON_VERSION) need a
# single word, so exactly one entry is selected: the one whose name starts with
# python<major>.<minor> of the running interpreter when there is such an entry,
# otherwise the first entry listed.
python_hdr_dirs := $(shell ls $(PYTHON_PATH)/include | grep python)
python_mm := $(shell python -c"import sys; print('%d.%d' % sys.version_info[:2])")
python_hdr := $(firstword $(filter python$(python_mm)%,$(python_hdr_dirs)) $(python_hdr_dirs))
13+
# PYTHON_VERSION: name of the selected header directory.
ifndef PYTHON_VERSION
14+
PYTHON_VERSION := $(python_hdr)
15+
endif
16+
17+
# PYTHON_INCLUDE: full path of the selected header directory.
ifndef PYTHON_INCLUDE
18+
PYTHON_INCLUDE := $(addprefix $(PYTHON_PATH)/include/,$(python_hdr))
19+
endif
20+
921
# PROTOC: protobuf compiler used by the proto rule; defaults to the binary
# under $(DEPS_PATH)/bin unless a value is already set.
ifndef PROTOC
1022
PROTOC = ${DEPS_PATH}/bin/protoc
1123
endif
@@ -25,12 +37,13 @@ CXXFLAGS=-pipe \
2537

2638
INCPATH=-I./include/ \
2739
-I./include/familia \
28-
-I./third_party/include
40+
-I./third_party/include \
41+
-I$(PYTHON_INCLUDE)
2942

30-
LDFLAGS_SO = -L$(DEPS_PATH)/lib -L./build/ -lfamilia -lprotobuf -lglog -lgflags
43+
LDFLAGS_SO = -L$(DEPS_PATH)/lib -L$(PYTHON_PATH)/lib -L./build/ -lfamilia -lprotobuf -lglog -lgflags
3144

3245
.PHONY: all
33-
all: familia
46+
all: familia python/demo/familia.so
3447
@echo $(SOURCES)
3548
@echo $(OBJS)
3649
$(CXX) $(CXXFLAGS) $(INCPATH) build/demo/inference_demo.o $(LDFLAGS_SO) -o inference_demo
@@ -50,13 +63,17 @@ clean:
5063
rm -rf word_distance_demo
5164
rm -rf topic_word_demo
5265
rm -rf show_topic_demo
53-
rm -rf build
66+
rm -rf build
67+
rm -rf python/cpp/*.o
68+
rm -rf python/demo/*.so
69+
rm -rf python/demo/*.pyc
5470
find src -name "*.pb.[ch]*" -delete
5571

5672
# third party dependency
5773
deps: ${GLOGS} ${GFLAGS} ${PROTOBUF}
5874
@echo "dependency installed!"
5975

76+
.PHONY: familia
6077
familia: build/libfamilia.a
6178

6279
OBJS = $(addprefix build/, vose_alias.o inference_engine.o model.o vocab.o document.o sampler.o config.o util.o semantic_matching.o tokenizer.o \
@@ -74,12 +91,16 @@ build/libfamilia.a: include/config.pb.h $(OBJS)
7491
build/%.o: src/%.cpp
7592
@mkdir -p $(@D)
7693
$(CXX) $(INCPATH) $(CXXFLAGS) -MM -MT build/$*.o $< >build/$*.d
77-
$(CXX) $(INCPATH) $(CXXFLAGS) -c $< -o $@
94+
$(CXX) $(INCPATH) $(CXXFLAGS) -c $< -o $@
7895

7996
# build proto
80-
include/config.pb.h src/config.cpp : proto/config.proto
97+
include/config.pb.h src/config.cpp : proto/config.proto
8198
$(PROTOC) --cpp_out=./src --proto_path=./proto $<
8299
mv src/config.pb.h ./include/familia
83100
mv src/config.pb.cc ./src/config.cpp
84101

102+
# Build the Python extension module from the C++ wrapper and the static library.
# The prerequisite is the real file build/libfamilia.a rather than the phony
# target `familia`: a phony prerequisite is always considered out of date, which
# would recompile and relink the shared object on every invocation of make.
# First recipe line compiles the wrapper; second links it against libfamilia,
# its third-party dependencies and the Python library named by PYTHON_VERSION.
python/demo/familia.so : python/cpp/familia_wrapper.cpp build/libfamilia.a
103+
	$(CXX) $(INCPATH) $(CXXFLAGS) -c $< -o python/cpp/familia_wrapper.o
104+
	$(CXX) $(INCPATH) $(CXXFLAGS) -shared python/cpp/familia_wrapper.o $(LDFLAGS_SO) -l$(PYTHON_VERSION) -o $@
105+
85106
-include $(wildcard */*.d *.d)

model/download_model.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,6 @@
22
# 下载主题模型文件
33

44
if [ ! -d news ]; then
5-
wget http://familia.bj.bcebos.com/models/news.tar.gz
6-
tar -xzf news.tar.gz
5+
wget http://familia.bj.bcebos.com/models/news.v1.tar.gz
6+
tar -xzf news.v1.tar.gz
77
fi

python/README.md

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Familia Python接口
2+
3+
## 代码编译
4+
第三方依赖除了Familia C++代码所需要的库之外,还需要依赖python,默认使用当前系统python(支持python2和python3),兼容Linux和Mac操作系统。
5+
默认情况下在Familia目录执行以下脚本会自动获取依赖并编译产生familia.so。
6+
7+
$ sh build.sh # 包含获取并安装第三方依赖的过程
8+
9+
## Python接口
10+
将原先C++代码封装成两个python类(familia_wrapper.py):InferenceEngineWrapper 和 TopicalWordEmbeddingsWrapper.
11+
其中,InferenceEngineWrapper提供了与主题模型相关的接口:
12+
13+
- lda_infer # LDA主题模型推断
14+
- slda_infer # SentenceLDA主题模型推断
15+
- cal_doc_distance # 计算长文本与长文本之间的距离
16+
- cal_query_doc_similarity # 计算短文本跟长文本之间的相关性
17+
18+
TopicalWordEmbeddingsWrapper则提供了与TWE模型相关的接口:
19+
20+
- nearest_words # 寻求与目标词最相关的词
21+
- nearest_words_around_topic # 寻求与目标主题最相关的词
22+
23+
具体使用方法可参照[Demo使用文档](https://github.com/baidu/Familia/wiki/Python-Demo%E4%BD%BF%E7%94%A8%E6%96%87%E6%A1%A3)

0 commit comments

Comments
 (0)