Skip to content

Commit 88dc8eb

Browse files
committed
remove redundant files
1 parent 42350a7 commit 88dc8eb

File tree

4 files changed

+27
-81
lines changed

4 files changed

+27
-81
lines changed

datasets/MIND/data/behavior.py

Lines changed: 0 additions & 48 deletions
This file was deleted.

datasets/MIND/data/lineCount.py

Lines changed: 0 additions & 25 deletions
This file was deleted.

datasets/MIND/data/make_article.py

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -56,11 +56,8 @@
5656

5757
print(inx)
5858
print(len(article_map))
59-
file_base = ["train", "dev", "test"]
60-
files = [
61-
"train_raw/behaviors.tsv", "dev_raw/behaviors.tsv",
62-
"test_raw/behaviors.tsv"
63-
]
59+
file_base = ["train", "dev"]
60+
files = ["train_raw/behaviors.tsv", "dev_raw/behaviors.tsv"]
6461
for base, aim in zip(files, file_base):
6562
with open(aim + "/browse.txt", "w") as w1:
6663
print("generate " + aim)
@@ -91,3 +88,25 @@
9188

9289
line = visit + "\t" + pos_sample + "\t" + neg_sample + "\n"
9390
w1.write(line)
91+
92+
93+
def remove(str, sp1, sp2):
    """Replace the last occurrence of ``sp1`` in ``str`` with ``sp2``.

    Args:
        str: The text to search. (The name shadows the builtin; it is kept
            so existing keyword callers keep working.)
        sp1: The separator to look for. May be longer than one character.
        sp2: The replacement text; may be empty or longer than ``sp1``.

    Returns:
        A ``(text, found)`` tuple. When ``sp1`` occurs in ``str``, ``text`` is
        ``str`` with only the right-most occurrence replaced and ``found`` is
        ``True``. Otherwise the tuple is ``('', False)``.
    """
    # An empty separator matches nothing; without this guard rpartition
    # would raise ValueError("empty separator").
    if not sp1:
        return '', False

    # rpartition splits around the right-most occurrence and yields an empty
    # middle element when the separator is absent.
    head, found, tail = str.rpartition(sp1)
    if not found:
        return '', False
    return head + sp2 + tail, True
104+
105+
106+
# Build test/browse.txt from test_raw/behaviors.tsv.
# For every input row, the tab-separated fields at index 3 and 4 are taken
# (presumably the click history and the impression list of the MIND
# behaviors format -- TODO confirm). The last space in field 3 is turned
# into a tab, and the result is written followed by a tab and field 4.
# Rows whose field 3 contains no space are not written at all.
# NOTE(review): a row with fewer than five tab-separated fields makes the
# two-name unpacking raise ValueError.
with open("test_raw/behaviors.tsv", "r") as src, \
        open("test/browse.txt", "w") as dst:
    for row in src:
        history, impressions = row.split('\t')[3:5]
        rewritten, ok = remove(history, ' ', '\t')
        if not ok:
            continue
        # No newline is appended here; presumably `impressions` is the final
        # column and still carries the row's line ending -- verify.
        dst.write(rewritten + "\t" + impressions)

datasets/MIND/data/run.sh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
set -v
2-
wget https://paddlerec.bj.bcebos.com/datasets%2FMIND%2FMINDlarge_train.zip
3-
wget https://paddlerec.bj.bcebos.com/datasets%2FMIND%2FMINDlarge_dev.zip
4-
wget https://paddlerec.bj.bcebos.com/datasets%2FMIND%2FMINDlarge_test.zip
2+
#wget https://paddlerec.bj.bcebos.com/datasets%2FMIND%2FMINDlarge_train.zip
3+
#wget https://paddlerec.bj.bcebos.com/datasets%2FMIND%2FMINDlarge_dev.zip
4+
#wget https://paddlerec.bj.bcebos.com/datasets%2FMIND%2FMINDlarge_test.zip
55
unzip datasets%2FMIND%2FMINDlarge_train.zip -d ./train_raw
66
unzip datasets%2FMIND%2FMINDlarge_dev.zip -d ./dev_raw
77
unzip datasets%2FMIND%2FMINDlarge_test.zip -d ./test_raw

0 commit comments

Comments
 (0)