Skip to content

Commit caadf9a

Browse files
committed
Completed example showing how to find duplicate files
1 parent 4a2ac97 commit caadf9a

File tree

2 files changed

+24
-0
lines changed

2 files changed

+24
-0
lines changed
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<metamorph xmlns="http://www.culturegraph.org/metamorph"
3+
version="1" entityMarker=".">
<!--
  Metamorph definition with a single rule: it reads the "SHA1" source field,
  passes it through an occurrence filter and emits surviving values under
  the name "MATCH". According to the name in the meta section, the intent is
  to drop records that contain only one hash.
  Presumably this is the "filter-duplicates.xml" definition loaded by the
  filter step of the accompanying Flux script (TODO confirm the file name).
-->
4+
<meta>
5+
<name>Filter out records which contain only one hash</name>
6+
</meta>
7+
8+
<rules>
<!--
  Rename SHA1 to MATCH, restricted by the occurrence function below.
  NOTE(review): "moreThen 1" is presumably meant as "more than one
  occurrence within a record". The spelling "moreThen" looks like a typo,
  but it must match the keyword that the Metamorph version in use accepts;
  verify against that version before changing it.
-->
9+
<data source="SHA1" name="MATCH">
10+
<occurrence only="moreThen 1" />
11+
</data>
12+
</rules>
13+
</metamorph>
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
// Example Flux pipeline showing how to find duplicate files.
//
// "in" is the pipeline's starting point; it is declared with "default" and
// set to "." (presumably the current directory, and presumably overridable
// by the caller of the script; TODO confirm).
//
// Steps, in the order they appear below:
//   walk-filetree        presumably emits the files found below "in"
//   digest-file("sha1")  presumably computes a SHA-1 digest for each file
//   sort-triples         sorts the triples by their OBJECT part
//   reorder-triple       swaps subject and object of every triple
//   collect-triples      presumably groups consecutive triples into records
//   filter               applies the Metamorph definition
//                        "filter-duplicates.xml" located via FLUX_DIR
//   encode-formeta       serialises the result in "multiline" style
//   write("stdout")      writes the result to standard output
//
// NOTE(review): the pipeline appears to rely on the digest being the triple
// object before the swap, so that equal digests become adjacent after
// sorting and share one subject after reordering. Verify against the
// output format of digest-file.
default in = ".";
2+
3+
in
4+
|walk-filetree
5+
|digest-file("sha1")
6+
|sort-triples(by="OBJECT")
7+
|reorder-triple(subjectFrom="object", objectFrom="subject")
8+
|collect-triples
9+
|filter(FLUX_DIR + "filter-duplicates.xml")
10+
|encode-formeta(style="multiline")
11+
|write("stdout");

0 commit comments

Comments
 (0)