diff --git a/HaskTorrent/HaskTorrent.cabal b/HaskTorrent/HaskTorrent.cabal index 2d613cd..49f3f74 100644 --- a/HaskTorrent/HaskTorrent.cabal +++ b/HaskTorrent/HaskTorrent.cabal @@ -4,7 +4,7 @@ cabal-version: 1.12 -- -- see: https://github.com/sol/hpack -- --- hash: 4fb61dc38e7e8f30c57dbfa4c67e05ac64559c03966e5050e7af41e5ceac196c +-- hash: 9186307ef8ec6d144d4f63a42c23f7362f662770f3e2fee2323767f75e1805d8 name: HaskTorrent version: 0.1.0.0 @@ -26,7 +26,6 @@ source-repository head library exposed-modules: - Api DB.Access DB.Search Lib @@ -50,8 +49,6 @@ library , memory , mongoDB , network - , persistent - , persistent-template , servant-multipart , servant-server , text @@ -84,8 +81,6 @@ executable HaskTorrent-exe , memory , mongoDB , network - , persistent - , persistent-template , servant-multipart , servant-server , text @@ -122,8 +117,6 @@ test-suite HaskTorrent-test , memory , mongoDB , network - , persistent - , persistent-template , servant-multipart , servant-server , text diff --git a/HaskTorrent/README.md b/HaskTorrent/README.md index e4de5fc..3a6ea11 100644 --- a/HaskTorrent/README.md +++ b/HaskTorrent/README.md @@ -1 +1,73 @@ # HaskTorrent + +## Brief Summary + +1. **Language:** Haskell + +2. **Libraries:** + + 1. **Rest WebAPI:** [Servant](https://hackage.haskell.org/package/servant-server) + 2. **HTTP Server:** [Warp](https://hackage.haskell.org/package/warp) + 3. **MongoDB Database Access:** [mongoDB](https://hackage.haskell.org/package/mongoDB) + 4. **BSON Parsing (for DB):** [bson](https://hackage.haskell.org/package/bson) + 5. **JSON Parsing (for API):** [aeson](https://hackage.haskell.org/package/aeson) + +3. **Database:** MongoDB + +4. **Search Backend:** MongoDB Full Text Search. + +## Fuzzy Text Search Implementation + + The methodology is simple. + +1. During metainfo upload, process the incoming text fields to generate + a list of [ngrams](https://en.wikipedia.org/wiki/N-gram). We use 3-grams and 4-grams. +2. 
This list of N-Grams is used as a text-index for the torrents in MongoDB. +3. Any search queries will be processed in the same way as step one. + This processed query is directly sent to MongoDB text search. + +The processing applied to the text is + +1. A simple [stop word](https://en.wikipedia.org/wiki/Stop_words) filter using the words from [here](https://www.ranks.nl/stopwords). After this step, the text is reduced to a list of key-words +2. In order to allow fuzziness in searching, the keywords are then passed + through another filter that produces 3-grams and 4-grams of the same. + +Misspelt queries often contain some patterns that match those found in exact queries. Since we store text-indices to identify torrents, we can rank appropriate documents using the similarity algorithms and standard information-retrieval methods provided by MongoDB. + +Pattern similarity helps us bring fuzziness in spite of the limited capabilities of MongoDB Full Text Search. + +## API Description + +This package's primary function is to serve a REST API with the following routes + +- (GET) "/search/:search": Takes search term as parameter, and returns a JSON Array of objects, containing brief descriptions of possible matches. The Object contains the following keys + + - 'infoHash': ID to use for fetching detailed description or download + + - 'title': Self explanatory + + - 'seed': Number of seeders for that torrent + + - 'leech': Number of leechers for that torrent + + - 'created by': Name of torrent creator, anonymous if not specified. + + - 'creation date': Time at which torrent was first uploaded. + +- (GET) "/torrent/desc/:infoHash": Takes the infoHash obtained from the previous route as a parameter, and returns a JSON Object containing detailed description of the torrent. Same keys as those mentioned above, with the addition of + + - 'file-list': List of files and their directory structure found in the torrent, including sizes. 
+ + - 'description': A plain text description of the torrent written by the uploader to describe the contents of the torrent. + +- (GET) "/torrent/download/:infoHash": Takes infoHash as parameter, and returns the .torrent metainfo file, which is automatically opened by Torrent Clients on most browsers, as it has mime-type "application/x-bittorrent" + +- (POST) "/upload" : Takes a JSON Form containing + + - 'title': Title of the Torrent (used for search) + + - 'description': Plain text description of the torrent explaining its contents. (used for search) + + - 'metainfo': Contains .torrent file to be uploaded, in ByteArray representation + + Returns a String, containing the infoHash of the newly uploaded torrent. diff --git a/HaskTorrent/package.yaml b/HaskTorrent/package.yaml index 166c5ee..bafa68a 100644 --- a/HaskTorrent/package.yaml +++ b/HaskTorrent/package.yaml @@ -32,8 +32,6 @@ dependencies: - http-media - wai-extra - warp -- persistent -- persistent-template - bencode - bson - mongoDB diff --git a/HaskTorrent/src/Api.hs b/HaskTorrent/src/Api.hs deleted file mode 100644 index 6ea9f4b..0000000 --- a/HaskTorrent/src/Api.hs +++ /dev/null @@ -1,8 +0,0 @@ -{-# LANGUAGE TypeOperators #-} -{-# LANGUAGE DataKinds #-} - -module Api where - -import Types -import Servant - diff --git a/HaskTracker/CHANGELOG.md b/HaskTracker/CHANGELOG.md deleted file mode 100644 index 540c52f..0000000 --- a/HaskTracker/CHANGELOG.md +++ /dev/null @@ -1,5 +0,0 @@ -# Revision history for summer-proj - -## 0.1.0.0 -- YYYY-mm-dd - -* First version. Released on an unsuspecting world. diff --git a/HaskTracker/README.md b/HaskTracker/README.md new file mode 100644 index 0000000..0d46c82 --- /dev/null +++ b/HaskTracker/README.md @@ -0,0 +1,29 @@ +# HaskTracker + +## Brief Summary + +1. **Language:** [Haskell](https://www.haskell.org/) + +2. **Libraries**: + + 1. **Rest WebAPI:** [Servant](https://hackage.haskell.org/package/servant-server) + + 2. 
**HTTP Server:** [Warp](https://hackage.haskell.org/package/warp) + + 3. **UDP Server (Sockets):** [Network](https://hackage.haskell.org/package/network) + + 4. **Concurrency Management:** Haskell Base Libraries + +## Design Description + +This tracker provides two services that are described in the [BEP Specifications](https://www.bittorrent.org/beps/bep_0000.html). + +- Responds to *Announce* Requests at 'http://tracker-ip:6969/announce' and 'udp://tracker-ip:6969/announce' + +- Responds to *Scrape* Requests at 'http://tracker-ip:6969/scrape' and 'udp://tracker-ip:6969/scrape' + +Protocol-specific information on the exact structure of these requests can be found at [TheoryOrg](https://wiki.theory.org/BitTorrentSpecification). + +The peer-selection protocol is effectively a round-robin algorithm: peers are selected at random, thus providing each peer an equal probability of selection and ensuring fairness. + +The state information for this tracker is maintained in memory, and can be rebuilt subsequently after power-failures or server downtimes, providing no additional server interruption to clients. 
diff --git a/HaskTracker/src/Data/Torrent/DB/Models.hs b/HaskTracker/src/Data/Torrent/DB/Models.hs deleted file mode 100644 index e69de29..0000000 diff --git a/README.md b/README.md index 3a37eb5..266f492 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,34 @@ -# Minor Project - +# HaskTorrent (B.Tech Minor Project: Aug '20) + +[](https://youtu.be/TPgXMHi15TI) + +## Introduction + +This project implements a Complete P2P Torrent File Distribution System, providing + +- **HaskTracker:** A Torrent Tracking server, implemented in Haskell as per the BitTorrent Protocol, cross compatible with all commonly available [Torrent Clients](https://fossbytes.com/best-torrent-client-windows-free-torrent-downloaders/) + +- **HaskTorrent:** A Torrent MetaInfo Distribution API providing endpoints to upload/download .torrent files, perform fuzzy text search for torrents, and get detailed descriptions of .torrent files. Implemented using Haskell and MongoDB. + +- **hask-react:** A server-decoupled ReactJS frontend that utilizes the **HaskTorrent** API. + +Further Documentation for each Module can be found in its folder. + +## Installation + +### Database Setup -## Pre-Build Install MongoDB, create a database with authentication. Note the port on which mongo is running. +``` +# In Mongo Shell +> use torrentDB +> db.createCollection("torrents") +> db.torrents.createIndex({ 'ngrams': 'text'}) +> db.torrents.createIndex({ 'infoHash': 1}, {unique: true}) +``` + Add the following environment variables to the ```~/.bash_profile``` and run ```source ~/.bash_profile``` @@ -22,87 +46,27 @@ export TorrDBUserName="user" export TorrDBPassWord="password" ``` -## Build and REPL for testing +## HaskTracker + +Uses cabal. 
+ ```bash -# In the package directory cabal v2-build -# To create the necessary indices in the Database for text search -cabal v2-run torrdb-init -# To test tracker libraries in REPL -cabal v2-repl hask-tracker -# To run the tracker cabal v2-run hask-tracker - -# For testing out the torrent database, -# To upload localhost:8080/upload -# To download localhost:8080/download -mv static/ /var/www/html/ -cabal v2-run torrent-db -``` - -Once started, find the address of the host that is running the tracker over the desired -network on which to share the files. - -If the address is ```192.168.a.b```, then the trackers to add while creating the torrent are -``` -http://192.168.a.b:6969/announce -udp://192.168.a.b:6969/announce ``` -In any torrent-client, ensure that the torrent is automatically added to queue as soon as it is created -in order to create the first seeder. - -## Project Features -### Tracker -The Tracker itself uses [Servant](https://hackage.haskell.org/package/servant-server) along with [Warp](https://hackage.haskell.org/package/warp) -as a part of the HTTP Tracker, and the standard [network](https://hackage.haskell.org/package/network) package for the UDP Server. - -The tracker has been implemented using the [BEP Specifications](https://www.bittorrent.org/beps/bep_0000.html) as well as [unofficial community documentation] (https://wiki.theory.org/BitTorrentSpecification). - -This tracker is upto standard and supports standard announce as well as scrape features. - -### Database -This package utilizes MongoDB in order to store the .torrent metainfo files. -In order to utilize MongoDB via haskell, [the mongoDB package](https://hackage.haskell.org/package/mongoDB) was utilized. 
- -Since this library takes queries as well as give responses in BSON - encoding, represented in haskell using [Data.Bson](https://hackage.haskell.org/package/bson), - -It was necessary to implement a translation layer that takes -translates MetaInformation to BSON from BEncoding, -as well as to BEncoding from BSON - -In order to parse BEncoded metainfo , [bencode](https://hackage.haskell.org/package/bencode) was used. +## HaskTorrent -### Search Backend -This is largely implemented using the inbuilt MongoDB full text search. -However, since MongoDB text searches do not allow fuzzy searches, -(misspellings in query, etc), It was necessary to enhance it. +Uses Stack. -For this application ElasticSearch is neither practical nor secure enough -to allow usage. - -The methodology is simple. - -1. During metainfo upload, process the incoming text fields to generate - a list of [ngrams](https://en.wikipedia.org/wiki/N-gram). We use 3-grams - and 4-grams. -2. Along with translated metainfo, add another field that contains these ngrams - generated from the text fields. -3. Create a text index in the torrents collection for the ngrams field. -4. Any search queries will be processed in the same way as step one. - This new search query is directly sent to MongoDB text search. - -The processing applied to the text is -1. A simple [stop word](https://en.wikipedia.org/wiki/Stop_words) filter. - The stop words are as specified [here](https://www.ranks.nl/stopwords). After this step, the text is reduced to a list of key-words -2. In order to allow fuzziness in searching, the keywords are then passed - through another filter that produces 3-grams and 4-grams of the same. - -Even for misspelt queries, we assume that there may be some patterns in it that match the patterns of the actual word we are looking for. This is why even with the mongoDB full text searching, we are able to build in some fuzziness. 
+```bash +stack build +stack run +``` -Documents are ranked based on number of times query ngrams are found in their list of ngrams +## hask-react -### Search Frontend -The web-based frontend for the search module is implemented using once again [Servant](https://hackage.haskell.org/package/servant-server) -along with [blaze-html](https://hackage.haskell.org/package/blaze-html) for generating HTML from templates +```bash +yarn install +yarn start +``` diff --git a/hask-react/.eslintcache b/hask-react/.eslintcache index e6622e4..9db4a25 100644 --- a/hask-react/.eslintcache +++ b/hask-react/.eslintcache @@ -1 +1 @@ -[{"/home/anjan/lang-prac/haskell/Haskell-Torrent-Tracker-and-Search-Engine/hask-react/src/index.js":"1","/home/anjan/lang-prac/haskell/Haskell-Torrent-Tracker-and-Search-Engine/hask-react/src/pages/Error404.js":"2","/home/anjan/lang-prac/haskell/Haskell-Torrent-Tracker-and-Search-Engine/hask-react/src/components/Header.js":"3","/home/anjan/lang-prac/haskell/Haskell-Torrent-Tracker-and-Search-Engine/hask-react/src/static/js/mkDirTree.js":"4"},{"size":397,"mtime":1609764739335,"results":"5","hashOfConfig":"6"},{"size":547,"mtime":1609841302313,"results":"7","hashOfConfig":"6"},{"size":1105,"mtime":1609783777295,"results":"8","hashOfConfig":"6"},{"size":1645,"mtime":1610142512494,"results":"9","hashOfConfig":"6"},{"filePath":"10","messages":"11","errorCount":0,"warningCount":0,"fixableErrorCount":0,"fixableWarningCount":0},"6p4mhv",{"filePath":"12","messages":"13","errorCount":0,"warningCount":1,"fixableErrorCount":0,"fixableWarningCount":0,"source":null},{"filePath":"14","messages":"15","errorCount":0,"warningCount":3,"fixableErrorCount":0,"fixableWarningCount":0,"source":null},{"filePath":"16","messages":"17","errorCount":0,"warningCount":1,"fixableErrorCount":0,"fixableWarningCount":0,"source":null},"/home/anjan/lang-prac/haskell/Haskell-Torrent-Tracker-and-Search-Engine/hask-react/src/index.js",[],"/home/anjan/lang-prac/haskell/Haskell-Torrent-Tracker-and
-Search-Engine/hask-react/src/pages/Error404.js",["18"],"/home/anjan/lang-prac/haskell/Haskell-Torrent-Tracker-and-Search-Engine/hask-react/src/components/Header.js",["19","20","21"],"/home/anjan/lang-prac/haskell/Haskell-Torrent-Tracker-and-Search-Engine/hask-react/src/static/js/mkDirTree.js",["22"],{"ruleId":"23","severity":1,"message":"24","line":16,"column":7,"nodeType":"25","endLine":20,"endColumn":11},{"ruleId":"23","severity":1,"message":"24","line":16,"column":9,"nodeType":"25","endLine":16,"endColumn":27},{"ruleId":"26","severity":1,"message":"27","line":33,"column":9,"nodeType":"25","endLine":33,"endColumn":12},{"ruleId":"26","severity":1,"message":"27","line":36,"column":9,"nodeType":"25","endLine":36,"endColumn":12},{"ruleId":"28","severity":1,"message":"29","line":39,"column":26,"nodeType":"30","messageId":"31","endLine":39,"endColumn":30},"jsx-a11y/alt-text","img elements must have an alt prop, either with meaningful text, or an empty string for decorative images.","JSXOpeningElement","jsx-a11y/anchor-is-valid","The href attribute is required for an anchor to be keyboard accessible. Provide a valid, navigable address as the href value. If you cannot provide an href, but still need the element to resemble a link, use a button and change it with appropriate styles. 
Learn more: https://github.com/evcohen/eslint-plugin-jsx-a11y/blob/master/docs/rules/anchor-is-valid.md","no-unused-vars","'size' is assigned a value but never used.","Identifier","unusedVar"] \ No newline at end of file +[{"/home/anjan/lang-prac/haskell/Haskell-Torrent-Tracker-and-Search-Engine/hask-react/src/index.js":"1","/home/anjan/lang-prac/haskell/Haskell-Torrent-Tracker-and-Search-Engine/hask-react/src/pages/Error404.js":"2","/home/anjan/lang-prac/haskell/Haskell-Torrent-Tracker-and-Search-Engine/hask-react/src/components/Header.js":"3","/home/anjan/lang-prac/haskell/Haskell-Torrent-Tracker-and-Search-Engine/hask-react/src/static/js/mkDirTree.js":"4","/home/anjan/lang-prac/haskell/Haskell-Torrent-Tracker-and-Search-Engine/hask-react/src/serviceWorker.js":"5","/home/anjan/lang-prac/haskell/Haskell-Torrent-Tracker-and-Search-Engine/hask-react/src/pages/Search.js":"6","/home/anjan/lang-prac/haskell/Haskell-Torrent-Tracker-and-Search-Engine/hask-react/src/pages/TorrentDesc.js":"7"},{"size":397,"mtime":1609764739335,"results":"8","hashOfConfig":"9"},{"size":547,"mtime":1609841302313,"results":"10","hashOfConfig":"9"},{"size":1105,"mtime":1609783777295,"results":"11","hashOfConfig":"9"},{"size":1645,"mtime":1610142512494,"results":"12","hashOfConfig":"9"},{"size":5086,"mtime":1603113480615,"results":"13","hashOfConfig":"9"},{"size":740,"mtime":1610060505831,"results":"14","hashOfConfig":"9"},{"size":2858,"mtime":1610143270313,"results":"15","hashOfConfig":"9"},{"filePath":"16","messages":"17","errorCount":0,"warningCount":0,"fixableErrorCount":0,"fixableWarningCount":0,"usedDeprecatedRules":"18"},"6p4mhv",{"filePath":"19","messages":"20","errorCount":0,"warningCount":1,"fixableErrorCount":0,"fixableWarningCount":0,"source":"21"},{"filePath":"22","messages":"23","errorCount":0,"warningCount":3,"fixableErrorCount":0,"fixableWarningCount":0,"source":null},{"filePath":"24","messages":"25","errorCount":0,"warningCount":1,"fixableErrorCount":0,"fixableWarningCount":0,"s
ource":null},{"filePath":"26","messages":"27","errorCount":0,"warningCount":0,"fixableErrorCount":0,"fixableWarningCount":0},{"filePath":"28","messages":"29","errorCount":0,"warningCount":3,"fixableErrorCount":0,"fixableWarningCount":0,"source":null},{"filePath":"30","messages":"31","errorCount":0,"warningCount":2,"fixableErrorCount":0,"fixableWarningCount":0,"source":null},"/home/anjan/lang-prac/haskell/Haskell-Torrent-Tracker-and-Search-Engine/hask-react/src/index.js",[],["32","33"],"/home/anjan/lang-prac/haskell/Haskell-Torrent-Tracker-and-Search-Engine/hask-react/src/pages/Error404.js",["34"],"import React from \"react\";\nimport doge from \"../static/img/error404.jpg\"\n\nconst Error404 = (props) => {\n return (\n
ERROR! I CAN'T FIND WHAT YOU WANT!
\n \n Do me pardon pls.\n \n