@@ -24,12 +24,19 @@ License, or (at your option) any later version.
2424import itemrecommendations .Resource3LTCalculator ;
2525import itemrecommendations .ZhengCalculator ;
2626
27+ import java .io .BufferedWriter ;
28+ import java .io .File ;
29+ import java .io .FileWriter ;
30+ import java .io .IOException ;
2731import java .util .ArrayList ;
2832import java .util .Arrays ;
2933import java .util .List ;
34+ import java .util .Map ;
3035
36+ import common .Bookmark ;
3137import common .CalculationType ;
3238import common .Features ;
39+ import common .Utilities ;
3340import processing .BLLCalculator ;
3441import processing .BM25Calculator ;
3542import processing .BaselineCalculator ;
@@ -65,9 +72,9 @@ public class Pipeline {
6572 // set for categorizer/describer split (true is describer, false is categorizer - null for nothing)
6673 private final static Boolean DESCRIBER = null ;
6774 // placeholder for the topic postfix
68- private static String TOPIC_NAME = null ;
75+ private static String TOPIC_NAME = "lda_500" ;
6976 // placeholder for the used dataset
70- private final static String DATASET = "cul " ;
77+ private final static String DATASET = "lastfm " ;
7178
7279 public static void main (String [] args ) {
7380 System .out .println ("TagRecommender:\n " + "" +
@@ -88,7 +95,7 @@ public static void main(String[] args) {
8895 // Resource-Recommender testing
8996 String dir = DATASET + "_core" ;
9097 String path = dir + "/" + DATASET + "_sample" ;
91- //getStatistics(path);
98+ //try { getStatistics(path, true); } catch (IOException e) { e.printStackTrace(); }
9299 //writeTensorFiles(path, false);
93100 //evaluate(dir, path, "wrmf_500_mml", TOPIC_NAME, false, true);
94101 //createLdaSamples(path, 1, 500, false);
@@ -477,19 +484,57 @@ private static List<Integer> getBetaValues(int betaUpperBound) {
477484 return betaValues ;
478485 }
479486
480- private static void getStatistics (String dataset ) {
487+ private static void getStatistics (String dataset , boolean writeAll ) throws IOException {
488+ if (TOPIC_NAME != null ) {
489+ dataset += ("_" + TOPIC_NAME );
490+ }
481491 BookmarkReader reader = new BookmarkReader (0 , false );
482492 reader .readFile (dataset );
493+
483494 int bookmarks = reader .getBookmarks ().size ();
484- System .out .println ("Bookmarks : " + bookmarks );
495+ System .out .println ("Posts : " + bookmarks );
485496 int users = reader .getUsers ().size ();
486497 System .out .println ("Users: " + users );
487498 int resources = reader .getResources ().size ();
488499 System .out .println ("Resources: " + resources );
489500 int tags = reader .getTags ().size ();
490501 System .out .println ("Tags: " + tags );
491502 int tagAssignments = reader .getTagAssignmentsCount ();
492- System .out .println ("Tag-Assignments: " + tagAssignments );
503+ System .out .println ("Tag-Assignments: " + tagAssignments );
504+ int categories = reader .getCategories ().size ();
505+ System .out .println ("Topics: " + categories );
506+ double avgBookmarksPerUser = (double )bookmarks / users ;
507+ System .out .println ("Avg. resources/posts per user: " + avgBookmarksPerUser );
508+ double avgBookmarksPerResource = (double )bookmarks / resources ;
509+ System .out .println ("Avg. users/posts per resource: " + avgBookmarksPerResource );
510+
511+ if (writeAll ) {
512+ getTrainTestSize (dataset );
513+ FileWriter userWriter = new FileWriter (new File ("./data/metrics/" + dataset + "_userStats.txt" ));
514+ BufferedWriter userBW = new BufferedWriter (userWriter );
515+ userBW .write ("UserID| NoOfResources| NoOfTopics| Topic-Similarity\n " );
516+ List <Bookmark > trainList = reader .getBookmarks ().subList (0 , TRAIN_SIZE );
517+ List <Integer > testUsers = reader .getUniqueUserListFromTestSet (TRAIN_SIZE );
518+ System .out .println ();
519+
520+ double avgTopicsPerUser = 0.0 ;
521+ double avgTopicDiversityPerUser = 0.0 ;
522+ List <Map <Integer , Double >> userTopics = Utilities .getRelativeTopicMaps (trainList , false );
523+ List <List <Bookmark >> userBookmarks = Utilities .getBookmarks (trainList , false );
524+ for (int userID : testUsers ) {
525+ Map <Integer , Double > topicsOfUser = userTopics .get (userID );
526+ double topicDiversityOfUser = Bookmark .getBookmarkDiversity (userBookmarks .get (userID ));
527+ userBW .write (userID + "| " + reader .getUserCounts ().get (userID ) + "| " + topicsOfUser .keySet ().size () + "| " + topicDiversityOfUser + "\n " );
528+ avgTopicsPerUser += topicsOfUser .keySet ().size ();
529+ avgTopicDiversityPerUser += topicDiversityOfUser ;
530+ }
531+ System .out .println ("Avg. topics per user: " + avgTopicsPerUser / testUsers .size ());
532+ System .out .println ("Avg. topic-similarity per user: " + avgTopicDiversityPerUser / testUsers .size ());
533+ double avgTopicsPerResource = Bookmark .getAvgNumberOfTopics (trainList );
534+ System .out .println ("Avg. topics per resource: " + avgTopicsPerResource );
535+ userBW .flush ();
536+ userBW .close ();
537+ }
493538 }
494539
495540 private static void getTrainTestSize (String sample ) {
0 commit comments