Skip to content

Commit fb5edde

Browse files
committed
added comments to examples
1 parent 199d062 commit fb5edde

File tree

5 files changed

+25
-11
lines changed

5 files changed

+25
-11
lines changed

examples/basic_run.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
# load and preprocess the dataset from csv
99
data = Dataset("datasets/Abalone.csv")
1010

11-
# Create a problem:::
11+
# Create a problem
1212
# dimension represents the dimension of the problem;
1313
# features represent the list of features, while transactions depict the list of transactions
1414
# the following 4 elements represent weights (support, confidence, coverage, shrinkage)

examples/basic_run_with_get_rules.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,18 @@
11
from niaarm import Dataset, get_rules
22
from niapy.algorithms.basic import DifferentialEvolution
33

4-
4+
# load dataset
55
data = Dataset("datasets/Abalone.csv")
6+
7+
# initialize the algorithm
68
algo = DifferentialEvolution(
79
population_size=50, differential_weight=0.5, crossover_probability=0.9
810
)
11+
12+
# define metrics to be used in fitness computation
913
metrics = ("support", "confidence")
1014

15+
# mine association rules
1116
res = get_rules(data, algo, metrics, max_iters=30, logging=True)
1217
# or rules, run_time = get_rules(...)
1318

examples/data_squashing.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,11 @@
11
from niaarm.dataset import Dataset
22
from niaarm.preprocessing import squash
33

4-
4+
# load dataset
55
dataset = Dataset("datasets/Abalone.csv")
6+
7+
# squash the dataset with a threshold of 0.9, using Euclidean distance as a similarity measure
68
squashed = squash(dataset, threshold=0.9, similarity="euclidean")
9+
10+
# print the squashed dataset
711
print(squashed)

examples/text_mining.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,11 @@
33
from niaarm.mine import get_text_rules
44
from niapy.algorithms.basic import ParticleSwarmOptimization
55

6+
# load corpus and extract the documents as a list of strings
67
df = pd.read_json("datasets/text/artm_test_dataset.json", orient="records")
78
documents = df["text"].tolist()
89

10+
# create a Corpus object from the documents (requires nltk's punkt tokenizer and the stopwords list)
911
try:
1012
corpus = Corpus.from_list(documents)
1113
except LookupError:
@@ -15,21 +17,21 @@
1517
nltk.download("stopwords")
1618
corpus = Corpus.from_list(documents)
1719

20+
# the rest is pretty much the same as with the numerical association rules
21+
# 1. Init algorithm
22+
# 2. Define metrics
23+
# 3. Run algorithm
1824
algorithm = ParticleSwarmOptimization(population_size=200, seed=123)
1925
metrics = ("support", "confidence", "aws")
2026
rules, time = get_text_rules(
2127
corpus,
22-
max_terms=5,
28+
max_terms=8,
2329
algorithm=algorithm,
2430
metrics=metrics,
2531
max_evals=10000,
2632
logging=True,
2733
)
2834

29-
if len(rules):
30-
print(rules)
31-
print(f"Run time: {time:.2f}s")
32-
rules.to_csv("output.csv")
33-
else:
34-
print("No rules generated")
35-
print(f"Run time: {time:.2f}s")
35+
print(rules)
36+
print(f"Run time: {time:.2f}s")
37+
rules.to_csv("output.csv")

examples/visualization.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,14 @@
22
from niaarm import Dataset, get_rules
33
from niaarm.visualize import hill_slopes
44

5+
# Load dataset and mine rules
56
dataset = Dataset("datasets/Abalone.csv")
67
metrics = ("support", "confidence")
78
rules, _ = get_rules(
89
dataset, "DifferentialEvolution", metrics, max_evals=1000, seed=1234
910
)
11+
12+
# Visualize any rule using the hill_slopes function like so:
1013
some_rule = rules[150]
1114
print(some_rule)
1215
fig, ax = hill_slopes(some_rule, dataset.transactions)

0 commit comments

Comments
 (0)