Improved documentation

aryans1204 · aryans1204 · commit 6befe0182686 · 2025-02-26T21:29:46.000-06:00
Signed-off-by: aryans1204 <arshar1204@gmail.com>

Fixed spellcheck in documentation

Signed-off-by: aryans1204 <arshar1204@gmail.com>
diff --git a/docs/generate_deploy_info_docs.md b/docs/generate_deploy_info_docs.md
@@ -1,13 +1,10 @@
 # Deploy Info JSON
 
-## The `deploy_info.json` file in the `vSwarm_deploy_metadata.tar.gz`  is used to identify the relative file paths for the Knative YAML manifests for deploying vSwarm functions. It is generated using `workloads/container/generate_deploy_info.py` Python script, which outputs a JSON that embeds the yaml-location and pre-deployment commands for every vSwarm function. It also contains the path of YAML files needed as part of the pre-deployment commands to run certain vSwarm benchmarks, for example the `online-shop-database` which requires to be deployed before running `cartservice` benchmark.
+The `deploy_info.json` file is used to identify deployment information for services and their pre-dependencies. It is pre-generated, and stored inside the `vSwarm_deploy_metadata.tar.gz` to contain deployment information for vSwarm functions.
 
-While the `deploy_info.json` file ships with the `vSwarm_deploy_metadata.tar.gz`, In order to regenerate the `deploy_info.json` run from the root of this repository:
-```console
-tar -xzvf workloads/container/vSwarm_deploy_metadata.tar.gz -C workloads/container
-cd workloads/container/
-python3 generate_deploy_info.py
-```
+## Schema and Usage
+
+The `deploy_info.json` file in the `vSwarm_deploy_metadata.tar.gz` is used to identify the relative file paths for the Knative YAML manifests for deploying vSwarm functions. It is generated using the `workloads/container/generate_deploy_info.py` Python script, which outputs a JSON file that embeds the yaml-location and pre-deployment commands for every vSwarm function. It also contains the paths of YAML files needed as part of the pre-deployment commands to run certain vSwarm benchmarks, for example the `online-shop-database`, which must be deployed before running the `cartservice` benchmark.
 
 The `deploy_info.json` has the following schema:
 ```console
@@ -20,9 +17,19 @@ The `deploy_info.json` has the following schema:
 }
 ```
 
-## The `PredeploymentPath` is the path to the YAML file, which is applied via `kubectl apply -f`, before creating the service under `YamlLocation`. This pre-deployment step is required in some vSwarm benchmarks, like `cartservice` which depends on a separate service `online-shop-database` before it can be started.
+The `PredeploymentPath` is the path to the YAML file, which is applied via `kubectl apply -f`, before creating the service under `YamlLocation`. This pre-deployment step is required in some vSwarm benchmarks, like `cartservice` which depends on a separate service `online-shop-database` before it can be started.
+## Deployment File Generation
+
+While the `deploy_info.json` file ships with the `vSwarm_deploy_metadata.tar.gz`, you can regenerate it by running the following from the root of this repository:
+```console
+tar -xzvf workloads/container/vSwarm_deploy_metadata.tar.gz -C workloads/container
+cd workloads/container/
+python3 generate_deploy_info.py
+```
+
+## YAML Generation
 
-Similarly, the `workloads/container/generate_all_yamls.py` is a wrapper script that calls the `generate-yamls.py` script for each vSwarm benchmark in the `vSwarm_deploy_metadata.tar.gz`. The `generate-yamls.py` Python script parameterize the YAML script for the benchmark for different configurations, and creates YAML files accordingly. 
+`workloads/container/generate_all_yamls.py` is a wrapper script that calls the `generate-yamls.py` script for each vSwarm benchmark in the `vSwarm_deploy_metadata.tar.gz`. The `generate-yamls.py` Python script generates YAML files for vSwarm benchmarks with different workload parameters, in order to cover a variety of durations and memory consumption levels.
 
 While the YAMLs are pre-generated inside the `vSwarm_deploy_metadata.tar.gz` tarball, to regenerate the YAMLs, run this from the root of this repository:
 ```console
diff --git a/docs/loader.md b/docs/loader.md
@@ -110,7 +110,7 @@ To account for difference in CPU performance set `ITERATIONS_MULTIPLIER=102` if
 Cloudlab `xl170` or `d430` machines. (Date of measurement: 18-Oct-2022)
 
 ## Executing vSwarm functions
-If the input trace directory has a `mapper_output.json` file, which you would like to use as profiles for benchmark execution in the loader, run the following from the root of this directory:
+If you would like to use vSwarm benchmarks as profile functions to execute, first you need to generate a `mapper_output.json` using the `mapper` tool. Please refer to `mapper.md` docs for usage of the mapper tool to generate an output file. Once the `mapper_output.json` has been generated in the input trace directory, next run the following from the root of this repository:
 
 ```console
 # install pre-requisites
@@ -120,11 +120,12 @@ git lfs install
 git lfs fetch
 git lfs checkout
 ```
-This retrieves the `yamls.tar.gz` from Git LFS. Then, untar this tarball by running the following command from the root of this directory:
+This retrieves the `vSwarm_deploy_metadata.tar.gz` from Git LFS. Then, untar this tarball by running the following command from the root of this repository:
 
 ```bash
 $ tar -xzvf workloads/container/vSwarm_deploy_metadata.tar.gz -C workloads/container/
 ```
+This untars the tarball into the `workloads/container/yamls` directory, which contains deployment information and deployment YAML files for all vSwarm benchmarks. If you would like to change some of these deployment files or regenerate them, refer to `generate_deploy_info_docs.md` for an outline of what these deployment files are, and how you can regenerate them.
 
 ## Single execution
 
@@ -138,12 +139,12 @@ To run load generator to use vSwarm functions based on `mapper_output.json` run
 ```bash
 $ go run cmd/loader.go --config cmd/config_vswarm_trace.json
 ```
+The loader is instructed to run vSwarm functions by setting the `VSwarm` flag in the loader configuration to `true`. For information on how to configure the workload for the load generator, please refer to `docs/configuration.md`.
+
 Additionally, one can specify log verbosity argument as `--verbosity [info, debug, trace]`. The default value is `info`.
 
 To execute in a dry run mode without generating any load, set the `--dry-run` flag to `true`. This is useful for testing and validating configurations without executing actual requests.
 
-For to configure the workload for load generator, please refer to `docs/configuration.md`.
-
 There are a couple of constants that should not be exposed to the users. They can be examined and changed
 in `pkg/common/constants.go`.
 
diff --git a/pkg/trace/mapper_trace_parser.go b/pkg/trace/mapper_trace_parser.go
@@ -22,7 +22,13 @@ type DeploymentInfo struct {
 	PredeploymentPath []string
 }
 
-type JSONParser map[string]DeploymentInfo
+type MapperOutput struct {
+	ProxyFunction string `json:"proxy-function"`
+}
+
+type functionToDeploymentInfo map[string]DeploymentInfo
+
+type functionToProxy map[string]MapperOutput
 
 func NewMapperParser(directoryPath string, totalDuration int) *MapperTraceParser {
 	return &MapperTraceParser{
@@ -33,7 +39,7 @@ func NewMapperParser(directoryPath string, totalDuration int) *MapperTraceParser
 	}
 }
 
-func (p *MapperTraceParser) extractFunctions(mapperOutput map[string]map[string]string, deploymentInfo JSONParser, dirPath string) []*common.Function {
+func (p *MapperTraceParser) extractFunctions(mapperOutput functionToProxy, deploymentInfo functionToDeploymentInfo, dirPath string) []*common.Function {
 	var result []*common.Function
 
 	invocations := parseInvocationTrace(dirPath+"/invocations.csv", p.duration)
@@ -48,17 +54,17 @@ func (p *MapperTraceParser) extractFunctions(mapperOutput map[string]map[string]
 		hashFunction := invocationStats.HashFunction
 		hashApp := invocationStats.HashApp
 		hashOwner := invocationStats.HashOwner
-		proxyFunction := mapperOutput[hashFunction+hashOwner+hashApp]["proxy-function"]
+		proxyFunction := mapperOutput[hashFunction+hashOwner+hashApp].ProxyFunction
 		yamlPath := deploymentInfo[proxyFunction].YamlLocation
-		predeploymentPaths := deploymentInfo[proxyFunction].PredeploymentPath
+		predeploymentPath := deploymentInfo[proxyFunction].PredeploymentPath
 		function := &common.Function{
 			Name: fmt.Sprintf("%s-%d-%d", proxyFunction, i, p.functionNameGenerator.Uint64()),
 
 			InvocationStats:   &invocationStats,
 			RuntimeStats:      runtimeByHashFunction[hashFunction],
 			MemoryStats:       memoryByHashFunction[hashFunction],
 			YAMLPath:          yamlPath,
-			PredeploymentPath: predeploymentPaths,
+			PredeploymentPath: predeploymentPath,
 		}
 
 		result = append(result, function)
@@ -69,8 +75,8 @@ func (p *MapperTraceParser) extractFunctions(mapperOutput map[string]map[string]
 
 func (p *MapperTraceParser) Parse() []*common.Function {
 	var functions []*common.Function
-	var mapperOutput map[string]map[string]string // HashFunction mapped to vSwarm function yaml.
-	var deploymentInfo JSONParser
+	var mapperOutput functionToProxy
+	var deploymentInfo functionToDeploymentInfo
 	// Read the deployment info file for yaml locations and predeployment commands if any
 	deploymentInfoFile, err := os.ReadFile("test_data/test_deploy_info.json")
 	if err != nil {