bzhanglab · iblacksand · Apr 25, 2024 · Apr 25, 2024 · Apr 25, 2024 · Apr 25, 2024
diff --git a/README.md b/README.md
@@ -4,16 +4,26 @@
 
 Rust implementation of [WebGestaltR](https://github.com/bzhanglab/webgestaltr).
 
+## Notes
+
+This CLI is focused purely on computation. **It does not provide GMT files or HTML reports**. The output of this tool is JSON files containing the results. For a more feature-complete tool, see the original [WebGestaltR](https://bzhanglab.github.io/WebGestaltR/) tool.
+
 ## Install
 
 ```shell
-git clone https://github.com/bzhanglab/webgestalt_rust.git
-cd webgestalt_rust
-cargo build --release
+cargo install webgestalt
 ```
 
-## Run
+## CLI
+
+For help with the CLI, run:
 
 ```shell
-cargo run --release -- example ora
+webgestalt --help
 ```
+
+Example of running over-representation analysis using `kegg.gmt`, with an interesting list at `int.txt` and a reference list at `ref.txt`. The results are written as a JSON file to `output.json`:
+
+```shell
+webgestalt ora -g kegg.gmt -i int.txt -r ref.txt -o output.json
+```
diff --git a/src/main.rs b/src/main.rs
@@ -1,3 +1,4 @@
+#![doc = include_str!("../README.md")]
 use clap::{Args, Parser};
 use clap::{Subcommand, ValueEnum};
 use owo_colors::{OwoColorize, Stream::Stdout, Style};

diff --git a/webgestalt_lib/src/lib.rs b/webgestalt_lib/src/lib.rs
@@ -1,10 +1,10 @@
+#![doc = include_str!("../README.md")]
 use std::{error::Error, fmt};
 
 pub mod methods;
 pub mod readers;
 pub mod stat;
 pub mod writers;
-
 trait CustomError {
     fn msg(&self) -> String;
 }

diff --git a/webgestalt_lib/src/methods/gsea.rs b/webgestalt_lib/src/methods/gsea.rs
@@ -36,7 +36,6 @@ pub struct RankListItem {
 }
 
 struct PartialGSEAResult {
-    // TODO: Look at adding enrichment and normalized enrichment score
     set: String,
     p: f64,
     es: f64,
@@ -296,7 +295,7 @@ fn enrichment_score(
     )
 }
 
-/// Run GSEA and return a [`Vec<FullGSEAResult`] for all analayte sets.
+/// Run GSEA and return a [`Vec<FullGSEAResult>`] for all analyte sets.
 ///
 /// # Parameters
 ///

diff --git a/webgestalt_lib/src/methods/multilist.rs b/webgestalt_lib/src/methods/multilist.rs
@@ -59,7 +59,7 @@ pub enum NormalizationMethod {
 /// # Parameters
 ///
 /// - `jobs` - A [`Vec<GSEAJob>`] containing all of the separates 'jobs' or analysis to combine
-/// - `method` - A [`MultiOmicsMethod`] enum detailing the analysis method to combine the runs together (meta-analysis, mean median ration, or max median ratio).
+/// - `method` - A [`MultiListMethod`] enum detailing the analysis method to combine the runs together (meta-analysis, mean median ratio, or max median ratio).
 /// - `fdr_method` - [`AdjustmentMethod`] of what FDR method to use to adjust p-values
 ///
 /// # Returns

diff --git a/webgestalt_lib/src/methods/nta.rs b/webgestalt_lib/src/methods/nta.rs
@@ -13,12 +13,16 @@ pub struct NTAConfig {
     pub reset_probability: f64,
     /// A float representing the tolerance for probability calculation
     pub tolerance: f64,
+    /// The [`NTAMethod`] to use for the analysis
     pub method: Option<NTAMethod>,
 }
 
+/// Strategies that NTA can use to decide which important nodes to return
 #[derive(Debug, Clone)]
 pub enum NTAMethod {
+    /// Find the N most important seeds, where N is the provided [`usize`] value
     Prioritize(usize),
+    /// Find the N most important non-seed nodes, where N is the provided [`usize`] value
     Expand(usize),
 }
 
@@ -34,19 +38,32 @@ impl Default for NTAConfig {
     }
 }
 
+/// Struct representing the NTA results
 #[derive(Debug, Serialize)]
 pub struct NTAResult {
+    /// The nodes in the neighborhood. Will always include every seed
     pub neighborhood: Vec<String>,
+    /// The random walk probabilities (score) for the nodes in the neighborhood
     pub scores: Vec<f64>,
+    /// If using the Prioritize method, contains the top N seeds. For the Expand method, this Vec is empty.
     pub candidates: Vec<String>,
 }
 
+/// Performs network topology-based analysis using random walk to identify important nodes in a network
+///
+/// ## Parameters
+///
+/// - `config`: A [`NTAConfig`] struct containing the parameters for the analysis.
+///
+/// ## Returns
+///
+/// Returns a [`NTAResult`] struct containing the results from the analysis. Is [serde](https://serde.rs/) compatible.
 pub fn get_nta(config: NTAConfig) -> NTAResult {
     let mut method = config.clone().method;
     if method.is_none() {
         method = Some(NTAMethod::Expand(10));
     }
-    let mut nta_res = nta(config.clone());
+    let mut nta_res = process_nta(config.clone());
     match method {
         Some(NTAMethod::Prioritize(size)) => {
             let only_seeds = nta_res
@@ -95,12 +112,16 @@ pub fn get_nta(config: NTAConfig) -> NTAResult {
     }
 }
 
-/// Uses random walk to calculate the neighborhood of a set of nodes
-/// Returns [`Vec<String>`]representing the nodes in the neighborhood
+/// Uses random walk to calculate the probability of each node being walked through,
+/// returning each node name paired with its random walk probability
+///
+/// ## Parameters
+/// - `config` - A [`NTAConfig`] struct containing the edge list, seeds, neighborhood size, reset probability, and tolerance
 ///
-/// # Parameters
-/// - `config` - A [`NTAOptions`] struct containing the edge list, seeds, neighborhood size, reset probability, and tolerance
-pub fn nta(config: NTAConfig) -> Vec<(String, f64)> {
+/// ## Returns
+///
+/// Returns a [`Vec<(String, f64)>`] where the [`String`] is the original node name, and the following value is the random walk probability (higher is typically better)
+pub fn process_nta(config: NTAConfig) -> Vec<(String, f64)> {
     println!("Building Graph");
     let unique_nodes = ahash::AHashSet::from_iter(config.edge_list.iter().flatten().cloned());
     let mut node_map: ahash::AHashMap<String, usize> = ahash::AHashMap::default();
@@ -135,20 +156,32 @@ pub fn nta(config: NTAConfig) -> Vec<(String, f64)> {
         .collect()
 }
 
+/// Calculates the probability that each node will be walked when starting from one of the seeds
+///
+/// ## Parameters
+///
+/// - `adj_matrix` - A 2d adjacency matrix, where 1 means the nodes at the row and column indices are connected
+/// - `seed_indices` - a [`Vec<usize>`] of the indices of the seeds (starting points)
+/// - `r` - a [`f64`] of the reset probability (default in WebGestaltR is 0.5)
+/// - `tolerance` - the tolerance/threshold value in [`f64`] (WebGestaltR default is `1e-6`)
+///
+/// ## Returns
+///
+/// Returns a 1d array containing the probability for each node
 fn random_walk_probability(
     adj_matrix: &ndarray::Array2<f64>,
-    node_indices: &Vec<usize>,
+    seed_indices: &Vec<usize>,
     r: f64,
     tolerance: f64,
 ) -> ndarray::Array1<f64> {
-    let num_nodes = node_indices.len() as f64;
+    let num_nodes = seed_indices.len() as f64;
     let de = adj_matrix.sum_axis(Axis(0));
     // de to 2d array
     let de = de.insert_axis(Axis(1));
     let temp = adj_matrix.t().div(de);
     let w = temp.t();
     let mut p0 = ndarray::Array1::from_elem(w.shape()[0], 0.0);
-    for i in node_indices {
+    for i in seed_indices {
         p0[*i] = 1.0 / num_nodes;
     }
     let mut pt = p0.clone();

diff --git a/webgestalt_lib/src/methods/ora.rs b/webgestalt_lib/src/methods/ora.rs
@@ -53,14 +53,6 @@ pub fn ora_p(m: i64, j: i64, n: i64, k: i64) -> f64 {
 /// - `interest_list` - A [`AHashSet<String>`] of the interesting analytes
 /// - `reference` - A [`AHashSet<String>`] of the reference list
 /// - `gmt` - A [`Vec<Item>`] of the gmt file
-///
-/// # Panics
-///
-/// Panics if the [`Arc`] struggles to lock during parallelization.
-///
-/// # Errors
-///
-/// This function will return an error if .
 pub fn get_ora(
     interest_list: &AHashSet<String>,
     reference: &AHashSet<String>,