Skip to content

Commit 548482e

Browse files
authored
Improve Documentation (#24)
* improve NTA documentation * add NTA struct documentation * add doc fixes and README improvements
1 parent 4b6c4fd commit 548482e

File tree

7 files changed

+61
-26
lines changed

7 files changed

+61
-26
lines changed

README.md

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,26 @@
44

55
Rust implementation of [WebGestaltR](https://github.com/bzhanglab/webgestaltr).
66

7+
## Notes
8+
9+
This CLI is focused purely on computation. **It does not provide GMT files or HTML reports**. The output of this tool is JSON files containing the results. For a more feature-complete tool, see the original [WebGestaltR](https://bzhanglab.github.io/WebGestaltR/) tool.
10+
711
## Install
812

913
```shell
10-
git clone https://github.com/bzhanglab/webgestalt_rust.git
11-
cd webgestalt_rust
12-
cargo build --release
14+
cargo install webgestalt
1315
```
1416

15-
## Run
17+
## CLI
18+
19+
For help with the CLI, run:
1620

1721
```shell
18-
cargo run --release -- example ora
22+
webgestalt --help
1923
```
24+
25+
Example of running over-representation analysis using `kegg.gmt`, with an interesting list at `int.txt` and a reference list at `ref.txt`. Outputs a JSON file at `output.json`.
26+
27+
```shell
28+
ora -g kegg.gmt -i int.txt -r ref.txt -o output.json
29+
```

src/main.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
#![doc = include_str!("../README.md")]
12
use clap::{Args, Parser};
23
use clap::{Subcommand, ValueEnum};
34
use owo_colors::{OwoColorize, Stream::Stdout, Style};

webgestalt_lib/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
1+
#![doc = include_str!("../README.md")]
12
use std::{error::Error, fmt};
23

34
pub mod methods;
45
pub mod readers;
56
pub mod stat;
67
pub mod writers;
7-
88
trait CustomError {
99
fn msg(&self) -> String;
1010
}

webgestalt_lib/src/methods/gsea.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@ pub struct RankListItem {
3636
}
3737

3838
struct PartialGSEAResult {
39-
// TODO: Look at adding enrichment and normalized enrichment score
4039
set: String,
4140
p: f64,
4241
es: f64,
@@ -296,7 +295,7 @@ fn enrichment_score(
296295
)
297296
}
298297

299-
/// Run GSEA and return a [`Vec<FullGSEAResult`] for all analayte sets.
298+
/// Run GSEA and return a [`Vec<FullGSEAResult>`] for all analyte sets.
300299
///
301300
/// # Parameters
302301
///

webgestalt_lib/src/methods/multilist.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ pub enum NormalizationMethod {
5959
/// # Parameters
6060
///
6161
/// - `jobs` - A [`Vec<GSEAJob>`] containing all of the separate 'jobs' or analyses to combine
62-
/// - `method` - A [`MultiOmicsMethod`] enum detailing the analysis method to combine the runs together (meta-analysis, mean median ration, or max median ratio).
62+
/// - `method` - A [`MultiListMethod`] enum detailing the analysis method to combine the runs together (meta-analysis, mean median ratio, or max median ratio).
6363
/// - `fdr_method` - [`AdjustmentMethod`] of what FDR method to use to adjust p-values
6464
///
6565
/// # Returns

webgestalt_lib/src/methods/nta.rs

Lines changed: 42 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,16 @@ pub struct NTAConfig {
1313
pub reset_probability: f64,
1414
/// A float representing the tolerance for probability calculation
1515
pub tolerance: f64,
16+
/// The [`NTAMethod`] to use for the analysis
1617
pub method: Option<NTAMethod>,
1718
}
1819

20+
/// The available NTA methods, which determine which important nodes are returned
1921
#[derive(Debug, Clone)]
2022
pub enum NTAMethod {
23+
/// Find the N most important seeds, where N is the provided [`usize`] value
2124
Prioritize(usize),
25+
/// Find the N most important non-seed nodes, where N is the provided [`usize`] value
2226
Expand(usize),
2327
}
2428

@@ -34,19 +38,32 @@ impl Default for NTAConfig {
3438
}
3539
}
3640

41+
/// Struct representing the NTA results
3742
#[derive(Debug, Serialize)]
3843
pub struct NTAResult {
44+
/// The nodes in the neighborhood. Will always include every seed
3945
pub neighborhood: Vec<String>,
46+
/// The random walk probabilities (score) for the nodes in the neighborhood
4047
pub scores: Vec<f64>,
48+
/// If using the Prioritize method, contains the top N seeds. For the Expand method, this Vec is empty.
4149
pub candidates: Vec<String>,
4250
}
4351

52+
/// Performs network topology-based analysis using random walk to identify important nodes in a network
53+
///
54+
/// ## Parameters
55+
///
56+
/// - `config`: A [`NTAConfig`] struct containing the parameters for the analysis.
57+
///
58+
/// ## Returns
59+
///
60+
/// Returns a [`NTAResult`] struct containing the results from the analysis. Is [serde](https://serde.rs/) compatible.
4461
pub fn get_nta(config: NTAConfig) -> NTAResult {
4562
let mut method = config.clone().method;
4663
if method.is_none() {
4764
method = Some(NTAMethod::Expand(10));
4865
}
49-
let mut nta_res = nta(config.clone());
66+
let mut nta_res = process_nta(config.clone());
5067
match method {
5168
Some(NTAMethod::Prioritize(size)) => {
5269
let only_seeds = nta_res
@@ -95,12 +112,16 @@ pub fn get_nta(config: NTAConfig) -> NTAResult {
95112
}
96113
}
97114

98-
/// Uses random walk to calculate the neighborhood of a set of nodes
99-
/// Returns [`Vec<String>`]representing the nodes in the neighborhood
115+
/// Uses random walk to calculate the probability of each node being walked through.
116+
/// Returns a [`Vec<(String, f64)>`] pairing each node name with its random walk probability
117+
///
118+
/// ## Parameters
119+
/// - `config` - A [`NTAConfig`] struct containing the edge list, seeds, neighborhood size, reset probability, and tolerance
100120
///
101-
/// # Parameters
102-
/// - `config` - A [`NTAOptions`] struct containing the edge list, seeds, neighborhood size, reset probability, and tolerance
103-
pub fn nta(config: NTAConfig) -> Vec<(String, f64)> {
121+
/// ## Returns
122+
///
123+
/// Returns a [`Vec<(String, f64)>`] where the [`String`] is the original node name, and the following value is the random walk probability (higher is typically better)
124+
pub fn process_nta(config: NTAConfig) -> Vec<(String, f64)> {
104125
println!("Building Graph");
105126
let unique_nodes = ahash::AHashSet::from_iter(config.edge_list.iter().flatten().cloned());
106127
let mut node_map: ahash::AHashMap<String, usize> = ahash::AHashMap::default();
@@ -135,20 +156,32 @@ pub fn nta(config: NTAConfig) -> Vec<(String, f64)> {
135156
.collect()
136157
}
137158

159+
/// Calculates the probability that each node will be walked when starting from one of the seeds
160+
///
161+
/// ## Parameters
162+
///
163+
/// - `adj_matrix` - A 2d adjacency matrix, where 1 means the nodes at the row and column indices are connected
164+
/// - `seed_indices` - a [`Vec<usize>`] of the indices of the seeds (starting points)
165+
/// - `r` - a [`f64`] of the reset probability (default in WebGestaltR is 0.5)
166+
/// - `tolerance` - the tolerance/threshold value in [`f64`] (WebGestaltR default is `1e-6`)
167+
///
168+
/// ## Returns
169+
///
170+
/// Returns a 1d array containing the probability for each node
138171
fn random_walk_probability(
139172
adj_matrix: &ndarray::Array2<f64>,
140-
node_indices: &Vec<usize>,
173+
seed_indices: &Vec<usize>,
141174
r: f64,
142175
tolerance: f64,
143176
) -> ndarray::Array1<f64> {
144-
let num_nodes = node_indices.len() as f64;
177+
let num_nodes = seed_indices.len() as f64;
145178
let de = adj_matrix.sum_axis(Axis(0));
146179
// de to 2d array
147180
let de = de.insert_axis(Axis(1));
148181
let temp = adj_matrix.t().div(de);
149182
let w = temp.t();
150183
let mut p0 = ndarray::Array1::from_elem(w.shape()[0], 0.0);
151-
for i in node_indices {
184+
for i in seed_indices {
152185
p0[*i] = 1.0 / num_nodes;
153186
}
154187
let mut pt = p0.clone();

webgestalt_lib/src/methods/ora.rs

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -53,14 +53,6 @@ pub fn ora_p(m: i64, j: i64, n: i64, k: i64) -> f64 {
5353
/// - `interest_list` - A [`AHashSet<String>`] of the interesting analytes
5454
/// - `reference` - A [`AHashSet<String>`] of the reference list
5555
/// - `gmt` - A [`Vec<Item>`] of the gmt file
56-
///
57-
/// # Panics
58-
///
59-
/// Panics if the [`Arc`] struggles to lock during parallelization.
60-
///
61-
/// # Errors
62-
///
63-
/// This function will return an error if .
6456
pub fn get_ora(
6557
interest_list: &AHashSet<String>,
6658
reference: &AHashSet<String>,

0 commit comments

Comments
 (0)