@@ -158,10 +158,10 @@ def predict_memory_usage(
158158 """
159159 Predict memory usage for an ab initio tool based on contig length.
160160
161- Based on real memory monitoring data from funannotate2 runs (2024-07-14 ).
162- Values for SNAP and GlimmerHMM are from actual measurements on 8 contigs (2.8-4.9 Mbp).
163- Values for Augustus and GeneMark are conservative estimates due to Docker measurement
164- limitations on Apple Silicon .
161+ Based on comprehensive memory monitoring data from user runs (2024-07-17 ).
162+ Values for SNAP, Augustus, and GlimmerHMM are from linear regression analysis of
163+ 4,083 real-world measurements across contigs ranging from 508 bp to 102 Mbp.
164+ GeneMark values are from initial Docker-based estimates .
165165
166166 Args:
167167 tool_name: Name of the ab initio tool (snap, augustus, etc.)
@@ -171,22 +171,22 @@ def predict_memory_usage(
171171 Returns:
172172 Dictionary with predicted memory usage statistics
173173 """
174- # Base memory usage (MB) - minimum memory required regardless of contig size
175- # Updated with real data from memory monitoring
174+ # Base memory usage (MB) - intercept from linear regression analysis
175+ # Updated with real user data from 4,083 measurements (2024-07-17)
176176 base_memory = {
177- "snap" : 234 .0 , # From real measurements on 8 contigs (2.8-4.9 Mbp )
178- "augustus" : 250.0 , # Conservative estimate (Docker measurement issues)
179- "glimmerhmm" : 356.0 , # From real measurements on 8 contigs (2.8-4.9 Mbp)
180- "genemark" : 200 .0 , # Conservative estimate (Docker measurement issues)
177+ "snap" : 0 .0 , # Linear model: Memory = 93.3 * Size_Mbp - 2.1 (≈0 base )
178+ "augustus" : 35.8 , # Linear model: Memory = 12.4 * Size_Mbp + 35.8
179+ "glimmerhmm" : 6.4 , # Linear model: Memory = 6.6 * Size_Mbp + 6.4
180+ "genemark" : 20 .0 , # Conservative estimate from initial Docker measurements
181181 }
182182
183- # Memory scaling per MB of sequence (converted from MB per million base pairs )
184- # Updated with real data from memory monitoring
183+ # Memory scaling per MB of sequence (slope from linear regression analysis )
184+ # Updated with real user data showing strong linear relationships (R² > 0.92)
185185 memory_per_mb = {
186- "snap" : 149.0 , # From real measurements: 149.26 MB/Mbp
187- "augustus" : 150.0 , # Conservative estimate (similar to SNAP )
188- "glimmerhmm" : 164.0 , # From real measurements: 163.61 MB/Mbp
189- "genemark" : 140 .0 , # Conservative estimate (slightly lower than SNAP)
186+ "snap" : 93.3 , # From user data: 93.3 MB/Mbp (correlation = 0.998)
187+ "augustus" : 12.4 , # From user data: 12.4 MB/Mbp (correlation = 0.968 )
188+ "glimmerhmm" : 6.6 , # From user data: 6.6 MB/Mbp (correlation = 0.961)
189+ "genemark" : 50 .0 , # Conservative estimate from initial measurements
190190 }
191191
192192 tool = tool_name .lower ()
0 commit comments