Skip to content

Commit d9578e8

Browse files
author
Jon Palmer
committed
provide multiple ways of training genemark and using hints for prediction #24
1 parent f8ac1c8 commit d9578e8

File tree

6 files changed: +33 -19 lines changed

CITATION.cff

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
cff-version: version = "25.7.20"
1+
cff-version: version = "25.8.3"
22
title: 'funannotate2: eukaryotic genome annotation'
33
message: >-
44
If you use this software, please cite it using the
@@ -17,5 +17,5 @@ keywords:
1717
- functional annotation
1818
- consensus gene models
1919
license: BSD-2-Clause
20-
version: version = "25.7.20"
21-
date-released: '2025-07-20'
20+
version: version = "25.8.3"
21+
date-released: '2025-08-04'

MEMORY_MONITORING.md

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -54,27 +54,30 @@ The memory monitoring is integrated into:
5454

5555
### Command Line Options
5656

57-
Add memory monitoring to funannotate2 predict:
57+
Memory monitoring is enabled by default in funannotate2 predict:
5858

5959
```bash
60-
# Enable memory monitoring
61-
funannotate2 predict -i input_dir --monitor-memory
60+
# Memory monitoring is enabled by default
61+
funannotate2 predict -i input_dir
6262

63-
# Enable memory monitoring with memory limit
64-
funannotate2 predict -i input_dir --monitor-memory --memory-limit 16
63+
# Set a memory limit (memory monitoring remains enabled by default)
64+
funannotate2 predict -i input_dir --memory-limit 16
65+
66+
# Disable memory monitoring if needed
67+
funannotate2 predict -i input_dir --disable-memory-monitoring
6568
```
6669

6770
### CLI Options
6871

69-
- `--monitor-memory`: Enable memory monitoring and prediction
72+
- `--disable-memory-monitoring`: Disable memory monitoring and memory-usage prediction (both are enabled by default)
7073
- `--memory-limit GB`: Set memory limit in GB to adjust CPU allocation
7174

7275
### Example Output
7376

74-
When memory monitoring is enabled, you'll see output like:
77+
With memory monitoring enabled by default, you'll see output like:
7578

7679
```
77-
Memory monitoring enabled for ab initio predictions
80+
Memory monitoring: using 14.4 GB limit (90% of 16.0 GB total)
7881
Memory limit set to 16.0 GB
7982
Memory usage estimate for 150 contigs with tools ['snap', 'augustus']:
8083
Total estimated peak memory: 2847.3 MB

funannotate2/__main__.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -276,9 +276,9 @@ def predict_subparser(subparsers):
276276
"--tmpdir", default="/tmp", help="volume to write tmp files", metavar=""
277277
)
278278
optional_args.add_argument(
279-
"--monitor-memory",
279+
"--disable-memory-monitoring",
280280
action="store_true",
281-
help="Monitor memory usage of ab initio prediction tools",
281+
help="Disable memory monitoring of ab initio prediction tools (enabled by default)",
282282
)
283283
optional_args.add_argument(
284284
"--memory-limit",
@@ -372,6 +372,13 @@ def train_subparser(subparsers):
372372
help="Maximum number of gene models to use for training (default: 5000)",
373373
metavar="",
374374
)
375+
optional_args.add_argument(
376+
"--genemark-mode",
377+
default="fast",
378+
choices=["fast", "unsupervised", "guided"],
379+
help="GeneMark training mode: fast (subset+hints, default), unsupervised (self-training), guided (full+hints)",
380+
metavar="",
381+
)
375382
optional_args.add_argument(
376383
"--busco-lineage",
377384
dest="busco_lineage",

funannotate2/train.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -348,18 +348,20 @@ def train(args):
348348

349349
# check if genemark is installed, if so then run self training
350350
if which_path("gmes_petap.pl") and which_path("gmhmme3"):
351-
logger.info("Training GeneMark-ES using self-training")
351+
logger.info(f"Training GeneMark-ES using {args.genemark_mode} mode")
352352
fungus_flag = False
353353
if taxonomy["kingdom"] == "Fungi":
354354
fungus_flag = True
355355
genemark_train = train_genemark(
356356
TrainingGenomeFasta,
357-
train_models,
357+
filt_train_models_final,
358358
test_models,
359359
folder=misc_dir,
360360
fungus=fungus_flag,
361361
cpus=args.cpus,
362362
log=logger,
363+
training_mode=args.genemark_mode, # User-configurable training mode
364+
max_training_length=50000000, # 50 Mb limit for additional genomic context
363365
)
364366
genemark_train["training_set"] = "self training"
365367
train_data["genemark"] = genemark_train

funannotate2/utilities.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -833,16 +833,18 @@ def logoutput(logname, process_result):
833833

834834
# Try to determine the output directory from environment or current working directory
835835
output_dir = os.environ.get("FUNANNOTATE2_OUTPUT_DIR", os.getcwd())
836-
logs_dir = os.path.join(output_dir, "logs")
836+
logs_dir = os.path.join(output_dir, "logfiles")
837837

838-
# Create logs directory if it doesn't exist
838+
# Create logfiles directory if it doesn't exist
839839
if not os.path.exists(logs_dir):
840840
try:
841841
os.makedirs(logs_dir)
842842
except:
843843
logs_dir = output_dir # Fallback to output directory
844844

845-
memory_log_file = os.path.join(logs_dir, "memory-monitoring.jsonl")
845+
memory_log_file = os.path.join(
846+
logs_dir, "predict-abinitio-memory-monitoring.jsonl"
847+
)
846848

847849
# Extract additional metadata for prediction model improvement
848850
tool_name = None

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
44

55
[project]
66
name = "funannotate2"
7-
version = "25.7.20"
7+
version = "25.8.3"
88
description = "Funannotate2: eukaryotic genome annotation pipeline"
99
readme = {file = "README.md", content-type = "text/markdown"}
1010
authors = [

0 commit comments

Comments
 (0)