Skip to content

Commit c945e1d

Browse files
authored
Merge pull request #215 from AlexandrovLab/spm-i213
Spm i213
2 parents 1064fd1 + 87a41a2 commit c945e1d

File tree

6 files changed

+47
-18
lines changed

6 files changed

+47
-18
lines changed

.travis.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ python:
77

88
before_install:
99
- pip install --upgrade pip setuptools packaging
10-
- if ! [ -f ./src/GRCh37.tar.gz ]; then wget ftp://alexandrovlab-ftp.ucsd.edu/pub/tools/SigProfilerMatrixGenerator/GRCh37.tar.gz -P ./src/; fi
10+
- if ! [ -f ./src/GRCh37.tar.gz ]; then wget --connect-timeout=10 --tries=20 ftp://alexandrovlab-ftp.ucsd.edu/pub/tools/SigProfilerMatrixGenerator/GRCh37.tar.gz -P ./src/; fi
1111

1212
install:
1313
- pip install .[tests]

CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,13 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
66

77
## [Unreleased]
88

9+
## [1.3.1] - 2025-03-13
10+
11+
### Fixed
12+
- SV Matrix generation compatibility: Replaced the remaining pre-2.0.0 Pandas and NumPy syntax (`DataFrame.append`, `np.NAN`, `np.NaN`) with the equivalents supported by Pandas and NumPy >= 2.0.0 (`pd.concat`, `np.nan`).
13+
- Indexing error in reference genome handling: Edge cases where BED file indices extended beyond the reference genome range caused crashes. Index accesses are now restricted to valid ranges, and cases where the genomic context extends beyond valid positions are skipped.
14+
- Uninitialized variable: The `dinuc_mat` variable was read before being assigned, causing a runtime error. It is now explicitly initialized to `None` before use.
15+
916
## [1.3.0] - 2025-02-11
1017

1118
### Changed

SigProfilerMatrixGenerator/scripts/SVMatrixGenerator.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ def calcIntermutDist2(subs_type, first_chrom_na=False):
4646
]
4747

4848
if first_chrom_na:
49-
prevPos_arr_c = np.hstack((np.NAN, pos_array_im_c.flatten()[:-1]))
49+
prevPos_arr_c = np.hstack((np.nan, pos_array_im_c.flatten()[:-1]))
5050
else:
5151
prevPos_arr_c = np.hstack((0, pos_array_im_c.flatten()[:-1]))
5252
distPrev_arr_c = pos_array_im_c - prevPos_arr_c
@@ -69,7 +69,7 @@ def calcIntermutDist(subs_type, first_chrom_na=False):
6969
subs_type_chrom = subs_type[subs_type["chr"] == c].sort_values("position")
7070
if first_chrom_na:
7171
subs_type_chrom["prevPos"] = np.hstack(
72-
(np.NAN, subs_type_chrom["position"].values.flatten()[:-1])
72+
(np.nan, subs_type_chrom["position"].values.flatten()[:-1])
7373
)
7474
else:
7575
subs_type_chrom["prevPos"] = np.hstack(
@@ -292,7 +292,7 @@ def calcIntermutDist(subs_type, first_chrom_na=False):
292292
subs_type_chrom = subs_type[subs_type["chr"] == c].sort_values("position")
293293
if first_chrom_na:
294294
subs_type_chrom["prevPos"] = np.hstack(
295-
(np.NAN, subs_type_chrom["position"].values.flatten()[:-1])
295+
(np.nan, subs_type_chrom["position"].values.flatten()[:-1])
296296
)
297297
else:
298298
subs_type_chrom["prevPos"] = np.hstack(
@@ -324,7 +324,7 @@ def calcIntermutDist2(subs_type, first_chrom_na=False):
324324
]
325325

326326
if first_chrom_na:
327-
prevPos_arr_c = np.hstack((np.NAN, pos_array_im_c.flatten()[:-1]))
327+
prevPos_arr_c = np.hstack((np.nan, pos_array_im_c.flatten()[:-1]))
328328
else:
329329
prevPos_arr_c = np.hstack((0, pos_array_im_c.flatten()[:-1]))
330330
distPrev_arr_c = pos_array_im_c - prevPos_arr_c
@@ -815,8 +815,8 @@ def annotateBedpe(sv_bedpe):
815815

816816
sample_bps = pd.DataFrame(columns=cncd.columns)
817817
for chromi in unique_py(cncd["chr"]):
818-
sample_bps = sample_bps.append(
819-
cncd[cncd["chr"] == chromi].sort_values("position", kind="mergesort"),
818+
sample_bps = pd.concat(
819+
[sample_bps, cncd[cncd["chr"] == chromi].sort_values("position", kind="mergesort")],
820820
ignore_index=True,
821821
)
822822

@@ -829,7 +829,7 @@ def annotateBedpe(sv_bedpe):
829829
exp_dist = genome_size / len(sample_bps)
830830
gamma_sdev = 25 #
831831
PEAK_FACTOR = 10
832-
thresh_dist = np.NaN
832+
thresh_dist = np.nan
833833

834834
if logScale:
835835
sample_bps["intermut_dist"] = np.log10(
@@ -844,15 +844,15 @@ def annotateBedpe(sv_bedpe):
844844
if np.isnan(thresh_dist):
845845
thresh_dist = exp_dist / PEAK_FACTOR
846846

847-
gamma = np.NaN
847+
gamma = np.nan
848848
if np.isnan(gamma) & ~np.isnan(gamma_sdev):
849849
# compute the mean absolute deviation
850850
sdev = getMad(sample_bps["intermut_dist"].values)
851851
gamma = gamma_sdev * sdev
852852

853853
sample_bps["is_clustered_single"] = False
854854
all_kat_regions = pd.DataFrame()
855-
sample_bps["mean_intermut_dist"] = np.NaN
855+
sample_bps["mean_intermut_dist"] = np.nan
856856
for chrom in unique_py(sample_bps["chr"]): # loop over chromosomes
857857
sample_bps_flag = (
858858
sample_bps["chr"] == chrom

SigProfilerMatrixGenerator/scripts/SigProfilerMatrixGeneratorFunc.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1415,6 +1415,9 @@ def SigProfilerMatrixGeneratorFunc(
14151415
# Sorts files based on chromosome, sample, and start position
14161416
if not chrom_based:
14171417
chrom_start = None
1418+
1419+
# initialize dinuc_mat to None
1420+
dinuc_mat = None
14181421
if i != 1:
14191422
for file in vcf_files:
14201423
if reference_genome.lower() != "ebv":
@@ -2817,6 +2820,16 @@ def SigProfilerMatrixGeneratorFunc(
28172820
)
28182821

28192822
if not chrom_based:
2823+
# Ensure all required keys exist
2824+
for key, indel_set in [
2825+
("ID", indel_types),
2826+
("simple", indel_types_simple),
2827+
("tsb", indel_types),
2828+
("complete", indel_types)
2829+
]:
2830+
if key not in mutation_ID or not isinstance(mutation_ID[key], pd.DataFrame) or mutation_ID[key].empty:
2831+
mutation_ID[key] = pd.DataFrame(0, index=indel_set, columns=samples)
2832+
28202833
matGen.matrix_generator_INDEL(
28212834
output_matrix,
28222835
samples,

SigProfilerMatrixGenerator/scripts/save_context_distribution.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,6 @@ def context_distribution(
125125
for l in range(i, i + context, 1):
126126
nuc += tsb_ref[chromosome[l]][1]
127127
base = nuc[int(context / 2)]
128-
129128
count += 1
130129
if count == 1000000:
131130
print(i)
@@ -394,17 +393,27 @@ def context_distribution_BED(
394393

395394
if chrom == chrom_initial:
396395
chrom_length += end - start
397-
for i in range(start, end + 1 - context, 1):
396+
for i in range(start, min(end + 1 - context, len(chromosome))):
398397
nuc = ""
399-
for l in range(i, i + context, 1):
398+
for l in range(i, min(i + context, len(chromosome))):
400399
nuc += tsb_ref[chromosome[l]][1]
401-
base = nuc[int(context / 2)]
400+
401+
# Skip incomplete windows
402+
if len(nuc) != context:
403+
overlap = context - len(nuc)
404+
print(f"Skipping window at index {i}: expected context length {context}, but got {len(nuc)}. "
405+
f"Missing {overlap} base(s) due to boundary at the end of the chromosome.")
406+
continue
402407

403408
# Skip the base if unknown
404409
if "N" in nuc:
405410
pass
406-
407411
else:
412+
# Assign `base` to the middle nucleotide of the context
413+
if len(nuc) >= int(context / 2) + 1: # Prevent IndexError
414+
base = nuc[int(context / 2)]
415+
else:
416+
continue # Skip if `nuc` is too short
408417
if context_input != "DINUC" and context_input != "DBS186":
409418
# Only save the pyrimidine context (canonical)
410419
if base == "A" or base == "G":

setup.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
from setuptools import setup
55

6-
VERSION = "1.3.0"
6+
VERSION = "1.3.1"
77

88
# remove the dist folder first if exists
99
if os.path.exists("dist"):
@@ -23,7 +23,7 @@ def write_version_py(filename="SigProfilerMatrixGenerator/version.py"):
2323
# THIS FILE IS GENERATED FROM SIGPROFILEMATRIXGENERATOR SETUP.PY
2424
short_version = '%(version)s'
2525
version = '%(version)s'
26-
Update = 'v1.3.0: Require Pandas and Numpy >= 2.0.0 and Python >= 3.9'
26+
Update = 'v1.3.1: Update SV matrix calls to pandas and numpy to use >= 2.0.0 syntax'
2727
2828
"""
2929
fh = open(filename, "w")
@@ -52,7 +52,7 @@ def write_version_py(filename="SigProfilerMatrixGenerator/version.py"):
5252
python_requires=">=3.9",
5353
install_requires=[
5454
"matplotlib>=2.2.2",
55-
"sigProfilerPlotting>=1.4.0",
55+
"sigProfilerPlotting>=1.4.1",
5656
"statsmodels>=0.9.0",
5757
"numpy>=2.0.0",
5858
"pandas>=2.0.0",

0 commit comments

Comments
 (0)