@@ -262,14 +262,6 @@ void ModelDNARateVariation::estimateRatesPerSitePerEntry(cmaple::Tree* tree) {
262
262
}
263
263
}
264
264
}
265
-
266
- RealNumType* globalCounts = new RealNumType[matSize];
267
- for (int i = 0 ; i < num_states_; i++) {
268
- for (int j = 0 ; j < num_states_; j++) {
269
- globalCounts[row_index[i] + j] = 0 ;
270
- }
271
- }
272
-
273
265
std::stack<Index> nodeStack;
274
266
const PhyloNode& root = tree->nodes [tree->root_vector_index ];
275
267
nodeStack.push (root.getNeighborIndex (RIGHT));
@@ -350,15 +342,14 @@ void ModelDNARateVariation::estimateRatesPerSitePerEntry(cmaple::Tree* tree) {
350
342
W[i][stateA] += branchLengthToObservation/2 ;
351
343
W[i][stateB] += branchLengthToObservation/2 ;
352
344
C[i][stateB + row_index[stateA]] += 1 ;
353
- globalCounts[stateB + row_index[stateA]] += 1 ;
354
345
}
355
346
} else {
356
347
// Case 2: Last observation was the other side of the root.
357
348
// In this case there are two further cases - the mutation happened either side of the root.
358
349
// We calculate the relative likelihood of each case and use this to weight waiting times etc.
359
350
RealNumType distToRoot = seqP_region->plength_observation2root + blength;
360
351
RealNumType distToObserved = seqP_region->plength_observation2node ;
361
- updateCountsAndWaitingTimesAcrossRoot (pos, end_pos, stateA, stateB, distToRoot, distToObserved, W, C, globalCounts );
352
+ updateCountsAndWaitingTimesAcrossRoot (pos, end_pos, stateA, stateB, distToRoot, distToObserved, W, C);
362
353
}
363
354
} else if (seqP_region->type <= TYPE_R && seqC_region->type == TYPE_O) {
364
355
StateType stateA = seqP_region->type ;
@@ -371,7 +362,6 @@ void ModelDNARateVariation::estimateRatesPerSitePerEntry(cmaple::Tree* tree) {
371
362
RealNumType prob = seqC_region->getLH (stateB);
372
363
if (stateB != stateA) {
373
364
C[end_pos][stateB + row_index[stateA]] += prob;
374
- globalCounts[stateB + row_index[stateA]] += prob;
375
365
376
366
W[end_pos][stateA] += prob * branchLengthToObservation/2 ;
377
367
W[end_pos][stateB] += prob * branchLengthToObservation/2 ;
@@ -385,7 +375,7 @@ void ModelDNARateVariation::estimateRatesPerSitePerEntry(cmaple::Tree* tree) {
385
375
RealNumType distToObserved = seqP_region->plength_observation2node ;
386
376
for (StateType stateB = 0 ; stateB < num_states_; stateB++) {
387
377
RealNumType prob = seqC_region->getLH (stateB);
388
- updateCountsAndWaitingTimesAcrossRoot (pos, end_pos, stateA, stateB, distToRoot, distToObserved, W, C, globalCounts, prob);
378
+ updateCountsAndWaitingTimesAcrossRoot (pos, end_pos, stateA, stateB, distToRoot, distToObserved, W, C, prob);
389
379
}
390
380
}
391
381
} else if (seqP_region->type == TYPE_O && seqC_region->type <= TYPE_R) {
@@ -399,7 +389,6 @@ void ModelDNARateVariation::estimateRatesPerSitePerEntry(cmaple::Tree* tree) {
399
389
RealNumType prob = seqP_region->getLH (stateA);
400
390
if (stateB != stateA) {
401
391
C[end_pos][stateB + row_index[stateA]] += prob;
402
- globalCounts[stateB + row_index[stateA]] += prob;
403
392
404
393
W[end_pos][stateA] += prob * branchLengthToObservation/2 ;
405
394
W[end_pos][stateB] += prob * branchLengthToObservation/2 ;
@@ -413,7 +402,7 @@ void ModelDNARateVariation::estimateRatesPerSitePerEntry(cmaple::Tree* tree) {
413
402
RealNumType distToObserved = seqP_region->plength_observation2node ;
414
403
for (StateType stateA = 0 ; stateA < num_states_; stateA++) {
415
404
RealNumType prob = seqP_region->getLH (stateA);
416
- updateCountsAndWaitingTimesAcrossRoot (pos, end_pos, stateA, stateB, distToRoot, distToObserved, W, C, globalCounts, prob);
405
+ updateCountsAndWaitingTimesAcrossRoot (pos, end_pos, stateA, stateB, distToRoot, distToObserved, W, C, prob);
417
406
}
418
407
}
419
408
} else if (seqP_region->type == TYPE_O && seqC_region->type == TYPE_O) {
@@ -425,7 +414,6 @@ void ModelDNARateVariation::estimateRatesPerSitePerEntry(cmaple::Tree* tree) {
425
414
RealNumType probB = seqC_region->getLH (stateB);
426
415
if (stateB != stateA) {
427
416
C[end_pos][stateB + row_index[stateA]] += probA * probB;
428
- globalCounts[stateB + row_index[stateA]] += probA * probB;
429
417
430
418
W[end_pos][stateA] += probA * probB * branchLengthToObservation/2 ;
431
419
W[end_pos][stateB] += probA * probB * branchLengthToObservation/2 ;
@@ -442,7 +430,7 @@ void ModelDNARateVariation::estimateRatesPerSitePerEntry(cmaple::Tree* tree) {
442
430
RealNumType probA = seqP_region->getLH (stateA);
443
431
for (StateType stateB = 0 ; stateB < num_states_; stateB++) {
444
432
RealNumType probB = seqC_region->getLH (stateB);
445
- updateCountsAndWaitingTimesAcrossRoot (pos, end_pos, stateA, stateB, distToRoot, distToObserved, W, C, globalCounts, probA * probB);
433
+ updateCountsAndWaitingTimesAcrossRoot (pos, end_pos, stateA, stateB, distToRoot, distToObserved, W, C, probA * probB);
446
434
}
447
435
}
448
436
}
@@ -451,22 +439,70 @@ void ModelDNARateVariation::estimateRatesPerSitePerEntry(cmaple::Tree* tree) {
451
439
}
452
440
}
453
441
442
+ // Get genome-wide average mutation counts and waiting times
443
+ RealNumType* globalCounts = new RealNumType[matSize];
444
+ RealNumType* globalWaitingTimes = new RealNumType[num_states_];
445
+ for (int i = 0 ; i < num_states_; i++) {
446
+ globalWaitingTimes[i] = 0 ;
447
+ for (int j = 0 ; j < num_states_; j++) {
448
+ globalCounts[row_index[i] + j] = 0 ;
449
+ }
450
+ }
451
+
452
+ for (int i = 0 ; i < genomeSize; i++) {
453
+ for (int j = 0 ; j < num_states_; j++) {
454
+ globalWaitingTimes[j] += W[i][j];
455
+ for (int k = 0 ; k < num_states_; k++) {
456
+ globalCounts[row_index[j] + k] += C[i][row_index[j] + k];
457
+ }
458
+ }
459
+ }
460
+
454
461
for (int j = 0 ; j < num_states_; j++) {
462
+ globalWaitingTimes[j] /= genomeSize;
455
463
for (int k = 0 ; k < num_states_; k++) {
456
- ( globalCounts[row_index[j] + k] /= genomeSize) ;
464
+ globalCounts[row_index[j] + k] /= genomeSize;
457
465
}
458
466
}
459
467
460
- printMatrix (globalCounts, &std::cout);
461
468
// add pseudocount of average rate across genome * waitingTime pseudocount for counts
462
469
for (int i = 0 ; i < genomeSize; i++) {
463
470
for (int j = 0 ; j < num_states_; j++) {
464
471
for (int k = 0 ; k < num_states_; k++) {
465
- C[i][row_index[j] + k] += globalCounts[row_index[j] + k] * waitingTimePseudoCount ;
472
+ C[i][row_index[j] + k] += globalCounts[row_index[j] + k] * waitingTimePseudoCount / globalWaitingTimes[j] ;
466
473
}
467
474
}
468
475
}
469
476
477
+ if (cmaple::verbose_mode > VB_MIN)
478
+ {
479
+ RealNumType* referenceFreqs = new RealNumType[num_states_];
480
+ for (int j = 0 ; j < num_states_; j++) {
481
+ referenceFreqs[j] = 0 ;
482
+ }
483
+ for (int i = 0 ; i < genomeSize; i++) {
484
+ referenceFreqs[tree->aln ->ref_seq [i]]++;
485
+ }
486
+ for (int j = 0 ; j < num_states_; j++) {
487
+ referenceFreqs[j] /= genomeSize;
488
+ }
489
+
490
+ std::cout << " Genome-wide average waiting times:\t\t " ;
491
+ for (int j = 0 ; j < num_states_; j++) {
492
+ std::cout << globalWaitingTimes[j] << " \t " ;
493
+ }
494
+ std::cout << std::endl;
495
+ std::cout << " Reference nucleotide frequencies:\t\t " ;
496
+ for (int j = 0 ; j < num_states_; j++) {
497
+ std::cout << referenceFreqs[j] << " \t " ;
498
+ }
499
+ std::cout << std::endl;
500
+ delete[] referenceFreqs;
501
+ }
502
+
503
+ delete[] globalCounts;
504
+ delete[] globalWaitingTimes;
505
+
470
506
RealNumType totalRate = 0 ;
471
507
// Update mutation matrices with new rate estimation
472
508
for (int i = 0 ; i < genomeSize; i++) {
@@ -478,7 +514,7 @@ void ModelDNARateVariation::estimateRatesPerSitePerEntry(cmaple::Tree* tree) {
478
514
for (int stateB = 0 ; stateB < num_states_; stateB++) {
479
515
if (stateA != stateB) {
480
516
RealNumType newRate = Ci[stateB + row_index[stateA]] / Wi[stateA];
481
- newRate = MIN (250.0 , MAX (0.01 , newRate));
517
+ newRate = MIN (250.0 , MAX (0.001 , newRate));
482
518
mutationMatrices[i * matSize + (stateB + row_index[stateA])] = newRate;
483
519
484
520
// Approximate total rate by considering rates from reference nucleotide
@@ -505,14 +541,16 @@ void ModelDNARateVariation::estimateRatesPerSitePerEntry(cmaple::Tree* tree) {
505
541
}
506
542
507
543
// Normalise entries of mutation matrices
508
- RealNumType averageRate = totalRate / genomeSize;
544
+ totalRate /= genomeSize;
545
+ // RealNumType averageRate = totalRate / genomeSize;
509
546
for (int i = 0 ; i < genomeSize; i++) {
510
547
for (int stateA = 0 ; stateA < num_states_; stateA++) {
511
548
RealNumType rowSum = 0 ;
512
549
for (int stateB = 0 ; stateB < num_states_; stateB++) {
513
550
if (stateA != stateB) {
514
551
RealNumType val = mutationMatrices[i * matSize + (stateB + row_index[stateA])];
515
- val /= averageRate;
552
+ // val /= averageRate;
553
+ val /= totalRate;
516
554
517
555
mutationMatrices[i * matSize + (stateB + row_index[stateA])] = val;
518
556
transposedMutationMatrices[i * matSize + (stateA + row_index[stateB])] = val;
@@ -541,15 +579,13 @@ void ModelDNARateVariation::estimateRatesPerSitePerEntry(cmaple::Tree* tree) {
541
579
}
542
580
delete[] C;
543
581
delete[] W;
544
- delete[] globalCounts;
545
-
546
582
}
547
583
548
584
void ModelDNARateVariation::updateCountsAndWaitingTimesAcrossRoot ( PositionType start, PositionType end,
549
585
StateType parentState, StateType childState,
550
586
RealNumType distToRoot, RealNumType distToObserved,
551
587
RealNumType** waitingTimes, RealNumType** counts,
552
- RealNumType* globalCounts, RealNumType weight)
588
+ RealNumType weight)
553
589
{
554
590
if (parentState != childState) {
555
591
for (int i = start; i <= end; i++) {
@@ -559,7 +595,6 @@ void ModelDNARateVariation::updateCountsAndWaitingTimesAcrossRoot( PositionType
559
595
waitingTimes[i][parentState] += weight * relativeRootIsStateParent * distToRoot/2 ;
560
596
waitingTimes[i][childState] += weight * relativeRootIsStateParent * distToRoot/2 ;
561
597
counts[i][childState + row_index[parentState]] += weight * relativeRootIsStateParent;
562
- globalCounts[childState + row_index[parentState]] += weight * relativeRootIsStateParent;
563
598
564
599
RealNumType relativeRootIsStateChild = 1 - relativeRootIsStateParent;
565
600
waitingTimes[i][childState] += weight * relativeRootIsStateChild * distToRoot;
0 commit comments