@@ -4220,12 +4220,16 @@ double PartitionFinder::getmAICforMergeScheme(vector<set<int> > gene_sets, StrVe
42204220
42214221ModelPairSet PartitionFinder::getBetterPairsmAIC () {
42224222 cout << " Compute mAIC score of partition models for compatible partition merging schemes..." << endl;
4223+ double cpu_time = getCPUTime ();
4224+ double real_time = getRealTime ();
4225+
42234226 double cur_score_maic = 0 ;
4227+ ModelPairSet sorted_pairs_bu = sorted_pairs;
42244228 ModelPairSet cur_better_pairs;
42254229 set<int > part_ids;
4226-
4227- double cpu_time = getCPUTime () ;
4228- double real_time = getRealTime () ;
4230+ int better_df = dfsum;
4231+ vector<set< int > > better_gene_sets = gene_sets ;
4232+ StrVector better_model_names = model_names ;
42294233
42304234 // evaluate all pairs of partitions for merging
42314235 int index = 0 ;
@@ -4242,33 +4246,43 @@ ModelPairSet PartitionFinder::getBetterPairsmAIC() {
42424246
42434247 // compute mAIC
42444248 ModelPair cur_pair = it->second ;
4245- int merged_df = dfsum - dfvec[cur_pair.part1 ] - dfvec[cur_pair.part2 ] + it->second .df ;
42464249
42474250 // prepare partition IDs and model_names for subsets
4248- vector<set<int > > cur_gene_sets = gene_sets;
4249- StrVector model_names;
4250- model_names.resize (in_tree->size ());
4251- for (int i = 0 ; i < gene_sets.size (); i++) {
4252- model_names[i] = in_tree->at (i)->aln ->model_name ;
4253- }
4251+ auto it_bu = next (sorted_pairs_bu.begin (), index); // as partition index may change in this loop, use back up to get back correct information
4252+ int cur_df = better_df - dfvec[it_bu->second .part1 ] - dfvec[it_bu->second .part2 ] + it->second .df ;
4253+ vector<set<int > > cur_gene_sets = better_gene_sets;
4254+ StrVector cur_model_names = better_model_names;
4255+
42544256 cur_gene_sets[cur_pair.part1 ] = cur_pair.merged_set ;
4255- model_names [cur_pair.part1 ] = cur_pair.model_name ;
4257+ cur_model_names [cur_pair.part1 ] = cur_pair.model_name ;
42564258 cur_gene_sets.erase (cur_gene_sets.begin () + cur_pair.part2 );
4257- model_names .erase (model_names .begin () + cur_pair.part2 );
4259+ cur_model_names .erase (cur_model_names .begin () + cur_pair.part2 );
42584260
4259- cur_score_maic = getmAICforMergeScheme (cur_gene_sets, model_names, merged_df , true );
4261+ cur_score_maic = getmAICforMergeScheme (cur_gene_sets, cur_model_names, cur_df , true );
42604262
42614263 if (cur_score_maic < inf_score_maic) {
4262- cout << it->second .set_name << " will be merged with mAIC score: " << cur_score_maic
4263- << " (Marginal LnL: " << lh_marginal << " df: " << merged_df << " )" << endl;
4264- // << ", AIC: "<< it->second.score << " ( LnL: " << it->second.logl << ")" << endl;
4264+ cout << " Merging " << it->second .set_name << " with mAIC score: " << cur_score_maic
4265+ << " (Marginal LnL: " << lh_marginal << " df: " << cur_df << " )" << endl;
42654266 part_ids.insert (it->second .merged_set .begin (), it->second .merged_set .end ());
4266- cur_better_pairs.insertPair (it->second );
4267- } /* else {
4267+ cur_better_pairs.insertPair (it_bu->second );
4268+
4269+ inf_score_maic = cur_score_maic;
4270+ better_df = cur_df;
4271+ better_gene_sets = cur_gene_sets;
4272+ better_model_names = cur_model_names;
4273+
4274+ // decrease part ID for all pairs beyond opt_pair.part2
4275+ auto next_pair = it;
4276+ for (next_pair++; next_pair != sorted_pairs.end (); next_pair++) {
4277+ if (next_pair->second .part1 > cur_pair.part2 )
4278+ next_pair->second .part1 --;
4279+ if (next_pair->second .part2 > cur_pair.part2 )
4280+ next_pair->second .part2 --;
4281+ }
4282+ }/* else {
42684283 cout << "[mAIC] "<< it->second.set_name << " will [NOT] be merged with mAIC score: " << cur_score_maic
4269- << " (Marginal LnL: " << lh_marginal << " df: " << merged_df << "), AIC: "
4270- << it->second.score << " ( LnL: " << it->second.logl << ")" << endl;
4271- } */
4284+ << " (Marginal LnL: " << lh_marginal << " df: " << cur_df << ")" << endl;
4285+ }*/
42724286 }
42734287
42744288
@@ -4906,8 +4920,6 @@ void PartitionFinder::test_PartitionModel() {
49064920 cout << " Full partition model " << criterionName (params->model_test_criterion )
49074921 << " score: " << inf_score << " (LnL: " << lhsum << " df:" << dfsum << " )" << endl;
49084922
4909- StrVector model_names;
4910-
49114923 if (params->marginal_lh_aic ) {
49124924 inf_score_maic = getmAICforMergeScheme (gene_sets, model_names, dfsum, false );
49134925 cout << " Full partition model mAIC score: " << inf_score_maic << " (Marginal LnL: " << lh_marginal << " df:" << dfsum << " )" << endl;
@@ -4999,7 +5011,7 @@ void PartitionFinder::test_PartitionModel() {
49995011 bool proceed_stepwise_merge = perform_merge;
50005012
50015013 // variables for mAIC merging
5002- bool switch_to_caic = false ;
5014+ bool switched_to_caic = false ;
50035015 double lhsum_bu;
50045016 int dfsum_bu;
50055017 vector<set<int > > gene_sets_bu;
@@ -5041,7 +5053,7 @@ void PartitionFinder::test_PartitionModel() {
50415053 bool is_maic_pairs_empty = better_pairs.empty ();
50425054
50435055 if (is_maic_pairs_empty){
5044- if (switch_to_caic ) {
5056+ if (switched_to_caic ) {
50455057 cout << " No better pairs based on mAIC after one round of cAIC merging and finish merging with previous optimal mAIC" << endl;
50465058 // load back back up
50475059 lhsum = lhsum_bu;
@@ -5060,7 +5072,7 @@ void PartitionFinder::test_PartitionModel() {
50605072 };
50615073 cout << " No better pairs based on mAIC, try merging better pairs based on cAIC" << endl;
50625074 cout << compatible_pairs.size () << " compatible better partition pairs found based on cAIC" << endl;
5063- switch_to_caic = true ;
5075+ switched_to_caic = true ;
50645076 // back up
50655077 lhsum_bu = lhsum;
50665078 dfsum_bu = dfsum;
@@ -5072,9 +5084,9 @@ void PartitionFinder::test_PartitionModel() {
50725084 greedy_model_trees_bu = greedy_model_trees;
50735085 }
50745086 } else {
5075- if (switch_to_caic ) {
5087+ if (switched_to_caic ) {
50765088 cout << " Better pairs based on mAIC are found after one round of cAIC merging" << endl;
5077- switch_to_caic = false ;
5089+ switched_to_caic = false ;
50785090 }
50795091 compatible_pairs = better_pairs;
50805092 }
@@ -5088,7 +5100,7 @@ void PartitionFinder::test_PartitionModel() {
50885100 dfsum = dfsum - dfvec[opt_pair.part1 ] - dfvec[opt_pair.part2 ] + opt_pair.df ;
50895101 inf_score = computeInformationScore (lhsum, dfsum, ssize, params->model_test_criterion );
50905102
5091- if (!params->marginal_lh_aic || switch_to_caic ) {
5103+ if (!params->marginal_lh_aic || switched_to_caic ) {
50925104 ASSERT (inf_score <= opt_pair.score + 0.1 );
50935105 cout << " Merging " << opt_pair.set_name << " with " << criterionName (params->model_test_criterion )
50945106 << " score: " << inf_score << " (LnL: " << lhsum << " df: " << dfsum << " )" << endl;
@@ -5133,25 +5145,19 @@ void PartitionFinder::test_PartitionModel() {
51335145 }
51345146 // save and output mAIC after merging all pairs
51355147 if (params->marginal_lh_aic ) {
5136- if (!switch_to_caic) {
5137- double cur_score_maic = getmAICforMergeScheme (gene_sets, model_names, dfsum, true );
5138- if (cur_score_maic < inf_score_maic) {
5139- cout << " Current partition model mAIC score: " << cur_score_maic
5140- << " (Marginal LnL: " << lh_marginal << " df:" << dfsum << " )" << endl;
5141- } else {
5142- cout << " [Rare case] Current partition model mAIC score: " << cur_score_maic
5143- << " (Marginal LnL: " << lh_marginal << " df:" << dfsum << " ) is worse than "
5144- << inf_score_maic << endl;
5145- }
5148+ double cur_score_maic = getmAICforMergeScheme (gene_sets, model_names, dfsum, true );
5149+ if (!switched_to_caic) {
5150+ cout << " Current partition model mAIC score: " << cur_score_maic
5151+ << " (Marginal LnL: " << lh_marginal << " df:" << dfsum << " ). Current cAIC score: "
5152+ << inf_score << " (LnL: " << lhsum << " df: " << dfsum << " )" << endl;
51465153 inf_score_maic = cur_score_maic;
51475154 } else {
5148- double cur_score_maic = getmAICforMergeScheme (gene_sets, model_names, dfsum, true );
5149- if (cur_score_maic < inf_score_maic) {
5155+ if (cur_score_maic <= inf_score_maic) {
51505156 cout << " Current partition model mAIC score: " << cur_score_maic
51515157 << " (Marginal LnL: " << lh_marginal << " df:" << dfsum << " ) is better than "
51525158 << inf_score_maic << " , back to merging with mAIC since next round" << endl;
51535159 inf_score_maic = cur_score_maic;
5154- switch_to_caic = false ;
5160+ switched_to_caic = false ;
51555161 } else {
51565162 cout << " Current partition model mAIC score: " << cur_score_maic
51575163 << " (Marginal LnL: " << lh_marginal << " df:" << dfsum << " ) is worse than "
@@ -5161,8 +5167,6 @@ void PartitionFinder::test_PartitionModel() {
51615167 cout << endl;
51625168 }
51635169
5164-
5165-
51665170 // proceed to the next iteration if gene_sets.size() >= 2
51675171 proceed_stepwise_merge = (gene_sets.size () >= 2 );
51685172 }
0 commit comments