Merge pull request #1 from behzadk/master

behzadk · web-flow · commit 5817177f1910 · 2020-07-15T14:13:49.000+02:00
Merging BK updates
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,4 @@
 venv/
 *.pyc
 *.pyo
+.idea/
diff --git a/README.MD b/README.MD
@@ -4,7 +4,9 @@ Automated equation generation of candidate models for synthetic microbial commun
 AutoCD builds a equations for candidate model systems that can simulate the growth of bacteria capturing important interactions such as competition for nutrients, communication through quorum sensing and amensalism via bacteriocins. AutoCD performs model selection for a desired population level behaviour. AutoCD is intended to aide in the design of synthetic microbial systems, identfying promising candidate network topologies and essential parameters for controlling a microbial community.
 
 
-AutoCD is written in C++ and Python 3.4+, data analysis scripts require R. This has been tested on Linux only.
+AutoCD is written in C++ and Python 3.4+; data analysis scripts require R.
+
+This software has been tested on Linux and is designed for HPC, using OpenMP for parallel simulations.
 
 ## Requirements
 
@@ -17,7 +19,7 @@ g++ (tested with g++-8)
 Boost >=  v1.67
 OpenMP
 
-Boost must be installed for the version of python being used to run AutoCD. Ensure boost has been correvtly added to paths. Run `build_test.sh` to test on a simple build example and identify issues with library flags and ensuring the correct Python includes. Changes should be made to `build_pop_modules.sh` which is used to the build necessarry Boost Python libarary for a newly generated model space.
+Boost must be installed for the version of Python being used to run AutoCD. Ensure Boost has been correctly added to your paths. Run `build_test.sh` to test a simple build example, identify issues with library flags, and ensure the correct Python includes are used. Changes should be made to `build_pop_modules.sh`, which is used to build the necessary Boost Python library for a newly generated model space.
 
 #### R packages: ####
 ggplot2, ggisoband, gridExtra, sm
diff --git a/data_analysis/NMF_analysis.py b/data_analysis/NMF_analysis.py
@@ -354,8 +354,8 @@ def nmf_decomposition(output_dir, adj_mat_dir, n_components=4):
     write_W_to_csv(W, model_idxs, W_csv_path)
 
     H = model.components_
-    H = H.reshape(n_components, 9, 11)
-    # H = H.reshape(i, 7, 9)
+    # H = H.reshape(n_components, 9, 11)
+    H = H.reshape(n_components, 7, 9)
 
     H_csv_path = figure_output_dir +  "H_" + str(n_components) + ".csv"
     write_H_to_csv(H, H_csv_path, row_names, column_names, normalise=True)
diff --git a/data_analysis/data_analysis_ABCSMC.py b/data_analysis/data_analysis_ABCSMC.py
@@ -120,10 +120,6 @@ def generate_marginal_probability_boxplot(pop_dir_list, output_dir, hide_x_ticks
     ax.margins(y=0)
     fig.tight_layout()
     plt.show()
-    # model_space_report_df.drop(model_space_report_df.filter(regex="Unname"),axis=1, inplace=True)
-    # model_space_report_df = model_space_report_df.sort_values(by='model_idx', ascending=False).reset_index(drop=True)
-    # print(model_space_report_df.columns)
-    # model_space_report_list.append(model_space_report_df)
 
 
 def generate_marginal_probability_distribution(pop_dir_list, output_dir, hide_x_ticks=True, show_median=True, show_BF=False, drop_eqless=-1):
@@ -236,7 +232,6 @@ def plot_all_model_param_distributions(pop_dir, inputs_dir, figure_output_dir):
     model_space_report_path = pop_dir + "combined_model_space_report.csv"
     model_space_report_df = pd.read_csv(model_space_report_path, index_col=0)
     # generate_model_space_statistics(model_space_report_df, "model_marginal")
-    print(list(model_space_report_df))
 
     model_space_report_df.drop(model_space_report_df[model_space_report_df['model_marginal_mean'] == 0].index, inplace=True)
 
@@ -660,10 +655,6 @@ def self_regulation_bar_plot(pop_dir_list, adj_mat_dir, output_dir, hide_x_ticks
     model_space_report_df = merge_model_space_report_df_list(model_space_report_list)
     generate_model_space_statistics(model_space_report_df, "model_marginal")
     
-    # total_accepted = sum(model_space_report_df['accepted_count'].values)
-    # new_model_marginals = [x/total_accepted for x in model_space_report_df['accepted_count'].values]
-    # model_space_report_df['model_marginal_mean'] = new_model_marginals
-
     if drop_unnacepted:
         model_space_report_df.drop(model_space_report_df[model_space_report_df['model_marginal_mean'] == 0.0].index, inplace=True)
 
@@ -716,21 +707,10 @@ def self_regulation_bar_plot(pop_dir_list, adj_mat_dir, output_dir, hide_x_ticks
             adjusted_vals.append(row['model_marginal_mean'])
     model_space_report_df['model_marginal_mean'] = adjusted_vals
     print(model_space_report_df)
-    # if drop_unnacepted:
-    # model_space_report_df.drop(model_space_report_df[model_space_report_df['symmetrical'] == False].index, inplace=True)
-    # model_idxs = model_space_report_df['model_idx'].values
-    # model_space_report_df = model_space_report_df.sort_values(by='model_marginal_mean', ascending=False).reset_index(drop=True)
     model_space_report_df.to_csv(output_dir + "dists_test.csv")
 
     fig, ax = plt.subplots()
 
-    print(model_space_report_df)
-    if num_pops > 1:
-        pass
-        # ax.errorbar(model_space_report_df.index, 
-        #             model_space_report_df['model_marginal_mean'], 
-        #             yerr=model_space_report_df['model_marginal_std'], fmt=',', color='black', alpha=1,
-        #             label=None, elinewidth=0.5)
 
     sns.barplot(model_space_report_df.index, model_space_report_df.model_marginal_mean, 
                      data=model_space_report_df, alpha=0.9, ax=ax)
diff --git a/data_analysis/dens_plot_2D.R b/data_analysis/dens_plot_2D.R
@@ -722,9 +722,6 @@ output_dir <- args[5]
 make_1d_plot <- args[6]
 make_2d_plot <- args[7]
 
-# wd <- "/home/behzad/Documents/barnes_lab/cplusplus_software/speed_test/repressilator/cpp/"
-# data_dir <- paste(wd, "output/spock_manu_stable_1_SMC/spock_manu_stable_1_SMC_a1/Population_2/model_sim_params/", sep="")
-# inputs_data_dir <- paste(wd, "input_files/input_files_two_species_spock_manu_1/input_files/", sep="")
 
 data_df <- read.csv(params_posterior_path)
 
diff --git a/data_analysis/network_vis.py b/data_analysis/network_vis.py
@@ -212,11 +212,7 @@ def get_flat_adjacency_matricies_df(model_indexes, adj_matrix_path_template):
         adj_mat_path = adj_matrix_path_template.replace("#REF#", str(m_idx))
         adj_mat_df = pd.read_csv(adj_mat_path, index_col=0)
 
-        print(adj_mat_df)
         adj_mat_df = convert_QS_column(adj_mat_df)
-        print(adj_mat_df)
-        print(np.shape(adj_mat_df))
-        exit()
         flat_adj_mats.append(abs(adj_mat_df.values).flatten())
     
     interaction_indexes = ["a_" + str(i) for i in range(np.shape(flat_adj_mats)[1])]
@@ -237,9 +233,6 @@ def get_flat_adjacency_matricies_df(model_indexes, adj_matrix_path_template):
             adj_mat_df = adj_mat_df.loc[:, adj_mat_df.columns != col]
 
 
-    print(np.shape(adj_mat_df))
-    exit()
-
     return adj_mat_df
 
 
@@ -744,34 +737,6 @@ def make_hierarchical_clustering(combined_analysis_output_dir, adj_mat_dir, drop
     plt.savefig(output_path, dpi=500, bbox_inches='tight', transparent=False)
     plt.close()
 
-    # print("Making posterior probability heatmap")
-    # # Plot with posterior probability as heatmap
-    # adj_mat_df['p_prob'] = model_space_report_df['model_marginal_mean'].values
-    # linkage_data = adj_mat_df.loc[:, adj_mat_df.columns != 'p_prob'].values
-    # linkage = hc.linkage(sp.distance.pdist(linkage_data), method='average', metric='euclidean')
-    # cm = sns.clustermap(data=adj_mat_df['p_prob'], row_linkage=linkage, col_cluster=False, yticklabels=1)
-    # pprob_row_colours = cm.row_colors
-    # hm = cm.ax_heatmap.get_position()
-    # cm.ax_heatmap.set_position([int_link_x, int_link_y0, int_link_width*0.1, int_link_height])
-    
-    # # cm.ax_heatmap.set(yticklabels=[])
-    # # cm.ax_heatmap.set(xticklabels=[])
-
-    # # cm.ax_heatmap.set(xlabel='')
-    # # cm.ax_heatmap.set(ylabel='')
-    # cm.ax_heatmap.tick_params(left=False, bottom=False, right=False)
-
-    # col = cm.ax_col_dendrogram.get_position()
-    # cm.ax_heatmap.tick_params(left=False, bottom=False, right=False)
-
-    # cm.ax_row_dendrogram.set_visible(False)
-    # cm.cax.set_visible(False)
-
-    # plt.setp(cm.ax_heatmap.yaxis.get_majorticklabels(), rotation=0)
-
-    # output_path = combined_analysis_output_dir + "pprob_dendrogram_" + str(max_level) + ".pdf"
-    # plt.savefig(output_path, dpi=500, bbox_inches='tight', transparent=True)
-    # plt.close()
 
 
     print("Making posterior probability barchart")
@@ -994,15 +959,6 @@ def main():
     for levels in [5]:
         make_hierarchical_clustering(combined_analysis_output_dir, adj_mat_dir, drop_eqless=-1, hide_x_ticks=True, max_level=levels, use_bar=True, plot_error=False, average_clusters=True, log_scale=True)
 
-    exit()
-    make_model_network(combined_analysis_output_dir, adj_mat_dir, use_adjacent_neighbour=True)
-    exit()
-    exit()
-    exit()
-    make_model_network(combined_analysis_output_dir, adj_mat_dir, use_nearest_neighbour=True)
-
-    make_model_network(combined_analysis_output_dir, adj_mat_dir, use_adjacent_neighbour=True, colour_by_motif=True)
-    make_model_network(combined_analysis_output_dir, adj_mat_dir, use_nearest_neighbour=True, colour_by_motif=True)
 
 if __name__ == "__main__":
     main()
diff --git a/data_analysis/run_data_analysis.py b/data_analysis/run_data_analysis.py
@@ -8,7 +8,7 @@
 from . import data_analysis_ABCSMC as ABC_DA
 from . import NMF_analysis
 from . import nearest_neighbours
-
+from . import network_vis
 
 def main(data_analysis_config):
     experiment_name = data_analysis_config['experiment_name']
@@ -31,11 +31,12 @@ def main(data_analysis_config):
     # Get final population directories of each repeat experiment
     final_pop_dirs = ABC_DA.find_latest_pop_dirs(exp_dir)
 
+
     model_space_report_df = ABC_DA.write_combined_model_space_report(finished_exp_final_population_dirs, combined_analysis_output_dir) 
     model_space_report_df = ABC_DA.write_combined_model_space_with_motif_counts(finished_exp_final_population_dirs, combined_analysis_output_dir, adj_mat_dir, window=0, normalise=False, plot=False)
 
-    ABC_DA.generate_marginal_probability_distribution(finished_exp_final_population_dirs, 
-        combined_analysis_output_dir, hide_x_ticks=True, show_median=False, show_BF=False, drop_eqless=-1)
+    network_vis.make_hierarchical_clustering(combined_analysis_output_dir, adj_mat_dir, drop_eqless=-1, hide_x_ticks=True, max_level=1000, 
+        use_bar=True, plot_error=True, average_clusters=False, log_scale=False)
 
     ABC_DA.generate_marginal_probability_distribution(finished_exp_final_population_dirs, 
         combined_analysis_output_dir, hide_x_ticks=True, show_median=False, show_BF=False, drop_eqless=-1)
@@ -44,7 +45,11 @@ def main(data_analysis_config):
     	combined_analysis_output_dir, drop_eqless=-1, 
     	show_median=False, hide_x_ticks=False)
 
+    nearest_neighbours.get_motif_neighbours(combined_analysis_output_dir, load_pickle=False, remove_zero_change=True)
+    NMF_analysis.nmf_decomposition(combined_analysis_output_dir, adj_mat_dir)
     ABC_DA.combine_model_params(finished_exp_final_population_dirs, combined_analysis_output_dir)
     ABC_DA.plot_all_model_param_distributions(combined_analysis_output_dir, inputs_dir, combined_analysis_output_dir + "dist_plots/")
 
-    nearest_neighbours.get_motif_neighbours(combined_analysis_output_dir, load_pickle=False, remove_zero_change=True)
+
+
+
diff --git a/run_AutoCD.py b/run_AutoCD.py
@@ -46,6 +46,19 @@ def main():
             experiment_config = yaml.load(yaml_file, Loader=yaml.FullLoader)
 
         module_path = experiment_config['population_modules_path']
+        if os.path.isfile(module_path) and os.access(module_path, os.R_OK):
+            print("population.modules.so exists...")
+
+        else:
+            print("population.modules.so missing, attempting to build... ")
+            build_file_path = "./build_pop_modules.sh"
+            build_arguments = MSG_config['output_dir'] + MSG_config['model_space_name'] + '/'
+
+            # Build the C++ modules; the first argument is the directory
+            # containing model.cpp and model.h.
+            subprocess.check_call([build_file_path, build_arguments])
+
+
         # Copy population_modules.so to ABC folder
         copy(module_path, './ABC/')
 

-Original file line number
+Diff line change
@@ @@ -1,3 +1,4 @@ @@
 venv/
 *.pyc
 *.pyo
 +.idea/