diff --git a/scripts_of/__main__.py b/scripts_of/__main__.py
index e101f50a..a0ac2c7b 100755
--- a/scripts_of/__main__.py
+++ b/scripts_of/__main__.py
@@ -1681,7 +1681,7 @@ def CheckOptions(options, speciesToUse):
     # check can open enough files
     n_extra = 50
     q_do_orthologs = not any((options.qStopAfterPrepare, options.qStopAfterGroups, options.qStopAfterSeqs, options.qStopAfterAlignments, options.qStopAfterTrees))
-    if q_do_orthologs and not options.qStartFromTrees:
+    if q_do_orthologs and not options.qStartFromTrees and os.environ.get("USE_MEM") != "1":
         n_sp = len(speciesToUse)
         wd = files.FileHandler.GetWorkingDirectory_Write()
         wd_files_test = wd + "Files_test/"
diff --git a/scripts_of/trees2ologs_of.py b/scripts_of/trees2ologs_of.py
index bf7a2c7a..a35cc0c2 100644
--- a/scripts_of/trees2ologs_of.py
+++ b/scripts_of/trees2ologs_of.py
@@ -18,6 +18,8 @@ import itertools
 import multiprocessing as mp
 from collections import defaultdict, deque
 
+import io
+import shutil
 from . import tree as tree_lib
 from . import resolve, util, files, parallel_task_manager
 
@@ -987,13 +989,51 @@ def __init__(self, directory, speciesDict, iSpeciesToUse, nSpecies, sp_to_index)
         self.nSpecies = nSpecies
         self.sp_to_index = sp_to_index
         self.dPutativeXenologs = files.FileHandler.GetPutativeXenelogsDir()
+        # USE_MEM=1 buffers the output files in memory rather than holding a file handle open for each
+        self.inMemory = os.environ.get("USE_MEM") == "1"
+        if self.inMemory:
+            util.PrintTime("Handling OrthologsFiles in memory")
         self.ortholog_file_handles = [[None for _ in self.iSpeciesToUse] for _ in self.iSpeciesToUse]
         self.xenolog_file_handles = [None for _ in self.iSpeciesToUse]
 
+    def my_open(self, path, mode):
+        """
+        Open an output file or, in in-memory mode (python 3 only), a buffer standing in for it
+        Args:
+            path - the file the data is destined for
+            mode - the mode for open(), only used when writing directly to the file
+        Returns:
+            A writable file-like object. An in-memory buffer starts with the existing contents
+            of the file and is only written to 'path' by my_close
+        """
+        if self.inMemory and not PY2:
+            tmp = io.StringIO()
+            tmp.filepath = path
+            if os.path.exists(path):
+                with open(path, "r") as tmp_fh:
+                    shutil.copyfileobj(tmp_fh, tmp)
+        else:
+            tmp = open(path, mode)
+        return tmp
+
+    def my_close(self, orig_fh):
+        """
+        Close a handle returned by my_open, first writing an in-memory buffer out to its file
+        Args:
+            orig_fh - the handle to close
+        """
+        if self.inMemory and not PY2:
+            # Rewind first: after the writes the buffer position is at the end of the data
+            orig_fh.seek(0)
+            with open(orig_fh.filepath, "w") as fh:
+                shutil.copyfileobj(orig_fh, fh)
+        orig_fh.close()
+
     def __enter__(self):
         for i in xrange(self.nSpecies):
             sp0 = str(self.iSpeciesToUse[i])
-            self.xenolog_file_handles[i] = open(self.dPutativeXenologs + "%s.tsv" % self.speciesDict[sp0], csv_append_mode)
+            xenolog_path = self.dPutativeXenologs + "%s.tsv" % self.speciesDict[sp0]
+            self.xenolog_file_handles[i] = self.my_open(xenolog_path, csv_append_mode)
             strsp0 = sp0 + "_"
             isp0 = self.sp_to_index[sp0]
             d0 = self.d + "Orthologues_" + self.speciesDict[sp0] + "/"
@@ -1003,17 +1043,17 @@ def __enter__(self):
                 strsp1 = sp1 + "_"
                 isp1 = self.sp_to_index[sp1]
                 d1 = self.d + "Orthologues_" + self.speciesDict[sp1] + "/"
-                self.ortholog_file_handles[i][j] = open(d0 + '%s__v__%s.tsv' % (self.speciesDict[sp0], self.speciesDict[sp1]), csv_append_mode)
-                self.ortholog_file_handles[j][i] = open(d1 + '%s__v__%s.tsv' % (self.speciesDict[sp1], self.speciesDict[sp0]), csv_append_mode)
+                self.ortholog_file_handles[i][j] = self.my_open(d0 + '%s__v__%s.tsv' % (self.speciesDict[sp0], self.speciesDict[sp1]), csv_append_mode)
+                self.ortholog_file_handles[j][i] = self.my_open(d1 + '%s__v__%s.tsv' % (self.speciesDict[sp1], self.speciesDict[sp0]), csv_append_mode)
         return self.ortholog_file_handles, self.xenolog_file_handles
 
     def __exit__(self, type, value, traceback):
         for fh in self.xenolog_file_handles:
-            fh.close()
+            self.my_close(fh)
         for fh_list in self.ortholog_file_handles:
             for fh in fh_list:
                 if fh is not None:
-                    fh.close()
+                    self.my_close(fh)
 
     @staticmethod
     def flush_olog_files(ortholog_file_handles):
diff --git a/scripts_of/util.py b/scripts_of/util.py
index 21a83645..6e35b555 100644
--- a/scripts_of/util.py
+++ b/scripts_of/util.py
@@ -387,7 +387,9 @@ def number_open_files_exception_advice(n_species, q_at_trees):
     """
     # parallel_task_manager.RunCommand("ulimit -Hn")
     n_req = n_species*n_species + 100
-    msg="\nERROR: The system limits on the number of files a process can open is too low. For %d species \
+    msg="\nERROR: The system limits on the number of files a process can open is too low. "
+    if os.environ.get("USE_MEM") != "1":
+        msg += "For %d species \
 OrthoFinder needs to be able to open at least r=%d files. Please increase the limit and restart OrthoFinder\n\
 1. Check the hard and soft limits on the number of open files for your system:\n\
     $ ulimit -Hn\n\
@@ -398,16 +400,18 @@ def number_open_files_exception_advice(n_species, q_at_trees):
     To increase the limit to %d for user called 'emms' add the lines:\n\
         emms hard nofile %d\n\
         emms soft nofile %d\n" % (n_species, n_req, n_req, n_req, n_req, n_req)
-    msg +="    (edit these lines to match your username)\n\
+        msg +="    (edit these lines to match your username)\n\
 4. Check the limit has now been updated (if you changed the hard limit you'll need to open a new session and confirm it's updated):\n\
-    $ ulimit -Sn"
-
-    if q_at_trees:
-        msg_part_2 = "5. Once the limit is updated restart OrthoFinder 'from trees' using the '-ft' command"
+    $ ulimit -Sn\n"
+        if q_at_trees:
+            msg += "5. Once the limit is updated restart OrthoFinder 'from trees' using the '-ft' command\n"
+        else:
+            msg += "5. Once the limit is updated restart OrthoFinder with the original command\n"
+        msg += "6. Alternatively, run OrthoFinder with the environment variable USE_MEM=1 so that the orthologs files are handled in memory. You need more memory for it\n"
     else:
-        msg_part_2 = "5. Once the limit is updated restart OrthoFinder with the original command"
-    msg_part_3 = "\nFor full details see: https://github.com/davidemms/OrthoFinder/issues/384"
-    print(msg + "\n" + msg_part_2 + "\n" + msg_part_3 + "\n")
+        msg += "OrthoFinder is already running with USE_MEM=1. Please increase the limit on the number of open files (see 'ulimit -n') and restart OrthoFinder\n"
+    msg += "\nFor full details see: https://github.com/davidemms/OrthoFinder/issues/384"
+    print(msg + "\n")
 """
 -------------------------------------------------------------------------------
 """