galaxyproject · bgruening · Aug 21, 2018 · Aug 21, 2018 · Aug 21, 2018 · Oct 25, 2018
diff --git a/.tt_skip b/.tt_skip
@@ -55,7 +55,6 @@ tools/rmap
 tools/weightedaverage
 tools/annotation_profiler
 tools/megablast_xml_parser
-tools/correlation
 tools/merge_cols
 tools/microsats_alignment_level
 tools/bowtie_color_wrappers

diff --git a/tools/correlation/cor.py b/tools/correlation/cor.py
@@ -6,7 +6,9 @@
 """
 
 import sys
-from rpy import *
+import rpy2.robjects as robjects
+r = robjects.r
+
 
 def stop_err(msg):
     sys.stderr.write(msg)
@@ -17,17 +19,25 @@ def main():
     assert method in ( "pearson", "kendall", "spearman" )
 
     try:
-        columns = map( int, sys.argv[3].split( ',' ) )
+        column_string = sys.argv[3]
+        columns = list()
+        for col in column_string.split(','):
+            if '-' in col:
+                s, e = col.split('-')
+                col = list(range(int(s), int(e) + 1))
+                columns.extend(col)
+            else:
+                columns.append(int(col))
     except:
         stop_err( "Problem determining columns, perhaps your query does not contain a column of numerical data." )
-    
+
     matrix = []
     skipped_lines = 0
     first_invalid_line = 0
     invalid_value = ''
     invalid_column = 0
 
-    for i, line in enumerate( file( sys.argv[1] ) ):
+    for i, line in enumerate( open( sys.argv[1] ) ):
         valid = True
         line = line.rstrip('\n\r')
 
@@ -60,29 +70,32 @@ def main():
                 first_invalid_line = i+1
 
         if valid:
-            matrix.append( row )
+            matrix += row 
 
     if skipped_lines < i:
-        try:
-            out = open( sys.argv[2], "w" )
-        except:
-            stop_err( "Unable to open output file" )
-
         # Run correlation
         try:
-            value = r.cor( array( matrix ), use="pairwise.complete.obs", method=method )
-        except Exception, exc:
-            out.close()
-            stop_err("%s" %str( exc ))
-        for row in value:
-            print >> out, "\t".join( map( str, row ) )
-        out.close()
+            fv = robjects.FloatVector(matrix)
+            m = r['matrix'](fv, ncol=len(columns),byrow=True)
+            rslt_mat = r.cor(m, use="pairwise.complete.obs", method=method )
+            value = []
+            for ri in range(1, rslt_mat.nrow + 1):
+                row = []
+                for ci in range(1, rslt_mat.ncol + 1):
+                    row.append(rslt_mat.rx(ri,ci)[0])
+                value.append(row)
+        except Exception as exc:
+            stop_err("%s" % str( exc ))
+
+        with open( sys.argv[2], "w" ) as out:
+            for row in value:
+                out.write("%s\n" % "\t".join( map( str, row ) ))
 
     if skipped_lines > 0:
         msg = "..Skipped %d lines starting with line #%d. " %( skipped_lines, first_invalid_line )
         if invalid_value and invalid_column > 0:
             msg += "Value '%s' in column %d is not numeric." % ( invalid_value, invalid_column )
-        print msg
+        print(msg)
 
 if __name__ == "__main__":
     main()
diff --git a/tools/correlation/cor.xml b/tools/correlation/cor.xml
@@ -1,33 +1,48 @@
-<tool id="cor2" name="Correlation" version="1.0.0">
-  <description>for numeric columns</description>
-  <requirements>
-    <requirement type="package" version="1.0.3">rpy</requirement>
-  </requirements>
-  <command interpreter="python">cor.py $input1 $out_file1 $numeric_columns $method</command>
-  <inputs>
-    <param format="tabular" name="input1" type="data" label="Dataset" help="Dataset missing? See TIP below"/>
-    <param name="numeric_columns" label="Numerical columns" type="data_column" numerical="True" multiple="True" data_ref="input1" help="Multi-select list - hold the appropriate key while clicking to select multiple columns" />
-    <param name="method" type="select" label="Method">
-      <option value="pearson">Pearson</option>
-      <option value="kendall">Kendall rank</option>
-      <option value="spearman">Spearman rank</option>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="txt" name="out_file1" />
-  </outputs>
-  <tests>
-    <!--
-    Test a tabular input with the first line being a comment without a # character to start
-    -->
-    <test>
-      <param name="input1" value="cor.tabular" />
-      <param name="numeric_columns" value="2,3" />
-      <param name="method" value="pearson" />
-      <output name="out_file1" file="cor_out.txt" />
-    </test>
-  </tests>
-  <help>
+<tool id="cor2" name="Correlation" version="1.0.1">
+    <description>for numeric columns</description>
+    <requirements>
+        <requirement type="package" version="2.9.4">rpy2</requirement>
+    </requirements>
+    <command>
+      python '$__tool_directory__/cor.py'
+        '$input1'
+        '$out_file1'
+        $numeric_columns
+        $method
+    </command>
+    <inputs>
+        <param format="tabular" name="input1" type="data" label="Dataset" help="Dataset missing? See TIP below"/>
+        <param name="numeric_columns" label="Numerical columns" type="text" multiple="True"
+            data_ref="input1" help="Comma-separated column numbers and/or inclusive ranges, e.g. 2,3 or 2-5" />
+        <!--param name="numeric_columns" label="Numerical columns" type="data_column" numerical="True" multiple="True"
+            data_ref="input1" help="Multi-select list - hold the appropriate key while clicking to select multiple columns" /-->
+        <param name="method" type="select" label="Method">
+            <option value="pearson">Pearson</option>
+            <option value="kendall">Kendall rank</option>
+            <option value="spearman">Spearman rank</option>
+        </param>
+    </inputs>
+    <outputs>
+        <data format="tabular" name="out_file1" />
+    </outputs>
+    <tests>
+        <!--
+        Test a tabular input whose first line is a comment that does not start with a # character
+        -->
+        <test>
+            <param name="input1" value="cor.tabular" />
+            <param name="numeric_columns" value="2,3" />
+            <param name="method" value="pearson" />
+            <output name="out_file1" file="cor_out.txt" />
+        </test>
+        <test>
+            <param name="input1" value="cor.tabular" />
+            <param name="numeric_columns" value="2-3" />
+            <param name="method" value="pearson" />
+            <output name="out_file1" file="cor_out.txt" />
+        </test>
+    </tests>
+    <help>
 
 .. class:: infomark
 
@@ -97,5 +112,5 @@ This tool computes the matrix of correlation coefficients between numeric column
     0.730635686279	1.0
 
   So the correlation for our twenty cases is .73, which is a fairly strong positive relationship.
-  </help>
+    </help>
 </tool>
diff --git a/tools/correlation/test-data/cor.tabular b/tools/correlation/test-data/cor.tabular
@@ -18,4 +18,4 @@ Person	Height	Self Esteem
 1	65	4.1
 1	67	3.8
 1	63	3.4
-2	61	3.6
+2	61	3.6
diff --git a/tools/correlation/test-data/cor_out.txt b/tools/correlation/test-data/cor_out.txt
@@ -1,2 +1,2 @@
-1.0	0.730635686279
-0.730635686279	1.0
+1.0	0.7306356862792351
+0.7306356862792351	1.0
diff --git a/tools/correlation/tool_dependencies.xml b/tools/correlation/tool_dependencies.xml
-Original file line number
+Diff line change
@@ Expand Up / @@ -18,4 +18,4 @@ Person Height Self Esteem @@
 	65	4.1
 	67	3.8
 	63	3.4
-	61	3.6
+	61	3.6