From 84f0a20e58b40b238f52319f2017ae77df0dbf72 Mon Sep 17 00:00:00 2001 From: qPCR4vir Date: Wed, 20 Aug 2014 23:06:18 +0200 Subject: [PATCH] Import CSF optionally ignore some first lines, and add Orange header and an Icon Originally: Commit: 6cdffa810780fd59f37dcd2990298e54a2a5fa7e [6cdffa8] Parents: b8775022ee Author: qPCR4vir Date: Mittwoch, 20. August 2014 23:06:18 --- .../Prototypes/OWCSVFileImport.py | 86 +++++++++++++----- .../Prototypes/icons/FileCSV.png | Bin 0 -> 1215 bytes Orange/data/io.py | 5 +- 3 files changed, 65 insertions(+), 26 deletions(-) create mode 100644 Orange/OrangeWidgets/Prototypes/icons/FileCSV.png diff --git a/Orange/OrangeWidgets/Prototypes/OWCSVFileImport.py b/Orange/OrangeWidgets/Prototypes/OWCSVFileImport.py index 274f43d09..d0e68d7a9 100644 --- a/Orange/OrangeWidgets/Prototypes/OWCSVFileImport.py +++ b/Orange/OrangeWidgets/Prototypes/OWCSVFileImport.py @@ -1,5 +1,6 @@ """ CSV File import +icons/FileCSV.png Import comma separated file """ @@ -49,7 +50,7 @@ def reload_icon(self): class OWCSVFileImport(OWWidget): - settingsList = ["recent_files", "hints"] + settingsList = ["recent_files", "hints","ignore_first_lines"] DELIMITERS = [("Tab", "\t"), ("Comma", ","), @@ -75,6 +76,8 @@ def __init__(self, parent=None, signalManager=None, self.skipinitialspace = True self.has_header = True self.has_orange_header = True + self.ignore_first_lines = 0 #3 + self.add_simple_orange_header = False # # List of recent opened files. self.recent_files = [] @@ -190,6 +193,12 @@ def __init__(self, parent=None, signalManager=None, form.addRow(self.skipinitialspace_check) + self.spin_sk_ln= OWGUI.spin(box, self, "ignore_first_lines", label="Skip first lines", # !!!!!!!!!!!!!!!!!!!!!!!!!!!!! + min=0, max=1000, step=1, + callback=self.ignore_first_lines_changed, + controlWidth=40, + keyboardTracking=False) + self.has_header_check = \ QCheckBox(objectName="has_header_check", checked=self.has_header, @@ -254,6 +263,16 @@ def quote_changed(self): self.quote = str(self.quote_edit.text()) self.update_preview() + def ignore_first_lines_changed(self): # !!!!!!!!!!!!!!!!!! + #self.ignore_first_lines = self.spin_sk_ln.value() + if self.selected_file: + with open(self.selected_file, "rU") as f: + self.skipinitiallines(f) + self.selected_file_head=[] + for i, line in zip(range(30), f): + self.selected_file_head.append(line) + self.update_preview() + def missing_changed(self): self.missing = str(self.missing_edit.text()) self.update_preview() @@ -270,6 +289,20 @@ def skipinitialspace_changed(self): self.skipinitialspace = self.skipinitialspace_check.isChecked() self.update_preview() + def skipinitiallines(self,file): # !!!!!!!!!!!!!! + ignore=self.ignore_first_lines + while ignore and file.readline(): + ignore-=1 + + def open_and_skiplines(self,file, mode="rb"): # !!!!!!!!!!!!!! + if isinstance(file, basestring): + file = open(file, mode) + else: # assuming it is file like with proper mode, could check for write, read + pass + self.skipinitiallines(file) + return file + + def set_selected_file(self, filename): basedir, name = os.path.split(filename) index_to_remove = None @@ -291,7 +324,7 @@ def set_selected_file(self, filename): hints = self.hints[filename] else: try: - hints = sniff_csv(filename) + hints = self.sniff_csv(filename) except csv.Error, ex: self.warning(1, str(ex)) hints = dict(DEFAULT_HINTS) @@ -338,6 +371,7 @@ def set_selected_file(self, filename): self.selected_file = filename self.selected_file_head = [] with open(self.selected_file, "rU") as f: + self.skipinitiallines(f) for i, line in zip(range(30), f): self.selected_file_head.append(line) @@ -357,7 +391,7 @@ def update_preview(self): hints["skipinitialspace"] = self.skipinitialspace hints["DK"] = self.missing or None try: - data = Orange.data.io.load_csv(head, delimiter=self.delimiter, + data = Orange.data.io.load_csv(head, delimiter=self.delimiter, quotechar=self.quote, has_header=self.has_header, has_types=self.has_orange_header, @@ -379,7 +413,9 @@ def send_data(self): self.error(0) if self.selected_file: try: - data = Orange.data.io.load_csv(self.selected_file, + with open(self.selected_file, "rb") as f: + self.skipinitiallines(f) + data = Orange.data.io.load_csv(f, delimiter=self.delimiter, quotechar=self.quote, has_header=self.has_header, @@ -397,26 +433,28 @@ def send_data(self): self.send("Data", self.data) -def sniff_csv(file): - snifer = csv.Sniffer() - if isinstance(file, basestring): - file = open(file, "rU") - - sample = file.read(2 ** 20) # max 1MB sample - dialect = snifer.sniff(sample) - has_header = snifer.has_header(sample) - - return {"delimiter": dialect.delimiter, - "doublequote": dialect.doublequote, - "escapechar": dialect.escapechar, - "quotechar": dialect.quotechar, - "quoting": dialect.quoting, - "skipinitialspace": dialect.skipinitialspace, - "has_header": has_header, - "has_orange_header": False, - "skipinitialspace": True, - "DK": None, - } + def sniff_csv(self,file): + snifer = csv.Sniffer() + if isinstance(file, basestring): + with open(file, "rb") as f: + self.skipinitiallines(f) + sample = f.read(2 ** 20) # max 1MB sample self opened file + else: + sample = file.read(2 ** 20) # max 1MB sample + dialect = snifer.sniff(sample) + has_header = snifer.has_header(sample) + + return {"delimiter": dialect.delimiter, + "doublequote": dialect.doublequote, + "escapechar": dialect.escapechar, + "quotechar": dialect.quotechar, + "quoting": dialect.quoting, + "skipinitialspace": dialect.skipinitialspace, + "has_header": has_header, + "has_orange_header": False, + "skipinitialspace": True, + "DK": None, + } if __name__ == "__main__": import sys diff --git a/Orange/OrangeWidgets/Prototypes/icons/FileCSV.png b/Orange/OrangeWidgets/Prototypes/icons/FileCSV.png new file mode 100644 index 0000000000000000000000000000000000000000..cabb4cc0e7622345fc32669770618845fbc5d434 GIT binary patch literal 1215 zcmV;w1VHj{00001b5ch_0Itp) z=>Px#1ZP1_K>z@;j|==^1poj9X-PyuRA@u(n$JsIR}{zJ7&N97)JzhP`U}TJ-9<+w zh%H5mT3w`}F8r~G0TISU7Fp=7(3R0YK*2>U6silGZn~%nR#ZeLgSs;d6%mnQ3<{c+ z(aHJVckY{UwATCbo_U$MA2^(I@0)pZzxSSd-t#>TjYgxK%N1;|e0h0!2MurpF|E>Ta{nT}O{Ax% zXCS^R$H&K+2L}iFbeO)7N^8Np#lDi`1JJjAL{=Ou?nlJt41gk ziXI&uC64Kc&fiMs|BKs>VLR0E~Z@r)z^Nib$;i)}eTU%@7 z<>j4lGoHI-eKY~TkbPlxeLS8L9|#0sXZC#@s3Skq7U-ibmuq%?9G>%~uWMyT&r0f36 zk&%&Ci;IhUkx0ZJ4u|hfO-;RPYHEtmiofTt*v2z=c#_S{O{1`|@HL&|(!Jc-**Uwi zvhpqB_}^E-b1Pp|RD@Li-y+u7*m#pe+t?OQ19bHERu#YB|LMdh16q8lqQ$2wT70Ua z#iuG-ydo`R(e?Fp>ut@})>c1<76StVxCK6A4Gj%^)G$g)N<`n-*ckW3=k!BhR@(hZcR-M z_qv0Mc=5M0fzb|_p-FCT?r-GqY#^4`MEp~k;^Ja!A|MLw`ucit41y&K3k$-_%y1JG z@pbIa=V=0Ub>>G~TU*86m8qTxUQz%l7)(!_NmN~3Exeo9DR}?+kWDf;IB4|u$7&$b zSQb4LLWxZM;o)K7H#RmzUAT^v2X@5gvA>j_o*paBVGa{sV{#xo91e^A>qvQ^ zBHpZGhS9-Z2~0pJWY$Z%Gzt$oI?U6JkB?iGUuJ~csEGfaJ*rfX8Jg_w?*2dy%lkJ~ z?|U-E6T#{xQdLzYJoY-S5=ePugcpHmh>T`uXGep