Skip to content

Commit 64236cc

Browse files
committed
updated xlsx creation and merging
1 parent cb2457e commit 64236cc

File tree

2 files changed

+34
-42
lines changed

2 files changed

+34
-42
lines changed

core/report.py

+5-5
Original file line number | Diff line number | Diff line change
@@ -187,7 +187,8 @@ def find_consultant_agency(self, app_data: list, filename: str) -> list:
187187
self.student_flag = 1
188188
elif self.student_flag == 1 and str(items).startswith('Date of Birth'):
189189
self.student_flag = 0
190-
_list.append(items)
190+
_list.append(items[:25])
191+
_list.append(items[26:])
191192
_list.append(filename)
192193

193194
return _list
@@ -196,12 +197,11 @@ def generate_xlsx_sheet(self, _list: list, filename: str) -> None:
196197
_temp = []
197198

198199
for idx, items in enumerate(_list):
199-
if len(items[0]) >= 3:
200-
_temp.append([items[0][0], items[0][1], items[0][2], items[1]])
201-
200+
if len(items[0]) >= 4:
201+
_temp.append([items[0][0], items[0][1], items[0][2], items[0][3], items[1]])
202202

203203
if len(_temp) != 0:
204204
df = pd.DataFrame(_temp)
205205
download_default = str(os.path.join(Path.home(), "Downloads"))
206206
filepath = f'{download_default}/{filename}.xlsx'
207-
df.to_excel(filepath, index=False, header= ["DOB", "Info", "Filename", "Name"])
207+
df.to_excel(filepath, index=False, header= ["DOB", "Gender", "Info", "Filename", "Name"])

main.py

+29-37
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,6 @@
77
import pandas as pd
88
from tkinter.filedialog import askopenfilename
99
from tkinter.filedialog import askdirectory
10-
from typing import Union
1110
from pathlib import Path
1211

1312
def find_initial_dir() -> str:
@@ -74,11 +73,7 @@ def run(file_path: str, filename: str) -> None:
7473
"""
7574

7675
try:
77-
download_default = str(os.path.join(Path.home(), "Downloads"))
78-
# tk.Tk().withdraw()
79-
# folder = askdirectory(initialdir=download_default, title='Select Download Path')
80-
folder = download_default
81-
76+
folder = str(os.path.join(Path.home(), "Downloads"))
8277
p = core.Process(file_path)
8378
spe_list = p.read_spe_file()
8479

@@ -94,7 +89,7 @@ def run(file_path: str, filename: str) -> None:
9489
r.capture_student_name()
9590
r.capture_app_type()
9691

97-
# create_xlsx(translated_spe, filename)
92+
create_xlsx(translated_spe, filename)
9893

9994
for idx, item in enumerate(translated_spe):
10095
_list = r.fit_student_data(item)
@@ -122,38 +117,35 @@ def create_xlsx(translated_spe: list, filename: str)-> None:
122117

123118
def merge_xlsx()-> None:
124119
# specifying the path to the Excel files
125-
path = str(os.path.join(Path.home(), "Downloads"))
126-
127-
# csv files in the path
128-
file_list = glob.glob(path + "/*.xlsx")
129-
130-
# list of excel files we want to merge.
131-
# pd.read_excel(file_path) reads the excel
132-
# data into pandas dataframe.
133-
excl_list = []
120+
input_folder = str(os.path.join(Path.home(), "Downloads"))
121+
output_file = str(os.path.join(input_folder, 'total.xlsx'))
134122

135-
for file in file_list:
136-
excl_list.append(pd.read_excel(os.path.abspath(file)))
137-
138-
# create a new dataframe to store the
139-
# merged excel file.
140-
excl_merged = pd.DataFrame()
141-
142-
for excl_file in excl_list:
143-
144-
# appends the data into the excl_merged
145-
# dataframe.
146-
excl_merged = excl_merged.append(
147-
excl_file, ignore_index=True)
148-
149-
# exports the dataframe into excel file with
150-
# specified name.
151-
excl_merged.to_excel(f'{path}/total.xlsx', index=False)
123+
# Create a list to hold the dataframes
124+
dfs = []
125+
126+
# Iterate over all Excel files in the specified folder
127+
for file_name in os.listdir(input_folder):
128+
if file_name.endswith('.xlsx') or file_name.endswith('.xls'):
129+
file_path = os.path.join(input_folder, file_name)
130+
# Read all sheets from the Excel file
131+
xls = pd.ExcelFile(file_path, engine='openpyxl')
132+
for sheet_name in xls.sheet_names:
133+
df = pd.read_excel(file_path, sheet_name=sheet_name)
134+
dfs.append(df)
135+
136+
# Concatenate all dataframes into one
137+
merged_df = pd.concat(dfs, ignore_index=True)
138+
139+
# Drop duplicate rows
140+
merged_df = merged_df.drop_duplicates()
141+
142+
# Save the merged dataframe to a new Excel file
143+
merged_df.to_excel(output_file, index=False, engine='openpyxl')
152144

153145
if __name__ == "__main__":
154146

155147
# find_spe_files() # Multiple .spe files
156-
find_spe_file() # Singular .spe file
157-
print('Done')
158-
# merge_xlsx()
159-
# print('Done Done')
148+
# find_spe_file() # Singular .spe file
149+
# print('Done')
150+
merge_xlsx()
151+
print('Done Done')

0 commit comments

Comments
 (0)