Skip to content

Commit f2e0a02

Browse files
committed
Add script to delete stale records from the DB
1 parent b1be3ec commit f2e0a02

File tree

2 files changed

+287
-0
lines changed

2 files changed

+287
-0
lines changed
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
[db]
2+
# type of database
3+
backend = mysql
4+
# host with database
5+
hostname =
6+
# port to connect to
7+
port = 3306
8+
# database name
9+
name =
10+
# database user
11+
username =
12+
# password for database
13+
password =
14+
# Name of the table to purge stale records from
15+
table_name = VNormalisedSummaries
16+
17+
[common]
18+
# Timeframe (in hours) used to define the "recent" data window
19+
# Records newer than (MAX(UpdateTime) - timeframe) will be preserved
20+
timeframe = 24
21+
# Minimum number of records required within the timeframe to allow deletion
22+
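# If preserved records < threshold, no deletion will occur. Required: must be set to a positive integer (the script exits with an error if this is left empty)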
23+
threshold =
24+
25+
[logging]
26+
logfile = /var/log/apel/delete_stale_records.log
Lines changed: 261 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,261 @@
1+
"""
2+
APEL - DELETE stale records from the DB
3+
4+
This script connects to a MySQL database and deletes stale records from a specified table.
5+
Records are only deleted if the number of recent entries (within a user-defined timeframe)
6+
meets or exceeds a threshold.
7+
8+
The script supports a --dry_run mode, which prints what would be deleted without modifying the database.
9+
10+
NOTE:
11+
Before running this script, ensure the following:
12+
13+
- A valid configuration file (e.g. delete_stale_records.cfg) is present and accessible.
14+
- A log file path (e.g. delete_stale_records.log) is defined either in the config
15+
or via the --log_config argument.
16+
- The directory for the log file exists and is writable.
17+
18+
You can override the default paths using:
19+
--db /path/to/delete_stale_records.cfg
20+
--log_config /path/to/delete_stale_records.log
21+
22+
Usage:
23+
python delete_stale_records.py
24+
python delete_stale_records.py --dry_run
25+
python delete_stale_records.py --db /path/to/delete_stale_records.cfg --log_config /path/to/delete_stale_records.log
26+
"""
27+
28+
# Requirements:
29+
# mysqlclient==2.1.1 # Works with Python 3.6 and with Python 3.9+; later mysqlclient releases dropped support for Python 3.6.
30+
# Installation examples:
31+
# - For Python 3.6: python3.6 -m pip install mysqlclient==2.1.1
32+
# - For Python 3.9: python3.9 -m pip install mysqlclient==2.1.1
33+
34+
import os
35+
import sys
36+
import logging
37+
from datetime import timedelta
38+
from argparse import ArgumentParser
39+
from configparser import ConfigParser, NoSectionError, NoOptionError
40+
import MySQLdb
41+
42+
LOG_BREAK = '====================='
43+
44+
45+
def _is_safe_table_name(name):
    """Return True if *name* can be placed unquoted into an SQL statement.

    Accepts a plain MySQL identifier, optionally qualified with a schema
    (``schema.table``). Only characters MySQL permits in unquoted
    identifiers are allowed, so whitespace, quotes, semicolons and comment
    markers are all rejected.
    """
    # Local import: keeps this block self-contained without touching the
    # file-level import list.
    import re

    part = r'[0-9A-Za-z_$\u0080-\uffff]+'
    return re.fullmatch(rf'{part}(?:\.{part})?', name) is not None


def _read_purge_settings(cp):
    """Read and validate the purge settings from the parsed config.

    Parameters:
        cp: Parsed configuration object.

    Returns:
        dict with the connection details, 'table_name', 'timeframe' (hours)
        and 'threshold'.

    Raises:
        NoSectionError, NoOptionError: a required section/option is missing.
        ValueError: a value is empty, not an integer where one is required,
            not positive, or the table name is not a plain identifier.
    """
    settings = {
        'backend': cp.get('db', 'backend'),
        'host': cp.get('db', 'hostname'),
        'port': cp.getint('db', 'port'),
        'user': cp.get('db', 'username'),
        'password': cp.get('db', 'password'),
        'database': cp.get('db', 'name'),
        'table_name': cp.get('db', 'table_name'),
        'timeframe': cp.getint('common', 'timeframe'),
        'threshold': cp.getint('common', 'threshold'),
    }

    # Ensure no required string is empty
    for key in ('backend', 'host', 'user', 'password', 'database', 'table_name'):
        if not settings[key].strip():
            raise ValueError(f"'{key}' in config is empty")

    # The table name cannot be bound as a query parameter, so it is
    # interpolated into the SQL text; refuse anything that is not a plain
    # identifier before it can reach a DELETE statement.
    if not _is_safe_table_name(settings['table_name']):
        raise ValueError(
            f"'table_name' ({settings['table_name']!r}) is not a valid "
            f"table identifier"
        )

    # Numeric validations
    if settings['timeframe'] <= 0:
        raise ValueError("'timeframe' must be > 0")
    if settings['threshold'] <= 0:
        raise ValueError("'threshold' must be > 0")

    return settings


def delete_stale_records(cp, args):
    """
    Deletes stale records from the configured MySQL table based on UpdateTime.

    Records older than (MAX(UpdateTime) - timeframe) are eligible for deletion,
    but only if the number of recent records (within timeframe) meets the threshold.

    In dry-run mode (args.dry_run) nothing is deleted; the summary and the
    number of rows that would be deleted are printed to stdout instead of
    being written to the log.

    Parameters:
        cp: Parsed configuration object.
        args: Parsed command-line arguments (only 'dry_run' is read).

    Exits the process with status 1 if the configuration is missing options
    or contains invalid values. MySQL errors are logged and the transaction
    is rolled back; they are not re-raised.
    """
    # Resolve the logger here rather than relying on a module-level global
    # that only exists when the file is executed as a script.
    log = logging.getLogger(__name__)

    try:
        db_config = _read_purge_settings(cp)
    except (NoSectionError, NoOptionError) as e:
        print(f"Configuration error: {e}")
        sys.exit(1)
    except ValueError as e:
        print(f"Invalid configuration value: {e}")
        sys.exit(1)

    table = db_config['table_name']

    def report(message, level):
        """Print in dry-run mode, otherwise log at the given level."""
        if args.dry_run:
            print(message)
        else:
            log.log(level, message)

    conn = None
    cursor = None

    try:
        conn = MySQLdb.connect(
            host=db_config['host'],
            port=db_config['port'],
            user=db_config['user'],
            passwd=db_config['password'],
            db=db_config['database']
        )
        cursor = conn.cursor()

        # Verify UpdateTime column exists
        cursor.execute(f"SHOW COLUMNS FROM {table} LIKE 'UpdateTime'")
        if cursor.fetchone() is None:
            report(
                f"'UpdateTime' column not found in table "
                f"'{table}'. Aborting operation.",
                logging.ERROR
            )
            return

        # Get the latest UpdateTime value
        cursor.execute(f"SELECT MAX(UpdateTime) FROM {table}")
        result = cursor.fetchone()
        if not result or not result[0]:
            report("No UpdateTime values found. Nothing to purge.", logging.ERROR)
            return

        cutoff_time = result[0] - timedelta(hours=db_config['timeframe'])
        cutoff_str = cutoff_time.strftime('%Y-%m-%d %H:%M:%S')

        cursor.execute(
            f"SELECT COUNT(*) FROM {table} WHERE UpdateTime >= %s",
            (cutoff_str,)
        )
        preserved_count = cursor.fetchone()[0]

        # Count the stale rows before building the summary so that the
        # "Deletable" field reports the real figure in every code path.
        cursor.execute(
            f"SELECT COUNT(*) FROM {table} WHERE UpdateTime < %s",
            (cutoff_str,)
        )
        deletable_count = cursor.fetchone()[0]

        summary = (
            f"[SUMMARY] Table: {table} | "
            f"Timeframe: {db_config['timeframe']}h | "
            f"Threshold: {db_config['threshold']} | "
            f"Preserved: {preserved_count} | "
            f"Deletable: {deletable_count} | "
            f"Cutoff: {cutoff_str}"
        )

        if preserved_count < db_config['threshold']:
            # Too little recent data: deleting now could wipe a table whose
            # feed has stalled, so refuse to delete anything.
            records_below_threshold_error = (
                f"Preserved records ({preserved_count}) below threshold "
                f"({db_config['threshold']}). Skipping deletion."
            )
            if args.dry_run:
                print(f"{summary} | Action: DRY_RUN")
                print(records_below_threshold_error)
            else:
                log.info(f"{summary} | Action: ABORT")
                log.warning(records_below_threshold_error)
            return

        if args.dry_run:
            print(f"{summary} | Action: DRY_RUN")
            print(
                f"DRY_RUN: {deletable_count} rows would be deleted from the "
                f"'{table}' (UpdateTime < {cutoff_str})."
            )
            return

        cursor.execute(
            f"DELETE FROM {table} WHERE UpdateTime < %s",
            (cutoff_str,)
        )
        # Report what the DELETE actually removed, which may differ from the
        # earlier count if the table changed in between.
        deleted_rows = cursor.rowcount
        conn.commit()

        log.info(f"{summary} | Action: SUCCESS")
        log.info(
            f"{deleted_rows} rows deleted successfully from the "
            f"'{table}' (UpdateTime < {cutoff_str})."
        )

    except MySQLdb.Error as err:
        log.error(f"MySQL error: {err}")
        if conn:
            conn.rollback()
            log.warning("Transaction rolled back due to error.")
    finally:
        if cursor:
            cursor.close()
        if conn:
            conn.close()
209+
210+
211+
if __name__ == '__main__':
    # Script entry point: parse CLI arguments, load the config file, set up
    # file logging, then run the purge.

    # Local import so that malformed config files can be reported cleanly
    # without changing the file-level import list.
    from configparser import Error as ConfigParserError

    # Default config and log file paths
    default_config_path = '/etc/apel/delete_stale_records.cfg'
    default_logfile_path = '/var/log/apel/delete_stale_records.log'

    # Parse CLI arguments
    parser = ArgumentParser(description='Delete stale records from APEL DB.')
    parser.add_argument(
        '-d', '--db',
        help='Location of DB config file',
        default=default_config_path
    )
    parser.add_argument(
        '-l', '--log_config',
        # The value is used directly as the log file path, not as a logging
        # configuration file.
        help='Location of the log file (overrides logfile in the config file)',
        default=None
    )
    parser.add_argument(
        '--dry_run',
        action='store_true',
        help='Preview deletions without executing'
    )
    args = parser.parse_args()

    # Load configuration file
    cp = ConfigParser()
    try:
        read_files = cp.read(args.db)
    except ConfigParserError as e:
        # e.g. missing section header or duplicate option
        print(f"Error: Failed to parse config file: {args.db}: {e}")
        sys.exit(1)
    if not read_files:
        print(f"Error: Failed to read config file: {args.db}")
        sys.exit(1)

    # Resolve the log file: CLI argument first, then the config file, then
    # the default (also used when the config value is present but empty).
    logfile_path = (
        args.log_config
        or cp.get('logging', 'logfile', fallback=default_logfile_path)
        or default_logfile_path
    )

    # Ensure log directory exists. A bare file name has an empty dirname,
    # which means "current directory" rather than "missing directory".
    log_dir = os.path.dirname(logfile_path) or os.curdir
    if not os.path.isdir(log_dir):
        print(f"Error: Log directory does not exist: {log_dir}")
        sys.exit(1)

    try:
        logging.basicConfig(
            filename=logfile_path,
            level=logging.INFO,
            format='%(asctime)s - %(levelname)s - %(message)s'
        )
    except OSError as e:
        # e.g. the log file or its directory is not writable
        print(f"Error: Cannot open log file: {logfile_path}: {e}")
        sys.exit(1)

    # Kept at module level: delete_stale_records may look this name up as a
    # global.
    log = logging.getLogger(__name__)

    if args.dry_run:
        print(f"{LOG_BREAK}\nStarting APEL DB Purge Script\n")
    else:
        log.info(f"{LOG_BREAK}\nStarting APEL DB Purge Script\n")

    delete_stale_records(cp, args)

0 commit comments

Comments
 (0)