|
| 1 | +""" |
| 2 | +APEL - DELETE stale records from the DB |
| 3 | +
|
| 4 | +This script connects to a MySQL database and deletes stale records from a specified table. |
| 5 | +Records are only deleted if the number of recent entries (within a user-defined timeframe) |
| 6 | +meets or exceeds a threshold. |
| 7 | +
|
| 8 | +The script supports dry_run mode. |
| 9 | +
|
| 10 | +NOTE: |
| 11 | + Before running this script, ensure the following: |
| 12 | +
|
| 13 | + - A valid configuration file (e.g. delete_stale_records.cfg) is present and accessible. |
| 14 | + - A log file path (e.g. delete_stale_records.log) is defined either in the config |
| 15 | + or via the --log_config argument. |
| 16 | + - The directory for the log file exists and is writable. |
| 17 | +
|
| 18 | + You can override the default paths using: |
| 19 | + --db /path/to/delete_stale_records.cfg |
| 20 | + --log_config /path/to/delete_stale_records.log |
| 21 | +
|
| 22 | +Usage: |
| 23 | + python delete_stale_records.py |
| 24 | + python delete_stale_records.py --dry_run |
| 25 | + python delete_stale_records.py --db /path/to/delete_stale_records.cfg --log_config /path/to/delete_stale_records.log |
| 26 | +""" |
| 27 | + |
| 28 | +# Requirements: |
| 29 | +# mysqlclient==2.1.1 # Latest package; works with Python 3.9+. Dropped support for Python 3.6. |
| 30 | +# Installation examples: |
| 31 | +# - For Python 3.6: python3.6 -m pip install mysqlclient==2.0.3 |
| 32 | +# - For Python 3.9: python3.9 -m pip install mysqlclient==2.1.1 |
| 33 | + |
| 34 | +import os |
| 35 | +import sys |
| 36 | +import logging |
| 37 | +from datetime import timedelta |
| 38 | +from argparse import ArgumentParser |
| 39 | +from configparser import ConfigParser, NoSectionError, NoOptionError |
| 40 | +import MySQLdb |
| 41 | + |
# Script version as a (major, minor, patch) tuple.
__version__ = (1, 0, 0)
# Start-up banner; names this script (the previous text said "dbloader",
# a leftover from the script this one was derived from).
ver = "Starting APEL delete_stale_records %s.%s.%s" % __version__
# Visual separator written at the start of each run's output.
LOG_BREAK = '====================='
| 45 | + |
| 46 | + |
def _is_safe_identifier(name):
    """Return True if *name* is usable as an unquoted MySQL identifier.

    Letters, digits, '_' and '$' are accepted, optionally qualified as
    ``schema.table``. Everything else (whitespace, quotes, ';', ...) is
    rejected because the name is interpolated directly into SQL text.
    """
    parts = name.split('.')
    return len(parts) <= 2 and all(
        part and all(ch.isalnum() or ch in '_$' for ch in part)
        for part in parts
    )


def _load_db_config(cp):
    """Read and validate the settings needed by delete_stale_records().

    Raises:
        NoSectionError, NoOptionError: a required section/option is missing.
        ValueError: a value is empty, not an integer, not positive, or the
            table name is not a safe SQL identifier.
    """
    db_config = {
        'backend': cp.get('db', 'backend'),
        'host': cp.get('db', 'hostname'),
        'port': cp.getint('db', 'port'),
        'user': cp.get('db', 'username'),
        'password': cp.get('db', 'password'),
        'database': cp.get('db', 'name'),
        'table_name': cp.get('db', 'table_name'),
        'timeframe': cp.getint('common', 'timeframe'),
        'threshold': cp.getint('common', 'threshold'),
    }

    # Ensure no required string is empty
    for key in ('backend', 'host', 'user', 'password', 'database', 'table_name'):
        if not db_config[key].strip():
            raise ValueError(f"'{key}' in config is empty")

    # The table name cannot be passed as a query parameter, so it must be
    # validated before being formatted into the SQL statements.
    if not _is_safe_identifier(db_config['table_name']):
        raise ValueError(
            f"'table_name' is not a valid table identifier: "
            f"{db_config['table_name']!r}"
        )

    # Numeric validations
    if db_config['timeframe'] <= 0:
        raise ValueError("'timeframe' must be > 0")
    if db_config['threshold'] <= 0:
        raise ValueError("'threshold' must be > 0")

    return db_config


def _report(dry_run, log_method, message):
    """Print *message* in dry-run mode, otherwise send it to *log_method*."""
    if dry_run:
        print(message)
    else:
        log_method(message)


def delete_stale_records(cp, args):
    """
    Delete stale records from the configured MySQL table based on UpdateTime.

    Records older than (MAX(UpdateTime) - timeframe hours) are eligible for
    deletion, but only if the number of recent records (UpdateTime at or
    after that cutoff) meets the configured threshold. In dry-run mode the
    outcome is printed and nothing is deleted.

    Parameters:
        cp: Parsed configuration object (sections 'db' and 'common').
        args: Parsed command-line arguments; only ``args.dry_run`` is read.

    Exits the process with status 1 if the configuration is missing a
    required option or contains an invalid value. MySQL errors are logged
    and the transaction is rolled back; they are not re-raised.
    """
    # Resolve the logger locally so the function does not depend on a
    # module-level 'log' that only exists when the file is run as a script.
    log = logging.getLogger(__name__)

    try:
        db_config = _load_db_config(cp)
    except (NoSectionError, NoOptionError) as e:
        print(f"Configuration error: {e}")
        sys.exit(1)
    except ValueError as e:
        print(f"Invalid configuration value: {e}")
        sys.exit(1)

    table = db_config['table_name']
    conn = None
    cursor = None

    try:
        conn = MySQLdb.connect(
            host=db_config['host'],
            port=db_config['port'],
            user=db_config['user'],
            passwd=db_config['password'],
            db=db_config['database']
        )
        cursor = conn.cursor()

        # Verify UpdateTime column exists
        cursor.execute(f"SHOW COLUMNS FROM {table} LIKE 'UpdateTime'")
        if cursor.fetchone() is None:
            _report(
                args.dry_run,
                log.error,
                f"'UpdateTime' column not found in table "
                f"'{table}'. Aborting operation."
            )
            return

        # Get the latest UpdateTime value
        cursor.execute(f"SELECT MAX(UpdateTime) FROM {table}")
        result = cursor.fetchone()
        if not result or not result[0]:
            _report(
                args.dry_run,
                log.error,
                "No UpdateTime values found. Nothing to purge."
            )
            return

        # The cutoff is relative to the newest record, not to the wall clock.
        cutoff_time = result[0] - timedelta(hours=db_config['timeframe'])
        cutoff_str = cutoff_time.strftime('%Y-%m-%d %H:%M:%S')

        cursor.execute(
            f"SELECT COUNT(*) FROM {table} WHERE UpdateTime >= %s",
            (cutoff_str,)
        )
        preserved_count = cursor.fetchone()[0]

        # Count the stale rows *before* building the summary so that the
        # 'Deletable' field reports the real number instead of always 0.
        cursor.execute(
            f"SELECT COUNT(*) FROM {table} WHERE UpdateTime < %s",
            (cutoff_str,)
        )
        deletable_count = cursor.fetchone()[0]

        summary = (
            f"[SUMMARY] Table: {table} | "
            f"Timeframe: {db_config['timeframe']}h | "
            f"Threshold: {db_config['threshold']} | "
            f"Preserved: {preserved_count} | "
            f"Deletable: {deletable_count} | "
            f"Cutoff: {cutoff_str}"
        )

        if preserved_count < db_config['threshold']:
            records_below_threshold_error = (
                f"Preserved records ({preserved_count}) below threshold "
                f"({db_config['threshold']}). Skipping deletion."
            )
            if args.dry_run:
                print(f"{summary} | Action: DRY_RUN")
                print(records_below_threshold_error)
            else:
                log.info(f"{summary} | Action: ABORT")
                log.warning(records_below_threshold_error)
            return

        if args.dry_run:
            print(f"{summary} | Action: DRY_RUN")
            print(
                f"DRY_RUN: {deletable_count} rows would be deleted from the "
                f"'{table}' (UpdateTime < {cutoff_str})."
            )
            return

        cursor.execute(
            f"DELETE FROM {table} WHERE UpdateTime < %s",
            (cutoff_str,)
        )
        # Report what the DELETE actually removed rather than the earlier
        # estimate, which may differ if rows changed in between.
        deleted_rows = cursor.rowcount
        conn.commit()

        log.info(f"{summary} | Action: SUCCESS")
        log.info(
            f"{deleted_rows} rows deleted successfully from the "
            f"'{table}' (UpdateTime < {cutoff_str})."
        )

    except MySQLdb.Error as err:
        log.error(f"MySQL error: {err}")
        if conn:
            conn.rollback()
            log.warning("Transaction rolled back due to error.")
    finally:
        if cursor:
            cursor.close()
        if conn:
            conn.close()
| 211 | + |
| 212 | + |
if __name__ == '__main__':
    # Default config and log file paths
    default_config_path = '/etc/apel/delete_stale_records.cfg'
    default_logfile_path = '/var/log/apel/delete_stale_records.log'

    # Parse CLI arguments
    parser = ArgumentParser(description='Delete stale records from APEL DB.')
    parser.add_argument(
        '-d', '--db',
        help='Location of DB config file',
        default=default_config_path
    )
    parser.add_argument(
        '-l', '--log_config',
        # The value is used directly as the log file path (see below),
        # so the help text describes it as such.
        help='Location of the log file (overrides the config file setting)',
        default=None
    )
    parser.add_argument(
        '--dry_run',
        action='store_true',
        help='Preview deletions without executing'
    )
    args = parser.parse_args()

    # Load configuration file; ConfigParser.read() returns the list of
    # files it managed to parse, so an empty list means failure.
    cp = ConfigParser()
    read_files = cp.read(args.db)
    if not read_files:
        print(f"Error: Failed to read config file: {args.db}")
        sys.exit(1)

    # Ensure log directory exists. Precedence: CLI argument, then the
    # [logging] logfile option, then the built-in default.
    logfile_path = args.log_config or cp.get('logging', 'logfile', fallback=default_logfile_path)
    # dirname() is '' for a bare filename; treat that as the current
    # directory instead of reporting a non-existent directory.
    log_dir = os.path.dirname(logfile_path) or '.'
    if not os.path.isdir(log_dir):
        print(f"Error: Log directory does not exist: {log_dir}")
        sys.exit(1)

    logging.basicConfig(
        filename=logfile_path,
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s'
    )
    # Kept at module level: delete_stale_records() may look up this name.
    log = logging.getLogger(__name__)

    # Dry runs report to stdout; real runs report to the log file.
    if args.dry_run:
        print(f"{LOG_BREAK}\nStarting APEL DB Purge Script\n")
    else:
        log.info(f"{LOG_BREAK}\nStarting APEL DB Purge Script\n")

    delete_stale_records(cp, args)
0 commit comments