Skip to content

Commit 8206f3b

Browse files
Double run issue fixes
1 parent cd0be30 commit 8206f3b

File tree

2 files changed

+199
-34
lines changed

2 files changed

+199
-34
lines changed

app.py

Lines changed: 67 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,24 @@ def format_local_time_filter(utc_dt, format_str='%Y-%m-%d %H:%M'):
169169
def schedule_all_repositories():
170170
"""Schedule all active repositories on startup"""
171171
try:
172+
# Clean up any stuck 'running' jobs from previous sessions
173+
stuck_jobs = BackupJob.query.filter_by(status='running').all()
174+
if stuck_jobs:
175+
logger.warning(f"Found {len(stuck_jobs)} stuck 'running' jobs from previous session")
176+
for stuck_job in stuck_jobs:
177+
stuck_job.status = 'failed'
178+
stuck_job.error_message = 'Job was running when application restarted'
179+
stuck_job.completed_at = datetime.utcnow()
180+
logger.info(f"Marked stuck job as failed: {stuck_job.id} for repository {stuck_job.repository_id}")
181+
db.session.commit()
182+
183+
# First, clear any existing jobs to prevent duplicates
184+
existing_jobs = scheduler.get_jobs()
185+
for job in existing_jobs:
186+
if job.id.startswith('backup_'):
187+
scheduler.remove_job(job.id)
188+
logger.info(f"Removed existing job on startup: {job.id}")
189+
172190
repositories = Repository.query.filter_by(is_active=True).all()
173191
scheduled_count = 0
174192
for repository in repositories:
@@ -456,14 +474,23 @@ def edit_repository(repo_id):
456474

457475
db.session.commit()
458476

459-
# Reschedule the backup job
477+
# Reschedule the backup job - more robust approach
478+
job_id = f'backup_{repo_id}'
460479
try:
461-
scheduler.remove_job(f'backup_{repo_id}', jobstore=None)
462-
except:
463-
pass
480+
# Remove job if it exists
481+
if scheduler.get_job(job_id):
482+
scheduler.remove_job(job_id)
483+
logger.info(f"Removed existing job during edit: {job_id}")
484+
except Exception as e:
485+
logger.warning(f"Could not remove job during edit {job_id}: {e}")
486+
487+
# Wait a moment to ensure job removal is complete
488+
import time
489+
time.sleep(0.1)
464490

465-
if repository.is_active:
491+
if repository.is_active and repository.schedule_type != 'manual':
466492
schedule_backup_job(repository)
493+
logger.info(f"Rescheduled job for repository: {repository.name}")
467494

468495
flash('Repository updated successfully', 'success')
469496
return redirect(url_for('repositories'))
@@ -590,11 +617,18 @@ def schedule_backup_job(repository):
590617

591618
job_id = f'backup_{repository.id}'
592619

593-
# Remove existing job if it exists
620+
# Remove existing job if it exists - try multiple ways to ensure it's gone
594621
try:
595-
scheduler.remove_job(job_id)
596-
except:
597-
pass
622+
if scheduler.get_job(job_id):
623+
scheduler.remove_job(job_id)
624+
logger.info(f"Removed existing scheduled job: {job_id}")
625+
except Exception as e:
626+
logger.warning(f"Could not remove existing job {job_id}: {e}")
627+
628+
# Double-check that job is really gone
629+
if scheduler.get_job(job_id):
630+
logger.error(f"Job {job_id} still exists after removal attempt")
631+
return
598632

599633
# Create a wrapper function that includes Flask app context
600634
def backup_with_context():
@@ -616,9 +650,22 @@ def backup_with_context():
616650
logger.warning(f"Backup already running for repository {repo.name}, skipping")
617651
return
618652

653+
# Additional check: ensure no backup started in the last 5 minutes to prevent rapid duplicates
654+
recent_backup = BackupJob.query.filter_by(
655+
repository_id=repository.id
656+
).filter(
657+
BackupJob.started_at > datetime.utcnow() - timedelta(minutes=5)
658+
).first()
659+
660+
if recent_backup:
661+
logger.warning(f"Recent backup found for repository {repo.name} (started at {recent_backup.started_at}), skipping")
662+
return
663+
664+
logger.info(f"Starting scheduled backup for repository: {repo.name}")
619665
backup_service.backup_repository(repo)
666+
620667
except Exception as e:
621-
logger.error(f"Error in scheduled backup for repository {repository.id}: {e}")
668+
logger.error(f"Error in scheduled backup for repository {repository.id}: {e}", exc_info=True)
622669

623670
# Create new schedule based on schedule_type
624671
if repository.schedule_type == 'hourly':
@@ -693,11 +740,19 @@ def backup_with_context():
693740
id=job_id,
694741
name=f'Backup {repository.name}',
695742
replace_existing=True,
696-
misfire_grace_time=300, # 5 minutes grace time
697-
coalesce=True # Combine multiple pending executions
743+
misfire_grace_time=60, # Reduced from 5 minutes to 1 minute
744+
coalesce=True, # Combine multiple pending executions
745+
max_instances=1 # Only one instance of this specific job can run
698746
)
699747

700748
logger.info(f"Scheduled backup job for {repository.name} with trigger: {trigger}")
749+
750+
# Verify the job was actually added
751+
added_job = scheduler.get_job(job_id)
752+
if added_job:
753+
logger.info(f"Job {job_id} successfully scheduled, next run: {added_job.next_run_time}")
754+
else:
755+
logger.error(f"Failed to schedule job {job_id}")
701756

702757
if __name__ == '__main__':
703758
app.run(host='0.0.0.0', port=8080, debug=False)

backup_service.py

Lines changed: 132 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -41,19 +41,32 @@ def backup_repository(self, repository):
4141
repo_backup_dir = user_backup_dir / repository.name
4242
repo_backup_dir.mkdir(exist_ok=True)
4343

44-
# Generate timestamp for this backup
45-
timestamp = datetime.utcnow().strftime('%Y%m%d_%H%M%S')
46-
backup_name = f"{repository.name}_{timestamp}"
44+
# Generate timestamp for this backup with microseconds for uniqueness
45+
timestamp = datetime.utcnow().strftime('%Y%m%d_%H%M%S_%f')
46+
backup_name = f"{repository.name}_{timestamp[:19]}" # Keep readable format for backup name
4747

4848
# Create unique temporary directory and ensure it's clean
4949
temp_clone_dir = repo_backup_dir / f"temp_{timestamp}"
5050

51-
# Remove temp directory if it already exists
52-
if temp_clone_dir.exists():
51+
# Ensure temp directory doesn't exist and create it
52+
retry_count = 0
53+
max_retries = 5
54+
while temp_clone_dir.exists() and retry_count < max_retries:
5355
logger.warning(f"Temp directory already exists, removing: {temp_clone_dir}")
54-
shutil.rmtree(temp_clone_dir)
56+
try:
57+
shutil.rmtree(temp_clone_dir)
58+
break
59+
except (OSError, PermissionError) as e:
60+
retry_count += 1
61+
if retry_count >= max_retries:
62+
raise Exception(f"Unable to clean temp directory after {max_retries} attempts: {e}")
63+
# Add a small delay and try with a new timestamp
64+
import time
65+
time.sleep(0.1)
66+
timestamp = datetime.utcnow().strftime('%Y%m%d_%H%M%S_%f')
67+
temp_clone_dir = repo_backup_dir / f"temp_{timestamp}"
5568

56-
temp_clone_dir.mkdir(exist_ok=True)
69+
temp_clone_dir.mkdir(parents=True, exist_ok=False)
5770

5871
self._clone_repository(repository, temp_clone_dir)
5972

@@ -84,6 +97,12 @@ def backup_repository(self, repository):
8497
backup_job.status = 'failed'
8598
backup_job.error_message = str(e)
8699
backup_job.completed_at = datetime.utcnow()
100+
101+
# Ensure we commit the failed status immediately
102+
try:
103+
db.session.commit()
104+
except Exception as commit_error:
105+
logger.error(f"Failed to commit backup job failure status: {commit_error}")
87106

88107
finally:
89108
# Always clean up temporary directory
@@ -93,8 +112,30 @@ def backup_repository(self, repository):
93112
shutil.rmtree(temp_clone_dir)
94113
except Exception as cleanup_error:
95114
logger.error(f"Failed to cleanup temp directory {temp_clone_dir}: {cleanup_error}")
115+
# Try force cleanup
116+
try:
117+
import stat
118+
def handle_remove_readonly(func, path, exc):
119+
if exc[1].errno == 13: # Permission denied
120+
os.chmod(path, stat.S_IWRITE)
121+
func(path)
122+
else:
123+
raise
124+
shutil.rmtree(temp_clone_dir, onerror=handle_remove_readonly)
125+
logger.info(f"Force cleaned temp directory: {temp_clone_dir}")
126+
except Exception as force_error:
127+
logger.error(f"Could not force clean temp directory: {force_error}")
96128

97-
db.session.commit()
129+
# Final commit to ensure all changes are saved
130+
try:
131+
db.session.commit()
132+
except Exception as final_commit_error:
133+
logger.error(f"Failed final commit for backup job: {final_commit_error}")
134+
# Try to rollback to prevent session issues
135+
try:
136+
db.session.rollback()
137+
except:
138+
pass
98139

99140
def _clone_repository(self, repository, clone_dir):
100141
"""Clone a repository to the specified directory"""
@@ -109,28 +150,97 @@ def _clone_repository(self, repository, clone_dir):
109150
# Clean up any existing temp directories for this repository first
110151
self._cleanup_temp_directories(clone_dir.parent)
111152

153+
# Ensure the clone directory is completely clean before starting
154+
if clone_dir.exists():
155+
logger.warning(f"Clone directory exists before cloning, removing: {clone_dir}")
156+
try:
157+
shutil.rmtree(clone_dir)
158+
except Exception as e:
159+
logger.error(f"Failed to remove existing clone directory: {e}")
160+
raise Exception(f"Cannot clean clone directory: {e}")
161+
162+
# Recreate the directory to ensure it's empty
163+
clone_dir.mkdir(parents=True, exist_ok=False)
164+
112165
# Clone the repository with error handling
113166
try:
114-
git.Repo.clone_from(clone_url, clone_dir, depth=1)
115-
logger.info(f"Repository cloned to: {clone_dir}")
116-
except git.GitCommandError as e:
117-
if "already exists and is not an empty directory" in str(e):
118-
logger.warning(f"Directory exists, cleaning and retrying: {clone_dir}")
119-
shutil.rmtree(clone_dir)
120-
clone_dir.mkdir(exist_ok=True)
121-
git.Repo.clone_from(clone_url, clone_dir, depth=1)
122-
else:
123-
raise e
167+
# Use git command directly for better error handling
168+
import subprocess
169+
git_cmd = [
170+
'git', 'clone',
171+
'--depth=1',
172+
'--verbose',
173+
'--config', 'core.autocrlf=false', # Prevent line ending issues
174+
'--config', 'core.filemode=false', # Prevent permission issues
175+
clone_url,
176+
str(clone_dir)
177+
]
178+
179+
result = subprocess.run(
180+
git_cmd,
181+
capture_output=True,
182+
text=True,
183+
timeout=300, # 5 minute timeout
184+
cwd=str(clone_dir.parent)
185+
)
186+
187+
if result.returncode != 0:
188+
error_msg = f"Git clone failed with exit code {result.returncode}\n"
189+
error_msg += f"stdout: {result.stdout}\n"
190+
error_msg += f"stderr: {result.stderr}"
191+
logger.error(error_msg)
192+
raise Exception(f"Git clone failed: {result.stderr}")
193+
194+
logger.info(f"Repository cloned successfully to: {clone_dir}")
195+
196+
except subprocess.TimeoutExpired:
197+
logger.error(f"Git clone timed out for repository: {repository.url}")
198+
raise Exception("Git clone operation timed out")
199+
except Exception as e:
200+
logger.error(f"Git clone failed for {repository.url}: {str(e)}")
201+
# Clean up on failure
202+
if clone_dir.exists():
203+
try:
204+
shutil.rmtree(clone_dir)
205+
except:
206+
pass
207+
raise e
124208

125209
def _cleanup_temp_directories(self, repo_backup_dir):
126210
"""Clean up old temporary directories that might be left behind"""
127211
try:
212+
if not repo_backup_dir.exists():
213+
return
214+
128215
temp_dirs = [d for d in repo_backup_dir.iterdir() if d.is_dir() and d.name.startswith('temp_')]
216+
current_time = datetime.utcnow().timestamp()
217+
129218
for temp_dir in temp_dirs:
130-
# Remove temp directories older than 1 hour
131-
if datetime.utcnow().timestamp() - temp_dir.stat().st_mtime > 3600:
132-
logger.info(f"Cleaning up old temp directory: {temp_dir}")
133-
shutil.rmtree(temp_dir)
219+
try:
220+
# Remove temp directories older than 10 minutes or any that exist from failed jobs
221+
dir_age = current_time - temp_dir.stat().st_mtime
222+
if dir_age > 600: # 10 minutes
223+
logger.info(f"Cleaning up old temp directory: {temp_dir}")
224+
shutil.rmtree(temp_dir)
225+
elif not any(temp_dir.iterdir()): # Empty directory
226+
logger.info(f"Cleaning up empty temp directory: {temp_dir}")
227+
shutil.rmtree(temp_dir)
228+
except (OSError, PermissionError) as e:
229+
logger.warning(f"Failed to remove temp directory {temp_dir}: {e}")
230+
# Try to force remove if it's a permission issue
231+
try:
232+
import stat
233+
def handle_remove_readonly(func, path, exc):
234+
if exc[1].errno == 13: # Permission denied
235+
os.chmod(path, stat.S_IWRITE)
236+
func(path)
237+
else:
238+
raise
239+
shutil.rmtree(temp_dir, onerror=handle_remove_readonly)
240+
logger.info(f"Force removed temp directory: {temp_dir}")
241+
except Exception as force_error:
242+
logger.error(f"Could not force remove temp directory {temp_dir}: {force_error}")
243+
134244
except Exception as e:
135245
logger.warning(f"Failed to cleanup temp directories: {e}")
136246

0 commit comments

Comments
 (0)