Automate Data Backups with Python: Never Lose Important Files
You know you should be backing up your files. Your project folders, documents, configuration files—all that work sitting on a single drive. One hardware failure, one accidental deletion, and it's gone.
Let's build a Python backup system that handles this automatically. Set it up once, and your important files stay protected without you having to think about it.
What You'll Learn
- Copying and syncing files with Python
- Creating compressed backup archives
- Implementing backup rotation (keeping recent backups, deleting old ones)
- Scheduling automated backups
- Logging and notifications for backup status
Prerequisites
- Python 3.8 or higher
- No external libraries required for basic backups
- Optional: cloud storage SDK for remote backups
The Problem
Manual backups fail because:
- You forget to do them
- They're tedious, so you skip them
- You don't know what's backed up and what isn't
- Old backups pile up and fill your drive
- No verification that backups actually work
The Solution
An automated backup script that:
- Copies important folders to a backup location
- Creates compressed archives with timestamps
- Rotates old backups automatically
- Logs every operation
- Runs on a schedule without intervention
Step 1: Simple File Copy Backup
Let's start with the basics—copying folders to a backup location:
```python
import shutil
from pathlib import Path
from datetime import datetime


def simple_backup(source, destination):
    """
    Create a simple copy backup of a folder.

    Args:
        source: Path to folder to backup
        destination: Path to backup location

    Returns:
        Path to the created backup
    """
    source = Path(source)
    destination = Path(destination)

    if not source.exists():
        raise FileNotFoundError(f"Source not found: {source}")

    # Create timestamped backup folder name
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    backup_name = f"{source.name}_{timestamp}"
    backup_path = destination / backup_name

    # Copy the folder
    print(f"Backing up: {source}")
    print(f"To: {backup_path}")

    shutil.copytree(source, backup_path)

    print("✅ Backup complete!")
    return backup_path


# Example usage
backup_path = simple_backup(
    source="/home/user/Documents/Projects",
    destination="/media/backup_drive/backups"
)
```
Step 2: Compressed Backups
Save space with compressed archives:
```python
import shutil
from pathlib import Path
from datetime import datetime


def compressed_backup(source, destination, format='zip'):
    """
    Create a compressed backup archive.

    Args:
        source: Path to folder to backup
        destination: Path to backup location
        format: Archive format ('zip', 'tar', 'gztar', 'bztar')

    Returns:
        Path to the created archive
    """
    source = Path(source)
    destination = Path(destination)

    if not source.exists():
        raise FileNotFoundError(f"Source not found: {source}")

    destination.mkdir(parents=True, exist_ok=True)

    # Create archive name with timestamp
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    archive_name = f"{source.name}_{timestamp}"
    archive_path = destination / archive_name

    print(f"Creating {format} archive of: {source}")

    # Create the archive
    result = shutil.make_archive(
        str(archive_path),
        format,
        root_dir=source.parent,
        base_dir=source.name
    )

    # Get file size
    size_mb = Path(result).stat().st_size / (1024 * 1024)
    print(f"✅ Archive created: {result} ({size_mb:.1f} MB)")

    return Path(result)


# Example usage
archive = compressed_backup(
    source="/home/user/Documents",
    destination="/media/backup/archives",
    format='gztar'  # .tar.gz - good compression
)
```
Step 3: Incremental Backup (Sync)
Only copy changed files—much faster for regular backups:
```python
import shutil
from pathlib import Path


def sync_backup(source, destination, delete_extra=False):
    """
    Sync backup - only copy new or modified files.

    Args:
        source: Path to folder to backup
        destination: Path to backup location
        delete_extra: Remove files in destination not in source

    Returns:
        Dictionary with backup statistics
    """
    source = Path(source)
    destination = Path(destination)

    if not source.exists():
        raise FileNotFoundError(f"Source not found: {source}")

    destination.mkdir(parents=True, exist_ok=True)

    stats = {
        "files_copied": 0,
        "files_skipped": 0,
        "files_deleted": 0,
        "bytes_copied": 0,
    }

    # Walk through source directory
    for src_path in source.rglob("*"):
        if src_path.is_dir():
            continue

        # Calculate destination path
        rel_path = src_path.relative_to(source)
        dst_path = destination / rel_path

        # Check if copy is needed
        should_copy = False

        if not dst_path.exists():
            should_copy = True
        else:
            # Compare modification times
            src_mtime = src_path.stat().st_mtime
            dst_mtime = dst_path.stat().st_mtime

            if src_mtime > dst_mtime:
                should_copy = True

        if should_copy:
            # Create parent directories
            dst_path.parent.mkdir(parents=True, exist_ok=True)

            # Copy file
            shutil.copy2(src_path, dst_path)
            stats["files_copied"] += 1
            stats["bytes_copied"] += src_path.stat().st_size
        else:
            stats["files_skipped"] += 1

    # Optionally delete extra files in destination
    if delete_extra:
        for dst_path in destination.rglob("*"):
            if dst_path.is_dir():
                continue

            rel_path = dst_path.relative_to(destination)
            src_path = source / rel_path

            if not src_path.exists():
                dst_path.unlink()
                stats["files_deleted"] += 1

    return stats


# Example usage
stats = sync_backup(
    source="/home/user/Documents",
    destination="/media/backup/Documents_sync"
)
print(f"Files copied: {stats['files_copied']}")
print(f"Files skipped (unchanged): {stats['files_skipped']}")
```
Step 4: Backup Rotation
Keep recent backups, automatically delete old ones:
```python
import shutil
from pathlib import Path
from datetime import datetime, timedelta


def rotate_backups(backup_dir, keep_daily=7, keep_weekly=4, keep_monthly=3):
    """
    Rotate backups - keep recent ones, delete old ones.

    Args:
        backup_dir: Directory containing backup files
        keep_daily: Number of daily backups to keep
        keep_weekly: Number of weekly backups to keep
        keep_monthly: Number of monthly backups to keep

    Returns:
        List of deleted backup paths
    """
    backup_dir = Path(backup_dir)

    if not backup_dir.exists():
        return []

    # Get all backup files/folders with timestamps
    backups = []
    for item in backup_dir.iterdir():
        # Extract timestamp from name (format: name_YYYYMMDD_HHMMSS)
        try:
            parts = item.stem.split('_')
            date_str = parts[-2]
            time_str = parts[-1].split('.')[0]
            timestamp = datetime.strptime(f"{date_str}_{time_str}", "%Y%m%d_%H%M%S")
            backups.append((item, timestamp))
        except (ValueError, IndexError):
            continue

    # Sort by date (newest first)
    backups.sort(key=lambda x: x[1], reverse=True)

    now = datetime.now()
    keep = set()

    # Keep recent daily backups
    daily_kept = 0
    for item, timestamp in backups:
        if daily_kept >= keep_daily:
            break
        if now - timestamp < timedelta(days=keep_daily):
            keep.add(item)
            daily_kept += 1

    # Keep weekly backups (one per week)
    weeks_seen = set()
    for item, timestamp in backups:
        week = timestamp.strftime("%Y-%W")
        if week not in weeks_seen and len(weeks_seen) < keep_weekly:
            keep.add(item)
            weeks_seen.add(week)

    # Keep monthly backups (one per month)
    months_seen = set()
    for item, timestamp in backups:
        month = timestamp.strftime("%Y-%m")
        if month not in months_seen and len(months_seen) < keep_monthly:
            keep.add(item)
            months_seen.add(month)

    # Delete backups not in keep set
    deleted = []
    for item, _ in backups:
        if item not in keep:
            if item.is_dir():
                shutil.rmtree(item)
            else:
                item.unlink()
            deleted.append(item)
            print(f"🗑️ Deleted old backup: {item.name}")

    print(f"Kept {len(keep)} backups, deleted {len(deleted)}")
    return deleted
```
The Complete Backup Script
```python
#!/usr/bin/env python3
"""
Automated Backup System - Protect your important files automatically.
Author: Alex Rodriguez

This script creates compressed backups of specified folders,
manages backup rotation, and logs all operations.
"""

import json
import logging
import os
import shutil
import sys
from datetime import datetime, timedelta
from pathlib import Path


# ========================================
# CONFIGURATION
# ========================================

# Folders to backup
BACKUP_SOURCES = [
    {
        "name": "Documents",
        "path": Path.home() / "Documents",
        "enabled": True,
    },
    {
        "name": "Projects",
        "path": Path.home() / "Projects",
        "enabled": True,
    },
    {
        "name": "Config",
        "path": Path.home() / ".config",
        "enabled": True,
        "exclude": ["cache", "Cache", "tmp"],
    },
]

# Backup destination
BACKUP_DESTINATION = Path("/media/backup/automated_backups")
# Alternative: BACKUP_DESTINATION = Path.home() / "Backups"

# Backup settings
BACKUP_FORMAT = "gztar"  # Options: zip, tar, gztar, bztar
KEEP_DAILY = 7
KEEP_WEEKLY = 4
KEEP_MONTHLY = 6

# Logging
LOG_FILE = Path.home() / ".backup_log.txt"


# ========================================
# LOGGING SETUP
# ========================================

def setup_logging():
    """Configure logging."""
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s | %(levelname)-8s | %(message)s',
        handlers=[
            logging.FileHandler(LOG_FILE),
            logging.StreamHandler(sys.stdout)
        ]
    )
    return logging.getLogger(__name__)


logger = setup_logging()


# ========================================
# BACKUP FUNCTIONS
# ========================================

def get_folder_size(path):
    """Calculate total size of a folder in bytes."""
    total = 0
    for item in Path(path).rglob("*"):
        if item.is_file():
            total += item.stat().st_size
    return total


def format_size(bytes_size):
    """Format bytes as human-readable string."""
    for unit in ['B', 'KB', 'MB', 'GB']:
        if bytes_size < 1024:
            return f"{bytes_size:.1f} {unit}"
        bytes_size /= 1024
    return f"{bytes_size:.1f} TB"


def create_backup(source_config, destination):
    """
    Create a compressed backup of a source folder.

    Args:
        source_config: Dictionary with source configuration
        destination: Base backup destination path

    Returns:
        Path to created backup or None if failed
    """
    name = source_config["name"]
    source_path = Path(source_config["path"])
    exclude = source_config.get("exclude", [])

    if not source_path.exists():
        logger.warning(f"Source not found, skipping: {source_path}")
        return None

    # Create backup destination folder for this source
    backup_dir = destination / name
    backup_dir.mkdir(parents=True, exist_ok=True)

    # Generate backup filename with timestamp
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    archive_name = f"{name}_{timestamp}"
    archive_path = backup_dir / archive_name

    logger.info(f"Backing up: {name}")
    logger.info(f"  Source: {source_path}")

    # Get source size
    source_size = get_folder_size(source_path)
    logger.info(f"  Size: {format_size(source_size)}")

    try:
        # If we have exclusions, we need to copy to temp first
        if exclude:
            import tempfile
            with tempfile.TemporaryDirectory() as temp_dir:
                temp_source = Path(temp_dir) / source_path.name

                # Copy with exclusions
                shutil.copytree(
                    source_path,
                    temp_source,
                    ignore=shutil.ignore_patterns(*exclude)
                )

                # Create archive from temp
                result = shutil.make_archive(
                    str(archive_path),
                    BACKUP_FORMAT,
                    root_dir=temp_dir,
                    base_dir=source_path.name
                )
        else:
            # Create archive directly
            result = shutil.make_archive(
                str(archive_path),
                BACKUP_FORMAT,
                root_dir=source_path.parent,
                base_dir=source_path.name
            )

        result_path = Path(result)
        backup_size = result_path.stat().st_size

        logger.info(f"  ✅ Created: {result_path.name} ({format_size(backup_size)})")

        return result_path

    except Exception as e:
        logger.error(f"  ❌ Backup failed: {e}")
        return None


def rotate_backups(backup_dir):
    """
    Rotate backups - keep recent ones, delete old ones.

    Args:
        backup_dir: Directory containing backup files

    Returns:
        Number of backups deleted
    """
    backup_dir = Path(backup_dir)

    if not backup_dir.exists():
        return 0

    # Collect all backup files with their timestamps
    backups = []
    for item in backup_dir.iterdir():
        if not item.is_file():
            continue

        try:
            # Parse timestamp from filename
            parts = item.stem.split('_')
            if len(parts) >= 3:
                date_str = parts[-2]
                time_str = parts[-1]
                timestamp = datetime.strptime(f"{date_str}_{time_str}", "%Y%m%d_%H%M%S")
                backups.append((item, timestamp))
        except (ValueError, IndexError):
            continue

    if not backups:
        return 0

    # Sort newest first
    backups.sort(key=lambda x: x[1], reverse=True)

    now = datetime.now()
    keep = set()

    # Keep daily backups
    for i, (item, timestamp) in enumerate(backups):
        if i < KEEP_DAILY:
            keep.add(item)

    # Keep weekly backups (one per week)
    weeks_seen = set()
    for item, timestamp in backups:
        week_key = timestamp.strftime("%Y-W%W")
        if week_key not in weeks_seen:
            keep.add(item)
            weeks_seen.add(week_key)
            if len(weeks_seen) >= KEEP_WEEKLY:
                break

    # Keep monthly backups (one per month)
    months_seen = set()
    for item, timestamp in backups:
        month_key = timestamp.strftime("%Y-%m")
        if month_key not in months_seen:
            keep.add(item)
            months_seen.add(month_key)
            if len(months_seen) >= KEEP_MONTHLY:
                break

    # Delete old backups
    deleted = 0
    for item, timestamp in backups:
        if item not in keep:
            item.unlink()
            logger.info(f"  🗑️ Deleted old backup: {item.name}")
            deleted += 1

    return deleted


def verify_backup(backup_path):
    """
    Verify a backup archive is readable.

    Args:
        backup_path: Path to backup archive

    Returns:
        True if backup is valid
    """
    import tarfile
    import zipfile

    backup_path = Path(backup_path)

    try:
        if backup_path.suffix == '.zip':
            with zipfile.ZipFile(backup_path, 'r') as zf:
                # Test archive integrity
                bad_file = zf.testzip()
                return bad_file is None
        elif backup_path.suffix in ['.tar', '.gz', '.bz2'] or '.tar' in backup_path.name:
            with tarfile.open(backup_path, 'r:*') as tf:
                # List contents to verify
                tf.getnames()
                return True
    except Exception as e:
        logger.error(f"Backup verification failed: {e}")

    return False


def get_backup_summary(destination):
    """Generate summary of all backups."""
    summary = {
        "total_size": 0,
        "backup_count": 0,
        "sources": {}
    }

    destination = Path(destination)

    if not destination.exists():
        return summary

    for source_dir in destination.iterdir():
        if not source_dir.is_dir():
            continue

        source_summary = {
            "count": 0,
            "size": 0,
            "latest": None,
            "oldest": None,
        }

        backups = list(source_dir.glob("*"))
        source_summary["count"] = len(backups)

        for backup in backups:
            if backup.is_file():
                size = backup.stat().st_size
                source_summary["size"] += size
                summary["total_size"] += size
                summary["backup_count"] += 1

                # Track newest and oldest
                mtime = backup.stat().st_mtime
                if source_summary["latest"] is None or mtime > source_summary["latest"]:
                    source_summary["latest"] = datetime.fromtimestamp(mtime)
                if source_summary["oldest"] is None or mtime < source_summary["oldest"]:
                    source_summary["oldest"] = datetime.fromtimestamp(mtime)

        summary["sources"][source_dir.name] = source_summary

    return summary


# ========================================
# MAIN
# ========================================

def run_backup():
    """Run the complete backup process."""
    logger.info("=" * 60)
    logger.info("AUTOMATED BACKUP STARTING")
    logger.info(f"Time: {datetime.now()}")
    logger.info(f"Destination: {BACKUP_DESTINATION}")
    logger.info("=" * 60)

    # Ensure destination exists
    BACKUP_DESTINATION.mkdir(parents=True, exist_ok=True)

    # Track results
    results = {
        "successful": [],
        "failed": [],
        "rotated": 0,
    }

    # Process each source
    for source in BACKUP_SOURCES:
        if not source.get("enabled", True):
            logger.info(f"Skipping disabled source: {source['name']}")
            continue

        logger.info("-" * 40)

        # Create backup
        backup_path = create_backup(source, BACKUP_DESTINATION)

        if backup_path:
            # Verify backup
            if verify_backup(backup_path):
                logger.info("  ✓ Verified")
                results["successful"].append(source["name"])
            else:
                logger.warning("  ⚠️ Verification failed!")
                results["failed"].append(source["name"])
        else:
            results["failed"].append(source["name"])

        # Rotate old backups for this source
        source_backup_dir = BACKUP_DESTINATION / source["name"]
        deleted = rotate_backups(source_backup_dir)
        results["rotated"] += deleted

    # Summary
    logger.info("\n" + "=" * 60)
    logger.info("BACKUP COMPLETE")
    logger.info("=" * 60)
    logger.info(f"Successful: {len(results['successful'])}")
    logger.info(f"Failed: {len(results['failed'])}")
    logger.info(f"Old backups deleted: {results['rotated']}")

    # Show storage summary
    summary = get_backup_summary(BACKUP_DESTINATION)
    logger.info(f"\nTotal backup storage: {format_size(summary['total_size'])}")
    logger.info(f"Total backup files: {summary['backup_count']}")

    return results


def main():
    """Main entry point."""
    try:
        run_backup()
    except KeyboardInterrupt:
        logger.info("\nBackup cancelled by user")
    except Exception as e:
        logger.exception(f"Backup failed: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()
```
How to Run This Script
- Save the script as `backup_system.py`
- Configure your backup sources in the `BACKUP_SOURCES` list
- Set your backup destination in `BACKUP_DESTINATION`
- Run manually:

```bash
python backup_system.py
```

- Expected output:

```
============================================================
AUTOMATED BACKUP STARTING
Time: 2025-12-02 10:30:00
Destination: /media/backup/automated_backups
============================================================
----------------------------------------
Backing up: Documents
  Source: /home/user/Documents
  Size: 2.3 GB
  ✅ Created: Documents_20251202_103000.tar.gz (1.8 GB)
  ✓ Verified
----------------------------------------
Backing up: Projects
  Source: /home/user/Projects
  Size: 5.1 GB
  ✅ Created: Projects_20251202_103045.tar.gz (3.2 GB)
  ✓ Verified
============================================================
BACKUP COMPLETE
============================================================
Successful: 2
Failed: 0
Old backups deleted: 3

Total backup storage: 25.6 GB
Total backup files: 15
```

- Schedule automatic backups (cron example):

```bash
# Run daily at 2 AM
0 2 * * * /usr/bin/python3 /path/to/backup_system.py
```
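Cron covers Linux and macOS. If you would rather keep everything in Python (for example on a machine without cron), a small standard-library loop can do the same job. Here is a rough sketch that assumes it lives in the same file as run_backup() and fires once a day at 2 AM local time:

```python
import time
from datetime import datetime, timedelta


def run_daily_at(hour, minute=0):
    """Run run_backup() once a day at the given local time (simple sleep loop)."""
    while True:
        now = datetime.now()
        next_run = now.replace(hour=hour, minute=minute, second=0, microsecond=0)
        if next_run <= now:
            next_run += timedelta(days=1)

        # Sleep until the next scheduled run, then back up
        time.sleep((next_run - now).total_seconds())
        run_backup()


run_daily_at(2)  # 2:00 AM local time
```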
Customization Options
Backup to Cloud Storage
```python
# pip install boto3
import boto3


def upload_to_s3(local_path, bucket, s3_key):
    """Upload backup to AWS S3."""
    s3 = boto3.client('s3')
    s3.upload_file(str(local_path), bucket, s3_key)
    logger.info(f"Uploaded to S3: s3://{bucket}/{s3_key}")
```
Email Notification
```python
def send_backup_report(results):
    """Send email with backup results."""
    subject = f"Backup Report - {datetime.now().strftime('%Y-%m-%d')}"

    body = f"""
    Backup completed at {datetime.now()}

    Successful: {len(results['successful'])}
    Failed: {len(results['failed'])}

    {'⚠️ FAILURES: ' + ', '.join(results['failed']) if results['failed'] else '✅ All backups successful'}
    """

    # Use email sending code from our email automation guide
```
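If you don't have that guide handy, here is a minimal sketch of one way to deliver the report with the standard library's smtplib. The SMTP host, port, addresses, and credentials below are placeholders you would replace with your own:

```python
import smtplib
from email.message import EmailMessage


def send_email(subject, body):
    """Send a plain-text email via SMTP (placeholder server and credentials)."""
    msg = EmailMessage()
    msg["Subject"] = subject
    msg["From"] = "backup@example.com"   # placeholder sender
    msg["To"] = "you@example.com"        # placeholder recipient
    msg.set_content(body)

    # Assumes an SMTP server that accepts STARTTLS on port 587
    with smtplib.SMTP("smtp.example.com", 587) as server:
        server.starttls()
        server.login("backup@example.com", "app-password")  # placeholder credentials
        server.send_message(msg)
```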
Exclude Patterns
```python
BACKUP_SOURCES = [
    {
        "name": "Projects",
        "path": Path.home() / "Projects",
        "enabled": True,
        "exclude": [
            "node_modules",
            "__pycache__",
            ".git",
            "*.pyc",
            ".env",
            "venv",
        ],
    },
]
```
Common Issues & Solutions
| Issue | Solution |
|---|---|
| Permission denied | Run with sudo or check folder permissions |
| Disk full | Reduce retention settings; check available space |
| Backup too slow | Use sync backup instead of full; exclude large folders |
| Archive corrupted | Always verify backups; check disk health |
| Can't find backup drive | Check mount point; use absolute paths |
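The surest way to confirm a backup actually works is to restore it somewhere harmless and look inside. shutil can unpack any of the archive formats this script produces; a minimal sketch (both paths are examples, point them at one of your own archives):

```python
import shutil

# Unpack a backup archive into a scratch directory and spot-check the contents.
shutil.unpack_archive(
    "/media/backup/automated_backups/Documents/Documents_20251202_103000.tar.gz",
    "/tmp/restore_test"
)
```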
Taking It Further
Encrypted Backups
```python
# pip install cryptography
from cryptography.fernet import Fernet


def encrypt_backup(backup_path, key):
    """Encrypt a backup file."""
    fernet = Fernet(key)

    with open(backup_path, 'rb') as f:
        data = f.read()

    encrypted = fernet.encrypt(data)

    encrypted_path = backup_path.with_suffix(backup_path.suffix + '.enc')
    with open(encrypted_path, 'wb') as f:
        f.write(encrypted)

    return encrypted_path
```
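An encrypted backup is only useful if you can decrypt it later. A matching helper might look like the sketch below; generating and safely storing the key (Fernet.generate_key()) is up to you, and losing it means losing the backup:

```python
def decrypt_backup(encrypted_path, key):
    """Decrypt a .enc backup file back to its original archive."""
    fernet = Fernet(key)

    with open(encrypted_path, 'rb') as f:
        encrypted = f.read()

    decrypted = fernet.decrypt(encrypted)

    # Strip the trailing '.enc' to recover the original archive name
    restored_path = encrypted_path.with_suffix('')
    with open(restored_path, 'wb') as f:
        f.write(decrypted)

    return restored_path
```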
Backup Integrity Checking
```python
import hashlib
import json
from pathlib import Path


def calculate_checksum(filepath):
    """Calculate SHA-256 checksum of file."""
    sha256 = hashlib.sha256()

    with open(filepath, 'rb') as f:
        while chunk := f.read(8192):
            sha256.update(chunk)

    return sha256.hexdigest()


def save_checksums(backup_dir):
    """Save checksums for all backups."""
    checksums = {}

    for backup in Path(backup_dir).rglob("*"):
        if backup.is_file() and not backup.name.endswith('.checksums'):
            checksums[backup.name] = calculate_checksum(backup)

    checksum_file = Path(backup_dir) / "backups.checksums"
    with open(checksum_file, 'w') as f:
        json.dump(checksums, f, indent=2)
```
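To close the loop, a companion function can re-hash each archive and compare it against the saved checksum file. A sketch along the same lines:

```python
def verify_checksums(backup_dir):
    """Re-hash every backup and compare against the saved checksum file."""
    backup_dir = Path(backup_dir)
    checksum_file = backup_dir / "backups.checksums"

    with open(checksum_file) as f:
        expected = json.load(f)

    mismatches = []
    for name, checksum in expected.items():
        matches = list(backup_dir.rglob(name))
        if not matches or calculate_checksum(matches[0]) != checksum:
            mismatches.append(name)

    return mismatches  # an empty list means every backup still matches
```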
Conclusion
You've built a complete backup system. Your important files are now automatically copied, compressed, verified, and rotated—all without any manual intervention.
The key is running this regularly. Set up a scheduled task, and your data is protected. Hardware failures, accidental deletions, ransomware—you're covered.
Start with the basic setup, then customize. Add cloud backup for off-site protection. Add encryption for sensitive data. Add email notifications so you know everything's working.
Your data is too valuable to leave unprotected.
Backup today, sleep well tonight.