Maintenance - Foufou-exe/iac-terraform-ansible-aws-webserver-demo GitHub Wiki
Guide complet pour maintenir votre infrastructure AWS en bon état de fonctionnement.
┌─────────────┬─────────────┬─────────────┬─────────────┐
│  Quotidien  │ Hebdomadaire│   Mensuel   │ Trimestriel │
├─────────────┼─────────────┼─────────────┼─────────────┤
│ • Monitoring│ • Updates   │ • Backups   │ • Security  │
│ • Logs      │ • Cleanup   │ • Reports   │ • Audit     │
│ • Alerts    │ • Tests     │ • Review    │ • DR Test   │
└─────────────┴─────────────┴─────────────┴─────────────┘
- Préventive : Éviter les problèmes avant qu'ils surviennent
- Corrective : Réparer les problèmes existants
- Adaptative : Adapter aux nouvelles exigences
- Perfective : Améliorer les performances
#!/bin/bash
# System monitoring script: settings shared by the check functions.

# File that receives a copy of every monitoring message.
LOG_FILE=/var/log/system-monitor.log

# Alert thresholds, in percent.
ALERT_THRESHOLD_CPU=80
ALERT_THRESHOLD_MEM=85
ALERT_THRESHOLD_DISK=90
# Print a timestamped message on stdout and append the same line to $LOG_FILE.
# Globals:   LOG_FILE (read)
# Arguments: $1 - message text
log_message() {
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  printf '%s - %s\n' "$stamp" "$1" | tee -a "$LOG_FILE"
}
# Compare overall CPU utilisation with ALERT_THRESHOLD_CPU and log the result.
#
# Utilisation is computed as 100 minus the idle percentage on top's "Cpu(s)"
# summary line, so system, I/O-wait and steal time count as usage (the second
# field alone is only the user share). The idle value is located by pattern so
# both the "98.3 id" and "98.3%id" layouts are understood. An unreadable
# value is reported as an alert instead of reaching the comparison.
#
# Globals: ALERT_THRESHOLD_CPU (read), CPU_USAGE (written)
# Returns: 0 when usage is within the threshold, 1 on alert or unreadable data
check_cpu() {
  # LC_ALL=C keeps the decimal separator a dot for both top and awk.
  CPU_USAGE=$(LC_ALL=C top -bn1 | LC_ALL=C awk '
    /Cpu\(s\)/ && match($0, /[0-9]+(\.[0-9]+)?[ %]*id/) {
      idle = substr($0, RSTART, RLENGTH)
      sub(/[ %]*id$/, "", idle)
      printf "%.1f", 100 - idle
      exit
    }')

  if [[ ! "$CPU_USAGE" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then
    log_message "⚠️ ALERT: Unable to determine CPU usage"
    return 1
  fi

  # awk performs the floating-point comparison, so bc is not required.
  if LC_ALL=C awk -v u="$CPU_USAGE" -v t="$ALERT_THRESHOLD_CPU" 'BEGIN { exit !(u > t) }'; then
    log_message "⚠️ ALERT: CPU usage is ${CPU_USAGE}%"
    return 1
  fi

  log_message "✅ CPU usage: ${CPU_USAGE}%"
  return 0
}
# Compare memory utilisation with ALERT_THRESHOLD_MEM and log the result.
#
# Utilisation is used/total from the "Mem:" row of `free`, rounded to a whole
# percent. When the value cannot be read (no Mem: row, zero total) an alert is
# raised; without this check the integer test errors out and the failure is
# logged as a healthy reading.
#
# Globals: ALERT_THRESHOLD_MEM (read), MEM_USAGE (written)
# Returns: 0 when usage is within the threshold, 1 on alert or unreadable data
check_memory() {
  # LC_ALL=C pins the row label to "Mem:" and the number format to plain digits.
  MEM_USAGE=$(LC_ALL=C free | LC_ALL=C awk '/^Mem:/ && $2 > 0 { printf "%.0f", $3 / $2 * 100; exit }')

  if [[ ! "$MEM_USAGE" =~ ^[0-9]+$ ]]; then
    log_message "⚠️ ALERT: Unable to determine memory usage"
    return 1
  fi

  if [ "$MEM_USAGE" -gt "$ALERT_THRESHOLD_MEM" ]; then
    log_message "⚠️ ALERT: Memory usage is ${MEM_USAGE}%"
    return 1
  fi

  log_message "✅ Memory usage: ${MEM_USAGE}%"
  return 0
}
# Compare root filesystem utilisation with ALERT_THRESHOLD_DISK and log it.
#
# `df -P` guarantees one line per filesystem, so a long device name cannot wrap
# the record and shift the columns. An unreadable value raises an alert; without
# this check the integer test errors out and the failure is logged as healthy.
#
# Globals: ALERT_THRESHOLD_DISK (read), DISK_USAGE (written)
# Returns: 0 when usage is within the threshold, 1 on alert or unreadable data
check_disk() {
  DISK_USAGE=$(LC_ALL=C df -P / | awk 'NR == 2 { sub(/%$/, "", $5); print $5 }')

  if [[ ! "$DISK_USAGE" =~ ^[0-9]+$ ]]; then
    log_message "⚠️ ALERT: Unable to determine disk usage"
    return 1
  fi

  if [ "$DISK_USAGE" -gt "$ALERT_THRESHOLD_DISK" ]; then
    log_message "⚠️ ALERT: Disk usage is ${DISK_USAGE}%"
    return 1
  fi

  log_message "✅ Disk usage: ${DISK_USAGE}%"
  return 0
}
# Check that every required systemd unit is active and log one line per unit.
#
# The result is tracked as a numeric status: `return` only accepts a number,
# so returning the words "true"/"false" is an error in bash and never yields
# a usable 0/1 status.
#
# Returns: 0 when all services are running, 1 when at least one is not
check_services() {
  local -a services=("apache2" "ssh" "fail2ban" "ufw")
  local service
  local status=0

  for service in "${services[@]}"; do
    if systemctl is-active --quiet "$service"; then
      log_message "✅ Service $service is running"
    else
      log_message "❌ ALERT: Service $service is not running"
      status=1
    fi
  done

  return "$status"
}
# Run the checks in order, bracketed by start and end markers in the log.
log_message "=== Starting system check ==="
for health_check in check_cpu check_memory check_disk check_services; do
  "$health_check"
done
log_message "=== System check completed ==="
---
# Installs diagnostic tools, deploys the system monitoring script, schedules
# it every five minutes and rotates its log file daily.
- name: Deploy monitoring dashboard
  hosts: ansible-tm
  become: yes
  tasks:
    - name: Install monitoring tools
      package:
        name:
          - htop
          - iotop
          # NOTE(review): confirm this package exists on the target release;
          # the classic netstat binary ships in `net-tools`.
          - netstat-nat
          - tcpdump
          - strace
        state: present

    - name: Create monitoring script
      copy:
        src: ../scripts/system-monitor.sh
        dest: /usr/local/bin/system-monitor.sh
        mode: "0755"
        owner: root
        group: root

    - name: Setup monitoring cron job
      cron:
        name: "System monitoring"
        minute: "*/5"
        job: "/usr/local/bin/system-monitor.sh"
        user: root

    # No postrotate hook: the log is appended by the cron-driven script on each
    # run, not by rsyslog, so there is no daemon to signal after rotation.
    - name: Create log rotation for monitoring
      copy:
        content: |
          /var/log/system-monitor.log {
              daily
              rotate 30
              compress
              delaycompress
              missingok
              notifempty
          }
        dest: /etc/logrotate.d/system-monitor
        owner: root
        group: root
        mode: "0644"
---
# Rolling package update: announces maintenance in /etc/motd, applies a safe
# upgrade, reboots when the system asks for it, then restores the MOTD.
- name: Automated system updates
  hosts: all
  become: yes
  serial: 1  # One server at a time to avoid service interruptions
  vars:
    reboot_required: false
    update_cache_valid_time: 3600

  pre_tasks:
    - name: Create maintenance window notification
      copy:
        content: |
          MAINTENANCE IN PROGRESS
          Start: {{ ansible_date_time.iso8601 }}
          Host: {{ ansible_hostname }}
          Operation: System Updates
        dest: /etc/motd
        backup: yes
      # The backup gets a timestamped name chosen by Ansible; keep the result
      # so the restore step can use the real path.
      register: motd_notice

  tasks:
    - name: Update package cache
      apt:
        update_cache: yes
        cache_valid_time: "{{ update_cache_valid_time }}"

    - name: Check for available updates
      shell: apt list --upgradable 2>/dev/null | grep -c upgradable
      register: updates_available
      changed_when: false
      # grep -c prints 0 and exits 1 when nothing matches; that is "no updates",
      # not a failure.
      failed_when: updates_available.rc not in [0, 1]

    - name: Display available updates
      debug:
        msg: "{{ updates_available.stdout }} packages available for update"

    # NOTE(review): `upgrade: safe` upgrades every package that can be upgraded
    # without removals, not only security updates as the task name suggests.
    - name: Upgrade security packages only
      apt:
        upgrade: safe
        only_upgrade: yes
        update_cache: yes
      register: security_updates
      when: updates_available.stdout | int > 0

    - name: Check if reboot is required
      stat:
        path: /var/run/reboot-required
      register: reboot_check

    - name: Set reboot flag
      set_fact:
        reboot_required: true
      when: reboot_check.stat.exists

    # NOTE(review): `systemctl is-system-running` exits non-zero while any unit
    # is failed ("degraded"), which would make this task wait until timeout.
    - name: Reboot if required
      reboot:
        reboot_timeout: 300
        connect_timeout: 20
        test_command: systemctl is-system-running
      when: reboot_required | bool

    - name: Verify services after reboot
      service:
        name: "{{ item }}"
        state: started
      loop:
        - apache2
        - ssh
        - fail2ban
      when: reboot_required | bool

  post_tasks:
    - name: Restore original MOTD
      copy:
        src: "{{ motd_notice.backup_file }}"
        dest: /etc/motd
        remote_src: yes
      when: motd_notice.backup_file is defined

    - name: Remove maintenance MOTD when no original existed
      file:
        path: /etc/motd
        state: absent
      when:
        - motd_notice is changed
        - motd_notice.backup_file is not defined

    - name: Remove temporary MOTD backup
      file:
        path: "{{ motd_notice.backup_file }}"
        state: absent
      when: motd_notice.backup_file is defined

    - name: Log maintenance completion
      lineinfile:
        path: /var/log/maintenance.log
        line: "{{ ansible_date_time.iso8601 }} - Updates completed on {{ ansible_hostname }}"
        create: yes
---
# Periodic housekeeping: package cache, old kernels, temporary files and logs.
- name: System cleanup and optimization
  hosts: all
  become: yes
  vars:
    # Only temporary entries older than this are removed, so files still in
    # use by running processes are left alone.
    tmp_max_age: "1d"

  tasks:
    - name: Clean package cache
      apt:
        autoclean: yes
        autoremove: yes

    # Only versioned, fully installed kernel images are candidates, so meta
    # packages such as linux-image-generic are never purged. Version sort plus
    # `head -n -1` keeps the newest non-running kernel as a fallback. The
    # emptiness test stops apt-get from being called without package names.
    - name: Remove old kernels (keep current + 1)
      shell: |
        current_kernel=$(uname -r)
        old_kernels=$(dpkg --list \
          | awk '$1 == "ii" && $2 ~ /^linux-image-[0-9]/ { print $2 }' \
          | grep -v -F -- "$current_kernel" \
          | sort -V \
          | head -n -1)
        if [ -n "$old_kernels" ]; then
          echo "$old_kernels" | DEBIAN_FRONTEND=noninteractive xargs apt-get purge -y
        fi
      register: kernel_cleanup
      changed_when: kernel_cleanup.stdout != ""

    # The file module does not expand shell globs, so the entries are listed
    # with find first and then removed one by one.
    - name: Find stale temporary files
      find:
        paths:
          - /tmp
          - /var/tmp
        age: "{{ tmp_max_age }}"
        file_type: any
        hidden: yes
        # Private tmp directories of running systemd services must survive.
        excludes: "systemd-private-*"
      register: stale_tmp

    - name: Find cached package archives
      find:
        paths: /var/cache/apt/archives
        patterns: "*.deb"
      register: cached_debs

    - name: Clean temporary files
      file:
        path: "{{ item.path }}"
        state: absent
      loop: "{{ stale_tmp.files + cached_debs.files }}"
      loop_control:
        label: "{{ item.path }}"
      ignore_errors: yes

    - name: Rotate and compress logs
      command: logrotate -f /etc/logrotate.conf

    # find only lists files (it has no `state` parameter); deletion is a
    # separate step.
    - name: Find old log files
      find:
        paths:
          - /var/log
        patterns:
          - "*.log.*.gz"
          - "*.log.*"
        age: "30d"
      register: old_logs

    - name: Clean old log files
      file:
        path: "{{ item.path }}"
        state: absent
      loop: "{{ old_logs.files }}"
      loop_control:
        label: "{{ item.path }}"

    - name: Clear systemd journal logs older than 30 days
      command: journalctl --vacuum-time=30d

    - name: Optimize Apache logs
      find:
        paths: /var/log/apache2
        patterns: "*.log.*"
        age: "14d"
      register: old_apache_logs

    - name: Compress old Apache logs
      command:
        argv:
          - gzip
          - "{{ item.path }}"
      loop: "{{ old_apache_logs.files }}"
      loop_control:
        label: "{{ item.path }}"
      when: not item.path.endswith('.gz')
3 copies des données
2 supports différents (local + distant)
1 copie hors-site (S3, autre région)
#!/bin/bash
# Complete backup script with versioning and encryption
#
# Each setting below can be overridden from the environment; the values shown
# are the defaults.

# Configuration
BACKUP_ROOT="${BACKUP_ROOT:-/backup}"
DATE=$(date +%Y%m%d_%H%M%S)
RETENTION_DAYS="${RETENTION_DAYS:-30}"
# An explicitly empty S3_BUCKET disables the S3 sync step.
S3_BUCKET="${S3_BUCKET-my-project-backups}"
# Placeholder recipient: the address published with this script was replaced
# by an e-mail obfuscation filter. Set GPG_KEY to a key present in the keyring.
GPG_KEY="${GPG_KEY:-backup@example.com}"

# Ensure backup directory exists
if ! mkdir -p "$BACKUP_ROOT"/{system,web,logs,databases}; then
  echo "WARNING: cannot create backup directories under $BACKUP_ROOT" >&2
fi
# Print a timestamped message on stdout and append it to the backup log.
# Globals:   BACKUP_ROOT (read)
# Arguments: $1 - message text
log() {
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  printf '%s - %s\n' "$stamp" "$1" | tee -a "$BACKUP_ROOT/backup.log"
}
# Archive /etc, the cron spool and /usr/local/bin into
# $BACKUP_ROOT/system/system-config-$DATE.tar.gz.
#
# tar's diagnostics are appended to the backup log, and its exit status is
# reported, so a failed archive is no longer logged as completed.
#
# Globals: BACKUP_ROOT, DATE (read)
# Returns: 0 on success or warning-only status, tar's exit code on failure
backup_system_config() {
  local archive="$BACKUP_ROOT/system/system-config-$DATE.tar.gz"
  local rc=0

  log "Starting system configuration backup..."
  tar -czpf "$archive" \
    --exclude='/proc' \
    --exclude='/sys' \
    --exclude='/dev' \
    --exclude='/tmp' \
    --exclude='/var/tmp' \
    --exclude='/var/cache' \
    /etc/ \
    /var/spool/cron/ \
    /usr/local/bin/ \
    2>>"$BACKUP_ROOT/backup.log" || rc=$?

  case "$rc" in
    0) log "System configuration backup completed" ;;
    # GNU tar uses 1 for "some files changed while being read".
    1) log "WARNING: System configuration backup completed, but some files changed while being read" ;;
    *)
      log "ERROR: System configuration backup failed (tar exit code $rc)"
      return "$rc"
      ;;
  esac
}
# Archive /var/www into $BACKUP_ROOT/web/web-content-$DATE.tar.gz.
#
# tar's diagnostics are appended to the backup log, and its exit status is
# reported, so a failed archive is no longer logged as completed.
#
# Globals: BACKUP_ROOT, DATE (read)
# Returns: 0 on success or warning-only status, tar's exit code on failure
backup_web_content() {
  local archive="$BACKUP_ROOT/web/web-content-$DATE.tar.gz"
  local rc=0

  log "Starting web content backup..."
  tar -czpf "$archive" \
    /var/www/ \
    2>>"$BACKUP_ROOT/backup.log" || rc=$?

  case "$rc" in
    0) log "Web content backup completed" ;;
    # GNU tar uses 1 for "some files changed while being read".
    1) log "WARNING: Web content backup completed, but some files changed while being read" ;;
    *)
      log "ERROR: Web content backup failed (tar exit code $rc)"
      return "$rc"
      ;;
  esac
}
# Archive /var/log (skipping already-compressed *.gz files) into
# $BACKUP_ROOT/logs/logs-$DATE.tar.gz.
#
# Log files are often written while tar reads them; GNU tar signals that with
# exit code 1, which is reported as a warning. Higher codes are failures and
# are no longer logged as completed.
#
# Globals: BACKUP_ROOT, DATE (read)
# Returns: 0 on success or warning-only status, tar's exit code on failure
backup_logs() {
  local archive="$BACKUP_ROOT/logs/logs-$DATE.tar.gz"
  local rc=0

  log "Starting logs backup..."
  tar -czpf "$archive" \
    --exclude='*.gz' \
    /var/log/ \
    2>>"$BACKUP_ROOT/backup.log" || rc=$?

  case "$rc" in
    0) log "Logs backup completed" ;;
    1) log "WARNING: Logs backup completed, but some files changed while being read" ;;
    *)
      log "ERROR: Logs backup failed (tar exit code $rc)"
      return "$rc"
      ;;
  esac
}
# Encrypt every archive of the current run (*-$DATE.tar.gz under $BACKUP_ROOT)
# for $GPG_KEY and delete the plaintext once its .gpg file has been written.
#
# File names are read NUL-delimited on descriptor 3, so unusual characters are
# preserved and gpg cannot consume the list through stdin. --batch/--yes keep
# gpg from prompting when the script runs unattended.
#
# Globals: BACKUP_ROOT, DATE, GPG_KEY (read)
# Returns: 0 when every archive was encrypted, 1 when at least one failed
encrypt_backups() {
  local file
  local failed=0

  log "Encrypting backups..."
  while IFS= read -r -d '' -u 3 file; do
    if gpg --batch --yes --encrypt --recipient "$GPG_KEY" \
        --output "${file}.gpg" "$file"; then
      rm -- "$file"
      log "Encrypted: $(basename -- "$file")"
    else
      # The plaintext archive is kept so the data is not lost.
      log "ERROR: Failed to encrypt $(basename -- "$file")"
      failed=1
    fi
  done 3< <(find "$BACKUP_ROOT" -name "*-$DATE.tar.gz" -type f -print0)

  return "$failed"
}
# Mirror $BACKUP_ROOT to s3://$S3_BUCKET/<hostname>/ (log files excluded,
# STANDARD_IA storage class).
#
# The step is skipped when the aws CLI is missing or S3_BUCKET is empty. A
# failed sync is logged as an error and reflected in the return status, so it
# is no longer reported as completed.
#
# Globals: BACKUP_ROOT, S3_BUCKET (read)
# Returns: 0 when synced or skipped, the aws exit code on failure
sync_to_s3() {
  if ! command -v aws &> /dev/null || [ -z "$S3_BUCKET" ]; then
    log "S3 sync skipped (aws cli not available or bucket not set)"
    return 0
  fi

  log "Syncing to S3..."
  if aws s3 sync "$BACKUP_ROOT" "s3://$S3_BUCKET/$(hostname)/" \
      --exclude "*.log" \
      --storage-class STANDARD_IA; then
    log "S3 sync completed"
  else
    local rc=$?
    log "ERROR: S3 sync failed (aws exit code $rc)"
    return "$rc"
  fi
}
# Delete archives (plain or encrypted) under $BACKUP_ROOT whose modification
# time is more than $RETENTION_DAYS days old.
# Globals: BACKUP_ROOT, RETENTION_DAYS (read)
cleanup_old_backups() {
  local -a expired=(-name "*.tar.gz*" -mtime "+$RETENTION_DAYS")

  log "Cleaning up backups older than $RETENTION_DAYS days..."
  find "$BACKUP_ROOT" "${expired[@]}" -delete
  log "Cleanup completed"
}
# Confirm that the current run left at least one file stamped with $DATE under
# $BACKUP_ROOT; terminate the script with status 1 when none is found.
# Globals: BACKUP_ROOT, DATE (read), today_backups (written)
verify_backups() {
  local -a stamped

  log "Verifying today's backups..."
  mapfile -t stamped < <(find "$BACKUP_ROOT" -name "*$DATE*" -type f)
  today_backups=${#stamped[@]}

  if [ "$today_backups" -ne 0 ]; then
    log "Found $today_backups backup files for today"
  else
    log "ERROR: No backups found for today!"
    exit 1
  fi
}
# Main execution
log "=== Starting backup process ==="
backup_system_config
backup_web_content
backup_logs
encrypt_backups
# A failed encryption leaves its plaintext archive in place. Uploading then
# would put unencrypted data off-site, so the sync is skipped until no
# plaintext archive remains.
if [ -n "$(find "$BACKUP_ROOT" -name '*.tar.gz' -type f -print -quit 2>/dev/null)" ]; then
  log "ERROR: unencrypted archives remain in $BACKUP_ROOT; S3 sync skipped"
else
  sync_to_s3
fi
cleanup_old_backups
verify_backups
log "=== Backup process completed ==="
---
# Restores system configuration and/or web content from the archives written
# by the backup script. Encrypted archives are decrypted first.
- name: System restoration from backup
  hosts: all
  become: yes
  vars:
    # Archives are stamped YYYYMMDD_HHMMSS, so the date is matched without
    # dashes. Override with e.g. -e restore_date=20240115.
    restore_date: "{{ ansible_date_time.date | replace('-', '') }}"
    backup_source: "/backup"
    # NOTE(review): no task restores log archives; 'logs' selects nothing.
    restore_type: "system"  # system, web, logs, full

  tasks:
    - name: Stop services before restoration
      service:
        name: "{{ item }}"
        state: stopped
      loop:
        - apache2
        - fail2ban
      when: restore_type in ['system', 'full']

    - name: Find encrypted backup files
      find:
        paths: "{{ backup_source }}"
        patterns: "*{{ restore_date }}*.tar.gz.gpg"
        recurse: yes
      register: encrypted_backups

    # Plain gpg invocation: no shell-specific syntax such as [[ ]] is needed.
    - name: Decrypt backup files if needed
      command:
        argv:
          - gpg
          - --batch
          - --output
          - "{{ item.path | regex_replace('\\.gpg$', '') }}"
          - --decrypt
          - "{{ item.path }}"
        creates: "{{ item.path | regex_replace('\\.gpg$', '') }}"
      loop: "{{ encrypted_backups.files }}"
      loop_control:
        label: "{{ item.path }}"

    # Listed after decryption so freshly decrypted archives are included.
    - name: Find backup files for restoration
      find:
        paths: "{{ backup_source }}"
        patterns: "*{{ restore_date }}*.tar.gz"
        recurse: yes
      register: backup_files

    # No owner/group override: the archive was created with -p, and forcing
    # root:root would reset ownership on files such as /etc/shadow.
    - name: Restore system configuration
      unarchive:
        src: "{{ item.path }}"
        dest: /
        remote_src: yes
        keep_newer: no
      loop: "{{ backup_files.files }}"
      loop_control:
        label: "{{ item.path }}"
      when:
        - restore_type in ['system', 'full']
        - "'system-config' in item.path"

    - name: Restore web content
      unarchive:
        src: "{{ item.path }}"
        dest: /
        remote_src: yes
        owner: www-data
        group: www-data
      loop: "{{ backup_files.files }}"
      loop_control:
        label: "{{ item.path }}"
      when:
        - restore_type in ['web', 'full']
        - "'web-content' in item.path"

    - name: Remove decrypted copies of encrypted archives
      file:
        path: "{{ item.path | regex_replace('\\.gpg$', '') }}"
        state: absent
      loop: "{{ encrypted_backups.files }}"
      loop_control:
        label: "{{ item.path }}"

    - name: Restart services after restoration
      service:
        name: "{{ item }}"
        state: started
      loop:
        - apache2
        - fail2ban
        - ssh
      when: restore_type in ['system', 'full']

    - name: Verify restoration
      uri:
        url: "http://{{ ansible_default_ipv4.address }}"
        method: GET
        status_code: 200
      when: restore_type in ['web', 'full']

    - name: Log restoration completion
      lineinfile:
        path: /var/log/restoration.log
        line: "{{ ansible_date_time.iso8601 }} - Restoration completed: {{ restore_type }}"
        create: yes
#!/bin/bash
# Weekly maintenance report generator

# NOTE(review): the report path under /tmp is predictable; when this runs as
# root, consider a directory that other users cannot write to.
REPORT_FILE="/tmp/weekly-report-$(date +%Y%W).html"

# Monday and Sunday of the current week, derived from the ISO weekday
# (1 = Monday ... 7 = Sunday). 'last monday' / 'next sunday' point at the
# neighbouring weeks when the script runs on a Monday or a Sunday.
day_of_week=$(date +%u)
WEEK_START=$(date -d "$((1 - day_of_week)) days" +%Y-%m-%d)
WEEK_END=$(date -d "$((7 - day_of_week)) days" +%Y-%m-%d)
# Start a new report: overwrite $REPORT_FILE with the HTML head, the stylesheet
# and the banner (period, generation time, host name).
#
# The page declares UTF-8 so the icons used in the report render correctly.
# The title icon is a reconstruction; the original glyph was lost to an
# encoding error.
#
# Globals: REPORT_FILE, WEEK_START, WEEK_END (read)
generate_html_header() {
  cat > "$REPORT_FILE" << EOF
<!DOCTYPE html>
<html>
<head>
    <meta charset="UTF-8">
    <title>Weekly Infrastructure Report</title>
    <style>
        body { font-family: Arial, sans-serif; margin: 20px; }
        .header { background: #f0f0f0; padding: 20px; border-radius: 5px; }
        .section { margin: 20px 0; padding: 15px; border-left: 4px solid #007acc; }
        .alert { color: #d9534f; font-weight: bold; }
        .ok { color: #5cb85c; font-weight: bold; }
        table { border-collapse: collapse; width: 100%; }
        th, td { border: 1px solid #ddd; padding: 8px; text-align: left; }
        th { background-color: #f2f2f2; }
    </style>
</head>
<body>
    <div class="header">
        <h1>📊 Weekly Infrastructure Report</h1>
        <p><strong>Period:</strong> $WEEK_START to $WEEK_END</p>
        <p><strong>Generated:</strong> $(date)</p>
        <p><strong>Host:</strong> $(hostname)</p>
    </div>
EOF
}
# Append the "System Status" table to $REPORT_FILE: uptime, load averages,
# memory used/total and root filesystem usage, sampled when the report is
# generated.
#
# NOTE(review): the Status column is a fixed "OK"; no value is compared with a
# threshold here.
#
# Globals: REPORT_FILE (read)
add_system_status() {
  cat >> "$REPORT_FILE" << EOF
    <div class="section">
        <h2>🖥️ System Status</h2>
        <table>
            <tr><th>Metric</th><th>Current Value</th><th>Status</th></tr>
            <tr><td>Uptime</td><td>$(uptime -p)</td><td class="ok">OK</td></tr>
            <tr><td>Load Average</td><td>$(awk '{print $1, $2, $3}' /proc/loadavg)</td><td class="ok">OK</td></tr>
            <tr><td>Memory Usage</td><td>$(free -h | awk '/Mem/ {print $3"/"$2}')</td><td class="ok">OK</td></tr>
            <tr><td>Disk Usage</td><td>$(df -h / | tail -1 | awk '{print $5}')</td><td class="ok">OK</td></tr>
        </table>
    </div>
EOF
}
# Append the "Service Status" table to $REPORT_FILE, one row per monitored
# unit (apache2, ssh, fail2ban, ufw).
#
# The third column shows when an active unit entered the active state, read
# from systemd's ActiveEnterTimestamp property. Taking words 3 and 4 of the
# "Active:" line of `systemctl status` yields text such as "(running) since",
# not a time.
#
# Globals: REPORT_FILE (read)
add_service_status() {
  local service status since status_class status_text

  cat >> "$REPORT_FILE" << EOF
    <div class="section">
        <h2>🔧 Service Status</h2>
        <table>
            <tr><th>Service</th><th>Status</th><th>Active since</th></tr>
EOF

  for service in apache2 ssh fail2ban ufw; do
    status=$(systemctl is-active "$service")
    if [ "$status" = "active" ]; then
      status_class="ok"
      status_text="✅ Active"
      since=$(systemctl show -p ActiveEnterTimestamp --value "$service")
    else
      status_class="alert"
      status_text="❌ Inactive"
      since="-"
    fi
    echo "            <tr><td>$service</td><td class=\"$status_class\">$status_text</td><td>$since</td></tr>" >> "$REPORT_FILE"
  done

  cat >> "$REPORT_FILE" << EOF
        </table>
    </div>
EOF
}
# Append the "Security Summary" list to $REPORT_FILE.
#
# Each figure is computed before the HTML is written. `grep -c` already prints
# 0 when nothing matches (and exits 1), so no `|| echo "0"` fallback is used;
# that fallback printed a second "0" on its own line.
#
# NOTE(review): the SSH and Fail2Ban counts cover the current month's entries
# in the present log file, not just the reporting week.
#
# Globals: REPORT_FILE (read)
add_security_summary() {
  local failed_ssh bans last_update ufw_state

  # Syslog month names are English; LC_ALL=C makes %b match them.
  failed_ssh=$(grep "Failed password" /var/log/auth.log 2>/dev/null \
    | grep -c -- "$(LC_ALL=C date +%b)")
  bans=$(grep "Ban " /var/log/fail2ban.log 2>/dev/null \
    | grep -c -- "$(date +%Y-%m)")
  last_update=$(stat -c %y /var/log/apt/history.log 2>/dev/null | cut -d' ' -f1)
  ufw_state=$(ufw status 2>/dev/null | head -1)

  cat >> "$REPORT_FILE" << EOF
    <div class="section">
        <h2>🔒 Security Summary</h2>
        <ul>
            <li><strong>Failed SSH attempts:</strong> ${failed_ssh:-0}</li>
            <li><strong>Fail2Ban bans:</strong> ${bans:-0}</li>
            <li><strong>Last system update:</strong> ${last_update:-unknown}</li>
            <li><strong>UFW status:</strong> ${ufw_state:-unknown}</li>
        </ul>
    </div>
EOF
}
# Append the closing "Maintenance Actions" section and end the HTML document.
#
# The contact address comes from REPORT_EMAIL. The default is a placeholder,
# because the published address was replaced by an e-mail obfuscation filter.
# The icons are reconstructions; the original glyphs were lost to an encoding
# error.
#
# Globals: REPORT_FILE, REPORT_EMAIL (read)
generate_html_footer() {
  cat >> "$REPORT_FILE" << EOF
    <div class="section">
        <h2>📋 Maintenance Actions</h2>
        <p>🔄 This report was generated automatically.</p>
        <p>📁 For detailed logs, check: /var/log/system-monitor.log</p>
        <p>📧 Report issues to: ${REPORT_EMAIL:-admin@example.com}</p>
    </div>
</body>
</html>
EOF
}
# Generate complete report
generate_html_header
add_system_status
add_service_status
add_security_summary
generate_html_footer
echo "Weekly report generated: $REPORT_FILE"

# Optionally email the report
# The recipient comes from REPORT_EMAIL. The default is a placeholder, because
# the published address was replaced by an e-mail obfuscation filter (and,
# left unquoted, the replacement text is a shell glob pattern).
REPORT_EMAIL="${REPORT_EMAIL:-admin@example.com}"
if command -v mail &> /dev/null; then
  # NOTE(review): "-a" appends a header with bsd-mailx but attaches a file in
  # some other mail implementations; confirm against the installed mailer.
  mail -s "Weekly Infrastructure Report - $(hostname)" -a "Content-Type: text/html" "$REPORT_EMAIL" < "$REPORT_FILE"
fi
🔗 Navigation :
- ⚡ Performance - Optimisation des performances
- 🔒 Sécurité - Maintenance sécurisée
- 🎯 Customization - Personnalisation de la maintenance