Python实用脚本实战：10个高效文件处理与自动化任务脚本 - 序与诗（一个分享折腾笔记的网站）

Python作为一门简洁而强大的编程语言，在文件处理和自动化任务方面有着得天独厚的优势。本文将分享10个实用的Python脚本，帮助你提升工作效率，让重复性的任务自动化。

一、批量文件重命名脚本

在日常工作中，经常需要批量重命名文件。这个脚本可以帮你快速完成文件重命名任务。

import os

def batch_rename(directory, old_ext, new_ext):
    """
    Replace the trailing extension of files in a directory.

    Every entry of ``directory`` whose name ends with ``old_ext`` is renamed
    so that it ends with ``new_ext`` instead. Only the trailing occurrence is
    replaced, and an entry is skipped (never overwritten) when the target
    name already exists.

    :param directory: target directory (not searched recursively)
    :param old_ext: extension to replace, e.g. ``.txt``
    :param new_ext: replacement extension, e.g. ``.md``
    """
    for filename in os.listdir(directory):
        if not filename.endswith(old_ext):
            continue

        # Slice off only the suffix: str.replace would also rewrite earlier
        # occurrences of the extension ("a.txt.txt" -> "a.md.md").
        new_filename = filename[:len(filename) - len(old_ext)] + new_ext
        if new_filename == filename:
            continue

        old_name = os.path.join(directory, filename)
        new_name = os.path.join(directory, new_filename)
        if os.path.exists(new_name):
            # os.rename silently replaces the target on POSIX; keep both files.
            print(f"跳过: {filename} -> {new_filename} (目标已存在)")
            continue

        os.rename(old_name, new_name)
        print(f"重命名: {filename} -> {new_filename}")

# Usage example: change every .txt file in the current directory to .md
batch_rename(".", ".txt", ".md")

1.1 高级版本：支持正则表达式重命名

import os
import re

def advanced_rename(directory, pattern, replacement):
    """
    Rename entries of a directory with a regular-expression substitution.

    Each entry of ``directory`` whose name matches ``pattern`` is renamed to
    the result of substituting ``replacement`` for every match. An entry is
    skipped when the substitution leaves the name unchanged or when the new
    name already exists, so two entries that map to the same name never
    overwrite each other.

    :param directory: target directory (not searched recursively)
    :param pattern: regular expression to match against each name
    :param replacement: replacement string (may use group references)
    """
    regex = re.compile(pattern)  # compile once instead of once per entry

    for filename in os.listdir(directory):
        if not regex.search(filename):
            continue

        new_name = regex.sub(replacement, filename)
        if new_name == filename:
            continue

        old_path = os.path.join(directory, filename)
        new_path = os.path.join(directory, new_name)
        if os.path.exists(new_path):
            # os.rename silently replaces the target on POSIX; keep both files.
            print(f"跳过: {filename} -> {new_name} (目标已存在)")
            continue

        os.rename(old_path, new_path)
        print(f"重命名: {filename} -> {new_name}")

# Usage example: replace runs of whitespace in file names with underscores
advanced_rename(".", r"\s+", "_")

二、文件内容搜索工具

在多个文件中搜索指定内容，类似于Linux的grep命令。

import os
import re

def search_in_files(directory, keyword, file_ext=".py", case_sensitive=False):
    """
    Recursively scan a directory tree for lines containing a keyword.

    Files that cannot be read are reported on stdout and skipped.

    :param directory: root directory to walk
    :param keyword: substring to look for
    :param file_ext: only files whose name ends with this suffix are read
    :param case_sensitive: compare with exact case when true
    :return: list of ``(filepath, line_number, stripped_line)`` tuples
    """
    if case_sensitive:
        def is_hit(text):
            return keyword in text
    else:
        def is_hit(text):
            return keyword.lower() in text.lower()

    matches = []

    for current_dir, _subdirs, names in os.walk(directory):
        for name in names:
            if not name.endswith(file_ext):
                continue

            filepath = os.path.join(current_dir, name)
            try:
                with open(filepath, "r", encoding="utf-8") as handle:
                    for line_num, text in enumerate(handle, 1):
                        if is_hit(text):
                            matches.append((filepath, line_num, text.strip()))
            except Exception as e:
                print(f"无法读取文件 {filepath}: {e}")

    return matches

# Usage example
results = search_in_files(".", "import", ".py")
for filepath, line_num, content in results[:10]:  # show only the first 10 matches
    print(f"{filepath}:{line_num} - {content}")

三、自动备份脚本

将重要文件完整备份到指定目录（每次生成带时间戳的备份文件夹），并自动清理超出数量上限的旧备份。

import os
import shutil
import datetime

def backup_files(source_dir, backup_dir, max_backups=5):
    """
    Copy a directory into a new timestamped backup folder and prune old ones.

    The copy is written to ``backup_dir/backup_<YYYYmmdd_HHMMSS>``. Afterwards
    only the ``max_backups`` newest ``backup_*`` directories are kept; plain
    files that happen to start with ``backup_`` are never counted or deleted.

    :param source_dir: directory to back up
    :param backup_dir: directory that holds the timestamped backups
    :param max_backups: number of backups to keep (at least 1)
    :raises ValueError: if ``max_backups`` is smaller than 1
    """
    if max_backups < 1:
        # backups[:-0] is an empty slice, so 0 used to disable pruning silently.
        raise ValueError("max_backups must be at least 1")

    # Create the timestamped backup folder
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    backup_path = os.path.join(backup_dir, f"backup_{timestamp}")

    # Copy the whole tree
    shutil.copytree(source_dir, backup_path)
    print(f"✅ 备份完成: {backup_path}")

    # Prune old backups. The timestamp format sorts chronologically as text.
    backups = sorted(
        name
        for name in os.listdir(backup_dir)
        if name.startswith("backup_")
        and os.path.isdir(os.path.join(backup_dir, name))
    )
    excess = max(len(backups) - max_backups, 0)
    for old_backup in backups[:excess]:
        old_path = os.path.join(backup_dir, old_backup)
        shutil.rmtree(old_path)
        print(f"🗑️  删除旧备份: {old_path}")

# Usage example
backup_files("./important_docs", "./backups", max_backups=5)

四、PDF文件合并工具

将多个PDF文件合并为一个文件。

# 需要安装: pip install PyPDF2
import PyPDF2
import os

def merge_pdfs(pdf_dir, output_filename):
    """
    Merge all PDF files of a directory into a single PDF.

    Files are appended in sorted-name order. The extension check is
    case-insensitive, and the output file itself is left out when it lives in
    ``pdf_dir`` so that a re-run does not merge the previous result.

    :param pdf_dir: directory containing the PDF files
    :param output_filename: path of the merged PDF to write
    """
    output_abs = os.path.abspath(output_filename)

    # Collect and sort the input PDFs before creating the merger, so that the
    # "nothing to do" path does not leave an unclosed merger behind.
    pdf_files = sorted(
        name
        for name in os.listdir(pdf_dir)
        if name.lower().endswith(".pdf")
        and os.path.abspath(os.path.join(pdf_dir, name)) != output_abs
    )

    if not pdf_files:
        print("❌ 未找到PDF文件")
        return

    merger = PyPDF2.PdfMerger()
    try:
        # Append each PDF in order
        for pdf in pdf_files:
            merger.append(os.path.join(pdf_dir, pdf))
            print(f"➕ 添加: {pdf}")

        # Write the merged document
        merger.write(output_filename)
    finally:
        # Release the merger even when a file is unreadable.
        merger.close()

    print(f"✅ 合并完成: {output_filename}")

# Usage example
merge_pdfs("./pdfs", "merged_output.pdf")

五、图片批量压缩脚本

批量压缩图片文件，减小文件大小。

# 需要安装: pip install Pillow
from PIL import Image
import os

def compress_images(input_dir, output_dir, quality=85):
    """
    Re-save the images of a directory with compression settings.

    Files with a ``.jpg``, ``.jpeg``, ``.png`` or ``.webp`` extension (any
    case) are saved under the same name in ``output_dir``. A failure on one
    file is reported on stdout and does not stop the batch.

    :param input_dir: directory containing the source images
    :param output_dir: directory to write to (created if missing)
    :param quality: value passed to ``Image.save`` as ``quality`` (1-100)
    """
    os.makedirs(output_dir, exist_ok=True)

    supported_formats = (".jpg", ".jpeg", ".png", ".webp")
    jpeg_formats = (".jpg", ".jpeg")
    # Alpha/palette modes, converted to RGB before a JPEG save.
    jpeg_unsafe_modes = ("RGBA", "LA", "P", "PA")

    for filename in os.listdir(input_dir):
        file_ext = os.path.splitext(filename)[1].lower()
        if file_ext not in supported_formats:
            continue

        input_path = os.path.join(input_dir, filename)
        output_path = os.path.join(output_dir, filename)

        try:
            # The context manager closes the underlying file handle.
            with Image.open(input_path) as img:
                to_save = img
                # Convert only for JPEG output, so PNG/WebP keep their
                # transparency.
                if file_ext in jpeg_formats and img.mode in jpeg_unsafe_modes:
                    to_save = img.convert("RGB")
                to_save.save(output_path, quality=quality, optimize=True)
            print(f"✅ 压缩: {filename}")
        except Exception as e:
            print(f"❌ 处理失败 {filename}: {e}")

# Usage example
compress_images("./images", "./compressed", quality=80)

六、Excel数据处理脚本

读取和处理Excel文件数据。

# 需要安装: pip install openpyxl pandas
import pandas as pd

def process_excel(input_file, output_file):
    """
    Clean an Excel sheet, add a computed column and print a per-category summary.

    Steps: read ``input_file``, drop rows that are entirely empty, add a
    ``total`` column (``price * quantity``) when both columns exist, print
    mean ``price`` / summed ``quantity`` per ``category`` for whichever of
    those columns exist, then write the frame to ``output_file`` without the
    index.

    :param input_file: path of the Excel file to read
    :param output_file: path of the Excel file to write
    """
    # Read the workbook
    df = pd.read_excel(input_file)
    print(f"原始数据行数: {len(df)}")

    # Cleaning: drop rows where every cell is empty
    df.dropna(how="all", inplace=True)

    # Transformation: add the computed column
    if "price" in df.columns and "quantity" in df.columns:
        df["total"] = df["price"] * df["quantity"]

    # Analysis: grouped statistics per category
    if "category" in df.columns:
        wanted = {"price": "mean", "quantity": "sum"}
        # Aggregate only columns that exist; a missing one raises KeyError.
        agg_spec = {col: func for col, func in wanted.items() if col in df.columns}
        if agg_spec:
            summary = df.groupby("category").agg(agg_spec).round(2)
            print("\n分类统计:")
            print(summary)

    # Save the processed data
    df.to_excel(output_file, index=False)
    print(f"\n✅ 处理完成: {output_file}")

# Usage example
process_excel("input.xlsx", "output.xlsx")

七、定时任务执行器

使用schedule库实现定时任务调度。

# 需要安装: pip install schedule
import schedule
import time
import datetime

def job_task(name):
    """Print a line with the current local time announcing task ``name``."""
    stamp = f"{datetime.datetime.now():%Y-%m-%d %H:%M:%S}"
    print(f"[{stamp}] 执行任务: {name}")

def setup_schedules():
    """Register the three demo jobs with the ``schedule`` library."""
    plans = (
        # Every day at 08:00
        (schedule.every().day.at("08:00"), "早安任务"),
        # Every 10 minutes
        (schedule.every(10).minutes, "定期检查"),
        # Every Monday
        (schedule.every().monday, "周报生成"),
    )
    for trigger, label in plans:
        trigger.do(job_task, label)

    print("✅ 定时任务已设置")

def run_scheduler():
    """Register the jobs, then run pending ones once per second until Ctrl+C."""

    def _poll_forever():
        # Never returns on its own; only an exception leaves this loop.
        while True:
            schedule.run_pending()
            time.sleep(1)

    setup_schedules()
    print("⏰ 调度器启动，按Ctrl+C退出")

    try:
        _poll_forever()
    except KeyboardInterrupt:
        print("\n⏹️  调度器已停止")

# Usage example
run_scheduler()

八、日志分析工具

分析Web服务器日志，统计访问量、错误率等。

import re
from collections import Counter
import datetime

def analyze_access_log(log_file):
    """
    Print request statistics for an Nginx/Apache style access log.

    For every line, the first dotted-quad IP address, the three-digit status
    code that follows the quoted request, and the request URL are counted.
    The ten most common IPs, all status codes, and the ten most common URLs
    are then printed.

    :param log_file: path of the access log to read
    """
    # Single-quoted raw strings, because the patterns contain a literal
    # double quote (the quote that delimits the request in the log line).
    ip_pattern = re.compile(r"(\d+\.\d+\.\d+\.\d+)")
    status_pattern = re.compile(r'" (\d{3}) ')
    url_pattern = re.compile(r'"\w+ (.*?) HTTP')

    ip_counter = Counter()
    status_counter = Counter()
    url_counter = Counter()
    extractors = (
        (ip_pattern, ip_counter),
        (status_pattern, status_counter),
        (url_pattern, url_counter),
    )

    # errors="replace" keeps a stray undecodable byte from aborting the run.
    with open(log_file, "r", encoding="utf-8", errors="replace") as f:
        for line in f:
            for pattern, counter in extractors:
                match = pattern.search(line)
                if match:
                    counter[match.group(1)] += 1

    print("=== TOP 10 IP ===")
    for ip, count in ip_counter.most_common(10):
        print(f"{ip}: {count}次")

    print("\n=== 状态码统计 ===")
    for status, count in status_counter.most_common():
        print(f"{status}: {count}次")

    print("\n=== TOP 10 URL ===")
    for url, count in url_counter.most_common(10):
        print(f"{url}: {count}次")

# Usage example
analyze_access_log("/var/log/nginx/access.log")

九、邮件自动发送脚本

自动发送邮件，支持附件。

# 需要安装: pip install yagmail
import yagmail

def send_email(subject, content, to_emails, attachments=None,
               user="your_email@gmail.com", password="your_password"):
    """
    Send an email through yagmail, optionally with attachments.

    Any failure is reported on stdout instead of being raised.

    :param subject: subject line
    :param content: message body
    :param to_emails: recipient address, or a list of addresses
    :param attachments: list of file paths to attach (optional)
    :param user: sender account; replace the placeholder default
    :param password: sender password (an app-specific password is
        recommended); replace the placeholder default
    """
    try:
        # The context manager closes the SMTP connection on every path.
        with yagmail.SMTP(user=user, password=password) as yag:
            yag.send(
                to=to_emails,
                subject=subject,
                contents=content,
                # An empty list means "no attachments", as before.
                attachments=attachments or None,
            )
        # A bare string would otherwise be joined character by character.
        recipients = [to_emails] if isinstance(to_emails, str) else list(to_emails)
        print(f"✅ 邮件发送成功: {', '.join(recipients)}")
    except Exception as e:
        print(f"❌ 邮件发送失败: {e}")

# Usage example
send_email(
    subject="Python自动邮件测试",
    content="这是一封由Python脚本自动发送的邮件。",
    to_emails=["recipient@example.com"],
    attachments=["report.pdf", "data.xlsx"]
)

十、系统监控脚本

监控CPU、内存、磁盘使用情况。

# 需要安装: pip install psutil
import psutil
import datetime

def monitor_system(threshold_cpu=80, threshold_mem=80, threshold_disk=90):
    """
    Print a usage report for CPU, memory and the ``/`` disk, with alerts.

    An alert is listed for each reading that is strictly greater than its
    threshold.

    :param threshold_cpu: CPU usage percentage above which to alert
    :param threshold_mem: memory usage percentage above which to alert
    :param threshold_disk: disk usage percentage above which to alert
    """
    # Take the three readings (the CPU one samples over a 1 second interval)
    cpu_percent = psutil.cpu_percent(interval=1)
    mem_percent = psutil.virtual_memory().percent
    disk_percent = psutil.disk_usage("/").percent

    # (reading, limit, alert text) for each monitored resource
    checks = (
        (cpu_percent, threshold_cpu, f"⚠️  CPU使用率过高: {cpu_percent}%"),
        (mem_percent, threshold_mem, f"⚠️  内存使用率过高: {mem_percent}%"),
        (disk_percent, threshold_disk, f"⚠️  磁盘使用率过高: {disk_percent}%"),
    )
    alerts = [message for reading, limit, message in checks if reading > limit]

    # Report
    now = f"{datetime.datetime.now():%Y-%m-%d %H:%M:%S}"
    print(f"[{now}] 系统监控报告")
    print(f"CPU使用率: {cpu_percent}%")
    print(f"内存使用率: {mem_percent}%")
    print(f"磁盘使用率: {disk_percent}%")

    if not alerts:
        print("\n✅ 系统运行正常")
        return

    print("\n⚠️  警报:")
    for alert in alerts:
        print(alert)

# Usage example
monitor_system(threshold_cpu=80, threshold_mem=80, threshold_disk=90)