python脚本处理---(不同文件夹中的文件对比、移动,提取指定类型文件、中文文件名转英文)
一、对比两个文件夹的文件差异并导出差异
# -*- coding: utf-8 -*-
#===============================================================================
# 目录对比工具(包含子目录),并列出:
# 1、A比B多了哪些文件
# 2、B比A多了哪些文件
# 3、二者相同的文件:通过 md5 比较内容是否一致
#===============================================================================
import os
import time
import difflib
import hashlib
import os
import shutil


def getFileMd5(filename):
    """Return the hexadecimal MD5 digest of a file's contents.

    Args:
        filename: Path of the file to hash.

    Returns:
        The 32-character hex digest, or None (after printing a message)
        when ``filename`` is not an existing regular file.
    """
    if not os.path.isfile(filename):
        print('file not exist: ' + filename)
        return None

    myhash = hashlib.md5()
    # The context manager closes the handle even if a read fails part-way.
    with open(filename, 'rb') as f:
        while True:
            b = f.read(8096)
            if not b:
                break
            myhash.update(b)
    return myhash.hexdigest()


def getAllFiles(path):
    """List every file below ``path`` (recursively).

    Each entry is the file's full path with the leading ``path`` prefix
    sliced off, so an entry normally starts with a path separator
    (unless ``path`` itself already ends with one).

    Args:
        path: Root directory to walk.

    Returns:
        A list of path strings relative to ``path``.
    """
    flist = []
    for root, dirs, fs in os.walk(path):
        for f in fs:
            f_fullpath = os.path.join(root, f)
            flist.append(f_fullpath[len(path):])
    return flist


def _stripLeadingSep(relpath):
    """Drop leading path separators so the value can be fed to os.path.join."""
    return relpath.lstrip(os.sep + (os.altsep or ''))


def _saveCopy(srcRoot, dstRoot, relpath):
    """Copy ``relpath`` from ``srcRoot`` to ``dstRoot``, creating parent folders."""
    rel = _stripLeadingSep(relpath)
    dst = os.path.join(dstRoot, rel)
    os.makedirs(os.path.dirname(dst), exist_ok=True)
    shutil.copyfile(os.path.join(srcRoot, rel), dst)


def dirCompare(comparePath, targetPath, savePath=None):
    """Compare two directory trees and export what differs.

    Files present in both trees are compared by MD5; when the digests
    differ, the ``targetPath`` version is copied into ``savePath``.
    Files that exist only in ``comparePath`` are printed.  Files that
    exist only in ``targetPath`` are printed and copied into ``savePath``.
    The relative directory layout is preserved under ``savePath``.

    Args:
        comparePath: Directory being checked.
        targetPath: Reference directory; exported files come from here.
        savePath: Directory that receives the exported files.  Defaults to
            the current working directory.
    """
    if savePath is None:
        savePath = os.getcwd()

    setA = set(getAllFiles(comparePath))
    setB = set(getAllFiles(targetPath))

    # Files present on both sides: export the target version when contents differ.
    for of in sorted(setA & setB):
        rel = _stripLeadingSep(of)
        amd = getFileMd5(os.path.join(comparePath, rel))
        # The second digest must come from targetPath; hashing comparePath
        # twice would make every common file look identical.
        bmd = getFileMd5(os.path.join(targetPath, rel))
        if amd != bmd:
            _saveCopy(targetPath, savePath, of)

    # Files present on one side only.
    onlyInA = sorted(setA - setB)
    onlyInB = sorted(setB - setA)

    if onlyInA:
        print('-' * 20, "only in ", comparePath, '-' * 20)
        for of in onlyInA:
            # Extra files in comparePath are reported but not saved.
            print(of)

    if onlyInB:
        print('-' * 20, "only in ", targetPath, '-' * 20)
        for of in onlyInB:
            print(of)
            _saveCopy(targetPath, savePath, of)


if __name__ == '__main__':
    comparePath = 'E:\\multyfs2single\\FS-old-服务器\\SegmentationClass'  # directory being checked
    targetPath = 'E:\\multyfs2single\\FS-old-\\VOC2007\\SegmentationClass'  # reference directory
    # Optional output directory.  It is not used by default (files are saved
    # to the current working directory); pass it as the third argument of
    # dirCompare to save there instead.
    savePath = 'E:\\multyfs2single\\FS-old-服务器\\add'
    dirCompare(comparePath, targetPath)
    print("\ndone!")
二、从一个目录内提取另一个目录的同名文件,并转移
假如A目录中为jpg文件,B目录中为txt文件,我们需要从A中挑选出与B中同名(不同后缀)的文件,并保存到新的文件夹中。
import shutil
import os


def main(path_label='E:/multyfs2single/FS-old-服务器/add',
         path_object='E:/multyfs2single/FS-old-/VOC2007/JPEGImages',
         type_object='jpg',
         path_output='E:/multyfs2single/FS-old-服务器/jpg'):
    """Copy files whose base name matches a label file into an output folder.

    Every file found under ``path_label`` (recursively) contributes its
    base name without extension.  If ``path_object`` contains a file with
    that base name and the extension ``type_object``, it is copied into
    ``path_output``.

    Args:
        path_label: Directory whose file names select what to extract.
        path_object: Directory searched for same-named files.
        type_object: Extension of the files to extract, with or without
            a leading dot.
        path_output: Directory that receives the copies; created if missing.
    """
    extension = '.' + type_object.lstrip('.')
    os.makedirs(path_output, exist_ok=True)

    for _root, _dirs, label_names in os.walk(path_label):
        for label_name in label_names:
            # splitext copes with label extensions of any length, unlike
            # slicing off a fixed three characters.
            stem = os.path.splitext(label_name)[0]
            obj_name = stem + extension
            obj_path = os.path.join(path_object, obj_name)
            # Copy only when a same-named file really exists.
            if os.path.isfile(obj_path):
                shutil.copyfile(obj_path, os.path.join(path_output, obj_name))


if __name__ == '__main__':
    main()
三、提取路径下所有指定后缀文件到指定文件夹下,以.txt为例
import shutil
import os
def move_txt_files(source, dest, suffix='.txt'):
    """Move every file with a given suffix from a directory tree into one folder.

    ``source`` is walked recursively; each file whose name ends with
    ``suffix`` is moved directly into ``dest`` under its own file name
    (the sub-directory structure is not kept, so files with the same
    name in different sub-directories end up at the same destination path).

    Args:
        source: Root directory to search.
        dest: Directory that receives the files; created if missing.
        suffix: File-name ending to match.  Defaults to ``'.txt'``.
    """
    os.makedirs(dest, exist_ok=True)
    for root, _dirs, files in os.walk(source):
        for file in files:
            if not file.endswith(suffix):
                continue
            source_path = os.path.join(root, file)
            destination_path = os.path.join(dest, file)
            # When dest lies inside source the walk reaches files that are
            # already in place; leave those alone.
            if os.path.abspath(source_path) == os.path.abspath(destination_path):
                continue
            shutil.move(source_path, destination_path)
# Fill in the folder to move files out of and the folder to move them into.
source_folder = "src_folder"
destination_folder = "dst_folder"

# Make sure the destination exists before any file is moved into it.
if not os.path.exists(destination_folder):
    os.makedirs(destination_folder)

move_txt_files(source_folder, destination_folder)
四、批量中文文件名转英文
1、需要安装xpinyin
pip install xpinyin
2、执行如下代码:
python change_name.py folder_dir
change_name.py:
from xpinyin import Pinyin
import os
import sys
import pdb

# Optional prefix prepended to every new file name; set by the command-line driver.
PREFIX_NAME = ''


def _clean_name(name, prefix=''):
    """Normalise an already pinyin-converted file name.

    Removes ASCII and full-width parentheses, turns spaces into
    underscores, lower-cases the name, drops one underscore sitting
    directly in front of the extension, and prepends ``prefix``.
    """
    name = remove_chars(name, '(', ')', '(', ')')
    name = name.replace(' ', '_').lower()
    stem, ext = os.path.splitext(name)
    # The original checked a fixed position (name[-5]), which only suits
    # three-letter extensions and fails on very short names.  Presumably the
    # underscore is the splitter the converter leaves before the extension.
    if ext and len(stem) > 1 and stem.endswith('_'):
        stem = stem[:-1]
    return prefix + stem + ext


def list_all(dir_name):
    """Rename every file under ``dir_name`` to a pinyin-based name.

    The tree is walked recursively.  Each file name is converted with
    xpinyin using ``'_'`` as the splitter, cleaned up (parentheses
    removed, spaces to underscores, lower-cased, underscore before the
    extension dropped) and prefixed with the module-level
    ``PREFIX_NAME``.  Each rename is printed as ``old_path->new_name``.

    Args:
        dir_name: Root directory whose files are renamed in place.
    """
    pinyin_converter = Pinyin()
    for dirpath, dirnames, filenames in os.walk(dir_name):
        for filename in filenames:
            res = _clean_name(pinyin_converter.get_pinyin(filename, '_'),
                              PREFIX_NAME)
            src_path = os.path.join(dirpath, filename)
            dest_path = os.path.join(dirpath, res)
            print(src_path + '->' + res)
            # Nothing to do when the name is already in its final form.
            if src_path != dest_path:
                os.rename(src_path, dest_path)


def remove_chars(filename, *chars):
    """Return ``filename`` with every occurrence of each of ``chars`` removed."""
    for char in chars:
        filename = filename.replace(char, '')
    return filename

# arg1 dir
# arg2 prefix name, default empty string
if __name__ == '__main__':
    # Usage: python change_name.py folder_dir [prefix]
    print(sys.argv)
    print('len(sys.argv):', len(sys.argv))
    if len(sys.argv) == 1:
        print('must has name dir')
        sys.exit(-1)
    if len(sys.argv) >= 3:
        # The prefix is the second argument; reading sys.argv[3] raised
        # IndexError whenever exactly one prefix argument was supplied.
        PREFIX_NAME = sys.argv[2]
    # Only the first argument is a directory; the prefix must not be walked.
    list_all(sys.argv[1])