# pip install send2trash
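# Scans every file under the selected directories, computes a hash for each one,
# and lets the user review and delete the duplicates that are found.
# To save time, only the first, middle and last 1 KB of each file are read when
# computing the hash.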
import os
import hashlib
import tkinter as tk
from tkinter import ttk, filedialog, messagebox
import datetime
import send2trash
import platform
import subprocess
# Number of bytes requested by each f.read() call in fast_hash (one read at the
# start, one at the middle and one at the end of the file).
PARTIAL_READ_SIZE = 1024  # changed to 1024 bytes
def fast_hash(filepath, chunk_size=None):
    """Return a quick MD5 fingerprint of a file, or ``None`` if it is unreadable.

    Instead of hashing the whole file, up to ``chunk_size`` bytes are sampled
    at the start, the middle and the end. The file's byte length is mixed into
    the digest as well, so two files of different sizes never share a
    fingerprint even when their sampled regions are identical (for example
    zero-filled or padded files).

    Args:
        filepath: Path of the file to fingerprint.
        chunk_size: Number of bytes to sample at each position. When omitted,
            the module-level ``PARTIAL_READ_SIZE`` is used.

    Returns:
        The hexadecimal MD5 digest as a string, or ``None`` when the file
        cannot be stat'ed, opened or read.
    """
    if chunk_size is None:
        chunk_size = PARTIAL_READ_SIZE
    try:
        filesize = os.path.getsize(filepath)
        if filesize == 0:
            # Nothing to sample; every empty file gets the digest of b''.
            return hashlib.md5(b'').hexdigest()
        digest = hashlib.md5()
        # Prefix the length so equal samples from different-sized files differ.
        digest.update(f'{filesize}:'.encode('ascii'))
        sample_offsets = (0, filesize // 2, max(0, filesize - chunk_size))
        with open(filepath, 'rb') as f:
            for offset in sample_offsets:
                f.seek(offset)
                digest.update(f.read(chunk_size))
        return digest.hexdigest()
    except (OSError, ValueError):
        # Best effort: unreadable, vanished or invalid paths are skipped by
        # the caller, which treats None as "no hash available".
        return None
class FileInfo:
    """Plain record holding what the scanner stores about one file.

    Attributes:
        path: Path of the file as passed to the constructor.
        size: Size value as passed to the constructor.
        mtime: Timestamp (seconds since the epoch) used by formatted_mtime().
        hashval: Hash string associated with the file.
    """

    def __init__(self, path, size, mtime, hashval):
        self.path, self.size = path, size
        self.mtime, self.hashval = mtime, hashval

    def formatted_mtime(self):
        """Return ``mtime`` rendered as local time, ``YYYY-MM-DD HH:MM:SS``."""
        return f'{datetime.datetime.fromtimestamp(self.mtime):%Y-%m-%d %H:%M:%S}'
class App(tk.Tk):
    """Main window of the duplicate-file finder.

    The user builds a list of directories, optionally sets a minimum file
    size, and starts a scan. Files sharing the same ``fast_hash`` value are
    shown as groups in the upper tree; selecting a group lists its files in
    the lower tree, from which they can be moved to the trash or revealed in
    the platform's file manager.

    Attributes:
        dir_list: Directory paths to scan, in the order shown in the listbox.
        min_size_var: ``tk.StringVar`` holding the minimum file size in bytes.
        running: True while a scan is in progress.
        stop_requested: Set by the run/stop button to abort the current scan.
        file_info_list: ``FileInfo`` objects for every file hashed so far.
        hash_groups: Maps a hash string to the list of ``FileInfo`` sharing it
            (only groups with more than one file are kept).
    """

    def __init__(self):
        super().__init__()
        self.title('다중 디렉토리 해시 계산 및 파일 관리')
        self.geometry('960x700')
        self.dir_list = []  # directory path strings
        self.min_size_var = tk.StringVar(value='0')  # minimum file size in bytes
        self.running = False
        self.stop_requested = False
        self.file_info_list = []
        self.hash_groups = {}
        # Row-id -> data maps. Reading values back out of a Treeview via
        # item()['values'] converts digit-only strings to int, so the data
        # behind each row is tracked here instead of being parsed from the UI.
        self._group_rows = {}  # group-tree row id -> hash string
        self._file_rows = {}   # file-tree row id -> FileInfo
        self.create_widgets()

    def create_widgets(self):
        """Build the directory controls, the group tree and the file tree."""
        # Directory selection and control box
        frame_dir = ttk.Frame(self)
        frame_dir.pack(fill='both', padx=10, pady=5)
        label_dir = ttk.Label(frame_dir, text='디렉토리 목록:')
        label_dir.grid(row=0, column=0, sticky='w')
        self.list_dir = tk.Listbox(frame_dir, height=6, selectmode='extended', width=50)
        self.list_dir.grid(row=1, column=0, rowspan=4, sticky='nsew', padx=(0,10), pady=2)
        frame_dir_btns = ttk.Frame(frame_dir)
        frame_dir_btns.grid(row=1, column=1, sticky='ns')
        btn_add = ttk.Button(frame_dir_btns, text='추가', width=8, command=self.add_directory)
        btn_add.pack(pady=(0,5), fill='x')
        btn_remove = ttk.Button(frame_dir_btns, text='삭제', width=8, command=self.remove_directory)
        btn_remove.pack(fill='x')

        # Minimum file size entry and preset combobox, stacked vertically
        frame_size = ttk.Frame(frame_dir)
        frame_size.grid(row=0, column=2, rowspan=5, sticky='nw')
        lbl_min_size = ttk.Label(frame_size, text='최소 파일 크기(Byte):')
        lbl_min_size.pack(anchor='w', pady=(0, 3))
        self.entry_min_size = ttk.Entry(frame_size, textvariable=self.min_size_var, width=15)
        self.entry_min_size.pack(anchor='w', pady=(0, 10))
        lbl_combo = ttk.Label(frame_size, text='크기 단위 선택:')
        lbl_combo.pack(anchor='w', pady=(0, 3))
        size_options = ['0', '1M', '10M', '100M', '1G']
        self.combo_size_unit = ttk.Combobox(frame_size, values=size_options, state='readonly', width=12)
        self.combo_size_unit.current(0)
        self.combo_size_unit.pack(anchor='w')
        self.combo_size_unit.bind('<<ComboboxSelected>>', self.on_size_unit_selected)

        # Run / stop button
        self.btn_run = ttk.Button(frame_size, text='실행', width=10, command=self.toggle_run)
        self.btn_run.pack(anchor='w', pady=(20, 0))

        # Duplicate-group tree (grid-style)
        frame_hash_group = ttk.Frame(self)
        frame_hash_group.pack(fill='both', padx=10, pady=5, expand=True)
        ttk.Label(frame_hash_group, text='중복 그룹 목록').pack(anchor='w')
        columns_hash = ('count', 'hash', 'rep_name', 'total_size')
        self.tree_hash_groups = ttk.Treeview(frame_hash_group, columns=columns_hash, show='headings', selectmode='browse')
        self.tree_hash_groups.pack(side='left', fill='both', expand=True)
        self.tree_hash_groups.heading('count', text='개수')
        self.tree_hash_groups.heading('hash', text='해시값')
        self.tree_hash_groups.heading('rep_name', text='대표파일명')
        self.tree_hash_groups.heading('total_size', text='크기 (Bytes)')
        self.tree_hash_groups.column('count', width=50, anchor='center')
        self.tree_hash_groups.column('hash', width=360, anchor='w')
        self.tree_hash_groups.column('rep_name', width=250, anchor='w')
        self.tree_hash_groups.column('total_size', width=120, anchor='e')
        scrollbar_hash = ttk.Scrollbar(frame_hash_group, orient='vertical', command=self.tree_hash_groups.yview)
        scrollbar_hash.pack(side='left', fill='y')
        self.tree_hash_groups.configure(yscrollcommand=scrollbar_hash.set)
        self.tree_hash_groups.bind('<<TreeviewSelect>>', self.on_hash_group_selected)

        # File list with scrollbar and delete / deselect buttons
        frame_file_list = ttk.Frame(self)
        frame_file_list.pack(fill='both', expand=True, padx=10, pady=5)
        ttk.Label(frame_file_list, text='파일 목록').pack(anchor='w')
        columns_files = ('index', 'path', 'size', 'mtime', 'hash')
        self.tree_files = ttk.Treeview(frame_file_list, columns=columns_files, show='headings', selectmode='extended')
        self.tree_files.pack(side='left', fill='both', expand=True)
        self.tree_files.heading('index', text='No.')
        self.tree_files.heading('path', text='파일 경로')
        self.tree_files.heading('size', text='크기 (Bytes)')
        self.tree_files.heading('mtime', text='수정일시')
        self.tree_files.heading('hash', text='해시값')
        self.tree_files.column('index', width=50, anchor='center')
        self.tree_files.column('path', width=450)
        self.tree_files.column('size', width=120, anchor='e')
        self.tree_files.column('mtime', width=150, anchor='center')
        self.tree_files.column('hash', width=320)
        scrollbar_files = ttk.Scrollbar(frame_file_list, orient='vertical', command=self.tree_files.yview)
        scrollbar_files.pack(side='left', fill='y')
        self.tree_files.configure(yscrollcommand=scrollbar_files.set)
        frame_buttons = ttk.Frame(frame_file_list)
        frame_buttons.pack(side='left', fill='y', padx=10)
        btn_delete = ttk.Button(frame_buttons, text='삭제', command=self.delete_selected_files)
        btn_delete.pack(fill='x', pady=5)
        btn_restore = ttk.Button(frame_buttons, text='원복', command=self.restore_selection)
        btn_restore.pack(fill='x', pady=5)
        self.tree_files.bind('<Double-1>', self.open_file_location)

    def on_size_unit_selected(self, event):
        """Copy the byte count of the chosen preset into the minimum-size entry."""
        mapping = {
            '0': '0',
            '1M': str(1_000_000),
            '10M': str(10_000_000),
            '100M': str(100_000_000),
            '1G': str(1_000_000_000),
        }
        val = self.combo_size_unit.get()
        if val in mapping:
            self.min_size_var.set(mapping[val])

    def add_directory(self):
        """Ask for a directory and append it to the list unless already present."""
        dir_selected = filedialog.askdirectory()
        if dir_selected and dir_selected not in self.dir_list:
            self.dir_list.append(dir_selected)
            self.list_dir.insert(tk.END, dir_selected)

    def remove_directory(self):
        """Remove the directories currently selected in the listbox."""
        selected_indices = list(self.list_dir.curselection())
        # Delete from the end so the remaining indices stay valid.
        for i in reversed(selected_indices):
            del self.dir_list[i]
            self.list_dir.delete(i)

    def toggle_run(self):
        """Start a scan, or request that the running scan stops."""
        if not self.running:
            self.start_run()
        else:
            self.stop_requested = True

    def start_run(self):
        """Validate the inputs, scan the directories and show duplicate groups.

        The scan runs on the Tk thread and pumps the event loop with
        ``update()`` so the window stays responsive and the stop button works.
        Whatever happens during the scan, the title, the button label and the
        ``running`` flag are restored before the result dialog is shown.
        """
        if self.running:
            return
        try:
            min_size = int(self.min_size_var.get())
            if min_size < 0:
                raise ValueError()
        except ValueError:
            messagebox.showwarning('입력오류', '최소 파일 크기는 0 이상의 정수여야 합니다.')
            return
        if not self.dir_list:
            messagebox.showwarning('경고', '하나 이상의 디렉토리를 추가하세요.')
            return

        self.running = True
        self.stop_requested = False
        self.btn_run.config(text='중단')
        self.file_info_list.clear()
        self.hash_groups.clear()
        self._group_rows.clear()
        self._file_rows.clear()
        self.tree_hash_groups.delete(*self.tree_hash_groups.get_children())
        self.tree_files.delete(*self.tree_files.get_children())
        original_title = self.title()

        try:
            outcome = self._scan(min_size, original_title)
        finally:
            # Single place that puts the UI back into its idle state.
            self.title(original_title)
            self.btn_run.config(text='실행')
            self.running = False
        messagebox.showinfo(*outcome)

    def _scan(self, min_size, base_title):
        """Run all scan phases; return the (title, message) pair to report."""
        stopped = ('중단', '작업이 중단되었습니다.')
        target_files = self._collect_target_files(min_size)
        if self.stop_requested:
            return stopped
        if not target_files:
            return ('알림', '조건에 맞는 파일이 없습니다.')
        self._hash_files(target_files, base_title)
        if self.stop_requested:
            return stopped
        self._populate_groups()
        return ('완료', f'총 {len(self.file_info_list)}개의 파일 중 중복 그룹 {len(self.hash_groups)}개를 찾았습니다.')

    def _collect_target_files(self, min_size):
        """Walk the directories and return (path, size) for files >= min_size."""
        targets = []
        # Iterate over a snapshot: update() below lets the user edit dir_list.
        for directory in tuple(self.dir_list):
            for root, _dirs, files in os.walk(directory):
                # Pump events once per directory so a stop click is seen here.
                self.update()
                if self.stop_requested:
                    return targets
                for fname in files:
                    fpath = os.path.join(root, fname)
                    try:
                        size = os.path.getsize(fpath)
                    except (OSError, ValueError):
                        continue  # vanished or inaccessible file: skip it
                    if size >= min_size:
                        targets.append((fpath, size))
        return targets

    def _hash_files(self, target_files, base_title):
        """Hash every target file, showing progress in the window title."""
        total = len(target_files)
        for done, (fpath, size) in enumerate(target_files, start=1):
            if self.stop_requested:
                break
            percent = int(done / total * 100)
            self.title(f'{base_title} - 진행: {done}/{total} ({percent}%)')
            self.update()
            try:
                mtime = self._modified_time(fpath)
                hashval = fast_hash(fpath)
            except (OSError, ValueError):
                continue
            if hashval is not None:
                self.file_info_list.append(FileInfo(fpath, size, mtime, hashval))

    @staticmethod
    def _modified_time(fpath):
        """Return the file's mtime, falling back to ctime when it is 0 or unreadable."""
        try:
            mtime = os.path.getmtime(fpath)
        except OSError:
            mtime = 0
        return mtime or os.path.getctime(fpath)

    @staticmethod
    def _group_values(hashval, group):
        """Return the group-tree row values for one hash group."""
        total_size = sum(f.size for f in group)
        rep_name = os.path.basename(group[0].path) if group else ''
        return (len(group), hashval, rep_name, f'{total_size:,}')

    def _populate_groups(self):
        """Group hashed files by hash and list groups by total size, largest first."""
        grouped = {}
        for finfo in self.file_info_list:
            grouped.setdefault(finfo.hashval, []).append(finfo)
        self.hash_groups = {k: v for k, v in grouped.items() if len(v) > 1}
        ordered = sorted(
            self.hash_groups.items(),
            key=lambda kv: sum(f.size for f in kv[1]),
            reverse=True,
        )
        for hashval, group in ordered:
            row = self.tree_hash_groups.insert('', 'end', values=self._group_values(hashval, group))
            self._group_rows[row] = hashval

    def on_hash_group_selected(self, event):
        """List the files of the group selected in the group tree."""
        selected = self.tree_hash_groups.selection()
        if not selected:
            return
        hashval = self._group_rows.get(selected[0])
        files = self.hash_groups.get(hashval, [])
        self.tree_files.delete(*self.tree_files.get_children())
        self._file_rows.clear()
        for i, finfo in enumerate(files, start=1):
            size_str = f'{finfo.size:,}'
            mtime_str = finfo.formatted_mtime()
            row = self.tree_files.insert('', 'end', values=(i, finfo.path, size_str, mtime_str, finfo.hashval))
            self._file_rows[row] = finfo

    def delete_selected_files(self):
        """Move the selected files to the trash and drop them from the views.

        Files that no longer exist are reported and removed from the listing.
        If moving a file fails, the error is shown and processing stops; files
        already moved are still removed from the listing.
        """
        selected_items = self.tree_files.selection()
        if not selected_items:
            messagebox.showwarning('경고', '삭제할 파일을 선택해 주십시오.')
            return

        moved, stale, failed = [], [], False
        for item in selected_items:
            finfo = self._file_rows.get(item)
            if finfo is None:
                continue
            fpath = os.path.normpath(finfo.path)  # normalise the path
            if not os.path.exists(fpath):
                messagebox.showwarning('경로 없음', f'파일 경로를 찾을 수 없습니다:\n{fpath}')
                stale.append(item)
                continue
            try:
                send2trash.send2trash(fpath)
            except Exception as e:
                messagebox.showerror('오류', f'파일 삭제 실패: {fpath}\n{str(e)}')
                failed = True
                break
            moved.append(item)

        # Keep the trees and the in-memory groups in sync with the disk.
        self._drop_file_rows(moved + stale)
        if failed:
            return
        if moved:
            messagebox.showinfo('완료', '선택한 파일을 휴지통으로 이동했습니다.')
        self.restore_selection()

    def _drop_file_rows(self, rows):
        """Remove file-tree rows and their FileInfo from all bookkeeping."""
        touched = set()
        for row in rows:
            finfo = self._file_rows.pop(row, None)
            if finfo is None:
                continue
            self.tree_files.delete(row)
            if finfo in self.file_info_list:
                self.file_info_list.remove(finfo)
            group = self.hash_groups.get(finfo.hashval)
            if group and finfo in group:
                group.remove(finfo)
                touched.add(finfo.hashval)
        for hashval in touched:
            self._refresh_group_row(hashval)

    def _refresh_group_row(self, hashval):
        """Update a group's row, or remove the group once it has < 2 files."""
        row = next((iid for iid, h in self._group_rows.items() if h == hashval), None)
        group = self.hash_groups.get(hashval, [])
        if len(group) > 1:
            if row is not None:
                self.tree_hash_groups.item(row, values=self._group_values(hashval, group))
            return
        # A single remaining file is no longer a duplicate group.
        self.hash_groups.pop(hashval, None)
        if row is not None:
            del self._group_rows[row]
            self.tree_hash_groups.delete(row)

    def restore_selection(self):
        """Clear the selection in the file tree."""
        for item in self.tree_files.selection():
            self.tree_files.selection_remove(item)

    def open_file_location(self, event):
        """Reveal the double-clicked file in the platform's file manager."""
        item = self.tree_files.identify_row(event.y)
        if not item:
            return
        finfo = self._file_rows.get(item)
        if finfo is None:
            return
        filepath = os.path.normpath(finfo.path)  # normalise the path
        if not os.path.exists(filepath):
            messagebox.showerror('오류', '파일을 찾을 수 없습니다.')
            return
        folder = os.path.dirname(filepath)
        system = platform.system()
        try:
            if system == 'Windows':
                subprocess.run(['explorer', '/select,', filepath])
            elif system == 'Darwin':  # macOS
                subprocess.run(['open', '-R', filepath])
            else:  # Linux and others: open the containing folder
                subprocess.run(['xdg-open', folder])
        except Exception as e:
            messagebox.showerror('오류', f'파일 탐색기 실행 실패: {e}')
# Script entry point: create the main window and hand control to Tk's event loop.
if __name__ == '__main__':
    App().mainloop()