diff --git a/src/components/pages/gallery/SelectedFileOptions/DeduplicateOptions.tsx b/src/components/pages/gallery/SelectedFileOptions/DeduplicateOptions.tsx index 6ceff294e..6f84911fb 100644 --- a/src/components/pages/gallery/SelectedFileOptions/DeduplicateOptions.tsx +++ b/src/components/pages/gallery/SelectedFileOptions/DeduplicateOptions.tsx @@ -17,6 +17,12 @@ const VerticalLine = styled.div` background: #303030; `; +const CheckboxText = styled.div` + margin-left: 0.5em; + font-size: 16px; + margin-right: 0.8em; +`; + interface IProps { deleteFileHelper: () => void; setDialogMessage: SetDialogMessage; @@ -55,7 +61,27 @@ export default function DeduplicateOptions({ {count} {constants.SELECTED} - + { + deduplicateContext.setClubSameFileHashesOnly( + !deduplicateContext.clubSameFileHashesOnly + ); + }}> + {constants.CLUB_BY_FILE_HASH} +
+ +
-
- {constants.CLUB_BY_CAPTURE_TIME} -
+ {constants.CLUB_BY_CAPTURE_TIME}
diff --git a/src/pages/deduplicate/index.tsx b/src/pages/deduplicate/index.tsx index 922e87971..e518b619d 100644 --- a/src/pages/deduplicate/index.tsx +++ b/src/pages/deduplicate/index.tsx @@ -6,6 +6,7 @@ import React, { createContext, useContext, useEffect, useState } from 'react'; import { getDuplicateFiles, clubDuplicatesByTime, + clubDuplicatesBySameFileHashes, } from 'services/deduplicationService'; import { syncFiles, trashFiles } from 'services/fileService'; import { EnteFile } from 'types/file'; @@ -43,6 +44,7 @@ export default function Deduplicate() { } = useContext(AppContext); const [duplicateFiles, setDuplicateFiles] = useState(null); const [clubSameTimeFilesOnly, setClubSameTimeFilesOnly] = useState(false); + const [clubSameFileHashesOnly, setClubSameFileHashesOnly] = useState(false); const [fileSizeMap, setFileSizeMap] = useState(new Map()); const [collectionNameMap, setCollectionNameMap] = useState( new Map() @@ -67,7 +69,7 @@ export default function Deduplicate() { useEffect(() => { syncWithRemote(); - }, [clubSameTimeFilesOnly]); + }, [clubSameTimeFilesOnly, clubSameFileHashesOnly]); const syncWithRemote = async () => { startLoading(); @@ -79,9 +81,15 @@ export default function Deduplicate() { setCollectionNameMap(collectionNameMap); const files = await syncFiles(collections, () => null); let duplicates = await getDuplicateFiles(files, collectionNameMap); + if (clubSameTimeFilesOnly) { duplicates = clubDuplicatesByTime(duplicates); } + + if (clubSameFileHashesOnly) { + duplicates = clubDuplicatesBySameFileHashes(duplicates); + } + const currFileSizeMap = new Map(); let allDuplicateFiles: EnteFile[] = []; let toSelectFileIDs: number[] = []; @@ -149,13 +157,16 @@ export default function Deduplicate() { collectionNameMap, clubSameTimeFilesOnly, setClubSameTimeFilesOnly, + clubSameFileHashesOnly, + setClubSameFileHashesOnly, fileSizeMap, isOnDeduplicatePage: true, }}> {duplicateFiles.length > 0 && ( {constants.DEDUPLICATION_LOGIC_MESSAGE( - clubSameTimeFilesOnly + clubSameTimeFilesOnly, + clubSameFileHashesOnly )} )} diff --git a/src/services/deduplicationService.ts b/src/services/deduplicationService.ts index 0b12d800c..c657cd06a 100644 --- a/src/services/deduplicationService.ts +++ b/src/services/deduplicationService.ts @@ -2,6 +2,7 @@ import { EnteFile } from 'types/file'; import { getEndpoint } from 'utils/common/apiUtil'; import { getToken } from 'utils/common/key'; import { logError } from 'utils/sentry'; +import { areFilesWithFileHashSame, fileHashExists } from 'utils/upload'; import HTTPService from './HTTPService'; const ENDPOINT = getEndpoint(); @@ -113,6 +114,64 @@ export function clubDuplicatesByTime(dupes: DuplicateFiles[]) { return result; } +export function clubDuplicatesBySameFileHashes(dupes: DuplicateFiles[]) { + const result: DuplicateFiles[] = []; + + for (const dupe of dupes) { + let files: EnteFile[] = []; + + const filteredFiles = dupe.files.filter((file) => { + return fileHashExists(file.metadata); + }); + + if (filteredFiles.length <= 1) { + continue; + } + + const dupesSortedByFileHash = filteredFiles.map((file) => { + return { + file, + hash: + file.metadata.hash ?? + `${file.metadata.imageHash}_${file.metadata.videoHash}`, + }; + }); + + dupesSortedByFileHash.sort((firstFile, secondFile) => { + return firstFile.hash.localeCompare(secondFile.hash); + }); + + files.push(dupesSortedByFileHash[0].file); + for (let i = 1; i < dupesSortedByFileHash.length; i++) { + if ( + areFilesWithFileHashSame( + dupesSortedByFileHash[i - 1].file.metadata, + dupesSortedByFileHash[i].file.metadata + ) + ) { + files.push(dupesSortedByFileHash[i].file); + } else { + if (files.length > 1) { + result.push({ + files: [...files], + size: dupe.size, + }); + } + files = [dupesSortedByFileHash[i].file]; + } + } + + if (files.length > 1) { + result.push({ + files, + size: dupe.size, + }); + } + } + + return result; +} + async function fetchDuplicateFileIDs() { try { const response = await HTTPService.get( diff --git a/src/types/deduplicate/index.ts b/src/types/deduplicate/index.ts index b026bf8d3..c411954eb 100644 --- a/src/types/deduplicate/index.ts +++ b/src/types/deduplicate/index.ts @@ -1,6 +1,8 @@ export type DeduplicateContextType = { clubSameTimeFilesOnly: boolean; setClubSameTimeFilesOnly: (clubSameTimeFilesOnly: boolean) => void; + clubSameFileHashesOnly: boolean; + setClubSameFileHashesOnly: (clubSameFileHashes: boolean) => void; fileSizeMap: Map; isOnDeduplicatePage: boolean; collectionNameMap: Map; @@ -9,6 +11,8 @@ export type DeduplicateContextType = { export const DefaultDeduplicateContext = { clubSameTimeFilesOnly: false, setClubSameTimeFilesOnly: () => null, + clubSameFileHashesOnly: false, + setClubSameFileHashesOnly: () => null, fileSizeMap: new Map(), isOnDeduplicatePage: false, collectionNameMap: new Map(), diff --git a/src/utils/strings/englishConstants.tsx b/src/utils/strings/englishConstants.tsx index 52bb0d6c1..3fbde5484 100644 --- a/src/utils/strings/englishConstants.tsx +++ b/src/utils/strings/englishConstants.tsx @@ -721,13 +721,19 @@ const englishConstants = { DEDUPLICATE_FILES: 'deduplicate files', NO_DUPLICATES_FOUND: "you've no duplicate files that can be cleared", CLUB_BY_CAPTURE_TIME: 'club by capture time', + CLUB_BY_FILE_HASH: 'club by file hashes', FILES: 'files', EACH: 'each', - DEDUPLICATION_LOGIC_MESSAGE: (captureTime: boolean) => ( + DEDUPLICATION_LOGIC_MESSAGE: ( + captureTime: boolean, + fileHashes: boolean + ) => ( <> the following files were clubbed based on their sizes - {captureTime && ` and capture time`}, please review and delete items - you believe are duplicates{' '} + {captureTime && !fileHashes && ' and capture time'} + {fileHashes && !captureTime && ' and file hashes'} + {fileHashes && captureTime && ', capture time and file hashes'}, + please review and delete items you believe are duplicates{' '} ), STOP_ALL_UPLOADS_MESSAGE: