diff --git a/src/components/pages/gallery/SelectedFileOptions/DeduplicateOptions.tsx b/src/components/pages/gallery/SelectedFileOptions/DeduplicateOptions.tsx
index 6ceff294e..6f84911fb 100644
--- a/src/components/pages/gallery/SelectedFileOptions/DeduplicateOptions.tsx
+++ b/src/components/pages/gallery/SelectedFileOptions/DeduplicateOptions.tsx
@@ -17,6 +17,12 @@ const VerticalLine = styled.div`
background: #303030;
`;
+const CheckboxText = styled.div`
+ margin-left: 0.5em;
+ font-size: 16px;
+ margin-right: 0.8em;
+`; // Text wrapper with side margins and a 16px font; presumably the label beside a checkbox (per its name) - usage not visible in this hunk, confirm.
+
interface IProps {
deleteFileHelper: () => void;
setDialogMessage: SetDialogMessage;
@@ -55,7 +61,27 @@ export default function DeduplicateOptions({
{count} {constants.SELECTED}
-
+ {
+ deduplicateContext.setClubSameFileHashesOnly(
+ !deduplicateContext.clubSameFileHashesOnly
+ );
+ }}>
+ {constants.CLUB_BY_FILE_HASH}
+
+
+
-
- {constants.CLUB_BY_CAPTURE_TIME}
-
+ {constants.CLUB_BY_CAPTURE_TIME}
diff --git a/src/pages/deduplicate/index.tsx b/src/pages/deduplicate/index.tsx
index 922e87971..e518b619d 100644
--- a/src/pages/deduplicate/index.tsx
+++ b/src/pages/deduplicate/index.tsx
@@ -6,6 +6,7 @@ import React, { createContext, useContext, useEffect, useState } from 'react';
import {
getDuplicateFiles,
clubDuplicatesByTime,
+ clubDuplicatesBySameFileHashes,
} from 'services/deduplicationService';
import { syncFiles, trashFiles } from 'services/fileService';
import { EnteFile } from 'types/file';
@@ -43,6 +44,7 @@ export default function Deduplicate() {
} = useContext(AppContext);
const [duplicateFiles, setDuplicateFiles] = useState(null);
const [clubSameTimeFilesOnly, setClubSameTimeFilesOnly] = useState(false);
+ const [clubSameFileHashesOnly, setClubSameFileHashesOnly] = useState(false);
const [fileSizeMap, setFileSizeMap] = useState(new Map());
const [collectionNameMap, setCollectionNameMap] = useState(
new Map()
@@ -67,7 +69,7 @@ export default function Deduplicate() {
useEffect(() => {
syncWithRemote();
- }, [clubSameTimeFilesOnly]);
+ }, [clubSameTimeFilesOnly, clubSameFileHashesOnly]);
const syncWithRemote = async () => {
startLoading();
@@ -79,9 +81,15 @@ export default function Deduplicate() {
setCollectionNameMap(collectionNameMap);
const files = await syncFiles(collections, () => null);
let duplicates = await getDuplicateFiles(files, collectionNameMap);
+
if (clubSameTimeFilesOnly) {
duplicates = clubDuplicatesByTime(duplicates);
}
+
+ if (clubSameFileHashesOnly) {
+ duplicates = clubDuplicatesBySameFileHashes(duplicates);
+ }
+
const currFileSizeMap = new Map();
let allDuplicateFiles: EnteFile[] = [];
let toSelectFileIDs: number[] = [];
@@ -149,13 +157,16 @@ export default function Deduplicate() {
collectionNameMap,
clubSameTimeFilesOnly,
setClubSameTimeFilesOnly,
+ clubSameFileHashesOnly,
+ setClubSameFileHashesOnly,
fileSizeMap,
isOnDeduplicatePage: true,
}}>
{duplicateFiles.length > 0 && (
{constants.DEDUPLICATION_LOGIC_MESSAGE(
- clubSameTimeFilesOnly
+ clubSameTimeFilesOnly,
+ clubSameFileHashesOnly
)}
)}
diff --git a/src/services/deduplicationService.ts b/src/services/deduplicationService.ts
index 0b12d800c..c657cd06a 100644
--- a/src/services/deduplicationService.ts
+++ b/src/services/deduplicationService.ts
@@ -2,6 +2,7 @@ import { EnteFile } from 'types/file';
import { getEndpoint } from 'utils/common/apiUtil';
import { getToken } from 'utils/common/key';
import { logError } from 'utils/sentry';
+import { areFilesWithFileHashSame, fileHashExists } from 'utils/upload';
import HTTPService from './HTTPService';
const ENDPOINT = getEndpoint();
@@ -113,6 +114,64 @@ export function clubDuplicatesByTime(dupes: DuplicateFiles[]) {
return result;
}
+export function clubDuplicatesBySameFileHashes(dupes: DuplicateFiles[]) {
+ const result: DuplicateFiles[] = [];
+ // Each incoming group is narrowed to runs of files that areFilesWithFileHashSame() reports as matching; the new groups are collected in result.
+ for (const dupe of dupes) {
+ let files: EnteFile[] = [];
+ // Keep only files accepted by fileHashExists() (helper from utils/upload; presumably "metadata carries a hash" - confirm).
+ const filteredFiles = dupe.files.filter((file) => {
+ return fileHashExists(file.metadata);
+ });
+ // With fewer than two files left there is nothing to compare, so this group contributes nothing.
+ if (filteredFiles.length <= 1) {
+ continue;
+ }
+ // Sort key per file: metadata.hash when it is not null/undefined, otherwise imageHash and videoHash joined by "_".
+ const dupesSortedByFileHash = filteredFiles.map((file) => {
+ return {
+ file,
+ hash:
+ file.metadata.hash ??
+ `${file.metadata.imageHash}_${file.metadata.videoHash}`,
+ };
+ });
+ // Order by that key so files sharing a hash end up adjacent (assumes areFilesWithFileHashSame agrees with the key - confirm).
+ dupesSortedByFileHash.sort((firstFile, secondFile) => {
+ return firstFile.hash.localeCompare(secondFile.hash);
+ });
+ // Walk neighbours in sorted order: files holds the current run, which is emitted only if it has 2+ entries when a mismatch ends it.
+ files.push(dupesSortedByFileHash[0].file);
+ for (let i = 1; i < dupesSortedByFileHash.length; i++) {
+ if (
+ areFilesWithFileHashSame(
+ dupesSortedByFileHash[i - 1].file.metadata,
+ dupesSortedByFileHash[i].file.metadata
+ )
+ ) {
+ files.push(dupesSortedByFileHash[i].file);
+ } else {
+ if (files.length > 1) {
+ result.push({
+ files: [...files],
+ size: dupe.size,
+ });
+ }
+ files = [dupesSortedByFileHash[i].file];
+ }
+ }
+ // Flush the final run: inside the loop a run is only emitted when the next file fails the comparison.
+ if (files.length > 1) {
+ result.push({
+ files,
+ size: dupe.size,
+ });
+ }
+ }
+ // Every emitted group reuses the size of the dupe it came from; input groups that yielded no run of 2+ files are absent.
+ return result;
+}
+
async function fetchDuplicateFileIDs() {
try {
const response = await HTTPService.get(
diff --git a/src/types/deduplicate/index.ts b/src/types/deduplicate/index.ts
index b026bf8d3..c411954eb 100644
--- a/src/types/deduplicate/index.ts
+++ b/src/types/deduplicate/index.ts
@@ -1,6 +1,8 @@
export type DeduplicateContextType = {
clubSameTimeFilesOnly: boolean;
setClubSameTimeFilesOnly: (clubSameTimeFilesOnly: boolean) => void;
+ clubSameFileHashesOnly: boolean;
+ setClubSameFileHashesOnly: (clubSameFileHashes: boolean) => void;
fileSizeMap: Map;
isOnDeduplicatePage: boolean;
collectionNameMap: Map;
@@ -9,6 +11,8 @@ export type DeduplicateContextType = {
export const DefaultDeduplicateContext = {
clubSameTimeFilesOnly: false,
setClubSameTimeFilesOnly: () => null,
+ clubSameFileHashesOnly: false,
+ setClubSameFileHashesOnly: () => null,
fileSizeMap: new Map(),
isOnDeduplicatePage: false,
collectionNameMap: new Map(),
diff --git a/src/utils/strings/englishConstants.tsx b/src/utils/strings/englishConstants.tsx
index 52bb0d6c1..3fbde5484 100644
--- a/src/utils/strings/englishConstants.tsx
+++ b/src/utils/strings/englishConstants.tsx
@@ -721,13 +721,19 @@ const englishConstants = {
DEDUPLICATE_FILES: 'deduplicate files',
NO_DUPLICATES_FOUND: "you've no duplicate files that can be cleared",
CLUB_BY_CAPTURE_TIME: 'club by capture time',
+ CLUB_BY_FILE_HASH: 'club by file hashes',
FILES: 'files',
EACH: 'each',
- DEDUPLICATION_LOGIC_MESSAGE: (captureTime: boolean) => (
+ DEDUPLICATION_LOGIC_MESSAGE: (
+ captureTime: boolean,
+ fileHashes: boolean
+ ) => (
<>
the following files were clubbed based on their sizes
- {captureTime && ` and capture time`}, please review and delete items
- you believe are duplicates{' '}
+ {captureTime && !fileHashes && ' and capture time'}
+ {fileHashes && !captureTime && ' and file hashes'}
+ {fileHashes && captureTime && ', capture time and file hashes'},
+ please review and delete items you believe are duplicates{' '}
>
),
STOP_ALL_UPLOADS_MESSAGE: