group dedupes by same file hashes when possible

This commit is contained in:
Rushikesh Tote 2022-05-23 18:32:35 +05:30
parent a463a37456
commit 55fd0fda4b
5 changed files with 37 additions and 48 deletions

View file

@ -70,27 +70,6 @@ export default function DeduplicateOptions({
{count} {constants.SELECTED}
</div>
</SelectionContainer>
<input
type="checkbox"
style={{
width: '1em',
height: '1em',
}}
value={
deduplicateContext.clubSameFileHashesOnly ? 'true' : 'false'
}
onChange={() => {
deduplicateContext.setClubSameFileHashesOnly(
!deduplicateContext.clubSameFileHashesOnly
);
}}></input>
<CheckboxText>{constants.CLUB_BY_FILE_HASH}</CheckboxText>
<div
style={{
marginRight: '14px',
}}>
<VerticalLine />
</div>
<input
type="checkbox"
style={{

View file

@ -6,7 +6,6 @@ import React, { createContext, useContext, useEffect, useState } from 'react';
import {
getDuplicateFiles,
clubDuplicatesByTime,
clubDuplicatesBySameFileHashes,
} from 'services/deduplicationService';
import { syncFiles, trashFiles } from 'services/fileService';
import { EnteFile } from 'types/file';
@ -44,7 +43,6 @@ export default function Deduplicate() {
} = useContext(AppContext);
const [duplicateFiles, setDuplicateFiles] = useState<EnteFile[]>(null);
const [clubSameTimeFilesOnly, setClubSameTimeFilesOnly] = useState(false);
const [clubSameFileHashesOnly, setClubSameFileHashesOnly] = useState(false);
const [fileSizeMap, setFileSizeMap] = useState(new Map<number, number>());
const [collectionNameMap, setCollectionNameMap] = useState(
new Map<number, string>()
@ -69,7 +67,7 @@ export default function Deduplicate() {
useEffect(() => {
syncWithRemote();
}, [clubSameTimeFilesOnly, clubSameFileHashesOnly]);
}, [clubSameTimeFilesOnly]);
const syncWithRemote = async () => {
startLoading();
@ -86,10 +84,6 @@ export default function Deduplicate() {
duplicates = clubDuplicatesByTime(duplicates);
}
if (clubSameFileHashesOnly) {
duplicates = clubDuplicatesBySameFileHashes(duplicates);
}
const currFileSizeMap = new Map<number, number>();
let allDuplicateFiles: EnteFile[] = [];
let toSelectFileIDs: number[] = [];
@ -161,16 +155,13 @@ export default function Deduplicate() {
collectionNameMap,
clubSameTimeFilesOnly,
setClubSameTimeFilesOnly,
clubSameFileHashesOnly,
setClubSameFileHashesOnly,
fileSizeMap,
isOnDeduplicatePage: true,
}}>
{duplicateFiles.length > 0 && (
<Info>
{constants.DEDUPLICATION_LOGIC_MESSAGE(
clubSameTimeFilesOnly,
clubSameFileHashesOnly
clubSameTimeFilesOnly
)}
</Info>
)}

View file

@ -58,10 +58,12 @@ export async function getDuplicateFiles(
);
if (duplicateFiles.length > 1) {
result.push({
files: duplicateFiles,
size: dupe.size,
});
result.push(
...getDupesGroupedBySameFileHashes(
duplicateFiles,
dupe.size
)
);
}
}
@ -174,6 +176,32 @@ export function clubDuplicatesBySameFileHashes(dupes: DuplicateFiles[]) {
return result;
}
/**
 * Splits one set of duplicate files into groups, preferring the groups that
 * `clubDuplicatesBySameFileHashes` produces for them.
 *
 * The groups returned by `clubDuplicatesBySameFileHashes` form the start of
 * the result. Files that did not land in any of those groups are appended as
 * one additional group, provided at least two such files remain.
 *
 * @param files files to be grouped
 * @param size size value attached to every group built here
 *     (presumably the common size of `files` — confirm against the caller)
 * @returns the groups from `clubDuplicatesBySameFileHashes`, followed by the
 *     leftover group when there is one
 */
function getDupesGroupedBySameFileHashes(files: EnteFile[], size: number) {
    const groups = clubDuplicatesBySameFileHashes([{ files, size }]);

    // IDs of every file that already belongs to one of the returned groups.
    const groupedIDs = new Set<number>();
    groups.forEach((group) => {
        group.files.forEach((file) => {
            groupedIDs.add(file.id);
        });
    });

    const leftovers = files.filter((file) => !groupedIDs.has(file.id));

    // A single leftover has nothing to be a duplicate of, so it is dropped.
    if (leftovers.length >= 2) {
        groups.push({ files: leftovers, size });
    }

    return groups;
}
async function fetchDuplicateFileIDs() {
try {
const response = await HTTPService.get(

View file

@ -1,8 +1,6 @@
export type DeduplicateContextType = {
clubSameTimeFilesOnly: boolean;
setClubSameTimeFilesOnly: (clubSameTimeFilesOnly: boolean) => void;
clubSameFileHashesOnly: boolean;
setClubSameFileHashesOnly: (clubSameFileHashes: boolean) => void;
fileSizeMap: Map<number, number>;
isOnDeduplicatePage: boolean;
collectionNameMap: Map<number, string>;
@ -11,8 +9,6 @@ export type DeduplicateContextType = {
export const DefaultDeduplicateContext = {
clubSameTimeFilesOnly: false,
setClubSameTimeFilesOnly: () => null,
clubSameFileHashesOnly: false,
setClubSameFileHashesOnly: () => null,
fileSizeMap: new Map<number, number>(),
isOnDeduplicatePage: false,
collectionNameMap: new Map<number, string>(),

View file

@ -724,16 +724,11 @@ const englishConstants = {
CLUB_BY_FILE_HASH: 'club by file hashes',
FILES: 'files',
EACH: 'each',
DEDUPLICATION_LOGIC_MESSAGE: (
captureTime: boolean,
fileHashes: boolean
) => (
DEDUPLICATION_LOGIC_MESSAGE: (captureTime: boolean) => (
<>
the following files were clubbed based on their sizes
{captureTime && !fileHashes && ' and capture time'}
{fileHashes && !captureTime && ' and file hashes'}
{fileHashes && captureTime && ', capture time and file hashes'},
please review and delete items you believe are duplicates{' '}
{captureTime && ' and capture time'}, please review and delete items
you believe are duplicates{' '}
</>
),
STOP_ALL_UPLOADS_MESSAGE: