dedupe by file hash
This commit is contained in:
parent
12d341b1be
commit
dad5e57e11
|
@ -17,6 +17,12 @@ const VerticalLine = styled.div`
|
|||
background: #303030;
|
||||
`;
|
||||
|
||||
// Styled wrapper for the label text rendered next to a deduplication checkbox.
// It carries the same margin and font-size values that were previously written
// as an inline style on the "club by capture time" label's <div>.
const CheckboxText = styled.div`
|
||||
margin-left: 0.5em;
|
||||
font-size: 16px;
|
||||
margin-right: 0.8em;
|
||||
`;
|
||||
|
||||
interface IProps {
|
||||
deleteFileHelper: () => void;
|
||||
setDialogMessage: SetDialogMessage;
|
||||
|
@ -55,7 +61,27 @@ export default function DeduplicateOptions({
|
|||
{count} {constants.SELECTED}
|
||||
</div>
|
||||
</SelectionContainer>
|
||||
|
||||
<input
|
||||
type="checkbox"
|
||||
style={{
|
||||
width: '1em',
|
||||
height: '1em',
|
||||
}}
|
||||
value={
|
||||
deduplicateContext.clubSameFileHashesOnly ? 'true' : 'false'
|
||||
}
|
||||
onChange={() => {
|
||||
deduplicateContext.setClubSameFileHashesOnly(
|
||||
!deduplicateContext.clubSameFileHashesOnly
|
||||
);
|
||||
}}></input>
|
||||
<CheckboxText>{constants.CLUB_BY_FILE_HASH}</CheckboxText>
|
||||
<div
|
||||
style={{
|
||||
marginRight: '14px',
|
||||
}}>
|
||||
<VerticalLine />
|
||||
</div>
|
||||
<input
|
||||
type="checkbox"
|
||||
style={{
|
||||
|
@ -70,14 +96,7 @@ export default function DeduplicateOptions({
|
|||
!deduplicateContext.clubSameTimeFilesOnly
|
||||
);
|
||||
}}></input>
|
||||
<div
|
||||
style={{
|
||||
marginLeft: '0.5em',
|
||||
fontSize: '16px',
|
||||
marginRight: '0.8em',
|
||||
}}>
|
||||
{constants.CLUB_BY_CAPTURE_TIME}
|
||||
</div>
|
||||
<CheckboxText>{constants.CLUB_BY_CAPTURE_TIME}</CheckboxText>
|
||||
<div>
|
||||
<VerticalLine />
|
||||
</div>
|
||||
|
|
|
@ -6,6 +6,7 @@ import React, { createContext, useContext, useEffect, useState } from 'react';
|
|||
import {
|
||||
getDuplicateFiles,
|
||||
clubDuplicatesByTime,
|
||||
clubDuplicatesBySameFileHashes,
|
||||
} from 'services/deduplicationService';
|
||||
import { syncFiles, trashFiles } from 'services/fileService';
|
||||
import { EnteFile } from 'types/file';
|
||||
|
@ -43,6 +44,7 @@ export default function Deduplicate() {
|
|||
} = useContext(AppContext);
|
||||
const [duplicateFiles, setDuplicateFiles] = useState<EnteFile[]>(null);
|
||||
const [clubSameTimeFilesOnly, setClubSameTimeFilesOnly] = useState(false);
|
||||
const [clubSameFileHashesOnly, setClubSameFileHashesOnly] = useState(false);
|
||||
const [fileSizeMap, setFileSizeMap] = useState(new Map<number, number>());
|
||||
const [collectionNameMap, setCollectionNameMap] = useState(
|
||||
new Map<number, string>()
|
||||
|
@ -67,7 +69,7 @@ export default function Deduplicate() {
|
|||
|
||||
useEffect(() => {
|
||||
syncWithRemote();
|
||||
}, [clubSameTimeFilesOnly]);
|
||||
}, [clubSameTimeFilesOnly, clubSameFileHashesOnly]);
|
||||
|
||||
const syncWithRemote = async () => {
|
||||
startLoading();
|
||||
|
@ -79,9 +81,15 @@ export default function Deduplicate() {
|
|||
setCollectionNameMap(collectionNameMap);
|
||||
const files = await syncFiles(collections, () => null);
|
||||
let duplicates = await getDuplicateFiles(files, collectionNameMap);
|
||||
|
||||
if (clubSameTimeFilesOnly) {
|
||||
duplicates = clubDuplicatesByTime(duplicates);
|
||||
}
|
||||
|
||||
if (clubSameFileHashesOnly) {
|
||||
duplicates = clubDuplicatesBySameFileHashes(duplicates);
|
||||
}
|
||||
|
||||
const currFileSizeMap = new Map<number, number>();
|
||||
let allDuplicateFiles: EnteFile[] = [];
|
||||
let toSelectFileIDs: number[] = [];
|
||||
|
@ -149,13 +157,16 @@ export default function Deduplicate() {
|
|||
collectionNameMap,
|
||||
clubSameTimeFilesOnly,
|
||||
setClubSameTimeFilesOnly,
|
||||
clubSameFileHashesOnly,
|
||||
setClubSameFileHashesOnly,
|
||||
fileSizeMap,
|
||||
isOnDeduplicatePage: true,
|
||||
}}>
|
||||
{duplicateFiles.length > 0 && (
|
||||
<Info>
|
||||
{constants.DEDUPLICATION_LOGIC_MESSAGE(
|
||||
clubSameTimeFilesOnly
|
||||
clubSameTimeFilesOnly,
|
||||
clubSameFileHashesOnly
|
||||
)}
|
||||
</Info>
|
||||
)}
|
||||
|
|
|
@ -2,6 +2,7 @@ import { EnteFile } from 'types/file';
|
|||
import { getEndpoint } from 'utils/common/apiUtil';
|
||||
import { getToken } from 'utils/common/key';
|
||||
import { logError } from 'utils/sentry';
|
||||
import { areFilesWithFileHashSame, fileHashExists } from 'utils/upload';
|
||||
import HTTPService from './HTTPService';
|
||||
|
||||
const ENDPOINT = getEndpoint();
|
||||
|
@ -113,6 +114,64 @@ export function clubDuplicatesByTime(dupes: DuplicateFiles[]) {
|
|||
return result;
|
||||
}
|
||||
|
||||
/**
 * Narrows each duplicate group down to sub-groups of files whose hashes match.
 *
 * For every entry in `dupes`:
 *  - files for which `fileHashExists(file.metadata)` is false are dropped;
 *  - the entry is skipped entirely when fewer than two files remain;
 *  - the remaining files are ordered by a hash key (`metadata.hash`, or
 *    `imageHash_videoHash` when `metadata.hash` is null/undefined) so that
 *    matching files end up next to each other;
 *  - neighbouring files that `areFilesWithFileHashSame` reports as equal are
 *    collected into one run, and every run of two or more files is emitted
 *    as its own group carrying the size of the entry it came from.
 *
 * @param dupes duplicate groups to refine
 * @returns the refined groups; the input array is not modified
 */
export function clubDuplicatesBySameFileHashes(dupes: DuplicateFiles[]) {
    const clubbed: DuplicateFiles[] = [];

    dupes.forEach((group) => {
        const hashedFiles = group.files.filter((candidate) =>
            fileHashExists(candidate.metadata)
        );

        // A single file (or none) cannot form a hash-based duplicate set.
        if (hashedFiles.length < 2) {
            return;
        }

        // Pair every file with its sort key, then order by that key so
        // files sharing a hash become adjacent.
        const keyed = hashedFiles
            .map((candidate) => ({
                file: candidate,
                hash:
                    candidate.metadata.hash ??
                    `${candidate.metadata.imageHash}_${candidate.metadata.videoHash}`,
            }))
            .sort((left, right) => left.hash.localeCompare(right.hash));

        // Files of the run currently being accumulated.
        let run: EnteFile[] = [];

        // Emit the current run only if it actually contains duplicates.
        const flushRun = () => {
            if (run.length > 1) {
                clubbed.push({ files: run, size: group.size });
            }
        };

        keyed.forEach((entry, index) => {
            const continuesRun =
                index > 0 &&
                areFilesWithFileHashSame(
                    keyed[index - 1].file.metadata,
                    entry.file.metadata
                );

            if (!continuesRun) {
                // Hash changed (or this is the first file): close the
                // previous run and start a fresh one.
                flushRun();
                run = [];
            }
            run.push(entry.file);
        });

        // The last run is never closed inside the loop.
        flushRun();
    });

    return clubbed;
}
|
||||
|
||||
async function fetchDuplicateFileIDs() {
|
||||
try {
|
||||
const response = await HTTPService.get(
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
export type DeduplicateContextType = {
|
||||
clubSameTimeFilesOnly: boolean;
|
||||
setClubSameTimeFilesOnly: (clubSameTimeFilesOnly: boolean) => void;
|
||||
clubSameFileHashesOnly: boolean;
|
||||
setClubSameFileHashesOnly: (clubSameFileHashes: boolean) => void;
|
||||
fileSizeMap: Map<number, number>;
|
||||
isOnDeduplicatePage: boolean;
|
||||
collectionNameMap: Map<number, string>;
|
||||
|
@ -9,6 +11,8 @@ export type DeduplicateContextType = {
|
|||
export const DefaultDeduplicateContext = {
|
||||
clubSameTimeFilesOnly: false,
|
||||
setClubSameTimeFilesOnly: () => null,
|
||||
clubSameFileHashesOnly: false,
|
||||
setClubSameFileHashesOnly: () => null,
|
||||
fileSizeMap: new Map<number, number>(),
|
||||
isOnDeduplicatePage: false,
|
||||
collectionNameMap: new Map<number, string>(),
|
||||
|
|
|
@ -721,13 +721,19 @@ const englishConstants = {
|
|||
DEDUPLICATE_FILES: 'deduplicate files',
|
||||
NO_DUPLICATES_FOUND: "you've no duplicate files that can be cleared",
|
||||
CLUB_BY_CAPTURE_TIME: 'club by capture time',
|
||||
CLUB_BY_FILE_HASH: 'club by file hashes',
|
||||
FILES: 'files',
|
||||
EACH: 'each',
|
||||
DEDUPLICATION_LOGIC_MESSAGE: (captureTime: boolean) => (
|
||||
DEDUPLICATION_LOGIC_MESSAGE: (
|
||||
captureTime: boolean,
|
||||
fileHashes: boolean
|
||||
) => (
|
||||
<>
|
||||
the following files were clubbed based on their sizes
|
||||
{captureTime && ` and capture time`}, please review and delete items
|
||||
you believe are duplicates{' '}
|
||||
{captureTime && !fileHashes && ' and capture time'}
|
||||
{fileHashes && !captureTime && ' and file hashes'}
|
||||
{fileHashes && captureTime && ', capture time and file hashes'},
|
||||
please review and delete items you believe are duplicates{' '}
|
||||
</>
|
||||
),
|
||||
STOP_ALL_UPLOADS_MESSAGE:
|
||||
|
|
Loading…
Reference in a new issue