2022-05-20 05:05:36 +00:00
|
|
|
import { FILE_TYPE } from 'constants/file';
|
2022-03-22 13:01:59 +00:00
|
|
|
import { EnteFile } from 'types/file';
|
2022-05-20 05:05:36 +00:00
|
|
|
import { Metadata } from 'types/upload';
|
2022-03-22 13:01:59 +00:00
|
|
|
import { getEndpoint } from 'utils/common/apiUtil';
|
|
|
|
import { getToken } from 'utils/common/key';
|
|
|
|
import { logError } from 'utils/sentry';
|
2022-05-20 05:05:36 +00:00
|
|
|
import { hasFileHash } from 'utils/upload';
|
2022-03-22 13:01:59 +00:00
|
|
|
import HTTPService from './HTTPService';
|
|
|
|
|
|
|
|
// API endpoint prefix that the request URLs in this module are built from;
// resolved once, when the module is loaded.
const ENDPOINT = getEndpoint();
|
|
|
|
|
|
|
|
/**
 * Shape of the body read from the `/files/duplicates` response.
 */
interface DuplicatesResponse {
    /** One entry per group of file IDs that the server reports together. */
    duplicates: {
        /** IDs of the files in this group. */
        fileIDs: number[];
        /** Size reported for the group (presumably in bytes — confirm against the API). */
        size: number;
    }[];
}
|
|
|
|
|
2022-04-17 13:28:55 +00:00
|
|
|
/**
 * Rank of well-known collection names, keyed by the lower-cased name.
 *
 * The rank of a name is its position in the list below. `sortDuplicateFiles`
 * orders files by this rank in descending order, so a file whose collection
 * has a lower rank ends up later in its duplicate group.
 */
const DuplicateItemSortingOrderDescBasedOnCollectionName = Object.fromEntries(
    [
        'icloud library',
        'icloudlibrary',
        'recents',
        'recently added',
        'my photo stream',
    ].map((collectionName, rank): [string, number] => [collectionName, rank])
);
|
2022-04-17 10:13:15 +00:00
|
|
|
|
|
|
|
// Rank given to any collection name that has no entry in
// DuplicateItemSortingOrderDescBasedOnCollectionName.
const OtherCollectionNameRanking = 5;
|
|
|
|
|
2022-03-22 13:01:59 +00:00
|
|
|
/**
 * A group of files that are considered duplicates of one another.
 */
interface DuplicateFiles {
    /** The files that make up the group. */
    files: Array<EnteFile>;
    /** Size carried over from the duplicate entry the group was built from. */
    size: number;
}
|
|
|
|
|
2022-04-17 10:13:15 +00:00
|
|
|
/**
 * Builds groups of duplicate files out of the given `files`.
 *
 * The duplicate groups (file IDs plus a size) are fetched with
 * `fetchDuplicateFileIDs`. Each ID is resolved against `files`, IDs with no
 * match are skipped, and every group is ordered with `sortDuplicateFiles`.
 * Groups left with fewer than two files are dropped.
 *
 * @param files files the fetched duplicate IDs are resolved against
 * @param collectionNameMap collection ID → collection name, used to order
 * the files inside each group
 * @returns the duplicate groups, or `undefined` when they could not be
 * computed (failures are logged, never thrown)
 */
export async function getDuplicateFiles(
    files: EnteFile[],
    collectionNameMap: Map<number, string>
) {
    try {
        const dupes = await fetchDuplicateFileIDs();
        if (!dupes) {
            // fetchDuplicateFileIDs logs its own failure and resolves to
            // undefined. Iterating that value would only raise, and log, a
            // misleading "not iterable" TypeError on top of the real error.
            return undefined;
        }

        const fileMap = new Map<number, EnteFile>();
        for (const file of files) {
            fileMap.set(file.id, file);
        }

        const result: DuplicateFiles[] = [];

        for (const dupe of dupes) {
            // Keep only the IDs that correspond to a file we were given.
            let duplicateFiles: EnteFile[] = [];
            for (const fileID of dupe.fileIDs) {
                const file = fileMap.get(fileID);
                if (file !== undefined) {
                    duplicateFiles.push(file);
                }
            }

            duplicateFiles = await sortDuplicateFiles(
                duplicateFiles,
                collectionNameMap
            );

            // A single file is not a duplicate of anything.
            if (duplicateFiles.length > 1) {
                result.push({
                    files: duplicateFiles,
                    size: dupe.size,
                });
            }
        }

        return result;
    } catch (e) {
        logError(e, 'failed to get duplicate files');
    }
}
|
2022-03-22 13:01:59 +00:00
|
|
|
|
2022-04-03 07:33:42 +00:00
|
|
|
/**
 * Narrows every duplicate group down to the files that share the group's
 * most frequent creation time.
 *
 * When several creation times are equally frequent, the one that reaches
 * the highest count first (in file order) wins. Groups left with fewer than
 * two files are omitted from the result.
 *
 * @param dupes duplicate groups to narrow down
 * @returns new groups; each keeps the `size` of the group it came from
 */
export function clubDuplicatesByTime(dupes: DuplicateFiles[]) {
    const clubbed: DuplicateFiles[] = [];

    for (const { files: candidates, size } of dupes) {
        const occurrences = new Map<number, number>();
        let dominantTime = 0;
        let dominantCount = 0;

        for (const candidate of candidates) {
            const time = candidate.metadata.creationTime;
            const seen = (occurrences.get(time) ?? 0) + 1;
            occurrences.set(time, seen);

            // Strictly greater: on a tie the earlier leader is kept.
            if (seen > dominantCount) {
                dominantTime = time;
                dominantCount = seen;
            }
        }

        const sameTimeFiles = candidates.filter(
            (candidate) => candidate.metadata.creationTime === dominantTime
        );

        if (sameTimeFiles.length > 1) {
            clubbed.push({ files: sameTimeFiles, size });
        }
    }

    return clubbed;
}
|
|
|
|
|
2022-05-19 07:31:04 +00:00
|
|
|
/**
 * Splits every duplicate group into sub-groups of files whose hashes match.
 *
 * Files for which `hasFileHash` is false are ignored. The remaining files of
 * a group are sorted by a hash key (`metadata.hash`, or
 * `imageHash_videoHash` when that is absent) so that equal hashes become
 * neighbours; consecutive files accepted by `areFileHashesSame` are then
 * collected into one run. Only runs of two or more files are emitted.
 *
 * @param dupes duplicate groups to split
 * @returns the hash-matched sub-groups, each carrying the `size` of the
 * group it was taken from
 */
export function clubDuplicatesBySameFileHashes(dupes: DuplicateFiles[]) {
    const clubbed: DuplicateFiles[] = [];

    for (const dupe of dupes) {
        const hashedFiles = dupe.files.filter((file) =>
            hasFileHash(file.metadata)
        );
        if (hashedFiles.length < 2) {
            continue;
        }

        const keyed = hashedFiles.map((file) => {
            const { hash, imageHash, videoHash } = file.metadata;
            return { file, hash: hash ?? `${imageHash}_${videoHash}` };
        });
        keyed.sort((left, right) => left.hash.localeCompare(right.hash));

        // Walk the sorted files, growing the current run while each file
        // matches the one right before it.
        let run: EnteFile[] = [];
        let previous: EnteFile | undefined;
        for (const { file } of keyed) {
            const startsNewRun =
                previous !== undefined &&
                !areFileHashesSame(previous.metadata, file.metadata);
            if (startsNewRun) {
                if (run.length > 1) {
                    clubbed.push({ files: run, size: dupe.size });
                }
                run = [];
            }
            run.push(file);
            previous = file;
        }

        // Flush the run that was still open when the files ran out.
        if (run.length > 1) {
            clubbed.push({ files: run, size: dupe.size });
        }
    }

    return clubbed;
}
|
|
|
|
|
2022-03-26 14:38:52 +00:00
|
|
|
/**
 * Requests the duplicate file groups from `${ENDPOINT}/files/duplicates`,
 * sending the value of `getToken()` as the `X-Auth-Token` header.
 *
 * @returns the `duplicates` array of the response body. If anything throws,
 * the error is logged and the promise resolves to `undefined`.
 */
async function fetchDuplicateFileIDs() {
    try {
        const url = `${ENDPOINT}/files/duplicates`;
        const headers = { 'X-Auth-Token': getToken() };
        const { data } = await HTTPService.get(url, null, headers);
        return (data as DuplicatesResponse).duplicates;
    } catch (err) {
        logError(err, 'failed to fetch duplicate file IDs');
    }
}
|
2022-04-17 09:30:26 +00:00
|
|
|
|
2022-04-17 10:13:15 +00:00
|
|
|
/**
 * Orders the files of one duplicate group by the name of the collection
 * each file belongs to.
 *
 * Every file is ranked by looking up its lower-cased collection name in
 * `DuplicateItemSortingOrderDescBasedOnCollectionName`, falling back to
 * `OtherCollectionNameRanking`. Files are then sorted by rank, highest
 * first.
 *
 * The array is sorted in place and that same array is returned.
 *
 * @param files files to order
 * @param collectionNameMap collection ID → collection name. A file whose
 * collection has no entry here is ranked like any other unlisted
 * collection instead of making the sort throw.
 * @returns `files`, sorted
 */
async function sortDuplicateFiles(
    files: EnteFile[],
    collectionNameMap: Map<number, string>
) {
    const rankOf = (file: EnteFile): number => {
        const collectionName = collectionNameMap
            .get(file.collectionID)
            ?.toLocaleLowerCase();
        if (collectionName === undefined) {
            return OtherCollectionNameRanking;
        }
        // Own-property check: a bare lookup on the plain ranking object
        // would also find inherited members (e.g. `constructor`) for a
        // collection that happens to carry such a name, and the comparator
        // would then produce NaN.
        const isListed = Object.prototype.hasOwnProperty.call(
            DuplicateItemSortingOrderDescBasedOnCollectionName,
            collectionName
        );
        return isListed
            ? DuplicateItemSortingOrderDescBasedOnCollectionName[collectionName]
            : OtherCollectionNameRanking;
    };

    return files.sort(
        (firstFile, secondFile) => rankOf(secondFile) - rankOf(firstFile)
    );
}
|
2022-05-20 05:05:36 +00:00
|
|
|
|
|
|
|
/**
 * Tells whether two files' metadata carry the same hash(es).
 *
 * Which fields are compared is decided by the type of `firstFile` alone:
 * for a live photo both `imageHash` and `videoHash` must match, for
 * anything else `hash` must match.
 *
 * @param firstFile metadata of the first file; its `fileType` picks the comparison
 * @param secondFile metadata of the second file
 * @returns `true` when the compared hash fields are strictly equal
 */
function areFileHashesSame(firstFile: Metadata, secondFile: Metadata) {
    if (firstFile.fileType !== FILE_TYPE.LIVE_PHOTO) {
        return firstFile.hash === secondFile.hash;
    }

    const sameImage = firstFile.imageHash === secondFile.imageHash;
    return sameImage && firstFile.videoHash === secondFile.videoHash;
}
|