// ente/src/services/deduplicationService.ts
import { FILE_TYPE } from 'constants/file';
import { EnteFile } from 'types/file';
import { Metadata } from 'types/upload';
import { getEndpoint } from 'utils/common/apiUtil';
import { getToken } from 'utils/common/key';
import { logError } from 'utils/sentry';
import { hasFileHash } from 'utils/upload';
import HTTPService from './HTTPService';

const ENDPOINT = getEndpoint();
/** Shape of the response returned by the remote `/files/duplicates` endpoint. */
interface DuplicatesResponse {
    duplicates: Array<{
        // IDs of files the server considers duplicates of one another
        fileIDs: number[];
        // size (in bytes) reported by the server for this duplicate group
        size: number;
    }>;
}
const DuplicateItemSortingOrderDescBasedOnCollectionName = Object.fromEntries([
['icloud library', 0],
['icloudlibrary', 1],
['recents', 2],
['recently added', 3],
['my photo stream', 4],
]);
2022-04-17 10:13:15 +00:00
const OtherCollectionNameRanking = 5;
2022-03-22 13:01:59 +00:00
/** A group of duplicate files together with the group size reported by the server. */
interface DuplicateFiles {
    files: EnteFile[];
    size: number;
}
export async function getDuplicateFiles(
files: EnteFile[],
collectionNameMap: Map<number, string>
2022-04-17 10:13:15 +00:00
) {
2022-03-26 14:38:52 +00:00
try {
const dupes = await fetchDuplicateFileIDs();
const fileMap = new Map<number, EnteFile>();
2022-04-17 10:13:15 +00:00
for (const file of files) {
2022-03-26 14:38:52 +00:00
fileMap.set(file.id, file);
2022-03-22 13:01:59 +00:00
}
const result: DuplicateFiles[] = [];
2022-03-26 14:38:52 +00:00
2022-03-22 13:01:59 +00:00
for (const dupe of dupes) {
2022-04-17 10:13:15 +00:00
let duplicateFiles: EnteFile[] = [];
2022-03-26 14:38:52 +00:00
for (const fileID of dupe.fileIDs) {
if (fileMap.has(fileID)) {
2022-04-17 10:13:15 +00:00
duplicateFiles.push(fileMap.get(fileID));
2022-03-22 13:01:59 +00:00
}
}
2022-04-17 10:13:15 +00:00
duplicateFiles = await sortDuplicateFiles(
duplicateFiles,
collectionNameMap
2022-04-17 10:13:15 +00:00
);
if (duplicateFiles.length > 1) {
2022-03-22 13:01:59 +00:00
result.push({
2022-04-17 10:13:15 +00:00
files: duplicateFiles,
2022-03-22 13:01:59 +00:00
size: dupe.size,
});
}
}
return result;
2022-03-26 14:38:52 +00:00
} catch (e) {
logError(e, 'failed to get duplicate files');
2022-03-22 13:01:59 +00:00
}
2022-03-26 14:38:52 +00:00
}
export function clubDuplicatesByTime(dupes: DuplicateFiles[]) {
2022-03-26 14:38:52 +00:00
const result: DuplicateFiles[] = [];
for (const dupe of dupes) {
let files: EnteFile[] = [];
const creationTimeCounter = new Map<number, number>();
let mostFreqCreationTime = 0;
let mostFreqCreationTimeCount = 0;
for (const file of dupe.files) {
const creationTime = file.metadata.creationTime;
if (creationTimeCounter.has(creationTime)) {
creationTimeCounter.set(
creationTime,
creationTimeCounter.get(creationTime) + 1
);
} else {
creationTimeCounter.set(creationTime, 1);
}
if (
creationTimeCounter.get(creationTime) >
mostFreqCreationTimeCount
) {
mostFreqCreationTime = creationTime;
mostFreqCreationTimeCount =
creationTimeCounter.get(creationTime);
}
files.push(file);
}
files = files.filter((file) => {
return file.metadata.creationTime === mostFreqCreationTime;
});
if (files.length > 1) {
result.push({
files,
size: dupe.size,
});
2022-03-22 13:01:59 +00:00
}
}
2022-03-26 14:38:52 +00:00
return result;
2022-03-22 13:01:59 +00:00
}
export function clubDuplicatesBySameFileHashes(dupes: DuplicateFiles[]) {
const result: DuplicateFiles[] = [];
for (const dupe of dupes) {
let files: EnteFile[] = [];
const filteredFiles = dupe.files.filter((file) => {
2022-05-19 12:19:42 +00:00
return hasFileHash(file.metadata);
2022-05-19 07:31:04 +00:00
});
if (filteredFiles.length <= 1) {
continue;
}
2022-05-20 08:05:03 +00:00
const dupesSortedByFileHash = filteredFiles
.map((file) => {
return {
file,
hash:
file.metadata.hash ??
`${file.metadata.imageHash}_${file.metadata.videoHash}`,
};
})
.sort((firstFile, secondFile) => {
return firstFile.hash.localeCompare(secondFile.hash);
});
2022-05-19 07:31:04 +00:00
files.push(dupesSortedByFileHash[0].file);
for (let i = 1; i < dupesSortedByFileHash.length; i++) {
if (
2022-05-20 05:05:36 +00:00
areFileHashesSame(
2022-05-19 07:31:04 +00:00
dupesSortedByFileHash[i - 1].file.metadata,
dupesSortedByFileHash[i].file.metadata
)
) {
files.push(dupesSortedByFileHash[i].file);
} else {
if (files.length > 1) {
result.push({
files: [...files],
size: dupe.size,
});
}
files = [dupesSortedByFileHash[i].file];
}
}
if (files.length > 1) {
result.push({
files,
size: dupe.size,
});
}
}
return result;
}
/**
 * Fetch the server-computed duplicate file groups.
 *
 * @returns the `duplicates` array from {@link DuplicatesResponse}.
 * @throws rethrows any request error after logging it. Previously the error
 * was swallowed and `undefined` returned, which made the caller's
 * `for...of` over the result throw a confusing TypeError instead.
 */
async function fetchDuplicateFileIDs(): Promise<
    DuplicatesResponse['duplicates']
> {
    try {
        const response = await HTTPService.get(
            `${ENDPOINT}/files/duplicates`,
            null,
            {
                'X-Auth-Token': getToken(),
            }
        );
        return (response.data as DuplicatesResponse).duplicates;
    } catch (e) {
        logError(e, 'failed to fetch duplicate file IDs');
        throw e;
    }
}
async function sortDuplicateFiles(
files: EnteFile[],
collectionNameMap: Map<number, string>
2022-04-17 10:13:15 +00:00
) {
2022-04-17 09:30:26 +00:00
return files.sort((firstFile, secondFile) => {
const firstCollectionName = collectionNameMap
2022-04-17 09:30:26 +00:00
.get(firstFile.collectionID)
.toLocaleLowerCase();
const secondCollectionName = collectionNameMap
2022-04-17 09:30:26 +00:00
.get(secondFile.collectionID)
.toLocaleLowerCase();
2022-04-17 10:13:15 +00:00
const firstFileRanking =
DuplicateItemSortingOrderDescBasedOnCollectionName[
firstCollectionName
] ?? OtherCollectionNameRanking;
const secondFileRanking =
DuplicateItemSortingOrderDescBasedOnCollectionName[
secondCollectionName
] ?? OtherCollectionNameRanking;
return secondFileRanking - firstFileRanking;
2022-04-17 09:30:26 +00:00
});
}
/**
 * Whether two file metadata records carry the same content hash(es).
 * Live photos compare their separate image and video hashes; every other
 * file type compares the single `hash` field.
 */
function areFileHashesSame(firstFile: Metadata, secondFile: Metadata) {
    return firstFile.fileType === FILE_TYPE.LIVE_PHOTO
        ? firstFile.imageHash === secondFile.imageHash &&
              firstFile.videoHash === secondFile.videoHash
        : firstFile.hash === secondFile.hash;
}