check for file hash when uploading

This commit is contained in:
Rushikesh Tote 2022-05-18 10:23:58 +05:30
parent 2b8d47e087
commit 11b755a461
2 changed files with 75 additions and 0 deletions

View file

@ -3,6 +3,7 @@ import { handleUploadError, CustomError } from 'utils/error';
import { logError } from 'utils/sentry';
import {
fileAlreadyInCollection,
findSameFileInOtherCollection,
shouldDedupeAcrossCollection,
} from 'utils/upload';
import UploadHttpClient from './uploadHttpClient';
@ -14,6 +15,7 @@ import { FileWithCollection, BackupedFile, UploadFile } from 'types/upload';
import { logUploadInfo } from 'utils/upload';
import { convertBytesToHumanReadable } from 'utils/billing';
import { sleep } from 'utils/common';
import { addToCollection } from 'services/collectionService';
interface UploadResponse {
fileUploadResult: FileUploadResults;
@ -53,6 +55,25 @@ export default async function uploader(
return { fileUploadResult: FileUploadResults.ALREADY_UPLOADED };
}
const sameFileInOtherCollection = findSameFileInOtherCollection(
existingFiles,
metadata,
collection.id
);
if (sameFileInOtherCollection) {
logUploadInfo(
`same file in other collection found for ${fileNameSize}`
);
const resultFile = Object.assign({}, sameFileInOtherCollection);
resultFile.collectionID = collection.id;
await addToCollection(collection, [resultFile]);
return {
fileUploadResult: FileUploadResults.UPLOADED,
file: resultFile,
};
}
// Exporting by album on iOS doesn't include files that belong to no album, and exporting all photos doesn't preserve album info.
// This change allows users to export by album and upload to ente, and then export all photos and upload only the files that were
// not already uploaded as part of the albums.

View file

@ -4,6 +4,7 @@ import { convertBytesToHumanReadable } from 'utils/billing';
import { formatDateTime } from 'utils/file';
import { getLogs, saveLogLine } from 'utils/storage';
import { A_SEC_IN_MICROSECONDS } from 'constants/upload';
import { FILE_TYPE } from 'constants/file';
const TYPE_JSON = 'json';
const DEDUPE_COLLECTION = new Set(['icloud library', 'icloudlibrary']);
@ -20,6 +21,28 @@ export function fileAlreadyInCollection(
return false;
}
/**
 * Looks for an already-known file, stored in a collection other than
 * `collectionID`, that matches the file being uploaded by both content hash
 * and title.
 *
 * @param existingFiles files to search through.
 * @param newFileMetadata metadata of the file about to be uploaded.
 * @param collectionID id of the target collection; files already in it are
 * never returned.
 * @returns the first matching file in array order, or `null` when the new
 * file carries no hash or no existing file matches.
 */
export function findSameFileInOtherCollection(
    existingFiles: EnteFile[],
    newFileMetadata: Metadata,
    collectionID: number
) {
    // Without a hash on the incoming file there is nothing to compare by.
    if (!fileHashExists(newFileMetadata)) {
        return null;
    }

    const match = existingFiles.find((candidate) => {
        // Only files from *other* collections are of interest.
        if (candidate.collectionID === collectionID) {
            return false;
        }
        const candidateMetadata = candidate.metadata;
        return (
            fileHashExists(candidateMetadata) &&
            areFilesWithFileHashSame(candidateMetadata, newFileMetadata) &&
            candidateMetadata.title === newFileMetadata.title
        );
    });

    return match ?? null;
}
export function shouldDedupeAcrossCollection(collectionName: string): boolean {
// using set to avoid unnecessary regex for removing spaces for each upload
return DEDUPE_COLLECTION.has(collectionName.toLocaleLowerCase());
@ -35,6 +58,10 @@ export function areFilesSame(
* precision of file times to prevent timing attacks and fingerprinting.
* Context: https://developer.mozilla.org/en-US/docs/Web/API/File/lastModified#reduced_time_precision
*/
if (fileHashExists(existingFile) && fileHashExists(newFile)) {
return areFilesWithFileHashSame(existingFile, newFile);
}
if (
existingFile.fileType === newFile.fileType &&
Math.abs(existingFile.creationTime - newFile.creationTime) <
@ -49,6 +76,33 @@ export function areFilesSame(
}
}
/**
 * Tells whether the metadata carries usable hash information: either a
 * non-empty `hash`, or both a non-empty `imageHash` and a non-empty
 * `videoHash`.
 *
 * @param file metadata to inspect.
 * @returns `true` when one of the two hash forms is present, else `false`.
 */
export function fileHashExists(file: Metadata) {
    if (file.hash) {
        return true;
    }
    // No single hash: both part hashes must be present together.
    return Boolean(file.imageHash && file.videoHash);
}
/**
 * Decides whether two files are the same by comparing their hashes.
 *
 * Files of different `fileType` never match. Images and videos are compared
 * by their single `hash`. Any other type is compared by its
 * `imageHash`/`videoHash` pair when either file carries one of those, and by
 * the single `hash` otherwise.
 *
 * A hash has to actually be present to produce a match: two missing hashes
 * are NOT treated as equal, so metadata lacking the relevant hash can never
 * be reported as a duplicate of another file.
 *
 * @param existingFile metadata of a file that is already known.
 * @param newFile metadata of the file being compared against it.
 * @returns `true` only when the types agree and the relevant hashes are
 * present and equal.
 */
export function areFilesWithFileHashSame(
    existingFile: Metadata,
    newFile: Metadata
): boolean {
    if (existingFile.fileType !== newFile.fileType) {
        return false;
    }

    const isSingleAsset =
        existingFile.fileType === FILE_TYPE.IMAGE ||
        existingFile.fileType === FILE_TYPE.VIDEO;
    const hasAnyPartHash = Boolean(
        existingFile.imageHash ||
            existingFile.videoHash ||
            newFile.imageHash ||
            newFile.videoHash
    );

    if (!isSingleAsset && hasAnyPartHash) {
        // Two-part file: both parts must be present and identical.
        return (
            Boolean(existingFile.imageHash) &&
            Boolean(existingFile.videoHash) &&
            existingFile.imageHash === newFile.imageHash &&
            existingFile.videoHash === newFile.videoHash
        );
    }

    // Single-hash comparison; a missing hash must not count as a match
    // (`undefined === undefined` would otherwise be true).
    return Boolean(existingFile.hash) && existingFile.hash === newFile.hash;
}
export function segregateMetadataAndMediaFiles(
filesWithCollectionToUpload: FileWithCollection[]
) {