type-detect

This commit is contained in:
Manav Rathi 2024-04-24 18:02:15 +05:30
parent 319fe0fb71
commit f3c798148c
No known key found for this signature in database
38 changed files with 142 additions and 240 deletions

View file

@ -1,6 +1,3 @@
import { FILE_TYPE } from "@/media/file";
import { FileTypeInfo } from "types/upload";
export const RAW_FORMATS = [
"heic",
"rw2",
@ -14,42 +11,3 @@ export const RAW_FORMATS = [
"dng",
"tif",
];
/**
 * Formats that type detection was found to miss for some files.
 *
 * Each entry pairs an `exactType` with the file type and MIME type that
 * should be assumed for it.
 */
export const WHITELISTED_FILE_FORMATS: FileTypeInfo[] = [
    {
        fileType: FILE_TYPE.IMAGE,
        exactType: "jpeg",
        mimeType: "image/jpeg",
    },
    {
        fileType: FILE_TYPE.IMAGE,
        exactType: "jpg",
        mimeType: "image/jpeg",
    },
    {
        fileType: FILE_TYPE.VIDEO,
        exactType: "webm",
        mimeType: "video/webm",
    },
    {
        fileType: FILE_TYPE.VIDEO,
        exactType: "mod",
        mimeType: "video/mpeg",
    },
    {
        fileType: FILE_TYPE.VIDEO,
        exactType: "mp4",
        mimeType: "video/mp4",
    },
    {
        fileType: FILE_TYPE.IMAGE,
        exactType: "gif",
        mimeType: "image/gif",
    },
    {
        fileType: FILE_TYPE.VIDEO,
        exactType: "dv",
        mimeType: "video/x-dv",
    },
    {
        fileType: FILE_TYPE.VIDEO,
        exactType: "wmv",
        mimeType: "video/x-ms-asf",
    },
    {
        fileType: FILE_TYPE.VIDEO,
        exactType: "hevc",
        mimeType: "video/hevc",
    },
    {
        fileType: FILE_TYPE.IMAGE,
        exactType: "raf",
        mimeType: "image/x-fuji-raf",
    },
    {
        fileType: FILE_TYPE.IMAGE,
        exactType: "orf",
        mimeType: "image/x-olympus-orf",
    },
    {
        fileType: FILE_TYPE.IMAGE,
        exactType: "crw",
        mimeType: "image/x-canon-crw",
    },
];
/** File formats that are known to not be media files. */
export const KNOWN_NON_MEDIA_FORMATS = [
    "xmp",
    "html",
    "txt",
];

View file

@ -1,4 +1,4 @@
import { FILE_TYPE } from "@/media/file";
import { FILE_TYPE } from "@/media/file-type";
import log from "@/next/log";
import PairedSuccessfullyOverlay from "components/PairedSuccessfullyOverlay";
import { PhotoAuditorium } from "components/PhotoAuditorium";

View file

@ -1,4 +1,4 @@
import { FILE_TYPE } from "@/media/file";
import { FILE_TYPE } from "@/media/file-type";
import ComlinkCryptoWorker from "@ente/shared/crypto";
import { CustomError } from "@ente/shared/error";
import HTTPService from "@ente/shared/network/HTTPService";

View file

@ -0,0 +1,39 @@
import { KnownFileTypeInfos } from "@/media/file-type";
import { nameAndExtension } from "@/next/file";
import log from "@/next/log";
import FileType from "file-type";
/**
 * Try to deduce the MIME type for the given {@link file}.
 *
 * It first peeks into the file's initial contents to detect the MIME type. If
 * that doesn't give any results (or if reading the contents fails, which is
 * logged), it tries to deduce it from the file's name by looking up the
 * lowercased extension in {@link KnownFileTypeInfos}.
 *
 * @param file The file whose MIME type we want to detect.
 *
 * @returns The MIME type string if it could be deduced, or `undefined` if
 * neither the contents nor the file name's extension gave a result. A MIME
 * type detected from the contents is only returned if it is an image or a
 * video.
 *
 * @throws If a MIME type was detected from the file's contents but it is
 * neither an image nor a video.
 */
export const tryDetectMediaMIMEType = async (
    file: File,
): Promise<string | undefined> => {
    // Only this many leading bytes of the file are handed to the detector.
    const chunkSizeForTypeDetection = 4100;

    let mime: string | undefined;
    try {
        const fileChunk = file.slice(0, chunkSizeForTypeDetection);
        const chunkData = new Uint8Array(await fileChunk.arrayBuffer());
        const result = await FileType.fromBuffer(chunkData);
        mime = result?.mime;
    } catch (e) {
        // Interpolate the file's name; interpolating the File object itself
        // would just print "[object File]".
        log.error(
            `Failed to deduce MIME type from file contents for ${file.name}, will try with the file name instead`,
            e,
        );
    }

    if (mime) {
        if (mime.startsWith("image/") || mime.startsWith("video/")) return mime;
        throw new Error(`Detected MIME type ${mime} is not a media file`);
    }

    // Content based detection gave nothing, fall back to the extension.
    const [, ext] = nameAndExtension(file.name);
    if (!ext) return undefined;

    const lowercasedExt = ext.toLowerCase();
    return KnownFileTypeInfos.find((f) => f.exactType === lowercasedExt)
        ?.mimeType;
};

View file

@ -1,92 +0,0 @@
import { FILE_TYPE } from "@/media/file";
import { convertBytesToHumanReadable, nameAndExtension } from "@/next/file";
import log from "@/next/log";
import { CustomError } from "@ente/shared/error";
import {
KNOWN_NON_MEDIA_FORMATS,
WHITELISTED_FILE_FORMATS,
} from "constants/upload";
import FileType from "file-type";
import { FileTypeInfo } from "types/upload";
// Top-level part of a MIME type (the part before the "/") that marks a video.
const TYPE_VIDEO = "video";
// Top-level part of a MIME type (the part before the "/") that marks an image.
const TYPE_IMAGE = "image";
// Number of leading bytes of a file that are read and passed to the type
// detector.
const CHUNK_SIZE_FOR_TYPE_DETECTION = 4100;
/**
 * Determine the {@link FileTypeInfo} of the given file.
 *
 * The type is first detected from the initial contents of the file. The top
 * level part of the detected MIME type decides whether the file is treated as
 * an image or a video.
 *
 * If that fails for any reason (including the detected type not being an
 * image or a video), the file name's extension is used as a fallback by
 * looking it up in {@link WHITELISTED_FILE_FORMATS}.
 *
 * @param receivedFile The file whose type we want to determine.
 *
 * @returns The file type, exact type and MIME type of the file.
 *
 * @throws An error with the message `CustomError.UNSUPPORTED_FILE_FORMAT` if
 * the extension is a known non-media one, or if the contents were detected to
 * be something other than an image or a video. Otherwise an error with the
 * message `CustomError.TYPE_DETECTION_FAILED(extension)`.
 */
export async function getFileType(receivedFile: File): Promise<FileTypeInfo> {
    try {
        let fileType: FILE_TYPE;
        const typeResult = await extractFileType(receivedFile);
        const mimeTypeParts = typeResult.mime?.split("/");
        if (mimeTypeParts?.length !== 2) {
            throw Error(CustomError.INVALID_MIME_TYPE(typeResult.mime));
        }
        switch (mimeTypeParts[0]) {
            case TYPE_IMAGE:
                fileType = FILE_TYPE.IMAGE;
                break;
            case TYPE_VIDEO:
                fileType = FILE_TYPE.VIDEO;
                break;
            default:
                throw Error(CustomError.NON_MEDIA_FILE);
        }
        return {
            fileType,
            exactType: typeResult.ext,
            mimeType: typeResult.mime,
        };
    } catch (e) {
        const ne = nameAndExtension(receivedFile.name);
        // The file name might not have an extension. Fall back to an empty
        // string so that we end up throwing one of the intended errors below
        // instead of a TypeError from calling toLowerCase on undefined.
        const fileFormat = ne[1]?.toLowerCase() ?? "";
        const whiteListedFormat = WHITELISTED_FILE_FORMATS.find(
            (a) => a.exactType === fileFormat,
        );
        if (whiteListedFormat) {
            return whiteListedFormat;
        }
        if (KNOWN_NON_MEDIA_FORMATS.includes(fileFormat)) {
            throw Error(CustomError.UNSUPPORTED_FILE_FORMAT);
        }
        // Narrow before reading `message`; anything can be thrown.
        if (e instanceof Error && e.message === CustomError.NON_MEDIA_FILE) {
            log.error(`unsupported file format ${fileFormat}`, e);
            throw Error(CustomError.UNSUPPORTED_FILE_FORMAT);
        }
        log.error(`type detection failed for format ${fileFormat}`, e);
        throw Error(CustomError.TYPE_DETECTION_FAILED(fileFormat));
    }
}
/**
 * Detect the type of the given file by looking at only its first
 * {@link CHUNK_SIZE_FOR_TYPE_DETECTION} bytes.
 */
async function extractFileType(file: File) {
    const leadingBytes = await getUint8ArrayView(
        file.slice(0, CHUNK_SIZE_FOR_TYPE_DETECTION),
    );
    return getFileTypeFromBuffer(leadingBytes);
}
/**
 * Read the entire contents of the given blob into a {@link Uint8Array}.
 *
 * If reading fails, the failure is logged (along with the size of the blob)
 * and the original error is rethrown.
 */
export async function getUint8ArrayView(file: Blob): Promise<Uint8Array> {
    try {
        const contents = await file.arrayBuffer();
        return new Uint8Array(contents);
    } catch (err) {
        const readableSize = convertBytesToHumanReadable(file.size);
        log.error(`Failed to read file blob of size ${readableSize}`, err);
        throw err;
    }
}
/**
 * Run type detection on the given bytes, returning the detector's result.
 *
 * Throws if the detector does not return a result, or if the result it
 * returns doesn't have a MIME type. The error message includes the result
 * serialized as JSON when that is possible.
 */
async function getFileTypeFromBuffer(buffer: Uint8Array) {
    const detected = await FileType.fromBuffer(buffer);
    if (detected?.mime) return detected;

    let details: string;
    try {
        details = `result: ${JSON.stringify(detected)}`;
    } catch {
        details = "failed to stringify result";
    }
    throw Error(`mimetype missing from file type result - ${details}`);
}

View file

@ -1,4 +1,4 @@
import { FILE_TYPE } from "@/media/file";
import { FILE_TYPE } from "@/media/file-type";
export interface Metadata {
title: string;

View file

@ -1,10 +1,10 @@
import { FILE_TYPE } from "@/media/file";
import { FILE_TYPE } from "@/media/file-type";
import { decodeLivePhoto } from "@/media/live-photo";
import log from "@/next/log";
import ComlinkCryptoWorker from "@ente/shared/crypto";
import { RAW_FORMATS } from "constants/upload";
import CastDownloadManager from "services/castDownloadManager";
import { getFileType } from "services/typeDetectionService";
import { tryDetectMediaMIMEType } from "services/detect-type";
import {
EncryptedEnteFile,
EnteFile,
@ -132,10 +132,11 @@ export const getPreviewableImage = async (
);
fileBlob = new Blob([imageData]);
}
const fileType = await getFileType(
const mimeType = await tryDetectMediaMIMEType(
new File([fileBlob], file.metadata.title),
);
fileBlob = new Blob([fileBlob], { type: fileType.mimeType });
if (!mimeType) return undefined;
fileBlob = new Blob([fileBlob], { type: mimeType });
return fileBlob;
} catch (e) {
log.error("failed to download file", e);

View file

@ -1,4 +1,4 @@
import { FILE_TYPE } from "@/media/file";
import { FILE_TYPE } from "@/media/file-type";
import log from "@/next/log";
import { PHOTOS_PAGES } from "@ente/shared/constants/pages";
import { CustomError } from "@ente/shared/error";

View file

@ -1,4 +1,4 @@
import { FILE_TYPE } from "@/media/file";
import { FILE_TYPE } from "@/media/file-type";
import { nameAndExtension } from "@/next/file";
import log from "@/next/log";
import { FlexWrapper } from "@ente/shared/components/Container";

View file

@ -16,7 +16,7 @@ import {
isSupportedRawFormat,
} from "utils/file";
import { FILE_TYPE } from "@/media/file";
import { FILE_TYPE } from "@/media/file-type";
import { FlexWrapper } from "@ente/shared/components/Container";
import EnteSpinner from "@ente/shared/components/EnteSpinner";
import AlbumOutlined from "@mui/icons-material/AlbumOutlined";

View file

@ -1,4 +1,4 @@
import { FILE_TYPE } from "@/media/file";
import { FILE_TYPE } from "@/media/file-type";
import { Overlay } from "@ente/shared/components/Container";
import PhotoOutlined from "@mui/icons-material/PhotoOutlined";
import PlayCircleOutlineOutlined from "@mui/icons-material/PlayCircleOutlineOutlined";

View file

@ -1,4 +1,4 @@
import { FILE_TYPE } from "@/media/file";
import { FILE_TYPE } from "@/media/file-type";
import CloseIcon from "@mui/icons-material/Close";
import { IconButton } from "@mui/material";
import { t } from "i18next";

View file

@ -1,4 +1,4 @@
import { FILE_TYPE } from "@/media/file";
import { FILE_TYPE } from "@/media/file-type";
import log from "@/next/log";
import { Overlay } from "@ente/shared/components/Container";
import { CustomError } from "@ente/shared/error";

View file

@ -1,50 +1,5 @@
import { FILE_TYPE } from "@/media/file";
import { ENCRYPTION_CHUNK_SIZE } from "@ente/shared/crypto/constants";
import { FileTypeInfo, Location } from "types/upload";
/**
 * Formats that type detection was found to miss for some files.
 *
 * Each entry pairs an `exactType` with the file type and MIME type that
 * should be assumed for it.
 */
export const WHITELISTED_FILE_FORMATS: FileTypeInfo[] = [
    {
        fileType: FILE_TYPE.IMAGE,
        exactType: "jpeg",
        mimeType: "image/jpeg",
    },
    {
        fileType: FILE_TYPE.IMAGE,
        exactType: "jpg",
        mimeType: "image/jpeg",
    },
    {
        fileType: FILE_TYPE.VIDEO,
        exactType: "webm",
        mimeType: "video/webm",
    },
    {
        fileType: FILE_TYPE.VIDEO,
        exactType: "mod",
        mimeType: "video/mpeg",
    },
    {
        fileType: FILE_TYPE.VIDEO,
        exactType: "mp4",
        mimeType: "video/mp4",
    },
    {
        fileType: FILE_TYPE.IMAGE,
        exactType: "gif",
        mimeType: "image/gif",
    },
    {
        fileType: FILE_TYPE.VIDEO,
        exactType: "dv",
        mimeType: "video/x-dv",
    },
    {
        fileType: FILE_TYPE.VIDEO,
        exactType: "wmv",
        mimeType: "video/x-ms-asf",
    },
    {
        fileType: FILE_TYPE.VIDEO,
        exactType: "hevc",
        mimeType: "video/hevc",
    },
    {
        fileType: FILE_TYPE.IMAGE,
        exactType: "raf",
        mimeType: "image/x-fuji-raf",
    },
    {
        fileType: FILE_TYPE.IMAGE,
        exactType: "orf",
        mimeType: "image/x-olympus-orf",
    },
    {
        fileType: FILE_TYPE.IMAGE,
        exactType: "crw",
        mimeType: "image/x-canon-crw",
    },
    {
        fileType: FILE_TYPE.VIDEO,
        exactType: "mov",
        mimeType: "video/quicktime",
    },
];
/** File formats that are known to not be media files. */
export const KNOWN_NON_MEDIA_FORMATS = [
    "xmp",
    "html",
    "txt",
];
import { Location } from "types/upload";
/**
 * The chunk size of the un-encrypted file which is read and encrypted before
 * uploading it as a single part.
 *
 * The value is 20 * 1024 * 1024 (presumably bytes, i.e. 20 MiB).
 */
export const MULTIPART_PART_SIZE = 20 * 1024 ** 2;

View file

@ -1,4 +1,4 @@
import { FILE_TYPE } from "@/media/file";
import { FILE_TYPE } from "@/media/file-type";
import { ensureElectron } from "@/next/electron";
import log from "@/next/log";
import ComlinkCryptoWorker from "@ente/shared/crypto";

View file

@ -1,4 +1,4 @@
import { FILE_TYPE } from "@/media/file";
import { FILE_TYPE } from "@/media/file-type";
import log from "@/next/log";
import HTTPService from "@ente/shared/network/HTTPService";
import { getEndpoint } from "@ente/shared/network/api";

View file

@ -1,4 +1,4 @@
import { FILE_TYPE } from "@/media/file";
import { FILE_TYPE } from "@/media/file-type";
import { decodeLivePhoto } from "@/media/live-photo";
import { openCache, type BlobCache } from "@/next/blob-cache";
import log from "@/next/log";

View file

@ -1,9 +1,10 @@
import { type FileTypeInfo } from "@/media/file-type";
import log from "@/next/log";
import { validateAndGetCreationUnixTimeInMicroSeconds } from "@ente/shared/time";
import { NULL_LOCATION } from "constants/upload";
import exifr from "exifr";
import piexif from "piexifjs";
import { FileTypeInfo, Location } from "types/upload";
import { Location } from "types/upload";
type ParsedEXIFData = Record<string, any> &
Partial<{

View file

@ -1,4 +1,4 @@
import { FILE_TYPE } from "@/media/file";
import { FILE_TYPE } from "@/media/file-type";
import { decodeLivePhoto } from "@/media/live-photo";
import { ensureElectron } from "@/next/electron";
import log from "@/next/log";

View file

@ -1,4 +1,4 @@
import { FILE_TYPE } from "@/media/file";
import { FILE_TYPE } from "@/media/file-type";
import { decodeLivePhoto } from "@/media/live-photo";
import { ensureElectron } from "@/next/electron";
import log from "@/next/log";

View file

@ -1,4 +1,4 @@
import { FILE_TYPE } from "@/media/file";
import { FILE_TYPE } from "@/media/file-type";
import log from "@/next/log";
import { validateAndGetCreationUnixTimeInMicroSeconds } from "@ente/shared/time";
import type { FixOption } from "components/FixCreationTime";

View file

@ -1,4 +1,4 @@
import { FILE_TYPE } from "@/media/file";
import { FILE_TYPE } from "@/media/file-type";
import log from "@/next/log";
import { ComlinkWorker } from "@/next/worker/comlink-worker";
import { eventBus, Events } from "@ente/shared/events";

View file

@ -1,4 +1,4 @@
import { FILE_TYPE } from "@/media/file";
import { FILE_TYPE } from "@/media/file-type";
import log from "@/next/log";
import { MLSyncContext, MLSyncFileContext } from "types/machineLearning";
import {

View file

@ -1,4 +1,4 @@
import { FILE_TYPE } from "@/media/file";
import { FILE_TYPE } from "@/media/file-type";
import log from "@/next/log";
import * as chrono from "chrono-node";
import { t } from "i18next";

View file

@ -1,13 +1,13 @@
import { FILE_TYPE } from "@/media/file";
import {
FILE_TYPE,
KnownFileTypeInfos,
KnownNonMediaFileExtensions,
type FileTypeInfo,
} from "@/media/file-type";
import log from "@/next/log";
import { ElectronFile } from "@/next/types/file";
import { CustomError } from "@ente/shared/error";
import {
KNOWN_NON_MEDIA_FORMATS,
WHITELISTED_FILE_FORMATS,
} from "constants/upload";
import FileType, { FileTypeResult } from "file-type";
import { FileTypeInfo } from "types/upload";
import { getFileExtension } from "utils/file";
import { getUint8ArrayView } from "./readerService";
@ -50,13 +50,13 @@ export async function getFileType(
};
} catch (e) {
const fileFormat = getFileExtension(receivedFile.name);
const whiteListedFormat = WHITELISTED_FILE_FORMATS.find(
const whiteListedFormat = KnownFileTypeInfos.find(
(a) => a.exactType === fileFormat,
);
if (whiteListedFormat) {
return whiteListedFormat;
}
if (KNOWN_NON_MEDIA_FORMATS.includes(fileFormat)) {
if (KnownNonMediaFileExtensions.includes(fileFormat)) {
throw Error(CustomError.UNSUPPORTED_FILE_FORMAT);
}
if (e.message === CustomError.NON_MEDIA_FILE) {

View file

@ -1,4 +1,4 @@
import { FILE_TYPE } from "@/media/file";
import { FILE_TYPE, type FileTypeInfo } from "@/media/file-type";
import { getFileNameSize } from "@/next/file";
import log from "@/next/log";
import { ElectronFile } from "@/next/types/file";
@ -17,7 +17,6 @@ import * as ffmpegService from "services/ffmpeg";
import { getElectronFileStream, getFileStream } from "services/readerService";
import { FilePublicMagicMetadataProps } from "types/file";
import {
FileTypeInfo,
Metadata,
ParsedExtractedMetadata,
type LivePhotoAssets2,

View file

@ -1,11 +1,10 @@
import { FILE_TYPE } from "@/media/file";
import { FILE_TYPE, type FileTypeInfo } from "@/media/file-type";
import log from "@/next/log";
import { type Electron } from "@/next/types/ipc";
import { withTimeout } from "@ente/shared/utils";
import { BLACK_THUMBNAIL_BASE64 } from "constants/upload";
import * as ffmpeg from "services/ffmpeg";
import { heicToJPEG } from "services/heic-convert";
import { FileTypeInfo } from "types/upload";
import { isFileHEIC } from "utils/file";
/** Maximum width or height of the generated thumbnail */

View file

@ -1,4 +1,4 @@
import { FILE_TYPE } from "@/media/file";
import { FILE_TYPE } from "@/media/file-type";
import { potentialFileTypeFromExtension } from "@/media/live-photo";
import { ensureElectron } from "@/next/electron";
import { nameAndExtension } from "@/next/file";

View file

@ -1,4 +1,4 @@
import { FILE_TYPE } from "@/media/file";
import { FILE_TYPE, type FileTypeInfo } from "@/media/file-type";
import { encodeLivePhoto } from "@/media/live-photo";
import { ensureElectron } from "@/next/electron";
import { basename } from "@/next/file";
@ -28,7 +28,6 @@ import {
BackupedFile,
EncryptedFile,
FileInMemory,
FileTypeInfo,
FileWithMetadata,
ProcessedFile,
PublicUploadProps,

View file

@ -1,4 +1,4 @@
import { FILE_TYPE } from "@/media/file";
import { FILE_TYPE } from "@/media/file-type";
import { City } from "services/locationSearchService";
import { LocationTagData } from "types/entity";
import { EnteFile } from "types/file";

View file

@ -1,4 +1,4 @@
import { FILE_TYPE } from "@/media/file";
import { FILE_TYPE } from "@/media/file-type";
import type { ElectronFile } from "@/next/types/file";
import {
B64EncryptionResult,
@ -46,14 +46,6 @@ export interface MultipartUploadURLs {
completeURL: string;
}
/**
 * Information about the type of a file.
 */
export interface FileTypeInfo {
/** The broad category of the file, one of the FILE_TYPE values. */
fileType: FILE_TYPE;
/**
 * A specific format identifier like "jpeg" or "mov".
 *
 * NOTE(review): elsewhere in this changeset this is compared against the
 * lowercased extension of a file name, so it looks like it is expected to be
 * a lowercase extension without the dot. Confirm against the producers.
 */
exactType: string;
/** The MIME type of the file (e.g. "image/jpeg"), if known. */
mimeType?: string;
// NOTE(review): nothing visible here reads or writes imageType and
// videoType, so their intended contents should be confirmed against callers.
imageType?: string;
videoType?: string;
}
export interface UploadAsset {
isLivePhoto?: boolean;
file?: File | ElectronFile;

View file

@ -1,4 +1,4 @@
import { FILE_TYPE } from "@/media/file";
import { FILE_TYPE, type FileTypeInfo } from "@/media/file-type";
import { decodeLivePhoto } from "@/media/live-photo";
import log from "@/next/log";
import { CustomErrorMessage, type Electron } from "@/next/types/ipc";
@ -35,7 +35,6 @@ import {
SetFilesDownloadProgressAttributesCreator,
} from "types/gallery";
import { VISIBILITY_STATE } from "types/magicMetadata";
import { FileTypeInfo } from "types/upload";
import { isArchivedFile, updateMagicMetadata } from "utils/magicMetadata";
import { safeFileName } from "utils/native-fs";
import { writeStream } from "utils/native-stream";

View file

@ -1,4 +1,4 @@
import { FILE_TYPE } from "@/media/file";
import { FILE_TYPE } from "@/media/file-type";
import { decodeLivePhoto } from "@/media/live-photo";
import log from "@/next/log";
import PQueue from "p-queue";

View file

@ -1,4 +1,4 @@
import { FILE_TYPE } from "@/media/file";
import { FILE_TYPE } from "@/media/file-type";
import log from "@/next/log";
import { LivePhotoSourceURL, SourceURLs } from "services/download";
import { EnteFile } from "types/file";

View file

@ -1,4 +1,4 @@
import { FILE_TYPE } from "@/media/file";
import { FILE_TYPE } from "@/media/file-type";
import { tryToParseDateTime } from "@ente/shared/time";
import { getLocalCollections } from "services/collectionService";
import { getLocalFiles } from "services/fileService";

View file

@ -0,0 +1,58 @@
/**
 * The broad category that a file falls into.
 *
 * The numeric values are spelled out explicitly; they are the same as the
 * ones that implicit numbering would assign.
 */
export enum FILE_TYPE {
    IMAGE = 0,
    VIDEO = 1,
    LIVE_PHOTO = 2,
    OTHERS = 3,
}
/**
 * Information about the type of a file.
 */
export interface FileTypeInfo {
/** The broad category of the file, one of the {@link FILE_TYPE} values. */
fileType: FILE_TYPE;
/**
 * A specific format identifier like "jpeg" or "mov".
 *
 * NOTE(review): elsewhere in this changeset this is compared against the
 * lowercased extension of a file name, so it looks like it is expected to be
 * a lowercase extension without the dot. Confirm against the producers.
 */
exactType: string;
/** The MIME type of the file (e.g. "image/jpeg"), if known. */
mimeType?: string;
// NOTE(review): nothing visible here reads or writes imageType and
// videoType, so their intended contents should be confirmed against callers.
imageType?: string;
videoType?: string;
}
/**
 * Formats that type detection was found to miss for some files.
 *
 * Each entry pairs an `exactType` with the file type and MIME type that
 * should be assumed for it.
 */
export const KnownFileTypeInfos: FileTypeInfo[] = [
    {
        fileType: FILE_TYPE.IMAGE,
        exactType: "jpeg",
        mimeType: "image/jpeg",
    },
    {
        fileType: FILE_TYPE.IMAGE,
        exactType: "jpg",
        mimeType: "image/jpeg",
    },
    {
        fileType: FILE_TYPE.VIDEO,
        exactType: "webm",
        mimeType: "video/webm",
    },
    {
        fileType: FILE_TYPE.VIDEO,
        exactType: "mod",
        mimeType: "video/mpeg",
    },
    {
        fileType: FILE_TYPE.VIDEO,
        exactType: "mp4",
        mimeType: "video/mp4",
    },
    {
        fileType: FILE_TYPE.IMAGE,
        exactType: "gif",
        mimeType: "image/gif",
    },
    {
        fileType: FILE_TYPE.VIDEO,
        exactType: "dv",
        mimeType: "video/x-dv",
    },
    {
        fileType: FILE_TYPE.VIDEO,
        exactType: "wmv",
        mimeType: "video/x-ms-asf",
    },
    {
        fileType: FILE_TYPE.VIDEO,
        exactType: "hevc",
        mimeType: "video/hevc",
    },
    {
        fileType: FILE_TYPE.IMAGE,
        exactType: "raf",
        mimeType: "image/x-fuji-raf",
    },
    {
        fileType: FILE_TYPE.IMAGE,
        exactType: "orf",
        mimeType: "image/x-olympus-orf",
    },
    {
        fileType: FILE_TYPE.IMAGE,
        exactType: "crw",
        mimeType: "image/x-canon-crw",
    },
    {
        fileType: FILE_TYPE.VIDEO,
        exactType: "mov",
        mimeType: "video/quicktime",
    },
];
/** File name extensions that are known to not belong to media files. */
export const KnownNonMediaFileExtensions = [
    "xmp",
    "html",
    "txt",
];

View file

@ -1,6 +0,0 @@
/**
 * The broad category that a file falls into.
 *
 * The numeric values are spelled out explicitly; they are the same as the
 * ones that implicit numbering would assign.
 */
export enum FILE_TYPE {
    IMAGE = 0,
    VIDEO = 1,
    LIVE_PHOTO = 2,
    OTHERS = 3,
}

View file

@ -1,6 +1,6 @@
import { fileNameFromComponents, nameAndExtension } from "@/next/file";
import JSZip from "jszip";
import { FILE_TYPE } from "./file";
import { FILE_TYPE } from "./file-type";
const potentialImageExtensions = [
"heic",