[mob] Skip already indexed files on remote

This commit is contained in:
Neeraj Gupta 2024-03-19 15:06:32 +05:30
parent 8a4ca15eb8
commit 3e4cf4b4f2
7 changed files with 242 additions and 108 deletions

@ -1 +1 @@
Subproject commit 41456452f29d64e8deb623a3c927524bcf9f111b
Subproject commit 77b965b8aa443b92bd5988a20c4453f66aa67e06

View file

@ -32,9 +32,8 @@ import 'package:photos/services/home_widget_service.dart';
import 'package:photos/services/local_file_update_service.dart';
import 'package:photos/services/local_sync_service.dart';
import "package:photos/services/location_service.dart";
import 'package:photos/services/machine_learning/file_ml/remote_fileml_service.dart';
import "package:photos/services/machine_learning/machine_learning_controller.dart";
import "package:photos/services/machine_learning/remote_embedding_service.dart";
import "package:photos/services/machine_learning/semantic_search/remote_embedding.dart";
import 'package:photos/services/machine_learning/semantic_search/semantic_search_service.dart';
import 'package:photos/services/memories_service.dart';
import 'package:photos/services/push_service.dart';
@ -213,7 +212,7 @@ Future<void> _init(bool isBackground, {String via = ''}) async {
LocalFileUpdateService.instance.init(preferences);
SearchService.instance.init();
StorageBonusService.instance.init(preferences);
RemoteEmbeddingService.instance.init(preferences);
RemoteFileMLService.instance.init(preferences);
if (!isBackground &&
Platform.isAndroid &&
await HomeWidgetService.instance.countHomeWidgets() == 0) {

View file

@ -20,7 +20,6 @@ import "package:photos/face/db.dart";
import "package:photos/face/model/box.dart";
import "package:photos/face/model/detection.dart" as face_detection;
import "package:photos/face/model/face.dart";
import "package:photos/face/model/file_ml.dart";
import "package:photos/face/model/landmark.dart";
import "package:photos/models/file/extensions/file_props.dart";
import "package:photos/models/file/file.dart";
@ -34,7 +33,8 @@ import "package:photos/services/face_ml/face_embedding/face_embedding_exceptions
import 'package:photos/services/face_ml/face_embedding/onnx_face_embedding.dart';
import "package:photos/services/face_ml/face_ml_exceptions.dart";
import "package:photos/services/face_ml/face_ml_result.dart";
import "package:photos/services/machine_learning/remote_embedding_service.dart";
import 'package:photos/services/machine_learning/file_ml/file_ml.dart';
import 'package:photos/services/machine_learning/file_ml/remote_fileml_service.dart';
import "package:photos/services/search_service.dart";
import "package:photos/utils/file_util.dart";
import 'package:photos/utils/image_ml_isolate.dart';
@ -413,7 +413,6 @@ class FaceMlService {
try {
isImageIndexRunning = true;
_logger.info('starting image indexing');
final List<EnteFile> enteFiles =
await SearchService.instance.getAllFiles();
final Set<int> alreadyIndexedFiles =
@ -435,6 +434,44 @@ class FaceMlService {
outerLoop:
for (final chunk in chunks) {
final futures = <Future>[];
// Uploaded IDs of the files in this chunk that are not yet present in
// alreadyIndexedFiles; only these are looked up on the remote.
final List<int> fileIds = [];
// Try to find embeddings on the remote server
for (final f in chunk) {
if (!alreadyIndexedFiles.contains(f.uploadedFileID)) {
fileIds.add(f.uploadedFileID!);
}
}
// NOTE(review): fileIds can be empty here (every file of the chunk already
// indexed); the request below is issued regardless.
try {
final res =
await RemoteFileMLService.instance.getFilessEmbedding(fileIds);
// Faces to persist locally, and the file IDs they account for.
final List<Face> faces = [];
final indexedFileIds = <int>{};
for (FileMl fileMl in res.mlData.values) {
// Remote data written with a different face ML version is ignored, so the
// file is not marked as indexed by this pass.
if (fileMl.faceEmbedding.version != faceMlVersion) continue;
if (fileMl.faceEmbedding.faces.isEmpty) {
// No faces stored remotely: insert a single placeholder Face with the id
// '<fileID>-0' and an empty embedding. The fourth argument is -1.0 when
// the remote entry carries error == true and 0.0 otherwise.
faces.add(
Face(
'${fileMl.fileID}-0',
fileMl.fileID,
<double>[],
(fileMl.faceEmbedding.error ?? false) ? -1.0 : 0.0,
face_detection.Detection.empty(),
0.0,
),
);
} else {
faces.addAll(fileMl.faceEmbedding.faces);
}
indexedFileIds.add(fileMl.fileID);
}
// Persist the remote results first, then record the files as indexed.
await FaceMLDataDB.instance.bulkInsertFaces(faces);
alreadyIndexedFiles.addAll(indexedFileIds);
_logger.info('already indexed files ${indexedFileIds.length}');
} catch (e, s) {
// Logged and rethrown: a failed remote lookup propagates to the caller
// instead of continuing with the rest of this chunk.
_logger.severe("err while getting files embeddings", e, s);
rethrow;
}
for (final enteFile in chunk) {
if (isImageIndexRunning == false) {
_logger.info("indexAllImages() was paused, stopping");
@ -447,7 +484,7 @@ class FaceMlService {
fileSkippedCount++;
continue;
}
futures.add(processImage(enteFile, alreadyIndexedFiles));
futures.add(processImage(enteFile));
}
await Future.wait(futures);
fileAnalyzedCount += futures.length;
@ -468,10 +505,7 @@ class FaceMlService {
}
}
Future<void> processImage(
EnteFile enteFile,
Set<int> alreadyIndexedFiles,
) async {
Future<void> processImage(EnteFile enteFile) async {
_logger.info(
"`indexAllImages()` on file number start processing image with uploadedFileID: ${enteFile.uploadedFileID}",
);
@ -545,11 +579,15 @@ class FaceMlService {
}
}
_logger.info("inserting ${faces.length} faces for ${result.fileId}");
await RemoteEmbeddingService.instance.putFaceEmbedding(
await RemoteFileMLService.instance.putFileEmbedding(
enteFile,
FileMl(
enteFile.uploadedFileID!,
FaceEmbeddings(faces, result.mlVersion),
FaceEmbeddings(
faces,
result.mlVersion,
error: result.errorOccured ? true : null,
),
),
);
await FaceMLDataDB.instance.bulkInsertFaces(faces);

View file

@ -1,5 +1,39 @@
import "package:photos/face/model/face.dart";
/// ML data attached to a single file: its face embeddings and, optionally,
/// a CLIP embedding.
class FileMl {
  /// ID of the file this data belongs to.
  final int fileID;

  /// Face embeddings of the file; serialized under the `faceEmbedding` key.
  // NOTE(review): an earlier comment on this field read "json: face", which
  // does not match the key used by toJson/fromJson — confirm the wire format.
  final FaceEmbeddings faceEmbedding;

  /// CLIP embedding of the file, when one exists.
  final ClipEmbedding? clipEmbedding;

  /// Update time; never written by [toJson] nor read by [FileMl.fromJson].
  int? updationTime;

  FileMl(this.fileID, this.faceEmbedding, {this.clipEmbedding});

  /// Builds a [FileMl] from the map produced by [toJson].
  ///
  /// A missing or `null` `clipEmbedding` entry yields a `null`
  /// [clipEmbedding].
  factory FileMl.fromJson(Map<String, dynamic> json) {
    final id = json['fileID'] as int;
    final face =
        FaceEmbeddings.fromJson(json['faceEmbedding'] as Map<String, dynamic>);
    final rawClip = json['clipEmbedding'];
    ClipEmbedding? clip;
    if (rawClip != null) {
      clip = ClipEmbedding.fromJson(rawClip as Map<String, dynamic>);
    }
    return FileMl(id, face, clipEmbedding: clip);
  }

  /// Serializes this object; `clipEmbedding` is emitted as `null` when
  /// absent.
  Map<String, dynamic> toJson() {
    return <String, dynamic>{
      'fileID': fileID,
      'faceEmbedding': faceEmbedding.toJson(),
      'clipEmbedding': clipEmbedding?.toJson(),
    };
  }
}
class FaceEmbeddings {
final List<Face> faces;
final int version;
@ -36,49 +70,18 @@ class FaceEmbeddings {
/// A CLIP embedding vector together with an optional version number.
// NOTE(review): this span contains two unnamed constructors and `framwork`
// members next to a constructor that no longer takes `framwork`; it looks
// like the before/after lines of a diff shown together — confirm which
// variant is the current one before editing the code.
class ClipEmbedding {
final int? version;
final String framwork;
final List<double> embedding;
ClipEmbedding(this.embedding, this.framwork, {this.version});
ClipEmbedding(this.embedding, {this.version});
/// Serializes this embedding to a JSON-compatible map.
Map<String, dynamic> toJson() => {
'version': version,
'framwork': framwork,
'embedding': embedding,
};
/// Builds a [ClipEmbedding] from a map; `embedding` is copied into a
/// `List<double>` and `version` may be null.
factory ClipEmbedding.fromJson(Map<String, dynamic> json) {
return ClipEmbedding(
List<double>.from(json['embedding'] as List),
json['framwork'] as String,
version: json['version'] as int?,
);
}
}
/// ML data attached to a single file, serialized with the short `face` /
/// `clip` keys plus an optional `last4Hash` string.
class FileMl {
  /// ID of the file this data belongs to.
  final int fileID;

  /// Face embeddings of the file (JSON key `face`).
  final FaceEmbeddings face;

  /// CLIP embedding of the file, when present (JSON key `clip`).
  final ClipEmbedding? clip;

  /// Optional string stored under the `last4Hash` key.
  final String? last4Hash;

  FileMl(this.fileID, this.face, {this.clip, this.last4Hash});

  /// Builds a [FileMl] from the map produced by [toJson].
  factory FileMl.fromJson(Map<String, dynamic> json) {
    final id = json['fileID'] as int;
    final faceData =
        FaceEmbeddings.fromJson(json['face'] as Map<String, dynamic>);
    final rawClip = json['clip'];
    ClipEmbedding? clipData;
    if (rawClip != null) {
      clipData = ClipEmbedding.fromJson(rawClip as Map<String, dynamic>);
    }
    return FileMl(
      id,
      faceData,
      clip: clipData,
      last4Hash: json['last4Hash'] as String?,
    );
  }

  /// Serializes this object; absent optional members are emitted as `null`.
  Map<String, dynamic> toJson() {
    return <String, dynamic>{
      'fileID': fileID,
      'face': face.toJson(),
      'clip': clip?.toJson(),
      'last4Hash': last4Hash,
    };
  }
}

View file

@ -0,0 +1,12 @@
import 'package:photos/services/machine_learning/file_ml/file_ml.dart';
/// Result of a bulk lookup of per-file ML data.
class FilesMLDataResponse {
  FilesMLDataResponse(
    this.mlData, {
    required this.notIndexedFileIds,
    required this.fetchErrorFileIds,
  });

  /// ML data that was found, keyed by file ID.
  final Map<int, FileMl> mlData;

  /// File IDs reported as having no ML data.
  final Set<int> notIndexedFileIds;

  /// File IDs reported as failed to fetch.
  final Set<int> fetchErrorFileIds;
}

View file

@ -0,0 +1,144 @@
import "dart:async";
import "dart:convert";
import "dart:typed_data";
import "package:computer/computer.dart";
import "package:logging/logging.dart";
import "package:photos/core/network/network.dart";
import "package:photos/db/files_db.dart";
import "package:photos/models/file/file.dart";
import 'package:photos/services/machine_learning/file_ml/file_ml.dart';
import "package:photos/services/machine_learning/file_ml/files_ml_data_response.dart";
import "package:photos/services/machine_learning/semantic_search/embedding_store.dart";
import "package:photos/services/machine_learning/semantic_search/remote_embedding.dart";
import "package:photos/utils/crypto_util.dart";
import "package:photos/utils/file_download_util.dart";
import "package:shared_preferences/shared_preferences.dart";
/// Stores and retrieves per-file ML data ([FileMl]) through the remote
/// `/embeddings` endpoints.
///
/// Payloads are JSON-encoded, encrypted with the key returned by
/// [getFileKey] for the file, and transferred base64-encoded together with
/// their decryption header.
class RemoteFileMLService {
  RemoteFileMLService._privateConstructor();

  static final RemoteFileMLService instance =
      RemoteFileMLService._privateConstructor();

  /// Value of the `model` field sent with every request of this service.
  static const _model = 'file-ml-clip-face';

  final _logger = Logger("RemoteFileMLService");
  final _dio = NetworkClient.instance.enteDio;

  // The three members below are not read by any method of this class yet.
  final _computer = Computer.shared();
  late SharedPreferences _preferences;
  Completer<void>? _syncStatus;

  /// Stores [prefs] on the service.
  void init(SharedPreferences prefs) {
    _preferences = prefs;
  }

  /// Encrypts [fileML] with the key of [file] and uploads it via
  /// `PUT /embeddings`.
  ///
  /// Request failures are logged and rethrown.
  Future<void> putFileEmbedding(EnteFile file, FileMl fileML) async {
    final encryptionKey = getFileKey(file);
    final embeddingJSON = jsonEncode(fileML.toJson());
    final encryptedEmbedding = await CryptoUtil.encryptChaCha(
      utf8.encode(embeddingJSON) as Uint8List,
      encryptionKey,
    );
    final encryptedData =
        CryptoUtil.bin2base64(encryptedEmbedding.encryptedData!);
    final header = CryptoUtil.bin2base64(encryptedEmbedding.header!);
    try {
      await _dio.put(
        "/embeddings",
        data: {
          "fileID": file.uploadedFileID!,
          "model": _model,
          "encryptedEmbedding": encryptedData,
          "decryptionHeader": header,
        },
      );
    } catch (e, s) {
      _logger.severe("Failed to put embedding", e, s);
      rethrow;
    }
  }

  /// Fetches and decrypts the stored ML data for [fileIds] via
  /// `POST /embeddings/files`.
  ///
  /// Returns an empty response without contacting the server when [fileIds]
  /// is empty. Response lists that are missing or `null` are treated as
  /// empty. Any other failure is logged and rethrown.
  Future<FilesMLDataResponse> getFilessEmbedding(
    List<int> fileIds,
  ) async {
    // Nothing to ask for: avoid a pointless round trip (and a possible
    // server-side rejection of an empty ID list).
    if (fileIds.isEmpty) {
      return FilesMLDataResponse(
        <int, FileMl>{},
        notIndexedFileIds: <int>{},
        fetchErrorFileIds: <int>{},
      );
    }
    try {
      final res = await _dio.post(
        "/embeddings/files",
        data: {
          "fileIDs": fileIds,
          "model": _model,
        },
      );
      // A list may arrive as null instead of []; fall back to an empty list
      // rather than failing the cast.
      final remoteEmb = (res.data['embeddings'] as List?) ?? const [];
      final noFileIds = (res.data['noDataFileIDs'] as List?) ?? const [];
      final errFileIds = (res.data['errFileIDs'] as List?) ?? const [];

      final remoteEmbeddings = <RemoteEmbedding>[
        for (final entry in remoteEmb) RemoteEmbedding.fromMap(entry),
      ];
      final notIndexedFileIds = <int>{for (final id in noFileIds) id as int};
      final fetchErrorFileIds = <int>{for (final id in errFileIds) id as int};

      final fileIDToFileMl = await decryptFileMLData(remoteEmbeddings);
      return FilesMLDataResponse(
        fileIDToFileMl,
        notIndexedFileIds: notIndexedFileIds,
        fetchErrorFileIds: fetchErrorFileIds,
      );
    } catch (e, s) {
      _logger.severe("Failed to get embeddings", e, s);
      rethrow;
    }
  }

  /// Decrypts [remoteEmbeddings] into a map from file ID to [FileMl].
  ///
  /// Entries whose file is not returned by [FilesDB.getFilesFromIDs] are
  /// skipped, because their decryption key cannot be derived.
  Future<Map<int, FileMl>> decryptFileMLData(
    List<RemoteEmbedding> remoteEmbeddings,
  ) async {
    if (remoteEmbeddings.isEmpty) {
      return <int, FileMl>{};
    }
    final fileMap = await FilesDB.instance
        .getFilesFromIDs(remoteEmbeddings.map((e) => e.fileID).toList());
    final inputs = <EmbeddingsDecoderInput>[];
    for (final embedding in remoteEmbeddings) {
      final file = fileMap[embedding.fileID];
      if (file == null) {
        continue;
      }
      inputs.add(EmbeddingsDecoderInput(embedding, getFileKey(file)));
    }
    // todo: use compute or isolate
    return decryptFileMLComputer(
      {
        "inputs": inputs,
      },
    );
  }

  /// Decrypts and JSON-decodes every entry of `args["inputs"]`, which must
  /// be a `List<EmbeddingsDecoderInput>`.
  ///
  /// Returns the decoded [FileMl] objects keyed by the file ID of their
  /// embedding. The first entry that fails to decrypt or decode aborts the
  /// whole call with that error.
  Future<Map<int, FileMl>> decryptFileMLComputer(
    Map<String, dynamic> args,
  ) async {
    final result = <int, FileMl>{};
    final inputs = args["inputs"] as List<EmbeddingsDecoderInput>;
    for (final input in inputs) {
      final decryptArgs = <String, dynamic>{
        "source": CryptoUtil.base642bin(input.embedding.encryptedEmbedding),
        "key": input.decryptionKey,
        "header": CryptoUtil.base642bin(input.embedding.decryptionHeader),
      };
      final embeddingData = chachaDecryptData(decryptArgs);
      final decodedJson = jsonDecode(utf8.decode(embeddingData));
      result[input.embedding.fileID] =
          FileMl.fromJson(decodedJson as Map<String, dynamic>);
    }
    return result;
  }
}

View file

@ -1,62 +0,0 @@
import "dart:async";
import "dart:convert";
import "dart:typed_data";
import "package:computer/computer.dart";
import "package:logging/logging.dart";
import "package:photos/core/network/network.dart";
import "package:photos/face/model/face.dart";
import "package:photos/face/model/file_ml.dart";
import "package:photos/models/file/file.dart";
import "package:photos/utils/crypto_util.dart";
import "package:photos/utils/file_download_util.dart";
import "package:shared_preferences/shared_preferences.dart";
/// Uploads encrypted face ML data for a file through `PUT /embeddings`.
class RemoteEmbeddingService {
  RemoteEmbeddingService._privateConstructor();

  static final RemoteEmbeddingService instance =
      RemoteEmbeddingService._privateConstructor();

  static const kEmbeddingsSyncTimeKey = "sync_time_embeddings_v2";

  final _logger = Logger("RemoteEmbeddingService");
  final _dio = NetworkClient.instance.enteDio;
  final _computer = Computer.shared();
  late SharedPreferences _preferences;
  Completer<void>? _syncStatus;

  /// Stores [prefs] on the service.
  void init(SharedPreferences prefs) {
    _preferences = prefs;
  }

  /// Serializes [fileML] to JSON, encrypts it with the key of [file] and
  /// uploads the base64-encoded ciphertext and header.
  ///
  /// Request failures are logged and rethrown.
  Future<void> putFaceEmbedding(EnteFile file, FileMl fileML) async {
    _logger.info("Pushing embedding for $file");
    final key = getFileKey(file);
    final payload = utf8.encode(jsonEncode(fileML.toJson())) as Uint8List;
    final encrypted = await CryptoUtil.encryptChaCha(payload, key);
    final cipherText = CryptoUtil.bin2base64(encrypted.encryptedData!);
    final headerText = CryptoUtil.bin2base64(encrypted.header!);
    try {
      // The response body (e.g. "updatedAt") is currently not used.
      await _dio.put(
        "/embeddings",
        data: {
          "fileID": file.uploadedFileID!,
          "model": 'onnx-yolo5-mobile',
          "encryptedEmbedding": cipherText,
          "decryptionHeader": headerText,
        },
      );
    } catch (e, s) {
      _logger.severe("Failed to put embedding", e, s);
      rethrow;
    }
  }
}