Move from SQLite to ObjectBox for storing embeddings

This commit is contained in:
vishnukvmd 2023-11-14 13:26:54 +05:30
parent b9cd9cf9cd
commit b54f082cb6
11 changed files with 570 additions and 368 deletions

View file

@ -12,6 +12,7 @@ import 'package:photos/core/event_bus.dart';
import 'package:photos/db/collections_db.dart';
import 'package:photos/db/files_db.dart';
import 'package:photos/db/memories_db.dart';
import "package:photos/db/object_box.dart";
import 'package:photos/db/public_keys_db.dart';
import 'package:photos/db/trash_db.dart';
import 'package:photos/db/upload_locks_db.dart';
@ -156,6 +157,7 @@ class Configuration {
_cachedToken = null;
_secretKey = null;
await FilesDB.instance.clearTable();
await ObjectBox.instance.clearTable();
await CollectionsDB.instance.clearTable();
await MemoriesDB.instance.clearTable();
await PublicKeysDB.instance.clearTable();

View file

@ -6,7 +6,6 @@ import 'package:path/path.dart';
import 'package:path_provider/path_provider.dart';
import "package:photos/extensions/stop_watch.dart";
import 'package:photos/models/backup_status.dart';
import "package:photos/models/embedding.dart";
import 'package:photos/models/file/file.dart';
import 'package:photos/models/file/file_type.dart';
import 'package:photos/models/file_load_result.dart';
@ -31,7 +30,6 @@ class FilesDB {
static const filesTable = 'files';
static const tempTable = 'temp_files';
static const embeddingsTable = 'clip_embeddings';
static const columnGeneratedID = '_id';
static const columnUploadedFileID = 'uploaded_file_id';
@ -59,8 +57,6 @@ class FilesDB {
static const columnThumbnailDecryptionHeader = 'thumbnail_decryption_header';
static const columnMetadataDecryptionHeader = 'metadata_decryption_header';
static const columnFileSize = 'file_size';
static const columnEmbedding = 'embedding';
static const columnModel = 'model';
// MMD -> Magic Metadata
static const columnMMdEncodedJson = 'mmd_encoded_json';
@ -90,7 +86,6 @@ class FilesDB {
...updateIndexes(),
...createEntityDataTable(),
...addAddedTime(),
...createEmbeddingsTable(),
];
final dbConfig = MigrationConfig(
@ -387,26 +382,12 @@ class FilesDB {
];
}
static List<String> createEmbeddingsTable() {
return [
'''
CREATE TABLE IF NOT EXISTS $embeddingsTable (
$columnUploadedFileID INTEGER PRIMARY KEY,
$columnModel TEXT NOT NULL,
$columnEmbedding TEXT NOT NULL,
$columnUpdationTime INTEGER
);
'''
];
}
Future<void> clearTable() async {
final db = await instance.database;
await db.delete(filesTable);
await db.delete("device_files");
await db.delete("device_collections");
await db.delete("entities");
await db.delete(embeddingsTable);
}
Future<void> deleteDB() async {
@ -1557,89 +1538,38 @@ class FilesDB {
return FileLoadResult(filteredFiles, files.length == limit);
}
Future<void> upsertEmbedding(Embedding embedding) async {
final db = await instance.database;
await db.insert(
embeddingsTable,
_getRowForEmbedding(embedding),
conflictAlgorithm: ConflictAlgorithm.replace,
);
}
Future<void> insertEmbeddings(List<Embedding> embeddings) async {
final db = await database;
var batch = db.batch();
int batchCounter = 0;
for (final embedding in embeddings) {
if (batchCounter == 400) {
await batch.commit(noResult: true);
batch = db.batch();
batchCounter = 0;
}
batch.insert(
embeddingsTable,
_getRowForEmbedding(embedding),
conflictAlgorithm: ConflictAlgorithm.replace,
);
batchCounter++;
}
await batch.commit(noResult: true);
}
Future<List<Embedding>> getAllEmbeddings() async {
/// Returns the distinct uploaded file IDs of rows in [filesTable] whose owner
/// is [ownerID].
///
/// Rows whose uploaded file ID is `NULL` or `-1` are excluded by the query.
Future<List<int>> getOwnedFileIDs(int ownerID) async {
  final db = await instance.database;
  final rows = await db.query(
    filesTable,
    columns: [columnUploadedFileID],
    // Bind the owner ID as an argument instead of splicing it into the SQL
    // text.
    where: '($columnOwnerID = ? AND $columnUploadedFileID IS NOT NULL '
        'AND $columnUploadedFileID IS NOT -1)',
    whereArgs: [ownerID],
    distinct: true,
  );
  return [for (final row in rows) row[columnUploadedFileID] as int];
}
Future<List<Embedding>> getUnSyncedEmbeddings() async {
/// Returns the files in [filesTable] whose uploaded file ID is contained in
/// [uploadedIDs], grouped so that each uploaded file ID yields one row.
///
/// Returns an empty list when [uploadedIDs] is empty or nothing matches.
Future<List<EnteFile>> getUploadedFiles(List<int> uploadedIDs) async {
  // Nothing to look up. Without this guard the IN-list would be empty and
  // trimming its trailing comma (`substring(0, -1)`) throws a RangeError.
  if (uploadedIDs.isEmpty) {
    return <EnteFile>[];
  }
  final db = await instance.database;
  // Same quoted, comma-separated list as before, built in one pass.
  final inParam = uploadedIDs.map((id) => "'$id'").join(',');
  final results = await db.query(
    filesTable,
    where: '$columnUploadedFileID IN ($inParam)',
    groupBy: columnUploadedFileID,
  );
  if (results.isEmpty) {
    return <EnteFile>[];
  }
  return convertToFiles(results);
}
Map<String, dynamic> _getRowForFile(EnteFile file) {
@ -1779,30 +1709,4 @@ class FilesDB {
file.pubMmdEncodedJson = row[columnPubMMdEncodedJson] ?? '{}';
return file;
}
Map<String, dynamic> _getRowForEmbedding(Embedding embedding) {
final row = <String, dynamic>{};
row[columnUploadedFileID] = embedding.fileID;
row[columnModel] = embedding.model;
row[columnEmbedding] = Embedding.encodeEmbedding(embedding.embedding);
row[columnUpdationTime] = embedding.updationTime;
return row;
}
Embedding _getEmbeddingFromRow(Map<String, dynamic> row) {
return Embedding(
row[columnUploadedFileID],
row[columnModel],
Embedding.decodeEmbedding(row[columnEmbedding]),
updationTime: row[columnUpdationTime],
);
}
List<Embedding> _convertToEmbeddings(List<Map<String, dynamic>> results) {
final List<Embedding> embeddings = [];
for (final result in results) {
embeddings.add(_getEmbeddingFromRow(result));
}
return embeddings;
}
}

27
lib/db/object_box.dart Normal file
View file

@ -0,0 +1,27 @@
import 'package:path/path.dart' as p;
import 'package:path_provider/path_provider.dart';
import "package:photos/models/embedding.dart";
import "package:photos/objectbox.g.dart"; // created by `flutter pub run build_runner build`
/// Singleton holder for the app's ObjectBox [Store].
///
/// [init] has to complete before [store], [getEmbeddingBox] or [clearTable]
/// is used: `store` is `late final`, so reading it before it is assigned
/// throws a `LateInitializationError`. [init] is meant to be called once.
class ObjectBox {
  /// The Store of this app.
  late final Store store;

  ObjectBox._privateConstructor();

  static final ObjectBox instance = ObjectBox._privateConstructor();

  /// Opens the store in a sub-directory of the application documents
  /// directory.
  Future<void> init() async {
    final docsDir = await getApplicationDocumentsDirectory();
    // Future<Store> openStore() {...} is defined in the generated objectbox.g.dart
    // NOTE(review): "obx-example" looks like a name carried over from sample
    // code. It is left unchanged here because renaming the directory would
    // orphan data already written on devices.
    store = await openStore(directory: p.join(docsDir.path, "obx-example"));
  }

  /// Removes every stored [Embedding].
  ///
  /// Uses the async box API so the returned future actually covers the
  /// removal and the work does not block the calling isolate.
  Future<void> clearTable() async {
    await getEmbeddingBox().removeAllAsync();
  }

  /// Returns the box that stores [Embedding] objects.
  Box<Embedding> getEmbeddingBox() {
    return store.box<Embedding>();
  }
}

View file

@ -18,6 +18,7 @@ import 'package:photos/core/constants.dart';
import 'package:photos/core/error-reporting/super_logging.dart';
import 'package:photos/core/errors.dart';
import 'package:photos/core/network/network.dart';
import "package:photos/db/object_box.dart";
import 'package:photos/db/upload_locks_db.dart';
import 'package:photos/ente_theme_data.dart';
import "package:photos/l10n/l10n.dart";
@ -161,6 +162,7 @@ Future<void> _init(bool isBackground, {String via = ''}) async {
// Start workers asynchronously. No need to wait for them to start
Computer.shared().turnOn(workersCount: 4);
CryptoUtil.init();
await ObjectBox.instance.init();
await NetworkClient.instance.init();
await Configuration.instance.init();
await UserService.instance.init();

View file

@ -1,15 +1,19 @@
import "dart:convert";
import "package:objectbox/objectbox.dart";
@Entity()
class Embedding {
@Id(assignable: true)
final int fileID;
final String model;
final List<double> embedding;
int? updationTime;
Embedding(
this.fileID,
this.model,
this.embedding, {
Embedding({
required this.fileID,
required this.model,
required this.embedding,
this.updationTime,
});

47
lib/objectbox-model.json Normal file
View file

@ -0,0 +1,47 @@
{
"_note1": "KEEP THIS FILE! Check it into a version control system (VCS) like git.",
"_note2": "ObjectBox manages crucial IDs for your object model. See docs for details.",
"_note3": "If you have VCS merge conflicts, you must resolve them according to ObjectBox docs.",
"entities": [
{
"id": "1:4067035246682038114",
"lastPropertyId": "4:7974898435327252398",
"name": "Embedding",
"properties": [
{
"id": "1:2902120230153008095",
"name": "fileID",
"type": 6,
"flags": 129
},
{
"id": "2:5644004076892986076",
"name": "model",
"type": 9
},
{
"id": "3:4818114203635230783",
"name": "embedding",
"type": 29
},
{
"id": "4:7974898435327252398",
"name": "updationTime",
"type": 6
}
],
"relations": []
}
],
"lastEntityId": "1:4067035246682038114",
"lastIndexId": "0:0",
"lastRelationId": "0:0",
"lastSequenceId": "0:0",
"modelVersion": 5,
"modelVersionParserMinimum": 5,
"retiredEntityUids": [],
"retiredIndexUids": [],
"retiredPropertyUids": [],
"retiredRelationUids": [],
"version": 1
}

171
lib/objectbox.g.dart Normal file
View file

@ -0,0 +1,171 @@
// GENERATED CODE - DO NOT MODIFY BY HAND
// This code was generated by ObjectBox. To update it run the generator again:
// With a Flutter package, run `flutter pub run build_runner build`.
// With a Dart package, run `dart run build_runner build`.
// See also https://docs.objectbox.io/getting-started#generate-objectbox-code
// ignore_for_file: camel_case_types, depend_on_referenced_packages
// coverage:ignore-file
import 'dart:typed_data';
import 'package:flat_buffers/flat_buffers.dart' as fb;
import 'package:objectbox/internal.dart'; // generated code can access "internal" functionality
import 'package:objectbox/objectbox.dart';
import 'package:objectbox_flutter_libs/objectbox_flutter_libs.dart';
import 'package:photos/models/embedding.dart';
export 'package:objectbox/objectbox.dart'; // so that callers only have to import this file
// Model metadata for the ObjectBox entities of this app; the only entry is
// `Embedding`. The id/uid pairs are the same values recorded in
// lib/objectbox-model.json. This file is generated, so changes belong in the
// annotated model class followed by a generator run, not here.
final _entities = <ModelEntity>[
ModelEntity(
id: const IdUid(1, 4067035246682038114),
name: 'Embedding',
lastPropertyId: const IdUid(4, 7974898435327252398),
flags: 0,
properties: <ModelProperty>[
// Embedding.fileID: the object ID (annotated `@Id(assignable: true)` in the
// model class). NOTE(review): flags 129 presumably encodes "ID" plus
// "self-assignable ID" - confirm against the ObjectBox property flags.
ModelProperty(
id: const IdUid(1, 2902120230153008095),
name: 'fileID',
type: 6,
flags: 129,
),
// Embedding.model: written as a string by the binding code.
ModelProperty(
id: const IdUid(2, 5644004076892986076),
name: 'model',
type: 9,
flags: 0,
),
// Embedding.embedding: written as a list of 64-bit floats by the binding code.
ModelProperty(
id: const IdUid(3, 4818114203635230783),
name: 'embedding',
type: 29,
flags: 0,
),
// Embedding.updationTime: nullable 64-bit integer in the binding code.
ModelProperty(
id: const IdUid(4, 7974898435327252398),
name: 'updationTime',
type: 6,
flags: 0,
),
],
relations: <ModelRelation>[],
backlinks: <ModelBacklink>[],
),
];
/// Shortcut for [Store.new] that passes [getObjectBoxModel] and for Flutter
/// apps by default a [directory] using `defaultStoreDirectory()` from the
/// ObjectBox Flutter library.
///
/// Note: for desktop apps it is recommended to specify a unique [directory].
///
/// See [Store.new] for an explanation of all parameters.
Future<Store> openStore({
String? directory,
int? maxDBSizeInKB,
int? fileMode,
int? maxReaders,
bool queriesCaseSensitiveDefault = true,
String? macosApplicationGroup,
}) async =>
Store(
getObjectBoxModel(),
directory: directory ?? (await defaultStoreDirectory()).path,
maxDBSizeInKB: maxDBSizeInKB,
fileMode: fileMode,
maxReaders: maxReaders,
queriesCaseSensitiveDefault: queriesCaseSensitiveDefault,
macosApplicationGroup: macosApplicationGroup,
);
/// Returns the ObjectBox model definition for this project for use with
/// [Store.new].
///
/// The definition pairs the schema metadata ([ModelInfo] built from
/// `_entities`) with the per-type bindings that read and write [Embedding]
/// objects as FlatBuffers tables.
ModelDefinition getObjectBoxModel() {
// Schema-level bookkeeping; the values match lib/objectbox-model.json.
final model = ModelInfo(
entities: _entities,
lastEntityId: const IdUid(1, 4067035246682038114),
lastIndexId: const IdUid(0, 0),
lastRelationId: const IdUid(0, 0),
lastSequenceId: const IdUid(0, 0),
retiredEntityUids: const [],
retiredIndexUids: const [],
retiredPropertyUids: const [],
retiredRelationUids: const [],
modelVersion: 5,
modelVersionParserMinimum: 5,
version: 1,
);
final bindings = <Type, EntityDefinition>{
Embedding: EntityDefinition<Embedding>(
model: _entities[0],
// Embedding declares no relations.
toOneRelations: (Embedding object) => [],
toManyRelations: (Embedding object) => {},
getId: (Embedding object) => object.fileID,
// fileID is final, so the ID cannot be written back onto the object; this
// only verifies that the ID used for the insert equals the caller-assigned
// fileID and throws otherwise.
setId: (Embedding object, int id) {
if (object.fileID != id) {
throw ArgumentError('Field Embedding.fileID is read-only '
'(final or getter-only) and it was declared to be self-assigned. '
'However, the currently inserted object (.fileID=${object.fileID}) '
"doesn't match the inserted ID (ID $id). "
'You must assign an ID before calling [box.put()].');
}
},
// Serialises an Embedding. The string and the float list are written first
// and referenced by offset; slots 0-3 hold fileID, model, embedding and
// updationTime in that order.
objectToFB: (Embedding object, fb.Builder fbb) {
final modelOffset = fbb.writeString(object.model);
final embeddingOffset = fbb.writeListFloat64(object.embedding);
fbb.startTable(5);
fbb.addInt64(0, object.fileID);
fbb.addOffset(1, modelOffset);
fbb.addOffset(2, embeddingOffset);
fbb.addInt64(3, object.updationTime);
fbb.finish(fbb.endTable());
return object.fileID;
},
// Deserialises an Embedding. Fields are read at vtable offsets 4, 6, 8 and
// 10 (fileID, model, embedding, updationTime). Missing values fall back to
// 0, '' and [] respectively; updationTime is read as nullable.
objectFromFB: (Store store, ByteData fbData) {
final buffer = fb.BufferContext(fbData);
final rootOffset = buffer.derefObject(0);
final fileIDParam =
const fb.Int64Reader().vTableGet(buffer, rootOffset, 4, 0);
final modelParam = const fb.StringReader(asciiOptimization: true)
.vTableGet(buffer, rootOffset, 6, '');
// lazy: false - the list is built by a non-lazy reader.
final embeddingParam =
const fb.ListReader<double>(fb.Float64Reader(), lazy: false)
.vTableGet(buffer, rootOffset, 8, []);
final updationTimeParam =
const fb.Int64Reader().vTableGetNullable(buffer, rootOffset, 10);
final object = Embedding(
fileID: fileIDParam,
model: modelParam,
embedding: embeddingParam,
updationTime: updationTimeParam,
);
return object;
},
),
};
return ModelDefinition(model, bindings);
}
/// [Embedding] entity fields to define ObjectBox queries.
class Embedding_ {
/// see [Embedding.fileID]
static final fileID =
QueryIntegerProperty<Embedding>(_entities[0].properties[0]);
/// see [Embedding.model]
static final model =
QueryStringProperty<Embedding>(_entities[0].properties[1]);
/// see [Embedding.embedding]
static final embedding =
QueryDoubleVectorProperty<Embedding>(_entities[0].properties[2]);
/// see [Embedding.updationTime]
static final updationTime =
QueryIntegerProperty<Embedding>(_entities[0].properties[3]);
}

View file

@ -4,8 +4,10 @@ import "dart:typed_data";
import "package:logging/logging.dart";
import "package:photos/core/network/network.dart";
import "package:photos/db/files_db.dart";
import "package:photos/db/object_box.dart";
import "package:photos/models/embedding.dart";
import "package:photos/models/file/file.dart";
import "package:photos/objectbox.g.dart";
import "package:photos/services/semantic_search/remote_embedding.dart";
import "package:photos/utils/crypto_util.dart";
import "package:photos/utils/file_download_util.dart";
@ -44,7 +46,12 @@ class EmbeddingStore {
}
Future<void> pushEmbeddings() async {
final pendingItems = await FilesDB.instance.getUnSyncedEmbeddings();
final query = (ObjectBox.instance
.getEmbeddingBox()
.query(Embedding_.updationTime.isNull()))
.build();
final pendingItems = query.find();
query.close();
for (final item in pendingItems) {
final file = await FilesDB.instance.getAnyUploadedFile(item.fileID);
await _pushEmbedding(file!, item);
@ -52,7 +59,7 @@ class EmbeddingStore {
}
Future<void> storeEmbedding(EnteFile file, Embedding embedding) async {
await FilesDB.instance.upsertEmbedding(embedding);
ObjectBox.instance.getEmbeddingBox().put(embedding);
_pushEmbedding(file, embedding);
}
@ -78,7 +85,7 @@ class EmbeddingStore {
);
final updationTime = response.data["updationTime"];
embedding.updationTime = updationTime;
await FilesDB.instance.upsertEmbedding(embedding);
ObjectBox.instance.getEmbeddingBox().put(embedding);
} catch (e, s) {
_logger.severe(e, s);
}
@ -140,14 +147,14 @@ class EmbeddingStore {
embeddings.add(
Embedding(
embedding.fileID,
embedding.model,
decodedEmbedding,
fileID: embedding.fileID,
model: embedding.model,
embedding: decodedEmbedding,
updationTime: embedding.updatedAt,
),
);
}
await FilesDB.instance.insertEmbeddings(embeddings);
await ObjectBox.instance.getEmbeddingBox().putManyAsync(embeddings);
_logger.info("${embeddings.length} embeddings stored");
await _preferences.setInt(
kEmbeddingsSyncTimeKey,

View file

@ -8,6 +8,7 @@ import "package:logging/logging.dart";
import "package:photos/core/configuration.dart";
import "package:photos/core/event_bus.dart";
import "package:photos/db/files_db.dart";
import "package:photos/db/object_box.dart";
import "package:photos/events/file_indexed_event.dart";
import "package:photos/events/file_uploaded_event.dart";
import "package:photos/events/sync_status_update_event.dart";
@ -65,7 +66,7 @@ class SemanticSearchService {
}
Future<List<EnteFile>> search(String query) async {
if (Platform.isIOS || !LocalSettings.instance.hasEnabledMagicSearch()) {
if (Platform.isIOS) {
return [];
}
if (_ongoingRequest == null) {
@ -160,7 +161,7 @@ class SemanticSearchService {
}
/// Returns an [IndexStatus] built from the number of embeddings stored in
/// ObjectBox and the current length of `_queue`.
Future<IndexStatus> getIndexStatus() async {
  // Ask the box for its object count instead of loading every embedding
  // (each carrying its full vector) only to read the list length.
  final indexedCount = ObjectBox.instance.getEmbeddingBox().count();
  return IndexStatus(indexedCount, _queue.length);
}
@ -174,9 +175,11 @@ class SemanticSearchService {
if (!LocalSettings.instance.hasEnabledMagicSearch()) {
return;
}
final files = await FilesDB.instance.getFilesWithoutEmbeddings();
final ownerID = Configuration.instance.getUserID();
files.removeWhere((f) => f.ownerID != ownerID);
final uploadedFileIDs = await FilesDB.instance
.getOwnedFileIDs(Configuration.instance.getUserID()!);
final embeddedFileIDs = _cachedEmbeddings.map((e) => e.fileID).toSet();
uploadedFileIDs.removeWhere((id) => embeddedFileIDs.contains(id));
final files = await FilesDB.instance.getUploadedFiles(uploadedFileIDs);
_logger.info(files.length.toString() + " pending to be embedded");
_queue.addAll(files);
_pollQueue();
@ -223,9 +226,9 @@ class SemanticSearchService {
return;
}
final embedding = Embedding(
file.uploadedFileID!,
kModelName,
result,
fileID: file.uploadedFileID!,
model: kModelName,
embedding: result,
);
await EmbeddingStore.instance.storeEmbedding(
file,
@ -241,7 +244,7 @@ class SemanticSearchService {
Future<void> _cacheEmbeddings() async {
final startTime = DateTime.now();
final embeddings = await FilesDB.instance.getAllEmbeddingsV2();
final embeddings = ObjectBox.instance.store.box<Embedding>().getAll();
_cachedEmbeddings.clear();
_cachedEmbeddings.addAll(embeddings);
final endTime = DateTime.now();

File diff suppressed because it is too large Load diff

View file

@ -111,6 +111,8 @@ dependencies:
move_to_background: ^1.0.2
# open_file: ^3.2.1
objectbox: ^2.3.1
objectbox_flutter_libs: any
open_mail_app: ^0.4.5
package_info_plus: ^4.1.0
page_transition: ^2.0.2
@ -184,6 +186,7 @@ dev_dependencies:
integration_test:
sdk: flutter
json_serializable: ^6.6.1
objectbox_generator: any
test: ^1.22.0
flutter_icons: