Replace Isar with SQLite for storing CLIP embeddings (#1575)

## Description
- This PR removes the dependency on Isar, and sets up a SQLite DB for
storing embeddings.
- The existing DB is deleted, and the new DB is populated by pulling
embeddings from the server. Local migration was possible, but that would
have required us to keep Isar as a dependency for an unknown period of
time.
- For 30k embeddings, DB size has dropped from ~420MB to ~115MB. The
first load on a Pixel 7 has increased from ~500ms to ~600ms.
- More details @ https://ente.io/blog/tech/sqlite-objectbox-isar/#update

## Tests
- [x] Verified over internal builds that semantic search is working as
expected

> Note: This fixes the jank that occurred when a foreground process
tried to read data from the Isar DB while a background process was alive.
This commit is contained in:
Vishnu Mohandas 2024-05-02 09:35:24 +05:30 committed by GitHub
commit ab471dde96
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 132 additions and 1189 deletions

View file

@ -108,8 +108,6 @@ PODS:
- FlutterMacOS
- integration_test (0.0.1):
- Flutter
- isar_flutter_libs (1.0.0):
- Flutter
- libwebp (1.3.2):
- libwebp/demux (= 1.3.2)
- libwebp/mux (= 1.3.2)
@ -246,7 +244,6 @@ DEPENDENCIES:
- image_editor_common (from `.symlinks/plugins/image_editor_common/ios`)
- in_app_purchase_storekit (from `.symlinks/plugins/in_app_purchase_storekit/darwin`)
- integration_test (from `.symlinks/plugins/integration_test/ios`)
- isar_flutter_libs (from `.symlinks/plugins/isar_flutter_libs/ios`)
- local_auth_darwin (from `.symlinks/plugins/local_auth_darwin/darwin`)
- local_auth_ios (from `.symlinks/plugins/local_auth_ios/ios`)
- media_extension (from `.symlinks/plugins/media_extension/ios`)
@ -341,8 +338,6 @@ EXTERNAL SOURCES:
:path: ".symlinks/plugins/in_app_purchase_storekit/darwin"
integration_test:
:path: ".symlinks/plugins/integration_test/ios"
isar_flutter_libs:
:path: ".symlinks/plugins/isar_flutter_libs/ios"
local_auth_darwin:
:path: ".symlinks/plugins/local_auth_darwin/darwin"
local_auth_ios:
@ -427,7 +422,6 @@ SPEC CHECKSUMS:
image_editor_common: d6f6644ae4a6de80481e89fe6d0a8c49e30b4b43
in_app_purchase_storekit: 0e4b3c2e43ba1e1281f4f46dd71b0593ce529892
integration_test: 13825b8a9334a850581300559b8839134b124670
isar_flutter_libs: b69f437aeab9c521821c3f376198c4371fa21073
libwebp: 1786c9f4ff8a279e4dac1e8f385004d5fc253009
local_auth_darwin: c7e464000a6a89e952235699e32b329457608d98
local_auth_ios: 5046a18c018dd973247a0564496c8898dbb5adf9

View file

@ -308,7 +308,6 @@
"${BUILT_PRODUCTS_DIR}/image_editor_common/image_editor_common.framework",
"${BUILT_PRODUCTS_DIR}/in_app_purchase_storekit/in_app_purchase_storekit.framework",
"${BUILT_PRODUCTS_DIR}/integration_test/integration_test.framework",
"${BUILT_PRODUCTS_DIR}/isar_flutter_libs/isar_flutter_libs.framework",
"${BUILT_PRODUCTS_DIR}/libwebp/libwebp.framework",
"${BUILT_PRODUCTS_DIR}/local_auth_darwin/local_auth_darwin.framework",
"${BUILT_PRODUCTS_DIR}/local_auth_ios/local_auth_ios.framework",
@ -390,7 +389,6 @@
"${TARGET_BUILD_DIR}/${FRAMEWORKS_FOLDER_PATH}/image_editor_common.framework",
"${TARGET_BUILD_DIR}/${FRAMEWORKS_FOLDER_PATH}/in_app_purchase_storekit.framework",
"${TARGET_BUILD_DIR}/${FRAMEWORKS_FOLDER_PATH}/integration_test.framework",
"${TARGET_BUILD_DIR}/${FRAMEWORKS_FOLDER_PATH}/isar_flutter_libs.framework",
"${TARGET_BUILD_DIR}/${FRAMEWORKS_FOLDER_PATH}/libwebp.framework",
"${TARGET_BUILD_DIR}/${FRAMEWORKS_FOLDER_PATH}/local_auth_darwin.framework",
"${TARGET_BUILD_DIR}/${FRAMEWORKS_FOLDER_PATH}/local_auth_ios.framework",

View file

@ -1,79 +1,167 @@
import "dart:io";
import "dart:typed_data";
import "package:isar/isar.dart";
import "package:path/path.dart";
import 'package:path_provider/path_provider.dart';
import "package:photos/core/event_bus.dart";
import "package:photos/events/embedding_updated_event.dart";
import "package:photos/models/embedding.dart";
import "package:sqlite_async/sqlite_async.dart";
class EmbeddingsDB {
late final Isar _isar;
EmbeddingsDB._privateConstructor();
static final EmbeddingsDB instance = EmbeddingsDB._privateConstructor();
static const databaseName = "ente.embeddings.db";
static const tableName = "embeddings";
static const columnFileID = "file_id";
static const columnModel = "model";
static const columnEmbedding = "embedding";
static const columnUpdationTime = "updation_time";
static Future<SqliteDatabase>? _dbFuture;
/// Shared handle to the embeddings database.
///
/// The first access kicks off [_initDatabase]; later accesses reuse the
/// cached future, so initialisation is started at most once.
Future<SqliteDatabase> get _database async {
  final pending = _dbFuture;
  if (pending != null) {
    return pending;
  }
  final created = _initDatabase();
  _dbFuture = created;
  return created;
}
/// Prepares the embeddings store for use.
///
/// Resolves the application documents directory and cleans up stores left
/// in it by earlier storage backends.
Future<void> init() async {
final dir = await getApplicationDocumentsDirectory();
_isar = await Isar.open(
[EmbeddingSchema],
directory: dir.path,
);
await _clearDeprecatedStore(dir);
// Removes both the old ObjectBox directory and the old Isar file, if present.
await _clearDeprecatedStores(dir);
}
/// Opens the SQLite file backing this store and applies its migrations.
///
/// The file is named [databaseName] and lives in the application documents
/// directory. Migration 1 creates [tableName], where each (file ID, model)
/// pair is kept unique by a UNIQUE constraint.
Future<SqliteDatabase> _initDatabase() async {
  final docsDir = await getApplicationDocumentsDirectory();
  final dbPath = join(docsDir.path, databaseName);

  // Version 1: initial schema.
  final createTable = SqliteMigration(1, (tx) async {
    await tx.execute(
      'CREATE TABLE $tableName ($columnFileID INTEGER NOT NULL, $columnModel INTEGER NOT NULL, $columnEmbedding BLOB NOT NULL, $columnUpdationTime INTEGER, UNIQUE ($columnFileID, $columnModel))',
    );
  });
  final schema = SqliteMigrations();
  schema.add(createTable);

  final db = SqliteDatabase(path: dbPath);
  await schema.migrate(db);
  return db;
}
/// Removes every row from the embeddings table.
Future<void> clearTable() async {
await _isar.writeTxn(() => _isar.clear());
final db = await _database;
// SQLite's DELETE takes no column list; `DELETE * FROM` is a syntax error.
await db.execute('DELETE FROM $tableName');
}
/// Returns the stored embeddings that were produced by [model].
Future<List<Embedding>> getAll(Model model) async {
return _isar.embeddings.filter().modelEqualTo(model).findAll();
final db = await _database;
// Filter on the model column so rows written for a different model are not
// mixed into the result; without this the [model] argument is unused.
final results = await db.getAll(
'SELECT * FROM $tableName WHERE $columnModel = ?',
[modelToInt(model)!],
);
return _convertToEmbeddings(results);
}
/// Stores [embedding], replacing any existing row for the same file ID and
/// model, then fires an [EmbeddingUpdatedEvent].
Future<void> put(Embedding embedding) {
return _isar.writeTxn(() async {
await _isar.embeddings.putByIndex(Embedding.index, embedding);
Bus.instance.fire(EmbeddingUpdatedEvent());
});
Future<void> put(Embedding embedding) async {
final db = await _database;
// INSERT OR REPLACE works together with the table's UNIQUE (file ID, model)
// constraint: a conflicting row is replaced rather than duplicated.
await db.execute(
'INSERT OR REPLACE INTO $tableName ($columnFileID, $columnModel, $columnEmbedding, $columnUpdationTime) VALUES (?, ?, ?, ?)',
_getRowFromEmbedding(embedding),
);
Bus.instance.fire(EmbeddingUpdatedEvent());
}
/// Stores all of [embeddings] in one batch, replacing rows that share a file
/// ID and model, then fires a single [EmbeddingUpdatedEvent].
Future<void> putMany(List<Embedding> embeddings) {
return _isar.writeTxn(() async {
await _isar.embeddings.putAllByIndex(Embedding.index, embeddings);
Bus.instance.fire(EmbeddingUpdatedEvent());
});
Future<void> putMany(List<Embedding> embeddings) async {
final db = await _database;
// One parameter list per embedding, in the column order used by the INSERT.
final inputs = embeddings.map((e) => _getRowFromEmbedding(e)).toList();
await db.executeBatch(
'INSERT OR REPLACE INTO $tableName ($columnFileID, $columnModel, $columnEmbedding, $columnUpdationTime) values(?, ?, ?, ?)',
inputs,
);
Bus.instance.fire(EmbeddingUpdatedEvent());
}
/// Returns the embeddings whose updation time is NULL.
///
/// NOTE(review): presumably a NULL updation time marks rows that have not
/// been uploaded yet; confirm against the sync code.
Future<List<Embedding>> getUnsyncedEmbeddings() async {
return await _isar.embeddings.filter().updationTimeEqualTo(null).findAll();
final db = await _database;
final results = await db.getAll(
'SELECT * FROM $tableName WHERE $columnUpdationTime IS NULL',
);
return _convertToEmbeddings(results);
}
/// Deletes the embeddings of the given [fileIDs], regardless of model, then
/// fires an [EmbeddingUpdatedEvent].
Future<void> deleteEmbeddings(List<int> fileIDs) async {
await _isar.writeTxn(() async {
final embeddings = <Embedding>[];
for (final fileID in fileIDs) {
embeddings.addAll(
await _isar.embeddings.filter().fileIDEqualTo(fileID).findAll(),
);
}
await _isar.embeddings.deleteAll(embeddings.map((e) => e.id).toList());
Bus.instance.fire(EmbeddingUpdatedEvent());
});
final db = await _database;
// The IDs are interpolated straight into the SQL text; this is only safe
// because [fileIDs] is typed as a list of ints.
await db.execute(
'DELETE FROM $tableName WHERE $columnFileID IN (${fileIDs.join(", ")})',
);
Bus.instance.fire(EmbeddingUpdatedEvent());
}
/// Deletes every embedding stored for [model], then fires an
/// [EmbeddingUpdatedEvent].
Future<void> deleteAllForModel(Model model) async {
await _isar.writeTxn(() async {
final embeddings =
await _isar.embeddings.filter().modelEqualTo(model).findAll();
await _isar.embeddings.deleteAll(embeddings.map((e) => e.id).toList());
Bus.instance.fire(EmbeddingUpdatedEvent());
});
final db = await _database;
await db.execute(
'DELETE FROM $tableName WHERE $columnModel = ?',
// The `!` throws if [modelToInt] has no integer code for [model].
[modelToInt(model)!],
);
Bus.instance.fire(EmbeddingUpdatedEvent());
}
/// Deletes the `object-box-store` directory under [dir], if it exists.
Future<void> _clearDeprecatedStore(Directory dir) async {
final deprecatedStore = Directory(dir.path + "/object-box-store");
if (await deprecatedStore.exists()) {
await deprecatedStore.delete(recursive: true);
/// Converts query result rows into [Embedding] objects, preserving row order.
List<Embedding> _convertToEmbeddings(List<Map<String, dynamic>> results) {
final List<Embedding> embeddings = [];
for (final result in results) {
embeddings.add(_getEmbeddingFromRow(result));
}
return embeddings;
}
/// Rebuilds an [Embedding] from a row of [tableName].
///
/// The blob column is reinterpreted as 32-bit floats, mirroring what
/// [_getRowFromEmbedding] writes. The row's updation time is carried over so
/// that a stored embedding round-trips unchanged.
///
/// Throws if the stored model value is not recognised by [intToModel].
Embedding _getEmbeddingFromRow(Map<String, dynamic> row) {
  final fileID = row[columnFileID];
  final model = intToModel(row[columnModel])!;
  final bytes = row[columnEmbedding] as Uint8List;
  // `bytes` may be a window into a larger buffer, so honour its offset and
  // length instead of viewing the whole backing buffer. Float32List.view
  // needs a 4-byte-aligned offset, so copy first when it is not aligned.
  final aligned = bytes.offsetInBytes % Float32List.bytesPerElement == 0
      ? bytes
      : Uint8List.fromList(bytes);
  final vector = Float32List.view(
    aligned.buffer,
    aligned.offsetInBytes,
    aligned.lengthInBytes ~/ Float32List.bytesPerElement,
  );
  return Embedding(fileID: fileID, model: model, embedding: vector)
    ..updationTime = row[columnUpdationTime] as int?;
}
/// Flattens [embedding] into positional SQL parameters.
///
/// The order is file ID, model code, embedding blob, updation time, matching
/// the column list of the INSERT statements in this class. The vector is
/// stored as the raw bytes of a [Float32List].
List<Object?> _getRowFromEmbedding(Embedding embedding) {
  final modelCode = modelToInt(embedding.model)!;
  final vector = Float32List.fromList(embedding.embedding);
  final blob = vector.buffer.asUint8List();
  return <Object?>[embedding.fileID, modelCode, blob, embedding.updationTime];
}
/// Deletes on-disk data left behind by the earlier storage backends.
///
/// Removes the `object-box-store` directory (recursively) and the
/// `default.isar` file from [dir]; either is skipped when it does not exist.
Future<void> _clearDeprecatedStores(Directory dir) async {
  final objectBoxDir = Directory(dir.path + "/object-box-store");
  final objectBoxPresent = await objectBoxDir.exists();
  if (objectBoxPresent) {
    await objectBoxDir.delete(recursive: true);
  }

  final isarFile = File(dir.path + "/default.isar");
  final isarPresent = await isarFile.exists();
  if (isarPresent) {
    await isarFile.delete();
  }
}
/// Returns the integer code stored in the model column for [model].
///
/// `Model.onnxClip` maps to 1 and `Model.ggmlClip` to 2; any other value
/// yields null.
int? modelToInt(Model model) {
  if (model == Model.onnxClip) {
    return 1;
  }
  if (model == Model.ggmlClip) {
    return 2;
  }
  return null;
}
/// Returns the [Model] for an integer code read from the model column.
///
/// 1 maps to `Model.onnxClip` and 2 to `Model.ggmlClip`; any other code
/// yields null.
Model? intToModel(int model) {
  const modelsByCode = <int, Model>{
    1: Model.onnxClip,
    2: Model.ggmlClip,
  };
  return modelsByCode[model];
}
}

View file

@ -1,17 +1,7 @@
import "dart:convert";
import "package:isar/isar.dart";
part 'embedding.g.dart';
@collection
class Embedding {
static const index = 'unique_file_model_embedding';
Id id = Isar.autoIncrement;
final int fileID;
@enumerated
@Index(name: index, composite: [CompositeIndex('fileID')], unique: true, replace: true)
final Model model;
final List<double> embedding;
int? updationTime;

File diff suppressed because it is too large Load diff

View file

@ -19,7 +19,7 @@ class EmbeddingStore {
static final EmbeddingStore instance = EmbeddingStore._privateConstructor();
static const kEmbeddingsSyncTimeKey = "sync_time_embeddings_v2";
static const kEmbeddingsSyncTimeKey = "sync_time_embeddings_v3";
final _logger = Logger("EmbeddingStore");
final _dio = NetworkClient.instance.enteDio;

View file

@ -72,8 +72,8 @@ class SemanticSearchService {
_mlFramework = _currentModel == Model.onnxClip
? ONNX(shouldDownloadOverMobileData)
: GGML(shouldDownloadOverMobileData);
await EmbeddingsDB.instance.init();
await EmbeddingStore.instance.init();
await EmbeddingsDB.instance.init();
await _loadEmbeddings();
Bus.instance.on<EmbeddingUpdatedEvent>().listen((event) {
_embeddingLoaderDebouncer.run(() async {

View file

@ -363,14 +363,6 @@ packages:
url: "https://pub.dev"
source: hosted
version: "2.3.2"
dartx:
dependency: transitive
description:
name: dartx
sha256: "8b25435617027257d43e6508b5fe061012880ddfdaa75a71d607c3de2a13d244"
url: "https://pub.dev"
source: hosted
version: "1.2.0"
dbus:
dependency: transitive
description:
@ -1116,30 +1108,6 @@ packages:
url: "https://pub.dev"
source: hosted
version: "1.0.4"
isar:
dependency: "direct main"
description:
name: isar
sha256: "99165dadb2cf2329d3140198363a7e7bff9bbd441871898a87e26914d25cf1ea"
url: "https://pub.dev"
source: hosted
version: "3.1.0+1"
isar_flutter_libs:
dependency: "direct main"
description:
name: isar_flutter_libs
sha256: bc6768cc4b9c61aabff77152e7f33b4b17d2fc93134f7af1c3dd51500fe8d5e8
url: "https://pub.dev"
source: hosted
version: "3.1.0+1"
isar_generator:
dependency: "direct dev"
description:
name: isar_generator
sha256: "76c121e1295a30423604f2f819bc255bc79f852f3bc8743a24017df6068ad133"
url: "https://pub.dev"
source: hosted
version: "3.1.0+1"
js:
dependency: transitive
description:
@ -2220,14 +2188,6 @@ packages:
url: "https://pub.dev"
source: hosted
version: "0.5.9"
time:
dependency: transitive
description:
name: time
sha256: ad8e018a6c9db36cb917a031853a1aae49467a93e0d464683e029537d848c221
url: "https://pub.dev"
source: hosted
version: "2.1.4"
timezone:
dependency: transitive
description:
@ -2597,14 +2557,6 @@ packages:
url: "https://pub.dev"
source: hosted
version: "1.1.1"
xxh3:
dependency: transitive
description:
name: xxh3
sha256: a92b30944a9aeb4e3d4f3c3d4ddb3c7816ca73475cd603682c4f8149690f56d7
url: "https://pub.dev"
source: hosted
version: "1.0.1"
yaml:
dependency: transitive
description:

View file

@ -12,7 +12,7 @@ description: ente photos application
# Read more about iOS versioning at
# https://developer.apple.com/library/archive/documentation/General/Reference/InfoPlistKeyReference/Articles/CoreFoundationKeys.html
version: 0.8.84+604
version: 0.8.88+608
publish_to: none
environment:
@ -99,8 +99,6 @@ dependencies:
image_editor: ^1.3.0
in_app_purchase: ^3.0.7
intl: ^0.18.0
isar: ^3.1.0+1
isar_flutter_libs: ^3.1.0+1
json_annotation: ^4.8.0
latlong2: ^0.9.0
like_button: ^2.0.5
@ -196,7 +194,6 @@ dev_dependencies:
freezed: ^2.5.2
integration_test:
sdk: flutter
isar_generator: ^3.1.0+1
json_serializable: ^6.6.1
test: ^1.22.0

View file

@ -1,17 +0,0 @@
# Builds the Isar native library for four Android ABIs and moves each result
# into the matching jniLibs directory of the isar_flutter_libs package in the
# pub cache.
# TODO: add `rustup@1.25.2` to `srclibs`
# TODO: verify if `gcc-multilib` or `libc-dev` is needed
# NOTE(review): `$$rustup$$` looks like a placeholder substituted by the build
# system with the rustup checkout path; confirm.
$$rustup$$/rustup-init.sh -y
# Load the cargo environment into the current shell.
source $HOME/.cargo/env
cd thirdparty/isar/
# One build per target ABI.
bash tool/build_android.sh x86
bash tool/build_android.sh x64
bash tool/build_android.sh armv7
bash tool/build_android.sh arm64
# Each artifact is renamed to libisar.so and then moved into its ABI folder.
# The `isar_flutter_libs-*` glob stands in for the package version.
mv libisar_android_arm64.so libisar.so
mv libisar.so $PUB_CACHE/hosted/pub.dev/isar_flutter_libs-*/android/src/main/jniLibs/arm64-v8a/
mv libisar_android_armv7.so libisar.so
mv libisar.so $PUB_CACHE/hosted/pub.dev/isar_flutter_libs-*/android/src/main/jniLibs/armeabi-v7a/
mv libisar_android_x64.so libisar.so
mv libisar.so $PUB_CACHE/hosted/pub.dev/isar_flutter_libs-*/android/src/main/jniLibs/x86_64/
mv libisar_android_x86.so libisar.so
mv libisar.so $PUB_CACHE/hosted/pub.dev/isar_flutter_libs-*/android/src/main/jniLibs/x86/