Merge pull request #121 from mCaptcha/feat-percentile

compute percentile on analytics records
This commit is contained in:
Aravinth Manivannan 2023-11-04 20:32:28 +00:00 committed by GitHub
commit 8bed3cb352
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
15 changed files with 617 additions and 52 deletions

View file

@ -1,2 +1,2 @@
export POSTGRES_DATABASE_URL="postgres://postgres:password@localhost:5432/postgres"
export MARIA_DATABASE_URL="mysql://maria:password@localhost:3306/maria"
export MARIA_DATABASE_URL="mysql://root:password@localhost:3306/maria"

View file

@ -307,6 +307,17 @@ pub trait MCDatabase: std::marker::Send + std::marker::Sync + CloneSPDatabase {
captcha_key: &str,
difficulty_factor: u32,
) -> DBResult<u32>;
/// Get number of analytics entries that are under a certain duration
async fn stats_get_num_logs_under_time(&self, duration: u32) -> DBResult<usize>;
/// Get the entry at a location in the list of analytics entires under a certain time limit
/// and sorted in ascending order
async fn stats_get_entry_at_location_for_time_limit_asc(
&self,
duration: u32,
location: u32,
) -> DBResult<Option<usize>>;
}
#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq)]

View file

@ -7,6 +7,29 @@
use crate::errors::*;
use crate::prelude::*;
/// easy traffic pattern
pub const TRAFFIC_PATTERN: TrafficPattern = TrafficPattern {
avg_traffic: 500,
peak_sustainable_traffic: 5_000,
broke_my_site_traffic: Some(10_000),
};
/// levels for complex captcha config
pub const LEVELS: [Level; 3] = [
Level {
difficulty_factor: 1,
visitor_threshold: 1,
},
Level {
difficulty_factor: 2,
visitor_threshold: 2,
},
Level {
difficulty_factor: 3,
visitor_threshold: 3,
},
];
/// test all database functions
pub async fn database_works<'a, T: MCDatabase>(
db: &T,
@ -250,7 +273,6 @@ pub async fn database_works<'a, T: MCDatabase>(
db.record_confirm(c.key).await.unwrap();
// analytics start
db.analytics_create_psuedo_id_if_not_exists(c.key)
.await
.unwrap();
@ -282,11 +304,31 @@ pub async fn database_works<'a, T: MCDatabase>(
);
let analytics = CreatePerformanceAnalytics {
time: 0,
difficulty_factor: 0,
time: 1,
difficulty_factor: 1,
worker_type: "wasm".into(),
};
assert_eq!(
db.stats_get_num_logs_under_time(analytics.time)
.await
.unwrap(),
0
);
db.analysis_save(c.key, &analytics).await.unwrap();
assert_eq!(
db.stats_get_num_logs_under_time(analytics.time)
.await
.unwrap(),
1
);
assert_eq!(
db.stats_get_num_logs_under_time(analytics.time - 1)
.await
.unwrap(),
0
);
let limit = 50;
let mut offset = 0;
let a = db.analytics_fetch(c.key, limit, offset).await.unwrap();
@ -305,6 +347,50 @@ pub async fn database_works<'a, T: MCDatabase>(
.unwrap();
assert_eq!(db.analytics_fetch(c.key, 1000, 0).await.unwrap().len(), 0);
assert!(!db.analytics_captcha_is_published(c.key).await.unwrap());
let rest_analytics = [
CreatePerformanceAnalytics {
time: 2,
difficulty_factor: 2,
worker_type: "wasm".into(),
},
CreatePerformanceAnalytics {
time: 3,
difficulty_factor: 3,
worker_type: "wasm".into(),
},
CreatePerformanceAnalytics {
time: 4,
difficulty_factor: 4,
worker_type: "wasm".into(),
},
CreatePerformanceAnalytics {
time: 5,
difficulty_factor: 5,
worker_type: "wasm".into(),
},
];
for a in rest_analytics.iter() {
db.analysis_save(c.key, &a).await.unwrap();
}
assert!(db
.stats_get_entry_at_location_for_time_limit_asc(1, 2)
.await
.unwrap()
.is_none());
assert_eq!(
db.stats_get_entry_at_location_for_time_limit_asc(2, 1)
.await
.unwrap(),
Some(2)
);
assert_eq!(
db.stats_get_entry_at_location_for_time_limit_asc(3, 2)
.await
.unwrap(),
Some(3)
);
db.analytics_delete_all_records_for_campaign(c.key)
.await
.unwrap();

View file

@ -0,0 +1,25 @@
{
"db_name": "MySQL",
"query": "SELECT\n COUNT(difficulty_factor) AS count\n FROM\n mcaptcha_pow_analytics\n WHERE time <= ?;",
"describe": {
"columns": [
{
"ordinal": 0,
"name": "count",
"type_info": {
"type": "LongLong",
"flags": "NOT_NULL | BINARY",
"char_set": 63,
"max_size": 21
}
}
],
"parameters": {
"Right": 1
},
"nullable": [
false
]
},
"hash": "9bae79667a8cc631541879321e72a40f20cf812584aaf44418089bc7a51e07c4"
}

View file

@ -0,0 +1,25 @@
{
"db_name": "MySQL",
"query": "SELECT\n difficulty_factor\n FROM\n mcaptcha_pow_analytics\n WHERE\n time <= ?\n ORDER BY difficulty_factor ASC LIMIT 1 OFFSET ?;",
"describe": {
"columns": [
{
"ordinal": 0,
"name": "difficulty_factor",
"type_info": {
"type": "Long",
"flags": "NOT_NULL | NO_DEFAULT_VALUE",
"char_set": 63,
"max_size": 11
}
}
],
"parameters": {
"Right": 2
},
"nullable": [
false
]
},
"hash": "c4d6ad934e38218931e74ae1c31c6712cbadb40f31bb12e160c9d333c7e3835c"
}

View file

@ -1219,6 +1219,61 @@ impl MCDatabase for Database {
Ok(res.nonce as u32)
}
}
/// Get number of analytics entries that are under a certain duration
async fn stats_get_num_logs_under_time(&self, duration: u32) -> DBResult<usize> {
struct Count {
count: Option<i64>,
}
//"SELECT COUNT(*) FROM (SELECT difficulty_factor FROM mcaptcha_pow_analytics WHERE time <= ?) as count",
let count = sqlx::query_as!(
Count,
"SELECT
COUNT(difficulty_factor) AS count
FROM
mcaptcha_pow_analytics
WHERE time <= ?;",
duration as i32,
)
.fetch_one(&self.pool)
.await
.map_err(|e| map_row_not_found_err(e, DBError::CaptchaNotFound))?;
Ok(count.count.unwrap_or_else(|| 0) as usize)
}
/// Get the entry at a location in the list of analytics entires under a certain time limited
/// and sorted in ascending order
async fn stats_get_entry_at_location_for_time_limit_asc(
&self,
duration: u32,
location: u32,
) -> DBResult<Option<usize>> {
struct Difficulty {
difficulty_factor: Option<i32>,
}
match sqlx::query_as!(
Difficulty,
"SELECT
difficulty_factor
FROM
mcaptcha_pow_analytics
WHERE
time <= ?
ORDER BY difficulty_factor ASC LIMIT 1 OFFSET ?;",
duration as i32,
location as i64 - 1,
)
.fetch_one(&self.pool)
.await
{
Ok(res) => Ok(Some(res.difficulty_factor.unwrap() as usize)),
Err(sqlx::Error::RowNotFound) => Ok(None),
Err(e) => Err(map_row_not_found_err(e, DBError::CaptchaNotFound)),
}
}
}
#[derive(Clone)]

View file

@ -5,9 +5,11 @@
#![cfg(test)]
use sqlx::mysql::MySqlPoolOptions;
use std::env;
use sqlx::{migrate::MigrateDatabase, mysql::MySqlPoolOptions};
use url::Url;
use crate::*;
use db_core::tests::*;
@ -26,28 +28,6 @@ async fn everyting_works() {
const HEADING: &str = "testing notifications get db mariadb";
const MESSAGE: &str = "testing notifications get message db mariadb";
// easy traffic pattern
const TRAFFIC_PATTERN: TrafficPattern = TrafficPattern {
avg_traffic: 500,
peak_sustainable_traffic: 5_000,
broke_my_site_traffic: Some(10_000),
};
const LEVELS: [Level; 3] = [
Level {
difficulty_factor: 1,
visitor_threshold: 1,
},
Level {
difficulty_factor: 2,
visitor_threshold: 2,
},
Level {
difficulty_factor: 3,
visitor_threshold: 3,
},
];
const ADD_NOTIFICATION: AddNotification = AddNotification {
from: NAME,
to: NAME,
@ -56,10 +36,20 @@ async fn everyting_works() {
};
let url = env::var("MARIA_DATABASE_URL").unwrap();
let mut parsed = Url::parse(&url).unwrap();
parsed.set_path("db_maria_test");
let url = parsed.to_string();
if sqlx::MySql::database_exists(&url).await.unwrap() {
sqlx::MySql::drop_database(&url).await.unwrap();
}
sqlx::MySql::create_database(&url).await.unwrap();
let pool_options = MySqlPoolOptions::new().max_connections(2);
let connection_options = ConnectionOptions::Fresh(Fresh {
pool_options,
url,
url: url.clone(),
disable_logging: false,
});
let db = connection_options.connect().await.unwrap();
@ -78,4 +68,6 @@ async fn everyting_works() {
description: CAPTCHA_DESCRIPTION,
};
database_works(&db, &p, &c, &LEVELS, &TRAFFIC_PATTERN, &ADD_NOTIFICATION).await;
drop(db);
sqlx::MySql::drop_database(&url).await.unwrap();
}

View file

@ -0,0 +1,22 @@
{
"db_name": "PostgreSQL",
"query": "SELECT COUNT(difficulty_factor) FROM mcaptcha_pow_analytics WHERE time <= $1;",
"describe": {
"columns": [
{
"ordinal": 0,
"name": "count",
"type_info": "Int8"
}
],
"parameters": {
"Left": [
"Int4"
]
},
"nullable": [
null
]
},
"hash": "c08c1dd4bfcb6cbd0359c79cc3be79526a012b006ce9deb80bceb4e1a04c835d"
}

View file

@ -0,0 +1,23 @@
{
"db_name": "PostgreSQL",
"query": "SELECT\n difficulty_factor\n FROM\n mcaptcha_pow_analytics\n WHERE\n time <= $1\n ORDER BY difficulty_factor ASC LIMIT 1 OFFSET $2;",
"describe": {
"columns": [
{
"ordinal": 0,
"name": "difficulty_factor",
"type_info": "Int4"
}
],
"parameters": {
"Left": [
"Int4",
"Int8"
]
},
"nullable": [
false
]
},
"hash": "c67aec0c3d5786fb495b6ed60fa106437d8e5034d3a40bf8face2ca7c12f2694"
}

View file

@ -1227,6 +1227,56 @@ impl MCDatabase for Database {
Ok(res.nonce as u32)
}
}
/// Get number of analytics entries that are under a certain duration
async fn stats_get_num_logs_under_time(&self, duration: u32) -> DBResult<usize> {
struct Count {
count: Option<i64>,
}
let count = sqlx::query_as!(
Count,
"SELECT COUNT(difficulty_factor) FROM mcaptcha_pow_analytics WHERE time <= $1;",
duration as i32,
)
.fetch_one(&self.pool)
.await
.map_err(|e| map_row_not_found_err(e, DBError::CaptchaNotFound))?;
Ok(count.count.unwrap_or_else(|| 0) as usize)
}
/// Get the entry at a location in the list of analytics entires under a certain time limit
/// and sorted in ascending order
async fn stats_get_entry_at_location_for_time_limit_asc(
&self,
duration: u32,
location: u32,
) -> DBResult<Option<usize>> {
struct Difficulty {
difficulty_factor: Option<i32>,
}
match sqlx::query_as!(
Difficulty,
"SELECT
difficulty_factor
FROM
mcaptcha_pow_analytics
WHERE
time <= $1
ORDER BY difficulty_factor ASC LIMIT 1 OFFSET $2;",
duration as i32,
location as i64 - 1,
)
.fetch_one(&self.pool)
.await
{
Ok(res) => Ok(Some(res.difficulty_factor.unwrap() as usize)),
Err(sqlx::Error::RowNotFound) => Ok(None),
Err(e) => Err(map_row_not_found_err(e, DBError::CaptchaNotFound)),
}
}
}
#[derive(Clone)]

View file

@ -5,9 +5,12 @@
#![cfg(test)]
use sqlx::postgres::PgPoolOptions;
use std::env;
use sqlx::migrate::MigrateDatabase;
use sqlx::postgres::PgPoolOptions;
use url::Url;
use crate::*;
use db_core::tests::*;
@ -26,28 +29,6 @@ async fn everyting_works() {
const HEADING: &str = "testing notifications get db postgres";
const MESSAGE: &str = "testing notifications get message db postgres";
// easy traffic pattern
const TRAFFIC_PATTERN: TrafficPattern = TrafficPattern {
avg_traffic: 500,
peak_sustainable_traffic: 5_000,
broke_my_site_traffic: Some(10_000),
};
const LEVELS: [Level; 3] = [
Level {
difficulty_factor: 1,
visitor_threshold: 1,
},
Level {
difficulty_factor: 2,
visitor_threshold: 2,
},
Level {
difficulty_factor: 3,
visitor_threshold: 3,
},
];
const ADD_NOTIFICATION: AddNotification = AddNotification {
from: NAME,
to: NAME,
@ -56,10 +37,20 @@ async fn everyting_works() {
};
let url = env::var("POSTGRES_DATABASE_URL").unwrap();
let mut parsed = Url::parse(&url).unwrap();
parsed.set_path("db_postgres_test");
let url = parsed.to_string();
if sqlx::Postgres::database_exists(&url).await.unwrap() {
sqlx::Postgres::drop_database(&url).await.unwrap();
}
sqlx::Postgres::create_database(&url).await.unwrap();
let pool_options = PgPoolOptions::new().max_connections(2);
let connection_options = ConnectionOptions::Fresh(Fresh {
pool_options,
url,
url: url.clone(),
disable_logging: false,
});
let db = connection_options.connect().await.unwrap();
@ -78,4 +69,6 @@ async fn everyting_works() {
description: CAPTCHA_DESCRIPTION,
};
database_works(&db, &p, &c, &LEVELS, &TRAFFIC_PATTERN, &ADD_NOTIFICATION).await;
drop(db);
sqlx::Postgres::drop_database(&url).await.unwrap();
}

View file

@ -14,6 +14,7 @@ pub mod meta;
pub mod notifications;
pub mod pow;
mod routes;
pub mod stats;
pub mod survey;
pub use routes::ROUTES;
@ -26,6 +27,7 @@ pub fn services(cfg: &mut ServiceConfig) {
mcaptcha::services(cfg);
notifications::services(cfg);
survey::services(cfg);
stats::services(cfg);
}
#[derive(Deserialize)]

View file

@ -11,6 +11,7 @@ use super::mcaptcha::routes::Captcha;
use super::meta::routes::Meta;
use super::notifications::routes::Notifications;
use super::pow::routes::PoW;
use super::stats::routes::Stats;
use super::survey::routes::Survey;
pub const ROUTES: Routes = Routes::new();
@ -23,6 +24,7 @@ pub struct Routes {
pub pow: PoW,
pub survey: Survey,
pub notifications: Notifications,
pub stats: Stats,
}
impl Routes {
@ -35,6 +37,7 @@ impl Routes {
pow: PoW::new(),
notifications: Notifications::new(),
survey: Survey::new(),
stats: Stats::new(),
}
}
}

252
src/api/v1/stats.rs Normal file
View file

@ -0,0 +1,252 @@
// Copyright (C) 2021 Aravinth Manivannan <realaravinth@batsense.net>
// SPDX-FileCopyrightText: 2023 Aravinth Manivannan <realaravinth@batsense.net>
//
// SPDX-License-Identifier: AGPL-3.0-or-later
use actix_web::{web, HttpResponse, Responder};
use derive_builder::Builder;
use serde::{Deserialize, Serialize};
use crate::errors::*;
use crate::AppData;
#[derive(Clone, Debug, Deserialize, Builder, Serialize)]
pub struct BuildDetails {
pub version: &'static str,
pub git_commit_hash: &'static str,
}
pub mod routes {
use serde::{Deserialize, Serialize};
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
pub struct Stats {
pub percentile_benches: &'static str,
}
impl Stats {
pub const fn new() -> Self {
Self {
percentile_benches: "/api/v1/stats/analytics/percentile",
}
}
}
}
/// Get difficulty factor with max time limit for percentile of stats
#[my_codegen::post(path = "crate::V1_API_ROUTES.stats.percentile_benches")]
async fn percentile_benches(
data: AppData,
payload: web::Json<PercentileReq>,
) -> ServiceResult<impl Responder> {
let count = data.db.stats_get_num_logs_under_time(payload.time).await?;
if count == 0 {
return Ok(HttpResponse::Ok().json(PercentileResp {
difficulty_factor: None,
}));
}
if count < 2 {
return Ok(HttpResponse::Ok().json(PercentileResp {
difficulty_factor: None,
}));
}
let location = ((count - 1) as f64 * (payload.percentile / 100.00)) + 1.00;
let fraction = location - location.floor();
if fraction > 0.00 {
if let (Some(base), Some(ceiling)) = (
data.db
.stats_get_entry_at_location_for_time_limit_asc(
payload.time,
location.floor() as u32,
)
.await?,
data.db
.stats_get_entry_at_location_for_time_limit_asc(
payload.time,
location.floor() as u32 + 1,
)
.await?,
) {
let res = base as u32 + ((ceiling - base) as f64 * fraction).floor() as u32;
return Ok(HttpResponse::Ok().json(PercentileResp {
difficulty_factor: Some(res),
}));
}
} else {
if let Some(base) = data
.db
.stats_get_entry_at_location_for_time_limit_asc(
payload.time,
location.floor() as u32,
)
.await?
{
let res = base as u32;
return Ok(HttpResponse::Ok().json(PercentileResp {
difficulty_factor: Some(res),
}));
}
};
Ok(HttpResponse::Ok().json(PercentileResp {
difficulty_factor: None,
}))
}
#[derive(Clone, Debug, Deserialize, Builder, Serialize)]
/// Health check return datatype
pub struct PercentileReq {
time: u32,
percentile: f64,
}
#[derive(Clone, Debug, Deserialize, Builder, Serialize)]
/// Health check return datatype
pub struct PercentileResp {
difficulty_factor: Option<u32>,
}
pub fn services(cfg: &mut web::ServiceConfig) {
cfg.service(percentile_benches);
}
#[cfg(test)]
mod tests {
use actix_web::{http::StatusCode, test, App};
use super::*;
use crate::api::v1::services;
use crate::*;
#[actix_rt::test]
async fn stats_bench_work_pg() {
let data = crate::tests::pg::get_data().await;
stats_bench_work(data).await;
}
#[actix_rt::test]
async fn stats_bench_work_maria() {
let data = crate::tests::maria::get_data().await;
stats_bench_work(data).await;
}
async fn stats_bench_work(data: ArcData) {
use crate::tests::*;
const NAME: &str = "benchstatsuesr";
const EMAIL: &str = "benchstatsuesr@testadminuser.com";
const PASSWORD: &str = "longpassword2";
const DEVICE_USER_PROVIDED: &str = "foo";
const DEVICE_SOFTWARE_RECOGNISED: &str = "Foobar.v2";
const THREADS: i32 = 4;
let data = &data;
{
delete_user(&data, NAME).await;
}
register_and_signin(data, NAME, EMAIL, PASSWORD).await;
// create captcha
let (_, _signin_resp, key) = add_levels_util(data, NAME, PASSWORD).await;
let app = get_app!(data).await;
let page = 1;
let tmp_id = uuid::Uuid::new_v4();
let download_rotue = V1_API_ROUTES
.survey
.get_download_route(&tmp_id.to_string(), page);
let download_req = test::call_service(
&app,
test::TestRequest::get().uri(&download_rotue).to_request(),
)
.await;
assert_eq!(download_req.status(), StatusCode::NOT_FOUND);
data.db
.analytics_create_psuedo_id_if_not_exists(&key.key)
.await
.unwrap();
let psuedo_id = data
.db
.analytics_get_psuedo_id_from_capmaign_id(&key.key)
.await
.unwrap();
for i in 1..6 {
println!("[{i}] Saving analytics");
let analytics = db_core::CreatePerformanceAnalytics {
time: i,
difficulty_factor: i,
worker_type: "wasm".into(),
};
data.db.analysis_save(&key.key, &analytics).await.unwrap();
}
let msg = PercentileReq {
time: 1,
percentile: 99.00,
};
let resp = test::call_service(
&app,
post_request!(&msg, V1_API_ROUTES.stats.percentile_benches).to_request(),
)
.await;
assert_eq!(resp.status(), StatusCode::OK);
let resp: PercentileResp = test::read_body_json(resp).await;
assert!(resp.difficulty_factor.is_none());
let msg = PercentileReq {
time: 1,
percentile: 100.00,
};
let resp = test::call_service(
&app,
post_request!(&msg, V1_API_ROUTES.stats.percentile_benches).to_request(),
)
.await;
assert_eq!(resp.status(), StatusCode::OK);
let resp: PercentileResp = test::read_body_json(resp).await;
assert!(resp.difficulty_factor.is_none());
let msg = PercentileReq {
time: 2,
percentile: 100.00,
};
let resp = test::call_service(
&app,
post_request!(&msg, V1_API_ROUTES.stats.percentile_benches).to_request(),
)
.await;
assert_eq!(resp.status(), StatusCode::OK);
let resp: PercentileResp = test::read_body_json(resp).await;
assert_eq!(resp.difficulty_factor.unwrap(), 2);
let msg = PercentileReq {
time: 5,
percentile: 90.00,
};
let resp = test::call_service(
&app,
post_request!(&msg, V1_API_ROUTES.stats.percentile_benches).to_request(),
)
.await;
assert_eq!(resp.status(), StatusCode::OK);
let resp: PercentileResp = test::read_body_json(resp).await;
assert_eq!(resp.difficulty_factor.unwrap(), 4);
delete_user(&data, NAME).await;
}
}

View file

@ -29,6 +29,9 @@ pub fn get_settings() -> Settings {
pub mod pg {
use std::env;
use sqlx::migrate::MigrateDatabase;
use crate::api::v1::mcaptcha::get_random;
use crate::data::Data;
use crate::settings::*;
use crate::survey::SecretsStore;
@ -38,6 +41,16 @@ pub mod pg {
pub async fn get_data() -> ArcData {
let url = env::var("POSTGRES_DATABASE_URL").unwrap();
let mut parsed = url::Url::parse(&url).unwrap();
parsed.set_path(&get_random(16));
let url = parsed.to_string();
if sqlx::Postgres::database_exists(&url).await.unwrap() {
sqlx::Postgres::drop_database(&url).await.unwrap();
}
sqlx::Postgres::create_database(&url).await.unwrap();
let mut settings = get_settings();
settings.captcha.runners = Some(1);
settings.database.url = url.clone();
@ -50,6 +63,9 @@ pub mod pg {
pub mod maria {
use std::env;
use sqlx::migrate::MigrateDatabase;
use crate::api::v1::mcaptcha::get_random;
use crate::data::Data;
use crate::settings::*;
use crate::survey::SecretsStore;
@ -59,6 +75,16 @@ pub mod maria {
pub async fn get_data() -> ArcData {
let url = env::var("MARIA_DATABASE_URL").unwrap();
let mut parsed = url::Url::parse(&url).unwrap();
parsed.set_path(&get_random(16));
let url = parsed.to_string();
if sqlx::MySql::database_exists(&url).await.unwrap() {
sqlx::MySql::drop_database(&url).await.unwrap();
}
sqlx::MySql::create_database(&url).await.unwrap();
let mut settings = get_settings();
settings.captcha.runners = Some(1);
settings.database.url = url.clone();