diff --git a/Cargo.toml b/Cargo.toml index 3c266e5..d69a1e5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,7 +8,7 @@ members = [ ] [workspace.package] -version = "0.5.1" +version = "0.5.2" edition = "2024" license = "MIT" repository = "https://git.sasedev.com/Sasedev/khadhroony-bobot" diff --git a/khbb_lib/src/domain_classifier.rs b/khbb_lib/src/domain_classifier.rs index 39a9135..864a349 100644 --- a/khbb_lib/src/domain_classifier.rs +++ b/khbb_lib/src/domain_classifier.rs @@ -88,51 +88,54 @@ pub(crate) fn classify_domain_event( } } if detected_programs.is_empty() { - return Ok(Some( - KhbbClassifiedDomainEvent::UnknownProgramLogActivity( - KhbbUnknownProgramLogActivityEvent { - signature: log_event.signature.clone(), - context_slot: log_event.context_slot, - has_error: log_event.has_error, - log_count: log_event.log_count, - }, - ), - )); - } - Ok(Some( - KhbbClassifiedDomainEvent::KnownProgramLogActivity( - KhbbKnownProgramLogActivityEvent { + return Ok(Some(KhbbClassifiedDomainEvent::UnknownProgramLogActivity( + KhbbUnknownProgramLogActivityEvent { signature: log_event.signature.clone(), context_slot: log_event.context_slot, has_error: log_event.has_error, - programs: detected_programs, + log_count: log_event.log_count, }, - ), - )) - } + ))); + } + Ok(Some(KhbbClassifiedDomainEvent::KnownProgramLogActivity( + KhbbKnownProgramLogActivityEvent { + signature: log_event.signature.clone(), + context_slot: log_event.context_slot, + has_error: log_event.has_error, + programs: detected_programs, + }, + ))) + }, crate::KhbbDomainEvent::TokenProgramActivity(token_event) => { - match token_event.token_program_family.as_str() { - "spl-token" => Ok(Some( - KhbbClassifiedDomainEvent::SplTokenProgramActivity( + let source_label = match token_event.source_label.as_deref() { + Some(value) => value, + None => { + return Ok(None); + }, + }; + let known_program = crate::program_registry::classify_known_program_id(source_label); + match known_program { + 
Some(crate::program_registry::KhbbKnownProgram::SplToken) => { + Ok(Some(KhbbClassifiedDomainEvent::SplTokenProgramActivity( KhbbSplTokenProgramActivityEvent { pubkey: token_event.pubkey.clone(), context_slot: token_event.context_slot, subscription_id: token_event.subscription_id, }, - ), - )), - "spl-token-2022" => Ok(Some( - KhbbClassifiedDomainEvent::SplToken2022ProgramActivity( + ))) + }, + Some(crate::program_registry::KhbbKnownProgram::SplToken2022) => { + Ok(Some(KhbbClassifiedDomainEvent::SplToken2022ProgramActivity( KhbbSplToken2022ProgramActivityEvent { pubkey: token_event.pubkey.clone(), context_slot: token_event.context_slot, subscription_id: token_event.subscription_id, }, - ), - )), + ))) + }, _ => Ok(None), } - } + }, } } @@ -140,8 +143,8 @@ pub(crate) fn classify_domain_event( mod tests { #[test] fn classify_token_program_activity_as_spl_token() { - let event = crate::KhbbDomainEvent::TokenProgramActivity( - crate::KhbbTokenProgramActivityEvent { + let event = + crate::KhbbDomainEvent::TokenProgramActivity(crate::KhbbTokenProgramActivityEvent { subscription_id: 1, source_kind: crate::KhbbWsSubscriptionKind::Program, source_label: Some(std::string::String::from( @@ -150,8 +153,7 @@ mod tests { pubkey: std::string::String::from("SomePubkey"), context_slot: 100, token_program_family: std::string::String::from("spl-token"), - }, - ); + }); let result = super::classify_domain_event(&event); assert!(result.is_ok()); let classified_option = result.expect("classify token event"); @@ -160,10 +162,10 @@ mod tests { super::KhbbClassifiedDomainEvent::SplTokenProgramActivity(inner) => { assert_eq!(inner.pubkey, "SomePubkey"); assert_eq!(inner.context_slot, 100); - } + }, _ => { panic!("expected spl-token classified event"); - } + }, } } @@ -197,10 +199,10 @@ mod tests { assert_eq!(inner.signature, "sig-1"); assert_eq!(inner.context_slot, 200); assert_eq!(inner.programs.len(), 2); - } + }, _ => { panic!("expected known program log activity"); - } + }, } } @@ 
-229,10 +231,10 @@ mod tests { assert_eq!(inner.signature, "sig-2"); assert!(inner.has_error); assert_eq!(inner.log_count, 1); - } + }, _ => { panic!("expected unknown program log activity"); - } + }, } } } diff --git a/khbb_lib/src/heuristics.rs b/khbb_lib/src/heuristics.rs new file mode 100644 index 0000000..de9dd50 --- /dev/null +++ b/khbb_lib/src/heuristics.rs @@ -0,0 +1,181 @@ +// file: khbb_lib/src/heuristics.rs + +//! Early heuristic signals derived from classified domain events. + +/// Early heuristic signal derived from classified activity. +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub enum KhbbHeuristicSignal { + /// Potential token account creation or initialization activity. + PotentialTokenAccountActivity(KhbbPotentialTokenAccountActivitySignal), + /// Potential mint-related activity. + PotentialMintActivity(KhbbPotentialMintActivitySignal), + /// Potential initial transaction activity around a token account. + PotentialInitialTokenActivity(KhbbPotentialInitialTokenActivitySignal), +} + +/// Heuristic signal indicating potential token account activity. +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub struct KhbbPotentialTokenAccountActivitySignal { + /// Account pubkey involved in the signal. + pub pubkey: std::string::String, + /// Context slot. + pub context_slot: u64, + /// Subscription identifier. + pub subscription_id: u64, + /// Token program family. + pub token_program_family: std::string::String, +} + +/// Heuristic signal indicating potential mint-related activity. +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub struct KhbbPotentialMintActivitySignal { + /// Account pubkey involved in the signal. + pub pubkey: std::string::String, + /// Context slot. + pub context_slot: u64, + /// Token program family. + pub token_program_family: std::string::String, +} + +/// Heuristic signal indicating a possibly relevant early transaction activity. 
+#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub struct KhbbPotentialInitialTokenActivitySignal { + /// Transaction signature. + pub signature: std::string::String, + /// Context slot. + pub context_slot: u64, + /// Whether the transaction errored. + pub has_error: bool, + /// Number of logs in the transaction. + pub log_count: usize, + /// Known programs seen in the logs. + pub programs: std::vec::Vec, +} + +/// Derives early heuristic signals from classified domain events. +pub(crate) fn derive_heuristic_signals( + event: &crate::KhbbClassifiedDomainEvent, +) -> core::result::Result, crate::KhbbError> { + match event { + crate::KhbbClassifiedDomainEvent::SplTokenProgramActivity(activity) => { + let mut signals = std::vec::Vec::::new(); + signals.push(KhbbHeuristicSignal::PotentialTokenAccountActivity( + KhbbPotentialTokenAccountActivitySignal { + pubkey: activity.pubkey.clone(), + context_slot: activity.context_slot, + subscription_id: activity.subscription_id, + token_program_family: std::string::String::from("spl-token"), + }, + )); + if activity.pubkey != crate::ids::WSOL_MINT_ID.to_string() { + signals.push(KhbbHeuristicSignal::PotentialMintActivity( + KhbbPotentialMintActivitySignal { + pubkey: activity.pubkey.clone(), + context_slot: activity.context_slot, + token_program_family: std::string::String::from("spl-token"), + }, + )); + } + Ok(signals) + } + crate::KhbbClassifiedDomainEvent::SplToken2022ProgramActivity(activity) => { + let mut signals = std::vec::Vec::::new(); + signals.push(KhbbHeuristicSignal::PotentialTokenAccountActivity( + KhbbPotentialTokenAccountActivitySignal { + pubkey: activity.pubkey.clone(), + context_slot: activity.context_slot, + subscription_id: activity.subscription_id, + token_program_family: std::string::String::from("spl-token-2022"), + }, + )); + signals.push(KhbbHeuristicSignal::PotentialMintActivity( + KhbbPotentialMintActivitySignal { + pubkey: activity.pubkey.clone(), + context_slot: 
activity.context_slot, + token_program_family: std::string::String::from("spl-token-2022"), + }, + )); + Ok(signals) + } + crate::KhbbClassifiedDomainEvent::KnownProgramLogActivity(activity) => { + let mut contains_token_program = false; + for program in &activity.programs { + match program { + crate::KhbbKnownProgram::SplToken + | crate::KhbbKnownProgram::SplToken2022 + | crate::KhbbKnownProgram::AssociatedTokenAccount => { + contains_token_program = true; + break; + } + _ => {} + } + } + if !contains_token_program { + return Ok(vec![]); + } + Ok(vec![KhbbHeuristicSignal::PotentialInitialTokenActivity( + KhbbPotentialInitialTokenActivitySignal { + signature: activity.signature.clone(), + context_slot: activity.context_slot, + has_error: activity.has_error, + log_count: activity.programs.len(), + programs: activity.programs.clone(), + }, + )]) + } + crate::KhbbClassifiedDomainEvent::UnknownProgramLogActivity(_) => Ok(vec![]), + } +} + +#[cfg(test)] +mod tests { + #[test] + fn derive_heuristics_from_spl_token_program_activity_returns_signals() { + let event = crate::KhbbClassifiedDomainEvent::SplTokenProgramActivity( + crate::KhbbSplTokenProgramActivityEvent { + pubkey: std::string::String::from("SomeTokenAccountPubkey"), + context_slot: 100, + subscription_id: 1, + }, + ); + let result = super::derive_heuristic_signals(&event); + assert!(result.is_ok()); + let signals = result.expect("derive spl-token signals"); + assert_eq!(signals.len(), 2); + } + + #[test] + fn derive_heuristics_from_known_program_logs_returns_signal_when_token_program_seen() { + let event = crate::KhbbClassifiedDomainEvent::KnownProgramLogActivity( + crate::KhbbKnownProgramLogActivityEvent { + signature: std::string::String::from("sig-1"), + context_slot: 200, + has_error: false, + programs: vec![ + crate::KhbbKnownProgram::ComputeBudget, + crate::KhbbKnownProgram::SplToken, + ], + }, + ); + let result = super::derive_heuristic_signals(&event); + assert!(result.is_ok()); + let signals = 
result.expect("derive known program log signals"); + assert_eq!(signals.len(), 1); + } + + #[test] + fn derive_heuristics_from_unknown_program_logs_returns_no_signal() { + let event = crate::KhbbClassifiedDomainEvent::UnknownProgramLogActivity( + crate::KhbbUnknownProgramLogActivityEvent { + signature: std::string::String::from("sig-2"), + context_slot: 300, + has_error: false, + log_count: 2, + }, + ); + let result = super::derive_heuristic_signals(&event); + assert!(result.is_ok()); + let signals = result.expect("derive unknown log signals"); + assert!(signals.is_empty()); + } +} diff --git a/khbb_lib/src/ids.rs b/khbb_lib/src/ids.rs new file mode 100644 index 0000000..4807fab --- /dev/null +++ b/khbb_lib/src/ids.rs @@ -0,0 +1,22 @@ +// file: khbb_lib/src/ids.rs + +//! Centralized official Solana and SPL program identifiers. + +/// SPL Token program (legacy). +pub const SPL_TOKEN_PROGRAM_ID: solana_sdk::pubkey::Pubkey = spl_token_interface::ID; + +/// SPL Token-2022 program. +pub const SPL_TOKEN_2022_PROGRAM_ID: solana_sdk::pubkey::Pubkey = spl_token_2022_interface::ID; + +/// Associated Token Account program. +pub const ASSOCIATED_TOKEN_PROGRAM_ID: solana_sdk::pubkey::Pubkey = + spl_associated_token_account_interface::program::ID; + +/// Wrapped SOL mint. +pub const WSOL_MINT_ID: solana_sdk::pubkey::Pubkey = spl_token_interface::native_mint::ID; + +/// System program. +pub const SYSTEM_PROGRAM_ID: solana_sdk::pubkey::Pubkey = solana_sdk_ids::system_program::ID; + +/// Compute Budget program. +pub const COMPUTE_BUDGET_PROGRAM_ID: solana_sdk::pubkey::Pubkey = solana_sdk_ids::compute_budget::ID; diff --git a/khbb_lib/src/lib.rs b/khbb_lib/src/lib.rs index 06c7533..1972b94 100644 --- a/khbb_lib/src/lib.rs +++ b/khbb_lib/src/lib.rs @@ -22,6 +22,8 @@ mod ws_event; mod domain_event; mod program_registry; mod domain_classifier; +mod ids; +mod heuristics; /// Runs the listener application bootstrap workflow. 
pub use crate::app::run_listener_app; @@ -97,3 +99,13 @@ pub use crate::domain_classifier::KhbbSplToken2022ProgramActivityEvent; pub use crate::domain_classifier::KhbbKnownProgramLogActivityEvent; /// Classified log activity not matching any known program. pub use crate::domain_classifier::KhbbUnknownProgramLogActivityEvent; +/// Re-export official Solana and SPL program identifiers. +pub use crate::ids::*; +/// Early heuristic signal derived from classified activity. +pub use crate::heuristics::KhbbHeuristicSignal; +/// Heuristic signal indicating potential token account activity. +pub use crate::heuristics::KhbbPotentialTokenAccountActivitySignal; +/// Heuristic signal indicating potential mint-related activity. +pub use crate::heuristics::KhbbPotentialMintActivitySignal; +/// Heuristic signal indicating a possibly relevant early transaction activity. +pub use crate::heuristics::KhbbPotentialInitialTokenActivitySignal; diff --git a/khbb_lib/src/listener.rs b/khbb_lib/src/listener.rs index 7f7c6ea..a72c984 100644 --- a/khbb_lib/src/listener.rs +++ b/khbb_lib/src/listener.rs @@ -458,6 +458,40 @@ pub async fn run_listener_runtime( programs = ?classified.programs, "classified known program log activity event" ); + let heuristic_result = + crate::heuristics::derive_heuristic_signals( + &crate::KhbbClassifiedDomainEvent::KnownProgramLogActivity( + classified.clone(), + ), + ); + match heuristic_result { + Ok(signals) => { + for signal in signals { + match signal { + crate::KhbbHeuristicSignal::PotentialInitialTokenActivity(inner) => { + tracing::trace!( + listener_session_id = session.id, + signature = %inner.signature, + context_slot = inner.context_slot, + has_error = inner.has_error, + log_count = inner.log_count, + programs = ?inner.programs, + "heuristic potential initial token activity signal" + ); + } + crate::KhbbHeuristicSignal::PotentialTokenAccountActivity(_) => {} + crate::KhbbHeuristicSignal::PotentialMintActivity(_) => {} + } + } + } + Err(error) => { +
tracing::error!( + listener_session_id = session.id, + error = %error, + "failed to derive heuristic signals from known program log activity" + ); + } + } } Ok(Some(crate::KhbbClassifiedDomainEvent::UnknownProgramLogActivity(classified))) => { tracing::trace!( @@ -506,6 +540,47 @@ pub async fn run_listener_runtime( context_slot = classified.context_slot, "classified spl-token program activity event" ); + let heuristic_result = + crate::heuristics::derive_heuristic_signals( + &crate::KhbbClassifiedDomainEvent::SplTokenProgramActivity( + classified.clone(), + ), + ); + match heuristic_result { + Ok(signals) => { + for signal in signals { + match signal { + crate::KhbbHeuristicSignal::PotentialTokenAccountActivity(inner) => { + tracing::trace!( + listener_session_id = session.id, + pubkey = %inner.pubkey, + context_slot = inner.context_slot, + subscription_id = inner.subscription_id, + token_program_family = %inner.token_program_family, + "heuristic potential token account activity signal" + ); + } + crate::KhbbHeuristicSignal::PotentialMintActivity(inner) => { + tracing::trace!( + listener_session_id = session.id, + pubkey = %inner.pubkey, + context_slot = inner.context_slot, + token_program_family = %inner.token_program_family, + "heuristic potential mint activity signal" + ); + } + crate::KhbbHeuristicSignal::PotentialInitialTokenActivity(_) => {} + } + } + } + Err(error) => { + tracing::error!( + listener_session_id = session.id, + error = %error, + "failed to derive heuristic signals from spl-token program activity" + ); + } + } } Ok(Some(crate::KhbbClassifiedDomainEvent::SplToken2022ProgramActivity(classified))) => { tracing::trace!( @@ -515,6 +590,47 @@ pub async fn run_listener_runtime( context_slot = classified.context_slot, "classified spl-token-2022 program activity event" ); + let heuristic_result = + crate::heuristics::derive_heuristic_signals( + &crate::KhbbClassifiedDomainEvent::SplToken2022ProgramActivity( + classified.clone(), + ), + ); + match 
heuristic_result { + Ok(signals) => { + for signal in signals { + match signal { + crate::KhbbHeuristicSignal::PotentialTokenAccountActivity(inner) => { + tracing::trace!( + listener_session_id = session.id, + pubkey = %inner.pubkey, + context_slot = inner.context_slot, + subscription_id = inner.subscription_id, + token_program_family = %inner.token_program_family, + "heuristic potential token account activity signal" + ); + } + crate::KhbbHeuristicSignal::PotentialMintActivity(inner) => { + tracing::trace!( + listener_session_id = session.id, + pubkey = %inner.pubkey, + context_slot = inner.context_slot, + token_program_family = %inner.token_program_family, + "heuristic potential mint activity signal" + ); + } + crate::KhbbHeuristicSignal::PotentialInitialTokenActivity(_) => {} + } + } + } + Err(error) => { + tracing::error!( + listener_session_id = session.id, + error = %error, + "failed to derive heuristic signals from spl-token-2022 program activity" + ); + } + } } Ok(Some(_)) => {} Ok(None) => {} diff --git a/khbb_lib/src/program_registry.rs b/khbb_lib/src/program_registry.rs index 4afc409..b222360 100644 --- a/khbb_lib/src/program_registry.rs +++ b/khbb_lib/src/program_registry.rs @@ -18,17 +18,32 @@ pub enum KhbbKnownProgram { } /// Returns the known program classification for a given program id. 
-pub(crate) fn classify_known_program_id(program_id: &str) -> std::option::Option { - match program_id { - "TokenkegQfeZyiNwAJbNbGKPFXCWuBvf9Ss623VQ5DA" => Some(KhbbKnownProgram::SplToken), - "TokenzQdBNbLqP5VEhdkAS6EPFLC1PHnBqCXEpPxuEb" => Some(KhbbKnownProgram::SplToken2022), - "11111111111111111111111111111111" => Some(KhbbKnownProgram::System), - "ComputeBudget111111111111111111111111111111" => Some(KhbbKnownProgram::ComputeBudget), - "ATokenGPvbdGVxr1b2hvZbsiqW5xWH25efTNsLJA8knL" => { - Some(KhbbKnownProgram::AssociatedTokenAccount) - }, - _ => None, +pub(crate) fn classify_known_program_id( + program_id: &str, +) -> std::option::Option { + let parse_result = program_id.parse::(); + let pubkey = match parse_result { + Ok(value) => value, + Err(_) => { + return None; + } + }; + if pubkey == crate::SPL_TOKEN_PROGRAM_ID { + return Some(KhbbKnownProgram::SplToken); } + if pubkey == crate::SPL_TOKEN_2022_PROGRAM_ID { + return Some(KhbbKnownProgram::SplToken2022); + } + if pubkey == crate::SYSTEM_PROGRAM_ID { + return Some(KhbbKnownProgram::System); + } + if pubkey == crate::COMPUTE_BUDGET_PROGRAM_ID { + return Some(KhbbKnownProgram::ComputeBudget); + } + if pubkey == crate::ASSOCIATED_TOKEN_PROGRAM_ID { + return Some(KhbbKnownProgram::AssociatedTokenAccount); + } + None } /// Detects known program mentions in a transaction log line.