diff --git a/Cargo.toml b/Cargo.toml index 6043476..3c266e5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,7 +8,7 @@ members = [ ] [workspace.package] -version = "0.5.0" +version = "0.5.1" edition = "2024" license = "MIT" repository = "https://git.sasedev.com/Sasedev/khadhroony-bobot" diff --git a/khbb_lib/src/domain_classifier.rs b/khbb_lib/src/domain_classifier.rs new file mode 100644 index 0000000..39a9135 --- /dev/null +++ b/khbb_lib/src/domain_classifier.rs @@ -0,0 +1,238 @@ +// file: khbb_lib/src/domain_classifier.rs +//! Secondary domain classification derived from first-level domain events. + +/// Classified domain event derived from a first-level domain event. +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub enum KhbbClassifiedDomainEvent { + /// Classified SPL Token program activity. + SplTokenProgramActivity(KhbbSplTokenProgramActivityEvent), + /// Classified SPL Token-2022 program activity. + SplToken2022ProgramActivity(KhbbSplToken2022ProgramActivityEvent), + /// Classified log activity mentioning one or more known programs. + KnownProgramLogActivity(KhbbKnownProgramLogActivityEvent), + /// Classified log activity that did not match any known program. + UnknownProgramLogActivity(KhbbUnknownProgramLogActivityEvent), +} + +/// Classified SPL Token program activity. +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub struct KhbbSplTokenProgramActivityEvent { + /// Account pubkey involved in the activity. + pub pubkey: std::string::String, + /// Context slot. + pub context_slot: u64, + /// Subscription identifier. + pub subscription_id: u64, +} + +/// Classified SPL Token-2022 program activity. +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub struct KhbbSplToken2022ProgramActivityEvent { + /// Account pubkey involved in the activity. + pub pubkey: std::string::String, + /// Context slot. + pub context_slot: u64, + /// Subscription identifier. 
+ pub subscription_id: u64, +} + +/// Classified log activity mentioning known programs. +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub struct KhbbKnownProgramLogActivityEvent { + /// Transaction signature. + pub signature: std::string::String, + /// Context slot. + pub context_slot: u64, + /// Whether the transaction errored. + pub has_error: bool, + /// Programs detected in the logs. + pub programs: std::vec::Vec<crate::KhbbKnownProgram>, +} + +/// Classified log activity not matching any known program. +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub struct KhbbUnknownProgramLogActivityEvent { + /// Transaction signature. + pub signature: std::string::String, + /// Context slot. + pub context_slot: u64, + /// Whether the transaction errored. + pub has_error: bool, + /// Number of logs observed. + pub log_count: usize, +} + +/// Derives a classified domain event from a first-level domain event. +pub(crate) fn classify_domain_event( + event: &crate::KhbbDomainEvent, +) -> core::result::Result<core::option::Option<KhbbClassifiedDomainEvent>, crate::KhbbError> { + match event { + crate::KhbbDomainEvent::SlotAdvanced(_) => Ok(None), + crate::KhbbDomainEvent::TransactionLogActivity(log_event) => { + let mut detected_programs = + std::vec::Vec::<crate::KhbbKnownProgram>::new(); + for log_line in &log_event.logs { + let detected_program = + crate::program_registry::classify_known_program_from_log_line(log_line); + if let Some(program) = detected_program { + let mut already_present = false; + for existing_program in &detected_programs { + if *existing_program == program { + already_present = true; + break; + } + } + if !already_present { + detected_programs.push(program); + } + } + } + if detected_programs.is_empty() { + return Ok(Some( + KhbbClassifiedDomainEvent::UnknownProgramLogActivity( + KhbbUnknownProgramLogActivityEvent { + signature: log_event.signature.clone(), + context_slot: log_event.context_slot, + has_error: log_event.has_error, + log_count: log_event.log_count, + }, + ), + )); + } + Ok(Some( 
KhbbClassifiedDomainEvent::KnownProgramLogActivity( + KhbbKnownProgramLogActivityEvent { + signature: log_event.signature.clone(), + context_slot: log_event.context_slot, + has_error: log_event.has_error, + programs: detected_programs, + }, + ), + )) + } + crate::KhbbDomainEvent::TokenProgramActivity(token_event) => { + match token_event.token_program_family.as_str() { + "spl-token" => Ok(Some( + KhbbClassifiedDomainEvent::SplTokenProgramActivity( + KhbbSplTokenProgramActivityEvent { + pubkey: token_event.pubkey.clone(), + context_slot: token_event.context_slot, + subscription_id: token_event.subscription_id, + }, + ), + )), + "spl-token-2022" => Ok(Some( + KhbbClassifiedDomainEvent::SplToken2022ProgramActivity( + KhbbSplToken2022ProgramActivityEvent { + pubkey: token_event.pubkey.clone(), + context_slot: token_event.context_slot, + subscription_id: token_event.subscription_id, + }, + ), + )), + _ => Ok(None), + } + } + } +} + +#[cfg(test)] +mod tests { + #[test] + fn classify_token_program_activity_as_spl_token() { + let event = crate::KhbbDomainEvent::TokenProgramActivity( + crate::KhbbTokenProgramActivityEvent { + subscription_id: 1, + source_kind: crate::KhbbWsSubscriptionKind::Program, + source_label: Some(std::string::String::from( + "TokenkegQfeZyiNwAJbNbGKPFXCWuBvf9Ss623VQ5DA", + )), + pubkey: std::string::String::from("SomePubkey"), + context_slot: 100, + token_program_family: std::string::String::from("spl-token"), + }, + ); + let result = super::classify_domain_event(&event); + assert!(result.is_ok()); + let classified_option = result.expect("classify token event"); + assert!(classified_option.is_some()); + match classified_option.expect("classified token event") { + super::KhbbClassifiedDomainEvent::SplTokenProgramActivity(inner) => { + assert_eq!(inner.pubkey, "SomePubkey"); + assert_eq!(inner.context_slot, 100); + } + _ => { + panic!("expected spl-token classified event"); + } + } + } + + #[test] + fn classify_logs_event_as_known_program_activity() { 
+ let event = crate::KhbbDomainEvent::TransactionLogActivity( + crate::KhbbTransactionLogActivityEvent { + subscription_id: 2, + source_kind: crate::KhbbWsSubscriptionKind::Logs, + source_label: None, + signature: std::string::String::from("sig-1"), + has_error: false, + context_slot: 200, + log_count: 2, + logs: vec![ + std::string::String::from( + "Program ComputeBudget111111111111111111111111111111 invoke [1]", + ), + std::string::String::from( + "Program TokenkegQfeZyiNwAJbNbGKPFXCWuBvf9Ss623VQ5DA invoke [1]", + ), + ], + }, + ); + let result = super::classify_domain_event(&event); + assert!(result.is_ok()); + let classified_option = result.expect("classify logs event"); + assert!(classified_option.is_some()); + match classified_option.expect("classified logs event") { + super::KhbbClassifiedDomainEvent::KnownProgramLogActivity(inner) => { + assert_eq!(inner.signature, "sig-1"); + assert_eq!(inner.context_slot, 200); + assert_eq!(inner.programs.len(), 2); + } + _ => { + panic!("expected known program log activity"); + } + } + } + + #[test] + fn classify_logs_event_as_unknown_program_activity() { + let event = crate::KhbbDomainEvent::TransactionLogActivity( + crate::KhbbTransactionLogActivityEvent { + subscription_id: 3, + source_kind: crate::KhbbWsSubscriptionKind::Logs, + source_label: None, + signature: std::string::String::from("sig-2"), + has_error: true, + context_slot: 300, + log_count: 1, + logs: vec![std::string::String::from( + "Program SomeUnknown111111111111111111111111111111 invoke [1]", + )], + }, + ); + let result = super::classify_domain_event(&event); + assert!(result.is_ok()); + let classified_option = result.expect("classify unknown logs event"); + assert!(classified_option.is_some()); + match classified_option.expect("classified unknown logs event") { + super::KhbbClassifiedDomainEvent::UnknownProgramLogActivity(inner) => { + assert_eq!(inner.signature, "sig-2"); + assert!(inner.has_error); + assert_eq!(inner.log_count, 1); + } + _ => { + 
+ panic!("expected unknown program log activity"); + } + } + } +} diff --git a/khbb_lib/src/domain_event.rs b/khbb_lib/src/domain_event.rs index a2c9aca..eadb74a 100644 --- a/khbb_lib/src/domain_event.rs +++ b/khbb_lib/src/domain_event.rs @@ -47,6 +47,8 @@ pub struct KhbbTransactionLogActivityEvent { pub context_slot: u64, /// Number of log lines. pub log_count: usize, + /// Raw logs emitted by the transaction. + pub logs: std::vec::Vec<std::string::String>, } /// Domain event emitted when activity is observed on a token program. @@ -83,45 +85,44 @@ pub(crate) fn derive_domain_event_from_ws_event( parent: slot_event.parent, root: slot_event.root, }))) - } + }, crate::KhbbWsNormalizedEvent::Logs(logs_event) => { - Ok(Some(KhbbDomainEvent::TransactionLogActivity( - KhbbTransactionLogActivityEvent { - subscription_id: logs_event.subscription_id, - source_kind: logs_event.source_kind, - source_label: logs_event.source_label.clone(), - signature: logs_event.signature.clone(), - has_error: logs_event.has_error, - context_slot: logs_event.context_slot, - log_count: logs_event.logs.len(), - }, - ))) - } + Ok(Some(KhbbDomainEvent::TransactionLogActivity(KhbbTransactionLogActivityEvent { + subscription_id: logs_event.subscription_id, + source_kind: logs_event.source_kind, + source_label: logs_event.source_label.clone(), + signature: logs_event.signature.clone(), + has_error: logs_event.has_error, + context_slot: logs_event.context_slot, + log_count: logs_event.logs.len(), + logs: logs_event.logs.clone(), + }))) + }, crate::KhbbWsNormalizedEvent::Program(program_event) => { let label_option = program_event.source_label.as_deref(); let token_program_family = match label_option { Some("TokenkegQfeZyiNwAJbNbGKPFXCWuBvf9Ss623VQ5DA") => { Some(std::string::String::from("spl-token")) - } + }, Some("TokenzQdBNbLqP5VEhdkAS6EPFLC1PHnBqCXEpPxuEb") => { Some(std::string::String::from("spl-token-2022")) - } + }, _ => None, }; match token_program_family { - Some(family) => 
Ok(Some(KhbbDomainEvent::TokenProgramActivity( - KhbbTokenProgramActivityEvent { + Some(family) => { + Ok(Some(KhbbDomainEvent::TokenProgramActivity(KhbbTokenProgramActivityEvent { subscription_id: program_event.subscription_id, source_kind: program_event.source_kind, source_label: program_event.source_label.clone(), pubkey: program_event.pubkey.clone(), context_slot: program_event.context_slot, token_program_family: family, - }, - ))), + }))) + }, None => Ok(None), } - } + }, } } @@ -148,10 +149,10 @@ mod tests { assert_eq!(event.slot, 100); assert_eq!(event.parent, 99); assert_eq!(event.root, 90); - } + }, _ => { panic!("expected slot advanced event"); - } + }, } } @@ -163,10 +164,7 @@ mod tests { source_label: None, signature: std::string::String::from("sig-1"), has_error: false, - logs: vec![ - std::string::String::from("log-1"), - std::string::String::from("log-2"), - ], + logs: vec![std::string::String::from("log-1"), std::string::String::from("log-2")], context_slot: 123, }); @@ -181,10 +179,10 @@ mod tests { assert!(!event.has_error); assert_eq!(event.context_slot, 123); assert_eq!(event.log_count, 2); - } + }, _ => { panic!("expected transaction log activity event"); - } + }, } } @@ -209,10 +207,10 @@ mod tests { assert_eq!(event.pubkey, "SomeTokenAccountPubkey"); assert_eq!(event.context_slot, 456); assert_eq!(event.token_program_family, "spl-token"); - } + }, _ => { panic!("expected token program activity event"); - } + }, } } @@ -221,7 +219,9 @@ mod tests { let ws_event = crate::KhbbWsNormalizedEvent::Program(crate::KhbbWsProgramEvent { subscription_id: 4, source_kind: crate::KhbbWsSubscriptionKind::Program, - source_label: Some(std::string::String::from("UnknownProgram11111111111111111111111111111111")), + source_label: Some(std::string::String::from( + "UnknownProgram11111111111111111111111111111111", + )), pubkey: std::string::String::from("SomePubkey"), context_slot: 789, }); diff --git a/khbb_lib/src/lib.rs b/khbb_lib/src/lib.rs index 
ab7c6e4..06c7533 100644 --- a/khbb_lib/src/lib.rs +++ b/khbb_lib/src/lib.rs @@ -20,6 +20,8 @@ mod tracing_setup; mod solana_rpc_ws; mod ws_event; mod domain_event; +mod program_registry; +mod domain_classifier; /// Runs the listener application bootstrap workflow. pub use crate::app::run_listener_app; @@ -83,3 +85,15 @@ pub use crate::domain_event::KhbbSlotAdvancedEvent; pub use crate::domain_event::KhbbTransactionLogActivityEvent; /// Domain event emitted when activity is observed on a token program. pub use crate::domain_event::KhbbTokenProgramActivityEvent; +/// Known Solana program family used for early classification. +pub use crate::program_registry::KhbbKnownProgram; +/// Classified domain event derived from a first-level domain event. +pub use crate::domain_classifier::KhbbClassifiedDomainEvent; +/// Classified SPL Token program activity. +pub use crate::domain_classifier::KhbbSplTokenProgramActivityEvent; +/// Classified SPL Token-2022 program activity. +pub use crate::domain_classifier::KhbbSplToken2022ProgramActivityEvent; +/// Classified log activity mentioning known programs. +pub use crate::domain_classifier::KhbbKnownProgramLogActivityEvent; +/// Classified log activity not matching any known program. 
+pub use crate::domain_classifier::KhbbUnknownProgramLogActivityEvent; diff --git a/khbb_lib/src/listener.rs b/khbb_lib/src/listener.rs index 1dd2048..7f7c6ea 100644 --- a/khbb_lib/src/listener.rs +++ b/khbb_lib/src/listener.rs @@ -412,6 +412,23 @@ pub async fn run_listener_runtime( root = event.root, "domain slot advanced event" ); + let classified_event_result = + crate::domain_classifier::classify_domain_event( + &crate::KhbbDomainEvent::SlotAdvanced( + event.clone(), + ), + ); + match classified_event_result { + Ok(Some(_)) => {} + Ok(None) => {} + Err(error) => { + tracing::error!( + listener_session_id = session.id, + error = %error, + "failed to classify slot advanced domain event" + ); + } + } } Ok(Some(crate::KhbbDomainEvent::TransactionLogActivity(event))) => { tracing::trace!( @@ -425,6 +442,43 @@ pub async fn run_listener_runtime( log_count = event.log_count, "domain transaction log activity event" ); + let classified_event_result = + crate::domain_classifier::classify_domain_event( + &crate::KhbbDomainEvent::TransactionLogActivity( + event.clone(), + ), + ); + match classified_event_result { + Ok(Some(crate::KhbbClassifiedDomainEvent::KnownProgramLogActivity(classified))) => { + tracing::trace!( + listener_session_id = session.id, + signature = %classified.signature, + has_error = classified.has_error, + context_slot = classified.context_slot, + programs = ?classified.programs, + "classified known program log activity event" + ); + } + Ok(Some(crate::KhbbClassifiedDomainEvent::UnknownProgramLogActivity(classified))) => { + tracing::trace!( + listener_session_id = session.id, + signature = %classified.signature, + has_error = classified.has_error, + context_slot = classified.context_slot, + log_count = classified.log_count, + "classified unknown program log activity event" + ); + } + Ok(Some(_)) => {} + Ok(None) => {} + Err(error) => { + tracing::error!( + listener_session_id = session.id, + error = %error, + "failed to classify transaction log activity 
domain event" + ); + } + } } Ok(Some(crate::KhbbDomainEvent::TokenProgramActivity(event))) => { tracing::trace!( @@ -437,6 +491,41 @@ pub async fn run_listener_runtime( token_program_family = %event.token_program_family, "domain token program activity event" ); + let classified_event_result = + crate::domain_classifier::classify_domain_event( + &crate::KhbbDomainEvent::TokenProgramActivity( + event.clone(), + ), + ); + match classified_event_result { + Ok(Some(crate::KhbbClassifiedDomainEvent::SplTokenProgramActivity(classified))) => { + tracing::trace!( + listener_session_id = session.id, + subscription_id = classified.subscription_id, + pubkey = %classified.pubkey, + context_slot = classified.context_slot, + "classified spl-token program activity event" + ); + } + Ok(Some(crate::KhbbClassifiedDomainEvent::SplToken2022ProgramActivity(classified))) => { + tracing::trace!( + listener_session_id = session.id, + subscription_id = classified.subscription_id, + pubkey = %classified.pubkey, + context_slot = classified.context_slot, + "classified spl-token-2022 program activity event" + ); + } + Ok(Some(_)) => {} + Ok(None) => {} + Err(error) => { + tracing::error!( + listener_session_id = session.id, + error = %error, + "failed to classify token program activity domain event" + ); + } + } } Ok(None) => { tracing::trace!( diff --git a/khbb_lib/src/program_registry.rs b/khbb_lib/src/program_registry.rs new file mode 100644 index 0000000..4afc409 --- /dev/null +++ b/khbb_lib/src/program_registry.rs @@ -0,0 +1,82 @@ +// file: khbb_lib/src/program_registry.rs + +//! Registry of known Solana programs used for early domain classification. + +/// Known Solana program family. +#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)] +pub enum KhbbKnownProgram { + /// SPL Token program. + SplToken, + /// SPL Token-2022 program. + SplToken2022, + /// System program. + System, + /// Compute budget program. 
+ ComputeBudget, + /// Associated token account program. + AssociatedTokenAccount, +} + +/// Returns the known program classification for a given program id. +pub(crate) fn classify_known_program_id(program_id: &str) -> std::option::Option<KhbbKnownProgram> { + match program_id { + "TokenkegQfeZyiNwAJbNbGKPFXCWuBvf9Ss623VQ5DA" => Some(KhbbKnownProgram::SplToken), + "TokenzQdBNbLqP5VEhdkAS6EPFLC1PHnBqCXEpPxuEb" => Some(KhbbKnownProgram::SplToken2022), + "11111111111111111111111111111111" => Some(KhbbKnownProgram::System), + "ComputeBudget111111111111111111111111111111" => Some(KhbbKnownProgram::ComputeBudget), + "ATokenGPvbdGVxr1b2hvZbsiqW5xWH25efTNsLJA8knL" => { + Some(KhbbKnownProgram::AssociatedTokenAccount) + }, + _ => None, + } +} + +/// Detects known program mentions in a transaction log line. +/// +/// This is intentionally simple and string-based in the first version. +pub(crate) fn classify_known_program_from_log_line( + log_line: &str, +) -> std::option::Option<KhbbKnownProgram> { + if log_line.contains("Program TokenkegQfeZyiNwAJbNbGKPFXCWuBvf9Ss623VQ5DA") { + return Some(KhbbKnownProgram::SplToken); + } + if log_line.contains("Program TokenzQdBNbLqP5VEhdkAS6EPFLC1PHnBqCXEpPxuEb") { + return Some(KhbbKnownProgram::SplToken2022); + } + if log_line.contains("Program 11111111111111111111111111111111") { + return Some(KhbbKnownProgram::System); + } + if log_line.contains("Program ComputeBudget111111111111111111111111111111") { + return Some(KhbbKnownProgram::ComputeBudget); + } + if log_line.contains("Program ATokenGPvbdGVxr1b2hvZbsiqW5xWH25efTNsLJA8knL") { + return Some(KhbbKnownProgram::AssociatedTokenAccount); + } + None +} + +#[cfg(test)] +mod tests { + #[test] + fn classify_known_program_id_detects_spl_token() { + let result = + super::classify_known_program_id("TokenkegQfeZyiNwAJbNbGKPFXCWuBvf9Ss623VQ5DA"); + assert_eq!(result, Some(super::KhbbKnownProgram::SplToken)); + } + + #[test] + fn classify_known_program_from_log_line_detects_compute_budget() { + let result = 
super::classify_known_program_from_log_line( + "Program ComputeBudget111111111111111111111111111111 invoke [1]", + ); + assert_eq!(result, Some(super::KhbbKnownProgram::ComputeBudget)); + } + + #[test] + fn classify_known_program_from_log_line_returns_none_for_unknown_program() { + let result = super::classify_known_program_from_log_line( + "Program SomeUnknown111111111111111111111111111111 invoke [1]", + ); + assert!(result.is_none()); + } +}