// file: kb_lib/src/transaction_classification.rs //! Transaction classification service. //! //! This service classifies projected Solana transactions after transaction //! projection and optional DEX decoding. //! //! The first version is intentionally deterministic and conservative: //! decoded DEX events win over program-id hints, and unknown transactions are //! preserved as explicit `unknown_or_unclassified` rows. /// Service used to classify projected Solana transactions. #[derive(Debug, Clone)] pub struct TransactionClassificationService { database: std::sync::Arc, } impl TransactionClassificationService { /// Creates a transaction classification service. pub fn new(database: std::sync::Arc) -> Self { return Self { database }; } /// Classifies one transaction by signature and persists the classification. pub async fn classify_transaction_by_signature( &self, signature: &str, ) -> Result { let context_result = load_transaction_classification_context(self.database.as_ref(), signature).await; let context = match context_result { Ok(context) => context, Err(error) => return Err(error), }; let classification = classify_transaction_context(&context); let dto = crate::TransactionClassificationDto::new( context.transaction_id, context.transaction.signature.clone(), context.transaction.slot, classification.kind.to_string(), classification.primary_protocol, classification.primary_program_id, classification.confidence_level, classification.reason, classification.evidence_json, ); let upsert_result = crate::query_transaction_classifications_upsert(self.database.as_ref(), &dto).await; if let Err(error) = upsert_result { return Err(error); } let persisted_result = crate::query_transaction_classifications_get_by_transaction_id( self.database.as_ref(), context.transaction_id, ) .await; let persisted_option = match persisted_result { Ok(persisted_option) => persisted_option, Err(error) => return Err(error), }; let persisted = match persisted_option { Some(persisted) => persisted, None => { return Err(crate::Error::InvalidState(format!( "transaction classification for '{}' disappeared after upsert", signature ))); }, }; let candidate_recording_result = crate::protocol_candidate_recording::record_protocol_candidates_for_classification( crate::protocol_candidate_recording::ProtocolCandidateRecordingInput { database: self.database.as_ref(), transaction: &context.transaction, transaction_id: context.transaction_id, instructions: &context.instructions, classification_kind: persisted.classification_kind.as_str(), }, ) .await; match candidate_recording_result { Ok(candidate_count) => { tracing::trace!( signature = %context.transaction.signature, classification_kind = %persisted.classification_kind, protocol_candidate_count = candidate_count, "transaction protocol candidates recorded" ); }, Err(error) => return Err(error), } return Ok(persisted); } } struct TransactionClassificationContext { transaction: crate::ChainTransactionDto, transaction_id: i64, instructions: std::vec::Vec, decoded_events: std::vec::Vec, } struct TransactionClassificationDecision { kind: &'static str, primary_protocol: std::option::Option, primary_program_id: std::option::Option, confidence_level: i16, reason: std::string::String, evidence_json: std::string::String, } #[derive(Debug, Clone)] struct KnownDexProgramMatch { protocol_name: &'static str, program_id: std::string::String, instruction_id: std::option::Option, instruction_index: u32, } async fn load_transaction_classification_context( database: &crate::Database, signature: &str, ) -> Result { let transaction_result = crate::query_chain_transactions_get_by_signature(database, signature).await; let transaction_option = match transaction_result { Ok(transaction_option) => transaction_option, Err(error) => return Err(error), }; let transaction = match transaction_option { Some(transaction) => transaction, None => { return Err(crate::Error::InvalidState(format!( "cannot classify unknown chain transaction '{}'", signature ))); }, }; let transaction_id = match transaction.id { Some(transaction_id) => transaction_id, None => { return Err(crate::Error::InvalidState(format!( "chain transaction '{}' has no internal id", signature ))); }, }; let instructions_result = crate::query_chain_instructions_list_by_transaction_id(database, transaction_id).await; let instructions = match instructions_result { Ok(instructions) => instructions, Err(error) => return Err(error), }; let decoded_events_result = crate::query_dex_decoded_events_list_by_transaction_id(database, transaction_id).await; let decoded_events = match decoded_events_result { Ok(decoded_events) => decoded_events, Err(error) => return Err(error), }; return Ok(TransactionClassificationContext { transaction, transaction_id, instructions, decoded_events, }); } fn classify_transaction_context( context: &TransactionClassificationContext, ) -> TransactionClassificationDecision { if !context.decoded_events.is_empty() { return classify_from_decoded_events(context); } let known_program_matches = find_known_dex_program_matches(&context.instructions); if !known_program_matches.is_empty() { return classify_from_known_program_matches(context, &known_program_matches); } return build_decision( "unknown_or_unclassified", None, None, 25, "transaction has no decoded DEX event and no known DEX program id".to_string(), serde_json::json!({ "transactionId": context.transaction_id, "signature": context.transaction.signature, "slot": context.transaction.slot, "instructionCount": context.instructions.len(), "decodedEventCount": context.decoded_events.len() }), ); } fn classify_from_decoded_events( context: &TransactionClassificationContext, ) -> TransactionClassificationDecision { let mut first_protocol = None; let mut first_program_id = None; let mut trade_event_count = 0_i64; let mut non_trade_event_count = 0_i64; let mut decoded_event_evidence = std::vec::Vec::new(); for decoded_event in &context.decoded_events { if first_protocol.is_none() { first_protocol = Some(decoded_event.protocol_name.clone()); } if first_program_id.is_none() { first_program_id = Some(decoded_event.program_id.clone()); } let payload_value_result = serde_json::from_str::(decoded_event.payload_json.as_str()); let payload_value = match payload_value_result { Ok(payload_value) => payload_value, Err(_) => serde_json::Value::Null, }; let is_trade = crate::is_decoded_event_trade_candidate( decoded_event.event_kind.as_str(), &payload_value, ); if is_trade { trade_event_count += 1; } else { non_trade_event_count += 1; } decoded_event_evidence.push(serde_json::json!({ "id": decoded_event.id, "protocolName": decoded_event.protocol_name, "programId": decoded_event.program_id, "eventKind": decoded_event.event_kind, "poolAccount": decoded_event.pool_account, "tradeCandidate": is_trade })); } if trade_event_count > 0_i64 { return build_decision( "dex_trade", first_protocol, first_program_id, 100, "transaction has at least one decoded DEX trade event".to_string(), serde_json::json!({ "transactionId": context.transaction_id, "signature": context.transaction.signature, "slot": context.transaction.slot, "decodedEventCount": context.decoded_events.len(), "tradeEventCount": trade_event_count, "nonTradeEventCount": non_trade_event_count, "decodedEvents": decoded_event_evidence }), ); } return build_decision( "dex_non_trade", first_protocol, first_program_id, 95, "transaction has decoded DEX events but no trade candidate".to_string(), serde_json::json!({ "transactionId": context.transaction_id, "signature": context.transaction.signature, "slot": context.transaction.slot, "decodedEventCount": context.decoded_events.len(), "tradeEventCount": trade_event_count, "nonTradeEventCount": non_trade_event_count, "decodedEvents": decoded_event_evidence }), ); } fn classify_from_known_program_matches( context: &TransactionClassificationContext, known_program_matches: &[KnownDexProgramMatch], ) -> TransactionClassificationDecision { let first_match = &known_program_matches[0]; let mut evidence_items = std::vec::Vec::new(); for known_program_match in known_program_matches { evidence_items.push(serde_json::json!({ "protocolName": known_program_match.protocol_name, "programId": known_program_match.program_id, "instructionId": known_program_match.instruction_id, "instructionIndex": known_program_match.instruction_index })); } return build_decision( "known_dex_program_unclassified", Some(first_match.protocol_name.to_string()), Some(first_match.program_id.to_string()), 75, "transaction has known DEX program instructions but no decoded DEX event".to_string(), serde_json::json!({ "transactionId": context.transaction_id, "signature": context.transaction.signature, "slot": context.transaction.slot, "instructionCount": context.instructions.len(), "decodedEventCount": context.decoded_events.len(), "knownDexProgramMatches": evidence_items }), ); } fn build_decision( kind: &'static str, primary_protocol: std::option::Option, primary_program_id: std::option::Option, confidence_level: i16, reason: std::string::String, evidence_value: serde_json::Value, ) -> TransactionClassificationDecision { let evidence_json_result = serde_json::to_string(&evidence_value); let evidence_json = match evidence_json_result { Ok(evidence_json) => evidence_json, Err(error) => { return TransactionClassificationDecision { kind: "unknown_or_unclassified", primary_protocol: None, primary_program_id: None, confidence_level: 0, reason: format!("cannot serialize classification evidence: {}", error), evidence_json: "{}".to_string(), }; }, }; return TransactionClassificationDecision { kind, primary_protocol, primary_program_id, confidence_level, reason, evidence_json, }; } fn find_known_dex_program_matches( instructions: &[crate::ChainInstructionDto], ) -> std::vec::Vec { let mut matches = std::vec::Vec::new(); for instruction in instructions { let program_match = known_dex_program_match(instruction); let program_match = match program_match { Some(program_match) => program_match, None => continue, }; matches.push(program_match); } return matches; } fn known_dex_program_match( instruction: &crate::ChainInstructionDto, ) -> std::option::Option { let program_id = match instruction.program_id.as_deref() { Some(program_id) => program_id, None => return None, }; let matrix_entry = match crate::dex_support_matrix_entry_by_program_id(program_id) { Some(matrix_entry) => matrix_entry, None => return None, }; return Some(KnownDexProgramMatch { protocol_name: matrix_entry.code, program_id: program_id.to_string(), instruction_id: instruction.id, instruction_index: instruction.instruction_index, }); } #[cfg(test)] mod tests { fn test_instruction( program_id: std::option::Option, ) -> crate::ChainInstructionDto { return crate::ChainInstructionDto::new( 1, None, 0, None, program_id, None, None, "[]".to_string(), None, None, Some(serde_json::json!({}).to_string()), ); } fn test_transaction() -> crate::ChainTransactionDto { let mut transaction = crate::ChainTransactionDto::new( "signature_1".to_string(), Some(123), None, Some("test".to_string()), None, None, None, serde_json::json!({}).to_string(), ); transaction.id = Some(1); return transaction; } #[test] fn known_dex_program_ids_are_matched() { let instruction = test_instruction(Some(crate::RAYDIUM_CPMM_PROGRAM_ID.to_string())); let program_match = match super::known_dex_program_match(&instruction) { Some(program_match) => program_match, None => { panic!("expected raydium_cpmm program match"); }, }; assert_eq!(program_match.protocol_name, "raydium_cpmm"); assert_eq!(program_match.program_id, crate::RAYDIUM_CPMM_PROGRAM_ID); assert_eq!(program_match.instruction_index, 0); } #[test] fn known_program_match_uses_support_matrix_for_priority_dexes() { let samples = [ (crate::PUMP_SWAP_PROGRAM_ID, "pump_swap"), (crate::RAYDIUM_CPMM_PROGRAM_ID, "raydium_cpmm"), (crate::RAYDIUM_CLMM_PROGRAM_ID, "raydium_clmm"), (crate::METEORA_DLMM_PROGRAM_ID, "meteora_dlmm"), (crate::METEORA_DAMM_V1_PROGRAM_ID, "meteora_damm_v1"), ]; for (program_id, expected_protocol) in samples { let instruction = test_instruction(Some(program_id.to_string())); let program_match = match super::known_dex_program_match(&instruction) { Some(program_match) => program_match, None => panic!("expected program match for {}", expected_protocol), }; assert_eq!(program_match.protocol_name, expected_protocol); assert_eq!(program_match.program_id, program_id); } } #[test] fn unknown_program_id_is_not_matched() { let instruction = test_instruction(Some("UnknownProgram111111111111111111111111111111111".to_string())); let program_match = super::known_dex_program_match(&instruction); assert!(program_match.is_none()); } #[test] fn unknown_context_is_classified_as_unknown_or_unclassified() { let transaction = test_transaction(); let context = super::TransactionClassificationContext { transaction, transaction_id: 1, instructions: std::vec::Vec::new(), decoded_events: std::vec::Vec::new(), }; let decision = super::classify_transaction_context(&context); assert_eq!(decision.kind, "unknown_or_unclassified"); assert_eq!(decision.confidence_level, 25); } }