// file: kb_lib/src/protocol_candidate_recording.rs //! Protocol candidate recording. //! //! This module records candidate protocol/program instructions for transactions //! that were not fully decoded by the current DEX decoders. /// Input used to record protocol candidates for one classified transaction. pub(crate) struct ProtocolCandidateRecordingInput<'a> { /// Database connection. pub(crate) database: &'a crate::Database, /// Persisted transaction. pub(crate) transaction: &'a crate::ChainTransactionDto, /// Internal transaction id. pub(crate) transaction_id: i64, /// Projected instructions for the transaction. pub(crate) instructions: &'a [crate::ChainInstructionDto], /// Persisted classification kind. pub(crate) classification_kind: &'a str, } /// Records protocol candidates for one classified transaction. /// /// Existing candidates for the same transaction are deleted first so replay is /// deterministic. pub(crate) async fn record_protocol_candidates_for_classification( input: crate::protocol_candidate_recording::ProtocolCandidateRecordingInput<'_>, ) -> Result { let delete_result = crate::query_protocol_candidates_delete_by_transaction_id( input.database, input.transaction_id, ) .await; if let Err(error) = delete_result { return Err(error); } let candidate_specs = crate::protocol_candidate_recording::build_protocol_candidate_specs_for_classification( input.transaction, input.transaction_id, input.instructions, input.classification_kind, ); let mut inserted_count = 0_usize; for candidate_spec in candidate_specs { let dto = crate::ProtocolCandidateDto::new( input.transaction_id, candidate_spec.instruction_id, input.transaction.signature.clone(), input.transaction.slot, candidate_spec.program_id, candidate_spec.program_name_hint, candidate_spec.candidate_protocol, candidate_spec.candidate_surface, candidate_spec.reason, candidate_spec.evidence_json, ); let insert_result = crate::query_protocol_candidates_insert(input.database, &dto).await; match insert_result { Ok(_) => { inserted_count += 1; }, Err(error) => return Err(error), } } return Ok(inserted_count); } struct ProtocolCandidateSpec { instruction_id: std::option::Option, program_id: std::string::String, program_name_hint: std::option::Option, candidate_protocol: std::option::Option, candidate_surface: std::option::Option, reason: std::string::String, evidence_json: std::string::String, } fn build_protocol_candidate_specs_for_classification( transaction: &crate::ChainTransactionDto, transaction_id: i64, instructions: &[crate::ChainInstructionDto], classification_kind: &str, ) -> std::vec::Vec { if classification_kind == "known_dex_program_unclassified" { return build_known_dex_program_candidate_specs(transaction, transaction_id, instructions); } if classification_kind == "unknown_or_unclassified" { return build_unknown_program_candidate_specs(transaction, transaction_id, instructions); } return std::vec::Vec::new(); } fn build_known_dex_program_candidate_specs( transaction: &crate::ChainTransactionDto, transaction_id: i64, instructions: &[crate::ChainInstructionDto], ) -> std::vec::Vec { let mut specs = std::vec::Vec::new(); for instruction in instructions { let program_id = match instruction.program_id.clone() { Some(program_id) => program_id, None => continue, }; let known_protocol = known_dex_protocol_name(program_id.as_str()); let known_protocol = match known_protocol { Some(known_protocol) => known_protocol, None => continue, }; let evidence_json = build_instruction_evidence_json( transaction, transaction_id, instruction, "known_dex_program_without_decoded_event", ); specs.push(ProtocolCandidateSpec { instruction_id: instruction.id, program_id, program_name_hint: instruction.program_name.clone(), candidate_protocol: Some(known_protocol.to_string()), candidate_surface: None, reason: "known DEX program instruction did not produce a decoded DEX event".to_string(), evidence_json, }); } return specs; } fn build_unknown_program_candidate_specs( transaction: &crate::ChainTransactionDto, transaction_id: i64, instructions: &[crate::ChainInstructionDto], ) -> std::vec::Vec { let mut specs = std::vec::Vec::new(); for instruction in instructions { let program_id = match instruction.program_id.clone() { Some(program_id) => program_id, None => continue, }; if should_ignore_program_id(program_id.as_str()) { continue; } let surface_hint = infer_candidate_surface(program_id.as_str(), instruction); let evidence_json = build_instruction_evidence_json( transaction, transaction_id, instruction, "unknown_or_unclassified_program_instruction", ); specs.push(ProtocolCandidateSpec { instruction_id: instruction.id, program_id, program_name_hint: instruction.program_name.clone(), candidate_protocol: None, candidate_surface: surface_hint, reason: "transaction has no decoded DEX event and includes a non-ignored program instruction".to_string(), evidence_json, }); } return specs; } fn build_instruction_evidence_json( transaction: &crate::ChainTransactionDto, transaction_id: i64, instruction: &crate::ChainInstructionDto, reason_code: &str, ) -> std::string::String { let evidence_value = serde_json::json!({ "reasonCode": reason_code, "transactionId": transaction_id, "signature": transaction.signature, "slot": transaction.slot, "instructionId": instruction.id, "parentInstructionId": instruction.parent_instruction_id, "instructionIndex": instruction.instruction_index, "programId": instruction.program_id, "programName": instruction.program_name, "stackHeight": instruction.stack_height, "parsedType": instruction.parsed_type }); let evidence_json_result = serde_json::to_string(&evidence_value); match evidence_json_result { Ok(evidence_json) => return evidence_json, Err(error) => { return format!( "{{\"reasonCode\":\"evidence_serialization_failed\",\"error\":\"{}\"}}", error ); }, } } fn known_dex_protocol_name(program_id: &str) -> std::option::Option<&'static str> { let matrix_entry = match crate::dex_support_matrix_entry_by_program_id(program_id) { Some(matrix_entry) => matrix_entry, None => return None, }; return Some(matrix_entry.code); } fn should_ignore_program_id(program_id: &str) -> bool { if program_id == crate::SYSTEM_PROGRAM_ID { return true; } if program_id == crate::SPL_TOKEN_PROGRAM_ID { return true; } if program_id == crate::SPL_TOKEN_2022_PROGRAM_ID { return true; } if program_id == crate::ASSOCIATED_TOKEN_PROGRAM_ID { return true; } if program_id == crate::COMPUTE_BUDGET_PROGRAM_ID { return true; } if program_id == crate::ADDRESS_LOOKUP_TABLE_PROGRAM_ID { return true; } if program_id == crate::BPF_LOADER_DEPRECATED_PROGRAM_ID { return true; } if program_id == crate::BPF_LOADER_UPGRADEABLE_PROGRAM_ID { return true; } if program_id == crate::LOADER_V4_PROGRAM_ID { return true; } if program_id == crate::NATIVE_LOADER_PROGRAM_ID { return true; } if program_id == crate::CONFIG_PROGRAM_ID { return true; } if program_id == crate::VOTE_PROGRAM_ID { return true; } if program_id == crate::STAKE_PROGRAM_ID { return true; } if program_id == crate::STAKE_CONFIG_PROGRAM_ID { return true; } if program_id == crate::ED25519_PROGRAM_ID { return true; } if program_id == crate::SECP256K1_PROGRAM_ID { return true; } if program_id == crate::SECP256R1_PROGRAM_ID { return true; } if program_id == crate::ZK_TOKEN_PROOF_PROGRAM_ID { return true; } if program_id == crate::ZK_ELGAMAL_PROOF_PROGRAM_ID { return true; } if program_id == crate::SYSVAR_PROGRAM_ID { return true; } if program_id == crate::SYSVAR_CLOCK_PROGRAM_ID { return true; } if program_id == crate::SYSVAR_RENT_PROGRAM_ID { return true; } if program_id == crate::SYSVAR_INSTRUCTIONS_PROGRAM_ID { return true; } if program_id == crate::SYSVAR_EPOCH_REWARDS_PROGRAM_ID { return true; } if program_id == crate::SYSVAR_EPOCH_SCHEDULE_PROGRAM_ID { return true; } if program_id == crate::SYSVAR_FEES_PROGRAM_ID { return true; } if program_id == crate::SYSVAR_LAST_RESTART_SLOT_PROGRAM_ID { return true; } if program_id == crate::SYSVAR_RECENT_BLOCKHASHES_PROGRAM_ID { return true; } if program_id == crate::SYSVAR_REWARDS_PROGRAM_ID { return true; } if program_id == crate::SYSVAR_SLOT_HASHES_PROGRAM_ID { return true; } if program_id == crate::SYSVAR_SLOT_HISTORY_PROGRAM_ID { return true; } if program_id == crate::SYSVAR_STAKE_HISTORY_PROGRAM_ID { return true; } return false; } fn infer_candidate_surface( program_id: &str, instruction: &crate::ChainInstructionDto, ) -> std::option::Option { if program_id == crate::ARBITRAGE_BOT_6MWVT_PROGRAM_ID { return Some("arbitrage_bot".to_string()); } if is_known_launch_surface_program_id(program_id) { return Some("launch_surface".to_string()); } if let Some(program_name) = instruction.program_name.as_deref() { let normalized = program_name.to_ascii_lowercase(); if normalized.contains("arbitrage") { return Some("arbitrage_bot".to_string()); } if normalized.contains("sandwich") { return Some("arbitrage_bot".to_string()); } if normalized.contains("launch") { return Some("launch_surface".to_string()); } if normalized.contains("meteora") { return Some("meteora_related".to_string()); } if normalized.contains("raydium") { return Some("raydium_related".to_string()); } if normalized.contains("pump") { return Some("pump_related".to_string()); } if normalized.contains("swap") { return Some("swap_related".to_string()); } } return None; } fn is_known_launch_surface_program_id(_program_id: &str) -> bool { // Filled in later after program ids are verified from live corpus and // official or sufficiently reliable references. return false; } #[cfg(test)] mod tests { #[test] fn known_dex_candidate_uses_support_matrix_for_priority_dexes() { let samples = [ (crate::PUMP_SWAP_PROGRAM_ID, "pump_swap"), (crate::RAYDIUM_CPMM_PROGRAM_ID, "raydium_cpmm"), (crate::RAYDIUM_CLMM_PROGRAM_ID, "raydium_clmm"), (crate::METEORA_DLMM_PROGRAM_ID, "meteora_dlmm"), (crate::METEORA_DAMM_V1_PROGRAM_ID, "meteora_damm_v1"), ]; for (program_id, expected_protocol) in samples { let protocol = match super::known_dex_protocol_name(program_id) { Some(protocol) => protocol, None => panic!("expected known protocol for {}", program_id), }; assert_eq!(protocol, expected_protocol); } } #[test] fn associated_token_program_is_ignored() { let transaction = test_transaction(); let instructions = vec![test_instruction( 0, Some(crate::ASSOCIATED_TOKEN_PROGRAM_ID.to_string()), Some("spl-associated-token-account".to_string()), )]; let specs = super::build_protocol_candidate_specs_for_classification( &transaction, 1, &instructions, "unknown_or_unclassified", ); assert_eq!(specs.len(), 0); } #[test] fn meteora_dlmm_is_known_dex_protocol() { let transaction = test_transaction(); let instructions = vec![test_instruction( 0, Some(crate::METEORA_DLMM_PROGRAM_ID.to_string()), Some("Meteora DLMM".to_string()), )]; let specs = super::build_protocol_candidate_specs_for_classification( &transaction, 1, &instructions, "known_dex_program_unclassified", ); assert_eq!(specs.len(), 1); assert_eq!(specs[0].program_id, crate::METEORA_DLMM_PROGRAM_ID); assert_eq!(specs[0].candidate_protocol, Some("meteora_dlmm".to_string())); } #[test] fn known_arbitrage_bot_gets_surface_hint() { let transaction = test_transaction(); let instructions = vec![test_instruction( 0, Some(crate::ARBITRAGE_BOT_6MWVT_PROGRAM_ID.to_string()), None, )]; let specs = super::build_protocol_candidate_specs_for_classification( &transaction, 1, &instructions, "unknown_or_unclassified", ); assert_eq!(specs.len(), 1); assert_eq!(specs[0].candidate_surface, Some("arbitrage_bot".to_string())); } fn test_instruction( instruction_index: u32, program_id: std::option::Option, program_name: std::option::Option, ) -> crate::ChainInstructionDto { return crate::ChainInstructionDto::new( 1, None, instruction_index, None, program_id, program_name, None, "[]".to_string(), None, None, Some(serde_json::json!({}).to_string()), ); } fn test_transaction() -> crate::ChainTransactionDto { let mut transaction = crate::ChainTransactionDto::new( "signature_1".to_string(), Some(123), None, Some("test".to_string()), None, None, None, serde_json::json!({}).to_string(), ); transaction.id = Some(1); return transaction; } #[test] fn known_dex_candidate_is_built_for_unclassified_known_program() { let transaction = test_transaction(); let instructions = vec![test_instruction( 0, Some(crate::METEORA_DAMM_V2_PROGRAM_ID.to_string()), Some("Meteora".to_string()), )]; let specs = super::build_protocol_candidate_specs_for_classification( &transaction, 1, &instructions, "known_dex_program_unclassified", ); assert_eq!(specs.len(), 1); assert_eq!(specs[0].program_id, crate::METEORA_DAMM_V2_PROGRAM_ID); assert_eq!(specs[0].candidate_protocol, Some("meteora_damm_v2".to_string())); } #[test] fn ignored_program_is_not_recorded_as_unknown_candidate() { let transaction = test_transaction(); let instructions = vec![test_instruction( 0, Some(crate::SPL_TOKEN_PROGRAM_ID.to_string()), Some("spl-token".to_string()), )]; let specs = super::build_protocol_candidate_specs_for_classification( &transaction, 1, &instructions, "unknown_or_unclassified", ); assert_eq!(specs.len(), 0); } #[test] fn unknown_non_ignored_program_is_recorded() { let transaction = test_transaction(); let instructions = vec![test_instruction( 0, Some("UnknownProgram111111111111111111111111111111111".to_string()), Some("unknown swap program".to_string()), )]; let specs = super::build_protocol_candidate_specs_for_classification( &transaction, 1, &instructions, "unknown_or_unclassified", ); assert_eq!(specs.len(), 1); assert_eq!(specs[0].candidate_surface, Some("swap_related".to_string())); } }