// file: kb_lib/src/onchain_dex_pair_discovery.rs //! On-chain DEX pair/pool discovery helpers used by Demo3. //! //! This module intentionally does not persist discovered transactions and does //! not promote a program or pool as verified. It only queries Solana RPC for //! recent signatures of a DEX program id, fetches the corresponding //! transactions, and extracts candidate signatures / pools / mints that can be //! fed later into the existing backfill pipeline. /// Request for on-chain DEX pair/pool discovery. #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] #[serde(rename_all = "camelCase")] pub struct OnchainDexPairDiscoveryRequestDto { /// Optional DEX code from the support matrix. pub dex_code: std::option::Option, /// Optional Solana program id. When absent, `dex_code` must resolve to a verified matrix program id. pub program_id: std::option::Option, /// HTTP endpoint role used to query Solana RPC. pub http_role: std::string::String, /// Maximum number of recent signatures to inspect for the program id. pub signature_limit: u32, /// Maximum number of transactions to fetch from the signature list. pub transaction_limit: u32, /// Maximum number of candidate rows to return. pub candidate_limit: u32, } /// Result of one on-chain DEX pair/pool discovery run. #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] #[serde(rename_all = "camelCase")] pub struct OnchainDexPairDiscoveryResultDto { /// Normalized request actually used by the service. pub request: crate::OnchainDexPairDiscoveryRequestDto, /// DEX code resolved from the matrix when available. pub resolved_dex_code: std::option::Option, /// Program id scanned through `getSignaturesForAddress`. pub resolved_program_id: std::string::String, /// Number of signatures returned by the RPC endpoint. pub fetched_signature_count: usize, /// Number of transactions fetched with `getTransaction`. pub fetched_transaction_count: usize, /// Number of `getTransaction` calls returning null. 
pub missing_transaction_count: usize, /// Number of failed transactions encountered. pub failed_transaction_count: usize, /// Number of candidate rows returned. pub candidate_count: usize, /// Candidate on-chain rows. pub candidates: std::vec::Vec, } /// Candidate transaction/instruction observed on-chain for one DEX program id. #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] #[serde(rename_all = "camelCase")] pub struct OnchainDexPairCandidateDto { /// Transaction signature. pub signature: std::string::String, /// Transaction slot when present. pub slot: std::option::Option, /// Transaction block time when present. pub block_time: std::option::Option, /// Whether the transaction has a non-null `meta.err` value. pub failed: bool, /// Program id matched by this candidate. pub program_id: std::string::String, /// DEX code resolved from the support matrix when available. pub dex_code: std::option::Option, /// Candidate kind inferred from decoded instruction data, parsed fields or logs. pub candidate_kind: std::string::String, /// Confidence of the candidate extraction: `high`, `medium` or `low`. pub confidence: std::string::String, /// Top-level instruction index when known. pub instruction_index: std::option::Option, /// Inner instruction index when this candidate came from `meta.innerInstructions`. pub inner_instruction_index: std::option::Option, /// Instruction name inferred from parsed payload or logs. pub instruction_name: std::option::Option, /// Candidate pool address when it can be extracted safely or heuristically. pub pool_address: std::option::Option, /// Candidate token A/base mint when it can be extracted. pub token_a_mint: std::option::Option, /// Candidate token B/quote mint when it can be extracted. pub token_b_mint: std::option::Option, /// Verified pool address when a DEX decoder or stable layout proves it. pub verified_pool_address: std::option::Option, /// Token mints observed generically from transaction token balance arrays. 
pub observed_token_mints: std::vec::Vec, /// Token balance deltas observed through transaction metadata. pub token_balance_deltas: std::vec::Vec, /// Program-owned or writable accounts that may be pool/config/state accounts. pub candidate_pool_accounts: std::vec::Vec, /// Token accounts that may be pool vaults. pub candidate_token_vault_accounts: std::vec::Vec, /// Other candidate accounts attached to the matched instruction. pub candidate_program_accounts: std::vec::Vec, /// Short account sample from the matched instruction. pub account_samples: std::vec::Vec, /// Short log sample from the transaction. pub log_samples: std::vec::Vec, /// Suggested next action for the user. pub backfill_hint: std::string::String, } /// Token-balance delta observed in one transaction through Solana transaction metadata. #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] #[serde(rename_all = "camelCase")] pub struct OnchainDexTokenBalanceDeltaDto { /// Token account index in the transaction message when available. pub account_index: std::option::Option, /// Token account address resolved from the transaction account keys. pub account_address: std::option::Option, /// SPL Token or Token-2022 mint address. pub mint: std::string::String, /// Token account owner when Solana RPC exposes it. pub owner: std::option::Option, /// Token program id when Solana RPC exposes it. pub token_program: std::option::Option, /// Raw token amount before the transaction. pub pre_amount_raw: std::option::Option, /// Raw token amount after the transaction. pub post_amount_raw: std::option::Option, /// Signed raw delta when both raw amounts fit in a signed integer. pub delta_raw: std::option::Option, } /// Candidate account inferred from generic transaction evidence. #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] #[serde(rename_all = "camelCase")] pub struct OnchainDexCandidateAccountDto { /// Account address. 
pub address: std::string::String, /// Account index in the transaction message when known. pub account_index: std::option::Option, /// Whether the account is writable in the transaction message when known. pub writable: std::option::Option, /// Whether the account is a signer in the transaction message when known. pub signer: std::option::Option, /// Generic role inferred by Demo3, for example `program_owned_candidate` or `token_vault_candidate`. pub inferred_role: std::string::String, /// Confidence of the generic account inference. pub confidence: std::string::String, /// Short reason explaining why the account is listed. pub reason: std::string::String, } /// On-chain pair/pool discovery service. #[derive(Debug, Clone)] pub struct OnchainDexPairDiscoveryService { http_pool: std::sync::Arc, } impl OnchainDexPairDiscoveryService { /// Creates a new on-chain DEX discovery service. pub fn new(http_pool: std::sync::Arc) -> Self { return Self { http_pool }; } /// Searches recent on-chain transactions for DEX program candidates. 
pub async fn discover( &self, request: crate::OnchainDexPairDiscoveryRequestDto, ) -> Result { let normalized_request = normalize_request(request); let resolved = match resolve_program_id(&normalized_request) { Ok(resolved) => resolved, Err(error) => return Err(error), }; let signatures_result = self .fetch_signatures( normalized_request.http_role.as_str(), resolved.program_id.clone(), normalized_request.signature_limit as usize, ) .await; let signatures = match signatures_result { Ok(signatures) => signatures, Err(error) => return Err(error), }; let mut result = crate::OnchainDexPairDiscoveryResultDto { request: normalized_request.clone(), resolved_dex_code: resolved.dex_code.clone(), resolved_program_id: resolved.program_id.clone(), fetched_signature_count: signatures.len(), fetched_transaction_count: 0, missing_transaction_count: 0, failed_transaction_count: 0, candidate_count: 0, candidates: std::vec::Vec::new(), }; let transaction_limit = normalized_request.transaction_limit as usize; let candidate_limit = normalized_request.candidate_limit as usize; let mut scanned = 0usize; for signature_status in signatures { if scanned >= transaction_limit { break; } if result.candidates.len() >= candidate_limit { break; } scanned += 1; let signature = signature_status.signature.clone(); let transaction_result = self .fetch_transaction(normalized_request.http_role.as_str(), signature.clone()) .await; let transaction_value = match transaction_result { Ok(transaction_value) => transaction_value, Err(error) => return Err(error), }; if transaction_value.is_null() { result.missing_transaction_count += 1; continue; } result.fetched_transaction_count += 1; if transaction_failed(&transaction_value) { result.failed_transaction_count += 1; } let candidates = extract_candidates_from_transaction( signature.as_str(), &transaction_value, resolved.program_id.as_str(), resolved.dex_code.clone(), ); for candidate in candidates { if result.candidates.len() >= candidate_limit { break; } 
result.candidates.push(candidate); } } result.candidate_count = result.candidates.len(); return Ok(result); } async fn fetch_signatures( &self, http_role: &str, address: std::string::String, limit: usize, ) -> Result< std::vec::Vec, crate::Error, > { let effective_limit = clamp_usize(limit, 1, 1000); let config = solana_rpc_client_api::config::RpcSignaturesForAddressConfig { before: None, until: None, limit: Some(effective_limit), commitment: None, min_context_slot: None, }; return self .http_pool .get_signatures_for_address_for_role(http_role, address, Some(config)) .await; } async fn fetch_transaction( &self, http_role: &str, signature: std::string::String, ) -> Result { let config = Some(serde_json::json!({ "encoding": "jsonParsed", "maxSupportedTransactionVersion": 0 })); return self.http_pool.get_transaction_raw_for_role(http_role, signature, config).await; } } #[derive(Debug, Clone)] struct ResolvedDiscoveryTarget { dex_code: std::option::Option, program_id: std::string::String, } #[derive(Debug, Clone)] struct OnchainInstructionCandidate { instruction_index: std::option::Option, inner_instruction_index: std::option::Option, program_id: std::option::Option, accounts: std::vec::Vec, data: std::option::Option, parsed: std::option::Option, } #[derive(Debug, Clone)] struct AccountKeyInfo { index: i64, address: std::string::String, signer: std::option::Option, writable: std::option::Option, } #[derive(Debug, Clone)] struct TransactionGenericEvidence { account_keys: std::vec::Vec, observed_token_mints: std::vec::Vec, token_balance_deltas: std::vec::Vec, } #[derive(Debug, Clone)] struct TokenBalanceAccumulator { account_index: std::option::Option, account_address: std::option::Option, mint: std::string::String, owner: std::option::Option, token_program: std::option::Option, pre_amount_raw: std::option::Option, post_amount_raw: std::option::Option, } fn normalize_request( request: crate::OnchainDexPairDiscoveryRequestDto, ) -> crate::OnchainDexPairDiscoveryRequestDto 
{ let http_role = normalize_string(request.http_role, "history_backfill"); return crate::OnchainDexPairDiscoveryRequestDto { dex_code: normalize_optional_string(request.dex_code), program_id: normalize_optional_string(request.program_id), http_role, signature_limit: clamp_u32(request.signature_limit, 1, 1000), transaction_limit: clamp_u32(request.transaction_limit, 1, 250), candidate_limit: clamp_u32(request.candidate_limit, 1, 100), }; } fn normalize_string(value: std::string::String, fallback: &str) -> std::string::String { let trimmed = value.trim().to_string(); if trimmed.is_empty() { return fallback.to_string(); } return trimmed; } fn normalize_optional_string( value: std::option::Option, ) -> std::option::Option { let value = match value { Some(value) => value.trim().to_string(), None => return None, }; if value.is_empty() { return None; } return Some(value); } fn resolve_program_id( request: &crate::OnchainDexPairDiscoveryRequestDto, ) -> Result { if let Some(program_id) = &request.program_id { let dex_code = match crate::dex_support_matrix_entry_by_program_id(program_id.as_str()) { Some(entry) => Some(entry.code.to_string()), None => request.dex_code.clone(), }; return Ok(ResolvedDiscoveryTarget { dex_code, program_id: program_id.clone() }); } let dex_code = match &request.dex_code { Some(dex_code) => dex_code.clone(), None => { return Err(crate::Error::Config( "on-chain DEX discovery requires a dex_code or a program_id".to_string(), )); }, }; let entry = match crate::dex_support_matrix_entry_by_code(dex_code.as_str()) { Some(entry) => entry, None => { return Err(crate::Error::Config(format!( "unknown dex_code '{}' for on-chain DEX discovery", dex_code ))); }, }; let program_id = match entry.program_id { Some(program_id) => program_id.to_string(), None => { return Err(crate::Error::Config(format!( "dex_code '{}' has no verified program_id; provide a program_id explicitly after verification", dex_code ))); }, }; return Ok(ResolvedDiscoveryTarget { dex_code: 
Some(dex_code), program_id }); } fn extract_candidates_from_transaction( signature: &str, transaction: &serde_json::Value, target_program_id: &str, dex_code: std::option::Option, ) -> std::vec::Vec { let mut candidates = std::vec::Vec::new(); let slot = transaction.get("slot").and_then(serde_json::Value::as_u64); let block_time = transaction.get("blockTime").and_then(serde_json::Value::as_i64); let failed = transaction_failed(transaction); let logs = extract_log_messages(transaction); let evidence = extract_transaction_generic_evidence(transaction); let instructions = extract_onchain_instructions(transaction); for instruction in instructions { let program_id = match &instruction.program_id { Some(program_id) => program_id.clone(), None => continue, }; if program_id.as_str() != target_program_id { continue; } let decoded_candidate = decode_known_candidate( signature, slot, block_time, failed, program_id.as_str(), dex_code.clone(), &instruction, logs.as_slice(), ); let mut candidate = match decoded_candidate { Some(candidate) => candidate, None => build_heuristic_candidate( signature, slot, block_time, failed, program_id.as_str(), dex_code.clone(), &instruction, logs.as_slice(), ), }; enrich_candidate_with_generic_evidence(&mut candidate, &instruction, &evidence); candidates.push(candidate); } return candidates; } fn decode_known_candidate( signature: &str, slot: std::option::Option, block_time: std::option::Option, failed: bool, program_id: &str, dex_code: std::option::Option, instruction: &OnchainInstructionCandidate, logs: &[std::string::String], ) -> std::option::Option { if program_id == crate::RAYDIUM_CLMM_PROGRAM_ID { return decode_raydium_clmm_candidate( signature, slot, block_time, failed, program_id, dex_code, instruction, logs, ); } if program_id == crate::RAYDIUM_CPMM_PROGRAM_ID { return decode_raydium_cpmm_candidate( signature, slot, block_time, failed, program_id, dex_code, instruction, logs, ); } return None; } fn decode_raydium_clmm_candidate( 
signature: &str, slot: std::option::Option, block_time: std::option::Option, failed: bool, program_id: &str, dex_code: std::option::Option, instruction: &OnchainInstructionCandidate, logs: &[std::string::String], ) -> std::option::Option { let data = match &instruction.data { Some(data) => data.clone(), None => return None, }; let accounts_json = match serde_json::to_string(&instruction.accounts) { Ok(accounts_json) => accounts_json, Err(_) => return None, }; let data_json = match serde_json::to_string(&data) { Ok(data_json) => data_json, Err(_) => return None, }; let decoded_events = crate::decode_raydium_clmm_instruction(accounts_json.as_str(), data_json.as_str()); for decoded in decoded_events { match decoded { crate::RaydiumClmmDecodedEvent::Swap(event) => { return Some(crate::OnchainDexPairCandidateDto { signature: signature.to_string(), slot, block_time, failed, program_id: program_id.to_string(), dex_code, candidate_kind: "swap".to_string(), confidence: "high".to_string(), instruction_index: instruction.instruction_index, inner_instruction_index: instruction.inner_instruction_index, instruction_name: Some("raydium_clmm.swap".to_string()), pool_address: Some(event.pool_state.clone()), token_a_mint: Some(event.base_mint), token_b_mint: Some(event.quote_mint), verified_pool_address: Some(event.pool_state.clone()), observed_token_mints: std::vec::Vec::new(), token_balance_deltas: std::vec::Vec::new(), candidate_pool_accounts: std::vec::Vec::new(), candidate_token_vault_accounts: std::vec::Vec::new(), candidate_program_accounts: std::vec::Vec::new(), account_samples: sample_strings(instruction.accounts.as_slice(), 12), log_samples: sample_logs(logs, 8), backfill_hint: build_backfill_hint( "pool", Some(event.pool_state.as_str()), signature, ), }); }, crate::RaydiumClmmDecodedEvent::SwapV2(event) => { return Some(crate::OnchainDexPairCandidateDto { signature: signature.to_string(), slot, block_time, failed, program_id: program_id.to_string(), dex_code, 
candidate_kind: "swap".to_string(), confidence: "high".to_string(), instruction_index: instruction.instruction_index, inner_instruction_index: instruction.inner_instruction_index, instruction_name: Some("raydium_clmm.swap_v2".to_string()), pool_address: Some(event.pool_state.clone()), token_a_mint: Some(event.base_mint), token_b_mint: Some(event.quote_mint), verified_pool_address: Some(event.pool_state.clone()), observed_token_mints: std::vec::Vec::new(), token_balance_deltas: std::vec::Vec::new(), candidate_pool_accounts: std::vec::Vec::new(), candidate_token_vault_accounts: std::vec::Vec::new(), candidate_program_accounts: std::vec::Vec::new(), account_samples: sample_strings(instruction.accounts.as_slice(), 12), log_samples: sample_logs(logs, 8), backfill_hint: build_backfill_hint( "pool", Some(event.pool_state.as_str()), signature, ), }); }, } } return None; } fn decode_raydium_cpmm_candidate( signature: &str, slot: std::option::Option, block_time: std::option::Option, failed: bool, program_id: &str, dex_code: std::option::Option, instruction: &OnchainInstructionCandidate, logs: &[std::string::String], ) -> std::option::Option { let data = match &instruction.data { Some(data) => data.clone(), None => return None, }; let accounts_json = match serde_json::to_string(&instruction.accounts) { Ok(accounts_json) => accounts_json, Err(_) => return None, }; let data_json = match serde_json::to_string(&data) { Ok(data_json) => data_json, Err(_) => return None, }; let decoded_events = crate::decode_raydium_cpmm_instruction(accounts_json.as_str(), data_json.as_str()); for decoded in decoded_events { match decoded { crate::RaydiumCpmmDecodedEvent::SwapBaseInput(event) => { return Some(build_raydium_cpmm_candidate( signature, slot, block_time, failed, program_id, dex_code, instruction, logs, "raydium_cpmm.swap_base_input", event.pool_state, event.base_mint, event.quote_mint, )); }, crate::RaydiumCpmmDecodedEvent::SwapBaseOutput(event) => { return 
Some(build_raydium_cpmm_candidate( signature, slot, block_time, failed, program_id, dex_code, instruction, logs, "raydium_cpmm.swap_base_output", event.pool_state, event.base_mint, event.quote_mint, )); }, } } return None; } fn build_raydium_cpmm_candidate( signature: &str, slot: std::option::Option, block_time: std::option::Option, failed: bool, program_id: &str, dex_code: std::option::Option, instruction: &OnchainInstructionCandidate, logs: &[std::string::String], instruction_name: &str, pool_address: std::string::String, token_a_mint: std::string::String, token_b_mint: std::string::String, ) -> crate::OnchainDexPairCandidateDto { return crate::OnchainDexPairCandidateDto { signature: signature.to_string(), slot, block_time, failed, program_id: program_id.to_string(), dex_code, candidate_kind: "swap".to_string(), confidence: "high".to_string(), instruction_index: instruction.instruction_index, inner_instruction_index: instruction.inner_instruction_index, instruction_name: Some(instruction_name.to_string()), pool_address: Some(pool_address.clone()), token_a_mint: Some(token_a_mint), token_b_mint: Some(token_b_mint), verified_pool_address: Some(pool_address.clone()), observed_token_mints: std::vec::Vec::new(), token_balance_deltas: std::vec::Vec::new(), candidate_pool_accounts: std::vec::Vec::new(), candidate_token_vault_accounts: std::vec::Vec::new(), candidate_program_accounts: std::vec::Vec::new(), account_samples: sample_strings(instruction.accounts.as_slice(), 12), log_samples: sample_logs(logs, 8), backfill_hint: build_backfill_hint("pool", Some(pool_address.as_str()), signature), }; } fn build_heuristic_candidate( signature: &str, slot: std::option::Option, block_time: std::option::Option, failed: bool, program_id: &str, dex_code: std::option::Option, instruction: &OnchainInstructionCandidate, logs: &[std::string::String], ) -> crate::OnchainDexPairCandidateDto { let instruction_name = infer_instruction_name(instruction.parsed.as_ref(), logs); let 
pool_address = extract_string_by_candidate_keys_from_instruction( instruction, &["pool", "poolAddress", "poolAccount", "poolState", "amm", "ammPool", "whirlpool"], ); let token_a_mint = extract_string_by_candidate_keys_from_instruction( instruction, &["tokenA", "tokenAMint", "mintA", "baseMint", "token0Mint", "mint0", "coinMint"], ); let token_b_mint = extract_string_by_candidate_keys_from_instruction( instruction, &["tokenB", "tokenBMint", "mintB", "quoteMint", "token1Mint", "mint1", "pcMint"], ); let candidate_kind = infer_candidate_kind(instruction_name.as_ref(), logs); let confidence = if pool_address.is_some() || token_a_mint.is_some() || token_b_mint.is_some() { "medium".to_string() } else { "low".to_string() }; let hint = if pool_address.is_some() { build_backfill_hint("pool", pool_address.as_deref(), signature) } else if token_a_mint.is_some() { build_backfill_hint("token", token_a_mint.as_deref(), signature) } else { build_backfill_hint("signature", None, signature) }; return crate::OnchainDexPairCandidateDto { signature: signature.to_string(), slot, block_time, failed, program_id: program_id.to_string(), dex_code, candidate_kind, confidence, instruction_index: instruction.instruction_index, inner_instruction_index: instruction.inner_instruction_index, instruction_name, pool_address: pool_address.clone(), token_a_mint, token_b_mint, verified_pool_address: None, observed_token_mints: std::vec::Vec::new(), token_balance_deltas: std::vec::Vec::new(), candidate_pool_accounts: std::vec::Vec::new(), candidate_token_vault_accounts: std::vec::Vec::new(), candidate_program_accounts: std::vec::Vec::new(), account_samples: sample_strings(instruction.accounts.as_slice(), 12), log_samples: sample_logs(logs, 8), backfill_hint: hint, }; } fn enrich_candidate_with_generic_evidence( candidate: &mut crate::OnchainDexPairCandidateDto, instruction: &OnchainInstructionCandidate, evidence: &TransactionGenericEvidence, ) { candidate.observed_token_mints = 
evidence.observed_token_mints.clone(); candidate.token_balance_deltas = sample_token_balance_deltas(evidence.token_balance_deltas.as_slice(), 20); if candidate.token_a_mint.is_none() || candidate.token_b_mint.is_none() { let inferred = infer_token_pair_from_deltas(evidence.token_balance_deltas.as_slice()); if candidate.token_a_mint.is_none() { candidate.token_a_mint = inferred.0; } if candidate.token_b_mint.is_none() { candidate.token_b_mint = inferred.1; } } candidate.candidate_token_vault_accounts = infer_candidate_token_vault_accounts( instruction.accounts.as_slice(), evidence.token_balance_deltas.as_slice(), evidence.account_keys.as_slice(), ); candidate.candidate_pool_accounts = infer_candidate_pool_accounts( instruction.accounts.as_slice(), evidence.account_keys.as_slice(), candidate.candidate_token_vault_accounts.as_slice(), ); candidate.candidate_program_accounts = infer_candidate_program_accounts( instruction.accounts.as_slice(), evidence.account_keys.as_slice(), candidate.candidate_pool_accounts.as_slice(), candidate.candidate_token_vault_accounts.as_slice(), ); if candidate.confidence == "low" && (!candidate.observed_token_mints.is_empty() || !candidate.candidate_pool_accounts.is_empty()) { candidate.confidence = "medium".to_string(); } if candidate.pool_address.is_none() && candidate.verified_pool_address.is_none() { let first_pool = candidate.candidate_pool_accounts.first(); if let Some(first_pool) = first_pool { candidate.backfill_hint = build_backfill_hint( "candidate_pool", Some(first_pool.address.as_str()), candidate.signature.as_str(), ); } } } fn extract_transaction_generic_evidence( transaction: &serde_json::Value, ) -> TransactionGenericEvidence { let account_keys = extract_transaction_account_keys(transaction); let token_balance_deltas = extract_token_balance_deltas(transaction, account_keys.as_slice()); let observed_token_mints = collect_observed_token_mints(token_balance_deltas.as_slice()); return TransactionGenericEvidence { account_keys, 
observed_token_mints, token_balance_deltas, }; } fn extract_transaction_account_keys( transaction: &serde_json::Value, ) -> std::vec::Vec { let mut account_keys = std::vec::Vec::new(); let values = transaction .get("transaction") .and_then(|value| value.get("message")) .and_then(|value| value.get("accountKeys")) .and_then(serde_json::Value::as_array); if let Some(values) = values { let mut index = 0usize; for value in values { let parsed = parse_account_key_info(value, index as i64); if let Some(parsed) = parsed { account_keys.push(parsed); } index += 1; } } return account_keys; } fn parse_account_key_info( value: &serde_json::Value, index: i64, ) -> std::option::Option { if let Some(address) = value.as_str() { return Some(AccountKeyInfo { index, address: address.to_string(), signer: None, writable: None, }); } let address = match value.get("pubkey").and_then(serde_json::Value::as_str) { Some(address) => address.to_string(), None => return None, }; let signer = value.get("signer").and_then(serde_json::Value::as_bool); let writable = value.get("writable").and_then(serde_json::Value::as_bool); return Some(AccountKeyInfo { index, address, signer, writable }); } fn extract_token_balance_deltas( transaction: &serde_json::Value, account_keys: &[AccountKeyInfo], ) -> std::vec::Vec { let mut accumulators = std::vec::Vec::new(); collect_token_balance_side( transaction, "preTokenBalances", true, account_keys, &mut accumulators, ); collect_token_balance_side( transaction, "postTokenBalances", false, account_keys, &mut accumulators, ); let mut deltas = std::vec::Vec::new(); for accumulator in accumulators { deltas.push(crate::OnchainDexTokenBalanceDeltaDto { account_index: accumulator.account_index, account_address: accumulator.account_address, mint: accumulator.mint, owner: accumulator.owner, token_program: accumulator.token_program, pre_amount_raw: accumulator.pre_amount_raw.clone(), post_amount_raw: accumulator.post_amount_raw.clone(), delta_raw: compute_delta_raw( 
accumulator.pre_amount_raw.as_deref(), accumulator.post_amount_raw.as_deref(), ), }); } return deltas; } fn collect_token_balance_side( transaction: &serde_json::Value, key: &str, is_pre: bool, account_keys: &[AccountKeyInfo], accumulators: &mut std::vec::Vec, ) { let values = transaction .get("meta") .and_then(|value| value.get(key)) .and_then(serde_json::Value::as_array); let values = match values { Some(values) => values, None => return, }; for value in values { let account_index = value.get("accountIndex").and_then(serde_json::Value::as_i64); let mint = match value.get("mint").and_then(serde_json::Value::as_str) { Some(mint) => mint.to_string(), None => continue, }; let owner = value .get("owner") .and_then(serde_json::Value::as_str) .map(|value| value.to_string()); let token_program = value .get("programId") .and_then(serde_json::Value::as_str) .map(|value| value.to_string()); let amount = value .get("uiTokenAmount") .and_then(|amount| amount.get("amount")) .and_then(serde_json::Value::as_str) .map(|value| value.to_string()); let account_address = match account_index { Some(account_index) => account_address_by_index(account_keys, account_index), None => None, }; let accumulator_index = find_token_balance_accumulator(accumulators.as_slice(), account_index, mint.as_str()); let index = match accumulator_index { Some(index) => index, None => { accumulators.push(TokenBalanceAccumulator { account_index, account_address, mint, owner: owner.clone(), token_program: token_program.clone(), pre_amount_raw: None, post_amount_raw: None, }); accumulators.len() - 1 }, }; if accumulators[index].owner.is_none() { accumulators[index].owner = owner; } if accumulators[index].token_program.is_none() { accumulators[index].token_program = token_program; } if is_pre { accumulators[index].pre_amount_raw = amount; } else { accumulators[index].post_amount_raw = amount; } } } fn find_token_balance_accumulator( accumulators: &[TokenBalanceAccumulator], account_index: std::option::Option, 
mint: &str, ) -> std::option::Option { let mut index = 0usize; while index < accumulators.len() { let accumulator = &accumulators[index]; if accumulator.account_index == account_index && accumulator.mint == mint { return Some(index); } index += 1; } return None; } fn account_address_by_index( account_keys: &[AccountKeyInfo], account_index: i64, ) -> std::option::Option { for account_key in account_keys { if account_key.index == account_index { return Some(account_key.address.clone()); } } return None; } fn account_key_info_by_address( account_keys: &[AccountKeyInfo], address: &str, ) -> std::option::Option { for account_key in account_keys { if account_key.address == address { return Some(account_key.clone()); } } return None; } fn collect_observed_token_mints( deltas: &[crate::OnchainDexTokenBalanceDeltaDto], ) -> std::vec::Vec { let mut mints = std::vec::Vec::new(); for delta in deltas { push_unique_string(&mut mints, delta.mint.clone()); } return mints; } fn sample_token_balance_deltas( deltas: &[crate::OnchainDexTokenBalanceDeltaDto], limit: usize, ) -> std::vec::Vec { let mut samples = std::vec::Vec::new(); let mut index = 0usize; while index < deltas.len() && index < limit { samples.push(deltas[index].clone()); index += 1; } return samples; } fn infer_token_pair_from_deltas( deltas: &[crate::OnchainDexTokenBalanceDeltaDto], ) -> ( std::option::Option, std::option::Option, ) { let mut mints = std::vec::Vec::new(); for delta in deltas { if delta.delta_raw.as_deref() == Some("0") { continue; } push_unique_string(&mut mints, delta.mint.clone()); } if mints.len() < 2 { for delta in deltas { push_unique_string(&mut mints, delta.mint.clone()); } } let first = mints.first().cloned(); let second = if mints.len() > 1 { Some(mints[1].clone()) } else { None }; return (first, second); } fn infer_candidate_token_vault_accounts( instruction_accounts: &[std::string::String], deltas: &[crate::OnchainDexTokenBalanceDeltaDto], account_keys: &[AccountKeyInfo], ) -> std::vec::Vec 
{ let mut candidates = std::vec::Vec::new(); for delta in deltas { let address = match &delta.account_address { Some(address) => address.clone(), None => continue, }; if !string_slice_contains(instruction_accounts, address.as_str()) { continue; } let account_info = account_key_info_by_address(account_keys, address.as_str()); let account_index = match &account_info { Some(account_info) => Some(account_info.index), None => delta.account_index, }; let writable = match &account_info { Some(account_info) => account_info.writable, None => None, }; let signer = match &account_info { Some(account_info) => account_info.signer, None => None, }; push_unique_candidate_account( &mut candidates, crate::OnchainDexCandidateAccountDto { address, account_index, writable, signer, inferred_role: "token_vault_candidate".to_string(), confidence: "medium".to_string(), reason: format!( "instruction account has token balance delta for mint {}", delta.mint ), }, ); } return candidates; } fn infer_candidate_pool_accounts( instruction_accounts: &[std::string::String], account_keys: &[AccountKeyInfo], vault_candidates: &[crate::OnchainDexCandidateAccountDto], ) -> std::vec::Vec { let mut candidates = std::vec::Vec::new(); for account in instruction_accounts { if is_known_program_or_sysvar(account.as_str()) { continue; } if candidate_account_slice_contains(vault_candidates, account.as_str()) { continue; } let info = account_key_info_by_address(account_keys, account.as_str()); let writable = match &info { Some(info) => info.writable, None => None, }; let signer = match &info { Some(info) => info.signer, None => None, }; if signer == Some(true) { continue; } if writable == Some(false) { continue; } let account_index = match &info { Some(info) => Some(info.index), None => None, }; push_unique_candidate_account( &mut candidates, crate::OnchainDexCandidateAccountDto { address: account.clone(), account_index, writable, signer, inferred_role: "pool_or_state_candidate".to_string(), confidence: 
"low".to_string(), reason: "writable non-signer account used by matched DEX instruction".to_string(), }, ); if candidates.len() >= 12 { break; } } return candidates; } fn infer_candidate_program_accounts( instruction_accounts: &[std::string::String], account_keys: &[AccountKeyInfo], pool_candidates: &[crate::OnchainDexCandidateAccountDto], vault_candidates: &[crate::OnchainDexCandidateAccountDto], ) -> std::vec::Vec { let mut candidates = std::vec::Vec::new(); for account in instruction_accounts { if is_known_program_or_sysvar(account.as_str()) { continue; } if candidate_account_slice_contains(pool_candidates, account.as_str()) { continue; } if candidate_account_slice_contains(vault_candidates, account.as_str()) { continue; } let info = account_key_info_by_address(account_keys, account.as_str()); let account_index = match &info { Some(info) => Some(info.index), None => None, }; let writable = match &info { Some(info) => info.writable, None => None, }; let signer = match &info { Some(info) => info.signer, None => None, }; push_unique_candidate_account( &mut candidates, crate::OnchainDexCandidateAccountDto { address: account.clone(), account_index, writable, signer, inferred_role: "instruction_account_candidate".to_string(), confidence: "low".to_string(), reason: "non-program account referenced by matched DEX instruction".to_string(), }, ); if candidates.len() >= 12 { break; } } return candidates; } fn compute_delta_raw( pre: std::option::Option<&str>, post: std::option::Option<&str>, ) -> std::option::Option { let pre_value = parse_i128_or_zero(pre); let post_value = parse_i128_or_zero(post); match (pre_value, post_value) { (Some(pre_value), Some(post_value)) => return Some((post_value - pre_value).to_string()), _ => return None, } } fn parse_i128_or_zero(value: std::option::Option<&str>) -> std::option::Option { let value = match value { Some(value) => value, None => return Some(0), }; let parsed = value.parse::(); match parsed { Ok(parsed) => return Some(parsed), 
/// Returns `true` when `address` is one of the well-known Solana programs or
/// sysvars that must never be reported as a pool, vault or instruction
/// account candidate.
///
/// The comparison is an exact, case-sensitive match on the base58 string.
fn is_known_program_or_sysvar(address: &str) -> bool {
    return matches!(
        address,
        // System program.
        "11111111111111111111111111111111"
            // Compute budget program.
            | "ComputeBudget111111111111111111111111111111"
            // SPL Token program.
            | "TokenkegQfeZyiNwAJbNbGKPFXCWuBvf9Ss623VQ5DA"
            // SPL Token-2022 program (published program id).
            | "TokenzQdBNbLqP5VEhdkAS6EPFLC1PHnBqCXEpPxuEb"
            // NOTE(review): kept from the previous list for backward
            // compatibility; it does not match the published Token-2022
            // program id. Confirm and remove if it is a typo.
            | "TokenzQdBNbLqP5VEhdkAS6EPVDJGcDq6QubKX8TPp6"
            // Associated token account program.
            | "ATokenGPvbdGVxr1b2hvZbsiqW5xWH25efTNsLJA8knL"
            // Rent sysvar.
            | "SysvarRent111111111111111111111111111111111"
            // Clock sysvar.
            | "SysvarC1ock11111111111111111111111111111111"
            // Instructions sysvar. Base58 has no `I`, so the id is spelled
            // with the digit `1`; the former `SysvarInstructions…` literal
            // could never match a real address.
            | "Sysvar1nstructions1111111111111111111111111"
    );
}
value.get("innerInstructions")) .and_then(serde_json::Value::as_array); if let Some(inner_groups) = inner_groups { for group in inner_groups { let parent_index = group.get("index").and_then(serde_json::Value::as_i64); let inner_array = group.get("instructions").and_then(serde_json::Value::as_array); if let Some(inner_array) = inner_array { let mut inner_index = 0usize; for instruction in inner_array { instructions.push(parse_instruction_candidate( instruction, parent_index, Some(inner_index as i64), )); inner_index += 1; } } } } return instructions; } fn parse_instruction_candidate( instruction: &serde_json::Value, instruction_index: std::option::Option, inner_instruction_index: std::option::Option, ) -> OnchainInstructionCandidate { return OnchainInstructionCandidate { instruction_index, inner_instruction_index, program_id: extract_string_field(instruction, "programId"), accounts: extract_accounts(instruction), data: extract_instruction_data(instruction), parsed: instruction.get("parsed").cloned(), }; } fn extract_accounts(instruction: &serde_json::Value) -> std::vec::Vec { let mut accounts = std::vec::Vec::new(); let account_values = instruction.get("accounts").and_then(serde_json::Value::as_array); if let Some(account_values) = account_values { for account_value in account_values { if let Some(account) = account_value.as_str() { accounts.push(account.to_string()); continue; } if let Some(pubkey) = account_value.get("pubkey").and_then(serde_json::Value::as_str) { accounts.push(pubkey.to_string()); } } } return accounts; } fn extract_instruction_data( instruction: &serde_json::Value, ) -> std::option::Option { let data = match instruction.get("data") { Some(data) => data, None => return None, }; if let Some(data_string) = data.as_str() { return Some(data_string.to_string()); } if let Some(data_array) = data.as_array() { let first = match data_array.first() { Some(first) => first, None => return None, }; if let Some(first_string) = first.as_str() { return 
/// Classifies a candidate from its instruction name and log lines.
///
/// The instruction name (when present) and every log line are lowercased
/// (ASCII only) and searched for keywords. The first matching rule wins:
///
/// 1. `create` and `pool`        -> `create_pool`
/// 2. `initialize` and `pool`    -> `initialize_pool`
/// 3. `swap`, `buy` or `sell`    -> `swap`
/// 4. `liquidity` or `position`  -> `liquidity_or_position`
/// 5. otherwise                  -> `program_activity`
///
/// The two keywords of rules 1 and 2 may come from different fragments.
fn infer_candidate_kind(
    instruction_name: std::option::Option<&std::string::String>,
    logs: &[std::string::String],
) -> std::string::String {
    // One space-separated, lowercased haystack: instruction name first, then logs.
    let mut haystack = std::string::String::new();
    for fragment in instruction_name.into_iter().chain(logs.iter()) {
        haystack.push_str(fragment.to_ascii_lowercase().as_str());
        haystack.push(' ');
    }

    let mentions = |keyword: &str| haystack.contains(keyword);

    let kind = if mentions("create") && mentions("pool") {
        "create_pool"
    } else if mentions("initialize") && mentions("pool") {
        "initialize_pool"
    } else if mentions("swap") || mentions("buy") || mentions("sell") {
        "swap"
    } else if mentions("liquidity") || mentions("position") {
        "liquidity_or_position"
    } else {
        "program_activity"
    };

    return kind.to_string();
}
/// Restricts `value` to the `[min, max]` range.
///
/// The lower bound is checked first, so with inverted bounds (`min > max`) a
/// value below `min` yields `min` and any other value above `max` yields
/// `max`; the function never panics.
fn clamp_u32(value: u32, min: u32, max: u32) -> u32 {
    return match value {
        below if below < min => min,
        above if above > max => max,
        within => within,
    };
}

/// Restricts `value` to the `[min, max]` range.
///
/// Same contract as `clamp_u32`, for `usize`: the lower bound is checked
/// first and inverted bounds never panic.
fn clamp_usize(value: usize, min: usize, max: usize) -> usize {
    return match value {
        below if below < min => min,
        above if above > max => max,
        within => within,
    };
}