Files
khadhroony-bobobot/kb_lib/src/onchain_dex_pair_discovery.rs
2026-05-24 17:17:26 +02:00

1582 lines
56 KiB
Rust

// file: kb_lib/src/onchain_dex_pair_discovery.rs
//! On-chain DEX pair/pool discovery helpers used by Demo3.
//!
//! This module intentionally does not persist discovered transactions and does
//! not promote a program or pool as verified. It only queries Solana RPC for
//! recent signatures of a DEX program id, fetches the corresponding
//! transactions, and extracts candidate signatures / pools / mints that can be
//! fed later into the existing backfill pipeline.
/// Request for on-chain DEX pair/pool discovery.
///
/// Serialized and deserialized with camelCase field names. At least one of
/// `dex_code` or `program_id` must be set: program-id resolution returns a
/// configuration error when both are absent. The service normalizes the
/// request before use: strings are trimmed, blank optional strings become
/// `None`, a blank `http_role` falls back to `history_backfill`, and the three
/// limits are passed through `clamp_u32`.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct OnchainDexPairDiscoveryRequestDto {
/// Optional DEX code from the support matrix. Only used to resolve the
/// program id when `program_id` is absent, or as the reported DEX code when
/// the explicit program id is not found in the matrix.
pub dex_code: std::option::Option<std::string::String>,
/// Optional Solana program id. When absent, `dex_code` must resolve to a verified matrix program id.
pub program_id: std::option::Option<std::string::String>,
/// HTTP endpoint role used to query Solana RPC.
pub http_role: std::string::String,
/// Maximum number of recent signatures to inspect for the program id
/// (normalized with `clamp_u32(value, 1, 1000)`).
pub signature_limit: u32,
/// Maximum number of transactions to fetch from the signature list
/// (normalized with `clamp_u32(value, 1, 250)`).
pub transaction_limit: u32,
/// Maximum number of candidate rows to return
/// (normalized with `clamp_u32(value, 1, 100)`).
pub candidate_limit: u32,
}
/// Result of one on-chain DEX pair/pool discovery run.
///
/// Serialized with camelCase field names. The counters describe how far the
/// scan went; `candidates` holds the rows extracted from the fetched
/// transactions, capped at the request's `candidate_limit`.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct OnchainDexPairDiscoveryResultDto {
/// Normalized request actually used by the service.
pub request: crate::OnchainDexPairDiscoveryRequestDto,
/// DEX code resolved from the matrix when available.
pub resolved_dex_code: std::option::Option<std::string::String>,
/// Program id scanned through `getSignaturesForAddress`.
pub resolved_program_id: std::string::String,
/// Number of signatures returned by the RPC endpoint.
pub fetched_signature_count: usize,
/// Number of transactions fetched with `getTransaction` that came back non-null.
pub fetched_transaction_count: usize,
/// Number of `getTransaction` calls returning null.
pub missing_transaction_count: usize,
/// Number of fetched transactions reported as failed by `transaction_failed`.
pub failed_transaction_count: usize,
/// Number of candidate rows returned; set to `candidates.len()` at the end of the run.
pub candidate_count: usize,
/// Candidate on-chain rows.
pub candidates: std::vec::Vec<crate::OnchainDexPairCandidateDto>,
}
/// Candidate transaction/instruction observed on-chain for one DEX program id.
///
/// One row is produced per instruction whose program id equals the scanned
/// program id. The decoders and the heuristic builder in this module create
/// the row with empty generic-evidence vectors (`observed_token_mints`,
/// `token_balance_deltas`, `candidate_*_accounts`); those are filled afterwards
/// by `enrich_candidate_with_generic_evidence`. Serialized with camelCase
/// field names.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct OnchainDexPairCandidateDto {
/// Transaction signature.
pub signature: std::string::String,
/// Transaction slot when present.
pub slot: std::option::Option<u64>,
/// Transaction block time when present.
pub block_time: std::option::Option<i64>,
/// Whether the transaction has a non-null `meta.err` value.
pub failed: bool,
/// Program id matched by this candidate.
pub program_id: std::string::String,
/// DEX code resolved from the support matrix when available.
pub dex_code: std::option::Option<std::string::String>,
/// Candidate kind inferred from decoded instruction data, parsed fields or logs.
/// The Raydium decoders in this module always report `swap`.
pub candidate_kind: std::string::String,
/// Confidence of the candidate extraction: `high`, `medium` or `low`.
pub confidence: std::string::String,
/// Top-level instruction index when known.
pub instruction_index: std::option::Option<i64>,
/// Inner instruction index when this candidate came from `meta.innerInstructions`.
pub inner_instruction_index: std::option::Option<i64>,
/// Instruction name inferred from parsed payload or logs.
pub instruction_name: std::option::Option<std::string::String>,
/// Candidate pool address when it can be extracted safely or heuristically.
pub pool_address: std::option::Option<std::string::String>,
/// Candidate token A/base mint when it can be extracted.
pub token_a_mint: std::option::Option<std::string::String>,
/// Candidate token B/quote mint when it can be extracted.
pub token_b_mint: std::option::Option<std::string::String>,
/// Verified pool address when a DEX decoder or stable layout proves it.
/// The heuristic builder always leaves this `None`.
pub verified_pool_address: std::option::Option<std::string::String>,
/// Token mints observed generically from transaction token balance arrays.
pub observed_token_mints: std::vec::Vec<std::string::String>,
/// Token balance deltas observed through transaction metadata (sampled, at most 20 rows).
pub token_balance_deltas: std::vec::Vec<crate::OnchainDexTokenBalanceDeltaDto>,
/// Program-owned or writable accounts that may be pool/config/state accounts.
pub candidate_pool_accounts: std::vec::Vec<crate::OnchainDexCandidateAccountDto>,
/// Token accounts that may be pool vaults.
pub candidate_token_vault_accounts: std::vec::Vec<crate::OnchainDexCandidateAccountDto>,
/// Other candidate accounts attached to the matched instruction.
pub candidate_program_accounts: std::vec::Vec<crate::OnchainDexCandidateAccountDto>,
/// Short account sample from the matched instruction.
pub account_samples: std::vec::Vec<std::string::String>,
/// Short log sample from the transaction.
pub log_samples: std::vec::Vec<std::string::String>,
/// Suggested next action for the user, produced by `build_backfill_hint`.
pub backfill_hint: std::string::String,
}
/// Token-balance delta observed in one transaction through Solana transaction metadata.
///
/// Built by merging the `meta.preTokenBalances` and `meta.postTokenBalances`
/// entries that share the same account index and mint. Either amount may be
/// absent when the account appears on only one side. Serialized with
/// camelCase field names.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct OnchainDexTokenBalanceDeltaDto {
/// Token account index in the transaction message when available.
pub account_index: std::option::Option<i64>,
/// Token account address resolved from the transaction account keys.
pub account_address: std::option::Option<std::string::String>,
/// SPL Token or Token-2022 mint address.
pub mint: std::string::String,
/// Token account owner when Solana RPC exposes it.
pub owner: std::option::Option<std::string::String>,
/// Token program id when Solana RPC exposes it.
pub token_program: std::option::Option<std::string::String>,
/// Raw token amount before the transaction (`uiTokenAmount.amount` string).
pub pre_amount_raw: std::option::Option<std::string::String>,
/// Raw token amount after the transaction (`uiTokenAmount.amount` string).
pub post_amount_raw: std::option::Option<std::string::String>,
/// Signed raw delta when both raw amounts fit in a signed integer.
pub delta_raw: std::option::Option<std::string::String>,
}
/// Candidate account inferred from generic transaction evidence.
///
/// These rows are heuristic hints, not verified facts: `inferred_role`,
/// `confidence` and `reason` describe why the account was listed. Serialized
/// with camelCase field names.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct OnchainDexCandidateAccountDto {
/// Account address.
pub address: std::string::String,
/// Account index in the transaction message when known.
pub account_index: std::option::Option<i64>,
/// Whether the account is writable in the transaction message when known.
pub writable: std::option::Option<bool>,
/// Whether the account is a signer in the transaction message when known.
pub signer: std::option::Option<bool>,
/// Generic role inferred by Demo3, for example `program_owned_candidate` or `token_vault_candidate`.
pub inferred_role: std::string::String,
/// Confidence of the generic account inference.
pub confidence: std::string::String,
/// Short reason explaining why the account is listed.
pub reason: std::string::String,
}
/// On-chain pair/pool discovery service.
///
/// Holds only a shared handle to the HTTP endpoint pool; every RPC call made
/// during discovery goes through that pool using the request's `http_role`.
#[derive(Debug, Clone)]
pub struct OnchainDexPairDiscoveryService {
// Shared endpoint pool used for the signature listing and transaction fetches.
http_pool: std::sync::Arc<crate::HttpEndpointPool>,
}
impl OnchainDexPairDiscoveryService {
    /// Builds a discovery service backed by the given shared HTTP endpoint pool.
    pub fn new(http_pool: std::sync::Arc<crate::HttpEndpointPool>) -> Self {
        Self { http_pool }
    }

    /// Scans recent transactions of one DEX program and collects candidate rows.
    ///
    /// The request is normalized first, then the target program id is resolved,
    /// its recent signatures are listed and the matching transactions are
    /// fetched one at a time. Scanning stops once `transaction_limit`
    /// signatures have been attempted or `candidate_limit` candidates have been
    /// collected. Transactions returned as JSON `null` are only counted.
    ///
    /// # Errors
    ///
    /// Returns the error from program-id resolution, from the signature
    /// listing, or from the first transaction fetch that fails; candidates
    /// gathered before such a failure are not returned.
    pub async fn discover(
        &self,
        request: crate::OnchainDexPairDiscoveryRequestDto,
    ) -> Result<crate::OnchainDexPairDiscoveryResultDto, crate::Error> {
        let request = normalize_request(request);
        let target = resolve_program_id(&request)?;
        let signatures = self
            .fetch_signatures(
                request.http_role.as_str(),
                target.program_id.clone(),
                request.signature_limit as usize,
            )
            .await?;

        let max_transactions = request.transaction_limit as usize;
        let max_candidates = request.candidate_limit as usize;
        let mut outcome = crate::OnchainDexPairDiscoveryResultDto {
            request: request.clone(),
            resolved_dex_code: target.dex_code.clone(),
            resolved_program_id: target.program_id.clone(),
            fetched_signature_count: signatures.len(),
            fetched_transaction_count: 0,
            missing_transaction_count: 0,
            failed_transaction_count: 0,
            candidate_count: 0,
            candidates: std::vec::Vec::new(),
        };

        let mut attempted = 0usize;
        for entry in signatures {
            let budget_exhausted =
                attempted >= max_transactions || outcome.candidates.len() >= max_candidates;
            if budget_exhausted {
                break;
            }
            // Count the attempt before fetching so null responses also consume budget.
            attempted += 1;

            let signature = entry.signature;
            let transaction = self
                .fetch_transaction(request.http_role.as_str(), signature.clone())
                .await?;
            if transaction.is_null() {
                outcome.missing_transaction_count += 1;
                continue;
            }
            outcome.fetched_transaction_count += 1;
            if transaction_failed(&transaction) {
                outcome.failed_transaction_count += 1;
            }

            let extracted = extract_candidates_from_transaction(
                signature.as_str(),
                &transaction,
                target.program_id.as_str(),
                target.dex_code.clone(),
            );
            // Keep only as many rows as the candidate budget still allows.
            let remaining = max_candidates.saturating_sub(outcome.candidates.len());
            outcome.candidates.extend(extracted.into_iter().take(remaining));
        }

        outcome.candidate_count = outcome.candidates.len();
        Ok(outcome)
    }

    /// Lists recent signatures for `address` through the pool, using the
    /// default `before` / `until` / `commitment` / `min_context_slot` settings.
    ///
    /// The limit is passed through `clamp_usize(limit, 1, 1000)` before being
    /// sent to the endpoint.
    async fn fetch_signatures(
        &self,
        http_role: &str,
        address: std::string::String,
        limit: usize,
    ) -> Result<
        std::vec::Vec<solana_rpc_client_api::response::RpcConfirmedTransactionStatusWithSignature>,
        crate::Error,
    > {
        let config = solana_rpc_client_api::config::RpcSignaturesForAddressConfig {
            before: None,
            until: None,
            limit: Some(clamp_usize(limit, 1, 1000)),
            commitment: None,
            min_context_slot: None,
        };
        self.http_pool
            .get_signatures_for_address_for_role(http_role, address, Some(config))
            .await
    }

    /// Fetches one transaction as raw JSON, requesting the `jsonParsed`
    /// encoding with `maxSupportedTransactionVersion` set to 0.
    async fn fetch_transaction(
        &self,
        http_role: &str,
        signature: std::string::String,
    ) -> Result<serde_json::Value, crate::Error> {
        let options = serde_json::json!({
            "encoding": "jsonParsed",
            "maxSupportedTransactionVersion": 0
        });
        self.http_pool
            .get_transaction_raw_for_role(http_role, signature, Some(options))
            .await
    }
}
/// Outcome of program-id resolution for one discovery request.
#[derive(Debug, Clone)]
struct ResolvedDiscoveryTarget {
// DEX code reported for the target; `None` when an explicit program id is not
// in the support matrix and the request carried no `dex_code`.
dex_code: std::option::Option<std::string::String>,
// Program id whose recent signatures are scanned.
program_id: std::string::String,
}
/// One instruction of a fetched transaction, in the form consumed by the
/// candidate decoders and the heuristic builder.
#[derive(Debug, Clone)]
struct OnchainInstructionCandidate {
// Copied verbatim into the candidate row's `instruction_index`.
instruction_index: std::option::Option<i64>,
// Copied verbatim into the candidate row's `inner_instruction_index`.
inner_instruction_index: std::option::Option<i64>,
// Program id of the instruction; instructions without one are skipped.
program_id: std::option::Option<std::string::String>,
// Account addresses attached to the instruction.
accounts: std::vec::Vec<std::string::String>,
// Instruction data string handed (JSON-encoded) to the Raydium decoders.
// NOTE(review): presumably the base58 `data` field from the RPC response — confirm.
data: std::option::Option<std::string::String>,
// Parsed payload, used to infer an instruction name.
parsed: std::option::Option<serde_json::Value>,
}
/// One entry of the transaction message's `accountKeys` array.
#[derive(Debug, Clone)]
struct AccountKeyInfo {
// Position of the entry inside the `accountKeys` array.
index: i64,
// Account address (the entry itself when it is a string, else its `pubkey`).
address: std::string::String,
// `signer` flag; `None` when the entry was a plain string or lacked the flag.
signer: std::option::Option<bool>,
// `writable` flag; `None` when the entry was a plain string or lacked the flag.
writable: std::option::Option<bool>,
}
/// DEX-agnostic evidence extracted once per transaction and shared by every
/// candidate built from that transaction.
#[derive(Debug, Clone)]
struct TransactionGenericEvidence {
// Parsed `transaction.message.accountKeys` entries.
account_keys: std::vec::Vec<AccountKeyInfo>,
// Distinct mints seen in the token balance deltas, in first-seen order.
observed_token_mints: std::vec::Vec<std::string::String>,
// Merged pre/post token balance rows.
token_balance_deltas: std::vec::Vec<crate::OnchainDexTokenBalanceDeltaDto>,
}
/// Working row used while merging `preTokenBalances` and `postTokenBalances`.
///
/// Rows are keyed by (`account_index`, `mint`); the pre and post sides each
/// fill their own amount field.
#[derive(Debug, Clone)]
struct TokenBalanceAccumulator {
// `accountIndex` of the balance entry, when present.
account_index: std::option::Option<i64>,
// Address looked up from the account keys for `account_index`.
account_address: std::option::Option<std::string::String>,
// Mint of the balance entry.
mint: std::string::String,
// First non-empty `owner` seen for this row.
owner: std::option::Option<std::string::String>,
// First non-empty `programId` seen for this row.
token_program: std::option::Option<std::string::String>,
// `uiTokenAmount.amount` from the pre side.
pre_amount_raw: std::option::Option<std::string::String>,
// `uiTokenAmount.amount` from the post side.
post_amount_raw: std::option::Option<std::string::String>,
}
/// Returns a cleaned-up copy of the request.
///
/// Optional strings are trimmed and blank ones dropped, a blank `http_role`
/// becomes `history_backfill`, and each limit is passed through `clamp_u32`.
fn normalize_request(
    request: crate::OnchainDexPairDiscoveryRequestDto,
) -> crate::OnchainDexPairDiscoveryRequestDto {
    let crate::OnchainDexPairDiscoveryRequestDto {
        dex_code,
        program_id,
        http_role,
        signature_limit,
        transaction_limit,
        candidate_limit,
    } = request;

    crate::OnchainDexPairDiscoveryRequestDto {
        dex_code: normalize_optional_string(dex_code),
        program_id: normalize_optional_string(program_id),
        http_role: normalize_string(http_role, "history_backfill"),
        signature_limit: clamp_u32(signature_limit, 1, 1000),
        transaction_limit: clamp_u32(transaction_limit, 1, 250),
        candidate_limit: clamp_u32(candidate_limit, 1, 100),
    }
}
/// Trims `value`; returns `fallback` instead when nothing but whitespace remains.
fn normalize_string(value: std::string::String, fallback: &str) -> std::string::String {
    match value.trim() {
        "" => fallback.to_string(),
        kept => kept.to_string(),
    }
}
/// Trims an optional string and maps blank or missing values to `None`.
fn normalize_optional_string(
    value: std::option::Option<std::string::String>,
) -> std::option::Option<std::string::String> {
    value
        .map(|raw| raw.trim().to_string())
        .filter(|trimmed| !trimmed.is_empty())
}
fn resolve_program_id(
request: &crate::OnchainDexPairDiscoveryRequestDto,
) -> Result<ResolvedDiscoveryTarget, crate::Error> {
if let Some(program_id) = &request.program_id {
let dex_code = match crate::dex_support_matrix_entry_by_program_id(program_id.as_str()) {
Some(entry) => Some(entry.code.to_string()),
None => request.dex_code.clone(),
};
return Ok(ResolvedDiscoveryTarget { dex_code, program_id: program_id.clone() });
}
let dex_code = match &request.dex_code {
Some(dex_code) => dex_code.clone(),
None => {
return Err(crate::Error::Config(
"on-chain DEX discovery requires a dex_code or a program_id".to_string(),
));
},
};
let entry = match crate::dex_support_matrix_entry_by_code(dex_code.as_str()) {
Some(entry) => entry,
None => {
return Err(crate::Error::Config(format!(
"unknown dex_code '{}' for on-chain DEX discovery",
dex_code
)));
},
};
let program_id = match entry.program_id {
Some(program_id) => program_id.to_string(),
None => {
return Err(crate::Error::Config(format!(
"dex_code '{}' has no verified program_id; provide a program_id explicitly after verification",
dex_code
)));
},
};
return Ok(ResolvedDiscoveryTarget { dex_code: Some(dex_code), program_id });
}
/// Builds one candidate row per instruction that targets `target_program_id`.
///
/// Each matching instruction is first offered to the DEX-specific decoders;
/// when none recognizes it, a heuristic row is built instead. Every row is
/// then enriched with the transaction-wide generic evidence.
fn extract_candidates_from_transaction(
    signature: &str,
    transaction: &serde_json::Value,
    target_program_id: &str,
    dex_code: std::option::Option<std::string::String>,
) -> std::vec::Vec<crate::OnchainDexPairCandidateDto> {
    // Transaction-wide facts are computed once and shared by all rows.
    let slot = transaction.get("slot").and_then(serde_json::Value::as_u64);
    let block_time = transaction.get("blockTime").and_then(serde_json::Value::as_i64);
    let failed = transaction_failed(transaction);
    let logs = extract_log_messages(transaction);
    let evidence = extract_transaction_generic_evidence(transaction);

    let mut rows = std::vec::Vec::new();
    for instruction in extract_onchain_instructions(transaction) {
        // Instructions without a program id, or for another program, are ignored.
        if instruction.program_id.as_deref() != Some(target_program_id) {
            continue;
        }
        let mut row = decode_known_candidate(
            signature,
            slot,
            block_time,
            failed,
            target_program_id,
            dex_code.clone(),
            &instruction,
            logs.as_slice(),
        )
        .unwrap_or_else(|| {
            build_heuristic_candidate(
                signature,
                slot,
                block_time,
                failed,
                target_program_id,
                dex_code.clone(),
                &instruction,
                logs.as_slice(),
            )
        });
        enrich_candidate_with_generic_evidence(&mut row, &instruction, &evidence);
        rows.push(row);
    }
    rows
}
/// Dispatches to the decoder of a known DEX program.
///
/// Returns `None` when `program_id` is neither the Raydium CLMM nor the
/// Raydium CPMM program id, or when the selected decoder yields nothing.
fn decode_known_candidate(
    signature: &str,
    slot: std::option::Option<u64>,
    block_time: std::option::Option<i64>,
    failed: bool,
    program_id: &str,
    dex_code: std::option::Option<std::string::String>,
    instruction: &OnchainInstructionCandidate,
    logs: &[std::string::String],
) -> std::option::Option<crate::OnchainDexPairCandidateDto> {
    match program_id {
        id if id == crate::RAYDIUM_CLMM_PROGRAM_ID => decode_raydium_clmm_candidate(
            signature,
            slot,
            block_time,
            failed,
            program_id,
            dex_code,
            instruction,
            logs,
        ),
        id if id == crate::RAYDIUM_CPMM_PROGRAM_ID => decode_raydium_cpmm_candidate(
            signature,
            slot,
            block_time,
            failed,
            program_id,
            dex_code,
            instruction,
            logs,
        ),
        _ => None,
    }
}
/// Decodes a Raydium CLMM instruction into a high-confidence swap candidate.
///
/// The instruction accounts and data are JSON-encoded and handed to
/// `crate::decode_raydium_clmm_instruction`; only the first decoded event is
/// used. Returns `None` when the instruction has no data, when JSON encoding
/// fails, or when nothing is decoded.
fn decode_raydium_clmm_candidate(
    signature: &str,
    slot: std::option::Option<u64>,
    block_time: std::option::Option<i64>,
    failed: bool,
    program_id: &str,
    dex_code: std::option::Option<std::string::String>,
    instruction: &OnchainInstructionCandidate,
    logs: &[std::string::String],
) -> std::option::Option<crate::OnchainDexPairCandidateDto> {
    let data = instruction.data.as_ref()?;
    let accounts_json = serde_json::to_string(&instruction.accounts).ok()?;
    let data_json = serde_json::to_string(data).ok()?;

    let first_event =
        crate::decode_raydium_clmm_instruction(accounts_json.as_str(), data_json.as_str())
            .into_iter()
            .next()?;
    // Both swap flavours expose the same three fields; only the name differs.
    let (instruction_name, pool_state, base_mint, quote_mint) = match first_event {
        crate::RaydiumClmmDecodedEvent::Swap(event) => (
            "raydium_clmm.swap",
            event.pool_state,
            event.base_mint,
            event.quote_mint,
        ),
        crate::RaydiumClmmDecodedEvent::SwapV2(event) => (
            "raydium_clmm.swap_v2",
            event.pool_state,
            event.base_mint,
            event.quote_mint,
        ),
    };

    let account_samples = sample_strings(instruction.accounts.as_slice(), 12);
    let log_samples = sample_logs(logs, 8);
    let backfill_hint = build_backfill_hint("pool", Some(pool_state.as_str()), signature);

    Some(crate::OnchainDexPairCandidateDto {
        signature: signature.to_string(),
        slot,
        block_time,
        failed,
        program_id: program_id.to_string(),
        dex_code,
        candidate_kind: "swap".to_string(),
        confidence: "high".to_string(),
        instruction_index: instruction.instruction_index,
        inner_instruction_index: instruction.inner_instruction_index,
        instruction_name: Some(instruction_name.to_string()),
        pool_address: Some(pool_state.clone()),
        token_a_mint: Some(base_mint),
        token_b_mint: Some(quote_mint),
        verified_pool_address: Some(pool_state),
        observed_token_mints: std::vec::Vec::new(),
        token_balance_deltas: std::vec::Vec::new(),
        candidate_pool_accounts: std::vec::Vec::new(),
        candidate_token_vault_accounts: std::vec::Vec::new(),
        candidate_program_accounts: std::vec::Vec::new(),
        account_samples,
        log_samples,
        backfill_hint,
    })
}
/// Decodes a Raydium CPMM instruction into a high-confidence swap candidate.
///
/// The instruction accounts and data are JSON-encoded and handed to
/// `crate::decode_raydium_cpmm_instruction`; only the first decoded event is
/// used. Returns `None` when the instruction has no data, when JSON encoding
/// fails, or when nothing is decoded.
fn decode_raydium_cpmm_candidate(
    signature: &str,
    slot: std::option::Option<u64>,
    block_time: std::option::Option<i64>,
    failed: bool,
    program_id: &str,
    dex_code: std::option::Option<std::string::String>,
    instruction: &OnchainInstructionCandidate,
    logs: &[std::string::String],
) -> std::option::Option<crate::OnchainDexPairCandidateDto> {
    let data = instruction.data.as_ref()?;
    let accounts_json = serde_json::to_string(&instruction.accounts).ok()?;
    let data_json = serde_json::to_string(data).ok()?;

    let first_event =
        crate::decode_raydium_cpmm_instruction(accounts_json.as_str(), data_json.as_str())
            .into_iter()
            .next()?;
    // Both swap directions expose the same three fields; only the name differs.
    let (instruction_name, pool_state, base_mint, quote_mint) = match first_event {
        crate::RaydiumCpmmDecodedEvent::SwapBaseInput(event) => (
            "raydium_cpmm.swap_base_input",
            event.pool_state,
            event.base_mint,
            event.quote_mint,
        ),
        crate::RaydiumCpmmDecodedEvent::SwapBaseOutput(event) => (
            "raydium_cpmm.swap_base_output",
            event.pool_state,
            event.base_mint,
            event.quote_mint,
        ),
    };

    Some(build_raydium_cpmm_candidate(
        signature,
        slot,
        block_time,
        failed,
        program_id,
        dex_code,
        instruction,
        logs,
        instruction_name,
        pool_state,
        base_mint,
        quote_mint,
    ))
}
/// Assembles the candidate row for a decoded Raydium CPMM swap.
///
/// The row is always a `swap` with `high` confidence; the decoded pool is
/// reported both as `pool_address` and as `verified_pool_address`, and the
/// backfill hint points at that pool. Generic-evidence vectors start empty.
fn build_raydium_cpmm_candidate(
    signature: &str,
    slot: std::option::Option<u64>,
    block_time: std::option::Option<i64>,
    failed: bool,
    program_id: &str,
    dex_code: std::option::Option<std::string::String>,
    instruction: &OnchainInstructionCandidate,
    logs: &[std::string::String],
    instruction_name: &str,
    pool_address: std::string::String,
    token_a_mint: std::string::String,
    token_b_mint: std::string::String,
) -> crate::OnchainDexPairCandidateDto {
    let account_samples = sample_strings(instruction.accounts.as_slice(), 12);
    let log_samples = sample_logs(logs, 8);
    // Computed up front so the pool address can be moved into the row below.
    let backfill_hint = build_backfill_hint("pool", Some(pool_address.as_str()), signature);

    crate::OnchainDexPairCandidateDto {
        signature: signature.to_string(),
        slot,
        block_time,
        failed,
        program_id: program_id.to_string(),
        dex_code,
        candidate_kind: "swap".to_string(),
        confidence: "high".to_string(),
        instruction_index: instruction.instruction_index,
        inner_instruction_index: instruction.inner_instruction_index,
        instruction_name: Some(instruction_name.to_string()),
        pool_address: Some(pool_address.clone()),
        token_a_mint: Some(token_a_mint),
        token_b_mint: Some(token_b_mint),
        verified_pool_address: Some(pool_address),
        observed_token_mints: std::vec::Vec::new(),
        token_balance_deltas: std::vec::Vec::new(),
        candidate_pool_accounts: std::vec::Vec::new(),
        candidate_token_vault_accounts: std::vec::Vec::new(),
        candidate_program_accounts: std::vec::Vec::new(),
        account_samples,
        log_samples,
        backfill_hint,
    }
}
/// Builds a best-effort candidate row when no DEX-specific decoder applies.
///
/// Pool and mint addresses are looked up by well-known field names on the
/// instruction. Confidence is `medium` when any of the three was found and
/// `low` otherwise. The backfill hint targets the pool when known, else the
/// token A mint, else the bare signature. `verified_pool_address` stays `None`.
fn build_heuristic_candidate(
    signature: &str,
    slot: std::option::Option<u64>,
    block_time: std::option::Option<i64>,
    failed: bool,
    program_id: &str,
    dex_code: std::option::Option<std::string::String>,
    instruction: &OnchainInstructionCandidate,
    logs: &[std::string::String],
) -> crate::OnchainDexPairCandidateDto {
    const POOL_KEYS: [&str; 7] =
        ["pool", "poolAddress", "poolAccount", "poolState", "amm", "ammPool", "whirlpool"];
    const TOKEN_A_KEYS: [&str; 7] =
        ["tokenA", "tokenAMint", "mintA", "baseMint", "token0Mint", "mint0", "coinMint"];
    const TOKEN_B_KEYS: [&str; 7] =
        ["tokenB", "tokenBMint", "mintB", "quoteMint", "token1Mint", "mint1", "pcMint"];

    let instruction_name = infer_instruction_name(instruction.parsed.as_ref(), logs);
    let pool_address = extract_string_by_candidate_keys_from_instruction(instruction, &POOL_KEYS);
    let token_a_mint =
        extract_string_by_candidate_keys_from_instruction(instruction, &TOKEN_A_KEYS);
    let token_b_mint =
        extract_string_by_candidate_keys_from_instruction(instruction, &TOKEN_B_KEYS);
    let candidate_kind = infer_candidate_kind(instruction_name.as_ref(), logs);

    let found_any_address =
        pool_address.is_some() || token_a_mint.is_some() || token_b_mint.is_some();
    let confidence = if found_any_address { "medium" } else { "low" }.to_string();

    let backfill_hint = match (pool_address.as_deref(), token_a_mint.as_deref()) {
        (Some(pool), _) => build_backfill_hint("pool", Some(pool), signature),
        (None, Some(mint)) => build_backfill_hint("token", Some(mint), signature),
        (None, None) => build_backfill_hint("signature", None, signature),
    };

    crate::OnchainDexPairCandidateDto {
        signature: signature.to_string(),
        slot,
        block_time,
        failed,
        program_id: program_id.to_string(),
        dex_code,
        candidate_kind,
        confidence,
        instruction_index: instruction.instruction_index,
        inner_instruction_index: instruction.inner_instruction_index,
        instruction_name,
        pool_address,
        token_a_mint,
        token_b_mint,
        verified_pool_address: None,
        observed_token_mints: std::vec::Vec::new(),
        token_balance_deltas: std::vec::Vec::new(),
        candidate_pool_accounts: std::vec::Vec::new(),
        candidate_token_vault_accounts: std::vec::Vec::new(),
        candidate_program_accounts: std::vec::Vec::new(),
        account_samples: sample_strings(instruction.accounts.as_slice(), 12),
        log_samples: sample_logs(logs, 8),
        backfill_hint,
    }
}
/// Adds transaction-wide, DEX-agnostic evidence to a candidate row.
///
/// The row receives the observed mints, a sample of at most 20 token balance
/// deltas, and the inferred vault / pool / program account lists. Missing
/// token mints are filled from the balance deltas. A `low` confidence is
/// raised to `medium` when mints or pool accounts were observed, and the
/// backfill hint is redirected to the first candidate pool account when the
/// row has no pool address of its own.
///
/// When only one of the two mints is already known, the other side is filled
/// with an inferred mint that differs from it, so a row never reports the
/// same mint as both token A and token B because of delta ordering.
fn enrich_candidate_with_generic_evidence(
    candidate: &mut crate::OnchainDexPairCandidateDto,
    instruction: &OnchainInstructionCandidate,
    evidence: &TransactionGenericEvidence,
) {
    // Returns `preferred` unless it equals `known`, in which case `fallback`
    // is tried under the same condition.
    fn pick_distinct_mint(
        preferred: std::option::Option<&std::string::String>,
        fallback: std::option::Option<&std::string::String>,
        known: std::option::Option<&str>,
    ) -> std::option::Option<std::string::String> {
        preferred
            .into_iter()
            .chain(fallback)
            .find(|mint| Some(mint.as_str()) != known)
            .cloned()
    }

    candidate.observed_token_mints = evidence.observed_token_mints.clone();
    candidate.token_balance_deltas =
        sample_token_balance_deltas(evidence.token_balance_deltas.as_slice(), 20);

    if candidate.token_a_mint.is_none() || candidate.token_b_mint.is_none() {
        let (first, second) =
            infer_token_pair_from_deltas(evidence.token_balance_deltas.as_slice());
        if candidate.token_a_mint.is_none() {
            // Positional default is the first inferred mint.
            candidate.token_a_mint = pick_distinct_mint(
                first.as_ref(),
                second.as_ref(),
                candidate.token_b_mint.as_deref(),
            );
        }
        if candidate.token_b_mint.is_none() {
            // Positional default is the second inferred mint.
            candidate.token_b_mint = pick_distinct_mint(
                second.as_ref(),
                first.as_ref(),
                candidate.token_a_mint.as_deref(),
            );
        }
    }

    // Vaults are inferred first because the pool and program inferences
    // take the earlier lists as input.
    candidate.candidate_token_vault_accounts = infer_candidate_token_vault_accounts(
        instruction.accounts.as_slice(),
        evidence.token_balance_deltas.as_slice(),
        evidence.account_keys.as_slice(),
    );
    candidate.candidate_pool_accounts = infer_candidate_pool_accounts(
        instruction.accounts.as_slice(),
        evidence.account_keys.as_slice(),
        candidate.candidate_token_vault_accounts.as_slice(),
    );
    candidate.candidate_program_accounts = infer_candidate_program_accounts(
        instruction.accounts.as_slice(),
        evidence.account_keys.as_slice(),
        candidate.candidate_pool_accounts.as_slice(),
        candidate.candidate_token_vault_accounts.as_slice(),
    );

    let has_generic_evidence = !candidate.observed_token_mints.is_empty()
        || !candidate.candidate_pool_accounts.is_empty();
    if candidate.confidence == "low" && has_generic_evidence {
        candidate.confidence = "medium".to_string();
    }

    if candidate.pool_address.is_none() && candidate.verified_pool_address.is_none() {
        if let Some(first_pool) = candidate.candidate_pool_accounts.first() {
            candidate.backfill_hint = build_backfill_hint(
                "candidate_pool",
                Some(first_pool.address.as_str()),
                candidate.signature.as_str(),
            );
        }
    }
}
/// Collects the DEX-agnostic evidence of one transaction: its account keys,
/// the merged token balance deltas, and the distinct mints seen in them.
fn extract_transaction_generic_evidence(
    transaction: &serde_json::Value,
) -> TransactionGenericEvidence {
    let keys = extract_transaction_account_keys(transaction);
    let deltas = extract_token_balance_deltas(transaction, keys.as_slice());
    let mints = collect_observed_token_mints(deltas.as_slice());
    TransactionGenericEvidence {
        account_keys: keys,
        observed_token_mints: mints,
        token_balance_deltas: deltas,
    }
}
/// Parses `transaction.message.accountKeys` into account key records.
///
/// Entries that cannot be parsed are dropped, but each kept record still
/// carries its original position in the array. Returns an empty list when the
/// array is missing.
fn extract_transaction_account_keys(
    transaction: &serde_json::Value,
) -> std::vec::Vec<AccountKeyInfo> {
    let entries = transaction
        .get("transaction")
        .and_then(|node| node.get("message"))
        .and_then(|node| node.get("accountKeys"))
        .and_then(serde_json::Value::as_array);
    match entries {
        Some(entries) => entries
            .iter()
            .enumerate()
            .filter_map(|(position, entry)| parse_account_key_info(entry, position as i64))
            .collect(),
        None => std::vec::Vec::new(),
    }
}
/// Parses one `accountKeys` entry.
///
/// A plain string is taken as the address with unknown signer/writable flags.
/// Any other value must expose a string `pubkey`; its boolean `signer` and
/// `writable` fields are read when present. Returns `None` otherwise.
fn parse_account_key_info(
    value: &serde_json::Value,
    index: i64,
) -> std::option::Option<AccountKeyInfo> {
    match value {
        serde_json::Value::String(address) => Some(AccountKeyInfo {
            index,
            address: address.clone(),
            signer: None,
            writable: None,
        }),
        detailed => {
            let address = detailed
                .get("pubkey")
                .and_then(serde_json::Value::as_str)?
                .to_string();
            Some(AccountKeyInfo {
                index,
                address,
                signer: detailed.get("signer").and_then(serde_json::Value::as_bool),
                writable: detailed.get("writable").and_then(serde_json::Value::as_bool),
            })
        },
    }
}
/// Merges `meta.preTokenBalances` and `meta.postTokenBalances` into one delta
/// row per (account index, mint) pair.
///
/// The pre side is processed before the post side, so rows appear in
/// first-seen order. `delta_raw` is whatever `compute_delta_raw` derives from
/// the two raw amounts.
fn extract_token_balance_deltas(
    transaction: &serde_json::Value,
    account_keys: &[AccountKeyInfo],
) -> std::vec::Vec<crate::OnchainDexTokenBalanceDeltaDto> {
    let mut merged = std::vec::Vec::new();
    for (key, is_pre) in [("preTokenBalances", true), ("postTokenBalances", false)] {
        collect_token_balance_side(transaction, key, is_pre, account_keys, &mut merged);
    }

    merged
        .into_iter()
        .map(|row| {
            let delta_raw =
                compute_delta_raw(row.pre_amount_raw.as_deref(), row.post_amount_raw.as_deref());
            crate::OnchainDexTokenBalanceDeltaDto {
                account_index: row.account_index,
                account_address: row.account_address,
                mint: row.mint,
                owner: row.owner,
                token_program: row.token_program,
                pre_amount_raw: row.pre_amount_raw,
                post_amount_raw: row.post_amount_raw,
                delta_raw,
            }
        })
        .collect()
}
/// Folds one side (`meta.<key>`) of the token balance arrays into `accumulators`.
///
/// Entries without a string `mint` are skipped. Each remaining entry is
/// matched to an existing accumulator by (account index, mint) or appended as
/// a new one. `owner` and `token_program` are only filled when still unset;
/// the raw amount is written to the pre or post slot depending on `is_pre`,
/// overwriting any earlier value for that slot.
fn collect_token_balance_side(
    transaction: &serde_json::Value,
    key: &str,
    is_pre: bool,
    account_keys: &[AccountKeyInfo],
    accumulators: &mut std::vec::Vec<TokenBalanceAccumulator>,
) {
    let entries: &[serde_json::Value] = transaction
        .get("meta")
        .and_then(|meta| meta.get(key))
        .and_then(serde_json::Value::as_array)
        .map(|list| list.as_slice())
        .unwrap_or(&[]);

    for entry in entries {
        let text_field = |name: &str| {
            entry
                .get(name)
                .and_then(serde_json::Value::as_str)
                .map(|text| text.to_string())
        };

        let mint = match text_field("mint") {
            Some(mint) => mint,
            None => continue,
        };
        let account_index = entry.get("accountIndex").and_then(serde_json::Value::as_i64);
        let owner = text_field("owner");
        let token_program = text_field("programId");
        let amount = entry
            .get("uiTokenAmount")
            .and_then(|ui_amount| ui_amount.get("amount"))
            .and_then(serde_json::Value::as_str)
            .map(|text| text.to_string());

        let existing =
            find_token_balance_accumulator(accumulators.as_slice(), account_index, mint.as_str());
        let slot = match existing {
            Some(slot) => slot,
            None => {
                // New rows start without owner/program; the fill-if-unset
                // step below assigns them exactly as for existing rows.
                let account_address = account_index
                    .and_then(|position| account_address_by_index(account_keys, position));
                accumulators.push(TokenBalanceAccumulator {
                    account_index,
                    account_address,
                    mint,
                    owner: None,
                    token_program: None,
                    pre_amount_raw: None,
                    post_amount_raw: None,
                });
                accumulators.len() - 1
            },
        };

        let row = &mut accumulators[slot];
        if row.owner.is_none() {
            row.owner = owner;
        }
        if row.token_program.is_none() {
            row.token_program = token_program;
        }
        if is_pre {
            row.pre_amount_raw = amount;
        } else {
            row.post_amount_raw = amount;
        }
    }
}
/// Returns the position of the first accumulator with the same account index
/// and mint, if any.
fn find_token_balance_accumulator(
    accumulators: &[TokenBalanceAccumulator],
    account_index: std::option::Option<i64>,
    mint: &str,
) -> std::option::Option<usize> {
    accumulators
        .iter()
        .position(|row| row.account_index == account_index && row.mint == mint)
}
/// Returns the address of the first account key recorded at `account_index`.
fn account_address_by_index(
    account_keys: &[AccountKeyInfo],
    account_index: i64,
) -> std::option::Option<std::string::String> {
    account_keys
        .iter()
        .find(|key| key.index == account_index)
        .map(|key| key.address.clone())
}
/// Returns a copy of the first account key record whose address equals `address`.
fn account_key_info_by_address(
    account_keys: &[AccountKeyInfo],
    address: &str,
) -> std::option::Option<AccountKeyInfo> {
    account_keys
        .iter()
        .find(|key| key.address == address)
        .cloned()
}
/// Lists the mints of the given deltas, each added through
/// `push_unique_string` in delta order.
fn collect_observed_token_mints(
    deltas: &[crate::OnchainDexTokenBalanceDeltaDto],
) -> std::vec::Vec<std::string::String> {
    let mut seen: std::vec::Vec<std::string::String> = std::vec::Vec::new();
    deltas
        .iter()
        .for_each(|delta| push_unique_string(&mut seen, delta.mint.clone()));
    seen
}
/// Returns copies of the first `limit` deltas (all of them when there are fewer).
fn sample_token_balance_deltas(
    deltas: &[crate::OnchainDexTokenBalanceDeltaDto],
    limit: usize,
) -> std::vec::Vec<crate::OnchainDexTokenBalanceDeltaDto> {
    deltas.iter().take(limit).cloned().collect()
}
/// Picks up to two distinct mints from `deltas` as the candidate token pair.
///
/// Mints whose `delta_raw` is exactly `"0"` are skipped on the first pass.
/// If that pass yields fewer than two distinct mints, every mint in `deltas`
/// is appended (still de-duplicated) as a fallback. The first two mints of
/// the resulting list are returned; either slot is `None` when the list is
/// too short.
fn infer_token_pair_from_deltas(
    deltas: &[crate::OnchainDexTokenBalanceDeltaDto],
) -> (
    std::option::Option<std::string::String>,
    std::option::Option<std::string::String>,
) {
    let mut distinct_mints: Vec<String> = Vec::new();

    // First pass: only mints whose balance delta is not the literal "0".
    for delta in deltas
        .iter()
        .filter(|delta| delta.delta_raw.as_deref() != Some("0"))
    {
        push_unique_string(&mut distinct_mints, delta.mint.clone());
    }

    // Fallback: not enough movers, so accept any observed mint.
    if distinct_mints.len() < 2 {
        for delta in deltas {
            push_unique_string(&mut distinct_mints, delta.mint.clone());
        }
    }

    let mut ordered = distinct_mints.into_iter();
    let first = ordered.next();
    let second = ordered.next();
    (first, second)
}
/// Builds the list of token-vault candidates for one matched instruction.
///
/// A delta contributes a candidate when it carries an account address and
/// that address is one of `instruction_accounts`. Index, writable and signer
/// flags come from the matching `account_keys` entry when there is one;
/// otherwise the delta's own `account_index` is used and both flags are
/// `None`. Candidates are de-duplicated by address, tagged
/// `"token_vault_candidate"` with `"medium"` confidence.
fn infer_candidate_token_vault_accounts(
    instruction_accounts: &[std::string::String],
    deltas: &[crate::OnchainDexTokenBalanceDeltaDto],
    account_keys: &[AccountKeyInfo],
) -> std::vec::Vec<crate::OnchainDexCandidateAccountDto> {
    let mut candidates = Vec::new();
    for delta in deltas {
        // Only deltas with a known address referenced by the instruction qualify.
        let address = match delta.account_address.as_deref() {
            Some(address) if string_slice_contains(instruction_accounts, address) => address,
            _ => continue,
        };

        let (account_index, writable, signer) =
            match account_key_info_by_address(account_keys, address) {
                Some(info) => (Some(info.index), info.writable, info.signer),
                None => (delta.account_index, None, None),
            };

        push_unique_candidate_account(
            &mut candidates,
            crate::OnchainDexCandidateAccountDto {
                address: address.to_string(),
                account_index,
                writable,
                signer,
                inferred_role: "token_vault_candidate".to_string(),
                confidence: "medium".to_string(),
                reason: format!(
                    "instruction account has token balance delta for mint {}",
                    delta.mint
                ),
            },
        );
    }
    candidates
}
/// Builds the list of pool/state candidates for one matched instruction.
///
/// Walks `instruction_accounts` in order and keeps an account unless it is a
/// known program or sysvar, is already a vault candidate, is flagged as a
/// signer, or is flagged as read-only. Accounts with unknown flags are kept.
/// Candidates are de-duplicated by address, tagged
/// `"pool_or_state_candidate"` with `"low"` confidence, and the scan stops
/// once 12 candidates have been collected.
fn infer_candidate_pool_accounts(
    instruction_accounts: &[std::string::String],
    account_keys: &[AccountKeyInfo],
    vault_candidates: &[crate::OnchainDexCandidateAccountDto],
) -> std::vec::Vec<crate::OnchainDexCandidateAccountDto> {
    // Upper bound on the number of candidates reported per instruction.
    const MAX_CANDIDATES: usize = 12;

    let mut candidates = Vec::new();
    for account in instruction_accounts {
        let address = account.as_str();
        if is_known_program_or_sysvar(address)
            || candidate_account_slice_contains(vault_candidates, address)
        {
            continue;
        }

        let (account_index, writable, signer) =
            match account_key_info_by_address(account_keys, address) {
                Some(info) => (Some(info.index), info.writable, info.signer),
                None => (None, None, None),
            };

        // Only explicit flags exclude an account; `None` means "unknown" and is kept.
        if signer == Some(true) || writable == Some(false) {
            continue;
        }

        push_unique_candidate_account(
            &mut candidates,
            crate::OnchainDexCandidateAccountDto {
                address: account.clone(),
                account_index,
                writable,
                signer,
                inferred_role: "pool_or_state_candidate".to_string(),
                confidence: "low".to_string(),
                reason: "writable non-signer account used by matched DEX instruction".to_string(),
            },
        );
        if candidates.len() >= MAX_CANDIDATES {
            break;
        }
    }
    candidates
}
/// Builds the list of remaining instruction-account candidates.
///
/// Walks `instruction_accounts` in order and keeps every account that is not
/// a known program or sysvar and is not already listed in `pool_candidates`
/// or `vault_candidates`. Candidates are de-duplicated by address, tagged
/// `"instruction_account_candidate"` with `"low"` confidence, and the scan
/// stops once 12 candidates have been collected.
fn infer_candidate_program_accounts(
    instruction_accounts: &[std::string::String],
    account_keys: &[AccountKeyInfo],
    pool_candidates: &[crate::OnchainDexCandidateAccountDto],
    vault_candidates: &[crate::OnchainDexCandidateAccountDto],
) -> std::vec::Vec<crate::OnchainDexCandidateAccountDto> {
    // Upper bound on the number of candidates reported per instruction.
    const MAX_CANDIDATES: usize = 12;

    let mut candidates = Vec::new();
    for account in instruction_accounts {
        let address = account.as_str();
        let already_classified = is_known_program_or_sysvar(address)
            || candidate_account_slice_contains(pool_candidates, address)
            || candidate_account_slice_contains(vault_candidates, address);
        if already_classified {
            continue;
        }

        let (account_index, writable, signer) =
            match account_key_info_by_address(account_keys, address) {
                Some(info) => (Some(info.index), info.writable, info.signer),
                None => (None, None, None),
            };

        push_unique_candidate_account(
            &mut candidates,
            crate::OnchainDexCandidateAccountDto {
                address: account.clone(),
                account_index,
                writable,
                signer,
                inferred_role: "instruction_account_candidate".to_string(),
                confidence: "low".to_string(),
                reason: "non-program account referenced by matched DEX instruction".to_string(),
            },
        );
        if candidates.len() >= MAX_CANDIDATES {
            break;
        }
    }
    candidates
}
/// Computes `post - pre` for two raw amounts given as decimal strings.
///
/// A missing side (`None`) counts as zero, per `parse_i128_or_zero`.
///
/// Returns the signed difference rendered as a decimal string, or `None`
/// when either side is present but not a valid `i128`, or when the
/// subtraction would overflow `i128`. The overflow case previously panicked
/// in debug builds and wrapped silently in release builds; amounts come from
/// RPC payloads, so it is reported as "no delta" instead.
fn compute_delta_raw(
    pre: std::option::Option<&str>,
    post: std::option::Option<&str>,
) -> std::option::Option<std::string::String> {
    let pre_value = parse_i128_or_zero(pre)?;
    let post_value = parse_i128_or_zero(post)?;
    post_value
        .checked_sub(pre_value)
        .map(|delta| delta.to_string())
}
/// Parses an optional decimal string as `i128`, treating absence as zero.
///
/// Returns `Some(0)` for `None`, `Some(n)` when the text parses as an
/// `i128`, and `None` when the text is present but not a valid `i128`.
fn parse_i128_or_zero(value: std::option::Option<&str>) -> std::option::Option<i128> {
    value.map_or(Some(0), |text| text.parse::<i128>().ok())
}
/// Appends `value` to `values` unless an equal string is already present.
fn push_unique_string(values: &mut std::vec::Vec<std::string::String>, value: std::string::String) {
    if !values.contains(&value) {
        values.push(value);
    }
}
/// Appends `value` to `values` unless a candidate with the same address is
/// already present. Only the address is compared; other fields are ignored.
fn push_unique_candidate_account(
    values: &mut std::vec::Vec<crate::OnchainDexCandidateAccountDto>,
    value: crate::OnchainDexCandidateAccountDto,
) {
    let is_new_address = values
        .iter()
        .all(|existing| existing.address != value.address);
    if is_new_address {
        values.push(value);
    }
}
/// Reports whether any string in `values` is exactly equal to `needle`.
fn string_slice_contains(values: &[std::string::String], needle: &str) -> bool {
    values.iter().any(|candidate| candidate.as_str() == needle)
}
/// Reports whether any candidate in `values` has an address equal to `needle`.
fn candidate_account_slice_contains(
    values: &[crate::OnchainDexCandidateAccountDto],
    needle: &str,
) -> bool {
    values.iter().any(|candidate| candidate.address == needle)
}
/// Reports whether `address` is one of the hard-coded well-known addresses
/// that candidate inference skips.
///
/// The comparison is an exact string match against the fixed list below.
fn is_known_program_or_sysvar(address: &str) -> bool {
    matches!(
        address,
        "11111111111111111111111111111111"
            | "ComputeBudget111111111111111111111111111111"
            | "TokenkegQfeZyiNwAJbNbGKPFXCWuBvf9Ss623VQ5DA"
            | "TokenzQdBNbLqP5VEhdkAS6EPVDJGcDq6QubKX8TPp6"
            | "ATokenGPvbdGVxr1b2hvZbsiqW5xWH25efTNsLJA8knL"
            | "SysvarRent111111111111111111111111111111111"
            | "SysvarC1ock11111111111111111111111111111111"
            | "SysvarInstructions1111111111111111111111111"
    )
}
/// Flattens the top-level and inner instructions of a transaction JSON value.
///
/// Top-level instructions are read from `transaction.message.instructions`
/// and receive their array position as `instruction_index` with no inner
/// index. Inner instructions are read from `meta.innerInstructions`; each one
/// receives its group's `index` field (when it is an integer) as
/// `instruction_index` and its position within the group as the inner index.
/// Missing or non-array sections contribute nothing. Top-level entries come
/// first, followed by inner entries in group order.
fn extract_onchain_instructions(
    transaction: &serde_json::Value,
) -> std::vec::Vec<OnchainInstructionCandidate> {
    let mut collected = Vec::new();

    let outer = transaction
        .get("transaction")
        .and_then(|node| node.get("message"))
        .and_then(|node| node.get("instructions"))
        .and_then(serde_json::Value::as_array);
    if let Some(outer) = outer {
        collected.extend(outer.iter().enumerate().map(|(position, instruction)| {
            parse_instruction_candidate(instruction, Some(position as i64), None)
        }));
    }

    let groups = transaction
        .get("meta")
        .and_then(|node| node.get("innerInstructions"))
        .and_then(serde_json::Value::as_array);
    for group in groups.into_iter().flatten() {
        // Index of the top-level instruction this group belongs to, if present.
        let parent_index = group.get("index").and_then(serde_json::Value::as_i64);
        let nested = group
            .get("instructions")
            .and_then(serde_json::Value::as_array);
        if let Some(nested) = nested {
            collected.extend(nested.iter().enumerate().map(|(position, instruction)| {
                parse_instruction_candidate(instruction, parent_index, Some(position as i64))
            }));
        }
    }

    collected
}
/// Converts one instruction JSON value into an `OnchainInstructionCandidate`.
///
/// The two indexes are stored as given. `program_id` is the `programId`
/// string field, `accounts` and `data` come from the dedicated extractors,
/// and `parsed` is a clone of the `parsed` member when present.
fn parse_instruction_candidate(
    instruction: &serde_json::Value,
    instruction_index: std::option::Option<i64>,
    inner_instruction_index: std::option::Option<i64>,
) -> OnchainInstructionCandidate {
    let program_id = extract_string_field(instruction, "programId");
    let accounts = extract_accounts(instruction);
    let data = extract_instruction_data(instruction);
    let parsed = instruction.get("parsed").cloned();
    OnchainInstructionCandidate {
        instruction_index,
        inner_instruction_index,
        program_id,
        accounts,
        data,
        parsed,
    }
}
/// Extracts the account addresses listed under an instruction's `accounts`.
///
/// Each array entry is accepted either as a plain string or as an object
/// with a string `pubkey` member; entries of any other shape are skipped.
/// Returns an empty list when `accounts` is missing or not an array.
fn extract_accounts(instruction: &serde_json::Value) -> std::vec::Vec<std::string::String> {
    instruction
        .get("accounts")
        .and_then(serde_json::Value::as_array)
        .map(|entries| {
            entries
                .iter()
                .filter_map(|entry| {
                    entry
                        .as_str()
                        .or_else(|| entry.get("pubkey").and_then(serde_json::Value::as_str))
                        .map(|address| address.to_string())
                })
                .collect::<Vec<String>>()
        })
        .unwrap_or_default()
}
fn extract_instruction_data(
instruction: &serde_json::Value,
) -> std::option::Option<std::string::String> {
let data = match instruction.get("data") {
Some(data) => data,
None => return None,
};
if let Some(data_string) = data.as_str() {
return Some(data_string.to_string());
}
if let Some(data_array) = data.as_array() {
let first = match data_array.first() {
Some(first) => first,
None => return None,
};
if let Some(first_string) = first.as_str() {
return Some(first_string.to_string());
}
}
return None;
}
/// Returns an owned copy of the string stored under `key` in `value`.
///
/// Yields `None` when the key is absent or its value is not a string.
fn extract_string_field(
    value: &serde_json::Value,
    key: &str,
) -> std::option::Option<std::string::String> {
    value
        .get(key)
        .and_then(serde_json::Value::as_str)
        .map(|text| text.to_string())
}
/// Returns the first string found under any of `keys` in a parsed instruction.
///
/// Keys are tried in the given order. For each key, `parsed.info[key]` is
/// checked first, then `parsed[key]`; only string values count. Returns
/// `None` when the instruction has no `parsed` payload or no key matches.
fn extract_string_by_candidate_keys_from_instruction(
    instruction: &OnchainInstructionCandidate,
    keys: &[&str],
) -> std::option::Option<std::string::String> {
    let parsed = instruction.parsed.as_ref()?;
    let info = parsed.get("info");
    keys.iter().find_map(|key| {
        info.and_then(|info| info.get(*key))
            .and_then(serde_json::Value::as_str)
            .or_else(|| parsed.get(*key).and_then(serde_json::Value::as_str))
            .map(|text| text.to_string())
    })
}
/// Derives an instruction name from a parsed payload or, failing that, logs.
///
/// Lookup order:
/// 1. `parsed.type` when it is a string;
/// 2. the first string found in `parsed.info` under `instruction`,
///    `instructionType`, `type` or `name` (in that order);
/// 3. the first log line containing `instruction:` (ASCII case-insensitive),
///    from which the trimmed text after the marker is returned.
///
/// Returns `None` when none of the sources yields a name.
fn infer_instruction_name(
    parsed: std::option::Option<&serde_json::Value>,
    logs: &[std::string::String],
) -> std::option::Option<std::string::String> {
    const LOG_MARKER: &str = "instruction:";

    let from_parsed = parsed.and_then(|payload| {
        payload
            .get("type")
            .and_then(serde_json::Value::as_str)
            .or_else(|| {
                let info = payload.get("info")?;
                ["instruction", "instructionType", "type", "name"]
                    .iter()
                    .find_map(|key| info.get(*key).and_then(serde_json::Value::as_str))
            })
    });
    if let Some(name) = from_parsed {
        return Some(name.to_string());
    }

    logs.iter().find_map(|log| {
        // ASCII lowercasing keeps byte offsets, so the offset found in the
        // lowered copy is valid for slicing the original line.
        let marker_start = log.to_ascii_lowercase().find(LOG_MARKER)?;
        log.get(marker_start + LOG_MARKER.len()..)
            .map(|rest| rest.trim().to_string())
    })
}
/// Classifies a candidate from keywords in its instruction name and logs.
///
/// The instruction name (when present) and every log line are joined into
/// one ASCII-lowercased text, which is then tested in this priority order:
/// `create` + `pool` → `"create_pool"`; `initialize` + `pool` →
/// `"initialize_pool"`; `swap`, `buy` or `sell` → `"swap"`; `liquidity` or
/// `position` → `"liquidity_or_position"`; otherwise `"program_activity"`.
fn infer_candidate_kind(
    instruction_name: std::option::Option<&std::string::String>,
    logs: &[std::string::String],
) -> std::string::String {
    let mut haystack = String::new();
    for fragment in instruction_name.into_iter().chain(logs.iter()) {
        haystack.push_str(fragment);
        haystack.push(' ');
    }
    haystack.make_ascii_lowercase();

    let mentions = |keyword: &str| haystack.contains(keyword);
    let kind = if mentions("create") && mentions("pool") {
        "create_pool"
    } else if mentions("initialize") && mentions("pool") {
        "initialize_pool"
    } else if mentions("swap") || mentions("buy") || mentions("sell") {
        "swap"
    } else if mentions("liquidity") || mentions("position") {
        "liquidity_or_position"
    } else {
        "program_activity"
    };
    kind.to_string()
}
/// Reports whether the transaction JSON carries a non-null `meta.err`.
///
/// A missing `meta`, a missing `err`, or an `err` that is JSON `null` all
/// count as "not failed".
fn transaction_failed(transaction: &serde_json::Value) -> bool {
    matches!(
        transaction.get("meta").and_then(|meta| meta.get("err")),
        Some(err) if !err.is_null()
    )
}
/// Collects the string entries of `meta.logMessages` from a transaction.
///
/// Non-string entries are skipped. Returns an empty list when `meta` or
/// `logMessages` is missing or `logMessages` is not an array.
fn extract_log_messages(transaction: &serde_json::Value) -> std::vec::Vec<std::string::String> {
    transaction
        .get("meta")
        .and_then(|meta| meta.get("logMessages"))
        .and_then(serde_json::Value::as_array)
        .map(|entries| {
            entries
                .iter()
                .filter_map(|entry| entry.as_str())
                .map(|line| line.to_string())
                .collect::<Vec<String>>()
        })
        .unwrap_or_default()
}
/// Returns copies of at most the first `limit` log lines.
///
/// Thin, log-specific alias over `sample_strings`.
fn sample_logs(logs: &[std::string::String], limit: usize) -> std::vec::Vec<std::string::String> {
    sample_strings(logs, limit)
}
/// Returns copies of the first `limit` strings of `values`.
///
/// When `values` holds fewer than `limit` entries, all of them are returned.
fn sample_strings(
    values: &[std::string::String],
    limit: usize,
) -> std::vec::Vec<std::string::String> {
    values.iter().take(limit).cloned().collect()
}
/// Formats a human-readable backfill hint for a discovered candidate.
///
/// When `address` is present, `hint_kind` selects the message: `"pool"`,
/// `"token"` and `"candidate_pool"` each have a dedicated wording (the last
/// one also embeds `signature`). Any other kind, or a missing address, falls
/// back to a hint that points at the transaction `signature` only.
fn build_backfill_hint(
    hint_kind: &str,
    address: std::option::Option<&str>,
    signature: &str,
) -> std::string::String {
    match (hint_kind, address) {
        ("pool", Some(address)) => {
            format!("Backfill pool address in Demo Pipeline 2: {}", address)
        },
        ("token", Some(address)) => {
            format!("Backfill token mint in Demo Pipeline 2: {}", address)
        },
        ("candidate_pool", Some(address)) => format!(
            "Candidate pool/state account for inspection/backfill: {} ; signature: {}",
            address, signature
        ),
        _ => format!("Inspect/backfill transaction signature: {}", signature),
    }
}
/// Restricts `value` to the range `min..=max`.
///
/// The lower bound is checked first, so when `min > max` a value below `min`
/// yields `min` and any other value yields `max`. Unlike `u32::clamp`, this
/// never panics on inverted bounds.
fn clamp_u32(value: u32, min: u32, max: u32) -> u32 {
    match value {
        below if below < min => min,
        above if above > max => max,
        within => within,
    }
}
/// Restricts `value` to the range `min..=max`.
///
/// The lower bound is checked first, so when `min > max` a value below `min`
/// yields `min` and any other value yields `max`. Unlike `usize::clamp`, this
/// never panics on inverted bounds.
fn clamp_usize(value: usize, min: usize, max: usize) -> usize {
    match value {
        below if below < min => min,
        above if above > max => max,
        within => within,
    }
}
#[cfg(test)]
mod tests {
    /// Builds a discovery request that carries only a DEX code (no explicit
    /// program id), with the fixed limits shared by the tests below.
    fn request_with_dex_code(dex_code: &str) -> crate::OnchainDexPairDiscoveryRequestDto {
        crate::OnchainDexPairDiscoveryRequestDto {
            dex_code: Some(dex_code.to_string()),
            program_id: None,
            http_role: "history_backfill".to_string(),
            signature_limit: 10,
            transaction_limit: 5,
            candidate_limit: 3,
        }
    }

    /// `raydium_clmm` must resolve to the Raydium CLMM program id and keep
    /// its DEX code in the resolution result.
    #[test]
    fn resolve_known_dex_code_to_program_id() {
        let request = request_with_dex_code("raydium_clmm");
        let resolved = match super::resolve_program_id(&request) {
            Ok(resolved) => resolved,
            Err(error) => panic!("resolve must succeed: {error}"),
        };
        assert_eq!(resolved.program_id, crate::RAYDIUM_CLMM_PROGRAM_ID);
        assert_eq!(resolved.dex_code, Some("raydium_clmm".to_string()));
    }

    /// Resolution is expected to fail for the `metadao` code when no
    /// explicit program id is supplied.
    #[test]
    fn reject_unverified_dex_code_without_program_id() {
        let request = request_with_dex_code("metadao");
        assert!(super::resolve_program_id(&request).is_err());
    }
}