This commit is contained in:
2026-05-11 11:02:47 +02:00
parent d66afede28
commit 7f130dba6b
49 changed files with 10301 additions and 8481 deletions

View File

@@ -0,0 +1,530 @@
// file: kb_lib/src/protocol_candidate_recording.rs
//! Protocol candidate recording.
//!
//! This module records candidate protocol/program instructions for transactions
//! that were not fully decoded by the current DEX decoders.
/// Input used to record protocol candidates for one classified transaction.
///
/// Bundles everything `record_protocol_candidates_for_classification` needs:
/// the database handle, the persisted transaction with its internal id, the
/// transaction's projected instructions and the classification kind.
pub(crate) struct ProtocolCandidateRecordingInput<'a> {
/// Database connection used for the candidate delete and insert queries.
pub(crate) database: &'a crate::Database,
/// Persisted transaction; its signature and slot are copied onto every
/// recorded candidate.
pub(crate) transaction: &'a crate::ChainTransactionDto,
/// Internal transaction id; existing candidates are deleted by this id and
/// new candidates are recorded under it.
pub(crate) transaction_id: i64,
/// Projected instructions for the transaction; each one is inspected as a
/// possible candidate.
pub(crate) instructions: &'a [crate::ChainInstructionDto],
/// Persisted classification kind. Only `"known_dex_program_unclassified"`
/// and `"unknown_or_unclassified"` produce candidates.
pub(crate) classification_kind: &'a str,
}
/// Replaces the stored protocol candidates of one classified transaction.
///
/// Candidates already stored for `input.transaction_id` are deleted first, so
/// replaying the same transaction is deterministic. A fresh candidate set is
/// then derived from the instructions and classification kind and inserted one
/// by one.
///
/// Returns the number of candidates that were inserted.
///
/// # Errors
///
/// Returns the first error reported by the delete query or by any insert
/// query. This function performs no rollback itself: candidates inserted
/// before a failing insert are left as they are.
pub(crate) async fn record_protocol_candidates_for_classification(
    input: crate::protocol_candidate_recording::ProtocolCandidateRecordingInput<'_>,
) -> Result<usize, crate::Error> {
    // Clear earlier candidates so a replay does not accumulate duplicates.
    crate::query_protocol_candidates_delete_by_transaction_id(
        input.database,
        input.transaction_id,
    )
    .await?;

    let specs =
        crate::protocol_candidate_recording::build_protocol_candidate_specs_for_classification(
            input.transaction,
            input.transaction_id,
            input.instructions,
            input.classification_kind,
        );

    let mut recorded = 0_usize;
    for spec in specs {
        let candidate = crate::ProtocolCandidateDto::new(
            input.transaction_id,
            spec.instruction_id,
            input.transaction.signature.clone(),
            input.transaction.slot,
            spec.program_id,
            spec.program_name_hint,
            spec.candidate_protocol,
            spec.candidate_surface,
            spec.reason,
            spec.evidence_json,
        );
        crate::query_protocol_candidates_insert(input.database, &candidate).await?;
        recorded += 1;
    }

    Ok(recorded)
}
/// In-memory description of one protocol candidate, built from a single
/// instruction before it is turned into a `ProtocolCandidateDto`.
struct ProtocolCandidateSpec {
/// Id of the instruction the candidate was derived from, copied from the
/// instruction DTO.
instruction_id: std::option::Option<i64>,
/// Program id of the instruction.
program_id: std::string::String,
/// Program name carried by the instruction, if any.
program_name_hint: std::option::Option<std::string::String>,
/// Protocol name when the program id matches a known DEX program; `None`
/// for candidates built from unknown programs.
candidate_protocol: std::option::Option<std::string::String>,
/// Surface hint inferred for unknown programs; `None` for candidates built
/// from known DEX programs.
candidate_surface: std::option::Option<std::string::String>,
/// Human-readable explanation of why the candidate was recorded.
reason: std::string::String,
/// JSON text with the evidence for this candidate, as produced by
/// `build_instruction_evidence_json`.
evidence_json: std::string::String,
}
/// Selects the candidate builder that matches the classification kind.
///
/// * `"known_dex_program_unclassified"` yields one candidate per instruction
///   whose program id is a known DEX program.
/// * `"unknown_or_unclassified"` yields one candidate per instruction whose
///   program id is not on the ignore list.
/// * Any other kind yields no candidates.
fn build_protocol_candidate_specs_for_classification(
    transaction: &crate::ChainTransactionDto,
    transaction_id: i64,
    instructions: &[crate::ChainInstructionDto],
    classification_kind: &str,
) -> std::vec::Vec<ProtocolCandidateSpec> {
    match classification_kind {
        "known_dex_program_unclassified" => {
            build_known_dex_program_candidate_specs(transaction, transaction_id, instructions)
        }
        "unknown_or_unclassified" => {
            build_unknown_program_candidate_specs(transaction, transaction_id, instructions)
        }
        _ => std::vec::Vec::new(),
    }
}
/// Builds one candidate for every instruction that targets a known DEX program.
///
/// Instructions without a program id, or whose program id is not recognised by
/// `known_dex_protocol_name`, are skipped. Each candidate carries the matched
/// protocol name and no surface hint. Candidates keep the order of
/// `instructions`.
fn build_known_dex_program_candidate_specs(
    transaction: &crate::ChainTransactionDto,
    transaction_id: i64,
    instructions: &[crate::ChainInstructionDto],
) -> std::vec::Vec<ProtocolCandidateSpec> {
    instructions
        .iter()
        .filter_map(|instruction| {
            // Both lookups bail out of the closure with `None`, which drops
            // the instruction from the result.
            let program_id = instruction.program_id.as_deref()?;
            let protocol = known_dex_protocol_name(program_id)?;

            Some(ProtocolCandidateSpec {
                instruction_id: instruction.id,
                program_id: program_id.to_string(),
                program_name_hint: instruction.program_name.clone(),
                candidate_protocol: Some(protocol.to_string()),
                candidate_surface: None,
                reason: "known DEX program instruction did not produce a decoded DEX event"
                    .to_string(),
                evidence_json: build_instruction_evidence_json(
                    transaction,
                    transaction_id,
                    instruction,
                    "known_dex_program_without_decoded_event",
                ),
            })
        })
        .collect()
}
/// Builds one candidate for every instruction that targets a non-ignored program.
///
/// Instructions without a program id, or whose program id is rejected by
/// `should_ignore_program_id`, are skipped. Each candidate has no protocol
/// name and carries whatever surface hint `infer_candidate_surface` derives.
/// Candidates keep the order of `instructions`.
fn build_unknown_program_candidate_specs(
    transaction: &crate::ChainTransactionDto,
    transaction_id: i64,
    instructions: &[crate::ChainInstructionDto],
) -> std::vec::Vec<ProtocolCandidateSpec> {
    instructions
        .iter()
        // Pair each instruction with its program id, dropping those without one.
        .filter_map(|instruction| {
            instruction
                .program_id
                .as_deref()
                .map(|program_id| (instruction, program_id))
        })
        .filter(|&(_, program_id)| !should_ignore_program_id(program_id))
        .map(|(instruction, program_id)| ProtocolCandidateSpec {
            instruction_id: instruction.id,
            program_id: program_id.to_string(),
            program_name_hint: instruction.program_name.clone(),
            candidate_protocol: None,
            candidate_surface: infer_candidate_surface(program_id, instruction),
            reason:
                "transaction has no decoded DEX event and includes a non-ignored program instruction"
                    .to_string(),
            evidence_json: build_instruction_evidence_json(
                transaction,
                transaction_id,
                instruction,
                "unknown_or_unclassified_program_instruction",
            ),
        })
        .collect()
}
fn build_instruction_evidence_json(
transaction: &crate::ChainTransactionDto,
transaction_id: i64,
instruction: &crate::ChainInstructionDto,
reason_code: &str,
) -> std::string::String {
let evidence_value = serde_json::json!({
"reasonCode": reason_code,
"transactionId": transaction_id,
"signature": transaction.signature,
"slot": transaction.slot,
"instructionId": instruction.id,
"parentInstructionId": instruction.parent_instruction_id,
"instructionIndex": instruction.instruction_index,
"programId": instruction.program_id,
"programName": instruction.program_name,
"stackHeight": instruction.stack_height,
"parsedType": instruction.parsed_type
});
let evidence_json_result = serde_json::to_string(&evidence_value);
match evidence_json_result {
Ok(evidence_json) => return evidence_json,
Err(error) => {
return format!(
"{{\"reasonCode\":\"evidence_serialization_failed\",\"error\":\"{}\"}}",
error
);
},
}
}
/// Maps a program id to the protocol name of a known DEX program.
///
/// Returns `None` when the program id matches none of the listed programs.
/// Entries are checked in table order and the first match wins.
fn known_dex_protocol_name(program_id: &str) -> std::option::Option<&'static str> {
    // (program id, protocol name) pairs.
    let known_programs: &[(&str, &'static str)] = &[
        (crate::RAYDIUM_AMM_V4_PROGRAM_ID, "raydium_amm_v4"),
        (crate::RAYDIUM_CPMM_PROGRAM_ID, "raydium_cpmm"),
        (crate::RAYDIUM_CLMM_PROGRAM_ID, "raydium_clmm"),
        (crate::RAYDIUM_LAUNCHLAB_PROGRAM_ID, "raydium_launchlab"),
        (crate::RAYDIUM_AMM_ROUTING_PROGRAM_ID, "raydium_router"),
        (crate::RAYDIUM_STABLE_SWAP_AMM_PROGRAM_ID, "raydium_stable_swap"),
        (crate::PUMP_FUN_PROGRAM_ID, "pump_fun"),
        (crate::PUMP_SWAP_PROGRAM_ID, "pump_swap"),
        (crate::METEORA_DBC_PROGRAM_ID, "meteora_dbc"),
        (crate::METEORA_DLMM_PROGRAM_ID, "meteora_dlmm"),
        (crate::METEORA_DAMM_V1_PROGRAM_ID, "meteora_damm_v1"),
        (crate::METEORA_DAMM_V2_PROGRAM_ID, "meteora_damm_v2"),
        (crate::ORCA_WHIRLPOOLS_PROGRAM_ID, "orca_whirlpools"),
        (crate::FLUXBEAM_PROGRAM_ID, "fluxbeam"),
        (crate::DEXLAB_PROGRAM_ID, "dexlab"),
    ];

    known_programs
        .iter()
        .find(|(known_id, _)| *known_id == program_id)
        .map(|(_, protocol)| *protocol)
}
/// Tells whether a program id is on the ignore list for unknown-program
/// candidates.
///
/// Returns `true` when `program_id` equals any of the listed program id
/// constants (system, token, loader, sysvar and similar programs), `false`
/// otherwise.
fn should_ignore_program_id(program_id: &str) -> bool {
    let ignored_program_ids: &[&str] = &[
        crate::SYSTEM_PROGRAM_ID,
        crate::SPL_TOKEN_PROGRAM_ID,
        crate::SPL_TOKEN_2022_PROGRAM_ID,
        crate::ASSOCIATED_TOKEN_PROGRAM_ID,
        crate::COMPUTE_BUDGET_PROGRAM_ID,
        crate::ADDRESS_LOOKUP_TABLE_PROGRAM_ID,
        crate::BPF_LOADER_DEPRECATED_PROGRAM_ID,
        crate::BPF_LOADER_UPGRADEABLE_PROGRAM_ID,
        crate::LOADER_V4_PROGRAM_ID,
        crate::NATIVE_LOADER_PROGRAM_ID,
        crate::CONFIG_PROGRAM_ID,
        crate::VOTE_PROGRAM_ID,
        crate::STAKE_PROGRAM_ID,
        crate::STAKE_CONFIG_PROGRAM_ID,
        crate::ED25519_PROGRAM_ID,
        crate::SECP256K1_PROGRAM_ID,
        crate::SECP256R1_PROGRAM_ID,
        crate::ZK_TOKEN_PROOF_PROGRAM_ID,
        crate::ZK_ELGAMAL_PROOF_PROGRAM_ID,
        crate::SYSVAR_PROGRAM_ID,
        crate::SYSVAR_CLOCK_PROGRAM_ID,
        crate::SYSVAR_RENT_PROGRAM_ID,
        crate::SYSVAR_INSTRUCTIONS_PROGRAM_ID,
        crate::SYSVAR_EPOCH_REWARDS_PROGRAM_ID,
        crate::SYSVAR_EPOCH_SCHEDULE_PROGRAM_ID,
        crate::SYSVAR_FEES_PROGRAM_ID,
        crate::SYSVAR_LAST_RESTART_SLOT_PROGRAM_ID,
        crate::SYSVAR_RECENT_BLOCKHASHES_PROGRAM_ID,
        crate::SYSVAR_REWARDS_PROGRAM_ID,
        crate::SYSVAR_SLOT_HASHES_PROGRAM_ID,
        crate::SYSVAR_SLOT_HISTORY_PROGRAM_ID,
        crate::SYSVAR_STAKE_HISTORY_PROGRAM_ID,
    ];

    ignored_program_ids
        .iter()
        .any(|ignored_id| *ignored_id == program_id)
}
/// Derives a coarse surface hint for an instruction of an unknown program.
///
/// The program id is checked first: the known arbitrage bot id yields
/// `"arbitrage_bot"`, and a known launch-surface id yields `"launch_surface"`.
/// Otherwise the instruction's program name, lowercased, is searched for
/// keywords in a fixed order and the first hit decides the hint. Returns
/// `None` when nothing matches or the instruction has no program name.
fn infer_candidate_surface(
    program_id: &str,
    instruction: &crate::ChainInstructionDto,
) -> std::option::Option<std::string::String> {
    // (keyword searched in the lowercased program name, resulting surface).
    // Order matters: earlier keywords take precedence over later ones.
    let keyword_surfaces: [(&str, &str); 7] = [
        ("arbitrage", "arbitrage_bot"),
        ("sandwich", "arbitrage_bot"),
        ("launch", "launch_surface"),
        ("meteora", "meteora_related"),
        ("raydium", "raydium_related"),
        ("pump", "pump_related"),
        ("swap", "swap_related"),
    ];

    let surface: std::option::Option<&str> = if program_id == crate::ARBITRAGE_BOT_6MWVT_PROGRAM_ID
    {
        Some("arbitrage_bot")
    } else if is_known_launch_surface_program_id(program_id) {
        Some("launch_surface")
    } else {
        instruction.program_name.as_deref().and_then(|program_name| {
            let lowered_name = program_name.to_ascii_lowercase();
            keyword_surfaces
                .iter()
                .find(|(keyword, _)| lowered_name.contains(*keyword))
                .map(|(_, surface_name)| *surface_name)
        })
    };

    surface.map(|surface_name| surface_name.to_string())
}
/// Tells whether a program id belongs to a known launch surface.
///
/// Currently a placeholder that answers `false` for every input: the list of
/// program ids is to be filled in once they have been verified from the live
/// corpus and from official or sufficiently reliable references.
fn is_known_launch_surface_program_id(_program_id: &str) -> bool {
    false
}
#[cfg(test)]
mod tests {
    /// Classification kind that selects the known-DEX candidate builder.
    const KNOWN_DEX_KIND: &str = "known_dex_program_unclassified";
    /// Classification kind that selects the unknown-program candidate builder.
    const UNKNOWN_KIND: &str = "unknown_or_unclassified";

    /// Builds a minimal persisted transaction with internal id 1.
    fn test_transaction() -> crate::ChainTransactionDto {
        let mut transaction = crate::ChainTransactionDto::new(
            "signature_1".to_string(),
            Some(123),
            None,
            Some("test".to_string()),
            None,
            None,
            None,
            serde_json::json!({}).to_string(),
        );
        transaction.id = Some(1);
        transaction
    }

    /// Builds a minimal instruction with the given index, program id and name.
    fn test_instruction(
        instruction_index: u32,
        program_id: std::option::Option<std::string::String>,
        program_name: std::option::Option<std::string::String>,
    ) -> crate::ChainInstructionDto {
        crate::ChainInstructionDto::new(
            1,
            None,
            instruction_index,
            None,
            program_id,
            program_name,
            None,
            "[]".to_string(),
            None,
            None,
            Some(serde_json::json!({}).to_string()),
        )
    }

    /// Runs the spec builder for a transaction holding exactly one instruction
    /// (index 0) with the given program id and optional program name.
    fn specs_for_single_instruction(
        program_id: impl std::string::ToString,
        program_name: std::option::Option<&str>,
        classification_kind: &str,
    ) -> std::vec::Vec<super::ProtocolCandidateSpec> {
        let transaction = test_transaction();
        let instructions = vec![test_instruction(
            0,
            Some(program_id.to_string()),
            program_name.map(|name| name.to_string()),
        )];
        super::build_protocol_candidate_specs_for_classification(
            &transaction,
            1,
            &instructions,
            classification_kind,
        )
    }

    #[test]
    fn associated_token_program_is_ignored() {
        let specs = specs_for_single_instruction(
            crate::ASSOCIATED_TOKEN_PROGRAM_ID,
            Some("spl-associated-token-account"),
            UNKNOWN_KIND,
        );

        assert!(specs.is_empty());
    }

    #[test]
    fn meteora_dlmm_is_known_dex_protocol() {
        let specs = specs_for_single_instruction(
            crate::METEORA_DLMM_PROGRAM_ID,
            Some("Meteora DLMM"),
            KNOWN_DEX_KIND,
        );

        assert_eq!(specs.len(), 1);
        assert_eq!(specs[0].program_id, crate::METEORA_DLMM_PROGRAM_ID);
        assert_eq!(specs[0].candidate_protocol, Some("meteora_dlmm".to_string()));
    }

    #[test]
    fn known_arbitrage_bot_gets_surface_hint() {
        let specs = specs_for_single_instruction(
            crate::ARBITRAGE_BOT_6MWVT_PROGRAM_ID,
            None,
            UNKNOWN_KIND,
        );

        assert_eq!(specs.len(), 1);
        assert_eq!(specs[0].candidate_surface, Some("arbitrage_bot".to_string()));
    }

    #[test]
    fn known_dex_candidate_is_built_for_unclassified_known_program() {
        let specs = specs_for_single_instruction(
            crate::METEORA_DAMM_V2_PROGRAM_ID,
            Some("Meteora"),
            KNOWN_DEX_KIND,
        );

        assert_eq!(specs.len(), 1);
        assert_eq!(specs[0].program_id, crate::METEORA_DAMM_V2_PROGRAM_ID);
        assert_eq!(specs[0].candidate_protocol, Some("meteora_damm_v2".to_string()));
    }

    #[test]
    fn ignored_program_is_not_recorded_as_unknown_candidate() {
        let specs = specs_for_single_instruction(
            crate::SPL_TOKEN_PROGRAM_ID,
            Some("spl-token"),
            UNKNOWN_KIND,
        );

        assert!(specs.is_empty());
    }

    #[test]
    fn unknown_non_ignored_program_is_recorded() {
        let specs = specs_for_single_instruction(
            "UnknownProgram111111111111111111111111111111111",
            Some("unknown swap program"),
            UNKNOWN_KIND,
        );

        assert_eq!(specs.len(), 1);
        assert_eq!(specs[0].candidate_surface, Some("swap_related".to_string()));
    }
}