// file: kb_lib/src/protocol_candidate_recording.rs

//! Protocol candidate recording.
//!
//! This module records candidate protocol/program instructions for transactions
//! that were not fully decoded by the current DEX decoders.
|
|
|
|
/// Input used to record protocol candidates for one classified transaction.
|
|
pub(crate) struct ProtocolCandidateRecordingInput<'a> {
|
|
/// Database connection.
|
|
pub(crate) database: &'a crate::Database,
|
|
/// Persisted transaction.
|
|
pub(crate) transaction: &'a crate::ChainTransactionDto,
|
|
/// Internal transaction id.
|
|
pub(crate) transaction_id: i64,
|
|
/// Projected instructions for the transaction.
|
|
pub(crate) instructions: &'a [crate::ChainInstructionDto],
|
|
/// Persisted classification kind.
|
|
pub(crate) classification_kind: &'a str,
|
|
}
|
|
|
|
/// Records protocol candidates for one classified transaction.
|
|
///
|
|
/// Existing candidates for the same transaction are deleted first so replay is
|
|
/// deterministic.
|
|
pub(crate) async fn record_protocol_candidates_for_classification(
|
|
input: crate::protocol_candidate_recording::ProtocolCandidateRecordingInput<'_>,
|
|
) -> Result<usize, crate::Error> {
|
|
let delete_result = crate::query_protocol_candidates_delete_by_transaction_id(
|
|
input.database,
|
|
input.transaction_id,
|
|
)
|
|
.await;
|
|
if let Err(error) = delete_result {
|
|
return Err(error);
|
|
}
|
|
let candidate_specs =
|
|
crate::protocol_candidate_recording::build_protocol_candidate_specs_for_classification(
|
|
input.transaction,
|
|
input.transaction_id,
|
|
input.instructions,
|
|
input.classification_kind,
|
|
);
|
|
let mut inserted_count = 0_usize;
|
|
for candidate_spec in candidate_specs {
|
|
let dto = crate::ProtocolCandidateDto::new(
|
|
input.transaction_id,
|
|
candidate_spec.instruction_id,
|
|
input.transaction.signature.clone(),
|
|
input.transaction.slot,
|
|
candidate_spec.program_id,
|
|
candidate_spec.program_name_hint,
|
|
candidate_spec.candidate_protocol,
|
|
candidate_spec.candidate_surface,
|
|
candidate_spec.reason,
|
|
candidate_spec.evidence_json,
|
|
);
|
|
let insert_result = crate::query_protocol_candidates_insert(input.database, &dto).await;
|
|
match insert_result {
|
|
Ok(_) => {
|
|
inserted_count += 1;
|
|
},
|
|
Err(error) => return Err(error),
|
|
}
|
|
}
|
|
return Ok(inserted_count);
|
|
}
|
|
|
|
/// Per-instruction candidate data produced by the spec builders, before it is
/// turned into a persisted candidate DTO.
struct ProtocolCandidateSpec {
    /// Id of the instruction the candidate was derived from, when it has one.
    instruction_id: Option<i64>,
    /// Program id invoked by the instruction.
    program_id: String,
    /// Program name carried by the instruction, if any.
    program_name_hint: Option<String>,
    /// Protocol code when the program is a known DEX program.
    candidate_protocol: Option<String>,
    /// Coarse surface hint inferred for programs that are not known DEX programs.
    candidate_surface: Option<String>,
    /// Human-readable explanation of why the candidate was recorded.
    reason: String,
    /// Serialized JSON evidence describing the instruction.
    evidence_json: String,
}
|
|
|
|
fn build_protocol_candidate_specs_for_classification(
|
|
transaction: &crate::ChainTransactionDto,
|
|
transaction_id: i64,
|
|
instructions: &[crate::ChainInstructionDto],
|
|
classification_kind: &str,
|
|
) -> std::vec::Vec<ProtocolCandidateSpec> {
|
|
if classification_kind == "known_dex_program_unclassified" {
|
|
return build_known_dex_program_candidate_specs(transaction, transaction_id, instructions);
|
|
}
|
|
if classification_kind == "unknown_or_unclassified" {
|
|
return build_unknown_program_candidate_specs(transaction, transaction_id, instructions);
|
|
}
|
|
return std::vec::Vec::new();
|
|
}
|
|
|
|
fn build_known_dex_program_candidate_specs(
|
|
transaction: &crate::ChainTransactionDto,
|
|
transaction_id: i64,
|
|
instructions: &[crate::ChainInstructionDto],
|
|
) -> std::vec::Vec<ProtocolCandidateSpec> {
|
|
let mut specs = std::vec::Vec::new();
|
|
for instruction in instructions {
|
|
let program_id = match instruction.program_id.clone() {
|
|
Some(program_id) => program_id,
|
|
None => continue,
|
|
};
|
|
let known_protocol = known_dex_protocol_name(program_id.as_str());
|
|
let known_protocol = match known_protocol {
|
|
Some(known_protocol) => known_protocol,
|
|
None => continue,
|
|
};
|
|
let evidence_json = build_instruction_evidence_json(
|
|
transaction,
|
|
transaction_id,
|
|
instruction,
|
|
"known_dex_program_without_decoded_event",
|
|
);
|
|
specs.push(ProtocolCandidateSpec {
|
|
instruction_id: instruction.id,
|
|
program_id,
|
|
program_name_hint: instruction.program_name.clone(),
|
|
candidate_protocol: Some(known_protocol.to_string()),
|
|
candidate_surface: None,
|
|
reason: "known DEX program instruction did not produce a decoded DEX event".to_string(),
|
|
evidence_json,
|
|
});
|
|
}
|
|
return specs;
|
|
}
|
|
|
|
fn build_unknown_program_candidate_specs(
|
|
transaction: &crate::ChainTransactionDto,
|
|
transaction_id: i64,
|
|
instructions: &[crate::ChainInstructionDto],
|
|
) -> std::vec::Vec<ProtocolCandidateSpec> {
|
|
let mut specs = std::vec::Vec::new();
|
|
for instruction in instructions {
|
|
let program_id = match instruction.program_id.clone() {
|
|
Some(program_id) => program_id,
|
|
None => continue,
|
|
};
|
|
if should_ignore_program_id(program_id.as_str()) {
|
|
continue;
|
|
}
|
|
let surface_hint = infer_candidate_surface(program_id.as_str(), instruction);
|
|
let evidence_json = build_instruction_evidence_json(
|
|
transaction,
|
|
transaction_id,
|
|
instruction,
|
|
"unknown_or_unclassified_program_instruction",
|
|
);
|
|
specs.push(ProtocolCandidateSpec {
|
|
instruction_id: instruction.id,
|
|
program_id,
|
|
program_name_hint: instruction.program_name.clone(),
|
|
candidate_protocol: None,
|
|
candidate_surface: surface_hint,
|
|
reason: "transaction has no decoded DEX event and includes a non-ignored program instruction".to_string(),
|
|
evidence_json,
|
|
});
|
|
}
|
|
return specs;
|
|
}
|
|
|
|
fn build_instruction_evidence_json(
|
|
transaction: &crate::ChainTransactionDto,
|
|
transaction_id: i64,
|
|
instruction: &crate::ChainInstructionDto,
|
|
reason_code: &str,
|
|
) -> std::string::String {
|
|
let evidence_value = serde_json::json!({
|
|
"reasonCode": reason_code,
|
|
"transactionId": transaction_id,
|
|
"signature": transaction.signature,
|
|
"slot": transaction.slot,
|
|
"instructionId": instruction.id,
|
|
"parentInstructionId": instruction.parent_instruction_id,
|
|
"instructionIndex": instruction.instruction_index,
|
|
"programId": instruction.program_id,
|
|
"programName": instruction.program_name,
|
|
"stackHeight": instruction.stack_height,
|
|
"parsedType": instruction.parsed_type
|
|
});
|
|
let evidence_json_result = serde_json::to_string(&evidence_value);
|
|
match evidence_json_result {
|
|
Ok(evidence_json) => return evidence_json,
|
|
Err(error) => {
|
|
return format!(
|
|
"{{\"reasonCode\":\"evidence_serialization_failed\",\"error\":\"{}\"}}",
|
|
error
|
|
);
|
|
},
|
|
}
|
|
}
|
|
|
|
fn known_dex_protocol_name(program_id: &str) -> std::option::Option<&'static str> {
|
|
let matrix_entry = match crate::dex_support_matrix_entry_by_program_id(program_id) {
|
|
Some(matrix_entry) => matrix_entry,
|
|
None => return None,
|
|
};
|
|
return Some(matrix_entry.code);
|
|
}
|
|
|
|
fn should_ignore_program_id(program_id: &str) -> bool {
|
|
if program_id == crate::SYSTEM_PROGRAM_ID {
|
|
return true;
|
|
}
|
|
if program_id == crate::SPL_TOKEN_PROGRAM_ID {
|
|
return true;
|
|
}
|
|
if program_id == crate::SPL_TOKEN_2022_PROGRAM_ID {
|
|
return true;
|
|
}
|
|
if program_id == crate::ASSOCIATED_TOKEN_PROGRAM_ID {
|
|
return true;
|
|
}
|
|
if program_id == crate::COMPUTE_BUDGET_PROGRAM_ID {
|
|
return true;
|
|
}
|
|
if program_id == crate::ADDRESS_LOOKUP_TABLE_PROGRAM_ID {
|
|
return true;
|
|
}
|
|
if program_id == crate::BPF_LOADER_DEPRECATED_PROGRAM_ID {
|
|
return true;
|
|
}
|
|
if program_id == crate::BPF_LOADER_UPGRADEABLE_PROGRAM_ID {
|
|
return true;
|
|
}
|
|
if program_id == crate::LOADER_V4_PROGRAM_ID {
|
|
return true;
|
|
}
|
|
if program_id == crate::NATIVE_LOADER_PROGRAM_ID {
|
|
return true;
|
|
}
|
|
if program_id == crate::CONFIG_PROGRAM_ID {
|
|
return true;
|
|
}
|
|
if program_id == crate::VOTE_PROGRAM_ID {
|
|
return true;
|
|
}
|
|
if program_id == crate::STAKE_PROGRAM_ID {
|
|
return true;
|
|
}
|
|
if program_id == crate::STAKE_CONFIG_PROGRAM_ID {
|
|
return true;
|
|
}
|
|
if program_id == crate::ED25519_PROGRAM_ID {
|
|
return true;
|
|
}
|
|
if program_id == crate::SECP256K1_PROGRAM_ID {
|
|
return true;
|
|
}
|
|
if program_id == crate::SECP256R1_PROGRAM_ID {
|
|
return true;
|
|
}
|
|
if program_id == crate::ZK_TOKEN_PROOF_PROGRAM_ID {
|
|
return true;
|
|
}
|
|
if program_id == crate::ZK_ELGAMAL_PROOF_PROGRAM_ID {
|
|
return true;
|
|
}
|
|
if program_id == crate::SYSVAR_PROGRAM_ID {
|
|
return true;
|
|
}
|
|
if program_id == crate::SYSVAR_CLOCK_PROGRAM_ID {
|
|
return true;
|
|
}
|
|
if program_id == crate::SYSVAR_RENT_PROGRAM_ID {
|
|
return true;
|
|
}
|
|
if program_id == crate::SYSVAR_INSTRUCTIONS_PROGRAM_ID {
|
|
return true;
|
|
}
|
|
if program_id == crate::SYSVAR_EPOCH_REWARDS_PROGRAM_ID {
|
|
return true;
|
|
}
|
|
if program_id == crate::SYSVAR_EPOCH_SCHEDULE_PROGRAM_ID {
|
|
return true;
|
|
}
|
|
if program_id == crate::SYSVAR_FEES_PROGRAM_ID {
|
|
return true;
|
|
}
|
|
if program_id == crate::SYSVAR_LAST_RESTART_SLOT_PROGRAM_ID {
|
|
return true;
|
|
}
|
|
if program_id == crate::SYSVAR_RECENT_BLOCKHASHES_PROGRAM_ID {
|
|
return true;
|
|
}
|
|
if program_id == crate::SYSVAR_REWARDS_PROGRAM_ID {
|
|
return true;
|
|
}
|
|
if program_id == crate::SYSVAR_SLOT_HASHES_PROGRAM_ID {
|
|
return true;
|
|
}
|
|
if program_id == crate::SYSVAR_SLOT_HISTORY_PROGRAM_ID {
|
|
return true;
|
|
}
|
|
if program_id == crate::SYSVAR_STAKE_HISTORY_PROGRAM_ID {
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
fn infer_candidate_surface(
|
|
program_id: &str,
|
|
instruction: &crate::ChainInstructionDto,
|
|
) -> std::option::Option<std::string::String> {
|
|
if program_id == crate::ARBITRAGE_BOT_6MWVT_PROGRAM_ID {
|
|
return Some("arbitrage_bot".to_string());
|
|
}
|
|
if is_known_launch_surface_program_id(program_id) {
|
|
return Some("launch_surface".to_string());
|
|
}
|
|
if let Some(program_name) = instruction.program_name.as_deref() {
|
|
let normalized = program_name.to_ascii_lowercase();
|
|
if normalized.contains("arbitrage") {
|
|
return Some("arbitrage_bot".to_string());
|
|
}
|
|
if normalized.contains("sandwich") {
|
|
return Some("arbitrage_bot".to_string());
|
|
}
|
|
if normalized.contains("launch") {
|
|
return Some("launch_surface".to_string());
|
|
}
|
|
if normalized.contains("meteora") {
|
|
return Some("meteora_related".to_string());
|
|
}
|
|
if normalized.contains("raydium") {
|
|
return Some("raydium_related".to_string());
|
|
}
|
|
if normalized.contains("pump") {
|
|
return Some("pump_related".to_string());
|
|
}
|
|
if normalized.contains("swap") {
|
|
return Some("swap_related".to_string());
|
|
}
|
|
}
|
|
return None;
|
|
}
|
|
|
|
/// Reports whether `_program_id` is a known launch-surface program.
///
/// Currently a placeholder that always answers `false`: the id list is to be
/// filled in once program ids have been verified against the live corpus and
/// official or sufficiently reliable references.
fn is_known_launch_surface_program_id(_program_id: &str) -> bool {
    false
}
|
|
|
|
#[cfg(test)]
mod tests {
    /// Builds the transaction fixture shared by the tests (signature
    /// `signature_1`, id set to 1).
    fn test_transaction() -> crate::ChainTransactionDto {
        let mut transaction = crate::ChainTransactionDto::new(
            "signature_1".to_string(),
            Some(123),
            None,
            Some("test".to_string()),
            None,
            None,
            None,
            serde_json::json!({}).to_string(),
        );
        transaction.id = Some(1);
        transaction
    }

    /// Builds an instruction fixture with the given index, program id and
    /// program name; every other constructor argument is a fixed placeholder.
    fn test_instruction(
        instruction_index: u32,
        program_id: std::option::Option<std::string::String>,
        program_name: std::option::Option<std::string::String>,
    ) -> crate::ChainInstructionDto {
        crate::ChainInstructionDto::new(
            1,
            None,
            instruction_index,
            None,
            program_id,
            program_name,
            None,
            "[]".to_string(),
            None,
            None,
            Some(serde_json::json!({}).to_string()),
        )
    }

    /// Runs the spec builder for a transaction holding exactly one instruction
    /// (index 0) with the given program id and optional program name.
    fn specs_for_single_instruction(
        classification_kind: &str,
        program_id: &str,
        program_name: std::option::Option<&str>,
    ) -> std::vec::Vec<super::ProtocolCandidateSpec> {
        let transaction = test_transaction();
        let instructions = vec![test_instruction(
            0,
            Some(program_id.to_string()),
            program_name.map(|name| name.to_string()),
        )];
        super::build_protocol_candidate_specs_for_classification(
            &transaction,
            1,
            &instructions,
            classification_kind,
        )
    }

    #[test]
    fn known_dex_candidate_uses_support_matrix_for_priority_dexes() {
        let samples = [
            (crate::PUMP_SWAP_PROGRAM_ID, "pump_swap"),
            (crate::RAYDIUM_CPMM_PROGRAM_ID, "raydium_cpmm"),
            (crate::RAYDIUM_CLMM_PROGRAM_ID, "raydium_clmm"),
            (crate::METEORA_DLMM_PROGRAM_ID, "meteora_dlmm"),
            (crate::METEORA_DAMM_V1_PROGRAM_ID, "meteora_damm_v1"),
        ];
        for (program_id, expected_protocol) in samples {
            let protocol = super::known_dex_protocol_name(program_id)
                .unwrap_or_else(|| panic!("expected known protocol for {}", program_id));
            assert_eq!(protocol, expected_protocol);
        }
    }

    #[test]
    fn associated_token_program_is_ignored() {
        let specs = specs_for_single_instruction(
            "unknown_or_unclassified",
            crate::ASSOCIATED_TOKEN_PROGRAM_ID,
            Some("spl-associated-token-account"),
        );
        assert_eq!(specs.len(), 0);
    }

    #[test]
    fn meteora_dlmm_is_known_dex_protocol() {
        let specs = specs_for_single_instruction(
            "known_dex_program_unclassified",
            crate::METEORA_DLMM_PROGRAM_ID,
            Some("Meteora DLMM"),
        );
        assert_eq!(specs.len(), 1);
        assert_eq!(specs[0].program_id, crate::METEORA_DLMM_PROGRAM_ID);
        assert_eq!(specs[0].candidate_protocol, Some("meteora_dlmm".to_string()));
    }

    #[test]
    fn known_arbitrage_bot_gets_surface_hint() {
        let specs = specs_for_single_instruction(
            "unknown_or_unclassified",
            crate::ARBITRAGE_BOT_6MWVT_PROGRAM_ID,
            None,
        );
        assert_eq!(specs.len(), 1);
        assert_eq!(specs[0].candidate_surface, Some("arbitrage_bot".to_string()));
    }

    #[test]
    fn known_dex_candidate_is_built_for_unclassified_known_program() {
        let specs = specs_for_single_instruction(
            "known_dex_program_unclassified",
            crate::METEORA_DAMM_V2_PROGRAM_ID,
            Some("Meteora"),
        );
        assert_eq!(specs.len(), 1);
        assert_eq!(specs[0].program_id, crate::METEORA_DAMM_V2_PROGRAM_ID);
        assert_eq!(specs[0].candidate_protocol, Some("meteora_damm_v2".to_string()));
    }

    #[test]
    fn ignored_program_is_not_recorded_as_unknown_candidate() {
        let specs = specs_for_single_instruction(
            "unknown_or_unclassified",
            crate::SPL_TOKEN_PROGRAM_ID,
            Some("spl-token"),
        );
        assert_eq!(specs.len(), 0);
    }

    #[test]
    fn unknown_non_ignored_program_is_recorded() {
        let specs = specs_for_single_instruction(
            "unknown_or_unclassified",
            "UnknownProgram111111111111111111111111111111111",
            Some("unknown swap program"),
        );
        assert_eq!(specs.len(), 1);
        assert_eq!(specs[0].candidate_surface, Some("swap_related".to_string()));
    }
}
|