0.7.28
This commit is contained in:
466
kb_lib/src/transaction_classification.rs
Normal file
466
kb_lib/src/transaction_classification.rs
Normal file
@@ -0,0 +1,466 @@
|
||||
// file: kb_lib/src/transaction_classification.rs
|
||||
|
||||
//! Transaction classification service.
|
||||
//!
|
||||
//! This service classifies projected Solana transactions after transaction
|
||||
//! projection and optional DEX decoding.
|
||||
//!
|
||||
//! The first version is intentionally deterministic and conservative:
|
||||
//! decoded DEX events win over program-id hints, and unknown transactions are
|
||||
//! preserved as explicit `unknown_or_unclassified` rows.
|
||||
|
||||
/// Service used to classify projected Solana transactions.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct TransactionClassificationService {
|
||||
database: std::sync::Arc<crate::Database>,
|
||||
}
|
||||
|
||||
impl TransactionClassificationService {
|
||||
/// Creates a transaction classification service.
|
||||
pub fn new(database: std::sync::Arc<crate::Database>) -> Self {
|
||||
return Self { database };
|
||||
}
|
||||
|
||||
/// Classifies one transaction by signature and persists the classification.
|
||||
pub async fn classify_transaction_by_signature(
|
||||
&self,
|
||||
signature: &str,
|
||||
) -> Result<crate::TransactionClassificationDto, crate::Error> {
|
||||
let context_result =
|
||||
load_transaction_classification_context(self.database.as_ref(), signature).await;
|
||||
let context = match context_result {
|
||||
Ok(context) => context,
|
||||
Err(error) => return Err(error),
|
||||
};
|
||||
let classification = classify_transaction_context(&context);
|
||||
let dto = crate::TransactionClassificationDto::new(
|
||||
context.transaction_id,
|
||||
context.transaction.signature.clone(),
|
||||
context.transaction.slot,
|
||||
classification.kind.to_string(),
|
||||
classification.primary_protocol,
|
||||
classification.primary_program_id,
|
||||
classification.confidence_level,
|
||||
classification.reason,
|
||||
classification.evidence_json,
|
||||
);
|
||||
let upsert_result =
|
||||
crate::query_transaction_classifications_upsert(self.database.as_ref(), &dto).await;
|
||||
if let Err(error) = upsert_result {
|
||||
return Err(error);
|
||||
}
|
||||
let persisted_result = crate::query_transaction_classifications_get_by_transaction_id(
|
||||
self.database.as_ref(),
|
||||
context.transaction_id,
|
||||
)
|
||||
.await;
|
||||
let persisted_option = match persisted_result {
|
||||
Ok(persisted_option) => persisted_option,
|
||||
Err(error) => return Err(error),
|
||||
};
|
||||
let persisted = match persisted_option {
|
||||
Some(persisted) => persisted,
|
||||
None => {
|
||||
return Err(crate::Error::InvalidState(format!(
|
||||
"transaction classification for '{}' disappeared after upsert",
|
||||
signature
|
||||
)));
|
||||
},
|
||||
};
|
||||
let candidate_recording_result =
|
||||
crate::protocol_candidate_recording::record_protocol_candidates_for_classification(
|
||||
crate::protocol_candidate_recording::ProtocolCandidateRecordingInput {
|
||||
database: self.database.as_ref(),
|
||||
transaction: &context.transaction,
|
||||
transaction_id: context.transaction_id,
|
||||
instructions: &context.instructions,
|
||||
classification_kind: persisted.classification_kind.as_str(),
|
||||
},
|
||||
)
|
||||
.await;
|
||||
match candidate_recording_result {
|
||||
Ok(candidate_count) => {
|
||||
tracing::trace!(
|
||||
signature = %context.transaction.signature,
|
||||
classification_kind = %persisted.classification_kind,
|
||||
protocol_candidate_count = candidate_count,
|
||||
"transaction protocol candidates recorded"
|
||||
);
|
||||
},
|
||||
Err(error) => return Err(error),
|
||||
}
|
||||
return Ok(persisted);
|
||||
}
|
||||
}
|
||||
|
||||
struct TransactionClassificationContext {
|
||||
transaction: crate::ChainTransactionDto,
|
||||
transaction_id: i64,
|
||||
instructions: std::vec::Vec<crate::ChainInstructionDto>,
|
||||
decoded_events: std::vec::Vec<crate::DexDecodedEventDto>,
|
||||
}
|
||||
|
||||
/// Outcome of classifying one transaction, before it is turned into a DTO.
struct TransactionClassificationDecision {
    /// Classification kind label, e.g. `unknown_or_unclassified`.
    kind: &'static str,
    /// Protocol name the decision is attributed to, when there is one.
    primary_protocol: std::option::Option<std::string::String>,
    /// Program id the decision is attributed to, when there is one.
    primary_program_id: std::option::Option<std::string::String>,
    /// Confidence score attached to the decision by the classifier.
    confidence_level: i16,
    /// Human-readable explanation of why this kind was chosen.
    reason: std::string::String,
    /// Supporting evidence, serialized as a JSON string.
    evidence_json: std::string::String,
}
|
||||
|
||||
/// One instruction whose program id equals a known DEX program id.
#[derive(Clone, Debug)]
struct KnownDexProgramMatch {
    /// Protocol label associated with the matched program id.
    protocol_name: &'static str,
    /// The matched program id, copied from the instruction.
    program_id: std::string::String,
    /// Id of the matched instruction row, when the row has one.
    instruction_id: std::option::Option<i64>,
    /// Value of the instruction's `instruction_index` field.
    instruction_index: u32,
}
|
||||
|
||||
async fn load_transaction_classification_context(
|
||||
database: &crate::Database,
|
||||
signature: &str,
|
||||
) -> Result<TransactionClassificationContext, crate::Error> {
|
||||
let transaction_result =
|
||||
crate::query_chain_transactions_get_by_signature(database, signature).await;
|
||||
let transaction_option = match transaction_result {
|
||||
Ok(transaction_option) => transaction_option,
|
||||
Err(error) => return Err(error),
|
||||
};
|
||||
let transaction = match transaction_option {
|
||||
Some(transaction) => transaction,
|
||||
None => {
|
||||
return Err(crate::Error::InvalidState(format!(
|
||||
"cannot classify unknown chain transaction '{}'",
|
||||
signature
|
||||
)));
|
||||
},
|
||||
};
|
||||
let transaction_id = match transaction.id {
|
||||
Some(transaction_id) => transaction_id,
|
||||
None => {
|
||||
return Err(crate::Error::InvalidState(format!(
|
||||
"chain transaction '{}' has no internal id",
|
||||
signature
|
||||
)));
|
||||
},
|
||||
};
|
||||
let instructions_result =
|
||||
crate::query_chain_instructions_list_by_transaction_id(database, transaction_id).await;
|
||||
let instructions = match instructions_result {
|
||||
Ok(instructions) => instructions,
|
||||
Err(error) => return Err(error),
|
||||
};
|
||||
let decoded_events_result =
|
||||
crate::query_dex_decoded_events_list_by_transaction_id(database, transaction_id).await;
|
||||
let decoded_events = match decoded_events_result {
|
||||
Ok(decoded_events) => decoded_events,
|
||||
Err(error) => return Err(error),
|
||||
};
|
||||
return Ok(TransactionClassificationContext {
|
||||
transaction,
|
||||
transaction_id,
|
||||
instructions,
|
||||
decoded_events,
|
||||
});
|
||||
}
|
||||
|
||||
fn classify_transaction_context(
|
||||
context: &TransactionClassificationContext,
|
||||
) -> TransactionClassificationDecision {
|
||||
if !context.decoded_events.is_empty() {
|
||||
return classify_from_decoded_events(context);
|
||||
}
|
||||
let known_program_matches = find_known_dex_program_matches(&context.instructions);
|
||||
if !known_program_matches.is_empty() {
|
||||
return classify_from_known_program_matches(context, &known_program_matches);
|
||||
}
|
||||
return build_decision(
|
||||
"unknown_or_unclassified",
|
||||
None,
|
||||
None,
|
||||
25,
|
||||
"transaction has no decoded DEX event and no known DEX program id".to_string(),
|
||||
serde_json::json!({
|
||||
"transactionId": context.transaction_id,
|
||||
"signature": context.transaction.signature,
|
||||
"slot": context.transaction.slot,
|
||||
"instructionCount": context.instructions.len(),
|
||||
"decodedEventCount": context.decoded_events.len()
|
||||
}),
|
||||
);
|
||||
}
|
||||
|
||||
fn classify_from_decoded_events(
|
||||
context: &TransactionClassificationContext,
|
||||
) -> TransactionClassificationDecision {
|
||||
let mut first_protocol = None;
|
||||
let mut first_program_id = None;
|
||||
let mut trade_event_count = 0_i64;
|
||||
let mut non_trade_event_count = 0_i64;
|
||||
let mut decoded_event_evidence = std::vec::Vec::new();
|
||||
for decoded_event in &context.decoded_events {
|
||||
if first_protocol.is_none() {
|
||||
first_protocol = Some(decoded_event.protocol_name.clone());
|
||||
}
|
||||
if first_program_id.is_none() {
|
||||
first_program_id = Some(decoded_event.program_id.clone());
|
||||
}
|
||||
let payload_value_result =
|
||||
serde_json::from_str::<serde_json::Value>(decoded_event.payload_json.as_str());
|
||||
let payload_value = match payload_value_result {
|
||||
Ok(payload_value) => payload_value,
|
||||
Err(_) => serde_json::Value::Null,
|
||||
};
|
||||
let is_trade = crate::is_decoded_event_trade_candidate(
|
||||
decoded_event.event_kind.as_str(),
|
||||
&payload_value,
|
||||
);
|
||||
if is_trade {
|
||||
trade_event_count += 1;
|
||||
} else {
|
||||
non_trade_event_count += 1;
|
||||
}
|
||||
decoded_event_evidence.push(serde_json::json!({
|
||||
"id": decoded_event.id,
|
||||
"protocolName": decoded_event.protocol_name,
|
||||
"programId": decoded_event.program_id,
|
||||
"eventKind": decoded_event.event_kind,
|
||||
"poolAccount": decoded_event.pool_account,
|
||||
"tradeCandidate": is_trade
|
||||
}));
|
||||
}
|
||||
if trade_event_count > 0_i64 {
|
||||
return build_decision(
|
||||
"dex_trade",
|
||||
first_protocol,
|
||||
first_program_id,
|
||||
100,
|
||||
"transaction has at least one decoded DEX trade event".to_string(),
|
||||
serde_json::json!({
|
||||
"transactionId": context.transaction_id,
|
||||
"signature": context.transaction.signature,
|
||||
"slot": context.transaction.slot,
|
||||
"decodedEventCount": context.decoded_events.len(),
|
||||
"tradeEventCount": trade_event_count,
|
||||
"nonTradeEventCount": non_trade_event_count,
|
||||
"decodedEvents": decoded_event_evidence
|
||||
}),
|
||||
);
|
||||
}
|
||||
return build_decision(
|
||||
"dex_non_trade",
|
||||
first_protocol,
|
||||
first_program_id,
|
||||
95,
|
||||
"transaction has decoded DEX events but no trade candidate".to_string(),
|
||||
serde_json::json!({
|
||||
"transactionId": context.transaction_id,
|
||||
"signature": context.transaction.signature,
|
||||
"slot": context.transaction.slot,
|
||||
"decodedEventCount": context.decoded_events.len(),
|
||||
"tradeEventCount": trade_event_count,
|
||||
"nonTradeEventCount": non_trade_event_count,
|
||||
"decodedEvents": decoded_event_evidence
|
||||
}),
|
||||
);
|
||||
}
|
||||
|
||||
fn classify_from_known_program_matches(
|
||||
context: &TransactionClassificationContext,
|
||||
known_program_matches: &[KnownDexProgramMatch],
|
||||
) -> TransactionClassificationDecision {
|
||||
let first_match = &known_program_matches[0];
|
||||
let mut evidence_items = std::vec::Vec::new();
|
||||
for known_program_match in known_program_matches {
|
||||
evidence_items.push(serde_json::json!({
|
||||
"protocolName": known_program_match.protocol_name,
|
||||
"programId": known_program_match.program_id,
|
||||
"instructionId": known_program_match.instruction_id,
|
||||
"instructionIndex": known_program_match.instruction_index
|
||||
}));
|
||||
}
|
||||
return build_decision(
|
||||
"known_dex_program_unclassified",
|
||||
Some(first_match.protocol_name.to_string()),
|
||||
Some(first_match.program_id.to_string()),
|
||||
75,
|
||||
"transaction has known DEX program instructions but no decoded DEX event".to_string(),
|
||||
serde_json::json!({
|
||||
"transactionId": context.transaction_id,
|
||||
"signature": context.transaction.signature,
|
||||
"slot": context.transaction.slot,
|
||||
"instructionCount": context.instructions.len(),
|
||||
"decodedEventCount": context.decoded_events.len(),
|
||||
"knownDexProgramMatches": evidence_items
|
||||
}),
|
||||
);
|
||||
}
|
||||
|
||||
fn build_decision(
|
||||
kind: &'static str,
|
||||
primary_protocol: std::option::Option<std::string::String>,
|
||||
primary_program_id: std::option::Option<std::string::String>,
|
||||
confidence_level: i16,
|
||||
reason: std::string::String,
|
||||
evidence_value: serde_json::Value,
|
||||
) -> TransactionClassificationDecision {
|
||||
let evidence_json_result = serde_json::to_string(&evidence_value);
|
||||
let evidence_json = match evidence_json_result {
|
||||
Ok(evidence_json) => evidence_json,
|
||||
Err(error) => {
|
||||
return TransactionClassificationDecision {
|
||||
kind: "unknown_or_unclassified",
|
||||
primary_protocol: None,
|
||||
primary_program_id: None,
|
||||
confidence_level: 0,
|
||||
reason: format!("cannot serialize classification evidence: {}", error),
|
||||
evidence_json: "{}".to_string(),
|
||||
};
|
||||
},
|
||||
};
|
||||
return TransactionClassificationDecision {
|
||||
kind,
|
||||
primary_protocol,
|
||||
primary_program_id,
|
||||
confidence_level,
|
||||
reason,
|
||||
evidence_json,
|
||||
};
|
||||
}
|
||||
|
||||
fn find_known_dex_program_matches(
|
||||
instructions: &[crate::ChainInstructionDto],
|
||||
) -> std::vec::Vec<KnownDexProgramMatch> {
|
||||
let mut matches = std::vec::Vec::new();
|
||||
for instruction in instructions {
|
||||
let program_match = known_dex_program_match(instruction);
|
||||
let program_match = match program_match {
|
||||
Some(program_match) => program_match,
|
||||
None => continue,
|
||||
};
|
||||
matches.push(program_match);
|
||||
}
|
||||
|
||||
return matches;
|
||||
}
|
||||
|
||||
fn known_dex_program_match(
|
||||
instruction: &crate::ChainInstructionDto,
|
||||
) -> std::option::Option<KnownDexProgramMatch> {
|
||||
let program_id = match instruction.program_id.as_deref() {
|
||||
Some(program_id) => program_id,
|
||||
None => return None,
|
||||
};
|
||||
let protocol_name = if program_id == crate::RAYDIUM_AMM_V4_PROGRAM_ID {
|
||||
"raydium_amm_v4"
|
||||
} else if program_id == crate::RAYDIUM_CPMM_PROGRAM_ID {
|
||||
"raydium_cpmm"
|
||||
} else if program_id == crate::RAYDIUM_CLMM_PROGRAM_ID {
|
||||
"raydium_clmm"
|
||||
} else if program_id == crate::RAYDIUM_LAUNCHLAB_PROGRAM_ID {
|
||||
"raydium_launchlab"
|
||||
} else if program_id == crate::RAYDIUM_AMM_ROUTING_PROGRAM_ID {
|
||||
"raydium_router"
|
||||
} else if program_id == crate::RAYDIUM_STABLE_SWAP_AMM_PROGRAM_ID {
|
||||
"raydium_stable_swap"
|
||||
} else if program_id == crate::PUMP_FUN_PROGRAM_ID {
|
||||
"pump_fun"
|
||||
} else if program_id == crate::PUMP_SWAP_PROGRAM_ID {
|
||||
"pump_swap"
|
||||
} else if program_id == crate::METEORA_DBC_PROGRAM_ID {
|
||||
"meteora_dbc"
|
||||
} else if program_id == crate::METEORA_DLMM_PROGRAM_ID {
|
||||
"meteora_dlmm"
|
||||
} else if program_id == crate::METEORA_DAMM_V1_PROGRAM_ID {
|
||||
"meteora_damm_v1"
|
||||
} else if program_id == crate::METEORA_DAMM_V2_PROGRAM_ID {
|
||||
"meteora_damm_v2"
|
||||
} else if program_id == crate::ORCA_WHIRLPOOLS_PROGRAM_ID {
|
||||
"orca_whirlpools"
|
||||
} else if program_id == crate::FLUXBEAM_PROGRAM_ID {
|
||||
"fluxbeam"
|
||||
} else if program_id == crate::DEXLAB_PROGRAM_ID {
|
||||
"dexlab"
|
||||
} else {
|
||||
return None;
|
||||
};
|
||||
return Some(KnownDexProgramMatch {
|
||||
protocol_name,
|
||||
program_id: program_id.to_string(),
|
||||
instruction_id: instruction.id,
|
||||
instruction_index: instruction.instruction_index,
|
||||
});
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    /// Builds a minimal instruction DTO whose only meaningful field for these
    /// tests is `program_id`.
    fn test_instruction(
        program_id: std::option::Option<std::string::String>,
    ) -> crate::ChainInstructionDto {
        return crate::ChainInstructionDto::new(
            1,
            None,
            0,
            None,
            program_id,
            None,
            None,
            "[]".to_string(),
            None,
            None,
            Some(serde_json::json!({}).to_string()),
        );
    }

    /// Builds a minimal transaction DTO with signature `signature_1` and
    /// internal id 1.
    fn test_transaction() -> crate::ChainTransactionDto {
        let mut transaction = crate::ChainTransactionDto::new(
            "signature_1".to_string(),
            Some(123),
            None,
            Some("test".to_string()),
            None,
            None,
            None,
            serde_json::json!({}).to_string(),
        );
        transaction.id = Some(1);
        return transaction;
    }

    #[test]
    fn known_dex_program_ids_are_matched() {
        let instruction = test_instruction(Some(crate::RAYDIUM_CPMM_PROGRAM_ID.to_string()));
        let program_match = super::known_dex_program_match(&instruction)
            .expect("expected raydium_cpmm program match");
        assert_eq!(program_match.protocol_name, "raydium_cpmm");
        assert_eq!(program_match.program_id, crate::RAYDIUM_CPMM_PROGRAM_ID);
        assert_eq!(program_match.instruction_index, 0);
    }

    #[test]
    fn unknown_program_id_is_not_matched() {
        let instruction =
            test_instruction(Some("UnknownProgram111111111111111111111111111111111".to_string()));
        let program_match = super::known_dex_program_match(&instruction);
        assert!(program_match.is_none());
    }

    #[test]
    fn instruction_without_program_id_is_not_matched() {
        let instruction = test_instruction(None);
        let program_match = super::known_dex_program_match(&instruction);
        assert!(program_match.is_none());
    }

    #[test]
    fn unknown_context_is_classified_as_unknown_or_unclassified() {
        let transaction = test_transaction();
        let context = super::TransactionClassificationContext {
            transaction,
            transaction_id: 1,
            instructions: std::vec::Vec::new(),
            decoded_events: std::vec::Vec::new(),
        };
        let decision = super::classify_transaction_context(&context);
        assert_eq!(decision.kind, "unknown_or_unclassified");
        assert_eq!(decision.confidence_level, 25);
    }

    #[test]
    fn known_program_without_decoded_event_is_classified_as_known_dex_program_unclassified() {
        let context = super::TransactionClassificationContext {
            transaction: test_transaction(),
            transaction_id: 1,
            instructions: vec![test_instruction(Some(
                crate::RAYDIUM_CPMM_PROGRAM_ID.to_string(),
            ))],
            decoded_events: std::vec::Vec::new(),
        };
        let decision = super::classify_transaction_context(&context);
        assert_eq!(decision.kind, "known_dex_program_unclassified");
        assert_eq!(decision.confidence_level, 75);
        assert_eq!(decision.primary_protocol.as_deref(), Some("raydium_cpmm"));
        assert_eq!(
            decision.primary_program_id.as_deref(),
            Some(crate::RAYDIUM_CPMM_PROGRAM_ID)
        );
    }
}
|
||||
Reference in New Issue
Block a user