458 lines
17 KiB
Rust
458 lines
17 KiB
Rust
// file: kb_lib/src/transaction_classification.rs
|
|
|
|
//! Transaction classification service.
//!
//! This service classifies projected Solana transactions after transaction
//! projection and optional DEX decoding.
//!
//! The first version is intentionally deterministic and conservative:
//! decoded DEX events win over program-id hints, and unknown transactions are
//! preserved as explicit `unknown_or_unclassified` rows.
|
|
|
|
/// Service used to classify projected Solana transactions.
|
|
#[derive(Debug, Clone)]
|
|
pub struct TransactionClassificationService {
|
|
database: std::sync::Arc<crate::Database>,
|
|
}
|
|
|
|
impl TransactionClassificationService {
|
|
/// Creates a transaction classification service.
|
|
pub fn new(database: std::sync::Arc<crate::Database>) -> Self {
|
|
return Self { database };
|
|
}
|
|
|
|
/// Classifies one transaction by signature and persists the classification.
|
|
pub async fn classify_transaction_by_signature(
|
|
&self,
|
|
signature: &str,
|
|
) -> Result<crate::TransactionClassificationDto, crate::Error> {
|
|
let context_result =
|
|
load_transaction_classification_context(self.database.as_ref(), signature).await;
|
|
let context = match context_result {
|
|
Ok(context) => context,
|
|
Err(error) => return Err(error),
|
|
};
|
|
let classification = classify_transaction_context(&context);
|
|
let dto = crate::TransactionClassificationDto::new(
|
|
context.transaction_id,
|
|
context.transaction.signature.clone(),
|
|
context.transaction.slot,
|
|
classification.kind.to_string(),
|
|
classification.primary_protocol,
|
|
classification.primary_program_id,
|
|
classification.confidence_level,
|
|
classification.reason,
|
|
classification.evidence_json,
|
|
);
|
|
let upsert_result =
|
|
crate::query_transaction_classifications_upsert(self.database.as_ref(), &dto).await;
|
|
if let Err(error) = upsert_result {
|
|
return Err(error);
|
|
}
|
|
let persisted_result = crate::query_transaction_classifications_get_by_transaction_id(
|
|
self.database.as_ref(),
|
|
context.transaction_id,
|
|
)
|
|
.await;
|
|
let persisted_option = match persisted_result {
|
|
Ok(persisted_option) => persisted_option,
|
|
Err(error) => return Err(error),
|
|
};
|
|
let persisted = match persisted_option {
|
|
Some(persisted) => persisted,
|
|
None => {
|
|
return Err(crate::Error::InvalidState(format!(
|
|
"transaction classification for '{}' disappeared after upsert",
|
|
signature
|
|
)));
|
|
},
|
|
};
|
|
let candidate_recording_result =
|
|
crate::protocol_candidate_recording::record_protocol_candidates_for_classification(
|
|
crate::protocol_candidate_recording::ProtocolCandidateRecordingInput {
|
|
database: self.database.as_ref(),
|
|
transaction: &context.transaction,
|
|
transaction_id: context.transaction_id,
|
|
instructions: &context.instructions,
|
|
classification_kind: persisted.classification_kind.as_str(),
|
|
},
|
|
)
|
|
.await;
|
|
match candidate_recording_result {
|
|
Ok(candidate_count) => {
|
|
tracing::trace!(
|
|
signature = %context.transaction.signature,
|
|
classification_kind = %persisted.classification_kind,
|
|
protocol_candidate_count = candidate_count,
|
|
"transaction protocol candidates recorded"
|
|
);
|
|
},
|
|
Err(error) => return Err(error),
|
|
}
|
|
return Ok(persisted);
|
|
}
|
|
}
|
|
|
|
struct TransactionClassificationContext {
|
|
transaction: crate::ChainTransactionDto,
|
|
transaction_id: i64,
|
|
instructions: std::vec::Vec<crate::ChainInstructionDto>,
|
|
decoded_events: std::vec::Vec<crate::DexDecodedEventDto>,
|
|
}
|
|
|
|
/// Outcome of classifying one transaction, before it is turned into a DTO.
struct TransactionClassificationDecision {
    /// Classification label. This file produces `dex_trade`, `dex_non_trade`,
    /// `known_dex_program_unclassified` and `unknown_or_unclassified`.
    kind: &'static str,
    /// Protocol considered primary for the transaction, when one was identified.
    primary_protocol: std::option::Option<std::string::String>,
    /// Program id considered primary for the transaction, when one was identified.
    primary_program_id: std::option::Option<std::string::String>,
    /// Confidence assigned by the classifier; this file uses 0, 25, 75, 95 and 100.
    confidence_level: i16,
    /// Human-readable explanation of why `kind` was chosen.
    reason: std::string::String,
    /// Supporting evidence serialized as a JSON string.
    evidence_json: std::string::String,
}
|
|
|
|
/// One instruction whose program id was found in the DEX support matrix.
#[derive(Clone, Debug)]
struct KnownDexProgramMatch {
    /// Protocol code taken from the matching support matrix entry.
    protocol_name: &'static str,
    /// Program id of the matching instruction.
    program_id: std::string::String,
    /// Id of the matching instruction, when the instruction carries one.
    instruction_id: std::option::Option<i64>,
    /// Index stored on the matching instruction.
    instruction_index: u32,
}
|
|
|
|
async fn load_transaction_classification_context(
|
|
database: &crate::Database,
|
|
signature: &str,
|
|
) -> Result<TransactionClassificationContext, crate::Error> {
|
|
let transaction_result =
|
|
crate::query_chain_transactions_get_by_signature(database, signature).await;
|
|
let transaction_option = match transaction_result {
|
|
Ok(transaction_option) => transaction_option,
|
|
Err(error) => return Err(error),
|
|
};
|
|
let transaction = match transaction_option {
|
|
Some(transaction) => transaction,
|
|
None => {
|
|
return Err(crate::Error::InvalidState(format!(
|
|
"cannot classify unknown chain transaction '{}'",
|
|
signature
|
|
)));
|
|
},
|
|
};
|
|
let transaction_id = match transaction.id {
|
|
Some(transaction_id) => transaction_id,
|
|
None => {
|
|
return Err(crate::Error::InvalidState(format!(
|
|
"chain transaction '{}' has no internal id",
|
|
signature
|
|
)));
|
|
},
|
|
};
|
|
let instructions_result =
|
|
crate::query_chain_instructions_list_by_transaction_id(database, transaction_id).await;
|
|
let instructions = match instructions_result {
|
|
Ok(instructions) => instructions,
|
|
Err(error) => return Err(error),
|
|
};
|
|
let decoded_events_result =
|
|
crate::query_dex_decoded_events_list_by_transaction_id(database, transaction_id).await;
|
|
let decoded_events = match decoded_events_result {
|
|
Ok(decoded_events) => decoded_events,
|
|
Err(error) => return Err(error),
|
|
};
|
|
return Ok(TransactionClassificationContext {
|
|
transaction,
|
|
transaction_id,
|
|
instructions,
|
|
decoded_events,
|
|
});
|
|
}
|
|
|
|
fn classify_transaction_context(
|
|
context: &TransactionClassificationContext,
|
|
) -> TransactionClassificationDecision {
|
|
if !context.decoded_events.is_empty() {
|
|
return classify_from_decoded_events(context);
|
|
}
|
|
let known_program_matches = find_known_dex_program_matches(&context.instructions);
|
|
if !known_program_matches.is_empty() {
|
|
return classify_from_known_program_matches(context, &known_program_matches);
|
|
}
|
|
return build_decision(
|
|
"unknown_or_unclassified",
|
|
None,
|
|
None,
|
|
25,
|
|
"transaction has no decoded DEX event and no known DEX program id".to_string(),
|
|
serde_json::json!({
|
|
"transactionId": context.transaction_id,
|
|
"signature": context.transaction.signature,
|
|
"slot": context.transaction.slot,
|
|
"instructionCount": context.instructions.len(),
|
|
"decodedEventCount": context.decoded_events.len()
|
|
}),
|
|
);
|
|
}
|
|
|
|
fn classify_from_decoded_events(
|
|
context: &TransactionClassificationContext,
|
|
) -> TransactionClassificationDecision {
|
|
let mut first_protocol = None;
|
|
let mut first_program_id = None;
|
|
let mut trade_event_count = 0_i64;
|
|
let mut non_trade_event_count = 0_i64;
|
|
let mut decoded_event_evidence = std::vec::Vec::new();
|
|
for decoded_event in &context.decoded_events {
|
|
if first_protocol.is_none() {
|
|
first_protocol = Some(decoded_event.protocol_name.clone());
|
|
}
|
|
if first_program_id.is_none() {
|
|
first_program_id = Some(decoded_event.program_id.clone());
|
|
}
|
|
let payload_value_result =
|
|
serde_json::from_str::<serde_json::Value>(decoded_event.payload_json.as_str());
|
|
let payload_value = match payload_value_result {
|
|
Ok(payload_value) => payload_value,
|
|
Err(_) => serde_json::Value::Null,
|
|
};
|
|
let is_trade = crate::is_decoded_event_trade_candidate(
|
|
decoded_event.event_kind.as_str(),
|
|
&payload_value,
|
|
);
|
|
if is_trade {
|
|
trade_event_count += 1;
|
|
} else {
|
|
non_trade_event_count += 1;
|
|
}
|
|
decoded_event_evidence.push(serde_json::json!({
|
|
"id": decoded_event.id,
|
|
"protocolName": decoded_event.protocol_name,
|
|
"programId": decoded_event.program_id,
|
|
"eventKind": decoded_event.event_kind,
|
|
"poolAccount": decoded_event.pool_account,
|
|
"tradeCandidate": is_trade
|
|
}));
|
|
}
|
|
if trade_event_count > 0_i64 {
|
|
return build_decision(
|
|
"dex_trade",
|
|
first_protocol,
|
|
first_program_id,
|
|
100,
|
|
"transaction has at least one decoded DEX trade event".to_string(),
|
|
serde_json::json!({
|
|
"transactionId": context.transaction_id,
|
|
"signature": context.transaction.signature,
|
|
"slot": context.transaction.slot,
|
|
"decodedEventCount": context.decoded_events.len(),
|
|
"tradeEventCount": trade_event_count,
|
|
"nonTradeEventCount": non_trade_event_count,
|
|
"decodedEvents": decoded_event_evidence
|
|
}),
|
|
);
|
|
}
|
|
return build_decision(
|
|
"dex_non_trade",
|
|
first_protocol,
|
|
first_program_id,
|
|
95,
|
|
"transaction has decoded DEX events but no trade candidate".to_string(),
|
|
serde_json::json!({
|
|
"transactionId": context.transaction_id,
|
|
"signature": context.transaction.signature,
|
|
"slot": context.transaction.slot,
|
|
"decodedEventCount": context.decoded_events.len(),
|
|
"tradeEventCount": trade_event_count,
|
|
"nonTradeEventCount": non_trade_event_count,
|
|
"decodedEvents": decoded_event_evidence
|
|
}),
|
|
);
|
|
}
|
|
|
|
fn classify_from_known_program_matches(
|
|
context: &TransactionClassificationContext,
|
|
known_program_matches: &[KnownDexProgramMatch],
|
|
) -> TransactionClassificationDecision {
|
|
let first_match = &known_program_matches[0];
|
|
let mut evidence_items = std::vec::Vec::new();
|
|
for known_program_match in known_program_matches {
|
|
evidence_items.push(serde_json::json!({
|
|
"protocolName": known_program_match.protocol_name,
|
|
"programId": known_program_match.program_id,
|
|
"instructionId": known_program_match.instruction_id,
|
|
"instructionIndex": known_program_match.instruction_index
|
|
}));
|
|
}
|
|
return build_decision(
|
|
"known_dex_program_unclassified",
|
|
Some(first_match.protocol_name.to_string()),
|
|
Some(first_match.program_id.to_string()),
|
|
75,
|
|
"transaction has known DEX program instructions but no decoded DEX event".to_string(),
|
|
serde_json::json!({
|
|
"transactionId": context.transaction_id,
|
|
"signature": context.transaction.signature,
|
|
"slot": context.transaction.slot,
|
|
"instructionCount": context.instructions.len(),
|
|
"decodedEventCount": context.decoded_events.len(),
|
|
"knownDexProgramMatches": evidence_items
|
|
}),
|
|
);
|
|
}
|
|
|
|
fn build_decision(
|
|
kind: &'static str,
|
|
primary_protocol: std::option::Option<std::string::String>,
|
|
primary_program_id: std::option::Option<std::string::String>,
|
|
confidence_level: i16,
|
|
reason: std::string::String,
|
|
evidence_value: serde_json::Value,
|
|
) -> TransactionClassificationDecision {
|
|
let evidence_json_result = serde_json::to_string(&evidence_value);
|
|
let evidence_json = match evidence_json_result {
|
|
Ok(evidence_json) => evidence_json,
|
|
Err(error) => {
|
|
return TransactionClassificationDecision {
|
|
kind: "unknown_or_unclassified",
|
|
primary_protocol: None,
|
|
primary_program_id: None,
|
|
confidence_level: 0,
|
|
reason: format!("cannot serialize classification evidence: {}", error),
|
|
evidence_json: "{}".to_string(),
|
|
};
|
|
},
|
|
};
|
|
return TransactionClassificationDecision {
|
|
kind,
|
|
primary_protocol,
|
|
primary_program_id,
|
|
confidence_level,
|
|
reason,
|
|
evidence_json,
|
|
};
|
|
}
|
|
|
|
fn find_known_dex_program_matches(
|
|
instructions: &[crate::ChainInstructionDto],
|
|
) -> std::vec::Vec<KnownDexProgramMatch> {
|
|
let mut matches = std::vec::Vec::new();
|
|
for instruction in instructions {
|
|
let program_match = known_dex_program_match(instruction);
|
|
let program_match = match program_match {
|
|
Some(program_match) => program_match,
|
|
None => continue,
|
|
};
|
|
matches.push(program_match);
|
|
}
|
|
|
|
return matches;
|
|
}
|
|
|
|
fn known_dex_program_match(
|
|
instruction: &crate::ChainInstructionDto,
|
|
) -> std::option::Option<KnownDexProgramMatch> {
|
|
let program_id = match instruction.program_id.as_deref() {
|
|
Some(program_id) => program_id,
|
|
None => return None,
|
|
};
|
|
let matrix_entry = match crate::dex_support_matrix_entry_by_program_id(program_id) {
|
|
Some(matrix_entry) => matrix_entry,
|
|
None => return None,
|
|
};
|
|
return Some(KnownDexProgramMatch {
|
|
protocol_name: matrix_entry.code,
|
|
program_id: program_id.to_string(),
|
|
instruction_id: instruction.id,
|
|
instruction_index: instruction.instruction_index,
|
|
});
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    /// Builds a minimal instruction whose only meaningful field is `program_id`.
    fn test_instruction(
        program_id: std::option::Option<std::string::String>,
    ) -> crate::ChainInstructionDto {
        let empty_json_object = serde_json::json!({}).to_string();
        return crate::ChainInstructionDto::new(
            1,
            None,
            0,
            None,
            program_id,
            None,
            None,
            "[]".to_string(),
            None,
            None,
            Some(empty_json_object),
        );
    }

    /// Builds a minimal transaction with signature `signature_1` and internal id 1.
    fn test_transaction() -> crate::ChainTransactionDto {
        let mut transaction = crate::ChainTransactionDto::new(
            "signature_1".to_string(),
            Some(123),
            None,
            Some("test".to_string()),
            None,
            None,
            None,
            serde_json::json!({}).to_string(),
        );
        transaction.id = Some(1);
        return transaction;
    }

    /// A Raydium CPMM instruction is matched and reports its protocol, program
    /// id and instruction index.
    #[test]
    fn known_dex_program_ids_are_matched() {
        let instruction = test_instruction(Some(crate::RAYDIUM_CPMM_PROGRAM_ID.to_string()));
        match super::known_dex_program_match(&instruction) {
            Some(found) => {
                assert_eq!(found.protocol_name, "raydium_cpmm");
                assert_eq!(found.program_id, crate::RAYDIUM_CPMM_PROGRAM_ID);
                assert_eq!(found.instruction_index, 0);
            },
            None => {
                panic!("expected raydium_cpmm program match");
            },
        }
    }

    /// Each priority DEX program id resolves to its expected protocol code.
    #[test]
    fn known_program_match_uses_support_matrix_for_priority_dexes() {
        let samples = [
            (crate::PUMP_SWAP_PROGRAM_ID, "pump_swap"),
            (crate::RAYDIUM_CPMM_PROGRAM_ID, "raydium_cpmm"),
            (crate::RAYDIUM_CLMM_PROGRAM_ID, "raydium_clmm"),
            (crate::METEORA_DLMM_PROGRAM_ID, "meteora_dlmm"),
            (crate::METEORA_DAMM_V1_PROGRAM_ID, "meteora_damm_v1"),
        ];
        for (program_id, expected_protocol) in samples {
            let instruction = test_instruction(Some(program_id.to_string()));
            match super::known_dex_program_match(&instruction) {
                Some(found) => {
                    assert_eq!(found.protocol_name, expected_protocol);
                    assert_eq!(found.program_id, program_id);
                },
                None => panic!("expected program match for {}", expected_protocol),
            }
        }
    }

    /// A program id that is not in the support matrix yields no match.
    #[test]
    fn unknown_program_id_is_not_matched() {
        let unknown_program_id = "UnknownProgram111111111111111111111111111111111".to_string();
        let instruction = test_instruction(Some(unknown_program_id));
        assert!(super::known_dex_program_match(&instruction).is_none());
    }

    /// With no instructions and no decoded events the classifier falls back to
    /// `unknown_or_unclassified` with confidence 25.
    #[test]
    fn unknown_context_is_classified_as_unknown_or_unclassified() {
        let context = super::TransactionClassificationContext {
            transaction: test_transaction(),
            transaction_id: 1,
            instructions: std::vec::Vec::new(),
            decoded_events: std::vec::Vec::new(),
        };
        let decision = super::classify_transaction_context(&context);
        assert_eq!(decision.kind, "unknown_or_unclassified");
        assert_eq!(decision.confidence_level, 25);
    }
}
|