This commit is contained in:
2026-05-11 11:02:47 +02:00
parent d66afede28
commit 7f130dba6b
49 changed files with 10301 additions and 8481 deletions

View File

@@ -0,0 +1,466 @@
// file: kb_lib/src/transaction_classification.rs
//! Transaction classification service.
//!
//! This service classifies projected Solana transactions after transaction
//! projection and optional DEX decoding.
//!
//! The first version is intentionally deterministic and conservative:
//! decoded DEX events win over program-id hints, and unknown transactions are
//! preserved as explicit `unknown_or_unclassified` rows.
/// Classifies projected Solana transactions and persists the outcome.
///
/// The service owns nothing but a shared database handle, so cloning it only
/// clones the `Arc`.
#[derive(Clone, Debug)]
pub struct TransactionClassificationService {
    /// Shared database handle passed to every query issued by the service.
    database: std::sync::Arc<crate::Database>,
}
impl TransactionClassificationService {
/// Creates a transaction classification service.
pub fn new(database: std::sync::Arc<crate::Database>) -> Self {
return Self { database };
}
/// Classifies one transaction by signature and persists the classification.
pub async fn classify_transaction_by_signature(
&self,
signature: &str,
) -> Result<crate::TransactionClassificationDto, crate::Error> {
let context_result =
load_transaction_classification_context(self.database.as_ref(), signature).await;
let context = match context_result {
Ok(context) => context,
Err(error) => return Err(error),
};
let classification = classify_transaction_context(&context);
let dto = crate::TransactionClassificationDto::new(
context.transaction_id,
context.transaction.signature.clone(),
context.transaction.slot,
classification.kind.to_string(),
classification.primary_protocol,
classification.primary_program_id,
classification.confidence_level,
classification.reason,
classification.evidence_json,
);
let upsert_result =
crate::query_transaction_classifications_upsert(self.database.as_ref(), &dto).await;
if let Err(error) = upsert_result {
return Err(error);
}
let persisted_result = crate::query_transaction_classifications_get_by_transaction_id(
self.database.as_ref(),
context.transaction_id,
)
.await;
let persisted_option = match persisted_result {
Ok(persisted_option) => persisted_option,
Err(error) => return Err(error),
};
let persisted = match persisted_option {
Some(persisted) => persisted,
None => {
return Err(crate::Error::InvalidState(format!(
"transaction classification for '{}' disappeared after upsert",
signature
)));
},
};
let candidate_recording_result =
crate::protocol_candidate_recording::record_protocol_candidates_for_classification(
crate::protocol_candidate_recording::ProtocolCandidateRecordingInput {
database: self.database.as_ref(),
transaction: &context.transaction,
transaction_id: context.transaction_id,
instructions: &context.instructions,
classification_kind: persisted.classification_kind.as_str(),
},
)
.await;
match candidate_recording_result {
Ok(candidate_count) => {
tracing::trace!(
signature = %context.transaction.signature,
classification_kind = %persisted.classification_kind,
protocol_candidate_count = candidate_count,
"transaction protocol candidates recorded"
);
},
Err(error) => return Err(error),
}
return Ok(persisted);
}
}
/// Everything the classifier reads about one transaction.
struct TransactionClassificationContext {
    /// Transaction row looked up by signature.
    transaction: crate::ChainTransactionDto,
    /// Internal id of `transaction`, already unwrapped from its optional `id`.
    transaction_id: i64,
    /// Instructions listed for `transaction_id`.
    instructions: Vec<crate::ChainInstructionDto>,
    /// Decoded DEX events listed for `transaction_id`; empty when nothing was decoded.
    decoded_events: Vec<crate::DexDecodedEventDto>,
}
/// In-memory outcome of classifying one transaction, before it is persisted.
struct TransactionClassificationDecision {
    /// Classification label. This file emits `dex_trade`, `dex_non_trade`,
    /// `known_dex_program_unclassified` and `unknown_or_unclassified`.
    kind: &'static str,
    /// Protocol name of the first decoded event or first known program match, if any.
    primary_protocol: Option<String>,
    /// Program id of the first decoded event or first known program match, if any.
    primary_program_id: Option<String>,
    /// Confidence score; this file uses 100, 95, 75, 25 and 0.
    confidence_level: i16,
    /// Human-readable explanation of why `kind` was chosen.
    reason: String,
    /// Supporting evidence serialized as a JSON string.
    evidence_json: String,
}
/// One instruction whose program id equals a known DEX program id.
#[derive(Clone, Debug)]
struct KnownDexProgramMatch {
    /// Short protocol label associated with the matched program id.
    protocol_name: &'static str,
    /// Program id exactly as found on the instruction.
    program_id: String,
    /// Id of the matched instruction, copied from the instruction's optional `id`.
    instruction_id: Option<i64>,
    /// Index of the matched instruction, copied from the instruction.
    instruction_index: u32,
}
async fn load_transaction_classification_context(
database: &crate::Database,
signature: &str,
) -> Result<TransactionClassificationContext, crate::Error> {
let transaction_result =
crate::query_chain_transactions_get_by_signature(database, signature).await;
let transaction_option = match transaction_result {
Ok(transaction_option) => transaction_option,
Err(error) => return Err(error),
};
let transaction = match transaction_option {
Some(transaction) => transaction,
None => {
return Err(crate::Error::InvalidState(format!(
"cannot classify unknown chain transaction '{}'",
signature
)));
},
};
let transaction_id = match transaction.id {
Some(transaction_id) => transaction_id,
None => {
return Err(crate::Error::InvalidState(format!(
"chain transaction '{}' has no internal id",
signature
)));
},
};
let instructions_result =
crate::query_chain_instructions_list_by_transaction_id(database, transaction_id).await;
let instructions = match instructions_result {
Ok(instructions) => instructions,
Err(error) => return Err(error),
};
let decoded_events_result =
crate::query_dex_decoded_events_list_by_transaction_id(database, transaction_id).await;
let decoded_events = match decoded_events_result {
Ok(decoded_events) => decoded_events,
Err(error) => return Err(error),
};
return Ok(TransactionClassificationContext {
transaction,
transaction_id,
instructions,
decoded_events,
});
}
/// Chooses the classification for an already-loaded transaction context.
///
/// Precedence, highest first:
/// 1. any decoded DEX event -> decided by `classify_from_decoded_events`;
/// 2. any instruction of a known DEX program -> decided by
///    `classify_from_known_program_matches`;
/// 3. otherwise `unknown_or_unclassified` with confidence 25.
fn classify_transaction_context(
    context: &TransactionClassificationContext,
) -> TransactionClassificationDecision {
    let has_decoded_events = !context.decoded_events.is_empty();
    if has_decoded_events {
        return classify_from_decoded_events(context);
    }

    // Program-id hints are only consulted when no event was decoded.
    let dex_matches = find_known_dex_program_matches(&context.instructions);
    if dex_matches.is_empty() {
        let evidence = serde_json::json!({
            "transactionId": context.transaction_id,
            "signature": context.transaction.signature,
            "slot": context.transaction.slot,
            "instructionCount": context.instructions.len(),
            "decodedEventCount": context.decoded_events.len()
        });
        build_decision(
            "unknown_or_unclassified",
            None,
            None,
            25,
            "transaction has no decoded DEX event and no known DEX program id".to_string(),
            evidence,
        )
    } else {
        classify_from_known_program_matches(context, &dex_matches)
    }
}
fn classify_from_decoded_events(
context: &TransactionClassificationContext,
) -> TransactionClassificationDecision {
let mut first_protocol = None;
let mut first_program_id = None;
let mut trade_event_count = 0_i64;
let mut non_trade_event_count = 0_i64;
let mut decoded_event_evidence = std::vec::Vec::new();
for decoded_event in &context.decoded_events {
if first_protocol.is_none() {
first_protocol = Some(decoded_event.protocol_name.clone());
}
if first_program_id.is_none() {
first_program_id = Some(decoded_event.program_id.clone());
}
let payload_value_result =
serde_json::from_str::<serde_json::Value>(decoded_event.payload_json.as_str());
let payload_value = match payload_value_result {
Ok(payload_value) => payload_value,
Err(_) => serde_json::Value::Null,
};
let is_trade = crate::is_decoded_event_trade_candidate(
decoded_event.event_kind.as_str(),
&payload_value,
);
if is_trade {
trade_event_count += 1;
} else {
non_trade_event_count += 1;
}
decoded_event_evidence.push(serde_json::json!({
"id": decoded_event.id,
"protocolName": decoded_event.protocol_name,
"programId": decoded_event.program_id,
"eventKind": decoded_event.event_kind,
"poolAccount": decoded_event.pool_account,
"tradeCandidate": is_trade
}));
}
if trade_event_count > 0_i64 {
return build_decision(
"dex_trade",
first_protocol,
first_program_id,
100,
"transaction has at least one decoded DEX trade event".to_string(),
serde_json::json!({
"transactionId": context.transaction_id,
"signature": context.transaction.signature,
"slot": context.transaction.slot,
"decodedEventCount": context.decoded_events.len(),
"tradeEventCount": trade_event_count,
"nonTradeEventCount": non_trade_event_count,
"decodedEvents": decoded_event_evidence
}),
);
}
return build_decision(
"dex_non_trade",
first_protocol,
first_program_id,
95,
"transaction has decoded DEX events but no trade candidate".to_string(),
serde_json::json!({
"transactionId": context.transaction_id,
"signature": context.transaction.signature,
"slot": context.transaction.slot,
"decodedEventCount": context.decoded_events.len(),
"tradeEventCount": trade_event_count,
"nonTradeEventCount": non_trade_event_count,
"decodedEvents": decoded_event_evidence
}),
);
}
/// Classifies a transaction that touches a known DEX program but has no decoded event.
///
/// The decision is always `known_dex_program_unclassified` with confidence 75.
/// The first match provides the primary protocol and program id; every match
/// is listed in the evidence.
///
/// # Panics
///
/// Panics if `known_program_matches` is empty, because the first element is
/// read by index. The caller visible in this file only calls this function
/// with a non-empty slice.
fn classify_from_known_program_matches(
    context: &TransactionClassificationContext,
    known_program_matches: &[KnownDexProgramMatch],
) -> TransactionClassificationDecision {
    let primary = &known_program_matches[0];

    let match_evidence: Vec<serde_json::Value> = known_program_matches
        .iter()
        .map(|program_match| {
            serde_json::json!({
                "protocolName": program_match.protocol_name,
                "programId": program_match.program_id,
                "instructionId": program_match.instruction_id,
                "instructionIndex": program_match.instruction_index
            })
        })
        .collect();

    let evidence = serde_json::json!({
        "transactionId": context.transaction_id,
        "signature": context.transaction.signature,
        "slot": context.transaction.slot,
        "instructionCount": context.instructions.len(),
        "decodedEventCount": context.decoded_events.len(),
        "knownDexProgramMatches": match_evidence
    });

    build_decision(
        "known_dex_program_unclassified",
        Some(primary.protocol_name.to_string()),
        Some(primary.program_id.clone()),
        75,
        "transaction has known DEX program instructions but no decoded DEX event".to_string(),
        evidence,
    )
}
fn build_decision(
kind: &'static str,
primary_protocol: std::option::Option<std::string::String>,
primary_program_id: std::option::Option<std::string::String>,
confidence_level: i16,
reason: std::string::String,
evidence_value: serde_json::Value,
) -> TransactionClassificationDecision {
let evidence_json_result = serde_json::to_string(&evidence_value);
let evidence_json = match evidence_json_result {
Ok(evidence_json) => evidence_json,
Err(error) => {
return TransactionClassificationDecision {
kind: "unknown_or_unclassified",
primary_protocol: None,
primary_program_id: None,
confidence_level: 0,
reason: format!("cannot serialize classification evidence: {}", error),
evidence_json: "{}".to_string(),
};
},
};
return TransactionClassificationDecision {
kind,
primary_protocol,
primary_program_id,
confidence_level,
reason,
evidence_json,
};
}
fn find_known_dex_program_matches(
instructions: &[crate::ChainInstructionDto],
) -> std::vec::Vec<KnownDexProgramMatch> {
let mut matches = std::vec::Vec::new();
for instruction in instructions {
let program_match = known_dex_program_match(instruction);
let program_match = match program_match {
Some(program_match) => program_match,
None => continue,
};
matches.push(program_match);
}
return matches;
}
/// Maps one instruction to a known DEX protocol, if its program id is recognised.
///
/// Returns `None` when the instruction has no program id or when the program
/// id equals none of the known DEX program id constants. On a match, the
/// result carries the protocol label, the program id, and the instruction's
/// id and index.
fn known_dex_program_match(
    instruction: &crate::ChainInstructionDto,
) -> std::option::Option<KnownDexProgramMatch> {
    let program_id = instruction.program_id.as_deref()?;

    // Ordered lookup table: the first entry whose program id is equal wins.
    let known_programs = [
        (crate::RAYDIUM_AMM_V4_PROGRAM_ID, "raydium_amm_v4"),
        (crate::RAYDIUM_CPMM_PROGRAM_ID, "raydium_cpmm"),
        (crate::RAYDIUM_CLMM_PROGRAM_ID, "raydium_clmm"),
        (crate::RAYDIUM_LAUNCHLAB_PROGRAM_ID, "raydium_launchlab"),
        (crate::RAYDIUM_AMM_ROUTING_PROGRAM_ID, "raydium_router"),
        (crate::RAYDIUM_STABLE_SWAP_AMM_PROGRAM_ID, "raydium_stable_swap"),
        (crate::PUMP_FUN_PROGRAM_ID, "pump_fun"),
        (crate::PUMP_SWAP_PROGRAM_ID, "pump_swap"),
        (crate::METEORA_DBC_PROGRAM_ID, "meteora_dbc"),
        (crate::METEORA_DLMM_PROGRAM_ID, "meteora_dlmm"),
        (crate::METEORA_DAMM_V1_PROGRAM_ID, "meteora_damm_v1"),
        (crate::METEORA_DAMM_V2_PROGRAM_ID, "meteora_damm_v2"),
        (crate::ORCA_WHIRLPOOLS_PROGRAM_ID, "orca_whirlpools"),
        (crate::FLUXBEAM_PROGRAM_ID, "fluxbeam"),
        (crate::DEXLAB_PROGRAM_ID, "dexlab"),
    ];

    let protocol_name = known_programs
        .iter()
        .find(|entry| program_id == entry.0)
        .map(|entry| entry.1)?;

    Some(KnownDexProgramMatch {
        protocol_name,
        program_id: program_id.to_string(),
        instruction_id: instruction.id,
        instruction_index: instruction.instruction_index,
    })
}
#[cfg(test)]
mod tests {
    /// Builds an instruction DTO whose only varying field is `program_id`.
    fn test_instruction(program_id: Option<String>) -> crate::ChainInstructionDto {
        crate::ChainInstructionDto::new(
            1,
            None,
            0,
            None,
            program_id,
            None,
            None,
            "[]".to_string(),
            None,
            None,
            Some(serde_json::json!({}).to_string()),
        )
    }

    /// Builds a transaction DTO with signature `signature_1` and internal id 1.
    fn test_transaction() -> crate::ChainTransactionDto {
        let mut transaction = crate::ChainTransactionDto::new(
            "signature_1".to_string(),
            Some(123),
            None,
            Some("test".to_string()),
            None,
            None,
            None,
            serde_json::json!({}).to_string(),
        );
        transaction.id = Some(1);
        transaction
    }

    /// A known program id yields a match carrying its protocol label.
    #[test]
    fn known_dex_program_ids_are_matched() {
        let instruction = test_instruction(Some(crate::RAYDIUM_CPMM_PROGRAM_ID.to_string()));

        let program_match = super::known_dex_program_match(&instruction)
            .expect("expected raydium_cpmm program match");

        assert_eq!(program_match.protocol_name, "raydium_cpmm");
        assert_eq!(program_match.program_id, crate::RAYDIUM_CPMM_PROGRAM_ID);
        assert_eq!(program_match.instruction_index, 0);
    }

    /// A program id outside the known list yields no match.
    #[test]
    fn unknown_program_id_is_not_matched() {
        let unknown_program_id = "UnknownProgram111111111111111111111111111111111".to_string();
        let instruction = test_instruction(Some(unknown_program_id));

        assert!(super::known_dex_program_match(&instruction).is_none());
    }

    /// With no instructions and no decoded events, the fallback decision is used.
    #[test]
    fn unknown_context_is_classified_as_unknown_or_unclassified() {
        let context = super::TransactionClassificationContext {
            transaction: test_transaction(),
            transaction_id: 1,
            instructions: Vec::new(),
            decoded_events: Vec::new(),
        };

        let decision = super::classify_transaction_context(&context);

        assert_eq!(decision.kind, "unknown_or_unclassified");
        assert_eq!(decision.confidence_level, 25);
    }
}