Files
khadhroony-bobobot/kb_lib/src/upstream_registry_match.rs
2026-06-08 12:32:58 +02:00

652 lines
24 KiB
Rust

// file: kb_lib/src/upstream_registry_match.rs
//! Matching, filtering and summarizing helpers for the upstream Git registry.
/// Builds an owned DTO for every entry of the static upstream registry table.
///
/// The DTOs are produced in the same order as the entries appear in
/// `UPSTREAM_REGISTRY_ENTRIES`.
pub(crate) fn upstream_registry_all_entries() -> std::vec::Vec<crate::UpstreamRegistryEntryDto> {
    return crate::upstream_registry_generated::UPSTREAM_REGISTRY_ENTRIES
        .iter()
        .map(|registry_entry| registry_entry.to_dto())
        .collect();
}
/// Searches static upstream registry entries with optional filters.
///
/// Every registry entry is checked against `request` through
/// `matches_request`; matching entries are converted to DTOs in registry
/// order. When `request.limit` is set, at most that many entries are
/// returned, so a limit of `0` yields an empty entry list.
///
/// The returned summary is computed over the returned entries only, while its
/// `total_entry_count` always reports the size of the whole static registry.
pub(crate) fn upstream_registry_search(
    request: &crate::UpstreamRegistrySearchRequestDto,
) -> crate::UpstreamRegistrySearchResultDto {
    let total_entry_count = crate::upstream_registry_generated::UPSTREAM_REGISTRY_ENTRIES.len();
    let mut entries = std::vec::Vec::new();
    for entry in crate::upstream_registry_generated::UPSTREAM_REGISTRY_ENTRIES {
        // The limit is checked before pushing: checking it afterwards would
        // let one entry through even when the caller asked for zero entries.
        if let Some(limit) = request.limit {
            if entries.len() >= limit {
                break;
            }
        }
        if !matches_request(entry, request) {
            continue;
        }
        entries.push(entry.to_dto());
    }
    let summary = summarize(total_entry_count, entries.as_slice());
    return crate::UpstreamRegistrySearchResultDto {
        request: request.clone(),
        summary,
        entries,
    };
}
/// Reports whether `entry` passes every filter carried by `request`.
///
/// The individual filters are combined with a logical AND and evaluated in
/// the order decoder code, program id, program family, surface kind, entry
/// kind, proof status; evaluation stops at the first filter that rejects.
fn matches_request(
    entry: &crate::UpstreamRegistryEntry,
    request: &crate::UpstreamRegistrySearchRequestDto,
) -> bool {
    let passes_every_filter = matches_string_filter(entry.decoder_code, &request.decoder_code)
        && matches_optional_string_filter(entry.program_id, &request.program_id)
        && matches_string_filter(entry.program_family, &request.program_family)
        && matches_string_filter(entry.surface_kind, &request.surface_kind)
        && matches_string_filter(entry.entry_kind, &request.entry_kind)
        && matches_string_filter(entry.proof_status, &request.proof_status);
    return passes_every_filter;
}
/// Compares `value` against an optional filter string.
///
/// A missing filter, or a filter that is empty once surrounding whitespace is
/// trimmed, accepts every value. Otherwise the trimmed filter must equal
/// `value` ignoring ASCII case. `value` itself is not trimmed.
fn matches_string_filter(value: &str, filter: &std::option::Option<std::string::String>) -> bool {
    return match filter.as_deref().map(str::trim) {
        None | Some("") => true,
        Some(wanted) => value.eq_ignore_ascii_case(wanted),
    };
}
/// Compares an optional `value` against an optional filter string.
///
/// A missing filter, or a filter that is empty once surrounding whitespace is
/// trimmed, accepts everything, including a missing value. With an effective
/// filter, a missing value is rejected and a present value must equal the
/// trimmed filter ignoring ASCII case.
fn matches_optional_string_filter(
    value: std::option::Option<&str>,
    filter: &std::option::Option<std::string::String>,
) -> bool {
    let wanted = match filter.as_deref().map(str::trim) {
        None | Some("") => return true,
        Some(wanted) => wanted,
    };
    return value.map_or(false, |candidate| candidate.eq_ignore_ascii_case(wanted));
}
fn summarize(
total_entry_count: usize,
entries: &[crate::UpstreamRegistryEntryDto],
) -> crate::UpstreamRegistrySummaryDto {
let mut entries_with_program_id_count = 0_usize;
let mut entries_with_discriminator_count = 0_usize;
let mut program_entry_count = 0_usize;
let mut instruction_entry_count = 0_usize;
let mut event_entry_count = 0_usize;
let mut account_entry_count = 0_usize;
let mut upstream_git_unverified_count = 0_usize;
let mut upstream_git_mapped_unverified_count = 0_usize;
let mut upstream_git_local_corpus_observed_count = 0_usize;
let mut upstream_git_local_corpus_materialized_count = 0_usize;
let mut upstream_git_layout_unverified_count = 0_usize;
for entry in entries {
if entry.program_id.is_some() {
entries_with_program_id_count += 1;
}
if entry.discriminator_hex.is_some() {
entries_with_discriminator_count += 1;
}
match entry.entry_kind.as_str() {
crate::ENTRY_KIND_PROGRAM => program_entry_count += 1,
crate::ENTRY_KIND_INSTRUCTION => instruction_entry_count += 1,
crate::ENTRY_KIND_EVENT => event_entry_count += 1,
crate::ENTRY_KIND_ACCOUNT => account_entry_count += 1,
_ => (),
}
match entry.proof_status.as_str() {
crate::PROOF_STATUS_UPSTREAM_GIT_UNVERIFIED => upstream_git_unverified_count += 1,
crate::PROOF_STATUS_UPSTREAM_GIT_MAPPED_UNVERIFIED => {
upstream_git_mapped_unverified_count += 1;
},
crate::PROOF_STATUS_UPSTREAM_GIT_LOCAL_CORPUS_OBSERVED => {
upstream_git_local_corpus_observed_count += 1;
},
crate::PROOF_STATUS_UPSTREAM_GIT_LOCAL_CORPUS_MATERIALIZED => {
upstream_git_local_corpus_materialized_count += 1;
},
crate::PROOF_STATUS_UPSTREAM_GIT_LAYOUT_UNVERIFIED => {
upstream_git_layout_unverified_count += 1;
},
_ => (),
}
}
return crate::UpstreamRegistrySummaryDto {
total_entry_count,
returned_entry_count: entries.len(),
entries_with_program_id_count,
entries_with_discriminator_count,
program_entry_count,
instruction_entry_count,
event_entry_count,
account_entry_count,
upstream_git_unverified_count,
upstream_git_mapped_unverified_count,
upstream_git_local_corpus_observed_count,
upstream_git_local_corpus_materialized_count,
upstream_git_layout_unverified_count,
};
}
/// Finds the instruction registry entry whose discriminator prefixes the given
/// base58 instruction data.
///
/// Returns `None` when `data_base58` is missing, blank or not decodable, or
/// when no entry matches. Only entries of kind `ENTRY_KIND_INSTRUCTION` whose
/// program id equals `program_id` exactly and that carry a discriminator with
/// a non-zero length are considered. When several discriminators match, the
/// longest one wins; on equal length the earliest registry entry is kept.
pub(crate) fn upstream_registry_match_instruction_data(
    program_id: &str,
    data_base58: std::option::Option<&str>,
) -> std::option::Option<crate::UpstreamRegistryEntryDto> {
    let instruction_bytes = match decode_base58_instruction_data(data_base58) {
        Some(instruction_bytes) => instruction_bytes,
        None => return None,
    };
    let mut best_entry: std::option::Option<&crate::UpstreamRegistryEntry> = None;
    let mut best_len = 0_usize;
    for candidate in crate::upstream_registry_generated::UPSTREAM_REGISTRY_ENTRIES {
        if candidate.entry_kind != crate::ENTRY_KIND_INSTRUCTION {
            continue;
        }
        // A candidate is usable only when all three optional parts are present.
        let (candidate_program_id, candidate_hex, candidate_len) = match (
            candidate.program_id,
            candidate.discriminator_hex,
            discriminator_len_usize(candidate.discriminator_len),
        ) {
            (Some(candidate_program_id), Some(candidate_hex), Some(candidate_len)) => {
                (candidate_program_id, candidate_hex, candidate_len)
            },
            _ => continue,
        };
        // Strictly-greater comparison keeps the first entry among equal lengths.
        let is_better_match = candidate_program_id == program_id
            && data_prefix_matches_discriminator(
                instruction_bytes.as_slice(),
                candidate_len,
                candidate_hex,
            )
            && candidate_len > best_len;
        if is_better_match {
            best_entry = Some(candidate);
            best_len = candidate_len;
        }
    }
    return best_entry.map(|entry| entry.to_dto());
}
/// Converts an optional `u16` discriminator length into a usable `usize`.
///
/// Returns `None` when the length is missing or zero, so callers never work
/// with an empty discriminator.
fn discriminator_len_usize(
    discriminator_len: std::option::Option<u16>,
) -> std::option::Option<usize> {
    return discriminator_len.filter(|raw_len| *raw_len != 0).map(usize::from);
}
/// Decodes optional base58 text into raw bytes.
///
/// Surrounding whitespace is trimmed first. Returns `None` when the input is
/// missing, empty after trimming, or rejected by the base58 decoder.
fn decode_base58_instruction_data(
    data_base58: std::option::Option<&str>,
) -> std::option::Option<std::vec::Vec<u8>> {
    let encoded = data_base58.map(str::trim).filter(|text| !text.is_empty());
    return match encoded {
        // A decoding error is deliberately folded into `None`.
        Some(encoded) => bs58::decode(encoded).into_vec().ok(),
        None => None,
    };
}
/// Reports whether the first `discriminator_len` bytes of `data`, rendered as
/// hexadecimal text, equal `discriminator_hex`.
///
/// Data shorter than `discriminator_len` never matches. The text comparison is
/// exact, so it is case-sensitive.
fn data_prefix_matches_discriminator(
    data: &[u8],
    discriminator_len: usize,
    discriminator_hex: &str,
) -> bool {
    let has_enough_bytes = data.len() >= discriminator_len;
    // Short-circuit so the prefix is only rendered when it exists.
    return has_enough_bytes
        && bytes_prefix_to_hex(data, discriminator_len).as_str() == discriminator_hex;
}
/// Renders the first `len` bytes of `data` as lowercase hexadecimal text.
///
/// Each byte becomes exactly two hex digits. When `len` exceeds `data.len()`,
/// only the available bytes are rendered instead of panicking on an
/// out-of-bounds index.
fn bytes_prefix_to_hex(data: &[u8], len: usize) -> std::string::String {
    const HEX_DIGITS: &[u8; 16] = b"0123456789abcdef";
    let prefix_len = std::cmp::min(len, data.len());
    // Two output characters per byte; reserving up front avoids regrowth and
    // the per-byte temporary `String` that `format!` would allocate.
    let mut text = std::string::String::with_capacity(prefix_len * 2);
    for byte in data.iter().take(prefix_len) {
        text.push(char::from(HEX_DIGITS[usize::from(*byte >> 4)]));
        text.push(char::from(HEX_DIGITS[usize::from(*byte & 0x0f)]));
    }
    return text;
}
#[cfg(test)]
mod tests {
    /// Every expected OpenBook v2 instruction discriminator is present as an
    /// 8-byte, upstream-unverified registry entry.
    #[test]
    fn registry_contains_openbook_v2_idl_instruction_discriminators_without_local_verification() {
        let expected = [
            ("create_market", "67e261ebc8bcfbfe"),
            ("close_market", "589af8ba300e7bf4"),
            ("create_open_orders_account", "ccb5afde287dbc47"),
            ("close_open_orders_account", "b04a73d236b35b67"),
            ("place_order", "33c29baf6d82606a"),
            ("place_take_order", "032c47031ac7cb55"),
            ("consume_events", "dd91b1341f2f3fc9"),
            ("consume_given_events", "d1e336046dac2947"),
            ("cancel_order", "5f81edf00831df84"),
            ("cancel_order_by_client_order_id", "73b2c908afb77b77"),
            ("cancel_all_orders", "c453f3ab1164a08f"),
            ("deposit", "f223c68952e1f2b6"),
            ("settle_funds", "ee40a3604bab1021"),
            ("sweep_fees", "afe1624776422294"),
        ];
        let all_entries = crate::upstream_registry_match::upstream_registry_all_entries();
        for (entry_name, discriminator_hex) in expected {
            let found = all_entries.iter().any(|entry| {
                entry.decoder_code == "openbook_v2"
                    && entry.program_id.as_deref() == Some(crate::OPENBOOK_V2_PROGRAM_ID)
                    && entry.entry_kind == crate::ENTRY_KIND_INSTRUCTION
                    && entry.entry_name == entry_name
                    && entry.discriminator_hex.as_deref() == Some(discriminator_hex)
                    && entry.discriminator_len == Some(8)
                    && entry.proof_status == crate::PROOF_STATUS_UPSTREAM_GIT_UNVERIFIED
            });
            assert!(
                found,
                "missing OpenBook v2 discriminator entry '{}', '{}'",
                entry_name, discriminator_hex
            );
        }
    }

    /// Raw instruction data consisting of the `place_take_order` discriminator
    /// resolves to the OpenBook v2 entry.
    #[test]
    fn openbook_v2_place_take_order_discriminator_matches_raw_data() {
        let discriminator_bytes =
            [0x03_u8, 0x2c_u8, 0x47_u8, 0x03_u8, 0x1a_u8, 0xc7_u8, 0xcb_u8, 0x55_u8];
        let data = bs58::encode(discriminator_bytes).into_string();
        let lookup = crate::upstream_registry_match::upstream_registry_match_instruction_data(
            crate::OPENBOOK_V2_PROGRAM_ID,
            Some(data.as_str()),
        );
        let matched = match lookup {
            Some(matched) => matched,
            None => panic!("OpenBook v2 place_take_order discriminator must match"),
        };
        assert_eq!(matched.decoder_code, "openbook_v2");
        assert_eq!(matched.entry_name, "place_take_order");
        assert_eq!(matched.discriminator_hex.as_deref(), Some("032c47031ac7cb55"));
    }

    /// Every priority decoder code has a program-kind seed entry.
    #[test]
    fn registry_contains_priority_family_program_seeds() {
        let expected_codes = [
            "meteora_damm_v2",
            "meteora_dbc",
            "meteora_dlmm",
            "meteora_vault",
            "raydium_amm_v4",
            "raydium_clmm",
            "raydium_cpmm",
            "raydium_launchpad",
            "raydium_liquidity_locking",
            "raydium_stable_swap",
            "orca_whirlpools",
            "fluxbeam",
            "lifinity_v2",
            "phoenix_v1",
            "openbook_v2",
            "stabble_stable_swap",
            "stabble_weighted_swap",
            "bonkswap",
            "boop_fun",
            "moonshot",
            "heaven",
            "okx_dex",
            "pancake_swap",
            "vertigo",
            "virtuals",
            "wavebreak",
            "onchain_labs_dex_v1",
            "onchain_labs_dex_v2",
            "jupiter_swap",
            "jupiter_dca",
            "jupiter_limit_order",
            "jupiter_limit_order_2",
            "jupiter_perpetuals",
            "jupiter_lend",
            "kamino_lending",
            "kamino_vault",
            "kamino_farms",
            "kamino_limit_order",
            "drift_v2",
            "marginfi_v2",
            "dflow_aggregator_v4",
            "zeta",
            "system_program",
            "token_program",
            "token_2022",
            "associated_token_account",
            "address_lookup_table",
            "memo_program",
            "stake_program",
            "mpl_token_metadata",
            "mpl_core",
            "bubblegum",
            "name_service",
            "marinade_finance",
            "solayer_restaking_program",
            "swig",
            "sharky",
            "circle_message_transmitter_v2",
            "circle_token_messenger_v2",
        ];
        let all_entries = crate::upstream_registry_match::upstream_registry_all_entries();
        for expected_code in expected_codes {
            let found = all_entries.iter().any(|entry| {
                entry.decoder_code == expected_code
                    && entry.entry_kind == crate::ENTRY_KIND_PROGRAM
            });
            assert!(found, "missing upstream registry code '{}'", expected_code);
        }
    }

    /// No two entries share the full (decoder, program id, kind, name,
    /// discriminator) key.
    #[test]
    fn registry_has_no_duplicate_entry_keys() {
        let all_entries = crate::upstream_registry_match::upstream_registry_all_entries();
        let mut seen = std::collections::BTreeSet::new();
        for entry in all_entries.iter() {
            let is_new_key = seen.insert((
                entry.decoder_code.as_str(),
                entry.program_id.as_deref(),
                entry.entry_kind.as_str(),
                entry.entry_name.as_str(),
                entry.discriminator_hex.as_deref(),
            ));
            assert!(
                is_new_key,
                "duplicate upstream registry entry: decoder={} program_id={:?} kind={} name={} discriminator={:?}",
                entry.decoder_code,
                entry.program_id,
                entry.entry_kind,
                entry.entry_name,
                entry.discriminator_hex
            );
        }
    }

    /// A (decoder, program id) pair has at most one program-kind entry.
    #[test]
    fn registry_has_no_duplicate_program_entry_for_same_decoder_and_program_id() {
        let all_entries = crate::upstream_registry_match::upstream_registry_all_entries();
        let mut seen = std::collections::BTreeSet::new();
        let program_entries =
            all_entries.iter().filter(|entry| entry.entry_kind == crate::ENTRY_KIND_PROGRAM);
        for entry in program_entries {
            let is_new_key =
                seen.insert((entry.decoder_code.as_str(), entry.program_id.as_deref()));
            assert!(
                is_new_key,
                "duplicate upstream registry program entry: decoder={} program_id={:?}",
                entry.decoder_code,
                entry.program_id
            );
        }
    }

    /// A program id is claimed by at most one program-kind entry.
    #[test]
    fn registry_has_no_duplicate_program_id_for_program_rows() {
        let all_entries = crate::upstream_registry_match::upstream_registry_all_entries();
        let mut seen = std::collections::BTreeMap::<&str, &str>::new();
        let program_entries =
            all_entries.iter().filter(|entry| entry.entry_kind == crate::ENTRY_KIND_PROGRAM);
        for entry in program_entries {
            // Program rows without a program id cannot collide and are skipped.
            let program_id = match entry.program_id.as_deref() {
                Some(program_id) => program_id,
                None => continue,
            };
            if let Some(previous_decoder) = seen.insert(program_id, entry.decoder_code.as_str()) {
                panic!(
                    "duplicate upstream registry program_id {} for {} and {}",
                    program_id, previous_decoder, entry.decoder_code
                );
            }
        }
    }

    /// Non-program entries never repeat a (program id, kind, name,
    /// discriminator) key.
    #[test]
    fn registry_has_no_duplicate_program_discriminator_keys() {
        let all_entries = crate::upstream_registry_match::upstream_registry_all_entries();
        let mut seen = std::collections::BTreeSet::new();
        let non_program_entries =
            all_entries.iter().filter(|entry| entry.entry_kind != crate::ENTRY_KIND_PROGRAM);
        for entry in non_program_entries {
            // Only entries carrying both a program id and a discriminator are keyed.
            let (program_id, discriminator_hex) =
                match (entry.program_id.as_deref(), entry.discriminator_hex.as_deref()) {
                    (Some(program_id), Some(discriminator_hex)) => (program_id, discriminator_hex),
                    _ => continue,
                };
            let is_new_key = seen.insert((
                program_id,
                entry.entry_kind.as_str(),
                entry.entry_name.as_str(),
                discriminator_hex,
            ));
            assert!(
                is_new_key,
                "duplicate upstream registry discriminator key: program_id={} kind={} name={} discriminator={}",
                program_id,
                entry.entry_kind,
                entry.entry_name,
                discriminator_hex
            );
        }
    }

    /// Every entry is still marked upstream-unverified and none claims a
    /// local-corpus status.
    #[test]
    fn registry_does_not_claim_local_corpus_verification_in_bootstrap_tranche() {
        let all_entries = crate::upstream_registry_match::upstream_registry_all_entries();
        for entry in all_entries.iter() {
            assert_eq!(entry.proof_status, crate::PROOF_STATUS_UPSTREAM_GIT_UNVERIFIED);
            assert_ne!(entry.proof_status, crate::PROOF_STATUS_UPSTREAM_GIT_LOCAL_CORPUS_OBSERVED);
            assert_ne!(
                entry.proof_status,
                crate::PROOF_STATUS_UPSTREAM_GIT_LOCAL_CORPUS_MATERIALIZED
            );
        }
    }

    /// Every expected Meteora DAMM v2 instruction discriminator is present as
    /// an 8-byte, upstream-unverified registry entry.
    #[test]
    fn registry_contains_all_meteora_damm_v2_instruction_discriminators_without_local_verification()
    {
        let all_entries = crate::upstream_registry_match::upstream_registry_all_entries();
        let expected_entries = [
            ("add_liquidity", "b59d59438fb63448"),
            ("claim_partner_fee", "61ce27695e5e7e94"),
            ("claim_position_fee", "b4269a118521a2d3"),
            ("claim_protocol_fee", "a5e4853063f9ff21"),
            ("claim_reward", "955fb5f25e5a9ea2"),
            ("cpi_event", "bcd8a66c1aa68eb6"),
            ("initialize_pool", "5fb40aac54aee828"),
            ("remove_liquidity", "5055d14818ceb16c"),
            ("swap", "f8c69e91e17587c8"),
            ("swap2", "414b3f4ceb5b5b88"),
            ("update_pool_fees", "76d9cbb33c084659"),
            ("withdraw_ineligible_reward", "94ce2ac3f7316708"),
        ];
        for expected_entry in expected_entries {
            let found = all_entries.iter().any(|entry| {
                entry.decoder_code == "meteora_damm_v2"
                    && entry.entry_kind == crate::ENTRY_KIND_INSTRUCTION
                    && entry.entry_name == expected_entry.0
                    && entry.discriminator_hex.as_deref() == Some(expected_entry.1)
                    && entry.discriminator_len == Some(8)
                    && entry.proof_status == crate::PROOF_STATUS_UPSTREAM_GIT_UNVERIFIED
            });
            assert!(
                found,
                "missing upstream Git discriminator entry '{}', '{}'",
                expected_entry.0, expected_entry.1
            );
        }
    }

    /// The expected Meteora DAMM v2 event discriminators are present as
    /// 8-byte, upstream-unverified registry entries.
    #[test]
    fn registry_contains_meteora_damm_v2_event_discriminators_without_local_verification() {
        let all_entries = crate::upstream_registry_match::upstream_registry_all_entries();
        let expected_entries =
            [("evt_liquidity_change", "c5ab4e7fe0d3570d"), ("evt_swap2", "bd4233a826507599")];
        for expected_entry in expected_entries {
            let found = all_entries.iter().any(|entry| {
                entry.decoder_code == "meteora_damm_v2"
                    && entry.entry_kind == crate::ENTRY_KIND_EVENT
                    && entry.entry_name == expected_entry.0
                    && entry.discriminator_hex.as_deref() == Some(expected_entry.1)
                    && entry.discriminator_len == Some(8)
                    && entry.proof_status == crate::PROOF_STATUS_UPSTREAM_GIT_UNVERIFIED
            });
            assert!(found, "missing upstream Git event entry '{}'", expected_entry.0);
        }
    }

    /// An 8-byte discriminator followed by a payload byte resolves to the
    /// Meteora DAMM v2 `add_liquidity` entry.
    #[test]
    fn matches_anchor_instruction_data_by_program_id_and_discriminator() {
        let data =
            [0xb5_u8, 0x9d_u8, 0x59_u8, 0x43_u8, 0x8f_u8, 0xb6_u8, 0x34_u8, 0x48_u8, 0x01_u8];
        let data_base58 = bs58::encode(data).into_string();
        let lookup = crate::upstream_registry_match::upstream_registry_match_instruction_data(
            crate::METEORA_DAMM_V2_PROGRAM_ID,
            Some(data_base58.as_str()),
        );
        let matched = match lookup {
            Some(matched) => matched,
            None => panic!("missing meteora_damm_v2 add_liquidity registry match"),
        };
        assert_eq!(matched.decoder_code, "meteora_damm_v2");
        assert_eq!(matched.entry_name, "add_liquidity");
        assert_eq!(matched.discriminator_hex.as_deref(), Some("b59d59438fb63448"));
        assert_eq!(matched.proof_status, crate::PROOF_STATUS_UPSTREAM_GIT_UNVERIFIED);
    }

    /// A one-byte discriminator followed by payload bytes resolves to the
    /// Phoenix v1 `swap` entry.
    #[test]
    fn matches_one_byte_instruction_data_by_program_id_and_discriminator() {
        let data = [0x00_u8, 0x10_u8, 0x20_u8];
        let data_base58 = bs58::encode(data).into_string();
        let lookup = crate::upstream_registry_match::upstream_registry_match_instruction_data(
            crate::PHOENIX_V1_PROGRAM_ID,
            Some(data_base58.as_str()),
        );
        let matched = match lookup {
            Some(matched) => matched,
            None => panic!("missing phoenix_v1 swap registry match"),
        };
        assert_eq!(matched.decoder_code, "phoenix_v1");
        assert_eq!(matched.entry_name, "swap");
        assert_eq!(matched.discriminator_hex.as_deref(), Some("00"));
        assert_eq!(matched.discriminator_len, Some(1));
    }

    /// Filtering by program id and family returns only Raydium CPMM entries.
    #[test]
    fn registry_can_filter_by_program_id_and_family() {
        let request = crate::UpstreamRegistrySearchRequestDto {
            decoder_code: None,
            program_id: Some(crate::RAYDIUM_CPMM_PROGRAM_ID.to_string()),
            program_family: Some("raydium".to_string()),
            surface_kind: None,
            entry_kind: None,
            proof_status: None,
            limit: None,
        };
        let result = crate::upstream_registry_match::upstream_registry_search(&request);
        assert!(result.entries.len() >= 2);
        for entry in result.entries.iter() {
            assert_eq!(entry.decoder_code, "raydium_cpmm");
        }
    }

    /// No registry entry mentions the deprecated external repository prefix.
    #[test]
    fn registry_uses_generic_upstream_git_status_names_only() {
        // The term is assembled at runtime, so this file does not contain it verbatim.
        let deprecated_external_repo_prefix = format!("{}{}", "car", "bon");
        let forbidden_terms = [deprecated_external_repo_prefix];
        let all_entries = crate::upstream_registry_match::upstream_registry_all_entries();
        for entry in all_entries.iter() {
            let payload = format!(
                "{} {} {} {} {}",
                entry.decoder_code,
                entry.program_family,
                entry.surface_kind,
                entry.proof_status,
                entry.notes
            );
            let payload_lowercase = payload.to_ascii_lowercase();
            for forbidden_term in forbidden_terms.iter() {
                assert!(
                    !payload_lowercase.contains(forbidden_term.as_str()),
                    "forbidden registry term found: {}",
                    forbidden_term
                );
            }
        }
    }
}