This commit is contained in:
2026-06-15 20:16:27 +02:00
parent 3b908b318e
commit 045af4931c
44 changed files with 5328 additions and 113 deletions

View File

@@ -0,0 +1,456 @@
-- file: validation_sql/SQL_VALIDATION_PUMP_FUN_0_7_54.sql
-- 0.7.54 pump_fun validation and corpus-seed checklist.
-- Run on a dedicated fresh SQLite database for the Pump.fun tranche.
-- Recommended replay settings after each backfill group:
-- skipDexDecode=no, forceDexDecode=yes, deferInstructionObservations=yes.
-- This file is intentionally read-only: it never mutates the database.
-- 00. Corpus seed: upstream fallback samples to backfill first.
-- Counts upstream_git instruction matches whose payload names the pump_fun
-- decoder, bucketed by upstream entry name and discriminator, most frequent
-- first. sample_signature is the smallest signature in the bucket and stays
-- NULL when no transaction row is found (the transaction join is a LEFT JOIN).
SELECT
    json_extract(fallback.payload_json, '$.upstreamEntryName') AS upstream_entry_name,
    json_extract(fallback.payload_json, '$.upstreamDiscriminatorHex') AS upstream_discriminator_hex,
    COUNT(*) AS fallback_count,
    COUNT(DISTINCT fallback.transaction_id) AS tx_count,
    MIN(chain_tx.signature) AS sample_signature
FROM k_sol_dex_decoded_events AS fallback
LEFT JOIN k_sol_chain_transactions AS chain_tx
    ON chain_tx.id = fallback.transaction_id
WHERE fallback.protocol_name = 'upstream_git'
    AND fallback.event_kind = 'upstream_git.instruction_match'
    AND json_extract(fallback.payload_json, '$.upstreamDecoderCode') = 'pump_fun'
GROUP BY
    upstream_entry_name,
    upstream_discriminator_hex
ORDER BY
    fallback_count DESC,
    upstream_entry_name,
    upstream_discriminator_hex;
-- 01. Corpus seed: local instruction observations.
-- One row per (instruction_name, discriminator_hex) recorded for the pump_fun
-- decoder, with the number of observations, the number of distinct signatures
-- and the smallest signature as a sample. Most observed pairs come first.
SELECT
    obs.instruction_name AS instruction_name,
    obs.discriminator_hex AS discriminator_hex,
    COUNT(*) AS observed_count,
    COUNT(DISTINCT obs.signature) AS tx_count,
    MIN(obs.signature) AS sample_signature
FROM k_sol_instruction_observations AS obs
WHERE obs.decoder_code = 'pump_fun'
GROUP BY
    obs.instruction_name,
    obs.discriminator_hex
ORDER BY
    observed_count DESC,
    obs.instruction_name,
    obs.discriminator_hex;
-- 02. Coverage pump_fun.
-- Plain listing of every coverage registry row for the pump_fun decoder,
-- ordered by entry kind, entry name and discriminator. No aggregation: the
-- counters shown are the values stored in the coverage table itself.
SELECT
    cov.entry_name AS entry_name,
    cov.entry_kind AS entry_kind,
    cov.event_family AS event_family,
    cov.expected_db_target AS expected_db_target,
    cov.proof_status AS proof_status,
    cov.local_event_kind AS local_event_kind,
    cov.discriminator_hex AS discriminator_hex,
    cov.observed_count AS observed_count,
    cov.materialized_count AS materialized_count,
    cov.trade_count AS trade_count
FROM k_sol_dex_event_coverage_entries AS cov
WHERE cov.decoder_code = 'pump_fun'
ORDER BY
    cov.entry_kind,
    cov.entry_name,
    cov.discriminator_hex;
-- 03. Decoded events summary.
-- Per event kind decoded under protocol pump_fun: number of decoded rows,
-- number of distinct transactions, and the smallest transaction signature as
-- a sample (NULL when no transaction row matches; the join is a LEFT JOIN).
SELECT
    decoded.event_kind AS event_kind,
    COUNT(*) AS decoded_count,
    COUNT(DISTINCT decoded.transaction_id) AS tx_count,
    MIN(chain_tx.signature) AS sample_signature
FROM k_sol_dex_decoded_events AS decoded
LEFT JOIN k_sol_chain_transactions AS chain_tx
    ON chain_tx.id = decoded.transaction_id
WHERE decoded.protocol_name = 'pump_fun'
GROUP BY decoded.event_kind
ORDER BY
    decoded_count DESC,
    decoded.event_kind;
-- 04. Decoded pump_fun events without coverage.
-- Target after closure: empty for all locally decoded pump_fun rows.
-- Anti-join: a decoded event is reported only when no pump_fun coverage row
-- has local_event_kind equal to the decoded event_kind (the coverage side of
-- the LEFT JOIN comes back with a NULL id).
SELECT
    decoded.event_kind AS event_kind,
    COUNT(*) AS decoded_count,
    COUNT(DISTINCT decoded.transaction_id) AS tx_count,
    MIN(chain_tx.signature) AS sample_signature
FROM k_sol_dex_decoded_events AS decoded
LEFT JOIN k_sol_chain_transactions AS chain_tx
    ON chain_tx.id = decoded.transaction_id
LEFT JOIN k_sol_dex_event_coverage_entries AS cov
    ON cov.decoder_code = 'pump_fun'
    AND cov.local_event_kind = decoded.event_kind
WHERE decoded.protocol_name = 'pump_fun'
    AND cov.id IS NULL
GROUP BY decoded.event_kind
ORDER BY
    decoded_count DESC,
    decoded.event_kind;
-- 05. Residual upstream fallback for covered local entries.
-- Target after local promotion: empty for every entry that has a local_event_kind.
-- Matches each pump_fun upstream_git fallback row to the coverage entry with the
-- same decoder code, entry name and discriminator, keeping only coverage rows
-- whose local_event_kind is non-blank (same TRIM-based blank test as check 14).
-- The JSON-derived values are grouped by their full expressions on purpose:
-- the coverage table has its own source_repo column, and SQLite resolves a
-- bare name in GROUP BY to a table column before it considers a result alias,
-- so "GROUP BY source_repo" would group on the coverage column instead of the
-- upstream payload value shown in the output.
SELECT
    json_extract(ug.payload_json, '$.upstreamEntryName') AS upstream_entry_name,
    json_extract(ug.payload_json, '$.upstreamDiscriminatorHex') AS upstream_discriminator_hex,
    json_extract(ug.payload_json, '$.upstreamSourceRepo') AS source_repo,
    ce.local_event_kind,
    ce.expected_db_target,
    ce.proof_status,
    COUNT(*) AS fallback_count,
    COUNT(DISTINCT ug.transaction_id) AS tx_count,
    MIN(tx.signature) AS sample_signature
FROM k_sol_dex_decoded_events AS ug
LEFT JOIN k_sol_chain_transactions AS tx
    ON tx.id = ug.transaction_id
INNER JOIN k_sol_dex_event_coverage_entries AS ce
    ON ce.decoder_code = json_extract(ug.payload_json, '$.upstreamDecoderCode')
    AND ce.entry_name = json_extract(ug.payload_json, '$.upstreamEntryName')
    AND ce.discriminator_hex = json_extract(ug.payload_json, '$.upstreamDiscriminatorHex')
    AND ce.local_event_kind IS NOT NULL
    AND TRIM(ce.local_event_kind) <> ''
WHERE ug.protocol_name = 'upstream_git'
    AND ug.event_kind = 'upstream_git.instruction_match'
    AND json_extract(ug.payload_json, '$.upstreamDecoderCode') = 'pump_fun'
GROUP BY
    json_extract(ug.payload_json, '$.upstreamEntryName'),
    json_extract(ug.payload_json, '$.upstreamDiscriminatorHex'),
    json_extract(ug.payload_json, '$.upstreamSourceRepo'),
    ce.local_event_kind,
    ce.expected_db_target,
    ce.proof_status
-- Extra tie-breakers keep the row order stable between runs.
ORDER BY
    fallback_count DESC,
    upstream_entry_name,
    upstream_discriminator_hex,
    ce.local_event_kind;
-- 06. Successful non-materialized events without explicit skip reason.
-- Target after closure: empty, or documented exceptions with explicit skip reason in payload_json.
-- A decoded pump_fun event is reported when all of the following hold:
--   * its transaction has no error (err_json is NULL, '' or the text 'null');
--   * none of the nine business tables joined below references it;
--   * none of the five skip*Reason payload fields holds a non-blank value.
SELECT
    decoded.event_kind AS event_kind,
    COUNT(*) AS unexplained_count,
    MIN(chain_tx.signature) AS sample_signature
FROM k_sol_dex_decoded_events AS decoded
INNER JOIN k_sol_chain_transactions AS chain_tx
    ON chain_tx.id = decoded.transaction_id
LEFT JOIN k_sol_trade_events AS trade
    ON trade.decoded_event_id = decoded.id
LEFT JOIN k_sol_launch_events AS launch
    ON launch.decoded_event_id = decoded.id
LEFT JOIN k_sol_liquidity_events AS liquidity
    ON liquidity.decoded_event_id = decoded.id
LEFT JOIN k_sol_pool_lifecycle_events AS lifecycle
    ON lifecycle.decoded_event_id = decoded.id
LEFT JOIN k_sol_fee_events AS fee
    ON fee.decoded_event_id = decoded.id
LEFT JOIN k_sol_reward_events AS reward
    ON reward.decoded_event_id = decoded.id
LEFT JOIN k_sol_pool_admin_events AS admin
    ON admin.decoded_event_id = decoded.id
LEFT JOIN k_sol_orderbook_events AS orderbook
    ON orderbook.decoded_event_id = decoded.id
LEFT JOIN k_sol_token_account_events AS token_account
    ON token_account.decoded_event_id = decoded.id
WHERE decoded.protocol_name = 'pump_fun'
    -- successful transaction
    AND (chain_tx.err_json IS NULL OR chain_tx.err_json IN ('', 'null'))
    -- no business row in any target table
    AND trade.id IS NULL
    AND launch.id IS NULL
    AND liquidity.id IS NULL
    AND lifecycle.id IS NULL
    AND fee.id IS NULL
    AND reward.id IS NULL
    AND admin.id IS NULL
    AND orderbook.id IS NULL
    AND token_account.id IS NULL
    -- no explicit skip reason recorded in the payload
    AND COALESCE(TRIM(json_extract(decoded.payload_json, '$.skipTradeReason')), '') = ''
    AND COALESCE(TRIM(json_extract(decoded.payload_json, '$.skipCandleReason')), '') = ''
    AND COALESCE(TRIM(json_extract(decoded.payload_json, '$.skipLiquidityReason')), '') = ''
    AND COALESCE(TRIM(json_extract(decoded.payload_json, '$.skipLifecycleReason')), '') = ''
    AND COALESCE(TRIM(json_extract(decoded.payload_json, '$.skipCatalogReason')), '') = ''
GROUP BY decoded.event_kind
ORDER BY
    unexplained_count DESC,
    decoded.event_kind;
-- 07. Failed transaction materialization safety.
-- Target after closure: empty. Failed transactions may be decoded for audit, but must not be business-materialized.
-- Restricted to pump_fun events whose transaction carries an error (err_json is
-- neither NULL, '' nor the text 'null'). Per event kind, counts the distinct
-- rows found in each business table; a kind is listed only when at least one
-- of those counts is non-zero.
SELECT
    decoded.event_kind AS event_kind,
    COUNT(DISTINCT decoded.id) AS decoded_failed_count,
    COUNT(DISTINCT trade.id) AS trade_count,
    COUNT(DISTINCT launch.id) AS launch_count,
    COUNT(DISTINCT liquidity.id) AS liquidity_count,
    COUNT(DISTINCT lifecycle.id) AS lifecycle_count,
    COUNT(DISTINCT fee.id) AS fee_count,
    COUNT(DISTINCT reward.id) AS reward_count,
    COUNT(DISTINCT admin.id) AS admin_count,
    COUNT(DISTINCT orderbook.id) AS orderbook_count,
    COUNT(DISTINCT token_account.id) AS token_account_count,
    MIN(chain_tx.signature) AS sample_signature
FROM k_sol_dex_decoded_events AS decoded
INNER JOIN k_sol_chain_transactions AS chain_tx
    ON chain_tx.id = decoded.transaction_id
LEFT JOIN k_sol_trade_events AS trade
    ON trade.decoded_event_id = decoded.id
LEFT JOIN k_sol_launch_events AS launch
    ON launch.decoded_event_id = decoded.id
LEFT JOIN k_sol_liquidity_events AS liquidity
    ON liquidity.decoded_event_id = decoded.id
LEFT JOIN k_sol_pool_lifecycle_events AS lifecycle
    ON lifecycle.decoded_event_id = decoded.id
LEFT JOIN k_sol_fee_events AS fee
    ON fee.decoded_event_id = decoded.id
LEFT JOIN k_sol_reward_events AS reward
    ON reward.decoded_event_id = decoded.id
LEFT JOIN k_sol_pool_admin_events AS admin
    ON admin.decoded_event_id = decoded.id
LEFT JOIN k_sol_orderbook_events AS orderbook
    ON orderbook.decoded_event_id = decoded.id
LEFT JOIN k_sol_token_account_events AS token_account
    ON token_account.decoded_event_id = decoded.id
WHERE decoded.protocol_name = 'pump_fun'
    AND chain_tx.err_json IS NOT NULL
    AND chain_tx.err_json NOT IN ('', 'null')
GROUP BY decoded.event_kind
-- All counters are non-negative, so a positive total means at least one is non-zero.
HAVING (
    trade_count
    + launch_count
    + liquidity_count
    + lifecycle_count
    + fee_count
    + reward_count
    + admin_count
    + orderbook_count
    + token_account_count
) > 0
ORDER BY decoded.event_kind;
-- 08. Multi-target materialization safety.
-- Target after closure: empty. One decoded event must not feed multiple business targets.
-- The per-kind columns and the HAVING filter are unchanged: a kind is listed
-- when its events, taken together, reach more than one business table.
-- That alone cannot tell "one event feeds two targets" from "two events of the
-- same kind feed one target each", so targets are first counted per decoded
-- event and the extra column multi_target_event_count reports how many single
-- decoded events individually feed more than one target:
--   * multi_target_event_count > 0  -> the invariant above is violated;
--   * multi_target_event_count = 0  -> only a kind-level mix of targets.
-- Summing per-event distinct counts equals the per-kind distinct count because
-- every business row is joined through exactly one decoded_event_id
-- (assumes the id columns are unique keys -- confirm against the schema).
WITH event_rows AS (
    -- One row per decoded pump_fun event with its row count in each business table.
    SELECT
        de.id AS decoded_event_id,
        de.event_kind AS event_kind,
        COUNT(DISTINCT te.id) AS trade_rows,
        COUNT(DISTINCT lae.id) AS launch_rows,
        COUNT(DISTINCT lie.id) AS liquidity_rows,
        COUNT(DISTINCT ple.id) AS lifecycle_rows,
        COUNT(DISTINCT fee.id) AS fee_rows,
        COUNT(DISTINCT rew.id) AS reward_rows,
        COUNT(DISTINCT adm.id) AS admin_rows,
        COUNT(DISTINCT obe.id) AS orderbook_rows,
        COUNT(DISTINCT tae.id) AS token_account_rows
    FROM k_sol_dex_decoded_events AS de
    LEFT JOIN k_sol_trade_events AS te
        ON te.decoded_event_id = de.id
    LEFT JOIN k_sol_launch_events AS lae
        ON lae.decoded_event_id = de.id
    LEFT JOIN k_sol_liquidity_events AS lie
        ON lie.decoded_event_id = de.id
    LEFT JOIN k_sol_pool_lifecycle_events AS ple
        ON ple.decoded_event_id = de.id
    LEFT JOIN k_sol_fee_events AS fee
        ON fee.decoded_event_id = de.id
    LEFT JOIN k_sol_reward_events AS rew
        ON rew.decoded_event_id = de.id
    LEFT JOIN k_sol_pool_admin_events AS adm
        ON adm.decoded_event_id = de.id
    LEFT JOIN k_sol_orderbook_events AS obe
        ON obe.decoded_event_id = de.id
    LEFT JOIN k_sol_token_account_events AS tae
        ON tae.decoded_event_id = de.id
    WHERE de.protocol_name = 'pump_fun'
    GROUP BY
        de.id,
        de.event_kind
),
event_targets AS (
    -- Adds the number of distinct business tables fed by each decoded event.
    SELECT
        er.decoded_event_id,
        er.event_kind,
        er.trade_rows,
        er.launch_rows,
        er.liquidity_rows,
        er.lifecycle_rows,
        er.fee_rows,
        er.reward_rows,
        er.admin_rows,
        er.orderbook_rows,
        er.token_account_rows,
        (
            CASE WHEN er.trade_rows > 0 THEN 1 ELSE 0 END
            + CASE WHEN er.launch_rows > 0 THEN 1 ELSE 0 END
            + CASE WHEN er.liquidity_rows > 0 THEN 1 ELSE 0 END
            + CASE WHEN er.lifecycle_rows > 0 THEN 1 ELSE 0 END
            + CASE WHEN er.fee_rows > 0 THEN 1 ELSE 0 END
            + CASE WHEN er.reward_rows > 0 THEN 1 ELSE 0 END
            + CASE WHEN er.admin_rows > 0 THEN 1 ELSE 0 END
            + CASE WHEN er.orderbook_rows > 0 THEN 1 ELSE 0 END
            + CASE WHEN er.token_account_rows > 0 THEN 1 ELSE 0 END
        ) AS event_target_count
    FROM event_rows AS er
)
SELECT
    et.event_kind AS event_kind,
    COUNT(*) AS decoded_count,
    SUM(et.trade_rows) AS trade_count,
    SUM(et.launch_rows) AS launch_count,
    SUM(et.liquidity_rows) AS liquidity_count,
    SUM(et.lifecycle_rows) AS lifecycle_count,
    SUM(et.fee_rows) AS fee_count,
    SUM(et.reward_rows) AS reward_count,
    SUM(et.admin_rows) AS admin_count,
    SUM(et.orderbook_rows) AS orderbook_count,
    SUM(et.token_account_rows) AS token_account_count,
    (
        CASE WHEN SUM(et.trade_rows) > 0 THEN 1 ELSE 0 END
        + CASE WHEN SUM(et.launch_rows) > 0 THEN 1 ELSE 0 END
        + CASE WHEN SUM(et.liquidity_rows) > 0 THEN 1 ELSE 0 END
        + CASE WHEN SUM(et.lifecycle_rows) > 0 THEN 1 ELSE 0 END
        + CASE WHEN SUM(et.fee_rows) > 0 THEN 1 ELSE 0 END
        + CASE WHEN SUM(et.reward_rows) > 0 THEN 1 ELSE 0 END
        + CASE WHEN SUM(et.admin_rows) > 0 THEN 1 ELSE 0 END
        + CASE WHEN SUM(et.orderbook_rows) > 0 THEN 1 ELSE 0 END
        + CASE WHEN SUM(et.token_account_rows) > 0 THEN 1 ELSE 0 END
    ) AS materialized_target_count,
    SUM(CASE WHEN et.event_target_count > 1 THEN 1 ELSE 0 END) AS multi_target_event_count
FROM event_targets AS et
GROUP BY et.event_kind
HAVING materialized_target_count > 1
ORDER BY
    materialized_target_count DESC,
    et.event_kind;
-- 09. Materialization summary.
-- Informational overview: for each pump_fun event kind, the number of distinct
-- decoded events and the number of distinct rows that reference them in each
-- of the nine business tables. No filter on transaction status and no HAVING,
-- so every decoded kind appears, including kinds with all-zero counters.
SELECT
    decoded.event_kind AS event_kind,
    COUNT(DISTINCT decoded.id) AS decoded_count,
    COUNT(DISTINCT trade.id) AS trade_count,
    COUNT(DISTINCT launch.id) AS launch_count,
    COUNT(DISTINCT liquidity.id) AS liquidity_count,
    COUNT(DISTINCT lifecycle.id) AS lifecycle_count,
    COUNT(DISTINCT fee.id) AS fee_count,
    COUNT(DISTINCT reward.id) AS reward_count,
    COUNT(DISTINCT admin.id) AS admin_count,
    COUNT(DISTINCT orderbook.id) AS orderbook_count,
    COUNT(DISTINCT token_account.id) AS token_account_count
FROM k_sol_dex_decoded_events AS decoded
LEFT JOIN k_sol_trade_events AS trade
    ON trade.decoded_event_id = decoded.id
LEFT JOIN k_sol_launch_events AS launch
    ON launch.decoded_event_id = decoded.id
LEFT JOIN k_sol_liquidity_events AS liquidity
    ON liquidity.decoded_event_id = decoded.id
LEFT JOIN k_sol_pool_lifecycle_events AS lifecycle
    ON lifecycle.decoded_event_id = decoded.id
LEFT JOIN k_sol_fee_events AS fee
    ON fee.decoded_event_id = decoded.id
LEFT JOIN k_sol_reward_events AS reward
    ON reward.decoded_event_id = decoded.id
LEFT JOIN k_sol_pool_admin_events AS admin
    ON admin.decoded_event_id = decoded.id
LEFT JOIN k_sol_orderbook_events AS orderbook
    ON orderbook.decoded_event_id = decoded.id
LEFT JOIN k_sol_token_account_events AS token_account
    ON token_account.decoded_event_id = decoded.id
WHERE decoded.protocol_name = 'pump_fun'
GROUP BY decoded.event_kind
ORDER BY decoded.event_kind;
-- 10. Instruction observation versus coverage.
-- Target after closure: every observed non-transport discriminator is covered or documented.
-- Step 1 (CTE): keep pump_fun observations that carry a discriminator, leave out
-- discriminator e445a52e51cb9a1d (presumably the transport discriminator the
-- target line refers to -- confirm), and derive normalized_entry_name by
-- cutting a leading 'pump_fun.' prefix off instruction_name.
-- Step 2: match each observation to pump_fun coverage rows with the same
-- discriminator whose entry_name or local_event_kind agrees with the raw or
-- normalized instruction name; an observation with a blank instruction name
-- matches on discriminator alone. Unmatched observations show 'coverage_gap'.
WITH pump_fun_observations AS (
    SELECT
        io.decoder_code,
        io.instruction_name,
        CASE
            WHEN io.instruction_name LIKE 'pump_fun.%'
                THEN SUBSTR(io.instruction_name, LENGTH('pump_fun') + 2)
            ELSE io.instruction_name
        END AS normalized_entry_name,
        io.discriminator_hex,
        io.signature
    FROM k_sol_instruction_observations AS io
    WHERE io.decoder_code = 'pump_fun'
        AND io.discriminator_hex IS NOT NULL
        AND io.discriminator_hex NOT IN ('', 'e445a52e51cb9a1d')
)
SELECT
    obs.instruction_name AS instruction_name,
    obs.normalized_entry_name AS normalized_entry_name,
    obs.discriminator_hex AS discriminator_hex,
    COUNT(*) AS observed_count,
    COUNT(DISTINCT obs.signature) AS tx_count,
    MIN(obs.signature) AS sample_signature,
    CASE
        WHEN cov.id IS NULL THEN 'coverage_gap'
        ELSE 'covered'
    END AS observation_coverage_status,
    cov.local_event_kind AS local_event_kind,
    cov.expected_db_target AS expected_db_target,
    cov.proof_status AS proof_status
FROM pump_fun_observations AS obs
LEFT JOIN k_sol_dex_event_coverage_entries AS cov
    ON cov.decoder_code = 'pump_fun'
    AND COALESCE(cov.discriminator_hex, '') = COALESCE(obs.discriminator_hex, '')
    AND (
        COALESCE(TRIM(obs.instruction_name), '') = ''
        OR cov.entry_name = obs.instruction_name
        OR cov.entry_name = obs.normalized_entry_name
        OR cov.local_event_kind = obs.instruction_name
        OR cov.local_event_kind = ('pump_fun.' || obs.normalized_entry_name)
    )
GROUP BY
    obs.instruction_name,
    obs.normalized_entry_name,
    obs.discriminator_hex,
    observation_coverage_status,
    cov.local_event_kind,
    cov.expected_db_target,
    cov.proof_status
ORDER BY
    observed_count DESC,
    obs.instruction_name,
    obs.discriminator_hex;
-- 11. Pump.fun successful trade candidates without materialized trade.
-- Target after closure: empty. A successful trade candidate may stay without a
-- trade row only when its payload carries an explicit skipTradeReason (exact
-- amounts/direction not proven); such rows are excluded from this list.
-- The "no trade row" and "no skip reason" tests are row-level filters in WHERE.
-- Testing COUNT(te.id) = 0 in HAVING instead would hide every unmaterialized
-- candidate that shares its (event_kind, amountSource, skipTradeReason) group
-- with at least one materialized event.
-- The GROUP BY repeats the JSON expressions rather than the aliases, so the
-- grouping cannot silently bind to a same-named column of a joined table.
-- trade_count is kept for column compatibility; it is always 0 here.
SELECT
    de.event_kind,
    json_extract(de.payload_json, '$.amountSource') AS amount_source,
    json_extract(de.payload_json, '$.skipTradeReason') AS skip_trade_reason,
    COUNT(*) AS decoded_count,
    COUNT(te.id) AS trade_count,
    MIN(tx.signature) AS sample_signature
FROM k_sol_dex_decoded_events AS de
INNER JOIN k_sol_chain_transactions AS tx
    ON tx.id = de.transaction_id
LEFT JOIN k_sol_trade_events AS te
    ON te.decoded_event_id = de.id
WHERE de.protocol_name = 'pump_fun'
    AND de.event_kind IN (
        'pump_fun.buy',
        'pump_fun.sell',
        'pump_fun.buy_v2',
        'pump_fun.sell_v2',
        'pump_fun.buy_exact_sol_in',
        'pump_fun.buy_exact_quote_in_v2',
        'pump_fun.trade_event'
    )
    -- successful transaction: err_json is NULL, '' or the text 'null'
    AND (
        tx.err_json IS NULL
        OR tx.err_json = ''
        OR tx.err_json = 'null'
    )
    -- this decoded event produced no trade row
    AND te.id IS NULL
    -- and the payload gives no explicit reason for skipping the trade
    AND COALESCE(TRIM(json_extract(de.payload_json, '$.skipTradeReason')), '') = ''
GROUP BY
    de.event_kind,
    json_extract(de.payload_json, '$.amountSource'),
    json_extract(de.payload_json, '$.skipTradeReason')
ORDER BY
    decoded_count DESC,
    de.event_kind,
    amount_source;
-- 12. Global watchlist after pump_fun replay.
-- Expected after local promotion: pump_fun rows should no longer dominate this list unless explicitly deferred.
-- Unlike check 00 this is not restricted to pump_fun: it buckets every
-- upstream_git instruction match by upstream decoder code, entry name and
-- discriminator, largest bucket first.
SELECT
    json_extract(fallback.payload_json, '$.upstreamDecoderCode') AS upstream_decoder_code,
    json_extract(fallback.payload_json, '$.upstreamEntryName') AS upstream_entry_name,
    json_extract(fallback.payload_json, '$.upstreamDiscriminatorHex') AS upstream_discriminator_hex,
    COUNT(*) AS decoded_count,
    COUNT(DISTINCT fallback.transaction_id) AS tx_count,
    MIN(chain_tx.signature) AS sample_signature
FROM k_sol_dex_decoded_events AS fallback
LEFT JOIN k_sol_chain_transactions AS chain_tx
    ON chain_tx.id = fallback.transaction_id
WHERE fallback.protocol_name = 'upstream_git'
    AND fallback.event_kind = 'upstream_git.instruction_match'
GROUP BY
    upstream_decoder_code,
    upstream_entry_name,
    upstream_discriminator_hex
ORDER BY
    decoded_count DESC,
    upstream_decoder_code,
    upstream_entry_name;
-- 13. Pump.fun Solscan-IDL-only instruction coverage.
-- Target after the first Rust delta: all rows below must be covered after coverage sync.
-- Lists the pump_fun coverage rows whose source_repo is 'manual-solscan',
-- with their provenance, local mapping and stored counters, ordered by entry name.
SELECT
    cov.entry_name AS entry_name,
    cov.discriminator_hex AS discriminator_hex,
    cov.source_repo AS source_repo,
    cov.source_path AS source_path,
    cov.local_event_kind AS local_event_kind,
    cov.expected_db_target AS expected_db_target,
    cov.proof_status AS proof_status,
    cov.observed_count AS observed_count,
    cov.materialized_count AS materialized_count,
    cov.trade_count AS trade_count
FROM k_sol_dex_event_coverage_entries AS cov
WHERE cov.source_repo = 'manual-solscan'
    AND cov.decoder_code = 'pump_fun'
ORDER BY cov.entry_name;
-- 14. Pump.fun Anchor event coverage local kind check.
-- Target after full decoder delta: every Pump.fun event registry row has a local_event_kind.
-- Reports pump_fun coverage rows of entry_kind 'event' whose local_event_kind
-- is missing, i.e. NULL, empty or whitespace-only.
SELECT
    cov.entry_name AS entry_name,
    cov.discriminator_hex AS discriminator_hex,
    cov.local_event_kind AS local_event_kind,
    cov.expected_db_target AS expected_db_target,
    cov.proof_status AS proof_status,
    cov.observed_count AS observed_count,
    cov.materialized_count AS materialized_count
FROM k_sol_dex_event_coverage_entries AS cov
WHERE cov.decoder_code = 'pump_fun'
    AND cov.entry_kind = 'event'
    -- TRIM(NULL) is NULL, so COALESCE folds the NULL and blank cases together.
    AND COALESCE(TRIM(cov.local_event_kind), '') = ''
ORDER BY cov.entry_name;
-- 15. Pump.fun decoded Anchor events summary.
-- Informational: real corpus may be empty until an Anchor event log/self-CPI appears.
-- Keeps decoded pump_fun events whose payload has a non-blank anchorEventName
-- and counts them per event kind, Anchor event name and Anchor discriminator.
SELECT
    decoded.event_kind AS event_kind,
    json_extract(decoded.payload_json, '$.anchorEventName') AS anchor_event_name,
    json_extract(decoded.payload_json, '$.anchorEventDiscriminatorHex') AS anchor_event_discriminator_hex,
    COUNT(*) AS decoded_count,
    COUNT(DISTINCT decoded.transaction_id) AS tx_count,
    MIN(chain_tx.signature) AS sample_signature
FROM k_sol_dex_decoded_events AS decoded
LEFT JOIN k_sol_chain_transactions AS chain_tx
    ON chain_tx.id = decoded.transaction_id
WHERE decoded.protocol_name = 'pump_fun'
    -- A missing name yields NULL here, which the comparison rejects just like ''.
    AND TRIM(json_extract(decoded.payload_json, '$.anchorEventName')) <> ''
GROUP BY
    decoded.event_kind,
    anchor_event_name,
    anchor_event_discriminator_hex
ORDER BY
    decoded_count DESC,
    decoded.event_kind;