-- ...省略部分代码...
profiles_summary as (
select (
case
when length(short_name) >= 20 then 20 -- 域名长度大于20时,视为20对待
else length(short_name) -- 域名长度小于20,直接使用其长度值
end) as name_length, -- 将case语句评估返回的结果命名为一个新的字段
handle_type,
count(*) as name_count
from profile_created
group by 1, 2
),
profiles_total as (
select count(*) as total_profile_count,
sum(case
when handle_type = 'Pure Digits' then 1 -- 类型值等于给定值,返回1
else 0 -- 类型值不等于给定值,返回 0
end
) as pure_digit_profile_count,
sum(case
when handle_type = 'Pure Letters' then 1 -- 类型值等于给定值,返回1
else 0 -- 类型值不等于给定值,返回 0
end
) as pure_letter_profile_count
from profile_created
)
-- ...省略部分代码...
其中,function可以是排名窗口函数、分析窗口函数或者聚合函数。over是固定必须使用的关键字。window_spec部分又有两种可能的变化:partition by partition_feild order by order_field或者order by order_field,分别表示先分区再排序和不分区直接排序。除了把所有行当作同一个分组的情况外,分组函数必须配合 order by来使用。
with pool_details as (
select date_trunc('day', evt_block_time) as block_date, evt_tx_hash, pool
from uniswap_v3_ethereum.Factory_evt_PoolCreated
where evt_block_time >= now() - interval '29' day
)
select block_date, count(pool) as pool_count
from pool_details
group by 1
order by 1
with pool_details as (
select date_trunc('day', evt_block_time) as block_date, evt_tx_hash, pool
from uniswap_v3_ethereum.Factory_evt_PoolCreated
where evt_block_time >= now() - interval '29' day
),
pool_summary as (
select block_date,
count(pool) as pool_count
from pool_details
group by 1
order by 1
)
select block_date,
pool_count,
lag(pool_count, 1) over (order by block_date) as pool_count_previous, -- 使用Lag()函数获取前一天的值
pool_count - (lag(pool_count, 1) over (order by block_date)) as pool_count_diff -- 相减得到变化值
from pool_summary
order by block_date
with post_data as (
-- 获取原始发帖详细数据,请参考完整SQL链接
),
top_post_profiles as (
select profile_id,
count(*) as post_count
from post_data
group by 1
order by 2 desc
limit 50
)
select row_number() over (order by post_count desc) as rank_id, -- 生成连续行号,用来表示排名
profile_id,
post_count,
lead(post_count, 1) over (order by post_count desc) as post_count_next, -- 获取下一行的发帖数据
post_count - (lead(post_count, 1) over (order by post_count desc)) as post_count_diff -- 计算当前行和下一行的发帖数量差
from top_post_profiles
order by post_count desc
Row_Number() 是一个排名类型的窗口函数,用于按照指定的排序方式生成不同的行号,从1开始连续编号。在上一个例子中,我们已经使用了row_number() over (order by post_count desc) as rank_id来生成行号用来表示排名,这里不再举例。如果结合partition by分区字句,Row_Number()将在每一个分区内部从1开始编号。利用这个特性,我们可以用来实现一些高级筛选。例如,我们有一组Token地址,需要计算并返回他们最近1小时内的平均价格。考虑到Dune的数据会存在一到几分钟的延迟,如果按当前系统日期的“小时”数值筛选,并不一定总是能返回需要的价格数据。相对更安全的方法是扩大取值的时间范围,然后从中筛选出每个Token最近的那条记录。这样即使出现数据有几个小时的延迟的特殊情况,我们的查询仍然可以工作良好。此时我们可以使用Row_Number()函数结合partition by来按分区生成行号再根据行号筛选出需要的数据。
with latest_token_price as (
select date_trunc('hour', minute) as price_date, -- 按小时分组计算
contract_address,
symbol,
decimals,
avg(price) as price -- 计算平均价格
from prices.usd
where contract_address in (
0xdac17f958d2ee523a2206206994597c13d831ec7,
0x2260fac5e5542a773aa44fbcfedf7c193bc2c599,
0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2,
0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48,
0x7fc66500c84a76ad7e9c93437bfc5ac33e2ddae9
)
and minute > now() - interval '1' day -- 取最后一天内的数据,确保即使数据有延迟也工作良好
group by 1, 2, 3, 4
),
latest_token_price_row_num as (
select price_date,
contract_address,
symbol,
decimals,
price,
row_number() over (partition by contract_address order by price_date desc) as row_num -- 按分区单独生成行号
from latest_token_price
)
select contract_address,
symbol,
decimals,
price
from latest_token_price_row_num
where row_num = 1 -- 按行号筛选出每个token最新的平均价格