#1.创建source表
CREATE TABLE source_table (
user_id STRING,
price BIGINT,
`timestamp` bigint,
row_time AS TO_TIMESTAMP(FROM_UNIXTIME(`timestamp`)),
watermark for row_time as row_time – interval ‘0’ second
) WITH (
‘connector’=’socket’,
‘hostname’=’node1’,
‘port’=’9999’,
‘format’=’csv’
);
#2.语法
tumble(事件时间列,窗口大小)
窗口大小是用户自定义的。比如30分钟、1小时等。
直接把tumble窗口放在group by语句后即可。
比如:tumble(row_time,interval ‘5’ second)
含义:定义一个5秒大小的滚动窗口。
#3.数据处理
select
user_id,
count(*) as pv,
sum(price) as sum_price,
UNIX_TIMESTAMP(CAST(tumble_start(row_time, interval ‘5’ second) AS STRING)) * 1000 as window_start,
UNIX_TIMESTAMP(CAST(tumble_end(row_time, interval ‘5’ second) AS STRING)) * 1000 as window_end
from source_table
group by
user_id,
tumble(row_time, interval ‘5’ second);
CREATE TABLE source_table (
user_id STRING,
price BIGINT,
`timestamp` bigint,
row_time AS TO_TIMESTAMP(FROM_UNIXTIME(`timestamp`)),
watermark for row_time as row_time – interval ‘0’ second
) WITH (
‘connector’=’socket’,
‘hostname’=’node1’,
‘port’=’9999’,
‘format’=’csv’
);
#2.语法
tumble(事件时间列,窗口大小)
窗口大小是用户自定义的。比如30分钟、1小时等。
直接把tumble窗口放在group by语句后即可。
比如:tumble(row_time,interval ‘5’ second)
含义:定义一个5秒大小的滚动窗口。
#3.数据处理
select
user_id,
count(*) as pv,
sum(price) as sum_price,
UNIX_TIMESTAMP(CAST(tumble_start(row_time, interval ‘5’ second) AS STRING)) * 1000 as window_start,
UNIX_TIMESTAMP(CAST(tumble_end(row_time, interval ‘5’ second) AS STRING)) * 1000 as window_end
from source_table
group by
user_id,
tumble(row_time, interval ‘5’ second);
© 版权声明
文章版权归作者所有,未经允许请勿转载。