SELECT t1.platform,t1.user_id,t1.seq,t2.click_url FROM_URL,t1.click_url TO_URL FROM
(SELECT platform,user_id,click_time,click_url,count(1) seq FROM (SELECT a.*,b.click_time click_time1,b.click_url click_url2 FROM trlog a left outer join trlog b on a.user_id = b.user_id)t WHERE click_time>=click_time1 GROUP BY platform,user_id,click_time,click_url)t1
LEFT OUTER JOIN
(SELECT platform,user_id,click_time,click_url,count(1) seq FROM (SELECT a.*,b.click_time click_time1,b.click_url click_url2 FROM trlog a left outer join trlog b on a.user_id = b.user_id)t WHERE click_time>=click_time1 GROUP BY platform,user_id,click_time,click_url )t2
on t1.user_id = t2.user_id and t1.seq = t2.seq + 1;
第一个写法在hive-0.7.1上面运行有问题,估计是row_number的bug,用
select platform, user_id, row_number(user_id),click_time,click_url from trlog group by platform, user_id, click_time,click_url;
代替
select *,row_number(user_id)seq from trlog结果正确。
在hive-0.10上运行成功。