進入hive客戶端后:
1、建表:
-- create a partitioned table; '--' is the HiveQL comment marker ('//' is invalid)
create table page_view(
    viewTime int,
    userid bigint,
    page_url string,
    referrer_url string,
    ip string comment 'IP Address of the User'
)
comment 'This is the page view table'
partitioned by (dt string, country string)
row format delimited fields terminated by '\t'  -- field delimiter within a row
stored as sequencefile;  -- alternatively: stored as textfile
//sequencefile 是以鍵值對組織的二進制文件
//hive的默認庫是default,表也是一個文件夾,置于hdfs的/user/hive/warehouse下
//新建的庫就是在hdfs的/user/hive/warehouse下新建一個庫名文件夾
2、在hive客戶端將本地文件上傳至數據倉庫
實際就是將文件上傳至對應的hdfs中表文件夾下。
-- upload a local file into the table's warehouse directory
-- NOTE: Hive does not expand '~'; the local path must be absolute
load data local inpath '/home/hadoop/data.txt' into table table_name;
//此操作也可通過hadoop fs -put ~/data.txt /user/hive/warehouse/庫名/表名 來實現
-- without LOCAL, the file is MOVED from its HDFS location into the table directory
load data inpath '/data.txt' into table table_name;
3、外表 external
表:文件找表
外表:表找文件
-- external table: dropping it removes only the metadata, not the data files
create external table tab_ip_ext(
    id int,
    name string,
    ip string,
    country string
)
row format delimited fields terminated by ','
stored as textfile  -- was "testfile", which is not a valid storage format
location '/hdfs的文件路徑';  -- the ';' belongs outside the quoted path
4、類似視圖 CTAS:用于創建一些臨時表存儲中間結果
-- CTAS: create a table directly from a query result (handy for intermediate results)
create table tab_ip_ctas
as
select
    id      as new_id,
    name    as new_name,
    ip      as new_ip,
    country as new_country
from tab_ip_ext
sort by new_id;
5、向表中寫入查詢數據
-- LIKE copies only the schema, no data
create table tab_ip_like like tab_ip;
-- "insert into" appends; "insert overwrite" replaces the table contents
insert overwrite table tab_ip_like
select * from tab_ip;
6、分區 PARTITION
分區就是在表的文件夾下再建一層文件夾,文件夾名稱為"分區字段名=分區值",如"part_flag=part1"
-- a partition is a sub-directory under the table directory,
-- named "partition_column=value", e.g. "part_flag=part1"
create table tab_ip_part(
    id int,
    name string,
    ip string,
    country string
)
partitioned by (part_flag string)  -- partition column is a pseudo-column; it must NOT repeat a table column
row format delimited fields terminated by ',';

load data local inpath '/home/hadoop/ip.txt'
overwrite into table tab_ip_part partition(part_flag='part1');

load data local inpath '/home/hadoop/ip_part2.txt'
overwrite into table tab_ip_part partition(part_flag='part2');

select * from tab_ip_part;
-- the partition column can be queried like an ordinary column;
-- its value ('part2') appears as the last column of each row
select * from tab_ip_part where part_flag='part2';
select count(*) from tab_ip_part where part_flag='part2';
7、將查詢結果寫入文件
//寫入本地文件
-- write query results to the LOCAL filesystem;
-- NOTE: the path names a DIRECTORY, not a single file, despite the ".txt" suffix
insert overwrite local directory '/tmp/test.txt'
select * from tab_ip_part where part_flag='part1';
//寫入hdfs
-- write query results to HDFS; the path names a directory, not a file
insert overwrite directory '/hiveout.txt' select * from tab_ip_part where part_flag='part1';
-- array column type: fields split on '\t', array elements split on ','
create table tab_array(a array<int>,b array<string>)
row format delimited
fields terminated by '\t'
collection items terminated by ',';
示例數據(列順序與表定義一致:a為int數組,b為string數組,以\t分隔):
1,2,3	one,two,three
4,5,6	d,e,f
-- index into an array column (0-based)
select a[0] from tab_array;
-- array_contains tests membership of an element in an array column
select * from tab_array where array_contains(b,'word');
-- array(...) builds an array literal from the listed values
insert into table tab_array select array(0),array(name,ip) from tab_ext t;
9、數據類型為map
-- map column type: entries separated by ';', key and value separated by ':'
create table tab_map(name string,info map<string,string>)
row format delimited
fields terminated by '\t'
collection items terminated by ';'
map keys terminated by ':';
示例數據:
name key1:value1;key2:value2;key3:value3
-- map(k1,v1,k2,v2,...) builds a map literal from alternating key/value arguments
insert into table tab_map select name,map('name',name,'ip',ip) from tab_ext;
10、數據類型為struct
-- struct column type: a fixed set of named, typed fields
create table tab_struct(name string,info struct<age:int,tel:string,addr:string>)
row format delimited
fields terminated by '\t'
collection items terminated by ',';  -- the terminating ';' was missing, fusing the DDL into the INSERT below

-- named_struct('field1',v1,'field2',v2,...) builds a struct value
insert into table tab_struct select name,named_struct('age',id,'tel',name,'addr',country) from tab_ext;
11、在shell環境下執行HQL
# -S = silent mode, -e = run the quoted HQL and exit;
# count(*) with a non-aggregated column requires GROUP BY (it was missing)
hive -S -e 'select country,count(*) from 庫名.表名 group by country' > /tmp/query.txt
可用腳本(bash,python)進行hql批量查詢
12、用戶自定義函數?UDF
-- pseudo-code: a custom UDF used inline in a query, e.g. one that returns
-- 'first' when id=1 and 'no-first' otherwise — not literal runnable HQL
select udf(id=1,first,no-first),name from tab_ext;
寫一個Java類,定義udf函數邏輯,此類繼承UDF,重寫evaluate方法,public修飾,參數與返回值按需指定;
打成jar包,告知hive此jar包的位置:hive> add jar /..../xxx.jar
在hive中創建函數udf,將udf和此Java類掛鉤即可:
hive>CREATE TEMPORARY FUNCTION my_udf AS 'org.dht.Lower'; -- AS後為類全名;退出會話后,my_udf失去作用
-- sequencefile storage (binary key/value format)
create table tab_ip_seq(
    id int,
    name string,
    ip string,
    country string
)
row format delimited fields terminated by ','  -- was fused: "delimitedfields ... ','stored"
stored as sequencefile;

insert overwrite table tab_ip_seq select * from tab_ext;
14、分區將不同的記錄放在不同的文件夾中,分桶將不同的記錄置于不同的文件 CLUSTER
-- bucketing: rows are hashed on the clustered-by column into N files
create table tab_ip_cluster(id int,name string,ip string,country string)
clustered by(id) into 3 buckets;

-- WARNING: LOAD DATA only copies the file as-is; it does NOT bucket the rows
load data local inpath '/.../ip.txt' overwrite into table tab_ip_cluster;

-- bucketing only takes effect via insert ... select with enforcement enabled
set hive.enforce.bucketing=true;
insert into table tab_ip_cluster select * from tab_ip;

-- uniform sampling: take bucket 2 of 3, hashed on id
select * from tab_ip_cluster tablesample(bucket 2 out of 3 on id);
用于抽樣,保證均勻抽樣。