-- Hive 建表、导入数据并转换为 ORC 格式的操作记录
-- HDFS 更改目录属主(在 shell 中执行,非 Hive 语句):sudo -u hdfs hadoop fs -chown carpo:hdfs /apps/carpo
-- Session setup: list the available databases, switch to tpcds_orc, and list
-- the tables it currently contains.
SHOW DATABASES;
USE tpcds_orc;
SHOW TABLES;
-- Drops a table named tpcds_orc from the current database.
-- NOTE(review): tpcds_orc is also the name of the database selected above --
-- confirm that a table with this name is really the intended target.
DROP TABLE tpcds_orc;
-- Staging table that exposes the raw '|'-delimited customer_address text data
-- in place, without copying it into the warehouse.
--
-- EXTERNAL: the table only points at the data, so DROP TABLE removes the
-- metadata and leaves the source files untouched (a managed table would
-- delete them).
-- LOCATION: must name a directory, not a single file; Hive reads every file
-- found under it.
-- NOTE(review): assumes customer_address.dat is stored under the HDFS
-- directory below -- confirm the actual path before running.
CREATE EXTERNAL TABLE customer_address_tmp (
    ca_address_sk    INT,
    ca_address_id    CHAR(16),
    ca_street_number CHAR(10),
    ca_street_name   VARCHAR(60),
    ca_street_type   CHAR(15),
    ca_suite_number  CHAR(10),
    ca_city          VARCHAR(60),
    ca_county        VARCHAR(30),
    ca_state         CHAR(2),
    ca_zip           CHAR(10),
    ca_country       VARCHAR(20),
    ca_gmt_offset    DECIMAL(5,2),
    ca_location_type CHAR(20)
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\|'
STORED AS TEXTFILE
LOCATION '/data1/tpcds/data100/customer_address';
-- 1. 创建普通表(文本格式,用于接收原始数据文件)
-- Managed table that receives the raw '|'-delimited customer_address file.
--
-- STORED AS TEXTFILE is spelled out because LOAD DATA copies the input file
-- into the table directory verbatim: the table's declared format has to match
-- the file, and the session default (hive.default.fileformat) is not
-- guaranteed to be text on every cluster.
CREATE TABLE customer_address (
    ca_address_sk    INT,
    ca_address_id    CHAR(16),
    ca_street_number CHAR(10),
    ca_street_name   VARCHAR(60),
    ca_street_type   CHAR(15),
    ca_suite_number  CHAR(10),
    ca_city          VARCHAR(60),
    ca_county        VARCHAR(30),
    ca_state         CHAR(2),
    ca_zip           CHAR(10),
    ca_country       VARCHAR(20),
    ca_gmt_offset    DECIMAL(5,2),
    ca_location_type CHAR(20)
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\|'
STORED AS TEXTFILE;
-- 2. 导入数据(将本地数据文件加载到 customer_address 表)
-- Load the generated customer_address data file into the customer_address
-- table. LOCAL makes Hive read the path from the local filesystem rather than
-- HDFS; without OVERWRITE the file is added to whatever the table already holds.
LOAD DATA LOCAL INPATH '/data1/tpcds/data100/customer_address.dat'
INTO TABLE customer_address;
-- 3. 转换成 ORC 格式
-- Convert customer_address to ORC by rewriting its rows.
--
-- ALTER TABLE ... SET FILEFORMAT ORC is deliberately not used: it only changes
-- the table metadata and leaves the data files that are already in the table
-- as plain text, so later reads would try to parse text files as ORC and fail.
-- Instead, the text-format table is kept under a new name and its rows are
-- copied into a fresh ORC table that takes over the original name.
-- customer_address_text can be dropped once the row counts have been verified.
ALTER TABLE customer_address RENAME TO customer_address_text;

CREATE TABLE customer_address
STORED AS ORC
AS
SELECT
    ca_address_sk,
    ca_address_id,
    ca_street_number,
    ca_street_name,
    ca_street_type,
    ca_suite_number,
    ca_city,
    ca_county,
    ca_state,
    ca_zip,
    ca_country,
    ca_gmt_offset,
    ca_location_type
FROM customer_address_text;