??????Hive????wiki??????Hive0.7??????????????????index????????????????????к?????????ο????Щ???????????????飬????1???????
?????.???????????
????1.??????gen-data.sh?????????????
#! /bin/bash
#generating 1.7G raw data.
#
# Writes 5,000,000 lines to stdout. Each line is a sequential id (0 through
# 4999999) followed by a fixed English sentence used as filler text.
# Redirect stdout to a file to capture the data set.
#
# The commas in the sentence had been corrupted into "??" by a character-set
# conversion; they are restored here so the generated rows hold clean text.
# Only POSIX shell features are used, so the script also runs under plain `sh`.
#
# NOTE(review): the gap between the id and the sentence is a run of spaces in
# this copy of the script; it has to agree with the field delimiter of the
# table the data is loaded into -- confirm against the original script.

total_rows=5000000
filler='A decade ago, many were predicting that Cooke, a New York City prodigy, would become a basketball shoe pitchman and would flaunt his wares and skills at All-Star weekends like the recent aerial show in Orlando, Fla. There was a time, however fleeting, when he was more heralded, or perhaps merely hyped, than any other high school player in America.'

i=0
# -lt (rather than -ne) guarantees termination even if i ever skips the bound.
while [ "$i" -lt "$total_rows" ]
do
    echo "$i        $filler"
    i=$((i + 1))
done
????2.???????
?????????????: sh gen-data.sh >dual.txt???????????????????.
??????.Hive???????????
????1.??????????????????????????μ??id??name????????????????
    create table table01(id int, name string) row format delimited fields terminated by ' ';
????2.?????????????
????load data local inpath '~/testData/hive/dataScripts/dual.txt' overwrite into table table01; (???Time taken: 160.787 seconds)
????3.????table02????????????table01
    create table table02 as select id, name as text from table01; (Time taken: 154.463 seconds)
????4.???????
    select * from table02 where id=500000; (Time taken: 30.463 seconds, Fetched: 1 row(s))
???????dfs -ls /user/hive/warehouse/????????table01??table02??????????????????
????5.????hive??CompactIndexHandler?id??????????????
    create index table02_index on table table02(id) as 'org.apache.hadoop.hive.ql.index.compact.CompactIndexHandler' with deferred rebuild;
    alter index table02_index on table02 rebuild;  (Time taken: 112.451 seconds)
????????????????????????deferred rebuild????????????????????empty?????alter index??????????index structure.
????6.???????????????????????????????
    hive> select * from default__table02_table02_index__ limit 3;
    OK
    9    hdfs://littleNameservice/user/hive/warehouse/table02/000000_0    [3168]
    36    hdfs://littleNameservice/user/hive/warehouse/table02/000000_0    [12698]
    63    hdfs://littleNameservice/user/hive/warehouse/table02/000000_0    [22229]
?????????????????????????У???????????е???????????????????????λ??????????????λ???е???????????????????
?????????????????????????????????????????λ????????????????λ???????block??????????????????????.
????7.??β??????
    select * from table02 where id=500000; (Time taken: 29.226 seconds, Fetched: 1 row(s))
????????????30.463????????仯??????????о?
????8.???????????????ü??????
    SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
    Insert overwrite directory "/tmp/table02_index_data" select `_bucketname`, `_offsets` from  default__table02_table02_index__ where id =500000;
    Set hive.index.compact.file=/tmp/table02_index_data;
    Set hive.optimize.index.filter=false;
    Set hive.input.format=org.apache.hadoop.hive.ql.index.compact.HiveCompactIndexInputFormat;
?????????????????????????????????????????????id = 500000??????????е???????default__table02_table02_index__ ?вü?????????????tmp????????????????????
??????????????????
????9.????????
    select * from table02 where id =500000; (Time taken: 17.259 seconds, Fetched: 1 row(s))
?????????α??17??????????????Ч??.????о????????.
???????????:??????wiki??jira?????????????????Hive???????????????????????????B?????????????????????lookup????????????????????????????range?????????
??????Щ???????????????????????в?????????????????????????????????????вü?????????Ч??????о??????????????????????????????????.