Hive Kerberos Installation

We are going to install Hive over Hadoop and perform a basic query. Ensure you install Kerberos and Hadoop with Kerberos.

This assumes your hostname is “hadoop”

Download Hive:

wget http://apache.forsale.plus/hive/hive-2.3.3/apache-hive-2.3.3-bin.tar.gz
tar -xzf apache-hive-2.3.3-bin.tar.gz
sudo mv apache-hive-2.3.3-bin /usr/local/hive
sudo chown -R root:hadoopuser /usr/local/hive/

Setup .bashrc:

 sudo nano ~/.bashrc

Add the following to the end of the file.

#HIVE VARIABLES START
export HIVE_HOME=/usr/local/hive
export HIVE_CONF_DIR=/usr/local/hive/conf
export PATH=$PATH:$HIVE_HOME/bin
export CLASSPATH=$CLASSPATH:/usr/local/hadoop/lib/*:/usr/local/hive/lib/*
#HIVE VARIABLES STOP

 source ~/.bashrc

Create warehouse on hdfs

kinit -kt /etc/security/keytabs/myuser.keytab myuser/hadoop@REAL.CA
hdfs dfs -mkdir -p /user/hive/warehouse
hdfs dfs -mkdir /tmp
hdfs dfs -chmod g+w /tmp
hdfs dfs -chmod g+w /user/hive/warehouse

Create Kerberos Principals

cd /etc/security/keytabs
sudo kadmin.local
addprinc -randkey hive/hadoop@REALM.CA
addprinc -randkey hivemetastore/hadoop@REALM.CA
addprinc -randkey hive-spnego/hadoop@REALM.CA
xst -kt hive.service.keytab hive/hadoop@REALM.CA
xst -kt hivemetastore.service.keytab hivemetastore/hadoop@REALM.CA
xst -kt hive-spnego.service.keytab hive-spnego/hadoop@REALM.CA
q

Set Keytab Permissions/Ownership

sudo chown root:hadoopuser /etc/security/keytabs/*
sudo chmod 750 /etc/security/keytabs/*

hive-env.sh

cd $HIVE_HOME/conf
sudo cp hive-env.sh.template hive-env.sh

sudo nano /usr/local/hive/conf/hive-env.sh

#locate "HADOOP_HOME" and change to be this
export HADOOP_HOME=/usr/local/hadoop

#locate "HIVE_CONF_DIR" and change to be this
export HIVE_CONF_DIR=/usr/local/hive/conf

hive-site.xml

Chekck out this link for the configuration properties.

sudo cp /usr/local/hive/conf/hive-default.xml.template /usr/local/hive/conf/hive-site.xml

sudo nano /usr/local/hive/conf/hive-site.xml

#Modify the following properties

<property>
	<name>system:user.name</name>
	<value>${user.name}</value>
</property>
<property>
	<name>javax.jdo.option.ConnectionURL</name>
	<value>jdbc:postgresql://myhost:5432/metastore</value>
</property>
<property>
	<name>javax.jdo.option.ConnectionDriverName</name>
	<value>org.postgresql.Driver</value>
</property>
<property>
	<name>hive.metastore.warehouse.dir</name>
	<value>/user/hive/warehouse</value>
</property>
<property>
	<name>javax.jdo.option.ConnectionUserName</name>
	<value>hiveuser</value>
</property>
<property>
	<name>javax.jdo.option.ConnectionPassword</name>
	<value>PASSWORD</value>
</property>
<property>
	<name>hive.exec.local.scratchdir</name>
	<value>/tmp/${system:user.name}</value>
	<description>Local scratch space for Hive jobs</description>
</property>
<property>
	<name>hive.querylog.location</name>
	<value>/tmp/${system:user.name}</value>
	<description>Location of Hive run time structured log file</description>
</property>
<property>
	<name>hive.downloaded.resources.dir</name>
	<value>/tmp/${hive.session.id}_resources</value>
	<description>Temporary local directory for added resources in the remote file system.</description>
</property>
<property>
	<name>hive.server2.logging.operation.log.location</name>
	<value>/tmp/${system:user.name}/operation_logs</value>
	<description>Top level directory where operation logs are stored if logging functionality is enabled</description>
</property>
<property>
	<name>hive.metastore.uris</name>
	<value>thrift://0.0.0.0:9083</value>
	<description>IP address (or fully-qualified domain name) and port of the metastore host</description>
</property>
<property>
	<name>hive.server2.webui.host</name> 
	<value>0.0.0.0</value>
</property>
<property>
	<name>hive.server2.webui.port</name> 
	<value>10002</value>
</property>
<property>
	<name>hive.metastore.port</name>
	<value>9083</value>
</property>
<property>
	<name>hive.server2.transport.mode</name>
	<value>binary</value>
</property>
<property>
	<name>hive.server2.thrift.sasl.qop</name>
	<value>auth-int</value>
</property>
<property>
	<name>hive.server2.authentication</name>
	<value>KERBEROS</value>
	<description>authenticationtype</description>     
</property>
<property>
	<name>hive.server2.authentication.kerberos.principal</name>
	<value>hive/_HOST@REALM.CA</value>
	<description>HiveServer2 principal. If _HOST is used as the FQDN portion, it will be replaced with the actual hostname of the running instance.</description>
</property>
<property>
	<name>hive.server2.authentication.kerberos.keytab</name>
	<value>/etc/security/keytabs/hive.service.keytab</value>
	<description>Keytab file for HiveServer2 principal</description>  
</property>
<property>
	<name>hive.metastore.sasl.enabled</name>
	<value>true</value>
	<description>If true, the metastore thrift interface will be secured with SASL. Clients
	must authenticate with Kerberos.</description>
</property>
<property>
	<name>hive.metastore.kerberos.keytab.file</name>
	<value>/etc/security/keytabs/hivemetastore.service.keytab</value>
	<description>The path to the Kerberos Keytab file containing the metastore thrift 
	server's service principal.</description>
</property>
<property>
	<name>hive.metastore.kerberos.principal</name>
	<value>hivemetastore/_HOST@REALM.CA</value>
	<description>The service principal for the metastore thrift server. The special string _HOST will be replaced automatically with the correct host name.</description>
</property>
<property>
	<name>hive.security.authorization.enabled</name>
	<value>true</value>
	<description>enable or disable the hive client authorization</description>
</property>
<property>
	<name>hive.metastore.pre.event.listeners</name>
	<value>org.apache.hadoop.hive.ql.security.authorization.AuthorizationPreEventListener</value>
	<description>List of comma separated listeners for metastore events.</description>
</property>
<property>
	<name>hive.security.metastore.authorization.manager</name>
	<value>org.apache.hadoop.hive.ql.security.authorization.StorageBasedAuthorizationProvider</value>
	<description>
	Names of authorization manager classes (comma separated) to be used in the metastore
	for authorization. The user defined authorization class should implement interface
	org.apache.hadoop.hive.ql.security.authorization.HiveMetastoreAuthorizationProvider.
	All authorization manager classes have to successfully authorize the metastore API
	call for the command execution to be allowed.
</description>
</property>
<property>
	<name>hive.security.metastore.authenticator.manager</name>
	<value>org.apache.hadoop.hive.ql.security.HadoopDefaultMetastoreAuthenticator</value>
	<description>
	authenticator manager class name to be used in the metastore for authentication.
	The user defined authenticator should implement interface org.apache.hadoop.hive.ql.security.HiveAuthenticationProvider.
</description>
</property>
<property>
	<name>hive.security.metastore.authorization.auth.reads</name>
	<value>true</value>
	<description>If this is true, metastore authorizer authorizes read actions on database, table</description>
</property>
<property>     
	<name>datanucleus.autoCreateSchema</name>     
	<value>false</value>
</property>

Hadoop core-site.xml

Notice here how it’s .hive. that is used with the storage based authentication.

sudo nano /usr/local/hadoop/etc/hadoop/core-site.xml

<property>
	<name>hadoop.proxyuser.hive.hosts</name>
	<value>*</value>
</property>
<property>
	<name>hadoop.proxyuser.hive.groups</name>
	<value>*</value>
</property>

Install Postgres 9.6

Follow this install for installing Psotgresql 9.6

sudo su - postgres
psql

CREATE USER hiveuser WITH PASSWORD 'PASSWORD';
CREATE DATABASE metastore;
GRANT ALL PRIVILEGES ON DATABASE metastore TO hiveuser;
\q
exit

Initiate Postgres Schema

schematool -dbType postgres -initSchema

Start Metastore & HiveServer2

nohup /usr/local/hive/bin/hive --service metastore --hiveconf hive.log.file=hivemetastore.log >/var/log/hive/hivemetastore.out 2>/var/log/hive/hivemetastoreerr.log &

nohup /usr/local/hive/bin/hiveserver2 --hiveconf hive.metastore.uris=" " --hiveconf hive.log.file=hiveserver2.log >/var/log/hive/hiveserver2.out 2> /var/log/hive/hiveserver2err.log &

Auto Start

sudo mkdir /var/log/hive/
sudo chown root:hduser /var/log/hive
sudo chmod 777 /var/log/hive

crontab -e

#Add the following
@reboot nohup /usr/local/hive/bin/hive --service metastore --hiveconf hive.log.file=hivemetastore.log >/var/log/hive/hivemetastore.out 2>/var/log/hive/hivemetastoreerr.log &
@reboot nohup /usr/local/hive/bin/hiveserver2 --hiveconf hive.metastore.uris=" " --hiveconf hive.log.file=hiveserver2.log >/var/log/hive/hiveserver2.out 2> /var/log/hive/hiveserver2err.log &

Now you can check the hive version

 hive --version

Hive Web URL

http://hadoop:10002/

Beeline

#We first need to have a ticket to access beeline using the hive kerberos user we setup earlier.
kinit -kt /etc/security/keytabs/hive.service.keytab hive/hadoop@REALM.CA

#Now we can get into beeline using that principal
beeline -u "jdbc:hive2://0.0.0.0:10000/default;principal=hive/hadoop@REALM.CA;"

#You can also just get into beeline then connect from there
beeline
beeline>!connect jdbc:hive2://0.0.0.0:10000/default;principal=hive/hadoop@REALM.CA

#Disconnect from beeline
!q

References

http://www.bogotobogo.com/Hadoop/BigData_hadoop_Hive_Install_On_Ubuntu_16_04.php
https://maprdocs.mapr.com/home/Hive/Config-RemotePostgreSQLForHiveMetastore.html
https://cwiki.apache.org/confluence/display/Hive/Hive+Schema+Tool#HiveSchemaTool-TheHiveSchemaTool
https://cwiki.apache.org/confluence/display/Hive/GettingStarted#GettingStarted-InstallationandConfiguration