In this tutorial I will show you how to use Kerberos/SSL with HDFS/Yarn/MapRed. I will use self signed certs for this example. Before you begin ensure you have installed Kerberos Server and Hadoop.
This assumes your hostname is “hadoop”
Create Kerberos Principals
- cd /etc/security/keytabs/
- sudo kadmin.local
- #You can list principals
- listprincs
- #Create the following principals
- addprinc -randkey nn/hadoop@REALM.CA
- addprinc -randkey jn/hadoop@REALM.CA
- addprinc -randkey dn/hadoop@REALM.CA
- addprinc -randkey sn/hadoop@REALM.CA
- addprinc -randkey nm/hadoop@REALM.CA
- addprinc -randkey rm/hadoop@REALM.CA
- addprinc -randkey jhs/hadoop@REALM.CA
- addprinc -randkey HTTP/hadoop@REALM.CA
- #We are going to create a user to access with later
- addprinc -pw hadoop myuser/hadoop@REALM.CA
- xst -k myuser.keytab myuser/hadoop@REALM.CA
- #Create the keytab files.
- #You will need these for Hadoop to be able to login
- xst -k nn.service.keytab nn/hadoop@REALM.CA
- xst -k jn.service.keytab jn/hadoop@REALM.CA
- xst -k dn.service.keytab dn/hadoop@REALM.CA
- xst -k sn.service.keytab sn/hadoop@REALM.CA
- xst -k nm.service.keytab nm/hadoop@REALM.CA
- xst -k rm.service.keytab rm/hadoop@REALM.CA
- xst -k jhs.service.keytab jhs/hadoop@REALM.CA
- xst -k spnego.service.keytab HTTP/hadoop@REALM.CA
Set Keytab Permissions/Ownership
- sudo chown root:hadoopuser /etc/security/keytabs/*
- sudo chmod 750 /etc/security/keytabs/*
Stop the Cluster
- stop-dfs.sh
- stop-yarn.sh
- mr-jobhistory-daemon.sh --config $HADOOP_CONF_DIR stop historyserver
Hosts Update
- sudo nano /etc/hosts
- #Remove 127.0.1.1 line
- #Change 127.0.0.1 to the following
- #Notice that realm.ca is listed here: it's because we need to tell the resolver where that host resides
- 127.0.0.1 realm.ca hadoop localhost
hadoop-env.sh
We clear HADOOP_SECURE_DN_USER because we are going to use Kerberos (SASL) instead of running the DataNode as a privileged user
- sudo nano /usr/local/hadoop/etc/hadoop/hadoop-env.sh
- #Locate the line "export HADOOP_SECURE_DN_USER=..."
- #and change to
- export HADOOP_SECURE_DN_USER=
core-site.xml
- nano /usr/local/hadoop/etc/hadoop/core-site.xml
- <configuration>
- <property>
- <name>fs.defaultFS</name>
- <value>hdfs://NAMENODE:54310</value>
- <description>The name of the default file system. A URI whose scheme and authority determine the FileSystem implementation. The uri's scheme determines the config property (fs.SCHEME.impl) naming
- the FileSystem implementation class. The uri's authority is used to determine the host, port, etc. for a filesystem.</description>
- </property>
- <property>
- <name>hadoop.tmp.dir</name>
- <value>/app/hadoop/tmp</value>
- </property>
- <property>
- <name>hadoop.proxyuser.hadoopuser.hosts</name>
- <value>*</value>
- </property>
- <property>
- <name>hadoop.proxyuser.hadoopuser.groups</name>
- <value>*</value>
- </property>
- <property>
- <name>hadoop.security.authentication</name>
- <value>kerberos</value> <!-- A value of "simple" would disable security. -->
- </property>
- <property>
- <name>hadoop.security.authorization</name>
- <value>true</value>
- </property>
- <property>
- <name>hadoop.security.auth_to_local</name>
- <value>
- RULE:[2:$1@$0](nn/.*@.*REALM.TLD)s/.*/hdfs/
- RULE:[2:$1@$0](jn/.*@.*REALM.TLD)s/.*/hdfs/
- RULE:[2:$1@$0](dn/.*@.*REALM.TLD)s/.*/hdfs/
- RULE:[2:$1@$0](sn/.*@.*REALM.TLD)s/.*/hdfs/
- RULE:[2:$1@$0](nm/.*@.*REALM.TLD)s/.*/yarn/
- RULE:[2:$1@$0](rm/.*@.*REALM.TLD)s/.*/yarn/
- RULE:[2:$1@$0](jhs/.*@.*REALM.TLD)s/.*/mapred/
- DEFAULT
- </value>
- </property>
- <property>
- <name>hadoop.rpc.protection</name>
- <value>integrity</value>
- </property>
- <property>
- <name>hadoop.ssl.require.client.cert</name>
- <value>false</value>
- </property>
- <property>
- <name>hadoop.ssl.hostname.verifier</name>
- <value>DEFAULT</value>
- </property>
- <property>
- <name>hadoop.ssl.keystores.factory.class</name>
- <value>org.apache.hadoop.security.ssl.FileBasedKeyStoresFactory</value>
- </property>
- <property>
- <name>hadoop.ssl.server.conf</name>
- <value>ssl-server.xml</value>
- </property>
- <property>
- <name>hadoop.ssl.client.conf</name>
- <value>ssl-client.xml</value>
- </property>
- </configuration>
ssl-server.xml
Change ssl-server.xml.example to ssl-server.xml
- cp /usr/local/hadoop/etc/hadoop/ssl-server.xml.example /usr/local/hadoop/etc/hadoop/ssl-server.xml
- nano /usr/local/hadoop/etc/hadoop/ssl-server.xml
Update properties
- <configuration>
- <property>
- <name>ssl.server.truststore.location</name>
- <value>/etc/security/serverKeys/truststore.jks</value>
- <description>Truststore to be used by NN and DN. Must be specified.</description>
- </property>
- <property>
- <name>ssl.server.truststore.password</name>
- <value>PASSWORD</value>
- <description>Optional. Default value is "".</description>
- </property>
- <property>
- <name>ssl.server.truststore.type</name>
- <value>jks</value>
- <description>Optional. The keystore file format, default value is "jks".</description>
- </property>
- <property>
- <name>ssl.server.truststore.reload.interval</name>
- <value>10000</value>
- <description>Truststore reload check interval, in milliseconds. Default value is 10000 (10 seconds).</description>
- </property>
- <property>
- <name>ssl.server.keystore.location</name>
- <value>/etc/security/serverKeys/keystore.jks</value>
- <description>Keystore to be used by NN and DN. Must be specified.</description>
- </property>
- <property>
- <name>ssl.server.keystore.password</name>
- <value>PASSWORD</value>
- <description>Must be specified.</description>
- </property>
- <property>
- <name>ssl.server.keystore.keypassword</name>
- <value>PASSWORD</value>
- <description>Must be specified.</description>
- </property>
- <property>
- <name>ssl.server.keystore.type</name>
- <value>jks</value>
- <description>Optional. The keystore file format, default value is "jks".</description>
- </property>
- <property>
- <name>ssl.server.exclude.cipher.list</name>
- <value>TLS_ECDHE_RSA_WITH_RC4_128_SHA,SSL_DHE_RSA_EXPORT_WITH_DES40_CBC_SHA,
- SSL_RSA_WITH_DES_CBC_SHA,SSL_DHE_RSA_WITH_DES_CBC_SHA,
- SSL_RSA_EXPORT_WITH_RC4_40_MD5,SSL_RSA_EXPORT_WITH_DES40_CBC_SHA,
- SSL_RSA_WITH_RC4_128_MD5</value>
- <description>Optional. The weak security cipher suites that you want excluded from SSL communication.</description>
- </property>
- </configuration>
ssl-client.xml
Change ssl-client.xml.example to ssl-client.xml
- cp /usr/local/hadoop/etc/hadoop/ssl-client.xml.example /usr/local/hadoop/etc/hadoop/ssl-client.xml
- nano /usr/local/hadoop/etc/hadoop/ssl-client.xml
Update properties
- <configuration>
- <property>
- <name>ssl.client.truststore.location</name>
- <value>/etc/security/serverKeys/truststore.jks</value>
- <description>Truststore to be used by clients like distcp. Must be specified.</description>
- </property>
- <property>
- <name>ssl.client.truststore.password</name>
- <value>PASSWORD</value>
- <description>Optional. Default value is "".</description>
- </property>
- <property>
- <name>ssl.client.truststore.type</name>
- <value>jks</value>
- <description>Optional. The keystore file format, default value is "jks".</description>
- </property>
- <property>
- <name>ssl.client.truststore.reload.interval</name>
- <value>10000</value>
- <description>Truststore reload check interval, in milliseconds. Default value is 10000 (10 seconds).</description>
- </property>
- <property>
- <name>ssl.client.keystore.location</name>
- <value></value>
- <description>Keystore to be used by clients like distcp. Must be specified.</description>
- </property>
- <property>
- <name>ssl.client.keystore.password</name>
- <value></value>
- <description>Optional. Default value is "".</description>
- </property>
- <property>
- <name>ssl.client.keystore.keypassword</name>
- <value></value>
- <description>Optional. Default value is "".</description>
- </property>
- <property>
- <name>ssl.client.keystore.type</name>
- <value>jks</value>
- <description>Optional. The keystore file format, default value is "jks".</description>
- </property>
- </configuration>
mapred-site.xml
Just add the following to the config to let it know the Kerberos keytabs to use.
- nano /usr/local/hadoop/etc/hadoop/mapred-site.xml
- <property>
- <name>mapreduce.jobhistory.keytab</name>
- <value>/etc/security/keytabs/jhs.service.keytab</value>
- </property>
- <property>
- <name>mapreduce.jobhistory.principal</name>
- <value>jhs/_HOST@REALM.CA</value>
- </property>
- <property>
- <name>mapreduce.jobhistory.http.policy</name>
- <value>HTTPS_ONLY</value>
- </property>
hdfs-site.xml
Add the following properties
- nano /usr/local/hadoop/etc/hadoop/hdfs-site.xml
- <property>
- <name>dfs.http.policy</name>
- <value>HTTPS_ONLY</value>
- </property>
- <property>
- <name>hadoop.ssl.enabled</name>
- <value>true</value>
- </property>
- <property>
- <name>dfs.datanode.https.address</name>
- <value>NAMENODE:50475</value>
- </property>
- <property>
- <name>dfs.namenode.https-address</name>
- <value>NAMENODE:50470</value>
- <description>Your NameNode hostname for http access.</description>
- </property>
- <property>
- <name>dfs.namenode.secondary.https-address</name>
- <value>NAMENODE:50091</value>
- <description>Your Secondary NameNode hostname for http access.</description>
- </property>
- <property>
- <name>dfs.namenode.https-bind-host</name>
- <value>0.0.0.0</value>
- </property>
- <property>
- <name>dfs.block.access.token.enable</name>
- <value>true</value>
- <description> If "true", access tokens are used as capabilities for accessing datanodes. If "false", no access tokens are checked on accessing datanod</description>
- </property>
- <property>
- <name>dfs.namenode.kerberos.principal</name>
- <value>nn/_HOST@REALM.CA</value>
- <description> Kerberos principal name for the NameNode</description>
- </property>
- <property>
- <name>dfs.secondary.namenode.kerberos.principal</name>
- <value>sn/_HOST@REALM.CA</value>
- <description>Kerberos principal name for the secondary NameNode.</description>
- </property>
- <property>
- <name>dfs.web.authentication.kerberos.keytab</name>
- <value>/etc/security/keytabs/spnego.service.keytab</value>
- <description>The Kerberos keytab file with the credentials for the HTTP Kerberos principal used by Hadoop-Auth in the HTTP endpoint.</description>
- </property>
- <property>
- <name>dfs.namenode.keytab.file</name>
- <value>/etc/security/keytabs/nn.service.keytab</value>
- <description>Combined keytab file containing the namenode service and host principals.</description>
- </property>
- <property>
- <name>dfs.datanode.keytab.file</name>
- <value>/etc/security/keytabs/dn.service.keytab</value>
- <description>The filename of the keytab file for the DataNode.</description>
- </property>
- <property>
- <name>dfs.datanode.kerberos.principal</name>
- <value>dn/_HOST@REALM.CA</value>
- <description>The Kerberos principal that the DataNode runs as. "_HOST" is replaced by the real host name.</description>
- </property>
- <property>
- <name>dfs.namenode.kerberos.internal.spnego.principal</name>
- <value>${dfs.web.authentication.kerberos.principal}</value>
- </property>
- <property>
- <name>dfs.secondary.namenode.kerberos.internal.spnego.principal</name>
- <value>${dfs.web.authentication.kerberos.principal}</value>
- </property>
- <property>
- <name>dfs.web.authentication.kerberos.principal</name>
- <value>HTTP/_HOST@REALM.CA</value>
- <description>The HTTP Kerberos principal used by Hadoop-Auth in the HTTP endpoint.</description>
- </property>
- <property>
- <name>dfs.data.transfer.protection</name>
- <value>integrity</value>
- </property>
- <property>
- <name>dfs.datanode.address</name>
- <value>NAMENODE:50010</value>
- </property>
- <property>
- <name>dfs.secondary.namenode.keytab.file</name>
- <value>/etc/security/keytabs/sn.service.keytab</value>
- </property>
- <property>
- <name>dfs.secondary.namenode.kerberos.internal.spnego.principal</name>
- <value>HTTP/_HOST@REALM.CA</value>
- </property>
- <property>
- <name>dfs.webhdfs.enabled</name>
- <value>true</value>
- </property>
Remove the following properties
- dfs.namenode.http-address
- dfs.namenode.secondary.http-address
- dfs.namenode.http-bind-host
yarn-site.xml
Add the following properties
- nano /usr/local/hadoop/etc/hadoop/yarn-site.xml
- <property>
- <name>yarn.http.policy</name>
- <value>HTTPS_ONLY</value>
- </property>
- <property>
- <name>yarn.resourcemanager.webapp.https.address</name>
- <value>${yarn.resourcemanager.hostname}:8090</value>
- </property>
- <property>
- <name>yarn.resourcemanager.hostname</name>
- <value>NAMENODE</value>
- </property>
- <property>
- <name>yarn.nodemanager.bind-host</name>
- <value>0.0.0.0</value>
- </property>
- <property>
- <name>yarn.nodemanager.webapp.address</name>
- <value>${yarn.nodemanager.hostname}:8042</value>
- </property>
- <property>
- <name>yarn.resourcemanager.principal</name>
- <value>rm/_HOST@REALM.CA</value>
- </property>
- <property>
- <name>yarn.resourcemanager.keytab</name>
- <value>/etc/security/keytabs/rm.service.keytab</value>
- </property>
- <property>
- <name>yarn.nodemanager.principal</name>
- <value>nm/_HOST@REALM.CA</value>
- </property>
- <property>
- <name>yarn.nodemanager.keytab</name>
- <value>/etc/security/keytabs/nm.service.keytab</value>
- </property>
- <property>
- <name>yarn.nodemanager.hostname</name>
- <value>NAMENODE</value>
- </property>
- <property>
- <name>yarn.resourcemanager.bind-host</name>
- <value>0.0.0.0</value>
- </property>
- <property>
- <name>yarn.timeline-service.bind-host</name>
- <value>0.0.0.0</value>
- </property>
Remove the following properties
- yarn.resourcemanager.webapp.address
SSL
Setup SSL Directories
- sudo mkdir -p /etc/security/serverKeys
- sudo chown -R root:hadoopuser /etc/security/serverKeys/
- sudo chmod 755 /etc/security/serverKeys/
- cd /etc/security/serverKeys
Setup Keystore
- sudo keytool -genkey -alias NAMENODE -keyalg RSA -keysize 1024 -dname "CN=NAMENODE,OU=ORGANIZATION_UNIT,C=canada" -keypass PASSWORD -keystore /etc/security/serverKeys/keystore.jks -storepass PASSWORD
- sudo keytool -export -alias NAMENODE -keystore /etc/security/serverKeys/keystore.jks -rfc -file /etc/security/serverKeys/NAMENODE.csr -storepass PASSWORD
Setup Truststore
- sudo keytool -import -noprompt -alias NAMENODE -file /etc/security/serverKeys/NAMENODE.csr -keystore /etc/security/serverKeys/truststore.jks -storepass PASSWORD
Generate Self-Signed Certificate
- sudo openssl genrsa -out /etc/security/serverKeys/NAMENODE.key 2048
- sudo openssl req -x509 -new -key /etc/security/serverKeys/NAMENODE.key -days 300 -out /etc/security/serverKeys/NAMENODE.pem
- sudo keytool -keystore /etc/security/serverKeys/keystore.jks -alias NAMENODE -certreq -file /etc/security/serverKeys/NAMENODE.cert -storepass PASSWORD -keypass PASSWORD
- sudo openssl x509 -req -CA /etc/security/serverKeys/NAMENODE.pem -CAkey /etc/security/serverKeys/NAMENODE.key -in /etc/security/serverKeys/NAMENODE.cert -out /etc/security/serverKeys/NAMENODE.signed -days 300 -CAcreateserial
Setup File Permissions
- sudo chmod 440 /etc/security/serverKeys/*
- sudo chown root:hadoopuser /etc/security/serverKeys/*
Start the Cluster
- start-dfs.sh
- start-yarn.sh
- mr-jobhistory-daemon.sh --config $HADOOP_CONF_DIR start historyserver
Create User Directory
- kinit -kt /etc/security/keytabs/myuser.keytab myuser/hadoop@REALM.CA
- #ensure the login worked
- klist
- #Create hdfs directory now
- hdfs dfs -mkdir /user
- hdfs dfs -mkdir /user/myuser
- #remove kerberos ticket
- kdestroy
URL
https://NAMENODE:50470
https://NAMENODE:50475
https://NAMENODE:8090
4 thoughts on “HDFS/Yarn/MapRed: Kerberize/SSL”
Comments are closed.