diff --git a/hbase/Dockerfile b/hbase/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..38c748843956f3a6b5f83d1dfe9c4fc96c0dd818
--- /dev/null
+++ b/hbase/Dockerfile
@@ -0,0 +1,52 @@
+# HBase in Docker
+#
+# Version 0.4
+
+# https://docs.docker.com/reference/dockerfile/
+
+FROM ubuntu:bionic
+# MAINTAINER is deprecated; a label carries the same information
+LABEL maintainer="Dave Beckett"
+
+# apt sources used by the package installation in prepare-hbase.sh
+COPY sources.list /etc/apt/sources.list
+
+# Build helper scripts; removed again at the end of the RUN step below
+COPY *.sh /build/
+
+ENV HBASE_VERSION=2.2.4
+
+# ADD rather than COPY on purpose: ADD unpacks a local tar archive
+ADD hbase-$HBASE_VERSION-bin.tar.gz /opt/
+
+# Prepare, build and clean up in one layer.  Every step is chained with
+# '&&' so that 'cd /' really runs before the cleanup script instead of
+# being handed to build-hbase.sh as extra arguments.
+RUN /build/prepare-hbase.sh && \
+    cd /opt/hbase && /build/build-hbase.sh && \
+    cd / && /build/cleanup-hbase.sh && rm -rf /build
+
+VOLUME /data
+
+COPY ./hbase-site.xml /opt/hbase/conf/hbase-site.xml
+
+COPY ./zoo.cfg /opt/hbase/conf/zoo.cfg
+
+COPY ./replace-hostname /opt/replace-hostname
+
+COPY ./hbase-server /opt/hbase-server
+
+# REST API
+EXPOSE 8080
+# REST Web UI at :8085/rest.jsp
+EXPOSE 8085
+# Thrift API
+EXPOSE 9090
+# Thrift Web UI at :9095/thrift.jsp
+EXPOSE 9095
+# HBase's Embedded zookeeper cluster
+EXPOSE 2181
+# HBase Master web UI at :16010/master-status; ZK at :16010/zk.jsp
+EXPOSE 16010
+
+CMD ["/opt/hbase-server"]
diff --git a/hbase/Makefile b/hbase/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..baeb78de635f9a4acd4e23152e874188cba264f3
--- /dev/null
+++ b/hbase/Makefile
@@ -0,0 +1,18 @@
+IMAGE_NAME=dajobe/hbase
+IMAGE_TAG=latest
+
+# Read the version from the Dockerfile's ENV line ("ENV NAME VALUE" or
+# "ENV NAME=VALUE").  '$$3' is awk's third field; a single '$3' would be
+# expanded (to nothing) by make before awk ever runs.
+HBASE_VERSION=$(shell awk -F'[ =]' '/^ENV HBASE_VERSION/ {print $$3}' Dockerfile)
+
+.PHONY: build push
+
+build:
+	@echo "Building hbase docker image $(HBASE_VERSION)"
+	docker build -t $(IMAGE_NAME):$(IMAGE_TAG) .
+ +# This won't work unless you have already set up the repository config +push: + @echo "Pushing image to https://hub.docker.com/" + docker push $(IMAGE_NAME):$(IMAGE_TAG) diff --git a/hbase/README.md b/hbase/README.md new file mode 100644 index 0000000000000000000000000000000000000000..27cbac2de3b3ded40ffa288322f1bf8422ec5bdc --- /dev/null +++ b/hbase/README.md @@ -0,0 +1,196 @@ +HBase in Docker +=============== + +This configuration builds a docker container to run HBase (with +embedded Zookeeper) running on the files inside the container. + +NOTE +---- + +The approach here requires editing the local server's `/etc/hosts` +file to add an entry for the container hostname. This is because +HBase uses hostnames to pass connection data back out of the +container (from its internal Zookeeper). + +Hopefully this can be improved with Docker's newer networking +but this hasn't been fixed yet. + + +Build Image +----------- + + $ docker build -t dajobe/hbase . + + +Pull image +---------- + +If you want to pull the image already built then use this + + $ docker pull dajobe/hbase + +More details at https://hub.docker.com/r/dajobe/hbase/ + + +Run HBase +--------- + +To run HBase by hand: + + $ mkdir data + $ id=$(docker run --name=hbase-docker -h hbase-docker -d -v $PWD/data:/data dajobe/hbase) + +To run it and adjust the host system locally by editing +`/etc/hosts` to alias the DNS hostname 'hbase-docker' to the +container, use this: + + $ ./start-hbase.sh + +This will require you to enter your sudo password to edit the host +machine's `/etc/hosts` file. + +If you want to run multiple hbase dockers on the same host, you can +give them different hostnames with the '-h' / '--hostname' argument. +You may have to give them different ports though. Not tested. 
+ +If you want to customize the hostname used, set the +`HBASE_DOCKER_HOSTNAME` environment variable on the docker command line. + + +Find HBase status +----------------- + +Master status if docker container DNS name is 'hbase-docker' + + http://hbase-docker:16010/master-status + +The region server status pages are linked from the above page. + +Thrift UI + + http://hbase-docker:9095/thrift.jsp + +REST server UI + + http://hbase-docker:8085/rest.jsp + +(Embedded) Zookeeper status + + http://hbase-docker:16010/zk.jsp + + +See HBase Logs +-------------- + +If you want to see the latest logs live use: + + $ docker attach $id + +Then ^C to detach. + +To see all the logs since the HBase server started, use: + + $ docker logs $id + +and ^C to detach again. + +To see the individual log files without using `docker`, look into +the data volume dir e.g. $PWD/data/logs if invoked as above. + + +Test HBase is working via python over Thrift +-------------------------------------------- + +Here I am connecting to a docker container with the name 'hbase-docker' +(such as created by the start-hbase.sh script). The port 9090 is the +Thrift API port because [Happybase][1] [2] uses Thrift to talk to HBase. + + $ python + Python 2.7.15 (default, Jan 12 2019, 21:07:57) + [GCC 4.2.1 Compatible Apple LLVM 10.0.0 (clang-1000.11.45.5)] on darwin + Type "help", "copyright", "credits" or "license" for more information. + >>> import happybase + >>> connection = happybase.Connection('hbase-docker', 9090) + >>> connection.create_table('table-name', { 'family': dict() } ) + >>> connection.tables() + ['table-name'] + >>> table = connection.table('table-name') + >>> table.put('row-key', {'family:qual1': 'value1', 'family:qual2': 'value2'}) + >>> for k, data in table.scan(): + ... print k, data + ... 
+ row-key {'family:qual1': 'value1', 'family:qual2': 'value2'} + >>> + +(Simple install for happybase: `sudo pip install happybase` although I +use `pip install --user happybase` to get it just for me) + + +Test HBase is working from Java +------------------------------- + + $ docker run --rm -it --link $id:hbase-docker dajobe/hbase hbase shell + HBase Shell + Use "help" to get list of supported commands. + Use "exit" to quit this interactive shell. + For Reference, please visit: http://hbase.apache.org/2.0/book.html#shell + Version 2.1.2, r1dfc418f77801fbfb59a125756891b9100c1fc6d, Sun Dec 30 21:45:09 PST 2018 + Took 0.0472 seconds + hbase(main):001:0> status + 1 active master, 0 backup masters, 1 servers, 0 dead, 2.0000 average load + Took 0.7255 seconds + hbase(main):002:0> list + TABLE + table-name + 1 row(s) + Took 0.0509 seconds + => ["table-name"] + hbase(main):003:0> + +Showing the `table-name` table made in the happybase example above. + +Alternatively if you have the HBase distribution available on the +host you can use `bin/hbase shell` if the hbase configuration has +been set up to connect to host `hbase-docker` zookeeper port 2181 to +get the servers via configuration property `hbase.zookeeper.quorum` + + + +Proxy HBase UIs locally +----------------------- + +If you are running docker on a remote machine, it is handy to see +these server-private urls in a local browser so here is a +~/.ssh/config fragment to do that + + Host my-docker-server + Hostname 1.2.3.4 + LocalForward 127.0.0.1:16010 127.0.0.1:16010 + LocalForward 127.0.0.1:9095 127.0.0.1:9095 + LocalForward 127.0.0.1:8085 127.0.0.1:8085 + +When you `ssh my-docker-server` ssh connects to the docker server and +forwards requests on your local machine on ports 16010 / 9095 / 8085 to the +remote ports that are attached to the hbase container. 
+ +The bottom line: you can use these URLs to see what's going on: + + * http://localhost:16010/master-status for the Master Server + * http://localhost:9095/thrift.jsp for the Thrift UI + * http://localhost:8085/rest.jsp for the REST server UI + * http://localhost:16010/zk.jsp for the embedded Zookeeper + +These show what's going on in the container, and since both your local +machine and the container are using localhost (aka 127.0.0.1), even +the links work! + + + + + +Notes +----- + +[1] http://happybase.readthedocs.org/en/latest/ + +[2] https://github.com/wbolster/happybase diff --git a/hbase/build-hbase.sh b/hbase/build-hbase.sh new file mode 100755 index 0000000000000000000000000000000000000000..8247039b240de87efe0c92b8fafe18cba950dea6 --- /dev/null +++ b/hbase/build-hbase.sh @@ -0,0 +1,21 @@ +#!/bin/sh -xe + +. /build/config-hbase.sh + +here=$(pwd) + +# delete files that are not needed to run hbase +rm -rf docs *.txt LEGAL +rm -f */*.cmd + +# Set Java home for hbase servers +sed -i "s,^. 
export JAVA_HOME.*,export JAVA_HOME=$JAVA_HOME," conf/hbase-env.sh + +# Set interactive shell defaults +cat > /etc/profile.d/defaults.sh < $logs_dir/hbase-thrift.log 2>&1 & + +# REST server (background) +# Ports: 8080 API +echo "hbase rest start logging to $logs_dir/hbase-rest.log" +hbase rest start > $logs_dir/hbase-rest.log 2>&1 & + +# Master server (Foreground) that also starts the region server +# Ports: Master: 16000 API, 16010 UI; 2181 ZK; Region: 16020 API, 16030 UI +echo "hbase master start logging to $logs_dir/hbase-master.log" +exec hbase master start 2>&1 | tee $logs_dir/hbase-master.log diff --git a/hbase/hbase-site.xml b/hbase/hbase-site.xml new file mode 100644 index 0000000000000000000000000000000000000000..b8c3be89b8386c37a3b4e923eb486675c69d7db5 --- /dev/null +++ b/hbase/hbase-site.xml @@ -0,0 +1,23 @@ + + + + + hbase.zookeeper.quorum + hbase-docker + + + hbase.rootdir + file:////data/hbase + + + + hbase.master.info.bindAddress + hbase-docker + + + + hbase.regionserver.info.bindAddress + hbase-docker + + + diff --git a/hbase/prepare-hbase.sh b/hbase/prepare-hbase.sh new file mode 100755 index 0000000000000000000000000000000000000000..d634315923942b3d78cc8e4d3162a5d8b2b0d2b8 --- /dev/null +++ b/hbase/prepare-hbase.sh @@ -0,0 +1,13 @@ +#!/bin/sh -xe + +. /build/config-hbase.sh + +apt-get update -y + +apt-get install $minimal_apt_get_args $HBASE_BUILD_PACKAGES + +cd /opt + +#curl -SL $HBASE_DIST/$HBASE_VERSION/hbase-$HBASE_VERSION-bin.tar.gz | tar -x -z && mv hbase-${HBASE_VERSION} hbase + +mv hbase-${HBASE_VERSION} hbase diff --git a/hbase/replace-hostname b/hbase/replace-hostname new file mode 100755 index 0000000000000000000000000000000000000000..c8e8f1fb8c9ff126232a151822e5800a47287d49 --- /dev/null +++ b/hbase/replace-hostname @@ -0,0 +1,26 @@ +#!/bin/bash +# +# Script that replaces the default hostname in files with the environments +# ${HOSTNAME} variable. 
+# +# This script is intended to be run before starting hbase-server to ensure +# that the hostname matches the configured environment variable. i.e. +# the -h --hostname flag. +# +declare -a files=( + '/opt/hbase/conf/hbase-site.xml' + '/opt/hbase/conf/zoo.cfg' +) + +# Optional custom hostname replacement +REPLACEMENT_HOSTNAME=${HBASE_DOCKER_HOSTNAME:-$HOSTNAME} + +for file in "${files[@]}"; do + if [ -f "${file}.bak" ]; then + cp "${file}.bak" "${file}" + else + cp "${file}" "${file}.bak" + fi + + sed -i "s/hbase-docker/${REPLACEMENT_HOSTNAME}/g" "${file}" +done diff --git a/hbase/sources.list b/hbase/sources.list new file mode 100644 index 0000000000000000000000000000000000000000..82bd9625c805eae98764c57e76f03a3bb323a390 --- /dev/null +++ b/hbase/sources.list @@ -0,0 +1,19 @@ +# deb cdrom:[Ubuntu 16.04 LTS _Xenial Xerus_ - Release amd64 (20160420.1)]/ bionic main restricted +deb-src http://archive.ubuntu.com/ubuntu bionic main restricted #Added by software-properties +deb http://mirrors.aliyun.com/ubuntu/ bionic main restricted +deb-src http://mirrors.aliyun.com/ubuntu/ bionic main restricted multiverse universe #Added by software-properties +deb http://mirrors.aliyun.com/ubuntu/ bionic-updates main restricted +deb-src http://mirrors.aliyun.com/ubuntu/ bionic-updates main restricted multiverse universe #Added by software-properties +deb http://mirrors.aliyun.com/ubuntu/ bionic universe +deb http://mirrors.aliyun.com/ubuntu/ bionic-updates universe +deb http://mirrors.aliyun.com/ubuntu/ bionic multiverse +deb http://mirrors.aliyun.com/ubuntu/ bionic-updates multiverse +deb http://mirrors.aliyun.com/ubuntu/ bionic-backports main restricted universe multiverse +deb-src http://mirrors.aliyun.com/ubuntu/ bionic-backports main restricted universe multiverse #Added by software-properties +deb http://archive.canonical.com/ubuntu bionic partner +deb-src http://archive.canonical.com/ubuntu bionic partner +deb http://mirrors.aliyun.com/ubuntu/ bionic-security main 
restricted
+deb-src http://mirrors.aliyun.com/ubuntu/ bionic-security main restricted multiverse universe #Added by software-properties
+deb http://mirrors.aliyun.com/ubuntu/ bionic-security universe
+deb http://mirrors.aliyun.com/ubuntu/ bionic-security multiverse
+
diff --git a/hbase/start-hbase.sh b/hbase/start-hbase.sh
new file mode 100755
index 0000000000000000000000000000000000000000..2ce5562a5af65b272ad4afe185741607e2b80a2c
--- /dev/null
+++ b/hbase/start-hbase.sh
@@ -0,0 +1,39 @@
+#!/bin/bash -e
+#
+# Script to start docker and update the /etc/hosts file to point to
+# the hbase-docker container
+#
+# hbase thrift and master server logs are written to the local
+# logs directory
+#
+
+echo "Starting HBase container"
+# Quote every expansion: an unquoted 'rm -rf $data_dir' would delete the
+# wrong paths when the current directory name contains whitespace.
+data_dir="$PWD/data"
+rm -rf "$data_dir"
+mkdir -p "$data_dir"
+id=$(docker run --name=hbase-docker -h hbase-docker -d -v "$data_dir:/data" dajobe/hbase)
+
+echo "Container has ID $id"
+
+# Get the hostname and IP inside the container straight from docker's
+# template output (no temporary config.json, no python needed)
+docker_hostname=$(docker inspect --format '{{.Config.Hostname}}' "$id")
+docker_ip=$(docker inspect --format '{{.NetworkSettings.IPAddress}}' "$id")
+
+echo "Updating /etc/hosts to make hbase-docker point to $docker_ip ($docker_hostname)"
+if grep 'hbase-docker' /etc/hosts >/dev/null; then
+  sudo sed -i.bak "s/^.*hbase-docker.*\$/$docker_ip hbase-docker $docker_hostname/" /etc/hosts
+else
+  # printf, unlike echo, treats '\n' the same way in every shell
+  sudo sh -c "printf '\n%s hbase-docker %s\n' '$docker_ip' '$docker_hostname' >> /etc/hosts"
+fi
+
+echo "Now connect to hbase at localhost on the standard ports"
+echo " ZK 2181, Thrift 9090, Master 16000, Region 16020"
+echo "Or connect to host hbase-docker (in the container) on the same ports"
+echo ""
+echo "For docker status:"
+echo "$ id=$id"
+echo "$ docker inspect \$id"
diff --git a/hbase/test_hbase.py b/hbase/test_hbase.py
new file
mode 100644
index 0000000000000000000000000000000000000000..040d21ea8eae086c275395f4f90fa6d2b687d34d
--- /dev/null
+++ b/hbase/test_hbase.py
@@ -0,0 +1,57 @@
+# https://happybase.readthedocs.org/en/latest/
+# https://github.com/wbolster/happybase
+#
+# Smoke test: create a table over Thrift, write/read/scan one row, then
+# delete the row and the table again.  Runs on Python 2 and Python 3.
+from __future__ import print_function
+
+import happybase
+
+
+def main():
+    HOST = 'hbase-docker'
+    PORT = 9090
+    # Will create and then delete this table
+    TABLE_NAME = 'table-name'
+    # Row keys, column names and cell values are byte strings in HBase;
+    # bytes literals behave the same on Python 2 (where bytes is str).
+    ROW_KEY = b'row-key'
+    row_label = ROW_KEY.decode('utf-8')
+
+    connection = happybase.Connection(HOST, PORT)
+
+    tables = connection.tables()
+    print("HBase has tables {0}".format(tables))
+
+    # HappyBase returns table names as byte strings on Python 3, so compare as bytes
+    if TABLE_NAME.encode('utf-8') not in tables:
+        print("Creating table {0}".format(TABLE_NAME))
+        connection.create_table(TABLE_NAME, {'family': dict()})
+
+    table = connection.table(TABLE_NAME)
+
+    print("Storing values with row key '{0}'".format(row_label))
+    table.put(ROW_KEY, {b'family:qual1': b'value1',
+                        b'family:qual2': b'value2'})
+
+    print("Getting values for row key '{0}'".format(row_label))
+    row = table.row(ROW_KEY)
+    print(row[b'family:qual1'])
+
+    print("Printing rows with keys '{0}' and row-key-2".format(row_label))
+    for key, data in table.rows([ROW_KEY, b'row-key-2']):
+        print(key, data)
+
+    print("Scanning rows with prefix 'row'")
+    for key, data in table.scan(row_prefix=b'row'):
+        print(key, data)  # prints 'value1' and 'value2'
+
+    print("Deleting row '{0}'".format(row_label))
+    table.delete(ROW_KEY)
+
+    print("Deleting table {0}".format(TABLE_NAME))
+    connection.delete_table(TABLE_NAME, disable=True)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/hbase/zoo.cfg b/hbase/zoo.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..5677f0fbfa7c3e5792ab9200a0c4704e8d6a66b1
--- /dev/null
+++ b/hbase/zoo.cfg
@@ -0,0 +1,3 @@
+clientPort=2181
+clientPortAddress=hbase-docker
+server.1=hbase-docker:2181