通过Python脚本来操作Hbase
Python脚本不能直接操作Hbase,必须借助thrift服务作为中间层。因此需要先安装thrift,并准备两个Python模块(thrift模块和hbase模块),才能实现Python对Hbase的操作。
#### 安装thrift并获得thrift模块
- 下载安装thrift
# Download the Thrift 0.11.0 source tarball from the Apache archive.
wget http://archive.apache.org/dist/thrift/0.11.0/thrift-0.11.0.tar.gz
# Unpack it and enter the source tree.
tar -zxvf thrift-0.11.0.tar.gz
cd thrift-0.11.0/
# Configure, build and install Thrift (this provides the `thrift` compiler).
./configure
make
make install
# Enter the build output directory for the Python library; the directory name
# depends on the platform and Python version used for the build (here: Linux
# x86_64, Python 2.7).
cd lib/py/build/lib.linux-x86_64-2.7
进入该目录后,就能看到编译生成的thrift模块(可将其复制到Python脚本所在目录,或加入PYTHONPATH)。
#### 获得hbase模块
- 下载Hbase源码包
# Download the HBase 0.98.24 source tarball from the Apache archive.
wget http://archive.apache.org/dist/hbase/0.98.24/hbase-0.98.24-src.tar.gz
# Unpack the source tree.
tar -zxvf hbase-0.98.24-src.tar.gz
- 产生hbase模块
# Enter the directory that holds Hbase.thrift (this path assumes the HBase
# source tree is located under /usr/local/src).
cd /usr/local/src/hbase-0.98.24/hbase-thrift/src/main/resources/org/apache/hadoop/hbase/thrift
# Generate the Python bindings; this produces a gen-py directory.
thrift --gen py Hbase.thrift
# The generated hbase module is found inside gen-py.
cd gen-py
#### 使用Python读写数据
- 创建表
# Create an HBase table with two column families through the Thrift gateway.
from thrift.protocol import TBinaryProtocol
from thrift.transport import TSocket
# TTransport is used below, so import it explicitly instead of relying on
# a star import to provide the name.
from thrift.transport import TTransport

from hbase import Hbase
from hbase.ttypes import ColumnDescriptor

# Assumes an HBase Thrift server is listening on host "master", port 9090.
transport = TTransport.TBufferedTransport(TSocket.TSocket('master', 9090))
protocol = TBinaryProtocol.TBinaryProtocol(transport)
client = Hbase.Client(protocol)

transport.open()
try:
    # One descriptor per column family, each keeping a single version.
    base_info_contents = ColumnDescriptor(name='columnName1', maxVersions=1)
    other_info_contents = ColumnDescriptor(name='columnName2', maxVersions=1)
    client.createTable('tableName', [base_info_contents, other_info_contents])
finally:
    # Release the connection even if the call above raises.
    transport.close()
- 插入数据
# Write one cell into an HBase table through the Thrift gateway.
from thrift.protocol import TBinaryProtocol
from thrift.transport import TSocket
# TTransport is used below, so import it explicitly instead of relying on
# a star import to provide the name.
from thrift.transport import TTransport

from hbase import Hbase
from hbase.ttypes import Mutation

# Assumes an HBase Thrift server is listening on host "master", port 9090.
transport = TTransport.TBufferedTransport(TSocket.TSocket('master', 9090))
protocol = TBinaryProtocol.TBinaryProtocol(transport)
client = Hbase.Client(protocol)

transport.open()
try:
    table_name = 'tableName'
    rowKey = 'rowKeyName'
    # The column is written as "family:qualifier"; the family part must be a
    # column family that exists in the target table.
    mutations = [Mutation(column="columnName:columnPro", value="valueName")]
    # The last argument is the optional attributes map; None sends none.
    client.mutateRow(table_name, rowKey, mutations, None)
finally:
    # Release the connection even if the call above raises.
    transport.close()
- 查看数据
# Fetch a single row by key through the Thrift gateway and print its cells.
from thrift.protocol import TBinaryProtocol
from thrift.transport import TSocket
# TTransport is used below, so import it explicitly instead of relying on
# a star import to provide the name.
from thrift.transport import TTransport

from hbase import Hbase

# Assumes an HBase Thrift server is listening on host "master", port 9090.
transport = TTransport.TBufferedTransport(TSocket.TSocket('master', 9090))
protocol = TBinaryProtocol.TBinaryProtocol(transport)
client = Hbase.Client(protocol)

transport.open()
try:
    table_name = 'tableName'
    rowKey = 'rowKeyName'
    # The last argument is the optional attributes map; None sends none.
    result = client.getRow(table_name, rowKey, None)
    for row_result in result:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("the row is " + row_result.row)
        for column, cell in row_result.columns.items():
            print('\t'.join([column, cell.value]))
finally:
    # Release the connection even if a call above raises.
    transport.close()
# Scan a table through the Thrift gateway and print up to 10 rows.
from thrift.protocol import TBinaryProtocol
from thrift.transport import TSocket
# TTransport is used below, so import it explicitly instead of relying on
# a star import to provide the name.
from thrift.transport import TTransport

from hbase import Hbase
from hbase.ttypes import TScan

# Assumes an HBase Thrift server is listening on host "master", port 9090.
transport = TTransport.TBufferedTransport(TSocket.TSocket('master', 9090))
protocol = TBinaryProtocol.TBinaryProtocol(transport)
client = Hbase.Client(protocol)

transport.open()
try:
    table_name = 'tableName'
    scan = TScan()
    # Named scanner_id rather than id to avoid shadowing the builtin.
    scanner_id = client.scannerOpenWithScan(table_name, scan, None)
    try:
        # Ask for at most 10 rows from the scanner.
        result = client.scannerGetList(scanner_id, 10)
        for row_result in result:
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print("=========")
            print("the row is " + row_result.row)
            for column, cell in row_result.columns.items():
                print('\t'.join([column, cell.value]))
    finally:
        # Close the scanner so it does not stay open on the server.
        client.scannerClose(scanner_id)
finally:
    # Release the connection even if a call above raises.
    transport.close()
转载请注明:SuperIT » 通过Python脚本来操作Hbase