“林子雨大数据” 实验3 HBase操作与接口编程
环境搭建
在Windows中使用VirtualBox安装Ubuntu虚拟机(2020年7月版本)_厦大数据库实验室博客 (xmu.edu.cn)
Hadoop3.1.3安装教程_单机/伪分布式配置_Hadoop3.1.3/Ubuntu18.04(16.04)_厦大数据库实验室博客 (xmu.edu.cn)
HBase2.2.2安装和编程实践指南_厦大数据库实验室博客 (xmu.edu.cn)
PowerShell SSH 连接 VirtualBox Ubuntu 虚拟机的具体步骤 - 小能日记 - 博客园 (cnblogs.com)
FlashFXP - Secure FTP Client Software for Windows. Upload, Download, and Synchronize your files。
在VScode中使用SSH进行远程开发_vscode ssh_Shipmaster_23的博客-CSDN博客
【golang】linux中安装go环境并运行_linux 运行golang_沉默小管的博客-CSDN博客
理解Hbase
第三方软件包
tsuna/gohbase: Pure-Go HBase client (github.com)
gin框架 · Go语言中文文档 (topgoer.com)
遇到过的问题
Infinite Getting code actions from ''Go', 'Go', 'Go'' on save · Issue #3105 · microsoft/vscode-go (github.com)
接口编程
实验过程
创建三个表
对应第2题答案
# Create the student table with four column families: S_No, S_Name, S_Sex, S_Age.
create 'student','S_No','S_Name','S_Sex','S_Age'
# Row s001: each value is written to the family itself (no column qualifier).
put 'student','s001','S_No','2015001'
put 'student','s001','S_Name','zhangsan'
put 'student','s001','S_Sex','male'
put 'student','s001','S_Age','23'
# Row s002.
put 'student','s002','S_No','2015002'
put 'student','s002','S_Name','Mary'
put 'student','s002','S_Sex','female'
put 'student','s002','S_Age','22'
# Row s003.
put 'student','s003','S_No','2015003'
put 'student','s003','S_Name','Lisi'
put 'student','s003','S_Sex','male'
put 'student','s003','S_Age','24'
# Create the course table with three column families: C_No, C_Name, C_Credit.
create 'course','C_No','C_Name','C_Credit'
# Row c001: each value is written to the family itself (no column qualifier).
put 'course','c001','C_No','123001'
put 'course','c001','C_Name','Math'
put 'course','c001','C_Credit','2.0'
# Row c002.
put 'course','c002','C_No','123002'
put 'course','c002','C_Name','Computer'
put 'course','c002','C_Credit','5.0'
# Row c003.
put 'course','c003','C_No','123003'
put 'course','c003','C_Name','English'
put 'course','c003','C_Credit','3.0'
# Create the SC table with three column families: SC_Sno, SC_Cno, SC_Score.
create 'SC','SC_Sno','SC_Cno','SC_Score'
# Each row sc00N stores one (SC_Sno, SC_Cno, SC_Score) triple; the SC_Sno and
# SC_Cno values reuse the S_No / C_No values inserted into student and course.
put 'SC','sc001','SC_Sno','2015001'
put 'SC','sc001','SC_Cno','123001'
put 'SC','sc001','SC_Score','86'
put 'SC','sc002','SC_Sno','2015001'
put 'SC','sc002','SC_Cno','123002'
put 'SC','sc002','SC_Score','77'
put 'SC','sc003','SC_Sno','2015002'
put 'SC','sc003','SC_Cno','123002'
put 'SC','sc003','SC_Score','77'
put 'SC','sc004','SC_Sno','2015002'
put 'SC','sc004','SC_Cno','123003'
put 'SC','sc004','SC_Score','99'
put 'SC','sc005','SC_Sno','2015003'
put 'SC','sc005','SC_Cno','123001'
put 'SC','sc005','SC_Score','98'
put 'SC','sc006','SC_Sno','2015003'
put 'SC','sc006','SC_Cno','123002'
put 'SC','sc006','SC_Score','95'
后端编程
后端在启动时分别与本地HBase建立管理员客户端、普通客户端的连接。管理员客户端管理所有表,普通客户端管理表数据的增删改查。并启动HTTP服务提供一系列API接口。
package variable
import "github.com/tsuna/gohbase"
// AdminClient is the package-level gohbase admin client shared by the
// application. It is declared here without a value and must be assigned
// before use.
var AdminClient gohbase.AdminClient
// Client is the package-level gohbase data client shared by the application.
// It is declared here without a value and must be assigned before use.
var Client gohbase.Client
package main
import (
"github.com/gin-gonic/gin"
"github.com/tsuna/gohbase"
"wolflong.com/hbase_gin/router"
"wolflong.com/hbase_gin/variable"
func init( {
variable.AdminClient = gohbase.NewAdminClient("127.0.0.1"
variable.Client = gohbase.NewClient("127.0.0.1"
}
func main( {
r := gin.Default(
router.Router(r
r.Run(":1313"
}
给定一个处理错误的通用方法
package controller
import "github.com/gin-gonic/gin"
func checkError(err error, c *gin.Context, handlers ...gin.HandlerFunc {
if err != nil {
c.JSON(500, gin.H{"error": "致命错误", "back": err.Error(}
panic(err
}
}
1.1 列出所有表的相关信息
HBase Shell 对应代码
list
从管理客户端发出请求,获取所有表,遍历表将表的命名空间与表名存储至切片中以json格式返回
// table is the JSON shape of one table entry: a namespace plus a table name.
type table struct {
// Namespace is serialized under the "namespace" key.
Namespace string `json:"namespace"`
// Qualifier is serialized under the "qualifier" key.
Qualifier string `json:"qualifier"`
}
func ShowTableList(c *gin.Context {
var tables []table
t, err := hrpc.NewListTableNames(context.Background(
checkError(err, c
res, err := variable.AdminClient.ListTableNames(t
checkError(err, c
for _, v := range res {
tables = append(tables, table{string(v.GetNamespace(, string(v.GetQualifier(}
}
fmt.Println(tables
c.JSON(200, tables
}
测试结果
[
{
"namespace": "default",
"qualifier": "SC"
},
{
"namespace": "default",
"qualifier": "course"
},
{
"namespace": "default",
"qualifier": "student"
},
{
"namespace": "default",
"qualifier": "test"
}
]
1.2 打印指定表的所有记录数据
HBase Shell 对应代码
scan "course"
func ShowTableRows(c *gin.Context {
var Cells [][]*hrpc.Cell
t, err := hrpc.NewScan(context.Background(, []byte(c.Query("table"
checkError(err, c
res := variable.Client.Scan(t
row, err := res.Next(
for err != io.EOF && row != nil {
Cells = append(Cells, row.Cells
fmt.Println(row.Cells
row, err = res.Next(
}
c.JSON(200, Cells
}
localhost:1313/TableRows?table=course 测试结果。
[
[
{
"row": "YzAwMQ==",
"family": "Q19DcmVkaXQ=",
"timestamp": 1680431640294,
"cell_type": 4,
"value": "Mi4w"
},
{
"row": "YzAwMQ==",
"family": "Q19DcmVkaXQ=",
"qualifier": "bmV3",
"timestamp": 1680432352886,
"cell_type": 4,
"value": "NS4w"
},
{
"row": "YzAwMQ==",
"family": "Q19OYW1l",
"timestamp": 1680431640279,
"cell_type": 4,
"value": "TWF0aA=="
},
{
"row": "YzAwMQ==",
"family": "Q19Obw==",
"timestamp": 1680431640250,
"cell_type": 4,
"value": "MTIzMDAx"
}
],
[
{
"row": "YzAwMg==",
"family": "Q19DcmVkaXQ=",
"timestamp": 1680431640328,
"cell_type": 4,
"value": "NS4w"
},
{
"row": "YzAwMg==",
"family": "Q19OYW1l",
"timestamp": 1680431640318,
"cell_type": 4,
"value": "Q29tcHV0ZXI="
},
{
"row": "YzAwMg==",
"family": "Q19Obw==",
"timestamp": 1680431640305,
"cell_type": 4,
"value": "MTIzMDAy"
}
],
[
{
"row": "YzAwMw==",
"family": "Q19DcmVkaXQ=",
"timestamp": 1680431640363,
"cell_type": 4,
"value": "My4w"
},
{
"row": "YzAwMw==",
"family": "Q19OYW1l",
"timestamp": 1680431640352,
"cell_type": 4,
"value": "RW5nbGlzaA=="
},
{
"row": "YzAwMw==",
"family": "Q19Obw==",
"timestamp": 1680431640343,
"cell_type": 4,
"value": "MTIzMDAz"
}
]
]
1.3 向已创建好的表添加和删除指定的列族或列
HBase Shell 对应代码
put 'course','c001','C_Credit:new','5.0'
delete 'course','c001','C_Credit:new'
使用普通客户端进行put操作,需要准备一个item数据包含当前操作的列族或列以及对应的值。支持覆盖重写与新增。
func TableInsertRowCol(c *gin.Context {
table := c.PostForm("table"
rowKey := c.PostForm("rowKey"
colFamily := c.PostForm("colFamily"
col := c.PostForm("col"
val := c.PostForm("val"
var item map[string]map[string][]byte = make(map[string]map[string][]byte
item[colFamily] = make(map[string][]byte
item[colFamily][col] = []byte(val
fmt.Println(item
t, err := hrpc.NewPutStr(context.Background(, table, rowKey, item
checkError(err, c
res, err := variable.Client.Put(t
checkError(err, c
c.JSON(200, res
}
测试结果
{
"Cells": null,
"Stale": false,
"Partial": false,
"Exists": null
}
func TableDeleteRowCol(c *gin.Context {
table := c.PostForm("table"
rowKey := c.PostForm("rowKey"
colFamily := c.PostForm("colFamily"
col := c.PostForm("col"
// val := c.PostForm("val"
var item map[string]map[string][]byte = make(map[string]map[string][]byte
item[colFamily] = make(map[string][]byte
item[colFamily][col] = []byte{}
fmt.Println(item
t, err := hrpc.NewPutStr(context.Background(, table, rowKey, item
checkError(err, c
res, err := variable.Client.Delete(t
checkError(err, c
c.JSON(200, res
}
1.4 清空指定表的所有数据
HBase Shell 对应代码
truncate 'course'
先disable表,再删除表。注意:HBase Shell 的 truncate 在删除后会按原有结构重新创建表,而下面的接口只做禁用和删除,不会重建表。
func TableDelete(c *gin.Context {
t := hrpc.NewDisableTable(context.Background(, []byte(c.Query("table"
err := variable.AdminClient.DisableTable(t
checkError(err, c
t2 := hrpc.NewDeleteTable(context.Background(, []byte(c.Query("table"
err = variable.AdminClient.DeleteTable(t2
checkError(err, c
c.JSON(200, gin.H{"result": "删除成功"}
}
1.5 统计表的行数
HBase Shell 对应代码
count 'course'
修改1.2的代码
func ShowTableRowsCount(c *gin.Context {
var count int
t, err := hrpc.NewScan(context.Background(, []byte(c.Query("table"
checkError(err, c
res := variable.Client.Scan(t
row, err := res.Next(
for err != io.EOF && row != nil {
count++
row, err = res.Next(
}
c.JSON(200, count
}
localhost:1313/TableRowsCount?table=course 测试结果
3
3.1 创建表
func TableCreate(c *gin.Context {
table := c.PostForm("table"
fs := c.PostForm("fields"
var fields []string
// fmt.Println(table, fs
err := json.Unmarshal([]byte(fs, &fields
checkError(err, c
// 验证是否存在表
flag := false
t, err := hrpc.NewListTableNames(context.Background(
checkError(err, c
res, err := variable.AdminClient.ListTableNames(t
checkError(err, c
for _, v := range res {
if string(v.GetQualifier( == table {
flag = true
}
}
// 如存在删除表
if flag {
t := hrpc.NewDisableTable(context.Background(, []byte(table
err := variable.AdminClient.DisableTable(t
checkError(err, c
t2 := hrpc.NewDeleteTable(context.Background(, []byte(table
err = variable.AdminClient.DeleteTable(t2
checkError(err, c
}
// 插入新表
var items map[string]map[string]string = make(map[string]map[string]string
for _, v := range fields {
items[v] = make(map[string]string
}
t2 := hrpc.NewCreateTable(context.Background(, []byte(table, items
err = variable.AdminClient.CreateTable(t2
checkError(err, c
c.JSON(200, gin.H{"result": "创建成功"}
}
通过1.1函数接口我们可知test表已经存在,现在我们使用3.1函数接口重新创建该表,并为接下来的3.2函数接口调用做准备。
{
"result": "创建成功"
}
3.2 新增记录
为了增强健壮性,我们需要判断传入的fields、values参数个数是否一致,否则应当主动报错。
func TableInsertRow(c *gin.Context {
table := c.PostForm("table"
rowKey := c.PostForm("rowKey"
fs := c.PostForm("fields"
vs := c.PostForm("values"
var fields []string
var values []string
err := json.Unmarshal([]byte(fs, &fields
checkError(err, c
err = json.Unmarshal([]byte(vs, &values
checkError(err, c
if len(fields != len(values {
checkError(fmt.Errorf("数量不一致", c
}
var item map[string]map[string][]byte = make(map[string]map[string][]byte
for i, v := range fields {
vs := strings.Split(v, ":"
item[vs[0]] = make(map[string][]byte
if len(vs > 1 {
item[vs[0]][vs[1]] = []byte(values[i]
} else {
item[vs[0]][""] = []byte(values[i]
}
}
fmt.Println(item
t, err := hrpc.NewPutStr(context.Background(, table, rowKey, item
checkError(err, c
res, err := variable.Client.Put(t
checkError(err, c
c.JSON(200, res
}
3.3 通过列过滤数据
使用scanner实现,也可以通过过滤器实现。
// TODO USE FILTER
// item is the JSON shape of one cell with its byte fields carried as strings
// (row, family, qualifier and value are string-typed here).
type item struct {
// Row is serialized under the "row" key.
Row string `json:"row"`
// Family is serialized under the "family" key.
Family string `json:"family"`
// Qualifier is serialized under the "qualifier" key.
Qualifier string `json:"qualifier"`
// Timestamp is a pointer, so a nil value is encoded as JSON null.
Timestamp *uint64 `json:"timestamp"`
// Cell_type is a pointer, so a nil value is encoded as JSON null.
Cell_type *pb.CellType `json:"cell_type"`
// Value is serialized under the "value" key.
Value string `json:"value"`
}
func TableColumnScan(c *gin.Context {
table := c.Query("table"
column := c.Query("column"
vs := strings.Split(column, ":"
var items []item
t, err := hrpc.NewScan(context.Background(, []byte(table
checkError(err, c
res := variable.Client.Scan(t
row, err := res.Next(
for err != io.EOF && row != nil {
for _, v := range row.Cells {
if string(v.Family != vs[0] {
continue
}
if len(vs > 1 {
if string(v.Qualifier != vs[1] {
continue
}
}
fmt.Println(row.Cells
items = append(items, item{
Row: string(v.Row,
Family: string(v.Family,
Qualifier: string(v.Qualifier,
Timestamp: v.Timestamp,
Cell_type: v.CellType,
Value: string(v.Value,
}
}
row, err = res.Next(
}
c.JSON(200, items
}
再执行一遍1.3添加列的函数,调用接口,执行结果如下。
[
{
"row": "c001",
"family": "C_Credit",
"qualifier": "",
"timestamp": 1680431640294,
"cell_type": 4,
"value": "2.0"
},
{
"row": "c001",
"family": "C_Credit",
"qualifier": "new",
"timestamp": 1680434951646,
"cell_type": 4,
"value": "5.0"
},
{
"row": "c002",
"family": "C_Credit",
"qualifier": "",
"timestamp": 1680431640328,
"cell_type": 4,
"value": "5.0"
},
{
"row": "c003",
"family": "C_Credit",
"qualifier": "",
"timestamp": 1680431640363,
"cell_type": 4,
"value": "3.0"
}
]
localhost:1313/TableColumnScan?table=course&column=C_Credit:new
[
{
"row": "c001",
"family": "C_Credit",
"qualifier": "new",
"timestamp": 1680434951646,
"cell_type": 4,
"value": "5.0"
}
]
3.4 修改行数据
与 1.3 函数代码一致
3.5 删除表指定记录
package controller

import (
	"context"

	"github.com/gin-gonic/gin"
	"github.com/tsuna/gohbase/hrpc"
	"wolflong.com/hbase_gin/variable"
)

// TableDeleteRow deletes a record from a table.
//
// Form parameters: table and rowKey. The delete request is built with a nil
// column map, i.e. without naming any family or column. The result returned
// by the client is echoed back as JSON. Errors go through checkError (500 +
// abort).
func TableDeleteRow(c *gin.Context) {
	table := c.PostForm("table")
	rowKey := c.PostForm("rowKey")

	del, err := hrpc.NewDelStr(context.Background(), table, rowKey, nil)
	checkError(err, c)
	res, err := variable.Client.Delete(del)
	checkError(err, c)
	c.JSON(200, res)
}
{
"Cells": null,
"Stale": false,
"Partial": false,
"Exists": null
}
再次调用1.5函数接口,执行结果符合预期。
2
总结
总共花费6.5个小时,共编写333行代码,56行表数据。
代码编写能力得到了提升。提高了自己对HBase的理解,作为一个典型的NoSQL数据库,其一大优点是可在廉价PC服务器上搭建起大规模结构化存储集群,并提供易使用的HBase Shell操作数据集,水平扩展方便。