kyuubi/python/pyhive/tests/test_sqlalchemy_hive.py
Harry 9075fbb623
[KYUUBI #6281][PY] Initialize github action for python unit testing
# 🔍 Description
## Issue References 🔗

This pull request fixes #6281

## Describe Your Solution 🔧

This change initializes a CI job to run unit tests on the Python client, including:
- Set up a GitHub Actions workflow based on docker-compose
- Update test cases so that tests succeed for the `presto` and `trino` dialects
- Temporarily disable Hive-related tests because the test cases themselves are not valid (the failures are not connection-related)
- Update dev dependencies to support Python 3.10
- Speed up testing with the `pytest-xdist` plugin

## Types of changes 🔖

- [ ] Bugfix (non-breaking change which fixes an issue)
- [ ] New feature (non-breaking change which adds functionality)
- [ ] Breaking change (fix or feature that would cause existing functionality to change)

## Test Plan 🧪

#### Behavior Without This Pull Request ⚰️
Not able to run unit tests locally or on CI

#### Behavior With This Pull Request 🎉
Able to run and partially cover a couple of test cases

#### Related Unit Tests
No

## Additional notes
The next step is to fix the failing test cases, or to consider skipping some of them if necessary

---

# Checklist 📝

- [ ] This patch was not authored or co-authored using [Generative Tooling](https://www.apache.org/legal/generative-tooling.html)

**Be nice. Be informative.**

Closes #6343 from sudohainguyen/ci/init.

Closes #6281

682e575c4 [Harry] Remove xdist out of scope
dc42ca1ff [Harry] Pin pytest packages version
469f1d955 [Harry] Pin ubuntu version
00cef476a [Harry] Use v4 checkout action
96ef83148 [Harry] Remove unnecessary steps
732344a2c [Harry] Add step to tear down containers
1e2c2481a [Harry] Resolved trino and presto test
5b33e3924 [Harry] Make tests runnable
1be033ba3 [Harry] Remove randome flag which causes failed test run
2bc6dc036 [Harry] Switch action setup provider to docker
ea2a76319 [Harry] Initialize github action for python unit testing

Authored-by: Harry <quanghai.ng1512@gmail.com>
Signed-off-by: Cheng Pan <chengpan@apache.org>
2024-05-07 18:05:03 +08:00

241 lines
9.9 KiB
Python

from __future__ import absolute_import
from __future__ import unicode_literals
from builtins import str
from pyhive.sqlalchemy_hive import HiveDate
from pyhive.sqlalchemy_hive import HiveDecimal
from pyhive.sqlalchemy_hive import HiveTimestamp
from sqlalchemy.exc import NoSuchTableError, OperationalError
import pytest
from pyhive.tests.sqlalchemy_test_case import SqlAlchemyTestCase
from pyhive.tests.sqlalchemy_test_case import with_engine_connection
from sqlalchemy import types
from sqlalchemy.engine import create_engine
from sqlalchemy.schema import Column
from sqlalchemy.schema import MetaData
from sqlalchemy.schema import Table
from sqlalchemy.sql import text
import contextlib
import datetime
import decimal
import sqlalchemy.types
import unittest
import re
# "<major>.<minor>" of the installed SQLAlchemy as a float (e.g. 1.4); the tests
# in this file compare it against 1.4 to decide whether to use ``row._mapping``.
# Only the leading "<major>.<minor>" is required, so a version string without a
# patch component parses as well.
# NOTE(review): a float cannot order two-digit minors ("1.10" -> 1.1). That is
# harmless for the single 1.4 gate used here; switch to a tuple if more gates
# are ever added.
sqlalchemy_version = float(re.match(r"(\d+\.\d+)", sqlalchemy.__version__).group(1))

# Expected contents of the single row of ``one_row_complex``, in column order.
# Each value lines up positionally with the column listing in the comment below.
_ONE_ROW_COMPLEX_CONTENTS = [
    True,
    127,
    32767,
    2147483647,
    9223372036854775807,
    0.5,
    0.25,
    'a string',
    datetime.datetime(1970, 1, 1),
    b'123',
    '[1,2]',
    '{1:2,3:4}',
    '{"a":1,"b":2}',
    '{0:1}',
    decimal.Decimal('0.1'),
]

# Column listing the values above correspond to, one entry per value
# (presumably (name, type, comment) rows as reported by Hive for
# ``one_row_complex`` -- confirm against the fixture script):
# [
#     ('boolean', 'boolean', ''),
#     ('tinyint', 'tinyint', ''),
#     ('smallint', 'smallint', ''),
#     ('int', 'int', ''),
#     ('bigint', 'bigint', ''),
#     ('float', 'float', ''),
#     ('double', 'double', ''),
#     ('string', 'string', ''),
#     ('timestamp', 'timestamp', ''),
#     ('binary', 'binary', ''),
#     ('array', 'array<int>', ''),
#     ('map', 'map<int,int>', ''),
#     ('struct', 'struct<a:int,b:int>', ''),
#     ('union', 'uniontype<int,string>', ''),
#     ('decimal', 'decimal(10,1)', '')
# ]
@pytest.mark.skip(reason="Temporarily disabled")
class TestSqlAlchemyHive(unittest.TestCase, SqlAlchemyTestCase):
    """SQLAlchemy dialect tests executed against a Hive server on localhost:10000.

    The whole class is currently skipped through ``pytest.mark.skip``.
    Tests decorated with ``with_engine_connection`` receive an ``engine`` and a
    ``connection`` argument from that decorator.
    """

    def create_engine(self):
        """Return an engine pointing at the ``default`` database of the local Hive server."""
        return create_engine('hive://localhost:10000/default')

    @with_engine_connection
    def test_dotted_column_names(self, engine, connection):
        """When Hive returns a dotted column name, both the non-dotted version should be available
        as an attribute, and the dotted version should remain available as a key.
        """
        row = connection.execute(text('SELECT * FROM one_row')).fetchone()
        if sqlalchemy_version >= 1.4:
            # From SQLAlchemy 1.4 on, key-based access goes through the row's mapping view.
            row = row._mapping
        assert row.keys() == ['number_of_rows']
        assert 'number_of_rows' in row
        assert row.number_of_rows == 1
        assert row['number_of_rows'] == 1
        assert getattr(row, 'one_row.number_of_rows') == 1
        assert row['one_row.number_of_rows'] == 1

    @with_engine_connection
    def test_dotted_column_names_raw(self, engine, connection):
        """When Hive returns a dotted column name, and raw mode is on, nothing should be modified.
        """
        raw_connection = connection.execution_options(hive_raw_colnames=True)
        row = raw_connection.execute(text('SELECT * FROM one_row')).fetchone()
        if sqlalchemy_version >= 1.4:
            # From SQLAlchemy 1.4 on, key-based access goes through the row's mapping view.
            row = row._mapping
        assert row.keys() == ['one_row.number_of_rows']
        assert 'number_of_rows' not in row
        assert getattr(row, 'one_row.number_of_rows') == 1
        assert row['one_row.number_of_rows'] == 1

    @with_engine_connection
    def test_reflect_no_such_table(self, engine, connection):
        """reflecttable should throw an exception on an invalid table"""
        # Unknown table in an existing schema.
        with self.assertRaises(NoSuchTableError):
            Table('this_does_not_exist', MetaData(), autoload_with=engine)
        # Unknown table in an unknown schema surfaces as an OperationalError instead.
        with self.assertRaises(OperationalError):
            Table(
                'this_does_not_exist',
                MetaData(schema="also_does_not_exist"),
                autoload_with=engine,
            )

    @with_engine_connection
    def test_reflect_select(self, engine, connection):
        """reflecttable should be able to fill in a table from the name"""
        one_row_complex = Table('one_row_complex', MetaData(), autoload_with=engine)
        self.assertEqual(len(one_row_complex.c), 15)
        self.assertIsInstance(one_row_complex.c.string, Column)
        row = connection.execute(one_row_complex.select()).fetchone()
        self.assertEqual(list(row), _ONE_ROW_COMPLEX_CONTENTS)

        # TODO some of these types could be filled in better
        expected_column_types = [
            ('boolean', types.Boolean),
            ('tinyint', types.Integer),
            ('smallint', types.Integer),
            ('int', types.Integer),
            ('bigint', types.BigInteger),
            ('float', types.Float),
            ('double', types.Float),
            ('string', types.String),
            ('timestamp', HiveTimestamp),
            ('binary', types.String),
            ('array', types.String),
            ('map', types.String),
            ('struct', types.String),
            ('union', types.String),
            ('decimal', HiveDecimal),
        ]
        for column_name, expected_type in expected_column_types:
            # The column name is passed as the message so a failure says which column is wrong.
            self.assertIsInstance(
                one_row_complex.c[column_name].type, expected_type, column_name)

    @with_engine_connection
    def test_type_map(self, engine, connection):
        """sqlalchemy should use the dbapi_type_map to infer types from raw queries"""
        row = connection.execute(text('SELECT * FROM one_row_complex')).fetchone()
        self.assertListEqual(list(row), _ONE_ROW_COMPLEX_CONTENTS)

    @with_engine_connection
    def test_reserved_words(self, engine, connection):
        """Hive uses backticks"""
        # Use keywords for the table/column name
        fake_table = Table('select', MetaData(), Column('map', sqlalchemy.types.String))
        query = str(fake_table.select().where(fake_table.c.map == 'a').compile(engine))
        self.assertIn('`select`', query)
        self.assertIn('`map`', query)
        self.assertNotIn('"select"', query)
        self.assertNotIn('"map"', query)

    def test_switch_database(self):
        """Connecting to a named database, then ``USE default``, changes what SHOW TABLES lists."""
        engine = create_engine('hive://localhost:10000/pyhive_test_database')
        try:
            with contextlib.closing(engine.connect()) as connection:
                self.assertIn(
                    ('dummy_table',),
                    connection.execute(text('SHOW TABLES')).fetchall()
                )
                connection.execute(text('USE default'))
                self.assertIn(
                    ('one_row',),
                    connection.execute(text('SHOW TABLES')).fetchall()
                )
        finally:
            engine.dispose()

    @with_engine_connection
    def test_lots_of_types(self, engine, connection):
        """Create a table with one column per SQLAlchemy type name and read the Hive types back."""
        # Presto doesn't have raw CREATE TABLE support, so we only test hive
        # take type list from sqlalchemy.types
        type_names = [
            'INT', 'CHAR', 'VARCHAR', 'NCHAR', 'TEXT', 'Text', 'FLOAT',
            'NUMERIC', 'DECIMAL', 'TIMESTAMP', 'DATETIME', 'CLOB', 'BLOB',
            'BOOLEAN', 'SMALLINT', 'DATE', 'TIME',
            'String', 'Integer', 'SmallInteger',
            'Numeric', 'Float', 'DateTime', 'Date', 'Time', 'LargeBinary',
            'Boolean', 'Unicode', 'UnicodeText',
        ]
        # Columns are named after their position ("0", "1", ...) in type_names.
        cols = [
            Column(str(index), getattr(sqlalchemy.types, type_name))
            for index, type_name in enumerate(type_names)
        ]
        cols.append(Column('hive_date', HiveDate))
        cols.append(Column('hive_decimal', HiveDecimal))
        cols.append(Column('hive_timestamp', HiveTimestamp))
        table = Table('test_table', MetaData(schema='pyhive_test_database'), *cols)
        table.drop(checkfirst=True, bind=connection)
        table.create(bind=connection)
        try:
            connection.execute(text('SET mapred.job.tracker=local'))
            connection.execute(text('USE pyhive_test_database'))
            big_number = 10 ** 10 - 1
            connection.execute(text("""
                INSERT OVERWRITE TABLE test_table
                SELECT
                    1, "a", "a", "a", "a", "a", 0.1,
                    0.1, 0.1, 0, 0, "a", "a",
                    false, 1, 0, 0,
                    "a", 1, 1,
                    0.1, 0.1, 0, 0, 0, "a",
                    false, "a", "a",
                    0, :big_number, 123 + 2000
                FROM default.one_row
                """), {"big_number": big_number})
            row = connection.execute(text("select * from test_table")).fetchone()
            self.assertEqual(row.hive_date, datetime.datetime(1970, 1, 1, 0, 0))
            self.assertEqual(row.hive_decimal, decimal.Decimal(big_number))
            self.assertEqual(
                row.hive_timestamp, datetime.datetime(1970, 1, 1, 0, 0, 2, 123000))
        finally:
            # Drop the table even when an insert or assertion above fails.
            table.drop(bind=connection)

    @with_engine_connection
    def test_insert_select(self, engine, connection):
        """INSERT ... SELECT from ``one_row`` into a fresh table yields that single row."""
        one_row = Table('one_row', MetaData(), autoload_with=engine)
        table = Table('insert_test', MetaData(schema='pyhive_test_database'),
                      Column('a', sqlalchemy.types.Integer))
        table.drop(checkfirst=True, bind=connection)
        table.create(bind=connection)
        connection.execute(text('SET mapred.job.tracker=local'))
        # NOTE(jing) I'm stuck on a version of Hive without INSERT ... VALUES
        connection.execute(table.insert().from_select(['a'], one_row.select()))

        result = connection.execute(table.select()).fetchall()
        expected = [(1,)]
        self.assertEqual(result, expected)

    @with_engine_connection
    def test_insert_values(self, engine, connection):
        """A multi-row INSERT ... VALUES stores both rows in a fresh table."""
        table = Table('insert_test', MetaData(schema='pyhive_test_database'),
                      Column('a', sqlalchemy.types.Integer))
        table.drop(checkfirst=True, bind=connection)
        table.create(bind=connection)
        connection.execute(table.insert().values([{'a': 1}, {'a': 2}]))

        result = connection.execute(table.select()).fetchall()
        expected = [(1,), (2,)]
        self.assertEqual(result, expected)

    @with_engine_connection
    def test_supports_san_rowcount(self, engine, connection):
        """The dialect must report ``supports_sane_rowcount_returning`` as false."""
        self.assertFalse(engine.dialect.supports_sane_rowcount_returning)