# 🔍 Description ## Issue References 🔗 This pull request fixes #6281 ## Describe Your Solution 🔧 This change initializes a CI job to run unit tests for the Python client, including: - Set up a GitHub Action based on docker-compose - Update test cases; tests succeed for the dialects `presto` and `trino` - Temporarily disable Hive-related tests because the test cases are not valid (not a connection issue) - Update dev dependencies to support Python 3.10 - Speed up testing with the `pytest-xdist` plugin ## Types of changes 🔖 - [ ] Bugfix (non-breaking change which fixes an issue) - [ ] New feature (non-breaking change which adds functionality) - [ ] Breaking change (fix or feature that would cause existing functionality to change) ## Test Plan 🧪 #### Behavior Without This Pull Request ⚰️ Not able to run unit tests locally or on CI #### Behavior With This Pull Request 🎉 Able to run and partially cover a number of test cases #### Related Unit Tests No ## Additional notes The next action is to fix the failing test cases, or to consider skipping some of them if necessary --- # Checklist 📝 - [ ] This patch was not authored or co-authored using [Generative Tooling](https://www.apache.org/legal/generative-tooling.html) **Be nice. Be informative.** Closes #6343 from sudohainguyen/ci/init. Closes #6281 682e575c4 [Harry] Remove xdist out of scope dc42ca1ff [Harry] Pin pytest packages version 469f1d955 [Harry] Pin ubuntu version 00cef476a [Harry] Use v4 checkout action 96ef83148 [Harry] Remove unnecessary steps 732344a2c [Harry] Add step to tear down containers 1e2c2481a [Harry] Resolved trino and presto test 5b33e3924 [Harry] Make tests runnable 1be033ba3 [Harry] Remove randome flag which causes failed test run 2bc6dc036 [Harry] Switch action setup provider to docker ea2a76319 [Harry] Initialize github action for python unit testing Authored-by: Harry <quanghai.ng1512@gmail.com> Signed-off-by: Cheng Pan <chengpan@apache.org>
241 lines
9.9 KiB
Python
241 lines
9.9 KiB
Python
from __future__ import absolute_import
|
|
from __future__ import unicode_literals
|
|
from builtins import str
|
|
from pyhive.sqlalchemy_hive import HiveDate
|
|
from pyhive.sqlalchemy_hive import HiveDecimal
|
|
from pyhive.sqlalchemy_hive import HiveTimestamp
|
|
from sqlalchemy.exc import NoSuchTableError, OperationalError
|
|
import pytest
|
|
from pyhive.tests.sqlalchemy_test_case import SqlAlchemyTestCase
|
|
from pyhive.tests.sqlalchemy_test_case import with_engine_connection
|
|
from sqlalchemy import types
|
|
from sqlalchemy.engine import create_engine
|
|
from sqlalchemy.schema import Column
|
|
from sqlalchemy.schema import MetaData
|
|
from sqlalchemy.schema import Table
|
|
from sqlalchemy.sql import text
|
|
import contextlib
|
|
import datetime
|
|
import decimal
|
|
import sqlalchemy.types
|
|
import unittest
|
|
import re
|
|
|
|
# "major.minor" of the installed SQLAlchemy as a float (e.g. 1.4, 2.0) so the
# tests below can branch on API differences such as Row._mapping (new in 1.4).
# The regex no longer requires a trailing patch component, so a bare "2.0"
# version string parses instead of raising AttributeError on a None match.
sqlalchemy_version = float(re.search(r"^(\d+\.\d+)", sqlalchemy.__version__).group(1))
|
|
|
|
# Expected values for ``SELECT * FROM one_row_complex``, one entry per column
# in table order (the Hive column types are listed in the commented-out block
# below).  Complex types come back rendered as strings; the decimal column is
# converted to decimal.Decimal by HiveDecimal.
_ONE_ROW_COMPLEX_CONTENTS = [
    True,                           # boolean
    127,                            # tinyint
    32767,                          # smallint
    2147483647,                     # int
    9223372036854775807,            # bigint
    0.5,                            # float
    0.25,                           # double
    'a string',                     # string
    datetime.datetime(1970, 1, 1),  # timestamp
    b'123',                         # binary
    '[1,2]',                        # array<int>
    '{1:2,3:4}',                    # map<int,int>
    '{"a":1,"b":2}',                # struct<a:int,b:int>
    '{0:1}',                        # uniontype<int,string>
    decimal.Decimal('0.1'),         # decimal(10,1)
]
|
|
|
|
|
|
# [
|
|
# ('boolean', 'boolean', ''),
|
|
# ('tinyint', 'tinyint', ''),
|
|
# ('smallint', 'smallint', ''),
|
|
# ('int', 'int', ''),
|
|
# ('bigint', 'bigint', ''),
|
|
# ('float', 'float', ''),
|
|
# ('double', 'double', ''),
|
|
# ('string', 'string', ''),
|
|
# ('timestamp', 'timestamp', ''),
|
|
# ('binary', 'binary', ''),
|
|
# ('array', 'array<int>', ''),
|
|
# ('map', 'map<int,int>', ''),
|
|
# ('struct', 'struct<a:int,b:int>', ''),
|
|
# ('union', 'uniontype<int,string>', ''),
|
|
# ('decimal', 'decimal(10,1)', '')
|
|
# ]
|
|
|
|
|
|
@pytest.mark.skip(reason="Temporarily disabled")
class TestSqlAlchemyHive(unittest.TestCase, SqlAlchemyTestCase):
    """SQLAlchemy Hive dialect tests run against a live HiveServer2.

    These tests assume a HiveServer2 on localhost:10000 with the pyhive test
    fixtures available (``one_row``, ``one_row_complex`` in ``default`` and a
    ``pyhive_test_database`` schema).  The whole class is currently skipped
    (see the marker above) until the Hive test cases are fixed.
    """

    def create_engine(self):
        """Engine factory used by SqlAlchemyTestCase and with_engine_connection."""
        return create_engine('hive://localhost:10000/default')

    @with_engine_connection
    def test_dotted_column_names(self, engine, connection):
        """When Hive returns a dotted column name, both the non-dotted version should be available
        as an attribute, and the dotted version should remain available as a key.
        """
        row = connection.execute(text('SELECT * FROM one_row')).fetchone()

        # SQLAlchemy >= 1.4 exposes mapping-style access via Row._mapping.
        if sqlalchemy_version >= 1.4:
            row = row._mapping

        assert row.keys() == ['number_of_rows']
        assert 'number_of_rows' in row
        assert row.number_of_rows == 1
        assert row['number_of_rows'] == 1
        # The fully qualified "table.column" spelling must also keep working.
        assert getattr(row, 'one_row.number_of_rows') == 1
        assert row['one_row.number_of_rows'] == 1

    @with_engine_connection
    def test_dotted_column_names_raw(self, engine, connection):
        """When Hive returns a dotted column name, and raw mode is on, nothing should be modified.
        """
        row = connection.execution_options(hive_raw_colnames=True).execute(text('SELECT * FROM one_row')).fetchone()

        # SQLAlchemy >= 1.4 exposes mapping-style access via Row._mapping.
        if sqlalchemy_version >= 1.4:
            row = row._mapping

        # Only the raw dotted name is exposed; no stripped alias is added.
        assert row.keys() == ['one_row.number_of_rows']
        assert 'number_of_rows' not in row
        assert getattr(row, 'one_row.number_of_rows') == 1
        assert row['one_row.number_of_rows'] == 1

    @with_engine_connection
    def test_reflect_no_such_table(self, engine, connection):
        """reflecttable should throw an exception on an invalid table"""
        self.assertRaises(
            NoSuchTableError,
            lambda: Table('this_does_not_exist', MetaData(), autoload_with=engine))
        # A nonexistent schema surfaces as OperationalError, not NoSuchTableError.
        self.assertRaises(
            OperationalError,
            lambda: Table('this_does_not_exist', MetaData(schema="also_does_not_exist"), autoload_with=engine))

    @with_engine_connection
    def test_reflect_select(self, engine, connection):
        """reflecttable should be able to fill in a table from the name"""
        one_row_complex = Table('one_row_complex', MetaData(), autoload_with=engine)
        self.assertEqual(len(one_row_complex.c), 15)
        self.assertIsInstance(one_row_complex.c.string, Column)
        row = connection.execute(one_row_complex.select()).fetchone()
        self.assertEqual(list(row), _ONE_ROW_COMPLEX_CONTENTS)

        # TODO some of these types could be filled in better
        self.assertIsInstance(one_row_complex.c.boolean.type, types.Boolean)
        self.assertIsInstance(one_row_complex.c.tinyint.type, types.Integer)
        self.assertIsInstance(one_row_complex.c.smallint.type, types.Integer)
        self.assertIsInstance(one_row_complex.c.int.type, types.Integer)
        self.assertIsInstance(one_row_complex.c.bigint.type, types.BigInteger)
        self.assertIsInstance(one_row_complex.c.float.type, types.Float)
        self.assertIsInstance(one_row_complex.c.double.type, types.Float)
        self.assertIsInstance(one_row_complex.c.string.type, types.String)
        self.assertIsInstance(one_row_complex.c.timestamp.type, HiveTimestamp)
        self.assertIsInstance(one_row_complex.c.binary.type, types.String)
        # Complex Hive types (array/map/struct/union) reflect as plain strings.
        self.assertIsInstance(one_row_complex.c.array.type, types.String)
        self.assertIsInstance(one_row_complex.c.map.type, types.String)
        self.assertIsInstance(one_row_complex.c.struct.type, types.String)
        self.assertIsInstance(one_row_complex.c.union.type, types.String)
        self.assertIsInstance(one_row_complex.c.decimal.type, HiveDecimal)

    @with_engine_connection
    def test_type_map(self, engine, connection):
        """sqlalchemy should use the dbapi_type_map to infer types from raw queries"""
        row = connection.execute(text('SELECT * FROM one_row_complex')).fetchone()
        self.assertListEqual(list(row), _ONE_ROW_COMPLEX_CONTENTS)

    @with_engine_connection
    def test_reserved_words(self, engine, connection):
        """Hive uses backticks"""
        # Use keywords for the table/column name
        fake_table = Table('select', MetaData(), Column('map', sqlalchemy.types.String))
        query = str(fake_table.select().where(fake_table.c.map == 'a').compile(engine))
        self.assertIn('`select`', query)
        self.assertIn('`map`', query)
        # ANSI double-quote quoting must NOT be used by the Hive dialect.
        self.assertNotIn('"select"', query)
        self.assertNotIn('"map"', query)

    def test_switch_database(self):
        """USE <db> should switch the connection's current database."""
        engine = create_engine('hive://localhost:10000/pyhive_test_database')
        try:
            with contextlib.closing(engine.connect()) as connection:
                self.assertIn(
                    ('dummy_table',),
                    connection.execute(text('SHOW TABLES')).fetchall()
                )
                connection.execute(text('USE default'))
                self.assertIn(
                    ('one_row',),
                    connection.execute(text('SHOW TABLES')).fetchall()
                )
        finally:
            engine.dispose()

    @with_engine_connection
    def test_lots_of_types(self, engine, connection):
        # Presto doesn't have raw CREATE TABLE support, so we only test hive
        # take type list from sqlalchemy.types
        # (named type_names rather than `types` to avoid shadowing the
        # module-level `from sqlalchemy import types` import)
        type_names = [
            'INT', 'CHAR', 'VARCHAR', 'NCHAR', 'TEXT', 'Text', 'FLOAT',
            'NUMERIC', 'DECIMAL', 'TIMESTAMP', 'DATETIME', 'CLOB', 'BLOB',
            'BOOLEAN', 'SMALLINT', 'DATE', 'TIME',
            'String', 'Integer', 'SmallInteger',
            'Numeric', 'Float', 'DateTime', 'Date', 'Time', 'LargeBinary',
            'Boolean', 'Unicode', 'UnicodeText',
        ]
        cols = []
        for i, t in enumerate(type_names):
            cols.append(Column(str(i), getattr(sqlalchemy.types, t)))
        cols.append(Column('hive_date', HiveDate))
        cols.append(Column('hive_decimal', HiveDecimal))
        cols.append(Column('hive_timestamp', HiveTimestamp))
        table = Table('test_table', MetaData(schema='pyhive_test_database'), *cols,)
        table.drop(checkfirst=True, bind=connection)
        table.create(bind=connection)
        # Run the MapReduce job locally so no cluster job tracker is needed.
        connection.execute(text('SET mapred.job.tracker=local'))
        connection.execute(text('USE pyhive_test_database'))
        big_number = 10 ** 10 - 1
        connection.execute(text("""
        INSERT OVERWRITE TABLE test_table
        SELECT
            1, "a", "a", "a", "a", "a", 0.1,
            0.1, 0.1, 0, 0, "a", "a",
            false, 1, 0, 0,
            "a", 1, 1,
            0.1, 0.1, 0, 0, 0, "a",
            false, "a", "a",
            0, :big_number, 123 + 2000
        FROM default.one_row
        """), {"big_number": big_number})
        row = connection.execute(text("select * from test_table")).fetchone()
        self.assertEqual(row.hive_date, datetime.datetime(1970, 1, 1, 0, 0))
        self.assertEqual(row.hive_decimal, decimal.Decimal(big_number))
        self.assertEqual(row.hive_timestamp, datetime.datetime(1970, 1, 1, 0, 0, 2, 123000))
        table.drop(bind=connection)

    @with_engine_connection
    def test_insert_select(self, engine, connection):
        """INSERT ... SELECT through the SQLAlchemy expression API."""
        one_row = Table('one_row', MetaData(), autoload_with=engine)
        table = Table('insert_test', MetaData(schema='pyhive_test_database'),
                      Column('a', sqlalchemy.types.Integer))
        table.drop(checkfirst=True, bind=connection)
        table.create(bind=connection)
        # Run the MapReduce job locally so no cluster job tracker is needed.
        connection.execute(text('SET mapred.job.tracker=local'))
        # NOTE(jing) I'm stuck on a version of Hive without INSERT ... VALUES
        connection.execute(table.insert().from_select(['a'], one_row.select()))

        result = connection.execute(table.select()).fetchall()
        expected = [(1,)]
        self.assertEqual(result, expected)

    @with_engine_connection
    def test_insert_values(self, engine, connection):
        """Multi-row INSERT ... VALUES through the SQLAlchemy expression API."""
        table = Table('insert_test', MetaData(schema='pyhive_test_database'),
                      Column('a', sqlalchemy.types.Integer),)
        table.drop(checkfirst=True, bind=connection)
        table.create(bind=connection)
        connection.execute(table.insert().values([{'a': 1}, {'a': 2}]))

        result = connection.execute(table.select()).fetchall()
        expected = [(1,), (2,)]
        self.assertEqual(result, expected)

    @with_engine_connection
    def test_supports_san_rowcount(self, engine, connection):
        # NOTE(review): "san" looks like a typo for "sane"; the method name is
        # kept unchanged so the test id stays stable.
        self.assertFalse(engine.dialect.supports_sane_rowcount_returning)
|