From a0b9873f817267675eab304f6935bafa4ab0f731 Mon Sep 17 00:00:00 2001 From: "wenjie.wang01" Date: Fri, 21 Jun 2024 19:03:43 +0800 Subject: [PATCH] [KYUUBI #6489] [PYTHON] PyKyuubi get_table_names also supports Spark SQL dialect MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # :mag: Description ## Issue References 🔗 This pull request fixes #6489 ## Describe Your Solution 🔧 After investigating, I found the bug and a solution. The function get_table_names returns an incorrect value when I use Superset to connect to Kyuubi for Spark SQL. [get_table_names](https://github.com/apache/kyuubi/blob/master/python/pyhive/sqlalchemy_hive.py#L380) The following code is used when connecting to Hive directly: `return [row[0] for row in connection.execute(text(query))]` because the following value is returned when connected to Hive. show tables in default : [('student',), ('student_scores',)] The following code is needed when connecting to Kyuubi: `return [row[1] for row in connection.execute(text(query))]` because the following value is returned when connected to Kyuubi. show tables in default : [('default', 'employees', False), ('default', 'student', False), ('default', 'student_scores', False)] So, to handle the difference in return values, I modified the code, and I tested the changes in Superset. The code works. Hive image Kyuubi image Spark SQL also works properly. image ## Types of changes :bookmark: - [x] Bugfix (non-breaking change which fixes an issue) - [ ] New feature (non-breaking change which adds functionality) - [ ] Breaking change (fix or feature that would cause existing functionality to change) ## Test Plan 🧪 #### Behavior Without This Pull Request :coffin: #### Behavior With This Pull Request :tada: #### Related Unit Tests --- # Checklist 📝 - [ ] This patch was not authored or co-authored using [Generative Tooling](https://www.apache.org/legal/generative-tooling.html) **Be nice.
Be informative.** Closes #6490 from BruceWong96/branch-kyuubi-6489. Closes #6489 94a52c0e5 [wenjie.wang01] add else branch. 8ab20becf [wenjie.wang01] fix bug for function get_table_names. 136c7b795 [wenjie.wang01] fix bug for function get_table_names. Authored-by: wenjie.wang01 Signed-off-by: Cheng Pan --- python/pyhive/sqlalchemy_hive.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/python/pyhive/sqlalchemy_hive.py b/python/pyhive/sqlalchemy_hive.py index e22445259..355f296da 100644 --- a/python/pyhive/sqlalchemy_hive.py +++ b/python/pyhive/sqlalchemy_hive.py @@ -10,6 +10,7 @@ from __future__ import unicode_literals import datetime import decimal +import logging import re from sqlalchemy import exc @@ -39,6 +40,7 @@ from pyhive.common import UniversalSet from dateutil.parser import parse from decimal import Decimal +_logger = logging.getLogger(__name__) class HiveStringTypeBase(types.TypeDecorator): """Translates strings returned by Thrift into something else""" @@ -377,7 +379,21 @@ class HiveDialect(default.DefaultDialect): query = 'SHOW TABLES' if schema: query += ' IN ' + self.identifier_preparer.quote_identifier(schema) - return [row[0] for row in connection.execute(text(query))] + + table_names = [] + + for row in connection.execute(text(query)): + # Hive returns 1 columns + if len(row) == 1: + table_names.append(row[0]) + # Spark SQL returns 3 columns + elif len(row) == 3: + table_names.append(row[1]) + else: + _logger.warning("Unexpected number of columns in SHOW TABLES result: {}".format(len(row))) + table_names.append('UNKNOWN') + + return table_names def do_rollback(self, dbapi_connection): # No transactions for Hive