[Tool] Detect SQL-Tester conf and filter the cases (#50336)

Signed-off-by: AndyZiYe <yeziyu@starrocks.com>
andyziye 2024-09-04 09:38:21 +08:00 committed by GitHub
parent 58222401f6
commit 4eff9df108
7 changed files with 377 additions and 124 deletions

View File

@ -592,7 +592,7 @@ jobs:
build:
runs-on: [self-hosted, normal]
needs: [test-checker, clang-tidy, fe-ut, thirdparty-info]
needs: [test-checker, be-ut, fe-ut, thirdparty-info]
name: BUILD
env:
PR_NUMBER: ${{ github.event.number }}
@ -606,7 +606,7 @@ jobs:
is_self_build: ${{ steps.run_build.outputs.is_self_build }}
build_nece: ${{ steps.check-necessity.outputs.BUILD_NECE }}
if: >
always() && needs.clang-tidy.result != 'failure' && needs.fe-ut.result != 'failure'
always() && needs.be-ut.result != 'failure' && needs.fe-ut.result != 'failure' && (needs.be-ut.result == 'success' || needs.fe-ut.result == 'success' || needs.test-checker.result == 'success')
steps:
- name: CLEAN
run: |
@ -666,9 +666,9 @@ jobs:
- name: Check necessity
id: check-necessity
if: >
(needs.clang-tidy.result == 'success' && needs.fe-ut.result == 'success') ||
(needs.be-ut.result == 'success' && needs.fe-ut.result == 'success') ||
(steps.parsing-be-path-filter.outputs.src_filter != 'true' && steps.parsing-fe-path-filter.outputs.src_filter == 'true' && needs.fe-ut.result == 'success') ||
(steps.parsing-fe-path-filter.outputs.src_filter != 'true' && steps.parsing-be-path-filter.outputs.src_filter == 'true' && needs.clang-tidy.result == 'success') ||
(steps.parsing-fe-path-filter.outputs.src_filter != 'true' && steps.parsing-be-path-filter.outputs.src_filter == 'true' && needs.be-ut.result == 'success') ||
(steps.parsing-be-path-filter.outputs.src_filter != 'true' && steps.parsing-fe-path-filter.outputs.src_filter != 'true' && needs.test-checker.outputs.output1 == 'true')
run: |
echo "BUILD_NECE=true" >> $GITHUB_OUTPUT

View File

@ -1,69 +1,74 @@
[mysql-client]
host =
port =
user =
password =
http_port =
host_user =
host_password =
cluster_path =
[cluster]
host =
port =
user =
password =
http_port =
host_user =
host_password =
cluster_path =
[trino-client]
host =
port =
user =
[client]
[.trino-client]
host =
port =
user =
[hive-client]
host =
port =
user =
[spark-client]
host =
port =
user =
[.hive-client]
host =
port =
user =
[.spark-client]
host =
port =
user =
[replace]
url = http://${mysql-client:host}:${mysql-client:http_port}
mysql_cmd = mysql -h${mysql-client:host} -P${mysql-client:port} -u${mysql-client:user}
url = http://${cluster.host}:${cluster.http_port}
mysql_cmd = mysql -h${cluster.host} -P${cluster.port} -u${cluster.user}
[env]
oss_bucket =
oss_ak =
oss_sk =
oss_region =
oss_endpoint =
hdfs_host =
hdfs_port =
hdfs_user =
hdfs_passwd =
hdfs_path = /starrocks_ci_data
hdfs_broker_name = hdfs_broker
[.oss]
oss_bucket =
oss_ak =
oss_sk =
oss_region =
oss_endpoint =
hive_metastore_uris =
hudi_hive_metastore_uris =
iceberg_catalog_hive_metastore_uris =
deltalake_catalog_hive_metastore_uris =
external_mysql_ip =
external_mysql_port =
external_mysql_user =
external_mysql_password =
jdbc_url =
aws_ak =
aws_sk =
aws_region =
aws_assume_role =
aws_sts_region =
aws_sts_endpoint =
udf_url = http://starrocks-thirdparty.oss-cn-zhangjiakou.aliyuncs.com
[.hdfs]
hdfs_host =
hdfs_port =
hdfs_user =
hdfs_passwd =
hdfs_path = /starrocks_ci_data
hdfs_broker_name = hdfs_broker
[.hive]
hive_metastore_uris =
deltalake_catalog_hive_metastore_uris =
hudi_hive_metastore_uris =
iceberg_catalog_hive_metastore_uris =
[.kafka]
broker_list =
kafka_tool_path =
[.mysql]
external_mysql_ip =
external_mysql_port =
external_mysql_user =
external_mysql_password =
jdbc_url =
[.aws]
aws_ak =
aws_sk =
aws_region =
aws_assume_role =
aws_sts_region =
aws_sts_endpoint =
broker_list =
kafka_tool_path =
[.others]
udf_url = http://starrocks-thirdparty.oss-cn-zhangjiakou.aliyuncs.com
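The new layout groups cluster, client and env settings into nested sections and switches the placeholder syntax from ${section:key} to ${section.key}. A minimal sketch of how such placeholders can be expanded, using a plain nested dict in place of the Configure2Dict output that read_conf() actually works on (host/port values are made up):

import json
import re

conf = {
    "cluster": {"host": "127.0.0.1", "port": "9030", "user": "root"},
    "replace": {"mysql_cmd": "mysql -h${cluster.host} -P${cluster.port} -u${cluster.user}"},
}

# Dump the dict to a string, substitute every ${section.key}, then load it back.
blob = json.dumps(conf)
for var in set(re.findall(r"\${([a-zA-Z._-]+)}", blob)):
    value = conf
    for part in var.split("."):
        value = value[part]  # a KeyError here means the conf reference is broken
    blob = blob.replace("${%s}" % var, value)
conf = json.loads(blob)

print(conf["replace"]["mysql_cmd"])  # mysql -h127.0.0.1 -P9030 -uroot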

View File

@ -40,6 +40,7 @@ from lib import *
CASE_DIR = "sql"
LOG_FILTERED_WARN = "You can use `--log_filtered` to show the details..."
class ChooseCase(object):
@ -86,7 +87,7 @@ class ChooseCase(object):
def __init__(self, case_dir=None, record_mode=False, file_regex=None, case_regex=None):
"""init"""
super().__init__()
# self.sr_lib_obj = sr_sql_lib.StarrocksSQLApiLib()
self.sr_lib_obj = sr_sql_lib.StarrocksSQLApiLib()
# case_dir = sql dir by default
self.case_dir = os.path.join(sr_sql_lib.root_path, CASE_DIR) if case_dir is None else case_dir
@ -98,6 +99,8 @@ class ChooseCase(object):
self.list_t_r_files(file_regex)
self.get_cases(record_mode, case_regex)
self.filter_cases_by_component_status()
self.filter_cases_by_data_status()
# sort
self.case_list.sort()
@ -149,13 +152,146 @@ class ChooseCase(object):
for file in file_list:
base_file = os.path.basename(file)
if base_file in skip.skip_files:
print('skip file {} because it is in skip_files'.format(file))
sr_sql_lib.self_print(f'skip file {file} because it is in skip_files', color=ColorEnum.YELLOW)
continue
self.read_t_r_file(file, case_regex)
self.case_list = list(filter(lambda x: x.name.strip() != "", self.case_list))
def filter_cases_by_component_status(self):
""" filter cases by component status """
new_case_list = []
filtered_cases_dict = {}
for each_case in self.case_list:
_case_sqls = []
for each_stat in each_case.sql:
if isinstance(each_stat, str):
_case_sqls.append(each_stat)
elif isinstance(each_stat, dict) and each_stat.get("type", "") == LOOP_FLAG:
tools.assert_in("stat", each_stat, "LOOP STATEMENT FORMAT ERROR!")
_case_sqls.extend(each_stat["stat"])
elif isinstance(each_stat, dict) and each_stat.get("type", "") == CONCURRENCY_FLAG:
tools.assert_in("thread", each_stat, "CONCURRENCY THREAD FORMAT ERROR!")
for each_thread in each_stat["thread"]:
_case_sqls.extend(each_thread["cmd"])
else:
tools.ok_(False, "Init data error!")
is_pass = True
# check trino/spark/hive flag
for each_client_flag in [TRINO_FLAG, SPARK_FLAG, HIVE_FLAG]:
if any(_case_sql.lstrip().startswith(each_client_flag) for _case_sql in _case_sqls):
# check client status
client_name = each_client_flag.split(":")[0].lower() + "-client"
if client_name not in self.sr_lib_obj.component_status:
sr_sql_lib.self_print(f"[Config ERROR]: {client_name} config not found!")
filtered_cases_dict.setdefault(client_name, []).append(each_case.name)
is_pass = False
break
if not self.sr_lib_obj.component_status[client_name]["status"]:
filtered_cases_dict.setdefault(client_name, []).append(each_case.name)
is_pass = False
break
if not is_pass:
# client check failed, no need to check component info
continue
# check ${} contains component info
_case_sqls = " ".join(_case_sqls)
_vars = re.findall(r"\${([a-zA-Z0-9._-]+)}", _case_sqls)
for _var in _vars:
if not is_pass:
break
if _var not in self.sr_lib_obj.__dict__:
continue
for each_component_name, each_component_info in self.sr_lib_obj.component_status.items():
if _var in each_component_info["keys"] and not each_component_info["status"]:
filtered_cases_dict.setdefault(each_component_name, []).append(each_case.name)
is_pass = False
break
if is_pass:
new_case_list.append(each_case)
if filtered_cases_dict:
if os.environ.get("log_filtered") == "True":
sr_sql_lib.self_print(f"\n{'-' * 60}\n[Component filter]\n{'-' * 60}", color=ColorEnum.BLUE,
logout=True, bold=True)
for k, cases in filtered_cases_dict.items():
sr_sql_lib.self_print(f"{k.upper()}", color=ColorEnum.BLUE, logout=True, bold=True)
sr_sql_lib.self_print(f" ▶ %s" % '\n'.join(cases), logout=True)
sr_sql_lib.self_print('-' * 60, color=ColorEnum.BLUE, logout=True, bold=True)
else:
filtered_count = sum([len(x) for x in filtered_cases_dict.values()])
sr_sql_lib.self_print(f"\n{'-' * 60}\n[Component filter]: {filtered_count}\n{LOG_FILTERED_WARN}\n{'-' * 60}",
color=ColorEnum.BLUE, logout=True, bold=True)
self.case_list = new_case_list
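For reference, the client-flag check above derives the config section name directly from the statement prefix, so a case survives only when that client's section in sr.conf is fully filled in. A small illustration, assuming the flag constants are literal prefixes such as "trino:" (the real values come from the framework's constants):

stmt = "trino: select count(*) from hive_catalog.tpch.lineitem"  # hypothetical case statement
flag = "trino:"                                                  # stand-in for TRINO_FLAG
client_name = flag.split(":")[0].lower() + "-client"             # -> "trino-client"
# The case is kept only if component_status["trino-client"]["status"] is True,
# i.e. the [.trino-client] section of sr.conf is configured.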
def filter_cases_by_data_status(self):
""" filter cases by data status """
new_case_list = []
filtered_cases_dict = {}
for each_case in self.case_list:
_case_sqls = []
for each_stat in each_case.sql:
if isinstance(each_stat, str):
_case_sqls.append(each_stat)
elif isinstance(each_stat, dict) and each_stat.get("type", "") == LOOP_FLAG:
tools.assert_in("stat", each_stat, "LOOP STATEMENT FORMAT ERROR!")
_case_sqls.extend(each_stat["stat"])
elif isinstance(each_stat, dict) and each_stat.get("type", "") == CONCURRENCY_FLAG:
tools.assert_in("thread", each_stat, "CONCURRENCY THREAD FORMAT ERROR!")
for each_thread in each_stat["thread"]:
_case_sqls.extend(each_thread["cmd"])
else:
tools.ok_(False, "Init data error!")
is_pass = True
# check data sources required via prepare_data()
function_stats = list(filter(lambda x: x.lstrip().startswith(FUNCTION_FLAG) and "prepare_data(" in x, _case_sqls))
for func_stat in function_stats:
if not is_pass:
break
data_source_names = re.findall(r"prepare_data\(['\"]([a-zA-Z_-]+)['\"]", func_stat)
for data_source in data_source_names:
if self.sr_lib_obj.data_status.get(data_source, False) is False:
filtered_cases_dict.setdefault(data_source, []).append(each_case.name)
is_pass = False
break
if is_pass:
new_case_list.append(each_case)
if filtered_cases_dict:
if os.environ.get("log_filtered") == "True":
sr_sql_lib.self_print(f"\n{'-' * 60}\n[Data filter]\n{'-' * 60}", color=ColorEnum.BLUE, logout=True, bold=True)
for k in list(sorted(filtered_cases_dict.keys())):
cases = filtered_cases_dict[k]
sr_sql_lib.self_print(f"{k.upper()}", color=ColorEnum.BLUE, logout=True, bold=True)
sr_sql_lib.self_print(f" ▶ %s" % '\n'.join(cases), logout=True)
sr_sql_lib.self_print('-' * 60, color=ColorEnum.BLUE, logout=True, bold=True)
else:
filtered_count = sum([len(x) for x in filtered_cases_dict.values()])
sr_sql_lib.self_print(f"\n{'-' * 60}\n[Data filter]: {filtered_count}\n{LOG_FILTERED_WARN}\n{'-' * 60}",
color=ColorEnum.BLUE, logout=True, bold=True)
self.case_list = new_case_list
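The data filter applies the same statement flattening but keys off prepare_data() calls: every referenced data set must have passed the data-directory check in StarrocksSQLApiLib (see the sr_sql_lib changes further down). A self-contained illustration with a made-up statement and data-set name:

import re

stmt = "function: prepare_data('ssb', 'test_db')"  # "function:" stands in for FUNCTION_FLAG
sources = re.findall(r"prepare_data\(['\"]([a-zA-Z_-]+)['\"]", stmt)
print(sources)  # ['ssb'] -- the case is dropped unless data_status['ssb'] is True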
def read_t_r_file(self, file, case_regex):
"""read t r file and get case & result"""
@ -316,7 +452,8 @@ class ChooseCase(object):
and not re.compile(f'}}(\\s)*{END_LOOP_FLAG}').fullmatch(f_lines[line_id].strip())):
# read loop stats, unnecessary to record result
line_content = f_lines[line_id].strip()
line_id = __read_single_stat_and_result(line_content, line_id, tmp_sql, tmp_res, in_loop_flag, tmp_loop_stat)
line_id = __read_single_stat_and_result(line_content, line_id, tmp_sql, tmp_res, in_loop_flag,
tmp_loop_stat)
tools.assert_less(line_id, len(f_lines), "LOOP FORMAT ERROR!")
# reach the end loop line
@ -328,7 +465,7 @@ class ChooseCase(object):
"type": LOOP_FLAG,
"stat": tmp_loop_stat,
"prop": tmp_loop_prop,
"ori": f_lines[l_loop_line: r_loop_line+1]
"ori": f_lines[l_loop_line: r_loop_line + 1]
})
tmp_res.append(None)
line_id += 1
@ -439,9 +576,8 @@ def choose_cases(record_mode=False):
filename_regex = os.environ.get("file_filter")
case_name_regex = os.environ.get("case_filter")
run_info = f"""
{'-' * 60}
[DIR]: {confirm_case_dir}
run_info = f"""{'-' * 60}
[DIR]: {"DEFAULT" if confirm_case_dir is None else confirm_case_dir}
[Mode]: {"RECORD" if record_mode else "VALIDATE"}
[file regex]: {filename_regex}
[case regex]: {case_name_regex}
@ -458,17 +594,3 @@ def choose_cases(record_mode=False):
log.info("%s:%s" % (case.file, case.name))
return cases
def check_db_unique(case_list: List[ChooseCase.CaseTR]):
"""check db unique in case list"""
db_and_case_dict = {}
# get info dict, key: db value: [..case_names]
for case in case_list:
for each_db in case.db:
db_and_case_dict.setdefault(each_db, []).append(case.name)
error_info_dict = {db: cases for db, cases in db_and_case_dict.items() if len(cases) > 1}
tools.assert_true(len(error_info_dict) <= 0, "Duplicate DBs: \n%s" % json.dumps(error_info_dict, indent=2))

View File

@ -51,6 +51,7 @@ def timeout():
return receive
def ignore_timeout():
"""
ignore timeout exception
@ -73,4 +74,4 @@ def ignore_timeout():
return wrapper
return receive
return receive

View File

@ -24,7 +24,6 @@ sr api lib in this module
"""
import base64
import bz2
import configparser
import copy
import datetime
import json
@ -54,6 +53,7 @@ import requests
from cup import shell
from nose import tools
from cup import log
from cup.util import conf as configparser
from requests.auth import HTTPBasicAuth
from timeout_decorator import timeout, TimeoutError
from dbutils.pooled_db import PooledDB
@ -144,6 +144,7 @@ class StarrocksSQLApiLib(object):
"""api lib"""
version = os.environ.get("version", datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f"))
_instance = False
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
@ -174,6 +175,9 @@ class StarrocksSQLApiLib(object):
self.thread_var = {}
self.thread_res_log = {}
self.component_status = {}
self.data_status = {}
# trino client config
self.trino_host = ""
self.trino_port = ""
@ -194,6 +198,10 @@ class StarrocksSQLApiLib(object):
self.log = []
self.res_log = []
# check data dir
self.check_data_dir()
# read conf
config_path = os.environ.get("config_path")
if config_path is None or config_path == "":
self.read_conf("conf/sr.conf")
@ -407,48 +415,157 @@ class StarrocksSQLApiLib(object):
def setUpClass(cls) -> None:
pass
def check_data_dir(self):
for _data_path in os.listdir(self.data_path):
# check data files exist
_this_data_path = os.path.join(self.data_path, _data_path)
# check dirs not empty
_files = os.listdir(_this_data_path)
if all(_file.startswith(".") for _file in _files):
self.data_status[_data_path] = False
continue
else:
self.data_status[_data_path] = True
if not StarrocksSQLApiLib._instance:
self_print(f"\n{'-' * 30}\n[Data check]\n{'-' * 30}", color=ColorEnum.BLUE, logout=True, bold=True)
_data_names = sorted(list(self.data_status.keys()))
for _data_name in _data_names:
if self.data_status[_data_name]:
self_print("%-20s ... YES" % str(_data_name).upper(), color=ColorEnum.CYAN, bold=True)
else:
self_print("%-20s ... NO" % str(_data_name).upper(), color=ColorEnum.YELLOW, bold=True)
self_print(f"{'-' * 30}", color=ColorEnum.BLUE, logout=True, bold=True)
def read_conf(self, path):
"""read conf"""
config_parser = configparser.ConfigParser(interpolation=configparser.ExtendedInterpolation())
config_parser.read("%s/%s" % (root_path, path))
self.mysql_host = config_parser.get("mysql-client", "host")
self.mysql_port = config_parser.get("mysql-client", "port")
self.mysql_user = config_parser.get("mysql-client", "user")
self.mysql_password = config_parser.get("mysql-client", "password")
self.http_port = config_parser.get("mysql-client", "http_port")
self.host_user = config_parser.get("mysql-client", "host_user")
self.host_password = config_parser.get("mysql-client", "host_password")
self.cluster_path = config_parser.get("mysql-client", "cluster_path")
def _get_value(_conf_dict, *args):
_tmp_conf = _conf_dict
_index = 0
_now_conf_key = ""
for arg in args:
_now_conf_key = ".".join(args[:_index + 1])
if arg not in _tmp_conf:
if not StarrocksSQLApiLib._instance:
self_print(f"[Miss config] {_now_conf_key}", color=ColorEnum.YELLOW)
return ""
_tmp_conf = _tmp_conf.get(arg)
_index += 1
if isinstance(_tmp_conf, str):
if _tmp_conf == "":
if not StarrocksSQLApiLib._instance:
log.warning(f"[Null config] {_now_conf_key}")
return _tmp_conf
else:
return _tmp_conf
# read conf file to dict
config_parser = configparser.Configure2Dict(path, separator="=").get_dict()
config_parser_str = json.dumps(config_parser)
# replace ${} in conf
var_strs = set(re.findall(r"\${([a-zA-Z._-]+)}", config_parser_str))
for var_str in var_strs:
var_str_path = "['" + var_str.replace(".", "']['") + "']"
try:
var_value = eval(f'config_parser{var_str_path}')
except Exception as e:
self_print(f"[ERROR] config: {var_str} is incorrect!", color=ColorEnum.RED, bold=True)
sys.exit(1)
config_parser_str = config_parser_str.replace(f"${{{var_str}}}", var_value)
config_parser = json.loads(config_parser_str)
# update dependency component status dict
component_list = list(_get_value(config_parser, "env").keys())
component_list.extend(list(_get_value(config_parser, "client").keys()))
if not StarrocksSQLApiLib._instance:
self_print(f"\n{'-' * 30}\n[Component check]\n{'-' * 30}", color=ColorEnum.BLUE, logout=True, bold=True)
for each_comp in component_list:
if each_comp == "others":
continue
if each_comp in _get_value(config_parser, "env"):
comp_conf_dict = _get_value(config_parser, "env", each_comp)
else:
comp_conf_dict = _get_value(config_parser, "client", each_comp)
self.component_status[each_comp] = {
"status": None,
"keys": list(comp_conf_dict.keys())
}
for com_k, com_v in comp_conf_dict.items():
if str(com_v).strip() == "" and "passwd" not in com_k.lower() and "password" not in com_k.lower():
self.component_status[each_comp]["status"] = False
if not StarrocksSQLApiLib._instance:
self_print("%-20s ... NO" % str(each_comp).upper(), color=ColorEnum.YELLOW, bold=True)
break
if self.component_status[each_comp]["status"] is None:
self.component_status[each_comp]["status"] = True
if not StarrocksSQLApiLib._instance:
self_print("%-20s ... YES" % str(each_comp).upper(), color=ColorEnum.BLUE, bold=True)
if not StarrocksSQLApiLib._instance:
self_print(f"{'-' * 30}", color=ColorEnum.BLUE, logout=True, bold=True)
cluster_conf = _get_value(config_parser, "cluster")
self.mysql_host = _get_value(cluster_conf, "host")
self.mysql_port = _get_value(cluster_conf, "port")
self.mysql_user = _get_value(cluster_conf, "user")
self.mysql_password = _get_value(cluster_conf, "password")
self.http_port = _get_value(cluster_conf, "http_port")
self.host_user = _get_value(cluster_conf, "host_user")
self.host_password = _get_value(cluster_conf, "host_password")
self.cluster_path = _get_value(cluster_conf, "cluster_path")
# client
client_conf = _get_value(config_parser, "client")
# parse trino config
self.trino_host = config_parser.get("trino-client", "host")
self.trino_port = config_parser.get("trino-client", "port")
self.trino_user = config_parser.get("trino-client", "user")
self.trino_host = _get_value(client_conf, "trino-client", "host")
self.trino_port = _get_value(client_conf, "trino-client", "port")
self.trino_user = _get_value(client_conf, "trino-client", "user")
# parse spark config
self.spark_host = config_parser.get("spark-client", "host")
self.spark_port = config_parser.get("spark-client", "port")
self.spark_user = config_parser.get("spark-client", "user")
self.spark_host = _get_value(client_conf, "spark-client", "host")
self.spark_port = _get_value(client_conf, "spark-client", "port")
self.spark_user = _get_value(client_conf, "spark-client", "user")
# parse hive config
self.hive_host = config_parser.get("hive-client", "host")
self.hive_port = config_parser.get("hive-client", "port")
self.hive_user = config_parser.get("hive-client", "user")
self.hive_host = _get_value(client_conf, "hive-client", "host")
self.hive_port = _get_value(client_conf, "hive-client", "port")
self.hive_user = _get_value(client_conf, "hive-client", "user")
# read replace info
for rep_key, rep_value in config_parser.items("replace"):
for rep_key, rep_value in _get_value(config_parser, "replace").items():
self.__setattr__(rep_key, rep_value)
# read env info
for env_key, env_value in config_parser.items("env"):
if not env_value:
env_value = os.environ.get(env_key, "")
else:
# save secrets info
if "aws" in env_key or "oss_" in env_key:
SECRET_INFOS[env_key] = env_value
for env_key, env_value in _get_value(config_parser, "env").items():
for each_env_key, each_env_value in env_value.items():
if not each_env_value:
each_env_value = os.environ.get(each_env_key, "")
else:
# save secrets info
if 'aws' in each_env_key or 'oss_' in each_env_key:
SECRET_INFOS[each_env_key] = each_env_value
self.__setattr__(env_key, env_value)
self.__setattr__(each_env_key, each_env_value)
StarrocksSQLApiLib._instance = True
def connect_starrocks(self):
mysql_dict = {
@ -2531,3 +2648,10 @@ out.append("${{dictMgr.NO_DICT_STRING_COLUMNS.contains(cid)}}")
read_cache_size = int(result[0].replace("B", "").replace("KB", ""))
write_cache_size = int(result[1].replace("B", "").replace("KB", ""))
tools.assert_true(read_cache_size + write_cache_size > 0, "cache select failed, read_cache_size + write_cache_size must be larger than 0 bytes")
@staticmethod
def regex_match(check_str: str, pattern: str):
if re.fullmatch(pattern, check_str):
return True
return False
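Stepping back, the read_conf() rewrite above leans on the nested _get_value lookup, which never raises on a missing section but degrades to an empty string (printing a "[Miss config]" hint on first init). A standalone sketch of that behaviour, for illustration only:

def get_value(conf_dict, *keys, default=""):
    """Stand-in for read_conf()'s nested lookup: walk the parsed conf dict
    and fall back to an empty string on any missing level."""
    node = conf_dict
    for key in keys:
        if not isinstance(node, dict) or key not in node:
            return default
        node = node[key]
    return node

conf = {"cluster": {"host": "127.0.0.1", "port": "9030"}}
assert get_value(conf, "cluster", "host") == "127.0.0.1"
assert get_value(conf, "cluster", "password") == ""  # missing key -> ""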

View File

@ -97,7 +97,8 @@ if __name__ == "__main__":
"skip_reruns",
"config=",
"keep_alive",
"run_info="
"run_info=",
"log_filtered"
]
case_dir = None
@ -110,6 +111,8 @@ if __name__ == "__main__":
cluster = "native"
log_filtered = False
try:
opts, args = getopt.getopt(sys.argv[1:], args, detail_args)
except Exception as e:
@ -167,6 +170,9 @@ if __name__ == "__main__":
if opt == "--run_info":
run_info = arg
if opt == "--log_filtered":
log_filtered = True
# merge cluster info to attr
cluster_attr = "!cloud" if cluster == "native" else "!native"
attr = f"{attr},{cluster_attr}".strip(",")
@ -188,6 +194,7 @@ if __name__ == "__main__":
os.environ["config_path"] = config
os.environ["keep_alive"] = str(keep_alive)
os.environ['run_info'] = run_info
os.environ['log_filtered'] = str(log_filtered)
argv = [
"nosetests",

View File

@ -73,12 +73,6 @@ class TestSQLCases(sr_sql_lib.StarrocksSQLApiLib):
"""
_multiprocess_can_split_ = True
_instance = None
def __new__(cls, *args, **kwargs):
if cls._instance is None:
cls._instance = super(TestSQLCases, cls).__new__(cls, *args, **kwargs)
return cls._instance
def __init__(self, *args, **kwargs):
"""