[UT] Fix distance function test cases that were unstable due to floating-point precision (backport #63502) (#63511)

Signed-off-by: yan zhang <dirtysalt1987@gmail.com>
Co-authored-by: yan zhang <dirtysalt1987@gmail.com>
This commit is contained in:
mergify[bot] 2025-09-24 17:26:17 +08:00 committed by GitHub
parent d54987f1f2
commit 5ce9b16626
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 106 additions and 106 deletions

View File

@ -5,27 +5,27 @@ create table t1 (id int, data array<float>) engine = olap distributed by hash(id
insert into t1 values(1, array<float>[0.1, 0.2, 0.3]), (2, array<float>[0.2, 0.1, 0.3]), (3, array<float>[0.3, 0.2, 0.1]);
-- result:
-- !result
-- Rank the t1 rows by cosine similarity to [0.1, 0.2, 0.3], highest first.
-- First statement: raw float score. Second statement: score rounded to 3 decimals,
-- with id as a secondary sort key so rows with equal scores keep a fixed order.
select cosine_similarity(array<float>[0.1, 0.2, 0.3], data) as dist, id from t1 order by dist desc;
select round(cosine_similarity(array<float>[0.1, 0.2, 0.3], data), 3) as dist, id from t1 order by dist desc, id;
-- result:
0.9999999 1
0.9285713 2
0.7142856 3
1.0 1
0.929 2
0.714 3
-- !result
-- Cosine similarity of a constant vector with itself (raw, then rounded to 3 decimals).
select cosine_similarity(array<float>[0.1, 0.2, 0.3], array<float>[0.1, 0.2, 0.3]) as dist;
select round(cosine_similarity(array<float>[0.1, 0.2, 0.3], array<float>[0.1, 0.2, 0.3]), 3) as dist;
-- result:
0.9999999
1.0
-- !result
-- cosine_similarity_norm on the same constant vectors (raw, then rounded).
-- NOTE(review): the expected 0.14 equals the plain dot product 0.01 + 0.04 + 0.09,
-- so this variant appears to skip normalization -- confirm against the function docs.
select cosine_similarity_norm(array<float>[0.1, 0.2, 0.3], array<float>[0.1, 0.2, 0.3]) as dist;
select round(cosine_similarity_norm(array<float>[0.1, 0.2, 0.3], array<float>[0.1, 0.2, 0.3]), 3) as dist;
-- result:
0.14000002
0.14
-- !result
-- Rank the t1 rows by l2_distance to [0.1, 0.2, 0.3], largest first (raw, then rounded
-- with id as a secondary sort key).
-- NOTE(review): the expected rows (0.02 for id 2, 0.08 for id 3) match the squared
-- Euclidean distance, i.e. no square root is applied -- confirm against the function docs.
select l2_distance(array<float>[0.1, 0.2, 0.3], data) as dist, id from t1 order by dist desc;
select round(l2_distance(array<float>[0.1, 0.2, 0.3], data), 3) as dist, id from t1 order by dist desc, id;
-- result:
0.08000001 3
0.020000001 2
0.08 3
0.02 2
0.0 1
-- !result
-- l2_distance of a constant vector with itself (raw, then rounded to 3 decimals).
select l2_distance(array<float>[0.1, 0.2, 0.3], array<float>[0.1, 0.2, 0.3]) as dist;
select round(l2_distance(array<float>[0.1, 0.2, 0.3], array<float>[0.1, 0.2, 0.3]), 3) as dist;
-- result:
0.0
-- !result
@ -41,99 +41,99 @@ insert into test_vector values (3, array<float>[0.15, 0.25, 0.32]), (4, array<fl
insert into test_vector values (5, array<float>[0.25, 0.12, 0.13]), (6, array<float>[0.22, 0.01, 0.39]);
-- result:
-- !result
-- Rank every test_vector row by cosine similarity to [0.1, 0.2, 0.3], highest first.
-- First statement: raw float score. Second statement: score rounded to 3 decimals,
-- with id as a secondary sort key so rows with equal scores keep a fixed order.
select id, data, cosine_similarity(array<float>[0.1, 0.2, 0.3], data) as sim from test_vector order by sim desc;
select id, data, round(cosine_similarity(array<float>[0.1, 0.2, 0.3], data), 3) as sim from test_vector order by sim desc, id;
-- result:
1 [0.1,0.2,0.3] 0.9999999
3 [0.15,0.25,0.32] 0.99397856
4 [0.12,0.11,0.32] 0.9677269
2 [0.2,0.1,0.3] 0.9285713
6 [0.22,0.01,0.39] 0.841375
5 [0.25,0.12,0.13] 0.76792216
1 [0.1,0.2,0.3] 1.0
3 [0.15,0.25,0.32] 0.994
4 [0.12,0.11,0.32] 0.968
2 [0.2,0.1,0.3] 0.929
6 [0.22,0.01,0.39] 0.841
5 [0.25,0.12,0.13] 0.768
-- !result
-- Pairwise cosine similarity of every test_vector row against every row (self cross join).
-- First statement: raw score with no order by, so the row order is whatever the engine returns.
-- Second statement: score rounded to 3 decimals, sorted by sim desc and then a.id, b.id.
-- NOTE(review): the sorted form is textually identical to the ordered cross-join query
-- that follows later in this file -- confirm the duplicate case is intended.
select a.id, b.id, a.data, b.data, cosine_similarity(a.data, b.data) as sim from test_vector as a cross join test_vector as b;
select a.id, b.id, a.data, b.data, round(cosine_similarity(a.data, b.data), 3) as sim from test_vector as a cross join test_vector as b order by sim desc, a.id, b.id;
-- result:
6 4 [0.22,0.01,0.39] [0.12,0.11,0.32] 0.94712645
6 1 [0.22,0.01,0.39] [0.1,0.2,0.3] 0.841375
6 2 [0.22,0.01,0.39] [0.2,0.1,0.3] 0.9666862
6 3 [0.22,0.01,0.39] [0.15,0.25,0.32] 0.8267672
6 5 [0.22,0.01,0.39] [0.25,0.12,0.13] 0.779311
6 6 [0.22,0.01,0.39] [0.22,0.01,0.39] 0.99999994
2 4 [0.2,0.1,0.3] [0.12,0.11,0.32] 0.97517097
2 1 [0.2,0.1,0.3] [0.1,0.2,0.3] 0.9285713
2 2 [0.2,0.1,0.3] [0.2,0.1,0.3] 0.9999999
2 3 [0.2,0.1,0.3] [0.15,0.25,0.32] 0.9322407
2 5 [0.2,0.1,0.3] [0.25,0.12,0.13] 0.8813651
2 6 [0.2,0.1,0.3] [0.22,0.01,0.39] 0.9666862
3 4 [0.15,0.25,0.32] [0.12,0.11,0.32] 0.95160544
3 1 [0.15,0.25,0.32] [0.1,0.2,0.3] 0.99397856
3 2 [0.15,0.25,0.32] [0.2,0.1,0.3] 0.9322407
1 1 [0.1,0.2,0.3] [0.1,0.2,0.3] 1.0
2 2 [0.2,0.1,0.3] [0.2,0.1,0.3] 1.0
3 3 [0.15,0.25,0.32] [0.15,0.25,0.32] 1.0
3 5 [0.15,0.25,0.32] [0.25,0.12,0.13] 0.82288384
3 6 [0.15,0.25,0.32] [0.22,0.01,0.39] 0.8267672
5 4 [0.25,0.12,0.13] [0.12,0.11,0.32] 0.77120167
5 1 [0.25,0.12,0.13] [0.1,0.2,0.3] 0.76792216
5 2 [0.25,0.12,0.13] [0.2,0.1,0.3] 0.8813651
5 3 [0.25,0.12,0.13] [0.15,0.25,0.32] 0.82288384
5 5 [0.25,0.12,0.13] [0.25,0.12,0.13] 0.99999994
5 6 [0.25,0.12,0.13] [0.22,0.01,0.39] 0.779311
1 4 [0.1,0.2,0.3] [0.12,0.11,0.32] 0.9677269
1 1 [0.1,0.2,0.3] [0.1,0.2,0.3] 0.9999999
1 2 [0.1,0.2,0.3] [0.2,0.1,0.3] 0.9285713
1 3 [0.1,0.2,0.3] [0.15,0.25,0.32] 0.99397856
1 5 [0.1,0.2,0.3] [0.25,0.12,0.13] 0.76792216
1 6 [0.1,0.2,0.3] [0.22,0.01,0.39] 0.841375
4 4 [0.12,0.11,0.32] [0.12,0.11,0.32] 0.9999999
4 1 [0.12,0.11,0.32] [0.1,0.2,0.3] 0.9677269
4 2 [0.12,0.11,0.32] [0.2,0.1,0.3] 0.97517097
4 3 [0.12,0.11,0.32] [0.15,0.25,0.32] 0.95160544
4 5 [0.12,0.11,0.32] [0.25,0.12,0.13] 0.77120167
4 6 [0.12,0.11,0.32] [0.22,0.01,0.39] 0.94712645
4 4 [0.12,0.11,0.32] [0.12,0.11,0.32] 1.0
5 5 [0.25,0.12,0.13] [0.25,0.12,0.13] 1.0
6 6 [0.22,0.01,0.39] [0.22,0.01,0.39] 1.0
1 3 [0.1,0.2,0.3] [0.15,0.25,0.32] 0.994
3 1 [0.15,0.25,0.32] [0.1,0.2,0.3] 0.994
2 4 [0.2,0.1,0.3] [0.12,0.11,0.32] 0.975
4 2 [0.12,0.11,0.32] [0.2,0.1,0.3] 0.975
1 4 [0.1,0.2,0.3] [0.12,0.11,0.32] 0.968
4 1 [0.12,0.11,0.32] [0.1,0.2,0.3] 0.968
2 6 [0.2,0.1,0.3] [0.22,0.01,0.39] 0.967
6 2 [0.22,0.01,0.39] [0.2,0.1,0.3] 0.967
3 4 [0.15,0.25,0.32] [0.12,0.11,0.32] 0.952
4 3 [0.12,0.11,0.32] [0.15,0.25,0.32] 0.952
4 6 [0.12,0.11,0.32] [0.22,0.01,0.39] 0.947
6 4 [0.22,0.01,0.39] [0.12,0.11,0.32] 0.947
2 3 [0.2,0.1,0.3] [0.15,0.25,0.32] 0.932
3 2 [0.15,0.25,0.32] [0.2,0.1,0.3] 0.932
1 2 [0.1,0.2,0.3] [0.2,0.1,0.3] 0.929
2 1 [0.2,0.1,0.3] [0.1,0.2,0.3] 0.929
2 5 [0.2,0.1,0.3] [0.25,0.12,0.13] 0.881
5 2 [0.25,0.12,0.13] [0.2,0.1,0.3] 0.881
1 6 [0.1,0.2,0.3] [0.22,0.01,0.39] 0.841
6 1 [0.22,0.01,0.39] [0.1,0.2,0.3] 0.841
3 6 [0.15,0.25,0.32] [0.22,0.01,0.39] 0.827
6 3 [0.22,0.01,0.39] [0.15,0.25,0.32] 0.827
3 5 [0.15,0.25,0.32] [0.25,0.12,0.13] 0.823
5 3 [0.25,0.12,0.13] [0.15,0.25,0.32] 0.823
5 6 [0.25,0.12,0.13] [0.22,0.01,0.39] 0.779
6 5 [0.22,0.01,0.39] [0.25,0.12,0.13] 0.779
4 5 [0.12,0.11,0.32] [0.25,0.12,0.13] 0.771
5 4 [0.25,0.12,0.13] [0.12,0.11,0.32] 0.771
1 5 [0.1,0.2,0.3] [0.25,0.12,0.13] 0.768
5 1 [0.25,0.12,0.13] [0.1,0.2,0.3] 0.768
-- !result
-- Pairwise cosine similarity over the self cross join of test_vector, highest first.
-- First statement: sorted by the raw float score only, so pairs with equal scores
-- (e.g. (a, b) and (b, a)) have no defined relative order.
-- Second statement: score rounded to 3 decimals, with a.id, b.id as secondary sort keys.
select a.id, b.id, a.data, b.data, cosine_similarity(a.data, b.data) as sim from test_vector as a cross join test_vector as b order by sim desc;
select a.id, b.id, a.data, b.data, round(cosine_similarity(a.data, b.data), 3) as sim from test_vector as a cross join test_vector as b order by sim desc, a.id, b.id;
-- result:
1 1 [0.1,0.2,0.3] [0.1,0.2,0.3] 1.0
2 2 [0.2,0.1,0.3] [0.2,0.1,0.3] 1.0
3 3 [0.15,0.25,0.32] [0.15,0.25,0.32] 1.0
5 5 [0.25,0.12,0.13] [0.25,0.12,0.13] 0.99999994
6 6 [0.22,0.01,0.39] [0.22,0.01,0.39] 0.99999994
4 4 [0.12,0.11,0.32] [0.12,0.11,0.32] 0.9999999
1 1 [0.1,0.2,0.3] [0.1,0.2,0.3] 0.9999999
2 2 [0.2,0.1,0.3] [0.2,0.1,0.3] 0.9999999
1 3 [0.1,0.2,0.3] [0.15,0.25,0.32] 0.99397856
3 1 [0.15,0.25,0.32] [0.1,0.2,0.3] 0.99397856
4 2 [0.12,0.11,0.32] [0.2,0.1,0.3] 0.97517097
2 4 [0.2,0.1,0.3] [0.12,0.11,0.32] 0.97517097
1 4 [0.1,0.2,0.3] [0.12,0.11,0.32] 0.9677269
4 1 [0.12,0.11,0.32] [0.1,0.2,0.3] 0.9677269
2 6 [0.2,0.1,0.3] [0.22,0.01,0.39] 0.9666862
6 2 [0.22,0.01,0.39] [0.2,0.1,0.3] 0.9666862
3 4 [0.15,0.25,0.32] [0.12,0.11,0.32] 0.95160544
4 3 [0.12,0.11,0.32] [0.15,0.25,0.32] 0.95160544
4 6 [0.12,0.11,0.32] [0.22,0.01,0.39] 0.94712645
6 4 [0.22,0.01,0.39] [0.12,0.11,0.32] 0.94712645
3 2 [0.15,0.25,0.32] [0.2,0.1,0.3] 0.9322407
2 3 [0.2,0.1,0.3] [0.15,0.25,0.32] 0.9322407
1 2 [0.1,0.2,0.3] [0.2,0.1,0.3] 0.9285713
2 1 [0.2,0.1,0.3] [0.1,0.2,0.3] 0.9285713
5 2 [0.25,0.12,0.13] [0.2,0.1,0.3] 0.8813651
2 5 [0.2,0.1,0.3] [0.25,0.12,0.13] 0.8813651
1 6 [0.1,0.2,0.3] [0.22,0.01,0.39] 0.841375
6 1 [0.22,0.01,0.39] [0.1,0.2,0.3] 0.841375
3 6 [0.15,0.25,0.32] [0.22,0.01,0.39] 0.8267672
6 3 [0.22,0.01,0.39] [0.15,0.25,0.32] 0.8267672
3 5 [0.15,0.25,0.32] [0.25,0.12,0.13] 0.82288384
5 3 [0.25,0.12,0.13] [0.15,0.25,0.32] 0.82288384
5 6 [0.25,0.12,0.13] [0.22,0.01,0.39] 0.779311
6 5 [0.22,0.01,0.39] [0.25,0.12,0.13] 0.779311
5 4 [0.25,0.12,0.13] [0.12,0.11,0.32] 0.77120167
4 5 [0.12,0.11,0.32] [0.25,0.12,0.13] 0.77120167
1 5 [0.1,0.2,0.3] [0.25,0.12,0.13] 0.76792216
5 1 [0.25,0.12,0.13] [0.1,0.2,0.3] 0.76792216
4 4 [0.12,0.11,0.32] [0.12,0.11,0.32] 1.0
5 5 [0.25,0.12,0.13] [0.25,0.12,0.13] 1.0
6 6 [0.22,0.01,0.39] [0.22,0.01,0.39] 1.0
1 3 [0.1,0.2,0.3] [0.15,0.25,0.32] 0.994
3 1 [0.15,0.25,0.32] [0.1,0.2,0.3] 0.994
2 4 [0.2,0.1,0.3] [0.12,0.11,0.32] 0.975
4 2 [0.12,0.11,0.32] [0.2,0.1,0.3] 0.975
1 4 [0.1,0.2,0.3] [0.12,0.11,0.32] 0.968
4 1 [0.12,0.11,0.32] [0.1,0.2,0.3] 0.968
2 6 [0.2,0.1,0.3] [0.22,0.01,0.39] 0.967
6 2 [0.22,0.01,0.39] [0.2,0.1,0.3] 0.967
3 4 [0.15,0.25,0.32] [0.12,0.11,0.32] 0.952
4 3 [0.12,0.11,0.32] [0.15,0.25,0.32] 0.952
4 6 [0.12,0.11,0.32] [0.22,0.01,0.39] 0.947
6 4 [0.22,0.01,0.39] [0.12,0.11,0.32] 0.947
2 3 [0.2,0.1,0.3] [0.15,0.25,0.32] 0.932
3 2 [0.15,0.25,0.32] [0.2,0.1,0.3] 0.932
1 2 [0.1,0.2,0.3] [0.2,0.1,0.3] 0.929
2 1 [0.2,0.1,0.3] [0.1,0.2,0.3] 0.929
2 5 [0.2,0.1,0.3] [0.25,0.12,0.13] 0.881
5 2 [0.25,0.12,0.13] [0.2,0.1,0.3] 0.881
1 6 [0.1,0.2,0.3] [0.22,0.01,0.39] 0.841
6 1 [0.22,0.01,0.39] [0.1,0.2,0.3] 0.841
3 6 [0.15,0.25,0.32] [0.22,0.01,0.39] 0.827
6 3 [0.22,0.01,0.39] [0.15,0.25,0.32] 0.827
3 5 [0.15,0.25,0.32] [0.25,0.12,0.13] 0.823
5 3 [0.25,0.12,0.13] [0.15,0.25,0.32] 0.823
5 6 [0.25,0.12,0.13] [0.22,0.01,0.39] 0.779
6 5 [0.22,0.01,0.39] [0.25,0.12,0.13] 0.779
4 5 [0.12,0.11,0.32] [0.25,0.12,0.13] 0.771
5 4 [0.25,0.12,0.13] [0.12,0.11,0.32] 0.771
1 5 [0.1,0.2,0.3] [0.25,0.12,0.13] 0.768
5 1 [0.25,0.12,0.13] [0.1,0.2,0.3] 0.768
-- !result
-- Rank every test_vector row by l2_distance to [0.1, 0.2, 0.3], largest first.
-- First statement: raw float distance. Second statement: distance rounded to 3 decimals,
-- with id as a secondary sort key so rows with equal distances keep a fixed order.
select id, data, l2_distance(array<float>[0.1, 0.2, 0.3], data) as sim from test_vector order by sim desc;
select id, data, round(l2_distance(array<float>[0.1, 0.2, 0.3], data), 3) as sim from test_vector order by sim desc, id;
-- result:
6 [0.22,0.01,0.39] 0.058599994
5 [0.25,0.12,0.13] 0.057800006
2 [0.2,0.1,0.3] 0.020000001
4 [0.12,0.11,0.32] 0.0089
3 [0.15,0.25,0.32] 0.005399999
6 [0.22,0.01,0.39] 0.059
5 [0.25,0.12,0.13] 0.058
2 [0.2,0.1,0.3] 0.02
4 [0.12,0.11,0.32] 0.009
3 [0.15,0.25,0.32] 0.005
1 [0.1,0.2,0.3] 0.0
-- !result

View File

@ -4,15 +4,15 @@ create table t1 (id int, data array<float>) engine = olap distributed by hash(id
insert into t1 values(1, array<float>[0.1, 0.2, 0.3]), (2, array<float>[0.2, 0.1, 0.3]), (3, array<float>[0.3, 0.2, 0.1]);
-- Each case below appears twice: once returning the raw float score and once wrapped
-- in round(..., 3). The rounded table queries also add id as a secondary sort key.
-- Rank the t1 rows by cosine similarity to [0.1, 0.2, 0.3], highest first.
select cosine_similarity(array<float>[0.1, 0.2, 0.3], data) as dist, id from t1 order by dist desc;
select round(cosine_similarity(array<float>[0.1, 0.2, 0.3], data), 3) as dist, id from t1 order by dist desc, id;
-- Cosine similarity of a constant vector with itself.
select cosine_similarity(array<float>[0.1, 0.2, 0.3], array<float>[0.1, 0.2, 0.3]) as dist;
select round(cosine_similarity(array<float>[0.1, 0.2, 0.3], array<float>[0.1, 0.2, 0.3]), 3) as dist;
-- cosine_similarity_norm on the same constant vectors.
select cosine_similarity_norm(array<float>[0.1, 0.2, 0.3], array<float>[0.1, 0.2, 0.3]) as dist;
select round(cosine_similarity_norm(array<float>[0.1, 0.2, 0.3], array<float>[0.1, 0.2, 0.3]), 3) as dist;
-- Rank the t1 rows by l2_distance to [0.1, 0.2, 0.3], largest first.
select l2_distance(array<float>[0.1, 0.2, 0.3], data) as dist, id from t1 order by dist desc;
select round(l2_distance(array<float>[0.1, 0.2, 0.3], data), 3) as dist, id from t1 order by dist desc, id;
-- l2_distance of a constant vector with itself.
select l2_distance(array<float>[0.1, 0.2, 0.3], array<float>[0.1, 0.2, 0.3]) as dist;
select round(l2_distance(array<float>[0.1, 0.2, 0.3], array<float>[0.1, 0.2, 0.3]), 3) as dist;
--------------- cross join -----------------
@ -22,8 +22,8 @@ insert into test_vector values (1, array<float>[0.1, 0.2, 0.3]), (2, array<float
-- Load rows 3-6 of test_vector (three-element float vectors).
insert into test_vector values (3, array<float>[0.15, 0.25, 0.32]), (4, array<float>[0.12, 0.11, 0.32]);
insert into test_vector values (5, array<float>[0.25, 0.12, 0.13]), (6, array<float>[0.22, 0.01, 0.39]);
-- Raw-score forms: rank by cosine similarity to a constant vector, then the pairwise
-- self cross join without and with an order by on the raw score.
select id, data, cosine_similarity(array<float>[0.1, 0.2, 0.3], data) as sim from test_vector order by sim desc;
select a.id, b.id, a.data, b.data, cosine_similarity(a.data, b.data) as sim from test_vector as a cross join test_vector as b;
select a.id, b.id, a.data, b.data, cosine_similarity(a.data, b.data) as sim from test_vector as a cross join test_vector as b order by sim desc;
-- Rounded forms of the same three cases: score rounded to 3 decimals and id columns
-- added as secondary sort keys.
-- NOTE(review): the two rounded cross-join statements are textually identical -- confirm
-- the duplicate is intended.
select id, data, round(cosine_similarity(array<float>[0.1, 0.2, 0.3], data), 3) as sim from test_vector order by sim desc, id;
select a.id, b.id, a.data, b.data, round(cosine_similarity(a.data, b.data), 3) as sim from test_vector as a cross join test_vector as b order by sim desc, a.id, b.id;
select a.id, b.id, a.data, b.data, round(cosine_similarity(a.data, b.data), 3) as sim from test_vector as a cross join test_vector as b order by sim desc, a.id, b.id;
-- Rank test_vector rows by l2_distance to [0.1, 0.2, 0.3], largest first (raw, then rounded
-- with id as a secondary sort key).
select id, data, l2_distance(array<float>[0.1, 0.2, 0.3], data) as sim from test_vector order by sim desc;
select id, data, round(l2_distance(array<float>[0.1, 0.2, 0.3], data), 3) as sim from test_vector order by sim desc, id;