Compare commits

...

2 Commits

Author SHA1 Message Date
Murphy 6a78afecaa fix format
Signed-off-by: Murphy <mofei@starrocks.com>
2025-08-28 10:15:46 +08:00
Cursor Agent bacf862c5c Implement year, month, day, minute, second from_unixtime functions
Co-authored-by: huanmingwong <huanmingwong@gmail.com>
2025-08-28 10:15:45 +08:00
7 changed files with 400 additions and 11 deletions

View File

@ -1595,6 +1595,48 @@ static inline int64_t impl_hour_from_unixtime(int64_t unixtime) {
return hour;
}
static inline int64_t impl_minute_from_unixtime(int64_t unixtime) {
static const libdivide::divider<int64_t> fast_div_60(60);
static const libdivide::divider<int64_t> fast_div_3600(3600);
static const libdivide::divider<int64_t> fast_div_86400(86400);
int64_t remainder;
if (LIKELY(unixtime >= 0)) {
remainder = unixtime - unixtime / fast_div_86400 * 86400;
} else {
remainder = unixtime % 86400;
if (remainder < 0) {
remainder += 86400;
}
}
int64_t hour_sec = (remainder / fast_div_3600) * 3600;
int64_t minute = (remainder - hour_sec) / fast_div_60;
return minute;
}
static inline int64_t impl_second_from_unixtime(int64_t unixtime) {
static const libdivide::divider<int64_t> fast_div_60(60);
static const libdivide::divider<int64_t> fast_div_86400(86400);
int64_t remainder;
if (LIKELY(unixtime >= 0)) {
remainder = unixtime - unixtime / fast_div_86400 * 86400;
} else {
remainder = unixtime % 86400;
if (remainder < 0) {
remainder += 86400;
}
}
int64_t second = remainder - (remainder / fast_div_60) * 60;
return second;
}
// Returns floor(seconds / 86400), i.e. the quotient rounded toward negative infinity.
//
// Built-in integer division truncates toward zero, so for a negative `seconds` that is not
// an exact multiple of 86400 the truncated quotient is one too large; a negative remainder
// detects exactly that case. This form never negates or adds to `seconds`, so it is well
// defined over the full int64_t range (negating values near INT64_MIN would overflow).
static inline int64_t floor_div_86400(int64_t seconds) {
    constexpr int64_t seconds_per_day = 86400;
    const int64_t truncated = seconds / seconds_per_day;
    const bool rounded_up = (seconds % seconds_per_day) < 0;
    return rounded_up ? truncated - 1 : truncated;
}
StatusOr<ColumnPtr> TimeFunctions::hour_from_unixtime(FunctionContext* context, const Columns& columns) {
DCHECK_EQ(columns.size(), 1);
RETURN_IF_COLUMNS_ONLY_NULL(columns);
@ -1626,6 +1668,173 @@ StatusOr<ColumnPtr> TimeFunctions::hour_from_unixtime(FunctionContext* context,
return result.build(ColumnHelper::is_all_const(columns));
}
// Vectorized minute_from_unixtime: maps a BIGINT timestamp column to an INT column.
//
// For each row the timestamp is shifted by the offset that the runtime state's time zone
// reports for that instant, and the minute-of-hour of the shifted value is emitted.
// A row yields NULL when the input is NULL, negative, or greater than MAX_UNIX_TIMESTAMP.
// `columns` must hold exactly one column.
StatusOr<ColumnPtr> TimeFunctions::minute_from_unixtime(FunctionContext* context, const Columns& columns) {
    DCHECK_EQ(columns.size(), 1);
    RETURN_IF_COLUMNS_ONLY_NULL(columns);

    static const auto epoch =
            std::chrono::time_point_cast<cctz::sys_seconds>(std::chrono::system_clock::from_time_t(0));

    auto ctz = context->state()->timezone_obj();
    const size_t num_rows = columns[0]->size();
    ColumnViewer<TYPE_BIGINT> data_column(columns[0]);
    ColumnBuilder<TYPE_INT> result(num_rows);

    // Unsigned index: matches the type of the row count and avoids a signed/unsigned compare.
    for (size_t row = 0; row < num_rows; ++row) {
        if (data_column.is_null(row)) {
            result.append_null();
            continue;
        }

        const int64_t date = data_column.value(row);
        if (date < 0 || date > MAX_UNIX_TIMESTAMP) {
            result.append_null();
            continue;
        }

        const cctz::time_point<cctz::sys_seconds> t = epoch + cctz::seconds(date);
        const int offset = ctz.lookup_offset(t).offset;
        // The helper returns a value in [0, 59], so narrowing to int is lossless.
        const int minute = static_cast<int>(impl_minute_from_unixtime(date + offset));
        result.append(minute);
    }

    return result.build(ColumnHelper::is_all_const(columns));
}
// Vectorized second_from_unixtime: maps a BIGINT timestamp column to an INT column.
//
// For each row the timestamp is shifted by the offset that the runtime state's time zone
// reports for that instant, and the second-of-minute of the shifted value is emitted.
// A row yields NULL when the input is NULL, negative, or greater than MAX_UNIX_TIMESTAMP.
// `columns` must hold exactly one column.
StatusOr<ColumnPtr> TimeFunctions::second_from_unixtime(FunctionContext* context, const Columns& columns) {
    DCHECK_EQ(columns.size(), 1);
    RETURN_IF_COLUMNS_ONLY_NULL(columns);

    static const auto epoch =
            std::chrono::time_point_cast<cctz::sys_seconds>(std::chrono::system_clock::from_time_t(0));

    auto ctz = context->state()->timezone_obj();
    const size_t num_rows = columns[0]->size();
    ColumnViewer<TYPE_BIGINT> data_column(columns[0]);
    ColumnBuilder<TYPE_INT> result(num_rows);

    // Unsigned index: matches the type of the row count and avoids a signed/unsigned compare.
    for (size_t row = 0; row < num_rows; ++row) {
        if (data_column.is_null(row)) {
            result.append_null();
            continue;
        }

        const int64_t date = data_column.value(row);
        if (date < 0 || date > MAX_UNIX_TIMESTAMP) {
            result.append_null();
            continue;
        }

        const cctz::time_point<cctz::sys_seconds> t = epoch + cctz::seconds(date);
        const int offset = ctz.lookup_offset(t).offset;
        // The helper returns a value in [0, 59], so narrowing to int is lossless.
        const int second = static_cast<int>(impl_second_from_unixtime(date + offset));
        result.append(second);
    }

    return result.build(ColumnHelper::is_all_const(columns));
}
// Vectorized year_from_unixtime: maps a BIGINT timestamp column to an INT column.
//
// For each row the timestamp is shifted by the offset that the runtime state's time zone
// reports for that instant, converted to a whole number of days since the Unix epoch
// (rounded toward negative infinity), turned into a DateValue, and its year is emitted.
// A row yields NULL when the input is NULL, negative, or greater than MAX_UNIX_TIMESTAMP.
// `columns` must hold exactly one column.
StatusOr<ColumnPtr> TimeFunctions::year_from_unixtime(FunctionContext* context, const Columns& columns) {
    DCHECK_EQ(columns.size(), 1);
    RETURN_IF_COLUMNS_ONLY_NULL(columns);

    static const auto epoch =
            std::chrono::time_point_cast<cctz::sys_seconds>(std::chrono::system_clock::from_time_t(0));

    auto ctz = context->state()->timezone_obj();
    const size_t num_rows = columns[0]->size();
    ColumnViewer<TYPE_BIGINT> data_column(columns[0]);
    ColumnBuilder<TYPE_INT> result(num_rows);

    // Unsigned index: matches the type of the row count and avoids a signed/unsigned compare.
    for (size_t row = 0; row < num_rows; ++row) {
        if (data_column.is_null(row)) {
            result.append_null();
            continue;
        }

        const int64_t date = data_column.value(row);
        if (date < 0 || date > MAX_UNIX_TIMESTAMP) {
            result.append_null();
            continue;
        }

        const cctz::time_point<cctz::sys_seconds> t = epoch + cctz::seconds(date);
        const int offset = ctz.lookup_offset(t).offset;
        const int64_t adjusted = date + offset;
        const int64_t days = floor_div_86400(adjusted);
        DateValue dv = DateValue::from_days_since_unix_epoch(static_cast<int>(days));

        // Only the year is emitted; month and day are required out-params of to_date().
        int y = 0;
        int m = 0;
        int d = 0;
        dv.to_date(&y, &m, &d);
        result.append(y);
    }

    return result.build(ColumnHelper::is_all_const(columns));
}
// Vectorized month_from_unixtime: maps a BIGINT timestamp column to an INT column.
//
// For each row the timestamp is shifted by the offset that the runtime state's time zone
// reports for that instant, converted to a whole number of days since the Unix epoch
// (rounded toward negative infinity), turned into a DateValue, and its month is emitted.
// A row yields NULL when the input is NULL, negative, or greater than MAX_UNIX_TIMESTAMP.
// `columns` must hold exactly one column.
StatusOr<ColumnPtr> TimeFunctions::month_from_unixtime(FunctionContext* context, const Columns& columns) {
    DCHECK_EQ(columns.size(), 1);
    RETURN_IF_COLUMNS_ONLY_NULL(columns);

    static const auto epoch =
            std::chrono::time_point_cast<cctz::sys_seconds>(std::chrono::system_clock::from_time_t(0));

    auto ctz = context->state()->timezone_obj();
    const size_t num_rows = columns[0]->size();
    ColumnViewer<TYPE_BIGINT> data_column(columns[0]);
    ColumnBuilder<TYPE_INT> result(num_rows);

    // Unsigned index: matches the type of the row count and avoids a signed/unsigned compare.
    for (size_t row = 0; row < num_rows; ++row) {
        if (data_column.is_null(row)) {
            result.append_null();
            continue;
        }

        const int64_t date = data_column.value(row);
        if (date < 0 || date > MAX_UNIX_TIMESTAMP) {
            result.append_null();
            continue;
        }

        const cctz::time_point<cctz::sys_seconds> t = epoch + cctz::seconds(date);
        const int offset = ctz.lookup_offset(t).offset;
        const int64_t adjusted = date + offset;
        const int64_t days = floor_div_86400(adjusted);
        DateValue dv = DateValue::from_days_since_unix_epoch(static_cast<int>(days));

        // Only the month is emitted; year and day are required out-params of to_date().
        int y = 0;
        int m = 0;
        int d = 0;
        dv.to_date(&y, &m, &d);
        result.append(m);
    }

    return result.build(ColumnHelper::is_all_const(columns));
}
// Vectorized day_from_unixtime: maps a BIGINT timestamp column to an INT column.
//
// For each row the timestamp is shifted by the offset that the runtime state's time zone
// reports for that instant, converted to a whole number of days since the Unix epoch
// (rounded toward negative infinity), turned into a DateValue, and its day-of-month is
// emitted.
// A row yields NULL when the input is NULL, negative, or greater than MAX_UNIX_TIMESTAMP.
// `columns` must hold exactly one column.
StatusOr<ColumnPtr> TimeFunctions::day_from_unixtime(FunctionContext* context, const Columns& columns) {
    DCHECK_EQ(columns.size(), 1);
    RETURN_IF_COLUMNS_ONLY_NULL(columns);

    static const auto epoch =
            std::chrono::time_point_cast<cctz::sys_seconds>(std::chrono::system_clock::from_time_t(0));

    auto ctz = context->state()->timezone_obj();
    const size_t num_rows = columns[0]->size();
    ColumnViewer<TYPE_BIGINT> data_column(columns[0]);
    ColumnBuilder<TYPE_INT> result(num_rows);

    // Unsigned index: matches the type of the row count and avoids a signed/unsigned compare.
    for (size_t row = 0; row < num_rows; ++row) {
        if (data_column.is_null(row)) {
            result.append_null();
            continue;
        }

        const int64_t date = data_column.value(row);
        if (date < 0 || date > MAX_UNIX_TIMESTAMP) {
            result.append_null();
            continue;
        }

        const cctz::time_point<cctz::sys_seconds> t = epoch + cctz::seconds(date);
        const int offset = ctz.lookup_offset(t).offset;
        const int64_t adjusted = date + offset;
        const int64_t days = floor_div_86400(adjusted);
        DateValue dv = DateValue::from_days_since_unix_epoch(static_cast<int>(days));

        // Only the day is emitted; year and month are required out-params of to_date().
        int y = 0;
        int m = 0;
        int d = 0;
        dv.to_date(&y, &m, &d);
        result.append(d);
    }

    return result.build(ColumnHelper::is_all_const(columns));
}
std::string TimeFunctions::convert_format(const Slice& format) {
switch (format.get_size()) {
case 8:

View File

@ -722,12 +722,12 @@ public:
DEFINE_VECTORIZED_FN(from_unix_to_datetime_ms_64);
// TODO
// DEFINE_VECTORIZED_FN(year_from_unixtime);
// DEFINE_VECTORIZED_FN(month_from_unixtime);
// DEFINE_VECTORIZED_FN(day_from_unixtime);
DEFINE_VECTORIZED_FN(year_from_unixtime);
DEFINE_VECTORIZED_FN(month_from_unixtime);
DEFINE_VECTORIZED_FN(day_from_unixtime);
DEFINE_VECTORIZED_FN(hour_from_unixtime);
// DEFINE_VECTORIZED_FN(minute_from_unixtime);
// DEFINE_VECTORIZED_FN(second_from_unixtime);
DEFINE_VECTORIZED_FN(minute_from_unixtime);
DEFINE_VECTORIZED_FN(second_from_unixtime);
// from_unix_datetime with format's auxiliary method
static Status from_unix_prepare(FunctionContext* context, FunctionContext::FunctionStateScope scope);

View File

@ -4727,4 +4727,92 @@ TEST_F(TimeFunctionsTest, hourFromUnixTime) {
}
}
} // namespace starrocks
// Checks minute_from_unixtime on a handful of timestamps with the time zone pinned to UTC.
TEST_F(TimeFunctionsTest, minuteFromUnixTime) {
    // Pin the time zone for the duration of the test and restore the previous one on exit.
    RuntimeState* state = _utils->get_fn_ctx()->state();
    std::string prev_timezone = state->timezone();
    ASSERT_TRUE(state->set_timezone("UTC"));
    DeferOp defer([&]() { state->set_timezone(prev_timezone); });

    struct Case {
        int64_t unixtime;
        int expected_minute;
    };
    const Case cases[] = {
            {0, 0},     // 00:00:00
            {59, 0},    // 00:00:59
            {60, 1},    // 00:01:00
            {3599, 59}, // 00:59:59
            {3600, 0},  // 01:00:00
            {3723, 2},  // 01:02:03
    };

    Int64Column::Ptr tc = Int64Column::create();
    for (const Case& c : cases) {
        tc->append(c.unixtime);
    }

    Columns columns;
    columns.emplace_back(tc);

    ColumnPtr result = TimeFunctions::minute_from_unixtime(_utils->get_fn_ctx(), columns).value();
    auto mins = ColumnHelper::cast_to<TYPE_INT>(result);

    size_t row = 0;
    for (const Case& c : cases) {
        EXPECT_EQ(c.expected_minute, mins->get_data()[row]);
        ++row;
    }
}
// Checks second_from_unixtime on a handful of timestamps with the time zone pinned to UTC.
TEST_F(TimeFunctionsTest, secondFromUnixTime) {
    // Pin the time zone for the duration of the test and restore the previous one on exit.
    RuntimeState* state = _utils->get_fn_ctx()->state();
    std::string prev_timezone = state->timezone();
    ASSERT_TRUE(state->set_timezone("UTC"));
    DeferOp defer([&]() { state->set_timezone(prev_timezone); });

    struct Case {
        int64_t unixtime;
        int expected_second;
    };
    const Case cases[] = {
            {0, 0},    // 00:00:00
            {59, 59},  // 00:00:59
            {60, 0},   // 00:01:00
            {61, 1},   // 00:01:01
            {3723, 3}, // 01:02:03
    };

    Int64Column::Ptr tc = Int64Column::create();
    for (const Case& c : cases) {
        tc->append(c.unixtime);
    }

    Columns columns;
    columns.emplace_back(tc);

    ColumnPtr result = TimeFunctions::second_from_unixtime(_utils->get_fn_ctx(), columns).value();
    auto secs = ColumnHelper::cast_to<TYPE_INT>(result);

    size_t row = 0;
    for (const Case& c : cases) {
        EXPECT_EQ(c.expected_second, secs->get_data()[row]);
        ++row;
    }
}
// Checks year/month/day_from_unixtime against known calendar dates with the time zone
// pinned to UTC.
TEST_F(TimeFunctionsTest, ymdFromUnixTime) {
    // Pin the time zone for the duration of the test and restore the previous one on exit.
    RuntimeState* state = _utils->get_fn_ctx()->state();
    std::string prev_timezone = state->timezone();
    ASSERT_TRUE(state->set_timezone("UTC"));
    DeferOp defer([&]() { state->set_timezone(prev_timezone); });

    struct Case {
        int64_t unixtime;
        int year;
        int month;
        int day;
    };
    const Case cases[] = {
            {0, 1970, 1, 1},         // 1970-01-01
            {86399, 1970, 1, 1},     // 1970-01-01
            {86400, 1970, 1, 2},     // 1970-01-02
            {946684800, 2000, 1, 1}, // 2000-01-01 00:00:00
            {951868800, 2000, 3, 1}, // 2000-03-01 00:00:00 (leap year)
    };

    Int64Column::Ptr tc = Int64Column::create();
    for (const Case& c : cases) {
        tc->append(c.unixtime);
    }

    Columns columns;
    columns.emplace_back(tc);

    ColumnPtr yearCol = TimeFunctions::year_from_unixtime(_utils->get_fn_ctx(), columns).value();
    ColumnPtr monthCol = TimeFunctions::month_from_unixtime(_utils->get_fn_ctx(), columns).value();
    ColumnPtr dayCol = TimeFunctions::day_from_unixtime(_utils->get_fn_ctx(), columns).value();

    auto years = ColumnHelper::cast_to<TYPE_INT>(yearCol);
    auto months = ColumnHelper::cast_to<TYPE_INT>(monthCol);
    auto days = ColumnHelper::cast_to<TYPE_INT>(dayCol);

    size_t row = 0;
    for (const Case& c : cases) {
        EXPECT_EQ(c.year, years->get_data()[row]);
        EXPECT_EQ(c.month, months->get_data()[row]);
        EXPECT_EQ(c.day, days->get_data()[row]);
        ++row;
    }
}
} //namespace starrocks

View File

@ -90,6 +90,11 @@ public class FunctionSet {
public static final String FROM_UNIXTIME_MS = "from_unixtime_ms";
public static final String HOUR = "hour";
public static final String HOUR_FROM_UNIXTIME = "hour_from_unixtime";
public static final String YEAR_FROM_UNIXTIME = "year_from_unixtime";
public static final String MONTH_FROM_UNIXTIME = "month_from_unixtime";
public static final String DAY_FROM_UNIXTIME = "day_from_unixtime";
public static final String MINUTE_FROM_UNIXTIME = "minute_from_unixtime";
public static final String SECOND_FROM_UNIXTIME = "second_from_unixtime";
public static final String MINUTE = "minute";
public static final String MONTH = "month";
public static final String MONTHNAME = "monthname";

View File

@ -395,6 +395,16 @@ public class SimplifiedPredicateRule extends BottomUpScalarOperatorRewriteRule {
return simplifiedJsonQuery(call);
} else if (FunctionSet.HOUR.equalsIgnoreCase(call.getFnName())) {
return simplifiedHourFromUnixTime(call);
} else if (FunctionSet.YEAR.equalsIgnoreCase(call.getFnName())) {
return simplifiedExtractFromUnixTime(call, FunctionSet.YEAR_FROM_UNIXTIME);
} else if (FunctionSet.MONTH.equalsIgnoreCase(call.getFnName())) {
return simplifiedExtractFromUnixTime(call, FunctionSet.MONTH_FROM_UNIXTIME);
} else if (FunctionSet.DAY.equalsIgnoreCase(call.getFnName())) {
return simplifiedExtractFromUnixTime(call, FunctionSet.DAY_FROM_UNIXTIME);
} else if (FunctionSet.MINUTE.equalsIgnoreCase(call.getFnName())) {
return simplifiedExtractFromUnixTime(call, FunctionSet.MINUTE_FROM_UNIXTIME);
} else if (FunctionSet.SECOND.equalsIgnoreCase(call.getFnName())) {
return simplifiedExtractFromUnixTime(call, FunctionSet.SECOND_FROM_UNIXTIME);
}
return call;
}
@ -659,4 +669,28 @@ public class SimplifiedPredicateRule extends BottomUpScalarOperatorRewriteRule {
return call;
}
/**
 * Rewrites {@code extract(from_unixtime(args...))} into {@code targetFn(args...)}.
 *
 * The rewrite applies only when {@code call} has exactly one child, that child is a call
 * to {@code from_unixtime}, and a builtin named {@code targetFn} exists whose parameter
 * types are identical to the argument types of that inner call. In every other case the
 * original {@code call} is returned unchanged.
 *
 * NOTE(review): the rewritten operator keeps {@code call.getType()} as its result type
 * rather than the return type declared by the resolved function - confirm the two agree
 * for every extract function routed here.
 *
 * @param call     the outer extract-style call (year, month, day, minute, second)
 * @param targetFn name of the specialized builtin to rewrite to
 * @return the rewritten call, or {@code call} itself when the pattern does not match
 */
private static ScalarOperator simplifiedExtractFromUnixTime(CallOperator call, String targetFn) {
    // Shape check: exactly one argument, and that argument is itself a function call.
    if (call.getChildren().size() != 1 || !(call.getChild(0) instanceof CallOperator)) {
        return call;
    }

    CallOperator inner = (CallOperator) call.getChild(0);
    if (!FunctionSet.FROM_UNIXTIME.equalsIgnoreCase(inner.getFnName())) {
        return call;
    }

    // Resolve the target builtin against the inner call's exact argument types.
    int argCount = inner.getChildren().size();
    Type[] argTypes = new Type[argCount];
    for (int i = 0; i < argCount; i++) {
        argTypes[i] = inner.getChild(i).getType();
    }

    Function fn = Expr.getBuiltinFunction(targetFn, argTypes, Function.CompareMode.IS_IDENTICAL);
    return fn == null ? call : new CallOperator(targetFn, call.getType(), inner.getChildren(), fn);
}
}

View File

@ -214,4 +214,57 @@ public class SimplifiedPredicateRuleTest {
ScalarOperator result4 = rule.apply(hourCall4, null);
assertEquals(hourCall4, result4);
}
// Verifies that year/month/day/minute/second applied to from_unixtime(ts) are each
// rewritten into the matching *_from_unixtime(ts) call whose only argument is ts.
@Test
public void applyExtractFromUnixTime() {
    ColumnRefOperator tsColumn = new ColumnRefOperator(1, Type.BIGINT, "ts", true);
    CallOperator fromUnixTimeCall = new CallOperator(FunctionSet.FROM_UNIXTIME, Type.VARCHAR,
            Lists.newArrayList(tsColumn), null);

    // Each row: {outer function applied to from_unixtime(ts), expected rewritten function}
    String[][] rewrites = {
            {FunctionSet.YEAR, FunctionSet.YEAR_FROM_UNIXTIME},
            {FunctionSet.MONTH, FunctionSet.MONTH_FROM_UNIXTIME},
            {FunctionSet.DAY, FunctionSet.DAY_FROM_UNIXTIME},
            {FunctionSet.MINUTE, FunctionSet.MINUTE_FROM_UNIXTIME},
            {FunctionSet.SECOND, FunctionSet.SECOND_FROM_UNIXTIME},
    };

    for (String[] rewrite : rewrites) {
        String sourceFn = rewrite[0];
        String expectedFn = rewrite[1];

        CallOperator extractCall = new CallOperator(sourceFn, Type.INT,
                Lists.newArrayList(fromUnixTimeCall), null);
        ScalarOperator rewritten = rule.apply(extractCall, null);

        assertEquals(OperatorType.CALL, rewritten.getOpType());
        CallOperator rewrittenCall = (CallOperator) rewritten;
        assertEquals(expectedFn, rewrittenCall.getFnName());
        assertEquals(tsColumn, rewrittenCall.getChild(0));
    }
}
}

View File

@ -604,12 +604,12 @@ vectorized_functions = [
# specialized version of from_unixtime to reduce the cost of datetime conversion
# TODO: 50380 year_from_unixtime
# TODO: 50381 month_from_unixtime
# TODO: 50382 day_from_unixtime
[50380, 'year_from_unixtime', True, False, 'INT', ['BIGINT'], 'TimeFunctions::year_from_unixtime'],
[50381, 'month_from_unixtime', True, False, 'INT', ['BIGINT'], 'TimeFunctions::month_from_unixtime'],
[50382, 'day_from_unixtime', True, False, 'INT', ['BIGINT'], 'TimeFunctions::day_from_unixtime'],
[50383, 'hour_from_unixtime', True, False, 'INT', ['BIGINT'], 'TimeFunctions::hour_from_unixtime'],
# TODO: 50384 minute_from_unixtime
# TODO: 50385 second_from_unixtime
[50384, 'minute_from_unixtime', True, False, 'INT', ['BIGINT'], 'TimeFunctions::minute_from_unixtime'],
[50385, 'second_from_unixtime', True, False, 'INT', ['BIGINT'], 'TimeFunctions::second_from_unixtime'],
[50310, 'dayname', True, False, 'VARCHAR', ['DATETIME'], 'TimeFunctions::day_name'],
[50311, 'monthname', True, False, 'VARCHAR', ['DATETIME'], 'TimeFunctions::month_name'],