[Feature] Support expression partition for dbt (#38724)

Signed-off-by: Astralidea <astralidea@163.com>
This commit is contained in:
Xueyan Li 2024-01-09 17:13:04 +08:00 committed by GitHub
parent 9e11e613a3
commit ec352bd482
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 44 additions and 27 deletions

View File

@ -25,17 +25,18 @@ $ pip install dbt-starrocks
```
## Supported features
| Starrocks <= 2.5 | Starrocks 2.5 ~ 3.1 | Starrocks >= 3.1 | Feature |
|:----------------:|:--------------------:|:-----------------:|:---------------------------------:|
| ✅ | ✅ | ✅ | Table materialization |
| ✅ | ✅ | ✅ | View materialization |
| ❌ | ❌ | ✅ | Materialized View materialization |
| ❌ | ✅ | ✅ | Incremental materialization |
| ❌ | ✅ | ✅ | Primary Key Model |
| ✅ | ✅ | ✅ | Sources |
| ✅ | ✅ | ✅ | Custom data tests |
| ✅ | ✅ | ✅ | Docs generate |
| ❌ | ❌ | ❌ | Kafka |
| Starrocks <= 2.5 | Starrocks 2.5 ~ 3.1 | Starrocks >= 3.1 | Feature |
|:----------------:|:-------------------:|:-----------------:|:---------------------------------:|
| ✅ | ✅ | ✅ | Table materialization |
| ✅ | ✅ | ✅ | View materialization |
| ❌ | ❌ | ✅ | Materialized View materialization |
| ❌ | ✅ | ✅ | Incremental materialization |
| ❌ | ✅ | ✅ | Primary Key Model |
| ✅ | ✅ | ✅ | Sources |
| ✅ | ✅ | ✅ | Custom data tests |
| ✅ | ✅ | ✅ | Docs generate |
| ❌ | ❌ | ✅ | Expression Partition |
| ❌ | ❌ | ❌ | Kafka |
### Notice
1. When StarRocks Version < 2.5, `Create table as` can only set engine='OLAP' and table_type='DUPLICATE'
@ -84,6 +85,9 @@ models:
buckets: 3 // default 10
partition_by: ['some_date']
partition_by_init: ["PARTITION p1 VALUES [('1971-01-01 00:00:00'), ('1991-01-01 00:00:00')),PARTITION p1972 VALUES [('1991-01-01 00:00:00'), ('1999-01-01 00:00:00'))"]
// RANGE, LIST, or Expr partition types should be used in conjunction with partition_by configuration
// Expr partition type requires an expression (e.g., date_trunc) specified in partition_by
partition_type: 'RANGE' // RANGE (default), LIST, or Expr; must be used together with partition_by
properties: [{"replication_num":"1", "in_memory": "true"}]
refresh_method: 'async' // only for materialized view default manual
```
@ -93,6 +97,7 @@ models:
```
{{ config(materialized='view') }}
{{ config(materialized='table', engine='OLAP', buckets=32, distributed_by=['id']) }}
{{ config(materialized='table', partition_by=['date_trunc("day", first_order)'], partition_type='Expr') }}
{{ config(materialized='incremental', table_type='PRIMARY', engine='OLAP', buckets=32, distributed_by=['id']) }}
{{ config(materialized='materialized_view') }}
{{ config(materialized='materialized_view', properties={"storage_medium":"SSD"}) }}

View File

@ -13,4 +13,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.
version = "1.4.0"
version = "1.4.1"

View File

@ -21,6 +21,7 @@
{%- set table_type = config.get('table_type', 'DUPLICATE') -%}
{%- set keys = config.get('keys') -%}
{%- set partition_by = config.get('partition_by') -%}
{%- set partition_type = config.get('partition_type', 'RANGE') -%}
{%- set partition_by_init = config.get('partition_by_init') -%}
{%- set buckets = config.get('buckets', 10) -%}
{%- set distributed_by = config.get('distributed_by') -%}
@ -79,7 +80,7 @@
{# 3. SET PARTITION #}
{%- if partition_by is not none -%}
{{ starrocks__partition_by(partition_by, partition_by_init) }}
{{ starrocks__partition_by(partition_type, partition_by, partition_by_init) }}
{%- endif -%}
{# 4. SET DISTRIBUTED #}
@ -122,18 +123,29 @@
{% endif %}
{%- endmacro %}
{%- macro starrocks__partition_by(cols, init) -%}
{%- if cols is not none %}
PARTITION BY RANGE (
{%- for col in cols -%}
{{ col }} {%- if not loop.last -%}, {%- endif -%}
{%- endfor -%}
)(
{%- if init is not none -%}
{%- for row in init -%}
{{ row }} {%- if not loop.last -%}, {%- endif -%}
{%- endfor -%}
{%- endif -%}
)
{#
  Render the PARTITION BY clause for a table definition.

  Args:
    p_type: partition type. 'Expr' (matched case-insensitively) selects
            expression partitioning; any other value is emitted verbatim as
            the partition keyword (e.g. RANGE or LIST).
    cols:   list of partition columns / expressions. Nothing is rendered when
            it is none. Expression partitioning requires exactly one entry.
    init:   optional list of initial partition definitions, rendered inside
            the trailing parentheses. Not used for expression partitioning.

  Raises:
    A compiler error (via exceptions.raise_compiler_error) when expression
    partitioning is requested with a number of entries other than 1.
#}
{%- macro starrocks__partition_by(p_type, cols, init) -%}
  {#- Guard once for both branches so a direct call with cols=none renders nothing. -#}
  {%- if cols is not none -%}
    {#- Accept 'Expr', 'EXPR', 'expr', ... so a casing difference does not fall through to the RANGE/LIST syntax. -#}
    {%- if p_type | lower == 'expr' -%}
      {%- if cols | length != 1 -%}
        {%- set msg -%}
          The number of partition_by parameters for expression partition should be 1
        {%- endset -%}
        {{ exceptions.raise_compiler_error(msg) }}
      {%- endif %}
      PARTITION BY {{ cols[0] }}
    {%- else %}
      PARTITION BY {{ p_type }} (
        {%- for col in cols -%}
          {{ col }} {%- if not loop.last -%}, {%- endif -%}
        {%- endfor -%}
      )(
        {%- if init is not none -%}
          {%- for row in init -%}
            {{ row }} {%- if not loop.last -%}, {%- endif -%}
          {%- endfor -%}
        {%- endif -%}
      )
    {%- endif %}
  {% endif -%}
{%- endmacro -%}

View File

@ -41,7 +41,7 @@ with open(os.path.join(this_directory, "README.md")) as f:
package_name = "dbt-starrocks"
# make sure this always matches dbt/adapters/starrocks/__version__.py
package_version = "1.4.0"
package_version = "1.4.1"
description = """The Starrocks adapter plugin for dbt"""