原sql,一個base表a,LEFT JOIN三個表抽數
SELECT
ccu.*,
ctr.*,
om.*,
of.*
FROM
ods.a ccu
LEFT JOIN
ods.b ctr ON ccu.coupon_code = ctr.coupon_code AND ctr.is_deleted = 0
LEFT JOIN
ods.c om ON ctr.bill_code = om.order_id AND om.deleted = 0
LEFT JOIN
ods.d of ON om.order_id = of.order_id AND of.deleted = 0 and of.foe_type=1 and of.status=20
WHERE
ccu.created_date BETWEEN $start_time AND $end_time;
缺點致命傷:如果四個表數據量都比較大,需要大量的內存做left join匹配,就容易打爆內存,同時效率很低;
**優化一:**減少base表的取數據范圍,將base表寫成子查詢,而不是將where語句寫到最后,經驗證性能可以提升40%左右(單表千萬以上的數據量),但是由于b,c,d表數據量很大,LEFT JOIN依然會內存爆掉
SELECT ccutwo.*,ctr.*,om.*,of.*
FROM(SELECT ccu.* FROM ods.a ccu WHEREccu.created_date BETWEEN $start_time AND $end_time) AS ccutwoLEFT JOINods.coupon_trade_record ctr ON ccutwo.coupon_code = ctr.coupon_code AND ctr.is_deleted = 0LEFT JOINods.order_master om ON ctr.bill_code = om.order_id AND om.deleted = 0LEFT JOINods.order_foe of ON om.order_id = of.order_idAND of.deleted = 0AND of.foe_type = 1AND of.status = 20
**優化二:**通過where條件減少b,c,d表數據量,性能又提升一個level,基于create_date統一數據時間范圍
SELECT ccutwo.*,ctr.*,om.*,of.*
FROM(SELECT ccu.* FROM ods.a ccu WHEREccu.created_date BETWEEN $start_time AND $end_time) AS ccutwoLEFT JOINods.coupon_trade_record ctr ON ccutwo.coupon_code = ctr.coupon_codeAND ctr.is_deleted = 0 and ctr.created_date BETWEEN $start_time AND $end_timeLEFT JOINods.order_master om ON ctr.bill_code = om.order_idAND om.deleted = 0 and om.created_date BETWEEN $start_time AND $end_timeLEFT JOINods.order_foe of ON om.order_id = of.order_idAND of.deleted = 0AND of.foe_type = 1AND of.status = 20AND of.created_date BETWEEN $start_time AND $end_time
**優化三:**基于doris數據庫表的特性,通過分區字段限制數據范圍性能更高,trans_date為分區字段
SELECT ccutwo.*,ctr.*,om.*,of.*
FROM(SELECT ccu.* FROM ods.a ccu WHEREccu.trans_date = $trans_dateLEFT JOINods.coupon_trade_record ctr ON ccutwo.coupon_code = ctr.coupon_codeAND ctr.is_deleted = 0 and ctr.trans_date = $trans_dateLEFT JOINods.order_master om ON ctr.bill_code = om.order_idAND om.deleted = 0 and om.trans_date = $trans_dateLEFT JOINods.order_foe of ON om.order_id = of.order_idAND of.deleted = 0AND of.foe_type = 1AND of.status = 20AND of.trans_date = $trans_date
**優化四:**按照sql開發規范,多表LEFT JOIN不能超過三個表,拆分兩個sql執行,同時根據doris主鍵模型(該模型保證 Key 列的唯一性,插入或更新數據時,新數據會覆蓋具有相同 Key 的舊數據,確保數據記錄為最新。),新建主鍵模型表
老表帶分區字段
CREATE TABLE `dwd_table` (--fields
) ENGINE=OLAP
UNIQUE KEY(`coupon_code`, `trans_date`)
COMMENT '領券生命周期dwd寬表'
PARTITION BY RANGE(`trans_date`)
(PARTITION p20250809 VALUES [('2025-08-09'), ('2025-08-10')),
PARTITION p20250810 VALUES [('2025-08-10'), ('2025-08-11')))
DISTRIBUTED BY HASH(`coupon_code`) BUCKETS 1
PROPERTIES (
"replication_allocation" = "tag.location.default: 3",
"min_load_replica_num" = "-1",
"is_being_synced" = "false",
"dynamic_partition.enable" = "true",
"dynamic_partition.time_unit" = "DAY",
"dynamic_partition.time_zone" = "Asia/Shanghai",
"dynamic_partition.start" = "-2147483648",
"dynamic_partition.end" = "32",
"dynamic_partition.prefix" = "p",
"dynamic_partition.replication_allocation" = "tag.location.default: 3",
"dynamic_partition.buckets" = "1",
"dynamic_partition.create_history_partition" = "true",
"dynamic_partition.history_partition_num" = "350",
"dynamic_partition.hot_partition_num" = "0",
"dynamic_partition.reserved_history_periods" = "NULL",
"dynamic_partition.storage_policy" = "",
"storage_medium" = "hdd",
"storage_format" = "V2",
"inverted_index_storage_format" = "V1",
"enable_unique_key_merge_on_write" = "true",
"light_schema_change" = "true",
"disable_auto_compaction" = "false",
"enable_single_replica_compaction" = "false",
"group_commit_interval_ms" = "10000",
"group_commit_data_bytes" = "134217728",
"enable_mow_light_delete" = "false"
);
新表,不帶分區字段
CREATE TABLE `dwd_table_new` (--fields
) ENGINE=OLAP
UNIQUE KEY(`coupon_code`)
COMMENT '生命周期dwd寬表'
DISTRIBUTED BY HASH(`coupon_code`) BUCKETS 8
PROPERTIES (
"replication_allocation" = "tag.location.default: 3",
"min_load_replica_num" = "-1",
"is_being_synced" = "false",
"storage_medium" = "hdd",
"storage_format" = "V2",
"inverted_index_storage_format" = "V1",
"enable_unique_key_merge_on_write" = "true",
"light_schema_change" = "true",
"disable_auto_compaction" = "false",
"enable_single_replica_compaction" = "false",
"group_commit_interval_ms" = "10000",
"group_commit_data_bytes" = "134217728",
"enable_mow_light_delete" = "false"
);
寫入時先寫base(a)表,再寫附表(bcd),如果UNIQUE KEY一樣就會覆蓋老數據,不會新插入一條