-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathData-Beans-Create-Orders.sql
219 lines (178 loc) · 8.11 KB
/
Data-Beans-Create-Orders.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
-- location_id 1 to 50
-- customer_id 1 to 10,702
-- 200 usa
-- 200 london GBR
-- 200 japan JPN
-- company_id 1 to 10
-- menu_id 1 to 212
-- customer reviews 1 to 8,027 (missing some rows)
-- CREATE OR REPLACE TABLE `PROJECT-ID.coffee_curated.order_backup` AS SELECT * FROM `PROJECT-ID.coffee_curated.order`
INSERT INTO
`PROJECT-ID.coffee_curated.order` (
order_id,
location_id,
customer_id,
order_datetime,
order_completion_datetime)
WITH
data_max_id AS (
-- 2021-11-01 03:59:47.952295 UTC, 2023-12-01 14:35:18.628451 UTC, 10000000, 10000000
SELECT MIN(order_datetime) AS min_order_datetime, MAX(order_datetime) AS max_order_datetime, MAX(order_id) AS max_id, COUNT(*) AS record_count
FROM`PROJECT-ID.coffee_curated.order`
),
data_random_data AS (
SELECT CAST(ROUND(1 + RAND() * (10 - 1)) AS INT64) AS location_id,
CAST(ROUND(1 + RAND() * (10702 - 1)) AS INT64) AS customer_id,
TIMESTAMP_ADD(data_max_id.max_order_datetime, INTERVAL CAST(ROUND(1 + RAND() * (30 * 24 * 60 * 2) - 1) AS INT64) MINUTE) AS order_datetime, -- 2 months of minutes
FROM UNNEST(GENERATE_ARRAY(1, 1000000)) AS element
CROSS JOIN data_max_id
),
data_random_all_data AS (
SELECT *,
TIMESTAMP_ADD(order_datetime, INTERVAL CAST(ROUND(60 + RAND() * ((60*15)) - 60) AS INT64) SECOND) AS order_completion_datetime, -- from 60 seconds to 15 minutes
FROM data_random_data
),
data_ranked AS (
SELECT *,
ROW_NUMBER() OVER (ORDER BY order_datetime) AS row_number
FROM data_random_all_data
)
-- SELECT extract(YEAR from order_datetime) as year, extract(MONTH from order_datetime) as month, count(*) FROM data_ranked GROUP BY ALL;
-- SELECT min(order_datetime), max(order_datetime) FROM data_ranked
SELECT row_number + max_id AS order_id,
location_id,
customer_id,
order_datetime,
order_completion_datetime
FROM data_ranked
CROSS JOIN data_max_id;
/*
SELECT extract(YEAR from order_datetime) as year,
extract(MONTH from order_datetime) as month,
count(*)
FROM`PROJECT-ID.coffee_curated.order`
group by all
order by 1 desc, 2 desc
delete
from `PROJECT-ID.coffee_curated.order`
where extract(YEAR from order_datetime) = 2027
and extract(MONTH from order_datetime) > 6;
*/
-- location_id 1 to 50
-- customer_id 1 to 10,702
-- 200 usa
-- 200 london GBR
-- 200 japan JPN
-- company_id 1 to 10
-- menu_id 1 to 212
-- customer reviews 1 to 8,027 (missing some rows)
--truncate table `PROJECT-ID.coffee_curated.order_item`;
-- CREATE OR REPLACE TABLE `PROJECT-ID.coffee_curated.order_item_backup` AS SELECT * FROM `PROJECT-ID.coffee_curated.order_item`
INSERT INTO `PROJECT-ID.coffee_curated.order_item`
(order_item_id, order_id, menu_id, quantity, item_size, item_price, item_total)
WITH
data_max_id AS (
SELECT IFNULL(MAX(order_item_id),0) AS max_id
FROM `PROJECT-ID.coffee_curated.order_item`
),
-- order's without items
data_order AS (
SELECT order_id,
CASE WHEN RAND() <= .5 THEN 1 -- most order are 1 item
ELSE CAST(ROUND(2 + RAND() * (5 - 2)) AS INT64) -- up to 5 items per order
END AS order_item_count
FROM `PROJECT-ID.coffee_curated.order` AS order_t
WHERE NOT EXISTS (SELECT * FROM `PROJECT-ID.coffee_curated.order_item` AS order_item WHERE order_t.order_id = order_item.order_id)
),
data_order_with_array AS
(
select order_id,
order_item_count,
GENERATE_ARRAY(1, order_item_count) as order_item_array,
RAND() AS quantity_rand
from data_order
),
data_random_all_data AS (
SELECT order_id,
order_item AS order_item_id,
CAST(ROUND(1 + RAND() * (212 - 1)) AS INT64) AS menu_id,
CASE WHEN quantity_rand <= .75 THEN 1 -- most orders are 1 quanity
WHEN quantity_rand <= .85 THEN 2
WHEN quantity_rand <= .90 THEN 3
WHEN quantity_rand <= .95 THEN 4
ELSE 5
END AS quantity,
from data_order_with_array
CROSS JOIN UNNEST(order_item_array) AS order_item
),
data_ranked AS (
SELECT *,
ROW_NUMBER() OVER (ORDER BY order_id, order_item_id) AS row_number
FROM data_random_all_data
)
SELECT row_number + max_id AS order_item_id,
data_ranked.order_id,
data_ranked.menu_id,
data_ranked.quantity,
menu.item_size,
menu.item_price,
ROUND(CAST(data_ranked.quantity * menu.item_price AS NUMERIC), 2, "ROUND_HALF_EVEN") AS item_total
FROM data_ranked
CROSS JOIN data_max_id
INNER JOIN `PROJECT-ID.data_beans_curated.menu` AS menu
ON data_ranked.menu_id = menu.menu_id;
SELECT avg(item_total)
from `PROJECT-ID.coffee_curated.order` as parent
inner join `PROJECT-ID.coffee_curated.order_item` as child on parent.order_id = child.order_id
/*
CREATE OR REPLACE TABLE `data-analytics-golden-v1-share.coffee_curated.order` COPY `PROJECT-ID.coffee_curated.order`;
CREATE OR REPLACE TABLE `data-analytics-golden-v1-share.coffee_curated.order_item` COPY `PROJECT-ID.coffee_curated.order_item`;
*/
CREATE OR REPLACE MATERIALIZED VIEW `PROJECT-ID.coffee_curated.looker_databeans_report`
CLUSTER BY sale_date
OPTIONS (enable_refresh = true, refresh_interval_minutes = 30, description='Used for Looker Studio Pro with Duet AI')
AS
SELECT
EXTRACT(DATE FROM TIMESTAMP(order_datetime)) AS sale_date,
city.city_name,
TIMESTAMP_DIFF(order_table.order_completion_datetime,order_table.order_datetime, SECOND) AS seconds_to_make_order,
customer.customer_name,
company.company_name,
order_item.quantity,
menu.item_name,
menu.item_size,
menu.item_price as sale_price
FROM `PROJECT-ID.coffee_curated.order` AS order_table
INNER JOIN `PROJECT-ID.coffee_curated.order_item` AS order_item
ON order_table.order_id = order_item.order_id
INNER JOIN `PROJECT-ID.coffee_curated.menu` AS menu
ON order_item.menu_id = menu.menu_id
INNER join PROJECT-ID.coffee_curated.location as location
ON order_table.location_id = location.location_id
INNER join PROJECT-ID.coffee_curated.city as city
ON location.city_id = city.city_id
INNER join PROJECT-ID.coffee_curated.customer as customer
ON customer.customer_id=order_table.customer_id
INNER join PROJECT-ID.coffee_curated.company as company
ON menu.company_id = company.company_id;
/*
SELECT """EXPORT DATA OPTIONS ( uri = 'gs://PROJECT-ID/data-beans/v1/export/""" || table_name || """/""" || table_name || """_*.avro', format = 'AVRO', overwrite = true) AS ( SELECT * FROM `PROJECT-ID.coffee_curated.""" || table_name || """`);"""
FROM coffee_curated.INFORMATION_SCHEMA.TABLES
WHERE table_type = 'BASE TABLE'
ORDER BY table_name;
EXPORT DATA OPTIONS ( uri = 'gs://PROJECT-ID/data-beans/v1/export/order/order_*.avro', format = 'AVRO', overwrite = true) AS ( SELECT * FROM `PROJECT-ID.coffee_curated.order`);
EXPORT DATA OPTIONS ( uri = 'gs://PROJECT-ID/data-beans/v1/export/order_item/order_item_*.avro', format = 'AVRO', overwrite = true) AS ( SELECT * FROM `PROJECT-ID.coffee_curated.order_item`);
LOAD DATA OVERWRITE `PROJECT-ID.coffee_curated.LOAD_order_item`
FROM FILES ( format = 'AVRO', uris = ['gs://PROJECT-ID/data-beans/v1/export/order_item/order_item_*.avro']);
LOAD DATA OVERWRITE `PROJECT-ID.coffee_curated.load_order` FROM FILES ( format = 'AVRO', uris = ['gs://PROJECT-ID/data-beans/v1/export/order/order_*.avro']);
CREATE TABLE `PROJECT-ID.coffee_curated.LOAD_order`
CLUSTER BY (order_id)
AS
SELECT order_id, location_id, customer_id,
TIMESTAMP_MICROS(order_datetime) AS order_datetime, TIMESTAMP_MICROS(order_completion_datetime) AS order_completion_datetime
FROM `PROJECT-ID.coffee_curated.load_order` ;
DROP TABLE `PROJECT-ID.coffee_curated.load_order`;
-- DELETE (by hand)
gsutil cp gs://PROJECT-ID/data-beans/v1/export/order/* gs://data-analytics-golden-demo/data-beans/v1/export/order/
gsutil cp gs://PROJECT-ID/data-beans/v1/export/order_item/* gs://data-analytics-golden-demo/data-beans/v1/export/order_item/
*/