BigQuery - Group by on arrays
up vote
0
down vote
favorite
I want to group by on an array.
sample query:
#standardSQL
WITH `project.dataset.table` AS (
SELECT 'compute' description, '[{"key":"application","value":"scaled-server"},{"key":"department","value":"hrd"}]' labels, 0.323316 cost UNION ALL
SELECT 'compute' description, '[{"key":"application","value":"scaled-server"},{"key":"department","value":"hrd"}]' labels, 0.342825 cost
)
SELECT
description,
ARRAY(
SELECT AS STRUCT
JSON_EXTRACT_SCALAR(kv, '$.key') key,
JSON_EXTRACT_SCALAR(kv, '$.value') value
FROM UNNEST(SPLIT(labels, '},{')) kv_temp,
UNNEST([CONCAT('{', REGEXP_REPLACE(kv_temp, r'^[{|}]$', ''), '}')]) kv
) labels,
cost
FROM `project.dataset.table`
Result of the above query:
Row description labels.key labels.value cost
1 compute application scaled-server 0.323316
department hrd
2 compute application scaled-server 0.342825
department hrd
I want result like below:
Row description labels.key labels.value cost
1 compute application scaled-server 0.666141
department hrd
arrays group-by google-cloud-platform google-bigquery
add a comment |
up vote
0
down vote
favorite
I want to group by on an array.
sample query:
#standardSQL
WITH `project.dataset.table` AS (
SELECT 'compute' description, '[{"key":"application","value":"scaled-server"},{"key":"department","value":"hrd"}]' labels, 0.323316 cost UNION ALL
SELECT 'compute' description, '[{"key":"application","value":"scaled-server"},{"key":"department","value":"hrd"}]' labels, 0.342825 cost
)
SELECT
description,
ARRAY(
SELECT AS STRUCT
JSON_EXTRACT_SCALAR(kv, '$.key') key,
JSON_EXTRACT_SCALAR(kv, '$.value') value
FROM UNNEST(SPLIT(labels, '},{')) kv_temp,
UNNEST([CONCAT('{', REGEXP_REPLACE(kv_temp, r'^[{|}]$', ''), '}')]) kv
) labels,
cost
FROM `project.dataset.table`
Result of the above query:
Row description labels.key labels.value cost
1 compute application scaled-server 0.323316
department hrd
2 compute application scaled-server 0.342825
department hrd
I want result like below:
Row description labels.key labels.value cost
1 compute application scaled-server 0.666141
department hrd
arrays group-by google-cloud-platform google-bigquery
what is the logic of grouping? is it - all keys and values should be the same in order to be grouped?
– Mikhail Berlyant
Nov 19 at 20:20
I just updated the question, the intention is, the description column has what service is this like VM, cloudSQL, BQ and the label column has tell us the information about the labels like vm labels (env=prod) SO, end of the day if I want to get cost for compute or vm which has the env=staging . Thats why I need to group by on DESCRIPTION first then key values
– SQLadmin
Nov 19 at 20:23
add a comment |
up vote
0
down vote
favorite
up vote
0
down vote
favorite
I want to group by on an array.
sample query:
#standardSQL
WITH `project.dataset.table` AS (
SELECT 'compute' description, '[{"key":"application","value":"scaled-server"},{"key":"department","value":"hrd"}]' labels, 0.323316 cost UNION ALL
SELECT 'compute' description, '[{"key":"application","value":"scaled-server"},{"key":"department","value":"hrd"}]' labels, 0.342825 cost
)
SELECT
description,
ARRAY(
SELECT AS STRUCT
JSON_EXTRACT_SCALAR(kv, '$.key') key,
JSON_EXTRACT_SCALAR(kv, '$.value') value
FROM UNNEST(SPLIT(labels, '},{')) kv_temp,
UNNEST([CONCAT('{', REGEXP_REPLACE(kv_temp, r'^[{|}]$', ''), '}')]) kv
) labels,
cost
FROM `project.dataset.table`
Result of the above query:
Row description labels.key labels.value cost
1 compute application scaled-server 0.323316
department hrd
2 compute application scaled-server 0.342825
department hrd
I want result like below:
Row description labels.key labels.value cost
1 compute application scaled-server 0.666141
department hrd
arrays group-by google-cloud-platform google-bigquery
I want to group by on an array.
sample query:
#standardSQL
WITH `project.dataset.table` AS (
SELECT 'compute' description, '[{"key":"application","value":"scaled-server"},{"key":"department","value":"hrd"}]' labels, 0.323316 cost UNION ALL
SELECT 'compute' description, '[{"key":"application","value":"scaled-server"},{"key":"department","value":"hrd"}]' labels, 0.342825 cost
)
SELECT
description,
ARRAY(
SELECT AS STRUCT
JSON_EXTRACT_SCALAR(kv, '$.key') key,
JSON_EXTRACT_SCALAR(kv, '$.value') value
FROM UNNEST(SPLIT(labels, '},{')) kv_temp,
UNNEST([CONCAT('{', REGEXP_REPLACE(kv_temp, r'^[{|}]$', ''), '}')]) kv
) labels,
cost
FROM `project.dataset.table`
Result of the above query:
Row description labels.key labels.value cost
1 compute application scaled-server 0.323316
department hrd
2 compute application scaled-server 0.342825
department hrd
I want result like below:
Row description labels.key labels.value cost
1 compute application scaled-server 0.666141
department hrd
arrays group-by google-cloud-platform google-bigquery
arrays group-by google-cloud-platform google-bigquery
edited Nov 19 at 20:28
Mikhail Berlyant
54k43064
54k43064
asked Nov 19 at 20:16
SQLadmin
323215
323215
what is the logic of grouping? is it - all keys and values should be the same in order to be grouped?
– Mikhail Berlyant
Nov 19 at 20:20
I just updated the question, the intention is, the description column has what service is this like VM, cloudSQL, BQ and the label column has tell us the information about the labels like vm labels (env=prod) SO, end of the day if I want to get cost for compute or vm which has the env=staging . Thats why I need to group by on DESCRIPTION first then key values
– SQLadmin
Nov 19 at 20:23
add a comment |
what is the logic of grouping? is it - all keys and values should be the same in order to be grouped?
– Mikhail Berlyant
Nov 19 at 20:20
I just updated the question, the intention is, the description column has what service is this like VM, cloudSQL, BQ and the label column has tell us the information about the labels like vm labels (env=prod) SO, end of the day if I want to get cost for compute or vm which has the env=staging . Thats why I need to group by on DESCRIPTION first then key values
– SQLadmin
Nov 19 at 20:23
what is the logic of grouping? is it - all keys and values should be the same in order to be grouped?
– Mikhail Berlyant
Nov 19 at 20:20
what is the logic of grouping? is it - all keys and values should be the same in order to be grouped?
– Mikhail Berlyant
Nov 19 at 20:20
I just updated the question, the intention is, the description column has what service is this like VM, cloudSQL, BQ and the label column has tell us the information about the labels like vm labels (env=prod) SO, end of the day if I want to get cost for compute or vm which has the env=staging . Thats why I need to group by on DESCRIPTION first then key values
– SQLadmin
Nov 19 at 20:23
I just updated the question, the intention is, the description column has what service is this like VM, cloudSQL, BQ and the label column has tell us the information about the labels like vm labels (env=prod) SO, end of the day if I want to get cost for compute or vm which has the env=staging . Thats why I need to group by on DESCRIPTION first then key values
– SQLadmin
Nov 19 at 20:23
add a comment |
1 Answer
1
active
oldest
votes
up vote
1
down vote
accepted
#standardSQL
WITH `project.dataset.table` AS (
SELECT 'compute' description, '[{"key":"application","value":"scaled-server"},{"key":"department","value":"hrd"}]' labels, 0.323316 cost UNION ALL
SELECT 'compute' description, '[{"key":"application","value":"scaled-server"},{"key":"department","value":"hrd"}]' labels, 0.342825 cost
), temp AS (
SELECT description, labels, SUM(cost) AS cost
FROM `project.dataset.table`
GROUP BY description, labels
)
SELECT
description,
ARRAY(
SELECT AS STRUCT
JSON_EXTRACT_SCALAR(kv, '$.key') key,
JSON_EXTRACT_SCALAR(kv, '$.value') value
FROM UNNEST(SPLIT(labels, '},{')) kv_temp,
UNNEST([CONCAT('{', REGEXP_REPLACE(kv_temp, r'^[{|}]$', ''), '}')]) kv
) labels,
cost
FROM temp
add a comment |
1 Answer
1
active
oldest
votes
1 Answer
1
active
oldest
votes
active
oldest
votes
active
oldest
votes
up vote
1
down vote
accepted
#standardSQL
WITH `project.dataset.table` AS (
SELECT 'compute' description, '[{"key":"application","value":"scaled-server"},{"key":"department","value":"hrd"}]' labels, 0.323316 cost UNION ALL
SELECT 'compute' description, '[{"key":"application","value":"scaled-server"},{"key":"department","value":"hrd"}]' labels, 0.342825 cost
), temp AS (
SELECT description, labels, SUM(cost) AS cost
FROM `project.dataset.table`
GROUP BY description, labels
)
SELECT
description,
ARRAY(
SELECT AS STRUCT
JSON_EXTRACT_SCALAR(kv, '$.key') key,
JSON_EXTRACT_SCALAR(kv, '$.value') value
FROM UNNEST(SPLIT(labels, '},{')) kv_temp,
UNNEST([CONCAT('{', REGEXP_REPLACE(kv_temp, r'^[{|}]$', ''), '}')]) kv
) labels,
cost
FROM temp
add a comment |
up vote
1
down vote
accepted
#standardSQL
WITH `project.dataset.table` AS (
SELECT 'compute' description, '[{"key":"application","value":"scaled-server"},{"key":"department","value":"hrd"}]' labels, 0.323316 cost UNION ALL
SELECT 'compute' description, '[{"key":"application","value":"scaled-server"},{"key":"department","value":"hrd"}]' labels, 0.342825 cost
), temp AS (
SELECT description, labels, SUM(cost) AS cost
FROM `project.dataset.table`
GROUP BY description, labels
)
SELECT
description,
ARRAY(
SELECT AS STRUCT
JSON_EXTRACT_SCALAR(kv, '$.key') key,
JSON_EXTRACT_SCALAR(kv, '$.value') value
FROM UNNEST(SPLIT(labels, '},{')) kv_temp,
UNNEST([CONCAT('{', REGEXP_REPLACE(kv_temp, r'^[{|}]$', ''), '}')]) kv
) labels,
cost
FROM temp
add a comment |
up vote
1
down vote
accepted
up vote
1
down vote
accepted
#standardSQL
WITH `project.dataset.table` AS (
SELECT 'compute' description, '[{"key":"application","value":"scaled-server"},{"key":"department","value":"hrd"}]' labels, 0.323316 cost UNION ALL
SELECT 'compute' description, '[{"key":"application","value":"scaled-server"},{"key":"department","value":"hrd"}]' labels, 0.342825 cost
), temp AS (
SELECT description, labels, SUM(cost) AS cost
FROM `project.dataset.table`
GROUP BY description, labels
)
SELECT
description,
ARRAY(
SELECT AS STRUCT
JSON_EXTRACT_SCALAR(kv, '$.key') key,
JSON_EXTRACT_SCALAR(kv, '$.value') value
FROM UNNEST(SPLIT(labels, '},{')) kv_temp,
UNNEST([CONCAT('{', REGEXP_REPLACE(kv_temp, r'^[{|}]$', ''), '}')]) kv
) labels,
cost
FROM temp
#standardSQL
WITH `project.dataset.table` AS (
SELECT 'compute' description, '[{"key":"application","value":"scaled-server"},{"key":"department","value":"hrd"}]' labels, 0.323316 cost UNION ALL
SELECT 'compute' description, '[{"key":"application","value":"scaled-server"},{"key":"department","value":"hrd"}]' labels, 0.342825 cost
), temp AS (
SELECT description, labels, SUM(cost) AS cost
FROM `project.dataset.table`
GROUP BY description, labels
)
SELECT
description,
ARRAY(
SELECT AS STRUCT
JSON_EXTRACT_SCALAR(kv, '$.key') key,
JSON_EXTRACT_SCALAR(kv, '$.value') value
FROM UNNEST(SPLIT(labels, '},{')) kv_temp,
UNNEST([CONCAT('{', REGEXP_REPLACE(kv_temp, r'^[{|}]$', ''), '}')]) kv
) labels,
cost
FROM temp
answered Nov 19 at 20:33
Mikhail Berlyant
54k43064
54k43064
add a comment |
add a comment |
Thanks for contributing an answer to Stack Overflow!
- Please be sure to answer the question. Provide details and share your research!
But avoid …
- Asking for help, clarification, or responding to other answers.
- Making statements based on opinion; back them up with references or personal experience.
To learn more, see our tips on writing great answers.
Some of your past answers have not been well-received, and you're in danger of being blocked from answering.
Please pay close attention to the following guidance:
- Please be sure to answer the question. Provide details and share your research!
But avoid …
- Asking for help, clarification, or responding to other answers.
- Making statements based on opinion; back them up with references or personal experience.
To learn more, see our tips on writing great answers.
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53382005%2fbigquery-group-by-on-arrays%23new-answer', 'question_page');
}
);
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
what is the logic of grouping? is it - all keys and values should be the same in order to be grouped?
– Mikhail Berlyant
Nov 19 at 20:20
I just updated the question, the intention is, the description column has what service is this like VM, cloudSQL, BQ and the label column has tell us the information about the labels like vm labels (env=prod) SO, end of the day if I want to get cost for compute or vm which has the env=staging . Thats why I need to group by on DESCRIPTION first then key values
– SQLadmin
Nov 19 at 20:23