Spotify sql queries by andreacappelletti97 · Pull Request #2 · FeatureBaseDB/sql-examples · GitHub
Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions data/spotify/insert_artists.sql
35 changes: 35 additions & 0 deletions data/spotify/insert_playlists.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
-- Create table playlists
CREATE TABLE playlists (
_id id,
uri STRING,
name STRING,
description STRING,
spotify_query STRING,
author STRING,
n_tracks int,
playlist_followers int
);

-- Url of the dataset: https://transfer.sh/csCuCE2FTO/final_playlists.csv
-- Insert data into playlists with a bulk insert by ingesting a csv file from an url

BULK INSERT
INTO playlists (_id, uri, name, description, spotify_query, author, n_tracks, playlist_followers)
MAP(0 id,
1 STRING,
2 STRING,
3 STRING,
4 STRING,
5 STRING,
6 int,
7 int)
FROM
'https://transfer.sh/csCuCE2FTO/final_playlists.csv'
WITH
BATCHSIZE 100000
FORMAT 'CSV'
INPUT 'URL'
HEADER_ROW;

-- Make sure the data is there by selecting the first row
SELECT * FROM playlists LIMIT 1;
40 changes: 40 additions & 0 deletions data/spotify/insert_tracks.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
-- Create table tracks
CREATE TABLE tracks (
_id id,
name STRING,
artists_names STRINGSET,
track_uri STRING,
popularity int,
album_type STRING,
is_playable bool,
release_date STRING,
artists_uris STRINGSET,
playlist_uris STRINGSET
);


-- Url of the dataset: https://transfer.sh/uEVnKIdpnx/final_tracks.csv
-- Insert data into tracks with a bulk insert by ingesting a csv file from an url

BULK INSERT
INTO tracks (_id, name, artists_names, track_uri, popularity, album_type, is_playable, release_date, artists_uris, playlist_uris)
MAP(0 id,
1 STRING,
2 STRINGSET,
3 STRING,
4 int,
5 STRING,
6 bool,
7 STRING,
8 STRINGSET,
9 STRINGSET)
FROM
'https://transfer.sh/uEVnKIdpnx/final_tracks.csv'
WITH
BATCHSIZE 100000
FORMAT 'CSV'
INPUT 'URL'
HEADER_ROW;

-- Make sure the data is there by selecting the first row
SELECT * FROM tracks LIMIT 1;
174 changes: 174 additions & 0 deletions data/spotify/queries.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
-- This file contains some SQL queries that you can use to explore the data with fbsql.

-- Find the most popular artists:
SELECT * FROM artists
ORDER BY artist_popularity DESC
LIMIT 10;

-- Find the most popular tracks:
SELECT * FROM tracks
ORDER BY popularity DESC
LIMIT 10;

-- Find the most popular playlists:
SELECT * FROM playlists
ORDER BY playlist_followers DESC
LIMIT 10;

-- Find the most popular playlists with more than 100 tracks:
SELECT * FROM playlists
WHERE n_tracks > 100
ORDER BY playlist_followers DESC
LIMIT 10;

-- Find playlists with more than 100 tracks and sort them by the number of followers in descending order:
SELECT * FROM playlists
WHERE n_tracks > 100
ORDER BY playlist_followers DESC;

-- Find the albums released in a specific year along with the total number of tracks in each album:
SELECT release_date, album_type, COUNT(*) AS track_count
FROM tracks
WHERE SUBSTRING(release_date, 1, 4) = '2022' -- Replace '2022' with the desired year
GROUP BY release_date, album_type;

-- Find comprehensive details of playlists, tracks, and associated artists by performing a join operation on the respective tables.
SELECT
p.name AS playlist_name,
p.description AS playlist_description,
p.n_tracks AS playlist_tracks,
p.playlist_followers AS playlist_followers,
t.name AS track_name,
t.popularity AS track_popularity,
t.release_date AS track_release_date,
a.artist_popularity AS artist_popularity,
a.artist_followers AS artist_followers
FROM
playlists p
JOIN tracks t ON SETCONTAINS(t.playlist_uris, p.uri)
JOIN artists a ON SETCONTAINS(t.artists_uris, a._id);

-- Find the average track and artist popularity, as well as the count of distinct tracks and artists, for each playlist.
SELECT
p.name AS playlist_name,
AVG(t.popularity) AS average_track_popularity,
AVG(a.artist_popularity) AS average_artist_popularity,
COUNT(DISTINCT t.name) AS distinct_track_count,
COUNT(DISTINCT a._id) AS distinct_artist_count
FROM
playlists p
JOIN tracks t ON SETCONTAINS(t.playlist_uris, p.uri)
JOIN artists a ON SETCONTAINS(t.artists_uris, a._id)
GROUP BY
p.name;

-- Find the most popular artists by the average popularity of their tracks across all playlists.
SELECT
a._id AS artist_id,
a.artist_popularity AS artist_popularity,
COUNT(DISTINCT p._id) AS playlist_count,
AVG(t.popularity) AS average_track_popularity
FROM
artists a
JOIN tracks t ON SETCONTAINS(t.artists_uris, a._id)
JOIN playlists p ON SETCONTAINS(t.playlist_uris, p.uri)
GROUP BY
a._id, a.artist_popularity
HAVING
COUNT(DISTINCT p._id) > 5
ORDER BY
average_track_popularity DESC
LIMIT 10;

-- Find the most popular artists by the total number of followers across all playlists.
SELECT
p.name AS playlist_name,
t.name AS track_name,
a._id AS artist_id,
COUNT(DISTINCT t._id) AS track_count,
SUM(a.artist_followers) AS total_artist_followers
FROM
playlists p
JOIN tracks t ON SETCONTAINS(t.playlist_uris, p.uri)
JOIN artists a ON SETCONTAINS(t.artists_uris, a._id)
WHERE
p.playlist_followers > 10000
GROUP BY
p.name, t.name, a._id
HAVING
COUNT(DISTINCT t._id) > 5
ORDER BY
total_artist_followers DESC;

-- Find the most popular playlists by the average popularity of their tracks.
SELECT
p.name AS playlist_name,
COUNT(DISTINCT t._id) AS track_count,
AVG(t.popularity) AS average_track_popularity,
MAX(a.artist_followers) AS max_artist_followers
FROM
playlists p
JOIN tracks t ON SETCONTAINS(t.playlist_uris, p.uri)
JOIN artists a ON SETCONTAINS(t.artists_uris, a._id)
WHERE
p.n_tracks > 50
GROUP BY
p.name
HAVING
COUNT(DISTINCT t._id) > 10
ORDER BY
max_artist_followers DESC
LIMIT 5;

-- Find the most popular playlists by the average popularity of their tracks, but only for playlists that contain tracks from a specific artist.
SELECT
p.name AS playlist_name,
COUNT(DISTINCT t._id) AS track_count,
AVG(t.popularity) AS average_track_popularity,
MAX(a.artist_followers) AS max_artist_followers
FROM
playlists p
JOIN tracks t ON SETCONTAINS(t.playlist_uris, p.uri)
JOIN artists a ON SETCONTAINSANY(t.artists_uris, ['spotify:artist:0001wHqxbF2YYRQxGdbyER'])
WHERE
p.playlist_followers > 10000
GROUP BY
p.name
HAVING
COUNT(DISTINCT t._id) > 10
ORDER BY
max_artist_followers DESC;

-- Find the substrings of the first two characters of the name of each track.
SELECT
_id,
SUBSTRING(name, 1, 2) AS substr
FROM
tracks;

-- Find the artists who have collaborated with the most number of other artists:
SELECT a1._id AS artist_id, COUNT(DISTINCT a2._id) AS collaborator_count
FROM artists a1
JOIN tracks t1 ON SETCONTAINS(t1.artists_uris, a1._id)
JOIN artists a2 ON SETCONTAINS(t1.artists_uris, a2._id) AND a1._id != a2._id
GROUP BY a1._id
ORDER BY collaborator_count DESC
LIMIT 10;

-- Find the artists with the highest average track popularity:
SELECT a._id AS artist_id, AVG(t.popularity) AS average_track_popularity
FROM artists a
JOIN tracks t ON SETCONTAINS(t.artists_uris, a._id)
GROUP BY a._id
ORDER BY average_track_popularity DESC
LIMIT 10;

-- Find the artists with the most collaborative tracks:
SELECT a._id AS artist_id, COUNT(DISTINCT t._id) AS collaborative_track_count
FROM artists a
JOIN tracks t ON SETCONTAINS(t.artists_uris, a._id)
GROUP BY a._id
ORDER BY collaborative_track_count DESC
LIMIT 10;


52 changes: 52 additions & 0 deletions data/spotify/readme.md