# MongoDB
# 02_music_aggregation.py
import os
from pymongo import MongoClient
from dotenv import load_dotenv
# Load key/value pairs from a .env file, if one is found, into the process
# environment before MONGO_URI and DB_NAME are read with os.getenv.
load_dotenv()
def _seed_documents():
    """Return freshly built sample track documents for seeding."""
    # Built anew on every call: insert_many adds an "_id" key to each dict it
    # is handed, so reusing shared dicts across inserts would collide on _id.
    return [
        {"title": "Starboy", "artist": "The Weeknd", "genre": "Pop", "play_count": 1500, "year": 2016},
        {"title": "Blinding Lights", "artist": "The Weeknd", "genre": "Pop", "play_count": 3000, "year": 2019},
        {"title": "Level of Concern", "artist": "Twenty One Pilots", "genre": "Indie", "play_count": 800, "year": 2020},
        {"title": "Chlorine", "artist": "Twenty One Pilots", "genre": "Indie", "play_count": 1200, "year": 2018},
        {"title": "One More Time", "artist": "Daft Punk", "genre": "Electronic", "play_count": 5000, "year": 2000},
    ]


def _top_genres_pipeline():
    """Return the aggregation stages that rank genres by total play_count."""
    return [
        # 1. Keep only tracks released in 2000 or later.
        {"$match": {"year": {"$gte": 2000}}},
        # 2. One group per genre: sum the plays and collect distinct artists.
        {
            "$group": {
                "_id": "$genre",
                "total_plays": {"$sum": "$play_count"},
                "unique_artists": {"$addToSet": "$artist"},
            }
        },
        # 3. Turn the distinct-artist set into a count.
        {"$addFields": {"artist_count": {"$size": "$unique_artists"}}},
        # 4. Most-played genre first.
        {"$sort": {"total_plays": -1}},
        # 5. Expose the group key as "genre" and drop everything not printed.
        {"$project": {"_id": 0, "genre": "$_id", "total_plays": 1, "artist_count": 1}},
    ]


def run_music_analytics():
    """Print genres ranked by total play count from the ``tracks`` collection.

    Connects using the ``MONGO_URI`` environment variable and the database
    named by ``DB_NAME`` (default ``"music_db"``). If ``MONGO_URI`` is unset,
    ``os.getenv`` returns ``None`` and ``MongoClient`` uses its own default
    host. When the collection holds at most one document, five sample tracks
    are inserted first so the report has data to aggregate.

    Returns:
        None. One line per genre is written to stdout.
    """
    # The context manager closes the client (and its connections) even when
    # a query raises; the original never closed it.
    with MongoClient(os.getenv("MONGO_URI")) as client:
        tracks = client[os.getenv("DB_NAME", "music_db")]["tracks"]

        # Seed when the collection is empty or holds a single document.
        if tracks.count_documents({}) <= 1:
            tracks.insert_many(_seed_documents())

        print("--- Top Music Genres by Play Count ---")
        # Iterate the cursor directly; no need to materialize a list first.
        for row in tracks.aggregate(_top_genres_pipeline()):
            print(f"Genre: {row['genre']} | Plays: {row['total_plays']} | Artists: {row['artist_count']}")
# Run the report only when this file is executed as a script, not on import.
if __name__ == "__main__":
    run_music_analytics()