1import pandas as pd
2import numpy as np
3from faker import Faker
4import random
5
6
# Single Faker instance shared by every generator call in this script.
fake = Faker()


# Number of project records (rows) to generate.
num_records = 3000


# Pool of project managers.
# Names are drawn through Faker's `unique` proxy so no name is repeated.
# Manager and team-member names share one uniqueness pool, so the two lists
# are also disjoint from each other.
project_managers = [fake.unique.name() for _ in range(10)]


# Pool of team members.
# The pool must not contain duplicates: `random.sample` picks distinct list
# positions, not distinct values, so a repeated name in the pool could put
# the same person on one project's team twice.
team_members = [fake.unique.name() for _ in range(300)]


# Possible project types and statuses.
project_types = ['Development', 'Research', 'Testing', 'Deployment', 'Maintenance']
project_statuses = ['Not Started', 'In Progress', 'Completed', 'On Hold', 'Cancelled']


# Accumulates one list of field values per generated project.
data = []


# Sets used to guarantee unique project names and IDs.
project_names_set = set()
project_ids_set = set()


# Pre-defined project health counts: two fixed-size groups, with every
# remaining record counted as "On Track".
health_needs_attention = 50
health_not_set = 11
health_on_track = num_records - health_needs_attention - health_not_set

# A negative remainder would silently produce a health list shorter than
# num_records (list * negative int == []), which only surfaces later as an
# IndexError when the list is indexed per record. Fail early instead.
if health_on_track < 0:
    raise ValueError(
        "num_records must be at least health_needs_attention + health_not_set "
        f"({health_needs_attention + health_not_set}), got {num_records}"
    )


# Pre-defined priority weights (relative probabilities of each priority).
priority_weights = {
    'Low': 0.4,
    'Medium': 0.35,
    'High': 0.2,
    'Critical': 0.05,
}


# Build the project health list with exactly num_records entries, then
# shuffle it so the health values are spread randomly across the records.
project_health_list = (
    ['Needs Attention'] * health_needs_attention
    + ['Not Set'] * health_not_set
    + ['On Track'] * health_on_track
)

random.shuffle(project_health_list)
57
58
# Generate one project record per iteration and append it to `data`.
for record_no in range(num_records):
    # Draw UUIDs until one is found that has not been used yet.
    project_id = fake.uuid4()
    while project_id in project_ids_set:
        project_id = fake.uuid4()
    project_ids_set.add(project_id)

    # Same approach for the project name (title-cased Faker "bs" phrase).
    project_name = fake.bs().title()
    while project_name in project_names_set:
        project_name = fake.bs().title()
    project_names_set.add(project_name)

    # One manager plus a team of 5 to 15 members sampled without replacement.
    project_manager = random.choice(project_managers)
    team_size = random.randint(5, 15)
    assigned = random.sample(team_members, team_size)

    # Start date falls within the last two years; end date is anywhere from
    # the start date up to one year from today.
    start_date = fake.date_between(start_date='-2y', end_date='today')
    end_date = fake.date_between(start_date=start_date, end_date='+1y')

    # Expenses are drawn independently of the budget, so some projects end
    # up over budget.
    budget = round(random.uniform(10000, 99999), 2)
    expenses = round(random.uniform(2350, 100000), 2)

    # Health comes from the pre-shuffled list, one entry per record.
    health = project_health_list[record_no]

    # Weighted single draw of the priority (random.choices defaults to k=1).
    (priority,) = random.choices(
        list(priority_weights),
        weights=list(priority_weights.values()),
    )

    status = random.choice(project_statuses)
    project_type = random.choice(project_types)

    # Deployment date lies between the project's start and end dates.
    deployment_date = fake.date_between(start_date=start_date, end_date=end_date)

    # Field order here defines the column order of the resulting rows.
    row = [
        project_id,
        project_name,
        project_manager,
        ', '.join(assigned),
        start_date,
        end_date,
        budget,
        expenses,
        priority,
        status,
        health,
        project_type,
        deployment_date,
    ]
    data.append(row)
105
106
# Column headers, listed in the same order as the values in each row of `data`.
columns = [
    'Project ID',
    'Project Name',
    'Project Manager',
    'Team Members',
    'Start Date',
    'End Date',
    'Budget',
    'Expenses',
    'Priority',
    'Project Status',
    'Project Health',
    'Project Type',
    'Deployment Date',
]

# Build the DataFrame from the accumulated rows.
df = pd.DataFrame(data=data, columns=columns)


# Save to CSV without writing the DataFrame index as an extra column.
df.to_csv('project_managment_dataset.csv', index=False)


# Confirmation message followed by a preview of the first rows.
print("Dataset created successfully with the following fields:", df.head(), sep="\n")
Explore and download the Python scripts that power this project management dashboard. Whether you're looking to learn, customize, or implement your own data solutions, the code is here for you to build on and innovate.
The data presented in this dashboard were generated using Python with the Faker library. The dataset is entirely synthetic and created for demonstration purposes only, without reflecting any real-world projects or actual company data. Colors and design elements were inspired by the Plaky app by CAKE.com.