Exploring 311 Data With PCA

Principal Component Analysis in R

Principal Component Analysis is an unsupervised method that reduces the number of dimensions in a dataset and highlights where the data varies. We will use PCA to analyze the 311 dataset from the WPRDC.

Setup

Install packages

# Install only the packages that are not already available, so re-running this
# step does not download and reinstall everything.
required_packages <- c(
  "tidyverse", "lubridate", "broom", "ggfortify", "ggrepel", "janitor"
)
missing_packages <- setdiff(required_packages, rownames(installed.packages()))
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}

Set up your environment

library(tidyverse)
library(lubridate)
library(broom)
library(ggfortify)
library(ggrepel)
library(janitor)

# Fix the random seed so any stochastic step gives the same result on re-run.
set.seed(1234)

# Print numbers with four significant digits and strongly penalise scientific
# notation.
options(digits = 4, scipen = 999)

# Use the black-and-white ggplot2 theme for every plot in this session.
theme_set(theme_bw())

Load the data

# Download the 311 request log, normalise the column names, and derive the
# request date and calendar month from the first ten characters of the
# creation timestamp.
df <- read_csv(
  "https://raw.githubusercontent.com/conorotompkins/pittsburgh_311/master/data/pittsburgh_311.csv",
  progress = FALSE
) %>%
  clean_names() %>%
  mutate(
    date = ymd(str_sub(created_on, 1, 10)),
    month = month(date, label = TRUE)
  ) %>%
  # Keep only requests created before 2018-07-19.
  filter(date < "2018-07-19")

Prep the data

Create a dataframe of the top request types

# Tally requests per request type, most frequent first, and keep only the
# types with more than 400 requests.
df_top_requests <- df %>%
  count(request_type, sort = TRUE) %>%
  filter(n > 400)

df_top_requests
## # A tibble: 84 x 2
##    request_type                             n
##    <chr>                                <int>
##  1 Potholes                             25202
##  2 Weeds/Debris                         16503
##  3 Building Maintenance                 10469
##  4 Snow/Ice removal                      7006
##  5 Refuse Violations                     6515
##  6 Abandoned Vehicle (parked on street)  5877
##  7 Missed Pick Up                        4689
##  8 Replace/Repair a Sign                 4445
##  9 Building Without a Permit             4404
## 10 Litter                                4198
## # … with 74 more rows

Count the number of requests per month by request type, filter for the top request types, and fill in gaps in the data

# Count requests per request type and month for the top request types, then
# add a zero-count row for every type/month combination that had no requests.
df_months <- df %>%
  # Name the join key explicitly so the match cannot silently widen to other
  # shared column names, and so no "Joining, by = ..." message is emitted.
  semi_join(df_top_requests, by = "request_type") %>%
  group_by(request_type, month) %>%
  summarize(n = n()) %>%
  # Current tidyr refuses to complete() a grouping column, so drop the
  # leftover request_type grouping before expanding the combinations.
  ungroup() %>%
  complete(request_type, month, fill = list(n = 0)) %>%
  # Keep the result grouped by request type.
  group_by(request_type)

df_months
## # A tibble: 1,008 x 3
## # Groups:   request_type [84]
##    request_type                         month     n
##    <chr>                                <ord> <dbl>
##  1 Abandoned Vehicle (parked on street) Jan     523
##  2 Abandoned Vehicle (parked on street) Feb     427
##  3 Abandoned Vehicle (parked on street) Mar     452
##  4 Abandoned Vehicle (parked on street) Apr     417
##  5 Abandoned Vehicle (parked on street) May     488
##  6 Abandoned Vehicle (parked on street) Jun     466
##  7 Abandoned Vehicle (parked on street) Jul     457
##  8 Abandoned Vehicle (parked on street) Aug     596
##  9 Abandoned Vehicle (parked on street) Sep     525
## 10 Abandoned Vehicle (parked on street) Oct     571
## # … with 998 more rows

Calculate the percentage of a request type for each month

# Within each request type, compute the total number of requests and the
# share of that total that falls in each month.
df_months <- df_months %>%
  group_by(request_type) %>%
  mutate(
    request_type_total = sum(n),
    month_percentage = n / request_type_total
  )

df_months
## # A tibble: 1,008 x 5
## # Groups:   request_type [84]
##    request_type                   month     n request_type_tot… month_percentage
##    <chr>                          <ord> <dbl>             <dbl>            <dbl>
##  1 Abandoned Vehicle (parked on … Jan     523              5877           0.0890
##  2 Abandoned Vehicle (parked on … Feb     427              5877           0.0727
##  3 Abandoned Vehicle (parked on … Mar     452              5877           0.0769
##  4 Abandoned Vehicle (parked on … Apr     417              5877           0.0710
##  5 Abandoned Vehicle (parked on … May     488              5877           0.0830
##  6 Abandoned Vehicle (parked on … Jun     466              5877           0.0793
##  7 Abandoned Vehicle (parked on … Jul     457              5877           0.0778
##  8 Abandoned Vehicle (parked on … Aug     596              5877           0.101 
##  9 Abandoned Vehicle (parked on … Sep     525              5877           0.0893
## 10 Abandoned Vehicle (parked on … Oct     571              5877           0.0972
## # … with 998 more rows

Check for bad data

# List any rows whose monthly share is missing. is.na() is TRUE for both NA
# and NaN, so a single test covers both cases.
filter(df_months, is.na(month_percentage))
## # A tibble: 0 x 5
## # Groups:   request_type [0]
## # … with 5 variables: request_type <chr>, month <ord>, n <dbl>,
## #   request_type_total <dbl>, month_percentage <dbl>

Spread the data to turn the months into the columns

# Reshape to one row per request type with one column per month.
# pivot_wider() replaces the superseded spread(); names_sort = TRUE orders the
# new columns by the month factor's levels (Jan..Dec) rather than by first
# appearance in the data.
df_months <- df_months %>%
  ungroup() %>%
  select(request_type, month, month_percentage) %>%
  pivot_wider(
    names_from = month,
    values_from = month_percentage,
    names_sort = TRUE
  )

df_months
## # A tibble: 84 x 13
##    request_type     Jan     Feb    Mar    Apr    May    Jun    Jul    Aug    Sep
##    <chr>          <dbl>   <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
##  1 Abandoned V… 0.0890  0.0727  0.0769 0.0710 0.0830 0.0793 0.0778 0.101  0.0893
##  2 Barking Dog  0.0563  0.0608  0.0608 0.0631 0.104  0.101  0.0788 0.113  0.124 
##  3 Board Up (P… 0.0395  0.0482  0.0658 0.0943 0.114  0.0899 0.110  0.123  0.0855
##  4 Broken Side… 0.0337  0.155   0.148  0.0872 0.105  0.0964 0.0696 0.0735 0.0528
##  5 Building Ma… 0.0708  0.0919  0.103  0.0739 0.0842 0.0829 0.0725 0.0919 0.0776
##  6 Building Wi… 0.0842  0.0697  0.0636 0.0577 0.105  0.0883 0.0924 0.0815 0.0829
##  7 Catch Basin… 0.0636  0.0377  0.0778 0.0748 0.0984 0.132  0.0825 0.127  0.105 
##  8 City Source… 0.00527 0.00246 0.0105 0.0428 0.196  0.213  0.195  0.164  0.0808
##  9 City Steps,… 0.0443  0.0180  0.0148 0.0197 0.116  0.216  0.203  0.146  0.118 
## 10 City Steps,… 0.0265  0.0305  0.0713 0.0509 0.128  0.120  0.136  0.128  0.108 
## # … with 74 more rows, and 3 more variables: Oct <dbl>, Nov <dbl>, Dec <dbl>

Check that they all add up to 1 across the rows

# Sum the twelve monthly shares for every request type and show that total as
# the first column; each total should be 1.
test <- df_months %>%
  select(Jan:Dec) %>%
  mutate(row_sum = rowSums(.)) %>%
  select(row_sum, Jan:Dec)

test
## # A tibble: 84 x 13
##    row_sum     Jan     Feb    Mar    Apr    May    Jun    Jul    Aug    Sep
##      <dbl>   <dbl>   <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
##  1       1 0.0890  0.0727  0.0769 0.0710 0.0830 0.0793 0.0778 0.101  0.0893
##  2       1 0.0563  0.0608  0.0608 0.0631 0.104  0.101  0.0788 0.113  0.124 
##  3       1 0.0395  0.0482  0.0658 0.0943 0.114  0.0899 0.110  0.123  0.0855
##  4       1 0.0337  0.155   0.148  0.0872 0.105  0.0964 0.0696 0.0735 0.0528
##  5       1 0.0708  0.0919  0.103  0.0739 0.0842 0.0829 0.0725 0.0919 0.0776
##  6       1 0.0842  0.0697  0.0636 0.0577 0.105  0.0883 0.0924 0.0815 0.0829
##  7       1 0.0636  0.0377  0.0778 0.0748 0.0984 0.132  0.0825 0.127  0.105 
##  8       1 0.00527 0.00246 0.0105 0.0428 0.196  0.213  0.195  0.164  0.0808
##  9       1 0.0443  0.0180  0.0148 0.0197 0.116  0.216  0.203  0.146  0.118 
## 10       1 0.0265  0.0305  0.0713 0.0509 0.128  0.120  0.136  0.128  0.108 
## # … with 74 more rows, and 3 more variables: Oct <dbl>, Nov <dbl>, Dec <dbl>

Perform basic comparisons

# Scatter each request type's January share against its July share.
ggplot(df_months, aes(x = Jan, y = Jul)) +
  geom_point()

Remember that each dot represents a request type, and each axis shows the percentage of that request type's requests that occurred in the given month.

# Scatter each request type's April share against its October share.
ggplot(df_months, aes(x = Apr, y = Oct)) +
  geom_point()

It is not feasible to plot all the months against each other. PCA can help by condensing the columns into a few new variables that capture as much of the variance as possible. PCA creates eigenvectors that represent the data in a concentrated way. Eigenvectors and eigenvalues do not represent observed data. They are calculated representations of the data. We will refer to eigenvectors as “principal components”.

In this case, where our data is measured by months in a year, each principal component could loosely be compared to a season.

Prep the data for PCA

The PCA function requires an all-numeric dataframe, so move the request types out of the columns and into the dataframe's row names

# Build the all-numeric input for PCA: clear any grouping and existing row
# names, then move request_type out of the columns and into the row names.
df_months_pca1 <- df_months %>%
  ungroup() %>%
  remove_rownames() %>%
  column_to_rownames(var = "request_type")

df_months_pca1
##                                                  Jan      Feb      Mar      Apr
## Abandoned Vehicle (parked on street)        0.088991 0.072656 0.076910 0.070955
## Barking Dog                                 0.056306 0.060811 0.060811 0.063063
## Board Up (PLI referral to DPW)              0.039474 0.048246 0.065789 0.094298
## Broken Sidewalk                             0.033665 0.154552 0.147666 0.087223
## Building Maintenance                        0.070780 0.091890 0.103353 0.073933
## Building Without a Permit                   0.084242 0.069709 0.063579 0.057675
## Catch Basin, Clogged                        0.063642 0.037714 0.077784 0.074838
## City Source (CDBG)                          0.005267 0.002458 0.010534 0.042837
## City Steps, Need Cleared                    0.044262 0.018033 0.014754 0.019672
## City Steps, Need Repaired                   0.026477 0.030550 0.071283 0.050916
## Collapsed Catch Basin                       0.064220 0.053899 0.061927 0.075688
## Commercial Refuse/Dumpsters                 0.079430 0.079430 0.089613 0.077393
## Curb /Broken/Deteriorated                   0.042373 0.048729 0.072034 0.110169
## Curb/Request for Asphalt Windrow            0.038660 0.020619 0.028351 0.085052
## Dead Animal                                 0.038181 0.032566 0.043234 0.076923
## Dead tree (Public property)                 0.034516 0.028763 0.049856 0.066155
## Drainage/Leak                               0.141304 0.050000 0.035870 0.083696
## Drug Enforcement                            0.077085 0.049755 0.065172 0.079187
## Dumping, Private Property                   0.064315 0.076763 0.120332 0.093361
## Dumpster (on Street)                        0.070866 0.048819 0.042520 0.088189
## Early Set Out                               0.072444 0.069736 0.062288 0.063643
## Excessive Noise/Disturbances                0.057377 0.047131 0.056011 0.085383
## Field                                       0.016432 0.014085 0.042254 0.110329
## Fire Safety System Not Working              0.093750 0.185547 0.128906 0.077474
## Graffiti, Documentation                     0.057116 0.054307 0.103933 0.102060
## Graffiti, Removal                           0.088710 0.111290 0.098387 0.046774
## Hydrant                                     0.121771 0.062731 0.075646 0.053506
## Illegal Dumping                             0.065672 0.057214 0.076617 0.106965
## Illegal Parking                             0.095682 0.075074 0.081943 0.079735
## Junk Vehicles                               0.079384 0.093602 0.114929 0.068720
## Leak                                        0.171456 0.097418 0.048709 0.044812
## Leaves/Street Cleaning                      0.028967 0.031486 0.030227 0.059194
## Litter                                      0.064316 0.064555 0.085755 0.093378
## Litter Can, Public                          0.064777 0.049393 0.060729 0.069636
## Maintenance Issue                           0.026455 0.031746 0.039153 0.078307
## Mayor's Office                              0.158455 0.033289 0.023968 0.027963
## Missed Blue Bag                             0.094002 0.042077 0.051925 0.068935
## Missed Pick Up                              0.076775 0.048198 0.047345 0.058861
## Need Potable Water                          0.002398 0.914868 0.001199 0.003597
## Operating Without a License                 0.041215 0.021692 0.149675 0.114967
## Overgrowth                                  0.005058 0.007867 0.006462 0.019106
## Parking Authority                           0.086022 0.075269 0.064516 0.105376
## Patrol                                      0.063164 0.047816 0.071429 0.081464
## Paving Concern/Problem                      0.054819 0.043324 0.042440 0.071618
## Paving Request                              0.052950 0.047504 0.108321 0.114675
## Permit Parking (Residential Parking Permit) 0.107062 0.075171 0.063781 0.079727
## Playground                                  0.015038 0.024436 0.043233 0.093985
## Potholes                                    0.123324 0.052972 0.105230 0.112570
## Pruning (city tree)                         0.024019 0.025372 0.044317 0.057510
## Public Right of Way                         0.033397 0.029580 0.020992 0.057252
## Question                                    0.079824 0.064739 0.043997 0.024513
## Referral                                    0.099161 0.050725 0.046148 0.049962
## Refuse Violations                           0.079202 0.059708 0.065848 0.084728
## Replace/Repair a Sign                       0.067492 0.053093 0.080315 0.084814
## Request New Sign                            0.069169 0.049768 0.059047 0.097005
## Retaining Wall Maintenance                  0.066239 0.091880 0.115385 0.096154
## Rodent control                              0.040957 0.033594 0.041417 0.052462
## Root prune                                  0.022321 0.038690 0.053571 0.098214
## Sidewalk Obstruction                        0.052799 0.042621 0.044529 0.052163
## Sidewalk, Lack of Snow/Ice Removal          0.767726 0.090465 0.002445 0.002445
## Sinkhole                                    0.103995 0.058973 0.062143 0.066582
## Smoke detectors                             0.118421 0.064145 0.062500 0.092105
## Snow/Ice removal                            0.681273 0.135027 0.005995 0.002712
## Speeding                                    0.063973 0.060606 0.084175 0.094276
## Street Cleaning/Sweeping                    0.027306 0.026790 0.035033 0.102009
## Street Light - Repair                       0.078803 0.055112 0.067830 0.044888
## Street Obstruction/Closure                  0.126273 0.040733 0.081466 0.061100
## Thank you - DPW                             0.136264 0.046154 0.032967 0.065934
## Traffic                                     0.065356 0.053666 0.061637 0.073326
## Traffic or Pedestrian Signal, Repair        0.089659 0.047221 0.069934 0.069337
## Traffic or Pedestrian Signal, Request       0.056641 0.029297 0.099609 0.087891
## Tree Fallen Across Road                     0.042589 0.031516 0.051959 0.034923
## Tree Fallen Across Sidewalk                 0.034125 0.028190 0.044510 0.044510
## Tree Issues                                 0.038384 0.056566 0.076768 0.048485
## Tree Removal                                0.042949 0.036507 0.071582 0.074445
## Unpermitted Electrical Work                 0.145055 0.012088 0.030769 0.019780
## Unpermitted HVAC Work                       0.108516 0.045330 0.064560 0.048077
## Utility Cut - Other                         0.114889 0.067995 0.052755 0.059789
## Utility Cut - PWSA                          0.202261 0.050251 0.057789 0.075377
## Utility Pole                                0.065728 0.075117 0.049296 0.075117
## Vacant Building                             0.088199 0.083230 0.096066 0.048861
## Weeds/Debris                                0.029631 0.024965 0.035751 0.045083
## Wires                                       0.060651 0.071006 0.060651 0.078402
## Zoning Issue                                0.055000 0.065000 0.080833 0.082500
##                                                   May       Jun      Jul
## Abandoned Vehicle (parked on street)        0.0830356 0.0792922 0.077761
## Barking Dog                                 0.1036036 0.1013514 0.078829
## Board Up (PLI referral to DPW)              0.1140351 0.0899123 0.109649
## Broken Sidewalk                             0.1048202 0.0964040 0.069625
## Building Maintenance                        0.0841532 0.0829115 0.072500
## Building Without a Permit                   0.1049046 0.0883288 0.092416
## Catch Basin, Clogged                        0.0984090 0.1319976 0.082499
## City Source (CDBG)                          0.1955758 0.2134831 0.195225
## City Steps, Need Cleared                    0.1163934 0.2163934 0.203279
## City Steps, Need Repaired                   0.1283096 0.1201629 0.136456
## Collapsed Catch Basin                       0.1100917 0.0917431 0.083716
## Commercial Refuse/Dumpsters                 0.0529532 0.1038697 0.105906
## Curb /Broken/Deteriorated                   0.1525424 0.1122881 0.116525
## Curb/Request for Asphalt Windrow            0.1430412 0.2113402 0.155928
## Dead Animal                                 0.0713083 0.1021898 0.139809
## Dead tree (Public property)                 0.1246405 0.1447747 0.154362
## Drainage/Leak                               0.0902174 0.1043478 0.102174
## Drug Enforcement                            0.0988087 0.1023125 0.088998
## Dumping, Private Property                   0.0746888 0.0622407 0.064315
## Dumpster (on Street)                        0.0787402 0.1354331 0.105512
## Early Set Out                               0.0886933 0.0873392 0.111713
## Excessive Noise/Disturbances                0.0887978 0.0758197 0.075137
## Field                                       0.1854460 0.1384977 0.150235
## Fire Safety System Not Working              0.1139323 0.0572917 0.047526
## Graffiti, Documentation                     0.1207865 0.1254682 0.073970
## Graffiti, Removal                           0.0338710 0.0596774 0.091935
## Hydrant                                     0.0922509 0.0571956 0.064576
## Illegal Dumping                             0.1000000 0.1228856 0.113930
## Illegal Parking                             0.0765456 0.0691855 0.059863
## Junk Vehicles                               0.0864929 0.0710900 0.104265
## Leak                                        0.0491963 0.0526059 0.057964
## Leaves/Street Cleaning                      0.0629723 0.0541562 0.021411
## Litter                                      0.0826584 0.0855169 0.098380
## Litter Can, Public                          0.0923077 0.0995951 0.127126
## Maintenance Issue                           0.1417989 0.1185185 0.135450
## Mayor's Office                              0.0319574 0.1824234 0.065246
## Missed Blue Bag                             0.0841540 0.1020591 0.087735
## Missed Pick Up                              0.1027938 0.1123907 0.118789
## Need Potable Water                          0.0011990 0.0023981 0.000000
## Operating Without a License                 0.4338395 0.0542299 0.028200
## Overgrowth                                  0.1219444 0.2489463 0.234335
## Parking Authority                           0.0860215 0.0838710 0.081720
## Patrol                                      0.1015348 0.0879575 0.095041
## Paving Concern/Problem                      0.1114058 0.1255526 0.085765
## Paving Request                              0.1397882 0.1458396 0.118306
## Permit Parking (Residential Parking Permit) 0.0569476 0.0706150 0.079727
## Playground                                  0.1184211 0.1691729 0.159774
## Potholes                                    0.1346322 0.1150702 0.107095
## Pruning (city tree)                         0.1234777 0.1742219 0.168133
## Public Right of Way                         0.1316794 0.1650763 0.154580
## Question                                    0.0483972 0.0936518 0.122564
## Referral                                    0.0362319 0.0846682 0.129291
## Refuse Violations                           0.0784344 0.0983883 0.100844
## Replace/Repair a Sign                       0.1196850 0.1113611 0.094713
## Request New Sign                            0.0932096 0.0927879 0.097427
## Retaining Wall Maintenance                  0.0961538 0.0982906 0.085470
## Rodent control                              0.0745513 0.1099862 0.141279
## Root prune                                  0.1190476 0.1264881 0.163690
## Sidewalk Obstruction                        0.0807888 0.1075064 0.123410
## Sidewalk, Lack of Snow/Ice Removal          0.0097800 0.0000000 0.002445
## Sinkhole                                    0.0786303 0.1122384 0.128725
## Smoke detectors                             0.0871711 0.1348684 0.046053
## Snow/Ice removal                            0.0008564 0.0001427 0.000000
## Speeding                                    0.0976431 0.0909091 0.104377
## Street Cleaning/Sweeping                    0.1257084 0.1298300 0.123132
## Street Light - Repair                       0.0498753 0.0675810 0.097257
## Street Obstruction/Closure                  0.0549898 0.0509165 0.087576
## Thank you - DPW                             0.0769231 0.0967033 0.105495
## Traffic                                     0.0887354 0.0600425 0.054729
## Traffic or Pedestrian Signal, Repair        0.0854752 0.1040048 0.086671
## Traffic or Pedestrian Signal, Request       0.1074219 0.1191406 0.085938
## Tree Fallen Across Road                     0.1345826 0.2206133 0.137990
## Tree Fallen Across Sidewalk                 0.1424332 0.1958457 0.126113
## Tree Issues                                 0.1010101 0.1010101 0.129293
## Tree Removal                                0.1295634 0.1410165 0.118826
## Unpermitted Electrical Work                 0.1054945 0.1230769 0.085714
## Unpermitted HVAC Work                       0.1689560 0.0879121 0.085165
## Utility Cut - Other                         0.0797186 0.0738570 0.082063
## Utility Cut - PWSA                          0.1005025 0.0967337 0.095477
## Utility Pole                                0.1126761 0.1244131 0.107981
## Vacant Building                             0.0683230 0.0608696 0.064182
## Weeds/Debris                                0.1373690 0.1666364 0.157062
## Wires                                       0.0961538 0.1434911 0.087278
## Zoning Issue                                0.0958333 0.0866667 0.103333
##                                                   Aug      Sep       Oct
## Abandoned Vehicle (parked on street)        0.1014123 0.089331 0.0971584
## Barking Dog                                 0.1126126 0.123874 0.1126126
## Board Up (PLI referral to DPW)              0.1228070 0.085526 0.0877193
## Broken Sidewalk                             0.0734507 0.052793 0.0849273
## Building Maintenance                        0.0918903 0.077562 0.0787086
## Building Without a Permit                   0.0815168 0.082879 0.1128520
## Catch Basin, Clogged                        0.1266942 0.104891 0.0931055
## City Source (CDBG)                          0.1639747 0.080758 0.0582865
## City Steps, Need Cleared                    0.1459016 0.118033 0.0557377
## City Steps, Need Repaired                   0.1283096 0.107943 0.0855397
## Collapsed Catch Basin                       0.1238532 0.113532 0.0986239
## Commercial Refuse/Dumpsters                 0.1344196 0.071283 0.0712831
## Curb /Broken/Deteriorated                   0.1186441 0.084746 0.0572034
## Curb/Request for Asphalt Windrow            0.1082474 0.079897 0.0605670
## Dead Animal                                 0.1235261 0.112296 0.1274565
## Dead tree (Public property)                 0.1236817 0.102589 0.0882071
## Drainage/Leak                               0.1076087 0.058696 0.0782609
## Drug Enforcement                            0.1135249 0.117730 0.0946041
## Dumping, Private Property                   0.1327801 0.076763 0.1016598
## Dumpster (on Street)                        0.1070866 0.086614 0.0881890
## Early Set Out                               0.1570752 0.080569 0.0663507
## Excessive Noise/Disturbances                0.0881148 0.090164 0.1038251
## Field                                       0.1197183 0.098592 0.0610329
## Fire Safety System Not Working              0.0449219 0.050781 0.0651042
## Graffiti, Documentation                     0.0608614 0.067416 0.1207865
## Graffiti, Removal                           0.1048387 0.125806 0.0935484
## Hydrant                                     0.1254613 0.077491 0.0867159
## Illegal Dumping                             0.0651741 0.068657 0.0726368
## Illegal Parking                             0.0969087 0.103042 0.0991168
## Junk Vehicles                               0.0912322 0.072275 0.0758294
## Leak                                        0.1066732 0.080857 0.0681929
## Leaves/Street Cleaning                      0.0440806 0.012594 0.0642317
## Litter                                      0.1202954 0.095760 0.0824202
## Litter Can, Public                          0.1246964 0.110121 0.0850202
## Maintenance Issue                           0.1174603 0.113228 0.1047619
## Mayor's Office                              0.1118509 0.114514 0.0892144
## Missed Blue Bag                             0.1056401 0.089526 0.0957923
## Missed Pick Up                              0.1106846 0.098315 0.0835999
## Need Potable Water                          0.0731415 0.000000 0.0000000
## Operating Without a License                 0.0390456 0.021692 0.0455531
## Overgrowth                                  0.1944366 0.104805 0.0396179
## Parking Authority                           0.1032258 0.090323 0.0731183
## Patrol                                      0.1097993 0.103306 0.1151122
## Paving Concern/Problem                      0.0813439 0.089302 0.1432361
## Paving Request                              0.0928896 0.075340 0.0490166
## Permit Parking (Residential Parking Permit) 0.1321185 0.102506 0.0956720
## Playground                                  0.1672932 0.090226 0.0733083
## Potholes                                    0.0759860 0.050155 0.0442425
## Pruning (city tree)                         0.1586604 0.082206 0.0801759
## Public Right of Way                         0.1832061 0.094466 0.0620229
## Question                                    0.1646763 0.065996 0.1451917
## Referral                                    0.1525553 0.089245 0.1060259
## Refuse Violations                           0.1122026 0.085035 0.0968534
## Replace/Repair a Sign                       0.0899888 0.088189 0.0758155
## Request New Sign                            0.1100801 0.113454 0.0889920
## Retaining Wall Maintenance                  0.0769231 0.055556 0.1089744
## Rodent control                              0.1385182 0.125633 0.1214910
## Root prune                                  0.1056548 0.096726 0.0937500
## Sidewalk Obstruction                        0.1246819 0.117048 0.0966921
## Sidewalk, Lack of Snow/Ice Removal          0.0000000 0.002445 0.0000000
## Sinkhole                                    0.1230184 0.088142 0.0798985
## Smoke detectors                             0.0756579 0.041118 0.1118421
## Snow/Ice removal                            0.0001427 0.000000 0.0001427
## Speeding                                    0.1144781 0.112795 0.0791246
## Street Cleaning/Sweeping                    0.1215868 0.098403 0.0963421
## Street Light - Repair                       0.1057357 0.105237 0.1134663
## Street Obstruction/Closure                  0.1201629 0.120163 0.0916497
## Thank you - DPW                             0.1252747 0.112088 0.0879121
## Traffic                                     0.1046759 0.162062 0.1232731
## Traffic or Pedestrian Signal, Repair        0.1165571 0.105798 0.0854752
## Traffic or Pedestrian Signal, Request       0.0742188 0.111328 0.1035156
## Tree Fallen Across Road                     0.1831346 0.064736 0.0442930
## Tree Fallen Across Sidewalk                 0.2121662 0.063798 0.0474777
## Tree Issues                                 0.1454545 0.094949 0.0848485
## Tree Removal                                0.1460272 0.085183 0.0787402
## Unpermitted Electrical Work                 0.0571429 0.065934 0.1318681
## Unpermitted HVAC Work                       0.0824176 0.075549 0.0879121
## Utility Cut - Other                         0.1160610 0.110199 0.0937866
## Utility Cut - PWSA                          0.0716080 0.062814 0.0690955
## Utility Pole                                0.1197183 0.077465 0.0563380
## Vacant Building                             0.0749482 0.077847 0.0815735
## Weeds/Debris                                0.1616070 0.097922 0.0718657
## Wires                                       0.1316568 0.091716 0.0532544
## Zoning Issue                                0.0908333 0.079167 0.0875000
##                                                  Nov      Dec
## Abandoned Vehicle (parked on street)        0.086098 0.076400
## Barking Dog                                 0.074324 0.051802
## Board Up (PLI referral to DPW)              0.076754 0.065789
## Broken Sidewalk                             0.064269 0.030604
## Building Maintenance                        0.093419 0.078900
## Building Without a Permit                   0.086285 0.075613
## Catch Basin, Clogged                        0.064820 0.043606
## City Source (CDBG)                          0.025632 0.005969
## City Steps, Need Cleared                    0.022951 0.024590
## City Steps, Need Repaired                   0.075356 0.038697
## Collapsed Catch Basin                       0.068807 0.053899
## Commercial Refuse/Dumpsters                 0.069246 0.065173
## Curb /Broken/Deteriorated                   0.046610 0.038136
## Curb/Request for Asphalt Windrow            0.047680 0.020619
## Dead Animal                                 0.083661 0.048849
## Dead tree (Public property)                 0.051774 0.030681
## Drainage/Leak                               0.070652 0.077174
## Drug Enforcement                            0.067274 0.045550
## Dumping, Private Property                   0.074689 0.058091
## Dumpster (on Street)                        0.083465 0.064567
## Early Set Out                               0.069059 0.071090
## Excessive Noise/Disturbances                0.092213 0.140027
## Field                                       0.032864 0.030516
## Fire Safety System Not Working              0.051432 0.083333
## Graffiti, Documentation                     0.072097 0.041199
## Graffiti, Removal                           0.066129 0.079032
## Hydrant                                     0.090406 0.092251
## Illegal Dumping                             0.083582 0.066667
## Illegal Parking                             0.088077 0.074828
## Junk Vehicles                               0.091232 0.050948
## Leak                                        0.097418 0.124696
## Leaves/Street Cleaning                      0.430730 0.159950
## Litter                                      0.070272 0.056694
## Litter Can, Public                          0.068826 0.047773
## Maintenance Issue                           0.053968 0.039153
## Mayor's Office                              0.083888 0.077230
## Missed Blue Bag                             0.091316 0.086840
## Missed Pick Up                              0.070164 0.072084
## Need Potable Water                          0.000000 0.001199
## Operating Without a License                 0.026030 0.023861
## Overgrowth                                  0.013768 0.003653
## Parking Authority                           0.068817 0.081720
## Patrol                                      0.070838 0.052538
## Paving Concern/Problem                      0.108753 0.042440
## Paving Request                              0.034493 0.020877
## Permit Parking (Residential Parking Permit) 0.077449 0.059226
## Playground                                  0.041353 0.003759
## Potholes                                    0.053289 0.025434
## Pruning (city tree)                         0.041272 0.020636
## Public Right of Way                         0.044847 0.022901
## Question                                    0.084852 0.061596
## Referral                                    0.081998 0.073989
## Refuse Violations                           0.067536 0.071220
## Replace/Repair a Sign                       0.070191 0.064342
## Request New Sign                            0.076339 0.052720
## Retaining Wall Maintenance                  0.070513 0.038462
## Rodent control                              0.071330 0.048780
## Root prune                                  0.043155 0.038690
## Sidewalk Obstruction                        0.086514 0.071247
## Sidewalk, Lack of Snow/Ice Removal          0.000000 0.122249
## Sinkhole                                    0.058339 0.039315
## Smoke detectors                             0.088816 0.077303
## Snow/Ice removal                            0.003711 0.169997
## Speeding                                    0.052189 0.045455
## Street Cleaning/Sweeping                    0.082947 0.030912
## Street Light - Repair                       0.115960 0.098254
## Street Obstruction/Closure                  0.089613 0.075356
## Thank you - DPW                             0.065934 0.048352
## Traffic                                     0.088735 0.063762
## Traffic or Pedestrian Signal, Repair        0.084877 0.054991
## Traffic or Pedestrian Signal, Request       0.068359 0.056641
## Tree Fallen Across Road                     0.036627 0.017036
## Tree Fallen Across Sidewalk                 0.040059 0.020772
## Tree Issues                                 0.070707 0.052525
## Tree Removal                                0.047960 0.027201
## Unpermitted Electrical Work                 0.101099 0.121978
## Unpermitted HVAC Work                       0.074176 0.071429
## Utility Cut - Other                         0.069168 0.079719
## Utility Cut - PWSA                          0.081658 0.036432
## Utility Pole                                0.075117 0.061033
## Vacant Building                             0.132091 0.123810
## Weeds/Debris                                0.045386 0.026722
## Wires                                       0.076923 0.048817
## Zoning Issue                                0.092500 0.080833

Create the PCA object

# Fit the PCA on the monthly columns. `scale.` is written out in full so the
# call does not depend on partial argument matching; centering uses the
# prcomp() default. The outer parentheses print the fitted object.
(pc <- df_months_pca1 %>%
  prcomp(scale. = TRUE))
## Standard deviations (1, .., p=12):
##  [1] 2.0313544303132324842 1.5112299607905628207 1.3677583442481697773
##  [4] 1.0647449915481699279 0.9373153843502747273 0.6612690017981469603
##  [7] 0.6319678449167120959 0.5732234023111666410 0.4666060722915733039
## [10] 0.4192405535100036107 0.3847717270238655285 0.0000000000000003547
## 
## Rotation (n x k) = (12 x 12):
##         PC1     PC2       PC3      PC4      PC5        PC6      PC7      PC8
## Jan -0.3509  0.2377  0.036588 -0.40554  0.40259 -0.0878343  0.04648 -0.10077
## Feb -0.2189  0.2230 -0.226391  0.69886 -0.14610  0.1005237  0.15306  0.04311
## Mar -0.0235 -0.4858 -0.323760  0.11519  0.19761 -0.1724337 -0.62037 -0.31077
## Apr  0.1329 -0.4686 -0.301500 -0.04306  0.07313 -0.5014973  0.54199  0.09929
## May  0.2339 -0.1448 -0.445597 -0.33914 -0.15507  0.5723879 -0.10942  0.40042
## Jun  0.4049  0.1866  0.002112 -0.22765 -0.21210 -0.0386388  0.13438 -0.42038
## Jul  0.4322  0.1697  0.095923 -0.10888 -0.01800 -0.1443989  0.02032 -0.13081
## Aug  0.3866  0.1805  0.189907  0.17598 -0.04242 -0.2554247 -0.44835  0.13536
## Sep  0.2944 -0.1580  0.365255  0.16856  0.42595 -0.0005222  0.04916  0.53195
## Oct  0.1150 -0.4130  0.389922  0.16708  0.17452  0.5136274  0.18989 -0.43757
## Nov -0.1323 -0.3291  0.333424 -0.05855 -0.69855 -0.1384698 -0.02866  0.09659
## Dec -0.3720 -0.1311  0.333522 -0.25017 -0.03408 -0.0524049 -0.15824  0.15902
##           PC9     PC10      PC11   PC12
## Jan  0.119474  0.19207  0.310620 0.5699
## Feb -0.085692 -0.10149 -0.130098 0.5209
## Mar -0.259371 -0.01955  0.035606 0.1620
## Apr  0.264116 -0.01396 -0.143422 0.1379
## May  0.106727 -0.02849 -0.028027 0.2717
## Jun -0.373786  0.36939 -0.413396 0.2471
## Jul -0.103405 -0.77442  0.256091 0.2205
## Aug  0.607417  0.20278 -0.123033 0.1983
## Sep -0.454920  0.16862  0.035475 0.1519
## Oct  0.304459 -0.02271 -0.003209 0.1456
## Nov -0.104258  0.14115  0.387838 0.2472
## Dec -0.008971 -0.35391 -0.678722 0.1743

Inspect the PCA object with the tidier functions from the broom package. These functions turn the PCA object into tidy dataframes.

# Preview the first rows of broom's default tidy output for the PCA fit.
head(tidy(pc))
## # A tibble: 6 x 3
##   row                                     PC   value
##   <chr>                                <dbl>   <dbl>
## 1 Abandoned Vehicle (parked on street)     1 -0.844 
## 2 Barking Dog                              2 -0.844 
## 3 Board Up (PLI referral to DPW)           3  0.383 
## 4 Broken Sidewalk                          4  0.311 
## 5 Building Maintenance                     5  0.206 
## 6 Building Without a Permit                6  0.0620
# One row per principal component, as returned by the "pcs" tidier.
tidy(pc, "pcs")
## # A tibble: 12 x 4
##       PC  std.dev percent cumulative
##    <dbl>    <dbl>   <dbl>      <dbl>
##  1     1 2.03e+ 0  0.344       0.344
##  2     2 1.51e+ 0  0.190       0.534
##  3     3 1.37e+ 0  0.156       0.690
##  4     4 1.06e+ 0  0.0945      0.785
##  5     5 9.37e- 1  0.0732      0.858
##  6     6 6.61e- 1  0.0364      0.894
##  7     7 6.32e- 1  0.0333      0.927
##  8     8 5.73e- 1  0.0274      0.955
##  9     9 4.67e- 1  0.0181      0.973
## 10    10 4.19e- 1  0.0146      0.988
## 11    11 3.85e- 1  0.0123      1    
## 12    12 3.55e-16  0           1
# Attach the fitted principal component scores to the original rows.
au <- augment(pc, data = df_months)

head(au)
## # A tibble: 6 x 26
##   .rownames request_type    Jan    Feb    Mar    Apr    May    Jun    Jul    Aug
##   <chr>     <chr>         <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
## 1 1         Abandoned V… 0.0890 0.0727 0.0769 0.0710 0.0830 0.0793 0.0778 0.101 
## 2 2         Barking Dog  0.0563 0.0608 0.0608 0.0631 0.104  0.101  0.0788 0.113 
## 3 3         Board Up (P… 0.0395 0.0482 0.0658 0.0943 0.114  0.0899 0.110  0.123 
## 4 4         Broken Side… 0.0337 0.155  0.148  0.0872 0.105  0.0964 0.0696 0.0735
## 5 5         Building Ma… 0.0708 0.0919 0.103  0.0739 0.0842 0.0829 0.0725 0.0919
## 6 6         Building Wi… 0.0842 0.0697 0.0636 0.0577 0.105  0.0883 0.0924 0.0815
## # … with 16 more variables: Sep <dbl>, Oct <dbl>, Nov <dbl>, Dec <dbl>,
## #   .fittedPC1 <dbl>, .fittedPC2 <dbl>, .fittedPC3 <dbl>, .fittedPC4 <dbl>,
## #   .fittedPC5 <dbl>, .fittedPC6 <dbl>, .fittedPC7 <dbl>, .fittedPC8 <dbl>,
## #   .fittedPC9 <dbl>, .fittedPC10 <dbl>, .fittedPC11 <dbl>, .fittedPC12 <dbl>

Plot how the PCA object explains the variance in the data

# Plot the share of variance explained by each component next to the running
# total. pivot_longer() replaces the superseded gather(); it orders rows
# differently, which does not matter because geom_line() connects points in
# order of x within each facet.
pc %>%
  tidy("pcs") %>%
  select(-std.dev) %>%
  pivot_longer(cols = -PC, names_to = "measure", values_to = "value") %>%
  ggplot(aes(PC, value)) +
  geom_line() +
  geom_point() +
  facet_wrap(~measure) +
  labs(
    title = "Variance explained by each principal component",
    x = "Principal Component",
    y = NULL
  ) +
  scale_x_continuous(breaks = 1:12)

The first two principal components together explain just over half (about 53%) of the variance

For an in-depth plot we need to create the PCA object a different way

# Wrap the whole table in a one-row nested tibble so the fitted PCA and its
# augmented data sit next to the data as list-columns. `data = everything()`
# states the nesting explicitly instead of relying on a bare nest(), and
# `scale.` is written in full to avoid partial argument matching.
df_months_pca2 <- df_months %>%
  nest(data = everything()) %>%
  mutate(
    pca = map(data, ~ prcomp(select(.x, -request_type),
                             center = TRUE, scale. = TRUE)),
    pca_aug = map2(pca, data, ~ augment(.x, data = .y))
  )

Plot the PCA data

# Build the PC1/PC2 biplot, with repelled loading labels, for each nested PCA
# fit and return the resulting list of plots.
df_months_pca2 %>%
  mutate(
    pca_graph = map2(
      pca,
      data,
      function(fit, observations) {
        biplot <- autoplot(
          fit,
          data = observations,
          loadings = TRUE,
          loadings.label = TRUE,
          loadings.label.repel = TRUE
        )
        biplot +
          theme_bw() +
          labs(
            title = "First two principal components of PCA on 311 dataset",
            x = "Principal Component 1",
            y = "Principal Component 2"
          )
      }
    )
  ) %>%
  pull(pca_graph)
## [[1]]

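This shows that the contrast between summer and winter explains a significant part of the variance: the first component loads positively on June–August and negatively on December–February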

Plot the data to show the outliers

# Flag request types that sit far from the origin on both of the first two
# components, and tag potholes so they can be coloured separately. A plain
# logical comparison yields TRUE/FALSE for every row, whereas a single-branch
# case_when() leaves every non-matching row as NA.
au <- au %>%
  mutate(
    outlier = abs(.fittedPC1) > 2 & abs(.fittedPC2) > 1.5,
    pothole = case_when(
      request_type == "Potholes" ~ "Potholes",
      request_type != "Potholes" ~ "Other"
    )
  )

# Scatter of the first two component scores, labelling only the flagged
# outliers.
ggplot(au, aes(.fittedPC1, .fittedPC2)) +
  geom_point() +
  geom_label_repel(
    aes(label = request_type),
    data = filter(au, outlier)
  ) +
  theme_bw()

# Scatter of the first two component scores with potholes highlighted.
# The colours are named so the mapping does not depend on the alphabetical
# order of the `pothole` values, and the legend title is suppressed through an
# explicit `name = NULL` rather than a positional argument.
au %>%
  ggplot(aes(.fittedPC1, .fittedPC2)) +
  geom_point(aes(color = pothole)) +
  geom_label_repel(
    data = au %>% filter(request_type == "Potholes"),
    aes(label = request_type)
  ) +
  theme_bw() +
  scale_color_manual(
    name = NULL,
    values = c(Other = "black", Potholes = "red")
  )

Related