using datasets from Kaggle
Every strategy game on the Apple App Store
fichier : TD_appstore_games.csv
## 'data.frame': 17005 obs. of 18 variables:
## $ URL : Factor w/ 16845 levels "https://apps.apple.com/us/app/%D8%B5%D8%B1%D8%A7%D8%B9-%D8%A7%D9%84%D8%AC%D8%A8%D8%A7%D8%A8%D8%B1%D8%A9/id934298011",..: 14322 12379 10394 14283 12944 14269 7205 3757 2909 1491 ...
## $ ID : int 284921427 284926400 284946595 285755462 285831220 286210009 286313771 286363959 286566987 286682679 ...
## $ Name : Factor w/ 16845 levels "- Turning -",..: 14328 12480 10575 14336 13031 14333 7476 4125 3297 51 ...
## $ Subtitle : Factor w/ 5010 levels "","\"'Be a GOD' Action Chess\"",..: 1 1 1 1 1 3347 1 1 1 1 ...
## $ Icon.URL : Factor w/ 16845 levels "https://is1-ssl.mzstatic.com/image/thumb/Purple/v4/00/a4/05/00a40516-b1b6-9fc9-8011-b899e61ce8a9/source/512x512bb.jpg",..: 5229 12441 14514 7528 1188 1583 13470 14272 8730 10152 ...
## $ Average.User.Rating : num 4 3.5 3 3.5 3.5 3 2.5 2.5 2.5 2.5 ...
## $ User.Rating.Count : int 3553 284 8376 190394 28 47 35 125 44 184 ...
## $ Price : num 2.99 1.99 0 0 2.99 0 0 0.99 0 0 ...
## $ In.app.Purchases : Factor w/ 3804 levels "","0","0.0, 0.0",..: 1 1 1 1 1 1178 1 1 1 1 ...
## $ Description : Factor w/ 16472 levels "---------------------------------------------------\\nTCG\\uff08Trading Card Game\\uff09+ Rogue-like = Spell He"| __truncated__,..: 11129 14282 12521 15293 3951 13742 2359 531 2959 7191 ...
## $ Developer : Factor w/ 8693 levels "\"Alex Artem'ev\"",..: 5153 4297 955 5153 6556 5889 6524 1519 3787 1029 ...
## $ Age.Rating : Factor w/ 4 levels "12+","17+","4+",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ Languages : Factor w/ 991 levels "","AF, AR, CA, HR, CS, DA, NL, EN, FI, FR, DE, EL, HE, HU, ID, IT, JA, KO, MS, NB, PL, PT, RO, RU, ZH, SK, ES, SV,"| __truncated__,..: 286 316 316 286 295 316 1 316 753 316 ...
## $ Size : num 15853568 12328960 674816 21552128 34689024 ...
## $ Primary.Genre : Factor w/ 21 levels "Book","Business",..: 7 7 7 7 7 7 7 7 7 7 ...
## $ Genres : Factor w/ 1004 levels "Books, Games, Board, Strategy",..: 710 585 185 710 588 298 284 585 589 240 ...
## $ Original.Release.Date : Factor w/ 3083 levels "1/01/2014","1/01/2015",..: 258 258 258 1588 972 2364 2364 2285 2285 59 ...
## $ Current.Version.Release.Date: Factor w/ 2511 levels "1/01/2014","1/01/2015",..: 1915 688 2161 1915 1215 1750 482 1879 1321 51 ...
## Loading required package: jpeg
# Bar plot of the average user rating of the `myX` most-rated games, with each
# game's icon drawn above its bar and its name written vertically on the bar.

# Ten-step red -> yellow -> green ramp, indexed below by `rating * 2`
# (ratings appear to come in 0.5 steps -- TODO confirm against the data).
myCol <- colorRampPalette(c("red", "yellow", "darkgreen"))(10)

# Sort by number of ratings, ties broken by average rating, highest first.
bdd1 <- bdd1[order(bdd1$User.Rating.Count,
                   bdd1$Average.User.Rating,
                   decreasing = TRUE), ]

myX <- 15
bddTopX <- head(bdd1, myX)

# barplot() returns the x midpoints of the bars; they are reused to place
# the icons and the labels.
myBar <- barplot(bddTopX$Average.User.Rating,
                 ylim = c(0, 6.5),
                 col = myCol[bddTopX$Average.User.Rating * 2])

# Download every icon to a temporary file and draw it just above its bar.
# seq_len() keeps the loop empty when bddTopX has no rows (1:0 would not).
trash <- lapply(seq_len(nrow(bddTopX)), function(i) {
  myurl <- as.character(bddTopX$Icon.URL[i])
  z <- tempfile()
  # Remove the temporary file even if the download or the decoding fails.
  on.exit(unlink(z), add = TRUE)
  download.file(myurl, z, mode = "wb")
  img <- readJPEG(z)
  rasterImage(img,
              xleft = myBar[i] - 0.4,
              xright = myBar[i] + 0.4,
              ybottom = bddTopX$Average.User.Rating[i] + 0.5,
              ytop = bddTopX$Average.User.Rating[i] + 1.25)
  invisible(NULL)
})

# The names contain literal escape sequences ("\xe9", "\u2122") as text;
# replace them with readable characters before labelling.
bddTopX$Name <- gsub(pattern = "\\\\xe9",
                     replacement = "é", x = bddTopX$Name)
bddTopX$Name <- gsub(pattern = "\\\\u2122",
                     replacement = "TM", x = bddTopX$Name)
text(x = myBar, y = 0.25, srt = 90, labels = bddTopX$Name, pos = 4)
# Word clouds of the descriptions of the (up to) four most-rated games,
# drawn in a 2 x 2 grid with the game name printed at the top of each panel.

# library() stops immediately when a package is missing, whereas require()
# only returns FALSE and lets the script fail later with an obscure error.
library("wordcloud")
library("palettesForR")

# Keep the previous graphical parameters so that later plots are not drawn
# in a 2 x 2 grid with zero margins.
oldPar <- par(mfrow = c(2, 2), mar = c(0, 0, 0, 0))

# Do not index past the end of bddTopX if it holds fewer than four games.
nCloud <- min(4, nrow(bddTopX))

trash <- lapply(seq_len(nCloud), function(i) {
  desc <- as.character(bddTopX$Description[i])
  # Drop literal unicode escape sequences stored as text in the description.
  desc <- gsub(pattern = "\\\\u2022", replacement = "", x = desc)
  desc <- gsub(pattern = "\\\\u25cf", replacement = "", x = desc)
  desc <- gsub(pattern = "\\\\uff08", replacement = "", x = desc)
  # Turn punctuation and literal "\n" sequences into spaces.
  desc <- gsub(pattern = '\\|:|\\\\n|\\"|,|\\(|\\)|\\.|:|\\*',
               replacement = " ", x = desc)
  desc <- gsub(pattern = '!', replacement = " ", x = desc)
  # Collapse runs of spaces. The previous pattern had three identical
  # single-space alternatives and replaced a space with a space (a no-op).
  desc <- gsub(pattern = " +", replacement = " ", x = desc)
  desc <- gsub(pattern = '[0-9]', replacement = "", x = desc)
  # One word per element, without the empty strings left by the clean-up.
  descW <- strsplit(desc, split = " ")[[1]]
  descW <- descW[descW != ""]
  # `colors` is spelled out instead of relying on partial matching of `col`.
  wordcloud(descW, colors = as.vector(Tango_gpl), scale = c(2.5, 0.5))
  text(x = 0.5, y = 1, bddTopX$Name[i])
})

par(oldPar)
Dataset used for learning data visualization and basic regression
fichier : TD_HRDataset_v13.csv
Feature | Description | DataType |
---|---|---|
Employee Name | Employee’s full name | Text |
EmpID | Employee ID is unique to each employee | Text |
MarriedID | Is the person married (1 or 0 for yes or no) | Binary |
MaritalStatusID | Marital status code that matches the text field MaritalDesc | Integer |
EmpStatusID | Employment status code that matches text field EmploymentStatus | Integer |
DeptID | Department ID code that matches the department the employee works in | Integer |
Feature | Description | DataType |
---|---|---|
PerfScoreID | Performance Score code that matches the employee’s most recent performance score | Integer |
FromDiversityJobFairID | Was the employee sourced from the Diversity job fair? 1 or 0 for yes or no | Binary |
PayRate | The person’s hourly pay rate. All salaries are converted to hourly pay rate | Float |
Termd | Has this employee been terminated - 1 or 0 | Binary |
PositionID | An integer indicating the person’s position | Integer |
Feature | Description | DataType |
---|---|---|
Position | The text name/title of the position the person has | Text |
State | The state that the person lives in | Text |
Zip | The zip code for the employee | Text |
DOB | Date of Birth for the employee | Date |
Sex | Sex - M or F | Text |
MaritalDesc | The marital status of the person (divorced, single, widowed, separated, etc) | Text |
Feature | Description | DataType |
---|---|---|
CitizenDesc | Label for whether the person is a Citizen or Eligible NonCitizen | Text |
HispanicLatino | Yes or No field for whether the employee is Hispanic/Latino | Text |
RaceDesc | Description/text of the race the person identifies with | Text |
DateofHire | Date the person was hired | Date |
DateofTermination | Date the person was terminated, only populated if, in fact, Termd = 1 | Date |
TermReason | A text reason / description for why the person was terminated | Text |
Feature | Description | DataType |
---|---|---|
EmploymentStatus | A description/category of the person’s employment status. Anyone currently working full time = Active | Text |
Department | Name of the department that the person works in | Text |
ManagerName | The name of the person’s immediate manager | Text |
ManagerID | A unique identifier for each manager. | Integer |
RecruitmentSource | The name of the recruitment source where the employee was recruited from | Text |
Feature | Description | DataType |
---|---|---|
PerformanceScore | Performance Score text/category (Fully Meets, Partially Meets, PIP, Exceeds) | Text |
EngagementSurvey | Results from the last engagement survey, managed by our external partner | Float |
EmpSatisfaction | A basic satisfaction score between 1 and 5, as reported on a recent employee satisfaction survey | Integer |
SpecialProjectsCount | The number of special projects that the employee worked on during the last 6 months | Integer |
Feature | Description | DataType |
---|---|---|
LastPerformanceReviewDate | The most recent date of the person’s last performance review. | Date |
DaysLateLast30 | The number of times that the employee was late to work during the last 30 days | Integer |
## 'data.frame': 401 obs. of 35 variables:
## $ Employee_Name : Factor w/ 311 levels "","Adinolfi, Wilson K",..: 30 156 271 129 257 262 34 41 67 68 ...
## $ EmpID : int 1103024456 1106026572 1302053333 1211050782 1307059817 711007713 1504073368 1403065721 1408069481 1306059197 ...
## $ MarriedID : int 1 0 0 1 0 1 1 0 0 1 ...
## $ MaritalStatusID : int 1 2 0 1 0 1 1 0 0 1 ...
## $ GenderID : int 0 1 1 0 0 0 0 0 0 1 ...
## $ EmpStatusID : int 1 1 1 1 1 5 5 1 1 1 ...
## $ DeptID : int 1 1 1 1 1 1 6 6 6 6 ...
## $ PerfScoreID : int 3 3 3 3 3 3 3 3 1 3 ...
## $ FromDiversityJobFairID : int 1 0 0 0 0 1 0 0 0 0 ...
## $ PayRate : num 28.5 23 29 21.5 16.6 ...
## $ Termd : int 0 0 0 1 0 1 1 0 0 0 ...
## $ PositionID : int 1 1 1 2 2 2 3 3 3 3 ...
## $ Position : Factor w/ 33 levels "","Accountant I",..: 2 2 2 3 3 3 4 4 4 4 ...
## $ State : Factor w/ 29 levels "","AL","AZ","CA",..: 12 12 12 12 12 12 27 28 29 17 ...
## $ Zip : int 1450 1460 2703 2170 2330 1844 21851 5664 98052 3062 ...
## $ DOB : Factor w/ 307 levels "","01/02/51",..: 284 88 205 224 116 140 130 111 110 220 ...
## $ Sex : Factor w/ 3 levels "","F","M ": 2 3 3 2 2 2 2 2 2 3 ...
## $ MaritalDesc : Factor w/ 6 levels "","Divorced",..: 3 2 5 3 5 3 3 5 5 3 ...
## $ CitizenDesc : Factor w/ 4 levels "","Eligible NonCitizen",..: 4 4 4 4 4 4 2 4 4 4 ...
## $ HispanicLatino : Factor w/ 5 levels "","no","No","yes",..: 3 3 3 3 3 3 3 3 5 3 ...
## $ RaceDesc : Factor w/ 7 levels "","American Indian or Alaska Native",..: 4 4 7 7 7 3 4 7 7 2 ...
## $ DateofHire : Factor w/ 100 levels "","1/10/2011",..: 21 10 95 33 53 93 85 87 36 87 ...
## $ DateofTermination : Factor w/ 94 levels "","01/02/12",..: 1 1 1 29 1 76 56 1 1 1 ...
## $ TermReason : Factor w/ 18 levels "","Another position",..: 12 12 12 1 12 4 2 12 12 12 ...
## $ EmploymentStatus : Factor w/ 6 levels "","Active","Future Start",..: 2 2 2 5 2 6 6 2 2 2 ...
## $ Department : Factor w/ 7 levels "","Admin Offices",..: 2 2 2 2 2 2 6 6 6 6 ...
## $ ManagerName : Factor w/ 22 levels "","Alex Sweetwater",..: 5 5 5 5 5 5 14 14 14 14 ...
## $ ManagerID : int 1 1 1 1 1 1 17 17 17 17 ...
## $ RecruitmentSource : Factor w/ 24 levels "","Billboard",..: 5 23 10 18 23 5 21 2 23 18 ...
## $ PerformanceScore : Factor w/ 5 levels "","Exceeds","Fully Meets",..: 3 3 3 3 3 3 3 3 5 3 ...
## $ EngagementSurvey : num 2.04 5 3.9 3.24 5 3.8 3.14 5 2.3 3.6 ...
## $ EmpSatisfaction : int 2 4 5 3 3 4 5 5 1 5 ...
## $ SpecialProjectsCount : int 6 4 5 4 5 4 0 0 0 0 ...
## $ LastPerformanceReview_Date: Factor w/ 43 levels "","1/10/2019",..: 5 7 8 1 5 1 1 11 18 20 ...
## $ DaysLateLast30 : int 0 0 0 NA 0 NA NA 0 0 0 ...
The University of Copenhagen’s Zoological Museum zapped insects for 18 years
fichier : TD_flightTrap.csv
## 'data.frame': 44088 obs. of 7 variables:
## $ order : Factor w/ 2 levels "COLEOPTERA","LEPIDOPTERA": 2 2 2 2 2 2 2 2 2 2 ...
## $ family : Factor w/ 104 levels "ACROLEPIIDAE",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ name : Factor w/ 1534 levels "Abraxas grossulariata L.",..: 25 25 25 25 25 25 25 25 25 25 ...
## $ year : int 1994 1994 1996 1998 1999 1999 2000 2003 2004 2006 ...
## $ date1 : Factor w/ 577 levels "10/1/03","10/1/93",..: 385 436 301 255 506 476 317 414 310 256 ...
## $ date2 : Factor w/ 577 levels "10/1/02","10/11/93",..: 432 449 310 271 378 483 338 459 315 281 ...
## $ individuals: int 1 1 1 1 1 1 1 1 1 1 ...
Inbound US border crossing entries
fichier : TD_Border_Crossing_Entry_Data.csv
## 'data.frame': 346733 obs. of 8 variables:
## $ Port.Name: Factor w/ 116 levels "Alcan","Alexandria Bay",..: 19 108 73 65 106 57 74 84 80 22 ...
## $ State : Factor w/ 15 levels "Alaska","Arizona",..: 3 5 3 2 10 5 11 13 11 10 ...
## $ Port.Code: int 2507 108 2506 2604 715 109 3401 2309 3403 712 ...
## $ Border : Factor w/ 2 levels "US-Canada Border",..: 2 1 2 2 1 1 1 2 1 1 ...
## $ Date : Factor w/ 279 levels "01/01/1996 12:00:00 AM",..: 72 72 72 72 72 72 72 72 72 72 ...
## $ Measure : Factor w/ 12 levels "Bus Passengers",..: 12 7 12 9 4 12 1 10 6 12 ...
## $ Value : int 34447 428 81217 62 16377 179 1054 1808 6685 24759 ...
## $ Location : Factor w/ 224 levels "POINT (-100.05 49)",..: 75 142 88 54 162 143 198 205 17 163 ...
# Choropleth of the US states coloured by the number of border-crossing
# records per state in bdd4 (blue = fewest, red = most).

# library() stops immediately when sp is missing; require() would only
# return FALSE.
library("sp")
gadm <- readRDS("./DATA/gadm36_USA_1_sp.rds")

# 101 colours so that the rescaled counts (1..101) can index the ramp directly.
myCol <- colorRampPalette(c("blue", "red"))(101)

# Number of records per state, rescaled linearly from [min, max] to [1, 101].
tabState <- data.frame(table(bdd4$State))
tabState$Freq <- round((tabState$Freq - min(tabState$Freq)) /
  (max(tabState$Freq) - min(tabState$Freq)) * 100 + 1)

# Look the counts up in polygon order. The previous merge(..., all = TRUE)
# sorted its result by state name and could add rows for states absent from
# gadm, so the colour vector was not guaranteed to line up with the polygons.
# Column names (x, Freq, myCol) are kept as merge() produced them.
stateNames <- as.character(gadm@data$NAME_1)
dfCol <- data.frame(
  x = stateNames,
  Freq = tabState$Freq[match(stateNames, as.character(tabState[[1]]))],
  stringsAsFactors = FALSE
)
# States without any record get an NA colour and are drawn unfilled.
dfCol$myCol <- myCol[dfCol$Freq]

plot(gadm, xlim = c(-124, -63), ylim = c(24, 50), col = dfCol$myCol)

# NOTE(review): the labels read like quantiles ("10q", ...) but the scale
# above is a linear min-max rescaling -- confirm the intended wording.
legend("bottomleft", title = "Border Crossing Entry (Rank events)",
       legend = c("10q", "20q", "30q", "40q",
                  "50q", "60q", "70q", "80q", "90q", "100q"),
       fill = myCol[c(10, 20, 30, 40, 50, 60,
                      70, 80, 90, 100)], ncol = 4)