-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDataset Links.rtf
103 lines (101 loc) · 8.2 KB
/
Dataset Links.rtf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
{\rtf1\ansi\ansicpg1252\cocoartf2759
\cocoatextscaling0\cocoaplatform0{\fonttbl\f0\fnil\fcharset0 HelveticaNeue-Bold;\f1\fnil\fcharset0 HelveticaNeue;}
{\colortbl;\red255\green255\blue255;}
{\*\expandedcolortbl;;}
{\*\listtable{\list\listtemplateid1\listhybrid{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\*\levelmarker \{decimal\}.}{\leveltext\leveltemplateid1\'02\'00.;}{\levelnumbers\'01;}\fi-360\li720\lin720 }{\listname ;}\listid1}
{\list\listtemplateid2\listhybrid{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\*\levelmarker \{decimal\}.}{\leveltext\leveltemplateid101\'02\'00.;}{\levelnumbers\'01;}\fi-360\li720\lin720 }{\listname ;}\listid2}
{\list\listtemplateid3\listhybrid{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\*\levelmarker \{decimal\}.}{\leveltext\leveltemplateid201\'02\'00.;}{\levelnumbers\'01;}\fi-360\li720\lin720 }{\listname ;}\listid3}
{\list\listtemplateid4\listhybrid{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\*\levelmarker \{decimal\}.}{\leveltext\leveltemplateid301\'02\'00.;}{\levelnumbers\'01;}\fi-360\li720\lin720 }{\listname ;}\listid4}
{\list\listtemplateid5\listhybrid{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\*\levelmarker \{decimal\}.}{\leveltext\leveltemplateid401\'02\'00.;}{\levelnumbers\'01;}\fi-360\li720\lin720 }{\listname ;}\listid5}}
{\*\listoverridetable{\listoverride\listid1\listoverridecount0\ls1}{\listoverride\listid2\listoverridecount0\ls2}{\listoverride\listid3\listoverridecount0\ls3}{\listoverride\listid4\listoverridecount0\ls4}{\listoverride\listid5\listoverridecount0\ls5}}
\paperw11900\paperh16840\margl1440\margr1440\vieww12720\viewh7240\viewkind0
\deftab560
\pard\pardeftab560\partightenfactor0
\f0\b\fs40 \cf0 Datasets\
\pard\pardeftab560\slleading20\partightenfactor0
\fs26 \cf0 \
\pard\pardeftab560\pardirnatural\partightenfactor0
\ls1\ilvl0
\f1\b0 \cf0 {\listtext 1. }
\f0\b Massively large data set - to show optimised queries\
\ls1\ilvl0
\f1\b0 {\listtext 2. }
\f0\b Target data set - to show techniques\
\ls1\ilvl0
\f1\b0 {\listtext 3. }
\f0\b Original data set - to show creative insights - {\field{\*\fldinst{HYPERLINK "https://www.kaggle.com/datasets?feedbackIds=14"}}{\fldrslt https://www.kaggle.com/datasets?feedbackIds=14}} \
\pard\tx720\pardeftab560\pardirnatural\partightenfactor0
\cf0 \
Large Datasets - over a million rows - best queries for large datasets? Techniques for importing and handling large data sets?\
1. https://www.kaggle.com/datasets/rishitjavia/netflix-movie-rating-dataset?select=Netflix_Dataset_Rating.csv - 2.65 million records\
2. https://www.kaggle.com/datasets/jfreyberg/spotify-chart-data - 5.2 million records\
3. https://www.kaggle.com/datasets/davidgauthier/glassdoor-job-reviews-2 - 10.2 million records\
4. https://www.kaggle.com/datasets/mirbektoktogaraev/should-this-loan-be-approved-or-denied - 800K records\
5. https://www.kaggle.com/datasets/cisautomotiveapi/large-car-dataset - 5.7 million records\
6. https://www.kaggle.com/c/riiid-test-answer-prediction/data?select=example_test.csv - >100 million records\
\
\
\
E-com datasets\
\pard\pardeftab560\pardirnatural\partightenfactor0
{\field{\*\fldinst{HYPERLINK "https://www.kaggle.com/datasets/swathiunnikrishnan/amazon-consumer-behaviour-dataset"}}{\fldrslt
\f1\b0 \cf0 https://www.kaggle.com/datasets/swathiunnikrishnan/amazon-consumer-behaviour-dataset}}
\f1\b0
\f0\b \
\pard\pardeftab560\slleading20\partightenfactor0
{\field{\*\fldinst{HYPERLINK "https://www.kaggle.com/datasets/nelgiriyewithana/global-youtube-statistics-2023"}}{\fldrslt
\f1\b0 \cf0 https://www.kaggle.com/datasets/nelgiriyewithana/global-youtube-statistics-2023}}
\f1\b0 \
\pard\pardeftab560\slleading20\partightenfactor0
{\field{\*\fldinst{HYPERLINK "https://www.kaggle.com/datasets/bhanupratapbiswas/uber-data-analysis"}}{\fldrslt \cf0 https://www.kaggle.com/datasets/bhanupratapbiswas/uber-data-analysis}} \
\pard\pardeftab560\pardirnatural\partightenfactor0
{\field{\*\fldinst{HYPERLINK "https://www.kaggle.com/datasets/anandshaw2001/amazon-sales-dataset"}}{\fldrslt \cf0 https://www.kaggle.com/datasets/anandshaw2001/amazon-sales-dataset}} \
\pard\pardeftab560\slleading20\partightenfactor0
\cf0 \
\pard\pardeftab560\slleading20\pardirnatural\partightenfactor0
\cf0 \
\pard\pardeftab560\slleading20\partightenfactor0
\cf0 Media - YouTube, Spotify, Netflix - {\field{\*\fldinst{HYPERLINK "https://www.kaggle.com/datasets/nelgiriyewithana/global-youtube-statistics-2023"}}{\fldrslt https://www.kaggle.com/datasets/nelgiriyewithana/global-youtube-statistics-2023}} \
Logistics - {\field{\*\fldinst{HYPERLINK "https://www.kaggle.com/datasets/bhanupratapbiswas/uber-data-analysis"}}{\fldrslt https://www.kaggle.com/datasets/bhanupratapbiswas/uber-data-analysis}} \
Ecom - \
\pard\pardeftab560\pardirnatural\partightenfactor0
\ls2\ilvl0\cf0 {\listtext 0. }{\field{\*\fldinst{HYPERLINK "https://www.kaggle.com/datasets/anandshaw2001/amazon-sales-dataset"}}{\fldrslt https://www.kaggle.com/datasets/anandshaw2001/amazon-sales-dataset}} \
{\listtext 0. }{\field{\*\fldinst{HYPERLINK "https://www.kaggle.com/datasets/vivek468/superstore-dataset-final"}}{\fldrslt https://www.kaggle.com/datasets/vivek468/superstore-dataset-final}} \
{\listtext 0. }{\field{\*\fldinst{HYPERLINK "https://www.kaggle.com/datasets/mehmettahiraslan/customer-shopping-dataset"}}{\fldrslt https://www.kaggle.com/datasets/mehmettahiraslan/customer-shopping-dataset}} \
\pard\pardeftab560\slleading20\partightenfactor0
\cf0 Social - Fb, Insta, TikTok - {\field{\*\fldinst{HYPERLINK "https://www.kaggle.com/datasets/andrewmvd/okcupid-profiles/data"}}{\fldrslt https://www.kaggle.com/datasets/andrewmvd/okcupid-profiles/data}} \
Pharma\
\pard\pardeftab560\pardirnatural\partightenfactor0
\ls3\ilvl0\cf0 {\listtext 0. }{\field{\*\fldinst{HYPERLINK "https://www.kaggle.com/datasets/uom190346a/sleep-health-and-lifestyle-dataset"}}{\fldrslt https://www.kaggle.com/datasets/uom190346a/sleep-health-and-lifestyle-dataset}} - check effects of high and low sleep on other activities\
\pard\pardeftab560\slleading20\partightenfactor0
\cf0 Stock price - \
Business - \
\pard\pardeftab560\pardirnatural\partightenfactor0
\ls4\ilvl0\cf0 {\listtext 0. }{\field{\*\fldinst{HYPERLINK "https://www.kaggle.com/datasets/nelgiriyewithana/billionaires-statistics-dataset"}}{\fldrslt https://www.kaggle.com/datasets/nelgiriyewithana/billionaires-statistics-dataset}} - which sector has more billionaires.\
{\listtext 0. }{\field{\*\fldinst{HYPERLINK "https://www.kaggle.com/datasets/pralabhpoudel/world-energy-consumption"}}{\fldrslt https://www.kaggle.com/datasets/pralabhpoudel/world-energy-consumption}} \
\pard\pardeftab560\slleading20\partightenfactor0
\cf0 Search - {\field{\*\fldinst{HYPERLINK "https://www.kaggle.com/datasets/aristotle609/mediumsearchdataset"}}{\fldrslt https://www.kaggle.com/datasets/aristotle609/mediumsearchdataset}} \
Psychology \
\pard\pardeftab560\pardirnatural\partightenfactor0
\ls5\ilvl0\cf0 {\listtext 1. }{\field{\*\fldinst{HYPERLINK "https://www.kaggle.com/datasets/swathiunnikrishnan/amazon-consumer-behaviour-dataset"}}{\fldrslt https://www.kaggle.com/datasets/swathiunnikrishnan/amazon-consumer-behaviour-dataset}} \
{\listtext 2. }{\field{\*\fldinst{HYPERLINK "https://www.kaggle.com/datasets/kazanova/sentiment140"}}{\fldrslt https://www.kaggle.com/datasets/kazanova/sentiment140}} - check if tweets have some keywords to do shadow banning\
{\listtext 3. }{\field{\*\fldinst{HYPERLINK "https://www.kaggle.com/datasets/zeesolver/consumer-behavior-and-shopping-habits-dataset"}}{\fldrslt https://www.kaggle.com/datasets/zeesolver/consumer-behavior-and-shopping-habits-dataset}} \
\pard\tx720\pardeftab560\pardirnatural\partightenfactor0
\cf0 \
\
\
Cities\
1. https://github.com/lmfmaier/cities-json/blob/master/cities500.json\
2. https://gist.github.com/Lwdthe1/81818d30d23f012628aac1cdf672627d\
3. https://github.com/stefangabos/world_countries/\
4. https://github.com/samayo/country-json/blob/master/src/country-by-cities.json\
\
\
\
https://middlebury.libguides.com/econstats/large-datasets\
\
https://github.com/awesomedata/awesome-public-datasets\
\
\
\
}