I had a Hubspot OA and came up with a solution, but it wasn't correct. I tried to fix it, but I couldn't figure out what was wrong with it. If someone knows the solution or knows what's wrong with my solution, please let me know.
Problem description:
You're in charge of implementing a new analytics "sessions" view. You're given a set of data that consists of individual web page visits, along with a visitorId which is generated by a tracking cookie that uniquely identifies each visitor. From this data we need to generate a list of sessions for each visitor.
You can get the raw event data from the dataset API at REDACTED
The data set looks like this:
"events": [
{
"url": "/pages/a-big-river",
"visitorId": "d1177368-2310-11e8-9e2a-9b860a0d9039",
"timestamp": 1512754583000
},
{
"url": "/pages/a-small-dog",
"visitorId": "d1177368-2310-11e8-9e2a-9b860a0d9039",
"timestamp": 1512754631000
},
{
"url": "/pages/a-big-talk",
"visitorId": "f877b96c-9969-4abc-bbe2-54b17d030f8b",
"timestamp": 1512709065294
},
{
"url": "/pages/a-sad-story",
"visitorId": "f877b96c-9969-4abc-bbe2-54b17d030f8b",
"timestamp": 1512711000000
},
{
"url": "/pages/a-big-river",
"visitorId": "d1177368-2310-11e8-9e2a-9b860a0d9039",
"timestamp": 1512754436000
},
{
"url": "/pages/a-sad-story",
"visitorId": "f877b96c-9969-4abc-bbe2-54b17d030f8b",
"timestamp": 1512709024000
}
]
}Given this input data, we want to create a set of sessions of the incoming data. A sessions is defined as a group of events from a single visitor with no more than 10 minutes between each event. A visitor can have multiple sessions.
So given the example input data above, we would expect output which looks like:
"sessionsByUser": {
"f877b96c-9969-4abc-bbe2-54b17d030f8b": [
{
"duration": 41294,
"pages": [
"/pages/a-sad-story",
"/pages/a-big-talk"
],
"startTime": 1512709024000
},
{
"duration": 0,
"pages": [
"/pages/a-sad-story"
],
"startTime": 1512711000000
}
],
"d1177368-2310-11e8-9e2a-9b860a0d9039": [
{
"duration": 195000,
"pages": [
"/pages/a-big-river",
"/pages/a-big-river",
"/pages/a-small-dog"
],
"startTime": 1512754436000
}
]
}
}Once the event data has been transformed into session, you will need to send the result via an http POST to REDACTED
Notes
Timestamps are in milliseconds.
Events may not be given in chronological order.
The visitors in sessionsByUser can be in any order.
For each visitor, sessions to be in chronological order.
For each session, the URLs should be sorted in chronological order
For a session with only one event the duration should be zero
Evaluation
When you’re done, this page will update with a form to upload your code. We’ll evaluate you based on three things:
First and foremost, if you complete the project within three hours.
Next, the time from when you click the start button below to the time you submit a correct solution.
Finally, the quality of code you submit. We’re looking for simplicity, clarity and readability over cleverness or flexbility.
We think you should be able to complete this project in a single sitting, so try to allocate a single block if you can.
My solution:
My approach was firstly to:
Then, for each visitorId in the map, I would create a list of sessions. My approach to this was that I would maintain a current session and I would iterate through every URL visit (in chronological order, because they have been sorted) by a certain visitorId.
If the current session was empty, I would add the URL visit in the current iteration to the session. If the URL visit in the current iteration occurred more than 10 minutes after the first URL visit in the current session, I would add the current session to a list of sessions and set the current session to be empty. Otherwise, I would add the current iteration's URL visit to the current session.
A session's duration would be the difference between the timestamps of the first and last URL visits in the session. The session's pages would be a list of all of the pages in the session, in chronological order. The startTime would be the timestamp of the first URL visit in the session.
Below is the implementation of my approach in Javascript.
const axios = require("axios");
const moment = require("moment");
const math = require("mathjs");
/**
* Creates a map from the response data
* that maps a visitorId to a list
* of objects containing a visited url and timestamp,
* sorted by timestamp
* @param {} data
*/
function createVisitorMap(data) {
let result = new Map();
//firstly populate the map
for(let event of data.events) {
let eventData = {
url: event.url,
timestamp: event.timestamp
};
if(!result.has(event.visitorId)) {
result.set(event.visitorId, []);
}
result.get(event.visitorId).push(eventData);
}
//for each entry, sort the value by timestamp
for(let key of result.keys()) {
result.get(key).sort((a,b) => a.timestamp - b.timestamp);
}
return result;
}
/**
* Creates an object that contains a list of sessions
* for each visitorId in chronological order.
* @param {} visitorMap
*/
function createSessions(visitorMap) {
//for each visitor in the map:
//iterate through each url visit:
//1st visit is the start of a session.
//if the next visit is < 10 minutes after the 1st visit in the session, add it to the session
//otherwise, create a new session and add this visit to it.
//sessions:
//startTime is the timestamp of the 1st visit in the session.
//duration is the difference between the timestamps of the 1st and last visit.
//pages is a list all of the pages in order.
let result = {};
for(let key of visitorMap.keys()) {
let sessions = [];
let currVisits = [];
let visits = visitorMap.get(key);
currVisits.push(visits[0]);
for(let i = 1; i<visits.length; i++) {
let diff = moment(visits[i].timestamp).diff(currVisits[0].timestamp, "minutes", true);
if(math.larger(diff, 10)) {
//create the new session.
let newSession = {
startTime: currVisits[0].timestamp,
duration: currVisits[currVisits.length-1].timestamp - currVisits[0].timestamp,
pages: []
};
for(let visit of currVisits) {
newSession.pages.push(visit.url);
}
sessions.push(newSession);
//reset variables.
currVisits = [];
currVisits.push(visits[i]);
} else {
currVisits.push(visits[i]);
}
}
//there are visits left over in currVisits,
//so create the last session.
let newSession = {
startTime: currVisits[0].timestamp,
duration: currVisits[currVisits.length-1].timestamp - currVisits[0].timestamp,
pages: []
};
for(let visit of currVisits) {
newSession.pages.push(visit.url);
}
sessions.push(newSession);
result[key] = sessions;
}
return result;
}
axios.get("REDACTED GET URL")
.then((response) => {
//create a map of visitor to list of url visits
let visitorMap = createVisitorMap(response.data);
let sessions = createSessions(visitorMap);
let result = {
sessionsByUser: sessions
};
console.log(JSON.stringify(result));
axios.post("REDACTED POST URL", result)
.then((response) => {
console.log(response);
})
.catch((error) => {
console.log(error);
})
})I spent hours looking at my solution and the problem description to figure out what was wrong, but I wasn't able to finish in time. Would someone be able to tell my why my solution was wrong or provide their own solution?