# M D B l o c a l
ASYA KAMSKY
LEAD KNOW-IT-ALL MONGODB, INC @asya999 #askAsya
PIPELINE POWER
DOING MORE WITH MONGODB AGGREGATION FRAMEWORK
APPLICATIONS & DATA
STORE
RETRIEVE
find() & aggregate()
# M D B l o c a l
OPTIONS FOR ANALYTICS
pre-aggregate
aggregate	
in	MongoDB
aggregate	elsewhere
# M D B l o c a l
pre-aggregate
aggregate	
in	MongoDB
aggregate	elsewhere
OPTIONS FOR ANALYTICS
# M D B l o c a l
pre-aggregate
aggregate	
in	MongoDB
aggregate	elsewhere
OPTIONS FOR ANALYTICS
⏱⏱
# M D B l o c a l
pre-aggregate
aggregate	
in	MongoDB
aggregate	elsewhere
OPTIONS FOR ANALYTICS
# M D B l o c a l
ANALYTICS = AGGREGATION
pre-aggregate
aggregate	
in	MongoDB
aggregate	elsewhere
PIPELINE
ps ax |grep mongod |head	1
*nix	command	line	pipe
PIPELINE
$match $group | $sort|
Input stream {} {} {} {} Result {} {} ...
PIPELINE
MongoDB	document	pipeline
Stage	1 Stage	2 Stage	3 Stage	4
{} {} {} {}
{} {} {} {}
DATA PIPELINE
{} {} {} {}
{"$stage":{	...	}}
START
Collection
View
Special	stage
STAGES
{title: "The Great Gatsby",
language: "English",
subjects: "Long Island"}
{title: "The Great Gatsby",
language: "English",
subjects: "New York"}
{title: "The Great Gatsby",
language: "English",
subjects: "1920s"}
{title: "The Great Gatsby",
language: "English",
subjects: [
"Long Island",
"New York",
"1920s"] },
{"$match":{"language":"English"}}
$match
{ _id:"Long Island",
count: 1 },
$group
{ _id: "New York",
count: 2 },
$unwind
{ _id: "1920s",
count: 1 },
$sort $skip$limit $project
{"$unwind":"$subjects"}
{"$group":{"_id":"$subjects", "count":{"$sum:1}}
{ _id: "Harlem",
count: 1 },
{ _id: "Long Island",
count: 1 },
{ _id: "New York",
count: 2 },
{ _id: "1920s",
count: 1 },
{title: "Open City",
language: "English",
subjects: [
"New York"
"Harlem" ] }
{ title: "The Great Gatsby",
language: "English",
subjects: [
"Long Island",
"New York",
"1920s"] },
{ title: "War and Peace",
language: "Russian",
subjects: [
"Russia",
"War of 1812",
"Napoleon"] },
{ title: "Open City",
language: "English",
subjects: [
"New York",
"Harlem" ] },
{title: "Open City",
language: "English",
subjects: "New York"}
{title: "Open City",
language: "English",
subjects: "Harlem"}
{ _id: "Harlem",
count: 1 },
{"$sort:{"count":-1} {"$limit":3}
{"$project":...}
STAGES
Group	and	
Transform
Aliases
Special
•Input
•Output
Reorder
Transform
Decrease
Increase
{title: "The Great Gatsby",
language: "English",
subjects: "Long Island"}
{title: "The Great Gatsby",
language: "English",
subjects: "New York"}
{title: "The Great Gatsby",
language: "English",
subjects: "1920s"}
{title: "The Great Gatsby",
language: "English",
subjects: [
"Long Island",
"New York",
"1920s"] },
{"$match":{"language":"English"}}
$match
{ _id:"Long Island",
count: 1 },
$group
{ _id: "New York",
count: 2 },
$unwind
{ _id: "1920s",
count: 1 },
$sort $skip$limit $project
{"$unwind":"$subjects"}
{"$group":{"_id":"$subjects", "count":{"$sum:1}}
{ _id: "Harlem",
count: 1 },
{ _id: "Long Island",
count: 1 },
{ _id: "New York",
count: 2 },
{ _id: "1920s",
count: 1 },
{title: "Open City",
language: "English",
subjects: [
"New York"
"Harlem" ] }
{ title: "The Great Gatsby",
language: "English",
subjects: [
"Long Island",
"New York",
"1920s"] },
{ title: "War and Peace",
language: "Russian",
subjects: [
"Russia",
"War of 1812",
"Napoleon"] },
{ title: "Open City",
language: "English",
subjects: [
"New York",
"Harlem" ] },
{title: "Open City",
language: "English",
subjects: "New York"}
{title: "Open City",
language: "English",
subjects: "Harlem"}
{ _id: "Harlem",
count: 1 },
{"$sort:{"count":-1} {"$limit":3}
{"$project":...}
# M D B l o c a l
LET ME EXPLAIN...
db.books.aggregate([
{$match:{"language":"English"}},
{$unwind:"$subjects"},
{$group:{_id:"$subjects",count:{$sum:1}}},
{$sort:{count:-1}},
{$limit:3}
],{explain:true})
{"stages" : [
{"$cursor" : {"query" : { "language" : "English"},
"fields" : { "subjects" : 1,"_id" : 0} ...
}},
{"$unwind" : {"path" : "$subjects"}},
{"$group" : {"_id" : "$subjects","count" : {"$sum" : {"$const" : 1}} }},
{"$sort" : {
"sortKey" : {"count" : -1},
"limit" : NumberLong(3)
}}
] }
db.books.aggregate([
{$match:{"language":"English"}},
{$unwind:"$subjects"},
{$group:{_id:"$subjects",count:{$sum:1}}},
{$sort:{count:-1}},
{$limit:3}
],{explain:true})
{"stages" : [
{"$cursor" : {"query" : { "language" : "English"},
"fields" : { "subjects" : 1,"_id" : 0} ...
}},
{"$unwind" : {"path" : "$subjects"}},
{"$group" : {"_id" : "$subjects","count" : {"$sum" : {"$const" : 1}} }},
{"$sort" : {
"sortKey" : {"count" : -1},
"limit" : NumberLong(3)
}}
] }
db.books.aggregate([
{$match:{"language":"English"}},
{$unwind:"$subjects"},
{$group:{_id:"$subjects",count:{$sum:1}}},
{$sort:{count:-1}},
{$limit:3}
],{explain:true})
{"stages" : [
{"$cursor" : {"query" : { },
"fields" : { "subjects" : 1,"_id" : 0} ...
}},
{"$unwind" : {"path" : "$subjects"}},
{"$group" : {"_id" : "$subjects","count" : {"$sum" : {"$const" : 1}} }},
{"$sort" : {
"sortKey" : {"count" : -1},
"limit" : NumberLong(3)
}}
] }
db.books.aggregate([
{$unwind:"$subjects"},
{$match:{"language":"English"}},
{$group:{_id:"$subjects",count:{$sum:1}}},
{$sort:{count:-1}},
{$limit:3}
],{explain:true})
db.books.aggregate([
{$unwind:"$subjects"},
{$match:{"language":"English"}},
{$group:{_id:"$subjects",count:{$sum:1}}},
{$sort:{count:-1}},
{$limit:3}
],{explain:true})
{"stages" : [
{"$cursor" : {"query" : { "language" : "English"},
"fields" : { "subjects" : 1,"_id" : 0} ...
}},
{"$unwind" : {"path" : "$subjects"}},
{"$group" : {"_id" : "$subjects","count" : {"$sum" : {"$const" : 1}} }},
{"$sort" : {
"sortKey" : {"count" : -1},
"limit" : NumberLong(3)
}}
] }
db.books.aggregate([
{$unwind:"$subjects"},
{$match:{"language":"English","subjects":/^[ABC]/}},
{$group:{_id:"$subjects",count:{$sum:1}}},
{$sort:{count:-1}},
{$limit:3}
],{explain:true})
{"stages" : [
{"$cursor" : {"query" : { "language" : "English"},
"fields" : { "subjects" : 1,"_id" : 0} ...
}},
{"$unwind" : {"path" : "$subjects"}},
{"$match" : {"subjects" : {"$regex" : "^[ABC]"}}},
{"$group" : {"_id" : "$subjects","count" : {"$sum" : {"$const" : 1}}}},
{"$sort" : {
"sortKey" : {"count" : -1},
"limit" : NumberLong(3)
}}
] }
db.books.aggregate([
{$unwind:"$subjects"},
{$match:{"language":"English","subjects":/^[ABC]/}},
{$group:{_id:"$subjects",count:{$sum:1}}},
{$sort:{count:-1}},
{$limit:3}
],{explain:true})
{"stages" : [
{"$cursor" : {"query" : { "language" : "English"},
"fields" : { "subjects" : 1,"_id" : 0} ...
}},
{"$unwind" : {"path" : "$subjects"}},
{"$match" : {"subjects" : {"$regex" : "^[ABC]"}}},
{"$group" : {"_id" : "$subjects","count" : {"$sum" : {"$const" : 1}}}},
{"$sort" : {
"sortKey" : {"count" : -1},
"limit" : NumberLong(3)
}}
] }
#MDBTOUR
STREAMING VS BLOCKING
{title: "The Great Gatsby",
language: "English".
subjects: "Long Island"}
{title: "The Great Gatsby",
language: "English",
subjects: "New York"}
{title: "The Great Gatsby",
language: "English",
subjects: "1920s"}
{title: "The Great Gatsby",
language: "English",
subjects: [
"Long Island",
"New York",
"1920s"] },
{"$match":{"language":"English"}}
$match
{ _id:"Long Island",
count: 1 },
$group
{ _id: "New York",
count: 2 },
$unwind
{ _id: "1920s",
count: 1 },
$sort $skip$limit $project
{"$unwind":"$subjects"}
{"$group":{"_id":"$subjects", "count":{"$sum:1}}
{ _id: "Harlem",
count: 1 },
{ _id: "Long Island",
count: 1 },
{ _id: "New York",
count: 2 },
{ _id: "1920s",
count: 1 },
{title: "Open City",
language: "English",
subjects: [
"New York"
"Harlem" ] }
{ title: "The Great Gatsby",
language: "English",
subjects: [
"Long Island",
"New York",
"1920s"] },
{ title: "War and Peace",
language: "Russian",
subjects: [
"Russia",
"War of 1812",
"Napoleon"] },
{ title: "Open City",
language: "English",
subjects: [
"New York",
"Harlem" ] },
{title: "Open City",
language: "English",
subjects: "New York"}
{title: "Open City",
language: "English",
subjects: "Harlem"}
{ _id: "Harlem",
count: 1 },
{"$sort:{"count":-1} {"$limit":3}
{"$project":...}
$group $sort
1
Group	and	
Transform
Aliases
Special
•Input
•Output
Reorder
Transform
Decrease
Increase
$group
$sort
#MDBTOUR
STREAMING VS BLOCKING
RESOURCE USE
INPUT STAGE RESULTSSTAGE
STREAMING RESOURCE USE
Each	document	is	streamed	through	in	RAM
STREAMING RESOURCE USE
INPUT STAGE RESULTSSTAGE
BLOCKING RESOURCE USE
Sort	with	Limit:
uses	sizeOfDoc *	Limit	+	sizeOfDoc
Sort	without	Limit:
uses	sizeOfAllDocs
Group:
uses	sizeOfDoc *	numberOfGroups +	sizeOfDoc
BLOCKING RESOURCE USE
#MDBTOUR
STAGES,
EXPRESSIONS,
OPERATORS,
ACCUMULATORS,
OH MY!
# M D B l o c a l
EXPRESSIONS
# M D B l o c a l
ARRAY EXPRESSIONS
# M D B l o c a l
$arrayElemAt
$concatArrays
$indexOfArray
$isArray
$size
$range
$reverseArray
ARRAY EXPRESSIONS
$map
$reduce
$filter
$slice
$zip
$in
...	plus	all	the	set	expressions
# M D B l o c a l
$map
$reduce
$filter
$slice
$zip
$in
...	plus	all	the	set	expressions
$arrayElemAt
$concatArrays
$indexOfArray
$isArray
$size
$range
$reverseArray
ARRAY EXPRESSIONS
# M D B l o c a l
$map
input: array
output: array
$filter
input: array
output: subset of array
$reduce
input: array
output: anything
you
want
ARRAY EXPRESSIONS
# M D B l o c a l
"arr":[{"a":1},{"a":99},{"a":5},{"a":3}]
{"$map": {
"input": "$arr",
"in": "$$this"
}}
{"$map":{
"input": "$arr",
"as":"eachElem",
"in":{"b":"$$eachElem.a"}
} }
{"$map":{
"input": {"$range":[0,{"$size":"$arr"}]},
"as":"index",
"in":{"c":{"$arrayElemAt":["$arr.a","$$index"]}}
} }
ARRAY EXPRESSIONS
"a" 1
"a" 99
"a" 5
"a" 3
"b" 1
"b" 99
"b" 5
"b" 3
"c" 1
"c" 99
"c" 5
"c" 3
"a" 1
"a" 99
"a" 5
"a" 3
"a" 1
"a" 99
"a" 5
"a" 3
0
1
2
3
{"$map": {
"input": "$arr",
"as": "var",
"in": "$$var" }}
$map
{"$map": {
"input": "$arr.a",
"as": "eachElem",
"in": {"b":"$$eachElem"} }}
1
99
5
3
# M D B l o c a l
"arr":[{"a":1},{"a":99},{"a":5},{"a":3}]
{"$filter": {
"input": "$arr",
"cond":{"$lt":["$$this.a",10]}
}}
{"$filter":{
"input": "$arr",
"as":"elem",
"cond":{"$lt":["$$elem.a",10]}
} }
ARRAY EXPRESSIONS
"a" 1
"a" 99
"a" 5
"a" 3
$filter
"a" 1
"a" 5
"a" 3
# M D B l o c a l
"arr":[{"a":1},{"a":99},{"a":5},{"a":3}]
{"$reduce": {
"input": "$arr",
"initialValue": 0,
"in": {$add:["$$value","$$this.a"]}
} }
ARRAY EXPRESSIONS
"a" 1
"a" 99
"a" 5
"a" 3
$reduce
01100105108
"a" 1
"a" 99
"a" 5
"a" 3
# M D B l o c a l
"arr":[{"a":1},{"a":99},{"a":5},{"a":3}]
{"$reduce": {
"input": "$arr",
"initialValue": 0,
"in": {$add:["$$value","$$this.a"]}
} }
{"$reduce":{
"input": "$arr",
"intialValue":[],
"in":{"$concatArrays":[
[ "$$this" ],
"$$value"
]}
} }
ARRAY EXPRESSIONS $reduce
"a" 1
"a" 99
"a" 5
"a" 3
108
"a" 1
"a" 99
"a" 5
"a" 3
# M D B l o c a l
"arr":[{"a":1},{"a":99},{"a":5},{"a":3}]
{"$reduce": {
"input": "$arr",
"initialValue": 0,
"in": {$add:["$$value","$$this.a"]}
} }
{"$reduce":{
"input": "$arr",
"intialValue":[],
"in":{"$concatArrays":[
[ "$$this" ],
"$$value"
]}
} }
ARRAY EXPRESSIONS $reduce
"a" 1
"a" 99
"a" 5
"a" 3
[]"a" 1"a" 99
"a" 1
"a" 5
"a" 99
"a" 1
"a" 3
"a" 5
"a" 99
"a" 1
108
"a" 1
"a" 99
"a" 5
"a" 3
# M D B l o c a l
READABILITY TIP
# M D B l o c a l
Functions for expressions
reverseArray = function(input) {
return {"$reduce":{
"input": input,
"intialValue":[],
"in":{"$concatArrays":[
[ "$$this" ],
"$$value"
]}
}};
};
db.c.aggregate([ {"$addFields":{
"revArray":reverseArray("$origArray")
} } ])
ENCAPSULATE COMPLEXITY
# M D B l o c a l
AGGREGATION
FIND QUERY
UPDATE QUERY
DOCUMENT VALIDATION
EXPRESSIONS
{children: [
{name:"Max", dob:"1994-12-01", dep:true},
{name:"Sam", dob:"1997-09-28", dep:true},
{name:"Kim", dob:"2000-02-29", dep:true}
]}
# M D B l o c a l
AGGREGATION
FIND QUERY
UPDATE QUERY
DOCUMENT VALIDATION
EXPRESSIONS
{children: [
{name:"Max", dob:"1994-12-01", dep:false},
{name:"Sam", dob:"1997-09-28", dep:true},
{name:"Kim", dob:"2000-02-29", dep:true}
]}
db.c.aggregate([
{$addFields:{
numChildren:{$size:"$children"},
numDependents:{$size:{
$filter:{
input:"$children.dep",
cond: "$$this"
}
}}
}},
...
])
# M D B l o c a l
AGGREGATION
FIND QUERY
UPDATE QUERY
DOCUMENT VALIDATION
EXPRESSIONS
new in 3.6!
{children: [
{name:"Max", dob:"1994-12-01", dep:false},
{name:"Sam", dob:"1997-09-28", dep:true},
{name:"Kim", dob:"2000-02-29", dep:true}
]}
db.c.find({$expr:{$gt:["$a","$b"]}})
# M D B l o c a l
AGGREGATION
FIND QUERY
UPDATE QUERY
DOCUMENT VALIDATION
EXPRESSIONS
new in 3.6!
{children: [
{name:"Max", dob:"1994-12-01", dep:false},
{name:"Sam", dob:"1997-09-28", dep:true},
{name:"Kim", dob:"2000-02-29", dep:true}
]}
db.c.find({$expr: {
$lt:[ {$size:{$filter:{
input:"$children.dep",
cond:"$$this"
}}},
2
}})
# M D B l o c a l
AGGREGATION
FIND QUERY
UPDATE QUERY
DOCUMENT VALIDATION
EXPRESSIONS
new in 3.6!
{children: [
{name:"Max", dob:"1994-12-01", dep:false},
{name:"Sam", dob:"1997-09-28", dep:true},
{name:"Kim", dob:"2000-02-29", dep:true}
]}
db.c.find({$expr:{$gt:[
{$let:{
vars:{dobs:{$map:{
input:"$children.dob"
in: {$year:{$dateFromString:{
dateString:"$$this"
}}}}}},
in:{$subtract:[
{$max:"$$dobs"},
{$min:"$$dobs"}
]}}},
10
]}})
# M D B l o c a l
AGGREGATION
FIND QUERY
UPDATE QUERY
DOCUMENT VALIDATION
EXPRESSIONS
new in 3.6!
{children: [
{name:"Max", dob:"1994-12-01", dep:false},
{name:"Sam", dob:"1997-09-28", dep:true},
{name:"Kim", dob:"2000-02-29", dep:true}
]}
db.c.update({$expr: {$anyElementTrue:{$map:{
input:"$children",
in: {$and:[
{$lt:["$$this.dob","1997-01-22"]},
"$$this.dep"
]}
}}}},
{$set:{ audit:true }}
)
# M D B l o c a l
AGGREGATION
FIND QUERY
UPDATE QUERY
DOCUMENT VALIDATION
EXPRESSIONS
new in 3.6!
{children: [
{name:"Max", dob:"1994-12-01", dep:false},
{name:"Sam", dob:"1997-09-28", dep:true},
{name:"Kim", dob:"2000-02-29", dep:true}
]}
db.createCollection("c", validator:
{$expr: {
<anything you can express>
}})
# M D B l o c a l
NETWORK SUSPECT ACTIVITY DETECTION
start=ISODate("...")
end=ISODate("...")
{
user: "303900",
ipaddr: "71.56.112.56",
ts:ISODate("2017-05-08T05:28:13Z")
}
{$match:{ts:{$gte:start,$lt:end}}},
{$sort:{ts:1}},
{$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}}}},
$sort$match $group
start=ISODate("...")
end=ISODate("...")
{
user: "303900",
ipaddr: "71.56.112.56",
ts:ISODate("2017-05-08T...")
}
{ _id: "303900",
ips: [
{ip:"71.56.112.56",
ts:ISODate("2017-05-08T08:54:04Z")
},
{ip:"71.56.112.56",
ts:ISODate("2017-05-09T09:01:11Z")
},
{ip:"12.130.117.87",
ts:ISODate("2017-05-09T09:04:59Z")
}
]}
$sort$match $group $addFields $match
start=ISODate("...")
end=ISODate("...")
{
user: "303900",
ipaddr: "71.56.112.56",
ts:ISODate("2017-05-08T...")
}
$project
{ _id: "303900",
ips: [
{ip:"71.56.112.56",
ts:ISODate("2017-05-08T...")
},
{ip:"71.56.112.56",
ts:ISODate("2017-05-09T...")
},
{ip:"12.130.117.87",
ts:ISODate("2017-05-09T...")
}
]}
{$match:{ts:{$gte:start,$lt:end}}},
{$sort:{ts:1}},
{$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}}}},
{$addFields:{diffs: {$filter:{
input:{$map:{
input: {$range:[0,{$subtract:[{$size:"$ips"},1]}]}, as:"i",
in:{$let:{vars:{ip1:{$arrayElemAt:["$ips","$$i"]},
ip2:{$arrayElemAt:["$ips",{$add:["$$i",1]}]}},
in:{
diff:{$divide:[{$subtract:["$$ip2.ts","$$ip1.ts"]},60000]},
ip1:"$$ip1.ip", t1:"$$ip1.ts",
ip2:"$$ip2.ip", t2:"$$ip2.ts"
}}}}},
cond:{$and:[{$lt:["$$this.diff",10]},{$ne:["$$this.ip1","$$this.ip2"]}]}
}}}},
$sort$match $group $addFields $match
start=ISODate("...")
end=ISODate("...")
{
user: "303900",
ipaddr: "71.56.112.56",
ts:ISODate("2017-05-08T...")
}
$project
{ _id: "303900",
ips: [
{ip:"71.56.112.56",
ts:ISODate("2017-05-08T...")
},
{ip:"71.56.112.56",
ts:ISODate("2017-05-09T...")
},
{ip:"12.130.117.87",
ts:ISODate("2017-05-09T...")
}
]}
{$match:{ts:{$gte:start,$lt:end}}},
{$sort:{ts:1}},
{$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}}}},
{$addFields:{diffs: {$filter:{
input:{$map:{
input: {$range:[0,{$subtract:[{$size:"$ips"},1]}]}, as:"i",
in:{$let:{vars:{ip1:{$arrayElemAt:["$ips","$$i"]},
ip2:{$arrayElemAt:["$ips",{$add:["$$i",1]}]}},
in:{
diff:{$divide:[{$subtract:["$$ip2.ts","$$ip1.ts"]},60000]},
ip1:"$$ip1.ip", t1:"$$ip1.ts",
ip2:"$$ip2.ip", t2:"$$ip2.ts"
}}}}},
cond:{$and:[{$lt:["$$this.diff",10]},{$ne:["$$this.ip1","$$this.ip2"]}]}
}}}},
{$match:{"diffs":{$ne:[]}}},
{$project:{_id:0, user:"$_id", suspectLogins:"$diffs"}}
$sort$match $group $addFields $match
start=ISODate("...")
end=ISODate("...")
{
user: "303900",
ipaddr: "71.56.112.56",
ts:ISODate("2017-05-08T...")
}
$project
{$match:{ts:{$gte:start,$lt:end}}},
{$sort:{ts:1}},
{$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}}}},
{$addFields:{diffs: {$filter:{
input:{$map:{
input: {$range:[0,{$subtract:[{$size:"$ips"},1]}]}, as:"i",
in:{$let:{vars:{ip1:{$arrayElemAt:["$ips","$$i"]},
ip2:{$arrayElemAt:["$ips",{$add:["$$i",1]}]}},
in:{
diff:{$divide:[{$subtract:["$$ip2.ts","$$ip1.ts"]},60000]},
ip1:"$$ip1.ip", t1:"$$ip1.ts",
ip2:"$$ip2.ip", t2:"$$ip2.ts"
}}}}},
cond:{$and:[{$lt:["$$this.diff",10]},{$ne:["$$this.ip1","$$this.ip2"]}]}
}}}},
{$match:{"diffs":{$ne:[]}}},
{$project:{_id:0, user:"$_id", suspectLogins:"$diffs"}}
{ "user" : "35237073",
"suspectLogins" : [
{"diff": 4.8333333333,
"ip1": "106.220.151.16",
"t1":"2017-05-08T06:58",
"ip2": "223.182.113.15"
"t2":"2017-05-08T07:03"
},
{"diff": 8.3,
"ip1": "223.182.113.15",
"t1":"2017-05-08T07:03",
"ip2": "49.206.217.26",
"t2":"2017-05-08T07:11"
}
]
}
$sort$match $group $addFields $match $project
{$match:{ts:{$gte:start,$lt:end}}},
{$sort:{ts:1}},
{$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}}}},
{$addFields:{diffs: {$filter:{
input:{$map:{
input: {$range:[0,{$subtract:[{$size:"$ips"},1]}]}, as:"i",
in:{$let:{vars:{ip1:{$arrayElemAt:["$ips","$$i"]},
ip2:{$arrayElemAt:["$ips",{$add:["$$i",1]}]}},
in:{
diff:{$divide:[{$subtract:["$$ip2.ts","$$ip1.ts"]},60000]},
ip1:"$$ip1.ip", t1:"$$ip1.ts",
ip2:"$$ip2.ip", t2:"$$ip2.ts"
}}}}},
cond:{$and:[{$lt:["$$this.diff",10]},{$ne:["$$this.ip1","$$this.ip2"]}]}
}}}},
{$match:{"diffs":{$ne:[]}}},
{$project:{_id:0, user:"$_id", suspectLogins:"$diffs"}}
start=ISODate("...")
end=ISODate("...")
{
user: "303900",
ipaddr: "71.56.112.56",
ts:ISODate("2017-05-08T...")
}
{ _id: "303900",
ips: [
{ip:"71.56.112.56",
ts:ISODate("2017-05-08T...")
},
{ip:"71.56.112.56",
ts:ISODate("2017-05-09T...")
},
{ip:"12.130.117.87",
ts:ISODate("2017-05-09T...")
}
]}
$sort$match $group $addFields $match
start=ISODate("...")
end=ISODate("...")
{
user: "303900",
ipaddr: "71.56.112.56",
ts:ISODate("2017-05-08T...")
}
$addFields $match $proje
{$match:{ts:{$gte:start,$lt:end}}},
{$sort:{ts:1}},
{$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}}}},
{$addFields:{diffIpNum:{$size:{$setUnion:"$ips.ip"}}}},
{$match:{diffIpNum:{$gt:1}}},
{$addFields:{diffs: {$filter:{
input:{$map:{
input: {$range:[0,{$subtract:[{$size:"$ips"},1]}]}, as:"i",
in:{$let:{vars:{ip1:{$arrayElemAt:["$ips","$$i"]},
ip2:{$arrayElemAt:["$ips",{$add:["$$i",1]}]}},
in:{
diff:{$divide:[{$subtract:["$$ip2.ts","$$ip1.ts"]},60000]},
ip1:"$$ip1.ip", t1:"$$ip1.ts",
ip2:"$$ip2.ip", t2:"$$ip2.ts"
}}}}},
cond:{$and:[{$lt:["$$this.diff",10]},{$ne:["$$this.ip1","$$this.ip2"]}]}
}}}},
{$match:{"diffs":{$ne:[]}}},
{$project:{_id:0, user:"$_id", suspectLogins:"$diffs"}}
{ _id: "303900",
ips: [
{ip:"71.56.112.56",
ts:ISODate("2017-05-08T...")
},
{ip:"71.56.112.56",
ts:ISODate("2017-05-09T...")
},
{ip:"12.130.117.87",
ts:ISODate("2017-05-09T...")
}
]}
$sort$match $group $match
start=ISODate("...")
end=ISODate("...")
{
user: "303900",
ipaddr: "71.56.112.56",
ts:ISODate("2017-05-08T...")
}
$addFields $match $project
{$match:{ts:{$gte:start,$lt:end}}},
{$sort:{ts:1}},
{$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}},
diffIps:{$addToSet:"$ipaddr"}}},
{$match:{$expr:{$gt:[{$size:"$diffIps"},1]}}},
{$addFields:{diffs: {$filter:{
input:{$map:{
input: {$range:[0,{$subtract:[{$size:"$ips"},1]}]}, as:"i",
in:{$let:{vars:{ip1:{$arrayElemAt:["$ips","$$i"]},
ip2:{$arrayElemAt:["$ips",{$add:["$$i",1]}]}},
in:{
diff:{$divide:[{$subtract:["$$ip2.ts","$$ip1.ts"]},60000]},
ip1:"$$ip1.ip", t1:"$$ip1.ts",
ip2:"$$ip2.ip", t2:"$$ip2.ts"
}}}}},
cond:{$and:[{$lt:["$$this.diff",10]},{$ne:["$$this.ip1","$$this.ip2"]}]}
}}}},
{$match:{"diffs":{$ne:[]}}},
{$project:{_id:0, user:"$_id", suspectLogins:"$diffs"}}
{ _id: "303900",
ips: [
{ip:"71.56.112.56",
ts:ISODate("2017-05-08T...")
},
{ip:"71.56.112.56",
ts:ISODate("2017-05-09T...")
},
{ip:"12.130.117.87",
ts:ISODate("2017-05-09T...")
},
diffIps: [
"71.56.112.56",
"12.130.117.87"
]
]}
$sort$match $group $match
start=ISODate("...")
end=ISODate("...")
{
user: "303900",
ipaddr: "71.56.112.56",
ts:ISODate("2017-05-08T...")
}
$addFields $match $project
{$match:{ts:{$gte:start,$lt:end}}},
{$sort:{ts:1}},
{$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}},
diffIps:{$addToSet:"$ipaddr"}}},
{$match:{"diffIps.1":{$exists:true}}},
{$addFields:{diffs: {$filter:{
input:{$map:{
input: {$range:[0,{$subtract:[{$size:"$ips"},1]}]}, as:"i",
in:{$let:{vars:{ip1:{$arrayElemAt:["$ips","$$i"]},
ip2:{$arrayElemAt:["$ips",{$add:["$$i",1]}]}},
in:{
diff:{$divide:[{$subtract:["$$ip2.ts","$$ip1.ts"]},60000]},
ip1:"$$ip1.ip", t1:"$$ip1.ts",
ip2:"$$ip2.ip", t2:"$$ip2.ts"
}}}}},
cond:{$and:[{$lt:["$$this.diff",10]},{$ne:["$$this.ip1","$$this.ip2"]}]}
}}}},
{$match:{"diffs":{$ne:[]}}},
{$project:{_id:0, user:"$_id", suspectLogins:"$diffs"}}
{ _id: "303900",
ips: [
{ip:"71.56.112.56",
ts:ISODate("2017-05-08T...")
},
{ip:"71.56.112.56",
ts:ISODate("2017-05-09T...")
},
{ip:"12.130.117.87",
ts:ISODate("2017-05-09T...")
},
diffIps: [
"71.56.112.56",
"12.130.117.87"
]
]}
$sort$match $group
start=ISODate("...")
end=ISODate("...")
{
user: "303900",
ipaddr: "71.56.112.56",
ts:ISODate("2017-05-08T...")
}
{$match:{ts:{$gte:start,$lt:end}}},
{$sort:{ts:1}},
{$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}},
diffIps:{$addToSet:"$ipaddr"}}},
{$match:{"diffIps.1":{$exists:true}}},
{$addFields:{diffs: {$filter:{
input:{$map:{
input: {$range:[0,{$subtract:[{$size:"$ips"},1]}]}, as:"i",
in:{$let:{vars:{ip1:{$arrayElemAt:["$ips","$$i"]},
ip2:{$arrayElemAt:["$ips",{$add:["$$i",1]}]}},
in:{
diff:{$divide:[{$subtract:["$$ip2.ts","$$ip1.ts"]},60000]},
ip1:"$$ip1.ip", t1:"$$ip1.ts",
ip2:"$$ip2.ip", t2:"$$ip2.ts"
}}}}},
cond:{$and:[{$lt:["$$this.diff",10]},{$ne:["$$this.ip1","$$this.ip2"]}]}
}}}},
{$match:{"diffs":{$ne:[]}}},
{$project:{_id:0, user:"$_id", suspectLogins:"$diffs"}}
$match $addFields $match $project
{ _id: "303900",
ips: [
{ip:"71.56.112.56",
ts:ISODate("2017-05-08T...")
},
{ip:"71.56.112.56",
ts:ISODate("2017-05-09T...")
},
{ip:"12.130.117.87",
ts:ISODate("2017-05-09T...")
},
diffIps: [
"71.56.112.56",
"12.130.117.87"
]
]}
$sort$match $group
start=ISODate("...")
end=ISODate("...")
{
user: "303900",
ipaddr: "71.56.112.56",
ts:ISODate("2017-05-08T...")
}
{$match:{ts:{$gte:start,$lt:end}}},
{$sort:{ts:1}},
{$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}},
diffIps:{$addToSet:"$ipaddr"}}},
{$match:{"diffIps.1":{$exists:true}}},
{$addFields:{diffs: {$filter:{
input:{$map:{
input: {$range:[0,{$subtract:[{$size:"$ips"},1]}]}, as:"i",
in:{$let:{vars:{ip1:{$arrayElemAt:["$ips","$$i"]},
ip2:{$arrayElemAt:["$ips",{$add:["$$i",1]}]}},
in:{
diff:{$cond:{
}},
ip1:"$$ip1.ip", t1:"$$ip1.ts",
ip2:"$$ip2.ip", t2:"$$ip2.ts"
}}}}},
cond:{$and:[{$lt:["$$this.diff",10]},{$ne:["$$this.ip1","$$this.ip2"]}]}
}}}},
{$match:{"diffs":{$ne:[]}}},
{$project:{_id:0, user:"$_id", suspectLogins:"$diffs"}}
$match $addFields $match $project
{ _id: "303900",
ips: [
{ip:"71.56.112.56",
ts:ISODate("2017-05-08T...")
},
{ip:"71.56.112.56",
ts:ISODate("2017-05-09T...")
},
{ip:"12.130.117.87",
ts:ISODate("2017-05-09T...")
},
diffIps: [
"71.56.112.56",
"12.130.117.87"
]
]}
$sort$match $group
start=ISODate("...")
end=ISODate("...")
{
user: "303900",
ipaddr: "71.56.112.56",
ts:ISODate("2017-05-08T...")
}
{$match:{ts:{$gte:start,$lt:end}}},
{$sort:{ts:1}},
{$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}},
diffIps:{$addToSet:"$ipaddr"}}},
{$match:{"diffIps.1":{$exists:true}}},
{$addFields:{diffs: {$filter:{
input:{$map:{
input: {$range:[0,{$subtract:[{$size:"$ips"},1]}]}, as:"i",
in:{$let:{vars:{ip1:{$arrayElemAt:["$ips","$$i"]},
ip2:{$arrayElemAt:["$ips",{$add:["$$i",1]}]}},
in:{
diff:{$cond:{
if:{$ne:["$$ip1.ip","$$ip2.ip"]},
then:{$divide:[{$subtract:["$$ip2.ts","$$ip1.ts"]},60000]},
else: 9999 }},
ip1:"$$ip1.ip", t1:"$$ip1.ts",
ip2:"$$ip2.ip", t2:"$$ip2.ts"
}}}}},
cond:{$and:[{$lt:["$$this.diff",10]},{$ne:["$$this.ip1","$$this.ip2"]}]}
}}}},
{$match:{"diffs":{$ne:[]}}},
{$project:{_id:0, user:"$_id", suspectLogins:"$diffs"}}
$match $addFields $match $project
{ _id: "303900",
ips: [
{ip:"71.56.112.56",
ts:ISODate("2017-05-08T...")
},
{ip:"71.56.112.56",
ts:ISODate("2017-05-09T...")
},
{ip:"12.130.117.87",
ts:ISODate("2017-05-09T...")
},
diffIps: [
"71.56.112.56",
"12.130.117.87"
]
]}
$sort$match $group
start=ISODate("...")
end=ISODate("...")
{
user: "303900",
ipaddr: "71.56.112.56",
ts:ISODate("2017-05-08T...")
}
{$match:{ts:{$gte:start,$lt:end}}},
{$sort:{ts:1}},
{$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}},
diffIps:{$addToSet:"$ipaddr"}}},
{$match:{"diffIps.1":{$exists:true}}},
{$addFields:{diffs: {$filter:{
input:{$map:{
input: {$range:[0,{$subtract:[{$size:"$ips"},1]}]}, as:"i",
in:{$let:{vars:{ip1:{$arrayElemAt:["$ips","$$i"]},
ip2:{$arrayElemAt:["$ips",{$add:["$$i",1]}]}},
in:{
diff:{$cond:{
if:{$ne:["$$ip1.ip","$$ip2.ip"]},
then:{$divide:[{$subtract:["$$ip2.ts","$$ip1.ts"]},60000]},
else: 9999 }},
ip1:"$$ip1.ip", t1:"$$ip1.ts",
ip2:"$$ip2.ip", t2:"$$ip2.ts"
}}}}},
cond:{$and:[{$lt:["$$this.diff",10]},{$ne:["$$this.ip1","$$this.ip2"]}]}
}}}},
{$match:{"diffs":{$ne:[]}}},
{$project:{_id:0, user:"$_id", suspectLogins:"$diffs"}}
$match $addFields $match $project
{ _id: "303900",
ips: [
{ip:"71.56.112.56",
ts:ISODate("2017-05-08T...")
},
{ip:"71.56.112.56",
ts:ISODate("2017-05-09T...")
},
{ip:"12.130.117.87",
ts:ISODate("2017-05-09T...")
},
diffIps: [
"71.56.112.56",
"12.130.117.87"
]
]}
$sort$match $group
start=ISODate("...")
end=ISODate("...")
{
user: "303900",
ipaddr: "71.56.112.56",
ts:ISODate("2017-05-08T...")
}
{$match:{ts:{$gte:start,$lt:end}}},
{$sort:{ts:1}},
{$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}},
diffIps:{$addToSet:"$ipaddr"}}},
{$match:{"diffIps.1":{$exists:true}}},
{$addFields:{diffs: {$filter:{
input:{$map:{
input: {$range:[0,{$subtract:[{$size:"$ips"},1]}]}, as:"i",
in:{$let:{vars:{ip1:{$arrayElemAt:["$ips","$$i"]},
ip2:{$arrayElemAt:["$ips",{$add:["$$i",1]}]}},
in:{
diff:{$cond:{
if:{$ne:["$$ip1.ip","$$ip2.ip"]},
then:{$divide:[{$subtract:["$$ip2.ts","$$ip1.ts"]},60000]},
else: 9999 }},
ip1:"$$ip1.ip", t1:"$$ip1.ts",
ip2:"$$ip2.ip", t2:"$$ip2.ts"
}}}}},
cond:{$lt:["$$this.diff",10]}
}}}},
{$match:{"diffs":{$ne:[]}}},
{$project:{_id:0, user:"$_id", suspectLogins:"$diffs"}}
{ "user" : "35237073",
"suspectLogins" : [
{"diff": 4.8333333333,
"ip1": "106.220.151.16",
"t1":"2017-05-08T06:58",
"ip2": "223.182.113.15"
"t2":"2017-05-08T07:03"
},
{"diff": 8.3,
"ip1": "223.182.113.15",
"t1":"2017-05-08T07:03",
"ip2": "49.206.217.26",
"t2":"2017-05-08T07:11"
}
]
} $match $addFields $match $project
$sort$match $group
start=ISODate("...")
end=ISODate("...")
{
user: "303900",
ipaddr: "71.56.112.56",
ts:ISODate("2017-05-08T...")
}
{$match:{ts:{$gte:start,$lt:end}}},
{$sort:{ts:1}},
{$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}},
diffIps:{$addToSet:"$ipaddr"}}},
{$match:{"diffIps.1":{$exists:true}}},
{$addFields:{diffs: {$filter:{
input:{$map:{
input: {$range:[0,{$subtract:[{$size:"$ips"},1]}]}, as:"i",
in:{$let:{vars:{ip1:{$arrayElemAt:["$ips","$$i"]},
ip2:{$arrayElemAt:["$ips",{$add:["$$i",1]}]}},
in:{
diff:{$cond:{
if:{$ne:["$$ip1.ip","$$ip2.ip"]},
then:{$divide:[{$subtract:["$$ip2.ts","$$ip1.ts"]},60000]},
else: 9999 }},
ip1:"$$ip1.ip", t1:"$$ip1.ts",
ip2:"$$ip2.ip", t2:"$$ip2.ts"
}}}}},
cond:{$lt:["$$this.diff",10]}
}}}},
{$match:{"diffs":{$ne:[]}}},
{$project:{_id:0, user:"$_id", suspectLogins:"$diffs"}}
{ "user" : "35237073",
"suspectLogins" : [
{"diff": 4.8333333333,
"ip1": "106.220.151.16",
"t1":"2017-05-08T06:58",
"ip2": "223.182.113.15"
"t2":"2017-05-08T07:03"
},
{"diff": 8.3,
"ip1": "223.182.113.15",
"t1":"2017-05-08T07:03",
"ip2": "49.206.217.26",
"t2":"2017-05-08T07:11"
}
]
} $match $addFields $match $project
$sort$match $group
start=ISODate("...")
end=ISODate("...")
{
user: "303900",
ipaddr: "71.56.112.56",
ts:ISODate("2017-05-08T...")
}
{$match:{ts:{$gte:start,$lt:end}}},
{$sort:{ts:1}},
{$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}},
diffIps:{$addToSet:"$ipaddr"}}},
{$match:{"diffIps.1":{$exists:true}}},
{$addFields:{diffs: {$filter:{
input:{$map:{
input: {$range:[0,{$subtract:[{$size:"$ips"},1]}]}, as:"i",
in:{$let:{vars:{ip1:{$arrayElemAt:["$ips","$$i"]},
ip2:{$arrayElemAt:["$ips",{$add:["$$i",1]}]}},
in:{
diff:{$cond:{
if:{$ne:["$$ip1.ip","$$ip2.ip"]},
then:{$divide:[{$subtract:["$$ip2.ts","$$ip1.ts"]},60000]},
else: 9999 }},
ip1:"$$ip1.ip", t1:"$$ip1.ts",
ip2:"$$ip2.ip", t2:"$$ip2.ts"
}}}}},
cond:{$lt:["$$this.diff",10]}
}}}},
{$match:{"diffs":{$ne:[]}}},
{$project:{_id:0, user:"$_id", suspectLogins:"$diffs"}}
{ "user" : "35237073",
"suspectLogins" : [
{"diff": 4.8333333333,
"ip1": "106.220.151.16",
"t1":"2017-05-08T06:58",
"ip2": "223.182.113.15"
"t2":"2017-05-08T07:03"
},
{"diff": 8.3,
"ip1": "223.182.113.15",
"t1":"2017-05-08T07:03",
"ip2": "49.206.217.26",
"t2":"2017-05-08T07:11"
}
]
} $match $addFields $match $project
$sort$match $group
start=ISODate("...")
end=ISODate("...")
{
user: "303900",
ipaddr: "71.56.112.56",
ts:ISODate("2017-05-08T...")
}
{$match:{ts:{$gte:start,$lt:end}}},
{$sort:{ts:1}},
{$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}},
diffIps:{$addToSet:"$ipaddr"}}},
{$match:{"diffIps.1":{$exists:true}}},
{$addFields:{diffs: {$filter:{
input:{$map:{
input: {$range:[0,{$subtract:[{$size:"$ips"},1]}]}, as:"i",
in:{$let:{vars:{ip1:{$arrayElemAt:["$ips","$$i"]},
ip2:{$arrayElemAt:["$ips",{$add:["$$i",1]}]}},
in:{
diff:{$cond:{
if:{$ne:["$$ip1.ip","$$ip2.ip"]},
then:{$divide:[{$subtract:["$$ip2.ts","$$ip1.ts"]},60000]},
else: 9999 }},
ip1:"$$ip1.ip", t1:"$$ip1.ts",
ip2:"$$ip2.ip", t2:"$$ip2.ts"
}}}}},
cond:{$lt:["$$this.diff",10]}
}}}},
{$match:{"diffs":{$ne:[]}}},
{$project:{_id:0, user:"$_id", suspectLogins:"$diffs"}}
{ "user" : "35237073",
"suspectLogins" : [
{"diff": 4.8333333333,
"ip1": "106.220.151.16",
"t1":"2017-05-08T06:58",
"ip2": "223.182.113.15"
"t2":"2017-05-08T07:03"
},
{"diff": 8.3,
"ip1": "223.182.113.15",
"t1":"2017-05-08T07:03",
"ip2": "49.206.217.26",
"t2":"2017-05-08T07:11"
}
]
} $match $addFields $match $project
# M D B l o c a l
understand stages
• Best order for performance
• Avoid unnecessary "blocking"
• keep "streaming"
• Maximize use of indexes
• early stages get the index!
• Liberally check explain() output
POWERFUL AGGREGATIONS
understand expressions
• Schema manipulation
• Array transformation
• Use in find query filter, ...
use functions
• Readable, debug-able, reusable
# M D B l o c a l
Better performance & optimizations
More stages & expressions
More options for output
Compass helper for aggregate
Unify different languages
THE FUTURE OF AGGREGATION
https://github.com/asya999/mdbw17
https://github.com/asya999/mdbw17
# M D B l o c a l
THANK YOU!
https://github.com/asya999/mdbw17

Doing More with MongoDB Aggregation

  • 1.
    # M DB l o c a l ASYA KAMSKY LEAD KNOW-IT-ALL MONGODB, INC @asya999 #askAsya PIPELINE POWER DOING MORE WITH MONGODB AGGREGATION FRAMEWORK
  • 2.
  • 4.
    # M DB l o c a l OPTIONS FOR ANALYTICS pre-aggregate aggregate in MongoDB aggregate elsewhere
  • 5.
    # M DB l o c a l pre-aggregate aggregate in MongoDB aggregate elsewhere OPTIONS FOR ANALYTICS
  • 6.
    # M DB l o c a l pre-aggregate aggregate in MongoDB aggregate elsewhere OPTIONS FOR ANALYTICS
  • 10.
  • 11.
    # M DB l o c a l pre-aggregate aggregate in MongoDB aggregate elsewhere OPTIONS FOR ANALYTICS
  • 12.
    # M DB l o c a l ANALYTICS = AGGREGATION pre-aggregate aggregate in MongoDB aggregate elsewhere
  • 13.
  • 14.
    ps ax |grepmongod |head 1 *nix command line pipe PIPELINE
  • 15.
    $match $group |$sort| Input stream {} {} {} {} Result {} {} ... PIPELINE MongoDB document pipeline
  • 16.
    Stage 1 Stage 2 Stage 3Stage 4 {} {} {} {} {} {} {} {} DATA PIPELINE {} {} {} {} {"$stage":{ ... }} START Collection View Special stage STAGES
  • 17.
    {title: "The GreatGatsby", language: "English", subjects: "Long Island"} {title: "The Great Gatsby", language: "English", subjects: "New York"} {title: "The Great Gatsby", language: "English", subjects: "1920s"} {title: "The Great Gatsby", language: "English", subjects: [ "Long Island", "New York", "1920s"] }, {"$match":{"language":"English"}} $match { _id:"Long Island", count: 1 }, $group { _id: "New York", count: 2 }, $unwind { _id: "1920s", count: 1 }, $sort $skip$limit $project {"$unwind":"$subjects"} {"$group":{"_id":"$subjects", "count":{"$sum:1}} { _id: "Harlem", count: 1 }, { _id: "Long Island", count: 1 }, { _id: "New York", count: 2 }, { _id: "1920s", count: 1 }, {title: "Open City", language: "English", subjects: [ "New York" "Harlem" ] } { title: "The Great Gatsby", language: "English", subjects: [ "Long Island", "New York", "1920s"] }, { title: "War and Peace", language: "Russian", subjects: [ "Russia", "War of 1812", "Napoleon"] }, { title: "Open City", language: "English", subjects: [ "New York", "Harlem" ] }, {title: "Open City", language: "English", subjects: "New York"} {title: "Open City", language: "English", subjects: "Harlem"} { _id: "Harlem", count: 1 }, {"$sort:{"count":-1} {"$limit":3} {"$project":...}
  • 18.
  • 20.
  • 21.
    {title: "The GreatGatsby", language: "English", subjects: "Long Island"} {title: "The Great Gatsby", language: "English", subjects: "New York"} {title: "The Great Gatsby", language: "English", subjects: "1920s"} {title: "The Great Gatsby", language: "English", subjects: [ "Long Island", "New York", "1920s"] }, {"$match":{"language":"English"}} $match { _id:"Long Island", count: 1 }, $group { _id: "New York", count: 2 }, $unwind { _id: "1920s", count: 1 }, $sort $skip$limit $project {"$unwind":"$subjects"} {"$group":{"_id":"$subjects", "count":{"$sum:1}} { _id: "Harlem", count: 1 }, { _id: "Long Island", count: 1 }, { _id: "New York", count: 2 }, { _id: "1920s", count: 1 }, {title: "Open City", language: "English", subjects: [ "New York" "Harlem" ] } { title: "The Great Gatsby", language: "English", subjects: [ "Long Island", "New York", "1920s"] }, { title: "War and Peace", language: "Russian", subjects: [ "Russia", "War of 1812", "Napoleon"] }, { title: "Open City", language: "English", subjects: [ "New York", "Harlem" ] }, {title: "Open City", language: "English", subjects: "New York"} {title: "Open City", language: "English", subjects: "Harlem"} { _id: "Harlem", count: 1 }, {"$sort:{"count":-1} {"$limit":3} {"$project":...}
  • 22.
    # M DB l o c a l LET ME EXPLAIN...
  • 23.
    db.books.aggregate([ {$match:{"language":"English"}}, {$unwind:"$subjects"}, {$group:{_id:"$subjects",count:{$sum:1}}}, {$sort:{count:-1}}, {$limit:3} ],{explain:true}) {"stages" : [ {"$cursor": {"query" : { "language" : "English"}, "fields" : { "subjects" : 1,"_id" : 0} ... }}, {"$unwind" : {"path" : "$subjects"}}, {"$group" : {"_id" : "$subjects","count" : {"$sum" : {"$const" : 1}} }}, {"$sort" : { "sortKey" : {"count" : -1}, "limit" : NumberLong(3) }} ] }
  • 24.
    db.books.aggregate([ {$match:{"language":"English"}}, {$unwind:"$subjects"}, {$group:{_id:"$subjects",count:{$sum:1}}}, {$sort:{count:-1}}, {$limit:3} ],{explain:true}) {"stages" : [ {"$cursor": {"query" : { "language" : "English"}, "fields" : { "subjects" : 1,"_id" : 0} ... }}, {"$unwind" : {"path" : "$subjects"}}, {"$group" : {"_id" : "$subjects","count" : {"$sum" : {"$const" : 1}} }}, {"$sort" : { "sortKey" : {"count" : -1}, "limit" : NumberLong(3) }} ] }
  • 25.
    db.books.aggregate([ {$match:{"language":"English"}}, {$unwind:"$subjects"}, {$group:{_id:"$subjects",count:{$sum:1}}}, {$sort:{count:-1}}, {$limit:3} ],{explain:true}) {"stages" : [ {"$cursor": {"query" : { }, "fields" : { "subjects" : 1,"_id" : 0} ... }}, {"$unwind" : {"path" : "$subjects"}}, {"$group" : {"_id" : "$subjects","count" : {"$sum" : {"$const" : 1}} }}, {"$sort" : { "sortKey" : {"count" : -1}, "limit" : NumberLong(3) }} ] }
  • 26.
  • 27.
    db.books.aggregate([ {$unwind:"$subjects"}, {$match:{"language":"English"}}, {$group:{_id:"$subjects",count:{$sum:1}}}, {$sort:{count:-1}}, {$limit:3} ],{explain:true}) {"stages" : [ {"$cursor": {"query" : { "language" : "English"}, "fields" : { "subjects" : 1,"_id" : 0} ... }}, {"$unwind" : {"path" : "$subjects"}}, {"$group" : {"_id" : "$subjects","count" : {"$sum" : {"$const" : 1}} }}, {"$sort" : { "sortKey" : {"count" : -1}, "limit" : NumberLong(3) }} ] }
  • 28.
    db.books.aggregate([ {$unwind:"$subjects"}, {$match:{"language":"English","subjects":/^[ABC]/}}, {$group:{_id:"$subjects",count:{$sum:1}}}, {$sort:{count:-1}}, {$limit:3} ],{explain:true}) {"stages" : [ {"$cursor": {"query" : { "language" : "English"}, "fields" : { "subjects" : 1,"_id" : 0} ... }}, {"$unwind" : {"path" : "$subjects"}}, {"$match" : {"subjects" : {"$regex" : "^[ABC]"}}}, {"$group" : {"_id" : "$subjects","count" : {"$sum" : {"$const" : 1}}}}, {"$sort" : { "sortKey" : {"count" : -1}, "limit" : NumberLong(3) }} ] }
  • 29.
    db.books.aggregate([ {$unwind:"$subjects"}, {$match:{"language":"English","subjects":/^[ABC]/}}, {$group:{_id:"$subjects",count:{$sum:1}}}, {$sort:{count:-1}}, {$limit:3} ],{explain:true}) {"stages" : [ {"$cursor": {"query" : { "language" : "English"}, "fields" : { "subjects" : 1,"_id" : 0} ... }}, {"$unwind" : {"path" : "$subjects"}}, {"$match" : {"subjects" : {"$regex" : "^[ABC]"}}}, {"$group" : {"_id" : "$subjects","count" : {"$sum" : {"$const" : 1}}}}, {"$sort" : { "sortKey" : {"count" : -1}, "limit" : NumberLong(3) }} ] }
  • 30.
  • 31.
    {title: "The GreatGatsby", language: "English". subjects: "Long Island"} {title: "The Great Gatsby", language: "English", subjects: "New York"} {title: "The Great Gatsby", language: "English", subjects: "1920s"} {title: "The Great Gatsby", language: "English", subjects: [ "Long Island", "New York", "1920s"] }, {"$match":{"language":"English"}} $match { _id:"Long Island", count: 1 }, $group { _id: "New York", count: 2 }, $unwind { _id: "1920s", count: 1 }, $sort $skip$limit $project {"$unwind":"$subjects"} {"$group":{"_id":"$subjects", "count":{"$sum:1}} { _id: "Harlem", count: 1 }, { _id: "Long Island", count: 1 }, { _id: "New York", count: 2 }, { _id: "1920s", count: 1 }, {title: "Open City", language: "English", subjects: [ "New York" "Harlem" ] } { title: "The Great Gatsby", language: "English", subjects: [ "Long Island", "New York", "1920s"] }, { title: "War and Peace", language: "Russian", subjects: [ "Russia", "War of 1812", "Napoleon"] }, { title: "Open City", language: "English", subjects: [ "New York", "Harlem" ] }, {title: "Open City", language: "English", subjects: "New York"} {title: "Open City", language: "English", subjects: "Harlem"} { _id: "Harlem", count: 1 }, {"$sort:{"count":-1} {"$limit":3} {"$project":...} $group $sort 1
  • 32.
  • 33.
  • 34.
  • 35.
  • 36.
  • 37.
  • 38.
  • 39.
    # M DB l o c a l EXPRESSIONS
  • 43.
    # M DB l o c a l ARRAY EXPRESSIONS
  • 44.
    # M DB l o c a l $arrayElemAt $concatArrays $indexOfArray $isArray $size $range $reverseArray ARRAY EXPRESSIONS $map $reduce $filter $slice $zip $in ... plus all the set expressions
  • 45.
    # M DB l o c a l $map $reduce $filter $slice $zip $in ... plus all the set expressions $arrayElemAt $concatArrays $indexOfArray $isArray $size $range $reverseArray ARRAY EXPRESSIONS
  • 46.
    # M DB l o c a l $map input: array output: array $filter input: array output: subset of array $reduce input: array output: anything you want ARRAY EXPRESSIONS
  • 47.
    # M DB l o c a l "arr":[{"a":1},{"a":99},{"a":5},{"a":3}] {"$map": { "input": "$arr", "in": "$$this" }} {"$map":{ "input": "$arr", "as":"eachElem", "in":{"b":"$$eachElem.a"} } } {"$map":{ "input": {"$range":[0,{"$size":"$arr"}]}, "as":"index", "in":{"c":{"$arrayElemAt":["$arr.a","$$index"]}} } } ARRAY EXPRESSIONS "a" 1 "a" 99 "a" 5 "a" 3 "b" 1 "b" 99 "b" 5 "b" 3 "c" 1 "c" 99 "c" 5 "c" 3 "a" 1 "a" 99 "a" 5 "a" 3 "a" 1 "a" 99 "a" 5 "a" 3 0 1 2 3 {"$map": { "input": "$arr", "as": "var", "in": "$$var" }} $map {"$map": { "input": "$arr.a", "as": "eachElem", "in": {"b":"$$eachElem"} }} 1 99 5 3
  • 48.
    # M DB l o c a l "arr":[{"a":1},{"a":99},{"a":5},{"a":3}] {"$filter": { "input": "$arr", "cond":{"$lt":["$$this.a",10]} }} {"$filter":{ "input": "$arr", "as":"elem", "cond":{"$lt":["$$elem.a",10]} } } ARRAY EXPRESSIONS "a" 1 "a" 99 "a" 5 "a" 3 $filter "a" 1 "a" 5 "a" 3
  • 49.
    # M DB l o c a l "arr":[{"a":1},{"a":99},{"a":5},{"a":3}] {"$reduce": { "input": "$arr", "initialValue": 0, "in": {$add:["$$value","$$this.a"]} } } ARRAY EXPRESSIONS "a" 1 "a" 99 "a" 5 "a" 3 $reduce 01100105108 "a" 1 "a" 99 "a" 5 "a" 3
  • 50.
    # M DB l o c a l "arr":[{"a":1},{"a":99},{"a":5},{"a":3}] {"$reduce": { "input": "$arr", "initialValue": 0, "in": {$add:["$$value","$$this.a"]} } } {"$reduce":{ "input": "$arr", "intialValue":[], "in":{"$concatArrays":[ [ "$$this" ], "$$value" ]} } } ARRAY EXPRESSIONS $reduce "a" 1 "a" 99 "a" 5 "a" 3 108 "a" 1 "a" 99 "a" 5 "a" 3
  • 51.
    # M DB l o c a l "arr":[{"a":1},{"a":99},{"a":5},{"a":3}] {"$reduce": { "input": "$arr", "initialValue": 0, "in": {$add:["$$value","$$this.a"]} } } {"$reduce":{ "input": "$arr", "intialValue":[], "in":{"$concatArrays":[ [ "$$this" ], "$$value" ]} } } ARRAY EXPRESSIONS $reduce "a" 1 "a" 99 "a" 5 "a" 3 []"a" 1"a" 99 "a" 1 "a" 5 "a" 99 "a" 1 "a" 3 "a" 5 "a" 99 "a" 1 108 "a" 1 "a" 99 "a" 5 "a" 3
  • 52.
    # M DB l o c a l READABILITY TIP
  • 53.
    # M DB l o c a l Functions for expressions reverseArray = function(input) { return {"$reduce":{ "input": input, "intialValue":[], "in":{"$concatArrays":[ [ "$$this" ], "$$value" ]} }}; }; db.c.aggregate([ {"$addFields":{ "revArray":reverseArray("$origArray") } } ]) ENCAPSULATE COMPLEXITY
  • 54.
    # M DB l o c a l AGGREGATION FIND QUERY UPDATE QUERY DOCUMENT VALIDATION EXPRESSIONS {children: [ {name:"Max", dob:"1994-12-01", dep:true}, {name:"Sam", dob:"1997-09-28", dep:true}, {name:"Kim", dob:"2000-02-29", dep:true} ]}
  • 55.
    # M DB l o c a l AGGREGATION FIND QUERY UPDATE QUERY DOCUMENT VALIDATION EXPRESSIONS {children: [ {name:"Max", dob:"1994-12-01", dep:false}, {name:"Sam", dob:"1997-09-28", dep:true}, {name:"Kim", dob:"2000-02-29", dep:true} ]} db.c.aggregate([ {$addFields:{ numChildren:{$size:"$children"}, numDependents:{$size:{ $filter:{ input:"$children.dep", cond: "$$this" } }} }}, ... ])
  • 56.
    # M DB l o c a l AGGREGATION FIND QUERY UPDATE QUERY DOCUMENT VALIDATION EXPRESSIONS new in 3.6! {children: [ {name:"Max", dob:"1994-12-01", dep:false}, {name:"Sam", dob:"1997-09-28", dep:true}, {name:"Kim", dob:"2000-02-29", dep:true} ]} db.c.find({$expr:{$gt:["$a","$b"]}})
  • 57.
    # M DB l o c a l AGGREGATION FIND QUERY UPDATE QUERY DOCUMENT VALIDATION EXPRESSIONS new in 3.6! {children: [ {name:"Max", dob:"1994-12-01", dep:false}, {name:"Sam", dob:"1997-09-28", dep:true}, {name:"Kim", dob:"2000-02-29", dep:true} ]} db.c.find({$expr: { $lt:[ {$size:{$filter:{ input:"$children.dep", cond:"$$this" }}}, 2 }})
  • 58.
    # M DB l o c a l AGGREGATION FIND QUERY UPDATE QUERY DOCUMENT VALIDATION EXPRESSIONS new in 3.6! {children: [ {name:"Max", dob:"1994-12-01", dep:false}, {name:"Sam", dob:"1997-09-28", dep:true}, {name:"Kim", dob:"2000-02-29", dep:true} ]} db.c.find({$expr:{$gt:[ {$let:{ vars:{dobs:{$map:{ input:"$children.dob" in: {$year:{$dateFromString:{ dateString:"$$this" }}}}}}, in:{$subtract:[ {$max:"$$dobs"}, {$min:"$$dobs"} ]}}}, 10 ]}})
  • 59.
    # M DB l o c a l AGGREGATION FIND QUERY UPDATE QUERY DOCUMENT VALIDATION EXPRESSIONS new in 3.6! {children: [ {name:"Max", dob:"1994-12-01", dep:false}, {name:"Sam", dob:"1997-09-28", dep:true}, {name:"Kim", dob:"2000-02-29", dep:true} ]} db.c.update({$expr: {$anyElementTrue:{$map:{ input:"$children", in: {$and:[ {$lt:["$$this.dob","1997-01-22"]}, "$$this.dep" ]} }}}}, {$set:{ audit:true }} )
  • 60.
    # M DB l o c a l AGGREGATION FIND QUERY UPDATE QUERY DOCUMENT VALIDATION EXPRESSIONS new in 3.6! {children: [ {name:"Max", dob:"1994-12-01", dep:false}, {name:"Sam", dob:"1997-09-28", dep:true}, {name:"Kim", dob:"2000-02-29", dep:true} ]} db.createCollection("c", validator: {$expr: { <anything you can express> }})
  • 61.
    # M DB l o c a l NETWORK SUSPECT ACTIVITY DETECTION
  • 62.
  • 63.
    {$match:{ts:{$gte:start,$lt:end}}}, {$sort:{ts:1}}, {$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}}}}, $sort$match $group start=ISODate("...") end=ISODate("...") { user:"303900", ipaddr: "71.56.112.56", ts:ISODate("2017-05-08T...") } { _id: "303900", ips: [ {ip:"71.56.112.56", ts:ISODate("2017-05-08T08:54:04Z") }, {ip:"71.56.112.56", ts:ISODate("2017-05-09T09:01:11Z") }, {ip:"12.130.117.87", ts:ISODate("2017-05-09T09:04:59Z") } ]}
  • 64.
    $sort$match $group $addFields$match start=ISODate("...") end=ISODate("...") { user: "303900", ipaddr: "71.56.112.56", ts:ISODate("2017-05-08T...") } $project { _id: "303900", ips: [ {ip:"71.56.112.56", ts:ISODate("2017-05-08T...") }, {ip:"71.56.112.56", ts:ISODate("2017-05-09T...") }, {ip:"12.130.117.87", ts:ISODate("2017-05-09T...") } ]} {$match:{ts:{$gte:start,$lt:end}}}, {$sort:{ts:1}}, {$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}}}}, {$addFields:{diffs: {$filter:{ input:{$map:{ input: {$range:[0,{$subtract:[{$size:"$ips"},1]}]}, as:"i", in:{$let:{vars:{ip1:{$arrayElemAt:["$ips","$$i"]}, ip2:{$arrayElemAt:["$ips",{$add:["$$i",1]}]}}, in:{ diff:{$divide:[{$subtract:["$$ip2.ts","$$ip1.ts"]},60000]}, ip1:"$$ip1.ip", t1:"$$ip1.ts", ip2:"$$ip2.ip", t2:"$$ip2.ts" }}}}}, cond:{$and:[{$lt:["$$this.diff",10]},{$ne:["$$this.ip1","$$this.ip2"]}]} }}}},
  • 65.
    $sort$match $group $addFields$match start=ISODate("...") end=ISODate("...") { user: "303900", ipaddr: "71.56.112.56", ts:ISODate("2017-05-08T...") } $project { _id: "303900", ips: [ {ip:"71.56.112.56", ts:ISODate("2017-05-08T...") }, {ip:"71.56.112.56", ts:ISODate("2017-05-09T...") }, {ip:"12.130.117.87", ts:ISODate("2017-05-09T...") } ]} {$match:{ts:{$gte:start,$lt:end}}}, {$sort:{ts:1}}, {$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}}}}, {$addFields:{diffs: {$filter:{ input:{$map:{ input: {$range:[0,{$subtract:[{$size:"$ips"},1]}]}, as:"i", in:{$let:{vars:{ip1:{$arrayElemAt:["$ips","$$i"]}, ip2:{$arrayElemAt:["$ips",{$add:["$$i",1]}]}}, in:{ diff:{$divide:[{$subtract:["$$ip2.ts","$$ip1.ts"]},60000]}, ip1:"$$ip1.ip", t1:"$$ip1.ts", ip2:"$$ip2.ip", t2:"$$ip2.ts" }}}}}, cond:{$and:[{$lt:["$$this.diff",10]},{$ne:["$$this.ip1","$$this.ip2"]}]} }}}}, {$match:{"diffs":{$ne:[]}}}, {$project:{_id:0, user:"$_id", suspectLogins:"$diffs"}}
  • 66.
    $sort$match $group $addFields$match start=ISODate("...") end=ISODate("...") { user: "303900", ipaddr: "71.56.112.56", ts:ISODate("2017-05-08T...") } $project {$match:{ts:{$gte:start,$lt:end}}}, {$sort:{ts:1}}, {$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}}}}, {$addFields:{diffs: {$filter:{ input:{$map:{ input: {$range:[0,{$subtract:[{$size:"$ips"},1]}]}, as:"i", in:{$let:{vars:{ip1:{$arrayElemAt:["$ips","$$i"]}, ip2:{$arrayElemAt:["$ips",{$add:["$$i",1]}]}}, in:{ diff:{$divide:[{$subtract:["$$ip2.ts","$$ip1.ts"]},60000]}, ip1:"$$ip1.ip", t1:"$$ip1.ts", ip2:"$$ip2.ip", t2:"$$ip2.ts" }}}}}, cond:{$and:[{$lt:["$$this.diff",10]},{$ne:["$$this.ip1","$$this.ip2"]}]} }}}}, {$match:{"diffs":{$ne:[]}}}, {$project:{_id:0, user:"$_id", suspectLogins:"$diffs"}} { "user" : "35237073", "suspectLogins" : [ {"diff": 4.8333333333, "ip1": "106.220.151.16", "t1":"2017-05-08T06:58", "ip2": "223.182.113.15" "t2":"2017-05-08T07:03" }, {"diff": 8.3, "ip1": "223.182.113.15", "t1":"2017-05-08T07:03", "ip2": "49.206.217.26", "t2":"2017-05-08T07:11" } ] }
  • 67.
    $sort$match $group $addFields$match $project {$match:{ts:{$gte:start,$lt:end}}}, {$sort:{ts:1}}, {$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}}}}, {$addFields:{diffs: {$filter:{ input:{$map:{ input: {$range:[0,{$subtract:[{$size:"$ips"},1]}]}, as:"i", in:{$let:{vars:{ip1:{$arrayElemAt:["$ips","$$i"]}, ip2:{$arrayElemAt:["$ips",{$add:["$$i",1]}]}}, in:{ diff:{$divide:[{$subtract:["$$ip2.ts","$$ip1.ts"]},60000]}, ip1:"$$ip1.ip", t1:"$$ip1.ts", ip2:"$$ip2.ip", t2:"$$ip2.ts" }}}}}, cond:{$and:[{$lt:["$$this.diff",10]},{$ne:["$$this.ip1","$$this.ip2"]}]} }}}}, {$match:{"diffs":{$ne:[]}}}, {$project:{_id:0, user:"$_id", suspectLogins:"$diffs"}} start=ISODate("...") end=ISODate("...") { user: "303900", ipaddr: "71.56.112.56", ts:ISODate("2017-05-08T...") } { _id: "303900", ips: [ {ip:"71.56.112.56", ts:ISODate("2017-05-08T...") }, {ip:"71.56.112.56", ts:ISODate("2017-05-09T...") }, {ip:"12.130.117.87", ts:ISODate("2017-05-09T...") } ]}
  • 68.
    $sort$match $group $addFields$match start=ISODate("...") end=ISODate("...") { user: "303900", ipaddr: "71.56.112.56", ts:ISODate("2017-05-08T...") } $addFields $match $proje {$match:{ts:{$gte:start,$lt:end}}}, {$sort:{ts:1}}, {$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}}}}, {$addFields:{diffIpNum:{$size:{$setUnion:"$ips.ip"}}}}, {$match:{diffIpNum:{$gt:1}}}, {$addFields:{diffs: {$filter:{ input:{$map:{ input: {$range:[0,{$subtract:[{$size:"$ips"},1]}]}, as:"i", in:{$let:{vars:{ip1:{$arrayElemAt:["$ips","$$i"]}, ip2:{$arrayElemAt:["$ips",{$add:["$$i",1]}]}}, in:{ diff:{$divide:[{$subtract:["$$ip2.ts","$$ip1.ts"]},60000]}, ip1:"$$ip1.ip", t1:"$$ip1.ts", ip2:"$$ip2.ip", t2:"$$ip2.ts" }}}}}, cond:{$and:[{$lt:["$$this.diff",10]},{$ne:["$$this.ip1","$$this.ip2"]}]} }}}}, {$match:{"diffs":{$ne:[]}}}, {$project:{_id:0, user:"$_id", suspectLogins:"$diffs"}} { _id: "303900", ips: [ {ip:"71.56.112.56", ts:ISODate("2017-05-08T...") }, {ip:"71.56.112.56", ts:ISODate("2017-05-09T...") }, {ip:"12.130.117.87", ts:ISODate("2017-05-09T...") } ]}
  • 69.
    $sort$match $group $match start=ISODate("...") end=ISODate("...") { user:"303900", ipaddr: "71.56.112.56", ts:ISODate("2017-05-08T...") } $addFields $match $project {$match:{ts:{$gte:start,$lt:end}}}, {$sort:{ts:1}}, {$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}}, diffIps:{$addToSet:"$ipaddr"}}}, {$match:{$expr:{$gt:[{$size:"$diffIps"},1]}}}, {$addFields:{diffs: {$filter:{ input:{$map:{ input: {$range:[0,{$subtract:[{$size:"$ips"},1]}]}, as:"i", in:{$let:{vars:{ip1:{$arrayElemAt:["$ips","$$i"]}, ip2:{$arrayElemAt:["$ips",{$add:["$$i",1]}]}}, in:{ diff:{$divide:[{$subtract:["$$ip2.ts","$$ip1.ts"]},60000]}, ip1:"$$ip1.ip", t1:"$$ip1.ts", ip2:"$$ip2.ip", t2:"$$ip2.ts" }}}}}, cond:{$and:[{$lt:["$$this.diff",10]},{$ne:["$$this.ip1","$$this.ip2"]}]} }}}}, {$match:{"diffs":{$ne:[]}}}, {$project:{_id:0, user:"$_id", suspectLogins:"$diffs"}} { _id: "303900", ips: [ {ip:"71.56.112.56", ts:ISODate("2017-05-08T...") }, {ip:"71.56.112.56", ts:ISODate("2017-05-09T...") }, {ip:"12.130.117.87", ts:ISODate("2017-05-09T...") }, diffIps: [ "71.56.112.56", "12.130.117.87" ] ]}
  • 70.
    $sort$match $group $match start=ISODate("...") end=ISODate("...") { user:"303900", ipaddr: "71.56.112.56", ts:ISODate("2017-05-08T...") } $addFields $match $project {$match:{ts:{$gte:start,$lt:end}}}, {$sort:{ts:1}}, {$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}}, diffIps:{$addToSet:"$ipaddr"}}}, {$match:{"diffIps.1":{$exists:true}}}, {$addFields:{diffs: {$filter:{ input:{$map:{ input: {$range:[0,{$subtract:[{$size:"$ips"},1]}]}, as:"i", in:{$let:{vars:{ip1:{$arrayElemAt:["$ips","$$i"]}, ip2:{$arrayElemAt:["$ips",{$add:["$$i",1]}]}}, in:{ diff:{$divide:[{$subtract:["$$ip2.ts","$$ip1.ts"]},60000]}, ip1:"$$ip1.ip", t1:"$$ip1.ts", ip2:"$$ip2.ip", t2:"$$ip2.ts" }}}}}, cond:{$and:[{$lt:["$$this.diff",10]},{$ne:["$$this.ip1","$$this.ip2"]}]} }}}}, {$match:{"diffs":{$ne:[]}}}, {$project:{_id:0, user:"$_id", suspectLogins:"$diffs"}} { _id: "303900", ips: [ {ip:"71.56.112.56", ts:ISODate("2017-05-08T...") }, {ip:"71.56.112.56", ts:ISODate("2017-05-09T...") }, {ip:"12.130.117.87", ts:ISODate("2017-05-09T...") }, diffIps: [ "71.56.112.56", "12.130.117.87" ] ]}
  • 71.
    $sort$match $group start=ISODate("...") end=ISODate("...") { user: "303900", ipaddr:"71.56.112.56", ts:ISODate("2017-05-08T...") } {$match:{ts:{$gte:start,$lt:end}}}, {$sort:{ts:1}}, {$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}}, diffIps:{$addToSet:"$ipaddr"}}}, {$match:{"diffIps.1":{$exists:true}}}, {$addFields:{diffs: {$filter:{ input:{$map:{ input: {$range:[0,{$subtract:[{$size:"$ips"},1]}]}, as:"i", in:{$let:{vars:{ip1:{$arrayElemAt:["$ips","$$i"]}, ip2:{$arrayElemAt:["$ips",{$add:["$$i",1]}]}}, in:{ diff:{$divide:[{$subtract:["$$ip2.ts","$$ip1.ts"]},60000]}, ip1:"$$ip1.ip", t1:"$$ip1.ts", ip2:"$$ip2.ip", t2:"$$ip2.ts" }}}}}, cond:{$and:[{$lt:["$$this.diff",10]},{$ne:["$$this.ip1","$$this.ip2"]}]} }}}}, {$match:{"diffs":{$ne:[]}}}, {$project:{_id:0, user:"$_id", suspectLogins:"$diffs"}} $match $addFields $match $project { _id: "303900", ips: [ {ip:"71.56.112.56", ts:ISODate("2017-05-08T...") }, {ip:"71.56.112.56", ts:ISODate("2017-05-09T...") }, {ip:"12.130.117.87", ts:ISODate("2017-05-09T...") }, diffIps: [ "71.56.112.56", "12.130.117.87" ] ]}
  • 72.
    $sort$match $group start=ISODate("...") end=ISODate("...") { user: "303900", ipaddr:"71.56.112.56", ts:ISODate("2017-05-08T...") } {$match:{ts:{$gte:start,$lt:end}}}, {$sort:{ts:1}}, {$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}}, diffIps:{$addToSet:"$ipaddr"}}}, {$match:{"diffIps.1":{$exists:true}}}, {$addFields:{diffs: {$filter:{ input:{$map:{ input: {$range:[0,{$subtract:[{$size:"$ips"},1]}]}, as:"i", in:{$let:{vars:{ip1:{$arrayElemAt:["$ips","$$i"]}, ip2:{$arrayElemAt:["$ips",{$add:["$$i",1]}]}}, in:{ diff:{$cond:{ }}, ip1:"$$ip1.ip", t1:"$$ip1.ts", ip2:"$$ip2.ip", t2:"$$ip2.ts" }}}}}, cond:{$and:[{$lt:["$$this.diff",10]},{$ne:["$$this.ip1","$$this.ip2"]}]} }}}}, {$match:{"diffs":{$ne:[]}}}, {$project:{_id:0, user:"$_id", suspectLogins:"$diffs"}} $match $addFields $match $project { _id: "303900", ips: [ {ip:"71.56.112.56", ts:ISODate("2017-05-08T...") }, {ip:"71.56.112.56", ts:ISODate("2017-05-09T...") }, {ip:"12.130.117.87", ts:ISODate("2017-05-09T...") }, diffIps: [ "71.56.112.56", "12.130.117.87" ] ]}
  • 73.
    $sort$match $group start=ISODate("...") end=ISODate("...") { user: "303900", ipaddr:"71.56.112.56", ts:ISODate("2017-05-08T...") } {$match:{ts:{$gte:start,$lt:end}}}, {$sort:{ts:1}}, {$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}}, diffIps:{$addToSet:"$ipaddr"}}}, {$match:{"diffIps.1":{$exists:true}}}, {$addFields:{diffs: {$filter:{ input:{$map:{ input: {$range:[0,{$subtract:[{$size:"$ips"},1]}]}, as:"i", in:{$let:{vars:{ip1:{$arrayElemAt:["$ips","$$i"]}, ip2:{$arrayElemAt:["$ips",{$add:["$$i",1]}]}}, in:{ diff:{$cond:{ if:{$ne:["$$ip1.ip","$$ip2.ip"]}, then:{$divide:[{$subtract:["$$ip2.ts","$$ip1.ts"]},60000]}, else: 9999 }}, ip1:"$$ip1.ip", t1:"$$ip1.ts", ip2:"$$ip2.ip", t2:"$$ip2.ts" }}}}}, cond:{$and:[{$lt:["$$this.diff",10]},{$ne:["$$this.ip1","$$this.ip2"]}]} }}}}, {$match:{"diffs":{$ne:[]}}}, {$project:{_id:0, user:"$_id", suspectLogins:"$diffs"}} $match $addFields $match $project { _id: "303900", ips: [ {ip:"71.56.112.56", ts:ISODate("2017-05-08T...") }, {ip:"71.56.112.56", ts:ISODate("2017-05-09T...") }, {ip:"12.130.117.87", ts:ISODate("2017-05-09T...") }, diffIps: [ "71.56.112.56", "12.130.117.87" ] ]}
  • 74.
    $sort$match $group start=ISODate("...") end=ISODate("...") { user: "303900", ipaddr:"71.56.112.56", ts:ISODate("2017-05-08T...") } {$match:{ts:{$gte:start,$lt:end}}}, {$sort:{ts:1}}, {$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}}, diffIps:{$addToSet:"$ipaddr"}}}, {$match:{"diffIps.1":{$exists:true}}}, {$addFields:{diffs: {$filter:{ input:{$map:{ input: {$range:[0,{$subtract:[{$size:"$ips"},1]}]}, as:"i", in:{$let:{vars:{ip1:{$arrayElemAt:["$ips","$$i"]}, ip2:{$arrayElemAt:["$ips",{$add:["$$i",1]}]}}, in:{ diff:{$cond:{ if:{$ne:["$$ip1.ip","$$ip2.ip"]}, then:{$divide:[{$subtract:["$$ip2.ts","$$ip1.ts"]},60000]}, else: 9999 }}, ip1:"$$ip1.ip", t1:"$$ip1.ts", ip2:"$$ip2.ip", t2:"$$ip2.ts" }}}}}, cond:{$and:[{$lt:["$$this.diff",10]},{$ne:["$$this.ip1","$$this.ip2"]}]} }}}}, {$match:{"diffs":{$ne:[]}}}, {$project:{_id:0, user:"$_id", suspectLogins:"$diffs"}} $match $addFields $match $project { _id: "303900", ips: [ {ip:"71.56.112.56", ts:ISODate("2017-05-08T...") }, {ip:"71.56.112.56", ts:ISODate("2017-05-09T...") }, {ip:"12.130.117.87", ts:ISODate("2017-05-09T...") }, diffIps: [ "71.56.112.56", "12.130.117.87" ] ]}
  • 75.
    $sort$match $group start=ISODate("...") end=ISODate("...") { user: "303900", ipaddr:"71.56.112.56", ts:ISODate("2017-05-08T...") } {$match:{ts:{$gte:start,$lt:end}}}, {$sort:{ts:1}}, {$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}}, diffIps:{$addToSet:"$ipaddr"}}}, {$match:{"diffIps.1":{$exists:true}}}, {$addFields:{diffs: {$filter:{ input:{$map:{ input: {$range:[0,{$subtract:[{$size:"$ips"},1]}]}, as:"i", in:{$let:{vars:{ip1:{$arrayElemAt:["$ips","$$i"]}, ip2:{$arrayElemAt:["$ips",{$add:["$$i",1]}]}}, in:{ diff:{$cond:{ if:{$ne:["$$ip1.ip","$$ip2.ip"]}, then:{$divide:[{$subtract:["$$ip2.ts","$$ip1.ts"]},60000]}, else: 9999 }}, ip1:"$$ip1.ip", t1:"$$ip1.ts", ip2:"$$ip2.ip", t2:"$$ip2.ts" }}}}}, cond:{$lt:["$$this.diff",10]} }}}}, {$match:{"diffs":{$ne:[]}}}, {$project:{_id:0, user:"$_id", suspectLogins:"$diffs"}} { "user" : "35237073", "suspectLogins" : [ {"diff": 4.8333333333, "ip1": "106.220.151.16", "t1":"2017-05-08T06:58", "ip2": "223.182.113.15" "t2":"2017-05-08T07:03" }, {"diff": 8.3, "ip1": "223.182.113.15", "t1":"2017-05-08T07:03", "ip2": "49.206.217.26", "t2":"2017-05-08T07:11" } ] } $match $addFields $match $project
  • 76.
    $sort$match $group start=ISODate("...") end=ISODate("...") { user: "303900", ipaddr:"71.56.112.56", ts:ISODate("2017-05-08T...") } {$match:{ts:{$gte:start,$lt:end}}}, {$sort:{ts:1}}, {$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}}, diffIps:{$addToSet:"$ipaddr"}}}, {$match:{"diffIps.1":{$exists:true}}}, {$addFields:{diffs: {$filter:{ input:{$map:{ input: {$range:[0,{$subtract:[{$size:"$ips"},1]}]}, as:"i", in:{$let:{vars:{ip1:{$arrayElemAt:["$ips","$$i"]}, ip2:{$arrayElemAt:["$ips",{$add:["$$i",1]}]}}, in:{ diff:{$cond:{ if:{$ne:["$$ip1.ip","$$ip2.ip"]}, then:{$divide:[{$subtract:["$$ip2.ts","$$ip1.ts"]},60000]}, else: 9999 }}, ip1:"$$ip1.ip", t1:"$$ip1.ts", ip2:"$$ip2.ip", t2:"$$ip2.ts" }}}}}, cond:{$lt:["$$this.diff",10]} }}}}, {$match:{"diffs":{$ne:[]}}}, {$project:{_id:0, user:"$_id", suspectLogins:"$diffs"}} { "user" : "35237073", "suspectLogins" : [ {"diff": 4.8333333333, "ip1": "106.220.151.16", "t1":"2017-05-08T06:58", "ip2": "223.182.113.15" "t2":"2017-05-08T07:03" }, {"diff": 8.3, "ip1": "223.182.113.15", "t1":"2017-05-08T07:03", "ip2": "49.206.217.26", "t2":"2017-05-08T07:11" } ] } $match $addFields $match $project
  • 77.
    $sort$match $group start=ISODate("...") end=ISODate("...") { user: "303900", ipaddr:"71.56.112.56", ts:ISODate("2017-05-08T...") } {$match:{ts:{$gte:start,$lt:end}}}, {$sort:{ts:1}}, {$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}}, diffIps:{$addToSet:"$ipaddr"}}}, {$match:{"diffIps.1":{$exists:true}}}, {$addFields:{diffs: {$filter:{ input:{$map:{ input: {$range:[0,{$subtract:[{$size:"$ips"},1]}]}, as:"i", in:{$let:{vars:{ip1:{$arrayElemAt:["$ips","$$i"]}, ip2:{$arrayElemAt:["$ips",{$add:["$$i",1]}]}}, in:{ diff:{$cond:{ if:{$ne:["$$ip1.ip","$$ip2.ip"]}, then:{$divide:[{$subtract:["$$ip2.ts","$$ip1.ts"]},60000]}, else: 9999 }}, ip1:"$$ip1.ip", t1:"$$ip1.ts", ip2:"$$ip2.ip", t2:"$$ip2.ts" }}}}}, cond:{$lt:["$$this.diff",10]} }}}}, {$match:{"diffs":{$ne:[]}}}, {$project:{_id:0, user:"$_id", suspectLogins:"$diffs"}} { "user" : "35237073", "suspectLogins" : [ {"diff": 4.8333333333, "ip1": "106.220.151.16", "t1":"2017-05-08T06:58", "ip2": "223.182.113.15" "t2":"2017-05-08T07:03" }, {"diff": 8.3, "ip1": "223.182.113.15", "t1":"2017-05-08T07:03", "ip2": "49.206.217.26", "t2":"2017-05-08T07:11" } ] } $match $addFields $match $project
  • 78.
    $sort$match $group start=ISODate("...") end=ISODate("...") { user: "303900", ipaddr:"71.56.112.56", ts:ISODate("2017-05-08T...") } {$match:{ts:{$gte:start,$lt:end}}}, {$sort:{ts:1}}, {$group:{_id:"$user",ips:{$push:{ip:"$ipaddr", ts:"$ts"}}, diffIps:{$addToSet:"$ipaddr"}}}, {$match:{"diffIps.1":{$exists:true}}}, {$addFields:{diffs: {$filter:{ input:{$map:{ input: {$range:[0,{$subtract:[{$size:"$ips"},1]}]}, as:"i", in:{$let:{vars:{ip1:{$arrayElemAt:["$ips","$$i"]}, ip2:{$arrayElemAt:["$ips",{$add:["$$i",1]}]}}, in:{ diff:{$cond:{ if:{$ne:["$$ip1.ip","$$ip2.ip"]}, then:{$divide:[{$subtract:["$$ip2.ts","$$ip1.ts"]},60000]}, else: 9999 }}, ip1:"$$ip1.ip", t1:"$$ip1.ts", ip2:"$$ip2.ip", t2:"$$ip2.ts" }}}}}, cond:{$lt:["$$this.diff",10]} }}}}, {$match:{"diffs":{$ne:[]}}}, {$project:{_id:0, user:"$_id", suspectLogins:"$diffs"}} { "user" : "35237073", "suspectLogins" : [ {"diff": 4.8333333333, "ip1": "106.220.151.16", "t1":"2017-05-08T06:58", "ip2": "223.182.113.15" "t2":"2017-05-08T07:03" }, {"diff": 8.3, "ip1": "223.182.113.15", "t1":"2017-05-08T07:03", "ip2": "49.206.217.26", "t2":"2017-05-08T07:11" } ] } $match $addFields $match $project
  • 79.
    # M DB l o c a l understand stages • Best order for performance • Avoid unnecessary "blocking" • keep "streaming" • Maximize use of indexes • early stages get the index! • Liberally check explain() output POWERFUL AGGREGATIONS understand expressions • Schema manipulation • Array transformation • Use in find query filter, ... use functions • Readable, debug-able, reusable
  • 80.
    # M DB l o c a l Better performance & optimizations More stages & expressions More options for output Compass helper for aggregate Unify different languages THE FUTURE OF AGGREGATION
  • 81.
  • 82.
    # M DB l o c a l THANK YOU! https://github.com/asya999/mdbw17