the Detail of MongoDB & MongoEngine
郭學聰 Hsueh-Tsung Kuo — Sat, 02 Jun 2018
# PyMongo
mongoClient['db']['collection'].insert({'first': {'second': {'third': 5}}})
mongoClient['db']['collection'].find({'first.second.third': {'$gte': 4}})
# MongoEngine
doc = Doc()
doc.first.second.third = 5
doc.save()
Doc.objects(first__second__third__gte=4)
ORM usage
class HighScore(mongoengine.document.Document):
stage = mongoengine.fields.StringField(required=True)
mode = mongoengine.fields.StringField(required=True)
username = mongoengine.fields.StringField(required=True)
score = mongoengine.fields.FloatField(required=True)
timestamp = mongoengine.fields.DateTimeField(required=True)
meta = {
'indexes': [
{
'fields': ['stage', 'mode', '-score', '-timestamp', 'username'],
},
{
'fields': ['stage', 'mode', 'username'],
'unique': True,
},
],
}
Python descriptors
field.__get__(self, doc, type=None) --> value
field.__set__(self, doc, value) --> None
field.__delete__(self, doc) --> None
mongoengine/queryset/base.py
class BaseQuerySet(object):
......
def next(self):
......
if self._limit == 0 or self._none:
raise StopIteration
raw_doc = self._cursor.next()
if self._as_pymongo:
return self._get_as_pymongo(raw_doc)
doc = self._document._from_son(
raw_doc, _auto_dereference=self._auto_dereference,
only_fields=self.only_fields)
if self._scalar:
return self._get_scalar(doc)
return doc
mongoengine/base/fields.py
class BaseField(object):
......
def __set__(self, instance, value):
......
if instance._initialised:
try:
if (self.name not in instance._data or
instance._data[self.name] != value):
instance._mark_as_changed(self.name)
except Exception:
# Values cant be compared eg: naive and tz datetimes
# So mark it as changed
instance._mark_as_changed(self.name)
mongoengine/document.py
class Document(BaseDocument):
......
def save(self, force_insert=False, validate=True, clean=True,
write_concern=None, cascade=None, cascade_kwargs=None,
_refs=None, save_condition=None, signal_kwargs=None, **kwargs):
......
try:
# Save a new document or update an existing one
if created:
object_id = self._save_create(doc, force_insert, write_concern)
else:
object_id, created = self._save_update(doc, save_condition,
write_concern)
def _save_update(self, doc, save_condition, write_concern):
......
update_doc = self._get_update_doc()
if update_doc:
upsert = save_condition is None
last_error = collection.update(select_dict, update_doc,
upsert=upsert, **write_concern)
def _get_update_doc(self):
......
updates, removals = self._delta()
update_doc = {}
if updates:
update_doc['$set'] = updates
if removals:
update_doc['$unset'] = removals
return update_doc
mongoengine/fields.py
class ReferenceField(BaseField):
......
def __get__(self, instance, owner):
......
dereferenced = cls._get_db().dereference(value)
if dereferenced is None:
raise DoesNotExist('Trying to dereference unknown document %s' % value)
else:
instance._data[self.name] = cls._from_son(dereferenced)
high_scores_1 = HighScore.objects(...).only('username')
high_scores_2 = HighScore.objects(...).exclude('timestamp')
high_score_1.save()
mongoengine.errors.ValidationError:
ValidationError (HighScore:0123456789abcdef01234567)
(Field is required: ['mode', 'stage', 'score', 'timestamp'])
full control PyMongo
# findAndModify(), single
old_high_score = HighScore.objects(stage='stage1', mode='hard', username='woshige')
.modify(set__score=0.0)
# update(), single or multiple
HighScore.objects(stage='stage1', mode='hard')
.update(multi=True, inc__score=100000.0)
service->database: update()
service->database: Doc.objects(doc='doc2').modify()
note right of database: collide at document2 ?
database-->service: old document2
database-->service: update() status
gantt
title one operation per document at a time
section update()
update document1 :done,u1,2018-06-02,1s
update document2 :active,u2,after u1,1s
update document3 :u3,after m1,1s
section modify()
modify document2 :crit,active,m1,after u2,1s
note right of service: unique index ['player', 'guild']
service->database: Transaction(player='1', guild='a', coin=100).save()
note right of database: session start
service->database: Player.objects(player="1").update(inc__coin=-100)
service->database: Transaction(player='1', guild='a', coin=200).save()
note right of database: fail!
service->database: Guild.objects(guild="a").update(inc__coin=100)
service->database: Transaction.objects(player='1', guild='a').delete()
note right of database: session end
gantt
title modify two documents
section transaction1
start transaction :done,t1s,2018-06-02,1s
player coin -100 :active,p1,after t1s,1s
guild coin +100 :active,g1,after p1,1s
end transaction :t1e,after g1,1s
section transaction2
start transaction fail :crit,done,t2s,2018-06-02T00:00:01.5,1s
Perform Two Phase Commits
class HighScore(mongoengine.document.Document):
stage = mongoengine.fields.StringField(required=True)
mode = mongoengine.fields.StringField(required=True)
username = mongoengine.fields.StringField(required=True)
score = mongoengine.fields.FloatField(required=True)
timestamp = mongoengine.fields.DateTimeField(required=True)
meta = {
'indexes': [
{
'fields': ['stage', 'mode', '-score', '-timestamp', 'username'],
},
{
'fields': ['stage', 'mode', 'username'],
'unique': True,
},
],
}
digraph {
nodesep=0.5
node[color="magenta",fontname="Courier",shape="box"]
edge[color="orange",style="dashed"]
"addr.zip"->{"\"10036\"" "\"94301\""}
"\"10036\""->"{\l userid: \"xyz\",\l addr: [\l {zip: \"10036\", ...},\l {zip: \"94301\", ...}\l ],\l ...\l}\l"
"\"94301\""->"{\l userid: \"xyz\",\l addr: [\l {zip: \"10036\", ...},\l {zip: \"94301\", ...}\l ],\l ...\l}\l"
{rank=same;"\"10036\"" "\"94301\""}
}
class HighScore(mongoengine.document.Document):
stage = mongoengine.fields.StringField(required=True)
mode = mongoengine.fields.StringField(required=True)
username = mongoengine.fields.StringField(required=True)
score = mongoengine.fields.FloatField(required=True)
timestamp = mongoengine.fields.DateTimeField(required=True)
meta = {
'indexes': [
{
'fields': ['stage', 'mode', '-score', '-timestamp', 'username'],
},
{
'fields': ['stage', 'mode', 'username'],
'unique': True,
},
],
}
digraph {
nodesep=0.1
node[color="magenta",fontname="Courier",shape="box"]
edge[color="orange",style="dashed"]
"['stage', 'mode', 'username']"->{"stage1" "stage2" "stage3"}
"stage1"->{"easy" "normal" "hard"}
"stage2"->{"easy " "normal " "hard "}
"stage3"->{" easy " " normal " " hard "}
"easy"->{"#" "q"}
"normal"->"w"
"hard"->"e"
"easy "->"r"
"normal "->"t"
"hard "->"y"
" easy "->"u"
" normal "->"i"
" hard "->{"o" "p"}
}
index can help query
HighScore.objects(stage='stage1', mode='hard', username='woshige')
HighScore.objects(stage='stage1', mode='hard')
HighScore.objects(stage='stage1')
HighScore.objects(stage='stage1', mode='hard', score__gte=1000000.0)
HighScore.objects(stage='stage1', mode='hard').order_by('-score', '-timestamp')
HighScore.objects(stage='stage1', mode='hard').order_by('-score')
index cannot help query
HighScore.objects(stage='stage1', username='woshige')
HighScore.objects(mode='hard', username='woshige')
HighScore.objects(mode='hard')
HighScore.objects(username='woshige')
HighScore.objects(stage='stage1', mode='hard').order_by('-timestamp')
query_plan = HighScore.objects(...).explain()
# MongoDB 3.x
{
......
'executionStages': {
'stage': 'SORT', # a SORT stage means the result is sorted with CPU & RAM; it is absent when the query walks a sortable index
},
......
'nReturned': 1, # number of documents that match the query condition
'totalDocsExamined': 3, # number of documents scanned
'totalKeysExamined': 5, # number of index entries scanned
......
}
# MongoDB 2.x
{
......
'n' : 1, # number of documents that match the query condition
'nscannedObjects': 3, # number of documents scanned
'nscanned': 5, # number of index entries scanned
'scanAndOrder': False, # sort with CPU & RAM (True) or walk through sortable index (False)
......
}
equal + range
HighScore.objects(stage='stage1', mode='hard', username__gte='w').explain()
# ['username', 'stage', 'mode']
{
'nReturned': 12,
'totalDocsExamined': 12,
'totalKeysExamined': 47,
}
# ['stage', 'mode', 'username']
{
'nReturned': 12,
'totalDocsExamined': 12,
'totalKeysExamined': 12,
}
equal + sort
HighScore.objects(stage='stage1', mode='hard')
.order_by('-score', '-timestamp').explain()
# ['-score', '-timestamp', 'stage', 'mode']
{
'nReturned': 100,
'totalDocsExamined': 3000,
'totalKeysExamined': 3000,
}
# ['stage', 'mode', '-score', '-timestamp']
{
'nReturned': 100,
'totalDocsExamined': 100,
'totalKeysExamined': 100,
}
equal + sort + range
HighScore.objects(stage='stage1', mode='hard', username__gte='w')
.order_by('-score', '-timestamp').explain()
# ['-score', '-timestamp', 'stage', 'mode', 'username']
{
'nReturned': 12,
'totalDocsExamined': 3000,
'totalKeysExamined': 3000,
}
# ['username', '-score', '-timestamp', 'stage', 'mode']
{
'executionStages': {
'stage': 'SORT',
},
.....
'nReturned': 12,
'totalDocsExamined': 12,
'totalKeysExamined': 48,
}
# ['username', 'stage', 'mode', '-score', '-timestamp']
{
'executionStages': {
'stage': 'SORT',
},
.....
'nReturned': 12,
'totalDocsExamined': 12,
'totalKeysExamined': 47,
}
# ['stage', 'mode', '-score', '-timestamp', 'username']
{
'nReturned': 12,
'totalDocsExamined': 12,
'totalKeysExamined': 101,
}
"do not sort through database!!!"
Hsueh-Tsung Kuo — Sat, 02 Jun 2018
# update(), single or multiple
HighScore.objects(stage='stage1', mode='hard')
.update(multi=True, inc__score=100000.0)
"equal, sort, range"
Hsueh-Tsung Kuo — Sat, 02 Jun 2018
Sdorica -sunset-
Rayark Careers | Make A Difference