Query using find and sort of MongoDB

Question

I am trying to run a query against a MongoDB database and use pymongo to write out the results as individual XML files. I can get the query to work when I only use sort and do not add any find criteria. What I want to do is insert various find criteria and have the script write out XML based on them. The script works with this line:

def find_id_sort():
    for post in db.video_md_fcsvr.find({}, ).sort("_id", ):

However, if I try to add a find criterion like so:

def find_id_sort():
    for post in db.video_md_fcsvr.find({}, {"streams.codec_name": "prores"}).sort("_id", ):

the result is this:

KeyError: 'format' function find_id_sort in python_write_xml_toFile.py at line 43 format_data = post['format']

full script is here:

import sys
import os
import xml.etree.cElementTree as ET
import pymongo
from pymongo import MongoClient
from bson import Binary, Code
from bson.json_util import dumps  
import io, json
from itertools import groupby
from bson.objectid import ObjectId
import datetime


# Open a client against the MongoDB server on localhost:27017 and select the
# 'video_ingest_db' database; `db` is the module-level handle used by
# find_id_sort() below.
# NOTE(review): `slave_okay` looks like a legacy PyMongo option — confirm it is
# still accepted by the installed PyMongo version.
connection = MongoClient("localhost:27017", slave_okay=True)
db = connection['video_ingest_db']    


def find_id_sort():
    """Export each document of ``db.video_md_fcsvr`` to its own XML file.

    Iterates over the cursor returned by ``find(...).sort("_id", )`` and, for
    every document: reads the ``_id``, ``format`` and ``streams`` fields,
    prints a few of them, converts the values to strings, builds a
    ``video_metadata`` XML tree and writes it to
    ``/Users/mathiesj/Desktop/metadata/<ObjectId>.xml``.

    Takes no arguments and returns nothing; the only outputs are the printed
    lines and the XML files.

    Raises:
        KeyError: if a returned document lacks one of the keys read below.
            This is the failure reported for this version of the query
            (``KeyError: 'format'`` at ``format_data = post['format']``).

    NOTE(review): the dict ``{"streams.codec_name": "prores"}`` is passed as
    the *second* positional argument of ``find()``. The first argument is the
    criteria and the second is the projection (which fields to return), so
    this call filters nothing and presumably returns documents without a
    ``format`` field — confirm against the PyMongo ``find`` documentation.
    """
    for post in db.video_md_fcsvr.find({}, {"streams.codec_name": "prores"}).sort("_id", ): # find all entries (with filename - add: "({}, {'format.filename':1})" and sort using _id 
        # GRAB VALUES FROM FIELDS --------------------------------------------------
        video_id_timestamp = post['_id'].generation_time # get the timestamp off the Object ID
        video_id = post['_id'] # get the ObjectId
        # streams_data is only referenced by a commented-out print further down.
        streams_data = post['streams']
        format_data = post['format']
        format_name = post['format']['format_name']
        format_long_name = post['format']['format_long_name']
        path_filename = post['format']['filename']

        # Streams 0 and 1 are read unconditionally, so every document is
        # expected to carry at least two entries under 'streams'.
        # NOTE(review): presumably stream 0 is the video stream (it is the one
        # asked for height/width/frame rate) — verify against the data.
        codec_name_0 = post['streams'][0]['codec_name']
        codec_name_1 = post['streams'][1]['codec_name']
        #codec_name_2 = post['streams'][2]['codec_name']

        codec_type_0 = post['streams'][0]['codec_type']
        codec_type_1 = post['streams'][1]['codec_type']
        #codec_type_2 = post['streams'][2]['codec_type']

        frame_rate = post['streams'][0]['avg_frame_rate']
        video_height = post['streams'][0]['height'] 
        video_width = post['streams'][0]['width'] 

        # Progress/debug output (Python 2 print statements).
        #print "codec_name: %s" % streams_data_codec_name
        print "format name: %s " % format_name
        #print "Streams data: %s" % streams_data
        print "format data: %s" % format_data
        print "Video ID: %s" % video_id
        print "Creation Time: %s" % video_id_timestamp
        #print "Metadata: %s" % post

        # CONVERT OUT TO STRING -------------------------------------------------- 
        # Every value is passed through str() before being set as an XML
        # attribute below.
        id_to_string = str(video_id) # convert the ObjectId to string
        timestamp_to_string = str(video_id_timestamp) 
        filename_to_string = str(path_filename)
        format_name_to_string = str(format_name)
        format_long_name_to_string = str(format_long_name)

        codec_name_0_to_str = str(codec_name_0)
        codec_name_1_to_str = str(codec_name_1)
        #codec_name_2_to_str = str(codec_name_2)

        codec_type_0_to_str = str(codec_type_0)
        codec_type_1_to_str = str(codec_type_1)
        #codec_type_2_to_str = str(codec_type_2)

        frame_rate_to_str = str(frame_rate)
        video_height_to_str = str(video_height)
        video_width_to_str = str(video_width)

        metadata_file_name = "/Users/mathiesj/Desktop/metadata/" + id_to_string + ".xml" # create the path and filenaming convention of the metadata files


        # WRITE VALUES TO XML FORMAT --------------------------------------------------
        # Layout: <video_metadata><metadata> ...one child element per group of
        # values... </metadata></video_metadata>. The real values are stored as
        # attributes; each element's text is a fixed "some valueN" string.
        root = ET.Element("video_metadata")
        metadata = ET.SubElement(root, "metadata")
        #streams = ET.SubElement(root, "streams")

        mongodb_id_field = ET.SubElement(metadata, "mongodb_id")
        mongodb_id_field.set("id", id_to_string)
        mongodb_id_field.text = "some value1" 

        creation_time_field = ET.SubElement(metadata, "creation_time")
        creation_time_field.set("time_stamp", timestamp_to_string)
        creation_time_field.text = "some value2"

        filename_field = ET.SubElement(metadata, "path_filename")
        filename_field.set("path_filename", filename_to_string)
        filename_field.text = "some value3"

        video_format_field = ET.SubElement(metadata, "video_format")
        video_format_field.set("video_format_name", format_name_to_string)
        video_format_field.set("video_format_long_name", format_long_name_to_string)
        video_format_field.text = "some value4"

        stream0_field = ET.SubElement(metadata, "stream_0")
        stream0_field.set("codec_type", codec_type_0_to_str)
        stream0_field.set("codec_name", codec_name_0_to_str)
        stream0_field.set("frame_rate", frame_rate_to_str)
        stream0_field.set("video_height", video_height_to_str)
        stream0_field.set("video_width", video_width_to_str)
        stream0_field.text = "some value5"

        stream1_field = ET.SubElement(metadata, "stream_1")
        stream1_field.set("codec_type", codec_type_1_to_str)
        stream1_field.set("codec_name", codec_name_1_to_str)
        stream1_field.text = "some value6"

        #stream2_field = ET.SubElement(technical_metadata, "stream 2")
        #stream2_field.set("codec_type", codec_type_2_to_str)
        #stream2_field.set("codec_name", codec_name_2_to_str)
        #stream2_field.text = "some value4"


        # One XML file per document, named after the document's ObjectId.
        tree = ET.ElementTree(root)
        tree.write(metadata_file_name)




# Script entry point: run the export as soon as the file is executed.
find_id_sort()

Derick · Accepted Answer · 2013-08-06 13:18:42Z

2

Shouldn't your query be:

db.video_md_fcsvr.find({"streams.codec": "prores"}).sort( "_id", 1 ):

The first argument is the criteria, and the second is the projection, which selects which fields to return.

answered Aug 6, 2013 at 13:18

Derick

36.9k7 gold badges82 silver badges104 bronze badges

Sign up to request clarification or add additional context in comments.

3 Comments

JRM Over a year ago

Could be - let me try

JRM Over a year ago

Thank you - that was perfect. I guess I was looking myself blind

JRM Over a year ago

also just realized that I tried to call streams.codec instead of streams.codec_name - I changed the script

Collectives™ on Stack Overflow

Query using find and sort of MongoDB

1 Answer 1

3 Comments

Your Answer

Hot Network Questions

Collectives™ on Stack Overflow

1 Answer 1

3 Comments

Your Answer

Sign up or log in

Post as a guest

Related