1

I am trying to run a query against a MongoDB database and use pymongo to write out the results as individual XML files. I can get the query to work when I only use sort and do not add any find criteria. What I want is to be able to insert various find criteria and have the script write out XML based on them. The script works with this line:

def find_id_sort():
    for post in db.video_md_fcsvr.find({}, ).sort("_id", ):

However, if I try to add a find criterion like so:

def find_id_sort():
    for post in db.video_md_fcsvr.find({}, {"streams.codec_name": "prores"}).sort("_id", ):

the result is this:

KeyError: 'format' function find_id_sort in python_write_xml_toFile.py at line 43 format_data = post['format']

full script is here:

import sys
import os
import xml.etree.cElementTree as ET
import pymongo
from pymongo import MongoClient
from bson import Binary, Code
from bson.json_util import dumps  
import io, json
from itertools import groupby
from bson.objectid import ObjectId
import datetime


# Open a client against the local MongoDB server on port 27017.
# NOTE(review): `slave_okay` is a legacy PyMongo option; confirm the installed
# PyMongo version still accepts it (newer releases use read preferences).
connection = MongoClient("localhost:27017", slave_okay=True)
# Database handle used by find_id_sort() below.
db = connection['video_ingest_db']    


def find_id_sort(criteria=None, output_dir="/Users/mathiesj/Desktop/metadata/"):
    """Write one XML metadata file per matching document in ``video_md_fcsvr``.

    Documents are read from ``db.video_md_fcsvr`` in ascending ``_id`` order
    and each one is written to ``<output_dir>/<ObjectId>.xml``.

    Args:
        criteria: MongoDB filter document passed as the first argument of
            ``find()``. When ``None`` the filter
            ``{"streams.codec_name": "prores"}`` is used.
        output_dir: Directory the XML files are written to.

    Raises:
        KeyError: If a matched document lacks ``format``, ``streams`` or one
            of the fields read from them.
        IndexError: If a matched document has fewer than two streams.
    """
    if criteria is None:
        criteria = {"streams.codec_name": "prores"}

    # The filter must be the FIRST argument of find(). Passing it second makes
    # it a projection: nothing is filtered and the returned documents are
    # stripped down to the projected fields, so post['format'] raises KeyError.
    cursor = db.video_md_fcsvr.find(criteria).sort("_id", pymongo.ASCENDING)

    for post in cursor:
        # GRAB VALUES FROM FIELDS ---------------------------------------------
        video_id = post['_id']
        # The creation timestamp is read off the ObjectId itself.
        video_id_timestamp = video_id.generation_time
        format_data = post['format']
        format_name = format_data['format_name']
        stream_0 = post['streams'][0]
        stream_1 = post['streams'][1]

        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("format name: %s " % format_name)
        print("format data: %s" % format_data)
        print("Video ID: %s" % video_id)
        print("Creation Time: %s" % video_id_timestamp)

        # ElementTree attribute values must be strings, hence the str() calls.
        id_to_string = str(video_id)
        metadata_file_name = os.path.join(output_dir, id_to_string + ".xml")

        # WRITE VALUES TO XML FORMAT ------------------------------------------
        root = ET.Element("video_metadata")
        metadata = ET.SubElement(root, "metadata")

        mongodb_id_field = ET.SubElement(metadata, "mongodb_id")
        mongodb_id_field.set("id", id_to_string)
        mongodb_id_field.text = "some value1"

        creation_time_field = ET.SubElement(metadata, "creation_time")
        creation_time_field.set("time_stamp", str(video_id_timestamp))
        creation_time_field.text = "some value2"

        filename_field = ET.SubElement(metadata, "path_filename")
        filename_field.set("path_filename", str(format_data['filename']))
        filename_field.text = "some value3"

        video_format_field = ET.SubElement(metadata, "video_format")
        video_format_field.set("video_format_name", str(format_name))
        video_format_field.set("video_format_long_name",
                               str(format_data['format_long_name']))
        video_format_field.text = "some value4"

        # Frame rate and dimensions are read from the first stream only.
        stream0_field = ET.SubElement(metadata, "stream_0")
        stream0_field.set("codec_type", str(stream_0['codec_type']))
        stream0_field.set("codec_name", str(stream_0['codec_name']))
        stream0_field.set("frame_rate", str(stream_0['avg_frame_rate']))
        stream0_field.set("video_height", str(stream_0['height']))
        stream0_field.set("video_width", str(stream_0['width']))
        stream0_field.text = "some value5"

        stream1_field = ET.SubElement(metadata, "stream_1")
        stream1_field.set("codec_type", str(stream_1['codec_type']))
        stream1_field.set("codec_name", str(stream_1['codec_name']))
        stream1_field.text = "some value6"

        ET.ElementTree(root).write(metadata_file_name)




# Run the export when the script is executed.
find_id_sort()

1 Answer 1

2

Shouldn't your query be:

db.video_md_fcsvr.find({"streams.codec": "prores"}).sort( "_id", 1 ):

The first argument is the criteria, and the second is the projection, which selects which fields to return.

Sign up to request clarification or add additional context in comments.

3 Comments

Could be - let me try
Thank you - that was perfect. I guess I was looking myself blind
also just realized that I tried to call streams.codec instead of streams.codec_name - I changed the script

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.