0

I want to loop through several JSON files like the following:

{
"_notes": [], 
"deviceid": "353422071089618", 
"grp-milkuse/milksold": "0.0", 
"hh_id": "0753628391", 
"regdate": "2017-10-03", 
"gpsloc": "-9.10112605948487 32.897751368000414 1488.8210801196851 4.0", 
"grp-milkuse/milkprocess": "0.0", 
"_xform_id_string": "ADGG-LNG01-20170725", 
"hh_country": "2", 
"meta/instanceID": "uuid:0b3b8257-f154-42e8-ae17-119bfa04ceca", 
"_duration": "", 
"visitdate": "2017-10-03", 
"hh_region": "1007", 
"_geolocation": [
    -9.10112605948487, 
    32.897751368000414
], 
"hh_district": "1037", 
"datacollid": "0758990688", 
"grp-milkuse/milkcalf": "0.0", 
"_status": "submitted_via_web", 
"formhub/uuid": "98dfbfd65ef24a92a46d6f794e748627", 
"rpt_animrec": [
    {
        "rpt_animrec/grp_animrec/cowmilked": "3", 
        "rpt_animrec/calved": "2", 
        "rpt_animrec/grp_animrec/tagid": "TZN000404015233", 
        "rpt_animrec/injuries": "2", 
        "rpt_animrec/anim_weight/heartgirth": "150.0", 
        "rpt_animrec/anim_weight/bodyscore": "3.0", 
        "rpt_animrec/parasites": "2", 
        "rpt_animrec/grp_feedwater/watertype": "3", 
        "rpt_animrec/anim_weight/weight": "200.0", 
        "rpt_animrec/grp_feedwater/feedtype": "3 4", 
        "rpt_animrec/vaccinated": "2", 
        "rpt_animrec/served": "2"
    }, 
    {
        "rpt_animrec/sirehastag": "2", 
        "rpt_animrec/siredetails/sirename": "Nil", 
        "rpt_animrec/grp_feedwater/feedtype": "2 3 4", 
        "rpt_animrec/siredetails/sirebreed": "2", 
        "rpt_animrec/vaccinated": "2", 
        "rpt_animrec/calved": "2", 
        "rpt_animrec/grp_animrec/tagid": "TZN000404015236", 
        "rpt_animrec/injuries": "2", 
        "rpt_animrec/anim_weight/bodyscore": "3.0", 
        "rpt_animrec/grp_service/dateserv": "2016-12-22", 
        "rpt_animrec/anim_weight/weight": "250.0", 
        "rpt_animrec/siredetails/sirecnty": "Nil", 
        "rpt_animrec/grp_servicedtls/servechange": "2", 
        "rpt_animrec/grp_servicedtls/servsourcebull": "1", 
        "rpt_animrec/parasites": "2", 
        "rpt_animrec/grp_feedwater/watertype": "3", 
        "rpt_animrec/served": "1", 
        "rpt_animrec/grp_animrec/cowmilked": "3", 
        "rpt_animrec/grp_servicedtls/servicechangeyes": "1", 
        "rpt_animrec/grp_servicedtls/sercost": "15000.0", 
        "rpt_animrec/anim_weight/heartgirth": "160.0", 
        "rpt_animrec/siredetails/sirecomp": "5", 
        "rpt_animrec/grp_service/servtype": "1"
    }, 
    {
        "rpt_animrec/sirehastag": "2", 
        "rpt_animrec/siredetails/sirename": "Nill", 
        "rpt_animrec/grp_feedwater/feedtype": "1 3 4", 
        "rpt_animrec/siredetails/sirebreed": "2", 
        "rpt_animrec/vaccinated": "2", 
        "rpt_animrec/calved": "2", 
        "rpt_animrec/grp_animrec/tagid": "TZN000404015237", 
        "rpt_animrec/injuries": "2", 
        "rpt_animrec/anim_weight/bodyscore": "3.0", 
        "rpt_animrec/grp_service/dateserv": "2017-02-09", 
        "rpt_animrec/anim_weight/weight": "350.0", 
        "rpt_animrec/siredetails/sirecnty": "Nill", 
        "rpt_animrec/grp_servicedtls/servechange": "2", 
        "rpt_animrec/grp_servicedtls/servsourcebull": "1", 
        "rpt_animrec/parasites": "2", 
        "rpt_animrec/grp_feedwater/watertype": "3", 
        "rpt_animrec/served": "1", 
        "rpt_animrec/grp_animrec/cowmilked": "2", 
        "rpt_animrec/grp_servicedtls/servicechangeyes": "1", 
        "rpt_animrec/drydate": "2017-07-22", 
        "rpt_animrec/grp_servicedtls/sercost": "15000.0", 
        "rpt_animrec/anim_weight/heartgirth": "170.0", 
        "rpt_animrec/siredetails/sirecomp": "5", 
        "rpt_animrec/grp_service/servtype": "1"
    }
], 
"_bamboo_dataset_id": "", 
"start_time": "2017-10-03T13:25:01.529+03", 
"_uuid": "0b3b8257-f154-42e8-ae17-119bfa04ceca", 
"_tags": [], 
"grp-milkuse/milkprice": "0.0", 
"_userform_id": "adggtnz_ADGG-LNG01-20170725", 
"_submitted_by": null, 
"meta/instanceName": "ADGG-LNG01-20170725-HH0753628391", 
"enumtype": "2", 
"hh_village": "4835", 
"grp-milkuse/milkconsumed": "0.0", 
"_submission_time": "2017-10-05T18:35:19", 
"_version": "20170725", 
"_attachments": [], 
"end_time": "2017-10-03T13:31:28.876+03", 
"hh_kebele": "1807", 
"_id": 369982

}

I want to loop through several JSON files like this one and change the tagid value (e.g. "rpt_animrec/grp_animrec/tagid": "TZN000404015236") so that it has a different prefix.

I want to write a Python script that will loop through several JSON files like this and change the tagid. This is the code I have so far:

import json
import os

# Update animal tag IDs from the TZN prefix to the ETH prefix across a
# directory of JSON survey files.

JSON_DIR = "/opt/new/file/20180116/"
JSON_DIR_PROCESSED = "/opt/new/file/20180116updated/"

# Flattened key under which each animal record stores its tag ID.
TAGID_KEY = "rpt_animrec/grp_animrec/tagid"


def update_tagids(json_data, old_prefix="TZN", new_prefix="ETH"):
    """Swap old_prefix for new_prefix on every animal record's tagid.

    Mutates json_data in place and returns it. Records without a tagid,
    or whose tagid does not start with old_prefix, are left untouched.
    (The original code did json_data['TZN'] = json_data['ETH'], which
    raises KeyError and never touches the tagids.)
    """
    for record in json_data.get("rpt_animrec", []):
        tagid = record.get(TAGID_KEY)
        if tagid and tagid.startswith(old_prefix):
            record[TAGID_KEY] = new_prefix + tagid[len(old_prefix):]
    return json_data


def main(json_dir=JSON_DIR, json_dir_processed=JSON_DIR_PROCESSED):
    """Rewrite every .json file in json_dir into json_dir_processed."""
    for name in os.listdir(json_dir):
        if not name.endswith(".json"):
            print("%s not a JSON file" % name)
            continue
        src = os.path.join(json_dir, name)
        dst = os.path.join(json_dir_processed, name)
        print("Processing %s -> %s" % (src, dst))
        # Read fully, transform, then write: the handles are closed
        # promptly and a write failure cannot corrupt the source file.
        with open(src, 'r') as f:
            json_data = json.load(f)
        update_tagids(json_data)
        with open(dst, 'w') as f:
            json.dump(json_data, f, indent=4)


if __name__ == '__main__':
    main()
4
  • 1
    Do you know about python's json module? What have you tried so far? Commented May 24, 2018 at 21:36
  • I have tried the code above Commented May 24, 2018 at 21:44
  • I am a little confused, are you trying to change TZN000404015236 to ETH000404015236 ? Commented May 24, 2018 at 21:53
  • Yes that is what i am trying to do Commented May 24, 2018 at 22:02

3 Answers 3

1

Why bother parsing the json if you only want to replace one substring in a non-specific location with another substring? Just operate on the json string returned by the file object directly. You don't even need the json module.

import os

# Rewrite every .json file in json_dir into json_dir_processed with the
# literal substring 'TZN' replaced by 'ETH'. The swap is a plain text
# substitution, so no JSON parsing is required.
json_dir = "/opt/new/file/20180116/"
json_dir_processed = "/opt/new/file/20180116updated/"
for json_file in os.listdir(json_dir):
    if json_file.endswith(".json"):
        processed_json = os.path.join(json_dir_processed, json_file)
        json_file = os.path.join(json_dir, json_file)
        print("Processing %s -> %s" % (json_file, processed_json))
        # 'with' guarantees both handles are closed even if the read,
        # the replace, or the write raises (the original leaked them).
        with open(json_file) as src, open(processed_json, 'w') as dst:
            dst.write(src.read().replace('TZN', 'ETH'))
    else:
        print("%s not a JSON file" % json_file)
Sign up to request clarification or add additional context in comments.

1 Comment

@MirieriMogaka If you want to show your appreciation, please mark my answer as solving your problem. Glad I could help.
1
import json
import os

# Flattened key under which each animal record stores its tag ID.
TAGID_KEY = "rpt_animrec/grp_animrec/tagid"


def process(fp):
    """Read a JSON document from the open file object *fp*, replace the
    TZN tag prefix with ETH in every animal record, and return the
    re-serialized (pretty-printed) JSON text.

    Records without a tagid are skipped instead of raising KeyError —
    the sample data shows records carry different key sets.
    """
    json_data = json.load(fp)
    for record in json_data.get('rpt_animrec', []):
        tagid = record.get(TAGID_KEY)
        if tagid:
            record[TAGID_KEY] = tagid.replace('TZN', 'ETH')
    return json.dumps(json_data, indent=4)


source_path = '/opt/new/file/20180116/'
dest_path = '/opt/new/file/20180116updated/'

for path, dirnames, fnames in os.walk(source_path, topdown=True):
    for fname in (n for n in fnames if n.endswith('.json')):
        full_path = os.path.join(path, fname)
        end_path = os.path.join(dest_path, fname)
        print('Processing: {full_path} => {dest_path}'.format(
            full_path=full_path,
            dest_path=end_path
        ))
        # BUG FIX: write to end_path (the output file). The original
        # opened dest_path — the output *directory* string — for writing.
        with open(full_path, 'r') as in_fp, open(end_path, 'w') as out_fp:
            out_fp.write(process(in_fp))

You can try something like this if you want to traverse the whole path (including subdirectories), or change topdown to False if you don't.

Comments

1
import json
import glob
import os
import re

from pathlib import Path


def get_json_files():
    """Return the paths of all JSON files in the input directory."""
    pattern = '/home/test/PycharmProjects/test/*.json'
    return glob.glob(pattern)


def read_json_file(filename):
    """Load and return the JSON document stored in *filename*."""
    with open(filename) as fp:
        return json.load(fp)


def process_json(json_data):
    """Replace the TZN prefix with ETH in every animal record's tagid.

    Mutates json_data in place and returns it. Records without a tagid
    (or an empty one) are skipped instead of raising KeyError, and a
    document without an 'rpt_animrec' list is returned unchanged.
    """
    key = 'rpt_animrec/grp_animrec/tagid'
    for selection in json_data.get('rpt_animrec', []):
        tagid = selection.get(key)
        if tagid:
            # Plain str.replace — no regex metacharacters in 'TZN'.
            selection[key] = tagid.replace('TZN', 'ETH')
    return json_data


def write_json(json_data, file_path):
    """Pretty-print json_data into the processed/ output directory,
    reusing the basename of *file_path* as the output file name."""
    filename = os.path.basename(file_path)
    json_dir_processed = Path("/home/test/PycharmProjects/test/processed/{}".format(filename))
    # Create the output directory on first run; the original raised
    # FileNotFoundError if processed/ did not already exist.
    json_dir_processed.parent.mkdir(parents=True, exist_ok=True)
    with open(json_dir_processed, 'w') as f:
        json.dump(json_data, f, indent=4)


def process_json_files(json_files):
    """Run the read -> transform -> write pipeline over *json_files*."""
    for path in json_files:
        print("Processing {}".format(path))
        document = read_json_file(path)
        write_json(process_json(document), path)


if __name__ == '__main__':
    # Script entry point: collect the input files, then process them all.
    process_json_files(get_json_files())

This is how I would go about making that change.

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.