Hey guys.
I've been programming alone for quite a while, and I'm worried I may be perpetuating bad habits that I'm not even aware of. I'd appreciate feedback on this piece of code and on how I've approached the problem. It's an automation script that calculates the mean and standard deviation of the values in the JSON files inside a directory (the JSON files have nested structures). Thank you for your time.
import copy
import glob
import json
import os
import statistics as stats
from sys import argv
from typing import Any
def read_json(file_name: str) -> Any:
    """Return the parsed contents of the JSON document stored at *file_name*.

    The file is opened as UTF-8 text and closed before parsing starts.
    """
    with open(file_name, encoding="utf-8") as handle:
        text = handle.read()
    return json.loads(text)
def group_metrics(json_files: list[Any]) -> Any:
    """Merge a sequence of parsed JSON documents into one grouped structure.

    A deep copy of the first document is used as the starting point, so none
    of the inputs are modified; every later document is then folded into that
    copy with ``copy_information``.

    Raises:
        IndexError: If *json_files* is empty.
    """
    baseline, later_documents = json_files[0], json_files[1:]
    merged = copy.deepcopy(baseline)
    for document in later_documents:
        copy_information(merged, document)
    return merged
def copy_information(grouped_json: Any, json_object: Any) -> None:
    """Append every leaf value of *json_object* to the matching slot of *grouped_json*.

    Both arguments are walked in parallel. Nested dicts are handled
    recursively; any other value is treated as a leaf and collected into a
    list stored under the same key in *grouped_json*, which is modified in
    place.

    A slot is wrapped in a list only the first time it receives a value.
    Wrapping it on every call would nest the samples one level deeper per
    merged document (``[[a, b], c]``), which is not a flat sequence of numbers.

    NOTE: a slot that already holds a list is assumed to be an accumulator,
    so leaves that are themselves JSON arrays are not supported.

    Raises:
        KeyError: If *json_object* contains a key missing from *grouped_json*.
    """
    for key, value in json_object.items():
        if isinstance(value, dict):
            copy_information(grouped_json[key], value)
            continue
        collected = grouped_json[key]
        if not isinstance(collected, list):
            # First merge for this key: turn the lone value into a sample list.
            collected = grouped_json[key] = [collected]
        collected.append(value)
def calculate_stats(grouped_json: Any) -> Any:
    """Replace each leaf of *grouped_json* with a ``(mean, stdev)`` tuple.

    Nested dicts are processed recursively. Every other value is passed to
    ``statistics.fmean`` and ``statistics.stdev`` and overwritten with the
    resulting pair. The mapping is updated in place and also returned.
    """
    for name in grouped_json:
        node = grouped_json[name]
        if not isinstance(node, dict):
            # Only existing keys are reassigned, so iterating the dict is safe.
            grouped_json[name] = (stats.fmean(node), stats.stdev(node))
        else:
            calculate_stats(node)
    return grouped_json
def main():
    """Print the mean and standard deviation of the metrics in a directory of JSON files.

    Expects exactly one command-line argument: the directory to scan. Every
    ``*.json`` file directly inside it is parsed with ``read_json``, the
    documents are merged with ``group_metrics`` and summarised with
    ``calculate_stats``, and the result is printed as indented JSON.

    Raises:
        SystemExit: With an explanatory message when the argument count is
            wrong, the path is not a directory, or the directory holds fewer
            than two JSON files.
    """
    # Validate with explicit errors rather than ``assert``: assertions are
    # removed under ``python -O`` and tell the user nothing about the problem.
    if len(argv) != 2:
        program = argv[0] if argv else "script"
        raise SystemExit(f"usage: {program} <directory>")
    directory = argv[1]
    if not os.path.isdir(directory):
        raise SystemExit(f"error: {directory!r} is not a directory")

    # Escape the directory part so characters such as "[" or "*" in its name
    # are matched literally; sort so the processing order is deterministic.
    pattern = os.path.join(glob.escape(directory), "*.json")
    directory_files = sorted(glob.glob(pattern))

    # statistics.stdev needs at least two data points per metric.
    if len(directory_files) < 2:
        raise SystemExit(
            f"error: need at least two .json files in {directory!r}, "
            f"found {len(directory_files)}"
        )

    json_files = [read_json(file_name) for file_name in directory_files]
    json_grouped = group_metrics(json_files)
    json_mean = calculate_stats(json_grouped)
    print(json.dumps(json_mean, indent=4))
# Run the command-line entry point only when this file is executed as a
# script, so importing the module has no side effects.
if __name__ == "__main__":
    main()