Find Average Food Intake (Food Groups) by Age

--

from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

import os

# Folder holding the per-gender diet CSV files offered for selection.
data_folder = './csvdietfiles/'
# os.listdir returns names in arbitrary order; sort them so the dropdown
# built from this list is stable across runs and platforms.
diet_files = sorted(os.listdir(data_folder))
diet_files
['dairy_intakes.txt_Female.csv',
'dairy_intakes.txt_Male.csv',
'dark_green_vegetables_subgroup.txt_Female.csv',
'dark_green_vegetables_subgroup.txt_Male.csv',
'fruits_intake.txt_Female.csv',
'fruits_intake.txt_Male.csv',
'grains_intake.txt_Female.csv',
'grains_intake.txt_Male.csv',
'legumes_beans_and_peas_subgroup.txt_Female.csv',
'legumes_beans_and_peas_subgroup.txt_Male.csv',
'meat_poultry_and_eggs_subgroup.txt_Female.csv',
'meat_poultry_and_eggs_subgroup.txt_Male.csv',
'nuts_seeds_and_soy_products_subgroup.txt_Female.csv',
'nuts_seeds_and_soy_products_subgroup.txt_Male.csv',
'other_vegetables_subgroup.txt_Female.csv',
'other_vegetables_subgroup.txt_Male.csv',
'protein_intake.txt_Female.csv',
'protein_intake.txt_Male.csv',
'red_and_orange_vegetables_subgroup.txt_Female.csv',
'red_and_orange_vegetables_subgroup.txt_Male.csv',
'seafood_subgroup.txt_Female.csv',
'seafood_subgroup.txt_Male.csv',
'starchy_vegetables_subgroup.txt_Female.csv',
'starchy_vegetables_subgroup.txt_Male.csv',
'vegetable_intake.txt_Female.csv',
'vegetable_intake.txt_Male.csv']
def write_to_file(filename, data):
    """Write intake rows to ``filename`` as comma-separated text.

    The output starts with a fixed header line, followed by one line per row
    of ``data`` with the row's values joined by commas.  A row with exactly
    four values gets '120' inserted as its second column (the ``age_to``
    position of the header) so it lines up with five-value rows.

    The text that is written is also printed to standard output.

    Args:
        filename: Path of the file to create or overwrite.
        data: Iterable of rows; each row is an iterable of values.  Values
            are rendered with ``str()``, so numbers are accepted as well as
            strings.
    """
    lines = ['age_from, age_to, average_intake, recommended_intake_low, recommended_intake_high']
    for row in data:
        cells = [str(value) for value in row]
        if len(cells) == 4:
            # Row has no upper age bound: fill in 120 as age_to.
            cells.insert(1, '120')
        lines.append(','.join(cells))
    file_data = '\n'.join(lines) + '\n'

    print(file_data)
    # The context manager closes the handle even if the write fails.
    with open(filename, 'w') as out_file:
        out_file.write(file_data)
#print('Select a health measure/aspect to visualize\n')
# create the interactive interface
def f(diet_file):
    """Return the chosen diet file name unchanged.

    Used as the callback for ``interactive``; the parameter name is the
    keyword the widget is built from.
    """
    return diet_file

# Offer the listed diet files in a widget and show it; the selection is read
# later through diet_file.result.
print('Select a measure:')
diet_file = interactive(f, diet_file=diet_files)
display(diet_file)
Select a measure:



interactive(children=(Dropdown(description='diet_file', options=('dairy_intakes.txt_Female.csv', 'dairy_intake…
# Path of the file picked in the dropdown.
path = data_folder + diet_file.result
print ('Selected: ' + path)

# Female counterpart of the selection.  It is derived from the selected name
# instead of being hard-coded to one food group, so the two files that are
# averaged later always describe the same food group.  If a '_Female.csv'
# file is selected, path_f is the same file as path.
path_f = data_folder + diet_file.result.replace('_Male.csv', '_Female.csv')
print ('Selected: ' + path_f)
Selected: ./csvdietfiles/starchy_vegetables_subgroup.txt_Male.csv
Selected: ./csvdietfiles/starchy_vegetables_subgroup.txt_Female.csv
import pandas as pd
# Load the file at `path` into a DataFrame and preview its first rows.
df = pd.read_csv(path)
df.head()
png
# Load the file at `path_f` into a DataFrame and preview its first rows.
df_f = pd.read_csv(path_f)
df_f.head()
png
import os.path
import re

# Expand the age-range rows of the two input files (`path` and `path_f`) into
# one output row per single year of age.  The inputs are read in lockstep
# (row N of `path` together with row N of `path_f`) and the recommended
# low/high values of each pair of rows are averaged.

# Both inputs are required, so check both before opening anything.
if not (os.path.exists(path) and os.path.exists(path_f)):
    print('Fatal error: file does not exist')
    raise SystemExit(1)

csv_diet_folder = './agescsvdietfiles/subgroup/'

# The output file is named after the selected input file.
file_name = csv_diet_folder + diet_file.result

# Factor applied to the averaged recommendations before they are written.
# NOTE(review): the notebook does not say what 150 represents - confirm.
RECOMMENDATION_SCALE = 150

# Context managers close all three handles even if a row fails to parse.
with open(path, 'r') as file, \
        open(path_f, 'r') as file_f, \
        open(file_name, 'w') as file_write:
    print(file_name)

    # Skip the header row of each input.
    file.readline()
    file_f.readline()

    for line in file:
        line_f = file_f.readline()

        # Lines read from a file keep their newline, so test the stripped
        # text to detect blank lines.
        if not line.strip():
            continue
        if not line_f.strip():
            raise ValueError('no matching row in ' + path_f + ' for: ' + line.strip())

        s = line.split(',')
        s_f = line_f.split(',')

        # The age range of the `path` row drives the expansion.
        age_from = int(s[0])
        age_to = int(s[1])

        # Columns 3 and 4 hold the low and high recommendation; average the
        # two files, round to 2 decimals, then scale.  Computed once per
        # input row because it does not depend on the individual age.
        recom_l = round((float(s[3]) + float(s_f[3])) / 2, 2) * RECOMMENDATION_SCALE
        recom_h = round((float(s[4]) + float(s_f[4])) / 2, 2) * RECOMMENDATION_SCALE

        # One output row per year of age: age, age, low, high.
        for a in range(age_from, age_to + 1):
            d = str(a) + ',' + str(a) + ',' + str(recom_l) + ',' + str(recom_h) + '\n'
            file_write.write(d)
./agescsvdietfiles/subgroup/starchy_vegetables_subgroup.txt_Male.csv

### process_mortality_data
# Point data_folder at the mortality data and list the files found there.
# NOTE(review): os.listdir returns entries in arbitrary order and does not
# filter by extension, so non-CSV files in the folder are listed as well.
data_folder = './process_mortality_data/'
mortality_file = os.listdir(data_folder)
mortality_file
['hellomortality_h_1_Total patient deaths_ ESRD patients.csv',
'hellomortality_h_1_Total patient deaths_ ESRD_patients.csv',
'mortality_h_1_Total patient deaths_ ESRD patients.xlsx',
'mortality_h_1_Total patient deaths_ ESRD_patients.csv',
'_hellomortality_h_1_Total patient deaths_ ESRD_patients.csv']
#print('Select a health measure/aspect to visualize\n')
# create the interactive interface
def f(mortality_file):
    """Return the chosen mortality file name unchanged.

    Used as the callback for ``interactive``; the parameter name is the
    keyword the widget is built from.
    """
    return mortality_file

# Offer the listed mortality files in a widget and show it.  The name
# mortality_file is rebound from the file list to the widget; the selection
# is read later through mortality_file.result.
print('Select a measure:')
mortality_file = interactive(f, mortality_file=mortality_file)
display(mortality_file)
Select a measure:



interactive(children=(Dropdown(description='mortality_file', options=('hellomortality_h_1_Total patient deaths…
# Full path of the mortality file picked in the widget.
path = ''.join((data_folder, mortality_file.result))
print('Selected: ' + path)
Selected: ./process_mortality_data/mortality_h_1_Total patient deaths_ ESRD_patients.csv
import os.path
import re

# Expand the age-group rows of the mortality file at `path` into one output
# row per single year of age.


def _parse_age_group(label):
    """Return (age_from, age_to) parsed from an age-group label.

    A label of the form 'from-to' yields both bounds.  Anything else is
    treated as open-ended: '+' characters are removed from the first part
    and the upper bound is set to 120.
    """
    parts = label.split('-')
    try:
        return int(parts[0]), int(parts[1])
    except (ValueError, IndexError):
        return int(parts[0].replace('+', '')), 120


if not os.path.exists(path):
    print('Fatal error: file does not exist')
    raise SystemExit(1)

csv_diet_folder = './process_mortality_data/output/'

# Name the output after the selected mortality file (previously this used the
# diet selection, a copy-paste slip from the diet cell).
file_name = csv_diet_folder + mortality_file.result

# Context managers close both handles even if a row fails to parse.
with open(path, 'r') as file, open(file_name, 'w') as file_write:
    print(file_name)

    # Skip the header row.
    file.readline()

    for line in file:
        # Lines read from a file keep their newline, so test the stripped
        # text to detect blank lines.
        if not line.strip():
            continue

        s_l = line.split(',')
        print(s_l)

        # Column 0 holds the age-group label.
        age_from, age_to = _parse_age_group(s_l[0])

        # Spread the value in column 3 over the ages of the group.
        # NOTE(review): the divisor is fixed at 5 even for the open-ended
        # group, which is expanded up to age 120 - confirm this is intended.
        recom_low = float(s_l[3]) / 5

        # One output row per year of age: age, age, value.
        for a in range(age_from, age_to + 1):
            print(a, a, recom_low)
            d = str(a) + ',' + str(a) + ',' + str(recom_low) + '\n'
            file_write.write(d)
./process_mortality_data/output/mortality_h_1_Total patient deaths_ ESRD_patients.csv

References:

Ignore the below code:

# Lists to store the dates, hashes, and events found (named accordingly).
# Interpretation of the problem: three different outputs.
date_list = []
hash_list = []
event_list = []

# Date format assumed: yyyy-mm-dd, times ignored.
#   year:  4 digits, each 0-9
#   month: 2 digits, 01-12 (the earlier pattern [01][1-2] only matched
#          01, 02, 11 and 12)
#   day:   first digit 0-3, second digit 0-9
date_pattern = re.compile(r'[0-9]{4}-(?:0[1-9]|1[0-2])-[0-3][0-9]')
token_pattern = re.compile(r'[a-zA-Z0-9]+')

# NOTE(review): `file` is not opened here; this fragment relies on a handle
# left behind by earlier code - confirm it is open and positioned as intended.
for line in file:
    fields = line.split()
    # Field 0 is the date, field 1 the hash, field 6 the event; skip lines
    # (including blank ones) that are too short to hold all three.
    if len(fields) < 7:
        continue

    # Each list receives the findall() result for its field.
    date_list.append(date_pattern.findall(fields[0]))
    hash_list.append(token_pattern.findall(fields[1]))
    event_list.append(token_pattern.findall(fields[6]))

# close the file
file.close()

# print the lists
print('All dates found')
print(date_list)

print('\nAll hashes found')
print(hash_list)

print('\nAll events found')
print(event_list)


# reference
# https://docs.python.org/2/library/os.path.html
All dates found
[]

All hashes found
[]

All events found
[]
# 1.3 method 2 : output line by line
import os.path
import re

# Parse logs.txt and collect the date, hash, and event of each line, both as
# three separate lists and as one [date, hash, event] entry per line.
path = 'logs.txt'
if not os.path.exists(path):
    print('Fatal error: file does not exist')
    raise SystemExit(1)

# Lists to store the dates, hashes, and events found (named accordingly).
# Interpretation of the problem: three different outputs.
date_list = []
hash_list = []
event_list = []

line_by_line = []

# Date format assumed: yyyy-mm-dd, times ignored.
#   year:  4 digits, each 0-9
#   month: 2 digits, 01-12 (the earlier pattern [01][1-2] only matched
#          01, 02, 11 and 12)
#   day:   first digit 0-3, second digit 0-9
date_pattern = re.compile(r'[0-9]{4}-(?:0[1-9]|1[0-2])-[0-3][0-9]')
token_pattern = re.compile(r'[a-zA-Z0-9]+')

with open(path, 'r') as file:
    for line in file:
        fields = line.split()
        # Field 0 is the date, field 1 the hash, field 6 the event; skip
        # lines (including blank ones) too short to hold all three.
        if len(fields) < 7:
            continue

        date_part = date_pattern.findall(fields[0])
        hash_part = token_pattern.findall(fields[1])
        event_part = token_pattern.findall(fields[6])

        # Each list receives the findall() result for its field.
        date_list.append(date_part)
        hash_list.append(hash_part)
        event_list.append(event_part)

        # Only emit a combined entry when all three parts were found, so a
        # line without a valid date no longer raises IndexError.
        if date_part and hash_part and event_part:
            line_by_line.append([date_part[0], hash_part[0], event_part[0]])

# print the combined per-line entries
print(line_by_line)

print('\nFile content for verification')
with open(path, 'r') as file:
    print(file.read())


# reference
# https://docs.python.org/2/library/os.path.html
Sample Data File Content

age_from, age_to, average_intake, recommended_intake_low, recommended_intake_high
1,3,2.5,2.0,2.5
4,8,2.2,2.5,3.0
9,13,2.4,2.9,3.1
14,18,2.5,2.9,3.1
19,30,1.9,2.9,3.1
31,50,1.8,2.9,3.1
51,70,1.7,2.9,3.1
71,120,1.6,2.9,3.1


The data were copied and pasted into files from a web page; some manual cleanup may have been applied.

--

--

Justetc Social Services (non-profit)
Data Science Project Development

All proceeds from Medium will go to Justetc Social Services ( non-profit). Justetc Social Services provides services in the Training and Education Areas.