Find the names (with paths) of all matching CSV files in every sub-directory of the current directory
from pathlib import Path


def find_csv_files(root='./', filename='truthsAndTracksByMonteByTimeGlobal.csv'):
    """Return the sorted paths of every file called *filename* under *root*.

    The search is recursive (``Path.rglob``), so files in nested
    sub-directories are found as well.

    Args:
        root: Directory to start searching from. Defaults to the current
            working directory.
        filename: File name (or glob pattern) to look for.

    Returns:
        A sorted list of strings of the form ``"<parent directory>/<name>"``.
    """
    return sorted(
        f"{path.parent}/{path.name}" for path in Path(root).rglob(filename)
    )


all_filenames = find_csv_files()
all_filenames[:10]  # notebook display of the first ten matches
# Output from the original run:
# ['738020.4493/truthsandtracksbymontebytimeglobal.csv',
#  '738023.8086/truthsandtracksbymontebytimeglobal.csv',
#  '738148.6338/truthsandtracksbymontebytimeglobal.csv',
#  '738148.6348/truthsandtracksbymontebytimeglobal.csv',
#  '738148.6357/truthsandtracksbymontebytimeglobal.csv',
#  '738207.7685/truthsandtracksbymontebytimeglobal.csv',
#  '738207.778/truthsandtracksbymontebytimeglobal.csv',
#  '738207.782/truthsandtracksbymontebytimeglobal.csv',
#  '738207.8366/truthsandtracksbymontebytimeglobal.csv',
#  '738208.7243/truthsandtracksbymontebytimeglobal.csv']

# Alternative that can work in some cases, when every CSV file sits in the
# same (current) directory:
# extension = 'csv'
# all_filenames = [i for i in glob.glob('*.{}'.format(extension))]
Test: Check the data in each of the CSV files to be combined.
Do the files align well with each other?
# Sanity check: load the first few CSV files and print each one's shape so the
# column counts can be compared before the files are combined.
import pandas as pd  # local import so this cell runs on its own

row_total_count = 0
# Only the first five files are inspected here; iterate over all_filenames
# (without the slice) to check every file.
for f in all_filenames[:5]:
    print(f)
    df_s = pd.read_csv(f)
    print(df_s.shape, f)
    row_total_count += df_s.shape[0]
    # print(df_s.head())  # uncomment to eyeball the first rows of each file

# Output from the original run (every file has 31 columns):
# 738020.4493/truthsandtracksbymontebytimeglobal.csv
# (103500, 31) 738020.4493/truthsandtracksbymontebytimeglobal.csv
# 738023.8086/truthsandtracksbymontebytimeglobal.csv
# (3700, 31) 738023.8086/truthsandtracksbymontebytimeglobal.csv
# 738148.6338/truthsandtracksbymontebytimeglobal.csv
# (37, 31) 738148.6338/truthsandtracksbymontebytimeglobal.csv
# 738148.6348/truthsandtracksbymontebytimeglobal.csv
# (37, 31) 738148.6348/truthsandtracksbymontebytimeglobal.csv
# 738148.6357/truthsandtracksbymontebytimeglobal.csv
# (1850, 31) 738148.6357/truthsandtracksbymontebytimeglobal.csv
Keep track of the total number of rows across all files so that you can compare it with the shape of the final combined data file.
# Count the rows of every CSV file; the total is the number of rows the
# combined data file is expected to have.
import pandas as pd  # local import so this cell runs on its own

row_total_count = 0
for f in all_filenames:
    df_s = pd.read_csv(f)
    # print(df_s.shape, f)  # uncomment for a per-file breakdown
    row_total_count += df_s.shape[0]

print(row_total_count)
# Output from the original run:
# 117594

# Load one of the files (the second in sorted order) and preview its first row.
file = all_filenames[1]
print(file)
df = pd.read_csv(file, header=0)
df.head(1)  # notebook display of the first row
# Output from the original run:
# 738023.8086/truthsandtracksbymontebytimeglobal.csv
All proceeds from Medium will go to Justetc Social Services (a non-profit). Justetc Social Services provides services in the training and education areas.
All proceeds from Medium will go to Justetc Social Services (a non-profit). Justetc Social Services provides services in the training and education areas.