Combining Multiple CSV Files into One

Applied to a Tracking and Estimation Project

#!/usr/bin/env python
# coding: utf-8

import os;
import glob;
import pandas as pd;
# Make the data folder the working directory; the relative paths used
# further down (e.g. Path('./')) are resolved against it.
data_folder = './'
os.chdir(data_folder)

Find the paths of all matching CSV files in every sub-directory of the current folder.

from pathlib import Path

# Recursively collect every per-run results file below the current directory.
# Each entry is stored as '<parent directory>/<file name>' and the list is
# sorted so the files are always processed in the same order.
all_filenames = sorted(
    str(path.parent) + '/' + str(path.name)
    for path in Path('./').rglob('truthsAndTracksByMonteByTimeGlobal.csv')
)

# Show the first few matches as a quick visual check.
all_filenames[:10]
['738020.4493/truthsandtracksbymontebytimeglobal.csv',
'738023.8086/truthsandtracksbymontebytimeglobal.csv',
'738148.6338/truthsandtracksbymontebytimeglobal.csv',
'738148.6348/truthsandtracksbymontebytimeglobal.csv',
'738148.6357/truthsandtracksbymontebytimeglobal.csv',
'738207.7685/truthsandtracksbymontebytimeglobal.csv',
'738207.778/truthsandtracksbymontebytimeglobal.csv',
'738207.782/truthsandtracksbymontebytimeglobal.csv',
'738207.8366/truthsandtracksbymontebytimeglobal.csv',
'738208.7243/truthsandtracksbymontebytimeglobal.csv']
# can work in some cases ... for the same path
# extension = 'csv'
# all_filenames = [i for i in glob.glob('*.{}'.format(extension))]

Test: check the data in each CSV file that is going to be combined.

Do the files align with each other, i.e. do they all have the same number of columns?

# Sanity check on a small sample: read only the first five files and print
# each file's path and (rows, columns) shape so the column counts can be
# compared by eye. Drop the [:5] slice to check every file instead.
row_total_count = 0
for f in all_filenames[:5]:
    print(f)
    df_s = pd.read_csv(f)
    print(df_s.shape, f)
    # Running total of the rows seen in the sampled files.
    row_total_count += df_s.shape[0]
738020.4493/truthsandtracksbymontebytimeglobal.csv
(103500, 31) 738020.4493/truthsandtracksbymontebytimeglobal.csv
738023.8086/truthsandtracksbymontebytimeglobal.csv
(3700, 31) 738023.8086/truthsandtracksbymontebytimeglobal.csv
738148.6338/truthsandtracksbymontebytimeglobal.csv
(37, 31) 738148.6338/truthsandtracksbymontebytimeglobal.csv
738148.6348/truthsandtracksbymontebytimeglobal.csv
(37, 31) 738148.6348/truthsandtracksbymontebytimeglobal.csv
738148.6357/truthsandtracksbymontebytimeglobal.csv
(1850, 31) 738148.6357/truthsandtracksbymontebytimeglobal.csv

Keep track of the total number of rows across all files so that you can compare it with the shape of the final combined data file.

# Add up the number of rows of every input file. The combined frame built
# later must contain exactly this many rows.
row_total_count = 0
for f in all_filenames:
    # df_s is kept at module level: it holds the most recently read file and
    # is inspected again further down (df_s.columns).
    df_s = pd.read_csv(f)
    row_total_count += df_s.shape[0]

# Display the grand total.
row_total_count
# Output of the preceding cell: 117594
row_total_count
# Output: 117594

# Load the second file in the sorted list on its own, treating its first line
# as the header row, and display its first record.
file = all_filenames[1]
print(file)
df = pd.read_csv(file, header=0)
df.head(1)
738023.8086/truthsandtracksbymontebytimeglobal.csv
png
# Shape of the single file loaded into df.
df.shape
# Output: (3700, 31)

# Column names of the most recently read input file (df_s).
df_s.columns
# Output:
# Index(['Monte', 'Time', 'x', 'x_dot', 'y', 'y_dot', 'ID',
# 'number_of_targets_to_track', 'distanceThreshold', 'v_max',
# 'NN_threshold', 'n_count_for_confirmed', 'm_count_for_confirmed',
# 'n_count_for_dead', 'm_count_for_dead', 'Pd', 'FalseAlarmDensityLambda',
# 'RangeErrorStdDeviation', 'AzimuthErrorStdDeviation',
# 'target_std_deviaion_sigma_v', 'ParameterSet', 'assignedTrackId',
# 'truthToTrackPositionRmse', 'truthToTrackPositionNormSquare',
# 'truthToTrackVelocityRmse', 'truthToTrackVelocityNormSquare',
# 'isAFalseTrack', 'predicted_state_x', 'predicted_state_x_dot',
# 'predicted_state_y', 'predicted_state_y_dot'],
# dtype='object')

Combine all files in the list along axis=0, i.e. stack them one after another (row-wise).

# Read every file while skipping its header line (skiprows=1) and without
# assigning column names (header=None), then stack the frames row-wise.
combined_csv = pd.concat(
    [pd.read_csv(f, header=None, skiprows=1) for f in all_filenames],
    axis=0,
)
combined_csv.shape
# Output: (117594, 31)

# The frame has no real column names, only the integer labels 0..30, so the
# file is written without a header row. With the default header=True those
# labels would be written as the first line and, because the file is read
# back with header=None below, would come back as a bogus extra data row.
combined_csv.to_csv(
    'all_tracking_data_combined.csv',
    index=False,
    header=False,
    encoding='utf-8-sig',
)
combined_csv.shape
# Output: (117594, 31)

combined_csv.columns
# Output:
# Int64Index([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
# 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30],
# dtype='int64')

# Read the combined, header-less file back as float64 and preview it.
df = pd.read_csv('all_tracking_data_combined.csv', sep=',', dtype='float64', header=None)
df.head(10)
png

--

--

Justetc Social Services (non-profit)

All proceeds from Medium will go to Justetc Social Services (non-profit). Justetc Social Services provides services in the training and education areas.