blob: a3761ae9cd15f03cb25a25bae00172d7c171f80a (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
|
import csv
import io
import boto3
from typing import List, Dict
from .utils import Utilities
class FileHandler:
    """
    Read CSV data from a local file, an S3 object, or an in-memory string.

    Every reader returns one dictionary per data row, keyed by the names in
    the CSV header row. Support for JSON/Parquet files is planned for a
    later stage of the project.
    """

    def __init__(self):
        # Project helper; read_s3 uses it to split a path into bucket and key.
        self.utils = Utilities()

    def read_local(self, path) -> List[Dict[str, str]]:
        """
        Read a local UTF-8 encoded CSV file.

        Args:
            path: Location of the CSV file, passed straight to ``open``.

        Returns:
            A list with one dictionary per data row (see ``read_string``).

        Raises:
            OSError: If the file cannot be opened.
            UnicodeDecodeError: If the file is not valid UTF-8.
        """
        # newline="" turns off newline translation, as the csv module asks
        # for, so line breaks inside quoted fields reach the parser intact
        # and this method agrees with read_s3 on the same bytes.
        with open(path, mode="r", encoding="utf-8", newline="") as f:
            return self.read_string(f.read())

    def read_s3(self, path) -> List[Dict[str, str]]:
        """
        Read a UTF-8 encoded CSV object stored in S3.

        Args:
            path: S3 location of the object. It is split into bucket and key
                by ``Utilities.get_s3_path``; the accepted format is defined
                there (presumably an ``s3://bucket/key`` URI -- confirm
                against that helper).

        Returns:
            A list with one dictionary per data row (see ``read_string``).

        Raises:
            UnicodeDecodeError: If the object body is not valid UTF-8.
        """
        bucket, key = self.utils.get_s3_path(path)
        client = boto3.client("s3")
        response = client.get_object(Bucket=bucket, Key=key)
        # The whole object is loaded into memory before parsing.
        content = response["Body"].read().decode("utf-8")
        return self.read_string(content)

    @staticmethod
    def read_string(content: str) -> List[Dict[str, str]]:
        """
        Parse CSV text whose first row is the header.

        Args:
            content: The complete CSV document as text. A leading byte-order
                mark is ignored.

        Returns:
            A list with one dictionary per data row, keyed by header name.
            Empty or whitespace-only input yields an empty list. As with
            ``csv.DictReader``, a row shorter than the header maps the
            missing columns to ``None`` and surplus values are collected in
            a list under the key ``None``.
        """
        # A byte-order mark (common in spreadsheet exports) would otherwise
        # become part of the first column name.
        if content.startswith("\ufeff"):
            content = content[1:]
        if not content.strip():
            return []
        # newline="" lets the buffer split on \n, \r\n and bare \r without
        # rewriting them; the default only splits on \n, which makes the csv
        # parser reject CR-only files.
        reader = csv.DictReader(io.StringIO(content, newline=""))
        return [dict(row) for row in reader]
|