python files - ghdrako/doc_snipets GitHub Wiki
# Basic pattern: open a file, read its whole contents, then close it explicitly.
# (Prefer a `with` block in real code; this form is shown for comparison.)
text_file = open("tasks.txt")
text_data = text_file.read() # read all data from file
print(text_data)
text_file.close()
assert text_file.closed
from collections import namedtuple

Task = namedtuple("Task", "task_id title urgency")  # Creates a named tuple class

# Iterating over the file object yields one line at a time (memory friendly).
with open("tasks.txt") as file:
    for line in file:
        stripped_line = line.strip()  # Removes the trailing line break
        task_id, title, urgency = stripped_line.split(",")  # Splits the string with commas
        task = Task(task_id, title, urgency)
        print(f"{stripped_line}: {task}")
If the file doesn’t hold too much data, we can read all of its lines into a list
object using the `readlines` method. Because list objects are mutable, it is
easier to change the data and save it for other purposes.
# readlines() loads every line of the file into a list.
with open("tasks.txt") as file:
    lines = file.readlines()

# Prefix each line with its 1-based row number.
updated_lines = [f"#{row}: {line.strip()}" for row, line in enumerate(lines, start=1)]
Read single line
# readline() returns the next line.  An optional integer argument caps how many
# characters are read; the remainder of that line is returned by later calls.
with open("tasks.txt") as file:
    print(file.readline())
    print(file.readline())
    print(file.readline(5))  # at most 5 characters of the current line
    print(file.readline(8))  # the next 8 characters of the same line
    print(file.readline())
Write to file
# Mode "w" truncates (or creates) the file before writing.
with open("tasks_new.txt", "w") as file:
    print("File:", file)
    result = file.write(data)  # NOTE: `data` must be defined beforehand
    print("Writing result:", result)  # write() returns the number of characters written

# Mode "a" appends; the cursor starts at the end of the file.
with open("tasks.txt", "a") as file:
    file.write(f"\n{new_task}")  # NOTE: `new_task` must be defined beforehand
| Mode | read | write | create | truncate | Cursor position |
|------|------|-------|--------|----------|-----------------|
| r    | *    |       |        |          | Start           |
| w    |      | *     | *      | *        | Start           |
| a    |      | *     | *      |          | End             |
| r+   | *    | *     |        |          | Start           |
| w+   | *    | *     | *      | *        | Start           |
| a+   | *    | *     | *      |          | End             |
| x    |      | *     | *      |          | Start           |

read: reads the data; write: writes new data; create: creates a new file; truncate: resizes the file to zero; cursor position: where the cursor is when the file is opened.
Types of files:
- csv RFC4180
- tsv
- fixed-size
- json
- xml
Open and close
# A file object exposes its name, encoding, mode and closed state.
with open("file.txt","r") as file: # using Context Manager
    print("file name:" + file.name)
    print("encoding:" + file.encoding)
    print("mode:" + file.mode)
    print("is closed:" + str(file.closed))
    file.close()  # redundant inside `with` -- shown only to demonstrate `closed`
    print("is closed:" + str(file.closed))  # True after the explicit close
import os

# Write to a temporary name first, then rename.  On POSIX, rename within the
# same filesystem is atomic, so readers never see a half-written "important".
with open("important.tmp", "w") as fout:
    fout.write("The horse raced past the barn")
    fout.write("fell.\n")
os.rename("important.tmp", "important")  # was: a stray "T" after this call
def open_for_write(fname, mode=""):
    """Open *fname* for writing, creating any missing parent directories.

    `mode` is appended to "w", e.g. mode="b" opens the file in binary mode.
    """
    # was: exists_ok -- os.makedirs takes `exist_ok` (TypeError otherwise)
    os.makedirs(os.path.dirname(fname), exist_ok=True)
    return open(fname, "w" + mode)
# Parent directories are created on demand by open_for_write().
with open_for_write("some/deep/nested/name/of/file.txt") as fp:
    fp.write("hello world")
from tempfile import NamedTemporaryFile

# NamedTemporaryFile defaults to binary mode ("w+b"), so writing str objects
# raises TypeError -- open it in text mode instead.
with NamedTemporaryFile("w") as fp:
    fp.write("line 1\n")
    fp.write("line 2\n")
    fp.flush()  # writes are buffered; flush before another reader opens fp.name
    function_taking_file_name(fp.name)
Note that the `fp.flush()` call there is important. The file object caches writes until it is closed. However, a `NamedTemporaryFile` vanishes when closed, so explicitly flushing it is important before calling a function that reopens the file for reading.
reading
from pprint import pprint

with open("file.txt","r") as file:
    content = file.read()      # the whole file
    line = file.readline()     # one line (empty here: the cursor is already at EOF)
    lines = file.readlines()   # read the remaining lines into a list
pprint(lines)
Iterate over file object
# A file object is an iterator over its lines.
with open("file.txt","r") as file:
    for line in file:
        print(line)
        cols = line.split(sep=',')
        print(cols[0])
    file.seek(0)  # rewind the cursor to the beginning of the file
Remove whitespace
with open('pi_digits.txt') as file_object:
    contents = file_object.read()
    print(contents.rstrip())  # drop the trailing newline from the file
The `rstrip()` method removes, or strips, any whitespace characters from the right side of a string.
with open(filename) as file_object:  # NOTE: `filename` must be defined beforehand
    lines = file_object.readlines()

# Concatenate all lines into one string with surrounding whitespace stripped.
text = ''
for line in lines:
    text += line.strip()
csv
Use pandas `read_csv` to load tabular data; use `read_table` to load TSV files.
import csv
from pprint import pprint

with open("people.txt",newline='') as csvfile:
    # was: csv.reader(csv.file, ...) -- AttributeError; pass the file object
    person = csv.reader(csvfile, delimiter=',', quotechar='"')
    fields = next(person)  # consume the header row
    for row in person:
        pprint(row)
        # Pair the header's field names with this row's values.
        person_dict = dict(zip(fields, row))
Instead of calling the reader function, we call the DictReader constructor to create a DictReader object that takes the first row as the keys.
# DictReader takes the first row as the keys and yields one dict per data row.
with open("tasks.txt", newline="") as file:
    csv_reader = csv.DictReader(file)
    for row in csv_reader:
        print(row)
csv without header
with open("tasks.txt", newline="") as file:
    csv_reader = csv.reader(file)
    tasks_rows = list(csv_reader)  # retrieve all the rows as a list object
print(tasks_rows)
append to csv
new_task = "1004,Museum,3"
with open("tasks.txt", "a", newline="") as file:
    file.write("\n")  # make sure the appended row starts on its own line
    csv_writer = csv.writer(file)
    csv_writer.writerow(new_task.split(","))
write csv
fields = ['task_id', 'title', 'urgency']
with open("tasks_dict.txt", "w", newline="") as file:
    csv_writer = csv.DictWriter(file, fieldnames=fields)
    csv_writer.writeheader()
    csv_writer.writerows(tasks)  # NOTE: `tasks` (a list of dicts) must exist

csv.list_dialects()  # names of the registered CSV dialects
Using NumPy
- loadtxt()
- genfromtxt()
- recfromcsv() - use to read data of different types from a CSV file. Creates a recarray, which allows field access using attributes
- recfromtxt() - reads data from a text file into a recarray
import numpy as np

np.loadtxt('file.txt')  # load a file generated by numpy (opening quote was missing)
try:
    # other options: skiprows, dtype, converters, comments
    np.loadtxt('file.txt', delimiter=',', usecols=0)
except Exception as e:
    print(str(e))

# genfromtxt can handle missing values (missing_values); by default it inserts nan
np.genfromtxt('file.txt', delimiter=',', skip_header=1)
# filling_values=0 inserts 0 instead of nan for the missing entries
np.genfromtxt('file.txt', delimiter=',', skip_header=1, filling_values=0)
Pandas
import pandas as pd  # was: "import panda" -- the package is named pandas

posts_csv = pd.read_csv('file.csv')  # all rows loaded into a DataFrame
posts_csv.head(2)  # first 2 rows
Skipping rows using a function
posts_csv = pd.read_csv('file.csv',skiprows=lambda x: x % 2 != 0)
Load specified columns
posts_csv = pd.read_csv('file.csv',usecols=[0,6,7,8])
Specify no header in file
# header=None treats the first row as data; prefix='col' names the columns
# col0, col1, ...  NOTE: `prefix` was removed in pandas 2.0; pass `names=` there.
posts_csv = pd.read_csv('file.csv', header=None, prefix='col')
posts_csv.columns  # was: posts_header (undefined name)