python files - ghdrako/doc_snipets GitHub Wiki

text_file = open("tasks.txt")
text_data = text_file.read()  # read all data from file
print(text_data)
text_file.close()
assert text_file.closed
from collections import namedtuple
Task = namedtuple("Task", "task_id title urgency") # Creates a named tuple class
with open("tasks.txt") as file:
  for line in file:
    stripped_line = line.strip()  # Removes the trailing line break
    task_id, title, urgency = stripped_line.split(",") # Splits the string with commas
    task = Task(task_id, title, urgency)
    print(f"{stripped_line}: {task}")

If the file doesn’t contain too much data, we can read all of its lines into a list object using the readlines method. Because list objects are mutable, it is easy to change the data and save it for other purposes.

with open("tasks.txt") as file:
  lines = file.readlines()
  updated_lines = [f"#{row}: {line.strip()}" for row, line in enumerate(lines, start=1)]

Read single line

with open("tasks.txt") as file:  
  print(file.readline())
  print(file.readline())
  print(file.readline(5))
  print(file.readline(8))
  print(file.readline())

Write to file

with open("tasks_new.txt", "w") as file:
  print("File:", file)
  result = file.write(data)
  print("Writing result:", result)
with open("tasks.txt", "a") as file:
  file.write(f"\n{new_task}")
Mode  read  write  create  truncate  Cursor position
r     *                              Start
w           *      *       *         Start
a           *      *                 End
r+    *     *                        Start
w+    *     *      *       *         Start
a+    *     *      *                 End
x           *      *                 Start

read: the mode allows reading data; write: allows writing new data; create: creates the file if it doesn’t exist; truncate: empties the existing file; cursor position: where the cursor sits when the operation starts.
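For example, "x" (exclusive creation) fails when the file already exists; a minimal sketch:

try:
  with open("tasks_new.txt", "x") as file:   # raises FileExistsError if tasks_new.txt is already there
    file.write("task_id,title,urgency\n")
except FileExistsError:
  print("tasks_new.txt already exists")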

Types of files:

  • csv (RFC 4180)
  • tsv
  • fixed-size
  • json (see the sketch after this list)
  • xml
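A quick JSON example using the standard json module (the file name tasks.json and its structure are made up):

import json

with open("tasks.json") as file:
  tasks = json.load(file)   # parse the whole file into Python objects (dicts, lists, ...)
print(tasks)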

Open and close

with open("file.txt","r") as file:    # using Context Manager
  print("file name:" + file.name)
  print("encoding:" + file.encoding)
  print("mode:" + file.mode)
  print("is closed:" + str(file.closed))
  file.close()
  print("is closed:" + str(file.closed))
with open("important.tmp", "w") as fout:    
  fout.write("The horse raced past the barn")    
  fout.write("fell.\n")
os.rename("important.tmp", "important")T
def open_for_write(fname, mode=""):    
  os.makedirs(os.path.dirname(fname), exists_ok=True)
  return open(fname, "w" + mode)

with open_for_write("some/deep/nested/name/of/file.txt") as fp:    
  fp.write("hello world")
from tempfile import NamedTemporaryFile

with NamedTemporaryFile("w+") as fp:   # default mode is binary ("w+b"); open in text mode to write str
  fp.write("line 1\n")
  fp.write("line 2\n")
  fp.flush()
  function_taking_file_name(fp.name)

Note that the fp.flush() call is important. The file object buffers writes until it is closed, but a NamedTemporaryFile vanishes when closed, so it must be flushed explicitly before calling a function that reopens the file by name for reading.
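A minimal sketch of such a consumer (read_back is a made-up helper):

from tempfile import NamedTemporaryFile

def read_back(path):
  with open(path) as f:   # reopen the temporary file by its name
    return f.read()

with NamedTemporaryFile("w+") as fp:
  fp.write("line 1\n")
  fp.flush()              # make the buffered data visible to other readers
  print(read_back(fp.name))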

reading

with open("file.txt","r") as file: 
  content = file.read()    # calosc pliku 
  line = file.readline()   # jedna linie
  lines = file.readlines() # read all lines into array object
  pprint(lines)

Iterate over file object

with open("file.txt","r") as file:
  for line in file:
    print(line)
    cols = line.split(sep=',')
    print(cols[0])
file.seek(0)  # go to beginning of file

Remove whitespace

with open('pi_digits.txt') as file_object: 
  contents = file_object.read()
  print(contents.rstrip())

The rstrip() method removes, or strips, any whitespace characters from the right side of a string.
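A quick illustration (the sample string is made up):

line = "  3.1415926  \n"
print(repr(line.rstrip()))  # '  3.1415926'    - trailing whitespace removed
print(repr(line.lstrip()))  # '3.1415926  \n'  - leading whitespace removed
print(repr(line.strip()))   # '3.1415926'      - both sides stripped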

with open(filename) as file_object: 
  lines = file_object.readlines()
  text = ''
  for line in lines:
    text += line.strip()

csv

In pandas, use read_csv to load tabular data and read_table to load TSV files (a short sketch follows); the examples below use the standard library csv module.
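A minimal sketch, assuming a tab-separated file tasks.tsv exists:

import pandas as pd

tasks_tsv = pd.read_table('tasks.tsv')           # read_table defaults to sep='\t'
# equivalent: pd.read_csv('tasks.tsv', sep='\t')
print(tasks_tsv.head())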

import csv
from pprint import pprint

with open("people.txt",newline='') as csvfile:
  person = csv.reader(csv.file,delimiter=',',quotechar='"')
  fields = next(person)   # omit the header
  for row in person:
    pprint(row)
    person_dict = dict(zip(fields, row)) # read each row as a dict object, with the header’s field names becoming the keys

Instead of calling the reader function, we call the DictReader constructor to create a DictReader object that takes the first row as the keys.

with open("tasks.txt", newline="") as file:
  csv_reader = csv.DictReader(file)
  for row in csv_reader:
    print(row)

csv without header

with open("tasks.txt", newline="") as file:
  csv_reader = csv.reader(file)
  tasks_rows = list(csv_reader) # retrieve all the rows as a list object
  print(tasks_rows)

append to csv

new_task = "1004,Museum,3"
with open("tasks.txt", "a", newline="") as file:
  file.write("\n")
  csv_writer = csv.writer(file)
  csv_writer.writerow(new_task.split(","))

write csv

fields = ['task_id', 'title', 'urgency']
with open("tasks_dict.txt", "w", newline="") as file:
  csv_writer = csv.DictWriter(file, fieldnames=fields)
  csv_writer.writeheader()
  csv_writer.writerows(tasks)   # tasks: a list of dicts whose keys match fields
csv.list_dialects()             # names of the registered dialects, e.g. ['excel', 'excel-tab', 'unix']
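A dialect bundles formatting parameters; a minimal sketch using the built-in excel-tab dialect to read a tab-separated file (tasks.tsv is made up):

with open("tasks.tsv", newline="") as file:
  tsv_reader = csv.reader(file, dialect="excel-tab")   # tab-delimited
  for row in tsv_reader:
    print(row)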

using NumPy

  • loadtxt()
  • genfromtxt()
  • recfromcsv() - use to read data of different types from a CSV file; creates a recarray, which allows field access using attributes
  • recfromtxt() - read data from a text file into a recarray
import numpy as np

np.loadtxt('file.txt')   # load a file generated by numpy
try:
  np.loadtxt('file.txt', delimiter=',', usecols=0)   # other options: skiprows, dtype, converters, comments
except Exception as e:
  print(str(e))

np.genfromtxt('file.txt', delimiter=',', skip_header=1)                    # handles missing values (see missing_values) - inserts nan for the gaps
np.genfromtxt('file.txt', delimiter=',', skip_header=1, filling_values=0)  # fills the gaps with 0
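To get named-field access similar to recfromcsv, genfromtxt can take the field names from the header; a minimal sketch, assuming file.csv has a header row containing a title column:

data = np.genfromtxt('file.csv', delimiter=',', names=True, dtype=None)
print(data.dtype.names)   # field names taken from the header row
print(data['title'])      # access a whole column by field name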

Pandas

import pandas as pd

posts_csv = pd.read_csv('file.csv')  # all rows loaded
posts_csv.head(2)                    # print first 2 rows

Skipping rows using a function

posts_csv = pd.read_csv('file.csv', skiprows=lambda x: x % 2 != 0)   # skip every odd-numbered row (row 0 is the header)

Load specified columns

posts_csv = pd.read_csv('file.csv',usecols=[0,6,7,8]) 

Specify no header in file

posts_csv = pd.read_csv('file.csv', header=None, prefix='col')   # columns are named col0, col1, ...

posts_csv.columns   # the column labels (an Index object)
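Note: the prefix argument has been deprecated and removed in newer pandas releases; passing explicit names is the portable way to label headerless columns. A minimal sketch (the column names are made up, assuming a three-column file):

posts_csv = pd.read_csv('file.csv', header=None, names=['id', 'title', 'score'])
print(posts_csv.columns)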