KR_XML - somaz94/python-study GitHub Wiki

Python XML 처리 개념 정리


1️⃣ XML 기초

XML(eXtensible Markup Language)은 데이터를 구조화하여 저장하고 전송하는 데 사용되는 마크업 언어이다. 자체 설명적인 구조와 플랫폼 독립적인 특성으로 다양한 시스템 간 데이터 교환에 널리 사용된다.

XML์˜ ์ฃผ์š” ํŠน์ง•์€ ๋‹ค์Œ๊ณผ ๊ฐ™๋‹ค:

  • ๊ณ„์ธต์  ๊ตฌ์กฐ๋กœ ๋ณต์žกํ•œ ๋ฐ์ดํ„ฐ ํ‘œํ˜„ ๊ฐ€๋Šฅ
  • ์ž์ฒด ์„ค๋ช…์ ์ธ ํƒœ๊ทธ ์‚ฌ์šฉ์œผ๋กœ ๊ฐ€๋…์„ฑ ํ–ฅ์ƒ
  • ํ™•์žฅ ๊ฐ€๋Šฅํ•œ ์‚ฌ์šฉ์ž ์ •์˜ ํƒœ๊ทธ ์ง€์›
  • ํ…์ŠคํŠธ ๊ธฐ๋ฐ˜์œผ๋กœ ๋‹ค์–‘ํ•œ ํ”Œ๋žซํผ์—์„œ ํ˜ธํ™˜
import xml.etree.ElementTree as ET

# XML ๋ฌธ์ž์—ด ํŒŒ์‹ฑ
xml_string = '''
<root>
    <person id="1">
        <name>ํ™๊ธธ๋™</name>
        <age>30</age>
        <skills>
            <skill>Python</skill>
            <skill>Java</skill>
        </skills>
    </person>
    <person id="2">
        <name>๊น€์ฒ ์ˆ˜</name>
        <age>25</age>
        <skills>
            <skill>JavaScript</skill>
        </skills>
    </person>
</root>
'''

# ๋ฌธ์ž์—ด์—์„œ XML ํŒŒ์‹ฑ
# Parse the XML text held in ``xml_string`` and keep its root element.
root = ET.fromstring(xml_string)

# Look up the first <person> child of the root and print two of its children.
person = root.find('person')
print(f"์ด๋ฆ„: {person.find('name').text}")  # text of the first person's <name>
print(f"๋‚˜์ด: {person.find('age').text}")   # text of the first person's <age>

# Read an attribute value from the same element.
print(f"ID: {person.get('id')}")  # ID: 1

# Iterate over every <person> child of the root.
for person in root.findall('person'):
    name = person.find('name').text
    age = person.find('age').text
    print(f"{name}({age}์„ธ)")

# Reach nested <skill> elements with a relative path.
for person in root.findall('person'):
    name = person.find('name').text
    skills = [skill.text for skill in person.findall('skills/skill')]
    print(f"{name}์˜ ๊ธฐ์ˆ : {', '.join(skills)}")

✅ 특징:

  • ๊ณ„์ธต์  ๋ฐ์ดํ„ฐ ๊ตฌ์กฐ ํ‘œํ˜„์— ์ ํ•ฉ
  • ์‚ฌ๋žŒ์ด ์ฝ๊ณ  ์ดํ•ดํ•˜๊ธฐ ์‰ฌ์šด ํ˜•์‹
  • ์ž์ฒด ์„ค๋ช…์ ์ธ ํƒœ๊ทธ๋กœ ๋ฌธ์„œ ์˜๋ฏธ ์ „๋‹ฌ
  • ์œ ๋‹ˆ์ฝ”๋“œ ์ง€์›์œผ๋กœ ๋‹ค๊ตญ์–ด ๋ฐ์ดํ„ฐ ์ฒ˜๋ฆฌ ๊ฐ€๋Šฅ
  • ํ™•์žฅ์„ฑ์ด ๋›ฐ์–ด๋‚˜ ์ƒˆ๋กœ์šด ๋ฐ์ดํ„ฐ ์š”์†Œ ์ถ”๊ฐ€ ์šฉ์ด
  • ๋‹ค์–‘ํ•œ ํ”„๋กœ๊ทธ๋ž˜๋ฐ ์–ธ์–ด์—์„œ ์ง€์›ํ•˜๋Š” ํ‘œ์ค€ ํ˜•์‹


2️⃣ XML 파일 처리

Python์—์„œ๋Š” xml.etree.ElementTree ๋ชจ๋“ˆ์„ ์‚ฌ์šฉํ•˜์—ฌ XML ํŒŒ์ผ์„ ์ฝ๊ณ  ์“ธ ์ˆ˜ ์žˆ๋‹ค. ์ด ๋ชจ๋“ˆ์€ XML์„ ํŠธ๋ฆฌ ๊ตฌ์กฐ๋กœ ๋ณ€ํ™˜ํ•˜์—ฌ ํšจ์œจ์ ์ธ ์ฒ˜๋ฆฌ๋ฅผ ๊ฐ€๋Šฅํ•˜๊ฒŒ ํ•œ๋‹ค.

XML ํŒŒ์ผ ์ฒ˜๋ฆฌ์˜ ๊ธฐ๋ณธ ์›Œํฌํ”Œ๋กœ์šฐ๋Š” ๋‹ค์Œ๊ณผ ๊ฐ™๋‹ค:

  • ํŒŒ์ผ์—์„œ XML ์ฝ๊ธฐ ๋˜๋Š” ์ƒˆ XML ํŠธ๋ฆฌ ์ƒ์„ฑ
  • ์š”์†Œ ๋ฐ ์†์„ฑ ์กฐ์ž‘
  • ๋ณ€๊ฒฝ์‚ฌํ•ญ ํŒŒ์ผ์— ์ €์žฅ
import xml.etree.ElementTree as ET
import os

# XML ํŒŒ์ผ ์ฝ๊ธฐ
def read_xml_file(file_path):
    """Parse the XML file at ``file_path`` and return its root element.

    Returns ``None`` (after printing a message) when the path does not exist
    or when the file is not well-formed XML.
    """
    if os.path.exists(file_path):
        try:
            return ET.parse(file_path).getroot()
        except ET.ParseError as parse_error:
            print(f"XML ํŒŒ์‹ฑ ์˜ค๋ฅ˜: {parse_error}")
    else:
        print(f"ํŒŒ์ผ์ด ์กด์žฌํ•˜์ง€ ์•Š๋Š”๋‹ค: {file_path}")
    return None

# XML ํŒŒ์ผ ์“ฐ๊ธฐ
def write_xml_file(root, file_path, indent=None):
    """Write ``root`` to ``file_path`` as UTF-8 XML with an XML declaration.

    Args:
        root: The ``xml.etree.ElementTree.Element`` used as the document root.
        file_path: Destination passed straight to ``ElementTree.write``.
        indent: Optional indentation string such as ``"  "``. When given, a
            deep copy of the tree is pretty-printed with ``ET.indent``
            (available from Python 3.9) so the caller's elements are not
            modified. ``None`` (the default) writes the tree exactly as it
            is, which matches the previous behavior.

    Empty elements are written as ``<tag></tag>`` rather than ``<tag />``
    because ``short_empty_elements`` is disabled.
    """
    if indent is not None:
        # Local import: only needed on the pretty-printing path.
        import copy

        # ET.indent inserts whitespace into .text/.tail in place, so work on a
        # copy to keep the caller's tree untouched.
        root = copy.deepcopy(root)
        ET.indent(root, space=indent)

    ET.ElementTree(root).write(
        file_path,
        encoding='utf-8',
        xml_declaration=True,
        method='xml',
        short_empty_elements=False,
    )

# ์ƒˆ XML ๋ฌธ์„œ ์ƒ์„ฑ
def create_xml_document():
    """Build and return a sample ``library`` element holding two ``book`` entries.

    Each ``book`` carries ``id`` and ``available`` attributes and has
    ``title``, ``author`` and ``year`` children, in that order.
    """
    # One row per book: (id, available, title, author, year), in document order.
    catalog = (
        ('1', 'true', 'ํŒŒ์ด์ฌ ํ”„๋กœ๊ทธ๋ž˜๋ฐ', 'ํ™๊ธธ๋™', '2023'),
        ('2', 'false', 'XML ์ฒ˜๋ฆฌ ๊ธฐ์ดˆ', '๊น€์ฒ ์ˆ˜', '2022'),
    )

    library = ET.Element('library')
    for book_id, available, title, author, year in catalog:
        book = ET.SubElement(library, 'book')
        book.set('id', book_id)
        book.set('available', available)
        for tag, text in (('title', title), ('author', author), ('year', year)):
            ET.SubElement(book, tag).text = text

    return library

# ์‚ฌ์šฉ ์˜ˆ์‹œ
if __name__ == "__main__":
    # Build the sample document in memory.
    library = create_xml_document()
    
    # Save it to a file in the current working directory.
    output_file = 'library.xml'
    write_xml_file(library, output_file)
    print(f"XML ํŒŒ์ผ์ด ์ƒ์„ฑ๋˜์—ˆ๋‹ค: {output_file}")
    
    # Read the saved file back; read_xml_file returns None on failure.
    root = read_xml_file(output_file)
    if root is not None:
        print(f"๋ฃจํŠธ ํƒœ๊ทธ: {root.tag}")
        print(f"์ฑ… ์ˆ˜: {len(root.findall('book'))}")

✅ 특징:

  • ํŒŒ์ผ ๊ธฐ๋ฐ˜ XML ์ฝ๊ธฐ/์“ฐ๊ธฐ ์ง€์›
  • ํŠธ๋ฆฌ ๊ตฌ์กฐ๋กœ XML ๋ฐ์ดํ„ฐ ํ‘œํ˜„
  • ๋‹ค์–‘ํ•œ ์ธ์ฝ”๋”ฉ ์˜ต์…˜ ์ง€์›
  • ๋Œ€์šฉ๋Ÿ‰ XML ์ฒ˜๋ฆฌ ๊ฐ€๋Šฅ (iterparse ๊ธฐ๋Šฅ)
  • ์œ ์—ฐํ•œ ์š”์†Œ ๋ฐ ์†์„ฑ ์กฐ์ž‘
  • ์—๋Ÿฌ ์ฒ˜๋ฆฌ๋ฅผ ํ†ตํ•œ ์•ˆ์ •์ ์ธ ํŒŒ์ผ ์ฒ˜๋ฆฌ
  • XML ์„ ์–ธ ๋ฐ ์ถœ๋ ฅ ํ˜•์‹ ์ œ์–ด ๊ฐ€๋Šฅ


3️⃣ XML 요소 조작

XML ํŠธ๋ฆฌ ๋‚ด์˜ ์š”์†Œ๋ฅผ ์กฐ์ž‘ํ•˜๋Š” ๊ฒƒ์€ XML ์ฒ˜๋ฆฌ์˜ ํ•ต์‹ฌ ๊ธฐ๋Šฅ์ด๋‹ค. Python์˜ ElementTree ๋ชจ๋“ˆ์€ ์š”์†Œ ์ƒ์„ฑ, ์ˆ˜์ •, ์‚ญ์ œ ๋ฐ ๊ฒ€์ƒ‰์„ ์œ„ํ•œ ๋‹ค์–‘ํ•œ ๋ฉ”์„œ๋“œ๋ฅผ ์ œ๊ณตํ•œ๋‹ค.

์š”์†Œ ์กฐ์ž‘์˜ ์ฃผ์š” ์ž‘์—…์€ ๋‹ค์Œ๊ณผ ๊ฐ™๋‹ค:

  • ์ƒˆ ์š”์†Œ ์ƒ์„ฑ ๋ฐ ํŠธ๋ฆฌ์— ์ถ”๊ฐ€
  • ์†์„ฑ ์„ค์ • ๋ฐ ์ˆ˜์ •
  • ์š”์†Œ ๋‚ด์šฉ(ํ…์ŠคํŠธ) ๋ณ€๊ฒฝ
  • ํŠน์ • ์š”์†Œ ๊ฒ€์ƒ‰
  • ์š”์†Œ ์‚ญ์ œ
import xml.etree.ElementTree as ET

# ์š”์†Œ ์ƒ์„ฑ๊ณผ ์†์„ฑ ์„ค์ •
def create_person(name, age, job=None, skills=None):
    """Create a ``person`` element describing one individual.

    Args:
        name: Text for the ``<name>`` child.
        age: Value for the ``<age>`` child; stored as ``str(age)``.
        job: Optional text for a ``<job>`` child. Omitted when falsy.
        skills: Optional iterable of skill names. When truthy, a ``<skills>``
            child is added with one ``<skill>`` element per item. Any
            iterable works (list, tuple, generator); it does not need to
            support ``len()``.

    Returns:
        The new ``person`` element, not yet attached to any tree.
    """
    person = ET.Element('person')

    # Demo-grade identifier: the last six decimal digits of the object's id().
    # It is convenient for examples but is NOT guaranteed to be unique.
    person.set('id', str(id(person))[-6:])

    ET.SubElement(person, 'name').text = name
    ET.SubElement(person, 'age').text = str(age)

    if job:
        ET.SubElement(person, 'job').text = job

    if skills:
        skills_elem = ET.SubElement(person, 'skills')
        for skill in skills:
            ET.SubElement(skills_elem, 'skill').text = skill

    return person

# ์š”์†Œ ๊ฒ€์ƒ‰ ๋ฐ ์ˆ˜์ •
def modify_person(person_elem, new_age=None, new_job=None):
    """Update the ``<age>`` and/or ``<job>`` child of a ``person`` element in place.

    A child that does not exist yet is created and appended.

    Args:
        person_elem: The ``person`` element to modify.
        new_age: New age, stored as ``str(new_age)``. ``None`` leaves the age
            unchanged; ``0`` is a valid value and is written.
        new_job: New job text. ``None`` or an empty string leaves the job
            unchanged.
    """
    updates = []
    # Compare against None so that an age of 0 is not mistaken for "no change".
    if new_age is not None:
        updates.append(('age', str(new_age)))
    if new_job:
        updates.append(('job', new_job))

    for tag, text in updates:
        child = person_elem.find(tag)
        if child is None:
            child = ET.SubElement(person_elem, tag)
        child.text = text

# ์š”์†Œ ์‚ญ์ œ
def remove_skill(person_elem, skill_name):
    """Remove the first ``<skill>`` under ``<skills>`` whose text equals ``skill_name``.

    Returns ``True`` when an element was removed, ``False`` when there is no
    ``<skills>`` child or no matching ``<skill>``.
    """
    container = person_elem.find('skills')
    if container is None:
        return False

    target = next(
        (node for node in container.findall('skill') if node.text == skill_name),
        None,
    )
    if target is None:
        return False

    container.remove(target)
    return True

# ์š”์†Œ ๊ฒ€์ƒ‰ ๋ฐ ํ•„ํ„ฐ๋ง
def find_people_by_criteria(root, min_age=None, job=None, skill=None):
    """Return the ``person`` children of ``root`` that satisfy every given filter.

    Args:
        root: Element whose direct ``person`` children are examined.
        min_age: When not ``None``, keep only people whose ``<age>`` parses
            as an integer greater than or equal to this value. People with a
            missing, empty or non-numeric ``<age>`` are excluded.
        job: When not ``None``, keep only people whose ``<job>`` text equals
            this value exactly.
        skill: When not ``None``, keep only people with at least one
            descendant ``<skill>`` whose text equals this value.

    Returns:
        A list of matching ``person`` elements in document order.
    """
    def _age_of(person):
        """Return the person's age as an int, or None when it is unusable."""
        age_elem = person.find('age')
        if age_elem is None:
            return None
        try:
            return int(age_elem.text)
        except (TypeError, ValueError):
            # Empty (<age/>) or non-numeric text: treat it like a missing age
            # instead of aborting the whole search.
            return None

    def _matches(person):
        if min_age is not None:
            age = _age_of(person)
            if age is None or age < min_age:
                return False

        if job is not None:
            job_elem = person.find('job')
            if job_elem is None or job_elem.text != job:
                return False

        if skill is not None:
            if not any(s.text == skill for s in person.findall('.//skill')):
                return False

        return True

    return [person for person in root.findall('person') if _matches(person)]

# ์‚ฌ์šฉ ์˜ˆ์‹œ
if __name__ == "__main__":
    # Create the root element that will hold the people.
    root = ET.Element('people')
    
    # Build three person elements; the third has no skills.
    p1 = create_person('ํ™๊ธธ๋™', 30, 'developer', ['Python', 'JavaScript'])
    p2 = create_person('๊น€์ฒ ์ˆ˜', 25, 'designer', ['Photoshop'])
    p3 = create_person('์ด์˜ํฌ', 35, 'manager')
    
    root.append(p1)
    root.append(p2)
    root.append(p3)
    
    # Modify the first person's age and job in place.
    modify_person(p1, new_age=31, new_job='senior developer')
    
    # Remove one skill from the first person.
    remove_skill(p1, 'JavaScript')
    
    # Search by job and print each match.
    developers = find_people_by_criteria(root, job='senior developer')
    for dev in developers:
        print(f"๊ฐœ๋ฐœ์ž: {dev.find('name').text}, ๋‚˜์ด: {dev.find('age').text}")

✅ 특징:

  • ์ง๊ด€์ ์ธ ์š”์†Œ ์ƒ์„ฑ ๋ฐ ํŠธ๋ฆฌ ๊ตฌ์„ฑ
  • ์†์„ฑ ๊ด€๋ฆฌ๋ฅผ ์œ„ํ•œ ๊ฐ„๋‹จํ•œ ๋ฉ”์„œ๋“œ
  • ์œ ์—ฐํ•œ ์š”์†Œ ๋‚ด์šฉ ์ˆ˜์ •
  • ๋‹ค์–‘ํ•œ ์กฐ๊ฑด์œผ๋กœ ์š”์†Œ ๊ฒ€์ƒ‰ ๊ฐ€๋Šฅ
  • ํŠธ๋ฆฌ ๊ตฌ์กฐ ์ˆœํšŒ ๋ฐ ์กฐ์ž‘ ๊ธฐ๋Šฅ
  • ์š”์†Œ ๊ฐ„ ๊ด€๊ณ„(๋ถ€๋ชจ-์ž์‹) ์ ‘๊ทผ ์ง€์›
  • ์ค‘์ฒฉ๋œ ๋ณต์žกํ•œ XML ๊ตฌ์กฐ ์ฒ˜๋ฆฌ ๋Šฅ๋ ฅ


4️⃣ XPath 사용

XPath๋Š” XML ๋ฌธ์„œ ๋‚ด์—์„œ ์š”์†Œ์™€ ์†์„ฑ์„ ์ฐพ๊ธฐ ์œ„ํ•œ ๊ฐ•๋ ฅํ•œ ์ฟผ๋ฆฌ ์–ธ์–ด์ด๋‹ค. ElementTree๋Š” ์ œํ•œ๋œ XPath ๊ตฌ๋ฌธ์„ ์ง€์›ํ•˜๋ฉฐ, ๋” ์™„์ „ํ•œ ์ง€์›์„ ์œ„ํ•ด์„œ๋Š” lxml ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๋ฅผ ์‚ฌ์šฉํ•  ์ˆ˜ ์žˆ๋‹ค.

XPath๋ฅผ ์‚ฌ์šฉํ•˜๋ฉด ๋‹ค์Œ๊ณผ ๊ฐ™์€ ์ž‘์—…์ด ๊ฐ€๋Šฅํ•˜๋‹ค:

  • ํŠน์ • ๊ฒฝ๋กœ์˜ ์š”์†Œ ์„ ํƒ
  • ์กฐ๊ฑด์— ๋”ฐ๋ฅธ ์š”์†Œ ํ•„ํ„ฐ๋ง
  • ์—ฌ๋Ÿฌ ๊ฒฝ๋กœ๋ฅผ ํ†ตํ•œ ์š”์†Œ ์ ‘๊ทผ
  • ๋ณต์žกํ•œ ๊ณ„์ธต ๊ตฌ์กฐ ํƒ์ƒ‰
import xml.etree.ElementTree as ET
from lxml import etree  # ์™„์ „ํ•œ XPath ์ง€์›์„ ์œ„ํ•ด

# ์ƒ˜ํ”Œ XML ๋ฐ์ดํ„ฐ
xml_data = '''
<company>
    <department name="๊ฐœ๋ฐœํŒ€">
        <employee id="1001" type="์ •๊ทœ์ง">
            <name>ํ™๊ธธ๋™</name>
            <position>์„ ์ž„ ๊ฐœ๋ฐœ์ž</position>
            <salary currency="KRW">70000000</salary>
            <skills>
                <skill level="expert">Python</skill>
                <skill level="intermediate">Java</skill>
            </skills>
        </employee>
        <employee id="1002" type="๊ณ„์•ฝ์ง">
            <name>๊น€์ฒ ์ˆ˜</name>
            <position>์ฃผ๋‹ˆ์–ด ๊ฐœ๋ฐœ์ž</position>
            <salary currency="KRW">50000000</salary>
            <skills>
                <skill level="intermediate">JavaScript</skill>
                <skill level="beginner">Python</skill>
            </skills>
        </employee>
    </department>
    <department name="๋””์ž์ธํŒ€">
        <employee id="2001" type="์ •๊ทœ์ง">
            <name>์ด์˜ํฌ</name>
            <position>UI ๋””์ž์ด๋„ˆ</position>
            <salary currency="KRW">65000000</salary>
        </employee>
    </department>
</company>
'''

# ElementTree์™€ lxml ๋ชจ๋‘๋กœ ํŒŒ์‹ฑ
root_et = ET.fromstring(xml_data)
root_lxml = etree.fromstring(xml_data.encode('utf-8'))

def xpath_examples_elementtree():
    """Print several lookups that use the XPath subset ElementTree supports.

    Reads the module-level ``root_et`` element; returns nothing.
    """
    print("=== ElementTree XPath ์˜ˆ์‹œ ===")

    # 1. Every <employee> anywhere below the root.
    all_employees = root_et.findall('.//employee')
    print(f"์ง์› ์ˆ˜: {len(all_employees)}")

    # 2. Employees selected by an attribute predicate.
    by_type = root_et.findall(".//employee[@type='์ •๊ทœ์ง']")
    print(f"์ •๊ทœ์ง ์ง์› ์ˆ˜: {len(by_type)}")

    # 3. Employees of one department, selected through the parent's attribute.
    in_department = root_et.findall("./department[@name='๊ฐœ๋ฐœํŒ€']/employee")
    print(f"๊ฐœ๋ฐœํŒ€ ์ง์› ์ˆ˜: {len(in_department)}")

    # 4. Every <skill> anywhere below the root.
    all_skills = root_et.findall('.//skill')
    print(f"์ด ์Šคํ‚ฌ ์ˆ˜: {len(all_skills)}")

    # 5. Filter by child text in Python and print the matching position.
    for employee in all_employees:
        if employee.find('name').text != 'ํ™๊ธธ๋™':
            continue
        print(f"ํ™๊ธธ๋™์˜ ์ง๊ธ‰: {employee.find('position').text}")

def xpath_examples_lxml():
    """Print several queries that rely on lxml's ``xpath()`` method.

    Reads the module-level ``root_lxml`` element; returns nothing. Each
    ``[0]`` lookup assumes the queried node exists, as it does in the sample
    data defined alongside this function.
    """
    def _first(node, expression):
        # First result of an XPath query; raises IndexError when it is empty.
        return node.xpath(expression)[0]

    print("\n=== lxml XPath ์˜ˆ์‹œ ===")

    # 1. Numeric comparison inside the predicate.
    well_paid = root_lxml.xpath(".//employee[number(salary) >= 60000000]")
    print(f"๊ณ ์•ก์—ฐ๋ด‰ ์ง์› ์ˆ˜: {len(well_paid)}")
    for employee in well_paid:
        name = _first(employee, "./name/text()")
        salary = _first(employee, "./salary/text()")
        print(f"- {name}: {salary}์›")

    # 2. Employees that have a descendant <skill> equal to 'Python'.
    python_people = root_lxml.xpath(".//employee[.//skill='Python']")
    print(f"\nPython ๊ฐœ๋ฐœ์ž ์ˆ˜: {len(python_people)}")
    for employee in python_people:
        name = _first(employee, "./name/text()")
        level = _first(employee, ".//skill[text()='Python']/@level")
        print(f"- {name} (๋ ˆ๋ฒจ: {level})")

    # 3. List every employee with id, name and position.
    print("\n์ง์› ๋ชฉ๋ก:")
    for employee in root_lxml.xpath(".//employee"):
        emp_id = employee.get('id')
        name = _first(employee, "./name/text()")
        position = _first(employee, "./position/text()")
        print(f"- [{emp_id}] {name} ({position})")

    # 4. Substring match on <position> with two alternatives.
    seniors = root_lxml.xpath(".//employee[contains(position, '์„ ์ž„') or contains(position, '์‹œ๋‹ˆ์–ด')]")
    print(f"\n์„ ์ž„๊ธ‰ ์ง์› ์ˆ˜: {len(seniors)}")
    for employee in seniors:
        name = _first(employee, './name/text()')
        print(f"- {name}")

    # 5. Average salary of one department, computed in Python.
    salary_texts = root_lxml.xpath(".//department[@name='๊ฐœ๋ฐœํŒ€']/employee/salary/text()")
    if salary_texts:
        avg_salary = sum(map(float, salary_texts)) / len(salary_texts)
        print(f"\n๊ฐœ๋ฐœํŒ€ ํ‰๊ท  ๊ธ‰์—ฌ: {avg_salary:,.0f}์›")

# ์‹คํ–‰
# Run both demos at import time: the ElementTree one first, then the lxml one.
xpath_examples_elementtree()
xpath_examples_lxml()

✅ 특징:

  • ๊ฐ•๋ ฅํ•œ XML ์ฟผ๋ฆฌ ๊ธฐ๋Šฅ ์ œ๊ณต
  • ๋ณต์žกํ•œ ์กฐ๊ฑด์‹์œผ๋กœ ์š”์†Œ ํ•„ํ„ฐ๋ง
  • ๊ฒฝ๋กœ ๊ธฐ๋ฐ˜์˜ ์ง๊ด€์ ์ธ ์š”์†Œ ์ ‘๊ทผ
  • ๊ณ„์ธต ๊ตฌ์กฐ ํƒ์ƒ‰์„ ์œ„ํ•œ ๋‹ค์–‘ํ•œ ์—ฐ์‚ฐ์ž
  • ์†์„ฑ ๊ฐ’ ๊ธฐ๋ฐ˜ ๊ฒ€์ƒ‰ ์ง€์›
  • ElementTree๋Š” ๊ธฐ๋ณธ์ ์ธ XPath๋ฅผ ์ง€์›
  • lxml์€ ์™„์ „ํ•œ XPath 1.0/2.0 ์ง€์›


5️⃣ XML 스키마 검증

XML ์Šคํ‚ค๋งˆ๋Š” XML ๋ฌธ์„œ์˜ ๊ตฌ์กฐ์™€ ๋‚ด์šฉ์„ ์ •์˜ํ•˜๋Š” ๋ฐฉ๋ฒ•์ด๋‹ค. XML ๋ฌธ์„œ๊ฐ€ ์Šคํ‚ค๋งˆ์— ์ •์˜๋œ ๊ทœ์น™์„ ์ค€์ˆ˜ํ•˜๋Š”์ง€ ๊ฒ€์ฆํ•จ์œผ๋กœ์จ ๋ฐ์ดํ„ฐ์˜ ๋ฌด๊ฒฐ์„ฑ์„ ๋ณด์žฅํ•  ์ˆ˜ ์žˆ๋‹ค.

XML ์Šคํ‚ค๋งˆ ๊ฒ€์ฆ์˜ ์ฃผ์š” ์ด์ ์€ ๋‹ค์Œ๊ณผ ๊ฐ™๋‹ค:

  • ๋ฌธ์„œ ๊ตฌ์กฐ์˜ ์œ ํšจ์„ฑ ๊ฒ€์‚ฌ
  • ๋ฐ์ดํ„ฐ ํƒ€์ž… ๊ฒ€์ฆ
  • ํ•„์ˆ˜ ์š”์†Œ ๋ฐ ์†์„ฑ ํ™•์ธ
  • ๋น„์ฆˆ๋‹ˆ์Šค ๊ทœ์น™ ์ ์šฉ
import xmlschema
from lxml import etree
import os

# XSD ํŒŒ์ผ์„ ์‚ฌ์šฉํ•œ ๊ฒ€์ฆ
def validate_with_xmlschema(xml_file, xsd_file):
    """Validate ``xml_file`` against ``xsd_file`` with the ``xmlschema`` library.

    Prints the outcome and, when the document is invalid, one line per
    validation error.

    Args:
        xml_file: XML document to validate.
        xsd_file: XSD schema to validate against.

    Returns:
        ``True`` when the document is valid. ``False`` when it is invalid or
        when any exception is raised while loading or validating (the
        exception is printed, not propagated).
    """
    try:
        schema = xmlschema.XMLSchema(xsd_file)
        # Collect the errors in a single validation pass instead of calling
        # is_valid() and then validating again with iter_errors().
        errors = list(schema.iter_errors(xml_file))
    except Exception as e:
        print(f"๊ฒ€์ฆ ๊ณผ์ •์—์„œ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}")
        return False

    if not errors:
        print(f"XML ํŒŒ์ผ์ด ์Šคํ‚ค๋งˆ์— ์œ ํšจํ•˜๋‹ค: {xml_file}")
        return True

    print(f"XML ํŒŒ์ผ์ด ์Šคํ‚ค๋งˆ์— ์œ ํšจํ•˜์ง€ ์•Š๋‹ค: {xml_file}")
    for error in errors:
        print(f"- ์˜ค๋ฅ˜: {error}")
    return False

# lxml์„ ์‚ฌ์šฉํ•œ ๊ฒ€์ฆ
def validate_with_lxml(xml_file, xsd_file):
    """Validate ``xml_file`` against ``xsd_file`` with ``lxml.etree.XMLSchema``.

    Prints the outcome and, for an invalid document, each entry of the
    schema's error log with its line and column.

    Returns the result of ``XMLSchema.validate``, or ``False`` when any
    exception is raised along the way (the exception is printed).
    """
    try:
        # Load the schema first, then the document to be checked.
        schema = etree.XMLSchema(etree.parse(xsd_file))
        document = etree.parse(xml_file)

        is_valid = schema.validate(document)

        if not is_valid:
            print(f"XML ํŒŒ์ผ์ด ์Šคํ‚ค๋งˆ์— ์œ ํšจํ•˜์ง€ ์•Š๋‹ค: {xml_file}")
            # Detailed error information collected during validation.
            for entry in schema.error_log:
                print(f"- ์ค„ {entry.line}, ์—ด {entry.column}: {entry.message}")
        else:
            print(f"XML ํŒŒ์ผ์ด ์Šคํ‚ค๋งˆ์— ์œ ํšจํ•˜๋‹ค: {xml_file}")

        return is_valid
    except Exception as e:
        print(f"๊ฒ€์ฆ ๊ณผ์ •์—์„œ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}")
        return False

# XML ์Šคํ‚ค๋งˆ(XSD) ์˜ˆ์‹œ
def create_sample_xsd():
    """Write a sample XSD to ``employees.xsd`` in the current directory.

    The schema declares an ``employees`` root holding one or more
    ``employee`` elements, each with a required ``id`` attribute and
    ``name``, ``age``, ``department`` and optional ``position`` children.

    Returns the file name that was written.
    """
    target = "employees.xsd"
    schema_text = '''<?xml version="1.0" encoding="UTF-8"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
  <xs:element name="employees">
    <xs:complexType>
      <xs:sequence>
        <xs:element name="employee" maxOccurs="unbounded">
          <xs:complexType>
            <xs:sequence>
              <xs:element name="name" type="xs:string"/>
              <xs:element name="age" type="xs:positiveInteger"/>
              <xs:element name="department" type="xs:string"/>
              <xs:element name="position" type="xs:string" minOccurs="0"/>
            </xs:sequence>
            <xs:attribute name="id" type="xs:ID" use="required"/>
          </xs:complexType>
        </xs:element>
      </xs:sequence>
    </xs:complexType>
  </xs:element>
</xs:schema>
'''
    with open(target, "w", encoding="utf-8") as out:
        out.write(schema_text)
    return target

# ์œ ํšจํ•œ XML ์˜ˆ์‹œ
def create_valid_xml():
    """Write a sample two-employee document to ``valid_employees.xml``.

    Both employees carry an ``id`` attribute plus ``name``, ``age`` and
    ``department`` children; the first also has a ``position``.

    Returns the file name that was written.
    """
    target = "valid_employees.xml"
    document = '''<?xml version="1.0" encoding="UTF-8"?>
<employees>
  <employee id="e1">
    <name>ํ™๊ธธ๋™</name>
    <age>30</age>
    <department>๊ฐœ๋ฐœ</department>
    <position>์„ ์ž„ ๊ฐœ๋ฐœ์ž</position>
  </employee>
  <employee id="e2">
    <name>๊น€์ฒ ์ˆ˜</name>
    <age>25</age>
    <department>๋””์ž์ธ</department>
  </employee>
</employees>
'''
    with open(target, "w", encoding="utf-8") as out:
        out.write(document)
    return target

# ์œ ํšจํ•˜์ง€ ์•Š์€ XML ์˜ˆ์‹œ
def create_invalid_xml():
    """Write a deliberately schema-violating document to ``invalid_employees.xml``.

    The document has a negative ``age``, an ``employee`` without an ``id``
    attribute, and an ``employee`` without a ``department`` child.

    Returns the file name that was written.
    """
    target = "invalid_employees.xml"
    document = '''<?xml version="1.0" encoding="UTF-8"?>
<employees>
  <employee id="e1">
    <name>ํ™๊ธธ๋™</name>
    <age>-30</age>  <!-- ์Œ์ˆ˜๋Š” positiveInteger์— ์œ ํšจํ•˜์ง€ ์•Š์Œ -->
    <department>๊ฐœ๋ฐœ</department>
  </employee>
  <employee>  <!-- id ์†์„ฑ ๋ˆ„๋ฝ -->
    <name>๊น€์ฒ ์ˆ˜</name>
    <age>25</age>
    <!-- department ์š”์†Œ ๋ˆ„๋ฝ -->
  </employee>
</employees>
'''
    with open(target, "w", encoding="utf-8") as out:
        out.write(document)
    return target

# ์‹คํ–‰ ์˜ˆ์‹œ
if __name__ == "__main__":
    # Write the schema and the two sample documents to the working directory.
    xsd_file = create_sample_xsd()
    valid_xml = create_valid_xml()
    invalid_xml = create_invalid_xml()
    
    print("xmlschema ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๋ฅผ ์‚ฌ์šฉํ•œ ๊ฒ€์ฆ:")
    validate_with_xmlschema(valid_xml, xsd_file)
    validate_with_xmlschema(invalid_xml, xsd_file)
    
    print("\nlxml ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๋ฅผ ์‚ฌ์šฉํ•œ ๊ฒ€์ฆ:")
    validate_with_lxml(valid_xml, xsd_file)
    validate_with_lxml(invalid_xml, xsd_file)
    
    # Remove the temporary files created above, if they still exist.
    for file in [xsd_file, valid_xml, invalid_xml]:
        if os.path.exists(file):
            os.remove(file)

✅ 특징:

  • XML ์Šคํ‚ค๋งˆ(XSD)๋ฅผ ์‚ฌ์šฉํ•œ ๊ตฌ์กฐ ๊ฒ€์ฆ
  • ๋ฐ์ดํ„ฐ ํƒ€์ž… ๋ฐ ์ œ์•ฝ์กฐ๊ฑด ์ ์šฉ ๊ฐ€๋Šฅ
  • ์˜ค๋ฅ˜ ์œ„์น˜ ๋ฐ ์›์ธ ์ƒ์„ธ ๋ณด๊ณ 
  • ๋ณต์žกํ•œ ๋น„์ฆˆ๋‹ˆ์Šค ๊ทœ์น™ ์ ์šฉ ๊ฐ€๋Šฅ
  • DTD, RelaxNG ๋“ฑ ๋‹ค์–‘ํ•œ ์Šคํ‚ค๋งˆ ์–ธ์–ด ์ง€์›
  • ๋Œ€์šฉ๋Ÿ‰ XML ํŒŒ์ผ ์ ์ง„์  ๊ฒ€์ฆ ์ง€์›
  • API ํ†ตํ•ฉ ๋ฐ ๋ฐ์ดํ„ฐ ๊ตํ™˜ ์‹œ ์‹ ๋ขฐ์„ฑ ๋ณด์žฅ


6️⃣ XML 변환 및 최적화

XML ๋ฐ์ดํ„ฐ๋Š” ์ข…์ข… ๋‹ค๋ฅธ ํ˜•์‹์œผ๋กœ ๋ณ€ํ™˜ํ•˜๊ฑฐ๋‚˜ ์ฒ˜๋ฆฌ ์„ฑ๋Šฅ์„ ์ตœ์ ํ™”ํ•ด์•ผ ํ•œ๋‹ค. Python์—์„œ๋Š” ๋‹ค์–‘ํ•œ ๋„๊ตฌ๋ฅผ ์‚ฌ์šฉํ•˜์—ฌ XML์„ ํšจ์œจ์ ์œผ๋กœ ์ฒ˜๋ฆฌํ•˜๊ณ  ๋‹ค๋ฅธ ํ˜•์‹์œผ๋กœ ๋ณ€ํ™˜ํ•  ์ˆ˜ ์žˆ๋‹ค.

์ฃผ์š” XML ๋ณ€ํ™˜ ๋ฐ ์ตœ์ ํ™” ๊ธฐ๋ฒ•์€ ๋‹ค์Œ๊ณผ ๊ฐ™๋‹ค:

  • XML์—์„œ ๋‹ค๋ฅธ ํ˜•์‹(JSON, CSV ๋“ฑ)์œผ๋กœ ๋ณ€ํ™˜
  • XSLT๋ฅผ ์‚ฌ์šฉํ•œ XML ๋ณ€ํ™˜
  • ๋Œ€์šฉ๋Ÿ‰ XML ํŒŒ์ผ์˜ ๋ฉ”๋ชจ๋ฆฌ ํšจ์œจ์  ์ฒ˜๋ฆฌ
  • ๋ฐ์ดํ„ฐ ์••์ถ• ๋ฐ ์ตœ์ ํ™”
import xml.etree.ElementTree as ET
import json
import csv
from lxml import etree
import io

# XML์„ JSON์œผ๋กœ ๋ณ€ํ™˜
def xml_to_json(xml_data, root_name='root'):
    """Convert XML into a JSON-serializable ``dict`` keyed by the root tag.

    Conversion rules for each element:
      * attributes become ``"@name"`` keys;
      * an element with no children, no attributes and non-blank text becomes
        that stripped text;
      * an element with attributes and text stores the text under ``"#text"``;
      * child elements become keys named after their tag, and repeated tags
        are collected into a list in document order;
      * non-blank text that precedes the children (mixed content) is also
        kept under ``"#text"`` instead of being dropped;
      * an element with nothing in it becomes ``{}``.

    Args:
        xml_data: XML text as ``str`` or ``bytes``, an ``ElementTree``
            (anything with ``getroot()``), or an already parsed ``Element``.
        root_name: Accepted for backward compatibility; currently unused,
            since the result is always keyed by the actual root tag.

    Returns:
        ``{root.tag: converted_root}``. Pass it to ``json.dumps`` to obtain
        JSON text.
    """
    def _element_to_dict(element):
        result = {f"@{key}": value for key, value in element.attrib.items()}
        text = (element.text or '').strip()
        children = list(element)

        if not children:
            if text:
                if not result:
                    # Plain leaf: collapse to its text.
                    return text
                result['#text'] = text
            return result

        if text:
            # Mixed content: keep the leading text alongside the children.
            result['#text'] = text

        grouped = {}
        for child in children:
            value = _element_to_dict(child)
            if child.tag not in grouped:
                grouped[child.tag] = value
                continue
            # Repeated tag: promote the existing entry to a list once.
            if not isinstance(grouped[child.tag], list):
                grouped[child.tag] = [grouped[child.tag]]
            grouped[child.tag].append(value)

        result.update(grouped)
        return result

    if isinstance(xml_data, (str, bytes)):
        root = ET.fromstring(xml_data)
    elif hasattr(xml_data, 'getroot'):
        root = xml_data.getroot()
    else:
        root = xml_data
    return {root.tag: _element_to_dict(root)}

# XML์„ CSV๋กœ ๋ณ€ํ™˜
def xml_to_csv(xml_data, mapping, output_file):
    """Write selected values from XML rows to a UTF-8 CSV file.

    Args:
        xml_data: XML text (``str`` or ``bytes``) or an already parsed
            ``Element``.
        mapping: Dict with three keys:
            ``'row_xpath'`` - ElementTree path selecting one element per row;
            ``'headers'`` - CSV column names, in output order;
            ``'columns'`` - ``{column name: spec}`` where ``spec`` is either an
            ElementTree path relative to the row element (its text is used)
            or a path whose last step is ``@attr`` (for example ``'@id'`` or
            ``'salary/@currency'``), which reads that attribute instead.
        output_file: Path of the CSV file to create or overwrite.

    Missing elements, missing attributes and blank values produce an empty
    cell; values are stripped of surrounding whitespace.
    """
    def _cell(row_elem, spec):
        # ElementTree's find() rejects attribute steps, so handle a trailing
        # '@attr' step here and use find() only for the element part.
        head, _, last = spec.rpartition('/')
        if last.startswith('@'):
            owner = row_elem.find(head) if head else row_elem
            value = owner.get(last[1:]) if owner is not None else None
        else:
            target = row_elem.find(spec)
            value = target.text if target is not None else None
        return value.strip() if value else ''

    root = ET.fromstring(xml_data) if isinstance(xml_data, (str, bytes)) else xml_data
    items = root.findall(mapping['row_xpath'])

    # newline='' lets the csv module control line endings itself.
    with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=mapping['headers'])
        writer.writeheader()
        for item in items:
            writer.writerow(
                {column: _cell(item, spec) for column, spec in mapping['columns'].items()}
            )

# XSLT๋ฅผ ์‚ฌ์šฉํ•œ XML ๋ณ€ํ™˜
def transform_with_xslt(xml_data, xslt_data):
    """Apply an XSLT stylesheet to an XML document with lxml.

    Each argument may be a ``str`` (parsed here) or an object lxml can use
    directly, which is passed through unchanged.

    Returns the transformation result serialized with
    ``etree.tostring(..., pretty_print=True)`` and decoded from UTF-8.
    """
    def _as_tree(source):
        # Strings are parsed; anything else is assumed to be parsed already.
        if isinstance(source, str):
            return etree.parse(io.StringIO(source))
        return source

    # Parse the document before the stylesheet, as the two are independent
    # but a failure in the first should surface first.
    document = _as_tree(xml_data)
    stylesheet = _as_tree(xslt_data)

    apply_stylesheet = etree.XSLT(stylesheet)
    transformed = apply_stylesheet(document)

    serialized = etree.tostring(transformed, pretty_print=True, encoding='utf-8')
    return serialized.decode('utf-8')

# ๋Œ€์šฉ๋Ÿ‰ XML ํŒŒ์ผ ์ฒ˜๋ฆฌ (์ดํ„ฐ๋ ˆ์ดํ„ฐ ํŒจํ„ด)
def process_large_xml(xml_file, element_tag, callback):
    """Stream through ``xml_file`` and call ``callback`` on each matching element.

    Args:
        xml_file: Source handed to ``etree.iterparse``.
        element_tag: Tag name passed as ``iterparse``'s ``tag`` filter.
        callback: Callable invoked with each yielded element. The element is
            cleared right after the call returns.

    Returns:
        The number of elements for which ``callback`` completed. If any
        exception is raised during iteration or inside ``callback``, it is
        printed and the count reached so far is returned.
    """
    
    # Event-based parser: only 'end' events for elements matching the tag.
    context = etree.iterparse(xml_file, events=('end',), tag=element_tag)
    
    count = 0
    try:
        for event, elem in context:
            # Hand the completed element to the caller.
            callback(elem)
            count += 1
            
            # Drop the element's own content once it has been processed.
            elem.clear()
            # Then delete earlier siblings from the parent so already-handled
            # elements are not kept referenced by the tree. The loop is not
            # entered when there is no previous sibling.
            while elem.getprevious() is not None:
                del elem.getparent()[0]
    except Exception as e:
        print(f"์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}")
    
    return count

# ์ƒ˜ํ”Œ XML ๋ฐ์ดํ„ฐ
sample_xml = '''
<employees>
  <employee id="1001" department="๊ฐœ๋ฐœ">
    <name>ํ™๊ธธ๋™</name>
    <position>์„ ์ž„ ๊ฐœ๋ฐœ์ž</position>
    <skills>
      <skill>Python</skill>
      <skill>XML</skill>
    </skills>
    <projects>
      <project>
        <name>๋ฐ์ดํ„ฐ ๋ถ„์„ ์‹œ์Šคํ…œ</name>
        <duration>6๊ฐœ์›”</duration>
      </project>
      <project>
        <name>์›น ์„œ๋น„์Šค ๊ฐœ๋ฐœ</name>
        <duration>3๊ฐœ์›”</duration>
      </project>
    </projects>
  </employee>
  <employee id="1002" department="๋งˆ์ผ€ํŒ…">
    <name>๊น€์ฒ ์ˆ˜</name>
    <position>๋งˆ์ผ€ํŒ… ๋งค๋‹ˆ์ €</position>
    <skills>
      <skill>๋ฐ์ดํ„ฐ ๋ถ„์„</skill>
      <skill>์†Œ์…œ ๋ฏธ๋””์–ด</skill>
    </skills>
  </employee>
</employees>
'''

# ์ƒ˜ํ”Œ XSLT ๋ณ€ํ™˜
sample_xslt = '''
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
  <xsl:output method="html" indent="yes"/>
  
  <xsl:template match="/">
    <html>
      <head>
        <title>์ง์› ๋ชฉ๋ก</title>
      </head>
      <body>
        <h1>์ง์› ๋ช…๋‹จ</h1>
        <table border="1">
          <tr>
            <th>ID</th>
            <th>์ด๋ฆ„</th>
            <th>๋ถ€์„œ</th>
            <th>์ง์ฑ…</th>
            <th>์Šคํ‚ฌ</th>
          </tr>
          <xsl:for-each select="//employee">
            <tr>
              <td><xsl:value-of select="@id"/></td>
              <td><xsl:value-of select="name"/></td>
              <td><xsl:value-of select="@department"/></td>
              <td><xsl:value-of select="position"/></td>
              <td>
                <xsl:for-each select="skills/skill">
                  <xsl:value-of select="."/>
                  <xsl:if test="position() != last()">, </xsl:if>
                </xsl:for-each>
              </td>
            </tr>
          </xsl:for-each>
        </table>
      </body>
    </html>
  </xsl:template>
</xsl:stylesheet>
'''

# ์‹คํ–‰ ์˜ˆ์‹œ
if __name__ == "__main__":
    # Convert the sample XML to a dict and print it as JSON text.
    json_data = xml_to_json(sample_xml)
    print("JSON ๋ณ€ํ™˜ ๊ฒฐ๊ณผ:")
    print(json.dumps(json_data, indent=2, ensure_ascii=False))
    
    # Column mapping intended for xml_to_csv.
    # NOTE(review): this mapping is built but never passed to xml_to_csv in
    # this snippet. The '@id' / '@department' entries are attribute steps;
    # confirm xml_to_csv handles them before wiring the call in.
    mapping = {
        'row_xpath': './/employee',
        'headers': ['ID', '์ด๋ฆ„', '๋ถ€์„œ', '์ง์ฑ…', '์Šคํ‚ฌ'],
        'columns': {
            'ID': '@id',
            '์ด๋ฆ„': 'name',
            '๋ถ€์„œ': '@department',
            '์ง์ฑ…': 'position',
            '์Šคํ‚ฌ': 'skills'  # needs separate handling (simplified example)
        }
    }
    
    # Transform the sample XML to HTML with the sample stylesheet.
    html_output = transform_with_xslt(sample_xml, sample_xslt)
    print("\nXSLT ๋ณ€ํ™˜ ๊ฒฐ๊ณผ:")
    print(html_output[:500] + "...")  # print only the first 500 characters

✅ 특징:

  • ๋‹ค์–‘ํ•œ ๋ฐ์ดํ„ฐ ํ˜•์‹์œผ๋กœ ์œ ์—ฐํ•œ ๋ณ€ํ™˜
  • XSLT๋ฅผ ํ†ตํ•œ ๊ฐ•๋ ฅํ•œ ๋ณ€ํ™˜ ๋ฐ ์Šคํƒ€์ผ๋ง
  • ๋ฉ”๋ชจ๋ฆฌ ํšจ์œจ์ ์ธ ๋Œ€์šฉ๋Ÿ‰ XML ์ฒ˜๋ฆฌ
  • ์ŠคํŠธ๋ฆฌ๋ฐ ํŒŒ์„œ๋ฅผ ํ†ตํ•œ ์ ์ง„์  ์ฒ˜๋ฆฌ
  • ๋ณต์žกํ•œ XML ๊ตฌ์กฐ์˜ ๋‹จ์ˆœํ™”
  • ๋ฐ์ดํ„ฐ ๋งˆ์ด๊ทธ๋ ˆ์ด์…˜ ๋ฐ ํ†ตํ•ฉ ์ง€์›
  • ์„ฑ๋Šฅ ์ตœ์ ํ™”๋ฅผ ์œ„ํ•œ ๋‹ค์–‘ํ•œ ๊ธฐ๋ฒ• ์ ์šฉ


주요 팁

✅ 모범 사례:

  1. ์ ์ ˆํ•œ ํŒŒ์„œ ์„ ํƒ: ์ž‘์€ XML ํŒŒ์ผ์—๋Š” ElementTree, ๋ณต์žกํ•œ ์ž‘์—…์ด๋‚˜ ๋Œ€์šฉ๋Ÿ‰ ํŒŒ์ผ์—๋Š” lxml์„ ์‚ฌ์šฉํ•˜์ž.

  2. ๋ฉ”๋ชจ๋ฆฌ ๊ด€๋ฆฌ: ๋Œ€์šฉ๋Ÿ‰ XML ์ฒ˜๋ฆฌ ์‹œ iterparse ๋˜๋Š” SAX ํŒŒ์„œ๋ฅผ ์‚ฌ์šฉํ•˜์—ฌ ๋ฉ”๋ชจ๋ฆฌ ์‚ฌ์šฉ๋Ÿ‰์„ ์ตœ์†Œํ™”ํ•˜์ž.

  3. ์ ์ ˆํ•œ ์ธ์ฝ”๋”ฉ ์ง€์ •: XML ํŒŒ์ผ ์ƒ์„ฑ ์‹œ ํ•ญ์ƒ ๋ช…์‹œ์ ์œผ๋กœ ์ธ์ฝ”๋”ฉ(๋ณดํ†ต UTF-8)์„ ์ง€์ •ํ•˜์ž.

  4. XPath ํ™œ์šฉ: ๋ณต์žกํ•œ ์š”์†Œ ๊ฒ€์ƒ‰ ์‹œ XPath๋ฅผ ํ™œ์šฉํ•˜์—ฌ ์ฝ”๋“œ๋ฅผ ๊ฐ„๊ฒฐํ•˜๊ฒŒ ์œ ์ง€ํ•˜์ž.

  5. ์—๋Ÿฌ ์ฒ˜๋ฆฌ ๊ตฌํ˜„: XML ํŒŒ์‹ฑ์€ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ•˜๊ธฐ ์‰ฌ์šฐ๋ฏ€๋กœ, ํ•ญ์ƒ ์ ์ ˆํ•œ ์˜ˆ์™ธ ์ฒ˜๋ฆฌ๋ฅผ ๊ตฌํ˜„ํ•˜์ž.

  6. ์Šคํ‚ค๋งˆ ๊ฒ€์ฆ ํ™œ์šฉ: ์ค‘์š”ํ•œ ๋ฐ์ดํ„ฐ๋ฅผ ์ฒ˜๋ฆฌํ•  ๋•Œ๋Š” ์Šคํ‚ค๋งˆ ๊ฒ€์ฆ์„ ํ†ตํ•ด ๋ฐ์ดํ„ฐ ์œ ํšจ์„ฑ์„ ๋ณด์žฅํ•˜์ž.

  7. ๋„ค์ž„์ŠคํŽ˜์ด์Šค ์ฒ˜๋ฆฌ: XML ๋„ค์ž„์ŠคํŽ˜์ด์Šค๋ฅผ ์˜ฌ๋ฐ”๋ฅด๊ฒŒ ์ฒ˜๋ฆฌํ•˜์—ฌ ์ถฉ๋Œ์„ ๋ฐฉ์ง€ํ•˜๊ณ  ํ‘œ์ค€์„ ์ค€์ˆ˜ํ•˜์ž.

  8. ๋ณด์•ˆ ์ทจ์•ฝ์  ์ฃผ์˜: ์™ธ๋ถ€ ์—”ํ‹ฐํ‹ฐ ์ฐธ์กฐ(XXE)์™€ ๊ฐ™์€ XML ํŒŒ์‹ฑ ๊ด€๋ จ ๋ณด์•ˆ ์ทจ์•ฝ์ ์— ์ฃผ์˜ํ•˜์ž.

  9. ์„ฑ๋Šฅ ์ตœ์ ํ™”: ์ž์ฃผ ์ ‘๊ทผํ•˜๋Š” ์š”์†Œ๋Š” ์บ์‹ฑํ•˜๊ณ , ํ•„์š”ํ•œ ๋ถ€๋ถ„๋งŒ ํŒŒ์‹ฑํ•˜์—ฌ ์ฒ˜๋ฆฌ ์†๋„๋ฅผ ํ–ฅ์ƒ์‹œํ‚ค์ž.

  10. ์ฝ”๋“œ ๊ฐ€๋…์„ฑ ์œ ์ง€: XML ์ฒ˜๋ฆฌ ๋กœ์ง์„ ๋ชจ๋“ˆํ™”ํ•˜๊ณ  ๋ช…ํ™•ํ•œ ํ•จ์ˆ˜๋ช…๊ณผ ์ฃผ์„์„ ์‚ฌ์šฉํ•˜์—ฌ ์œ ์ง€๋ณด์ˆ˜์„ฑ์„ ๋†’์ด์ž.



⚠️ **GitHub.com Fallback** ⚠️