# Inspect the metadata attached to the document (shown as the cell output below)
document.metadata
{'source':'TeddyNote','page': 1,'author':'Teddy'}
from langchain_community.document_loaders import PyPDFLoader

# Path to the sample PDF used by the examples that follow
FILE_PATH = "./data/SPRI_AI_Brief_2023년12월호_F.pdf"

# Create a loader bound to the sample PDF
loader = PyPDFLoader(FILE_PATH)

# Read the PDF into a list of documents
docs = loader.load()

# Count how many documents the loader produced
len(docs)
23
# Inspect the first loaded document
docs[0]
from langchain_text_splitters import CharacterTextSplitter
# Configure the text splitter: chunk_size=200, no overlap between chunks
text_splitter = CharacterTextSplitter(chunk_size=200, chunk_overlap=0)
# Load the PDF and split it with the splitter above (rebinds `docs`)
docs = loader.load_and_split(text_splitter=text_splitter)
# Check the number of documents after splitting
len(docs)
# Inspect the first split document
docs[0]
Document (metadata={'source':'./data/SPRI_AI_Brief_2023 December issue_F.pdf','page': 0}, page_content='December 2023')
# Load the documents lazily: iterate over the loader's lazy_load() results
# one at a time and print each document's metadata.
for doc in loader.lazy_load():
    print(doc.metadata)
{'source':'./data/SPRI_AI_Brief_2023 December issue_F.pdf','page': 0}
{'source':'./data/SPRI_AI_Brief_2023 December issue_F.pdf','page': 1}
{'source':'./data/SPRI_AI_Brief_2023 December issue_F.pdf','page': 2}
{'source':'./data/SPRI_AI_Brief_2023 December issue_F.pdf','page': 3}
{'source':'./data/SPRI_AI_Brief_2023 December issue_F.pdf','page': 4}
{'source':'./data/SPRI_AI_Brief_2023 December issue_F.pdf','page': 5}
{'source':'./data/SPRI_AI_Brief_2023 December issue_F.pdf','page': 6}
{'source':'./data/SPRI_AI_Brief_2023 December issue_F.pdf','page': 7}
{'source':'./data/SPRI_AI_Brief_2023 December issue_F.pdf','page': 8}
{'source':'./data/SPRI_AI_Brief_2023 December issue_F.pdf','page': 9}
{'source':'./data/SPRI_AI_Brief_2023 December issue_F.pdf','page': 10}
{'source':'./data/SPRI_AI_Brief_2023 December issue_F.pdf','page': 11}
{'source':'./data/SPRI_AI_Brief_2023 December issue_F.pdf','page': 12}
{'source':'./data/SPRI_AI_Brief_2023 December issue_F.pdf','page': 13}
{'source':'./data/SPRI_AI_Brief_2023 December issue_F.pdf','page': 14}
{'source':'./data/SPRI_AI_Brief_2023 December issue_F.pdf','page': 15}
{'source':'./data/SPRI_AI_Brief_2023 December issue_F.pdf','page': 16}
{'source':'./data/SPRI_AI_Brief_2023 December issue_F.pdf','page': 17}
{'source':'./data/SPRI_AI_Brief_2023 December issue_F.pdf','page': 18}
{'source':'./data/SPRI_AI_Brief_2023 December issue_F.pdf','page': 19}
{'source':'./data/SPRI_AI_Brief_2023 December issue_F.pdf','page': 20}
{'source':'./data/SPRI_AI_Brief_2023 December issue_F.pdf','page': 21}
{'source':'./data/SPRI_AI_Brief_2023 December issue_F.pdf','page': 22}
# Load the documents asynchronously: aload() gives back an awaitable
adocs = loader.aload()
# Await it to obtain the loaded documents (top-level `await` as written here
# needs an environment that supports it, e.g. a notebook cell)
await adocs
[Document (metadata={'source':'./data/SPRI_AI_Brief_2023 December issue_F.pdf','page': 0}, page_content=' 12/2023 issue'), Document (metadata<TA Artificial Industry Trend Brief\n 1. United States ▹ United States, safe and reliable AI development and use executive order · · · · · · · · · · · · · · · · · · · · · G7, Hiroshima AI process to AI company target international action force · · Submission of AI comments in terms of consumer protection and competition to the Copyright Office ················ 5\n ▹ EU AI law 3rd party negotiation, based model regulation related views, ovulation,································ Corporate/Industry \n ▹ American Frontier Model Forum, 1, $0 million AI Safety Fundraising ································ 7\n ▹ Cohir, Data Sources to Ensure Data Transparency Explorer Disclosure ············
...
(omitted)
...
Conference \non Artificial \nIntelligence\n-AI Development Association Conference (AAAI) promotes AI research, provides opportunities for exchanges between AI fields \n researchers, practitioners, scientists, students and engineers \N-Conference announces AI-related skills, special tracks, Invited speakers, \nworkshop, tutorial, poster session, topicn, exhibition Document (metadata={'source':'./data/SPRI_AI_Brief_2023 December issue_F.pdf','page': 22}, page_content='Homepage: https://spri.kr/\n보고서와 Inquiries related to AI Policy Lab (jayoo@spri.