<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Cardio</journal-id><journal-id journal-id-type="publisher-id">cardio</journal-id><journal-id journal-id-type="index">26</journal-id><journal-title>JMIR Cardio</journal-title><abbrev-journal-title>JMIR Cardio</abbrev-journal-title><issn pub-type="epub">2561-1011</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v9i1e68817</article-id><article-id pub-id-type="doi">10.2196/68817</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Improving the Readability of Institutional Heart Failure&#x2013;Related Patient Education Materials Using GPT-4: Observational Study</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>King</surname><given-names>Ryan C</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Samaan</surname><given-names>Jamil S</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Haquang</surname><given-names>Joseph</given-names></name><degrees>DO</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Bharani</surname><given-names>Vishnu</given-names></name><degrees>MD</degrees><xref ref-type="aff" 
rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Margolis</surname><given-names>Samuel</given-names></name><degrees>BS</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Srinivasan</surname><given-names>Nitin</given-names></name><degrees>BA</degrees><xref ref-type="aff" rid="aff4">4</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Peng</surname><given-names>Yuxin</given-names></name><degrees>BS</degrees><xref ref-type="aff" rid="aff5">5</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Yeo</surname><given-names>Yee Hui</given-names></name><degrees>MD, MSc</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Ghashghaei</surname><given-names>Roxana</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib></contrib-group><aff id="aff1"><institution>Department of Medicine, Division of Cardiology, University of California, Irvine Medical Center</institution><addr-line>101 The City Dr S</addr-line><addr-line>Orange</addr-line><addr-line>CA</addr-line><country>United States</country></aff><aff id="aff2"><institution>Department of Medicine, Karsh Division of Gastroenterology and Hepatology, Cedars-Sinai Medical Center</institution><addr-line>Los Angeles</addr-line><addr-line>CA</addr-line><country>United States</country></aff><aff id="aff3"><institution>David Geffen School of Medicine, University of California, Los Angeles</institution><addr-line>Los Angeles</addr-line><addr-line>CA</addr-line><country>United States</country></aff><aff id="aff4"><institution>Keck School of Medicine, University of Southern California</institution><addr-line>Los Angeles</addr-line><addr-line>CA</addr-line><country>United States</country></aff><aff id="aff5"><institution>School of Mathematics and 
Statistics, Xi'an Jiaotong University</institution><addr-line>Xi'an</addr-line><country>China</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Rivers</surname><given-names>John</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Rouhi</surname><given-names>Armaun D</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Nomali</surname><given-names>Mahin</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Ryan C King, MD, Department of Medicine, Division of Cardiology, University of California, Irvine Medical Center, 101 The City Dr S, Orange, CA, 92868, United States, 1 714-456-7890; <email>ryan.king2517@gmail.com</email></corresp></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>8</day><month>7</month><year>2025</year></pub-date><volume>9</volume><elocation-id>e68817</elocation-id><history><date date-type="received"><day>15</day><month>11</month><year>2024</year></date><date date-type="rev-recd"><day>05</day><month>06</month><year>2025</year></date><date date-type="accepted"><day>08</day><month>06</month><year>2025</year></date></history><copyright-statement>&#x00A9; Ryan C King, Jamil S Samaan, Joseph Haquang, Vishnu Bharani, Samuel Margolis, Nitin Srinivasan, Yuxin Peng, Yee Hui Yeo, Roxana Ghashghaei. Originally published in JMIR Cardio (<ext-link ext-link-type="uri" xlink:href="https://cardio.jmir.org">https://cardio.jmir.org</ext-link>), 8.7.2025. 
</copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Cardio, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://cardio.jmir.org">https://cardio.jmir.org</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://cardio.jmir.org/2025/1/e68817"/><abstract><sec><title>Background</title><p>Heart failure management involves comprehensive lifestyle modifications such as daily weights, fluid and sodium restriction, and blood pressure monitoring, placing additional responsibility on patients and caregivers, with successful adherence often requiring extensive counseling and understandable patient education materials (PEMs). Prior research has shown PEMs related to cardiovascular disease often exceed the American Medical Association&#x2019;s fifth- to sixth-grade recommended reading level. 
The large language model (LLM) ChatGPT may be a useful tool for improving PEM readability.</p></sec><sec><title>Objective</title><p>We aim to assess the readability of heart failure&#x2013;related PEMs from prominent cardiology institutions and evaluate GPT-4&#x2019;s ability to improve these metrics while maintaining accuracy and comprehensiveness.</p></sec><sec sec-type="methods"><title>Methods</title><p>A total of 143 heart failure&#x2013;related PEMs were collected from the websites of the top 10 institutions listed on the 2022&#x2010;2023 US News &#x0026; World Report for &#x201C;Best Hospitals for Cardiology, Heart &#x0026; Vascular Surgery.&#x201D; PEMs were individually entered into GPT-4 (version updated July 20, 2023), preceded by the prompt, &#x201C;Please explain the following in simpler terms.&#x201D; Readability was assessed using the Flesch Reading Ease score, Flesch-Kincaid Grade Level (FKGL), Gunning Fog Index, Coleman-Liau Index, Simple Measure of Gobbledygook Index, and Automated Readability Index. The accuracy and comprehensiveness of revised GPT-4 PEMs were assessed by a board-certified cardiologist.</p></sec><sec sec-type="results"><title>Results</title><p>For 143 institutional heart failure&#x2013;related PEMs analyzed, the median FKGL was 10.3 (IQR 7.9-13.1; high school sophomore) compared to 7.3 (IQR 6.1-8.5; seventh grade) for GPT-4&#x2019;s revised PEMs (<italic>P</italic>&#x003C;.001). Of the 143 institutional PEMs, there were 13 (9.1%) below the sixth-grade reading level, which improved to 33 (23.1%) after revision by GPT-4 (<italic>P</italic>&#x003C;.001). No revised GPT-4 PEMs were graded as less accurate or less comprehensive compared to institutional PEMs. A total of 33 (23.1%) GPT-4 PEMs were graded as more comprehensive.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>GPT-4 significantly improved the readability of institutional heart failure&#x2013;related PEMs. 
The model may be a promising adjunct resource in addition to care provided by a licensed health care professional for patients living with heart failure. Further rigorous testing and validation is needed to investigate its safety, efficacy, and impact on patient health literacy.</p></sec></abstract><kwd-group><kwd>patient education</kwd><kwd>heart failure</kwd><kwd>artificial intelligence</kwd><kwd>large language models</kwd><kwd>ChatGPT</kwd><kwd>GPT-4</kwd><kwd>health literacy</kwd><kwd>readability</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Heart failure affects approximately 1%&#x2010;2% of adults globally, with an estimated prevalence of 64 million people [<xref ref-type="bibr" rid="ref1">1</xref>]. Treatment involves extensive patient adherence to lifestyle modifications such as daily weights, fluid and sodium restriction, and rigorous guideline-directed medication regimens. Altogether, these interventions attempt to prevent disease progression and hospital admissions, which drive most of the financial burden ($39.2-$60 billion) related to the disease [<xref ref-type="bibr" rid="ref2">2</xref>]. Due to the complex degree of self-management required by patients with heart failure, improving patient education and health literacy may play a crucial role in improving outcomes [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref4">4</xref>].</p><p>In the United States, the average adult&#x2019;s reading comprehension level is approximately seventh to eighth grade proficiency [<xref ref-type="bibr" rid="ref5">5</xref>], resulting in the American Medical Association (AMA) recommendation of written patient education materials (PEMs) being at a fifth- to sixth-grade reading level [<xref ref-type="bibr" rid="ref6">6</xref>]. 
However, a 2019 readability analysis of cardiovascular disease&#x2013;related PEMs reported that the mean reading level of materials was tenth grade, comparable to that of a high school sophomore [<xref ref-type="bibr" rid="ref7">7</xref>]. Inadequate health literacy has been associated with increased relative risk of emergency department visits, hospitalizations, and mortality for patients with heart failure [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref8">8</xref>], highlighting the need for accessible, readable, and high-quality PEMs.</p><p>ChatGPT is a large language model (LLM) that is gaining widespread public adoption [<xref ref-type="bibr" rid="ref9">9</xref>]. With an increasing number of patients seeking health information online [<xref ref-type="bibr" rid="ref10">10</xref>], the model has the potential to enhance patient health education and address the complexity of heart failure&#x2013;related PEMs. As ChatGPT&#x2019;s acceptance and usage have increased, initial research involved evaluating the model&#x2019;s accuracy and reliability. Several studies have shown that ChatGPT provides appropriate, accurate, and reliable knowledge across a wide range of cardiac and noncardiac medical conditions, including heart failure [<xref ref-type="bibr" rid="ref11">11</xref>-<xref ref-type="bibr" rid="ref16">16</xref>]. In addition to accuracy, ChatGPT has been found to deliver more empathetic responses to real-world patient questions than physicians in online forums [<xref ref-type="bibr" rid="ref17">17</xref>]. 
As prior data regarding accuracy have been promising, an emerging focus has been on investigating the readability of the model&#x2019;s output.</p><p>Prior studies have shown ChatGPT provides accurate and comprehensive responses to questions related to heart failure, and another demonstrated its responses were at a college reading level, highlighting the need for further assessment of the readability of GPT&#x2019;s outputs [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref18">18</xref>]. Similarly, another study examining GPT-4&#x2019;s responses related to amyloidosis showed that while responses were often accurate and comprehensive, the average readability of responses ranged from a grade level of 10.3 (high school sophomore) to 21.7 (beyond graduate school) [<xref ref-type="bibr" rid="ref16">16</xref>]. We aim to expand on the previous literature by assessing the readability of heart failure&#x2013;related online PEMs from renowned cardiology institutions, assessing GPT-4&#x2019;s ability to improve the readability of these PEMs, and comparing the accuracy and comprehensiveness between institutional PEMs and GPT-4&#x2019;s revised PEMs.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Institutional Patient Education Materials</title><p>There were 143 PEMs (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> and <xref ref-type="fig" rid="figure1">Figure 1</xref>) related to heart failure collected in July 2023 from the top 10 ranked cardiology institutions (deidentified) listed on the 2022&#x2010;2023 US News &#x0026; World Report website as &#x201C;Best Hospitals for Cardiology, Heart &#x0026; Vascular Surgery.&#x201D; These PEMs include frequently asked questions (FAQs) presented as text descriptions of various aspects of heart failure such as causes, symptoms, medications, and procedures. 
Duplicate institutional PEMs were included since education materials varied between institutions, and readability of each PEM was the primary outcome of interest.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Diagram of institutional heart failure&#x2013;related PEM curation, revised GPT-4 PEM generation, and subsequent assessment of readability, accuracy, and comprehensiveness. Created in BioRender [<xref ref-type="bibr" rid="ref19">19</xref>]. FAQ: frequently asked question; PEM: patient education material.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="cardio_v9i1e68817_fig01.png"/></fig></sec><sec id="s2-2"><title>GPT-4 Response Generation</title><p>Each institution&#x2019;s PEMs were entered into GPT-4 (version updated July 20, 2023), preceded by the prompt, &#x201C;Please explain the following in simpler terms.&#x201D; GPT-4 was accessed using the OpenAI website interface. Default model settings were used (temperature, max tokens, etc). The &#x201C;new chat&#x201D; function was used for each PEM, thus creating a new conversation without a record of prior inputs. Materials containing nontext components (images or videos) were excluded.</p></sec><sec id="s2-3"><title>Readability Assessment</title><p>The readability of institutional PEMs and GPT-4&#x2019;s revised PEMs were then assessed using the following validated formulas: Flesch Reading Ease (FRE) score [<xref ref-type="bibr" rid="ref20">20</xref>], Flesch-Kincaid Grade Level (FKGL) [<xref ref-type="bibr" rid="ref21">21</xref>], Gunning Fog Index [<xref ref-type="bibr" rid="ref22">22</xref>], Coleman-Liau Index [<xref ref-type="bibr" rid="ref23">23</xref>], Simple Measure of Gobbledygook (SMOG) Index [<xref ref-type="bibr" rid="ref24">24</xref>], and Automated Readability Index [<xref ref-type="bibr" rid="ref25">25</xref>]. 
The FRE score, measured on a scale of 0 to 100, indicates a text with a higher score has better ease of understanding. The remaining formulas directly translate a score into its corresponding US reading grade level, such as a score of 10 translating to a tenth-grade reading level. These metrics derive their scores from the mean length of sentences and words used in a given text. In contrast to the FRE, lower scores in the other formulas correspond to an easier level of understanding. The readability formulas were assessed using the <italic>Textstat</italic> library in Python (Python Software Foundation) and the <italic>Textstat readability</italic> package in R software (R Foundation for Statistical Computing).</p></sec><sec id="s2-4"><title>Accuracy and Comprehensiveness</title><p>Accuracy and comprehensiveness of GPT-4&#x2019;s revised PEMs (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>) were assessed as secondary outcomes by an actively practicing board-certified cardiologist at a tertiary academic medical center. The reviewer was not blinded during grading. The reviewer used the following grading scale in <xref ref-type="other" rid="box1">Textbox 1</xref> when grading the original institutional PEMs and revised GPT-4 PEMs.</p><boxed-text id="box1"><title>Grading scale used by reviewer.</title><p>&#x201C;Compared to the institutional PEM, the GPT-4 revised PEM is&#x201D;:</p><list list-type="order"><list-item><p>Less accurate</p></list-item><list-item><p>Equally accurate</p></list-item><list-item><p>More accurate</p></list-item></list><p>&#x201C;Compared to the institutional PEM, the GPT-4 revised PEM is&#x201D;:</p><list list-type="order"><list-item><p>Less comprehensive</p></list-item><list-item><p>Equally comprehensive</p></list-item><list-item><p>More comprehensive</p></list-item></list></boxed-text></sec><sec id="s2-5"><title>Statistical Analysis</title><p>Descriptive statistics are presented as medians and IQRs. 
Readability metrics for institutional PEMs and GPT-4&#x2019;s revised PEMs were compared using the Mann-Whitney <italic>U</italic> test. Further subanalysis was performed investigating the proportion of PEMs meeting the sixth-grade reading level recommendation by the AMA among institutional PEMs and GPT-4&#x2019;s revised PEMs. Statistical analysis was conducted using SPSS (version 29; IBM Corporation).</p></sec><sec id="s2-6"><title>Ethical Considerations</title><p>The data collection process in this observational study did not involve patients and did not require the deidentification or protection of data. Therefore, no institutional review board approval was sought.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Readability Assessment</title><p>Readability analysis revealed GPT-4&#x2019;s revised PEMs were significantly more readable compared to institutional PEMs across all 6 metrics (<italic>P</italic>&#x003C;.001) (<xref ref-type="fig" rid="figure2">Figure 2</xref>). The FRE score increased from a median institutional score of 48.6 (IQR 38.0-63.3; <italic>P</italic>&#x003C;.001; hard-to-read text, college reading level) to 72.2 (IQR 66.2-77.5; <italic>P</italic>&#x003C;.001; fairly easy-to-read text, seventh-grade level) after GPT-4 revision [<xref ref-type="bibr" rid="ref20">20</xref>]. The FKGL also saw improvement, decreasing from an institutional median reading level of tenth grade (IQR 7.9-13.1; <italic>P</italic>&#x003C;.001) to seventh grade (IQR 6.1-8.5; <italic>P</italic>&#x003C;.001) after GPT-4 revision. Furthermore, the institutional Automated Readability Index of 11.2 (IQR 7.7-14.5; <italic>P</italic>&#x003C;.001) improved to 8.3 (IQR 6.7-9.3; <italic>P</italic>&#x003C;.001) after GPT-4 revision. 
The other readability metrics (Gunning Fog Index, Coleman-Liau Index, and SMOG Index) also showed improved scores after GPT-4 revision: 9.8 (IQR 8.5-11.1; <italic>P</italic>&#x003C;.001), 8.9 (IQR 8.1-10.0; <italic>P</italic>&#x003C;.001), and 9.6 (IQR 8.5-10.7; <italic>P</italic>&#x003C;.001), respectively, compared to the median institutional scores of 13.1 (IQR 10.6-16.2), 12.3 (IQR 10.1-14.5), and 12.2 (IQR 10.3-14.6). Before GPT-4 revision, 9.1% (13/143) of institutional PEMs met the AMA&#x2019;s recommended sixth-grade reading level (<xref ref-type="table" rid="table1">Table 1</xref>). However, after GPT-4&#x2019;s revision, 23.1% (33/143) of PEMs met the sixth-grade recommendation. On average, GPT-4 revision led to a 3.6 reading grade level reduction.</p><p>An example of this simplification in reading level was seen when describing different types of heart failure. The institutional PEM described right-sided heart failure as most often resulting from left-sided heart failure due to increased pressure from the left ventricle not propelling blood to the rest of the body. However, GPT-4 provided a more basic explanation using an analogy of ventricles being small rooms and gave a more simplified explanation of right-sided heart failure as a result of left-sided heart failure. 
In another example, when explaining the various causes of heart failure, one institutional PEM provided a list of etiologies such as &#x201C;heart valve disease&#x201D; or &#x201C;coronary artery disease&#x201D; without a description, compared to GPT-4, which more thoroughly described the role of each cause in relation to heart failure in simple language.</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Box and whiskers plot of median readability scores across 5 metrics including Automated Readability Index, Coleman-Liau Index, Flesch-Kincaid Grade Level, Gunning Fog Index, and Simple Measure of Gobbledygook (SMOG) Index for institutional and GPT-4&#x2019;s revised PEMs. PEMs: patient education materials. * <italic>P</italic>&#x003C;.05.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="cardio_v9i1e68817_fig02.png"/></fig><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Comparison of the proportion of patient education materials (PEMs) meeting the American Medical Association&#x2019;s (AMA) recommended sixth-grade reading level between institutional and GPT-4&#x2019;s revised PEMs.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom"/><td align="left" valign="bottom">&#x2264;Sixth-grade reading level</td><td align="left" valign="bottom">&#x003E;Sixth-grade reading level</td><td align="left" valign="bottom">Percent meeting AMA recommendation</td></tr></thead><tbody><tr><td align="left" valign="top">Institutional Flesch-Kincaid Grade Level</td><td align="left" valign="top">13</td><td align="left" valign="top">130</td><td align="left" valign="top">9.10</td></tr><tr><td align="left" valign="top">GPT-4 Flesch-Kincaid Grade Level</td><td align="left" valign="top">33</td><td align="left" valign="top">110</td><td align="left" valign="top">23.10</td></tr></tbody></table></table-wrap></sec><sec id="s3-2"><title>Accuracy and 
Comprehensiveness</title><p>Following review by a board-certified cardiologist, 33 out of 143 (23.1%) revised GPT-4 PEMs were graded as more comprehensive than the corresponding institutional PEMs (<xref ref-type="table" rid="table2">Table 2</xref>). Additionally, all 143 (100%) revised GPT-4 PEMs were graded as equally accurate as their institutional PEM counterpart.</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Evaluation of GPT-4&#x2019;s accuracy and comprehensiveness of revised patient education materials (PEMs) compared to institutional PEMs (N=143).</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Scoring</td><td align="left" valign="bottom">Accuracy, n (%)</td><td align="left" valign="bottom">Comprehensiveness, n (%)</td></tr></thead><tbody><tr><td align="left" valign="top">Less</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">0 (0)</td></tr><tr><td align="left" valign="top">Equal</td><td align="left" valign="top">143 (100)</td><td align="left" valign="top">110 (76.9)</td></tr><tr><td align="left" valign="top">More</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">33 (23.1)</td></tr></tbody></table></table-wrap></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Results</title><p>LLMs are a rapidly developing technology with the potential to enhance the delivery of PEMs to patients of all levels of health literacy. In this study, we expanded on existing research that evaluated ChatGPT&#x2019;s ability to generate accurate and reliable answers to heart failure questions by examining GPT-4&#x2019;s ability to improve the readability of institutional PEMs. Our analysis shows that GPT-4, when prompted, was able to significantly enhance the readability of institutional PEMs for common heart failure&#x2013;related patient questions. 
After evaluation by a board-certified cardiologist, all of GPT-4&#x2019;s revised PEMs were graded as equally accurate and many were graded as more comprehensive than institutional PEMs, with no revised PEMs graded as less accurate or less comprehensive. GPT-4&#x2019;s capabilities to provide accurate, comprehensive, and readable PEMs in real-time and in a conversational manner underscore the future potential of LLMs to enhance patient education and ultimately patient health literacy.</p></sec><sec id="s4-2"><title>Comparison With Prior Work</title><p>Previous research has demonstrated that ChatGPT possesses a broad knowledge base comprising various medical conditions, including cirrhosis, hepatocellular carcinoma, and bariatric surgery [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref27">27</xref>]. Its knowledge base also spans cardiovascular diseases such as acute coronary syndrome [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref28">28</xref>], heart failure [<xref ref-type="bibr" rid="ref12">12</xref>], atrial fibrillation [<xref ref-type="bibr" rid="ref29">29</xref>], and even rare disorders like amyloidosis [<xref ref-type="bibr" rid="ref16">16</xref>]&#x2014;a multisystemic infiltrative disease. Specifically, regarding amyloidosis, while GPT-4 provided accurate, comprehensive, and reliable answers to gastrointestinal, neurologic, and cardiology queries, the average FKGL of responses was 15.5 (college level), significantly exceeding the recommended sixth-grade reading level set forth by the AMA [<xref ref-type="bibr" rid="ref16">16</xref>]. 
Similar results were shown when examining responses to the surgical treatment of retinal diseases and hypothyroidism in pregnancy [<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref31">31</xref>].</p><p>A previous study examined ChatGPT&#x2019;s ability to simplify the readability of responses to bariatric surgery&#x2013;related FAQs [<xref ref-type="bibr" rid="ref32">32</xref>]. GPT-4 reduced the average grade reading level of PEMs from eleventh (high school junior) to sixth grade, aligning with the AMA&#x2019;s recommendation. Another study also showed that GPT-4 improved the readability of cardiovascular magnetic resonance reports, reducing the average reading level from tenth grade to fifth grade while maintaining high factual accuracy [<xref ref-type="bibr" rid="ref33">33</xref>]. When simplifying PEMs relating to aortic stenosis, GPT-3.5 was able to lower the mean FKGL from 9.2 to 5.9 when instructed to &#x201C;translate to a 5th grade reading level&#x201D; [<xref ref-type="bibr" rid="ref34">34</xref>]. Our study further contributes to this body of work by demonstrating GPT-4&#x2019;s ability to improve the median readability of institutional PEMs from 10.3 (high school sophomore) to 7.3 (seventh grade) while maintaining accuracy and often enhancing comprehensiveness (<xref ref-type="table" rid="table1">Table 1</xref>). However, a unique aspect of our study was the use of a general prompt, &#x201C;Please explain the following in simpler terms,&#x201D; compared to other studies that specifically requested simplification to a fifth- to sixth-grade reading level [<xref ref-type="bibr" rid="ref34">34</xref>]. Our prompt simulates an organic patient encounter with the GPT-4 platform written in language meant to mirror an actual patient request for simplification. This difference in prompting but similar significant improvement in readability shows the adaptability of LLMs in this domain and may increase the likelihood of future adoption. 
Furthermore, the enhanced readability underscores the potential of LLMs in fostering better patient understanding of heart failure&#x2013;related information.</p></sec><sec id="s4-3"><title>Limitations and Ethical Concerns</title><p>ChatGPT, while adept at generating conversational answers, has inherent limitations in accuracy and privacy. The model cannot access real-time patient records and often does not cite peer-reviewed articles or reference updated guidelines, which is crucial for accurate and evidence-based responses. Additionally, the current model may not reliably understand nuanced medical topics or accurately interpret complex medical questions [<xref ref-type="bibr" rid="ref35">35</xref>], leading to potential patient misunderstandings. In some cases, ChatGPT may also generate answers that initially seem factual due to its confident-appearing language but disseminate inaccurate information, known as artificial hallucinations [<xref ref-type="bibr" rid="ref36">36</xref>]. Utilizing artificial intelligence (AI) models like ChatGPT in health care settings may also not guarantee secure handling of patient information as the model may collect users&#x2019; conversation data for future training. Although OpenAI does have a privacy setting allowing for disabling user data collection, prioritizing patient confidentiality will be an important aspect of development if the technology is to be used as an adjunct health care tool [<xref ref-type="bibr" rid="ref37">37</xref>].</p><p>Furthermore, ChatGPT may also perpetuate social disparities due to implicit biases and contribute to accessibility gaps. Recent studies revealed that GPT-4 tended to promote outdated race-based medicine and overrepresent or underrepresent certain racial groups and sexes depending on the circumstance and thus potentially reinforce stereotypes [<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref39">39</xref>]. 
Another concern is equitable access, as patients with lower socioeconomic status often have less access to certain technology such as the internet and may have barriers to utilizing these new AI tools [<xref ref-type="bibr" rid="ref40">40</xref>]. Altogether, these validity and ethical considerations emphasize that clinical oversight, such as US Food and Drug Administration regulation, is warranted prior to LLM incorporation in patient care [<xref ref-type="bibr" rid="ref41">41</xref>]. This would allow for consistent monitoring of this rapidly evolving technology, ensuring optimization of safety protocols with each new update of the model.</p><p>Our study has several limitations. Although we employed validated readability scoring systems as a surrogate for patient understanding, these formulas have their limitations, as previously reported [<xref ref-type="bibr" rid="ref42">42</xref>,<xref ref-type="bibr" rid="ref43">43</xref>]. These formulas often generate a reading level score that inherently grades longer words and sentences as being more complex but are unable to assess a text&#x2019;s content for structure and clarity. Our study also did not involve patients, which is essential for the comprehensive assessment of ChatGPT as a patient educational resource. Future studies would benefit from involving patients to ensure relevance of questions, preference in language used, and assessment of patient understanding. A baseline assessment of a patient&#x2019;s understanding of the given topic would also be beneficial to assess if ChatGPT can improve comprehension rather than relying on scoring tools. Additionally, we employed only one expert reviewer to assess the accuracy and comprehensiveness of ChatGPT&#x2019;s responses. To limit the potential for bias through subjective review and promote diverse perspectives, future research would benefit from involving multiple reviewers from different backgrounds and training institutions. 
Our reviewer was also not blinded to the source of each PEM, allowing for possible bias when evaluating accuracy and comprehensiveness. Our study could also not incorporate or interpret questions containing multimedia at the time of data collection, but with the release of multimodal LLMs, like GPT-4V, including visual aids would be another valuable component of PEMs to investigate. The PEMs used are not comprehensive of all questions that may be asked by patients, which limits the generalizability of our results. Future studies using real-world patients and questions would be helpful to further understand the broad spectrum of questions patients may ask.</p></sec><sec id="s4-4"><title>Future Directions</title><p>We opted for a pragmatic approach in designing the GPT-4 prompt used to revise institutional PEMs. Our focus was on ensuring the prompt reflected a simple, intuitive command that patients would be likely to use in real-world scenarios. Although this method provided promising results, highlighting the versatility of GPT-4, exploring more intricate prompts may yield even more impressive outputs and functionality. We advocate further research into prompt engineering to better replicate natural conversations and offer specific instructions for generating higher-quality and personalized responses.</p><p>Medical institutions can utilize this technology by integrating ChatGPT directly into their online patient education platforms with customized readability based on the highest level of education completed by the patient. This type of personalization of readability assessment can be implemented in all patient-facing AI applications to ensure the appropriate reading level of text for all patients. 
For example, Buoy Health, a chatbot developed by Harvard Medical School in 2014, uses natural language processing to help users assess symptoms with reported accuracy rates of 90%&#x2010;98% [<xref ref-type="bibr" rid="ref44">44</xref>,<xref ref-type="bibr" rid="ref45">45</xref>]. Boston Children&#x2019;s Hospital has adopted this platform on their website to guide patients on symptoms and recommended next steps in seeking medical care [<xref ref-type="bibr" rid="ref44">44</xref>,<xref ref-type="bibr" rid="ref45">45</xref>]. While not solely focused on education, it demonstrates how leading institutions are successfully leveraging chatbots as interactive tools. The consideration of readability assessment and adaptability in these patient-facing applications may increase patient engagement and ensure patients of all education levels can use these tools. Greater collaboration between trusted medical institutions and LLM platforms could improve patient access to simplified, accurate medical information that aligns with the AMA&#x2019;s recommended fifth- to sixth-grade reading level.</p></sec><sec id="s4-5"><title>Conclusions</title><p>Our study demonstrates GPT-4&#x2019;s ability to improve the readability of institutional heart failure&#x2013;related PEMs while also maintaining accuracy and comprehensiveness. Our results underscore the potential future utility of LLMs in improving the delivery of easy-to-understand and readable PEMs to patients of all health literacy levels. While ChatGPT may potentially be a valuable future tool in patient care, it should be used as a supplement to, rather than a replacement for, human expertise and judgment of a licensed health care professional. We recommend the development of future studies examining the optimization of readability outputs, personalization, and real-world implementation.</p></sec></sec></body><back><ack><p>ChatGPT-4 (version updated 16 May 2024), by OpenAI, was used to improve readability. 
There was no funding obtained for this study.</p></ack><notes><sec><title>Data Availability</title><p>All data generated or analyzed during this study are included in this paper's main text and <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>.</p></sec></notes><fn-group><fn fn-type="conflict"><p>RG is a consultant for Pfizer, Alnylam, and AstraZeneca. None of the other authors have interests to disclose.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AI</term><def><p>artificial intelligence</p></def></def-item><def-item><term id="abb2">AMA</term><def><p>American Medical Association</p></def></def-item><def-item><term id="abb3">FAQ</term><def><p>frequently asked question</p></def></def-item><def-item><term id="abb4">FKGL</term><def><p>Flesch-Kincaid Grade Level</p></def></def-item><def-item><term id="abb5">FRE</term><def><p>Flesch Reading Ease score</p></def></def-item><def-item><term id="abb6">LLM</term><def><p>large language model</p></def></def-item><def-item><term id="abb7">PEM</term><def><p>patient education material</p></def></def-item><def-item><term id="abb8">SMOG</term><def><p>Simple Measure of Gobbledygook</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Groenewegen</surname><given-names>A</given-names> </name><name name-style="western"><surname>Rutten</surname><given-names>FH</given-names> </name><name name-style="western"><surname>Mosterd</surname><given-names>A</given-names> </name><name name-style="western"><surname>Hoes</surname><given-names>AW</given-names> </name></person-group><article-title>Epidemiology of heart failure</article-title><source>Eur J Heart Fail</source><year>2020</year><month>08</month><volume>22</volume><issue>8</issue><fpage>1342</fpage><lpage>1356</lpage><pub-id 
pub-id-type="doi">10.1002/ejhf.1858</pub-id><pub-id pub-id-type="medline">32483830</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Urbich</surname><given-names>M</given-names> </name><name name-style="western"><surname>Globe</surname><given-names>G</given-names> </name><name name-style="western"><surname>Pantiri</surname><given-names>K</given-names> </name><etal/></person-group><article-title>A systematic review of medical costs associated with heart failure in the USA (2014-2020)</article-title><source>Pharmacoeconomics</source><year>2020</year><month>11</month><volume>38</volume><issue>11</issue><fpage>1219</fpage><lpage>1236</lpage><pub-id pub-id-type="doi">10.1007/s40273-020-00952-0</pub-id><pub-id pub-id-type="medline">32812149</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Berkman</surname><given-names>ND</given-names> </name><name name-style="western"><surname>Sheridan</surname><given-names>SL</given-names> </name><name name-style="western"><surname>Donahue</surname><given-names>KE</given-names> </name><name name-style="western"><surname>Halpern</surname><given-names>DJ</given-names> </name><name name-style="western"><surname>Crotty</surname><given-names>K</given-names> </name></person-group><article-title>Low health literacy and health outcomes: an updated systematic review</article-title><source>Ann Intern Med</source><year>2011</year><month>07</month><day>19</day><volume>155</volume><issue>2</issue><fpage>97</fpage><lpage>107</lpage><pub-id pub-id-type="doi">10.7326/0003-4819-155-2-201107190-00005</pub-id><pub-id pub-id-type="medline">21768583</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Peterson</surname><given-names>PN</given-names> </name><name name-style="western"><surname>Shetterly</surname><given-names>SM</given-names> </name><name name-style="western"><surname>Clarke</surname><given-names>CL</given-names> </name><etal/></person-group><article-title>Health literacy and outcomes among patients with heart failure</article-title><source>JAMA</source><year>2011</year><month>04</month><day>27</day><volume>305</volume><issue>16</issue><fpage>1695</fpage><lpage>1701</lpage><pub-id pub-id-type="doi">10.1001/jama.2011.512</pub-id><pub-id pub-id-type="medline">21521851</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="web"><article-title>Fast facts: adult literacy</article-title><source>NCES</source><year>2019</year><access-date>2024-10-29</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://nces.ed.gov/fastfacts/display.asp?id=69">https://nces.ed.gov/fastfacts/display.asp?id=69</ext-link></comment></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Weiss</surname><given-names>BD</given-names> </name></person-group><source>Health Literacy: A Manual for Clinicians</source><year>2003</year><publisher-name>American Medical Association Foundation and American Medical Association</publisher-name></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ayyaswami</surname><given-names>V</given-names> </name><name name-style="western"><surname>Padmanabhan</surname><given-names>D</given-names> </name><name name-style="western"><surname>Patel</surname><given-names>M</given-names> </name><etal/></person-group><article-title>A readability analysis of online cardiovascular disease-related health education materials</article-title><source>Health Lit Res 
Pract</source><year>2019</year><month>04</month><volume>3</volume><issue>2</issue><fpage>e74</fpage><lpage>e80</lpage><pub-id pub-id-type="doi">10.3928/24748307-20190306-03</pub-id><pub-id pub-id-type="medline">31049489</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Fabbri</surname><given-names>M</given-names> </name><name name-style="western"><surname>Murad</surname><given-names>MH</given-names> </name><name name-style="western"><surname>Wennberg</surname><given-names>AM</given-names> </name><etal/></person-group><article-title>Health literacy and outcomes among patients with heart failure: a systematic review and meta-analysis</article-title><source>JACC Heart Fail</source><year>2020</year><month>06</month><volume>8</volume><issue>6</issue><fpage>451</fpage><lpage>460</lpage><pub-id pub-id-type="doi">10.1016/j.jchf.2019.11.007</pub-id><pub-id pub-id-type="medline">32466837</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Sidoti</surname><given-names>O</given-names> </name><name name-style="western"><surname>McClain</surname><given-names>C</given-names> </name></person-group><article-title>34% of U.S. 
adults have used ChatGPT, about double the share in 2023</article-title><source>Pew Research Center</source><year>2025</year><month>06</month><day>25</day><access-date>2025-06-26</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.pewresearch.org/short-reads/2025/06/25/34-of-us-adults-have-used-chatgpt-about-double-the-share-in-2023/">https://www.pewresearch.org/short-reads/2025/06/25/34-of-us-adults-have-used-chatgpt-about-double-the-share-in-2023/</ext-link></comment></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="web"><article-title>The social life of health information</article-title><source>Pew Research Center</source><year>2009</year><access-date>2024-10-29</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.pewresearch.org/internet/2009/06/11/the-social-life-of-health-information">https://www.pewresearch.org/internet/2009/06/11/the-social-life-of-health-information</ext-link></comment></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sarraju</surname><given-names>A</given-names> </name><name name-style="western"><surname>Bruemmer</surname><given-names>D</given-names> </name><name name-style="western"><surname>Van Iterson</surname><given-names>E</given-names> </name><name name-style="western"><surname>Cho</surname><given-names>L</given-names> </name><name name-style="western"><surname>Rodriguez</surname><given-names>F</given-names> </name><name name-style="western"><surname>Laffin</surname><given-names>L</given-names> </name></person-group><article-title>Appropriateness of cardiovascular disease prevention recommendations obtained from a popular online chat-based artificial intelligence model</article-title><source>JAMA</source><year>2023</year><month>03</month><day>14</day><volume>329</volume><issue>10</issue><fpage>842</fpage><lpage>844</lpage><pub-id 
pub-id-type="doi">10.1001/jama.2023.1044</pub-id><pub-id pub-id-type="medline">36735264</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>King</surname><given-names>RC</given-names> </name><name name-style="western"><surname>Samaan</surname><given-names>JS</given-names> </name><name name-style="western"><surname>Yeo</surname><given-names>YH</given-names> </name><name name-style="western"><surname>Mody</surname><given-names>B</given-names> </name><name name-style="western"><surname>Lombardo</surname><given-names>DM</given-names> </name><name name-style="western"><surname>Ghashghaei</surname><given-names>R</given-names> </name></person-group><article-title>Appropriateness of ChatGPT in answering heart failure related questions</article-title><source>Heart Lung Circ</source><year>2024</year><month>09</month><volume>33</volume><issue>9</issue><fpage>1314</fpage><lpage>1318</lpage><pub-id pub-id-type="doi">10.1016/j.hlc.2024.03.005</pub-id><pub-id pub-id-type="medline">38821760</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>King</surname><given-names>RC</given-names> </name><name name-style="western"><surname>Bharani</surname><given-names>V</given-names> </name><name name-style="western"><surname>Shah</surname><given-names>K</given-names> </name><name name-style="western"><surname>Yeo</surname><given-names>YH</given-names> </name><name name-style="western"><surname>Samaan</surname><given-names>JS</given-names> </name></person-group><article-title>GPT-4V passes the BLS and ACLS examinations: an analysis of GPT-4V&#x2019;s image recognition capabilities</article-title><source>Resuscitation</source><year>2024</year><month>02</month><volume>195</volume><fpage>110106</fpage><pub-id 
pub-id-type="doi">10.1016/j.resuscitation.2023.110106</pub-id><pub-id pub-id-type="medline">38160904</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yeo</surname><given-names>YH</given-names> </name><name name-style="western"><surname>Samaan</surname><given-names>JS</given-names> </name><name name-style="western"><surname>Ng</surname><given-names>WH</given-names> </name><etal/></person-group><article-title>Assessing the performance of ChatGPT in answering questions regarding cirrhosis and hepatocellular carcinoma</article-title><source>Clin Mol Hepatol</source><year>2023</year><month>07</month><volume>29</volume><issue>3</issue><fpage>721</fpage><lpage>732</lpage><pub-id pub-id-type="doi">10.3350/cmh.2023.0089</pub-id><pub-id pub-id-type="medline">36946005</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Samaan</surname><given-names>JS</given-names> </name><name name-style="western"><surname>Yeo</surname><given-names>YH</given-names> </name><name name-style="western"><surname>Rajeev</surname><given-names>N</given-names> </name><etal/></person-group><article-title>Assessing the accuracy of responses by the language model ChatGPT to questions regarding bariatric surgery</article-title><source>OBES SURG</source><year>2023</year><month>06</month><volume>33</volume><issue>6</issue><fpage>1790</fpage><lpage>1796</lpage><pub-id pub-id-type="doi">10.1007/s11695-023-06603-5</pub-id><pub-id pub-id-type="medline">37106269</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>King</surname><given-names>RC</given-names> </name><name 
name-style="western"><surname>Samaan</surname><given-names>JS</given-names> </name><name name-style="western"><surname>Yeo</surname><given-names>YH</given-names> </name><etal/></person-group><article-title>A multidisciplinary assessment of chatgpt&#x2019;s knowledge of amyloidosis: observational study</article-title><source>JMIR Cardio</source><year>2024</year><month>04</month><day>19</day><volume>8</volume><fpage>e53421</fpage><pub-id pub-id-type="doi">10.2196/53421</pub-id><pub-id pub-id-type="medline">38640472</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ayers</surname><given-names>JW</given-names> </name><name name-style="western"><surname>Poliak</surname><given-names>A</given-names> </name><name name-style="western"><surname>Dredze</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Comparing physician and artificial intelligence chatbot responses to patient questions posted to a public social media forum</article-title><source>JAMA Intern Med</source><year>2023</year><month>06</month><day>1</day><volume>183</volume><issue>6</issue><fpage>589</fpage><lpage>596</lpage><pub-id pub-id-type="doi">10.1001/jamainternmed.2023.1838</pub-id><pub-id pub-id-type="medline">37115527</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Riddell</surname><given-names>CW</given-names> </name><name name-style="western"><surname>Chan</surname><given-names>C</given-names> </name><name name-style="western"><surname>McGrinder</surname><given-names>H</given-names> </name><name name-style="western"><surname>Earle</surname><given-names>NJ</given-names> </name><name name-style="western"><surname>Poppe</surname><given-names>KK</given-names> </name><name 
name-style="western"><surname>Doughty</surname><given-names>RN</given-names> </name></person-group><article-title>College-level reading is required to understand ChatGPT&#x2019;s answers to lay questions relating to heart failure</article-title><source>Eur J Heart Fail</source><year>2023</year><month>12</month><volume>25</volume><issue>12</issue><fpage>2336</fpage><lpage>2337</lpage><pub-id pub-id-type="doi">10.1002/ejhf.3083</pub-id><pub-id pub-id-type="medline">37964183</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>King</surname><given-names>R</given-names> </name></person-group><article-title>Figure 1</article-title><source>BioRender</source><access-date>2025-06-27</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://BioRender.com/imijjhx">https://BioRender.com/imijjhx</ext-link></comment></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Flesch</surname><given-names>R</given-names> </name></person-group><article-title>Guide to academic writing</article-title><source>University of Canterbury School of Business and Economics</source><year>2016</year><access-date>2024-10-29</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://web.archive.org/web/20160712094308/http://www.mang.canterbury.ac.nz/writing_guide/writing/flesch.shtml">https://web.archive.org/web/20160712094308/http://www.mang.canterbury.ac.nz/writing_guide/writing/flesch.shtml</ext-link></comment></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="report"><person-group person-group-type="author"><name name-style="western"><surname>Kincaid</surname><given-names>J</given-names> </name><name name-style="western"><surname>Fishburne</surname><given-names>R</given-names> </name><name 
name-style="western"><surname>Rogers</surname><given-names>R</given-names> </name><name name-style="western"><surname>Chissom</surname><given-names>B</given-names> </name></person-group><article-title>Derivation of new readability formulas (Automated Readability Index, Fog Count and Flesch Reading Ease Formula) for Navy enlisted personnel</article-title><year>1975</year><access-date>2025-06-25</access-date><publisher-name>Institute for Simulation and Training</publisher-name><comment><ext-link ext-link-type="uri" xlink:href="https://stars.library.ucf.edu/cgi/viewcontent.cgi?article=1055&#x0026;context=istlibrary">https://stars.library.ucf.edu/cgi/viewcontent.cgi?article=1055&#x0026;context=istlibrary</ext-link></comment></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gunning</surname><given-names>R</given-names> </name></person-group><article-title>The Fog Index after twenty years</article-title><source>Journal of Business Communication</source><year>1969</year><month>01</month><volume>6</volume><issue>2</issue><fpage>3</fpage><lpage>13</lpage><pub-id pub-id-type="doi">10.1177/002194366900600202</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Coleman</surname><given-names>M</given-names> </name><name name-style="western"><surname>Liau</surname><given-names>TL</given-names> </name></person-group><article-title>A computer readability formula designed for machine scoring</article-title><source>Journal of Applied Psychology</source><year>1975</year><volume>60</volume><issue>2</issue><fpage>283</fpage><lpage>284</lpage><pub-id pub-id-type="doi">10.1037/h0076540</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>McLaughlin</surname><given-names>GH</given-names> </name></person-group><article-title>SMOG grading: a new readability formula</article-title><source>J Read</source><year>1969</year><access-date>2025-06-25</access-date><volume>12</volume><issue>8</issue><fpage>639</fpage><lpage>646</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://www.jstor.org/stable/40011226">https://www.jstor.org/stable/40011226</ext-link></comment></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Smith</surname><given-names>EA</given-names> </name><name name-style="western"><surname>Senter</surname><given-names>RJ</given-names> </name></person-group><article-title>Automated readability index</article-title><source>AMRL TR</source><year>1967</year><month>05</month><fpage>1</fpage><lpage>14</lpage><pub-id pub-id-type="medline">5302480</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Samaan</surname><given-names>JS</given-names> </name><name name-style="western"><surname>Yeo</surname><given-names>YH</given-names> </name><name name-style="western"><surname>Ng</surname><given-names>WH</given-names> </name><etal/></person-group><article-title>ChatGPT&#x2019;s ability to comprehend and answer cirrhosis related questions in Arabic</article-title><source>Arab J Gastroenterol</source><year>2023</year><month>08</month><volume>24</volume><issue>3</issue><fpage>145</fpage><lpage>148</lpage><pub-id pub-id-type="doi">10.1016/j.ajg.2023.08.001</pub-id><pub-id pub-id-type="medline">37673708</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="other"><person-group person-group-type="author"><collab>OpenAI</collab><name 
name-style="western"><surname>Achiam</surname><given-names>J</given-names> </name><name name-style="western"><surname>Adler</surname><given-names>S</given-names> </name><etal/></person-group><article-title>GPT-4 technical report</article-title><source>arXiv</source><comment>Preprint posted online on  Mar 15, 2023</comment><pub-id pub-id-type="doi">10.48550/arXiv.2303.08774</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gurbuz</surname><given-names>DC</given-names> </name><name name-style="western"><surname>Varis</surname><given-names>E</given-names> </name></person-group><article-title>Is ChatGPT knowledgeable of acute coronary syndromes and pertinent European Society of Cardiology Guidelines?</article-title><source>Minerva Cardiol Angiol</source><year>2024</year><month>06</month><volume>72</volume><issue>3</issue><fpage>299</fpage><lpage>303</lpage><pub-id pub-id-type="doi">10.23736/S2724-5683.24.06517-7</pub-id><pub-id pub-id-type="medline">38391252</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lee</surname><given-names>TJ</given-names> </name><name name-style="western"><surname>Campbell</surname><given-names>DJ</given-names> </name><name name-style="western"><surname>Rao</surname><given-names>AK</given-names> </name><etal/></person-group><article-title>Evaluating ChatGPT responses on atrial fibrillation for patient education</article-title><source>Cureus</source><year>2024</year><month>06</month><volume>16</volume><issue>6</issue><fpage>e61680</fpage><pub-id pub-id-type="doi">10.7759/cureus.61680</pub-id><pub-id pub-id-type="medline">38841294</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Onder</surname><given-names>CE</given-names> </name><name name-style="western"><surname>Koc</surname><given-names>G</given-names> </name><name name-style="western"><surname>Gokbulut</surname><given-names>P</given-names> </name><name name-style="western"><surname>Taskaldiran</surname><given-names>I</given-names> </name><name name-style="western"><surname>Kuskonmaz</surname><given-names>SM</given-names> </name></person-group><article-title>Evaluation of the reliability and readability of ChatGPT-4 responses regarding hypothyroidism during pregnancy</article-title><source>Sci Rep</source><year>2024</year><month>01</month><day>2</day><volume>14</volume><issue>1</issue><fpage>243</fpage><pub-id pub-id-type="doi">10.1038/s41598-023-50884-w</pub-id><pub-id pub-id-type="medline">38167988</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Momenaei</surname><given-names>B</given-names> </name><name name-style="western"><surname>Wakabayashi</surname><given-names>T</given-names> </name><name name-style="western"><surname>Shahlaee</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Appropriateness and readability of ChatGPT-4-generated responses for surgical treatment of retinal diseases</article-title><source>Ophthalmol Retina</source><year>2023</year><month>10</month><volume>7</volume><issue>10</issue><fpage>862</fpage><lpage>868</lpage><pub-id pub-id-type="doi">10.1016/j.oret.2023.05.022</pub-id><pub-id pub-id-type="medline">37277096</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Srinivasan</surname><given-names>N</given-names> </name><name name-style="western"><surname>Samaan</surname><given-names>JS</given-names> </name><name 
name-style="western"><surname>Rajeev</surname><given-names>ND</given-names> </name><name name-style="western"><surname>Kanu</surname><given-names>MU</given-names> </name><name name-style="western"><surname>Yeo</surname><given-names>YH</given-names> </name><name name-style="western"><surname>Samakar</surname><given-names>K</given-names> </name></person-group><article-title>Large language models and bariatric surgery patient education: a comparative readability analysis of GPT-3.5, GPT-4, Bard, and online institutional resources</article-title><source>Surg Endosc</source><year>2024</year><month>05</month><volume>38</volume><issue>5</issue><fpage>2522</fpage><lpage>2532</lpage><pub-id pub-id-type="doi">10.1007/s00464-024-10720-2</pub-id><pub-id pub-id-type="medline">38472531</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Salam</surname><given-names>B</given-names> </name><name name-style="western"><surname>Kravchenko</surname><given-names>D</given-names> </name><name name-style="western"><surname>Nowak</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Generative Pre-trained Transformer 4 makes cardiovascular magnetic resonance reports easy to understand</article-title><source>J Cardiovasc Magn Reson</source><year>2024</year><volume>26</volume><issue>1</issue><fpage>101035</fpage><pub-id pub-id-type="doi">10.1016/j.jocmr.2024.101035</pub-id><pub-id pub-id-type="medline">38460841</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rouhi</surname><given-names>AD</given-names> </name><name name-style="western"><surname>Ghanem</surname><given-names>YK</given-names> </name><name name-style="western"><surname>Yolchieva</surname><given-names>L</given-names> 
</name><etal/></person-group><article-title>Can artificial intelligence improve the readability of patient education materials on aortic stenosis? A pilot study</article-title><source>Cardiol Ther</source><year>2024</year><month>03</month><volume>13</volume><issue>1</issue><fpage>137</fpage><lpage>147</lpage><pub-id pub-id-type="doi">10.1007/s40119-023-00347-0</pub-id><pub-id pub-id-type="medline">38194058</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cascella</surname><given-names>M</given-names> </name><name name-style="western"><surname>Montomoli</surname><given-names>J</given-names> </name><name name-style="western"><surname>Bellini</surname><given-names>V</given-names> </name><name name-style="western"><surname>Bignami</surname><given-names>E</given-names> </name></person-group><article-title>Evaluating the feasibility of ChatGPT in healthcare: an analysis of multiple clinical and research scenarios</article-title><source>J Med Syst</source><year>2023</year><month>03</month><day>4</day><volume>47</volume><issue>1</issue><fpage>33</fpage><pub-id pub-id-type="doi">10.1007/s10916-023-01925-4</pub-id><pub-id pub-id-type="medline">36869927</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Alkaissi</surname><given-names>H</given-names> </name><name name-style="western"><surname>McFarlane</surname><given-names>SI</given-names> </name></person-group><article-title>Artificial hallucinations in ChatGPT: implications in scientific writing</article-title><source>Cureus</source><year>2023</year><month>02</month><volume>15</volume><issue>2</issue><fpage>e35179</fpage><pub-id pub-id-type="doi">10.7759/cureus.35179</pub-id><pub-id pub-id-type="medline">36811129</pub-id></nlm-citation></ref><ref 
id="ref37"><label>37</label><nlm-citation citation-type="web"><article-title>New ways to manage your data in ChatGPT</article-title><source>OpenAI</source><year>2023</year><access-date>2024-10-29</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://openai.com/index/new-ways-to-manage-your-data-in-chatgpt">https://openai.com/index/new-ways-to-manage-your-data-in-chatgpt</ext-link></comment></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zack</surname><given-names>T</given-names> </name><name name-style="western"><surname>Lehman</surname><given-names>E</given-names> </name><name name-style="western"><surname>Suzgun</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Assessing the potential of GPT-4 to perpetuate racial and gender biases in health care: a model evaluation study</article-title><source>Lancet Digit Health</source><year>2024</year><month>01</month><volume>6</volume><issue>1</issue><fpage>e12</fpage><lpage>e22</lpage><pub-id pub-id-type="doi">10.1016/S2589-7500(23)00225-X</pub-id><pub-id pub-id-type="medline">38123252</pub-id></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Omiye</surname><given-names>JA</given-names> </name><name name-style="western"><surname>Lester</surname><given-names>JC</given-names> </name><name name-style="western"><surname>Spichak</surname><given-names>S</given-names> </name><name name-style="western"><surname>Rotemberg</surname><given-names>V</given-names> </name><name name-style="western"><surname>Daneshjou</surname><given-names>R</given-names> </name></person-group><article-title>Large language models propagate race-based medicine</article-title><source>NPJ Digit 
Med</source><year>2023</year><month>10</month><day>20</day><volume>6</volume><issue>1</issue><fpage>195</fpage><pub-id pub-id-type="doi">10.1038/s41746-023-00939-z</pub-id><pub-id pub-id-type="medline">37864012</pub-id></nlm-citation></ref><ref id="ref40"><label>40</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>X</given-names> </name><name name-style="western"><surname>Sanders</surname><given-names>HM</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>Y</given-names> </name><etal/></person-group><article-title>ChatGPT: promise and challenges for deployment in low- and middle-income countries</article-title><source>Lancet Reg Health West Pac</source><year>2023</year><month>12</month><volume>41</volume><fpage>100905</fpage><pub-id pub-id-type="doi">10.1016/j.lanwpc.2023.100905</pub-id><pub-id pub-id-type="medline">37731897</pub-id></nlm-citation></ref><ref id="ref41"><label>41</label><nlm-citation citation-type="report"><article-title>Proposed regulatory framework for modifications to artificial intelligence/machine learning (AI/ML)-based Software as a Medical Device (SaMD)</article-title><year>2019</year><access-date>2025-06-26</access-date><publisher-name>Food and Drug Administration</publisher-name><comment><ext-link ext-link-type="uri" xlink:href="https://www.fda.gov/media/122535/download?attachment">https://www.fda.gov/media/122535/download?attachment</ext-link></comment></nlm-citation></ref><ref id="ref42"><label>42</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Scott</surname><given-names>B</given-names> </name></person-group><article-title>The Gunning Fog Index (or FOG) readability formula</article-title><source>Readability Formula</source><year>2025</year><access-date>2024-10-29</access-date><comment><ext-link ext-link-type="uri" 
xlink:href="https://readabilityformulas.com/the-gunnings-fog-index-or-fog-readability-formula">https://readabilityformulas.com/the-gunnings-fog-index-or-fog-readability-formula</ext-link></comment></nlm-citation></ref><ref id="ref43"><label>43</label><nlm-citation citation-type="web"><article-title>Tip 6. Use caution with readability formulas for quality reports</article-title><source>AHRQ</source><year>2015</year><access-date>2024-10-29</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.ahrq.gov/talkingquality/resources/writing/tip6.html">https://www.ahrq.gov/talkingquality/resources/writing/tip6.html</ext-link></comment></nlm-citation></ref><ref id="ref44"><label>44</label><nlm-citation citation-type="web"><article-title>Buoy Health: a chatbot that helps diagnose your symptoms</article-title><source>Product Hunt</source><year>2017</year><access-date>2025-05-01</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.producthunt.com/posts/buoy-health">https://www.producthunt.com/posts/buoy-health</ext-link></comment></nlm-citation></ref><ref id="ref45"><label>45</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>&#x0106;irkovi&#x0107;</surname><given-names>A</given-names> </name></person-group><article-title>Evaluation of four artificial intelligence-assisted self-diagnosis apps on three diagnoses: two-year follow-up study</article-title><source>J Med Internet Res</source><year>2020</year><month>12</month><day>4</day><volume>22</volume><issue>12</issue><fpage>e18097</fpage><pub-id pub-id-type="doi">10.2196/18097</pub-id><pub-id pub-id-type="medline">33275113</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Accuracy and comprehensiveness data.</p><media xlink:href="cardio_v9i1e68817_app1.xlsx" xlink:title="XLSX File, 116 KB"/></supplementary-material><supplementary-material 
id="app2"><label>Multimedia Appendix 2</label><p>Comparison of readability of institutional and GPT-4&#x2019;s revised patient education materials.</p><media xlink:href="cardio_v9i1e68817_app2.png" xlink:title="PNG File, 144 KB"/></supplementary-material></app-group></back></article>