<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="review-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Cardio</journal-id><journal-id journal-id-type="publisher-id">cardio</journal-id><journal-id journal-id-type="index">26</journal-id><journal-title>JMIR Cardio</journal-title><abbrev-journal-title>JMIR Cardio</abbrev-journal-title><issn pub-type="epub">2561-1011</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v10i1e76734</article-id><article-id pub-id-type="doi">10.2196/76734</article-id><article-categories><subj-group subj-group-type="heading"><subject>Review</subject></subj-group></article-categories><title-group><article-title>Large Language Models in Cardiology: Systematic Review</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes" equal-contrib="yes"><name name-style="western"><surname>Gendler</surname><given-names>Moran</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Nadkarni</surname><given-names>Girish N</given-names></name><degrees>Dr med</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Sudri</surname><given-names>Karin</given-names></name><degrees>MA</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Cohen-Shelly</surname><given-names>Michal</given-names></name><degrees>MBA</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Glicksberg</surname><given-names>Benjamin S</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Efros</surname><given-names>Orly</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff4">4</xref><xref ref-type="aff" rid="aff5">5</xref></contrib><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Soffer</surname><given-names>Shelly</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff4">4</xref><xref ref-type="aff" rid="aff6">6</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Klang</surname><given-names>Eyal</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff2">2</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib></contrib-group><aff id="aff1"><institution>Azrieli Faculty of Medicine, Bar-Ilan University</institution><addr-line>Henrietta Szold St 8</addr-line><addr-line>Safed</addr-line><country>Israel</country></aff><aff id="aff2"><institution>Windreich Department of AI and Human Health, Mount Sinai Medical Center</institution><addr-line>New York</addr-line><addr-line>NY</addr-line><country>United States</country></aff><aff id="aff3"><institution>Sagol AI Hub, ARC Innovation Center, Sheba Medical
Center</institution><addr-line>Ramat Gan</addr-line><country>Israel</country></aff><aff id="aff4"><institution>School of Medicine, Tel Aviv University</institution><addr-line>Tel Aviv</addr-line><country>Israel</country></aff><aff id="aff5"><institution>National Hemophilia Center and Thrombosis Institute, Sheba Medical Center</institution><addr-line>Ramat Gan</addr-line><country>Israel</country></aff><aff id="aff6"><institution>Institute of Hematology, Davidoff Cancer Center, Rabin Medical Center</institution><addr-line>Petah-Tikva</addr-line><country>Israel</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Coristine</surname><given-names>Andrew</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Mahmoud</surname><given-names>Randa Salah Gomaa</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Popoola</surname><given-names>Simisolaoluwa Ademide</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Zhang</surname><given-names>Yonggang</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Moran Gendler, MD, Azrieli Faculty of Medicine, Bar-Ilan University, Henrietta Szold St 8, Safed 1311502, Israel; +972 542354444; <email>morangendler@gmail.com</email></corresp><fn fn-type="equal" id="equal-contrib1"><label>*</label><p>these authors contributed equally</p></fn></author-notes><pub-date pub-type="collection"><year>2026</year></pub-date><pub-date pub-type="epub"><day>16</day><month>4</month><year>2026</year></pub-date><volume>10</volume><elocation-id>e76734</elocation-id><history><date date-type="received"><day>29</day><month>4</month><year>2025</year></date><date date-type="rev-recd"><day>16</day><month>1</month><year>2026</year></date><date date-type="accepted"><day>19</day><month>1</month><year>2026</year></date></history><copyright-statement>&#x00A9; Moran Gendler, Girish N Nadkarni, Karin Sudri, Michal Cohen-Shelly, Benjamin S Glicksberg, Orly Efros, Shelly Soffer, Eyal Klang. Originally published in JMIR Cardio (<ext-link ext-link-type="uri" xlink:href="https://cardio.jmir.org">https://cardio.jmir.org</ext-link>), 16.4.2026. </copyright-statement><copyright-year>2026</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Cardio, is properly cited.
The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://cardio.jmir.org">https://cardio.jmir.org</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://cardio.jmir.org/2026/1/e76734"/><abstract><sec><title>Background</title><p>Large language models (LLMs) are increasingly used in health care, but their role in cardiology has not yet been systematically evaluated.</p></sec><sec><title>Objective</title><p>This review aimed to assess the applications, performance, and limitations of LLMs across diverse cardiology tasks, including chronic and progressive conditions, acute events, education, and diagnostic testing.</p></sec><sec sec-type="methods"><title>Methods</title><p>A systematic search was conducted in PubMed and Scopus for studies published up to April 14, 2024, using keywords related to LLMs and cardiology. Studies evaluating LLM outputs in cardiology-related tasks were included. Data were extracted across 5 predefined domains and the risk of bias was assessed using an adapted QUADAS-2 tool (developed by Whiting et al at the University of Bristol). The review protocol was registered in PROSPERO (CRD42024556397).</p></sec><sec sec-type="results"><title>Results</title><p>A total of 33 studies contributed quantitative outcome data to a descriptive synthesis. Across chronic conditions, ChatGPT-3.5 (OpenAI) answered 91% (43/47) of heart failure questions accurately, although readability often required college-level comprehension. In acute scenarios, Bing Chat omitted key myocardial infarction first aid steps in 25% (5/20) to 45% (9/20) of cases, while cardiac arrest information was rated highly (mean 4.3/5, SD 0.7) but written above recommended reading levels. In physician education tasks, ChatGPT-4 (OpenAI) demonstrated higher accuracy than ChatGPT-3.5, improving from 38% (33/88) to 66% (58/88). In patient education studies, ChatGPT-4 provided scientifically adequate explanations (5.0&#x2010;6.0/7) comparable to hospital materials but at higher reading levels (11th vs 7th grade). In diagnostic testing, ChatGPT-4 interpreted 91% (36/40) of electrocardiogram vignettes correctly, significantly better than emergency physicians (77%, 31/40; <italic>P</italic>&#x003C;.001), but showed lower performance in echocardiography.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>LLMs show meaningful potential in cardiology, especially for education and electrocardiogram interpretation, but performance varies across clinical tasks. Limitations in emergency guidance and readability, as well as small in silico study designs, highlight the need for multimodal models and prospective validation.</p></sec><sec><title>Trial Registration</title><p>PROSPERO CRD42024556397; https://www.crd.york.ac.uk/PROSPERO/view/CRD42024556397</p></sec></abstract><kwd-group><kwd>artificial intelligence</kwd><kwd>natural language processing</kwd><kwd>large language models</kwd><kwd>generative AI</kwd><kwd>LLMs</kwd><kwd>cardiology</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Large language models (LLMs) such as OpenAI&#x2019;s ChatGPT, Google&#x2019;s Gemini, and Meta&#x2019;s LLaMA are advancing natural language processing by generating, understanding, and interpreting text.
These models process text to produce coherent responses, understand context, summarize information, and engage in conversations [<xref ref-type="bibr" rid="ref1">1</xref>]. Their application in health care, particularly in cardiology, offers significant benefits due to their ability to analyze diverse and complex data&#x2014;from patient records to imaging studies [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref3">3</xref>].</p><p>In cardiology, LLMs are increasingly being used to assist in the management of cardiovascular diseases by organizing and making clinical data more accessible [<xref ref-type="bibr" rid="ref4">4</xref>]. These models can enhance diagnostic accuracy, personalize treatment plans, and identify patterns in large datasets that traditional methods might overlook [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref6">6</xref>]. Additionally, LLMs offer the potential to automate routine documentation, thereby reducing the administrative burden on health care providers [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>]. However, integrating LLMs into clinical workflows poses challenges, and effective implementation is crucial to realizing their potential to improve patient care in cardiology [<xref ref-type="bibr" rid="ref8">8</xref>-<xref ref-type="bibr" rid="ref10">10</xref>].</p><p>Recent reviews have highlighted the emerging role of LLMs in cardiology. Sharma et al [<xref ref-type="bibr" rid="ref9">9</xref>] provided an early synthesis of ChatGPT applications, focusing on health literacy, clinical care, and research up to September 2023. Boonstra et al [<xref ref-type="bibr" rid="ref7">7</xref>] more recently examined LLMs across cardiovascular disease, with emphasis on prevention and patient education. Our review complements these works by incorporating a broader search, applying PRISMA (Preferred Reporting Items for Systematic Reviews and Meta-Analyses) 2020 methodology, and organizing findings into 5 clinically relevant domains. It emphasizes current uses of LLMs in cardiology, their potential impact on care and patient outcomes, and the barriers to their practical application.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Overview</title><p>This review was conducted according to the PRISMA guidelines [<xref ref-type="bibr" rid="ref10">10</xref>] (<xref ref-type="supplementary-material" rid="app2">Checklist 1</xref>).</p></sec><sec id="s2-2"><title>Search Strategy</title><p>A comprehensive literature search was conducted to identify studies on the application of LLMs in cardiology. The search was performed on April 14, 2024, in PubMed and Scopus, using a combination of keywords and Medical Subject Headings (MeSH) related to both cardiology and LLMs. The cardiology terms included &#x201C;Echocardiography,&#x201D; &#x201C;Arrhythmias,&#x201D; &#x201C;Cardiac Output,&#x201D; &#x201C;Heart Failure,&#x201D; &#x201C;Heart Valve Diseases,&#x201D; &#x201C;Myocardial Ischemia,&#x201D; &#x201C;Acute Coronary Syndrome,&#x201D; and &#x201C;Electrocardiogram.&#x201D; The LLM terms included &#x201C;ChatGPT,&#x201D; &#x201C;Large Language Models,&#x201D; &#x201C;OpenAI,&#x201D; &#x201C;Microsoft Bing Chat,&#x201D; &#x201C;Google Bard,&#x201D; and &#x201C;Google Gemini.&#x201D; In Scopus, searches were conducted using the TITLE-ABS-KEY field to ensure consistency across databases.
Scopus was included alongside PubMed to broaden coverage, capturing interdisciplinary studies at the intersection of artificial intelligence and cardiology that may not be indexed in PubMed. The complete search strategies are available in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. This review was registered with PROSPERO (CRD42024556397) [<xref ref-type="bibr" rid="ref11">11</xref>].</p></sec><sec id="s2-3"><title>Study Selection</title><p>We included studies that (1) evaluated an application of LLMs in a specific field within cardiology, (2) were published in English, and (3) were peer reviewed. In addition to full original research articles, short reports and letters containing original data or quantitative analyses were also eligible. Studies that were non&#x2013;LLM-related, non&#x2013;cardiology-focused, or purely conceptual without empirical evaluation were excluded. Abstracts, conference papers, critical letters, and editorial commentaries were also excluded.</p><p>The search was supplemented by manual screening of the reference lists of included studies. Two reviewers (MG and SS) independently screened the titles and abstracts to determine whether the studies met the inclusion criteria. Full-text articles were reviewed when the title met the inclusion criteria or when there was any uncertainty. Disagreements were resolved by a third reviewer (EK).</p></sec><sec id="s2-4"><title>Data Extraction</title><p>Two independent reviewers (MG and SS) extracted data from the included studies using a standardized data extraction form. Discrepancies were resolved through discussion or consultation with a third reviewer (EK). Extracted information included study design, sample size, LLM application details (eg, LLM features examined, assessment method, validation metrics, and reference guidelines used for accuracy comparison), main findings, and limitations.</p></sec><sec id="s2-5"><title>Quality Assessment</title><p>Risk of bias was assessed using the QUADAS-2 tool (developed by Whiting et al at the University of Bristol) [<xref ref-type="bibr" rid="ref12">12</xref>], which is widely applied in diagnostic accuracy research. This framework was selected because many included studies evaluated LLMs in diagnostic or decision-making roles, making QUADAS-2 particularly suitable for systematically assessing potential bias in study design, case selection, index test conduct, and reference standards. A detailed summary of the assessments is presented in Table S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p><p>Since the studies evaluated LLM performance rather than human diagnostics, several adaptations were applied. In the patient selection domain, we assessed the transparency and representativeness of the test cases used to evaluate LLMs. Bias in this domain was considered high when studies used unreported or simplified cases that did not reflect real-world clinical variability. In the index test domain, we evaluated the standardization of prompts and scoring (number of runs and grading rules), model transparency (version, release date, and parameters), blinding to the reference standard during testing, and avoidance of post hoc prompt modification or selective reporting. 
The reference standard domain was adapted to assess the reliability of the comparator or ground truth (eg, expert consensus, guideline-based answers, or validated datasets).</p></sec><sec id="s2-6"><title>Data Synthesis</title><p>A narrative synthesis of the findings from the included studies was conducted. Due to anticipated heterogeneity in study designs and outcomes, a meta-analysis was not planned. Instead, the focus was on summarizing the applications, benefits, and limitations of LLMs in cardiology as reported in the included studies and identifying areas for future research. In this paper, &#x201C;quantitative synthesis&#x201D; refers to descriptive reporting of numerical outcomes extracted from individual studies (eg, accuracy rates, agreement statistics, readability scores, and error frequencies) without statistical pooling or calculation of combined effect estimates.</p><p>To structure the analysis, included studies were grouped into 5 categories that reflected the main areas of LLM application in cardiology. Two reviewers (MG and SS) independently categorized the studies, and any discrepancies were resolved through discussion. In cases where consensus could not be reached, a final decision was made by EK. Data were extracted for each category on study objectives, type of task, LLMs assessed, evaluation methods, and key performance outcomes.</p></sec><sec id="s2-7"><title>Chronic and Progressive Cardiac Conditions</title><p>Studies were included if they assessed LLMs in long-term cardiac conditions such as heart failure, hypertension, valvular disease, or atrial fibrillation.</p></sec><sec id="s2-8"><title>Acute Cardiac Events</title><p>This group included studies evaluating LLMs in acute scenarios, including resuscitation, cardiac arrest, and myocardial infarction.</p></sec><sec id="s2-9"><title>Physician Education</title><p>Studies were categorized here if they tested LLMs on cardiology training, examination-style questions, or case vignettes aimed at medical professionals. Studies that focused on physician clinical decision-making and compared it with LLM performance were also included under this group.</p></sec><sec id="s2-10"><title>Patient Education</title><p>This group covered studies where LLMs provided information or educational content directly to patients.</p></sec><sec id="s2-11"><title>Cardiac Diagnostic Tests</title><p>Studies were included if they examined the use of LLMs for diagnostic interpretation, such as electrocardiograms (ECGs), echocardiography, and cardiac imaging.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Overview</title><p>A total of 35 articles were identified for inclusion [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref13">13</xref>-<xref ref-type="bibr" rid="ref46">46</xref>]. Of these, 3 were retrieved exclusively from PubMed [<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref39">39</xref>], while the remaining articles were identified in both databases.
Following full-text assessment, 33 studies [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref13">13</xref>-<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref33">33</xref>-<xref ref-type="bibr" rid="ref46">46</xref>] contributed quantitative outcome data to the descriptive quantitative synthesis, whereas 2 studies [<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref32">32</xref>] were included in the qualitative synthesis only. These 2 studies reported conceptual analyses and descriptive observations rather than measurable performance metrics (<xref ref-type="fig" rid="figure1">Figure 1</xref>).</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Flow diagram of the search and inclusion process.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="cardio_v10i1e76734_fig01.png"/></fig><p>General details about the included articles, descriptions of their characteristics, main outcomes, and their advantages and limitations are summarized in <xref ref-type="table" rid="table1">Tables 1</xref><xref ref-type="table" rid="table2"/><xref ref-type="table" rid="table3"/>-<xref ref-type="table" rid="table4">4</xref>, respectively. Tables S1 and S2 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> provide additional detail on characteristics and outcomes. Table S3 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> provides a detailed evaluation of each article using the QUADAS-2 tool.
<xref ref-type="fig" rid="figure2">Figure 2</xref> shows the categorization of articles into core groups with corresponding cardiology subfields.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Studies included.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Category and study</td><td align="left" valign="bottom">Publication date</td><td align="left" valign="bottom">Title</td><td align="left" valign="bottom">Journal</td><td align="left" valign="bottom">PMID</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="5">Chronic and progressive cardiac conditions</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Dimitriadis et al<break/>[<xref ref-type="bibr" rid="ref27">27</xref>]</td><td align="left" valign="top">2024 March</td><td align="left" valign="top">ChatGPT and patients with heart failure</td><td align="left" valign="top">Angiology</td><td align="left" valign="top">38451243</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Riddle et al<break/>[<xref ref-type="bibr" rid="ref33">33</xref>]</td><td align="left" valign="top">2023 December</td><td align="left" valign="top">College-level reading is required to understand ChatGPT&#x2019;s answers to lay questions relating to heart failure</td><td align="left" valign="top">European Journal of Heart Failure</td><td align="left" valign="top">37964183</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Krittanawong et al<break/>[<xref ref-type="bibr" rid="ref34">34</xref>]</td><td align="left" valign="top">2023 November and December</td><td align="left" valign="top">Assessing the potential of ChatGPT for patient education in the cardiology clinic</td><td align="left" valign="top">Progress in Cardiovascular Diseases</td><td align="left" valign="top">37832625</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Rouhi et al<break/>[<xref ref-type="bibr" rid="ref35">35</xref>]</td><td align="left" valign="top">2024 March</td><td align="left" valign="top">Can artificial intelligence improve the readability of patient education materials on aortic stenosis? A pilot study</td><td align="left" valign="top">Cardiology and Therapy</td><td align="left" valign="top">38194058</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Hillmann et al<break/>[<xref ref-type="bibr" rid="ref36">36</xref>]</td><td align="left" valign="top">2023 December</td><td align="left" valign="top">Accuracy and comprehensibility of chat-based artificial intelligence for patient information on atrial fibrillation and cardiac implantable electronic devices</td><td align="left" valign="top">EP Europace</td><td align="left" valign="top">38127304</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Van Bulck et al<break/>[<xref ref-type="bibr" rid="ref37">37</xref>]</td><td align="left" valign="top">2024 January</td><td align="left" valign="top">What if your patient switches from Dr. Google to Dr. ChatGPT? 
A vignette-based survey of the trustworthiness, value, and danger of ChatGPT-generated responses to health questions</td><td align="left" valign="top">European Journal of Cardiovascular Nursing</td><td align="left" valign="top">37094282</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Kassab et al<break/>[<xref ref-type="bibr" rid="ref25">25</xref>]</td><td align="left" valign="top">2023 November</td><td align="left" valign="top">Comparative analysis of chat-based artificial intelligence models in addressing common and challenging valvular heart disease clinical scenarios</td><td align="left" valign="top">Journal of the American Heart Association</td><td align="left" valign="top">37982246</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Han et al<break/>[<xref ref-type="bibr" rid="ref24">24</xref>]</td><td align="left" valign="top">2024 January</td><td align="left" valign="top">Evaluation of GPT-4 for 10-year cardiovascular risk prediction: insights from the UK Biobank and KoGES<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup> data</td><td align="left" valign="top">iScience</td><td align="left" valign="top">38357664</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Ali et al [<xref ref-type="bibr" rid="ref23">23</xref>]</td><td align="left" valign="top">2023 November</td><td align="left" valign="top">Mapping the heartbeat of America with ChatGPT-4: unpacking the interplay of social vulnerability, digital literacy, and cardiovascular mortality in county residency choices</td><td align="left" valign="top">Journal of Personalized Medicine</td><td align="left" valign="top">38138852</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Li et al<break/>[<xref ref-type="bibr" rid="ref22">22</xref>]</td><td align="left" valign="top">2024 March</td><td align="left" valign="top">Potential multidisciplinary use of large language models for addressing queries in cardio-oncology</td><td align="left" valign="top">Journal of the American Heart Association</td><td align="left" valign="top">38497458</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Yano et al<break/>[<xref ref-type="bibr" rid="ref21">21</xref>]</td><td align="left" valign="top">2023 November</td><td align="left" valign="top">Relevance of ChatGPT&#x2019;s responses to common hypertension-related patient inquiries</td><td align="left" valign="top">Hypertension</td><td align="left" valign="top">37916418</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Kusunose et al<break/>[<xref ref-type="bibr" rid="ref46">46</xref>]</td><td align="left" valign="top">2023 June</td><td align="left" valign="top">Evaluation of the accuracy of ChatGPT in answering clinical questions on the Japanese Society of Hypertension Guidelines</td><td align="left" valign="top">Circulation Journal</td><td align="left" valign="top">37286486</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Al Tibi et al<break/>[<xref ref-type="bibr" rid="ref45">45</xref>]</td><td align="left" valign="top">2024 March</td><td align="left"
valign="top">A retrospective comparison of medication recommendations between a cardiologist and ChatGPT-4 for hypertension patients in a rural clinic</td><td align="left" valign="top">Cureus</td><td align="left" valign="top">38586651</td></tr><tr><td align="left" valign="top" colspan="5">Acute cardiac events</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Birkun and Gautam [<xref ref-type="bibr" rid="ref38">38</xref>]</td><td align="left" valign="top">2024 January</td><td align="left" valign="top">Large language model-based chatbot as a source of advice on first aid in heart attack</td><td align="left" valign="top">Current Problems in Cardiology</td><td align="left" valign="top">37640177</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Scquizzato et al<break/>[<xref ref-type="bibr" rid="ref39">39</xref>]</td><td align="left" valign="top">2024 January</td><td align="left" valign="top">Testing ChatGPT ability to answer laypeople questions about cardiac arrest and cardiopulmonary resuscitation</td><td align="left" valign="top">Resuscitation</td><td align="left" valign="top">38081504</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Safranek et al<break/>[<xref ref-type="bibr" rid="ref40">40</xref>]</td><td align="left" valign="top">2024 March</td><td align="left" valign="top">Automated HEART<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup> score determination via ChatGPT: Honing a framework for iterative prompt development</td><td align="left" valign="top">Journal of the American College of Emergency Physicians Open</td><td align="left" valign="top">38481520</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Birkun<break/>[<xref ref-type="bibr" rid="ref43">43</xref>]</td><td align="left" valign="top">2023 August</td><td align="left" valign="top">Performance of an artificial intelligence-based chatbot when acting as EMS<sup><xref ref-type="table-fn" rid="table1fn3">c</xref></sup> dispatcher in a cardiac arrest scenario</td><td align="left" valign="top">Internal and Emergency Medicine</td><td align="left" valign="top">37603142</td></tr><tr><td align="left" valign="top" colspan="5">Physician education</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><break/>Harskamp and De Clercq [<xref ref-type="bibr" rid="ref41">41</xref>]</td><td align="left" valign="top">2024 February</td><td align="left" valign="top">Performance of ChatGPT as an AI<sup><xref ref-type="table-fn" rid="table1fn4">d</xref></sup>-assisted decision support tool in medicine: a proof-of-concept study for interpreting symptoms and management of common cardiac conditions (AMSTELHEART-2)</td><td align="left" valign="top">Acta Cardiologica</td><td align="left" valign="top">38348835</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Skalidis and Cagnina<break/>[<xref ref-type="bibr" rid="ref16">16</xref>]</td><td align="left" valign="top">2024 April</td><td align="left" valign="top">ChatGPT takes on the European Exam in Core Cardiology: an artificial intelligence success story?</td><td align="left" valign="top">European Heart Journal - Digital health</td><td 
align="left" valign="top">37265864</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Yavuz and Kahraman [<xref ref-type="bibr" rid="ref15">15</xref>]</td><td align="left" valign="top">2024 March</td><td align="left" valign="top">Evaluation of the prediagnosis and management of ChatGPT-4.0 in clinical cases in cardiology</td><td align="left" valign="top">Future Cardiol</td><td align="left" valign="top">39049771</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Gritti et al<break/>[<xref ref-type="bibr" rid="ref13">13</xref>]</td><td align="left" valign="top">2024 February</td><td align="left" valign="top">Progression of an artificial intelligence Chatbot (ChatGPT) for pediatric cardiology educational knowledge assessment</td><td align="left" valign="top">Pediatric Cardiology</td><td align="left" valign="top">38170274</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Lee et al<break/>[<xref ref-type="bibr" rid="ref5">5</xref>]</td><td align="left" valign="top">2023 October</td><td align="left" valign="top">Evaluating the clinical decision-making ability of large language models using MKSAP-19<sup><xref ref-type="table-fn" rid="table1fn5">e</xref></sup> cardiology questions</td><td align="left" valign="top">JACC Advance</td><td align="left" valign="top">38938709</td></tr><tr><td align="left" valign="top" colspan="5">Patient education</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>G&#x00FC;nay et al<break/>[<xref ref-type="bibr" rid="ref19">19</xref>]</td><td align="left" valign="top">2024 March</td><td align="left" valign="top">AI in patient education: assessing the impact of ChatGPT-4 on conveying comprehensive information about chest pain</td><td align="left" valign="top">The American Journal of Emergency Medicine</td><td align="left" valign="top">38242775</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Bushuven et al<break/>[<xref ref-type="bibr" rid="ref20">20</xref>]</td><td align="left" valign="top">2023 November</td><td align="left" valign="top">&#x201C;ChatGPT, Can You Help Me Save My Child&#x2019;s Life?&#x201D; - diagnostic accuracy and supportive capabilities to lay rescuers by ChatGPT in prehospital basic life support and pediatric advanced life support cases - an in-silico analysis</td><td align="left" valign="top">Journal of Medical Systems</td><td align="left" valign="top">37987870</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Lautrup et al<break/>[<xref ref-type="bibr" rid="ref18">18</xref>]</td><td align="left" valign="top">2023 November</td><td align="left" valign="top">Heart-to-heart with ChatGPT: the impact of patients consulting AI for cardiovascular health advice</td><td align="left" valign="top">Open Heart</td><td align="left" valign="top">37945282</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Moons and Van Bulck [<xref ref-type="bibr" rid="ref17">17</xref>]</td><td align="left" valign="top">2024 March</td><td align="left" valign="top">Using ChatGPT and Google Bard to improve the readability of written patient 
information: a proof of concept</td><td align="left" valign="top">European Journal of Cardiovascular Nursing</td><td align="left" valign="top">37603843</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Almagazzachi et al<break/>[<xref ref-type="bibr" rid="ref44">44</xref>]</td><td align="left" valign="top">2024 February</td><td align="left" valign="top">Generative artificial intelligence in patient education: ChatGPT takes on hypertension questions</td><td align="left" valign="top">Cureus</td><td align="left" valign="top">38435177</td></tr><tr><td align="left" valign="top" colspan="5">Cardiac diagnostic tests</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Fija&#x010D;ko et al<break/>[<xref ref-type="bibr" rid="ref30">30</xref>]</td><td align="left" valign="top">2023 December</td><td align="left" valign="top">Can novel multimodal chatbots such as Bing Chat Enterprise, ChatGPT-4 Pro, and Google Bard correctly interpret electrocardiogram images?</td><td align="left" valign="top">Resuscitation</td><td align="left" valign="top">37884222</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Zhu et al<break/>[<xref ref-type="bibr" rid="ref42">42</xref>]</td><td align="left" valign="top">2023 July</td><td align="left" valign="top">ChatGPT can pass the AHA<sup><xref ref-type="table-fn" rid="table1fn6">f</xref></sup> exams: open-ended questions outperform multiple-choice format</td><td align="left" valign="top">Resuscitation</td><td align="left" valign="top">37349064</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>King et al<break/>[<xref ref-type="bibr" rid="ref14">14</xref>]</td><td align="left" valign="top">2024 February</td><td align="left" valign="top">GPT-4V passes the BLS<sup><xref ref-type="table-fn" rid="table1fn7">g</xref></sup> and ACLS<sup><xref ref-type="table-fn" rid="table1fn8">h</xref></sup> examinations: an analysis of GPT-4V&#x2019;s image recognition capabilities</td><td align="left" valign="top">Resuscitation</td><td align="left" valign="top">38160904</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>G&#x00FC;nay et al<break/>[<xref ref-type="bibr" rid="ref29">29</xref>]</td><td align="left" valign="top">2024 March</td><td align="left" valign="top">Comparison of emergency medicine specialist, cardiologist, and chat-GPT in electrocardiography assessment</td><td align="left" valign="top">The American Journal of Emergency Medicine</td><td align="left" valign="top">38507847</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Kangiszer et al<break/>[<xref ref-type="bibr" rid="ref28">28</xref>]</td><td align="left" valign="top">2024 March</td><td align="left" valign="top">Low performance of ChatGPT on echocardiography board review questions</td><td align="left" valign="top">JACC: Cardiovascular Imaging</td><td align="left" valign="top">37943230</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Sarangi et al<break/>[<xref ref-type="bibr" rid="ref26">26</xref>]</td><td align="left" valign="top">2023 December</td><td align="left"
valign="top">Radiological differential diagnoses based on cardiovascular and thoracic imaging patterns: perspectives of four large language models</td><td align="left" valign="top">The Indian Journal of Radiology &#x0026; Imaging</td><td align="left" valign="top">38549881</td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>KoGES: Korean Genome and Epidemiology Study.</p></fn><fn id="table1fn2"><p><sup>b</sup>HEART: History, ECG, Age, Risk factors, Troponin risk algorithm.</p></fn><fn id="table1fn3"><p><sup>c</sup>EMS: Emergency Medical Services.</p></fn><fn id="table1fn4"><p><sup>d</sup>AI: artificial intelligence.</p></fn><fn id="table1fn5"><p><sup>e</sup>MKSAP-19: Medical Knowledge Self-Assessment Program (19th edition).</p></fn><fn id="table1fn6"><p><sup>f</sup>AHA: American Heart Association.</p></fn><fn id="table1fn7"><p><sup>g</sup>BLS: Basic Life Support.</p></fn><fn id="table1fn8"><p><sup>h</sup>ACLS: Advanced Cardiovascular Life Support.</p></fn></table-wrap-foot></table-wrap><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Study characteristics.</p></caption> <table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Category and authors</td><td align="left" valign="bottom">Tasks</td><td align="left" valign="bottom">LLM<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup> features examined</td><td align="left" valign="bottom">LLM examined</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="4">Chronic and progressive cardiac conditions</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Dimitriadis et al [<xref ref-type="bibr" rid="ref27">27</xref>]</td><td align="left" valign="top">Answering questions about the management of HF<sup><xref ref-type="table-fn" rid="table2fn2">b</xref></sup>.</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Accuracy</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>ChatGPT- 3.5</p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Riddell et al [<xref ref-type="bibr" rid="ref33">33</xref>]</td><td align="left" valign="top">Answer a set of hypothetical queries from a patient with HFrEF<sup><xref ref-type="table-fn" rid="table2fn3">c</xref></sup>.</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Readability</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>ChatGPT- 4</p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Krittanawong et al [<xref ref-type="bibr" rid="ref34">34</xref>]</td><td align="left" valign="top">Answer HF-related questions.</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Reliability</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>ChatGPT- 3.5</p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Rouhi et al [<xref ref-type="bibr" rid="ref35">35</xref>]</td><td align="left" valign="top">Rewrite patient education materials to meet recommended reading skill levels for patients with AS<sup><xref ref-type="table-fn" rid="table2fn4">d</xref></sup>.</td><td 
align="left" valign="top"><list list-type="bullet"><list-item><p>Readability</p></list-item><list-item><p>Simplification</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Google Bard</p></list-item><list-item><p>ChatGPT- 3.5</p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Hillmann et al [<xref ref-type="bibr" rid="ref36">36</xref>]</td><td align="left" valign="top">Answering questions about AF<sup><xref ref-type="table-fn" rid="table2fn5">e</xref></sup> and CIED<sup><xref ref-type="table-fn" rid="table2fn6">f</xref></sup>.</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Readability</p></list-item><list-item><p>Appropriateness</p></list-item><list-item><p>Comprehensibility</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Google Bard</p></list-item><list-item><p>Bing Chat</p></list-item><list-item><p>ChatGPT-4</p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Van Bulck and Moons [<xref ref-type="bibr" rid="ref37">37</xref>]</td><td align="left" valign="top">Responses to virtual prompts by patients (CHD<sup><xref ref-type="table-fn" rid="table2fn7">g</xref></sup>, AF, HF, and Chol<sup><xref ref-type="table-fn" rid="table2fn8">h</xref></sup>).</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Trustworthy</p></list-item><list-item><p>Valuable</p></list-item><list-item><p>Dangerous</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>ChatGPT-3</p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Kassab et al [<xref ref-type="bibr" rid="ref25">25</xref>]</td><td align="left" valign="top">Answer correctly on 15 patient-centered and 15 physician-centered VHD<sup><xref ref-type="table-fn" rid="table2fn9">i</xref></sup> queries.</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Accuracy</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>ChatGPT-4</p></list-item><list-item><p>Google Bard</p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Han et al [<xref ref-type="bibr" rid="ref24">24</xref>]</td><td align="left" valign="top">Predict 10-year CVD<sup><xref ref-type="table-fn" rid="table2fn10">j</xref></sup> risk from cohort data.</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Accuracy</p></list-item><list-item><p>Robustness</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>ChatGPT- 3.5</p></list-item><list-item><p>ChatGPT-4</p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Ali et al [<xref ref-type="bibr" rid="ref23">23</xref>]</td><td align="left" valign="top">Predicting age-adjusted cardiovascular mortality across 3118 US counties and identifying associations with social vulnerability and digital literacy indices.</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Assist with regression modeling</p></list-item><list-item><p>Code 
generation</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>ChatGPT-4</p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Li et al [<xref ref-type="bibr" rid="ref22">22</xref>]</td><td align="left" valign="top">Answer 25 guideline-based cardio-oncology queries.</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Accuracy</p></list-item><list-item><p>Guideline adherence</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>ChatGPT-3.5</p></list-item><list-item><p>ChatGPT-4</p></list-item><list-item><p>Google Bard</p></list-item><list-item><p>Llama2</p></list-item><list-item><p>Claude2</p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Yano et al [<xref ref-type="bibr" rid="ref21">21</xref>]</td><td align="left" valign="top">Answer 20 common hypertension FAQs<sup><xref ref-type="table-fn" rid="table2fn11">k</xref></sup> in Japanese and English.</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Appropriateness</p></list-item><list-item><p>Language consistency</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>ChatGPT-4</p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Kusunose et al [<xref ref-type="bibr" rid="ref46">46</xref>]</td><td align="left" valign="top">Answering guideline-based clinical questions on hypertension.</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Accuracy</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>ChatGPT-3.5</p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Al Tibi et al [<xref ref-type="bibr" rid="ref45">45</xref>]</td><td align="left" valign="top">Comparing antihypertensive medication recommendations between ChatGPT-4 and a cardiologist using real-world patient data.</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Accuracy</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>ChatGPT-4</p></list-item></list></td></tr><tr><td align="left" valign="top" colspan="4">Acute cardiac events</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Birkun and Gautam [<xref ref-type="bibr" rid="ref38">38</xref>]</td><td align="left" valign="top">Examination of the ability of chatbots to guide first aid for heart attacks.</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Guideline adherence</p></list-item><list-item><p>Readability</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Bing Chat</p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Scquizzato et al [<xref ref-type="bibr" rid="ref39">39</xref>]</td><td align="left" valign="top">Answer lay FAQs on cardiac arrest and CPR<sup><xref ref-type="table-fn" rid="table2fn12">l</xref></sup>.</td><td align="left" valign="top"><list
list-type="bullet"><list-item><p>Accuracy</p></list-item><list-item><p>Readability</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>ChatGPT-3.5</p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Safranek et al [<xref ref-type="bibr" rid="ref40">40</xref>]</td><td align="left" valign="top">Extract data from notes and compute HEART<sup><xref ref-type="table-fn" rid="table2fn13">m</xref></sup> score; test iterative prompt framework.</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Accuracy</p></list-item><list-item><p>Guideline adherence</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>ChatGPT- 3.5</p></list-item><list-item><p>ChatGPT-4</p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Birkun [<xref ref-type="bibr" rid="ref43">43</xref>]</td><td align="left" valign="top">Evaluate the ability to operate as an automated assistant for recognition of cardiac arrest and real-time CPR instructions.</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Accuracy</p></list-item><list-item><p>Guideline adherence</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>New Bing chatbot</p></list-item></list></td></tr><tr><td align="left" valign="top" colspan="4">Physician education</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Harskamp and De Clercq [<xref ref-type="bibr" rid="ref41">41</xref>]</td><td align="left" valign="top">Answering questions related to common cardiac symptoms or conditions.</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Accuracy</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>ChatGPT- 3.5 January 2023</p></list-item><list-item><p>ChatGPT-3.5 September 2023 version</p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Skalidis et al [<xref ref-type="bibr" rid="ref16">16</xref>]</td><td align="left" valign="top">Test ChatGPT on exam-style questions.</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Accuracy</p></list-item></list></td><td align="left" valign="bottom"><list list-type="bullet"><list-item><p>ChatGPT-3</p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Yavuz and Kahraman [<xref ref-type="bibr" rid="ref15">15</xref>]</td><td align="left" valign="top">Assess ChatGPT-4 inthediagnosis and management of cases.</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Accuracy</p></list-item><list-item><p>Difficulty</p></list-item></list></td><td align="left" valign="bottom"><list list-type="bullet"><list-item><p>ChatGPT-4</p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Gritti et al [<xref ref-type="bibr" rid="ref13">13</xref>]</td><td align="left" valign="top">Compare ChatGPT-4 and ChatGPT-3.5 in accuracy on multiple-choice pediatric cardiology questions.</td><td align="left" valign="top"><list 
list-type="bullet"><list-item><p>Accuracy</p></list-item></list></td><td align="left" valign="bottom"><list list-type="bullet"><list-item><p>ChatGPT 3.5</p></list-item><list-item><p>ChatGPT-4</p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Lee et al [<xref ref-type="bibr" rid="ref5">5</xref>]</td><td align="left" valign="top">Achieving a passing score of 50 % using MKSAP-19's<sup><xref ref-type="table-fn" rid="table2fn14">n</xref></sup> cardiology questions.</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Accuracy</p></list-item></list></td><td align="left" valign="bottom"><list list-type="bullet"><list-item><p>ChatGPT- 3.5</p></list-item><list-item><p>ChatGPT-4</p></list-item><list-item><p>PubMedGPT</p></list-item></list></td></tr><tr><td align="left" valign="top" colspan="4">Patient education</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>G&#x00FC;nay et al [<xref ref-type="bibr" rid="ref19">19</xref>]</td><td align="left" valign="top">Compare ChatGPT-4 vs hospital websites on chest pain FAQs.</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Readability</p></list-item><list-item><p>Guideline adherence</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>ChatGPT-4</p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Bushuven et al [<xref ref-type="bibr" rid="ref20">20</xref>]</td><td align="left" valign="top">Evaluate ChatGPT diagnostic and capabilities in BLS<sup><xref ref-type="table-fn" rid="table2fn15">o</xref></sup> and PALS<sup><xref ref-type="table-fn" rid="table2fn16">p</xref></sup> scenarios.</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Accuracy</p></list-item></list></td><td align="left" valign="bottom"><list list-type="bullet"><list-item><p>ChatGPT-3.5</p></list-item><list-item><p>ChatGPT-4</p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Lautrup et al [<xref ref-type="bibr" rid="ref18">18</xref>]</td><td align="left" valign="top">Respond to prompts on 4 cardiovascular topics (MI<sup><xref ref-type="table-fn" rid="table2fn17">q</xref></sup>, PAD<sup><xref ref-type="table-fn" rid="table2fn18">r</xref></sup>, VV<sup><xref ref-type="table-fn" rid="table2fn19">s</xref></sup>, and CP)<sup><xref ref-type="table-fn" rid="table2fn20">t</xref></sup>.</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Accuracy</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>ChatGPT-4</p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Moons and Van Bulck [<xref ref-type="bibr" rid="ref17">17</xref>]</td><td align="left" valign="top">Simplify patient info from journals.</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Readability</p></list-item></list></td><td align="left" valign="bottom"><list list-type="bullet"><list-item><p>ChatGPT</p></list-item><list-item><p>Google Bards</p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content 
content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Almagazzachi et al [<xref ref-type="bibr" rid="ref44">44</xref>]</td><td align="left" valign="top">Answering hypertension questions and assessing reproducibility across repeated runs.</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Accuracy</p></list-item></list></td><td align="left" valign="bottom"><list list-type="bullet"><list-item><p>ChatGPT (version not specified)</p></list-item></list></td></tr><tr><td align="left" valign="top" colspan="4">Cardiac diagnostic tests</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Fija&#x010D;ko et al [<xref ref-type="bibr" rid="ref30">30</xref>]</td><td align="left" valign="top">Interpreting ECG images.</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Accuracy</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Google Bard</p></list-item><list-item><p>Bing Chat</p></list-item><list-item><p>ChatGPT-4</p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Zhu et al [<xref ref-type="bibr" rid="ref42">42</xref>]</td><td align="left" valign="top">Interpreting ECG images.</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Accuracy</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>ChatGPT</p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>King et al [<xref ref-type="bibr" rid="ref14">14</xref>]</td><td align="left" valign="top">Interpreting ECG images.</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Accuracy</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>ChatGPT-4V</p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>G&#x00FC;nay et al [<xref ref-type="bibr" rid="ref29">29</xref>]</td><td align="left" valign="top">Interpreting ECG data.</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Accuracy</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>ChatGPT-4</p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Kangiszer et al [<xref ref-type="bibr" rid="ref28">28</xref>]</td><td align="left" valign="top">Answer correctly echocardiography board review questions and provide explanations that reflect standards of practice.</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Accuracy</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>ChatGPT-4</p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Sarangi et al [<xref ref-type="bibr" rid="ref26">26</xref>]</td><td align="left" valign="top">Generate DDx for 15cardiac or thoracic imaging patterns.</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Diagnostic accuracy</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>ChatGPT- 3.5</p></list-item><list-item><p>Google 
Bard</p></list-item><list-item><p>Microsoft Bing</p></list-item><list-item><p>Perplexity</p></list-item></list></td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>LLM: large language model.</p></fn><fn id="table2fn2"><p><sup>b</sup>HF: heart failure.</p></fn><fn id="table2fn3"><p><sup>c</sup>HFrEF: heart failure with reduced ejection fraction.</p></fn><fn id="table2fn4"><p><sup>d</sup>AS: aortic stenosis.</p></fn><fn id="table2fn5"><p><sup>e</sup>AF: atrial fibrillation.</p></fn><fn id="table2fn6"><p><sup>f</sup>CIED: cardiac implantable electronic device.</p></fn><fn id="table2fn7"><p><sup>g</sup>CHD: chronic heart disease.</p></fn><fn id="table2fn8"><p><sup>h</sup>Chol: cholesterol.</p></fn><fn id="table2fn9"><p><sup>i</sup>VHD: valvular heart disease.</p></fn><fn id="table2fn10"><p><sup>j</sup>CVD: cardiovascular disease.</p></fn><fn id="table2fn11"><p><sup>k</sup>FAQ: frequently asked question.</p></fn><fn id="table2fn12"><p><sup>l</sup>CPR: cardiopulmonary resuscitation.</p></fn><fn id="table2fn13"><p><sup>m</sup>HEART: History, ECG, Age, Risk factors, Troponin risk algorithm.</p></fn><fn id="table2fn14"><p><sup>n</sup>MKSAP-19: Medical Knowledge Self-Assessment Program (19th edition).</p></fn><fn id="table2fn15"><p><sup>o</sup>BLS: Basic Life Support.</p></fn><fn id="table2fn16"><p><sup>p</sup>PALS: Pediatric Advanced Life Support.</p></fn><fn id="table2fn17"><p><sup>q</sup>MI: myocardial infarction.</p></fn><fn id="table2fn18"><p><sup>r</sup>PAD: peripheral arterial disease.</p></fn><fn id="table2fn19"><p><sup>s</sup>VV: varicose veins.</p></fn><fn id="table2fn20"><p><sup>t</sup>CP: cardiovascular prevention.</p></fn></table-wrap-foot></table-wrap><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Outcomes of large language model applications.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Group and manuscript</td><td align="left" valign="bottom">Quantitative outcomes</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="2">Chronic and progressive cardiac conditions</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Dimitriadis et al<break/>[<xref ref-type="bibr" rid="ref27">27</xref>]</td><td align="left" valign="top">ChatGPT-3.5 answered 43/47 (91%) HF<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup> patient questions adequately; 4/47 (9%) were correct but insufficient.</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Riddell et al<break/>[<xref ref-type="bibr" rid="ref33">33</xref>]</td><td align="left" valign="top">ChatGPT-4 responses to FAQs<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup>: 71% (50/70; median FRE<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup> 40.2, grade 16, IQR 48.3-34.6) at college-level readability; 23% (16/70) at the recommended below-college level; and 6% (4/70) requiring only grade 8&#x2010;9.</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Krittanawong et al<break/>[<xref ref-type="bibr" rid="ref34">34</xref>]</td><td align="left" valign="top">On 20 HF patient questions, ChatGPT was reliable with explanation in 40% (8/20), reliable without explanation in 40% (8/20), and unreliable in 20% (4/20).</td></tr><tr><td align="left" valign="top"><named-content
content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Rouhi et al [<xref ref-type="bibr" rid="ref35">35</xref>]</td><td align="left" valign="top">ChatGPT-3.5 simplified aortic stenosis materials to 6th-7th grade, Bard to 8th-9th; both improved from college-level baseline, all <italic>P</italic>&#x003C;.001.</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Hillmann et al<break/>[<xref ref-type="bibr" rid="ref36">36</xref>]</td><td align="left" valign="top">On 25 AF<sup><xref ref-type="table-fn" rid="table3fn4">d</xref></sup> questions, ChatGPT-4 produced 84% (21/25) appropriate and 92% (23/25) comprehensible responses with 24% (6/25) missing content. On 25 CIED<sup><xref ref-type="table-fn" rid="table3fn5">e</xref></sup> questions, ChatGPT-4 produced 88% (22/25) appropriate and 100% (25/25) comprehensible responses with 52% (13/25) missing content.</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Van Bulck and Moons<break/>[<xref ref-type="bibr" rid="ref37">37</xref>]</td><td align="left" valign="top">40% (8/20) of experts rated ChatGPT&#x2019;s information as more valuable than Google, 45% (9/20) as equally valuable, and 15% (3/20) as less valuable.</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Kassab et al<break/>[<xref ref-type="bibr" rid="ref25">25</xref>]</td><td align="left" valign="top">ChatGPT-4 provided 100% (15/15) accurate responses to patient-centered questions and 73% (11/15) accurate with 27% (4/15) partly accurate responses to complex clinical scenarios, outperforming Google Bard (40% [6/15] accurate).</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Han et al<break/>[<xref ref-type="bibr" rid="ref24">24</xref>]</td><td align="left" valign="top">ChatGPT-4 achieved AUROC<sup><xref ref-type="table-fn" rid="table3fn6">f</xref></sup> 0.725 in the UK Biobank and 0.664 in the KoGES<sup><xref ref-type="table-fn" rid="table3fn7">g</xref></sup> cohort for 10-year CVD<sup><xref ref-type="table-fn" rid="table3fn8">h</xref></sup> risk prediction, performing comparably to the ACC<sup><xref ref-type="table-fn" rid="table3fn9">i</xref></sup> or AHA<sup><xref ref-type="table-fn" rid="table3fn10">j</xref></sup> (0.733, 0.674) and Framingham (0.728, 0.675) models.</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Ali et al<break/>[<xref ref-type="bibr" rid="ref23">23</xref>]</td><td align="left" valign="top">ChatGPT-4&#x2013;assisted regression explained 34% (R&#x00B2;=0.34) of the variability in age-adjusted cardiovascular mortality, with higher social vulnerability increasing mortality (&#x03B2;=+49.01) and greater digital literacy reducing it (&#x03B2;=&#x2013;4.51).</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Li et al<break/>[<xref ref-type="bibr" rid="ref22">22</xref>]</td><td align="left" valign="top">On 25 cardio-oncology questions, ChatGPT-4 provided 68% (17/25) appropriate responses, followed by Bard, Claude 2, and ChatGPT-3.5 with 52% (13/25), and Llama 2 with 48% (12/25) (<italic>P</italic>=.65).</td></tr><tr><td align="left" valign="top"><named-content 
content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Yano et al<break/>[<xref ref-type="bibr" rid="ref21">21</xref>]</td><td align="left" valign="top">ChatGPT-4&#x2019;s responses were rated appropriate in 85% (17/20) of cases, with strong interreviewer agreement (Gwet AC=0.890, SE 0.066, <italic>P</italic>&#x003C;.001).</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Kusunose et al<break/>[<xref ref-type="bibr" rid="ref46">46</xref>]</td><td align="left" valign="top">Overall accuracy 64.5% (20/31). Accuracy was higher for clinical questions (CQs) than for limited evidence-based questions: 80% (16/20) vs 36% (4/11) (<italic>P</italic>=.005). Across 21 CQs, 9 showed zero entropy (identical answers), while 7 of the remaining 12 had entropy &#x003E;0.5 (unacceptable variability).</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Al Tibi et al<break/>[<xref ref-type="bibr" rid="ref45">45</xref>]</td><td align="left" valign="top">Overall recommendations conflicted in 95% (38/40)<bold>;</bold> Cohen &#x03BA;=&#x2212;0.0127 (no agreement). Category match: stop 0%<bold>,</bold> decrease 0%<bold>,</bold> increase 6.7% (3/40)<bold>,</bold> add 12.5% (5/40).</td></tr><tr><td align="left" valign="top" colspan="2">Acute cardiac events</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Birkun and Gautam<break/>[<xref ref-type="bibr" rid="ref38">38</xref>]</td><td align="left" valign="top">In 60 Bing chatbot responses, inconsistent advice appeared in 25% (5/20) of responses for the Gambia and the United States and 45% (9/20) for India. 
Readability required a 12th-grade level for the Gambia and the United States and 10th grade for India (<italic>P</italic>&#x2264;.008).</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Scquizzato et al<break/>[<xref ref-type="bibr" rid="ref39">39</xref>]</td><td align="left" valign="top">ChatGPT-3.5 answers to cardiac arrest and CPR<sup><xref ref-type="table-fn" rid="table3fn11">k</xref></sup> questions were rated positively overall (mean 4.3/5, SD 0.7), with high scores for clarity (mean 4.4/5, SD 0.6), relevance (mean 4.3/5, SD 0.6), and accuracy (mean 4.0/5, SD 0.6).</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Safranek et al<break/>[<xref ref-type="bibr" rid="ref40">40</xref>]</td><td align="left" valign="top">ChatGPT-4 reduced nonnumerical errors from 5.7% (95% CI 3.6&#x2010;8.9) to 0.3% (0.1&#x2010;1.9), lowered subscore error to 0.10 (0.07&#x2010;0.14) points with less variability (SD 0.33), and correctly classified HEART<sup><xref ref-type="table-fn" rid="table3fn12">l</xref></sup> risk groups in 100% (96.3&#x2010;100) of runs, compared with 81.5% (71.7&#x2010;88.4) for ChatGPT-3.5.</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Birkun<break/>[<xref ref-type="bibr" rid="ref43">43</xref>]</td><td align="left" valign="top">In Scenario 1, the chatbot suggested inapplicable or excessive actions in 10% (1/10) of conversations; in Scenario 2, this occurred in 30% (3/10).<break/>In Scenario 2, the chatbot failed to transition to CPR instructions after assessing the victim&#x2019;s condition in 30% (3/10).</td></tr><tr><td align="left" valign="top" colspan="2">Physician education</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Harskamp and De Clercq<break/>[<xref ref-type="bibr" rid="ref41">41</xref>]</td><td align="left" valign="top">The January 2023 version of ChatGPT-3.5 answered significantly fewer trivia questions correctly than the September 2023 version (74% [37/50] vs 92% [46/50]; <italic>P</italic>=.03), and only 50% (10/20) of complex cases were answered correctly.</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Skalidis et al<break/>[<xref ref-type="bibr" rid="ref16">16</xref>]</td><td align="left" valign="top">ChatGPT answered 58.8% (213/362) correctly.
ESC<sup><xref ref-type="table-fn" rid="table3fn13">m</xref></sup> 61.7% (42/68), BHDRA<sup><xref ref-type="table-fn" rid="table3fn14">n</xref></sup> 52.6% (79/150), StudyPRN 63.8% (92/144), approximating the 60% passing threshold.</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Yavuz and Kahraman<break/>[<xref ref-type="bibr" rid="ref15">15</xref>]</td><td align="left" valign="top">ChatGPT-4 received high expert agreement for differential diagnoses (median 5, IQR 1) and management plans (median 4, IQR 1), with diagnostic accuracy of 4.47 (SD 0.81) in Group 1 and 4.58 (SD 0.67) in Group 2, with no significant difference between groups (<italic>P</italic>=.26).</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Gritti et al<break/>[<xref ref-type="bibr" rid="ref13">13</xref>]</td><td align="left" valign="top">ChatGPT-4 answered 66% (58/88) correctly, significantly outperforming ChatGPT-3.5 at 38% (33/88; <italic>P</italic>&#x003C;.001), with superior accuracy across every subspecialty topic.</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Lee et al<break/>[<xref ref-type="bibr" rid="ref5">5</xref>]</td><td align="left" valign="top">ChatGPT-4 outperformed average MKSAP-19<sup><xref ref-type="table-fn" rid="table3fn15">o</xref></sup> users (80% [96/120] vs 60% [72/120]; <italic>P</italic>&#x003C;.001); ChatGPT-3.5 also passed, at a lower 55% (66/120), while PubMedGPT failed at 27% (32/120).</td></tr><tr><td align="left" valign="top" colspan="2">Patient education</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>G&#x00FC;nay et al<break/>[<xref ref-type="bibr" rid="ref19">19</xref>]</td><td align="left" valign="top">Readability analysis showed that hospital website answers averaged a Flesch Reading Ease score of 65.6 (7th-grade level), whereas ChatGPT-4 responses averaged 43.3 (11th-grade level).</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Bushuven et al<break/>[<xref ref-type="bibr" rid="ref20">20</xref>]</td><td align="left" valign="top">ChatGPT-3.5 and ChatGPT-4 correctly identified the diagnosis in 94% (124/132; <italic>P</italic>=.49) of responses, but advised emergency calls in only 54% (12/22) and provided correct first aid guidance in 45% (10/22), with incorrect advanced life support instructions in 14% (3/22) of cases.</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Lautrup et al<break/>[<xref ref-type="bibr" rid="ref18">18</xref>]</td><td align="left" valign="top">ChatGPT-4 responses to 123 cardiovascular prompts averaged 3&#x2010;4 across the 4Cs.
Myocardial infarction prompts scored highest (correctness 3.84/5; conciseness 3.65/5), while cardiovascular prevention scored lowest (correctness 3.03/5; conciseness 2.71/5).</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Moons and Van Bulck<break/>[<xref ref-type="bibr" rid="ref17">17</xref>]</td><td align="left" valign="top">ChatGPT lowered readability modestly (JAMA grade 11&#x2192;9; Cochrane 17&#x2192;11; EJCN grade 10 unchanged) while preserving most content, with word counts changing minimally in JAMA (533&#x2192;525), by 14% in Cochrane (365&#x2192;315), and by 45% in EJCN (1013&#x2192;563).</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Almagazzachi et al<break/>[<xref ref-type="bibr" rid="ref44">44</xref>]</td><td align="left" valign="top">Appropriateness: 93% (93/100) overall and 7% (7/100) inappropriate, evaluated against guideline-based standards. Reproducibility: 93% (93/100) of questions reproducible and 7% (7/100) irreproducible.</td></tr><tr><td align="left" valign="top" colspan="2">Cardiac diagnostic tests</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Fija&#x010D;ko et al<break/>[<xref ref-type="bibr" rid="ref30">30</xref>]</td><td align="left" valign="top">ChatGPT-4 was correct in 17/27 (63%), Bard in 13/27 (48.1%), and Bing in 6/27 (22.2%).</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Zhu et al<break/>[<xref ref-type="bibr" rid="ref42">42</xref>]</td><td align="left" valign="top">ChatGPT achieved 84% (21/25) overall accuracy on BLS<sup><xref ref-type="table-fn" rid="table3fn16">p</xref></sup> and 78.9% (30/38) on evaluable ACLS<sup><xref ref-type="table-fn" rid="table3fn17">q</xref></sup> items using multiple-choice inputs, improving to 96% (24/25) and 92.1% (35/38) when incorrectly answered questions were rewritten as open-ended prompts.</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>King et al<break/>[<xref ref-type="bibr" rid="ref14">14</xref>]</td><td align="left" valign="top">ChatGPT-4V answered 96% (24/25) of BLS and 90% (45/50) of ACLS questions correctly; accuracy decreased to 75% (9/12) for questions containing an ECG<sup><xref ref-type="table-fn" rid="table3fn18">r</xref></sup>.</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>G&#x00FC;nay et al<break/>[<xref ref-type="bibr" rid="ref29">29</xref>]</td><td align="left" valign="top">ChatGPT-4 correctly answered 91% (36/40), outperforming emergency medicine specialists (77% [31/40]; <italic>P</italic>&#x003C;.001) and cardiologists (82% [33/40]; <italic>P</italic>=.001).</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Kangiszer et al<break/>[<xref ref-type="bibr" rid="ref28">28</xref>]</td><td align="left" valign="top">ChatGPT-4 correctly answered 47.5% (67/141) in the open-ended format, 53.2% (75/141) in multiple choice without justification, and 55.3% (78/141) in multiple choice with forced justification.</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Sarangi et al [<xref ref-type="bibr"
rid="ref26">26</xref>]</td><td align="left" valign="top">Perplexity performed highest with 67% (50/75) concordance, followed by ChatGPT at 65% (49/75) and Bing at 63% (47/75), while Bard showed the lowest performance at 45% (34/75).</td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>HF: heart failure.</p></fn><fn id="table3fn2"><p><sup>b</sup>FAQ: frequently asked question.</p></fn><fn id="table3fn3"><p><sup>c</sup>FRE: Flesch Reading Ease.</p></fn><fn id="table3fn4"><p><sup>d</sup>AF: atrial fibrillation.</p></fn><fn id="table3fn5"><p><sup>e</sup>CIED: cardiac implantable electronic device.</p></fn><fn id="table3fn6"><p><sup>f</sup>AUROC: area under the receiver operating characteristic curve.</p></fn><fn id="table3fn7"><p><sup>g</sup>KoGES: Korean Genome and Epidemiology Study.</p></fn><fn id="table3fn8"><p><sup>h</sup>CVD: cardiovascular disease.</p></fn><fn id="table3fn9"><p><sup>i</sup>ACC: American College of Cardiology.</p></fn><fn id="table3fn10"><p><sup>j</sup>AHA: American Heart Association.</p></fn><fn id="table3fn11"><p><sup>k</sup>CPR: cardiopulmonary resuscitation.</p></fn><fn id="table3fn12"><p><sup>l</sup>HEART: History, ECG, Age, Risk factors, Troponin risk algorithm.</p></fn><fn id="table3fn13"><p><sup>m</sup>ESC: European Society of Cardiology.</p></fn><fn id="table3fn14"><p><sup>n</sup>BHDRA: British Heart Data Research Alliance. </p></fn><fn id="table3fn15"><p><sup>o</sup>MKSAP-19: Medical Knowledge Self-Assessment Program (19th edition).</p></fn><fn id="table3fn16"><p><sup>p</sup>BLS: Basic Life Support.</p></fn><fn id="table3fn17"><p><sup>q</sup>ACLS: Advanced Cardiovascular Life Support.</p></fn><fn id="table3fn18"><p><sup>r</sup>ECG: electrocardiogram.</p></fn></table-wrap-foot></table-wrap><table-wrap id="t4" position="float"><label>Table 4.</label><caption><p>Strengths and limitations.</p></caption><table id="table4" frame="hsides" rules="groups"><thead><tr><td align="left" valign="top">Group and study</td><td align="left" valign="top">LLM<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup> advantages</td><td align="left" valign="top">LLM disadvantages and limitations</td><td align="left" valign="top">Conclusion</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="4">Chronic and progressive cardiac conditions</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Dimitriadis et al [<xref ref-type="bibr" rid="ref27">27</xref>]</td><td align="left" valign="top">Gave clear and supportive answers to common HF<sup><xref ref-type="table-fn" rid="table4fn2">b</xref></sup> questions.</td><td align="left" valign="top">Using a single LLM version and a fixed question source; lacked real patient interaction, no quantitative scoring.</td><td align="left" valign="top">Useful for patient education in HF, but the evidence is limited.</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Riddell et al [<xref ref-type="bibr" rid="ref33">33</xref>]</td><td align="left" valign="top">Accurate and consistent responses; objective readability assessment using validated metrics (FRE<sup><xref ref-type="table-fn" rid="table4fn3">c</xref></sup> and SMOG<sup><xref ref-type="table-fn" rid="table4fn4">d</xref></sup>).</td><td align="left" valign="top">Questions were not validated with real patients; responses were tested on a single model sample.</td><td align="left" valign="top">Reliable content, but 
readability should be improved for patient use.</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Krittanawong et al [<xref ref-type="bibr" rid="ref34">34</xref>]</td><td align="left" valign="top">ChatGPT model provided primarily reliable answers to commonly asked questions related to HF.</td><td align="left" valign="top">Prompts not validated with real patients; single LLM tested; lacked quantitative scoring and external validation.</td><td align="left" valign="top">Promising adjunct for HF education, not stand-alone.</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Rouhi et al [<xref ref-type="bibr" rid="ref35">35</xref>]</td><td align="left" valign="top">Used standardized readability metrics on PEMs<sup><xref ref-type="table-fn" rid="table4fn5">e</xref></sup> from major institutions; compared the LLMs under identical conditions.</td><td align="left" valign="top">Focused on readability only; small sample of 21 materials; limited to US sources.</td><td align="left" valign="top">Both LLMs improved the readability of aortic stenosis materials, but broader validation and comprehension testing are needed before patient application.</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Hillmann et al [<xref ref-type="bibr" rid="ref36">36</xref>]</td><td align="left" valign="top">Standardized comparison across 3 LLMs using expert-blinded evaluation.</td><td align="left" valign="top">Questions not externally validated; small dataset (50 items); limited to electrophysiology topics.</td><td align="left" valign="top">ChatGPT-4 outperformed Bing and Bard in accuracy and comprehensibility, showing strong potential for patient education.</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Van Bulck and Moons [<xref ref-type="bibr" rid="ref37">37</xref>]</td><td align="left" valign="top">ChatGPT provided clearer, more structured, and more reliable cardiology information than Google.</td><td align="left" valign="top">Very small sample (4 vignettes and 20 experts); prompts not validated with real patients.</td><td align="left" valign="top">ChatGPT is generally seen as trustworthy and useful, but evidence remains limited.</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Kassab et al [<xref ref-type="bibr" rid="ref25">25</xref>]</td><td align="left" valign="top">ChatGPT-4 is highly accurate for patient and physician queries; it outperformed Bard.</td><td align="left" valign="top">Small dataset (15 patient and 15 physician questions); subjective grading.</td><td align="left" valign="top">ChatGPT-4 is promising for patient education and clinician support in valvular disease.</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Han et al [<xref ref-type="bibr" rid="ref24">24</xref>]</td><td align="left" valign="top">Large, real-world cohorts (UK Biobank and KoGES<sup><xref ref-type="table-fn" rid="table4fn6">f</xref></sup>) with transparent methodology; ChatGPT-4 achieved accuracy comparable to established CVD<sup><xref ref-type="table-fn" rid="table4fn7">g</xref></sup> risk models.</td><td align="left" valign="top">ChatGPT-4 outputs vary with identical 
prompts; training data remain nontransparent.</td><td align="left" valign="top">ChatGPT-4 is feasible for population-level CVD risk prediction.</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Ali et al [<xref ref-type="bibr" rid="ref23">23</xref>]</td><td align="left" valign="top">Innovative use of ChatGPT-4 for epidemiology.</td><td align="left" valign="top">Reliance on secondary data sources may introduce reporting bias; ChatGPT-4&#x2019;s role is limited to regression assistance.</td><td align="left" valign="top">LLMs may complement population health research, but clinical relevance is limited.</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Li et al [<xref ref-type="bibr" rid="ref22">22</xref>]</td><td align="left" valign="top">Compared multiple LLMs using standardized ESC<sup><xref ref-type="table-fn" rid="table4fn8">h</xref></sup> guideline&#x2013;based cardio-oncology questions.</td><td align="left" valign="top">Small question set; questions researcher-generated rather than patient-derived.</td><td align="left" valign="top">ChatGPT-4 shows promise for cardio-oncology but requires oversight.</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Yano et al [<xref ref-type="bibr" rid="ref21">21</xref>]</td><td align="left" valign="top">First study to evaluate ChatGPT-4&#x2019;s hypertension responses in English and Japanese.</td><td align="left" valign="top">Small sample (20 questions); prompts generated by the model rather than real patients; subjective evaluation.</td><td align="left" valign="top">ChatGPT-4 provided accurate and guideline-consistent hypertension information in both languages.</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Kusunose et al [<xref ref-type="bibr" rid="ref46">46</xref>]</td><td align="left" valign="top">Potential supplementary tool for rapid access to hypertension guideline information.</td><td align="left" valign="top">Overall accuracy may be insufficient for standalone use; inconsistent answers on repeat runs (entropy); small sample; single grading evaluation; no assessment of downstream clinical outcomes.</td><td align="left" valign="top">ChatGPT may assist clinicians as a supplement, but requires caution, especially for complex questions.</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Al Tibi et al [<xref ref-type="bibr" rid="ref45">45</xref>]</td><td align="left" valign="top">Uses real-world patient data.</td><td align="left" valign="top">Single center and single cardiologist; assumes the physician is correct; limited context given to ChatGPT-4; small sample.</td><td align="left" valign="top">ChatGPT-4 recommendations differed substantially from those of the cardiologist, with no agreement; further validation is needed before clinical use.</td></tr><tr><td align="left" valign="top" colspan="4">Acute cardiac events</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Birkun and Gautam [<xref ref-type="bibr" rid="ref38">38</xref>]</td><td align="left" valign="top">Using repeated queries across countries.</td><td align="left" valign="top">Single nonvalidated prompt; frequent omissions of key
guideline steps.</td><td align="left" valign="top">Provides relevant but often incomplete or incorrect MI<sup><xref ref-type="table-fn" rid="table4fn9">i</xref></sup> first aid advice; of limited value for unsupervised public use.</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Scquizzato et al [<xref ref-type="bibr" rid="ref39">39</xref>]</td><td align="left" valign="top">Dual evaluation by professionals and laypeople; generally positive ratings.</td><td align="left" valign="top">High reading level; subjective, unblinded scoring; single LLM version.</td><td align="left" valign="top">ChatGPT provided useful and mostly accurate CPR<sup><xref ref-type="table-fn" rid="table4fn10">j</xref></sup> information, but readability and safety gaps limit unsupervised use.</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Safranek et al [<xref ref-type="bibr" rid="ref40">40</xref>]</td><td align="left" valign="top">Iterative prompt refinement improved automated HEART<sup><xref ref-type="table-fn" rid="table4fn11">k</xref></sup> score determination across a limited set of synthetic patient notes.</td><td align="left" valign="top">Synthetic dataset; HEART subscores limited to structured fields.</td><td align="left" valign="top">Promising clinician decision support concept that warrants validation on real clinical data.</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Birkun [<xref ref-type="bibr" rid="ref43">43</xref>]</td><td align="left" valign="top">Natural dialogue; frequently delivered straightforward CPR steps and handled a bystander barrier by encouraging continuation.</td><td align="left" valign="top">Omitted critical elements; diagnostic risk by not asking &#x201C;breathing normally&#x201D;; conversational glitches and occasional inapplicable suggestions.</td><td align="left" valign="top">May be a better-than-nothing option where T-CPR<sup><xref ref-type="table-fn" rid="table4fn12">l</xref></sup> is unavailable, but it should not be considered reliable for real-life emergencies.</td></tr><tr><td align="left" valign="top" colspan="4">Physician education</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Harskamp and De Clercq [<xref ref-type="bibr" rid="ref41">41</xref>]</td><td align="left" valign="top">Clear performance improvement in newer model versions.</td><td align="left" valign="top">Lower accuracy in complex consults; single-run prompts; limited model transparency.</td><td align="left" valign="top">ChatGPT showed potential as AI<sup><xref ref-type="table-fn" rid="table4fn13">m</xref></sup> decision support for common cardiac conditions but requires further validation before clinical adoption.</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Skalidis et al [<xref ref-type="bibr" rid="ref16">16</xref>]</td><td align="left" valign="top">Large exam dataset; transparent question sourcing.</td><td align="left" valign="top">Manual single-run prompting; no control of model settings.</td><td align="left" valign="top">Potential aid for exam prep.</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Yavuz and Kahraman [<xref ref-type="bibr"
rid="ref15">15</xref>]</td><td align="left" valign="top">High expert agreement for differential diagnoses.</td><td align="left" valign="top">Synthetic case format may not reflect real-world nuance; modest variability in expert ratings.</td><td align="left" valign="top">Useful adjunct for training, but structured oversight is needed.</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Gritti et al [<xref ref-type="bibr" rid="ref13">13</xref>]</td><td align="left" valign="top">ChatGPT-4 markedly improved accuracy over ChatGPT-3.5; outperformed across all subspecialties; objective scoring using textbook answer key.</td><td align="left" valign="top">Moderate overall accuracy; text-only questions (no ECG or echo); single-run testing.</td><td align="left" valign="top">ChatGPT-4 performs better than ChatGPT-3.5 but remains insufficient for high-stakes pediatric cardiology use.</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Lee et al [<xref ref-type="bibr" rid="ref5">5</xref>]</td><td align="left" valign="top">Uses validated MKSAP-19 answer key.</td><td align="left" valign="top">Exam style questions only (no ECG, echo, or images); single run manual prompting.</td><td align="left" valign="top">ChatGPT-4 shows strong decision support potential, but limitations must be managed.</td></tr><tr><td align="left" valign="top" colspan="4">Patient education</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>G&#x00FC;nay et al [<xref ref-type="bibr" rid="ref19">19</xref>]</td><td align="left" valign="top">Direct comparison with hospital websites; blinded expert rating.</td><td align="left" valign="top">Language complexity; no patient validation.</td><td align="left" valign="top">Scientifically sound but limited accessibility</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Bushuven et al [<xref ref-type="bibr" rid="ref20">20</xref>]</td><td align="left" valign="top">High diagnostic accuracy (94%) and reliable recognition of pediatric emergencies.</td><td align="left" valign="top">Simulated vignettes only; no direct comparison with humans&#x2019; competence in emergency situations.</td><td align="left" valign="top">ChatGPT-4 performs better than ChatGPT-3.5 in emergency recognition but still provides incorrect and inconsistent guidance, requiring further refinement before real-world use.</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Lautrup et al [<xref ref-type="bibr" rid="ref18">18</xref>]</td><td align="left" valign="top">Innovative 4C framework; diverse prompts.</td><td align="left" valign="top">Expert-based, not real patients; no replicate testing; no control of model parameters.</td><td align="left" valign="top">Useful framework, but highlights risks of LLM-driven patient advice.</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Moons and Van Bulck [<xref ref-type="bibr" rid="ref17">17</xref>]</td><td align="left" valign="top">Clear value of LLMs for simplifying patient information and improving readability.</td><td align="left" valign="top">ChatGPT rarely reaches 6th-grade level; Bard removes large amounts of text; English-only evaluation; 
visuals not assessed; temperature settings not tested.</td><td align="left" valign="top">ChatGPT is useful for simplifying patient education materials but requires further evaluation.</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Almagazzachi et al [<xref ref-type="bibr" rid="ref44">44</xref>]</td><td align="left" valign="top">Large curated question set; repeated queries to assess reproducibility; dual evaluation against guidelines and physician judgment.</td><td align="left" valign="top">Predefined questions may limit topic coverage; no patient user testing; model version not specified.</td><td align="left" valign="top">ChatGPT demonstrated high accuracy and reproducibility for hypertension patient education, but human oversight remains necessary.</td></tr><tr><td align="left" valign="top" colspan="4">Cardiac diagnostic tests</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Fija&#x010D;ko et al [<xref ref-type="bibr" rid="ref30">30</xref>]</td><td align="left" valign="top">First multimodal chatbot ECG<sup><xref ref-type="table-fn" rid="table4fn14">n</xref></sup> test.</td><td align="left" valign="top">Small dataset.</td><td align="left" valign="top">Proof of concept that multimodal LLMs can attempt ECG interpretation.</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Zhu et al [<xref ref-type="bibr" rid="ref42">42</xref>]</td><td align="left" valign="top">Open-ended prompts improved answer quality.</td><td align="left" valign="top">Evaluation restricted to exam-style items rather than real-world clinical variation.</td><td align="left" valign="top">ChatGPT can achieve high performance on AHA<sup><xref ref-type="table-fn" rid="table4fn15">o</xref></sup> exam content, especially when questions are reframed as open-ended prompts.</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>King et al [<xref ref-type="bibr" rid="ref14">14</xref>]</td><td align="left" valign="top">ChatGPT-4V outperformed GPT-3.5, particularly with the inclusion of image-based questions.</td><td align="left" valign="top">No prospective testing with residents; only multiple-choice focus.</td><td align="left" valign="top">GPT-4 may support test preparation and training, but is limited to narrow tasks.</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>G&#x00FC;nay et al [<xref ref-type="bibr" rid="ref29">29</xref>]</td><td align="left" valign="top">ChatGPT showed better accuracy than the other 2 groups in everyday ECG questions.</td><td align="left" valign="top">Used text, not real ECG images; possible training exposure.</td><td align="left" valign="top">ChatGPT-4 is strong, but not a replacement.</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Kangiszer et al [<xref ref-type="bibr" rid="ref28">28</xref>]</td><td align="left" valign="top">Consistent strength in fact-based questions.</td><td align="left" valign="top">Low overall accuracy; no image interpretation capability evaluated; no comparison to human trainees.</td><td align="left" valign="top">ChatGPT-4 shows limited accuracy for echocardiography board content.</td></tr><tr><td
align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Sarangi et al [<xref ref-type="bibr" rid="ref26">26</xref>]</td><td align="left" valign="top">Ability to generate reasonable differential diagnoses from text prompts.</td><td align="left" valign="top">Only text descriptions; 2 radiologists only.</td><td align="left" valign="top">Useful adjunct, but true concordance with experts remains limited.</td></tr></tbody></table><table-wrap-foot><fn id="table4fn1"><p><sup>a</sup>LLM: large language model.</p></fn><fn id="table4fn2"><p><sup>b</sup>HF: heart failure.</p></fn><fn id="table4fn3"><p><sup>c</sup>FRE: Flesch Reading Ease.</p></fn><fn id="table4fn4"><p><sup>d</sup>SMOG: Simple Measure of Gobbledygook.</p></fn><fn id="table4fn5"><p><sup>e</sup>PEM: Patient Education Materials.</p></fn><fn id="table4fn6"><p><sup>f</sup>KoGES: Korean Genome and Epidemiology Study.</p></fn><fn id="table4fn7"><p><sup>g</sup>CVD: cardiovascular disease.</p></fn><fn id="table4fn8"><p><sup>h</sup>ESC: European Society of Cardiology.</p></fn><fn id="table4fn9"><p><sup>i</sup>MI: myocardial infarction.</p></fn><fn id="table4fn10"><p><sup>j</sup>CPR: cardiopulmonary resuscitation.</p></fn><fn id="table4fn11"><p><sup>k</sup>HEART: History, ECG, Age, Risk factors, Troponin risk algorithm. </p></fn><fn id="table4fn12"><p><sup>l</sup>T-CPR: telecommunicator-assisted cardiopulmonary resuscitation.</p></fn><fn id="table4fn13"><p><sup>m</sup>AI: artificial intelligence.</p></fn><fn id="table4fn14"><p><sup>n</sup>ECG: electrocardiogram.</p></fn><fn id="table4fn15"><p><sup>o</sup>AHA: American Heart Association.</p></fn></table-wrap-foot></table-wrap><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Categorization of articles into core groups with corresponding cardiology subfields.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="cardio_v10i1e76734_fig02.png"/></fig></sec><sec id="s3-2"><title>Chronic and Progressive Cardiac Conditions</title><p>Thirteen studies [<xref ref-type="bibr" rid="ref21">21</xref>-<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref33">33</xref>-<xref ref-type="bibr" rid="ref37">37</xref>,<xref ref-type="bibr" rid="ref45">45</xref>,<xref ref-type="bibr" rid="ref46">46</xref>] evaluated the application of LLMs in chronic cardiovascular disease, spanning heart failure, hypertension, valvular disease, atrial fibrillation, cardiovascular risk prediction, and cardio-oncology.</p><p>Heart failure was the most frequently studied topic. Dimitriadis et al [<xref ref-type="bibr" rid="ref27">27</xref>] showed that ChatGPT-3.5 produced accurate answers to 91% (43/47) of patient questions, though some were incomplete. Riddell et al [<xref ref-type="bibr" rid="ref33">33</xref>] reported that ChatGPT-4 responses to HF questions were written at a college reading level in 71% (50/70; median FRE [Flesch Reading Ease] 40.2, grade 16, IQR 48.3-34.6) of cases, while Krittanawong et al [<xref ref-type="bibr" rid="ref34">34</xref>] reported that only 40% (8/20) of ChatGPT-3.5 responses were reliable without physician oversight.</p><p>Hypertension was addressed by Yano et al [<xref ref-type="bibr" rid="ref21">21</xref>], who demonstrated that ChatGPT-4 produced largely appropriate answers in 85% (17/20) of hypertension-related inquiries in English and Japanese, with English responses consistently superior. 
Kusunose et al [<xref ref-type="bibr" rid="ref46">46</xref>] evaluated ChatGPT-3.5 against 31 guideline-based questions derived from the Japanese Society of Hypertension (JSH) 2019 guidelines. The chatbot achieved an overall accuracy of 64.5% (20/31). Performance was significantly higher for clinical questions than for limited evidence-based questions (80% [16/20] vs 36% [4/11]; <italic>P</italic>=.005). A nonsignificant trend was observed for recommendation level versus evidence level questions (62% vs 38%; denominators not reported; <italic>P</italic>=.07). No difference was found between questions originally written in Japanese and translated questions (65% vs 58%; denominators not reported; <italic>P</italic>=.60). In a retrospective analysis using real-world data from a rural clinic, Al Tibi et al [<xref ref-type="bibr" rid="ref45">45</xref>] compared antihypertensive medication recommendations generated by ChatGPT-4 with those made by a cardiologist during laboratory review visits. Among 40 patients with hypertension, overall recommendations differed in 95% (38/40) of cases. At the level of individual medications, agreement was low, with only 10.2% of recommendations matching between ChatGPT-4 and the cardiologist (denominators not reported). The Cohen &#x03BA; coefficient was &#x2212;0.0127, indicating no agreement on whether to implement medication changes for a given patient.</p><p>Valvular disease was the focus of Rouhi et al [<xref ref-type="bibr" rid="ref35">35</xref>], who showed that ChatGPT-3.5 and Bard improved the readability of aortic stenosis education materials, with ChatGPT-3.5 achieving the target 6th-7th grade level while Bard remained above it. Additionally, Kassab et al [<xref ref-type="bibr" rid="ref25">25</xref>] evaluated 30 valvular disease queries, reporting that ChatGPT-4 provided 100% (15/15) accurate responses to patient-centered questions and 73% (11/15) accurate and 27% (4/15) partly accurate responses to complex clinical scenarios, outperforming Google Bard (40% [6/15] accurate) and being 2.5-fold more likely to provide accurate answers (<italic>P</italic>&#x003C;.001).</p><p>Atrial fibrillation and cardiac implantable device information were evaluated by Hillmann et al [<xref ref-type="bibr" rid="ref36">36</xref>]; ChatGPT-4 produced appropriate responses in 84% (21/25) of atrial fibrillation and 88% (22/25) of cardiac implantable electronic device queries, with comprehensibility scores of 92% (23/25) and 100% (25/25), respectively. ChatGPT-4 outperformed Bing (60% [15/25], 72% [18/25] appropriate) and Bard (52% [13/25], 16% [4/25] appropriate) and showed fewer omissions and minimal confabulation.</p><p>Other chronic conditions were also explored. Han et al [<xref ref-type="bibr" rid="ref24">24</xref>] showed that ChatGPT-4 achieved 10-year cardiovascular disease risk predictions with performance similar to the American College of Cardiology/American Heart Association (ACC/AHA) and Framingham risk models. Li et al [<xref ref-type="bibr" rid="ref22">22</xref>] found that ChatGPT-4 outperformed other LLMs in cardio-oncology, though it was less reliable for treatment recommendations. Van Bulck and Moons [<xref ref-type="bibr" rid="ref37">37</xref>] reported that 40% (8/20) of experts found ChatGPT&#x2019;s information more valuable than Google, 45% (9/20) equally valuable, and 15% (3/20) less valuable.
Experts appreciated the sophistication and nuance of ChatGPT&#x2019;s responses but noted they were sometimes incomplete or potentially misleading.</p><p>Cardiovascular mortality was evaluated by Ali et al [<xref ref-type="bibr" rid="ref23">23</xref>], who demonstrated the use of ChatGPT-4 to generate and execute regression models predicting mortality rates at the US county level. Across 3118 counties, the model explained 34% (<italic>R</italic>&#x00B2;=0.34) of variability in age-adjusted cardiovascular mortality, with higher social vulnerability increasing (<italic>&#x03B2;</italic>=49.01) and higher digital literacy reducing (<italic>&#x03B2;</italic>=&#x2212;4.51) mortality.</p></sec><sec id="s3-3"><title>Acute Cardiac Events</title><p>Four studies [<xref ref-type="bibr" rid="ref38">38</xref>-<xref ref-type="bibr" rid="ref40">40</xref>,<xref ref-type="bibr" rid="ref43">43</xref>] evaluated LLMs in acute cardiac contexts.</p><sec id="s3-3-1"><title>First Aid in Myocardial Infarction</title><p>Birkun and Gautam [<xref ref-type="bibr" rid="ref38">38</xref>] found that the Bing chatbot frequently omitted critical guideline-concordant steps, with readability at a 12th-grade level for the Gambia and the United States and 10th grade for India (<italic>P</italic>&#x2264;.008). Incorrect advice appeared in 25% (5/20) of responses for the Gambia and the United States and 45% (9/20) for India.</p></sec><sec id="s3-3-2"><title>Cardiac Arrest and Cardiopulmonary Resuscitation</title><p>Scquizzato et al [<xref ref-type="bibr" rid="ref39">39</xref>] reported that ChatGPT-3.5 answers to cardiac arrest and cardiopulmonary resuscitation (CPR) questions were rated positively overall (mean 4.3/5, SD 0.7), with high scores for clarity (mean 4.4/5, SD 0.6), relevance (mean 4.3/5, SD 0.6), and accuracy (mean 4.0/5, SD 0.6). CPR-specific responses scored lower across all parameters, and professionals rated overall value (mean 4.0/5, SD 0.5 vs mean 4.6/5, SD 0.7; <italic>P</italic>=.02) and comprehensiveness (mean 3.9/5, SD 0.6 vs mean 4.5/5, SD 0.7; <italic>P</italic>=.02) lower than laypeople. Readability was difficult (FRE score 34 [IQR 26&#x2010;42]). Birkun [<xref ref-type="bibr" rid="ref43">43</xref>] assessed the New Bing chatbot&#x2019;s ability to provide telecommunicator-assisted CPR across 2 scenarios: Scenario 1, in which the victim was not breathing, and Scenario 2, in which the bystander was unsure whether the victim was breathing. In Scenario 2, the chatbot failed to ask for the emergency address in 50% (5/10) of cases and did not transition to CPR instructions after assessing the victim in 30% (3/10), with several additional Scenario 2 conversations reportedly interrupted or stuck at the breathing assessment step. The chatbot asked only whether the victim was &#x201C;breathing&#x201D; (rather than &#x201C;breathing normally&#x201D;), potentially missing agonal breathing and delaying arrest recognition, never inquired about nearby AED (Automated External Defibrillator) availability, and suggested inapplicable actions in 10% (1/10) of Scenario 1 and 30% (3/10) of Scenario 2.</p></sec><sec id="s3-3-3"><title>Chest Pain Evaluation</title><p>Safranek et al [<xref ref-type="bibr" rid="ref40">40</xref>] reported that ChatGPT-4 correctly classified HEART score (History, ECG, Age, Risk factors, Troponin risk algorithm) risk groups in 100% (96.3%&#x2010;100%) of runs compared with 81.5% (71.7%&#x2010;88.4%) for ChatGPT-3.5.
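For context, the HEART score assigns each of its 5 components (history, ECG, age, risk factors, and troponin) 0 to 2 points; the subscores sum to a total of 0 to 10, with totals of 0 to 3, 4 to 6, and 7 to 10 conventionally mapped to low, moderate, and high risk, so correct risk-group classification requires both accurate component scoring and an accurate sum.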
Iterative prompt refinement reduced nonnumerical outputs for ChatGPT-3.5 from 18.7% (95% CI 14.7&#x2010;23.5) to 6.7% (4.4&#x2010;10.1) and for ChatGPT-4 from 5.7% (3.6&#x2010;8.9) to 0.3% (0.1&#x2010;1.9).</p></sec></sec><sec id="s3-4"><title>Physician Education</title><p>Six studies [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref41">41</xref>] investigated the use of LLMs for supporting physician training and assessment in cardiology.</p><p>Exam preparation was assessed by Lee et al [<xref ref-type="bibr" rid="ref5">5</xref>], who tested LLMs on 120 MKSAP-19 (Medical Knowledge Self-Assessment Program, 19th edition) cardiology questions and found that ChatGPT-4 achieved 80% (96/120), meeting the passing threshold, while PubMedGPT lagged far behind at 27% (32/120). Skalidis et al [<xref ref-type="bibr" rid="ref16">16</xref>] reported that ChatGPT answered 58.8% (213/362) of European Exam in Core Cardiology questions correctly, close to the 60% passing threshold.</p><p>Clinical cases were addressed by Yavuz and Kahraman [<xref ref-type="bibr" rid="ref15">15</xref>], who reported that ChatGPT-4 achieved high expert agreement for differential diagnoses (median 5, IQR 1) and management plans (median 4, IQR 1), supporting its role as a supplemental study aid, but emphasized that it should not be used unsupervised.</p><p>Clinical reasoning and decision support were tested by Harskamp and De Clercq [<xref ref-type="bibr" rid="ref41">41</xref>]. ChatGPT-3.5 achieved correct responses in 85% (17/20) of AMSTELHEART-2 case vignettes, though performance was inconsistent in complex presentations. Gritti et al [<xref ref-type="bibr" rid="ref13">13</xref>] found that ChatGPT-4 correctly answered 66% (58/88) of pediatric cardiology cases, compared with 38% (33/88; <italic>P</italic>&#x003C;.001) for ChatGPT-3.5, with superior accuracy across all subspecialty topics, although neither model reached the 70% passing threshold. Both models produced explanations containing incorrect or inconsistent reasoning, which were not formally graded.</p><p>Chest pain information was assessed by G&#x00FC;nay et al [<xref ref-type="bibr" rid="ref19">19</xref>], who found that ChatGPT-4 produced answers with comparable scientific adequacy, ease of understanding, and physician satisfaction to hospital websites (all 5.0&#x2010;6.0/7; no significant differences), but at a much higher reading level&#x2014;11th grade versus 7th grade for hospital materials.</p></sec><sec id="s3-5"><title>Patient Education</title><p>Four studies [<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref44">44</xref>] evaluated LLMs for patient information. General consultation with ChatGPT-4 was explored by Lautrup et al [<xref ref-type="bibr" rid="ref18">18</xref>], where its responses to 123 cardiovascular prompts scored between 3 and 4 across the 4Cs (correctness 3.45/5, conciseness 3.19/5, comprehensiveness 3.52/5, and comprehensibility 3.72/5). Performance varied by topic, with myocardial infarction prompts scoring highest (correctness 3.84/5 and conciseness 3.65/5) and cardiovascular prevention lowest (correctness 3.03/5 and conciseness 2.71/5).
Higher-literacy prompts yielded better responses, while prompts in lower-resource languages unexpectedly scored higher across all domains.</p><p>Emergency situations were studied by Bushuven et al [<xref ref-type="bibr" rid="ref20">20</xref>], who compared ChatGPT-3.5 and ChatGPT-4 in Basic Life Support (BLS) and Pediatric Advanced Life Support (PALS) cases. While both models correctly identified the diagnosis in 94% (124/132; <italic>P</italic>=.49) of cases, they advised calling emergency services in only 54% (12/22), provided correct first aid guidance in 45% (10/22), and gave incorrect advanced life support instructions in 14% (3/22) of cases.</p><p>Readability of patient materials was evaluated by Moons and Van Bulck [<xref ref-type="bibr" rid="ref17">17</xref>], who found that ChatGPT improved readability with minimal content loss (JAMA 533 to 525 words; Cochrane 365 to 315 words; EJCN 1013 to 563 words), whereas Google Bard achieved lower grade levels, but removed substantial content&#x2014;shortening the texts by 61% (525 to 207 words), 34% (365 to 242 words), and 80% (1013 to 204 words), often omitting important details.</p><p>Almagazzachi et al [<xref ref-type="bibr" rid="ref44">44</xref>] compiled a final set of 100 hypertension-related questions after physician review. Each question was asked to ChatGPT 3 times, and the majority response for each question was evaluated against established reference publications. Guideline-based assessment classified 93% (93/100) of majority responses as appropriate and 7% (7/100) as inappropriate. A separate clinical review by 1 board-certified internal medicine physician classified 92% (92/100) as appropriate and 8% (8/100) as inappropriate, yielding an overall accuracy of 92.5% (mean of the 2 assessments). For reproducibility per question, 93% (93/100) were reproducible and 7% (7/100) were irreproducible; across all 300 responses, 2.3% (7/300) were classified as irreproducible.</p></sec><sec id="s3-6"><title>Cardiac Diagnostic Tests</title><p>Six studies examined the ability of LLMs to support diagnostic testing in cardiology [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref28">28</xref>-<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref42">42</xref>], focusing on imaging, electrocardiography, and echocardiography.</p><p>Cardiothoracic imaging was evaluated by Sarangi et al [<xref ref-type="bibr" rid="ref26">26</xref>], who compared 4 LLMs on 25 cardiac differential diagnosis items. Perplexity achieved the highest concordance (67% [50/75]), followed by ChatGPT (65% [49/75]) and Bing (63% [47/75]), while Bard performed lowest (45% [34/75]), though accuracy remained moderate and dependent on case complexity.</p><p>Electrocardiography performance was assessed across several studies. Fija&#x010D;ko et al [<xref ref-type="bibr" rid="ref30">30</xref>] evaluated multimodal LLMs on ECG image interpretation, finding that ChatGPT-4 correctly interpreted 63% (17/27) of ECG images, outperforming Google Bard (48% [13/27]) and Bing (22.2% [6/27]). Zhu et al [<xref ref-type="bibr" rid="ref42">42</xref>] subsequently assessed ChatGPT-4 on AHA BLS or ACLS (Advanced Cardiovascular Life Support) examination items. Although the model achieved 84% (21/25) accuracy on BLS items and 78.9% (30/38) on evaluable ACLS questions using multiple-choice prompts, the majority of its errors originated from ECG-containing items.
Accuracy improved substantially to 96% (24/25) for BLS and 92.1% (35/38) for ACLS when incorrectly answered multiple-choice questions were rewritten as open-ended prompts. More recently, King et al [<xref ref-type="bibr" rid="ref14">14</xref>] evaluated ChatGPT-4V on the full 75-item AHA BLS or ACLS examination, achieving 96% (24/25) accuracy on BLS and 90% (45/50) on ACLS items, with performance decreasing to 75% (9/12) on questions that contained ECG strips in the ACLS examination.</p><p>In a separate vignette-based evaluation, G&#x00FC;nay et al [<xref ref-type="bibr" rid="ref29">29</xref>] tested ChatGPT-4 on 40 written ECG case scenarios, finding 91% (36/40) accuracy, exceeding that of both emergency physicians (77% [31/40]; <italic>P</italic>&#x003C;.001) and cardiologists (82% [33/40]; <italic>P</italic>=.001), though the model consistently struggled with wide QRS tachycardias.</p><p>Echocardiography was evaluated by Kangiszer et al [<xref ref-type="bibr" rid="ref28">28</xref>], who tested ChatGPT-4 on 150 echocardiography board-style questions, of which 141 were evaluable. Accuracy remained modest, with ChatGPT-4 correctly answering 47.5% (67/141) of open-ended items, 53.2% (75/141) of multiple-choice items without justification, and 55.3% (78/141) with forced justification. Overall performance was inadequate for board-level competency.</p><p><xref ref-type="fig" rid="figure3">Figure 3</xref> outlines the 3 primary aspects explored in the articles, including the reliability of LLMs, user interaction, and their specific applications in cardiology.</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Illustration of 3 key focus areas regarding large language models in cardiology.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="cardio_v10i1e76734_fig03.png"/></fig></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><p>This systematic review included 35 studies [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref13">13</xref>-<xref ref-type="bibr" rid="ref46">46</xref>] on the use of LLMs in cardiology, grouped into 5 domains: chronic and progressive cardiac conditions, acute cardiac events, physician education, patient education, and cardiac diagnostic tests. Overall, the studies showed that LLMs have potential across multiple aspects of cardiac management. In chronic conditions, models such as ChatGPT accurately answered common patient questions and improved the readability of educational materials [<xref ref-type="bibr" rid="ref27">27</xref>], supporting patient engagement in long-term care. In acute emergencies, LLMs produced advice that users found clear, relevant, and accurate [<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref39">39</xref>], suggesting possible value for lay and professional support in time-critical situations. For cardiac diagnostics, multimodal models performed well in ECG interpretation, often matching or surpassing human specialists [<xref ref-type="bibr" rid="ref14">14</xref>], indicating potential to support clinical workflows and reduce routine workloads.</p><p>Despite these benefits, several issues must be addressed before LLMs can be used widely in cardiology. Accuracy and consistency varied significantly across models, with some producing unreliable or inconsistent interpretations [<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref36">36</xref>], making their use in clinical settings uncertain.
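As background for the readability findings discussed here, several included studies quantified readability with the Flesch Reading Ease (FRE) score, computed as FRE=206.835 &#x2212; 1.015 &#x00D7; (total words/total sentences) &#x2212; 84.6 &#x00D7; (total syllables/total words); higher scores indicate easier text, with values in the 60s corresponding roughly to plain language and values below 50 to college-level material.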
<p>In a separate vignette-based evaluation, G&#x00FC;nay et al [<xref ref-type="bibr" rid="ref29">29</xref>] tested ChatGPT-4 on 40 written ECG case scenarios, finding 90% (36/40) accuracy, exceeding that of both emergency physicians (77% [31/40]; <italic>P</italic>&#x003C;.001) and cardiologists (82% [33/40]; <italic>P</italic>=.001), though the model consistently struggled with wide QRS tachycardias.</p><p>Echocardiography was evaluated by Kangiszer et al [<xref ref-type="bibr" rid="ref28">28</xref>], who tested ChatGPT-4 on 150 echocardiography board-style questions, of which the model answered 141. Accuracy remained modest: ChatGPT-4 correctly answered 47.5% (67/141) of open-ended items, 53.2% (75/141) of multiple-choice items without justification, and 55.3% (78/141) of multiple-choice items with forced justification. Overall performance was inadequate for board-level competency.</p><p><xref ref-type="fig" rid="figure3">Figure 3</xref> outlines the 3 primary aspects explored in the articles: the reliability of LLMs, user interaction, and specific applications in cardiology.</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Illustration of 3 key focus areas regarding large language models in cardiology.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="cardio_v10i1e76734_fig03.png"/></fig></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><p>This systematic review included 35 studies [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref13">13</xref>-<xref ref-type="bibr" rid="ref46">46</xref>] on the use of LLMs in cardiology, grouped into 5 domains: chronic and progressive cardiac conditions, acute cardiac events, physician education, patient education, and cardiac diagnostic tests. Overall, the studies showed that LLMs have potential across multiple aspects of cardiac management. In chronic conditions, models such as ChatGPT accurately answered common patient questions and improved the readability of educational materials [<xref ref-type="bibr" rid="ref27">27</xref>], supporting patient engagement in long-term care. In acute emergencies, LLMs produced advice that users found clear, relevant, and accurate [<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref39">39</xref>], suggesting possible value for lay and professional support in time-critical situations. For cardiac diagnostics, multimodal models performed well in ECG interpretation, often matching or surpassing human specialists [<xref ref-type="bibr" rid="ref14">14</xref>], indicating potential to support clinical workflows and reduce routine workloads.</p><p>Despite these benefits, several issues must be addressed before LLMs can be used widely in cardiology. Accuracy and consistency varied considerably across models, with some producing unreliable or inconsistent interpretations [<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref36">36</xref>], making their suitability for clinical settings uncertain. Readability and accessibility also remained challenging: although some LLMs improved clarity, others achieved lower reading levels only by removing essential information [<xref ref-type="bibr" rid="ref33">33</xref>], raising concerns for patient communication across varying literacy levels. The use of LLMs also raises data privacy concerns [<xref ref-type="bibr" rid="ref40">40</xref>], as their deployment requires strict protection of sensitive patient information.</p><p>The reviewed articles had several limitations. Approximately half of the included studies evaluated ChatGPT-3.5, whose training data extended only to September 2021, limiting its ability to provide up-to-date information, an important consideration in cardiology. Most studies relied on expert evaluations rather than patient feedback, limiting insight into real-world usability. No included studies involved actual patients, raising questions about whether individuals can engage effectively with artificial intelligence&#x2013;generated content. Regarding diagnostic applications, earlier studies often relied on text-based representations of multimodal data, such as written ECG or echocardiography descriptions, which may not fully capture real-world diagnostic complexity. However, recent studies have begun to evaluate image-based analysis directly with multimodal models, including assessments of ECG image interpretation by ChatGPT-4&#x2013;based systems, as demonstrated by King et al [<xref ref-type="bibr" rid="ref14">14</xref>]. Despite these advances, the current evidence remains limited in scope, and performance across multimodal tasks is heterogeneous, underscoring the need for larger, standardized evaluations of direct image analysis in cardiology.</p><p>This review also has its own limitations. First, the search was restricted to PubMed and Scopus, potentially missing studies in databases such as Embase or IEEE Xplore. Second, the inclusion criteria limited the review to peer-reviewed publications, excluding conference papers and preprint repositories such as arXiv and medRxiv, where important AI-related findings are often shared prior to peer review. Third, most included articles were in silico evaluations rather than prospective trials, limiting the generalizability of the findings; the heterogeneity of tasks and methods also prevented meta-analysis. Fourth, due to the rapid evolution of LLMs and changing model nomenclature, our search strategy did not incorporate newer terms such as &#x201C;Copilot&#x201D; or broader descriptors such as &#x201C;Generative AI,&#x201D; which may have resulted in missing recently published studies. Finally, because technological advancements occur quickly, relevant studies and newer LLM applications may have emerged after the search was completed.</p><p>Across studies, the adapted QUADAS assessment (developed by Whiting et al at the University of Bristol) revealed methodological limitations specific to LLM research. Patient selection frequently presented a high risk of bias, as many studies used researcher-generated prompts or unvalidated questions without clinician or patient confirmation. In physician education studies, question banks commonly excluded media-based content such as ECGs or echocardiography clips, restricting the range of assessed cardiology skills. The index test domain was also often at high risk, as most studies used single, nonreplicated runs without reporting temperature settings or model versioning. Reference standards were generally at low risk, whereas flow, timing, and data management were limited by missing metadata, insufficient prompt transparency, and a lack of full output logs.</p>
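<p>To illustrate the run-level metadata whose absence these domains flagged, a minimal Python sketch of a replicated, logged evaluation loop follows (the record fields and the query_fn callable are hypothetical placeholders for whichever LLM API a study uses, not a scheme prescribed by the reviewed articles):</p><preformat>import json
import time
from dataclasses import dataclass, asdict

@dataclass
class RunRecord:
    # One logged model call: enough metadata to reproduce and audit it
    model_version: str
    temperature: float
    prompt: str
    output: str
    timestamp: float

def evaluate(prompt, query_fn, model_version, temperature=0.0, n_runs=3):
    # Replicate each prompt n_runs times instead of a single nonreplicated run
    records = []
    for _ in range(n_runs):
        output = query_fn(prompt, model_version=model_version, temperature=temperature)
        records.append(RunRecord(model_version, temperature, prompt, output, time.time()))
    return records

def save(records, path="runs.jsonl"):
    # Persisting raw records addresses the missing-metadata and output-log gaps
    with open(path, "a") as f:
        for r in records:
            f.write(json.dumps(asdict(r)) + "\n")</preformat>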
<p>Future research should address these limitations by conducting prospective clinical trials evaluating LLMs in real-world workflows, developing standardized metrics for accuracy, readability, and safety, and exploring electronic health record integration. Studies should also expand multimodal applications, including direct analysis of ECGs and imaging.</p><p>In conclusion, LLMs demonstrate potential in cardiology, particularly in educational applications and routine diagnostics. However, performance remains inconsistent across clinical scenarios, especially in acute care, where precision is critical. With continued refinement and responsible integration, LLMs may ultimately become valuable partners in cardiovascular care and help redefine what is possible in modern medicine.</p></sec></body><back><ack><p>Generative artificial intelligence (ChatGPT) was used for grammar refinement and language editing during manuscript preparation. Scientific content, including study conception, data extraction, analysis, interpretation, and revisions, was generated and verified by the authors.</p></ack><notes><sec><title>Funding</title><p>This work was supported in part by the Clinical and Translational Science Awards (CTSA) grant UL1TR004419 from the National Center for Advancing Translational Sciences.</p></sec><sec><title>Data Availability</title><p>All data relevant to the study are included in the article or uploaded as supplementary information.</p></sec></notes><fn-group><fn fn-type="con"><p>Conceptualization - SS, EK</p><p>Supervision - SS, EK</p><p>Investigation, Data curation, Formal analysis - MG, SS</p><p>Validation - SS, EK, BSG</p><p>Visualization - MG</p><p>Writing &#x2013; original draft - MG</p><p>Funding acquisition - GNN</p><p>Writing &#x2013; review &#x0026; editing - MG, GNN, KS, MC-S, BSG, OE, SS, EK</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">ACLS</term><def><p>Advanced Cardiovascular Life Support</p></def></def-item><def-item><term id="abb2">AED</term><def><p>automated external defibrillator</p></def></def-item><def-item><term id="abb3">AHA</term><def><p>American Heart Association</p></def></def-item><def-item><term id="abb4">BLS</term><def><p>Basic Life Support</p></def></def-item><def-item><term id="abb5">CPR</term><def><p>cardiopulmonary resuscitation</p></def></def-item><def-item><term id="abb6">ECG</term><def><p>electrocardiogram</p></def></def-item><def-item><term id="abb7">FRE</term><def><p>Flesch Reading Ease</p></def></def-item><def-item><term id="abb8">HEART</term><def><p>History, ECG, Age, Risk factors, Troponin risk algorithm</p></def></def-item><def-item><term id="abb9">JSH</term><def><p>Japanese Society of Hypertension</p></def></def-item><def-item><term id="abb10">LLM</term><def><p>large language model</p></def></def-item><def-item><term id="abb11">MeSH</term><def><p>Medical Subject Headings</p></def></def-item><def-item><term id="abb12">MKSAP-19</term><def><p>Medical Knowledge Self-Assessment Program, 19th edition</p></def></def-item><def-item><term id="abb13">PALS</term><def><p>Pediatric Advanced Life Support</p></def></def-item><def-item><term id="abb14">PRISMA</term><def><p>Preferred Reporting Items for Systematic Reviews and
Meta-Analyses</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Nadkarni</surname><given-names>PM</given-names> </name><name name-style="western"><surname>Ohno-Machado</surname><given-names>L</given-names> </name><name name-style="western"><surname>Chapman</surname><given-names>WW</given-names> </name></person-group><article-title>Natural language processing: an introduction</article-title><source>J Am Med Inform Assoc</source><year>2011</year><volume>18</volume><issue>5</issue><fpage>544</fpage><lpage>551</lpage><pub-id pub-id-type="doi">10.1136/amiajnl-2011-000464</pub-id><pub-id pub-id-type="medline">21846786</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mintz</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Brodie</surname><given-names>R</given-names> </name></person-group><article-title>Introduction to artificial intelligence in medicine</article-title><source>Minim Invasive Ther Allied Technol</source><year>2019</year><month>04</month><volume>28</volume><issue>2</issue><fpage>73</fpage><lpage>81</lpage><pub-id pub-id-type="doi">10.1080/13645706.2019.1575882</pub-id><pub-id pub-id-type="medline">30810430</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wen</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Huang</surname><given-names>H</given-names> </name></person-group><article-title>The potential for artificial intelligence in healthcare</article-title><source>J Commer Biotechnol</source><year>2022</year><volume>27</volume><issue>4</issue><fpage>217</fpage><pub-id pub-id-type="doi">10.5912/jcb1327</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sarraju</surname><given-names>A</given-names> </name><name name-style="western"><surname>Ouyang</surname><given-names>D</given-names> </name><name name-style="western"><surname>Itchhaporia</surname><given-names>D</given-names> </name></person-group><article-title>The opportunities and challenges of large language models in cardiology</article-title><source>JACC Adv</source><year>2023</year><month>09</month><volume>2</volume><issue>7</issue><fpage>100438</fpage><pub-id pub-id-type="doi">10.1016/j.jacadv.2023.100438</pub-id><pub-id pub-id-type="medline">38939505</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lee</surname><given-names>PC</given-names> </name><name name-style="western"><surname>Sharma</surname><given-names>SK</given-names> </name><name name-style="western"><surname>Motaganahalli</surname><given-names>S</given-names> </name><name name-style="western"><surname>Huang</surname><given-names>A</given-names> </name></person-group><article-title>Evaluating the clinical decision-making ability of large language models using MKSAP-19 cardiology questions</article-title><source>JACC Adv</source><year>2023</year><month>11</month><volume>2</volume><issue>9</issue><fpage>100658</fpage><pub-id 
pub-id-type="doi">10.1016/j.jacadv.2023.100658</pub-id><pub-id pub-id-type="medline">38938709</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ahn</surname><given-names>C</given-names> </name></person-group><article-title>Exploring ChatGPT for information of cardiopulmonary resuscitation</article-title><source>Resuscitation</source><year>2023</year><month>04</month><volume>185</volume><fpage>109729</fpage><pub-id pub-id-type="doi">10.1016/j.resuscitation.2023.109729</pub-id><pub-id pub-id-type="medline">36773836</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Boonstra</surname><given-names>MJ</given-names> </name><name name-style="western"><surname>Weissenbacher</surname><given-names>D</given-names> </name><name name-style="western"><surname>Moore</surname><given-names>JH</given-names> </name><name name-style="western"><surname>Gonzalez-Hernandez</surname><given-names>G</given-names> </name><name name-style="western"><surname>Asselbergs</surname><given-names>FW</given-names> </name></person-group><article-title>Artificial intelligence: revolutionizing cardiology with large language models</article-title><source>Eur Heart J</source><year>2024</year><month>02</month><day>1</day><volume>45</volume><issue>5</issue><fpage>332</fpage><lpage>345</lpage><pub-id pub-id-type="doi">10.1093/eurheartj/ehad838</pub-id><pub-id pub-id-type="medline">38170821</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gala</surname><given-names>D</given-names> </name><name name-style="western"><surname>Makaryus</surname><given-names>AN</given-names> </name></person-group><article-title>The utility of language models in cardiology: a narrative review of the benefits and concerns of ChatGPT-4</article-title><source>Int J Environ Res Public Health</source><year>2023</year><month>07</month><day>25</day><volume>20</volume><issue>15</issue><fpage>6438</fpage><pub-id pub-id-type="doi">10.3390/ijerph20156438</pub-id><pub-id pub-id-type="medline">37568980</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sharma</surname><given-names>A</given-names> </name><name name-style="western"><surname>Medapalli</surname><given-names>T</given-names> </name><name name-style="western"><surname>Alexandrou</surname><given-names>M</given-names> </name><name name-style="western"><surname>Brilakis</surname><given-names>E</given-names> </name><name name-style="western"><surname>Prasad</surname><given-names>A</given-names> </name></person-group><article-title>Exploring the role of ChatGPT in cardiology: a systematic review of the current literature</article-title><source>Cureus</source><year>2024</year><volume>16</volume><issue>4</issue><pub-id pub-id-type="doi">10.7759/cureus.58936</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Moher</surname><given-names>D</given-names> </name><name name-style="western"><surname>Shamseer</surname><given-names>L</given-names> </name><name 
name-style="western"><surname>Clarke</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Preferred Reporting Items for Systematic Review and Meta-Analysis Protocols (PRISMA-P) 2015 statement</article-title><source>Syst Rev</source><year>2015</year><month>01</month><day>1</day><volume>4</volume><issue>1</issue><fpage>1</fpage><pub-id pub-id-type="doi">10.1186/2046-4053-4-1</pub-id><pub-id pub-id-type="medline">25554246</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Schiavo</surname><given-names>JH</given-names> </name></person-group><article-title>PROSPERO: an international register of systematic review protocols</article-title><source>Med Ref Serv Q</source><year>2019</year><volume>38</volume><issue>2</issue><fpage>171</fpage><lpage>180</lpage><pub-id pub-id-type="doi">10.1080/02763869.2019.1588072</pub-id><pub-id pub-id-type="medline">31173570</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Whiting</surname><given-names>PF</given-names> </name><name name-style="western"><surname>Rutjes</surname><given-names>AWS</given-names> </name><name name-style="western"><surname>Westwood</surname><given-names>ME</given-names> </name><etal/></person-group><article-title>QUADAS-2: a revised tool for the quality assessment of diagnostic accuracy studies</article-title><source>Ann Intern Med</source><year>2011</year><month>10</month><day>18</day><volume>155</volume><issue>8</issue><fpage>529</fpage><lpage>536</lpage><pub-id pub-id-type="doi">10.7326/0003-4819-155-8-201110180-00009</pub-id><pub-id pub-id-type="medline">22007046</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gritti</surname><given-names>MN</given-names> </name><name name-style="western"><surname>AlTurki</surname><given-names>H</given-names> </name><name name-style="western"><surname>Farid</surname><given-names>P</given-names> </name><name name-style="western"><surname>Morgan</surname><given-names>CT</given-names> </name></person-group><article-title>Progression of an artificial intelligence chatbot (ChatGPT) for pediatric cardiology educational knowledge assessment</article-title><source>Pediatr Cardiol</source><year>2024</year><month>02</month><volume>45</volume><issue>2</issue><fpage>309</fpage><lpage>313</lpage><pub-id pub-id-type="doi">10.1007/s00246-023-03385-6</pub-id><pub-id pub-id-type="medline">38170274</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>King</surname><given-names>RC</given-names> </name><name name-style="western"><surname>Bharani</surname><given-names>V</given-names> </name><name name-style="western"><surname>Shah</surname><given-names>K</given-names> </name><name name-style="western"><surname>Yeo</surname><given-names>YH</given-names> </name><name name-style="western"><surname>Samaan</surname><given-names>JS</given-names> </name></person-group><article-title>GPT-4V passes the BLS and ACLS examinations: an analysis of GPT-4V&#x2019;s image recognition 
capabilities</article-title><source>Resuscitation</source><year>2024</year><month>02</month><volume>195</volume><fpage>110106</fpage><pub-id pub-id-type="doi">10.1016/j.resuscitation.2023.110106</pub-id><pub-id pub-id-type="medline">38160904</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yavuz</surname><given-names>YE</given-names> </name><name name-style="western"><surname>Kahraman</surname><given-names>F</given-names> </name></person-group><article-title>Evaluation of the prediagnosis and management of ChatGPT-4.0 in clinical cases in cardiology</article-title><source>Future Cardiol</source><year>2024</year><month>03</month><day>11</day><volume>20</volume><issue>4</issue><fpage>197</fpage><lpage>207</lpage><pub-id pub-id-type="doi">10.1080/14796678.2024.2348898</pub-id><pub-id pub-id-type="medline">39049771</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Skalidis</surname><given-names>I</given-names> </name><name name-style="western"><surname>Cagnina</surname><given-names>A</given-names> </name><name name-style="western"><surname>Luangphiphat</surname><given-names>W</given-names> </name><etal/></person-group><article-title>ChatGPT takes on the European Exam in Core Cardiology: an artificial intelligence success story?</article-title><source>Eur Heart J Digit Health</source><year>2023</year><month>05</month><volume>4</volume><issue>3</issue><fpage>279</fpage><lpage>281</lpage><pub-id pub-id-type="doi">10.1093/ehjdh/ztad029</pub-id><pub-id pub-id-type="medline">37265864</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Moons</surname><given-names>P</given-names> </name><name name-style="western"><surname>Van Bulck</surname><given-names>L</given-names> </name></person-group><article-title>Using ChatGPT and Google Bard to improve the readability of written patient information: a proof of concept</article-title><source>Eur J Cardiovasc Nurs</source><year>2024</year><month>03</month><day>12</day><volume>23</volume><issue>2</issue><fpage>122</fpage><lpage>126</lpage><pub-id pub-id-type="doi">10.1093/eurjcn/zvad087</pub-id><pub-id pub-id-type="medline">37603843</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lautrup</surname><given-names>AD</given-names> </name><name name-style="western"><surname>Hyrup</surname><given-names>T</given-names> </name><name name-style="western"><surname>Schneider-Kamp</surname><given-names>A</given-names> </name><name name-style="western"><surname>Dahl</surname><given-names>M</given-names> </name><name name-style="western"><surname>Lindholt</surname><given-names>JS</given-names> </name><name name-style="western"><surname>Schneider-Kamp</surname><given-names>P</given-names> </name></person-group><article-title>Heart-to-heart with ChatGPT: the impact of patients consulting AI for cardiovascular health advice</article-title><source>Open Heart</source><year>2023</year><month>11</month><volume>10</volume><issue>2</issue><fpage>e002455</fpage><pub-id pub-id-type="doi">10.1136/openhrt-2023-002455</pub-id><pub-id 
pub-id-type="medline">37945282</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>G&#x00FC;nay</surname><given-names>S</given-names> </name><name name-style="western"><surname>Yi&#x011F;it</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Halhalli</surname><given-names>HC</given-names> </name><name name-style="western"><surname>Tulgar</surname><given-names>S</given-names> </name><name name-style="western"><surname>Alkahlout</surname><given-names>BH</given-names> </name><name name-style="western"><surname>Azad</surname><given-names>AM</given-names> </name></person-group><article-title>AI in patient education: assessing the impact of ChatGPT-4 on conveying comprehensive information about chest pain</article-title><source>Am J Emerg Med</source><year>2024</year><month>03</month><volume>77</volume><fpage>220</fpage><lpage>221</lpage><pub-id pub-id-type="doi">10.1016/j.ajem.2023.12.047</pub-id><pub-id pub-id-type="medline">38242775</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bushuven</surname><given-names>S</given-names> </name><name name-style="western"><surname>Bentele</surname><given-names>M</given-names> </name><name name-style="western"><surname>Bentele</surname><given-names>S</given-names> </name><etal/></person-group><article-title>&#x201C;ChatGPT, Can You Help Me Save My Child&#x2019;s Life?&#x201D; - diagnostic accuracy and supportive capabilities to lay rescuers by ChatGPT in prehospital basic life support and paediatric advanced life support cases - an in-silico analysis</article-title><source>J Med Syst</source><year>2023</year><month>11</month><day>21</day><volume>47</volume><issue>1</issue><fpage>123</fpage><pub-id pub-id-type="doi">10.1007/s10916-023-02019-x</pub-id><pub-id pub-id-type="medline">37987870</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yano</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Nishiyama</surname><given-names>A</given-names> </name><name name-style="western"><surname>Suzuki</surname><given-names>Y</given-names> </name><etal/></person-group><article-title>Relevance of ChatGPT&#x2019;s responses to common hypertension-related patient inquiries</article-title><source>Hypertension</source><year>2024</year><month>01</month><volume>81</volume><issue>1</issue><fpage>e1</fpage><lpage>e4</lpage><pub-id pub-id-type="doi">10.1161/HYPERTENSIONAHA.123.22084</pub-id><pub-id pub-id-type="medline">37916418</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Li</surname><given-names>P</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>X</given-names> </name><name name-style="western"><surname>Zhu</surname><given-names>E</given-names> </name><etal/></person-group><article-title>Potential multidisciplinary use of large language models for addressing queries in cardio-oncology</article-title><source>J Am Heart Assoc</source><year>2024</year><month>03</month><day>19</day><volume>13</volume><issue>6</issue><fpage>e033584</fpage><pub-id 
pub-id-type="doi">10.1161/JAHA.123.033584</pub-id><pub-id pub-id-type="medline">38497458</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ali</surname><given-names>MM</given-names> </name><name name-style="western"><surname>Gandhi</surname><given-names>S</given-names> </name><name name-style="western"><surname>Sulaiman</surname><given-names>S</given-names> </name><name name-style="western"><surname>Jafri</surname><given-names>SH</given-names> </name><name name-style="western"><surname>Ali</surname><given-names>AS</given-names> </name></person-group><article-title>Mapping the heartbeat of America with ChatGPT-4: unpacking the interplay of social vulnerability, digital literacy, and cardiovascular mortality in county residency choices</article-title><source>J Pers Med</source><year>2023</year><month>11</month><day>21</day><volume>13</volume><issue>12</issue><fpage>1625</fpage><pub-id pub-id-type="doi">10.3390/jpm13121625</pub-id><pub-id pub-id-type="medline">38138852</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Han</surname><given-names>C</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>DW</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Evaluation of GPT-4 for 10-year cardiovascular risk prediction: insights from the UK Biobank and KoGES data</article-title><source>iScience</source><year>2024</year><month>02</month><day>16</day><volume>27</volume><issue>2</issue><fpage>109022</fpage><pub-id pub-id-type="doi">10.1016/j.isci.2024.109022</pub-id><pub-id pub-id-type="medline">38357664</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kassab</surname><given-names>J</given-names> </name><name name-style="western"><surname>Kapadia</surname><given-names>V</given-names> </name><name name-style="western"><surname>Massad</surname><given-names>C</given-names> </name><etal/></person-group><article-title>Comparative analysis of chat-based artificial intelligence models in addressing common and challenging valvular heart disease clinical scenarios</article-title><source>J Am Heart Assoc</source><year>2023</year><month>11</month><day>21</day><volume>12</volume><issue>22</issue><fpage>e031787</fpage><pub-id pub-id-type="doi">10.1161/JAHA.123.031787</pub-id><pub-id pub-id-type="medline">37982246</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sarangi</surname><given-names>PK</given-names> </name><name name-style="western"><surname>Irodi</surname><given-names>A</given-names> </name><name name-style="western"><surname>Panda</surname><given-names>S</given-names> </name><name name-style="western"><surname>Nayak</surname><given-names>DSK</given-names> </name><name name-style="western"><surname>Mondal</surname><given-names>H</given-names> </name></person-group><article-title>Radiological differential diagnoses based on cardiovascular and thoracic imaging patterns: perspectives of four large language models</article-title><source>Indian J Radiol 
Imaging</source><year>2024</year><month>04</month><volume>34</volume><issue>2</issue><fpage>269</fpage><lpage>275</lpage><pub-id pub-id-type="doi">10.1055/s-0043-1777289</pub-id><pub-id pub-id-type="medline">38549881</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Dimitriadis</surname><given-names>F</given-names> </name><name name-style="western"><surname>Alkagiet</surname><given-names>S</given-names> </name><name name-style="western"><surname>Tsigkriki</surname><given-names>L</given-names> </name><etal/></person-group><article-title>ChatGPT and patients with heart failure</article-title><source>Angiol Open Access</source><year>2025</year><month>09</month><volume>76</volume><issue>8</issue><fpage>796</fpage><lpage>801</lpage><pub-id pub-id-type="doi">10.1177/00033197241238403</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kangiszer</surname><given-names>G</given-names> </name><name name-style="western"><surname>Mahtani</surname><given-names>AU</given-names> </name><name name-style="western"><surname>Pintea</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Low performance of ChatGPT on echocardiography board review questions</article-title><source>JACC Cardiovasc Imaging</source><year>2024</year><month>03</month><volume>17</volume><issue>3</issue><fpage>330</fpage><lpage>332</lpage><pub-id pub-id-type="doi">10.1016/j.jcmg.2023.09.004</pub-id><pub-id pub-id-type="medline">37943230</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>G&#x00FC;nay</surname><given-names>S</given-names> </name><name name-style="western"><surname>&#x00D6;zt&#x00FC;rk</surname><given-names>A</given-names> </name><name name-style="western"><surname>&#x00D6;zerol</surname><given-names>H</given-names> </name><name name-style="western"><surname>Yi&#x011F;it</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Erenler</surname><given-names>AK</given-names> </name></person-group><article-title>Comparison of emergency medicine specialist, cardiologist, and chat-GPT in electrocardiography assessment</article-title><source>Am J Emerg Med</source><year>2024</year><month>06</month><volume>80</volume><fpage>51</fpage><lpage>60</lpage><pub-id pub-id-type="doi">10.1016/j.ajem.2024.03.017</pub-id><pub-id pub-id-type="medline">38507847</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Fija&#x010D;ko</surname><given-names>N</given-names> </name><name name-style="western"><surname>Prosen</surname><given-names>G</given-names> </name><name name-style="western"><surname>Abella</surname><given-names>BS</given-names> </name><name name-style="western"><surname>Metli&#x010D;ar</surname><given-names>&#x0160;</given-names> </name><name name-style="western"><surname>&#x0160;tiglic</surname><given-names>G</given-names> </name></person-group><article-title>Can novel multimodal chatbots such as Bing Chat Enterprise, ChatGPT-4 Pro, and Google Bard correctly interpret electrocardiogram 
images?</article-title><source>Resuscitation</source><year>2023</year><month>12</month><volume>193</volume><fpage>110009</fpage><pub-id pub-id-type="doi">10.1016/j.resuscitation.2023.110009</pub-id><pub-id pub-id-type="medline">37884222</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhu</surname><given-names>L</given-names> </name><name name-style="western"><surname>Mou</surname><given-names>W</given-names> </name><name name-style="western"><surname>Wu</surname><given-names>K</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>J</given-names> </name><name name-style="western"><surname>Luo</surname><given-names>P</given-names> </name></person-group><article-title>Can DALL-E 3 reliably generate 12-lead ECGs and teaching illustrations?</article-title><source>Cureus</source><year>2024</year><month>01</month><volume>16</volume><issue>1</issue><fpage>e52748</fpage><pub-id pub-id-type="doi">10.7759/cureus.52748</pub-id><pub-id pub-id-type="medline">38384621</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mehta</surname><given-names>S</given-names> </name></person-group><article-title>Exploring angina: a fascinating chat with ChatGPT</article-title><source>Curr Probl Cardiol</source><year>2024</year><month>03</month><volume>49</volume><issue>3</issue><fpage>102393</fpage><pub-id pub-id-type="doi">10.1016/j.cpcardiol.2024.102393</pub-id><pub-id pub-id-type="medline">38232926</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Riddell</surname><given-names>CW</given-names> </name><name name-style="western"><surname>Chan</surname><given-names>C</given-names> </name><name name-style="western"><surname>McGrinder</surname><given-names>H</given-names> </name><name name-style="western"><surname>Earle</surname><given-names>NJ</given-names> </name><name name-style="western"><surname>Poppe</surname><given-names>KK</given-names> </name><name name-style="western"><surname>Doughty</surname><given-names>RN</given-names> </name></person-group><article-title>College-level reading is required to understand ChatGPT&#x2019;s answers to lay questions relating to heart failure</article-title><source>Eur J Heart Fail</source><year>2023</year><month>12</month><volume>25</volume><issue>12</issue><fpage>2336</fpage><lpage>2337</lpage><pub-id pub-id-type="doi">10.1002/ejhf.3083</pub-id><pub-id pub-id-type="medline">37964183</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Krittanawong</surname><given-names>C</given-names> </name><name name-style="western"><surname>Rodriguez</surname><given-names>M</given-names> </name><name name-style="western"><surname>Kaplin</surname><given-names>S</given-names> </name><name name-style="western"><surname>Tang</surname><given-names>WHW</given-names> </name></person-group><article-title>Assessing the potential of ChatGPT for patient education in the cardiology clinic</article-title><source>Prog Cardiovasc Dis</source><year>2023</year><volume>81</volume><fpage>109</fpage><lpage>110</lpage><pub-id pub-id-type="doi">10.1016/j.pcad.2023.10.002</pub-id><pub-id 
pub-id-type="medline">37832625</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rouhi</surname><given-names>AD</given-names> </name><name name-style="western"><surname>Ghanem</surname><given-names>YK</given-names> </name><name name-style="western"><surname>Yolchieva</surname><given-names>L</given-names> </name><etal/></person-group><article-title>Can artificial intelligence improve the readability of patient education materials on aortic stenosis? A pilot study</article-title><source>Cardiol Ther</source><year>2024</year><month>03</month><volume>13</volume><issue>1</issue><fpage>137</fpage><lpage>147</lpage><pub-id pub-id-type="doi">10.1007/s40119-023-00347-0</pub-id><pub-id pub-id-type="medline">38194058</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hillmann</surname><given-names>HAK</given-names> </name><name name-style="western"><surname>Angelini</surname><given-names>E</given-names> </name><name name-style="western"><surname>Karfoul</surname><given-names>N</given-names> </name><name name-style="western"><surname>Feickert</surname><given-names>S</given-names> </name><name name-style="western"><surname>Mueller-Leisse</surname><given-names>J</given-names> </name><name name-style="western"><surname>Duncker</surname><given-names>D</given-names> </name></person-group><article-title>Accuracy and comprehensibility of chat-based artificial intelligence for patient information on atrial fibrillation and cardiac implantable electronic devices</article-title><source>Europace</source><year>2023</year><month>12</month><day>28</day><volume>26</volume><issue>1</issue><fpage>euad369</fpage><pub-id pub-id-type="doi">10.1093/europace/euad369</pub-id><pub-id pub-id-type="medline">38127304</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Van Bulck</surname><given-names>L</given-names> </name><name name-style="western"><surname>Moons</surname><given-names>P</given-names> </name></person-group><article-title>What if your patient switches from Dr. Google to Dr. ChatGPT? 
A vignette-based survey of the trustworthiness, value, and danger of ChatGPT-generated responses to health questions</article-title><source>Eur J Cardiovasc Nurs</source><year>2024</year><month>01</month><day>12</day><volume>23</volume><issue>1</issue><fpage>95</fpage><lpage>98</lpage><pub-id pub-id-type="doi">10.1093/eurjcn/zvad038</pub-id><pub-id pub-id-type="medline">37094282</pub-id></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Birkun</surname><given-names>AA</given-names> </name><name name-style="western"><surname>Gautam</surname><given-names>A</given-names> </name></person-group><article-title>Large language model-based chatbot as a source of advice on first aid in heart attack</article-title><source>Curr Probl Cardiol</source><year>2024</year><month>01</month><volume>49</volume><issue>1 Pt A</issue><fpage>102048</fpage><pub-id pub-id-type="doi">10.1016/j.cpcardiol.2023.102048</pub-id><pub-id pub-id-type="medline">37640177</pub-id></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Scquizzato</surname><given-names>T</given-names> </name><name name-style="western"><surname>Semeraro</surname><given-names>F</given-names> </name><name name-style="western"><surname>Swindell</surname><given-names>P</given-names> </name><etal/></person-group><article-title>Testing ChatGPT ability to answer laypeople questions about cardiac arrest and cardiopulmonary resuscitation</article-title><source>Resuscitation</source><year>2024</year><month>01</month><volume>194</volume><fpage>110077</fpage><pub-id pub-id-type="doi">10.1016/j.resuscitation.2023.110077</pub-id><pub-id pub-id-type="medline">38081504</pub-id></nlm-citation></ref><ref id="ref40"><label>40</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Safranek</surname><given-names>CW</given-names> </name><name name-style="western"><surname>Huang</surname><given-names>T</given-names> </name><name name-style="western"><surname>Wright</surname><given-names>DS</given-names> </name><etal/></person-group><article-title>Automated HEART score determination via ChatGPT: honing a framework for iterative prompt development</article-title><source>J Am Coll Emerg Physicians Open</source><year>2024</year><month>04</month><volume>5</volume><issue>2</issue><fpage>e13133</fpage><pub-id pub-id-type="doi">10.1002/emp2.13133</pub-id><pub-id pub-id-type="medline">38481520</pub-id></nlm-citation></ref><ref id="ref41"><label>41</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Harskamp</surname><given-names>RE</given-names> </name><name name-style="western"><surname>De Clercq</surname><given-names>L</given-names> </name></person-group><article-title>Performance of ChatGPT as an AI-assisted decision support tool in medicine: a proof-of-concept study for interpreting symptoms and management of common cardiac conditions (AMSTELHEART-2)</article-title><source>Acta Cardiol</source><year>2024</year><month>05</month><volume>79</volume><issue>3</issue><fpage>358</fpage><lpage>366</lpage><pub-id pub-id-type="doi">10.1080/00015385.2024.2303528</pub-id><pub-id pub-id-type="medline">38348835</pub-id></nlm-citation></ref><ref id="ref42"><label>42</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>Zhu</surname><given-names>L</given-names> </name><name name-style="western"><surname>Mou</surname><given-names>W</given-names> </name><name name-style="western"><surname>Yang</surname><given-names>T</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>R</given-names> </name></person-group><article-title>ChatGPT can pass the AHA exams: open-ended questions outperform multiple-choice format</article-title><source>Resuscitation</source><year>2023</year><month>07</month><volume>188</volume><fpage>109783</fpage><pub-id pub-id-type="doi">10.1016/j.resuscitation.2023.109783</pub-id><pub-id pub-id-type="medline">37349064</pub-id></nlm-citation></ref><ref id="ref43"><label>43</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Birkun</surname><given-names>A</given-names> </name></person-group><article-title>Performance of an artificial intelligence-based chatbot when acting as EMS dispatcher in a cardiac arrest scenario</article-title><source>Intern Emerg Med</source><year>2023</year><month>11</month><volume>18</volume><issue>8</issue><fpage>2449</fpage><lpage>2452</lpage><pub-id pub-id-type="doi">10.1007/s11739-023-03399-1</pub-id><pub-id pub-id-type="medline">37603142</pub-id></nlm-citation></ref><ref id="ref44"><label>44</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Almagazzachi</surname><given-names>A</given-names> </name><name name-style="western"><surname>Mustafa</surname><given-names>A</given-names> </name><name name-style="western"><surname>Eighaei Sedeh</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Generative artificial intelligence in patient education: ChatGPT takes on hypertension questions</article-title><source>Cureus</source><year>2024</year><month>02</month><volume>16</volume><issue>2</issue><fpage>e53441</fpage><pub-id pub-id-type="doi">10.7759/cureus.53441</pub-id><pub-id pub-id-type="medline">38435177</pub-id></nlm-citation></ref><ref id="ref45"><label>45</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Al Tibi</surname><given-names>G</given-names> </name><name name-style="western"><surname>Alexander</surname><given-names>M</given-names> </name><name name-style="western"><surname>Miller</surname><given-names>S</given-names> </name><name name-style="western"><surname>Chronos</surname><given-names>N</given-names> </name></person-group><article-title>A retrospective comparison of medication recommendations between a cardiologist and ChatGPT-4 for hypertension patients in a rural clinic</article-title><source>Cureus</source><year>2024</year><month>03</month><volume>16</volume><issue>3</issue><fpage>e55789</fpage><pub-id pub-id-type="doi">10.7759/cureus.55789</pub-id><pub-id pub-id-type="medline">38586651</pub-id></nlm-citation></ref><ref id="ref46"><label>46</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kusunose</surname><given-names>K</given-names> </name><name name-style="western"><surname>Kashima</surname><given-names>S</given-names> </name><name name-style="western"><surname>Sata</surname><given-names>M</given-names> </name></person-group><article-title>Evaluation of the accuracy of ChatGPT in answering clinical questions on the Japanese society of hypertension 
guidelines</article-title><source>Circ J</source><year>2023</year><month>06</month><day>23</day><volume>87</volume><issue>7</issue><fpage>1030</fpage><lpage>1033</lpage><pub-id pub-id-type="doi">10.1253/circj.CJ-23-0308</pub-id><pub-id pub-id-type="medline">37286486</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Study characteristics outcomes of large language model applications, literature search strategy, and QUADAS-2 risk of bias assessment.</p><media xlink:href="cardio_v10i1e76734_app1.docx" xlink:title="DOCX File, 282 KB"/></supplementary-material><supplementary-material id="app2"><label>Checklist 1</label><p>PRISMA 2020 checklist.</p><media xlink:href="cardio_v10i1e76734_app2.docx" xlink:title="DOCX File, 23 KB"/></supplementary-material></app-group></back></article>