% Bibliography records for JMIR-family journal articles on AI / machine
% learning in cardiovascular care.
%
% Conventions used in this file:
%   - one field per line, brace-delimited values;
%   - a single comma-separated keywords field per entry (repeated fields of
%     the same name are dropped by BibTeX with a warning);
%   - url holds the publisher landing page, pmid holds the PubMed identifier;
%   - month uses the standard three-letter macros (unquoted);
%   - acronyms in titles are brace-protected against style recasing.

% Research letter without an abstract in the export.
@article{info:doi/10.2196/54242,
  author   = {Achtari, Margaux and Salihu, Adil and Muller, Olivier and Abb{\'e}, Emmanuel and Clair, Carole and Schwarz, Jo{\"e}lle and Fournier, Stephane},
  title    = {Gender Bias in {AI}'s Perception of Cardiovascular Risk},
  journal  = {Journal of Medical Internet Research},
  year     = {2024},
  month    = oct,
  day      = {22},
  volume   = {26},
  pages    = {e54242},
  keywords = {artificial intelligence, gender equity, coronary artery disease, AI, cardiovascular, risk, CAD, artery, coronary, chatbot: health care, men: women, gender bias, gender},
  doi      = {10.2196/54242},
  url      = {https://www.jmir.org/2024/1/e54242},
}

% Minus signs in the confidence intervals are written as $-$ and the
% less-than sign as $<$ so they typeset correctly under any font encoding.
@article{info:doi/10.2196/51916,
  author   = {Leitner, Jared and Chiang, Po-Han and Agnihotri, Parag and Dey, Sujit},
  title    = {The Effect of an {AI}-Based, Autonomous, Digital Health Intervention Using Precise Lifestyle Guidance on Blood Pressure in Adults With Hypertension: Single-Arm Nonrandomized Trial},
  journal  = {JMIR Cardio},
  year     = {2024},
  month    = may,
  day      = {28},
  volume   = {8},
  pages    = {e51916},
  keywords = {blood pressure, hypertension, digital health, lifestyle change, lifestyle medicine, wearables, remote patient monitoring, artificial intelligence, AI, mobile phone},
  abstract = {Background: Home blood pressure (BP) monitoring with lifestyle coaching is effective in managing hypertension and reducing cardiovascular risk. However, traditional manual lifestyle coaching models significantly limit availability due to high operating costs and personnel requirements. Furthermore, the lack of patient lifestyle monitoring and clinician time constraints can prevent personalized coaching on lifestyle modifications.
    Objective: This study assesses the effectiveness of a fully digital, autonomous, and artificial intelligence (AI)--based lifestyle coaching program on achieving BP control among adults with hypertension.
    Methods: Participants were enrolled in a single-arm nonrandomized trial in which they received a BP monitor and wearable activity tracker. Data were collected from these devices and a questionnaire mobile app, which were used to train personalized machine learning models that enabled precision lifestyle coaching delivered to participants via SMS text messaging and a mobile app. The primary outcomes included (1) the changes in systolic and diastolic BP from baseline to 12 and 24 weeks and (2) the percentage change of participants in the controlled, stage-1, and stage-2 hypertension categories from baseline to 12 and 24 weeks. Secondary outcomes included (1) the participant engagement rate as measured by data collection consistency and (2) the number of manual clinician outreaches.
    Results: In total, 141 participants were monitored over 24 weeks. At 12 weeks, systolic and diastolic BP decreased by 5.6 mm Hg (95\% CI $-$7.1 to $-$4.2; P$<$.001) and 3.8 mm Hg (95\% CI $-$4.7 to $-$2.8; P$<$.001), respectively. Particularly, for participants starting with stage-2 hypertension, systolic and diastolic BP decreased by 9.6 mm Hg (95\% CI $-$12.2 to $-$6.9; P$<$.001) and 5.7 mm Hg (95\% CI $-$7.6 to $-$3.9; P$<$.001), respectively. At 24 weeks, systolic and diastolic BP decreased by 8.1 mm Hg (95\% CI $-$10.1 to $-$6.1; P$<$.001) and 5.1 mm Hg (95\% CI $-$6.2 to $-$3.9; P$<$.001), respectively. For participants starting with stage-2 hypertension, systolic and diastolic BP decreased by 14.2 mm Hg (95\% CI $-$17.7 to $-$10.7; P$<$.001) and 8.1 mm Hg (95\% CI $-$10.4 to $-$5.7; P$<$.001), respectively, at 24 weeks. The percentage of participants with controlled BP increased by 17.2\% (22/128; P$<$.001) and 26.5\% (27/102; P$<$.001) from baseline to 12 and 24 weeks, respectively. The percentage of participants with stage-2 hypertension decreased by 25\% (32/128; P$<$.001) and 26.5\% (27/102; P$<$.001) from baseline to 12 and 24 weeks, respectively. The average weekly participant engagement rate was 92\% (SD 3.9\%), and only 5.9\% (6/102) of the participants required manual outreach over 24 weeks.
    Conclusions: The study demonstrates the potential of fully digital, autonomous, and AI-based lifestyle coaching to achieve meaningful BP improvements and high engagement for patients with hypertension while substantially reducing clinician workloads.
    Trial Registration: ClinicalTrials.gov NCT06337734; https://clinicaltrials.gov/study/NCT06337734},
  doi      = {10.2196/51916},
  url      = {https://cardio.jmir.org/2024/1/e51916},
  pmid     = {38805253},
}

@article{info:doi/10.2196/53091,
  author   = {Shara, Nawar and Mirabal-Beltran, Roxanne and Talmadge, Bethany and Falah, Noor and Ahmad, Maryam and Dempers, Ramon and Crovatt, Samantha and Eisenberg, Steven and Anderson, Kelley},
  title    = {Use of Machine Learning for Early Detection of Maternal Cardiovascular Conditions: Retrospective Study Using Electronic Health Record Data},
  journal  = {JMIR Cardio},
  year     = {2024},
  month    = apr,
  day      = {22},
  volume   = {8},
  pages    = {e53091},
  keywords = {machine learning, preeclampsia, cardiovascular, maternal, obstetrics, health disparities, woman, women, pregnancy, pregnant, cardiovascular condition, retrospective study, electronic health record, EHR, technology, decision-making, health disparity, virtual server, thromboembolism, kidney failure, HOPE-CAT},
  abstract = {Background: Cardiovascular conditions (eg, cardiac and coronary conditions, hypertensive disorders of pregnancy, and cardiomyopathies) were the leading cause of maternal mortality between 2017 and 2019. The United States has the highest maternal mortality rate of any high-income nation, disproportionately impacting those who identify as non-Hispanic Black or Hispanic. Novel clinical approaches to the detection and diagnosis of cardiovascular conditions are therefore imperative. Emerging research is demonstrating that machine learning (ML) is a promising tool for detecting patients at increased risk for hypertensive disorders during pregnancy. However, additional studies are required to determine how integrating ML and big data, such as electronic health records (EHRs), can improve the identification of obstetric patients at higher risk of cardiovascular conditions.
    Objective: This study aimed to evaluate the capability and timing of a proprietary ML algorithm, Healthy Outcomes for all Pregnancy Experiences-Cardiovascular-Risk Assessment Technology (HOPE-CAT), to detect maternal-related cardiovascular conditions and outcomes.
    Methods: Retrospective data from the EHRs of a large health care system were investigated by HOPE-CAT in a virtual server environment. Deidentification of EHR data and standardization enabled HOPE-CAT to analyze data without pre-existing biases. The ML algorithm assessed risk factors selected by clinical experts in cardio-obstetrics, and the algorithm was iteratively trained using relevant literature and current standards of risk identification. After refinement of the algorithm's learned risk factors, risk profiles were generated for every patient including a designation of standard versus high risk. The profiles were individually paired with clinical outcomes pertaining to cardiovascular pregnancy conditions and complications, wherein a delta was calculated between the date of the risk profile and the actual diagnosis or intervention in the EHR.
    Results: In total, 604 pregnancies resulting in birth had records or diagnoses that could be compared against the risk profile; the majority of patients identified as Black (n=482, 79.8\%) and aged between 21 and 34 years (n=509, 84.4\%). Preeclampsia (n=547, 90.6\%) was the most common condition, followed by thromboembolism (n=16, 2.7\%) and acute kidney disease or failure (n=13, 2.2\%). The average delta was 56.8 (SD 69.7) days between the identification of risk factors by HOPE-CAT and the first date of diagnosis or intervention of a related condition reported in the EHR. HOPE-CAT showed the strongest performance in early risk detection of myocardial infarction at a delta of 65.7 (SD 81.4) days.
    Conclusions: This study provides additional evidence to support ML in obstetrical patients to enhance the early detection of cardiovascular conditions during pregnancy. ML can synthesize multiday patient presentations to enhance provider decision-making and potentially reduce maternal health disparities.},
  doi      = {10.2196/53091},
  url      = {https://cardio.jmir.org/2024/1/e53091},
  pmid     = {38648629},
}

% Author names: the export listed the middle initial before the given name
% (e.g. "Kunkel, C. David"). The order below matches the reviewer initials
% cited in the abstract (DCK, AAH).
% NOTE(review): King, Samaan and Yeo were reordered by the same pattern;
% confirm against the article byline.
@article{info:doi/10.2196/53421,
  author   = {King, Ryan C. and Samaan, Jamil S. and Yeo, Yee Hui and Peng, Yuxin and Kunkel, David C. and Habib, Ali A. and Ghashghaei, Roxana},
  title    = {A Multidisciplinary Assessment of {ChatGPT}'s Knowledge of Amyloidosis: Observational Study},
  journal  = {JMIR Cardio},
  year     = {2024},
  month    = apr,
  day      = {19},
  volume   = {8},
  pages    = {e53421},
  keywords = {amyloidosis, ChatGPT, large language models, cardiology, gastroenterology, neurology, artificial intelligence, multidisciplinary care, assessment, patient education, large language model, accuracy, reliability, accessibility, educational resources, dissemination, gastroenterologist, cardiologist, medical society, institution, institutions, Facebook, neurologist, reproducibility, amyloidosis-related},
  abstract = {Background: Amyloidosis, a rare multisystem condition, often requires complex, multidisciplinary care. Its low prevalence underscores the importance of efforts to ensure the availability of high-quality patient education materials for better outcomes. ChatGPT (OpenAI) is a large language model powered by artificial intelligence that offers a potential avenue for disseminating accurate, reliable, and accessible educational resources for both patients and providers. Its user-friendly interface, engaging conversational responses, and the capability for users to ask follow-up questions make it a promising future tool in delivering accurate and tailored information to patients.
    Objective: We performed a multidisciplinary assessment of the accuracy, reproducibility, and readability of ChatGPT in answering questions related to amyloidosis.
    Methods: In total, 98 amyloidosis questions related to cardiology, gastroenterology, and neurology were curated from medical societies, institutions, and amyloidosis Facebook support groups and inputted into ChatGPT-3.5 and ChatGPT-4. Cardiology- and gastroenterology-related responses were independently graded by a board-certified cardiologist and gastroenterologist, respectively, who specialize in amyloidosis. These 2 reviewers (RG and DCK) also graded general questions for which disagreements were resolved with discussion. Neurology-related responses were graded by a board-certified neurologist (AAH) who specializes in amyloidosis. Reviewers used the following grading scale: (1) comprehensive, (2) correct but inadequate, (3) some correct and some incorrect, and (4) completely incorrect. Questions were stratified by categories for further analysis. Reproducibility was assessed by inputting each question twice into each model. The readability of ChatGPT-4 responses was also evaluated using the Textstat library in Python (Python Software Foundation) and the Textstat readability package in R software (R Foundation for Statistical Computing).
    Results: ChatGPT-4 (n=98) provided 93 (95\%) responses with accurate information, and 82 (84\%) were comprehensive. ChatGPT-3.5 (n=83) provided 74 (89\%) responses with accurate information, and 66 (79\%) were comprehensive. When examined by question category, ChatGPT-4 and ChatGPT-3.5 provided 53 (95\%) and 48 (86\%) comprehensive responses, respectively, to ``general questions'' (n=56). When examined by subject, ChatGPT-4 and ChatGPT-3.5 performed best in response to cardiology questions (n=12) with both models producing 10 (83\%) comprehensive responses. For gastroenterology (n=15), ChatGPT-4 received comprehensive grades for 9 (60\%) responses, and ChatGPT-3.5 provided 8 (53\%) responses. Overall, 96 of 98 (98\%) responses for ChatGPT-4 and 73 of 83 (88\%) for ChatGPT-3.5 were reproducible. The readability of ChatGPT-4's responses ranged from 10th to beyond graduate US grade levels with an average of 15.5 (SD 1.9).
    Conclusions: Large language models are a promising tool for accurate and reliable health information for patients living with amyloidosis. However, ChatGPT's responses exceeded the American Medical Association's recommended fifth- to sixth-grade reading level. Future studies focusing on improving response accuracy and readability are warranted. Prior to widespread implementation, the technology's limitations and ethical implications must be further explored to ensure patient safety and equitable implementation.},
  doi      = {10.2196/53421},
  url      = {https://cardio.jmir.org/2024/1/e53421},
  pmid     = {38640472},
}

@article{info:doi/10.2196/44732,
  author   = {Ho, Vy and Brown Johnson, Cati and Ghanzouri, Ilies and Amal, Saeed and Asch, Steven and Ross, Elsie},
  title    = {Physician- and Patient-Elicited Barriers and Facilitators to Implementation of a Machine Learning--Based Screening Tool for Peripheral Arterial Disease: Preimplementation Study With Physician and Patient Stakeholders},
  journal  = {JMIR Cardio},
  year     = {2023},
  month    = nov,
  day      = {6},
  volume   = {7},
  pages    = {e44732},
  keywords = {artificial intelligence, cardiovascular disease, machine learning, peripheral arterial disease, preimplementation study},
  abstract = {Background: Peripheral arterial disease (PAD) is underdiagnosed, partially due to a high prevalence of atypical symptoms and a lack of physician and patient awareness. Implementing clinical decision support tools powered by machine learning algorithms may help physicians identify high-risk patients for diagnostic workup.
    Objective: This study aims to evaluate barriers and facilitators to the implementation of a novel machine learning--based screening tool for PAD among physician and patient stakeholders using the Consolidated Framework for Implementation Research (CFIR).
    Methods: We performed semistructured interviews with physicians and patients from the Stanford University Department of Primary Care and Population Health, Division of Cardiology, and Division of Vascular Medicine. Participants answered questions regarding their perceptions toward machine learning and clinical decision support for PAD detection. Rapid thematic analysis was performed using templates incorporating codes from CFIR constructs.
    Results: A total of 12 physicians (6 primary care physicians and 6 cardiovascular specialists) and 14 patients were interviewed. Barriers to implementation arose from 6 CFIR constructs: complexity, evidence strength and quality, relative priority, external policies and incentives, knowledge and beliefs about intervention, and individual identification with the organization. Facilitators arose from 5 CFIR constructs: intervention source, relative advantage, learning climate, patient needs and resources, and knowledge and beliefs about intervention. Physicians felt that a machine learning--powered diagnostic tool for PAD would improve patient care but cited limited time and authority in asking patients to undergo additional screening procedures. Patients were interested in having their physicians use this tool but raised concerns about such technologies replacing human decision-making.
    Conclusions: Patient- and physician-reported barriers toward the implementation of a machine learning--powered PAD diagnostic tool followed four interdependent themes: (1) low familiarity or urgency in detecting PAD; (2) concerns regarding the reliability of machine learning; (3) differential perceptions of responsibility for PAD care among primary care versus specialty physicians; and (4) patient preference for physicians to remain primary interpreters of health care data. Facilitators followed two interdependent themes: (1) enthusiasm for clinical use of the predictive model and (2) willingness to incorporate machine learning into clinical care. Implementation of machine learning--powered diagnostic tools for PAD should leverage provider support while simultaneously educating stakeholders on the importance of early PAD diagnosis. High predictive validity is necessary for machine learning models but not sufficient for implementation.},
  doi      = {10.2196/44732},
  url      = {https://cardio.jmir.org/2023/1/e44732},
  pmid     = {37930755},
}

@article{info:doi/10.2196/51375,
  author   = {de Koning, Enrico and van der Haas, Yvette and Saguna, Saguna and Stoop, Esmee and Bosch, Jan and Beeres, Saskia and Schalij, Martin and Boogers, Mark},
  title    = {{AI} Algorithm to Predict Acute Coronary Syndrome in Prehospital Cardiac Care: Retrospective Cohort Study},
  journal  = {JMIR Cardio},
  year     = {2023},
  month    = oct,
  day      = {31},
  volume   = {7},
  pages    = {e51375},
  keywords = {cardiology, acute coronary syndrome, Hollands Midden Acute Regional Triage--cardiology, prehospital, triage, artificial intelligence, natural language processing, angina, algorithm, overcrowding, emergency department, clinical decision-making, emergency medical service, paramedics},
  abstract = {Background: Overcrowding of hospitals and emergency departments (EDs) is a growing problem. However, not all ED consultations are necessary. For example, 80\% of patients in the ED with chest pain do not have an acute coronary syndrome (ACS). Artificial intelligence (AI) is useful in analyzing (medical) data, and might aid health care workers in prehospital clinical decision-making before patients are presented to the hospital.
    Objective: The aim of this study was to develop an AI model which would be able to predict ACS before patients visit the ED. The model retrospectively analyzed prehospital data acquired by emergency medical services' nurse paramedics.
    Methods: Patients presenting to the emergency medical services with symptoms suggestive of ACS between September 2018 and September 2020 were included. An AI model using a supervised text classification algorithm was developed to analyze data. Data were analyzed for all 7458 patients (mean 68, SD 15 years, 54\% men). Specificity, sensitivity, positive predictive value (PPV), and negative predictive value (NPV) were calculated for control and intervention groups. At first, a machine learning (ML) algorithm (or model) was chosen; afterward, the features needed were selected and then the model was tested and improved using iterative evaluation and in a further step through hyperparameter tuning. Finally, a method was selected to explain the final AI model.
    Results: The AI model had a specificity of 11\% and a sensitivity of 99.5\% whereas usual care had a specificity of 1\% and a sensitivity of 99.5\%. The PPV of the AI model was 15\% and the NPV was 99\%. The PPV of usual care was 13\% and the NPV was 94\%.
    Conclusions: The AI model was able to predict ACS based on retrospective data from the prehospital setting. It led to an increase in specificity (from 1\% to 11\%) and NPV (from 94\% to 99\%) when compared to usual care, with a similar sensitivity. Due to the retrospective nature of this study and the singular focus on ACS it should be seen as a proof-of-concept. Other (possibly life-threatening) diagnoses were not analyzed. Future prospective validation is necessary before implementation.},
  doi      = {10.2196/51375},
  url      = {https://cardio.jmir.org/2023/1/e51375},
  pmid     = {37906226},
}