{"641326":{"#nid":"641326","#data":{"type":"event","title":"Ph.D. Thesis Proposal - A Unified Framework for Finite-Sample Analysis of Reinforcement Learning Algorithms","body":[{"value":"\u003Cp\u003E\u003Cstrong\u003EStudent Name:\u003C\/strong\u003E\u0026nbsp;Zaiwei Chen\u003C\/p\u003E\r\n\r\n\u003Cp\u003EMachine Learning Ph.D. Student\u003C\/p\u003E\r\n\r\n\u003Cp\u003E\u003Cstrong\u003EHome School:\u0026nbsp;\u003C\/strong\u003EAerospace Engineering\u003C\/p\u003E\r\n\r\n\u003Cp\u003EGeorgia Institute of Technology\u003C\/p\u003E\r\n\r\n\u003Ch5\u003E\u003Cstrong\u003ECommittee\u003C\/strong\u003E\u003C\/h5\u003E\r\n\r\n\u003Cp\u003E1 Dr. John-Paul Clarke (Advisor, School of Industrial and Systems Engineering, School of Aerospace Engineering, Georgia Institute of Technology)\u003C\/p\u003E\r\n\r\n\u003Cp\u003E2 Dr. Siva Theja Maguluri (Co-advisor, School of Industrial and Systems Engineering, Georgia Institute of Technology)\u003C\/p\u003E\r\n\r\n\u003Cp\u003E3 Dr. Justin Romberg (School of Electrical and Computer Engineering, Georgia Institute of Technology)\u003C\/p\u003E\r\n\r\n\u003Cp\u003E4 Dr. Benjamin Van Roy (Department of Electrical Engineering, Department of Management Science \u0026amp; Engineering, Stanford University) (external)\u003C\/p\u003E\r\n\r\n\u003Ch5\u003E\u003Cstrong\u003EAbstract\u003C\/strong\u003E\u003C\/h5\u003E\r\n\r\n\u003Cp\u003EReinforcement Learning (RL) captures an important facet of machine learning going beyond prediction and regression: sequential decision making, and has had a great impact on various problems of practical interest. The goal of this proposed thesis is to provide theoretical performance guarantees of RL algorithms. Specifically, we develop a universal approach for establishing finite-sample convergence bounds of RL algorithms when using tabular representation and when using function approximation. \nTo achieve that, we consider general stochastic approximation algorithms and study their convergence bounds using a novel Lyapunov approach. The results enable us to gain insight into the behavior of RL algorithms.\u003C\/p\u003E\r\n","summary":null,"format":"limited_html"}],"field_subtitle":"","field_summary":"","field_summary_sentence":[{"value":"ML Ph.D. student Zaiwei Chen presents his thesis proposal."}],"uid":"34773","created_gmt":"2020-11-16 14:49:37","changed_gmt":"2020-11-16 14:49:37","author":"ablinder6","boilerplate_text":"","field_publication":"","field_article_url":"","field_event_time":{"event_time_start":"2020-11-16T16:30:00-05:00","event_time_end":"2020-11-16T18:00:00-05:00","event_time_end_last":"2020-11-16T18:00:00-05:00","gmt_time_start":"2020-11-16 21:30:00","gmt_time_end":"2020-11-16 23:00:00","gmt_time_end_last":"2020-11-16 23:00:00","rrule":null,"timezone":"America\/New_York"},"extras":[],"groups":[{"id":"1299","name":"GVU Center"},{"id":"576481","name":"ML@GT"}],"categories":[],"keywords":[],"core_research_areas":[],"news_room_topics":[],"event_categories":[],"invited_audience":[{"id":"78761","name":"Faculty\/Staff"},{"id":"177814","name":"Postdoc"},{"id":"78771","name":"Public"},{"id":"174045","name":"Graduate students"},{"id":"78751","name":"Undergraduate students"}],"affiliations":[],"classification":[],"areas_of_expertise":[],"news_and_recent_appearances":[],"phone":[],"contact":[],"email":[],"slides":[],"orientation":[],"userdata":""}}}