[{"data":1,"prerenderedAt":11252},["ShallowReactive",2],{"room-adam-optimizer-km":3,"navigation":10410,"search":10460},{"id":4,"title":5,"author":6,"avatar":7,"body":8,"date":10400,"description":10401,"extension":10402,"meta":10403,"navigation":7523,"ogImage":10404,"path":10405,"seo":10406,"stem":10407,"updateSummary":10408,"updatedAt":10400,"__hash__":10409},"content\u002Fkm\u002Frooms\u002Fadam-optimizer.md","ស្វែងយល់ពី Adam Optimizer: GPS នៃការបង្កើត AI Model","Chau Dara - ស្ថាបនិក TFDevs","\u002Fassets\u002Fimg\u002Favatar.jpg",{"type":9,"value":10,"toc":10378},"minimark",[11,33,55,63,90,93,98,103,110,113,165,427,463,467,472,504,508,539,543,550,660,681,683,687,694,701,902,913,915,919,923,929,935,1320,1399,1408,1412,1417,1422,1814,2028,2032,2225,2327,2945,2949,3473,3578,3587,3909,3911,3915,3918,4083,4511,4697,4699,4894,5079,5263,5621,5971,6481,6483,6675,6859,7077,7173,7175,7240,7242,7246,7464,7467,7477,7479,7483,7487,7490,8063,8068,8098,8102,8251,9218,9222,9333,9423,9425,9429,9432,9458,9487,9489,9492,10234,10302,10305,10307,10310,10374],[12,13,24,25],"div",{"className":14,"style":23},[15,16,17,18,19,20,21,22],"w-full","max-w-2xl","mx-auto","bg-white","rounded-lg","shadow-md","overflow-hidden","mb-2","height: 300px;","\n  ",[26,27],"img",{"src":28,"alt":29,"className":30,"style":32},"\u002Fassets\u002Fimg\u002Fadam2.gif","Gradient Descent Illustration",[15,31],"h-full","object-fit: contain;",[34,35,42,43],"p",{"className":36},[37,38,39,40,41],"text-center","text-sm","text-text-secondary","mb-6","italic","\n  រូបភាពយកមកពី: ",[44,45,54],"a",{"href":46,"target":47,"rel":48,"className":51},"https:\u002F\u002Ftowardsdatascience.com\u002Fdl-notes-advanced-gradient-descent-4407d84c2515\u002F","_blank",[49,50],"noopener","noreferrer",[52,53],"text-primary","hover:underline","DL Notes: Advanced Gradient Descent",[34,56,57,58,62],{},"ការប្រើ Gradient descent គឺពិតជាមានប្រសិទ្ធភាព ប៉ុន្តែវាមានបញ្ហាមួយ៖ រាល់គ្រប់ Parameter (ប៉ារ៉ាម៉ែត្រ) ទាំងអស់នៅក្នុងម៉ូឌែលរបស់អ្នក 
ប្រើប្រាស់ ",[59,60,61],"strong",{},"Learning Rate (ទំហំជំហាន) តែមួយដូចគ្នា","។ ហើយការកំណត់លេខនោះឱ្យបានត្រឹមត្រូវ? វាដូចជាការទស្សន៍ទាយច្រើនជាងវិទ្យាសាស្ត្រ។",[34,64,65,66,69,70,76,77,81,82,85,86,89],{},"Adam (មកពីពាក្យថា ",[59,67,68],{},"Adaptive Moment Estimation",") ត្រូវបានណែនាំដោយលោក Diederik Kingma និង Jimmy Ba ក្នុងឆ្នាំ ២០១៥ ",[44,71,75],{"href":72,"className":73},"#ref-1",[52,53,74],"font-semibold","[1]"," ហើយវាបានក្លាយជា \"Optimizer\" ដ៏ពេញនិយមបំផុតក្នុងវិស័យ Deep Learning។ អត្ថបទនេះនឹងពន្យល់ថា ",[78,79,80],"em",{},"ហេតុអ្វី"," បានជាការប្រើ Learning Rate ថេរតែមួយមិនសូវល្អ, ",[78,83,84],{},"តើ"," Adam ធ្វើអ្វីខ្លះខុសពីគេ, និង ",[78,87,88],{},"របៀប"," ដែលវាដំណើរការ — ចាប់ពីទ្រឹស្តីរហូតដល់កូដជាក់ស្តែង។",[91,92],"hr",{},[94,95,97],"h2",{"id":96},"បញ្ហានៃការប្រើ-learning-rate-ថេរ-fixed-learning-rate","បញ្ហានៃការប្រើ Learning Rate ថេរ (Fixed Learning Rate)",[99,100,102],"h3",{"id":101},"វិបត្តិ-ទំហំជំហានមួយ-ប្រើគ្រប់កន្លែង","វិបត្តិ \"ទំហំជំហានមួយ ប្រើគ្រប់កន្លែង\"",[34,104,105,106,109],{},"សាកស្រមៃថាអ្នកកំពុងដើរភ្នំដោយមានច្បាប់ដ៏តឹងរឹងមួយ៖ ",[59,107,108],{},"រាល់ជំហានដែលអ្នកបោះ ត្រូវតែមានប្រវែងស្មើៗគ្នាជានិច្ច"," — មិនឱ្យលើស មិនឱ្យខ្វះ។",[34,111,112],{},"នៅពេលអ្នកនៅលើច្រាំងថ្មចោត ការបោះជំហានវែងពេកអាចឱ្យអ្នកធ្លាក់ជ្រោះ។ ប៉ុន្តែនៅពេលអ្នកនៅលើវាលទំនាបដែលមានជម្រាលតិចតួច ការបោះជំហានដដែលនោះមានអារម្មណ៍ថាយឺតខ្លាំងណាស់ — វាអាចនឹងចំណាយពេលរាប់ឆ្នាំទើបទៅដល់បាតភ្នំ។",[34,114,115,116,164],{},"នេះគឺជាបញ្ហាពិតប្រាកដនៃ Learning Rate ថេរ 
(",[117,118,121,144],"span",{"className":119},[120],"katex",[117,122,125],{"className":123},[124],"katex-mathml",[126,127,129],"math",{"xmlns":128},"http:\u002F\u002Fwww.w3.org\u002F1998\u002FMath\u002FMathML",[130,131,132,139],"semantics",{},[133,134,135],"mrow",{},[136,137,138],"mi",{},"α",[140,141,143],"annotation",{"encoding":142},"application\u002Fx-tex","\\alpha",[117,145,149],{"className":146,"ariaHidden":148},[147],"katex-html","true",[117,150,153,158],{"className":151},[152],"base",[117,154],{"className":155,"style":157},[156],"strut","height:0.4306em;",[117,159,138],{"className":160,"style":163},[161,162],"mord","mathnormal","margin-right:0.0037em;",") នៅក្នុង Gradient Descent៖",[117,166,169],{"className":167},[168],"katex-display",[117,170,172,242],{"className":171},[120],[117,173,175],{"className":174},[124],[126,176,178],{"xmlns":128,"display":177},"block",[130,179,180,239],{},[133,181,182,199,203,218,221,223,227,230,234,236],{},[183,184,185,188],"msub",{},[136,186,187],{},"θ",[133,189,190,193,196],{},[136,191,192],{},"n",[136,194,195],{},"e",[136,197,198],{},"w",[200,201,202],"mo",{},"=",[183,204,205,207],{},[136,206,187],{},[133,208,209,212,215],{},[136,210,211],{},"o",[136,213,214],{},"l",[136,216,217],{},"d",[200,219,220],{},"−",[136,222,138],{},[136,224,226],{"mathvariant":225},"normal","∇",[136,228,229],{},"J",[200,231,233],{"stretchy":232},"false","(",[136,235,187],{},[200,237,238],{"stretchy":232},")",[140,240,241],{"encoding":142},"\\theta_{new} = \\theta_{old} - \\alpha \\nabla 
J(\\theta)",[117,243,245,331,399],{"className":244,"ariaHidden":148},[147],[117,246,248,252,319,324,328],{"className":247},[152],[117,249],{"className":250,"style":251},[156],"height:0.8444em;vertical-align:-0.15em;",[117,253,255,259],{"className":254},[161],[117,256,187],{"className":257,"style":258},[161,162],"margin-right:0.02778em;",[117,260,263],{"className":261},[262],"msupsub",[117,264,268,310],{"className":265},[266,267],"vlist-t","vlist-t2",[117,269,272,305],{"className":270},[271],"vlist-r",[117,273,277],{"className":274,"style":276},[275],"vlist","height:0.1514em;",[117,278,280,285],{"style":279},"top:-2.55em;margin-left:-0.0278em;margin-right:0.05em;",[117,281],{"className":282,"style":284},[283],"pstrut","height:2.7em;",[117,286,292],{"className":287},[288,289,290,291],"sizing","reset-size6","size3","mtight",[117,293,295,298,301],{"className":294},[161,291],[117,296,192],{"className":297},[161,162,291],[117,299,195],{"className":300},[161,162,291],[117,302,198],{"className":303,"style":304},[161,162,291],"margin-right:0.02691em;",[117,306,309],{"className":307},[308],"vlist-s","​",[117,311,313],{"className":312},[271],[117,314,317],{"className":315,"style":316},[275],"height:0.15em;",[117,318],{},[117,320],{"className":321,"style":323},[322],"mspace","margin-right:0.2778em;",[117,325,202],{"className":326},[327],"mrel",[117,329],{"className":330,"style":323},[322],[117,332,334,337,388,392,396],{"className":333},[152],[117,335],{"className":336,"style":251},[156],[117,338,340,343],{"className":339},[161],[117,341,187],{"className":342,"style":258},[161,162],[117,344,346],{"className":345},[262],[117,347,349,380],{"className":348},[266,267],[117,350,352,377],{"className":351},[271],[117,353,356],{"className":354,"style":355},[275],"height:0.3361em;",[117,357,358,361],{"style":279},[117,359],{"className":360,"style":284},[283],[117,362,364],{"className":363},[288,289,290,291],[117,365,367,370,374],{"className":366},[161,291],[117,368,211],{"className":369}
,[161,162,291],[117,371,214],{"className":372,"style":373},[161,162,291],"margin-right:0.01968em;",[117,375,217],{"className":376},[161,162,291],[117,378,309],{"className":379},[308],[117,381,383],{"className":382},[271],[117,384,386],{"className":385,"style":316},[275],[117,387],{},[117,389],{"className":390,"style":391},[322],"margin-right:0.2222em;",[117,393,220],{"className":394},[395],"mbin",[117,397],{"className":398,"style":391},[322],[117,400,402,406,409,412,416,420,423],{"className":401},[152],[117,403],{"className":404,"style":405},[156],"height:1em;vertical-align:-0.25em;",[117,407,138],{"className":408,"style":163},[161,162],[117,410,226],{"className":411},[161],[117,413,229],{"className":414,"style":415},[161,162],"margin-right:0.09618em;",[117,417,233],{"className":418},[419],"mopen",[117,421,187],{"className":422,"style":258},[161,162],[117,424,238],{"className":425},[426],"mclose",[34,428,429,430,458,459,462],{},"តម្លៃ ",[117,431,433,446],{"className":432},[120],[117,434,436],{"className":435},[124],[126,437,438],{"xmlns":128},[130,439,440,444],{},[133,441,442],{},[136,443,138],{},[140,445,143],{"encoding":142},[117,447,449],{"className":448,"ariaHidden":148},[147],[117,450,452,455],{"className":451},[152],[117,453],{"className":454,"style":157},[156],[117,456,138],{"className":457,"style":163},[161,162]," តែមួយនេះ គ្រប់គ្រងទំហំជំហានសម្រាប់ ",[59,460,461],{},"គ្រប់"," Parameter ទាំងអស់ — ទោះបីជា Parameter ខ្លះត្រូវការបោះជំហានធំ ឬខ្លះត្រូវការបោះជំហានតូចក៏ដោយ។",[99,464,466],{"id":465},"ផលវិបាក-៣-យ៉ាងនៃ-learning-rate-ថេរ","ផលវិបាក ៣ យ៉ាងនៃ Learning Rate ថេរ",[468,469,471],"h4",{"id":470},"១-ធំពេក-រំលងគោលដៅ-overshooting","១. 
ធំពេក — រំលងគោលដៅ (Overshooting)",[34,473,474,475,503],{},"នៅពេល ",[117,476,478,491],{"className":477},[120],[117,479,481],{"className":480},[124],[126,482,483],{"xmlns":128},[130,484,485,489],{},[133,486,487],{},[136,488,138],{},[140,490,143],{"encoding":142},[117,492,494],{"className":493,"ariaHidden":148},[147],[117,495,497,500],{"className":496},[152],[117,498],{"className":499,"style":157},[156],[117,501,138],{"className":502,"style":163},[161,162]," ធំពេក វានឹងធ្វើឱ្យយើងបោះជំហានរំលងចំណុចទាបបំផុត (Minimum) ហើយលោតទៅលោតមក៖\nការបាត់បង់ (Loss) មិនដែលថយចុះឡើយ — វានឹងលោតចុះឡើងជុំវិញគោលដៅរហូត។",[468,505,507],{"id":506},"២-តូចពេក-យឺតដូចអណ្តើក-crawling","២. តូចពេក — យឺតដូចអណ្តើក (Crawling)",[34,509,474,510,538],{},[117,511,513,526],{"className":512},[120],[117,514,516],{"className":515},[124],[126,517,518],{"xmlns":128},[130,519,520,524],{},[133,521,522],{},[136,523,138],{},[140,525,143],{"encoding":142},[117,527,529],{"className":528,"ariaHidden":148},[147],[117,530,532,535],{"className":531},[152],[117,533],{"className":534,"style":157},[156],[117,536,138],{"className":537,"style":163},[161,162]," តូចពេក ការរៀនដំណើរការទៅមុខមែន ប៉ុន្តែវាយឺតខ្លាំងណាស់។ ក្នុងម៉ូឌែលដែលមាន Parameter រាប់លាន នេះគឺជាមហន្តរាយខាងពេលវេលា និងកម្លាំងម៉ាស៊ីន។",[468,540,542],{"id":541},"៣-បញ្ហា-ផ្លូវតូចចង្អៀត-ravine-problem","៣. 
បញ្ហា \"ផ្លូវតូចចង្អៀត\" (Ravine Problem)",[34,544,545,546,549],{},"នៅក្នុងលំហវិមាត្រខ្ពស់ ក្រាហ្វជម្រាលជារឿយៗមើលទៅដូចជា ",[59,547,548],{},"ជ្រលងភ្នំដ៏តូចចង្អៀត"," — ចោតខ្លាំងក្នុងទិសដៅម្ខាង និងរាបស្មើក្នុងទិសដៅម្ខាងទៀត។",[551,552,553,590],"ul",{},[554,555,556,557,585,586,589],"li",{},"ទិសដៅដែលចោត ត្រូវការ ",[117,558,560,573],{"className":559},[120],[117,561,563],{"className":562},[124],[126,564,565],{"xmlns":128},[130,566,567,571],{},[133,568,569],{},[136,570,138],{},[140,572,143],{"encoding":142},[117,574,576],{"className":575,"ariaHidden":148},[147],[117,577,579,582],{"className":578},[152],[117,580],{"className":581,"style":157},[156],[117,583,138],{"className":584,"style":163},[161,162]," ",[59,587,588],{},"តូច"," ដើម្បីកុំឱ្យបោះជំហានបុកជញ្ជាំងជ្រលងភ្នំ។",[554,591,592,593,585,621,624,625,653,654,659],{},"ទិសដៅដែលរាបស្មើ ត្រូវការ ",[117,594,596,609],{"className":595},[120],[117,597,599],{"className":598},[124],[126,600,601],{"xmlns":128},[130,602,603,607],{},[133,604,605],{},[136,606,138],{},[140,608,143],{"encoding":142},[117,610,612],{"className":611,"ariaHidden":148},[147],[117,613,615,618],{"className":614},[152],[117,616],{"className":617,"style":157},[156],[117,619,138],{"className":620,"style":163},[161,162],[59,622,623],{},"ធំ"," ដើម្បីដើរឱ្យទៅមុខឆាប់ដល់។\nមិនមាន ",[117,626,628,641],{"className":627},[120],[117,629,631],{"className":630},[124],[126,632,633],{"xmlns":128},[130,634,635,639],{},[133,636,637],{},[136,638,138],{},[140,640,143],{"encoding":142},[117,642,644],{"className":643,"ariaHidden":148},[147],[117,645,647,650],{"className":646},[152],[117,648],{"className":649,"style":157},[156],[117,651,138],{"className":652,"style":163},[161,162]," ថេរណាមួយ អាចបំពេញចិត្តទិសដៅទាំងពីរក្នុងពេលតែមួយបានទេ។ LeCun et al. 
",[44,655,658],{"href":656,"className":657},"#ref-2",[52,53,74],"[2]"," បានធ្វើការវិភាគលម្អិតពីបទប្បញ្ញត្តិ Loss Landscape ទាំងនេះ និងផលប៉ះពាល់របស់ពួកវាទៅលើការ Convergence។",[12,661,24,669,24,675],{"className":662},[663,664,665,666,667,668,40],"bg-yellow-50","dark:bg-yellow-900\u002F20","border-l-4","border-yellow-400","p-4","rounded-r-lg",[34,670,674],{"className":671},[74,672,673],"text-yellow-800","dark:text-yellow-200","ចំណុចខ្សោយសំខាន់",[34,676,680],{"className":677},[678,679],"text-yellow-700","dark:text-yellow-300","Parameter ផ្សេងគ្នា ត្រូវការទំហំជំហានផ្សេងគ្នា។ Learning rate ថេរចាត់ទុកពួកវាដូចគ្នាទាំងអស់ — ហើយនេះគឺជាបញ្ហាកកស្ទះ។",[91,682],{},[94,684,686],{"id":685},"ស្គាល់-adam-ប្រព័ន្ធ-gps-នៃ-optimizer","ស្គាល់ Adam: ប្រព័ន្ធ GPS នៃ Optimizer",[34,688,689,690,693],{},"បើ Gradient Descent ធម្មតាគឺជាការដើរភ្នំដោយបោះជំហានថេរ Adam គឺជាការប្រើ ",[59,691,692],{},"GPS ដែលមានការណែនាំផ្លូវដោយវៃឆ្លាត","៖ វាបង្កើនល្បឿននៅលើផ្លូវហាយវេ បន្ថយល្បឿននៅផ្លូវបត់ចង្អៀត និងចងចាំផ្លូវដែលធ្លាប់បានដើរកន្លងមក។",[34,695,696,697,700],{},"អាថ៌កំបាំងរបស់ Adam គឺការតាមដាន ",[59,698,699],{},"រឿងពីរយ៉ាង"," សម្រាប់រាល់ Parameter នីមួយៗ៖",[702,703,704,720],"table",{},[705,706,707],"thead",{},[708,709,710,714,717],"tr",{},[711,712,713],"th",{},"បរិមាណ",[711,715,716],{},"និមិត្តសញ្ញា",[711,718,719],{},"អត្ថន័យងាយៗ",[721,722,723,812],"tbody",{},[708,724,725,732,809],{},[726,727,728,731],"td",{},[59,729,730],{},"1st Moment"," 
(Momentum)",[726,733,734],{},[117,735,737,757],{"className":736},[120],[117,738,740],{"className":739},[124],[126,741,742],{"xmlns":128},[130,743,744,754],{},[133,745,746],{},[183,747,748,751],{},[136,749,750],{},"m",[136,752,753],{},"t",[140,755,756],{"encoding":142},"m_t",[117,758,760],{"className":759,"ariaHidden":148},[147],[117,761,763,767],{"className":762},[152],[117,764],{"className":765,"style":766},[156],"height:0.5806em;vertical-align:-0.15em;",[117,768,770,773],{"className":769},[161],[117,771,750],{"className":772},[161,162],[117,774,776],{"className":775},[262],[117,777,779,801],{"className":778},[266,267],[117,780,782,798],{"className":781},[271],[117,783,786],{"className":784,"style":785},[275],"height:0.2806em;",[117,787,789,792],{"style":788},"top:-2.55em;margin-left:0em;margin-right:0.05em;",[117,790],{"className":791,"style":284},[283],[117,793,795],{"className":794},[288,289,290,291],[117,796,753],{"className":797},[161,162,291],[117,799,309],{"className":800},[308],[117,802,804],{"className":803},[271],[117,805,807],{"className":806,"style":316},[275],[117,808],{},[726,810,811],{},"តើទិសដៅណាខ្លះដែលជម្រាល (Gradients) ធ្លាប់ចង្អុលទៅនាពេលថ្មីៗនេះ?",[708,813,814,820,895],{},[726,815,816,819],{},[59,817,818],{},"2nd Moment"," (Adaptive 
Scale)",[726,821,822],{},[117,823,825,844],{"className":824},[120],[117,826,828],{"className":827},[124],[126,829,830],{"xmlns":128},[130,831,832,841],{},[133,833,834],{},[183,835,836,839],{},[136,837,838],{},"v",[136,840,753],{},[140,842,843],{"encoding":142},"v_t",[117,845,847],{"className":846,"ariaHidden":148},[147],[117,848,850,853],{"className":849},[152],[117,851],{"className":852,"style":766},[156],[117,854,856,860],{"className":855},[161],[117,857,838],{"className":858,"style":859},[161,162],"margin-right:0.03588em;",[117,861,863],{"className":862},[262],[117,864,866,887],{"className":865},[266,267],[117,867,869,884],{"className":868},[271],[117,870,872],{"className":871,"style":785},[275],[117,873,875,878],{"style":874},"top:-2.55em;margin-left:-0.0359em;margin-right:0.05em;",[117,876],{"className":877,"style":284},[283],[117,879,881],{"className":880},[288,289,290,291],[117,882,753],{"className":883},[161,162,291],[117,885,309],{"className":886},[308],[117,888,890],{"className":889},[271],[117,891,893],{"className":892,"style":316},[275],[117,894],{},[726,896,897,898,901],{},"តើជម្រាល (Gradients) មានទំហំ ",[78,899,900],{},"ធំប៉ុនណា"," នាពេលថ្មីៗនេះ?",[34,903,904,905,908,909,912],{},"តាមរយៈការចែកទំហំជំហាននឹងឫសការ៉េនៃ 2nd moment, Adam នឹង",[59,906,907],{},"បន្ថយ","ទំហំជំហានដោយស្វ័យប្រវត្តិសម្រាប់ Parameter ណាដែលមានជម្រាលធំៗខ្លាំងពេក និង",[59,910,911],{},"បង្កើន","ទំហំជំហានសម្រាប់ Parameter ណាដែលមានជម្រាលតូចៗ។",[91,914],{},[94,916,918],{"id":917},"របៀបបង្កើត-adam-ជំហានម្តងៗ","របៀបបង្កើត Adam (ជំហានម្តងៗ)",[99,920,922],{"id":921},"ជំហានទី-១-momentum-រក្សាល្បឿន-និងទិសដៅ","ជំហានទី ១ — Momentum: រក្សាល្បឿន និងទិសដៅ",[34,924,925,928],{},[59,926,927],{},"បញ្ហាដែលវាដោះស្រាយ:"," ជម្រាល (Gradients) ជារឿយៗមានភាពរំខាន (Noisy)។ ការរត់តាមជម្រាលដែលរំខានទាំងនោះ ធ្វើឱ្យផ្លូវដើរមិនរលូន។",[34,930,931,934],{},[59,932,933],{},"គំនិត:"," រក្សាមធ្យមភាគនៃជម្រាលពីមុនៗ ដូចជាការរមៀលបាល់ចុះពីលើភ្នំ — វានឹងបង្កើនល្បឿនក្នុងទិសដៅដែលស្របគ្នា 
និងមិនងាយងាករេដោយសារដុំថ្មតូចៗតាមផ្លូវ។",[117,936,938],{"className":937},[168],[117,939,941,1011],{"className":940},[120],[117,942,944],{"className":943},[124],[126,945,946],{"xmlns":128,"display":177},[130,947,948,1008],{},[133,949,950,956,958,967,970,982,985,987,989,991,997,999,1001],{},[183,951,952,954],{},[136,953,750],{},[136,955,753],{},[200,957,202],{},[183,959,960,963],{},[136,961,962],{},"β",[964,965,966],"mn",{},"1",[200,968,969],{},"⋅",[183,971,972,974],{},[136,973,750],{},[133,975,976,978,980],{},[136,977,753],{},[200,979,220],{},[964,981,966],{},[200,983,984],{},"+",[200,986,233],{"stretchy":232},[964,988,966],{},[200,990,220],{},[183,992,993,995],{},[136,994,962],{},[964,996,966],{},[200,998,238],{"stretchy":232},[200,1000,969],{},[183,1002,1003,1006],{},[136,1004,1005],{},"g",[136,1007,753],{},[140,1009,1010],{"encoding":142},"m_t = \\beta_1 \\cdot m_{t-1} + (1 - \\beta_1) \\cdot g_t",[117,1012,1014,1069,1128,1194,1215,1273],{"className":1013,"ariaHidden":148},[147],[117,1015,1017,1020,1060,1063,1066],{"className":1016},[152],[117,1018],{"className":1019,"style":766},[156],[117,1021,1023,1026],{"className":1022},[161],[117,1024,750],{"className":1025},[161,162],[117,1027,1029],{"className":1028},[262],[117,1030,1032,1052],{"className":1031},[266,267],[117,1033,1035,1049],{"className":1034},[271],[117,1036,1038],{"className":1037,"style":785},[275],[117,1039,1040,1043],{"style":788},[117,1041],{"className":1042,"style":284},[283],[117,1044,1046],{"className":1045},[288,289,290,291],[117,1047,753],{"className":1048},[161,162,291],[117,1050,309],{"className":1051},[308],[117,1053,1055],{"className":1054},[271],[117,1056,1058],{"className":1057,"style":316},[275],[117,1059],{},[117,1061],{"className":1062,"style":323},[322],[117,1064,202],{"className":1065},[327],[117,1067],{"className":1068,"style":323},[322],[117,1070,1072,1076,1119,1122,1125],{"className":1071},[152],[117,1073],{"className":1074,"style":1075},[156],"height:0.8889em;vertical-align:-0.19
44em;",[117,1077,1079,1083],{"className":1078},[161],[117,1080,962],{"className":1081,"style":1082},[161,162],"margin-right:0.05278em;",[117,1084,1086],{"className":1085},[262],[117,1087,1089,1111],{"className":1088},[266,267],[117,1090,1092,1108],{"className":1091},[271],[117,1093,1096],{"className":1094,"style":1095},[275],"height:0.3011em;",[117,1097,1099,1102],{"style":1098},"top:-2.55em;margin-left:-0.0528em;margin-right:0.05em;",[117,1100],{"className":1101,"style":284},[283],[117,1103,1105],{"className":1104},[288,289,290,291],[117,1106,966],{"className":1107},[161,291],[117,1109,309],{"className":1110},[308],[117,1112,1114],{"className":1113},[271],[117,1115,1117],{"className":1116,"style":316},[275],[117,1118],{},[117,1120],{"className":1121,"style":391},[322],[117,1123,969],{"className":1124},[395],[117,1126],{"className":1127,"style":391},[322],[117,1129,1131,1135,1185,1188,1191],{"className":1130},[152],[117,1132],{"className":1133,"style":1134},[156],"height:0.7917em;vertical-align:-0.2083em;",[117,1136,1138,1141],{"className":1137},[161],[117,1139,750],{"className":1140},[161,162],[117,1142,1144],{"className":1143},[262],[117,1145,1147,1176],{"className":1146},[266,267],[117,1148,1150,1173],{"className":1149},[271],[117,1151,1153],{"className":1152,"style":1095},[275],[117,1154,1155,1158],{"style":788},[117,1156],{"className":1157,"style":284},[283],[117,1159,1161],{"className":1160},[288,289,290,291],[117,1162,1164,1167,1170],{"className":1163},[161,291],[117,1165,753],{"className":1166},[161,162,291],[117,1168,220],{"className":1169},[395,291],[117,1171,966],{"className":1172},[161,291],[117,1174,309],{"className":1175},[308],[117,1177,1179],{"className":1178},[271],[117,1180,1183],{"className":1181,"style":1182},[275],"height:0.2083em;",[117,1184],{},[117,1186],{"className":1187,"style":391},[322],[117,1189,984],{"className":1190},[395],[117,1192],{"className":1193,"style":391},[322],[117,1195,1197,1200,1203,1206,1209,1212],{"className":1196},[152],
[117,1198],{"className":1199,"style":405},[156],[117,1201,233],{"className":1202},[419],[117,1204,966],{"className":1205},[161],[117,1207],{"className":1208,"style":391},[322],[117,1210,220],{"className":1211},[395],[117,1213],{"className":1214,"style":391},[322],[117,1216,1218,1221,1261,1264,1267,1270],{"className":1217},[152],[117,1219],{"className":1220,"style":405},[156],[117,1222,1224,1227],{"className":1223},[161],[117,1225,962],{"className":1226,"style":1082},[161,162],[117,1228,1230],{"className":1229},[262],[117,1231,1233,1253],{"className":1232},[266,267],[117,1234,1236,1250],{"className":1235},[271],[117,1237,1239],{"className":1238,"style":1095},[275],[117,1240,1241,1244],{"style":1098},[117,1242],{"className":1243,"style":284},[283],[117,1245,1247],{"className":1246},[288,289,290,291],[117,1248,966],{"className":1249},[161,291],[117,1251,309],{"className":1252},[308],[117,1254,1256],{"className":1255},[271],[117,1257,1259],{"className":1258,"style":316},[275],[117,1260],{},[117,1262,238],{"className":1263},[426],[117,1265],{"className":1266,"style":391},[322],[117,1268,969],{"className":1269},[395],[117,1271],{"className":1272,"style":391},[322],[117,1274,1276,1280],{"className":1275},[152],[117,1277],{"className":1278,"style":1279},[156],"height:0.625em;vertical-align:-0.1944em;",[117,1281,1283,1286],{"className":1282},[161],[117,1284,1005],{"className":1285,"style":859},[161,162],[117,1287,1289],{"className":1288},[262],[117,1290,1292,1312],{"className":1291},[266,267],[117,1293,1295,1309],{"className":1294},[271],[117,1296,1298],{"className":1297,"style":785},[275],[117,1299,1300,1303],{"style":874},[117,1301],{"className":1302,"style":284},[283],[117,1304,1306],{"className":1305},[288,289,290,291],[117,1307,753],{"className":1308},[161,162,291],[117,1310,309],{"className":1311},[308],[117,1313,1315],{"className":1314},[271],[117,1316,1318],{"className":1317,"style":316},[275],[117,1319],{},[551,1321,1322],{},[554,1323,1324,1394,1395,1398],{},[117,13
25,1327,1345],{"className":1326},[120],[117,1328,1330],{"className":1329},[124],[126,1331,1332],{"xmlns":128},[130,1333,1334,1342],{},[133,1335,1336],{},[183,1337,1338,1340],{},[136,1339,962],{},[964,1341,966],{},[140,1343,1344],{"encoding":142},"\\beta_1",[117,1346,1348],{"className":1347,"ariaHidden":148},[147],[117,1349,1351,1354],{"className":1350},[152],[117,1352],{"className":1353,"style":1075},[156],[117,1355,1357,1360],{"className":1356},[161],[117,1358,962],{"className":1359,"style":1082},[161,162],[117,1361,1363],{"className":1362},[262],[117,1364,1366,1386],{"className":1365},[266,267],[117,1367,1369,1383],{"className":1368},[271],[117,1370,1372],{"className":1371,"style":1095},[275],[117,1373,1374,1377],{"style":1098},[117,1375],{"className":1376,"style":284},[283],[117,1378,1380],{"className":1379},[288,289,290,291],[117,1381,966],{"className":1382},[161,291],[117,1384,309],{"className":1385},[308],[117,1387,1389],{"className":1388},[271],[117,1390,1392],{"className":1391,"style":316},[275],[117,1393],{}," ជាមេគុណ (ជាទូទៅគឺ ",[59,1396,1397],{},"0.9",")៖ មានន័យថាឱ្យតម្លៃ ៩០% លើអតីតកាល និង ១០% លើជម្រាលថ្មី។",[34,1400,1401,1402,1407],{},"Sutskever et al. 
",[44,1403,1406],{"href":1404,"className":1405},"#ref-3",[52,53,74],"[3]"," បានបង្ហាញថា Momentum term នេះ មានសារៈសំខាន់ខ្លាំងណាស់ក្នុងការ Converge យ៉ាងលឿន និងស្ថិតស្ថេរ នៅក្នុង Deep Networks។",[99,1409,1411],{"id":1410},"ជំហានទី-២-adaptive-scale-ការបត់បែនតាមប្រវត្តិ","ជំហានទី ២ — Adaptive Scale: ការបត់បែនតាមប្រវត្តិ",[34,1413,1414,1416],{},[59,1415,927],{}," Parameter ខ្លះមានជម្រាលធំ ខ្លះមានជម្រាលតូច។ យើងចង់ឱ្យអាធំដើរតិចៗ និងអាតូចដើរឱ្យបានច្រើន។",[34,1418,1419,1421],{},[59,1420,933],{}," តាមដានមធ្យមភាគនៃ \"ការ៉េ\" នៃជម្រាល៖",[117,1423,1425],{"className":1424},[168],[117,1426,1428,1496],{"className":1427},[120],[117,1429,1431],{"className":1430},[124],[126,1432,1433],{"xmlns":128,"display":177},[130,1434,1435,1493],{},[133,1436,1437,1443,1445,1452,1454,1466,1468,1470,1472,1474,1480,1482,1484],{},[183,1438,1439,1441],{},[136,1440,838],{},[136,1442,753],{},[200,1444,202],{},[183,1446,1447,1449],{},[136,1448,962],{},[964,1450,1451],{},"2",[200,1453,969],{},[183,1455,1456,1458],{},[136,1457,838],{},[133,1459,1460,1462,1464],{},[136,1461,753],{},[200,1463,220],{},[964,1465,966],{},[200,1467,984],{},[200,1469,233],{"stretchy":232},[964,1471,966],{},[200,1473,220],{},[183,1475,1476,1478],{},[136,1477,962],{},[964,1479,1451],{},[200,1481,238],{"stretchy":232},[200,1483,969],{},[1485,1486,1487,1489,1491],"msubsup",{},[136,1488,1005],{},[136,1490,753],{},[964,1492,1451],{},[140,1494,1495],{"encoding":142},"v_t = \\beta_2 \\cdot v_{t-1} + (1 - \\beta_2) \\cdot 
g_t^2",[117,1497,1499,1554,1609,1673,1694,1752],{"className":1498,"ariaHidden":148},[147],[117,1500,1502,1505,1545,1548,1551],{"className":1501},[152],[117,1503],{"className":1504,"style":766},[156],[117,1506,1508,1511],{"className":1507},[161],[117,1509,838],{"className":1510,"style":859},[161,162],[117,1512,1514],{"className":1513},[262],[117,1515,1517,1537],{"className":1516},[266,267],[117,1518,1520,1534],{"className":1519},[271],[117,1521,1523],{"className":1522,"style":785},[275],[117,1524,1525,1528],{"style":874},[117,1526],{"className":1527,"style":284},[283],[117,1529,1531],{"className":1530},[288,289,290,291],[117,1532,753],{"className":1533},[161,162,291],[117,1535,309],{"className":1536},[308],[117,1538,1540],{"className":1539},[271],[117,1541,1543],{"className":1542,"style":316},[275],[117,1544],{},[117,1546],{"className":1547,"style":323},[322],[117,1549,202],{"className":1550},[327],[117,1552],{"className":1553,"style":323},[322],[117,1555,1557,1560,1600,1603,1606],{"className":1556},[152],[117,1558],{"className":1559,"style":1075},[156],[117,1561,1563,1566],{"className":1562},[161],[117,1564,962],{"className":1565,"style":1082},[161,162],[117,1567,1569],{"className":1568},[262],[117,1570,1572,1592],{"className":1571},[266,267],[117,1573,1575,1589],{"className":1574},[271],[117,1576,1578],{"className":1577,"style":1095},[275],[117,1579,1580,1583],{"style":1098},[117,1581],{"className":1582,"style":284},[283],[117,1584,1586],{"className":1585},[288,289,290,291],[117,1587,1451],{"className":1588},[161,291],[117,1590,309],{"className":1591},[308],[117,1593,1595],{"className":1594},[271],[117,1596,1598],{"className":1597,"style":316},[275],[117,1599],{},[117,1601],{"className":1602,"style":391},[322],[117,1604,969],{"className":1605},[395],[117,1607],{"className":1608,"style":391},[322],[117,1610,1612,1615,1664,1667,1670],{"className":1611},[152],[117,1613],{"className":1614,"style":1134},[156],[117,1616,1618,1621],{"className":1617},[161],[117,1619,838],
{"className":1620,"style":859},[161,162],[117,1622,1624],{"className":1623},[262],[117,1625,1627,1656],{"className":1626},[266,267],[117,1628,1630,1653],{"className":1629},[271],[117,1631,1633],{"className":1632,"style":1095},[275],[117,1634,1635,1638],{"style":874},[117,1636],{"className":1637,"style":284},[283],[117,1639,1641],{"className":1640},[288,289,290,291],[117,1642,1644,1647,1650],{"className":1643},[161,291],[117,1645,753],{"className":1646},[161,162,291],[117,1648,220],{"className":1649},[395,291],[117,1651,966],{"className":1652},[161,291],[117,1654,309],{"className":1655},[308],[117,1657,1659],{"className":1658},[271],[117,1660,1662],{"className":1661,"style":1182},[275],[117,1663],{},[117,1665],{"className":1666,"style":391},[322],[117,1668,984],{"className":1669},[395],[117,1671],{"className":1672,"style":391},[322],[117,1674,1676,1679,1682,1685,1688,1691],{"className":1675},[152],[117,1677],{"className":1678,"style":405},[156],[117,1680,233],{"className":1681},[419],[117,1683,966],{"className":1684},[161],[117,1686],{"className":1687,"style":391},[322],[117,1689,220],{"className":1690},[395],[117,1692],{"className":1693,"style":391},[322],[117,1695,1697,1700,1740,1743,1746,1749],{"className":1696},[152],[117,1698],{"className":1699,"style":405},[156],[117,1701,1703,1706],{"className":1702},[161],[117,1704,962],{"className":1705,"style":1082},[161,162],[117,1707,1709],{"className":1708},[262],[117,1710,1712,1732],{"className":1711},[266,267],[117,1713,1715,1729],{"className":1714},[271],[117,1716,1718],{"className":1717,"style":1095},[275],[117,1719,1720,1723],{"style":1098},[117,1721],{"className":1722,"style":284},[283],[117,1724,1726],{"className":1725},[288,289,290,291],[117,1727,1451],{"className":1728},[161,291],[117,1730,309],{"className":1731},[308],[117,1733,1735],{"className":1734},[271],[117,1736,1738],{"className":1737,"style":316},[275],[117,1739],{},[117,1741,238],{"className":1742},[426],[117,1744],{"className":1745,"style":391},[322],
[117,1747,969],{"className":1748},[395],[117,1750],{"className":1751,"style":391},[322],[117,1753,1755,1759],{"className":1754},[152],[117,1756],{"className":1757,"style":1758},[156],"height:1.1111em;vertical-align:-0.247em;",[117,1760,1762,1765],{"className":1761},[161],[117,1763,1005],{"className":1764,"style":859},[161,162],[117,1766,1768],{"className":1767},[262],[117,1769,1771,1805],{"className":1770},[266,267],[117,1772,1774,1802],{"className":1773},[271],[117,1775,1778,1790],{"className":1776,"style":1777},[275],"height:0.8641em;",[117,1779,1781,1784],{"style":1780},"top:-2.453em;margin-left:-0.0359em;margin-right:0.05em;",[117,1782],{"className":1783,"style":284},[283],[117,1785,1787],{"className":1786},[288,289,290,291],[117,1788,753],{"className":1789},[161,162,291],[117,1791,1793,1796],{"style":1792},"top:-3.113em;margin-right:0.05em;",[117,1794],{"className":1795,"style":284},[283],[117,1797,1799],{"className":1798},[288,289,290,291],[117,1800,1451],{"className":1801},[161,291],[117,1803,309],{"className":1804},[308],[117,1806,1808],{"className":1807},[271],[117,1809,1812],{"className":1810,"style":1811},[275],"height:0.247em;",[117,1813],{},[34,1815,1816,1817,1886,1887,2023,2024,2027],{},"Parameter ណាដែលទទួលបានជម្រាលធំៗជាបន្តបន្ទាប់ នឹងមានតម្លៃ 
",[117,1818,1820,1837],{"className":1819},[120],[117,1821,1823],{"className":1822},[124],[126,1824,1825],{"xmlns":128},[130,1826,1827,1835],{},[133,1828,1829],{},[183,1830,1831,1833],{},[136,1832,838],{},[136,1834,753],{},[140,1836,843],{"encoding":142},[117,1838,1840],{"className":1839,"ariaHidden":148},[147],[117,1841,1843,1846],{"className":1842},[152],[117,1844],{"className":1845,"style":766},[156],[117,1847,1849,1852],{"className":1848},[161],[117,1850,838],{"className":1851,"style":859},[161,162],[117,1853,1855],{"className":1854},[262],[117,1856,1858,1878],{"className":1857},[266,267],[117,1859,1861,1875],{"className":1860},[271],[117,1862,1864],{"className":1863,"style":785},[275],[117,1865,1866,1869],{"style":874},[117,1867],{"className":1868,"style":284},[283],[117,1870,1872],{"className":1871},[288,289,290,291],[117,1873,753],{"className":1874},[161,162,291],[117,1876,309],{"className":1877},[308],[117,1879,1881],{"className":1880},[271],[117,1882,1884],{"className":1883,"style":316},[275],[117,1885],{}," ធំ។ នៅពេលយើងយកជំហានទៅចែកនឹង 
",[117,1888,1890,1911],{"className":1889},[120],[117,1891,1893],{"className":1892},[124],[126,1894,1895],{"xmlns":128},[130,1896,1897,1908],{},[133,1898,1899],{},[1900,1901,1902],"msqrt",{},[183,1903,1904,1906],{},[136,1905,838],{},[136,1907,753],{},[140,1909,1910],{"encoding":142},"\\sqrt{v_t}",[117,1912,1914],{"className":1913,"ariaHidden":148},[147],[117,1915,1917,1921],{"className":1916},[152],[117,1918],{"className":1919,"style":1920},[156],"height:1.04em;vertical-align:-0.3147em;",[117,1922,1925],{"className":1923},[161,1924],"sqrt",[117,1926,1928,2014],{"className":1927},[266,267],[117,1929,1931,2011],{"className":1930},[271],[117,1932,1935,1988],{"className":1933,"style":1934},[275],"height:0.7253em;",[117,1936,1940,1944],{"className":1937,"style":1939},[1938],"svg-align","top:-3em;",[117,1941],{"className":1942,"style":1943},[283],"height:3em;",[117,1945,1948],{"className":1946,"style":1947},[161],"padding-left:0.833em;",[117,1949,1951,1954],{"className":1950},[161],[117,1952,838],{"className":1953,"style":859},[161,162],[117,1955,1957],{"className":1956},[262],[117,1958,1960,1980],{"className":1959},[266,267],[117,1961,1963,1977],{"className":1962},[271],[117,1964,1966],{"className":1965,"style":785},[275],[117,1967,1968,1971],{"style":874},[117,1969],{"className":1970,"style":284},[283],[117,1972,1974],{"className":1973},[288,289,290,291],[117,1975,753],{"className":1976},[161,162,291],[117,1978,309],{"className":1979},[308],[117,1981,1983],{"className":1982},[271],[117,1984,1986],{"className":1985,"style":316},[275],[117,1987],{},[117,1989,1991,1994],{"style":1990},"top:-2.6853em;",[117,1992],{"className":1993,"style":1943},[283],[117,1995,1999],{"className":1996,"style":1998},[1997],"hide-tail","min-width:0.853em;height:1.08em;",[2000,2001,2007],"svg",{"xmlns":2002,"width":2003,"height":2004,"viewBox":2005,"preserveAspectRatio":2006},"http:\u002F\u002Fwww.w3.org\u002F2000\u002Fsvg","400em","1.08em","0 0 400000 1080","xMinYMin 
slice",[2008,2009],"path",{"d":2010},"M95,702\nc-2.7,0,-7.17,-2.7,-13.5,-8c-5.8,-5.3,-9.5,-10,-9.5,-14\nc0,-2,0.3,-3.3,1,-4c1.3,-2.7,23.83,-20.7,67.5,-54\nc44.2,-33.3,65.8,-50.3,66.5,-51c1.3,-1.3,3,-2,5,-2c4.7,0,8.7,3.3,12,10\ns173,378,173,378c0.7,0,35.3,-71,104,-213c68.7,-142,137.5,-285,206.5,-429\nc69,-144,104.5,-217.7,106.5,-221\nl0 -0\nc5.3,-9.3,12,-14,20,-14\nH400000v40H845.2724\ns-225.272,467,-225.272,467s-235,486,-235,486c-2.7,4.7,-9,7,-19,7\nc-6,0,-10,-1,-12,-3s-194,-422,-194,-422s-65,47,-65,47z\nM834 80h400000v40h-400000z",[117,2012,309],{"className":2013},[308],[117,2015,2017],{"className":2016},[271],[117,2018,2021],{"className":2019,"style":2020},[275],"height:0.3147em;",[117,2022],{}," វានឹងធ្វើឱ្យការ Update ថយចុះមកតូចវិញ។ នេះហើយជា ",[59,2025,2026],{},"Learning Rate ផ្ទាល់ខ្លួន"," សម្រាប់ Parameter នីមួយៗ។",[99,2029,2031],{"id":2030},"ជំហានទី-៣-bias-correction-ការកែតម្រូវពេលចាប់ផ្តើម","ជំហានទី ៣ — Bias Correction: ការកែតម្រូវពេលចាប់ផ្តើម",[34,2033,2034,2036,2037,2131,2132,2224],{},[59,2035,927],{}," ដោយសារនៅពេលចាប់ផ្តើម ",[117,2038,2040,2063],{"className":2039},[120],[117,2041,2043],{"className":2042},[124],[126,2044,2045],{"xmlns":128},[130,2046,2047,2060],{},[133,2048,2049,2056,2058],{},[183,2050,2051,2053],{},[136,2052,750],{},[964,2054,2055],{},"0",[200,2057,202],{},[964,2059,2055],{},[140,2061,2062],{"encoding":142},"m_0 = 
0",[117,2064,2066,2121],{"className":2065,"ariaHidden":148},[147],[117,2067,2069,2072,2112,2115,2118],{"className":2068},[152],[117,2070],{"className":2071,"style":766},[156],[117,2073,2075,2078],{"className":2074},[161],[117,2076,750],{"className":2077},[161,162],[117,2079,2081],{"className":2080},[262],[117,2082,2084,2104],{"className":2083},[266,267],[117,2085,2087,2101],{"className":2086},[271],[117,2088,2090],{"className":2089,"style":1095},[275],[117,2091,2092,2095],{"style":788},[117,2093],{"className":2094,"style":284},[283],[117,2096,2098],{"className":2097},[288,289,290,291],[117,2099,2055],{"className":2100},[161,291],[117,2102,309],{"className":2103},[308],[117,2105,2107],{"className":2106},[271],[117,2108,2110],{"className":2109,"style":316},[275],[117,2111],{},[117,2113],{"className":2114,"style":323},[322],[117,2116,202],{"className":2117},[327],[117,2119],{"className":2120,"style":323},[322],[117,2122,2124,2128],{"className":2123},[152],[117,2125],{"className":2126,"style":2127},[156],"height:0.6444em;",[117,2129,2055],{"className":2130},[161]," និង ",[117,2133,2135,2157],{"className":2134},[120],[117,2136,2138],{"className":2137},[124],[126,2139,2140],{"xmlns":128},[130,2141,2142,2154],{},[133,2143,2144,2150,2152],{},[183,2145,2146,2148],{},[136,2147,838],{},[964,2149,2055],{},[200,2151,202],{},[964,2153,2055],{},[140,2155,2156],{"encoding":142},"v_0 = 
0",[117,2158,2160,2215],{"className":2159,"ariaHidden":148},[147],[117,2161,2163,2166,2206,2209,2212],{"className":2162},[152],[117,2164],{"className":2165,"style":766},[156],[117,2167,2169,2172],{"className":2168},[161],[117,2170,838],{"className":2171,"style":859},[161,162],[117,2173,2175],{"className":2174},[262],[117,2176,2178,2198],{"className":2177},[266,267],[117,2179,2181,2195],{"className":2180},[271],[117,2182,2184],{"className":2183,"style":1095},[275],[117,2185,2186,2189],{"style":874},[117,2187],{"className":2188,"style":284},[283],[117,2190,2192],{"className":2191},[288,289,290,291],[117,2193,2055],{"className":2194},[161,291],[117,2196,309],{"className":2197},[308],[117,2199,2201],{"className":2200},[271],[117,2202,2204],{"className":2203,"style":316},[275],[117,2205],{},[117,2207],{"className":2208,"style":323},[322],[117,2210,202],{"className":2211},[327],[117,2213],{"className":2214,"style":323},[322],[117,2216,2218,2221],{"className":2217},[152],[117,2219],{"className":2220,"style":2127},[156],[117,2222,2055],{"className":2223},[161]," នោះការប៉ាន់ស្មានដំបូងៗនឹងខិតទៅជិតសូន្យខ្លាំងពេក (វាមិនទាន់មានប្រវត្តិគ្រប់គ្រាន់)។",[34,2226,2227,2230,2231,2326],{},[59,2228,2229],{},"ដំណោះស្រាយ:"," ចែកវាជាមួយ ",[117,2232,2234,2261],{"className":2233},[120],[117,2235,2237],{"className":2236},[124],[126,2238,2239],{"xmlns":128},[130,2240,2241,2258],{},[133,2242,2243,2245,2247,2249,2256],{},[200,2244,233],{"stretchy":232},[964,2246,966],{},[200,2248,220],{},[2250,2251,2252,2254],"msup",{},[136,2253,962],{},[136,2255,753],{},[200,2257,238],{"stretchy":232},[140,2259,2260],{"encoding":142},"(1 - 
\\beta^t)",[117,2262,2264,2285],{"className":2263,"ariaHidden":148},[147],[117,2265,2267,2270,2273,2276,2279,2282],{"className":2266},[152],[117,2268],{"className":2269,"style":405},[156],[117,2271,233],{"className":2272},[419],[117,2274,966],{"className":2275},[161],[117,2277],{"className":2278,"style":391},[322],[117,2280,220],{"className":2281},[395],[117,2283],{"className":2284,"style":391},[322],[117,2286,2288,2292,2323],{"className":2287},[152],[117,2289],{"className":2290,"style":2291},[156],"height:1.0436em;vertical-align:-0.25em;",[117,2293,2295,2298],{"className":2294},[161],[117,2296,962],{"className":2297,"style":1082},[161,162],[117,2299,2301],{"className":2300},[262],[117,2302,2304],{"className":2303},[266],[117,2305,2307],{"className":2306},[271],[117,2308,2311],{"className":2309,"style":2310},[275],"height:0.7936em;",[117,2312,2314,2317],{"style":2313},"top:-3.063em;margin-right:0.05em;",[117,2315],{"className":2316,"style":284},[283],[117,2318,2320],{"className":2319},[288,289,290,291],[117,2321,753],{"className":2322},[161,162,291],[117,2324,238],{"className":2325},[426]," 
ដើម្បីកែតម្រូវឱ្យមានតុល្យភាពវិញនៅជំហានដំបូងៗ៖",[117,2328,2330],{"className":2329},[168],[117,2331,2333,2422],{"className":2332},[120],[117,2334,2336],{"className":2335},[124],[126,2337,2338],{"xmlns":128,"display":177},[130,2339,2340,2419],{},[133,2341,2342,2354,2356,2379,2382,2385,2395,2397],{},[183,2343,2344,2352],{},[2345,2346,2347,2349],"mover",{"accent":148},[136,2348,750],{},[200,2350,2351],{},"^",[136,2353,753],{},[200,2355,202],{},[2357,2358,2359,2365],"mfrac",{},[183,2360,2361,2363],{},[136,2362,750],{},[136,2364,753],{},[133,2366,2367,2369,2371],{},[964,2368,966],{},[200,2370,220],{},[1485,2372,2373,2375,2377],{},[136,2374,962],{},[964,2376,966],{},[136,2378,753],{},[200,2380,2381],{"separator":148},",",[322,2383],{"width":2384},"2em",[183,2386,2387,2393],{},[2345,2388,2389,2391],{"accent":148},[136,2390,838],{},[200,2392,2351],{},[136,2394,753],{},[200,2396,202],{},[2357,2398,2399,2405],{},[183,2400,2401,2403],{},[136,2402,838],{},[136,2404,753],{},[133,2406,2407,2409,2411],{},[964,2408,966],{},[200,2410,220],{},[1485,2412,2413,2415,2417],{},[136,2414,962],{},[964,2416,1451],{},[136,2418,753],{},[140,2420,2421],{"encoding":142},"\\hat{m}_t = \\frac{m_t}{1 - \\beta_1^t}, \\qquad \\hat{v}_t = \\frac{v_t}{1 - 
\\beta_2^t}",[117,2423,2425,2512,2780],{"className":2424,"ariaHidden":148},[147],[117,2426,2428,2431,2503,2506,2509],{"className":2427},[152],[117,2429],{"className":2430,"style":251},[156],[117,2432,2434,2469],{"className":2433},[161],[117,2435,2438],{"className":2436},[161,2437],"accent",[117,2439,2441],{"className":2440},[266],[117,2442,2444],{"className":2443},[271],[117,2445,2448,2456],{"className":2446,"style":2447},[275],"height:0.6944em;",[117,2449,2450,2453],{"style":1939},[117,2451],{"className":2452,"style":1943},[283],[117,2454,750],{"className":2455},[161,162],[117,2457,2458,2461],{"style":1939},[117,2459],{"className":2460,"style":1943},[283],[117,2462,2466],{"className":2463,"style":2465},[2464],"accent-body","left:-0.25em;",[117,2467,2351],{"className":2468},[161],[117,2470,2472],{"className":2471},[262],[117,2473,2475,2495],{"className":2474},[266,267],[117,2476,2478,2492],{"className":2477},[271],[117,2479,2481],{"className":2480,"style":785},[275],[117,2482,2483,2486],{"style":788},[117,2484],{"className":2485,"style":284},[283],[117,2487,2489],{"className":2488},[288,289,290,291],[117,2490,753],{"className":2491},[161,162,291],[117,2493,309],{"className":2494},[308],[117,2496,2498],{"className":2497},[271],[117,2499,2501],{"className":2500,"style":316},[275],[117,2502],{},[117,2504],{"className":2505,"style":323},[322],[117,2507,202],{"className":2508},[327],[117,2510],{"className":2511,"style":323},[322],[117,2513,2515,2519,2690,2694,2698,2702,2771,2774,2777],{"className":2514},[152],[117,2516],{"className":2517,"style":2518},[156],"height:2.0599em;vertical-align:-0.9523em;",[117,2520,2522,2526,2687],{"className":2521},[161],[117,2523],{"className":2524},[419,2525],"nulldelimiter",[117,2527,2529],{"className":2528},[2357],[117,2530,2532,2678],{"className":2531},[266,267],[117,2533,2535,2675],{"className":2534},[271],[117,2536,2539,2615,2626],{"className":2537,"style":2538},[275],"height:1.1076em;",[117,2540,2542,2545],{"style":2541},"top:-2.314e
m;",[117,2543],{"className":2544,"style":1943},[283],[117,2546,2548,2551,2554,2557,2560],{"className":2547},[161],[117,2549,966],{"className":2550},[161],[117,2552],{"className":2553,"style":391},[322],[117,2555,220],{"className":2556},[395],[117,2558],{"className":2559,"style":391},[322],[117,2561,2563,2566],{"className":2562},[161],[117,2564,962],{"className":2565,"style":1082},[161,162],[117,2567,2569],{"className":2568},[262],[117,2570,2572,2606],{"className":2571},[266,267],[117,2573,2575,2603],{"className":2574},[271],[117,2576,2579,2591],{"className":2577,"style":2578},[275],"height:0.7754em;",[117,2580,2582,2585],{"style":2581},"top:-2.4337em;margin-left:-0.0528em;margin-right:0.05em;",[117,2583],{"className":2584,"style":284},[283],[117,2586,2588],{"className":2587},[288,289,290,291],[117,2589,966],{"className":2590},[161,291],[117,2592,2594,2597],{"style":2593},"top:-3.0448em;margin-right:0.05em;",[117,2595],{"className":2596,"style":284},[283],[117,2598,2600],{"className":2599},[288,289,290,291],[117,2601,753],{"className":2602},[161,162,291],[117,2604,309],{"className":2605},[308],[117,2607,2609],{"className":2608},[271],[117,2610,2613],{"className":2611,"style":2612},[275],"height:0.2663em;",[117,2614],{},[117,2616,2618,2621],{"style":2617},"top:-3.23em;",[117,2619],{"className":2620,"style":1943},[283],[117,2622],{"className":2623,"style":2625},[2624],"frac-line","border-bottom-width:0.04em;",[117,2627,2629,2632],{"style":2628},"top:-3.677em;",[117,2630],{"className":2631,"style":1943},[283],[117,2633,2635],{"className":2634},[161],[117,2636,2638,2641],{"className":2637},[161],[117,2639,750],{"className":2640},[161,162],[117,2642,2644],{"className":2643},[262],[117,2645,2647,2667],{"className":2646},[266,267],[117,2648,2650,2664],{"className":2649},[271],[117,2651,2653],{"className":2652,"style":785},[275],[117,2654,2655,2658],{"style":788},[117,2656],{"className":2657,"style":284},[283],[117,2659,2661],{"className":2660},[288,289,290,291],[117,2662,75
3],{"className":2663},[161,162,291],[117,2665,309],{"className":2666},[308],[117,2668,2670],{"className":2669},[271],[117,2671,2673],{"className":2672,"style":316},[275],[117,2674],{},[117,2676,309],{"className":2677},[308],[117,2679,2681],{"className":2680},[271],[117,2682,2685],{"className":2683,"style":2684},[275],"height:0.9523em;",[117,2686],{},[117,2688],{"className":2689},[426,2525],[117,2691,2381],{"className":2692},[2693],"mpunct",[117,2695],{"className":2696,"style":2697},[322],"margin-right:2em;",[117,2699],{"className":2700,"style":2701},[322],"margin-right:0.1667em;",[117,2703,2705,2737],{"className":2704},[161],[117,2706,2708],{"className":2707},[161,2437],[117,2709,2711],{"className":2710},[266],[117,2712,2714],{"className":2713},[271],[117,2715,2717,2725],{"className":2716,"style":2447},[275],[117,2718,2719,2722],{"style":1939},[117,2720],{"className":2721,"style":1943},[283],[117,2723,838],{"className":2724,"style":859},[161,162],[117,2726,2727,2730],{"style":1939},[117,2728],{"className":2729,"style":1943},[283],[117,2731,2734],{"className":2732,"style":2733},[2464],"left:-0.2222em;",[117,2735,2351],{"className":2736},[161],[117,2738,2740],{"className":2739},[262],[117,2741,2743,2763],{"className":2742},[266,267],[117,2744,2746,2760],{"className":2745},[271],[117,2747,2749],{"className":2748,"style":785},[275],[117,2750,2751,2754],{"style":874},[117,2752],{"className":2753,"style":284},[283],[117,2755,2757],{"className":2756},[288,289,290,291],[117,2758,753],{"className":2759},[161,162,291],[117,2761,309],{"className":2762},[308],[117,2764,2766],{"className":2765},[271],[117,2767,2769],{"className":2768,"style":316},[275],[117,2770],{},[117,2772],{"className":2773,"style":323},[322],[117,2775,202],{"className":2776},[327],[117,2778],{"className":2779,"style":323},[322],[117,2781,2783,2786],{"className":2782},[152],[117,2784],{"className":2785,"style":2518},[156],[117,2787,2789,2792,2942],{"className":2788},[161],[117,2790],{"className":2791},[419,2
525],[117,2793,2795],{"className":2794},[2357],[117,2796,2798,2934],{"className":2797},[266,267],[117,2799,2801,2931],{"className":2800},[271],[117,2802,2804,2875,2883],{"className":2803,"style":2538},[275],[117,2805,2806,2809],{"style":2541},[117,2807],{"className":2808,"style":1943},[283],[117,2810,2812,2815,2818,2821,2824],{"className":2811},[161],[117,2813,966],{"className":2814},[161],[117,2816],{"className":2817,"style":391},[322],[117,2819,220],{"className":2820},[395],[117,2822],{"className":2823,"style":391},[322],[117,2825,2827,2830],{"className":2826},[161],[117,2828,962],{"className":2829,"style":1082},[161,162],[117,2831,2833],{"className":2832},[262],[117,2834,2836,2867],{"className":2835},[266,267],[117,2837,2839,2864],{"className":2838},[271],[117,2840,2842,2853],{"className":2841,"style":2578},[275],[117,2843,2844,2847],{"style":2581},[117,2845],{"className":2846,"style":284},[283],[117,2848,2850],{"className":2849},[288,289,290,291],[117,2851,1451],{"className":2852},[161,291],[117,2854,2855,2858],{"style":2593},[117,2856],{"className":2857,"style":284},[283],[117,2859,2861],{"className":2860},[288,289,290,291],[117,2862,753],{"className":2863},[161,162,291],[117,2865,309],{"className":2866},[308],[117,2868,2870],{"className":2869},[271],[117,2871,2873],{"className":2872,"style":2612},[275],[117,2874],{},[117,2876,2877,2880],{"style":2617},[117,2878],{"className":2879,"style":1943},[283],[117,2881],{"className":2882,"style":2625},[2624],[117,2884,2885,2888],{"style":2628},[117,2886],{"className":2887,"style":1943},[283],[117,2889,2891],{"className":2890},[161],[117,2892,2894,2897],{"className":2893},[161],[117,2895,838],{"className":2896,"style":859},[161,162],[117,2898,2900],{"className":2899},[262],[117,2901,2903,2923],{"className":2902},[266,267],[117,2904,2906,2920],{"className":2905},[271],[117,2907,2909],{"className":2908,"style":785},[275],[117,2910,2911,2914],{"style":874},[117,2912],{"className":2913,"style":284},[283],[117,2915,2917],{"cl
assName":2916},[288,289,290,291],[117,2918,753],{"className":2919},[161,162,291],[117,2921,309],{"className":2922},[308],[117,2924,2926],{"className":2925},[271],[117,2927,2929],{"className":2928,"style":316},[275],[117,2930],{},[117,2932,309],{"className":2933},[308],[117,2935,2937],{"className":2936},[271],[117,2938,2940],{"className":2939,"style":2684},[275],[117,2941],{},[117,2943],{"className":2944},[426,2525],[99,2946,2948],{"id":2947},"ជំហានទី-៤-រូបមន្តចុងក្រោយនៃការ-update","ជំហានទី ៤ — រូបមន្តចុងក្រោយនៃការ Update",[117,2950,2952],{"className":2951},[168],[117,2953,2955,3037],{"className":2954},[120],[117,2956,2958],{"className":2957},[124],[126,2959,2960],{"xmlns":128,"display":177},[130,2961,2962,3034],{},[133,2963,2964],{},[2965,2966,2968],"menclose",{"notation":2967},"box",[2969,2970,2971],"mstyle",{"scriptlevel":2055,"displaystyle":232},[2969,2972,2973],{"scriptlevel":2055,"displaystyle":232},[2969,2974,2975],{"scriptlevel":2055,"displaystyle":148},[133,2976,2977,2989,2991,2997,2999,3022,3024],{},[183,2978,2979,2981],{},[136,2980,187],{},[133,2982,2983,2985,2987],{},[136,2984,753],{},[200,2986,984],{},[964,2988,966],{},[200,2990,202],{},[183,2992,2993,2995],{},[136,2994,187],{},[136,2996,753],{},[200,2998,220],{},[2357,3000,3001,3003],{},[136,3002,138],{},[133,3004,3005,3017,3019],{},[1900,3006,3007],{},[183,3008,3009,3015],{},[2345,3010,3011,3013],{"accent":148},[136,3012,838],{},[200,3014,2351],{},[136,3016,753],{},[200,3018,984],{},[136,3020,3021],{},"ϵ",[200,3023,969],{},[183,3025,3026,3032],{},[2345,3027,3028,3030],{"accent":148},[136,3029,750],{},[200,3031,2351],{},[136,3033,753],{},[140,3035,3036],{"encoding":142},"\\boxed{\\theta_{t+1} = \\theta_t - \\frac{\\alpha}{\\sqrt{\\hat{v}_t} + \\epsilon} \\cdot 
\\hat{m}_t}",[117,3038,3040],{"className":3039,"ariaHidden":148},[147],[117,3041,3043,3047],{"className":3042},[152],[117,3044],{"className":3045,"style":3046},[156],"height:2.7176em;vertical-align:-1.27em;",[117,3048,3050],{"className":3049},[161],[117,3051,3053,3464],{"className":3052},[266,267],[117,3054,3056,3461],{"className":3055},[271],[117,3057,3060,3449],{"className":3058,"style":3059},[275],"height:1.4476em;",[117,3061,3063,3067],{"style":3062},"top:-4.7176em;",[117,3064],{"className":3065,"style":3066},[283],"height:4.7176em;",[117,3068,3071],{"className":3069},[3070],"boxpad",[117,3072,3074],{"className":3073},[161],[117,3075,3077,3126,3129,3132,3135,3175,3178,3181,3184,3372,3375,3378,3381],{"className":3076},[161],[117,3078,3080,3083],{"className":3079},[161],[117,3081,187],{"className":3082,"style":258},[161,162],[117,3084,3086],{"className":3085},[262],[117,3087,3089,3118],{"className":3088},[266,267],[117,3090,3092,3115],{"className":3091},[271],[117,3093,3095],{"className":3094,"style":1095},[275],[117,3096,3097,3100],{"style":279},[117,3098],{"className":3099,"style":284},[283],[117,3101,3103],{"className":3102},[288,289,290,291],[117,3104,3106,3109,3112],{"className":3105},[161,291],[117,3107,753],{"className":3108},[161,162,291],[117,3110,984],{"className":3111},[395,291],[117,3113,966],{"className":3114},[161,291],[117,3116,309],{"className":3117},[308],[117,3119,3121],{"className":3120},[271],[117,3122,3124],{"className":3123,"style":1182},[275],[117,3125],{},[117,3127],{"className":3128,"style":323},[322],[117,3130,202],{"className":3131},[327],[117,3133],{"className":3134,"style":323},[322],[117,3136,3138,3141],{"className":3137},[161],[117,3139,187],{"className":3140,"style":258},[161,162],[117,3142,3144],{"className":3143},[262],[117,3145,3147,3167],{"className":3146},[266,267],[117,3148,3150,3164],{"className":3149},[271],[117,3151,3153],{"className":3152,"style":785},[275],[117,3154,3155,3158],{"style":279},[117,3156],{"className":3157,"s
tyle":284},[283],[117,3159,3161],{"className":3160},[288,289,290,291],[117,3162,753],{"className":3163},[161,162,291],[117,3165,309],{"className":3166},[308],[117,3168,3170],{"className":3169},[271],[117,3171,3173],{"className":3172,"style":316},[275],[117,3174],{},[117,3176],{"className":3177,"style":391},[322],[117,3179,220],{"className":3180},[395],[117,3182],{"className":3183,"style":391},[322],[117,3185,3187,3190,3369],{"className":3186},[161],[117,3188],{"className":3189},[419,2525],[117,3191,3193],{"className":3192},[2357],[117,3194,3196,3360],{"className":3195},[266,267],[117,3197,3199,3357],{"className":3198},[271],[117,3200,3202,3338,3346],{"className":3201,"style":2538},[275],[117,3203,3205,3208],{"style":3204},"top:-2.2528em;",[117,3206],{"className":3207,"style":1943},[283],[117,3209,3211,3326,3329,3332,3335],{"className":3210},[161],[117,3212,3214],{"className":3213},[161,1924],[117,3215,3217,3317],{"className":3216},[266,267],[117,3218,3220,3314],{"className":3219},[271],[117,3221,3224,3301],{"className":3222,"style":3223},[275],"height:0.8572em;",[117,3225,3227,3230],{"className":3226,"style":1939},[1938],[117,3228],{"className":3229,"style":1943},[283],[117,3231,3233],{"className":3232,"style":1947},[161],[117,3234,3236,3267],{"className":3235},[161],[117,3237,3239],{"className":3238},[161,2437],[117,3240,3242],{"className":3241},[266],[117,3243,3245],{"className":3244},[271],[117,3246,3248,3256],{"className":3247,"style":2447},[275],[117,3249,3250,3253],{"style":1939},[117,3251],{"className":3252,"style":1943},[283],[117,3254,838],{"className":3255,"style":859},[161,162],[117,3257,3258,3261],{"style":1939},[117,3259],{"className":3260,"style":1943},[283],[117,3262,3264],{"className":3263,"style":2733},[2464],[117,3265,2351],{"className":3266},[161],[117,3268,3270],{"className":3269},[262],[117,3271,3273,3293],{"className":3272},[266,267],[117,3274,3276,3290],{"className":3275},[271],[117,3277,3279],{"className":3278,"style":785},[275],[117,3280,328
1,3284],{"style":874},[117,3282],{"className":3283,"style":284},[283],[117,3285,3287],{"className":3286},[288,289,290,291],[117,3288,753],{"className":3289},[161,162,291],[117,3291,309],{"className":3292},[308],[117,3294,3296],{"className":3295},[271],[117,3297,3299],{"className":3298,"style":316},[275],[117,3300],{},[117,3302,3304,3307],{"style":3303},"top:-2.8172em;",[117,3305],{"className":3306,"style":1943},[283],[117,3308,3310],{"className":3309,"style":1998},[1997],[2000,3311,3312],{"xmlns":2002,"width":2003,"height":2004,"viewBox":2005,"preserveAspectRatio":2006},[2008,3313],{"d":2010},[117,3315,309],{"className":3316},[308],[117,3318,3320],{"className":3319},[271],[117,3321,3324],{"className":3322,"style":3323},[275],"height:0.1828em;",[117,3325],{},[117,3327],{"className":3328,"style":391},[322],[117,3330,984],{"className":3331},[395],[117,3333],{"className":3334,"style":391},[322],[117,3336,3021],{"className":3337},[161,162],[117,3339,3340,3343],{"style":2617},[117,3341],{"className":3342,"style":1943},[283],[117,3344],{"className":3345,"style":2625},[2624],[117,3347,3348,3351],{"style":2628},[117,3349],{"className":3350,"style":1943},[283],[117,3352,3354],{"className":3353},[161],[117,3355,138],{"className":3356,"style":163},[161,162],[117,3358,309],{"className":3359},[308],[117,3361,3363],{"className":3362},[271],[117,3364,3367],{"className":3365,"style":3366},[275],"height:0.93em;",[117,3368],{},[117,3370],{"className":3371},[426,2525],[117,3373],{"className":3374,"style":391},[322],[117,3376,969],{"className":3377},[395],[117,3379],{"className":3380,"style":391},[322],[117,3382,3384,3415],{"className":3383},[161],[117,3385,3387],{"className":3386},[161,2437],[117,3388,3390],{"className":3389},[266],[117,3391,3393],{"className":3392},[271],[117,3394,3396,3404],{"className":3395,"style":2447},[275],[117,3397,3398,3401],{"style":1939},[117,3399],{"className":3400,"style":1943},[283],[117,3402,750],{"className":3403},[161,162],[117,3405,3406,3409],{"style"
:1939},[117,3407],{"className":3408,"style":1943},[283],[117,3410,3412],{"className":3411,"style":2465},[2464],[117,3413,2351],{"className":3414},[161],[117,3416,3418],{"className":3417},[262],[117,3419,3421,3441],{"className":3420},[266,267],[117,3422,3424,3438],{"className":3423},[271],[117,3425,3427],{"className":3426,"style":785},[275],[117,3428,3429,3432],{"style":788},[117,3430],{"className":3431,"style":284},[283],[117,3433,3435],{"className":3434},[288,289,290,291],[117,3436,753],{"className":3437},[161,162,291],[117,3439,309],{"className":3440},[308],[117,3442,3444],{"className":3443},[271],[117,3445,3447],{"className":3446,"style":316},[275],[117,3448],{},[117,3450,3452,3455],{"style":3451},"top:-3.4476em;",[117,3453],{"className":3454,"style":3066},[283],[117,3456],{"className":3457,"style":3460},[3458,3459],"stretchy","fbox","height:2.7176em;border-style:solid;border-width:0.04em;",[117,3462,309],{"className":3463},[308],[117,3465,3467],{"className":3466},[271],[117,3468,3471],{"className":3469,"style":3470},[275],"height:1.27em;",[117,3472],{},[34,3474,3475],{},[78,3476,3477,3478,3577],{},"(ចំណាំ៖ ",[117,3479,3481,3510],{"className":3480},[120],[117,3482,3484],{"className":3483},[124],[126,3485,3486],{"xmlns":128},[130,3487,3488,3507],{},[133,3489,3490,3492,3495],{},[136,3491,3021],{},[200,3493,3494],{},"≈",[2250,3496,3497,3500],{},[964,3498,3499],{},"10",[133,3501,3502,3504],{},[200,3503,220],{},[964,3505,3506],{},"8",[140,3508,3509],{"encoding":142},"\\epsilon \\approx 
10^{-8}",[117,3511,3513,3532],{"className":3512,"ariaHidden":148},[147],[117,3514,3516,3520,3523,3526,3529],{"className":3515},[152],[117,3517],{"className":3518,"style":3519},[156],"height:0.4831em;",[117,3521,3021],{"className":3522},[161,162],[117,3524],{"className":3525,"style":323},[322],[117,3527,3494],{"className":3528},[327],[117,3530],{"className":3531,"style":323},[322],[117,3533,3535,3539,3542],{"className":3534},[152],[117,3536],{"className":3537,"style":3538},[156],"height:0.8141em;",[117,3540,966],{"className":3541},[161],[117,3543,3545,3548],{"className":3544},[161],[117,3546,2055],{"className":3547},[161],[117,3549,3551],{"className":3550},[262],[117,3552,3554],{"className":3553},[266],[117,3555,3557],{"className":3556},[271],[117,3558,3560],{"className":3559,"style":3538},[275],[117,3561,3562,3565],{"style":2313},[117,3563],{"className":3564,"style":284},[283],[117,3566,3568],{"className":3567},[288,289,290,291],[117,3569,3571,3574],{"className":3570},[161,291],[117,3572,220],{"className":3573},[161,291],[117,3575,3506],{"className":3576},[161,291]," ដើម្បីការពារកុំឱ្យមានការចែកនឹងសូន្យ)",[34,3579,3580],{},[59,3581,3582,3583,3586],{},"Hyperparameters លំនាំដើមតាមក្រដាសសំណើដើម ",[44,3584,75],{"href":72,"className":3585},[52,53,74],":",[702,3588,3589,3601],{},[705,3590,3591],{},[708,3592,3593,3596,3598],{},[711,3594,3595],{},"Hyperparameter",[711,3597,716],{},[711,3599,3600],{},"Default",[721,3602,3603,3641,3719,3799],{},[708,3604,3605,3608,3638],{},[726,3606,3607],{},"Learning 
rate",[726,3609,3610],{},[117,3611,3613,3626],{"className":3612},[120],[117,3614,3616],{"className":3615},[124],[126,3617,3618],{"xmlns":128},[130,3619,3620,3624],{},[133,3621,3622],{},[136,3623,138],{},[140,3625,143],{"encoding":142},[117,3627,3629],{"className":3628,"ariaHidden":148},[147],[117,3630,3632,3635],{"className":3631},[152],[117,3633],{"className":3634,"style":157},[156],[117,3636,138],{"className":3637,"style":163},[161,162],[726,3639,3640],{},"0.001",[708,3642,3643,3646,3717],{},[726,3644,3645],{},"1st moment decay",[726,3647,3648],{},[117,3649,3651,3668],{"className":3650},[120],[117,3652,3654],{"className":3653},[124],[126,3655,3656],{"xmlns":128},[130,3657,3658,3666],{},[133,3659,3660],{},[183,3661,3662,3664],{},[136,3663,962],{},[964,3665,966],{},[140,3667,1344],{"encoding":142},[117,3669,3671],{"className":3670,"ariaHidden":148},[147],[117,3672,3674,3677],{"className":3673},[152],[117,3675],{"className":3676,"style":1075},[156],[117,3678,3680,3683],{"className":3679},[161],[117,3681,962],{"className":3682,"style":1082},[161,162],[117,3684,3686],{"className":3685},[262],[117,3687,3689,3709],{"className":3688},[266,267],[117,3690,3692,3706],{"className":3691},[271],[117,3693,3695],{"className":3694,"style":1095},[275],[117,3696,3697,3700],{"style":1098},[117,3698],{"className":3699,"style":284},[283],[117,3701,3703],{"className":3702},[288,289,290,291],[117,3704,966],{"className":3705},[161,291],[117,3707,309],{"className":3708},[308],[117,3710,3712],{"className":3711},[271],[117,3713,3715],{"className":3714,"style":316},[275],[117,3716],{},[726,3718,1397],{},[708,3720,3721,3724,3796],{},[726,3722,3723],{},"2nd moment 
decay",[726,3725,3726],{},[117,3727,3729,3747],{"className":3728},[120],[117,3730,3732],{"className":3731},[124],[126,3733,3734],{"xmlns":128},[130,3735,3736,3744],{},[133,3737,3738],{},[183,3739,3740,3742],{},[136,3741,962],{},[964,3743,1451],{},[140,3745,3746],{"encoding":142},"\\beta_2",[117,3748,3750],{"className":3749,"ariaHidden":148},[147],[117,3751,3753,3756],{"className":3752},[152],[117,3754],{"className":3755,"style":1075},[156],[117,3757,3759,3762],{"className":3758},[161],[117,3760,962],{"className":3761,"style":1082},[161,162],[117,3763,3765],{"className":3764},[262],[117,3766,3768,3788],{"className":3767},[266,267],[117,3769,3771,3785],{"className":3770},[271],[117,3772,3774],{"className":3773,"style":1095},[275],[117,3775,3776,3779],{"style":1098},[117,3777],{"className":3778,"style":284},[283],[117,3780,3782],{"className":3781},[288,289,290,291],[117,3783,1451],{"className":3784},[161,291],[117,3786,309],{"className":3787},[308],[117,3789,3791],{"className":3790},[271],[117,3792,3794],{"className":3793,"style":316},[275],[117,3795],{},[726,3797,3798],{},"0.999",[708,3800,3801,3804,3835],{},[726,3802,3803],{},"Numerical 
stability",[726,3805,3806],{},[117,3807,3809,3823],{"className":3808},[120],[117,3810,3812],{"className":3811},[124],[126,3813,3814],{"xmlns":128},[130,3815,3816,3820],{},[133,3817,3818],{},[136,3819,3021],{},[140,3821,3822],{"encoding":142},"\\epsilon",[117,3824,3826],{"className":3825,"ariaHidden":148},[147],[117,3827,3829,3832],{"className":3828},[152],[117,3830],{"className":3831,"style":157},[156],[117,3833,3021],{"className":3834},[161,162],[726,3836,3837],{},[117,3838,3840,3862],{"className":3839},[120],[117,3841,3843],{"className":3842},[124],[126,3844,3845],{"xmlns":128},[130,3846,3847,3859],{},[133,3848,3849],{},[2250,3850,3851,3853],{},[964,3852,3499],{},[133,3854,3855,3857],{},[200,3856,220],{},[964,3858,3506],{},[140,3860,3861],{"encoding":142},"10^{-8}",[117,3863,3865],{"className":3864,"ariaHidden":148},[147],[117,3866,3868,3871,3874],{"className":3867},[152],[117,3869],{"className":3870,"style":3538},[156],[117,3872,966],{"className":3873},[161],[117,3875,3877,3880],{"className":3876},[161],[117,3878,2055],{"className":3879},[161],[117,3881,3883],{"className":3882},[262],[117,3884,3886],{"className":3885},[266],[117,3887,3889],{"className":3888},[271],[117,3890,3892],{"className":3891,"style":3538},[275],[117,3893,3894,3897],{"style":2313},[117,3895],{"className":3896,"style":284},[283],[117,3898,3900],{"className":3899},[288,289,290,291],[117,3901,3903,3906],{"className":3902},[161,291],[117,3904,220],{"className":3905},[161,291],[117,3907,3506],{"className":3908},[161,291],[91,3910],{},[94,3912,3914],{"id":3913},"ឧទាហរណ៍ជាក់ស្តែង-adam-ដំណើរការ","ឧទាហរណ៍ជាក់ស្តែង: Adam ដំណើរការ",[34,3916,3917],{},"សូមតាមដាន Adam ដោយដៃ លើអនុគមន៍សាមញ្ញដូចដែលយើងប្រើក្នុង Gradient 
Descent:",[117,3919,3921],{"className":3920},[168],[117,3922,3924,3972],{"className":3923},[120],[117,3925,3927],{"className":3926},[124],[126,3928,3929],{"xmlns":128,"display":177},[130,3930,3931,3969],{},[133,3932,3933,3935,3937,3939,3941,3943,3949,3951,3953,3955,3957,3959,3961,3963,3965,3967],{},[136,3934,229],{},[200,3936,233],{"stretchy":232},[136,3938,187],{},[200,3940,238],{"stretchy":232},[200,3942,202],{},[2250,3944,3945,3947],{},[136,3946,187],{},[964,3948,1451],{},[200,3950,2381],{"separator":148},[322,3952],{"width":2384},[136,3954,226],{"mathvariant":225},[136,3956,229],{},[200,3958,233],{"stretchy":232},[136,3960,187],{},[200,3962,238],{"stretchy":232},[200,3964,202],{},[964,3966,1451],{},[136,3968,187],{},[140,3970,3971],{"encoding":142},"J(\\theta) = \\theta^2, \\qquad \\nabla J(\\theta) = 2\\theta",[117,3973,3975,4002,4071],{"className":3974,"ariaHidden":148},[147],[117,3976,3978,3981,3984,3987,3990,3993,3996,3999],{"className":3977},[152],[117,3979],{"className":3980,"style":405},[156],[117,3982,229],{"className":3983,"style":415},[161,162],[117,3985,233],{"className":3986},[419],[117,3988,187],{"className":3989,"style":258},[161,162],[117,3991,238],{"className":3992},[426],[117,3994],{"className":3995,"style":323},[322],[117,3997,202],{"className":3998},[327],[117,4000],{"className":4001,"style":323},[322],[117,4003,4005,4009,4038,4041,4044,4047,4050,4053,4056,4059,4062,4065,4068],{"className":4004},[152],[117,4006],{"className":4007,"style":4008},[156],"height:1.1141em;vertical-align:-0.25em;",[117,4010,4012,4015],{"className":4011},[161],[117,4013,187],{"className":4014,"style":258},[161,162],[117,4016,4018],{"className":4017},[262],[117,4019,4021],{"className":4020},[266],[117,4022,4024],{"className":4023},[271],[117,4025,4027],{"className":4026,"style":1777},[275],[117,4028,4029,4032],{"style":1792},[117,4030],{"className":4031,"style":284},[283],[117,4033,4035],{"className":4034},[288,289,290,291],[117,4036,1451],{"className":4037},[161,291],
[117,4039,2381],{"className":4040},[2693],[117,4042],{"className":4043,"style":2697},[322],[117,4045],{"className":4046,"style":2701},[322],[117,4048,226],{"className":4049},[161],[117,4051,229],{"className":4052,"style":415},[161,162],[117,4054,233],{"className":4055},[419],[117,4057,187],{"className":4058,"style":258},[161,162],[117,4060,238],{"className":4061},[426],[117,4063],{"className":4064,"style":323},[322],[117,4066,202],{"className":4067},[327],[117,4069],{"className":4070,"style":323},[322],[117,4072,4074,4077,4080],{"className":4073},[152],[117,4075],{"className":4076,"style":2447},[156],[117,4078,1451],{"className":4079},[161],[117,4081,187],{"className":4082,"style":258},[161,162],[34,4084,4085,4086,4179,4180,4231,4232,4231,4324,4231,4416,4510],{},"ចាប់ផ្តើមនៅ ",[117,4087,4089,4112],{"className":4088},[120],[117,4090,4092],{"className":4091},[124],[126,4093,4094],{"xmlns":128},[130,4095,4096,4109],{},[133,4097,4098,4104,4106],{},[183,4099,4100,4102],{},[136,4101,187],{},[964,4103,2055],{},[200,4105,202],{},[964,4107,4108],{},"5",[140,4110,4111],{"encoding":142},"\\theta_0 = 
5",[117,4113,4115,4170],{"className":4114,"ariaHidden":148},[147],[117,4116,4118,4121,4161,4164,4167],{"className":4117},[152],[117,4119],{"className":4120,"style":251},[156],[117,4122,4124,4127],{"className":4123},[161],[117,4125,187],{"className":4126,"style":258},[161,162],[117,4128,4130],{"className":4129},[262],[117,4131,4133,4153],{"className":4132},[266,267],[117,4134,4136,4150],{"className":4135},[271],[117,4137,4139],{"className":4138,"style":1095},[275],[117,4140,4141,4144],{"style":279},[117,4142],{"className":4143,"style":284},[283],[117,4145,4147],{"className":4146},[288,289,290,291],[117,4148,2055],{"className":4149},[161,291],[117,4151,309],{"className":4152},[308],[117,4154,4156],{"className":4155},[271],[117,4157,4159],{"className":4158,"style":316},[275],[117,4160],{},[117,4162],{"className":4163,"style":323},[322],[117,4165,202],{"className":4166},[327],[117,4168],{"className":4169,"style":323},[322],[117,4171,4173,4176],{"className":4172},[152],[117,4174],{"className":4175,"style":2127},[156],[117,4177,4108],{"className":4178},[161]," ជាមួយ hyperparameters លំនាំដើម (",[117,4181,4183,4201],{"className":4182},[120],[117,4184,4186],{"className":4185},[124],[126,4187,4188],{"xmlns":128},[130,4189,4190,4198],{},[133,4191,4192,4194,4196],{},[136,4193,138],{},[200,4195,202],{},[964,4197,3640],{},[140,4199,4200],{"encoding":142},"\\alpha = 0.001",[117,4202,4204,4222],{"className":4203,"ariaHidden":148},[147],[117,4205,4207,4210,4213,4216,4219],{"className":4206},[152],[117,4208],{"className":4209,"style":157},[156],[117,4211,138],{"className":4212,"style":163},[161,162],[117,4214],{"className":4215,"style":323},[322],[117,4217,202],{"className":4218},[327],[117,4220],{"className":4221,"style":323},[322],[117,4223,4225,4228],{"className":4224},[152],[117,4226],{"className":4227,"style":2127},[156],[117,4229,3640],{"className":4230},[161],", 
",[117,4233,4235,4257],{"className":4234},[120],[117,4236,4238],{"className":4237},[124],[126,4239,4240],{"xmlns":128},[130,4241,4242,4254],{},[133,4243,4244,4250,4252],{},[183,4245,4246,4248],{},[136,4247,962],{},[964,4249,966],{},[200,4251,202],{},[964,4253,1397],{},[140,4255,4256],{"encoding":142},"\\beta_1 = 0.9",[117,4258,4260,4315],{"className":4259,"ariaHidden":148},[147],[117,4261,4263,4266,4306,4309,4312],{"className":4262},[152],[117,4264],{"className":4265,"style":1075},[156],[117,4267,4269,4272],{"className":4268},[161],[117,4270,962],{"className":4271,"style":1082},[161,162],[117,4273,4275],{"className":4274},[262],[117,4276,4278,4298],{"className":4277},[266,267],[117,4279,4281,4295],{"className":4280},[271],[117,4282,4284],{"className":4283,"style":1095},[275],[117,4285,4286,4289],{"style":1098},[117,4287],{"className":4288,"style":284},[283],[117,4290,4292],{"className":4291},[288,289,290,291],[117,4293,966],{"className":4294},[161,291],[117,4296,309],{"className":4297},[308],[117,4299,4301],{"className":4300},[271],[117,4302,4304],{"className":4303,"style":316},[275],[117,4305],{},[117,4307],{"className":4308,"style":323},[322],[117,4310,202],{"className":4311},[327],[117,4313],{"className":4314,"style":323},[322],[117,4316,4318,4321],{"className":4317},[152],[117,4319],{"className":4320,"style":2127},[156],[117,4322,1397],{"className":4323},[161],[117,4325,4327,4349],{"className":4326},[120],[117,4328,4330],{"className":4329},[124],[126,4331,4332],{"xmlns":128},[130,4333,4334,4346],{},[133,4335,4336,4342,4344],{},[183,4337,4338,4340],{},[136,4339,962],{},[964,4341,1451],{},[200,4343,202],{},[964,4345,3798],{},[140,4347,4348],{"encoding":142},"\\beta_2 = 
0.999",[117,4350,4352,4407],{"className":4351,"ariaHidden":148},[147],[117,4353,4355,4358,4398,4401,4404],{"className":4354},[152],[117,4356],{"className":4357,"style":1075},[156],[117,4359,4361,4364],{"className":4360},[161],[117,4362,962],{"className":4363,"style":1082},[161,162],[117,4365,4367],{"className":4366},[262],[117,4368,4370,4390],{"className":4369},[266,267],[117,4371,4373,4387],{"className":4372},[271],[117,4374,4376],{"className":4375,"style":1095},[275],[117,4377,4378,4381],{"style":1098},[117,4379],{"className":4380,"style":284},[283],[117,4382,4384],{"className":4383},[288,289,290,291],[117,4385,1451],{"className":4386},[161,291],[117,4388,309],{"className":4389},[308],[117,4391,4393],{"className":4392},[271],[117,4394,4396],{"className":4395,"style":316},[275],[117,4397],{},[117,4399],{"className":4400,"style":323},[322],[117,4402,202],{"className":4403},[327],[117,4405],{"className":4406,"style":323},[322],[117,4408,4410,4413],{"className":4409},[152],[117,4411],{"className":4412,"style":2127},[156],[117,4414,3798],{"className":4415},[161],[117,4417,4419,4445],{"className":4418},[120],[117,4420,4422],{"className":4421},[124],[126,4423,4424],{"xmlns":128},[130,4425,4426,4442],{},[133,4427,4428,4430,4432],{},[136,4429,3021],{},[200,4431,202],{},[2250,4433,4434,4436],{},[964,4435,3499],{},[133,4437,4438,4440],{},[200,4439,220],{},[964,4441,3506],{},[140,4443,4444],{"encoding":142},"\\epsilon = 
10^{-8}",[117,4446,4448,4466],{"className":4447,"ariaHidden":148},[147],[117,4449,4451,4454,4457,4460,4463],{"className":4450},[152],[117,4452],{"className":4453,"style":157},[156],[117,4455,3021],{"className":4456},[161,162],[117,4458],{"className":4459,"style":323},[322],[117,4461,202],{"className":4462},[327],[117,4464],{"className":4465,"style":323},[322],[117,4467,4469,4472,4475],{"className":4468},[152],[117,4470],{"className":4471,"style":3538},[156],[117,4473,966],{"className":4474},[161],[117,4476,4478,4481],{"className":4477},[161],[117,4479,2055],{"className":4480},[161],[117,4482,4484],{"className":4483},[262],[117,4485,4487],{"className":4486},[266],[117,4488,4490],{"className":4489},[271],[117,4491,4493],{"className":4492,"style":3538},[275],[117,4494,4495,4498],{"style":2313},[117,4496],{"className":4497,"style":284},[283],[117,4499,4501],{"className":4500},[288,289,290,291],[117,4502,4504,4507],{"className":4503},[161,291],[117,4505,220],{"className":4506},[161,291],[117,4508,3506],{"className":4509},[161,291],")។",[34,4512,4513,4514,4231,4605,4696],{},"ចាប់ផ្តើម: 
",[117,4515,4517,4538],{"className":4516},[120],[117,4518,4520],{"className":4519},[124],[126,4521,4522],{"xmlns":128},[130,4523,4524,4536],{},[133,4525,4526,4532,4534],{},[183,4527,4528,4530],{},[136,4529,750],{},[964,4531,2055],{},[200,4533,202],{},[964,4535,2055],{},[140,4537,2062],{"encoding":142},[117,4539,4541,4596],{"className":4540,"ariaHidden":148},[147],[117,4542,4544,4547,4587,4590,4593],{"className":4543},[152],[117,4545],{"className":4546,"style":766},[156],[117,4548,4550,4553],{"className":4549},[161],[117,4551,750],{"className":4552},[161,162],[117,4554,4556],{"className":4555},[262],[117,4557,4559,4579],{"className":4558},[266,267],[117,4560,4562,4576],{"className":4561},[271],[117,4563,4565],{"className":4564,"style":1095},[275],[117,4566,4567,4570],{"style":788},[117,4568],{"className":4569,"style":284},[283],[117,4571,4573],{"className":4572},[288,289,290,291],[117,4574,2055],{"className":4575},[161,291],[117,4577,309],{"className":4578},[308],[117,4580,4582],{"className":4581},[271],[117,4583,4585],{"className":4584,"style":316},[275],[117,4586],{},[117,4588],{"className":4589,"style":323},[322],[117,4591,202],{"className":4592},[327],[117,4594],{"className":4595,"style":323},[322],[117,4597,4599,4602],{"className":4598},[152],[117,4600],{"className":4601,"style":2127},[156],[117,4603,2055],{"className":4604},[161],[117,4606,4608,4629],{"className":4607},[120],[117,4609,4611],{"className":4610},[124],[126,4612,4613],{"xmlns":128},[130,4614,4615,4627],{},[133,4616,4617,4623,4625],{},[183,4618,4619,4621],{},[136,4620,838],{},[964,4622,2055],{},[200,4624,202],{},[964,4626,2055],{},[140,4628,2156],{"encoding":142},[117,4630,4632,4687],{"className":4631,"ariaHidden":148},[147],[117,4633,4635,4638,4678,4681,4684],{"className":4634},[152],[117,4636],{"className":4637,"style":766},[156],[117,4639,4641,4644],{"className":4640},[161],[117,4642,838],{"className":4643,"style":859},[161,162],[117,4645,4647],{"className":4646},[262],[117,4648,4650,4670],{"clas
sName":4649},[266,267],[117,4651,4653,4667],{"className":4652},[271],[117,4654,4656],{"className":4655,"style":1095},[275],[117,4657,4658,4661],{"style":874},[117,4659],{"className":4660,"style":284},[283],[117,4662,4664],{"className":4663},[288,289,290,291],[117,4665,2055],{"className":4666},[161,291],[117,4668,309],{"className":4669},[308],[117,4671,4673],{"className":4672},[271],[117,4674,4676],{"className":4675,"style":316},[275],[117,4677],{},[117,4679],{"className":4680,"style":323},[322],[117,4682,202],{"className":4683},[327],[117,4685],{"className":4686,"style":323},[322],[117,4688,4690,4693],{"className":4689},[152],[117,4691],{"className":4692,"style":2127},[156],[117,4694,2055],{"className":4695},[161],"។",[91,4698],{},[34,4700,4701,585,4756],{},[59,4702,4703,4704,3586],{},"ជំហាន ",[117,4705,4707,4725],{"className":4706},[120],[117,4708,4710],{"className":4709},[124],[126,4711,4712],{"xmlns":128},[130,4713,4714,4722],{},[133,4715,4716,4718,4720],{},[136,4717,753],{},[200,4719,202],{},[964,4721,966],{},[140,4723,4724],{"encoding":142},"t=1",[117,4726,4728,4747],{"className":4727,"ariaHidden":148},[147],[117,4729,4731,4735,4738,4741,4744],{"className":4730},[152],[117,4732],{"className":4733,"style":4734},[156],"height:0.6151em;",[117,4736,753],{"className":4737},[161,162],[117,4739],{"className":4740,"style":323},[322],[117,4742,202],{"className":4743},[327],[117,4745],{"className":4746,"style":323},[322],[117,4748,4750,4753],{"className":4749},[152],[117,4751],{"className":4752,"style":2127},[156],[117,4754,966],{"className":4755},[161],[117,4757,4759,4790],{"className":4758},[120],[117,4760,4762],{"className":4761},[124],[126,4763,4764],{"xmlns":128},[130,4765,4766,4787],{},[133,4767,4768,4774,4776,4778,4781,4783,4785],{},[183,4769,4770,4772],{},[136,4771,1005],{},[964,4773,966],{},[200,4775,202],{},[964,4777,1451],{},[200,4779,4780],{},"×",[964,4782,4108],{},[200,4784,202],{},[964,4786,3499],{},[140,4788,4789],{"encoding":142},"g_1 = 2 \\times 5 = 
10",[117,4791,4793,4848,4867,4885],{"className":4792,"ariaHidden":148},[147],[117,4794,4796,4799,4839,4842,4845],{"className":4795},[152],[117,4797],{"className":4798,"style":1279},[156],[117,4800,4802,4805],{"className":4801},[161],[117,4803,1005],{"className":4804,"style":859},[161,162],[117,4806,4808],{"className":4807},[262],[117,4809,4811,4831],{"className":4810},[266,267],[117,4812,4814,4828],{"className":4813},[271],[117,4815,4817],{"className":4816,"style":1095},[275],[117,4818,4819,4822],{"style":874},[117,4820],{"className":4821,"style":284},[283],[117,4823,4825],{"className":4824},[288,289,290,291],[117,4826,966],{"className":4827},[161,291],[117,4829,309],{"className":4830},[308],[117,4832,4834],{"className":4833},[271],[117,4835,4837],{"className":4836,"style":316},[275],[117,4838],{},[117,4840],{"className":4841,"style":323},[322],[117,4843,202],{"className":4844},[327],[117,4846],{"className":4847,"style":323},[322],[117,4849,4851,4855,4858,4861,4864],{"className":4850},[152],[117,4852],{"className":4853,"style":4854},[156],"height:0.7278em;vertical-align:-0.0833em;",[117,4856,1451],{"className":4857},[161],[117,4859],{"className":4860,"style":391},[322],[117,4862,4780],{"className":4863},[395],[117,4865],{"className":4866,"style":391},[322],[117,4868,4870,4873,4876,4879,4882],{"className":4869},[152],[117,4871],{"className":4872,"style":2127},[156],[117,4874,4108],{"className":4875},[161],[117,4877],{"className":4878,"style":323},[322],[117,4880,202],{"className":4881},[327],[117,4883],{"className":4884,"style":323},[322],[117,4886,4888,4891],{"className":4887},[152],[117,4889],{"className":4890,"style":2127},[156],[117,4892,3499],{"className":4893},[161],[117,4895,4897],{"className":4896},[168],[117,4898,4900,4940],{"className":4899},[120],[117,4901,4903],{"className":4902},[124],[126,4904,4905],{"xmlns":128,"display":177},[130,4906,4907,4937],{},[133,4908,4909,4915,4917,4919,4921,4923,4925,4928,4930,4932,4934],{},[183,4910,4911,4913],{},[136,4912,7
50],{},[964,4914,966],{},[200,4916,202],{},[964,4918,1397],{},[200,4920,4780],{},[964,4922,2055],{},[200,4924,984],{},[964,4926,4927],{},"0.1",[200,4929,4780],{},[964,4931,3499],{},[200,4933,202],{},[964,4935,4936],{},"1.0",[140,4938,4939],{"encoding":142},"m_1 = 0.9 \\times 0 + 0.1 \\times 10 = 1.0",[117,4941,4943,4998,5016,5034,5052,5070],{"className":4942,"ariaHidden":148},[147],[117,4944,4946,4949,4989,4992,4995],{"className":4945},[152],[117,4947],{"className":4948,"style":766},[156],[117,4950,4952,4955],{"className":4951},[161],[117,4953,750],{"className":4954},[161,162],[117,4956,4958],{"className":4957},[262],[117,4959,4961,4981],{"className":4960},[266,267],[117,4962,4964,4978],{"className":4963},[271],[117,4965,4967],{"className":4966,"style":1095},[275],[117,4968,4969,4972],{"style":788},[117,4970],{"className":4971,"style":284},[283],[117,4973,4975],{"className":4974},[288,289,290,291],[117,4976,966],{"className":4977},[161,291],[117,4979,309],{"className":4980},[308],[117,4982,4984],{"className":4983},[271],[117,4985,4987],{"className":4986,"style":316},[275],[117,4988],{},[117,4990],{"className":4991,"style":323},[322],[117,4993,202],{"className":4994},[327],[117,4996],{"className":4997,"style":323},[322],[117,4999,5001,5004,5007,5010,5013],{"className":5000},[152],[117,5002],{"className":5003,"style":4854},[156],[117,5005,1397],{"className":5006},[161],[117,5008],{"className":5009,"style":391},[322],[117,5011,4780],{"className":5012},[395],[117,5014],{"className":5015,"style":391},[322],[117,5017,5019,5022,5025,5028,5031],{"className":5018},[152],[117,5020],{"className":5021,"style":4854},[156],[117,5023,2055],{"className":5024},[161],[117,5026],{"className":5027,"style":391},[322],[117,5029,984],{"className":5030},[395],[117,5032],{"className":5033,"style":391},[322],[117,5035,5037,5040,5043,5046,5049],{"className":5036},[152],[117,5038],{"className":5039,"style":4854},[156],[117,5041,4927],{"className":5042},[161],[117,5044],{"className":5045,"style
":391},[322],[117,5047,4780],{"className":5048},[395],[117,5050],{"className":5051,"style":391},[322],[117,5053,5055,5058,5061,5064,5067],{"className":5054},[152],[117,5056],{"className":5057,"style":2127},[156],[117,5059,3499],{"className":5060},[161],[117,5062],{"className":5063,"style":323},[322],[117,5065,202],{"className":5066},[327],[117,5068],{"className":5069,"style":323},[322],[117,5071,5073,5076],{"className":5072},[152],[117,5074],{"className":5075,"style":2127},[156],[117,5077,4936],{"className":5078},[161],[117,5080,5082],{"className":5081},[168],[117,5083,5085,5124],{"className":5084},[120],[117,5086,5088],{"className":5087},[124],[126,5089,5090],{"xmlns":128,"display":177},[130,5091,5092,5121],{},[133,5093,5094,5100,5102,5104,5106,5108,5110,5112,5114,5117,5119],{},[183,5095,5096,5098],{},[136,5097,838],{},[964,5099,966],{},[200,5101,202],{},[964,5103,3798],{},[200,5105,4780],{},[964,5107,2055],{},[200,5109,984],{},[964,5111,3640],{},[200,5113,4780],{},[964,5115,5116],{},"100",[200,5118,202],{},[964,5120,4927],{},[140,5122,5123],{"encoding":142},"v_1 = 0.999 \\times 0 + 0.001 \\times 100 = 
0.1",[117,5125,5127,5182,5200,5218,5236,5254],{"className":5126,"ariaHidden":148},[147],[117,5128,5130,5133,5173,5176,5179],{"className":5129},[152],[117,5131],{"className":5132,"style":766},[156],[117,5134,5136,5139],{"className":5135},[161],[117,5137,838],{"className":5138,"style":859},[161,162],[117,5140,5142],{"className":5141},[262],[117,5143,5145,5165],{"className":5144},[266,267],[117,5146,5148,5162],{"className":5147},[271],[117,5149,5151],{"className":5150,"style":1095},[275],[117,5152,5153,5156],{"style":874},[117,5154],{"className":5155,"style":284},[283],[117,5157,5159],{"className":5158},[288,289,290,291],[117,5160,966],{"className":5161},[161,291],[117,5163,309],{"className":5164},[308],[117,5166,5168],{"className":5167},[271],[117,5169,5171],{"className":5170,"style":316},[275],[117,5172],{},[117,5174],{"className":5175,"style":323},[322],[117,5177,202],{"className":5178},[327],[117,5180],{"className":5181,"style":323},[322],[117,5183,5185,5188,5191,5194,5197],{"className":5184},[152],[117,5186],{"className":5187,"style":4854},[156],[117,5189,3798],{"className":5190},[161],[117,5192],{"className":5193,"style":391},[322],[117,5195,4780],{"className":5196},[395],[117,5198],{"className":5199,"style":391},[322],[117,5201,5203,5206,5209,5212,5215],{"className":5202},[152],[117,5204],{"className":5205,"style":4854},[156],[117,5207,2055],{"className":5208},[161],[117,5210],{"className":5211,"style":391},[322],[117,5213,984],{"className":5214},[395],[117,5216],{"className":5217,"style":391},[322],[117,5219,5221,5224,5227,5230,5233],{"className":5220},[152],[117,5222],{"className":5223,"style":4854},[156],[117,5225,3640],{"className":5226},[161],[117,5228],{"className":5229,"style":391},[322],[117,5231,4780],{"className":5232},[395],[117,5234],{"className":5235,"style":391},[322],[117,5237,5239,5242,5245,5248,5251],{"className":5238},[152],[117,5240],{"className":5241,"style":2127},[156],[117,5243,5116],{"className":5244},[161],[117,5246],{"className":5247,"st
yle":323},[322],[117,5249,202],{"className":5250},[327],[117,5252],{"className":5253,"style":323},[322],[117,5255,5257,5260],{"className":5256},[152],[117,5258],{"className":5259,"style":2127},[156],[117,5261,4927],{"className":5262},[161],[117,5264,5266],{"className":5265},[168],[117,5267,5269,5322],{"className":5268},[120],[117,5270,5272],{"className":5271},[124],[126,5273,5274],{"xmlns":128,"display":177},[130,5275,5276,5319],{},[133,5277,5278,5288,5290,5306,5308,5314,5316],{},[183,5279,5280,5286],{},[2345,5281,5282,5284],{"accent":148},[136,5283,750],{},[200,5285,2351],{},[964,5287,966],{},[200,5289,202],{},[2357,5291,5292,5294],{},[964,5293,4936],{},[133,5295,5296,5298,5300],{},[964,5297,966],{},[200,5299,220],{},[2250,5301,5302,5304],{},[964,5303,1397],{},[964,5305,966],{},[200,5307,202],{},[2357,5309,5310,5312],{},[964,5311,4936],{},[964,5313,4927],{},[200,5315,202],{},[964,5317,5318],{},"10.0",[140,5320,5321],{"encoding":142},"\\hat{m}_1 = \\frac{1.0}{1 - 0.9^1} = \\frac{1.0}{0.1} = 
10.0",[117,5323,5325,5408,5533,5612],{"className":5324,"ariaHidden":148},[147],[117,5326,5328,5331,5399,5402,5405],{"className":5327},[152],[117,5329],{"className":5330,"style":251},[156],[117,5332,5334,5365],{"className":5333},[161],[117,5335,5337],{"className":5336},[161,2437],[117,5338,5340],{"className":5339},[266],[117,5341,5343],{"className":5342},[271],[117,5344,5346,5354],{"className":5345,"style":2447},[275],[117,5347,5348,5351],{"style":1939},[117,5349],{"className":5350,"style":1943},[283],[117,5352,750],{"className":5353},[161,162],[117,5355,5356,5359],{"style":1939},[117,5357],{"className":5358,"style":1943},[283],[117,5360,5362],{"className":5361,"style":2465},[2464],[117,5363,2351],{"className":5364},[161],[117,5366,5368],{"className":5367},[262],[117,5369,5371,5391],{"className":5370},[266,267],[117,5372,5374,5388],{"className":5373},[271],[117,5375,5377],{"className":5376,"style":1095},[275],[117,5378,5379,5382],{"style":788},[117,5380],{"className":5381,"style":284},[283],[117,5383,5385],{"className":5384},[288,289,290,291],[117,5386,966],{"className":5387},[161,291],[117,5389,309],{"className":5390},[308],[117,5392,5394],{"className":5393},[271],[117,5395,5397],{"className":5396,"style":316},[275],[117,5398],{},[117,5400],{"className":5401,"style":323},[322],[117,5403,202],{"className":5404},[327],[117,5406],{"className":5407,"style":323},[322],[117,5409,5411,5415,5524,5527,5530],{"className":5410},[152],[117,5412],{"className":5413,"style":5414},[156],"height:2.0908em;vertical-align:-0.7693em;",[117,5416,5418,5421,5521],{"className":5417},[161],[117,5419],{"className":5420},[419,2525],[117,5422,5424],{"className":5423},[2357],[117,5425,5427,5512],{"className":5426},[266,267],[117,5428,5430,5509],{"className":5429},[271],[117,5431,5434,5490,5498],{"className":5432,"style":5433},[275],"height:1.3214em;",[117,5435,5436,5439],{"style":2541},[117,5437],{"className":5438,"style":1943},[283],[117,5440,5442,5445,5448,5451,5454,5458],{"className":5441},[1
61],[117,5443,966],{"className":5444},[161],[117,5446],{"className":5447,"style":391},[322],[117,5449,220],{"className":5450},[395],[117,5452],{"className":5453,"style":391},[322],[117,5455,5457],{"className":5456},[161],"0.",[117,5459,5461,5465],{"className":5460},[161],[117,5462,5464],{"className":5463},[161],"9",[117,5466,5468],{"className":5467},[262],[117,5469,5471],{"className":5470},[266],[117,5472,5474],{"className":5473},[271],[117,5475,5478],{"className":5476,"style":5477},[275],"height:0.7401em;",[117,5479,5481,5484],{"style":5480},"top:-2.989em;margin-right:0.05em;",[117,5482],{"className":5483,"style":284},[283],[117,5485,5487],{"className":5486},[288,289,290,291],[117,5488,966],{"className":5489},[161,291],[117,5491,5492,5495],{"style":2617},[117,5493],{"className":5494,"style":1943},[283],[117,5496],{"className":5497,"style":2625},[2624],[117,5499,5500,5503],{"style":2628},[117,5501],{"className":5502,"style":1943},[283],[117,5504,5506],{"className":5505},[161],[117,5507,4936],{"className":5508},[161],[117,5510,309],{"className":5511},[308],[117,5513,5515],{"className":5514},[271],[117,5516,5519],{"className":5517,"style":5518},[275],"height:0.7693em;",[117,5520],{},[117,5522],{"className":5523},[426,2525],[117,5525],{"className":5526,"style":323},[322],[117,5528,202],{"className":5529},[327],[117,5531],{"className":5532,"style":323},[322],[117,5534,5536,5540,5603,5606,5609],{"className":5535},[152],[117,5537],{"className":5538,"style":5539},[156],"height:2.0074em;vertical-align:-0.686em;",[117,5541,5543,5546,5600],{"className":5542},[161],[117,5544],{"className":5545},[419,2525],[117,5547,5549],{"className":5548},[2357],[117,5550,5552,5591],{"className":5551},[266,267],[117,5553,5555,5588],{"className":5554},[271],[117,5556,5558,5569,5577],{"className":5557,"style":5433},[275],[117,5559,5560,5563],{"style":2541},[117,5561],{"className":5562,"style":1943},[283],[117,5564,5566],{"className":5565},[161],[117,5567,4927],{"className":5568},[161],[117,5570
,5571,5574],{"style":2617},[117,5572],{"className":5573,"style":1943},[283],[117,5575],{"className":5576,"style":2625},[2624],[117,5578,5579,5582],{"style":2628},[117,5580],{"className":5581,"style":1943},[283],[117,5583,5585],{"className":5584},[161],[117,5586,4936],{"className":5587},[161],[117,5589,309],{"className":5590},[308],[117,5592,5594],{"className":5593},[271],[117,5595,5598],{"className":5596,"style":5597},[275],"height:0.686em;",[117,5599],{},[117,5601],{"className":5602},[426,2525],[117,5604],{"className":5605,"style":323},[322],[117,5607,202],{"className":5608},[327],[117,5610],{"className":5611,"style":323},[322],[117,5613,5615,5618],{"className":5614},[152],[117,5616],{"className":5617,"style":2127},[156],[117,5619,5318],{"className":5620},[161],[117,5622,5624],{"className":5623},[168],[117,5625,5627,5680],{"className":5626},[120],[117,5628,5630],{"className":5629},[124],[126,5631,5632],{"xmlns":128,"display":177},[130,5633,5634,5677],{},[133,5635,5636,5646,5648,5664,5666,5672,5674],{},[183,5637,5638,5644],{},[2345,5639,5640,5642],{"accent":148},[136,5641,838],{},[200,5643,2351],{},[964,5645,966],{},[200,5647,202],{},[2357,5649,5650,5652],{},[964,5651,4927],{},[133,5653,5654,5656,5658],{},[964,5655,966],{},[200,5657,220],{},[2250,5659,5660,5662],{},[964,5661,3798],{},[964,5663,966],{},[200,5665,202],{},[2357,5667,5668,5670],{},[964,5669,4927],{},[964,5671,3640],{},[200,5673,202],{},[964,5675,5676],{},"100.0",[140,5678,5679],{"encoding":142},"\\hat{v}_1 = \\frac{0.1}{1 - 0.999^1} = \\frac{0.1}{0.001} = 
100.0",[117,5681,5683,5766,5885,5962],{"className":5682,"ariaHidden":148},[147],[117,5684,5686,5689,5757,5760,5763],{"className":5685},[152],[117,5687],{"className":5688,"style":251},[156],[117,5690,5692,5723],{"className":5691},[161],[117,5693,5695],{"className":5694},[161,2437],[117,5696,5698],{"className":5697},[266],[117,5699,5701],{"className":5700},[271],[117,5702,5704,5712],{"className":5703,"style":2447},[275],[117,5705,5706,5709],{"style":1939},[117,5707],{"className":5708,"style":1943},[283],[117,5710,838],{"className":5711,"style":859},[161,162],[117,5713,5714,5717],{"style":1939},[117,5715],{"className":5716,"style":1943},[283],[117,5718,5720],{"className":5719,"style":2733},[2464],[117,5721,2351],{"className":5722},[161],[117,5724,5726],{"className":5725},[262],[117,5727,5729,5749],{"className":5728},[266,267],[117,5730,5732,5746],{"className":5731},[271],[117,5733,5735],{"className":5734,"style":1095},[275],[117,5736,5737,5740],{"style":874},[117,5738],{"className":5739,"style":284},[283],[117,5741,5743],{"className":5742},[288,289,290,291],[117,5744,966],{"className":5745},[161,291],[117,5747,309],{"className":5748},[308],[117,5750,5752],{"className":5751},[271],[117,5753,5755],{"className":5754,"style":316},[275],[117,5756],{},[117,5758],{"className":5759,"style":323},[322],[117,5761,202],{"className":5762},[327],[117,5764],{"className":5765,"style":323},[322],[117,5767,5769,5772,5876,5879,5882],{"className":5768},[152],[117,5770],{"className":5771,"style":5414},[156],[117,5773,5775,5778,5873],{"className":5774},[161],[117,5776],{"className":5777},[419,2525],[117,5779,5781],{"className":5780},[2357],[117,5782,5784,5865],{"className":5783},[266,267],[117,5785,5787,5862],{"className":5786},[271],[117,5788,5790,5843,5851],{"className":5789,"style":5433},[275],[117,5791,5792,5795],{"style":2541},[117,5793],{"className":5794,"style":1943},[283],[117,5796,5798,5801,5804,5807,5810,5814],{"className":5797},[161],[117,5799,966],{"className":5800},[161],[117,5
802],{"className":5803,"style":391},[322],[117,5805,220],{"className":5806},[395],[117,5808],{"className":5809,"style":391},[322],[117,5811,5813],{"className":5812},[161],"0.99",[117,5815,5817,5820],{"className":5816},[161],[117,5818,5464],{"className":5819},[161],[117,5821,5823],{"className":5822},[262],[117,5824,5826],{"className":5825},[266],[117,5827,5829],{"className":5828},[271],[117,5830,5832],{"className":5831,"style":5477},[275],[117,5833,5834,5837],{"style":5480},[117,5835],{"className":5836,"style":284},[283],[117,5838,5840],{"className":5839},[288,289,290,291],[117,5841,966],{"className":5842},[161,291],[117,5844,5845,5848],{"style":2617},[117,5846],{"className":5847,"style":1943},[283],[117,5849],{"className":5850,"style":2625},[2624],[117,5852,5853,5856],{"style":2628},[117,5854],{"className":5855,"style":1943},[283],[117,5857,5859],{"className":5858},[161],[117,5860,4927],{"className":5861},[161],[117,5863,309],{"className":5864},[308],[117,5866,5868],{"className":5867},[271],[117,5869,5871],{"className":5870,"style":5518},[275],[117,5872],{},[117,5874],{"className":5875},[426,2525],[117,5877],{"className":5878,"style":323},[322],[117,5880,202],{"className":5881},[327],[117,5883],{"className":5884,"style":323},[322],[117,5886,5888,5891,5953,5956,5959],{"className":5887},[152],[117,5889],{"className":5890,"style":5539},[156],[117,5892,5894,5897,5950],{"className":5893},[161],[117,5895],{"className":5896},[419,2525],[117,5898,5900],{"className":5899},[2357],[117,5901,5903,5942],{"className":5902},[266,267],[117,5904,5906,5939],{"className":5905},[271],[117,5907,5909,5920,5928],{"className":5908,"style":5433},[275],[117,5910,5911,5914],{"style":2541},[117,5912],{"className":5913,"style":1943},[283],[117,5915,5917],{"className":5916},[161],[117,5918,3640],{"className":5919},[161],[117,5921,5922,5925],{"style":2617},[117,5923],{"className":5924,"style":1943},[283],[117,5926],{"className":5927,"style":2625},[2624],[117,5929,5930,5933],{"style":2628},[117,59
31],{"className":5932,"style":1943},[283],[117,5934,5936],{"className":5935},[161],[117,5937,4927],{"className":5938},[161],[117,5940,309],{"className":5941},[308],[117,5943,5945],{"className":5944},[271],[117,5946,5948],{"className":5947,"style":5597},[275],[117,5949],{},[117,5951],{"className":5952},[426,2525],[117,5954],{"className":5955,"style":323},[322],[117,5957,202],{"className":5958},[327],[117,5960],{"className":5961,"style":323},[322],[117,5963,5965,5968],{"className":5964},[152],[117,5966],{"className":5967,"style":2127},[156],[117,5969,5676],{"className":5970},[161],[117,5972,5974],{"className":5973},[168],[117,5975,5977,6056],{"className":5976},[120],[117,5978,5980],{"className":5979},[124],[126,5981,5982],{"xmlns":128,"display":177},[130,5983,5984,6053],{},[133,5985,5986,5992,5994,5996,5998,6020,6022,6024,6026,6028,6030,6036,6038,6040,6042,6044,6046,6048,6050],{},[183,5987,5988,5990],{},[136,5989,187],{},[964,5991,966],{},[200,5993,202],{},[964,5995,4108],{},[200,5997,220],{},[2357,5999,6000,6002],{},[964,6001,3640],{},[133,6003,6004,6008,6010],{},[1900,6005,6006],{},[964,6007,5116],{},[200,6009,984],{},[2250,6011,6012,6014],{},[964,6013,3499],{},[133,6015,6016,6018],{},[200,6017,220],{},[964,6019,3506],{},[200,6021,4780],{},[964,6023,5318],{},[200,6025,202],{},[964,6027,4108],{},[200,6029,220],{},[2357,6031,6032,6034],{},[964,6033,3640],{},[964,6035,3499],{},[200,6037,4780],{},[964,6039,5318],{},[200,6041,202],{},[964,6043,4108],{},[200,6045,220],{},[964,6047,3640],{},[200,6049,202],{},[964,6051,6052],{},"4.999",[140,6054,6055],{"encoding":142},"\\theta_1 = 5 - \\frac{0.001}{\\sqrt{100} + 10^{-8}} \\times 10.0 = 5 - \\frac{0.001}{10} \\times 10.0 = 5 - 0.001 = 
4.999",[117,6057,6059,6114,6132,6305,6323,6341,6418,6436,6454,6472],{"className":6058,"ariaHidden":148},[147],[117,6060,6062,6065,6105,6108,6111],{"className":6061},[152],[117,6063],{"className":6064,"style":251},[156],[117,6066,6068,6071],{"className":6067},[161],[117,6069,187],{"className":6070,"style":258},[161,162],[117,6072,6074],{"className":6073},[262],[117,6075,6077,6097],{"className":6076},[266,267],[117,6078,6080,6094],{"className":6079},[271],[117,6081,6083],{"className":6082,"style":1095},[275],[117,6084,6085,6088],{"style":279},[117,6086],{"className":6087,"style":284},[283],[117,6089,6091],{"className":6090},[288,289,290,291],[117,6092,966],{"className":6093},[161,291],[117,6095,309],{"className":6096},[308],[117,6098,6100],{"className":6099},[271],[117,6101,6103],{"className":6102,"style":316},[275],[117,6104],{},[117,6106],{"className":6107,"style":323},[322],[117,6109,202],{"className":6110},[327],[117,6112],{"className":6113,"style":323},[322],[117,6115,6117,6120,6123,6126,6129],{"className":6116},[152],[117,6118],{"className":6119,"style":4854},[156],[117,6121,4108],{"className":6122},[161],[117,6124],{"className":6125,"style":391},[322],[117,6127,220],{"className":6128},[395],[117,6130],{"className":6131,"style":391},[322],[117,6133,6135,6139,6296,6299,6302],{"className":6134},[152],[117,6136],{"className":6137,"style":6138},[156],"height:2.2514em;vertical-align:-0.93em;",[117,6140,6142,6145,6293],{"className":6141},[161],[117,6143],{"className":6144},[419,2525],[117,6146,6148],{"className":6147},[2357],[117,6149,6151,6285],{"className":6150},[266,267],[117,6152,6154,6282],{"className":6153},[271],[117,6155,6157,6263,6271],{"className":6156,"style":5433},[275],[117,6158,6160,6163],{"style":6159},"top:-2.2028em;",[117,6161],{"className":6162,"style":1943},[283],[117,6164,6166,6216,6219,6222,6225,6228],{"className":6165},[161],[117,6167,6169],{"className":6168},[161,1924],[117,6170,6172,6207],{"className":6171},[266,267],[117,6173,6175,6204],{"clas
sName":6174},[271],[117,6176,6179,6191],{"className":6177,"style":6178},[275],"height:0.9072em;",[117,6180,6182,6185],{"className":6181,"style":1939},[1938],[117,6183],{"className":6184,"style":1943},[283],[117,6186,6188],{"className":6187,"style":1947},[161],[117,6189,5116],{"className":6190},[161],[117,6192,6194,6197],{"style":6193},"top:-2.8672em;",[117,6195],{"className":6196,"style":1943},[283],[117,6198,6200],{"className":6199,"style":1998},[1997],[2000,6201,6202],{"xmlns":2002,"width":2003,"height":2004,"viewBox":2005,"preserveAspectRatio":2006},[2008,6203],{"d":2010},[117,6205,309],{"className":6206},[308],[117,6208,6210],{"className":6209},[271],[117,6211,6214],{"className":6212,"style":6213},[275],"height:0.1328em;",[117,6215],{},[117,6217],{"className":6218,"style":391},[322],[117,6220,984],{"className":6221},[395],[117,6223],{"className":6224,"style":391},[322],[117,6226,966],{"className":6227},[161],[117,6229,6231,6234],{"className":6230},[161],[117,6232,2055],{"className":6233},[161],[117,6235,6237],{"className":6236},[262],[117,6238,6240],{"className":6239},[266],[117,6241,6243],{"className":6242},[271],[117,6244,6246],{"className":6245,"style":5477},[275],[117,6247,6248,6251],{"style":5480},[117,6249],{"className":6250,"style":284},[283],[117,6252,6254],{"className":6253},[288,289,290,291],[117,6255,6257,6260],{"className":6256},[161,291],[117,6258,220],{"className":6259},[161,291],[117,6261,3506],{"className":6262},[161,291],[117,6264,6265,6268],{"style":2617},[117,6266],{"className":6267,"style":1943},[283],[117,6269],{"className":6270,"style":2625},[2624],[117,6272,6273,6276],{"style":2628},[117,6274],{"className":6275,"style":1943},[283],[117,6277,6279],{"className":6278},[161],[117,6280,3640],{"className":6281},[161],[117,6283,309],{"className":6284},[308],[117,6286,6288],{"className":6287},[271],[117,6289,6291],{"className":6290,"style":3366},[275],[117,6292],{},[117,6294],{"className":6295},[426,2525],[117,6297],{"className":6298,"style":391},
[322],[117,6300,4780],{"className":6301},[395],[117,6303],{"className":6304,"style":391},[322],[117,6306,6308,6311,6314,6317,6320],{"className":6307},[152],[117,6309],{"className":6310,"style":2127},[156],[117,6312,5318],{"className":6313},[161],[117,6315],{"className":6316,"style":323},[322],[117,6318,202],{"className":6319},[327],[117,6321],{"className":6322,"style":323},[322],[117,6324,6326,6329,6332,6335,6338],{"className":6325},[152],[117,6327],{"className":6328,"style":4854},[156],[117,6330,4108],{"className":6331},[161],[117,6333],{"className":6334,"style":391},[322],[117,6336,220],{"className":6337},[395],[117,6339],{"className":6340,"style":391},[322],[117,6342,6344,6347,6409,6412,6415],{"className":6343},[152],[117,6345],{"className":6346,"style":5539},[156],[117,6348,6350,6353,6406],{"className":6349},[161],[117,6351],{"className":6352},[419,2525],[117,6354,6356],{"className":6355},[2357],[117,6357,6359,6398],{"className":6358},[266,267],[117,6360,6362,6395],{"className":6361},[271],[117,6363,6365,6376,6384],{"className":6364,"style":5433},[275],[117,6366,6367,6370],{"style":2541},[117,6368],{"className":6369,"style":1943},[283],[117,6371,6373],{"className":6372},[161],[117,6374,3499],{"className":6375},[161],[117,6377,6378,6381],{"style":2617},[117,6379],{"className":6380,"style":1943},[283],[117,6382],{"className":6383,"style":2625},[2624],[117,6385,6386,6389],{"style":2628},[117,6387],{"className":6388,"style":1943},[283],[117,6390,6392],{"className":6391},[161],[117,6393,3640],{"className":6394},[161],[117,6396,309],{"className":6397},[308],[117,6399,6401],{"className":6400},[271],[117,6402,6404],{"className":6403,"style":5597},[275],[117,6405],{},[117,6407],{"className":6408},[426,2525],[117,6410],{"className":6411,"style":391},[322],[117,6413,4780],{"className":6414},[395],[117,6416],{"className":6417,"style":391},[322],[117,6419,6421,6424,6427,6430,6433],{"className":6420},[152],[117,6422],{"className":6423,"style":2127},[156],[117,6425,5318],{"cla
ssName":6426},[161],[117,6428],{"className":6429,"style":323},[322],[117,6431,202],{"className":6432},[327],[117,6434],{"className":6435,"style":323},[322],[117,6437,6439,6442,6445,6448,6451],{"className":6438},[152],[117,6440],{"className":6441,"style":4854},[156],[117,6443,4108],{"className":6444},[161],[117,6446],{"className":6447,"style":391},[322],[117,6449,220],{"className":6450},[395],[117,6452],{"className":6453,"style":391},[322],[117,6455,6457,6460,6463,6466,6469],{"className":6456},[152],[117,6458],{"className":6459,"style":2127},[156],[117,6461,3640],{"className":6462},[161],[117,6464],{"className":6465,"style":323},[322],[117,6467,202],{"className":6468},[327],[117,6470],{"className":6471,"style":323},[322],[117,6473,6475,6478],{"className":6474},[152],[117,6476],{"className":6477,"style":2127},[156],[117,6479,6052],{"className":6480},[161],[91,6482],{},[34,6484,6485,585,6538],{},[59,6486,4703,6487,3586],{},[117,6488,6490,6508],{"className":6489},[120],[117,6491,6493],{"className":6492},[124],[126,6494,6495],{"xmlns":128},[130,6496,6497,6505],{},[133,6498,6499,6501,6503],{},[136,6500,753],{},[200,6502,202],{},[964,6504,1451],{},[140,6506,6507],{"encoding":142},"t=2",[117,6509,6511,6529],{"className":6510,"ariaHidden":148},[147],[117,6512,6514,6517,6520,6523,6526],{"className":6513},[152],[117,6515],{"className":6516,"style":4734},[156],[117,6518,753],{"className":6519},[161,162],[117,6521],{"className":6522,"style":323},[322],[117,6524,202],{"className":6525},[327],[117,6527],{"className":6528,"style":323},[322],[117,6530,6532,6535],{"className":6531},[152],[117,6533],{"className":6534,"style":2127},[156],[117,6536,1451],{"className":6537},[161],[117,6539,6541,6572],{"className":6540},[120],[117,6542,6544],{"className":6543},[124],[126,6545,6546],{"xmlns":128},[130,6547,6548,6569],{},[133,6549,6550,6556,6558,6560,6562,6564,6566],{},[183,6551,6552,6554],{},[136,6553,1005],{},[964,6555,1451],{},[200,6557,202],{},[964,6559,1451],{},[200,6561,4780],{},[964,
6563,6052],{},[200,6565,202],{},[964,6567,6568],{},"9.998",[140,6570,6571],{"encoding":142},"g_2 = 2 \\times 4.999 = 9.998",[117,6573,6575,6630,6648,6666],{"className":6574,"ariaHidden":148},[147],[117,6576,6578,6581,6621,6624,6627],{"className":6577},[152],[117,6579],{"className":6580,"style":1279},[156],[117,6582,6584,6587],{"className":6583},[161],[117,6585,1005],{"className":6586,"style":859},[161,162],[117,6588,6590],{"className":6589},[262],[117,6591,6593,6613],{"className":6592},[266,267],[117,6594,6596,6610],{"className":6595},[271],[117,6597,6599],{"className":6598,"style":1095},[275],[117,6600,6601,6604],{"style":874},[117,6602],{"className":6603,"style":284},[283],[117,6605,6607],{"className":6606},[288,289,290,291],[117,6608,1451],{"className":6609},[161,291],[117,6611,309],{"className":6612},[308],[117,6614,6616],{"className":6615},[271],[117,6617,6619],{"className":6618,"style":316},[275],[117,6620],{},[117,6622],{"className":6623,"style":323},[322],[117,6625,202],{"className":6626},[327],[117,6628],{"className":6629,"style":323},[322],[117,6631,6633,6636,6639,6642,6645],{"className":6632},[152],[117,6634],{"className":6635,"style":4854},[156],[117,6637,1451],{"className":6638},[161],[117,6640],{"className":6641,"style":391},[322],[117,6643,4780],{"className":6644},[395],[117,6646],{"className":6647,"style":391},[322],[117,6649,6651,6654,6657,6660,6663],{"className":6650},[152],[117,6652],{"className":6653,"style":2127},[156],[117,6655,6052],{"className":6656},[161],[117,6658],{"className":6659,"style":323},[322],[117,6661,202],{"className":6662},[327],[117,6664],{"className":6665,"style":323},[322],[117,6667,6669,6672],{"className":6668},[152],[117,6670],{"className":6671,"style":2127},[156],[117,6673,6568],{"className":6674},[161],[117,6676,6678],{"className":6677},[168],[117,6679,6681,6720],{"className":6680},[120],[117,6682,6684],{"className":6683},[124],[126,6685,6686],{"xmlns":128,"display":177},[130,6687,6688,6717],{},[133,6689,6690,6696,6698,67
00,6702,6704,6706,6708,6710,6712,6714],{},[183,6691,6692,6694],{},[136,6693,750],{},[964,6695,1451],{},[200,6697,202],{},[964,6699,1397],{},[200,6701,4780],{},[964,6703,4936],{},[200,6705,984],{},[964,6707,4927],{},[200,6709,4780],{},[964,6711,6568],{},[200,6713,202],{},[964,6715,6716],{},"1.8998",[140,6718,6719],{"encoding":142},"m_2 = 0.9 \\times 1.0 + 0.1 \\times 9.998 = 1.8998",[117,6721,6723,6778,6796,6814,6832,6850],{"className":6722,"ariaHidden":148},[147],[117,6724,6726,6729,6769,6772,6775],{"className":6725},[152],[117,6727],{"className":6728,"style":766},[156],[117,6730,6732,6735],{"className":6731},[161],[117,6733,750],{"className":6734},[161,162],[117,6736,6738],{"className":6737},[262],[117,6739,6741,6761],{"className":6740},[266,267],[117,6742,6744,6758],{"className":6743},[271],[117,6745,6747],{"className":6746,"style":1095},[275],[117,6748,6749,6752],{"style":788},[117,6750],{"className":6751,"style":284},[283],[117,6753,6755],{"className":6754},[288,289,290,291],[117,6756,1451],{"className":6757},[161,291],[117,6759,309],{"className":6760},[308],[117,6762,6764],{"className":6763},[271],[117,6765,6767],{"className":6766,"style":316},[275],[117,6768],{},[117,6770],{"className":6771,"style":323},[322],[117,6773,202],{"className":6774},[327],[117,6776],{"className":6777,"style":323},[322],[117,6779,6781,6784,6787,6790,6793],{"className":6780},[152],[117,6782],{"className":6783,"style":4854},[156],[117,6785,1397],{"className":6786},[161],[117,6788],{"className":6789,"style":391},[322],[117,6791,4780],{"className":6792},[395],[117,6794],{"className":6795,"style":391},[322],[117,6797,6799,6802,6805,6808,6811],{"className":6798},[152],[117,6800],{"className":6801,"style":4854},[156],[117,6803,4936],{"className":6804},[161],[117,6806],{"className":6807,"style":391},[322],[117,6809,984],{"className":6810},[395],[117,6812],{"className":6813,"style":391},[322],[117,6815,6817,6820,6823,6826,6829],{"className":6816},[152],[117,6818],{"className":6819,"style":4854
},[156],[117,6821,4927],{"className":6822},[161],[117,6824],{"className":6825,"style":391},[322],[117,6827,4780],{"className":6828},[395],[117,6830],{"className":6831,"style":391},[322],[117,6833,6835,6838,6841,6844,6847],{"className":6834},[152],[117,6836],{"className":6837,"style":2127},[156],[117,6839,6568],{"className":6840},[161],[117,6842],{"className":6843,"style":323},[322],[117,6845,202],{"className":6846},[327],[117,6848],{"className":6849,"style":323},[322],[117,6851,6853,6856],{"className":6852},[152],[117,6854],{"className":6855,"style":2127},[156],[117,6857,6716],{"className":6858},[161],[117,6860,6862],{"className":6861},[168],[117,6863,6865,6908],{"className":6864},[120],[117,6866,6868],{"className":6867},[124],[126,6869,6870],{"xmlns":128,"display":177},[130,6871,6872,6905],{},[133,6873,6874,6880,6882,6884,6886,6888,6890,6892,6894,6900,6902],{},[183,6875,6876,6878],{},[136,6877,838],{},[964,6879,1451],{},[200,6881,202],{},[964,6883,3798],{},[200,6885,4780],{},[964,6887,4927],{},[200,6889,984],{},[964,6891,3640],{},[200,6893,4780],{},[2250,6895,6896,6898],{},[964,6897,6568],{},[964,6899,1451],{},[200,6901,202],{},[964,6903,6904],{},"0.1999",[140,6906,6907],{"encoding":142},"v_2 = 0.999 \\times 0.1 + 0.001 \\times 9.998^2 = 
0.1999",[117,6909,6911,6966,6984,7002,7020,7068],{"className":6910,"ariaHidden":148},[147],[117,6912,6914,6917,6957,6960,6963],{"className":6913},[152],[117,6915],{"className":6916,"style":766},[156],[117,6918,6920,6923],{"className":6919},[161],[117,6921,838],{"className":6922,"style":859},[161,162],[117,6924,6926],{"className":6925},[262],[117,6927,6929,6949],{"className":6928},[266,267],[117,6930,6932,6946],{"className":6931},[271],[117,6933,6935],{"className":6934,"style":1095},[275],[117,6936,6937,6940],{"style":874},[117,6938],{"className":6939,"style":284},[283],[117,6941,6943],{"className":6942},[288,289,290,291],[117,6944,1451],{"className":6945},[161,291],[117,6947,309],{"className":6948},[308],[117,6950,6952],{"className":6951},[271],[117,6953,6955],{"className":6954,"style":316},[275],[117,6956],{},[117,6958],{"className":6959,"style":323},[322],[117,6961,202],{"className":6962},[327],[117,6964],{"className":6965,"style":323},[322],[117,6967,6969,6972,6975,6978,6981],{"className":6968},[152],[117,6970],{"className":6971,"style":4854},[156],[117,6973,3798],{"className":6974},[161],[117,6976],{"className":6977,"style":391},[322],[117,6979,4780],{"className":6980},[395],[117,6982],{"className":6983,"style":391},[322],[117,6985,6987,6990,6993,6996,6999],{"className":6986},[152],[117,6988],{"className":6989,"style":4854},[156],[117,6991,4927],{"className":6992},[161],[117,6994],{"className":6995,"style":391},[322],[117,6997,984],{"className":6998},[395],[117,7000],{"className":7001,"style":391},[322],[117,7003,7005,7008,7011,7014,7017],{"className":7004},[152],[117,7006],{"className":7007,"style":4854},[156],[117,7009,3640],{"className":7010},[161],[117,7012],{"className":7013,"style":391},[322],[117,7015,4780],{"className":7016},[395],[117,7018],{"className":7019,"style":391},[322],[117,7021,7023,7026,7030,7059,7062,7065],{"className":7022},[152],[117,7024],{"className":7025,"style":1777},[156],[117,7027,7029],{"className":7028},[161],"9.99",[117,7031,7033,7
036],{"className":7032},[161],[117,7034,3506],{"className":7035},[161],[117,7037,7039],{"className":7038},[262],[117,7040,7042],{"className":7041},[266],[117,7043,7045],{"className":7044},[271],[117,7046,7048],{"className":7047,"style":1777},[275],[117,7049,7050,7053],{"style":1792},[117,7051],{"className":7052,"style":284},[283],[117,7054,7056],{"className":7055},[288,289,290,291],[117,7057,1451],{"className":7058},[161,291],[117,7060],{"className":7061,"style":323},[322],[117,7063,202],{"className":7064},[327],[117,7066],{"className":7067,"style":323},[322],[117,7069,7071,7074],{"className":7070},[152],[117,7072],{"className":7073,"style":2127},[156],[117,7075,6904],{"className":7076},[161],[34,7078,7079,7080,4696],{},"បន្ទាប់ពី Bias Correction និង Update, ",[117,7081,7083,7106],{"className":7082},[120],[117,7084,7086],{"className":7085},[124],[126,7087,7088],{"xmlns":128},[130,7089,7090,7103],{},[133,7091,7092,7098,7100],{},[183,7093,7094,7096],{},[136,7095,187],{},[964,7097,1451],{},[200,7099,3494],{},[964,7101,7102],{},"4.998",[140,7104,7105],{"encoding":142},"\\theta_2 \\approx 
4.998",[117,7107,7109,7164],{"className":7108,"ariaHidden":148},[147],[117,7110,7112,7115,7155,7158,7161],{"className":7111},[152],[117,7113],{"className":7114,"style":251},[156],[117,7116,7118,7121],{"className":7117},[161],[117,7119,187],{"className":7120,"style":258},[161,162],[117,7122,7124],{"className":7123},[262],[117,7125,7127,7147],{"className":7126},[266,267],[117,7128,7130,7144],{"className":7129},[271],[117,7131,7133],{"className":7132,"style":1095},[275],[117,7134,7135,7138],{"style":279},[117,7136],{"className":7137,"style":284},[283],[117,7139,7141],{"className":7140},[288,289,290,291],[117,7142,1451],{"className":7143},[161,291],[117,7145,309],{"className":7146},[308],[117,7148,7150],{"className":7149},[271],[117,7151,7153],{"className":7152,"style":316},[275],[117,7154],{},[117,7156],{"className":7157,"style":323},[322],[117,7159,3494],{"className":7160},[327],[117,7162],{"className":7163,"style":323},[322],[117,7165,7167,7170],{"className":7166},[152],[117,7168],{"className":7169,"style":2127},[156],[117,7171,7102],{"className":7172},[161],[91,7174],{},[34,7176,7177,7178,7181,7182,7210,7211,7239],{},"Adam ធ្វើ ",[59,7179,7180],{},"ជំហានស្ថិតស្ថេរ និងគ្រប់គ្រងបាន"," — មិនរហ័សហ្លើតដូច SGD ដែល ",[117,7183,7185,7198],{"className":7184},[120],[117,7186,7188],{"className":7187},[124],[126,7189,7190],{"xmlns":128},[130,7191,7192,7196],{},[133,7193,7194],{},[136,7195,138],{},[140,7197,143],{"encoding":142},[117,7199,7201],{"className":7200,"ariaHidden":148},[147],[117,7202,7204,7207],{"className":7203},[152],[117,7205],{"className":7206,"style":157},[156],[117,7208,138],{"className":7209,"style":163},[161,162]," ធំ (ដែលនឹងបោះជំហានរំលងចំណុចទាប) ប៉ុន្តែលឿនជាងច្រើនពី SGD ដែល 
",[117,7212,7214,7227],{"className":7213},[120],[117,7215,7217],{"className":7216},[124],[126,7218,7219],{"xmlns":128},[130,7220,7221,7225],{},[133,7222,7223],{},[136,7224,138],{},[140,7226,143],{"encoding":142},[117,7228,7230],{"className":7229,"ariaHidden":148},[147],[117,7231,7233,7236],{"className":7232},[152],[117,7234],{"className":7235,"style":157},[156],[117,7237,138],{"className":7238,"style":163},[161,162]," តូចខ្លាំង (ដែលនឹងដើរយឺតបន្តិចម្ដងៗ)។ ការកែ Bias Correction ធ្វើឱ្យជំហានដំបូងៗនៅតែមានន័យ ទោះបីចាប់ផ្តើមពី Cold Start ក៏ដោយ។",[91,7241],{},[94,7243,7245],{"id":7244},"ការប្រៀបធៀប-optimizer","ការប្រៀបធៀប Optimizer",[702,7247,7248,7267],{},[705,7249,7250],{},[708,7251,7252,7255,7258,7261,7264],{},[711,7253,7254],{},"Optimizer",[711,7256,7257],{},"ទំហំជំហាន",[711,7259,7260],{},"ការចងចាំ",[711,7262,7263],{},"ចំណុចខ្លាំង",[711,7265,7266],{},"ចំណុចខ្សោយ",[721,7268,7269,7343,7416,7438],{},[708,7270,7271,7274,7305,7308,7311],{},[726,7272,7273],{},"SGD",[726,7275,7276,7277,238],{},"ថេរ (",[117,7278,7280,7293],{"className":7279},[120],[117,7281,7283],{"className":7282},[124],[126,7284,7285],{"xmlns":128},[130,7286,7287,7291],{},[133,7288,7289],{},[136,7290,138],{},[140,7292,143],{"encoding":142},[117,7294,7296],{"className":7295,"ariaHidden":148},[147],[117,7297,7299,7302],{"className":7298},[152],[117,7300],{"className":7301,"style":157},[156],[117,7303,138],{"className":7304,"style":163},[161,162],[726,7306,7307],{},"គ្មាន",[726,7309,7310],{},"សាមញ្ញ ងាយយល់",[726,7312,7313,7314,7342],{},"ពិបាកកំណត់ ",[117,7315,7317,7330],{"className":7316},[120],[117,7318,7320],{"className":7319},[124],[126,7321,7322],{"xmlns":128},[130,7323,7324,7328],{},[133,7325,7326],{},[136,7327,138],{},[140,7329,143],{"encoding":142},[117,7331,7333],{"className":7332,"ariaHidden":148},[147],[117,7334,7336,7339],{"className":7335},[152],[117,7337],{"className":7338,"style":157},[156],[117,7340,138],{"className":7341,"style":163},[161,162],", 
យឺត",[708,7344,7345,7348,7378,7381,7384],{},[726,7346,7347],{},"SGD + Momentum",[726,7349,7276,7350,238],{},[117,7351,7353,7366],{"className":7352},[120],[117,7354,7356],{"className":7355},[124],[126,7357,7358],{"xmlns":128},[130,7359,7360,7364],{},[133,7361,7362],{},[136,7363,138],{},[140,7365,143],{"encoding":142},[117,7367,7369],{"className":7368,"ariaHidden":148},[147],[117,7370,7372,7375],{"className":7371},[152],[117,7373],{"className":7374,"style":157},[156],[117,7376,138],{"className":7377,"style":163},[161,162],[726,7379,7380],{},"ទិសដៅជម្រាល",[726,7382,7383],{},"ដើរលឿន និងរលូនជាង",[726,7385,7386,7387,7415],{},"នៅតែត្រូវការ ",[117,7388,7390,7403],{"className":7389},[120],[117,7391,7393],{"className":7392},[124],[126,7394,7395],{"xmlns":128},[130,7396,7397,7401],{},[133,7398,7399],{},[136,7400,138],{},[140,7402,143],{"encoding":142},[117,7404,7406],{"className":7405,"ariaHidden":148},[147],[117,7407,7409,7412],{"className":7408},[152],[117,7410],{"className":7411,"style":157},[156],[117,7413,138],{"className":7414,"style":163},[161,162]," ល្អ",[708,7417,7418,7426,7429,7432,7435],{},[726,7419,7420,7421],{},"RMSProp ",[44,7422,7425],{"href":7423,"className":7424},"#ref-4",[52,53,74],"[4]",[726,7427,7428],{},"បត់បែន",[726,7430,7431],{},"ទំហំជម្រាល",[726,7433,7434],{},"ល្អក្នុងករណីទិន្នន័យផ្លាស់ប្ដូរ",[726,7436,7437],{},"គ្មាន Momentum",[708,7439,7440,7445,7449,7454,7459],{},[726,7441,7442],{},[59,7443,7444],{},"Adam",[726,7446,7447],{},[59,7448,7428],{},[726,7450,7451],{},[59,7452,7453],{},"ទិសដៅ + ទំហំ",[726,7455,7456],{},[59,7457,7458],{},"ល្អបំផុតសឹងគ្រប់ការងារ",[726,7460,7461],{},[59,7462,7463],{},"ពេលខ្លះ Generalize បានមិនល្អប៉ុណ្ណឹង",[7465,7466],"interactive-adam",{},[34,7468,7469,7470,7472,7473,7476],{},"Adam បញ្ចូលរួម ",[59,7471,7347],{}," (1st moment) និង ",[59,7474,7475],{},"RMSProp"," (2nd moment) ក្នុងក្របខណ្ឌតែមួយ ជាមួយ Bias Correction ជាការបន្ថែម។",[91,7478],{},[94,7480,7482],{"id":7481},"ការអនុវត្តជាមួយ-python-កូដគំរូ","ការអនុវត្តជាមួយ Python 
(កូដគំរូ)",[99,7484,7486],{"id":7485},"adam-យ៉ាងសាមញ្ញពីបាតដៃទទេ","Adam យ៉ាងសាមញ្ញពីបាតដៃទទេ",[34,7488,7489],{},"ខាងក្រោមនេះគឺជាការសរសេរ Adam Optimizer ដោយខ្លួនឯង (ពីបាតដៃទទេ)៖",[7491,7492,7497],"pre",{"className":7493,"code":7494,"language":7495,"meta":7496,"style":7496},"language-python shiki shiki-themes material-theme-lighter material-theme material-theme-palenight","import numpy as np\n\ndef adam(grad_fn, theta_init, alpha=0.001, beta1=0.9, beta2=0.999, eps=1e-8, max_iters=1000):\n    theta = theta_init\n    m = 0.0   # 1st moment (momentum)\n    v = 0.0   # 2nd moment (adaptive scale)\n\n    for t in range(1, max_iters + 1):\n        g = grad_fn(theta)            # ① គណនា gradient\n\n        m = beta1 * m + (1 - beta1) * g       # ② update 1st moment\n        v = beta2 * v + (1 - beta2) * g ** 2  # ③ update 2nd moment\n\n        m_hat = m \u002F (1 - beta1 ** t)          # ④ កែតម្រូវ bias សម្រាប់ m\n        v_hat = v \u002F (1 - beta2 ** t)          # ⑤ កែតម្រូវ bias សម្រាប់ v\n\n        # ⑥ ធ្វើការ Update parameter\n        theta = theta - alpha \u002F (np.sqrt(v_hat) + eps) * m_hat  \n\n        if abs(g) \u003C 1e-7:\n            print(f\"ជោគជ័យនៅជំហានទី {t}\")\n            break\n\n    return theta\n\n# សាកល្បងកាត់បន្ថយ J(θ) = θ²,  ∇J(θ) = 2θ\ntheta_min = adam(grad_fn=lambda th: 2 * th, theta_init=5.0)\nprint(f\"ចំណុចទាបបំផុតគឺ θ = {theta_min:.8f}\")\n","python","",[7498,7499,7500,7518,7525,7599,7610,7625,7638,7643,7674,7695,7700,7740,7782,7787,7818,7847,7852,7858,7906,7911,7935,7964,7970,7975,7984,7989,7995,8036],"code",{"__ignoreMap":7496},[117,7501,7504,7508,7512,7515],{"class":7502,"line":7503},"line",1,[117,7505,7507],{"class":7506},"s7zQu","import",[117,7509,7511],{"class":7510},"sTEyZ"," numpy ",[117,7513,7514],{"class":7506},"as",[117,7516,7517],{"class":7510}," 
np\n",[117,7519,7521],{"class":7502,"line":7520},2,[117,7522,7524],{"emptyLinePlaceholder":7523},true,"\n",[117,7526,7528,7532,7536,7539,7543,7545,7548,7550,7553,7555,7558,7560,7563,7565,7567,7569,7572,7574,7576,7578,7581,7583,7586,7588,7591,7593,7596],{"class":7502,"line":7527},3,[117,7529,7531],{"class":7530},"spNyl","def",[117,7533,7535],{"class":7534},"s2Zo4"," adam",[117,7537,233],{"class":7538},"sMK4o",[117,7540,7542],{"class":7541},"sHdIc","grad_fn",[117,7544,2381],{"class":7538},[117,7546,7547],{"class":7541}," theta_init",[117,7549,2381],{"class":7538},[117,7551,7552],{"class":7541}," alpha",[117,7554,202],{"class":7538},[117,7556,3640],{"class":7557},"sbssI",[117,7559,2381],{"class":7538},[117,7561,7562],{"class":7541}," beta1",[117,7564,202],{"class":7538},[117,7566,1397],{"class":7557},[117,7568,2381],{"class":7538},[117,7570,7571],{"class":7541}," beta2",[117,7573,202],{"class":7538},[117,7575,3798],{"class":7557},[117,7577,2381],{"class":7538},[117,7579,7580],{"class":7541}," eps",[117,7582,202],{"class":7538},[117,7584,7585],{"class":7557},"1e-8",[117,7587,2381],{"class":7538},[117,7589,7590],{"class":7541}," max_iters",[117,7592,202],{"class":7538},[117,7594,7595],{"class":7557},"1000",[117,7597,7598],{"class":7538},"):\n",[117,7600,7602,7605,7607],{"class":7502,"line":7601},4,[117,7603,7604],{"class":7510},"    theta ",[117,7606,202],{"class":7538},[117,7608,7609],{"class":7510}," theta_init\n",[117,7611,7613,7616,7618,7621],{"class":7502,"line":7612},5,[117,7614,7615],{"class":7510},"    m ",[117,7617,202],{"class":7538},[117,7619,7620],{"class":7557}," 0.0",[117,7622,7624],{"class":7623},"sHwdD","   # 1st moment (momentum)\n",[117,7626,7628,7631,7633,7635],{"class":7502,"line":7627},6,[117,7629,7630],{"class":7510},"    v ",[117,7632,202],{"class":7538},[117,7634,7620],{"class":7557},[117,7636,7637],{"class":7623},"   # 2nd moment (adaptive 
scale)\n",[117,7639,7641],{"class":7502,"line":7640},7,[117,7642,7524],{"emptyLinePlaceholder":7523},[117,7644,7646,7649,7652,7655,7658,7660,7662,7664,7667,7669,7672],{"class":7502,"line":7645},8,[117,7647,7648],{"class":7506},"    for",[117,7650,7651],{"class":7510}," t ",[117,7653,7654],{"class":7506},"in",[117,7656,7657],{"class":7534}," range",[117,7659,233],{"class":7538},[117,7661,966],{"class":7557},[117,7663,2381],{"class":7538},[117,7665,7666],{"class":7534}," max_iters ",[117,7668,984],{"class":7538},[117,7670,7671],{"class":7557}," 1",[117,7673,7598],{"class":7538},[117,7675,7677,7680,7682,7685,7687,7690,7692],{"class":7502,"line":7676},9,[117,7678,7679],{"class":7510},"        g ",[117,7681,202],{"class":7538},[117,7683,7684],{"class":7534}," grad_fn",[117,7686,233],{"class":7538},[117,7688,7689],{"class":7534},"theta",[117,7691,238],{"class":7538},[117,7693,7694],{"class":7623},"            # ① គណនា gradient\n",[117,7696,7698],{"class":7502,"line":7697},10,[117,7699,7524],{"emptyLinePlaceholder":7523},[117,7701,7703,7706,7708,7711,7714,7717,7719,7722,7724,7727,7729,7731,7734,7737],{"class":7502,"line":7702},11,[117,7704,7705],{"class":7510},"        m ",[117,7707,202],{"class":7538},[117,7709,7710],{"class":7510}," beta1 ",[117,7712,7713],{"class":7538},"*",[117,7715,7716],{"class":7510}," m ",[117,7718,984],{"class":7538},[117,7720,7721],{"class":7538}," (",[117,7723,966],{"class":7557},[117,7725,7726],{"class":7538}," -",[117,7728,7562],{"class":7510},[117,7730,238],{"class":7538},[117,7732,7733],{"class":7538}," *",[117,7735,7736],{"class":7510}," g       ",[117,7738,7739],{"class":7623},"# ② update 1st moment\n",[117,7741,7743,7746,7748,7751,7753,7756,7758,7760,7762,7764,7766,7768,7770,7773,7776,7779],{"class":7502,"line":7742},12,[117,7744,7745],{"class":7510},"        v ",[117,7747,202],{"class":7538},[117,7749,7750],{"class":7510}," beta2 ",[117,7752,7713],{"class":7538},[117,7754,7755],{"class":7510}," v 
",[117,7757,984],{"class":7538},[117,7759,7721],{"class":7538},[117,7761,966],{"class":7557},[117,7763,7726],{"class":7538},[117,7765,7571],{"class":7510},[117,7767,238],{"class":7538},[117,7769,7733],{"class":7538},[117,7771,7772],{"class":7510}," g ",[117,7774,7775],{"class":7538},"**",[117,7777,7778],{"class":7557}," 2",[117,7780,7781],{"class":7623},"  # ③ update 2nd moment\n",[117,7783,7785],{"class":7502,"line":7784},13,[117,7786,7524],{"emptyLinePlaceholder":7523},[117,7788,7790,7793,7795,7797,7800,7802,7804,7806,7808,7810,7813,7815],{"class":7502,"line":7789},14,[117,7791,7792],{"class":7510},"        m_hat ",[117,7794,202],{"class":7538},[117,7796,7716],{"class":7510},[117,7798,7799],{"class":7538},"\u002F",[117,7801,7721],{"class":7538},[117,7803,966],{"class":7557},[117,7805,7726],{"class":7538},[117,7807,7710],{"class":7510},[117,7809,7775],{"class":7538},[117,7811,7812],{"class":7510}," t",[117,7814,238],{"class":7538},[117,7816,7817],{"class":7623},"          # ④ កែតម្រូវ bias សម្រាប់ m\n",[117,7819,7821,7824,7826,7828,7830,7832,7834,7836,7838,7840,7842,7844],{"class":7502,"line":7820},15,[117,7822,7823],{"class":7510},"        v_hat ",[117,7825,202],{"class":7538},[117,7827,7755],{"class":7510},[117,7829,7799],{"class":7538},[117,7831,7721],{"class":7538},[117,7833,966],{"class":7557},[117,7835,7726],{"class":7538},[117,7837,7750],{"class":7510},[117,7839,7775],{"class":7538},[117,7841,7812],{"class":7510},[117,7843,238],{"class":7538},[117,7845,7846],{"class":7623},"          # ⑤ កែតម្រូវ bias សម្រាប់ v\n",[117,7848,7850],{"class":7502,"line":7849},16,[117,7851,7524],{"emptyLinePlaceholder":7523},[117,7853,7855],{"class":7502,"line":7854},17,[117,7856,7857],{"class":7623},"        # ⑥ ធ្វើការ Update parameter\n",[117,7859,7861,7864,7866,7869,7872,7875,7877,7879,7882,7885,7887,7889,7892,7894,7897,7899,7901,7903],{"class":7502,"line":7860},18,[117,7862,7863],{"class":7510},"        theta ",[117,7865,202],{"class":7538},[117,7867,7868],{"class":7510}," 
theta ",[117,7870,7871],{"class":7538},"-",[117,7873,7874],{"class":7510}," alpha ",[117,7876,7799],{"class":7538},[117,7878,7721],{"class":7538},[117,7880,7881],{"class":7510},"np",[117,7883,7884],{"class":7538},".",[117,7886,1924],{"class":7534},[117,7888,233],{"class":7538},[117,7890,7891],{"class":7534},"v_hat",[117,7893,238],{"class":7538},[117,7895,7896],{"class":7538}," +",[117,7898,7580],{"class":7510},[117,7900,238],{"class":7538},[117,7902,7733],{"class":7538},[117,7904,7905],{"class":7510}," m_hat  \n",[117,7907,7909],{"class":7502,"line":7908},19,[117,7910,7524],{"emptyLinePlaceholder":7523},[117,7912,7914,7917,7920,7922,7924,7926,7929,7932],{"class":7502,"line":7913},20,[117,7915,7916],{"class":7506},"        if",[117,7918,7919],{"class":7534}," abs",[117,7921,233],{"class":7538},[117,7923,1005],{"class":7534},[117,7925,238],{"class":7538},[117,7927,7928],{"class":7538}," \u003C",[117,7930,7931],{"class":7557}," 1e-7",[117,7933,7934],{"class":7538},":\n",[117,7936,7938,7941,7943,7946,7950,7953,7955,7958,7961],{"class":7502,"line":7937},21,[117,7939,7940],{"class":7534},"            print",[117,7942,233],{"class":7538},[117,7944,7945],{"class":7530},"f",[117,7947,7949],{"class":7948},"sfazB","\"ជោគជ័យនៅជំហានទី ",[117,7951,7952],{"class":7557},"{",[117,7954,753],{"class":7534},[117,7956,7957],{"class":7557},"}",[117,7959,7960],{"class":7948},"\"",[117,7962,7963],{"class":7538},")\n",[117,7965,7967],{"class":7502,"line":7966},22,[117,7968,7969],{"class":7506},"            break\n",[117,7971,7973],{"class":7502,"line":7972},23,[117,7974,7524],{"emptyLinePlaceholder":7523},[117,7976,7978,7981],{"class":7502,"line":7977},24,[117,7979,7980],{"class":7506},"    return",[117,7982,7983],{"class":7510}," theta\n",[117,7985,7987],{"class":7502,"line":7986},25,[117,7988,7524],{"emptyLinePlaceholder":7523},[117,7990,7992],{"class":7502,"line":7991},26,[117,7993,7994],{"class":7623},"# សាកល្បងកាត់បន្ថយ J(θ) = θ²,  ∇J(θ) = 
2θ\n",[117,7996,7998,8001,8003,8005,8007,8009,8011,8014,8017,8019,8021,8023,8025,8027,8029,8031,8034],{"class":7502,"line":7997},27,[117,7999,8000],{"class":7510},"theta_min ",[117,8002,202],{"class":7538},[117,8004,7535],{"class":7534},[117,8006,233],{"class":7538},[117,8008,7542],{"class":7541},[117,8010,202],{"class":7538},[117,8012,8013],{"class":7530},"lambda",[117,8015,8016],{"class":7541}," th",[117,8018,3586],{"class":7538},[117,8020,7778],{"class":7557},[117,8022,7733],{"class":7538},[117,8024,8016],{"class":7534},[117,8026,2381],{"class":7538},[117,8028,7547],{"class":7541},[117,8030,202],{"class":7538},[117,8032,8033],{"class":7557},"5.0",[117,8035,7963],{"class":7538},[117,8037,8039,8042,8044,8046,8049,8051,8054,8057,8059,8061],{"class":7502,"line":8038},28,[117,8040,8041],{"class":7534},"print",[117,8043,233],{"class":7538},[117,8045,7945],{"class":7530},[117,8047,8048],{"class":7948},"\"ចំណុចទាបបំផុតគឺ θ = ",[117,8050,7952],{"class":7557},[117,8052,8053],{"class":7534},"theta_min",[117,8055,8056],{"class":7530},":.8f",[117,8058,7957],{"class":7557},[117,8060,7960],{"class":7948},[117,8062,7963],{"class":7538},[34,8064,8065],{},[59,8066,8067],{},"លទ្ធផល:",[7491,8069,8073],{"className":8070,"code":8071,"language":8072,"meta":7496,"style":7496},"language-sh shiki shiki-themes material-theme-lighter material-theme material-theme-palenight","ជោគជ័យនៅជំហានទី 817\nចំណុចទាបបំផុតគឺ θ = 0.00000001\n","sh",[7498,8074,8075,8084],{"__ignoreMap":7496},[117,8076,8077,8081],{"class":7502,"line":7503},[117,8078,8080],{"class":8079},"sBMFI","ជោគជ័យនៅជំហានទី",[117,8082,8083],{"class":7557}," 817\n",[117,8085,8086,8089,8092,8095],{"class":7502,"line":7520},[117,8087,8088],{"class":8079},"ចំណុចទាបបំផុតគឺ",[117,8090,8091],{"class":7948}," θ",[117,8093,8094],{"class":7948}," =",[117,8096,8097],{"class":7557}," 0.00000001\n",[99,8099,8101],{"id":8100},"adam-លើ-linear-regression","Adam លើ Linear Regression",[34,8103,8104,8105,8250],{},"សូមសាកអនុវត្ត Adam 
លើករណីប្រើប្រាស់ជាក់ស្តែង — ការ Fit ខ្សែត្រង់ ",[117,8106,8108,8141],{"className":8107},[120],[117,8109,8111],{"className":8110},[124],[126,8112,8113],{"xmlns":128},[130,8114,8115,8138],{},[133,8116,8117,8124,8126,8128,8130,8133,8135],{},[2345,8118,8119,8122],{"accent":148},[136,8120,8121],{},"y",[200,8123,2351],{},[200,8125,202],{},[136,8127,198],{},[200,8129,969],{},[136,8131,8132],{},"x",[200,8134,984],{},[136,8136,8137],{},"b",[140,8139,8140],{"encoding":142},"\\hat{y} = w \\cdot x + b",[117,8142,8144,8203,8222,8241],{"className":8143,"ariaHidden":148},[147],[117,8145,8147,8150,8194,8197,8200],{"className":8146},[152],[117,8148],{"className":8149,"style":1075},[156],[117,8151,8153],{"className":8152},[161,2437],[117,8154,8156,8185],{"className":8155},[266,267],[117,8157,8159,8182],{"className":8158},[271],[117,8160,8162,8170],{"className":8161,"style":2447},[275],[117,8163,8164,8167],{"style":1939},[117,8165],{"className":8166,"style":1943},[283],[117,8168,8121],{"className":8169,"style":859},[161,162],[117,8171,8172,8175],{"style":1939},[117,8173],{"className":8174,"style":1943},[283],[117,8176,8179],{"className":8177,"style":8178},[2464],"left:-0.1944em;",[117,8180,2351],{"className":8181},[161],[117,8183,309],{"className":8184},[308],[117,8186,8188],{"className":8187},[271],[117,8189,8192],{"className":8190,"style":8191},[275],"height:0.1944em;",[117,8193],{},[117,8195],{"className":8196,"style":323},[322],[117,8198,202],{"className":8199},[327],[117,8201],{"className":8202,"style":323},[322],[117,8204,8206,8210,8213,8216,8219],{"className":8205},[152],[117,8207],{"className":8208,"style":8209},[156],"height:0.4445em;",[117,8211,198],{"className":8212,"style":304},[161,162],[117,8214],{"className":8215,"style":391},[322],[117,8217,969],{"className":8218},[395],[117,8220],{"className":8221,"style":391},[322],[117,8223,8225,8229,8232,8235,8238],{"className":8224},[152],[117,8226],{"className":8227,"style":8228},[156],"height:0.6667em;vertical-align:-0.0833em;",
[117,8230,8132],{"className":8231},[161,162],[117,8233],{"className":8234,"style":391},[322],[117,8236,984],{"className":8237},[395],[117,8239],{"className":8240,"style":391},[322],[117,8242,8244,8247],{"className":8243},[152],[117,8245],{"className":8246,"style":2447},[156],[117,8248,8137],{"className":8249},[161,162]," ទៅទិន្នន័យ។",[7491,8252,8256],{"className":7493,"code":8253,"filename":8254,"highlights":8255,"language":7495,"meta":7496,"style":7496},"import numpy as np\n\ndef adam_linear_regression(X, y, alpha=0.01, beta1=0.9, beta2=0.999,\n                            eps=1e-8, epochs=200):\n    m = len(y)\n    w, b = 0.0, 0.0\n\n    # Adam state ដាច់ដោយឡែកសម្រាប់ parameter នីមួយៗ\n    mw, vw = 0.0, 0.0   # moments for w\n    mb, vb = 0.0, 0.0   # moments for b\n\n    for t in range(1, epochs + 1):\n        y_pred = w * X + b\n        error  = y_pred - y\n\n        # Gradients (រូបមន្តដូចគ្នានឹង Gradient Descent)\n        gw = (2 \u002F m) * np.dot(error, X)\n        gb = (2 \u002F m) * np.sum(error)\n\n        # 1st និង 2nd moment updates សម្រាប់ w\n        mw = beta1 * mw + (1 - beta1) * gw\n        vw = beta2 * vw + (1 - beta2) * gw ** 2\n        mw_hat = mw \u002F (1 - beta1 ** t)\n        vw_hat = vw \u002F (1 - beta2 ** t)\n\n        # 1st និង 2nd moment updates សម្រាប់ b\n        mb = beta1 * mb + (1 - beta1) * gb\n        vb = beta2 * vb + (1 - beta2) * gb ** 2\n        mb_hat = mb \u002F (1 - beta1 ** t)\n        vb_hat = vb \u002F (1 - beta2 ** t)\n\n        # Parameter updates\n        w = w - alpha \u002F (np.sqrt(vw_hat) + eps) * mw_hat\n        b = b - alpha \u002F (np.sqrt(vb_hat) + eps) * mb_hat\n\n        if t % 50 == 0:\n            loss = np.mean(error ** 2)\n            print(f\"Epoch {t:4d}: loss={loss:.6f}  w={w:.4f}  b={b:.4f}\")\n\n    return w, b\n\n# ទំនាក់ទំនងពិត: y = 2x + 1\nX = np.array([1.0, 2.0, 3.0, 4.0, 5.0])\ny = np.array([3.0, 5.0, 7.0, 9.0, 11.0])\n\nw, b = adam_linear_regression(X, y)\nprint(f\"\\nFitted: ŷ = {w:.4f}·x + 
{b:.4f}\")\n","adam_linear_regression.py",[7860,7908,7913,7937,7966,7972,7977,7986,7991,7997],[7498,8257,8258,8268,8272,8317,8338,8353,8372,8376,8381,8402,8423,8427,8452,8472,8487,8491,8496,8537,8571,8576,8582,8614,8650,8676,8702,8707,8713,8745,8779,8805,8831,8836,8842,8884,8926,8931,8952,8978,9039,9044,9056,9061,9067,9110,9150,9155,9178],{"__ignoreMap":7496},[117,8259,8260,8262,8264,8266],{"class":7502,"line":7503},[117,8261,7507],{"class":7506},[117,8263,7511],{"class":7510},[117,8265,7514],{"class":7506},[117,8267,7517],{"class":7510},[117,8269,8270],{"class":7502,"line":7520},[117,8271,7524],{"emptyLinePlaceholder":7523},[117,8273,8274,8276,8279,8281,8284,8286,8289,8291,8293,8295,8298,8300,8302,8304,8306,8308,8310,8312,8314],{"class":7502,"line":7527},[117,8275,7531],{"class":7530},[117,8277,8278],{"class":7534}," adam_linear_regression",[117,8280,233],{"class":7538},[117,8282,8283],{"class":7541},"X",[117,8285,2381],{"class":7538},[117,8287,8288],{"class":7541}," y",[117,8290,2381],{"class":7538},[117,8292,7552],{"class":7541},[117,8294,202],{"class":7538},[117,8296,8297],{"class":7557},"0.01",[117,8299,2381],{"class":7538},[117,8301,7562],{"class":7541},[117,8303,202],{"class":7538},[117,8305,1397],{"class":7557},[117,8307,2381],{"class":7538},[117,8309,7571],{"class":7541},[117,8311,202],{"class":7538},[117,8313,3798],{"class":7557},[117,8315,8316],{"class":7538},",\n",[117,8318,8319,8322,8324,8326,8328,8331,8333,8336],{"class":7502,"line":7601},[117,8320,8321],{"class":7541},"                            eps",[117,8323,202],{"class":7538},[117,8325,7585],{"class":7557},[117,8327,2381],{"class":7538},[117,8329,8330],{"class":7541}," epochs",[117,8332,202],{"class":7538},[117,8334,8335],{"class":7557},"200",[117,8337,7598],{"class":7538},[117,8339,8340,8342,8344,8347,8349,8351],{"class":7502,"line":7612},[117,8341,7615],{"class":7510},[117,8343,202],{"class":7538},[117,8345,8346],{"class":7534}," 
len",[117,8348,233],{"class":7538},[117,8350,8121],{"class":7534},[117,8352,7963],{"class":7538},[117,8354,8355,8358,8360,8363,8365,8367,8369],{"class":7502,"line":7627},[117,8356,8357],{"class":7510},"    w",[117,8359,2381],{"class":7538},[117,8361,8362],{"class":7510}," b ",[117,8364,202],{"class":7538},[117,8366,7620],{"class":7557},[117,8368,2381],{"class":7538},[117,8370,8371],{"class":7557}," 0.0\n",[117,8373,8374],{"class":7502,"line":7640},[117,8375,7524],{"emptyLinePlaceholder":7523},[117,8377,8378],{"class":7502,"line":7645},[117,8379,8380],{"class":7623},"    # Adam state ដាច់ដោយឡែកសម្រាប់ parameter នីមួយៗ\n",[117,8382,8383,8386,8388,8391,8393,8395,8397,8399],{"class":7502,"line":7676},[117,8384,8385],{"class":7510},"    mw",[117,8387,2381],{"class":7538},[117,8389,8390],{"class":7510}," vw ",[117,8392,202],{"class":7538},[117,8394,7620],{"class":7557},[117,8396,2381],{"class":7538},[117,8398,7620],{"class":7557},[117,8400,8401],{"class":7623},"   # moments for w\n",[117,8403,8404,8407,8409,8412,8414,8416,8418,8420],{"class":7502,"line":7697},[117,8405,8406],{"class":7510},"    mb",[117,8408,2381],{"class":7538},[117,8410,8411],{"class":7510}," vb ",[117,8413,202],{"class":7538},[117,8415,7620],{"class":7557},[117,8417,2381],{"class":7538},[117,8419,7620],{"class":7557},[117,8421,8422],{"class":7623},"   # moments for b\n",[117,8424,8425],{"class":7502,"line":7702},[117,8426,7524],{"emptyLinePlaceholder":7523},[117,8428,8429,8431,8433,8435,8437,8439,8441,8443,8446,8448,8450],{"class":7502,"line":7742},[117,8430,7648],{"class":7506},[117,8432,7651],{"class":7510},[117,8434,7654],{"class":7506},[117,8436,7657],{"class":7534},[117,8438,233],{"class":7538},[117,8440,966],{"class":7557},[117,8442,2381],{"class":7538},[117,8444,8445],{"class":7534}," epochs ",[117,8447,984],{"class":7538},[117,8449,7671],{"class":7557},[117,8451,7598],{"class":7538},[117,8453,8454,8457,8459,8462,8464,8467,8469],{"class":7502,"line":7784},[117,8455,8456],{"class":7510},"        
y_pred ",[117,8458,202],{"class":7538},[117,8460,8461],{"class":7510}," w ",[117,8463,7713],{"class":7538},[117,8465,8466],{"class":7510}," X ",[117,8468,984],{"class":7538},[117,8470,8471],{"class":7510}," b\n",[117,8473,8474,8477,8479,8482,8484],{"class":7502,"line":7789},[117,8475,8476],{"class":7510},"        error  ",[117,8478,202],{"class":7538},[117,8480,8481],{"class":7510}," y_pred ",[117,8483,7871],{"class":7538},[117,8485,8486],{"class":7510}," y\n",[117,8488,8489],{"class":7502,"line":7820},[117,8490,7524],{"emptyLinePlaceholder":7523},[117,8492,8493],{"class":7502,"line":7849},[117,8494,8495],{"class":7623},"        # Gradients (រូបមន្តដូចគ្នានឹង Gradient Descent)\n",[117,8497,8498,8501,8503,8505,8507,8510,8513,8515,8517,8520,8522,8525,8527,8530,8532,8535],{"class":7502,"line":7854},[117,8499,8500],{"class":7510},"        gw ",[117,8502,202],{"class":7538},[117,8504,7721],{"class":7538},[117,8506,1451],{"class":7557},[117,8508,8509],{"class":7538}," \u002F",[117,8511,8512],{"class":7510}," m",[117,8514,238],{"class":7538},[117,8516,7733],{"class":7538},[117,8518,8519],{"class":7510}," np",[117,8521,7884],{"class":7538},[117,8523,8524],{"class":7534},"dot",[117,8526,233],{"class":7538},[117,8528,8529],{"class":7534},"error",[117,8531,2381],{"class":7538},[117,8533,8534],{"class":7534}," X",[117,8536,7963],{"class":7538},[117,8538,8541,8544,8546,8548,8550,8552,8554,8556,8558,8560,8562,8565,8567,8569],{"class":8539,"line":7860},[7502,8540],"highlight",[117,8542,8543],{"class":7510},"        gb 
",[117,8545,202],{"class":7538},[117,8547,7721],{"class":7538},[117,8549,1451],{"class":7557},[117,8551,8509],{"class":7538},[117,8553,8512],{"class":7510},[117,8555,238],{"class":7538},[117,8557,7733],{"class":7538},[117,8559,8519],{"class":7510},[117,8561,7884],{"class":7538},[117,8563,8564],{"class":7534},"sum",[117,8566,233],{"class":7538},[117,8568,8529],{"class":7534},[117,8570,7963],{"class":7538},[117,8572,8574],{"class":8573,"line":7908},[7502,8540],[117,8575,7524],{"emptyLinePlaceholder":7523},[117,8577,8579],{"class":8578,"line":7913},[7502,8540],[117,8580,8581],{"class":7623},"        # 1st និង 2nd moment updates សម្រាប់ w\n",[117,8583,8585,8588,8590,8592,8594,8597,8599,8601,8603,8605,8607,8609,8611],{"class":8584,"line":7937},[7502,8540],[117,8586,8587],{"class":7510},"        mw ",[117,8589,202],{"class":7538},[117,8591,7710],{"class":7510},[117,8593,7713],{"class":7538},[117,8595,8596],{"class":7510}," mw ",[117,8598,984],{"class":7538},[117,8600,7721],{"class":7538},[117,8602,966],{"class":7557},[117,8604,7726],{"class":7538},[117,8606,7562],{"class":7510},[117,8608,238],{"class":7538},[117,8610,7733],{"class":7538},[117,8612,8613],{"class":7510}," gw\n",[117,8615,8617,8620,8622,8624,8626,8628,8630,8632,8634,8636,8638,8640,8642,8645,8647],{"class":8616,"line":7966},[7502,8540],[117,8618,8619],{"class":7510},"        vw ",[117,8621,202],{"class":7538},[117,8623,7750],{"class":7510},[117,8625,7713],{"class":7538},[117,8627,8390],{"class":7510},[117,8629,984],{"class":7538},[117,8631,7721],{"class":7538},[117,8633,966],{"class":7557},[117,8635,7726],{"class":7538},[117,8637,7571],{"class":7510},[117,8639,238],{"class":7538},[117,8641,7733],{"class":7538},[117,8643,8644],{"class":7510}," gw ",[117,8646,7775],{"class":7538},[117,8648,8649],{"class":7557}," 2\n",[117,8651,8653,8656,8658,8660,8662,8664,8666,8668,8670,8672,8674],{"class":8652,"line":7972},[7502,8540],[117,8654,8655],{"class":7510},"        mw_hat 
",[117,8657,202],{"class":7538},[117,8659,8596],{"class":7510},[117,8661,7799],{"class":7538},[117,8663,7721],{"class":7538},[117,8665,966],{"class":7557},[117,8667,7726],{"class":7538},[117,8669,7710],{"class":7510},[117,8671,7775],{"class":7538},[117,8673,7812],{"class":7510},[117,8675,7963],{"class":7538},[117,8677,8679,8682,8684,8686,8688,8690,8692,8694,8696,8698,8700],{"class":8678,"line":7977},[7502,8540],[117,8680,8681],{"class":7510},"        vw_hat ",[117,8683,202],{"class":7538},[117,8685,8390],{"class":7510},[117,8687,7799],{"class":7538},[117,8689,7721],{"class":7538},[117,8691,966],{"class":7557},[117,8693,7726],{"class":7538},[117,8695,7750],{"class":7510},[117,8697,7775],{"class":7538},[117,8699,7812],{"class":7510},[117,8701,7963],{"class":7538},[117,8703,8705],{"class":8704,"line":7986},[7502,8540],[117,8706,7524],{"emptyLinePlaceholder":7523},[117,8708,8710],{"class":8709,"line":7991},[7502,8540],[117,8711,8712],{"class":7623},"        # 1st និង 2nd moment updates សម្រាប់ b\n",[117,8714,8716,8719,8721,8723,8725,8728,8730,8732,8734,8736,8738,8740,8742],{"class":8715,"line":7997},[7502,8540],[117,8717,8718],{"class":7510},"        mb ",[117,8720,202],{"class":7538},[117,8722,7710],{"class":7510},[117,8724,7713],{"class":7538},[117,8726,8727],{"class":7510}," mb ",[117,8729,984],{"class":7538},[117,8731,7721],{"class":7538},[117,8733,966],{"class":7557},[117,8735,7726],{"class":7538},[117,8737,7562],{"class":7510},[117,8739,238],{"class":7538},[117,8741,7733],{"class":7538},[117,8743,8744],{"class":7510}," gb\n",[117,8746,8747,8750,8752,8754,8756,8758,8760,8762,8764,8766,8768,8770,8772,8775,8777],{"class":7502,"line":8038},[117,8748,8749],{"class":7510},"        vb 
",[117,8751,202],{"class":7538},[117,8753,7750],{"class":7510},[117,8755,7713],{"class":7538},[117,8757,8411],{"class":7510},[117,8759,984],{"class":7538},[117,8761,7721],{"class":7538},[117,8763,966],{"class":7557},[117,8765,7726],{"class":7538},[117,8767,7571],{"class":7510},[117,8769,238],{"class":7538},[117,8771,7733],{"class":7538},[117,8773,8774],{"class":7510}," gb ",[117,8776,7775],{"class":7538},[117,8778,8649],{"class":7557},[117,8780,8782,8785,8787,8789,8791,8793,8795,8797,8799,8801,8803],{"class":7502,"line":8781},29,[117,8783,8784],{"class":7510},"        mb_hat ",[117,8786,202],{"class":7538},[117,8788,8727],{"class":7510},[117,8790,7799],{"class":7538},[117,8792,7721],{"class":7538},[117,8794,966],{"class":7557},[117,8796,7726],{"class":7538},[117,8798,7710],{"class":7510},[117,8800,7775],{"class":7538},[117,8802,7812],{"class":7510},[117,8804,7963],{"class":7538},[117,8806,8808,8811,8813,8815,8817,8819,8821,8823,8825,8827,8829],{"class":7502,"line":8807},30,[117,8809,8810],{"class":7510},"        vb_hat ",[117,8812,202],{"class":7538},[117,8814,8411],{"class":7510},[117,8816,7799],{"class":7538},[117,8818,7721],{"class":7538},[117,8820,966],{"class":7557},[117,8822,7726],{"class":7538},[117,8824,7750],{"class":7510},[117,8826,7775],{"class":7538},[117,8828,7812],{"class":7510},[117,8830,7963],{"class":7538},[117,8832,8834],{"class":7502,"line":8833},31,[117,8835,7524],{"emptyLinePlaceholder":7523},[117,8837,8839],{"class":7502,"line":8838},32,[117,8840,8841],{"class":7623},"        # Parameter updates\n",[117,8843,8845,8848,8850,8852,8854,8856,8858,8860,8862,8864,8866,8868,8871,8873,8875,8877,8879,8881],{"class":7502,"line":8844},33,[117,8846,8847],{"class":7510},"        w 
",[117,8849,202],{"class":7538},[117,8851,8461],{"class":7510},[117,8853,7871],{"class":7538},[117,8855,7874],{"class":7510},[117,8857,7799],{"class":7538},[117,8859,7721],{"class":7538},[117,8861,7881],{"class":7510},[117,8863,7884],{"class":7538},[117,8865,1924],{"class":7534},[117,8867,233],{"class":7538},[117,8869,8870],{"class":7534},"vw_hat",[117,8872,238],{"class":7538},[117,8874,7896],{"class":7538},[117,8876,7580],{"class":7510},[117,8878,238],{"class":7538},[117,8880,7733],{"class":7538},[117,8882,8883],{"class":7510}," mw_hat\n",[117,8885,8887,8890,8892,8894,8896,8898,8900,8902,8904,8906,8908,8910,8913,8915,8917,8919,8921,8923],{"class":7502,"line":8886},34,[117,8888,8889],{"class":7510},"        b ",[117,8891,202],{"class":7538},[117,8893,8362],{"class":7510},[117,8895,7871],{"class":7538},[117,8897,7874],{"class":7510},[117,8899,7799],{"class":7538},[117,8901,7721],{"class":7538},[117,8903,7881],{"class":7510},[117,8905,7884],{"class":7538},[117,8907,1924],{"class":7534},[117,8909,233],{"class":7538},[117,8911,8912],{"class":7534},"vb_hat",[117,8914,238],{"class":7538},[117,8916,7896],{"class":7538},[117,8918,7580],{"class":7510},[117,8920,238],{"class":7538},[117,8922,7733],{"class":7538},[117,8924,8925],{"class":7510}," mb_hat\n",[117,8927,8929],{"class":7502,"line":8928},35,[117,8930,7524],{"emptyLinePlaceholder":7523},[117,8932,8934,8936,8938,8941,8944,8947,8950],{"class":7502,"line":8933},36,[117,8935,7916],{"class":7506},[117,8937,7651],{"class":7510},[117,8939,8940],{"class":7538},"%",[117,8942,8943],{"class":7557}," 50",[117,8945,8946],{"class":7538}," ==",[117,8948,8949],{"class":7557}," 0",[117,8951,7934],{"class":7538},[117,8953,8955,8958,8960,8962,8964,8967,8969,8972,8974,8976],{"class":7502,"line":8954},37,[117,8956,8957],{"class":7510},"            loss 
",[117,8959,202],{"class":7538},[117,8961,8519],{"class":7510},[117,8963,7884],{"class":7538},[117,8965,8966],{"class":7534},"mean",[117,8968,233],{"class":7538},[117,8970,8971],{"class":7534},"error ",[117,8973,7775],{"class":7538},[117,8975,7778],{"class":7557},[117,8977,7963],{"class":7538},[117,8979,8981,8983,8985,8987,8990,8992,8994,8997,8999,9002,9004,9007,9010,9012,9015,9017,9019,9022,9024,9027,9029,9031,9033,9035,9037],{"class":7502,"line":8980},38,[117,8982,7940],{"class":7534},[117,8984,233],{"class":7538},[117,8986,7945],{"class":7530},[117,8988,8989],{"class":7948},"\"Epoch ",[117,8991,7952],{"class":7557},[117,8993,753],{"class":7534},[117,8995,8996],{"class":7530},":4d",[117,8998,7957],{"class":7557},[117,9000,9001],{"class":7948},": loss=",[117,9003,7952],{"class":7557},[117,9005,9006],{"class":7534},"loss",[117,9008,9009],{"class":7530},":.6f",[117,9011,7957],{"class":7557},[117,9013,9014],{"class":7948},"  w=",[117,9016,7952],{"class":7557},[117,9018,198],{"class":7534},[117,9020,9021],{"class":7530},":.4f",[117,9023,7957],{"class":7557},[117,9025,9026],{"class":7948},"  b=",[117,9028,7952],{"class":7557},[117,9030,8137],{"class":7534},[117,9032,9021],{"class":7530},[117,9034,7957],{"class":7557},[117,9036,7960],{"class":7948},[117,9038,7963],{"class":7538},[117,9040,9042],{"class":7502,"line":9041},39,[117,9043,7524],{"emptyLinePlaceholder":7523},[117,9045,9047,9049,9052,9054],{"class":7502,"line":9046},40,[117,9048,7980],{"class":7506},[117,9050,9051],{"class":7510}," w",[117,9053,2381],{"class":7538},[117,9055,8471],{"class":7510},[117,9057,9059],{"class":7502,"line":9058},41,[117,9060,7524],{"emptyLinePlaceholder":7523},[117,9062,9064],{"class":7502,"line":9063},42,[117,9065,9066],{"class":7623},"# ទំនាក់ទំនងពិត: y = 2x + 1\n",[117,9068,9070,9073,9075,9077,9079,9082,9085,9087,9089,9092,9094,9097,9099,9102,9104,9107],{"class":7502,"line":9069},43,[117,9071,9072],{"class":7510},"X 
",[117,9074,202],{"class":7538},[117,9076,8519],{"class":7510},[117,9078,7884],{"class":7538},[117,9080,9081],{"class":7534},"array",[117,9083,9084],{"class":7538},"([",[117,9086,4936],{"class":7557},[117,9088,2381],{"class":7538},[117,9090,9091],{"class":7557}," 2.0",[117,9093,2381],{"class":7538},[117,9095,9096],{"class":7557}," 3.0",[117,9098,2381],{"class":7538},[117,9100,9101],{"class":7557}," 4.0",[117,9103,2381],{"class":7538},[117,9105,9106],{"class":7557}," 5.0",[117,9108,9109],{"class":7538},"])\n",[117,9111,9113,9116,9118,9120,9122,9124,9126,9129,9131,9133,9135,9138,9140,9143,9145,9148],{"class":7502,"line":9112},44,[117,9114,9115],{"class":7510},"y ",[117,9117,202],{"class":7538},[117,9119,8519],{"class":7510},[117,9121,7884],{"class":7538},[117,9123,9081],{"class":7534},[117,9125,9084],{"class":7538},[117,9127,9128],{"class":7557},"3.0",[117,9130,2381],{"class":7538},[117,9132,9106],{"class":7557},[117,9134,2381],{"class":7538},[117,9136,9137],{"class":7557}," 7.0",[117,9139,2381],{"class":7538},[117,9141,9142],{"class":7557}," 9.0",[117,9144,2381],{"class":7538},[117,9146,9147],{"class":7557}," 11.0",[117,9149,9109],{"class":7538},[117,9151,9153],{"class":7502,"line":9152},45,[117,9154,7524],{"emptyLinePlaceholder":7523},[117,9156,9158,9160,9162,9164,9166,9168,9170,9172,9174,9176],{"class":7502,"line":9157},46,[117,9159,198],{"class":7510},[117,9161,2381],{"class":7538},[117,9163,8362],{"class":7510},[117,9165,202],{"class":7538},[117,9167,8278],{"class":7534},[117,9169,233],{"class":7538},[117,9171,8283],{"class":7534},[117,9173,2381],{"class":7538},[117,9175,8288],{"class":7534},[117,9177,7963],{"class":7538},[117,9179,9181,9183,9185,9187,9189,9192,9195,9197,9199,9201,9203,9206,9208,9210,9212,9214,9216],{"class":7502,"line":9180},47,[117,9182,8041],{"class":7534},[117,9184,233],{"class":7538},[117,9186,7945],{"class":7530},[117,9188,7960],{"class":7948},[117,9190,9191],{"class":7510},"\\n",[117,9193,9194],{"class":7948},"Fitted: ŷ = 
",[117,9196,7952],{"class":7557},[117,9198,198],{"class":7534},[117,9200,9021],{"class":7530},[117,9202,7957],{"class":7557},[117,9204,9205],{"class":7948},"·x + ",[117,9207,7952],{"class":7557},[117,9209,8137],{"class":7534},[117,9211,9021],{"class":7530},[117,9213,7957],{"class":7557},[117,9215,7960],{"class":7948},[117,9217,7963],{"class":7538},[34,9219,9220],{},[59,9221,8067],{},[7491,9223,9225],{"className":8070,"code":9224,"language":8072,"meta":7496,"style":7496},"Epoch   50: loss=0.000042  w=1.9953  b=1.0044\nEpoch  100: loss=0.000000  w=2.0000  b=1.0000\nEpoch  150: loss=0.000000  w=2.0000  b=1.0000\nEpoch  200: loss=0.000000  w=2.0000  b=1.0000\n\nFitted: ŷ = 2.0000·x + 1.0000\n",[7498,9226,9227,9251,9273,9292,9311,9315],{"__ignoreMap":7496},[117,9228,9229,9232,9235,9238,9241,9243,9246,9248],{"class":7502,"line":7503},[117,9230,9231],{"class":8079},"Epoch",[117,9233,9234],{"class":7948},"   50:",[117,9236,9237],{"class":7948}," loss=",[117,9239,9240],{"class":7557},"0.000042",[117,9242,9014],{"class":7948},[117,9244,9245],{"class":7557},"1.9953",[117,9247,9026],{"class":7948},[117,9249,9250],{"class":7557},"1.0044\n",[117,9252,9253,9255,9258,9260,9263,9265,9268,9270],{"class":7502,"line":7520},[117,9254,9231],{"class":8079},[117,9256,9257],{"class":7948},"  100:",[117,9259,9237],{"class":7948},[117,9261,9262],{"class":7557},"0.000000",[117,9264,9014],{"class":7948},[117,9266,9267],{"class":7557},"2.0000",[117,9269,9026],{"class":7948},[117,9271,9272],{"class":7557},"1.0000\n",[117,9274,9275,9277,9280,9282,9284,9286,9288,9290],{"class":7502,"line":7527},[117,9276,9231],{"class":8079},[117,9278,9279],{"class":7948},"  150:",[117,9281,9237],{"class":7948},[117,9283,9262],{"class":7557},[117,9285,9014],{"class":7948},[117,9287,9267],{"class":7557},[117,9289,9026],{"class":7948},[117,9291,9272],{"class":7557},[117,9293,9294,9296,9299,9301,9303,9305,9307,9309],{"class":7502,"line":7601},[117,9295,9231],{"class":8079},[117,9297,9298],{"class":7948},"  
200:",[117,9300,9237],{"class":7948},[117,9302,9262],{"class":7557},[117,9304,9014],{"class":7948},[117,9306,9267],{"class":7557},[117,9308,9026],{"class":7948},[117,9310,9272],{"class":7557},[117,9312,9313],{"class":7502,"line":7612},[117,9314,7524],{"emptyLinePlaceholder":7523},[117,9316,9317,9320,9323,9325,9328,9330],{"class":7502,"line":7627},[117,9318,9319],{"class":8079},"Fitted:",[117,9321,9322],{"class":7948}," ŷ",[117,9324,8094],{"class":7948},[117,9326,9327],{"class":7948}," 2.0000·x",[117,9329,7896],{"class":7948},[117,9331,9332],{"class":7557}," 1.0000\n",[34,9334,9335,9336,9422],{},"Adam ស្ដារ ",[117,9337,9339,9365],{"className":9338},[120],[117,9340,9342],{"className":9341},[124],[126,9343,9344],{"xmlns":128},[130,9345,9346,9362],{},[133,9347,9348,9350,9352,9354,9356,9358,9360],{},[136,9349,198],{},[200,9351,202],{},[964,9353,1451],{},[200,9355,2381],{"separator":148},[136,9357,8137],{},[200,9359,202],{},[964,9361,966],{},[140,9363,9364],{"encoding":142},"w=2, b=1",[117,9366,9368,9386,9413],{"className":9367,"ariaHidden":148},[147],[117,9369,9371,9374,9377,9380,9383],{"className":9370},[152],[117,9372],{"className":9373,"style":157},[156],[117,9375,198],{"className":9376,"style":304},[161,162],[117,9378],{"className":9379,"style":323},[322],[117,9381,202],{"className":9382},[327],[117,9384],{"className":9385,"style":323},[322],[117,9387,9389,9392,9395,9398,9401,9404,9407,9410],{"className":9388},[152],[117,9390],{"className":9391,"style":1075},[156],[117,9393,1451],{"className":9394},[161],[117,9396,2381],{"className":9397},[2693],[117,9399],{"className":9400,"style":2701},[322],[117,9402,8137],{"className":9403},[161,162],[117,9405],{"className":9406,"style":323},[322],[117,9408,202],{"className":9409},[327],[117,9411],{"className":9412,"style":323},[322],[117,9414,9416,9419],{"className":9415},[152],[117,9417],{"className":9418,"style":2127},[156],[117,9420,966],{"className":9421},[161]," បានច្បាស់លាស់ និងលឿន — ជាពិសេសបើប្រៀបនឹង Gradient Descent 
ធម្មតា ដែលត្រូវបន្ដ Tune Learning Rate ដោយប្រុងប្រយ័ត្ន។",[91,9424],{},[94,9426,9428],{"id":9427},"ពេលណាគួរប្រើ-adam","ពេលណាគួរប្រើ Adam?",[34,9430,9431],{},"Adam គឺជាជម្រើសដ៏សុវត្ថិភាពបំផុតសម្រាប់កិច្ចការ Deep Learning ស្ទើរតែទាំងអស់៖",[551,9433,9434,9440,9446,9452],{},[554,9435,9436,9439],{},[59,9437,9438],{},"Neural networks",": Training MLPs, CNNs, Transformers, RNNs",[554,9441,9442,9445],{},[59,9443,9444],{},"ទិន្នន័យដែលមានការរំខាន (Noisy gradients)",": ល្អសម្រាប់ Mini-batch training ដែលប្រើ Batch size តូចៗ។",[554,9447,9448,9451],{},[59,9449,9450],{},"ទិន្នន័យរំដោចខ្ចាត (Sparse features)",": ល្អសម្រាប់ NLP ដែលពាក្យខ្លះបង្ហាញកម្រ (ជម្រាលធំ ប៉ុន្តែមិនសូវញឹកញាប់)។",[554,9453,9454,9457],{},[59,9455,9456],{},"អ្នកទើបចាប់ផ្តើម",": នៅពេលអ្នកមិនចង់ចំណាយពេលច្រើនក្នុងការ Tune Learning Rate។",[12,9459,24,9465,24,9471],{"className":9460},[9461,9462,665,9463,667,668,9464],"bg-blue-50","dark:bg-blue-900\u002F20","border-blue-400","my-6",[34,9466,9470],{"className":9467},[74,9468,9469],"text-blue-800","dark:text-blue-200","ចំណាំមួយ",[34,9472,9476,9477,9482,9483,9486],{"className":9473},[9474,9475],"text-blue-700","dark:text-blue-300","Wilson et al. 
",[44,9478,9481],{"href":9479,"className":9480},"#ref-5",[52,53,74],"[5]"," បង្ហាញថា Adaptive optimizer ដូចជា Adam អាចនឹង Generalize បានន ចុះបន្តិចបើប្រៀបនឹង SGD + Momentum ដែល Tune ល្អ សម្រាប់ Image Classification។ ក្នុងករណីនោះ ",[59,9484,9485],{},"SGD + Momentum ជាមួយ Learning Rate Scheduling"," អាចប្រសើរជាង Adam។ ប៉ុន្តែសម្រាប់កិច្ចការភាគច្រើន ភាពរឹងមាំ (Robustness) របស់ Adam នៅតែឈ្នះ។",[91,9488],{},[94,9490,9491],{"id":9491},"សេចក្តីសង្ខេប",[702,9493,9494,9504],{},[705,9495,9496],{},[708,9497,9498,9501],{},[711,9499,9500],{},"គំនិត",[711,9502,9503],{},"ចំណុចសំខាន់",[721,9505,9506,9542,9619,9696,9859],{},[708,9507,9508,9511],{},[726,9509,9510],{},"ចំណុចខ្សោយ Fixed LR",[726,9512,9513,9541],{},[117,9514,9516,9529],{"className":9515},[120],[117,9517,9519],{"className":9518},[124],[126,9520,9521],{"xmlns":128},[130,9522,9523,9527],{},[133,9524,9525],{},[136,9526,138],{},[140,9528,143],{"encoding":142},[117,9530,9532],{"className":9531,"ariaHidden":148},[147],[117,9533,9535,9538],{"className":9534},[152],[117,9536],{"className":9537,"style":157},[156],[117,9539,138],{"className":9540,"style":163},[161,162]," តែមួយសម្រាប់ Parameters ទាំងអស់ — លម្អិតពេក",[708,9543,9544,9616],{},[726,9545,9546,9547,238],{},"Momentum 
(",[117,9548,9550,9567],{"className":9549},[120],[117,9551,9553],{"className":9552},[124],[126,9554,9555],{"xmlns":128},[130,9556,9557,9565],{},[133,9558,9559],{},[183,9560,9561,9563],{},[136,9562,750],{},[136,9564,753],{},[140,9566,756],{"encoding":142},[117,9568,9570],{"className":9569,"ariaHidden":148},[147],[117,9571,9573,9576],{"className":9572},[152],[117,9574],{"className":9575,"style":766},[156],[117,9577,9579,9582],{"className":9578},[161],[117,9580,750],{"className":9581},[161,162],[117,9583,9585],{"className":9584},[262],[117,9586,9588,9608],{"className":9587},[266,267],[117,9589,9591,9605],{"className":9590},[271],[117,9592,9594],{"className":9593,"style":785},[275],[117,9595,9596,9599],{"style":788},[117,9597],{"className":9598,"style":284},[283],[117,9600,9602],{"className":9601},[288,289,290,291],[117,9603,753],{"className":9604},[161,162,291],[117,9606,309],{"className":9607},[308],[117,9609,9611],{"className":9610},[271],[117,9612,9614],{"className":9613,"style":316},[275],[117,9615],{},[726,9617,9618],{},"ធ្វើឱ្យទិសជម្រាលរលូន និងស្ថិតស្ថេរតាមពេល",[708,9620,9621,9693],{},[726,9622,9623,9624,238],{},"Adaptive scale 
(",[117,9625,9627,9644],{"className":9626},[120],[117,9628,9630],{"className":9629},[124],[126,9631,9632],{"xmlns":128},[130,9633,9634,9642],{},[133,9635,9636],{},[183,9637,9638,9640],{},[136,9639,838],{},[136,9641,753],{},[140,9643,843],{"encoding":142},[117,9645,9647],{"className":9646,"ariaHidden":148},[147],[117,9648,9650,9653],{"className":9649},[152],[117,9651],{"className":9652,"style":766},[156],[117,9654,9656,9659],{"className":9655},[161],[117,9657,838],{"className":9658,"style":859},[161,162],[117,9660,9662],{"className":9661},[262],[117,9663,9665,9685],{"className":9664},[266,267],[117,9666,9668,9682],{"className":9667},[271],[117,9669,9671],{"className":9670,"style":785},[275],[117,9672,9673,9676],{"style":874},[117,9674],{"className":9675,"style":284},[283],[117,9677,9679],{"className":9678},[288,289,290,291],[117,9680,753],{"className":9681},[161,162,291],[117,9683,309],{"className":9684},[308],[117,9686,9688],{"className":9687},[271],[117,9689,9691],{"className":9690,"style":316},[275],[117,9692],{},[726,9694,9695],{},"Scale ជំហានតាមប្រវត្តិទំហំជម្រាល",[708,9697,9698,9701],{},[726,9699,9700],{},"Bias correction",[726,9702,9703,9704],{},"កែ Cold-start bias នៅពេល ",[117,9705,9707,9737],{"className":9706},[120],[117,9708,9710],{"className":9709},[124],[126,9711,9712],{"xmlns":128},[130,9713,9714,9734],{},[133,9715,9716,9722,9724,9730,9732],{},[183,9717,9718,9720],{},[136,9719,750],{},[964,9721,2055],{},[200,9723,202],{},[183,9725,9726,9728],{},[136,9727,838],{},[964,9729,2055],{},[200,9731,202],{},[964,9733,2055],{},[140,9735,9736],{"encoding":142},"m_0 = v_0 = 
0",[117,9738,9740,9795,9850],{"className":9739,"ariaHidden":148},[147],[117,9741,9743,9746,9786,9789,9792],{"className":9742},[152],[117,9744],{"className":9745,"style":766},[156],[117,9747,9749,9752],{"className":9748},[161],[117,9750,750],{"className":9751},[161,162],[117,9753,9755],{"className":9754},[262],[117,9756,9758,9778],{"className":9757},[266,267],[117,9759,9761,9775],{"className":9760},[271],[117,9762,9764],{"className":9763,"style":1095},[275],[117,9765,9766,9769],{"style":788},[117,9767],{"className":9768,"style":284},[283],[117,9770,9772],{"className":9771},[288,289,290,291],[117,9773,2055],{"className":9774},[161,291],[117,9776,309],{"className":9777},[308],[117,9779,9781],{"className":9780},[271],[117,9782,9784],{"className":9783,"style":316},[275],[117,9785],{},[117,9787],{"className":9788,"style":323},[322],[117,9790,202],{"className":9791},[327],[117,9793],{"className":9794,"style":323},[322],[117,9796,9798,9801,9841,9844,9847],{"className":9797},[152],[117,9799],{"className":9800,"style":766},[156],[117,9802,9804,9807],{"className":9803},[161],[117,9805,838],{"className":9806,"style":859},[161,162],[117,9808,9810],{"className":9809},[262],[117,9811,9813,9833],{"className":9812},[266,267],[117,9814,9816,9830],{"className":9815},[271],[117,9817,9819],{"className":9818,"style":1095},[275],[117,9820,9821,9824],{"style":874},[117,9822],{"className":9823,"style":284},[283],[117,9825,9827],{"className":9826},[288,289,290,291],[117,9828,2055],{"className":9829},[161,291],[117,9831,309],{"className":9832},[308],[117,9834,9836],{"className":9835},[271],[117,9837,9839],{"className":9838,"style":316},[275],[117,9840],{},[117,9842],{"className":9843,"style":323},[322],[117,9845,202],{"className":9846},[327],[117,9848],{"className":9849,"style":323},[322],[117,9851,9853,9856],{"className":9852},[152],[117,9854],{"className":9855,"style":2127},[156],[117,9857,2055],{"className":9858},[161],[708,9860,9861,9864],{},[726,9862,9863],{},"Adam 
update",[726,9865,9866],{},[117,9867,9869,9922],{"className":9868},[120],[117,9870,9872],{"className":9871},[124],[126,9873,9874],{"xmlns":128},[130,9875,9876,9919],{},[133,9877,9878,9880,9883,9885,9887,9909],{},[136,9879,187],{},[200,9881,9882],{},"←",[136,9884,187],{},[200,9886,220],{},[2357,9888,9889,9891],{},[136,9890,138],{},[133,9892,9893,9905,9907],{},[1900,9894,9895],{},[183,9896,9897,9903],{},[2345,9898,9899,9901],{"accent":148},[136,9900,838],{},[200,9902,2351],{},[136,9904,753],{},[200,9906,984],{},[136,9908,3021],{},[183,9910,9911,9917],{},[2345,9912,9913,9915],{"accent":148},[136,9914,750],{},[200,9916,2351],{},[136,9918,753],{},[140,9920,9921],{"encoding":142},"\\theta \\leftarrow \\theta - \\frac{\\alpha}{\\sqrt{\\hat{v}_t} + \\epsilon} \\hat{m}_t",[117,9923,9925,9943,9962],{"className":9924,"ariaHidden":148},[147],[117,9926,9928,9931,9934,9937,9940],{"className":9927},[152],[117,9929],{"className":9930,"style":2447},[156],[117,9932,187],{"className":9933,"style":258},[161,162],[117,9935],{"className":9936,"style":323},[322],[117,9938,9882],{"className":9939},[327],[117,9941],{"className":9942,"style":323},[322],[117,9944,9946,9950,9953,9956,9959],{"className":9945},[152],[117,9947],{"className":9948,"style":9949},[156],"height:0.7778em;vertical-align:-0.0833em;",[117,9951,187],{"className":9952,"style":258},[161,162],[117,9954],{"className":9955,"style":391},[322],[117,9957,220],{"className":9958},[395],[117,9960],{"className":9961,"style":391},[322],[117,9963,9965,9969,10166],{"className":9964},[152],[117,9966],{"className":9967,"style":9968},[156],"height:1.2334em;vertical-align:-0.538em;",[117,9970,9972,9975,10163],{"className":9971},[161],[117,9973],{"className":9974},[419,2525],[117,9976,9978],{"className":9977},[2357],[117,9979,9981,10154],{"className":9980},[266,267],[117,9982,9984,10151],{"className":9983},[271],[117,9985,9988,10128,10136],{"className":9986,"style":9987},[275],"height:0.6954em;",[117,9989,9991,9994],{"style":9990},"top:-2.583
6em;",[117,9992],{"className":9993,"style":1943},[283],[117,9995,9997],{"className":9996},[288,289,290,291],[117,9998,10000,10122,10125],{"className":9999},[161,291],[117,10001,10003],{"className":10002},[161,1924,291],[117,10004,10006,10113],{"className":10005},[266,267],[117,10007,10009,10110],{"className":10008},[271],[117,10010,10013,10097],{"className":10011,"style":10012},[275],"height:0.8663em;",[117,10014,10016,10019],{"className":10015,"style":1939},[1938],[117,10017],{"className":10018,"style":1943},[283],[117,10020,10022],{"className":10021,"style":1947},[161,291],[117,10023,10025,10057],{"className":10024},[161,291],[117,10026,10028],{"className":10027},[161,2437,291],[117,10029,10031],{"className":10030},[266],[117,10032,10034],{"className":10033},[271],[117,10035,10037,10046],{"className":10036,"style":2447},[275],[117,10038,10040,10043],{"style":10039},"top:-2.7em;",[117,10041],{"className":10042,"style":284},[283],[117,10044,838],{"className":10045,"style":859},[161,162,291],[117,10047,10048,10051],{"style":10039},[117,10049],{"className":10050,"style":284},[283],[117,10052,10054],{"className":10053,"style":2733},[2464],[117,10055,2351],{"className":10056},[161,291],[117,10058,10060],{"className":10059},[262],[117,10061,10063,10088],{"className":10062},[266,267],[117,10064,10066,10085],{"className":10065},[271],[117,10067,10070],{"className":10068,"style":10069},[275],"height:0.2963em;",[117,10071,10073,10077],{"style":10072},"top:-2.357em;margin-left:-0.0359em;margin-right:0.0714em;",[117,10074],{"className":10075,"style":10076},[283],"height:2.5em;",[117,10078,10082],{"className":10079},[288,10080,10081,291],"reset-size3","size1",[117,10083,753],{"className":10084},[161,162,291],[117,10086,309],{"className":10087},[308],[117,10089,10091],{"className":10090},[271],[117,10092,10095],{"className":10093,"style":10094},[275],"height:0.143em;",[117,10096],{},[117,10098,10100,10103],{"style":10099},"top:-2.8263em;",[117,10101],{"className":10102,"style":1
943},[283],[117,10104,10106],{"className":10105,"style":1998},[1997,291],[2000,10107,10108],{"xmlns":2002,"width":2003,"height":2004,"viewBox":2005,"preserveAspectRatio":2006},[2008,10109],{"d":2010},[117,10111,309],{"className":10112},[308],[117,10114,10116],{"className":10115},[271],[117,10117,10120],{"className":10118,"style":10119},[275],"height:0.1737em;",[117,10121],{},[117,10123,984],{"className":10124},[395,291],[117,10126,3021],{"className":10127},[161,162,291],[117,10129,10130,10133],{"style":2617},[117,10131],{"className":10132,"style":1943},[283],[117,10134],{"className":10135,"style":2625},[2624],[117,10137,10139,10142],{"style":10138},"top:-3.394em;",[117,10140],{"className":10141,"style":1943},[283],[117,10143,10145],{"className":10144},[288,289,290,291],[117,10146,10148],{"className":10147},[161,291],[117,10149,138],{"className":10150,"style":163},[161,162,291],[117,10152,309],{"className":10153},[308],[117,10155,10157],{"className":10156},[271],[117,10158,10161],{"className":10159,"style":10160},[275],"height:0.538em;",[117,10162],{},[117,10164],{"className":10165},[426,2525],[117,10167,10169,10200],{"className":10168},[161],[117,10170,10172],{"className":10171},[161,2437],[117,10173,10175],{"className":10174},[266],[117,10176,10178],{"className":10177},[271],[117,10179,10181,10189],{"className":10180,"style":2447},[275],[117,10182,10183,10186],{"style":1939},[117,10184],{"className":10185,"style":1943},[283],[117,10187,750],{"className":10188},[161,162],[117,10190,10191,10194],{"style":1939},[117,10192],{"className":10193,"style":1943},[283],[117,10195,10197],{"className":10196,"style":2465},[2464],[117,10198,2351],{"className":10199},[161],[117,10201,10203],{"className":10202},[262],[117,10204,10206,10226],{"className":10205},[266,267],[117,10207,10209,10223],{"className":10208},[271],[117,10210,10212],{"className":10211,"style":785},[275],[117,10213,10214,10217],{"style":788},[117,10215],{"className":10216,"style":284},[283],[117,10218,10220],{"c
lassName":10219},[288,289,290,291],[117,10221,753],{"className":10222},[161,162,291],[117,10224,309],{"className":10225},[308],[117,10227,10229],{"className":10228},[271],[117,10230,10232],{"className":10231,"style":316},[275],[117,10233],{},[34,10235,10236,10237,10265,10266,10269,10270,10298,10299,10301],{},"Adam មិនមែនមកលុបបំបាត់ Learning Rate (",[117,10238,10240,10253],{"className":10239},[120],[117,10241,10243],{"className":10242},[124],[126,10244,10245],{"xmlns":128},[130,10246,10247,10251],{},[133,10248,10249],{},[136,10250,138],{},[140,10252,143],{"encoding":142},[117,10254,10256],{"className":10255,"ariaHidden":148},[147],[117,10257,10259,10262],{"className":10258},[152],[117,10260],{"className":10261,"style":157},[156],[117,10263,138],{"className":10264,"style":163},[161,162],") នោះទេ — វានៅតែសំខាន់។ ប៉ុន្តែ Adam ធ្វើឱ្យការហ្វឹកហាត់ម៉ូឌែល ",[59,10267,10268],{},"មិនសូវរងឥទ្ធិពលខ្លាំង"," ពីការកំណត់លេខ ",[117,10271,10273,10286],{"className":10272},[120],[117,10274,10276],{"className":10275},[124],[126,10277,10278],{"xmlns":128},[130,10279,10280,10284],{},[133,10281,10282],{},[136,10283,138],{},[140,10285,143],{"encoding":142},[117,10287,10289],{"className":10288,"ariaHidden":148},[147],[117,10290,10292,10295],{"className":10291},[152],[117,10293],{"className":10294,"style":157},[156],[117,10296,138],{"className":10297,"style":163},[161,162]," ខុស។ នេះជាមូលហេតុដែលតម្លៃ Default ",[7498,10300,3640],{}," របស់វា ដំណើរការបានយ៉ាងល្អលើម៉ូឌែលរាប់ពាន់ខុសៗគ្នា។",[34,10303,10304],{},"បើ Gradient Descent គឺជាការដើរភ្នំដោយបោះជំហានស្មើៗគ្នា Adam គឺជាការជួលអ្នកនាំផ្លូវដែលមាន GPS ជាប់ខ្លួន ដែលចេះកែសម្រួលល្បឿនតាមស្ថានភាពផ្លូវ និងធានាថាអ្នកនឹងមិនដើរវង្វេង 
ឬចំណាយពេលឥតប្រយោជន៍លើផ្លូវដែលធ្លាប់ដើររួចនោះទេ។",[91,10306],{},[94,10308,10309],{"id":10309},"ឯកសារយោង",[10311,10312,24,10318,24,10331,24,10344,24,10357,24,10361],"ol",{"className":10313},[10314,10315,10316,10317,38,39],"list-decimal","list-outside","pl-6","space-y-3",[554,10319,10321,10322,10325,10326],{"id":10320},"ref-1","D. P. Kingma and J. Ba, \"Adam: A method for stochastic optimization,\" in ",[78,10323,10324],{},"Proc. 3rd Int. Conf. Learn. Representations (ICLR)",", San Diego, CA, USA, May 2015. [Online]. Available: ",[44,10327,10328],{"href":10328,"target":47,"rel":10329,"className":10330},"https:\u002F\u002Farxiv.org\u002Fabs\u002F1412.6980",[49,50],[52,53],[554,10332,10334,10335,10338,10339],{"id":10333},"ref-2","Y. LeCun, L. Bottou, G. B. Orr, and K.-R. Müller, \"Efficient backprop,\" in ",[78,10336,10337],{},"Neural Networks: Tricks of the Trade",", G. B. Orr and K.-R. Müller, Eds. Berlin, Germany: Springer, 1998, pp. 9–50. [Online]. Available: ",[44,10340,10341],{"href":10341,"target":47,"rel":10342,"className":10343},"https:\u002F\u002Flink.springer.com\u002Fchapter\u002F10.1007\u002F978-3-642-35289-8_5",[49,50],[52,53],[554,10345,10347,10348,10351,10352],{"id":10346},"ref-3","I. Sutskever, J. Martens, G. Dahl, and G. Hinton, \"On the importance of initialization and momentum in deep learning,\" in ",[78,10349,10350],{},"Proc. 30th Int. Conf. Mach. Learn. (ICML)",", Atlanta, GA, USA, Jun. 2013, pp. 1139–1147. [Online]. Available: ",[44,10353,10354],{"href":10354,"target":47,"rel":10355,"className":10356},"https:\u002F\u002Fproceedings.mlr.press\u002Fv28\u002Fsutskever13.html",[49,50],[52,53],[554,10358,10360],{"id":10359},"ref-4","T. Tieleman and G. Hinton, \"Lecture 6.5 — RMSProp: Divide the gradient by a running average of its recent magnitude,\" COURSERA: Neural Networks for Machine Learning, Tech. Rep., 2012.",[554,10362,10364,10365,10368,10369],{"id":10363},"ref-5","A. C. Wilson, R. Roelofs, M. Stern, N. Srebro, and B. 
Recht, \"The marginal value of adaptive gradient methods in machine learning,\" in ",[78,10366,10367],{},"Proc. 31st Conf. Neural Inf. Process. Syst. (NeurIPS)",", Long Beach, CA, USA, Dec. 2017. [Online]. Available: ",[44,10370,10371],{"href":10371,"target":47,"rel":10372,"className":10373},"https:\u002F\u002Farxiv.org\u002Fabs\u002F1705.08292",[49,50],[52,53],[10375,10376,10377],"style",{},"html pre.shiki code .s7zQu, html code.shiki .s7zQu{--shiki-light:#39ADB5;--shiki-light-font-style:italic;--shiki-default:#89DDFF;--shiki-default-font-style:italic;--shiki-dark:#89DDFF;--shiki-dark-font-style:italic}html pre.shiki code .sTEyZ, html code.shiki .sTEyZ{--shiki-light:#90A4AE;--shiki-default:#EEFFFF;--shiki-dark:#BABED8}html pre.shiki code .spNyl, html code.shiki .spNyl{--shiki-light:#9C3EDA;--shiki-default:#C792EA;--shiki-dark:#C792EA}html pre.shiki code .s2Zo4, html code.shiki .s2Zo4{--shiki-light:#6182B8;--shiki-default:#82AAFF;--shiki-dark:#82AAFF}html pre.shiki code .sMK4o, html code.shiki .sMK4o{--shiki-light:#39ADB5;--shiki-default:#89DDFF;--shiki-dark:#89DDFF}html pre.shiki code .sHdIc, html code.shiki .sHdIc{--shiki-light:#90A4AE;--shiki-light-font-style:italic;--shiki-default:#EEFFFF;--shiki-default-font-style:italic;--shiki-dark:#BABED8;--shiki-dark-font-style:italic}html pre.shiki code .sbssI, html code.shiki .sbssI{--shiki-light:#F76D47;--shiki-default:#F78C6C;--shiki-dark:#F78C6C}html pre.shiki code .sHwdD, html code.shiki .sHwdD{--shiki-light:#90A4AE;--shiki-light-font-style:italic;--shiki-default:#546E7A;--shiki-default-font-style:italic;--shiki-dark:#676E95;--shiki-dark-font-style:italic}html pre.shiki code .sfazB, html code.shiki .sfazB{--shiki-light:#91B859;--shiki-default:#C3E88D;--shiki-dark:#C3E88D}html .light .shiki span {color: var(--shiki-light);background: var(--shiki-light-bg);font-style: var(--shiki-light-font-style);font-weight: var(--shiki-light-font-weight);text-decoration: var(--shiki-light-text-decoration);}html.light .shiki span {color: 
var(--shiki-light);background: var(--shiki-light-bg);font-style: var(--shiki-light-font-style);font-weight: var(--shiki-light-font-weight);text-decoration: var(--shiki-light-text-decoration);}html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html.dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html pre.shiki code .sBMFI, html code.shiki 
.sBMFI{--shiki-light:#E2931D;--shiki-default:#FFCB6B;--shiki-dark:#FFCB6B}",{"title":7496,"searchDepth":7520,"depth":7520,"links":10379},[10380,10384,10385,10391,10392,10393,10397,10398,10399],{"id":96,"depth":7520,"text":97,"children":10381},[10382,10383],{"id":101,"depth":7527,"text":102},{"id":465,"depth":7527,"text":466},{"id":685,"depth":7520,"text":686},{"id":917,"depth":7520,"text":918,"children":10386},[10387,10388,10389,10390],{"id":921,"depth":7527,"text":922},{"id":1410,"depth":7527,"text":1411},{"id":2030,"depth":7527,"text":2031},{"id":2947,"depth":7527,"text":2948},{"id":3913,"depth":7520,"text":3914},{"id":7244,"depth":7520,"text":7245},{"id":7481,"depth":7520,"text":7482,"children":10394},[10395,10396],{"id":7485,"depth":7527,"text":7486},{"id":8100,"depth":7527,"text":8101},{"id":9427,"depth":7520,"text":9428},{"id":9491,"depth":7520,"text":9491},{"id":10309,"depth":7520,"text":10309},"2026-04-04","ពីការកំណត់ល្បឿនថេរ ទៅកាន់ការផ្លាស់ប្តូរតាមស្ថានភាព — ស្វែងយល់ពីបច្ចេកទេសនៅពីក្រោយ AI សម័យថ្មី","md",{},null,"\u002Fkm\u002Frooms\u002Fadam-optimizer",{"title":5,"description":10401},"km\u002Frooms\u002Fadam-optimizer","N\u002FA","KIBmskn_7aCLCVC4-h0mi99OHYjrMSo8SSzsUHuGeME",[10411,10438],{"title":10412,"path":10413,"stem":10414,"children":10415,"page":10437},"En","\u002Fen","en",[10416,10420],{"title":10417,"path":10418,"stem":10419},"AI & ML Engineering Bootcamp — Batch 3","\u002Fen\u002Fbootcamp","en\u002Fbootcamp",{"title":10421,"path":10422,"stem":10423,"children":10424,"page":10437},"Rooms","\u002Fen\u002Frooms","en\u002Frooms",[10425,10429,10433],{"title":10426,"path":10427,"stem":10428},"Adam Optimizer","\u002Fen\u002Frooms\u002Fadam-optimizer","en\u002Frooms\u002Fadam-optimizer",{"title":10430,"path":10431,"stem":10432},"Derivatives — The Language of Change","\u002Fen\u002Frooms\u002Fderivatives","en\u002Frooms\u002Fderivatives",{"title":10434,"path":10435,"stem":10436},"Gradient 
Descent","\u002Fen\u002Frooms\u002Fgradient-descent","en\u002Frooms\u002Fgradient-descent",false,{"title":10439,"path":10440,"stem":10441,"children":10442,"page":10437},"Km","\u002Fkm","km",[10443,10447],{"title":10444,"path":10445,"stem":10446},"វគ្គបណ្តុះបណ្តាល AI & ML Engineering — ជំនាន់ទី 3","\u002Fkm\u002Fbootcamp","km\u002Fbootcamp",{"title":10421,"path":10448,"stem":10449,"children":10450,"page":10437},"\u002Fkm\u002Frooms","km\u002Frooms",[10451,10452,10456],{"title":5,"path":10405,"stem":10407},{"title":10453,"path":10454,"stem":10455},"ដេរីវេ — ភាសានៃការផ្លាស់ប្ដូរ","\u002Fkm\u002Frooms\u002Fderivatives","km\u002Frooms\u002Fderivatives",{"title":10457,"path":10458,"stem":10459},"ស្វែងយល់ពី Gradient Descent Algorithm","\u002Fkm\u002Frooms\u002Fgradient-descent","km\u002Frooms\u002Fgradient-descent",[10461,10464,10469,10474,10478,10482,10487,10492,10497,10502,10507,10512,10517,10522,10525,10529,10534,10538,10543,10548,10553,10558,10562,10567,10572,10577,10582,10587,10592,10596,10601,10606,10611,10616,10621,10624,10628,10633,10638,10643,10648,10653,10658,10662,10667,10672,10677,10682,10687,10692,10697,10702,10706,10711,10716,10721,10726,10731,10736,10740,10745,10748,10753,10757,10762,10767,10772,10777,10782,10787,10791,10796,10800,10804,10809,10814,10819,10824,10829,10834,10839,10843,10848,10853,10858,10863,10868,10871,10876,10881,10885,10889,10894,10899,10904,10909,10914,10919,10924,10929,10932,10935,10939,10942,10946,10950,10954,10958,10961,10965,10969,10973,10977,10981,10985,10988,10992,10996,11000,11004,11007,11010,11014,11019,11024,11029,11034,11039,11044,11048,11053,11058,11063,11068,11073,11078,11083,11088,11092,11097,11102,11107,11112,11117,11122,11126,11131,11134,11139,11143,11148,11153,11158,11163,11168,11173,11177,11182,11186,11190,11194,11198,11202,11207,11212,11217,11222,11227,11232,11237,11242,11247],{"id":10418,"title":10417,"titles":10462,"content":10463,"level":7503},[],"A 22-week hands-on program taking you from mathematical foundations to 
deploying production ML systems. Build real models, ship real code.",{"id":10465,"title":10466,"titles":10467,"content":10468,"level":7520},"\u002Fen\u002Fbootcamp#welcome-future-ml-engineers","Welcome, Future ML Engineers!",[10417],"Over 22 weeks you will grow from someone who uses ML models into someone who builds, trains, and deploys them in production. The curriculum blends mathematical foundations, classical machine learning, deep learning, Transformers, and MLOps into a single coherent journey.",{"id":10470,"title":10471,"titles":10472,"content":10473,"level":7520},"\u002Fen\u002Fbootcamp#program-at-a-glance","Program at a Glance",[10417],"Duration22 weeks (66 hours of instruction)ScheduleThursday & Friday · 1.5 hours \u002F sessionWeekly Commitment3 hrs in-class + 4–6 hrs self-studyStart DateMarch 26, 2026 (Still Accepting Applications)ApproachMath → Classical ML → Deep Learning → Transformers → MLOps",{"id":10475,"title":10476,"titles":10477,"content":7496,"level":7520},"\u002Fen\u002Fbootcamp#tech-stack","Tech Stack",[10417],{"id":10479,"title":10480,"titles":10481,"content":7496,"level":7520},"\u002Fen\u002Fbootcamp#curriculum-modules","Curriculum Modules",[10417],{"id":10483,"title":10484,"titles":10485,"content":10486,"level":7527},"\u002Fen\u002Fbootcamp#module-1-foundations-4-weeks","Module 1 · Foundations (4 weeks)",[10417,10480],"Mathematical and conceptual building blocks for ML. Build the intuition behind how machines learn before writing a single fit() call. 
Week 1 — AI\u002FML\u002FDeep Learning landscape · supervised vs unsupervised learning · types of ML problemsWeek 2 — Vectors & matrices · dot products & matrix multiplication · gradients · gradient descent from scratchWeek 3 — Probability distributions · Bayes' theorem · MSE & cross-entropy loss · bias-variance tradeoffWeek 4 — Exploratory data analysis · handling missing data · feature scaling · train your first model 🎯 End-of-Module Project: Implement gradient descent and linear regression from scratch using only NumPy.",{"id":10488,"title":10489,"titles":10490,"content":10491,"level":7527},"\u002Fen\u002Fbootcamp#module-2-classical-machine-learning-5-weeks","Module 2 · Classical Machine Learning (5 weeks)",[10417,10480],"The Scikit-learn ecosystem and tabular data mastery. Build, evaluate, and tune real-world classifiers and regressors. Week 5 — Linear regression (OLS) · logistic regression · sigmoid function · decision boundariesWeek 6 — Decision trees (Gini\u002Fentropy splitting) · random forests · bagging · feature importanceWeek 7 — XGBoost & LightGBM gradient boosting · metrics (precision, recall, F1, AUC-ROC) · confusion matricesWeek 8 — K-fold cross-validation · grid & Bayesian hyperparameter search · feature engineering · preventing data leakageWeek 9 — Kaggle competition workflow · end-to-end sklearn Pipeline · model serialization 🎯 End-of-Module Project: Compete in a Kaggle tabular-data challenge and ship a complete sklearn pipeline.",{"id":10493,"title":10494,"titles":10495,"content":10496,"level":7527},"\u002Fen\u002Fbootcamp#module-3-deep-learning-with-pytorch-4-weeks","Module 3 · Deep Learning with PyTorch (4 weeks)",[10417,10480],"Neural networks from first principles to GPU-accelerated CNNs. Understand every layer, gradient update, and training trick. 
Week 10 — Perceptrons · multi-layer networks · forward propagation · backpropagation & chain ruleWeek 11 — Activation functions (ReLU, Softmax) · PyTorch tensors · custom Dataset & DataLoader · data augmentationWeek 12 — Training loops · Adam\u002FSGD optimizers · early stopping · model checkpointing with torch.saveWeek 13 — Convolutional layers & pooling · ResNet\u002FVGG architectures · transfer learning · fine-tuning strategies 🎯 End-of-Module Project: Build an image classifier using transfer learning with a pretrained CNN.",{"id":10498,"title":10499,"titles":10500,"content":10501,"level":7527},"\u002Fen\u002Fbootcamp#module-4-transformers-hugging-face-3-weeks","Module 4 · Transformers & Hugging Face (3 weeks)",[10417,10480],"The attention mechanism that powers modern AI. Fine-tune BERT and GPT-class models for real NLP tasks. Week 14 — Self-attention · multi-head attention · transformer architecture · tokenization (BPE\u002FWordPiece) · positional encodingWeek 15 — Hugging Face Hub & Pipeline API · fine-tuning with Trainer API · BERT for text classification & NERWeek 16 — NLP competition strategy · pushing models to Hugging Face Hub · building a live text classification service 🎯 End-of-Module Project: Fine-tune a transformer on a Kaggle NLP challenge and publish it to Hugging Face Hub.",{"id":10503,"title":10504,"titles":10505,"content":10506,"level":7527},"\u002Fen\u002Fbootcamp#module-5-mlops-deployment-3-weeks","Module 5 · MLOps & Deployment (3 weeks)",[10417,10480],"From Jupyter notebook to a production-grade API. Learn the tools and practices every ML engineer needs in industry. 
Week 17 — Model serialization (pickle \u002F joblib \u002F ONNX) · DVC versioning · REST prediction APIs with FastAPIWeek 18 — Docker images & containers · Dockerfile best practices · MLflow experiment tracking & model registryWeek 19 — GitHub Actions CI\u002FCD · automated testing · data drift detection · model monitoring & alerting 🎯 End-of-Module Project: Deploy an ML model end-to-end with FastAPI, Docker, and a CI\u002FCD pipeline.",{"id":10508,"title":10509,"titles":10510,"content":10511,"level":7527},"\u002Fen\u002Fbootcamp#module-6-capstone-1-week","Module 6 · Capstone (1 week)",[10417,10480],"Build and ship a full production ML system — from raw data to a live API. Source a real dataset (Kaggle or real-world problem)Full EDA, preprocessing, and feature engineeringTrain and compare multiple models with documented hyperparameter tuningDeploy a REST API (FastAPI) inside a Docker containerSet up a CI\u002FCD pipeline with GitHub Actions10–15 minute live demo presentation Example projects: sentiment analysis · medical image classification · real-estate price prediction · fake news detection · customer churn · text summarization API.",{"id":10513,"title":10514,"titles":10515,"content":10516,"level":7520},"\u002Fen\u002Fbootcamp#who-is-this-for","Who Is This For?",[10417],"Developers who know Python and want to break into ML engineeringUniversity students wanting practical, resume-worthy ML projectsAnyone who has taken online courses but wants structured, project-based depth",{"id":10518,"title":10519,"titles":10520,"content":10521,"level":7520},"\u002Fen\u002Fbootcamp#what-you-will-build","What You Will Build",[10417],"By the end of the bootcamp you will have trained and deployed real models, competed in Kaggle challenges, and delivered a capstone project that demonstrates full-stack ML skills.",{"id":10427,"title":10426,"titles":10523,"content":10524,"level":7503},[],"From fixed learning rates to adaptive moments — understanding the optimizer behind modern deep 
learning Image source: DL Notes: Advanced Gradient Descent Adam (short for Adaptive Moment Estimation) was introduced by Diederik Kingma and Jimmy Ba in 2015 [1], and quickly became the go-to optimizer in deep learning. This article explains why a fixed learning rate fails, what Adam does differently, and how it works — from first principles to code.",{"id":10526,"title":10527,"titles":10528,"content":7496,"level":7520},"\u002Fen\u002Frooms\u002Fadam-optimizer#the-problem-with-a-fixed-learning-rate","The Problem with a Fixed Learning Rate",[10426],{"id":10530,"title":10531,"titles":10532,"content":10533,"level":7527},"\u002Fen\u002Frooms\u002Fadam-optimizer#the-one-speed-fits-all-dilemma","The \"One Speed Fits All\" Dilemma",[10426,10527],"Imagine you're hiking through a mountain range with one strict rule: every step you take must be exactly the same length — no more, no less. On a steep cliff face, that fixed step length is terrifying — one step too large and you tumble. On a long, gentle slope to the valley, that same step feels absurdly tiny — it would take forever to reach the bottom. This is exactly the problem with a fixed learning rate α\\alphaα in gradient descent: θnew=θold−α∇J(θ)\\theta_{new} = \\theta_{old} - \\alpha \\nabla J(\\theta)θnew​=θold​−α∇J(θ) The single scalar α\\alphaα controls the step size for every parameter — whether that parameter has large gradients or tiny ones, whether it's converging well or oscillating wildly.",{"id":10535,"title":10536,"titles":10537,"content":7496,"level":7527},"\u002Fen\u002Frooms\u002Fadam-optimizer#three-ways-a-fixed-learning-rate-fails","Three Ways a Fixed Learning Rate Fails",[10426,10527],{"id":10539,"title":10540,"titles":10541,"content":10542,"level":7601},"\u002Fen\u002Frooms\u002Fadam-optimizer#_1-too-large-overshooting","1. 
Too Large — Overshooting",[10426,10527,10536],"When α\\alphaα is too large, gradient descent overshoots the minimum and bounces back and forth: J(θ)=θ2,α=1.0J(\\theta) = \\theta^2, \\quad \\alpha = 1.0J(θ)=θ2,α=1.0 θ0=5→−1.0×10θ1=−5→−1.0×(−10)θ2=5→⋯\\theta_0 = 5 \\xrightarrow{-1.0 \\times 10} \\theta_1 = -5 \\xrightarrow{-1.0 \\times (-10)} \\theta_2 = 5 \\xrightarrow{\\cdots}θ0​=5−1.0×10​θ1​=−5−1.0×(−10)​θ2​=5⋯​ The loss never decreases — it oscillates forever around the minimum.",{"id":10544,"title":10545,"titles":10546,"content":10547,"level":7601},"\u002Fen\u002Frooms\u002Fadam-optimizer#_2-too-small-crawling-forever","2. Too Small — Crawling Forever",[10426,10527,10536],"When α\\alphaα is too small, learning works but is painfully slow: θ0=5,α=0.001\\theta_0 = 5, \\quad \\alpha = 0.001θ0​=5,α=0.001 θ1=5−0.001×10=4.99,θ2=4.98,…\\theta_1 = 5 - 0.001 \\times 10 = 4.99, \\quad \\theta_2 = 4.98, \\quad \\ldotsθ1​=5−0.001×10=4.99,θ2​=4.98,… Thousands of iterations just to move a little. In practice with millions of parameters, this is computationally catastrophic.",{"id":10549,"title":10550,"titles":10551,"content":10552,"level":7601},"\u002Fen\u002Frooms\u002Fadam-optimizer#_3-the-ravine-problem-oscillation","3. The \"Ravine\" Problem — Oscillation",[10426,10527,10536],"In higher dimensions, loss landscapes often look like narrow ravines — very steep in one direction, nearly flat in another. With a fixed learning rate: The steep direction demands a small α\\alphaα to avoid oscillating across the ravine walls.The flat direction needs a large α\\alphaα to make any progress along the ravine floor. No single fixed α\\alphaα can satisfy both at the same time. LeCun et al. [2] provide an early and thorough analysis of these pathological loss-landscape behaviours and their impact on convergence. The Core Pain\n  Different parameters need different step sizes. 
A fixed learning rate treats all of them the same — and that's the bottleneck.",{"id":10554,"title":10555,"titles":10556,"content":10557,"level":7520},"\u002Fen\u002Frooms\u002Fadam-optimizer#enter-adam-the-gps-of-optimizers","Enter Adam: The GPS of Optimizers",[10426],"If vanilla gradient descent is hiking with a fixed stride, Adam is using a GPS with adaptive routing: it speeds up on highways, slows down in tight corners, and remembers which paths were already explored. Adam's secret is tracking two things per parameter at every step: QuantitySymbolIntuition1st Moment (momentum)mtm_tmt​Which direction have gradients been pointing recently?2nd Moment (adaptive scale)vtv_tvt​How large have the gradients been recently? By dividing by the square root of the 2nd moment, Adam automatically shrinks the step size for parameters with consistently large gradients and enlarges it for parameters with small gradients.",{"id":10559,"title":10560,"titles":10561,"content":7496,"level":7520},"\u002Fen\u002Frooms\u002Fadam-optimizer#building-adam-from-scratch","Building Adam from Scratch",[10426],{"id":10563,"title":10564,"titles":10565,"content":10566,"level":7527},"\u002Fen\u002Frooms\u002Fadam-optimizer#step-1-momentum-smoothing-the-direction","Step 1 — Momentum: Smoothing the Direction",[10426,10560],"The problem it solves: Gradients are noisy. Every mini-batch gives a slightly different gradient. Chasing each individual noisy gradient makes the path jagged. The idea: Keep a running average of past gradients, like a ball rolling downhill — it builds speed in a consistent direction and isn't thrown off by small bumps. mt=β1⋅mt−1+(1−β1)⋅gtm_t = \\beta_1 \\cdot m_{t-1} + (1 - \\beta_1) \\cdot g_tmt​=β1​⋅mt−1​+(1−β1​)⋅gt​ Where: gtg_tgt​ = current gradient ∇J(θt)\\nabla J(\\theta_t)∇J(θt​)β1\\beta_1β1​ = decay rate, typically 0.9 (90% weight on the past, 10% on the new gradient)m0=0m_0 = 0m0​=0 Analogy: It's like computing a weighted average of recent directions. 
Gradient yesterday counts more than gradient from 10 steps ago. Sutskever et al. [3] demonstrated that this momentum term is critical for fast, stable convergence in deep networks.",{"id":10568,"title":10569,"titles":10570,"content":10571,"level":7527},"\u002Fen\u002Frooms\u002Fadam-optimizer#step-2-adaptive-scale-normalizing-by-history","Step 2 — Adaptive Scale: Normalizing by History",[10426,10560],"The problem it solves: Some parameters have consistently large gradients; others have tiny ones. We want large-gradient parameters to take smaller steps, and small-gradient parameters to take larger steps. The idea: Track the running average of squared gradients: vt=β2⋅vt−1+(1−β2)⋅gt2v_t = \\beta_2 \\cdot v_{t-1} + (1 - \\beta_2) \\cdot g_t^2vt​=β2​⋅vt−1​+(1−β2​)⋅gt2​ Where: β2\\beta_2β2​ = decay rate, typically 0.999v0=0v_0 = 0v0​=0 A parameter that always receives large gradients will accumulate a large vtv_tvt​. Dividing the step size by vt\\sqrt{v_t}vt​​ keeps its updates proportionally small. This is Adam's per-parameter learning rate.",{"id":10573,"title":10574,"titles":10575,"content":10576,"level":7527},"\u002Fen\u002Frooms\u002Fadam-optimizer#step-3-bias-correction-fixing-cold-start-errors","Step 3 — Bias Correction: Fixing Cold-Start Errors",[10426,10560],"The problem it solves: Since m0=0m_0 = 0m0​=0 and v0=0v_0 = 0v0​=0, the first few estimates of mtm_tmt​ and vtv_tvt​ are heavily biased toward zero (we haven't accumulated enough history yet). 
The fix: Divide by (1−βt)(1 - \\beta^t)(1−βt) to correct for the initial bias: m^t=mt1−β1t,v^t=vt1−β2t\\hat{m}_t = \\frac{m_t}{1 - \\beta_1^t}, \\qquad \\hat{v}_t = \\frac{v_t}{1 - \\beta_2^t}m^t​=1−β1t​mt​​,v^t​=1−β2t​vt​​ As ttt grows, βt→0\\beta^t \\to 0βt→0, so the correction factor 11−βt→1\\frac{1}{1-\\beta^t} \\to 11−βt1​→1 and has no effect — it only matters in the early steps.",{"id":10578,"title":10579,"titles":10580,"content":10581,"level":7527},"\u002Fen\u002Frooms\u002Fadam-optimizer#step-4-the-final-update-rule","Step 4 — The Final Update Rule",[10426,10560],"θt+1=θt−αv^t+ϵ⋅m^t\\boxed{\\theta_{t+1} = \\theta_t - \\frac{\\alpha}{\\sqrt{\\hat{v}_t} + \\epsilon} \\cdot \\hat{m}_t}θt+1​=θt​−v^t​​+ϵα​⋅m^t​​ Where ϵ≈10−8\\epsilon \\approx 10^{-8}ϵ≈10−8 prevents division by zero. Default hyperparameters from the original paper [1]: HyperparameterSymbolDefaultLearning rateα\\alphaα0.0011st moment decayβ1\\beta_1β1​0.92nd moment decayβ2\\beta_2β2​0.999Numerical stabilityϵ\\epsilonϵ10−810^{-8}10−8",{"id":10583,"title":10584,"titles":10585,"content":10586,"level":7520},"\u002Fen\u002Frooms\u002Fadam-optimizer#worked-example-adam-in-action","Worked Example: Adam in Action",[10426],"Let's trace Adam manually on the same simple function we used for gradient descent: J(θ)=θ2,∇J(θ)=2θJ(\\theta) = \\theta^2, \\qquad \\nabla J(\\theta) = 2\\thetaJ(θ)=θ2,∇J(θ)=2θ Starting at θ0=5\\theta_0 = 5θ0​=5, with default hyperparameters (α=0.001\\alpha = 0.001α=0.001, β1=0.9\\beta_1 = 0.9β1​=0.9, β2=0.999\\beta_2 = 0.999β2​=0.999, ϵ=10−8\\epsilon = 10^{-8}ϵ=10−8). Initialize: m0=0m_0 = 0m0​=0, v0=0v_0 = 0v0​=0. 
Step t=1t=1t=1: g1=2×5=10g_1 = 2 \\times 5 = 10g1​=2×5=10 m1=0.9×0+0.1×10=1.0m_1 = 0.9 \\times 0 + 0.1 \\times 10 = 1.0m1​=0.9×0+0.1×10=1.0 v1=0.999×0+0.001×100=0.1v_1 = 0.999 \\times 0 + 0.001 \\times 100 = 0.1v1​=0.999×0+0.001×100=0.1 m^1=1.01−0.91=1.00.1=10.0\\hat{m}_1 = \\frac{1.0}{1 - 0.9^1} = \\frac{1.0}{0.1} = 10.0m^1​=1−0.911.0​=0.11.0​=10.0 v^1=0.11−0.9991=0.10.001=100.0\\hat{v}_1 = \\frac{0.1}{1 - 0.999^1} = \\frac{0.1}{0.001} = 100.0v^1​=1−0.99910.1​=0.0010.1​=100.0 θ1=5−0.001100+10−8×10.0=5−0.00110×10.0=5−0.001=4.999\\theta_1 = 5 - \\frac{0.001}{\\sqrt{100} + 10^{-8}} \\times 10.0 = 5 - \\frac{0.001}{10} \\times 10.0 = 5 - 0.001 = 4.999θ1​=5−100​+10−80.001​×10.0=5−100.001​×10.0=5−0.001=4.999 Step t=2t=2t=2: g2=2×4.999=9.998g_2 = 2 \\times 4.999 = 9.998g2​=2×4.999=9.998 m2=0.9×1.0+0.1×9.998=1.8998m_2 = 0.9 \\times 1.0 + 0.1 \\times 9.998 = 1.8998m2​=0.9×1.0+0.1×9.998=1.8998 v2=0.999×0.1+0.001×9.9982=0.1999v_2 = 0.999 \\times 0.1 + 0.001 \\times 9.998^2 = 0.1999v2​=0.999×0.1+0.001×9.9982=0.1999 With bias correction and update, θ2≈4.998\\theta_2 \\approx 4.998θ2​≈4.998. Notice: Adam makes consistent, controlled steps — not as aggressive as large-α\\alphaα SGD (which would have overshot), yet much faster than tiny-α\\alphaα SGD (which would crawl). 
The bias-corrected estimates keep early steps meaningful despite the cold start.",{"id":10588,"title":10589,"titles":10590,"content":10591,"level":7520},"\u002Fen\u002Frooms\u002Fadam-optimizer#comparing-optimizers-side-by-side","Comparing Optimizers Side by Side",[10426],"Let's bring it all together with intuition: OptimizerStep sizeMemoryStrengthsWeaknessesSGDFixed α\\alphaαNoneSimple, well-understoodSensitive to α\\alphaα, slow on ravinesSGD + MomentumFixed α\\alphaαGradient directionFaster, smoother pathStill needs good α\\alphaαRMSProp [4]AdaptiveGradient magnitudeGood for non-stationaryNo momentumAdamAdaptiveDirection + magnitudeBest of both worldsCan generalize slightly worse Adam essentially combines SGD with momentum (1st moment) and RMSProp (2nd moment) under one roof, with bias correction on top.",{"id":10593,"title":10594,"titles":10595,"content":7496,"level":7520},"\u002Fen\u002Frooms\u002Fadam-optimizer#python-implementation","Python Implementation",[10426],{"id":10597,"title":10598,"titles":10599,"content":10600,"level":7527},"\u002Fen\u002Frooms\u002Fadam-optimizer#minimal-adam-from-scratch","Minimal Adam from Scratch",[10426,10594],"import numpy as np\n\ndef adam(grad_fn, theta_init, alpha=0.001, beta1=0.9, beta2=0.999, eps=1e-8, max_iters=1000):\n    theta = theta_init\n    m = 0.0   # first moment (momentum)\n    v = 0.0   # second moment (adaptive scale)\n\n    for t in range(1, max_iters + 1):\n        g = grad_fn(theta)            # ① compute gradient\n\n        m = beta1 * m + (1 - beta1) * g       # ② update 1st moment\n        v = beta2 * v + (1 - beta2) * g ** 2  # ③ update 2nd moment\n\n        m_hat = m \u002F (1 - beta1 ** t)          # ④ bias-correct 1st moment\n        v_hat = v \u002F (1 - beta2 ** t)          # ⑤ bias-correct 2nd moment\n\n        theta = theta - alpha \u002F (np.sqrt(v_hat) + eps) * m_hat  # ⑥ update\n\n        if abs(g) \u003C 1e-7:\n            print(f\"Converged at step {t}\")\n            break\n\n    return 
theta\n\n# Minimize J(θ) = θ²,  ∇J(θ) = 2θ\ntheta_min = adam(grad_fn=lambda th: 2 * th, theta_init=5.0)\nprint(f\"Minimum at θ = {theta_min:.8f}\") Output: Converged at step 817\nMinimum at θ = 0.00000001",{"id":10602,"title":10603,"titles":10604,"content":10605,"level":7527},"\u002Fen\u002Frooms\u002Fadam-optimizer#adam-on-linear-regression","Adam on Linear Regression",[10426,10594],"Now let's apply Adam to a real use case — fitting a line y^=w⋅x+b\\hat{y} = w \\cdot x + by^​=w⋅x+b to data. import numpy as np\n\ndef adam_linear_regression(X, y, alpha=0.01, beta1=0.9, beta2=0.999,\n                            eps=1e-8, epochs=200):\n    m = len(y)\n    w, b = 0.0, 0.0\n\n    # Separate Adam state for each parameter\n    mw, vw = 0.0, 0.0   # moments for w\n    mb, vb = 0.0, 0.0   # moments for b\n\n    for t in range(1, epochs + 1):\n        y_pred = w * X + b\n        error  = y_pred - y\n\n        # Gradients (same formula as gradient descent)\n        gw = (2 \u002F m) * np.dot(error, X)\n        gb = (2 \u002F m) * np.sum(error)\n\n        # 1st and 2nd moment updates for w\n        mw = beta1 * mw + (1 - beta1) * gw\n        vw = beta2 * vw + (1 - beta2) * gw ** 2\n        mw_hat = mw \u002F (1 - beta1 ** t)\n        vw_hat = vw \u002F (1 - beta2 ** t)\n\n        # 1st and 2nd moment updates for b\n        mb = beta1 * mb + (1 - beta1) * gb\n        vb = beta2 * vb + (1 - beta2) * gb ** 2\n        mb_hat = mb \u002F (1 - beta1 ** t)\n        vb_hat = vb \u002F (1 - beta2 ** t)\n\n        # Parameter updates\n        w = w - alpha \u002F (np.sqrt(vw_hat) + eps) * mw_hat\n        b = b - alpha \u002F (np.sqrt(vb_hat) + eps) * mb_hat\n\n        if t % 50 == 0:\n            loss = np.mean(error ** 2)\n            print(f\"Epoch {t:4d}: loss={loss:.6f}  w={w:.4f}  b={b:.4f}\")\n\n    return w, b\n\n# True relationship: y = 2x + 1\nX = np.array([1.0, 2.0, 3.0, 4.0, 5.0])\ny = np.array([3.0, 5.0, 7.0, 9.0, 11.0])\n\nw, b = adam_linear_regression(X, 
y)\nprint(f\"\\nFitted: ŷ = {w:.4f}·x + {b:.4f}\") Output: Epoch   50: loss=0.000042  w=1.9953  b=1.0044\nEpoch  100: loss=0.000000  w=2.0000  b=1.0000\nEpoch  150: loss=0.000000  w=2.0000  b=1.0000\nEpoch  200: loss=0.000000  w=2.0000  b=1.0000\n\nFitted: ŷ = 2.0000·x + 1.0000 Adam recovers the true w=2,b=1w=2, b=1w=2,b=1 cleanly and fast — especially compared to vanilla gradient descent, which required careful learning rate tuning.",{"id":10607,"title":10608,"titles":10609,"content":10610,"level":7520},"\u002Fen\u002Frooms\u002Fadam-optimizer#when-to-use-adam","When to Use Adam",[10426],"Adam is a safe default for most deep learning tasks: Neural networks: Training MLPs, CNNs, Transformers, RNNsNoisy gradients: Mini-batch training with small batch sizesSparse features: NLP tasks where some words appear rarely (large, infrequent gradients)Getting started: When you don't want to spend time tuning the learning rate One Known Limitation\n  Wilson et al. [5] show that adaptive optimizers like Adam can converge to slightly worse generalization than well-tuned SGD with momentum for image classification. In that setting, SGD + momentum with learning rate scheduling can outperform Adam. But for most tasks, Adam's robustness wins.",{"id":10612,"title":10613,"titles":10614,"content":10615,"level":7520},"\u002Fen\u002Frooms\u002Fadam-optimizer#summary","Summary",[10426],"ConceptKey IdeaFixed learning rate flawOne α\\alphaα for all parameters — too rigidMomentum (mtm_tmt​)Smooth gradient direction over timeAdaptive scale (vtv_tvt​)Scale steps by gradient magnitude historyBias correctionFix cold-start bias when m0=v0=0m_0 = v_0 = 0m0​=v0​=0Adam updateθ←θ−αv^t+ϵm^t\\theta \\leftarrow \\theta - \\frac{\\alpha}{\\sqrt{\\hat{v}_t} + \\epsilon} \\hat{m}_tθ←θ−v^t​​+ϵα​m^t​ Adam doesn't remove the learning rate α\\alphaα — it still matters. But it makes training dramatically less sensitive to your choice of α\\alphaα. 
That's why the same default of 0.0010.0010.001 works well across an enormous variety of tasks. If gradient descent is hiking with a fixed stride, Adam is hiring a GPS-equipped guide who adjusts your pace, smooths your path, and makes sure you don't waste time on terrain you've already explored.",{"id":10617,"title":10618,"titles":10619,"content":10620,"level":7520},"\u002Fen\u002Frooms\u002Fadam-optimizer#references","References",[10426],"D. P. Kingma and J. Ba, \"Adam: A method for stochastic optimization,\" in Proc. 3rd Int. Conf. Learn. Representations (ICLR), San Diego, CA, USA, May 2015. [Online]. Available: https:\u002F\u002Farxiv.org\u002Fabs\u002F1412.6980\n  Y. LeCun, L. Bottou, G. B. Orr, and K.-R. Müller, \"Efficient backprop,\" in Neural Networks: Tricks of the Trade, G. B. Orr and K.-R. Müller, Eds. Berlin, Germany: Springer, 1998, pp. 9–50. [Online]. Available: https:\u002F\u002Flink.springer.com\u002Fchapter\u002F10.1007\u002F978-3-642-35289-8_5\n  I. Sutskever, J. Martens, G. Dahl, and G. Hinton, \"On the importance of initialization and momentum in deep learning,\" in Proc. 30th Int. Conf. Mach. Learn. (ICML), Atlanta, GA, USA, Jun. 2013, pp. 1139–1147. [Online]. Available: https:\u002F\u002Fproceedings.mlr.press\u002Fv28\u002Fsutskever13.html\n  T. Tieleman and G. Hinton, \"Lecture 6.5 — RMSProp: Divide the gradient by a running average of its recent magnitude,\" COURSERA: Neural Networks for Machine Learning, Tech. Rep., 2012.\n  A. C. Wilson, R. Roelofs, M. Stern, N. Srebro, and B. Recht, \"The marginal value of adaptive gradient methods in machine learning,\" in Proc. 31st Conf. Neural Inf. Process. Syst. (NeurIPS), Long Beach, CA, USA, Dec. 2017. [Online]. 
Available: https:\u002F\u002Farxiv.org\u002Fabs\u002F1705.08292 html pre.shiki code .s7zQu, html code.shiki .s7zQu{--shiki-light:#39ADB5;--shiki-light-font-style:italic;--shiki-default:#89DDFF;--shiki-default-font-style:italic;--shiki-dark:#89DDFF;--shiki-dark-font-style:italic}html pre.shiki code .sTEyZ, html code.shiki .sTEyZ{--shiki-light:#90A4AE;--shiki-default:#EEFFFF;--shiki-dark:#BABED8}html pre.shiki code .spNyl, html code.shiki .spNyl{--shiki-light:#9C3EDA;--shiki-default:#C792EA;--shiki-dark:#C792EA}html pre.shiki code .s2Zo4, html code.shiki .s2Zo4{--shiki-light:#6182B8;--shiki-default:#82AAFF;--shiki-dark:#82AAFF}html pre.shiki code .sMK4o, html code.shiki .sMK4o{--shiki-light:#39ADB5;--shiki-default:#89DDFF;--shiki-dark:#89DDFF}html pre.shiki code .sHdIc, html code.shiki .sHdIc{--shiki-light:#90A4AE;--shiki-light-font-style:italic;--shiki-default:#EEFFFF;--shiki-default-font-style:italic;--shiki-dark:#BABED8;--shiki-dark-font-style:italic}html pre.shiki code .sbssI, html code.shiki .sbssI{--shiki-light:#F76D47;--shiki-default:#F78C6C;--shiki-dark:#F78C6C}html pre.shiki code .sHwdD, html code.shiki .sHwdD{--shiki-light:#90A4AE;--shiki-light-font-style:italic;--shiki-default:#546E7A;--shiki-default-font-style:italic;--shiki-dark:#676E95;--shiki-dark-font-style:italic}html pre.shiki code .sfazB, html code.shiki .sfazB{--shiki-light:#91B859;--shiki-default:#C3E88D;--shiki-dark:#C3E88D}html .light .shiki span {color: var(--shiki-light);background: var(--shiki-light-bg);font-style: var(--shiki-light-font-style);font-weight: var(--shiki-light-font-weight);text-decoration: var(--shiki-light-text-decoration);}html.light .shiki span {color: var(--shiki-light);background: var(--shiki-light-bg);font-style: var(--shiki-light-font-style);font-weight: var(--shiki-light-font-weight);text-decoration: var(--shiki-light-text-decoration);}html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: 
var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html.dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html pre.shiki code .sBMFI, html code.shiki .sBMFI{--shiki-light:#E2931D;--shiki-default:#FFCB6B;--shiki-dark:#FFCB6B}",{"id":10431,"title":10430,"titles":10622,"content":10623,"level":7503},[],"From slopes of lines to the calculus engine behind machine learning Every time a neural network learns, it asks one question over and over: \"If I nudge this parameter slightly, does the error go up or down — and by how much?\" That question is answered by the derivative. 
Before we talk about gradients or optimizers, we need to understand derivatives from scratch.",{"id":10625,"title":10626,"titles":10627,"content":7496,"level":7520},"\u002Fen\u002Frooms\u002Fderivatives#part-1-lines-and-slopes","Part 1 — Lines and Slopes",[10430],{"id":10629,"title":10630,"titles":10631,"content":10632,"level":7527},"\u002Fen\u002Frooms\u002Fderivatives#the-equation-of-a-line","The Equation of a Line",[10430,10626],"The simplest relationship between two quantities is a straight line: y=mx+by = mx + by=mx+b Where: xxx is the inputyyy is the outputmmm is the slope — how steeply the line rises or fallsbbb is the y-intercept — where the line crosses the vertical axis Example: y=2x+1y = 2x + 1y=2x+1 xxxy=2x+1y = 2x + 1y=2x+101132537 Every time xxx increases by 1, yyy increases by exactly 2. The slope m=2m = 2m=2 captures this constant rate.",{"id":10634,"title":10635,"titles":10636,"content":10637,"level":7527},"\u002Fen\u002Frooms\u002Fderivatives#computing-the-slope-between-two-points","Computing the Slope Between Two Points",[10430,10626],"Given any two points (x1,y1)(x_1, y_1)(x1​,y1​) and (x2,y2)(x_2, y_2)(x2​,y2​) on a line, the slope is: m=ΔyΔx=y2−y1x2−x1m = \\frac{\\Delta y}{\\Delta x} = \\frac{y_2 - y_1}{x_2 - x_1}m=ΔxΔy​=x2​−x1​y2​−y1​​ This is the rise over run formula — how much yyy changes (rise) per unit change in xxx (run). Why Does Slope Matter?\n  Slope tells you the rate of change. A slope of 2 means \"for every 1 unit step in x, y changes by 2.\" A slope of −3 means y decreases by 3 for every step forward. A slope of 0 means y doesn't change at all — it's flat.",{"id":10639,"title":10640,"titles":10641,"content":10642,"level":7520},"\u002Fen\u002Frooms\u002Fderivatives#part-2-when-lines-become-curves","Part 2 — When Lines Become Curves",[10430],"A line has a constant slope — it's the same everywhere. But most interesting functions in mathematics (and in machine learning) are curves whose steepness changes at every point. 
Consider the parabola: f(x)=x2f(x) = x^2f(x)=x2 xxxf(x)=x2f(x) = x^2f(x)=x2−39−11001139 Near x=0x = 0x=0 the curve is nearly flat. Near x=3x = 3x=3 it rises steeply. The slope is different at every point — which means the single formula m=ΔyΔxm = \\frac{\\Delta y}{\\Delta x}m=ΔxΔy​ between two distant points only gives us an average.",{"id":10644,"title":10645,"titles":10646,"content":10647,"level":7527},"\u002Fen\u002Frooms\u002Fderivatives#average-rate-of-change","Average Rate of Change",[10430,10640],"For two points xxx and x+hx + hx+h on a curve fff, the average rate of change over that interval is: ΔfΔx=f(x+h)−f(x)h\\frac{\\Delta f}{\\Delta x} = \\frac{f(x + h) - f(x)}{h}ΔxΔf​=hf(x+h)−f(x)​ This is the slope of the secant line — the straight line connecting the two points on the curve. Example on f(x)=x2f(x) = x^2f(x)=x2 between x=1x = 1x=1 and x=3x = 3x=3: f(3)−f(1)3−1=9−12=4\\frac{f(3) - f(1)}{3 - 1} = \\frac{9 - 1}{2} = 43−1f(3)−f(1)​=29−1​=4 That is the average steepness between x=1x=1x=1 and x=3x=3x=3, but it doesn't tell us what the slope is at a specific point.",{"id":10649,"title":10650,"titles":10651,"content":10652,"level":7520},"\u002Fen\u002Frooms\u002Fderivatives#part-3-the-limit-zooming-in-to-a-single-point","Part 3 — The Limit: Zooming In to a Single Point",[10430],"To find the slope at one exact point, we shrink the interval hhh down toward zero. As hhh gets smaller and smaller, the secant line rotates until it becomes the tangent line — touching the curve at exactly one point and matching its steepness there. 
Formally, the instantaneous rate of change at xxx is the limit: lim⁡h→0f(x+h)−f(x)h\\lim_{h \\to 0} \\frac{f(x + h) - f(x)}{h}h→0lim​hf(x+h)−f(x)​ This is the core idea of a derivative.",{"id":10654,"title":10655,"titles":10656,"content":10657,"level":7527},"\u002Fen\u002Frooms\u002Fderivatives#limits-intuitively","Limits Intuitively",[10430,10650],"A limit asks: \"What value does an expression approach as a variable gets closer and closer to some number — even if it never arrives?\" lim⁡h→0(x+h)2−x2h\\lim_{h \\to 0} \\frac{(x+h)^2 - x^2}{h}h→0lim​h(x+h)2−x2​ Expand the numerator: =lim⁡h→0x2+2xh+h2−x2h=lim⁡h→02xh+h2h=lim⁡h→0(2x+h)= \\lim_{h \\to 0} \\frac{x^2 + 2xh + h^2 - x^2}{h} = \\lim_{h \\to 0} \\frac{2xh + h^2}{h} = \\lim_{h \\to 0} (2x + h)=h→0lim​hx2+2xh+h2−x2​=h→0lim​h2xh+h2​=h→0lim​(2x+h) As h→0h \\to 0h→0: =2x= 2x=2x The slope of f(x)=x2f(x) = x^2f(x)=x2 at any point xxx is exactly 2x2x2x.",{"id":10659,"title":10660,"titles":10661,"content":7496,"level":7520},"\u002Fen\u002Frooms\u002Fderivatives#part-4-the-derivative","Part 4 — The Derivative",[10430],{"id":10663,"title":10664,"titles":10665,"content":10666,"level":7527},"\u002Fen\u002Frooms\u002Fderivatives#definition","Definition",[10430,10660],"The derivative of a function fff at point xxx, written f′(x)f'(x)f′(x) or dfdx\\frac{df}{dx}dxdf​, is: f′(x)=lim⁡h→0f(x+h)−f(x)h\\boxed{f'(x) = \\lim_{h \\to 0} \\frac{f(x + h) - f(x)}{h}}f′(x)=h→0lim​hf(x+h)−f(x)​​ It gives the instantaneous rate of change — the slope of the tangent line at every point.",{"id":10668,"title":10669,"titles":10670,"content":10671,"level":7527},"\u002Fen\u002Frooms\u002Fderivatives#geometric-meaning","Geometric Meaning",[10430,10660],"Derivative ValueMeaningf′(x)>0f'(x) > 0f′(x)>0Function is increasing at xxxf′(x)\u003C0f'(x) \u003C 0f′(x)\u003C0Function is decreasing at xxxf′(x)=0f'(x) = 0f′(x)=0Function has a flat point (possible minimum, maximum, or saddle)Large ∥f′(x)∥\\|f'(x)\\|∥f′(x)∥Function is changing rapidlySmall 
∥f′(x)∥\\|f'(x)\\|∥f′(x)∥Function is changing slowly",{"id":10673,"title":10674,"titles":10675,"content":10676,"level":7520},"\u002Fen\u002Frooms\u002Fderivatives#part-5-differentiation-rules","Part 5 — Differentiation Rules",[10430],"Computing limits by hand every time would be exhausting. Mathematicians have derived shortcut rules that cover almost every function you'll encounter.",{"id":10678,"title":10679,"titles":10680,"content":10681,"level":7527},"\u002Fen\u002Frooms\u002Fderivatives#power-rule","Power Rule",[10430,10674],"For f(x)=xnf(x) = x^nf(x)=xn: ddxxn=n⋅xn−1\\frac{d}{dx} x^n = n \\cdot x^{n-1}dxd​xn=n⋅xn−1 Examples: FunctionDerivativex2x^2x22x2x2xx3x^3x33x23x^23x2x10x^{10}x1010x910x^910x9xxx (i.e. x1x^1x1)111555 (constant, x0x^0x0)000",{"id":10683,"title":10684,"titles":10685,"content":10686,"level":7527},"\u002Fen\u002Frooms\u002Fderivatives#constant-multiple-rule","Constant Multiple Rule",[10430,10674],"ddx[c⋅f(x)]=c⋅f′(x)\\frac{d}{dx}[c \\cdot f(x)] = c \\cdot f'(x)dxd​[c⋅f(x)]=c⋅f′(x) If f(x)=3x2f(x) = 3x^2f(x)=3x2, then f′(x)=3⋅2x=6xf'(x) = 3 \\cdot 2x = 6xf′(x)=3⋅2x=6x.",{"id":10688,"title":10689,"titles":10690,"content":10691,"level":7527},"\u002Fen\u002Frooms\u002Fderivatives#sum-rule","Sum Rule",[10430,10674],"ddx[f(x)+g(x)]=f′(x)+g′(x)\\frac{d}{dx}[f(x) + g(x)] = f'(x) + g'(x)dxd​[f(x)+g(x)]=f′(x)+g′(x) If f(x)=x3+5x2−2x+7f(x) = x^3 + 5x^2 - 2x + 7f(x)=x3+5x2−2x+7, differentiate term by term: f′(x)=3x2+10x−2f'(x) = 3x^2 + 10x - 2f′(x)=3x2+10x−2",{"id":10693,"title":10694,"titles":10695,"content":10696,"level":7527},"\u002Fen\u002Frooms\u002Fderivatives#chain-rule","Chain Rule",[10430,10674],"For a composition of functions f(g(x))f(g(x))f(g(x)): ddxf(g(x))=f′(g(x))⋅g′(x)\\frac{d}{dx} f(g(x)) = f'(g(x)) \\cdot g'(x)dxd​f(g(x))=f′(g(x))⋅g′(x) Read as: \"derivative of outer, evaluated at inner — times derivative of inner.\" Example: h(x)=(3x+1)4h(x) = (3x + 1)^4h(x)=(3x+1)4 Let g(x)=3x+1g(x) = 3x + 1g(x)=3x+1 and f(u)=u4f(u) = u^4f(u)=u4: 
h′(x)=4(3x+1)3⋅3=12(3x+1)3h'(x) = 4(3x+1)^3 \\cdot 3 = 12(3x+1)^3h′(x)=4(3x+1)3⋅3=12(3x+1)3 The chain rule is everywhere in machine learning — backpropagation is essentially repeated application of it through layers of a neural network.",{"id":10698,"title":10699,"titles":10700,"content":10701,"level":7527},"\u002Fen\u002Frooms\u002Fderivatives#common-derivatives-reference","Common Derivatives Reference",[10430,10674],"FunctionDerivativeexe^xexexe^xexln⁡(x)\\ln(x)ln(x)1x\\frac{1}{x}x1​sin⁡(x)\\sin(x)sin(x)cos⁡(x)\\cos(x)cos(x)cos⁡(x)\\cos(x)cos(x)−sin⁡(x)-\\sin(x)−sin(x)σ(x)=11+e−x\\sigma(x) = \\frac{1}{1+e^{-x}}σ(x)=1+e−x1​ (sigmoid)σ(x)(1−σ(x))\\sigma(x)(1 - \\sigma(x))σ(x)(1−σ(x))",{"id":10703,"title":10704,"titles":10705,"content":7496,"level":7520},"\u002Fen\u002Frooms\u002Fderivatives#part-6-derivatives-in-practice","Part 6 — Derivatives in Practice",[10430],{"id":10707,"title":10708,"titles":10709,"content":10710,"level":7527},"\u002Fen\u002Frooms\u002Fderivatives#finding-minima-and-maxima","Finding Minima and Maxima",[10430,10704],"If f′(x)=0f'(x) = 0f′(x)=0 the function is momentarily flat — this is a critical point. There are three types: Local minimum: function dips down then rises → f′(x)f'(x)f′(x) changes from negative to positiveLocal maximum: function rises then dips → f′(x)f'(x)f′(x) changes from positive to negativeSaddle point: function is flat but continues in the same general direction Example: Find the minimum of f(x)=x2−4x+5f(x) = x^2 - 4x + 5f(x)=x2−4x+5 f′(x)=2x−4=0  ⟹  x=2f'(x) = 2x - 4 = 0 \\implies x = 2f′(x)=2x−4=0⟹x=2 At x=2x = 2x=2: f(2)=4−8+5=1f(2) = 4 - 8 + 5 = 1f(2)=4−8+5=1 — this is the minimum. 
def f(x):\n    return x**2 - 4*x + 5\n\ndef f_prime(x):\n    return 2*x - 4\n\n# Find where derivative = 0\n# 2x - 4 = 0  =>  x = 2\nx_min = 2\nprint(f\"Minimum at x={x_min}, f(x)={f(x_min)}\")  # x=2, f(x)=1",{"id":10712,"title":10713,"titles":10714,"content":10715,"level":7527},"\u002Fen\u002Frooms\u002Fderivatives#the-derivative-as-a-direction-signal","The Derivative as a Direction Signal",[10430,10704],"This is the key insight that bridges calculus to machine learning: If f′(x)>0f'(x) > 0f′(x)>0 at some point, moving xxx to the right increases fff. Moving xxx to the left decreases fff.If f′(x)\u003C0f'(x) \u003C 0f′(x)\u003C0, the opposite is true. To minimize fff, we should always move xxx in the direction opposite to the derivative: xnew=xold−α⋅f′(xold)x_{\\text{new}} = x_{\\text{old}} - \\alpha \\cdot f'(x_{\\text{old}})xnew​=xold​−α⋅f′(xold​) Where α\\alphaα is a small step size. Notice anything? This is exactly the gradient descent update rule.",{"id":10717,"title":10718,"titles":10719,"content":10720,"level":7520},"\u002Fen\u002Frooms\u002Fderivatives#part-7-from-one-variable-to-many-the-gradient","Part 7 — From One Variable to Many: The Gradient",[10430],"Machine learning models have not one parameter, but millions. A loss function JJJ might depend on weights w1,w2,…,wnw_1, w_2, \\ldots, w_nw1​,w2​,…,wn​. 
We need derivatives with respect to each parameter simultaneously.",{"id":10722,"title":10723,"titles":10724,"content":10725,"level":7527},"\u002Fen\u002Frooms\u002Fderivatives#partial-derivatives","Partial Derivatives",[10430,10718],"A partial derivative holds all other variables constant and differentiates with respect to one: ∂J∂wi= \"how much does J change if we nudge only wi?\"\\frac{\\partial J}{\\partial w_i} \\quad \\text{= \"how much does J change if we nudge only } w_i \\text{?\"}∂wi​∂J​= \"how much does J change if we nudge only wi​?\" Example: J(w1,w2)=w12+3w1w2+w22J(w_1, w_2) = w_1^2 + 3w_1 w_2 + w_2^2J(w1​,w2​)=w12​+3w1​w2​+w22​ ∂J∂w1=2w1+3w2∂J∂w2=3w1+2w2\\frac{\\partial J}{\\partial w_1} = 2w_1 + 3w_2 \\qquad \\frac{\\partial J}{\\partial w_2} = 3w_1 + 2w_2∂w1​∂J​=2w1​+3w2​∂w2​∂J​=3w1​+2w2​",{"id":10727,"title":10728,"titles":10729,"content":10730,"level":7527},"\u002Fen\u002Frooms\u002Fderivatives#the-gradient-vector","The Gradient Vector",[10430,10718],"Stack all partial derivatives into a single vector — this is the gradient ∇J\\nabla J∇J: ∇J(w1,w2,…,wn)=[∂J∂w1∂J∂w2⋮∂J∂wn]\\nabla J(w_1, w_2, \\ldots, w_n) = \\begin{bmatrix}\n\\frac{\\partial J}{\\partial w_1} \\\\[4pt]\n\\frac{\\partial J}{\\partial w_2} \\\\\n\\vdots \\\\[4pt]\n\\frac{\\partial J}{\\partial w_n}\n\\end{bmatrix}∇J(w1​,w2​,…,wn​)=​∂w1​∂J​∂w2​∂J​⋮∂wn​∂J​​​ The gradient is the multi-dimensional equivalent of the derivative. It points in the direction of steepest ascent in the loss landscape. To minimize the loss, we move in the opposite direction — exactly what gradient descent does. The Bridge to Machine Learning\n  \n    In ML, the loss function $J(\\theta)$ measures how wrong the model is. The gradient $\\nabla J(\\theta)$ tells us which direction in parameter space increases the error most. 
By stepping in the opposite direction, we reduce the error — step by step, iteration by iteration.",{"id":10732,"title":10733,"titles":10734,"content":10735,"level":7520},"\u002Fen\u002Frooms\u002Fderivatives#part-8-a-complete-example-linear-regression","Part 8 — A Complete Example: Linear Regression",[10430],"Let's see all of this in action. Setup: We have data points (x(i),y(i))(x^{(i)}, y^{(i)})(x(i),y(i)) and want to fit y^=wx+b\\hat{y} = wx + by^​=wx+b. Loss function (Mean Squared Error): J(w,b)=1m∑i=1m(y^(i)−y(i))2=1m∑i=1m(wx(i)+b−y(i))2J(w, b) = \\frac{1}{m} \\sum_{i=1}^{m} \\left(\\hat{y}^{(i)} - y^{(i)}\\right)^2 = \\frac{1}{m} \\sum_{i=1}^{m} \\left(wx^{(i)} + b - y^{(i)}\\right)^2J(w,b)=m1​i=1∑m​(y^​(i)−y(i))2=m1​i=1∑m​(wx(i)+b−y(i))2 Partial derivative w.r.t. www (using chain rule — derivative of outer squared term times derivative of inner wx+bwx+bwx+b): ∂J∂w=2m∑i=1m(wx(i)+b−y(i))⋅x(i)\\frac{\\partial J}{\\partial w} = \\frac{2}{m} \\sum_{i=1}^{m} \\left(wx^{(i)} + b - y^{(i)}\\right) \\cdot x^{(i)}∂w∂J​=m2​i=1∑m​(wx(i)+b−y(i))⋅x(i) Partial derivative w.r.t. 
bbb: ∂J∂b=2m∑i=1m(wx(i)+b−y(i))\\frac{\\partial J}{\\partial b} = \\frac{2}{m} \\sum_{i=1}^{m} \\left(wx^{(i)} + b - y^{(i)}\\right)∂b∂J​=m2​i=1∑m​(wx(i)+b−y(i)) Gradient descent updates — move opposite to the gradient: w←w−α⋅∂J∂w,b←b−α⋅∂J∂bw \\leftarrow w - \\alpha \\cdot \\frac{\\partial J}{\\partial w}, \\qquad b \\leftarrow b - \\alpha \\cdot \\frac{\\partial J}{\\partial b}w←w−α⋅∂w∂J​,b←b−α⋅∂b∂J​ import numpy as np\n\n# Data: true relationship y = 3x + 2\nX = np.array([1.0, 2.0, 3.0, 4.0, 5.0])\ny = np.array([5.0, 8.0, 11.0, 14.0, 17.0])\n\nw, b = 0.0, 0.0   # start at zero\nalpha = 0.01\nm = len(y)\n\nfor epoch in range(500):\n    y_pred = w * X + b              # forward pass\n    error  = y_pred - y             # residuals: ŷ - y\n\n    # Partial derivatives (the gradient)\n    dw = (2 \u002F m) * np.dot(error, X) # ∂J\u002F∂w\n    db = (2 \u002F m) * np.sum(error)    # ∂J\u002F∂b\n\n    # Gradient descent step\n    w = w - alpha * dw\n    b = b - alpha * db\n\nprint(f\"Fitted: ŷ = {w:.4f}·x + {b:.4f}\")\n# Output: ŷ = 3.0000·x + 2.0000 The derivative — computed analytically with calculus, then applied iteratively — is what drives the entire learning process.",{"id":10737,"title":10613,"titles":10738,"content":10739,"level":7520},"\u002Fen\u002Frooms\u002Fderivatives#summary",[10430],"ConceptOne-Line DefinitionSlope of a linem=ΔyΔxm = \\frac{\\Delta y}{\\Delta x}m=ΔxΔy​ — constant rate of changeAverage rate of changef(x+h)−f(x)h\\frac{f(x+h)-f(x)}{h}hf(x+h)−f(x)​ — slope of secant over interval hhhLimitThe value an expression approaches as h→0h \\to 0h→0Derivativef′(x)=lim⁡h→0f(x+h)−f(x)hf'(x) = \\lim_{h\\to 0}\\frac{f(x+h)-f(x)}{h}f′(x)=limh→0​hf(x+h)−f(x)​ — instantaneous rate of changePower ruleddxxn=nxn−1\\frac{d}{dx} x^n = nx^{n-1}dxd​xn=nxn−1Chain ruleddxf(g(x))=f′(g(x))⋅g′(x)\\frac{d}{dx}f(g(x)) = f'(g(x))\\cdot g'(x)dxd​f(g(x))=f′(g(x))⋅g′(x) — essential for backpropPartial derivativeDerivative holding all other variables fixedGradientVector of all 
partial derivatives — points toward steepest ascent The derivative is the mathematical answer to the question \"which way is uphill?\" In machine learning we use its negative — downhill — to train every model.",{"id":10741,"title":10742,"titles":10743,"content":10744,"level":7520},"\u002Fen\u002Frooms\u002Fderivatives#whats-next","What's Next?",[10430],"You now have the calculus foundation. The gradient descent algorithm takes this one concept — move opposite to the derivative — and turns it into a complete optimization engine for machine learning. Next Room: Gradient Descent\n    See how the derivative becomes an optimization algorithm — with interactive experiments, full Python code, and a walk through every step of the math.\n    \n      Enter the Gradient Descent Room → html pre.shiki code .spNyl, html code.shiki .spNyl{--shiki-light:#9C3EDA;--shiki-default:#C792EA;--shiki-dark:#C792EA}html pre.shiki code .s2Zo4, html code.shiki .s2Zo4{--shiki-light:#6182B8;--shiki-default:#82AAFF;--shiki-dark:#82AAFF}html pre.shiki code .sMK4o, html code.shiki .sMK4o{--shiki-light:#39ADB5;--shiki-default:#89DDFF;--shiki-dark:#89DDFF}html pre.shiki code .sHdIc, html code.shiki .sHdIc{--shiki-light:#90A4AE;--shiki-light-font-style:italic;--shiki-default:#EEFFFF;--shiki-default-font-style:italic;--shiki-dark:#BABED8;--shiki-dark-font-style:italic}html pre.shiki code .s7zQu, html code.shiki .s7zQu{--shiki-light:#39ADB5;--shiki-light-font-style:italic;--shiki-default:#89DDFF;--shiki-default-font-style:italic;--shiki-dark:#89DDFF;--shiki-dark-font-style:italic}html pre.shiki code .sTEyZ, html code.shiki .sTEyZ{--shiki-light:#90A4AE;--shiki-default:#EEFFFF;--shiki-dark:#BABED8}html pre.shiki code .sbssI, html code.shiki .sbssI{--shiki-light:#F76D47;--shiki-default:#F78C6C;--shiki-dark:#F78C6C}html pre.shiki code .sHwdD, html code.shiki 
.sHwdD{--shiki-light:#90A4AE;--shiki-light-font-style:italic;--shiki-default:#546E7A;--shiki-default-font-style:italic;--shiki-dark:#676E95;--shiki-dark-font-style:italic}html pre.shiki code .sfazB, html code.shiki .sfazB{--shiki-light:#91B859;--shiki-default:#C3E88D;--shiki-dark:#C3E88D}html .light .shiki span {color: var(--shiki-light);background: var(--shiki-light-bg);font-style: var(--shiki-light-font-style);font-weight: var(--shiki-light-font-weight);text-decoration: var(--shiki-light-text-decoration);}html.light .shiki span {color: var(--shiki-light);background: var(--shiki-light-bg);font-style: var(--shiki-light-font-style);font-weight: var(--shiki-light-font-weight);text-decoration: var(--shiki-light-text-decoration);}html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html.dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}",{"id":10435,"title":10434,"titles":10746,"content":10747,"level":7503},[],"Understanding the optimization algorithm that powers machine learning Image extracted from: Creating a Gradient Descent Animation in Python Gradient descent is one of the most fundamental optimization algorithms in machine learning. 
It's a method for finding the minimum of a function by iteratively moving in the direction of steepest descent.",{"id":10749,"title":10750,"titles":10751,"content":10752,"level":7520},"\u002Fen\u002Frooms\u002Fgradient-descent#the-intuition","The Intuition",[10434],"Imagine you're standing on a mountain in thick fog, and you want to reach the valley below. You can't see far, but you can feel the slope beneath your feet. Gradient descent works the same way: it takes small steps downhill, following the steepest path, until it reaches a minimum.",{"id":10754,"title":10755,"titles":10756,"content":7496,"level":7520},"\u002Fen\u002Frooms\u002Fgradient-descent#the-mathematics","The Mathematics",[10434],{"id":10758,"title":10759,"titles":10760,"content":10761,"level":7527},"\u002Fen\u002Frooms\u002Fgradient-descent#the-basic-formula","The Basic Formula",[10434,10755],"At its core, gradient descent updates parameters using this simple formula: θnew=θold−α∇J(θ)\\theta_{new} = \\theta_{old} - \\alpha \\nabla J(\\theta)θnew​=θold​−α∇J(θ) Where: θ\\thetaθ represents the parameters we're optimizingα\\alphaα is the learning rate (step size)∇J(θ)\\nabla J(\\theta)∇J(θ) is the gradient of the cost function JJJ with respect to θ\\thetaθ",{"id":10763,"title":10764,"titles":10765,"content":10766,"level":7527},"\u002Fen\u002Frooms\u002Fgradient-descent#understanding-the-gradient-from-simple-to-complex","Understanding the Gradient: From Simple to Complex",[10434,10755],"Let's demystify the gradient symbol ∇\\nabla∇ (called \"nabla\" or \"del\") by building up from the simplest case.",{"id":10768,"title":10769,"titles":10770,"content":10771,"level":7601},"\u002Fen\u002Frooms\u002Fgradient-descent#case-1-single-variable-one-parameter","Case 1: Single Variable (One Parameter)",[10434,10755,10764],"When we have just one parameter, the gradient is simply the derivative: ∇J(θ)=dJdθ\\nabla J(\\theta) = \\frac{dJ}{d\\theta}∇J(θ)=dθdJ​ The derivative tells us: \"If I increase θ\\thetaθ by a 
tiny amount, how much does JJJ change?\" Example: For J(θ)=θ2J(\\theta) = \\theta^2J(θ)=θ2: ∇J(θ)=dJdθ=2θ\\nabla J(\\theta) = \\frac{dJ}{d\\theta} = 2\\theta∇J(θ)=dθdJ​=2θ If θ=5\\theta = 5θ=5, then ∇J(5)=10\\nabla J(5) = 10∇J(5)=10 → function is increasing, go left (decrease θ\\thetaθ)If θ=−3\\theta = -3θ=−3, then ∇J(−3)=−6\\nabla J(-3) = -6∇J(−3)=−6 → function is decreasing, go right (increase θ\\thetaθ)If θ=0\\theta = 0θ=0, then ∇J(0)=0\\nabla J(0) = 0∇J(0)=0 → we're at the minimum!",{"id":10773,"title":10774,"titles":10775,"content":10776,"level":7601},"\u002Fen\u002Frooms\u002Fgradient-descent#case-2-two-variables-two-parameters","Case 2: Two Variables (Two Parameters)",[10434,10755,10764],"When we have two parameters θ1\\theta_1θ1​ and θ2\\theta_2θ2​, the gradient becomes a vector with two components: ∇J(θ1,θ2)=[∂J∂θ1∂J∂θ2]\\nabla J(\\theta_1, \\theta_2) = \\begin{bmatrix}\n\\frac{\\partial J}{\\partial \\theta_1} \\\\\n\\frac{\\partial J}{\\partial \\theta_2}\n\\end{bmatrix}∇J(θ1​,θ2​)=[∂θ1​∂J​∂θ2​∂J​​] Each partial derivative ∂J∂θi\\frac{\\partial J}{\\partial \\theta_i}∂θi​∂J​ asks: \"If I change only θi\\theta_iθi​ (keeping others fixed), how much does JJJ change?\" Example: For J(θ1,θ2)=θ12+θ22J(\\theta_1, \\theta_2) = \\theta_1^2 + \\theta_2^2J(θ1​,θ2​)=θ12​+θ22​: ∇J=[2θ12θ2]\\nabla J = \\begin{bmatrix}\n2\\theta_1 \\\\\n2\\theta_2\n\\end{bmatrix}∇J=[2θ1​2θ2​​] At point (θ1=3,θ2=4)(\\theta_1=3, \\theta_2=4)(θ1​=3,θ2​=4): ∇J=[68]\\nabla J = \\begin{bmatrix}\n6 \\\\\n8\n\\end{bmatrix}∇J=[68​] This vector points in the direction of steepest ascent. 
We go in the opposite direction (subtract it) to descend!",{"id":10778,"title":10779,"titles":10780,"content":10781,"level":7601},"\u002Fen\u002Frooms\u002Fgradient-descent#case-3-many-variables-general-case","Case 3: Many Variables (General Case)",[10434,10755,10764],"For n parameters θ1,θ2,…,θn\\theta_1, \\theta_2, \\ldots, \\theta_nθ1​,θ2​,…,θn​, the gradient is an n-dimensional vector: ∇J(θ)=[∂J∂θ1∂J∂θ2⋮∂J∂θn]\\nabla J(\\theta) = \\begin{bmatrix}\n\\frac{\\partial J}{\\partial \\theta_1} \\\\\n\\frac{\\partial J}{\\partial \\theta_2} \\\\\n\\vdots \\\\\n\\frac{\\partial J}{\\partial \\theta_n}\n\\end{bmatrix}∇J(θ)=​∂θ1​∂J​∂θ2​∂J​⋮∂θn​∂J​​​ Each component tells us how sensitive JJJ is to changes in that specific parameter. This is exactly what we need to know which direction to adjust each parameter! Key Insight: Whether you have 1 parameter or 1 million parameters, the idea is the same: compute how much each parameter affects the cost, then adjust them in the opposite direction.",{"id":10783,"title":10784,"titles":10785,"content":10786,"level":7520},"\u002Fen\u002Frooms\u002Fgradient-descent#walking-through-a-complete-example","Walking Through a Complete Example",[10434],"Let's see gradient descent in action with the simplest case: one variable. 
Consider minimizing the quadratic function: J(θ)=θ2J(\\theta) = \\theta^2J(θ)=θ2 The gradient (derivative) is: ∇J(θ)=dJdθ=2θ\\nabla J(\\theta) = \\frac{dJ}{d\\theta} = 2\\theta∇J(θ)=dθdJ​=2θ The gradient descent update rule becomes: θnew=θold−α⋅2θold\\theta_{new} = \\theta_{old} - \\alpha \\cdot 2\\theta_{old}θnew​=θold​−α⋅2θold​ Starting at θ0=10\\theta_0 = 10θ0​=10 with learning rate α=0.1\\alpha = 0.1α=0.1: Iteration 1: θ1=10−0.1×(2×10)=10−2=8\\theta_1 = 10 - 0.1 \\times (2 \\times 10) = 10 - 2 = 8θ1​=10−0.1×(2×10)=10−2=8 The gradient was positive (10 slope upward), so we moved left (decreased θ\\thetaθ) Iteration 2: θ2=8−0.1×(2×8)=8−1.6=6.4\\theta_2 = 8 - 0.1 \\times (2 \\times 8) = 8 - 1.6 = 6.4θ2​=8−0.1×(2×8)=8−1.6=6.4 Still positive gradient, getting smaller, so smaller steps Iteration 3: θ3=6.4−0.1×(2×6.4)=6.4−1.28=5.12\\theta_3 = 6.4 - 0.1 \\times (2 \\times 6.4) = 6.4 - 1.28 = 5.12θ3​=6.4−0.1×(2×6.4)=6.4−1.28=5.12 Pattern continues: as we approach the minimum, the gradient shrinks, so our steps get smaller automatically! With each step, we get closer to the minimum at θ=0\\theta = 0θ=0. 
Notice how the steps naturally get smaller as the gradient decreases near the minimum.",{"id":10788,"title":10789,"titles":10790,"content":7496,"level":7520},"\u002Fen\u002Frooms\u002Fgradient-descent#key-concepts","Key Concepts",[10434],{"id":10792,"title":10793,"titles":10794,"content":10795,"level":7527},"\u002Fen\u002Frooms\u002Fgradient-descent#learning-rate","Learning Rate",[10434,10789],"The learning rate α\\alphaα is crucial: Too large: We might overshoot the minimum or even divergeToo small: Convergence will be very slowJust right: Efficient convergence to the minimum",{"id":10797,"title":10798,"titles":10799,"content":7496,"level":7527},"\u002Fen\u002Frooms\u002Fgradient-descent#lets-do-experiments-with-different-learning-rates-and-see-how-it-affects-convergence","Let's do experiments with different learning rates and see how it affects convergence!",[10434,10789],{"id":10801,"title":10802,"titles":10803,"content":7496,"level":7527},"\u002Fen\u002Frooms\u002Fgradient-descent#types-of-gradient-descent","Types of Gradient Descent",[10434,10789],{"id":10805,"title":10806,"titles":10807,"content":10808,"level":7601},"\u002Fen\u002Frooms\u002Fgradient-descent#_1-batch-gradient-descent","1. Batch Gradient Descent",[10434,10789,10802],"Uses the entire dataset to compute the gradient: θ=θ−α∇θJ(θ)\\theta = \\theta - \\alpha \\nabla_\\theta J(\\theta)θ=θ−α∇θ​J(θ) Where J(θ)J(\\theta)J(θ) is computed over all training examples.",{"id":10810,"title":10811,"titles":10812,"content":10813,"level":7601},"\u002Fen\u002Frooms\u002Fgradient-descent#_2-stochastic-gradient-descent-sgd","2. Stochastic Gradient Descent (SGD)",[10434,10789,10802],"Updates parameters using one training example at a time: θ=θ−α∇θJ(θ;x(i),y(i))\\theta = \\theta - \\alpha \\nabla_\\theta J(\\theta; x^{(i)}, y^{(i)})θ=θ−α∇θ​J(θ;x(i),y(i))",{"id":10815,"title":10816,"titles":10817,"content":10818,"level":7601},"\u002Fen\u002Frooms\u002Fgradient-descent#_3-mini-batch-gradient-descent","3. 
Mini-batch Gradient Descent",[10434,10789,10802],"A compromise: uses a small batch of examples: θ=θ−α∇θJ(θ;x(i:i+b),y(i:i+b))\\theta = \\theta - \\alpha \\nabla_\\theta J(\\theta; x^{(i:i+b)}, y^{(i:i+b)})θ=θ−α∇θ​J(θ;x(i:i+b),y(i:i+b)) Where bbb is the batch size.",{"id":10820,"title":10821,"titles":10822,"content":10823,"level":7520},"\u002Fen\u002Frooms\u002Fgradient-descent#convergence","Convergence",[10434],"Gradient descent converges when the gradient becomes very small: ∣∇J(θ)∣\u003Cϵ|\\nabla J(\\theta)| \u003C \\epsilon∣∇J(θ)∣\u003Cϵ Where ϵ\\epsilonϵ is a small threshold value.",{"id":10825,"title":10826,"titles":10827,"content":10828,"level":7520},"\u002Fen\u002Frooms\u002Fgradient-descent#challenges","Challenges",[10434],"Local Minima: The algorithm might get stuck in local minima instead of finding the global minimumSaddle Points: Points where the gradient is zero but aren't minimaPlateau Regions: Areas where the gradient is very small, slowing down learning",{"id":10830,"title":10831,"titles":10832,"content":10833,"level":7520},"\u002Fen\u002Frooms\u002Fgradient-descent#real-world-applications","Real-World Applications",[10434],"Gradient descent is used to train: Neural Networks: Optimizing millions of parametersLinear Regression: Finding the best-fit lineLogistic Regression: Classification problemsSupport Vector Machines: Finding optimal hyperplanes",{"id":10835,"title":10836,"titles":10837,"content":10838,"level":7527},"\u002Fen\u002Frooms\u002Fgradient-descent#gradient-descent-in-deep-learning","Gradient Descent in Deep Learning",[10434,10831],"A deep neural network uses gradient descent to train weights across all its layers by minimizing the cost function. The image above shows a Deep Neural Network — a powerful type of model that directly relies on gradient descent to optimize its cost function. In deep learning: Input Layer receives raw data (e.g. 
image pixels, words, numbers)Hidden Layers perform feature extraction — learning complex patterns from dataOutput Layer produces the final predictionWeights www in each connection are the parameters θ\\thetaθ that gradient descent optimizes During training, the process is: Forward Pass→Compute Loss J(θ)→Backpropagation→Gradient Descent Update\\begin{aligned}\n&\\text{Forward Pass} \\\\\n&\\rightarrow \\text{Compute Loss } J(\\theta) \\\\\n&\\rightarrow \\text{Backpropagation} \\\\\n&\\rightarrow \\text{Gradient Descent Update}\n\\end{aligned}​Forward Pass→Compute Loss J(θ)→Backpropagation→Gradient Descent Update​ A network may have millions of neurons → millions of weights → a gradient vector with millions of dimensions — yet gradient descent works exactly the same way as in the 1D case: move opposite to the gradient to reduce the loss!",{"id":10840,"title":10594,"titles":10841,"content":10842,"level":7520},"\u002Fen\u002Frooms\u002Fgradient-descent#python-implementation",[10434],"Below is a pure-Python implementation — no ML libraries. Each block maps directly to the math above. 
Highlighted lines are the core formulas.",{"id":10844,"title":10845,"titles":10846,"content":10847,"level":7527},"\u002Fen\u002Frooms\u002Fgradient-descent#step-1-cost-function-and-its-gradient","Step 1 — Cost Function and its Gradient",[10434,10594],"J(θ)=θ2,∇J(θ)=2θJ(\\theta) = \\theta^2, \\qquad \\nabla J(\\theta) = 2\\thetaJ(θ)=θ2,∇J(θ)=2θ # J(θ) = θ²  →  the function we want to minimize\ndef cost(theta):\n    return theta ** 2\n\n# ∇J(θ) = dJ\u002Fdθ = 2θ  →  its derivative (gradient)\ndef gradient(theta):\n    return 2 * theta",{"id":10849,"title":10850,"titles":10851,"content":10852,"level":7527},"\u002Fen\u002Frooms\u002Fgradient-descent#step-2-the-update-rule","Step 2 — The Update Rule",[10434,10594],"θnew=θold−α⋅∇J(θ)\\theta_{new} = \\theta_{old} - \\alpha \\cdot \\nabla J(\\theta)θnew​=θold​−α⋅∇J(θ) def update(theta, alpha):\n    grad = gradient(theta)           # ① compute  ∇J(θ)\n    return theta - alpha * grad      # ② apply   θ_new = θ_old − α·∇J(θ) Line 3 is the update rule formula above, written directly as Python.",{"id":10854,"title":10855,"titles":10856,"content":10857,"level":7527},"\u002Fen\u002Frooms\u002Fgradient-descent#step-3-full-loop-until-convergence","Step 3 — Full Loop Until Convergence",[10434,10594],"Run updates until ∣∇J(θ)∣\u003Cε|\\nabla J(\\theta)| \u003C \\varepsilon∣∇J(θ)∣\u003Cε — when the gradient is essentially zero: def gradient_descent(theta_init, alpha, epsilon=1e-6, max_iters=1000):\n    theta = theta_init                           # θ₀ — starting point\n    for i in range(max_iters):\n        grad = gradient(theta)                   # ∇J(θ) = 2θ\n        if abs(grad) \u003C epsilon:                  # stop when |∇J(θ)| \u003C ε\n            print(f\"Converged at iteration {i}\")\n            break\n        theta = theta - alpha * grad             # θ_new = θ_old − α·∇J(θ)\n        if i \u003C 5:\n            print(f\"  iter {i+1:2d}: θ={theta:.5f}  J={cost(theta):.5f}  ∇J={grad:.5f}\")\n    return theta\n\n# Same 
starting values as the manual example above: θ₀ = 10, α = 0.1\ntheta_min = gradient_descent(theta_init=10.0, alpha=0.1)\nprint(f\"\\nMinimum at θ = {theta_min:.8f}\") Output — matches the manual iterations above: iter  1: θ= 8.00000  J=64.00000  ∇J=20.00000\n  iter  2: θ= 6.40000  J=40.96000  ∇J=16.00000\n  iter  3: θ= 5.12000  J=26.21440  ∇J=12.80000\n  iter  4: θ= 4.09600  J=16.77722  ∇J=10.24000\n  iter  5: θ= 3.27680  J=10.73742  ∇J= 8.19200\nMinimum at θ = 0.00000001",{"id":10859,"title":10860,"titles":10861,"content":10862,"level":7527},"\u002Fen\u002Frooms\u002Fgradient-descent#step-4-linear-regression-two-parameters","Step 4 — Linear Regression: Two Parameters",[10434,10594],"For a model y^=wX+b\\hat{y} = wX + by^​=wX+b, the cost is mean squared error: J(w,b)=1m∑i=1m(y^(i)−y(i))2J(w, b) = \\frac{1}{m} \\sum_{i=1}^{m} \\left(\\hat{y}^{(i)} - y^{(i)}\\right)^2J(w,b)=m1​∑i=1m​(y^​(i)−y(i))2 With partial derivatives: ∂J∂w=2m∑i=1m(y^(i)−y(i))x(i),∂J∂b=2m∑i=1m(y^(i)−y(i))\\frac{\\partial J}{\\partial w} = \\frac{2}{m} \\sum_{i=1}^{m} \\left(\\hat{y}^{(i)} - y^{(i)}\\right) x^{(i)}, \\qquad \\frac{\\partial J}{\\partial b} = \\frac{2}{m} \\sum_{i=1}^{m} \\left(\\hat{y}^{(i)} - y^{(i)}\\right)∂w∂J​=m2​∑i=1m​(y^​(i)−y(i))x(i),∂b∂J​=m2​∑i=1m​(y^​(i)−y(i)) import numpy as np\n\ndef linear_regression_gd(X, y, alpha=0.01, epochs=500):\n    m = len(y)\n    w, b = 0.0, 0.0                        # θ = [w, b] — initialize to zero\n    for epoch in range(epochs):\n        y_pred = w * X + b                 # forward pass:  ŷ = w·X + b\n        error  = y_pred - y                # residuals:     ŷ − y\n        dw = (2 \u002F m) * np.dot(error, X)   # ∂J\u002F∂w = (2\u002Fm) Σ (ŷ−y)·x\n        db = (2 \u002F m) * np.sum(error)       # ∂J\u002F∂b = (2\u002Fm) Σ (ŷ−y)\n        w = w - alpha * dw                 # w_new = w_old − α·∂J\u002F∂w\n        b = b - alpha * db                 # b_new = b_old − α·∂J\u002F∂b\n        if epoch % 100 == 0:\n            loss = np.mean(error 
** 2)     # J(w,b) = (1\u002Fm) Σ (ŷ−y)²\n            print(f\"Epoch {epoch:4d}: loss={loss:.4f}  w={w:.4f}  b={b:.4f}\")\n    return w, b\n\n# True relationship: y = 2·x  →  model should converge to w≈2, b≈0\nX = np.array([1.0, 2.0, 3.0, 4.0, 5.0])\ny = np.array([2.0, 4.0, 6.0, 8.0, 10.0])\nw, b = linear_regression_gd(X, y)\nprint(f\"\\nFitted:  ŷ = {w:.4f}·x + {b:.4f}\") The highlighted lines 7–12 map directly to the formulas: Lines 7–8: forward pass y^=wX+b\\hat{y} = wX + by^​=wX+b and residualsLines 9–10: partial derivatives ∂J∂w\\frac{\\partial J}{\\partial w}∂w∂J​ and ∂J∂b\\frac{\\partial J}{\\partial b}∂b∂J​Lines 11–12: gradient descent update rule θnew=θold−α∇J\\theta_{new} = \\theta_{old} - \\alpha \\nabla Jθnew​=θold​−α∇J",{"id":10864,"title":10865,"titles":10866,"content":10867,"level":7520},"\u002Fen\u002Frooms\u002Fgradient-descent#next-steps","Next Steps",[10434],"Once you understand gradient descent, you can explore advanced variations: Momentum: Adds velocity to updatesAdam: Adaptive learning rates per parameterRMSprop: Handles sparse gradients better html pre.shiki code .sHwdD, html code.shiki .sHwdD{--shiki-light:#90A4AE;--shiki-light-font-style:italic;--shiki-default:#546E7A;--shiki-default-font-style:italic;--shiki-dark:#676E95;--shiki-dark-font-style:italic}html pre.shiki code .spNyl, html code.shiki .spNyl{--shiki-light:#9C3EDA;--shiki-default:#C792EA;--shiki-dark:#C792EA}html pre.shiki code .s2Zo4, html code.shiki .s2Zo4{--shiki-light:#6182B8;--shiki-default:#82AAFF;--shiki-dark:#82AAFF}html pre.shiki code .sMK4o, html code.shiki .sMK4o{--shiki-light:#39ADB5;--shiki-default:#89DDFF;--shiki-dark:#89DDFF}html pre.shiki code .sHdIc, html code.shiki .sHdIc{--shiki-light:#90A4AE;--shiki-light-font-style:italic;--shiki-default:#EEFFFF;--shiki-default-font-style:italic;--shiki-dark:#BABED8;--shiki-dark-font-style:italic}html pre.shiki code .s7zQu, html code.shiki 
.s7zQu{--shiki-light:#39ADB5;--shiki-light-font-style:italic;--shiki-default:#89DDFF;--shiki-default-font-style:italic;--shiki-dark:#89DDFF;--shiki-dark-font-style:italic}html pre.shiki code .sTEyZ, html code.shiki .sTEyZ{--shiki-light:#90A4AE;--shiki-default:#EEFFFF;--shiki-dark:#BABED8}html pre.shiki code .sbssI, html code.shiki .sbssI{--shiki-light:#F76D47;--shiki-default:#F78C6C;--shiki-dark:#F78C6C}html .light .shiki span {color: var(--shiki-light);background: var(--shiki-light-bg);font-style: var(--shiki-light-font-style);font-weight: var(--shiki-light-font-weight);text-decoration: var(--shiki-light-text-decoration);}html.light .shiki span {color: var(--shiki-light);background: var(--shiki-light-bg);font-style: var(--shiki-light-font-style);font-weight: var(--shiki-light-font-weight);text-decoration: var(--shiki-light-text-decoration);}html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html.dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html pre.shiki code .sfazB, html code.shiki .sfazB{--shiki-light:#91B859;--shiki-default:#C3E88D;--shiki-dark:#C3E88D}html pre.shiki code .sBMFI, html code.shiki 
.sBMFI{--shiki-light:#E2931D;--shiki-default:#FFCB6B;--shiki-dark:#FFCB6B}",{"id":10445,"title":10444,"titles":10869,"content":10870,"level":7503},[],"កម្មវិធី 22 សប្តាហ៍ដែលនឹងបំប្លែងអ្នកពីអ្នកប្រើ ML Model ទៅជាអ្នកបង្កើត train និង deploy ពួកវានៅក្នុងការផលិតពិតប្រាកដ។",{"id":10872,"title":10873,"titles":10874,"content":10875,"level":7520},"\u002Fkm\u002Fbootcamp#សូមស្វាគមន៍-ml-engineers-នាពេលអនាគត","សូមស្វាគមន៍ ML Engineers នាពេលអនាគត!",[10444],"ក្នុងរយៈពេល 22 សប្តាហ៍ អ្នកនឹងវិវត្តន៍ពីអ្នកដែល ប្រើប្រាស់ ML Models ទៅជាអ្នកដែល បង្កើត train និង deploy ពួកវានៅក្នុងការផលិតពិតប្រាកដ។ កម្មវិធីសិក្សានេះ រួមបញ្ចូលមូលដ្ឋានគ្រឹះគណិតវិទ្យា Machine Learning បែបប្រពៃណី Deep Learning Transformers និង MLOps ចូលក្នុងដំណើររួមមួយ។",{"id":10877,"title":10878,"titles":10879,"content":10880,"level":7520},"\u002Fkm\u002Fbootcamp#ព័ត៌មានលម្អិតនៃកម្មវិធី","ព័ត៌មានលម្អិតនៃកម្មវិធី",[10444],"រយៈពេល22 សប្តាហ៍ (៦០ ម៉ោងបង្រៀន)កាលវិភាគព្រហស្បត្តិ៍ & សុក្រ · 1.5 ម៉ោង \u002F Sessionពេលវេលារៀន\u002Fសប្តាហ៍1.5 ម៉ោងក្នុងថ្នាក់ + 4–6 ម៉ោងសិក្សាដោយខ្លួនឯងថ្ងៃចាប់ផ្តើម26 មីនា 2026 (នៅបន្តចុះឈ្មោះ)វិធីសាស្ត្រគណិត → Classical ML → Deep Learning → Transformers → MLOps",{"id":10882,"title":10883,"titles":10884,"content":7496,"level":7520},"\u002Fkm\u002Fbootcamp#technology-stack","Technology Stack",[10444],{"id":10886,"title":10887,"titles":10888,"content":7496,"level":7520},"\u002Fkm\u002Fbootcamp#មាតិកានៃកម្មវិធី","មាតិកានៃកម្មវិធី",[10444],{"id":10890,"title":10891,"titles":10892,"content":10893,"level":7527},"\u002Fkm\u002Fbootcamp#កម្មវិធីទី-1-មូលដ្ឋានគ្រឹះ-៤-សប្តាហ៍","កម្មវិធី​ទី 1 · មូលដ្ឋានគ្រឹះ (៤ សប្តាហ៍)",[10444,10887],"គ្រឹះគណិតវិទ្យា និងគំនិតសម្រាប់ ML។ សិក្សាស្វែងយល់ពីរបៀបដែលម៉ាស៊ីនរៀននៅពេលមុន ការហ្វឹកហ្វឺន Model ពិតប្រាកដ។ សប្តាហ៍ 1 — AI\u002FML\u002FDeep Learning · Supervised vs Unsupervised Learning · ប្រភេទបញ្ហា MLសប្តាហ៍ 2 — Vectors & Matrices · Matrix Multiplication · Gradients · Gradient Descentសប្តាហ៍ 3 — ការចែកចាយប្រូបាប្លីស្ទេ · Bayes' Theorem · MSE & Cross-Entropy Loss · 
Bias-Variance Tradeoffសប្តាហ៍ 4 — Exploratory Data Analysis · ការដោះស្រាយទិន្នន័យបំផ្លាត · Feature Scaling · ហ្វឹកហ្វឺន Model ដំបូង 🎯 គម្រោងបញ្ចប់ Module: អនុវត្ត Gradient Descent និង Linear Regression ពីដំបូង ដោយប្រើ NumPy តែប៉ុណ្ណោះ។",{"id":10895,"title":10896,"titles":10897,"content":10898,"level":7527},"\u002Fkm\u002Fbootcamp#កម្មវិធីទី-2-machine-learning-បែបប្រពៃណី-៥-សប្តាហ៍","កម្មវិធី​ទី 2 · Machine Learning បែបប្រពៃណី (៥ សប្តាហ៍)",[10444,10887],"Scikit-learn Ecosystem និងការស្ទាត់ជំនាញទិន្នន័យ Tabular។ បង្កើត វាស់ស្ទង់ និង Tune Classifiers និង Regressors ពិតប្រាកដ។ សប្តាហ៍ 5 — Linear Regression · Logistic Regression · Sigmoid Function · Decision Boundariesសប្តាហ៍ 6 — Decision Trees (Gini\u002FEntropy) · Random Forests · Bagging · Feature Importanceសប្តាហ៍ 7 — XGBoost & LightGBM · Metrics (Precision, Recall, F1, AUC-ROC) · Confusion Matricesសប្តាហ៍ 8 — K-Fold Cross-Validation · Grid & Bayesian Hyperparameter Search · Feature Engineering · ការការពារ Data Leakageសប្តាហ៍ 9 — Kaggle Competition Strategy · sklearn Pipeline · Model Serialization 🎯 គម្រោងបញ្ចប់ Module: ចូលរួម Kaggle Tabular-Data Challenge ហើយបង្កើត sklearn Pipeline ពេញលេញ។",{"id":10900,"title":10901,"titles":10902,"content":10903,"level":7527},"\u002Fkm\u002Fbootcamp#កម្មវិធីទី-3-deep-learning-ជាមួយ-pytorch-៤-សប្តាហ៍","កម្មវិធី​ទី 3 · Deep Learning ជាមួយ PyTorch (៤ សប្តាហ៍)",[10444,10887],"Neural Networks ពីគ្រឹះដល់ CNNs ដំណើរការ GPU។ ស្វែងយល់ Layer ជ្រៅជ្រះ Gradient Update និងល្បិចហ្វឹកហ្វឺន។ សប្តាហ៍ 10 — Perceptrons · Multi-Layer Networks · Forward Propagation · Backpropagation & Chain Ruleសប្តាហ៍ 11 — Activation Functions (ReLU, Softmax) · PyTorch Tensors · Custom Dataset & DataLoader · Data Augmentationសប្តាហ៍ 12 — Training Loops · Adam\u002FSGD Optimizers · Early Stopping · Model Checkpointingសប្តាហ៍ 13 — Convolutional Layers & Pooling · ResNet\u002FVGG · Transfer Learning · Fine-Tuning Strategies 🎯 គម្រោងបញ្ចប់ Module: បង្កើត Image Classifier ដោយប្រើ Transfer Learning ជាមួយ CNN 
ដែលបានហ្វឹកហ្វឺនមុន។",{"id":10905,"title":10906,"titles":10907,"content":10908,"level":7527},"\u002Fkm\u002Fbootcamp#កម្មវិធីទី-4-transformers-hugging-face-៣-សប្តាហ៍","កម្មវិធី​ទី 4 · Transformers & Hugging Face (៣ សប្តាហ៍)",[10444,10887],"យន្តការ Attention ដែលជំរុញ AI សម័យទំនើប។ Fine-tune BERT និង GPT-class Models សម្រាប់ NLP Tasks ពិតប្រាកដ។ សប្តាហ៍ 14 — Self-Attention · Multi-Head Attention · Transformer Architecture · Tokenization (BPE\u002FWordPiece) · Positional Encodingសប្តាហ៍ 15 — Hugging Face Hub & Pipeline API · Fine-Tuning ជាមួយ Trainer API · BERT សម្រាប់ Text Classification & NERសប្តាហ៍ 16 — NLP Competition Strategy · ផ្ទុក Models ទៅ Hugging Face Hub · បង្កើត Text Classification Service 🎯 គម្រោងបញ្ចប់ Module: Fine-tune Transformer លើ Kaggle NLP Challenge ហើយបោះផ្សាយ Model ទៅ Hugging Face Hub។",{"id":10910,"title":10911,"titles":10912,"content":10913,"level":7527},"\u002Fkm\u002Fbootcamp#កម្មវិធីទី-5-mlops-deployment-៣-សប្តាហ៍","កម្មវិធី​ទី 5 · MLOps & Deployment (៣ សប្តាហ៍)",[10444,10887],"ពី Jupyter Notebook ទៅ Production API។ រៀន Tools និងការអនុវត្ត ដែល ML Engineers ទាំងអស់ត្រូវការក្នុងឧស្សាហកម្ម។ សប្តាហ៍ 17 — Model Serialization (pickle \u002F joblib \u002F ONNX) · DVC Versioning · REST Prediction APIs ជាមួយ FastAPIសប្តាហ៍ 18 — Docker Images & Containers · Dockerfile Best Practices · MLflow Experiment Tracking & Model Registryសប្តាហ៍ 19 — GitHub Actions CI\u002FCD · Automated Testing · Data Drift Detection · Model Monitoring & Alerting 🎯 គម្រោងបញ្ចប់ Module: Deploy ML Model ពេញលេញជាមួយ FastAPI, Docker, និង CI\u002FCD Pipeline។",{"id":10915,"title":10916,"titles":10917,"content":10918,"level":7527},"\u002Fkm\u002Fbootcamp#កម្មវិធីទី-6-គម្រោងបញ្ចប់-១-សប្តាហ៍","កម្មវិធី​ទី 6 · គម្រោងបញ្ចប់ (១ សប្តាហ៍)",[10444,10887],"បង្កើត និង Deploy ប្រព័ន្ធ ML Production ពេញលេញ — ពី Raw Data ទៅ Live API។ ស្វែងរក Dataset ពិតប្រាកដ (Kaggle ឬបញ្ហាពិតប្រាកដ)EDA, Preprocessing, និង Feature Engineering ពេញលេញTrain និងប្រៀបធៀប Models ច្រើន ជាមួយ Hyperparameter Tuning 
ដែលបានចារDeploy REST API (FastAPI) ក្នុង Docker Containerរៀបចំ CI\u002FCD Pipeline ជាមួយ GitHub ActionsDemo Presentation 10–15 នាទី គំនិតគម្រោង: Sentiment Analysis · Medical Image Classification · Real-Estate Price Prediction · Fake News Detection · Customer Churn · Text Summarization API",{"id":10920,"title":10921,"titles":10922,"content":10923,"level":7520},"\u002Fkm\u002Fbootcamp#កម្មវិធីនេះសម្រាប់អ្នកណា","កម្មវិធីនេះសម្រាប់អ្នកណា?",[10444],"Developers ដែលស្គាល់ Python ហើយចង់ចូលក្នុងវិស័យ ML Engineeringនិស្សិតដែលចង់បានការអនុវត្ត ML Projects ពិតប្រាកដអ្នកណាក៏ដោយដែលចង់ស្ទាប់ស្ទង់ ML ដោយការអនុវត្ត",{"id":10925,"title":10926,"titles":10927,"content":10928,"level":7520},"\u002Fkm\u002Fbootcamp#អ្វីដែលអ្នកនឹងបង្កើត","អ្វីដែលអ្នកនឹងបង្កើត",[10444],"នៅចុងបញ្ចប់នៃ Bootcamp អ្នកនឹងបាន Train និង Deploy ម៉ូដែលពិតប្រាកដ ចូលរួមប្រកួត Kaggle ហើយបន្ហាញ Capstone Project ដែលបង្ហាញ Full-Stack ML Skills។",{"id":10405,"title":5,"titles":10930,"content":10931,"level":7503},[],"ពីការកំណត់ល្បឿនថេរ ទៅកាន់ការផ្លាស់ប្តូរតាមស្ថានភាព — ស្វែងយល់ពីបច្ចេកទេសនៅពីក្រោយ AI សម័យថ្មី រូបភាពយកមកពី: DL Notes: Advanced Gradient Descent ការប្រើ Gradient descent គឺពិតជាមានប្រសិទ្ធភាព ប៉ុន្តែវាមានបញ្ហាមួយ៖ រាល់គ្រប់ Parameter (ប៉ារ៉ាម៉ែត្រ) ទាំងអស់នៅក្នុងម៉ូឌែលរបស់អ្នក ប្រើប្រាស់ Learning Rate (ទំហំជំហាន) តែមួយដូចគ្នា។ ហើយការកំណត់លេខនោះឱ្យបានត្រឹមត្រូវ? 
វាដូចជាការទស្សន៍ទាយច្រើនជាងវិទ្យាសាស្ត្រ។ Adam (មកពីពាក្យថា Adaptive Moment Estimation) ត្រូវបានណែនាំដោយលោក Diederik Kingma និង Jimmy Ba ក្នុងឆ្នាំ ២០១៥ [1] ហើយវាបានក្លាយជា \"Optimizer\" ដ៏ពេញនិយមបំផុតក្នុងវិស័យ Deep Learning។ អត្ថបទនេះនឹងពន្យល់ថា ហេតុអ្វី បានជាការប្រើ Learning Rate ថេរតែមួយមិនសូវល្អ, តើ Adam ធ្វើអ្វីខ្លះខុសពីគេ, និង របៀប ដែលវាដំណើរការ — ចាប់ពីទ្រឹស្តីរហូតដល់កូដជាក់ស្តែង។",{"id":10933,"title":97,"titles":10934,"content":7496,"level":7520},"\u002Fkm\u002Frooms\u002Fadam-optimizer#បញ្ហានៃការប្រើ-learning-rate-ថេរ-fixed-learning-rate",[5],{"id":10936,"title":102,"titles":10937,"content":10938,"level":7527},"\u002Fkm\u002Frooms\u002Fadam-optimizer#វិបត្តិ-ទំហំជំហានមួយ-ប្រើគ្រប់កន្លែង",[5,97],"សាកស្រមៃថាអ្នកកំពុងដើរភ្នំដោយមានច្បាប់ដ៏តឹងរឹងមួយ៖ រាល់ជំហានដែលអ្នកបោះ ត្រូវតែមានប្រវែងស្មើៗគ្នាជានិច្ច — មិនឱ្យលើស មិនឱ្យខ្វះ។ នៅពេលអ្នកនៅលើច្រាំងថ្មចោត ការបោះជំហានវែងពេកអាចឱ្យអ្នកធ្លាក់ជ្រោះ។ ប៉ុន្តែនៅពេលអ្នកនៅលើវាលទំនាបដែលមានជម្រាលតិចតួច ការបោះជំហានដដែលនោះមានអារម្មណ៍ថាយឺតខ្លាំងណាស់ — វាអាចនឹងចំណាយពេលរាប់ឆ្នាំទើបទៅដល់បាតភ្នំ។ នេះគឺជាបញ្ហាពិតប្រាកដនៃ Learning Rate ថេរ (α\\alphaα) នៅក្នុង Gradient Descent៖ θnew=θold−α∇J(θ)\\theta_{new} = \\theta_{old} - \\alpha \\nabla J(\\theta)θnew​=θold​−α∇J(θ) តម្លៃ α\\alphaα តែមួយនេះ គ្រប់គ្រងទំហំជំហានសម្រាប់ គ្រប់ Parameter ទាំងអស់ — ទោះបីជា Parameter ខ្លះត្រូវការបោះជំហានធំ ឬខ្លះត្រូវការបោះជំហានតូចក៏ដោយ។",{"id":10940,"title":466,"titles":10941,"content":7496,"level":7527},"\u002Fkm\u002Frooms\u002Fadam-optimizer#ផលវិបាក-៣-យ៉ាងនៃ-learning-rate-ថេរ",[5,97],{"id":10943,"title":471,"titles":10944,"content":10945,"level":7601},"\u002Fkm\u002Frooms\u002Fadam-optimizer#១-ធំពេក-រំលងគោលដៅ-overshooting",[5,97,466],"នៅពេល α\\alphaα ធំពេក វានឹងធ្វើឱ្យយើងបោះជំហានរំលងចំណុចទាបបំផុត (Minimum) ហើយលោតទៅលោតមក៖\nការបាត់បង់ (Loss) មិនដែលថយចុះឡើយ — វានឹងលោតចុះឡើងជុំវិញគោលដៅរហូត។",{"id":10947,"title":507,"titles":10948,"content":10949,"level":7601},"\u002Fkm\u002Frooms\u002Fadam-optimizer#២-តូចពេក-យឺតដូចអណ្តើក-crawling",[5,97,466],"នៅពេល α\\alphaα 
តូចពេក ការរៀនដំណើរការទៅមុខមែន ប៉ុន្តែវាយឺតខ្លាំងណាស់។ ក្នុងម៉ូឌែលដែលមាន Parameter រាប់លាន នេះគឺជាមហន្តរាយខាងពេលវេលា និងកម្លាំងម៉ាស៊ីន។",{"id":10951,"title":542,"titles":10952,"content":10953,"level":7601},"\u002Fkm\u002Frooms\u002Fadam-optimizer#៣-បញ្ហា-ផ្លូវតូចចង្អៀត-ravine-problem",[5,97,466],"នៅក្នុងលំហវិមាត្រខ្ពស់ ក្រាហ្វជម្រាលជារឿយៗមើលទៅដូចជា ជ្រលងភ្នំដ៏តូចចង្អៀត — ចោតខ្លាំងក្នុងទិសដៅម្ខាង និងរាបស្មើក្នុងទិសដៅម្ខាងទៀត។ ទិសដៅដែលចោត ត្រូវការ α\\alphaα តូច ដើម្បីកុំឱ្យបោះជំហានបុកជញ្ជាំងជ្រលងភ្នំ។ទិសដៅដែលរាបស្មើ ត្រូវការ α\\alphaα ធំ ដើម្បីដើរឱ្យទៅមុខឆាប់ដល់។\nមិនមាន α\\alphaα ថេរណាមួយ អាចបំពេញចិត្តទិសដៅទាំងពីរក្នុងពេលតែមួយបានទេ។ LeCun et al. [2] បានធ្វើការវិភាគលម្អិតពីបទប្បញ្ញត្តិ Loss Landscape ទាំងនេះ និងផលប៉ះពាល់របស់ពួកវាទៅលើការ Convergence។ ចំណុចខ្សោយសំខាន់\n  Parameter ផ្សេងគ្នា ត្រូវការទំហំជំហានផ្សេងគ្នា។ Learning rate ថេរចាត់ទុកពួកវាដូចគ្នាទាំងអស់ — ហើយនេះគឺជាបញ្ហាកកស្ទះ។",{"id":10955,"title":686,"titles":10956,"content":10957,"level":7520},"\u002Fkm\u002Frooms\u002Fadam-optimizer#ស្គាល់-adam-ប្រព័ន្ធ-gps-នៃ-optimizer",[5],"បើ Gradient Descent ធម្មតាគឺជាការដើរភ្នំដោយបោះជំហានថេរ Adam គឺជាការប្រើ GPS ដែលមានការណែនាំផ្លូវដោយវៃឆ្លាត៖ វាបង្កើនល្បឿននៅលើផ្លូវហាយវេ បន្ថយល្បឿននៅផ្លូវបត់ចង្អៀត និងចងចាំផ្លូវដែលធ្លាប់បានដើរកន្លងមក។ អាថ៌កំបាំងរបស់ Adam គឺការតាមដាន រឿងពីរយ៉ាង សម្រាប់រាល់ Parameter នីមួយៗ៖ បរិមាណនិមិត្តសញ្ញាអត្ថន័យងាយៗ1st Moment (Momentum)mtm_tmt​តើទិសដៅណាខ្លះដែលជម្រាល (Gradients) ធ្លាប់ចង្អុលទៅនាពេលថ្មីៗនេះ?2nd Moment (Adaptive Scale)vtv_tvt​តើជម្រាល (Gradients) មានទំហំ ធំប៉ុនណា នាពេលថ្មីៗនេះ? 
តាមរយៈការចែកទំហំជំហាននឹងឫសការ៉េនៃ 2nd moment, Adam នឹងបន្ថយទំហំជំហានដោយស្វ័យប្រវត្តិសម្រាប់ Parameter ណាដែលមានជម្រាលធំៗខ្លាំងពេក និងបង្កើនទំហំជំហានសម្រាប់ Parameter ណាដែលមានជម្រាលតូចៗ។",{"id":10959,"title":918,"titles":10960,"content":7496,"level":7520},"\u002Fkm\u002Frooms\u002Fadam-optimizer#របៀបបង្កើត-adam-ជំហានម្តងៗ",[5],{"id":10962,"title":922,"titles":10963,"content":10964,"level":7527},"\u002Fkm\u002Frooms\u002Fadam-optimizer#ជំហានទី-១-momentum-រក្សាល្បឿន-និងទិសដៅ",[5,918],"បញ្ហាដែលវាដោះស្រាយ: ជម្រាល (Gradients) ជារឿយៗមានភាពរំខាន (Noisy)។ ការរត់តាមជម្រាលដែលរំខានទាំងនោះ ធ្វើឱ្យផ្លូវដើរមិនរលូន។ គំនិត: រក្សាមធ្យមភាគនៃជម្រាលពីមុនៗ ដូចជាការរមៀលបាល់ចុះពីលើភ្នំ — វានឹងបង្កើនល្បឿនក្នុងទិសដៅដែលស្របគ្នា និងមិនងាយងាករេដោយសារដុំថ្មតូចៗតាមផ្លូវ។ mt=β1⋅mt−1+(1−β1)⋅gtm_t = \\beta_1 \\cdot m_{t-1} + (1 - \\beta_1) \\cdot g_tmt​=β1​⋅mt−1​+(1−β1​)⋅gt​ β1\\beta_1β1​ ជាមេគុណ (ជាទូទៅគឺ 0.9)៖ មានន័យថាឱ្យតម្លៃ ៩០% លើអតីតកាល និង ១០% លើជម្រាលថ្មី។ Sutskever et al. [3] បានបង្ហាញថា Momentum term នេះ មានសារៈសំខាន់ខ្លាំងណាស់ក្នុងការ Converge យ៉ាងលឿន និងស្ថិតស្ថេរ នៅក្នុង Deep Networks។",{"id":10966,"title":1411,"titles":10967,"content":10968,"level":7527},"\u002Fkm\u002Frooms\u002Fadam-optimizer#ជំហានទី-២-adaptive-scale-ការបត់បែនតាមប្រវត្តិ",[5,918],"បញ្ហាដែលវាដោះស្រាយ: Parameter ខ្លះមានជម្រាលធំ ខ្លះមានជម្រាលតូច។ យើងចង់ឱ្យអាធំដើរតិចៗ និងអាតូចដើរឱ្យបានច្រើន។ គំនិត: តាមដានមធ្យមភាគនៃ \"ការ៉េ\" នៃជម្រាល៖ vt=β2⋅vt−1+(1−β2)⋅gt2v_t = \\beta_2 \\cdot v_{t-1} + (1 - \\beta_2) \\cdot g_t^2vt​=β2​⋅vt−1​+(1−β2​)⋅gt2​ Parameter ណាដែលទទួលបានជម្រាលធំៗជាបន្តបន្ទាប់ នឹងមានតម្លៃ vtv_tvt​ ធំ។ នៅពេលយើងយកជំហានទៅចែកនឹង vt\\sqrt{v_t}vt​​ វានឹងធ្វើឱ្យការ Update ថយចុះមកតូចវិញ។ នេះហើយជា Learning Rate ផ្ទាល់ខ្លួន សម្រាប់ Parameter នីមួយៗ។",{"id":10970,"title":2031,"titles":10971,"content":10972,"level":7527},"\u002Fkm\u002Frooms\u002Fadam-optimizer#ជំហានទី-៣-bias-correction-ការកែតម្រូវពេលចាប់ផ្តើម",[5,918],"បញ្ហាដែលវាដោះស្រាយ: ដោយសារនៅពេលចាប់ផ្តើម m0=0m_0 = 0m0​=0 និង v0=0v_0 = 0v0​=0 
នោះការប៉ាន់ស្មានដំបូងៗនឹងខិតទៅជិតសូន្យខ្លាំងពេក (វាមិនទាន់មានប្រវត្តិគ្រប់គ្រាន់)។ ដំណោះស្រាយ: ចែកវាជាមួយ (1−βt)(1 - \\beta^t)(1−βt) ដើម្បីកែតម្រូវឱ្យមានតុល្យភាពវិញនៅជំហានដំបូងៗ៖ m^t=mt1−β1t,v^t=vt1−β2t\\hat{m}_t = \\frac{m_t}{1 - \\beta_1^t}, \\qquad \\hat{v}_t = \\frac{v_t}{1 - \\beta_2^t}m^t​=1−β1t​mt​​,v^t​=1−β2t​vt​​",{"id":10974,"title":2948,"titles":10975,"content":10976,"level":7527},"\u002Fkm\u002Frooms\u002Fadam-optimizer#ជំហានទី-៤-រូបមន្តចុងក្រោយនៃការ-update",[5,918],"θt+1=θt−αv^t+ϵ⋅m^t\\boxed{\\theta_{t+1} = \\theta_t - \\frac{\\alpha}{\\sqrt{\\hat{v}_t} + \\epsilon} \\cdot \\hat{m}_t}θt+1​=θt​−v^t​​+ϵα​⋅m^t​​ (ចំណាំ៖ ϵ≈10−8\\epsilon \\approx 10^{-8}ϵ≈10−8 ដើម្បីការពារកុំឱ្យមានការចែកនឹងសូន្យ) Hyperparameters លំនាំដើមតាមក្រដាសសំណើដើម [1]: Hyperparameterនិមិត្តសញ្ញាDefaultLearning rateα\\alphaα0.0011st moment decayβ1\\beta_1β1​0.92nd moment decayβ2\\beta_2β2​0.999Numerical stabilityϵ\\epsilonϵ10−810^{-8}10−8",{"id":10978,"title":3914,"titles":10979,"content":10980,"level":7520},"\u002Fkm\u002Frooms\u002Fadam-optimizer#ឧទាហរណ៍ជាក់ស្តែង-adam-ដំណើរការ",[5],"សូមតាមដាន Adam ដោយដៃ លើអនុគមន៍សាមញ្ញដូចដែលយើងប្រើក្នុង Gradient Descent: J(θ)=θ2,∇J(θ)=2θJ(\\theta) = \\theta^2, \\qquad \\nabla J(\\theta) = 2\\thetaJ(θ)=θ2,∇J(θ)=2θ ចាប់ផ្តើមនៅ θ0=5\\theta_0 = 5θ0​=5 ជាមួយ hyperparameters លំនាំដើម (α=0.001\\alpha = 0.001α=0.001, β1=0.9\\beta_1 = 0.9β1​=0.9, β2=0.999\\beta_2 = 0.999β2​=0.999, ϵ=10−8\\epsilon = 10^{-8}ϵ=10−8)។ ចាប់ផ្តើម: m0=0m_0 = 0m0​=0, v0=0v_0 = 0v0​=0។ ជំហាន t=1t=1t=1: g1=2×5=10g_1 = 2 \\times 5 = 10g1​=2×5=10 m1=0.9×0+0.1×10=1.0m_1 = 0.9 \\times 0 + 0.1 \\times 10 = 1.0m1​=0.9×0+0.1×10=1.0 v1=0.999×0+0.001×100=0.1v_1 = 0.999 \\times 0 + 0.001 \\times 100 = 0.1v1​=0.999×0+0.001×100=0.1 m^1=1.01−0.91=1.00.1=10.0\\hat{m}_1 = \\frac{1.0}{1 - 0.9^1} = \\frac{1.0}{0.1} = 10.0m^1​=1−0.911.0​=0.11.0​=10.0 v^1=0.11−0.9991=0.10.001=100.0\\hat{v}_1 = \\frac{0.1}{1 - 0.999^1} = \\frac{0.1}{0.001} = 100.0v^1​=1−0.99910.1​=0.0010.1​=100.0 
θ1=5−0.001100+10−8×10.0=5−0.00110×10.0=5−0.001=4.999\\theta_1 = 5 - \\frac{0.001}{\\sqrt{100} + 10^{-8}} \\times 10.0 = 5 - \\frac{0.001}{10} \\times 10.0 = 5 - 0.001 = 4.999θ1​=5−100​+10−80.001​×10.0=5−100.001​×10.0=5−0.001=4.999 ជំហាន t=2t=2t=2: g2=2×4.999=9.998g_2 = 2 \\times 4.999 = 9.998g2​=2×4.999=9.998 m2=0.9×1.0+0.1×9.998=1.8998m_2 = 0.9 \\times 1.0 + 0.1 \\times 9.998 = 1.8998m2​=0.9×1.0+0.1×9.998=1.8998 v2=0.999×0.1+0.001×9.9982=0.1999v_2 = 0.999 \\times 0.1 + 0.001 \\times 9.998^2 = 0.1999v2​=0.999×0.1+0.001×9.9982=0.1999 បន្ទាប់ពី Bias Correction និង Update, θ2≈4.998\\theta_2 \\approx 4.998θ2​≈4.998។ Adam ធ្វើ ជំហានស្ថិតស្ថេរ និងគ្រប់គ្រងបាន — មិនរហ័សហ្លើតដូច SGD ដែល α\\alphaα ធំ (ដែលនឹងបោះជំហានរំលងចំណុចទាប) ប៉ុន្តែលឿនជាងច្រើនពី SGD ដែល α\\alphaα តូចខ្លាំង (ដែលនឹងដើរយឺតបន្តិចម្ដងៗ)។ ការកែ Bias Correction ធ្វើឱ្យជំហានដំបូងៗនៅតែមានន័យ ទោះបីចាប់ផ្តើមពី Cold Start ក៏ដោយ។",{"id":10982,"title":7245,"titles":10983,"content":10984,"level":7520},"\u002Fkm\u002Frooms\u002Fadam-optimizer#ការប្រៀបធៀប-optimizer",[5],"Optimizerទំហំជំហានការចងចាំចំណុចខ្លាំងចំណុចខ្សោយSGDថេរ (α\\alphaα)គ្មានសាមញ្ញ ងាយយល់ពិបាកកំណត់ α\\alphaα, យឺតSGD + Momentumថេរ (α\\alphaα)ទិសដៅជម្រាលដើរលឿន និងរលូនជាងនៅតែត្រូវការ α\\alphaα ល្អRMSProp [4]បត់បែនទំហំជម្រាលល្អក្នុងករណីទិន្នន័យផ្លាស់ប្ដូរគ្មាន MomentumAdamបត់បែនទិសដៅ + ទំហំល្អបំផុតសឹងគ្រប់ការងារពេលខ្លះ Generalize បានមិនល្អប៉ុណ្ណឹង Adam បញ្ចូលរួម SGD + Momentum (1st moment) និង RMSProp (2nd moment) ក្នុងក្របខណ្ឌតែមួយ ជាមួយ Bias Correction ជាការបន្ថែម។",{"id":10986,"title":7482,"titles":10987,"content":7496,"level":7520},"\u002Fkm\u002Frooms\u002Fadam-optimizer#ការអនុវត្តជាមួយ-python-កូដគំរូ",[5],{"id":10989,"title":7486,"titles":10990,"content":10991,"level":7527},"\u002Fkm\u002Frooms\u002Fadam-optimizer#adam-យ៉ាងសាមញ្ញពីបាតដៃទទេ",[5,7482],"ខាងក្រោមនេះគឺជាការសរសេរ Adam Optimizer ដោយខ្លួនឯង (ពីបាតដៃទទេ)៖ import numpy as np\n\ndef adam(grad_fn, theta_init, alpha=0.001, beta1=0.9, beta2=0.999, eps=1e-8, max_iters=1000):\n    theta = theta_init\n  
  m = 0.0   # 1st moment (momentum)\n    v = 0.0   # 2nd moment (adaptive scale)\n\n    for t in range(1, max_iters + 1):\n        g = grad_fn(theta)            # ① គណនា gradient\n\n        m = beta1 * m + (1 - beta1) * g       # ② update 1st moment\n        v = beta2 * v + (1 - beta2) * g ** 2  # ③ update 2nd moment\n\n        m_hat = m \u002F (1 - beta1 ** t)          # ④ កែតម្រូវ bias សម្រាប់ m\n        v_hat = v \u002F (1 - beta2 ** t)          # ⑤ កែតម្រូវ bias សម្រាប់ v\n\n        # ⑥ ធ្វើការ Update parameter\n        theta = theta - alpha \u002F (np.sqrt(v_hat) + eps) * m_hat  \n\n        if abs(g) \u003C 1e-7:\n            print(f\"ជោគជ័យនៅជំហានទី {t}\")\n            break\n\n    return theta\n\n# សាកល្បងកាត់បន្ថយ J(θ) = θ²,  ∇J(θ) = 2θ\ntheta_min = adam(grad_fn=lambda th: 2 * th, theta_init=5.0)\nprint(f\"ចំណុចទាបបំផុតគឺ θ = {theta_min:.8f}\") លទ្ធផល: ជោគជ័យនៅជំហានទី 817\nចំណុចទាបបំផុតគឺ θ = 0.00000001",{"id":10993,"title":8101,"titles":10994,"content":10995,"level":7527},"\u002Fkm\u002Frooms\u002Fadam-optimizer#adam-លើ-linear-regression",[5,7482],"សូមសាកអនុវត្ត Adam លើករណីប្រើប្រាស់ជាក់ស្តែង — ការ Fit ខ្សែត្រង់ y^=w⋅x+b\\hat{y} = w \\cdot x + by^​=w⋅x+b ទៅទិន្នន័យ។ import numpy as np\n\ndef adam_linear_regression(X, y, alpha=0.01, beta1=0.9, beta2=0.999,\n                            eps=1e-8, epochs=200):\n    m = len(y)\n    w, b = 0.0, 0.0\n\n    # Adam state ដាច់ដោយឡែកសម្រាប់ parameter នីមួយៗ\n    mw, vw = 0.0, 0.0   # moments for w\n    mb, vb = 0.0, 0.0   # moments for b\n\n    for t in range(1, epochs + 1):\n        y_pred = w * X + b\n        error  = y_pred - y\n\n        # Gradients (រូបមន្តដូចគ្នានឹង Gradient Descent)\n        gw = (2 \u002F m) * np.dot(error, X)\n        gb = (2 \u002F m) * np.sum(error)\n\n        # 1st និង 2nd moment updates សម្រាប់ w\n        mw = beta1 * mw + (1 - beta1) * gw\n        vw = beta2 * vw + (1 - beta2) * gw ** 2\n        mw_hat = mw \u002F (1 - beta1 ** t)\n        vw_hat = vw \u002F (1 - beta2 ** t)\n\n        
# 1st និង 2nd moment updates សម្រាប់ b\n        mb = beta1 * mb + (1 - beta1) * gb\n        vb = beta2 * vb + (1 - beta2) * gb ** 2\n        mb_hat = mb \u002F (1 - beta1 ** t)\n        vb_hat = vb \u002F (1 - beta2 ** t)\n\n        # Parameter updates\n        w = w - alpha \u002F (np.sqrt(vw_hat) + eps) * mw_hat\n        b = b - alpha \u002F (np.sqrt(vb_hat) + eps) * mb_hat\n\n        if t % 50 == 0:\n            loss = np.mean(error ** 2)\n            print(f\"Epoch {t:4d}: loss={loss:.6f}  w={w:.4f}  b={b:.4f}\")\n\n    return w, b\n\n# ទំនាក់ទំនងពិត: y = 2x + 1\nX = np.array([1.0, 2.0, 3.0, 4.0, 5.0])\ny = np.array([3.0, 5.0, 7.0, 9.0, 11.0])\n\nw, b = adam_linear_regression(X, y)\nprint(f\"\\nFitted: ŷ = {w:.4f}·x + {b:.4f}\") លទ្ធផល: Epoch   50: loss=0.000042  w=1.9953  b=1.0044\nEpoch  100: loss=0.000000  w=2.0000  b=1.0000\nEpoch  150: loss=0.000000  w=2.0000  b=1.0000\nEpoch  200: loss=0.000000  w=2.0000  b=1.0000\n\nFitted: ŷ = 2.0000·x + 1.0000 Adam ស្ដារ w=2,b=1w=2, b=1w=2,b=1 បានច្បាស់លាស់ និងលឿន — ជាពិសេសបើប្រៀបនឹង Gradient Descent ធម្មតា ដែលត្រូវបន្ដ Tune Learning Rate ដោយប្រុងប្រយ័ត្ន។",{"id":10997,"title":9428,"titles":10998,"content":10999,"level":7520},"\u002Fkm\u002Frooms\u002Fadam-optimizer#ពេលណាគួរប្រើ-adam",[5],"Adam គឺជាជម្រើសដ៏សុវត្ថិភាពបំផុតសម្រាប់កិច្ចការ Deep Learning ស្ទើរតែទាំងអស់៖ Neural networks: Training MLPs, CNNs, Transformers, RNNsទិន្នន័យដែលមានការរំខាន (Noisy gradients): ល្អសម្រាប់ Mini-batch training ដែលប្រើ Batch size តូចៗ។ទិន្នន័យរំដោចខ្ចាត (Sparse features): ល្អសម្រាប់ NLP ដែលពាក្យខ្លះបង្ហាញកម្រ (ជម្រាលធំ ប៉ុន្តែមិនសូវញឹកញាប់)។អ្នកទើបចាប់ផ្តើម: នៅពេលអ្នកមិនចង់ចំណាយពេលច្រើនក្នុងការ Tune Learning Rate។ ចំណាំមួយ\n  Wilson et al. 
[5] បង្ហាញថា Adaptive optimizer ដូចជា Adam អាចនឹង Generalize បានន ចុះបន្តិចបើប្រៀបនឹង SGD + Momentum ដែល Tune ល្អ សម្រាប់ Image Classification។ ក្នុងករណីនោះ SGD + Momentum ជាមួយ Learning Rate Scheduling អាចប្រសើរជាង Adam។ ប៉ុន្តែសម្រាប់កិច្ចការភាគច្រើន ភាពរឹងមាំ (Robustness) របស់ Adam នៅតែឈ្នះ។",{"id":11001,"title":9491,"titles":11002,"content":11003,"level":7520},"\u002Fkm\u002Frooms\u002Fadam-optimizer#សេចក្តីសង្ខេប",[5],"គំនិតចំណុចសំខាន់ចំណុចខ្សោយ Fixed LRα\\alphaα តែមួយសម្រាប់ Parameters ទាំងអស់ — លម្អិតពេកMomentum (mtm_tmt​)ធ្វើឱ្យទិសជម្រាលរលូន និងស្ថិតស្ថេរតាមពេលAdaptive scale (vtv_tvt​)Scale ជំហានតាមប្រវត្តិទំហំជម្រាលBias correctionកែ Cold-start bias នៅពេល m0=v0=0m_0 = v_0 = 0m0​=v0​=0Adam updateθ←θ−αv^t+ϵm^t\\theta \\leftarrow \\theta - \\frac{\\alpha}{\\sqrt{\\hat{v}_t} + \\epsilon} \\hat{m}_tθ←θ−v^t​​+ϵα​m^t​ Adam មិនមែនមកលុបបំបាត់ Learning Rate (α\\alphaα) នោះទេ — វានៅតែសំខាន់។ ប៉ុន្តែ Adam ធ្វើឱ្យការហ្វឹកហាត់ម៉ូឌែល មិនសូវរងឥទ្ធិពលខ្លាំង ពីការកំណត់លេខ α\\alphaα ខុស។ នេះជាមូលហេតុដែលតម្លៃ Default 0.001 របស់វា ដំណើរការបានយ៉ាងល្អលើម៉ូឌែលរាប់ពាន់ខុសៗគ្នា។ បើ Gradient Descent គឺជាការដើរភ្នំដោយបោះជំហានស្មើៗគ្នា Adam គឺជាការជួលអ្នកនាំផ្លូវដែលមាន GPS ជាប់ខ្លួន ដែលចេះកែសម្រួលល្បឿនតាមស្ថានភាពផ្លូវ និងធានាថាអ្នកនឹងមិនដើរវង្វេង ឬចំណាយពេលឥតប្រយោជន៍លើផ្លូវដែលធ្លាប់ដើររួចនោះទេ។",{"id":11005,"title":10309,"titles":11006,"content":10620,"level":7520},"\u002Fkm\u002Frooms\u002Fadam-optimizer#ឯកសារយោង",[5],{"id":10454,"title":10453,"titles":11008,"content":11009,"level":7503},[],"ផ្តើមពី មេគុណប្រាប់ទិសនៃសមីការបន្ទាត់ រហូតដល់ calculus នៅពីក្រោយ machine learning រាល់ពេលដែល Neural Network រៀន វាតែងតែសួរខ្លួនឯងនូវសំណួរដដែលៗថា: \"ប្រសិនបើខ្ញុំកែប្រែ parameter នេះបន្តិច តើ error នឹងកើនឡើង ឬថយចុះ — ហើយប៉ុន្មាន?\" សំណួរនេះត្រូវបានឆ្លើយដោយ ដេរីវេ (Derivative)។ មុននឹងយើងនិយាយអំពី gradient ឬ optimizer យើងត្រូវយល់ derivative 
ពីដំបូងបង្អស់។",{"id":11011,"title":11012,"titles":11013,"content":7496,"level":7520},"\u002Fkm\u002Frooms\u002Fderivatives#ផ្នែកទី-១-បន្ទាត់-និងមេគុណប្រាប់ទិស-lines-and-slopes","ផ្នែកទី ១ — បន្ទាត់ និងមេគុណប្រាប់ទិស (Lines and Slopes)",[10453],{"id":11015,"title":11016,"titles":11017,"content":11018,"level":7527},"\u002Fkm\u002Frooms\u002Fderivatives#សមីការបន្ទាត់","សមីការបន្ទាត់",[10453,11012],"ទំនាក់ទំនងងាយបំផុតរវាងតម្លៃពីរ គឺបន្ទាត់ត្រង់៖ y=mx+by = mx + by=mx+b ក្នុងនោះ៖ xxx គឺជា ទិន្នន័យចូល (input)yyy គឺជា លទ្ធផល (output)mmm គឺជា មេគុណប្រាប់ទិស ឬចំណោត (slope) — បញ្ជាក់ថាបន្ទាត់នោះងើបឡើង ឬចុះក្រោមខ្លាំងកម្រិតណាbbb គឺជា ចំណុចប្រសព្វអ័ក្ស y — ជាកន្លែងដែលបន្ទាត់កាត់អ័ក្សឈរ ឧទាហរណ៍៖ y=2x+1y = 2x + 1y=2x+1 xxxy=2x+1y = 2x + 1y=2x+101132537 រាល់ពេលដែល xxx កើនឡើង ១ នោះ yyy នឹងកើនឡើង ២ ជានិច្ច។ មេគុណប្រាប់ទិស m=2m = 2m=2 គឺជាអ្នកកំណត់អត្រាកំណើនថេរនេះ។",{"id":11020,"title":11021,"titles":11022,"content":11023,"level":7527},"\u002Fkm\u002Frooms\u002Fderivatives#ការគណនាមេគុណប្រាប់ទិសរវាងចំណុចពីរ","ការគណនាមេគុណប្រាប់ទិសរវាងចំណុចពីរ",[10453,11012],"ប្រសិនបើយើងមានពីរចំណុច (x1,y1)(x_1, y_1)(x1​,y1​) និង (x2,y2)(x_2, y_2)(x2​,y2​) នៅលើបន្ទាត់ មេគុណប្រាប់ទិសគឺ៖ m=ΔyΔx=y2−y1x2−x1m = \\frac{\\Delta y}{\\Delta x} = \\frac{y_2 - y_1}{x_2 - x_1}m=ΔxΔy​=x2​−x1​y2​−y1​​ នេះគឺជាការគណនា បំរែបំរួលកម្ពស់ ធៀបនឹងបំរែបំរួលចម្ងាយដេក — ពោលគឺ yyy ប្រែប្រួលប៉ុន្មាន នៅពេល xxx ផ្លាស់ប្តូរមួយឯកតា។ ហេតុអ្វីបានជាមេគុណប្រាប់ទិសមានសារៈសំខាន់?\n  មេគុណប្រាប់ទិសប្រាប់អ្នកអំពី អត្រាបម្រែបម្រួល។ បើវាស្មើ ២ មានន័យថា \"រាល់ពេលដើរទៅមុខ ១ ជំហានក្នុងទិសដៅ x, តម្លៃ y នឹងឡើង ២\"។ បើស្មើ −3 មានន័យថា y នឹងចុះ ៣។ បើស្មើ ០ មានន័យថា y មិនប្រែប្រួលទេ (បន្ទាត់ដេករាបស្មើ)។",{"id":11025,"title":11026,"titles":11027,"content":11028,"level":7520},"\u002Fkm\u002Frooms\u002Fderivatives#ផ្នែកទី-២-នៅពេលបន្ទាត់ក្លាយជាខ្សែកោង","ផ្នែកទី ២ — នៅពេលបន្ទាត់ក្លាយជាខ្សែកោង",[10453],"បន្ទាត់ត្រង់មានមេគុណប្រាប់ទិស ថេរ នៅគ្រប់កន្លែង។ ប៉ុន្តែក្នុងគណិតវិទ្យា និង machine learning ភាគច្រើនយើងជួបនឹង ខ្សែកោង ដែលមានចំណោតប្រែប្រួលនៅគ្រប់ចំណុច។ 
សូមពិនិត្យមើលអនុគមន៍ប៉ារ៉ាបូល៖ f(x)=x2f(x) = x^2f(x)=x2 xxxf(x)=x2f(x) = x^2f(x)=x2−39−11001139 នៅក្បែរ x=0x = 0x=0 ខ្សែកោងនេះស្ទើរតែរាបស្មើ។ ប៉ុន្តែនៅក្បែរ x=3x = 3x=3 វាហក់ឡើងយ៉ាងខ្លាំង។ ចំណោតរបស់វា ខុសៗគ្នានៅគ្រប់ចំណុច — នេះមានន័យថារូបមន្ត m=ΔyΔxm = \\frac{\\Delta y}{\\Delta x}m=ΔxΔy​ រវាងចំណុចពីរដែលនៅឆ្ងាយគ្នា អាចប្រាប់យើងបានត្រឹមតែ អត្រាបម្រែបម្រួលមធ្យម ប៉ុណ្ណោះ។",{"id":11030,"title":11031,"titles":11032,"content":11033,"level":7527},"\u002Fkm\u002Frooms\u002Fderivatives#អត្រាបម្រែបម្រួលមធ្យម","អត្រាបម្រែបម្រួលមធ្យម",[10453,11026],"សម្រាប់ចំណុចពីរ xxx និង x+hx + hx+h នៅលើខ្សែកោង fff អត្រាបម្រែបម្រួលមធ្យមគឺ៖ ΔfΔx=f(x+h)−f(x)h\\frac{\\Delta f}{\\Delta x} = \\frac{f(x + h) - f(x)}{h}ΔxΔf​=hf(x+h)−f(x)​ នេះគឺជាចំណោតនៃ បន្ទាត់កាត់ (secant line) ដែលភ្ជាប់ចំណុចពីរនៅលើខ្សែកោង។ ឧទាហរណ៍ នៅលើ f(x)=x2f(x) = x^2f(x)=x2 ចន្លោះ x=1x = 1x=1 និង x=3x = 3x=3: f(3)−f(1)3−1=9−12=4\\frac{f(3) - f(1)}{3 - 1} = \\frac{9 - 1}{2} = 43−1f(3)−f(1)​=29−1​=4 នោះគឺជាភាពចោតមធ្យមចន្លោះ x=1x=1x=1 និង x=3x=3x=3 ប៉ុន្តែវាមិនប្រាប់យើងពីចំណោតនៅ ចំណុចជាក់លាក់ ណាមួយឡើយ។",{"id":11035,"title":11036,"titles":11037,"content":11038,"level":7520},"\u002Fkm\u002Frooms\u002Fderivatives#ផ្នែកទី-៣-លីមីត-ការពង្រីកមើលចំណុចតែមួយ","ផ្នែកទី ៣ — លីមីត៖ ការពង្រីកមើលចំណុចតែមួយ",[10453],"ដើម្បីរកចំណោត នៅត្រង់ចំណុចជាក់លាក់មួយ យើងត្រូវបង្រួមចម្ងាយ hhh ឱ្យខិតទៅជិតសូន្យបំផុត។ នៅពេល hhh កាន់តែតូចទៅៗ បន្ទាត់កាត់នឹងរំកិលខ្លួនរហូតក្លាយជា បន្ទាត់ប៉ះ (tangent line) — ដែលប៉ះខ្សែកោងត្រង់ចំណុចតែមួយគត់ និងបង្ហាញពីចំណោតពិតប្រាកដនៅត្រង់នោះ។ ជាផ្លូវការ អត្រាបម្រែបម្រួលខណៈ (ភ្លាមៗ) នៅត្រង់ xxx គឺជា លីមីត (limit)៖ lim⁡h→0f(x+h)−f(x)h\\lim_{h \\to 0} \\frac{f(x + h) - f(x)}{h}h→0lim​hf(x+h)−f(x)​ នេះគឺជាគំនិតស្នូលនៃ ដេរីវេ។",{"id":11040,"title":11041,"titles":11042,"content":11043,"level":7527},"\u002Fkm\u002Frooms\u002Fderivatives#លីមីតតាមរយៈការយល់ដឹង","លីមីតតាមរយៈការយល់ដឹង",[10453,11036],"លីមីតសួរថា: \"តើប្រាតិបត្តិការណ៍នោះខិតទៅតម្លៃអ្វី នៅពេលអថេរខិតទៅជិតលេខណាមួយ — ទោះបីជាមិនដល់ចំណុចនោះក៏ដោយ?\" lim⁡h→0(x+h)2−x2h\\lim_{h \\to 0} 
\\frac{(x+h)^2 - x^2}{h}h→0lim​h(x+h)2−x2​ ពង្រីក numerator (ចំនួននៅខាងលើ): =lim⁡h→0x2+2xh+h2−x2h=lim⁡h→02xh+h2h=lim⁡h→0(2x+h)= \\lim_{h \\to 0} \\frac{x^2 + 2xh + h^2 - x^2}{h} = \\lim_{h \\to 0} \\frac{2xh + h^2}{h} = \\lim_{h \\to 0} (2x + h)=h→0lim​hx2+2xh+h2−x2​=h→0lim​h2xh+h2​=h→0lim​(2x+h) នៅពេល h→0h \\to 0h→0: =2x= 2x=2x ចំណោតនៃ f(x)=x2f(x) = x^2f(x)=x2 នៅចំណុចណាមួយ xxx គឺ 2x2x2x ពិតប្រាកដ។",{"id":11045,"title":11046,"titles":11047,"content":7496,"level":7520},"\u002Fkm\u002Frooms\u002Fderivatives#ផ្នែកទី-៤-ដេរីវេ-the-derivative","ផ្នែកទី ៤ — ដេរីវេ (The Derivative)",[10453],{"id":11049,"title":11050,"titles":11051,"content":11052,"level":7527},"\u002Fkm\u002Frooms\u002Fderivatives#និយមន័យ","និយមន័យ",[10453,11046],"ដេរីវេ នៃអនុគមន៍ fff នៅត្រង់ចំណុច xxx សរសេរថា f′(x)f'(x)f′(x) ឬ dfdx\\frac{df}{dx}dxdf​ គឺ៖ f′(x)=lim⁡h→0f(x+h)−f(x)h\\boxed{f'(x) = \\lim_{h \\to 0} \\frac{f(x + h) - f(x)}{h}}f′(x)=h→0lim​hf(x+h)−f(x)​​ វាប្រាប់យើងអំពី អត្រាបម្រែបម្រួលភ្លាមៗ ឬចំណោតនៃបន្ទាត់ប៉ះនៅគ្រប់ចំណុច។",{"id":11054,"title":11055,"titles":11056,"content":11057,"level":7527},"\u002Fkm\u002Frooms\u002Fderivatives#អត្ថន័យតាមរូបធរណីមាត្រ","អត្ថន័យតាមរូបធរណីមាត្រ",[10453,11046],"តម្លៃដេរីវេអត្ថន័យf′(x)>0f'(x) > 0f′(x)>0អនុគមន៍កំពុង កើនឡើង នៅត្រង់ xxxf′(x)\u003C0f'(x) \u003C 0f′(x)\u003C0អនុគមន៍កំពុង ថយចុះ នៅត្រង់ xxxf′(x)=0f'(x) = 0f′(x)=0អនុគមន៍ រាបស្មើ (អាចជាចំណុចទាបបំផុត ឬខ្ពស់បំផុត)∥f′(x)∥\\|f'(x)\\|∥f′(x)∥ ធំអនុគមន៍ប្រែប្រួល យ៉ាងលឿន∥f′(x)∥\\|f'(x)\\|∥f′(x)∥ តូចអនុគមន៍ប្រែប្រួល យឺតៗ",{"id":11059,"title":11060,"titles":11061,"content":11062,"level":7520},"\u002Fkm\u002Frooms\u002Fderivatives#ផ្នែកទី-៥-ច្បាប់នៃការគណនាដេរីវេ-rules","ផ្នែកទី ៥ — ច្បាប់នៃការគណនាដេរីវេ (Rules)",[10453],"ការគណនាលីមីតដោយដៃរាល់ពេលគឺហត់នឿយណាស់។ អ្នកគណិតវិទ្យាបានបង្កើត ច្បាប់កាត់ (shortcut rules) ជាច្រើន ដែលគ្របដណ្ដប់អនុគមន៍ស្ទើរតែទាំងអស់ដែលអ្នកនឹងជួប។",{"id":11064,"title":11065,"titles":11066,"content":11067,"level":7527},"\u002Fkm\u002Frooms\u002Fderivatives#ច្បាប់ស្វ័យគុណ-power-rule","ច្បាប់ស្វ័យគុណ 
(Power Rule)",[10453,11060],"សម្រាប់ f(x)=xnf(x) = x^nf(x)=xn: ddxxn=n⋅xn−1\\frac{d}{dx} x^n = n \\cdot x^{n-1}dxd​xn=n⋅xn−1 ឧទាហរណ៍: អនុគមន៍ដេរីវេx2x^2x22x2x2xx3x^3x33x23x^23x2x10x^{10}x1010x910x^910x9xxx (ពោលគឺ x1x^1x1)111555 (ថេរ, x0x^0x0)000",{"id":11069,"title":11070,"titles":11071,"content":11072,"level":7527},"\u002Fkm\u002Frooms\u002Fderivatives#ច្បាប់គុណនឹងថេរ-constant-multiple-rule","ច្បាប់គុណនឹងថេរ (Constant Multiple Rule)",[10453,11060],"ddx[c⋅f(x)]=c⋅f′(x)\\frac{d}{dx}[c \\cdot f(x)] = c \\cdot f'(x)dxd​[c⋅f(x)]=c⋅f′(x) ប្រសិនបើ f(x)=3x2f(x) = 3x^2f(x)=3x2 នោះ f′(x)=3⋅2x=6xf'(x) = 3 \\cdot 2x = 6xf′(x)=3⋅2x=6x។",{"id":11074,"title":11075,"titles":11076,"content":11077,"level":7527},"\u002Fkm\u002Frooms\u002Fderivatives#ច្បាប់ផលបូក-sum-rule","ច្បាប់ផលបូក (Sum Rule)",[10453,11060],"ddx[f(x)+g(x)]=f′(x)+g′(x)\\frac{d}{dx}[f(x) + g(x)] = f'(x) + g'(x)dxd​[f(x)+g(x)]=f′(x)+g′(x) ប្រសិនបើ f(x)=x3+5x2−2x+7f(x) = x^3 + 5x^2 - 2x + 7f(x)=x3+5x2−2x+7 គណនាដេរីវេម្តងមួយតេប: f′(x)=3x2+10x−2f'(x) = 3x^2 + 10x - 2f′(x)=3x2+10x−2",{"id":11079,"title":11080,"titles":11081,"content":11082,"level":7527},"\u002Fkm\u002Frooms\u002Fderivatives#ច្បាប់បណ្តាក់-chain-rule","ច្បាប់បណ្តាក់ (Chain Rule)",[10453,11060],"សម្រាប់ ការផ្សំ នៃអនុគមន៍ f(g(x))f(g(x))f(g(x)): ddxf(g(x))=f′(g(x))⋅g′(x)\\frac{d}{dx} f(g(x)) = f'(g(x)) \\cdot g'(x)dxd​f(g(x))=f′(g(x))⋅g′(x) អានថា: \"ដេរីវេនៃ outer function គណនានៅ inner function — គុណ (times) ដេរីវេនៃ inner function\" ឧទាហរណ៍: h(x)=(3x+1)4h(x) = (3x + 1)^4h(x)=(3x+1)4 ឱ្យ g(x)=3x+1g(x) = 3x + 1g(x)=3x+1 និង f(u)=u4f(u) = u^4f(u)=u4: h′(x)=4(3x+1)3⋅3=12(3x+1)3h'(x) = 4(3x+1)^3 \\cdot 3 = 12(3x+1)^3h′(x)=4(3x+1)3⋅3=12(3x+1)3 ច្បាប់បណ្តាក់ (Chain Rule) មាននៅគ្រប់ទីកន្លែងក្នុង machine learning — Backpropagation គឺជាការអនុវត្តច្បាប់នេះម្តងហើយម្តងទៀតឆ្លងកាត់ layer នៃ neural 
network។",{"id":11084,"title":11085,"titles":11086,"content":11087,"level":7527},"\u002Fkm\u002Frooms\u002Fderivatives#តារាងសង្ខេបដេរីវេទូទៅ","តារាងសង្ខេបដេរីវេទូទៅ",[10453,11060],"អនុគមន៍ដេរីវេexe^xexexe^xexln⁡(x)\\ln(x)ln(x)1x\\frac{1}{x}x1​sin⁡(x)\\sin(x)sin(x)cos⁡(x)\\cos(x)cos(x)cos⁡(x)\\cos(x)cos(x)−sin⁡(x)-\\sin(x)−sin(x)σ(x)=11+e−x\\sigma(x) = \\frac{1}{1+e^{-x}}σ(x)=1+e−x1​ (sigmoid)σ(x)(1−σ(x))\\sigma(x)(1 - \\sigma(x))σ(x)(1−σ(x))",{"id":11089,"title":11090,"titles":11091,"content":7496,"level":7520},"\u002Fkm\u002Frooms\u002Fderivatives#ផ្នែកទី-៦-ការប្រើប្រាស់ដេរីវេក្នុងអនុវត្តជាក់ស្តែង","ផ្នែកទី ៦ — ការប្រើប្រាស់ដេរីវេក្នុងអនុវត្តជាក់ស្តែង",[10453],{"id":11093,"title":11094,"titles":11095,"content":11096,"level":7527},"\u002Fkm\u002Frooms\u002Fderivatives#ការស្វែងរកចំណុចទាបបំផុត-និងខ្ពស់បំផុត","ការស្វែងរកចំណុចទាបបំផុត និងខ្ពស់បំផុត",[10453,11090],"ប្រសិនបើ f′(x)=0f'(x) = 0f′(x)=0 នោះអនុគមន៍គឺរាបស្មើ។ ចំណុចនេះហៅថា ចំណុចវិបាក (critical point) ដែលអាចជា៖ ចំណុចអប្បបរមា (Local minimum): អនុគមន៍ចុះក្រោម ហើយឡើងវិញ → f′(x)f'(x)f′(x) ប្រែពីអវិជ្ជមានទៅជ្ជមានចំណុចអតិបរមា (Local maximum): អនុគមន៍ឡើង ហើយចុះក្រោម → f′(x)f'(x)f′(x) ប្រែពីជ្ជមានទៅអវិជ្ជមានចំណុចបន្ទោះ (Saddle point): អនុគមន៍រាបស្មើ ប៉ុន្តែបន្តដំណើរក្នុងទិសដៅទូទៅដដែល ឧទាហរណ៍: រករចំណុចអប្បបរមានៃ f(x)=x2−4x+5f(x) = x^2 - 4x + 5f(x)=x2−4x+5 f′(x)=2x−4=0  ⟹  x=2f'(x) = 2x - 4 = 0 \\implies x = 2f′(x)=2x−4=0⟹x=2 នៅ x=2x = 2x=2: f(2)=4−8+5=1f(2) = 4 - 8 + 5 = 1f(2)=4−8+5=1 — នេះជាចំណុចអប្បបរមា។ def f(x):\n    return x**2 - 4*x + 5\n\ndef f_prime(x):\n    return 2*x - 4\n\n# ស្វែងរកកន្លែងដែលដេរីវេ = 0\n# 2x - 4 = 0  =>  x = 2\nx_min = 2\nprint(f\"ចំណុចអប្បបរមានៅ x={x_min}, f(x)={f(x_min)}\")  # x=2, f(x)=1",{"id":11098,"title":11099,"titles":11100,"content":11101,"level":7527},"\u002Fkm\u002Frooms\u002Fderivatives#ដេរីវេជាសញ្ញាណទិស","ដេរីវេជាសញ្ញាណទិស",[10453,11090],"នេះគឺជាអត្ថន័យសំខាន់ដែលភ្ជាប់ calculus ទៅ machine learning: ប្រសិនបើ f′(x)>0f'(x) > 0f′(x)>0 នៅចំណុចណាមួយ ការដើរ xxx ទៅស្តាំធ្វើឱ្យ fff កើន។ 
ការដើរ xxx ទៅឆ្វេងធ្វើឱ្យ fff ចុះ។ប្រសិនបើ f′(x)\u003C0f'(x) \u003C 0f′(x)\u003C0 ផ្ទុយពីខាងលើ។ ដើម្បី កាត់បន្ថយ fff យើងគួរតែដើរ xxx ក្នុងទិសដៅ ផ្ទុយ ពីដេរីវេ: xnew=xold−α⋅f′(xold)x_{\\text{new}} = x_{\\text{old}} - \\alpha \\cdot f'(x_{\\text{old}})xnew​=xold​−α⋅f′(xold​) (ថ្មី = new, ចាស់ = old) ត្រង់ α\\alphaα គឺជាជំហានតូច (step size)។ ត្រូវចំណាំទេ? នេះគឺ ច្បាប់អាប់ដែតនៃ gradient descent ពិតប្រាកដ។",{"id":11103,"title":11104,"titles":11105,"content":11106,"level":7520},"\u002Fkm\u002Frooms\u002Fderivatives#ផ្នែកទី-៧-ពីអថេរមួយទៅអថេរច្រើន-ហ្ក្រាដ្យង់-the-gradient","ផ្នែកទី ៧ — ពីអថេរមួយទៅអថេរច្រើន៖ ហ្ក្រាដ្យង់ (The Gradient)",[10453],"ម៉ូឌែល Machine learning មិនមែនមានប៉ារ៉ាម៉ែត្រតែមួយទេ គឺវាមានរាប់លាន។ យើងត្រូវការរកដេរីវេធៀបនឹងប៉ារ៉ាម៉ែត្រ នីមួយៗ ក្នុងពេលតែមួយ។",{"id":11108,"title":11109,"titles":11110,"content":11111,"level":7527},"\u002Fkm\u002Frooms\u002Fderivatives#ដេរីវេដោយផ្នែក-partial-derivatives","ដេរីវេដោយផ្នែក (Partial Derivatives)",[10453,11104],"ដេរីវេដោយផ្នែក គឺការគណនាដេរីវេធៀបនឹងអថេរមួយ ដោយទុកអថេរផ្សេងទៀតឱ្យនៅថេរ (មិនប្រែប្រួល)។ ∂J∂wi\\frac{\\partial J}{\\partial w_i}∂wi​∂J​ = \"តើ J ប្រែប្រួលប៉ុន្មាន ប្រសិនបើយើងកែ wiw_iwi​ តែម្នាក់ឯង?\" ឧទាហរណ៍: J(w1,w2)=w12+3w1w2+w22J(w_1, w_2) = w_1^2 + 3w_1 w_2 + w_2^2J(w1​,w2​)=w12​+3w1​w2​+w22​ ∂J∂w1=2w1+3w2∂J∂w2=3w1+2w2\\frac{\\partial J}{\\partial w_1} = 2w_1 + 3w_2 \\qquad \\frac{\\partial J}{\\partial w_2} = 3w_1 + 2w_2∂w1​∂J​=2w1​+3w2​∂w2​∂J​=3w1​+2w2​",{"id":11113,"title":11114,"titles":11115,"content":11116,"level":7527},"\u002Fkm\u002Frooms\u002Fderivatives#វ៉ិចទ័រហ្ក្រាដ្យង់-the-gradient-vector","វ៉ិចទ័រហ្ក្រាដ្យង់ (The Gradient Vector)",[10453,11104],"នៅពេលយើងប្រមូលដេរីវេដោយផ្នែកទាំងអស់មកដាក់ក្នុងវ៉ិចទ័រតែមួយ យើងហៅវាថា ហ្ក្រាដ្យង់ (∇J\\nabla J∇J): ∇J(w1,w2,…,wn)=[∂J∂w1∂J∂w2⋮∂J∂wn]\\nabla J(w_1, w_2, \\ldots, w_n) = \\begin{bmatrix}\n\\frac{\\partial J}{\\partial w_1} \\\\[4pt]\n\\frac{\\partial J}{\\partial w_2} \\\\\n\\vdots \\\\[4pt]\n\\frac{\\partial J}{\\partial 
w_n}\n\\end{bmatrix}∇J(w1​,w2​,…,wn​)=​∂w1​∂J​∂w2​∂J​⋮∂wn​∂J​​​ ហ្ក្រាដ្យង់គឺជាដេរីវេច្រើនឯករណ៍ (multi-dimensional equivalent of the derivative)។ វាចង្អុលបង្ហាញទិសដៅ ឡើងខ្លាំងបំផុត ក្នុង loss landscape។ ដើម្បីកាត់បន្ថយ error យើងត្រូវដើរក្នុង ទិសដៅផ្ទុយ ពីហ្ក្រាដ្យង់ — នេះជាមូលដ្ឋាននៃ gradient descent ពិតប្រាកដ។ ស្ពានទៅកាន់ Machine Learning\n  \n    ក្នុង ML, អនុគមន៍ខាតបង់ (loss function) វាស់វែងថា តើម៉ូឌែលទស្សន៍ទាយខុសកម្រិតណា។ ហ្ក្រាដ្យង់ប្រាប់យើងថា តើត្រូវកែតម្រូវទម្ងន់ (weights) ទៅទិសដៅណាដើម្បីឱ្យកំហុសនោះថយចុះ។",{"id":11118,"title":11119,"titles":11120,"content":11121,"level":7520},"\u002Fkm\u002Frooms\u002Fderivatives#ផ្នែកទី-៨-ឧទាហរណ៍ពេញលេញ-linear-regression","ផ្នែកទី ៨ — ឧទាហរណ៍ពេញលេញ៖ Linear Regression",[10453],"សូមមើលការអនុវត្តជាក់ស្ដែងនៃគំនិតទាំងអស់នេះ។ ការតំរុង: យើងមានទិន្នន័យ (x(i),y(i))(x^{(i)}, y^{(i)})(x(i),y(i)) ហើយចង់ Fit បន្ទាត់ y^=wx+b\\hat{y} = wx + by^​=wx+b។ អនុគមន៍ខាតបង់ (Loss function) (Mean Squared Error): J(w,b)=1m∑i=1m(y^(i)−y(i))2=1m∑i=1m(wx(i)+b−y(i))2J(w, b) = \\frac{1}{m} \\sum_{i=1}^{m} \\left(\\hat{y}^{(i)} - y^{(i)}\\right)^2 = \\frac{1}{m} \\sum_{i=1}^{m} \\left(wx^{(i)} + b - y^{(i)}\\right)^2J(w,b)=m1​i=1∑m​(y^​(i)−y(i))2=m1​i=1∑m​(wx(i)+b−y(i))2 ដេរីវេដោយផ្នែក ធៀបនឹង www (ប្រើ chain rule — ដេរីវេនៃ squared term ខាងក្រៅ គុណ ដេរីវេនៃ wx+bwx+bwx+b ខាងក្នុង): ∂J∂w=2m∑i=1m(wx(i)+b−y(i))⋅x(i)\\frac{\\partial J}{\\partial w} = \\frac{2}{m} \\sum_{i=1}^{m} \\left(wx^{(i)} + b - y^{(i)}\\right) \\cdot x^{(i)}∂w∂J​=m2​i=1∑m​(wx(i)+b−y(i))⋅x(i) ដេរីវេដោយផ្នែក ធៀបនឹង bbb: ∂J∂b=2m∑i=1m(wx(i)+b−y(i))\\frac{\\partial J}{\\partial b} = \\frac{2}{m} \\sum_{i=1}^{m} \\left(wx^{(i)} + b - y^{(i)}\\right)∂b∂J​=m2​i=1∑m​(wx(i)+b−y(i)) ការអាប់ដែត gradient descent — ដើរក្នុងទិសដៅផ្ទុយពី gradient: w←w−α⋅∂J∂w,b←b−α⋅∂J∂bw \\leftarrow w - \\alpha \\cdot \\frac{\\partial J}{\\partial w}, \\qquad b \\leftarrow b - \\alpha \\cdot \\frac{\\partial J}{\\partial b}w←w−α⋅∂w∂J​,b←b−α⋅∂b∂J​ import numpy as np\n\n# ទិន្នន័យ: ទំនាក់ទំនងពិត y = 3x + 2\nX = 
np.array([1.0, 2.0, 3.0, 4.0, 5.0])\ny = np.array([5.0, 8.0, 11.0, 14.0, 17.0])\n\nw, b = 0.0, 0.0   # ចាប់ផ្ដើមពីសូន្យ\nalpha = 0.01\nm = len(y)\n\nfor epoch in range(500):\n    y_pred = w * X + b              # forward pass\n    error  = y_pred - y             # residuals: ŷ - y\n\n    # ដេរីវេដោយផ្នែក (the gradient)\n    dw = (2 \u002F m) * np.dot(error, X) # ∂J\u002F∂w\n    db = (2 \u002F m) * np.sum(error)    # ∂J\u002F∂b\n\n    # ជំហាន gradient descent\n    w = w - alpha * dw\n    b = b - alpha * db\n\nprint(f\"Fitted: ŷ = {w:.4f}·x + {b:.4f}\")\n# Output: ŷ = 3.0000·x + 2.0000 ដេរីវេ — គណនាតាមការវិភាគ calculus ហើយប្រើប្រាស់ម្ដងហើយម្ដងទៀត — គឺជាអ្វីដែលដំណើរការដំណើររៀនទាំងមូល។",{"id":11123,"title":9491,"titles":11124,"content":11125,"level":7520},"\u002Fkm\u002Frooms\u002Fderivatives#សេចក្តីសង្ខេប",[10453],"គំនិតនិយមន័យខ្លីៗចំណោត (Slope)m=ΔyΔxm = \\frac{\\Delta y}{\\Delta x}m=ΔxΔy​ — អត្រាបម្រែបម្រួលថេរអត្រាបម្រែបម្រួលមធ្យមf(x+h)−f(x)h\\frac{f(x+h)-f(x)}{h}hf(x+h)−f(x)​ — ចំណោតនៃ secant ចន្លោះ hhhលីមីត (Limit)តម្លៃដែលប្រាតិបត្តិការណ៍ខិតទៅ នៅពេល h→0h \\to 0h→0ដេរីវេ (Derivative)f′(x)=lim⁡h→0f(x+h)−f(x)hf'(x) = \\lim_{h\\to 0}\\frac{f(x+h)-f(x)}{h}f′(x)=limh→0​hf(x+h)−f(x)​ — អត្រាបម្រែបម្រួលភ្លាមៗច្បាប់ស្វ័យគុណ (Power rule)ddxxn=nxn−1\\frac{d}{dx} x^n = nx^{n-1}dxd​xn=nxn−1ច្បាប់បណ្តាក់ (Chain rule)ddxf(g(x))=f′(g(x))⋅g′(x)\\frac{d}{dx}f(g(x)) = f'(g(x))\\cdot g'(x)dxd​f(g(x))=f′(g(x))⋅g′(x) — ចំបាច់សម្រាប់ backpropដេរីវេដោយផ្នែក (Partial derivative)ដេរីវេ ដោយទុកអថេរផ្សេងទៀតនៅថេរហ្ក្រាដ្យង់ (Gradient)វ៉ិចទ័រនៃដេរីវេដោយផ្នែកទាំងអស់ — ចង្អុលទៅទិសដៅឡើងខ្លាំងបំផុត ដេរីវេគឺជាចម្លើយគណិតវិទ្យាចំពោះសំណួរថា \"តើផ្លូវណាជាផ្លូវឡើងទួល?\"។ ក្នុង machine learning យើងប្រើសញ្ញាផ្ទុយរបស់វាដើម្បីរក \"ផ្លូវចុះទួល\" ដើម្បីបង្ហាត់ម៉ូឌែលឱ្យកាន់តែឆ្លាតវៃ។",{"id":11127,"title":11128,"titles":11129,"content":11130,"level":7520},"\u002Fkm\u002Frooms\u002Fderivatives#តើត្រូវរៀនអ្វីបន្ត","តើត្រូវរៀនអ្វីបន្ត?",[10453],"ឥឡូវនេះអ្នកមានមូលដ្ឋានគ្រឹះ calculus ហើយ។ ជំហានបន្ទាប់គឺ Gradient 
Descent ដែលជាអាល់ហ្គោរីតយកគំនិតដេរីវេនេះ មកបង្កើតជាម៉ាស៊ីនសម្រាប់រៀនដោយស្វ័យប្រវត្តិ។ បន្ទប់បន្ទាប់: Gradient Descent\n    See how the derivative becomes an optimization algorithm — with interactive experiments, full Python code, and a walk through every step of the math.\n    \n      ចូលបន្ទប់ Gradient Descent → html pre.shiki code .spNyl, html code.shiki .spNyl{--shiki-light:#9C3EDA;--shiki-default:#C792EA;--shiki-dark:#C792EA}html pre.shiki code .s2Zo4, html code.shiki .s2Zo4{--shiki-light:#6182B8;--shiki-default:#82AAFF;--shiki-dark:#82AAFF}html pre.shiki code .sMK4o, html code.shiki .sMK4o{--shiki-light:#39ADB5;--shiki-default:#89DDFF;--shiki-dark:#89DDFF}html pre.shiki code .sHdIc, html code.shiki .sHdIc{--shiki-light:#90A4AE;--shiki-light-font-style:italic;--shiki-default:#EEFFFF;--shiki-default-font-style:italic;--shiki-dark:#BABED8;--shiki-dark-font-style:italic}html pre.shiki code .s7zQu, html code.shiki .s7zQu{--shiki-light:#39ADB5;--shiki-light-font-style:italic;--shiki-default:#89DDFF;--shiki-default-font-style:italic;--shiki-dark:#89DDFF;--shiki-dark-font-style:italic}html pre.shiki code .sTEyZ, html code.shiki .sTEyZ{--shiki-light:#90A4AE;--shiki-default:#EEFFFF;--shiki-dark:#BABED8}html pre.shiki code .sbssI, html code.shiki .sbssI{--shiki-light:#F76D47;--shiki-default:#F78C6C;--shiki-dark:#F78C6C}html pre.shiki code .sHwdD, html code.shiki .sHwdD{--shiki-light:#90A4AE;--shiki-light-font-style:italic;--shiki-default:#546E7A;--shiki-default-font-style:italic;--shiki-dark:#676E95;--shiki-dark-font-style:italic}html pre.shiki code .sfazB, html code.shiki .sfazB{--shiki-light:#91B859;--shiki-default:#C3E88D;--shiki-dark:#C3E88D}html .light .shiki span {color: var(--shiki-light);background: var(--shiki-light-bg);font-style: var(--shiki-light-font-style);font-weight: var(--shiki-light-font-weight);text-decoration: var(--shiki-light-text-decoration);}html.light .shiki span {color: var(--shiki-light);background: var(--shiki-light-bg);font-style: 
var(--shiki-light-font-style);font-weight: var(--shiki-light-font-weight);text-decoration: var(--shiki-light-text-decoration);}html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html.dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}",{"id":10458,"title":10457,"titles":11132,"content":11133,"level":7503},[],"ស្វែងយល់ពីរ Algorithm ដែលជាមូលដ្ឋានគ្រឹះនៃ Machine Learning។ យល់ដឹងពីរបៀបដែលវាស្វែងរកតម្លៃអប្បបរមានៃអនុគមន៍ម្តងមួយជំហានៗ។ រូបភាពយកមកពី: Creating a Gradient Descent Animation in Python Gradient Descent គឺជា optimization algorithm មូលដ្ឋានមួយ ក្នុងចំណោម algorithms ដែលមាននៅក្នុង machine learning។ វាជាវិធីសាស្រ្តសម្រាប់ស្វែងរកតម្លៃអប្បបរមានៃអនុគមន៏ ដោយដើរម្តងមួយជំហាន (iteration) ជាបន្តបន្ទាប់ក្នុងទិសដៅ ដែលធ្វើឲ្យអនុគមន៏កាន់តែតូចទៅៗ។",{"id":11135,"title":11136,"titles":11137,"content":11138,"level":7520},"\u002Fkm\u002Frooms\u002Fgradient-descent#ប្រៀបធៀបលេងៗ","ប្រៀបធៀបលេងៗ",[10457],"ស្រមៃថា អ្នកកំពុងឈរនៅលើភ្នំមួយដែលមានអ័ព្ទក្រាស់ ហើយអ្នកចង់ទៅជ្រលងខាងក្រោម។ អ្នកមិនអាចមើលឃើញឆ្ងាយទេដោយសារមានអ័ព្ទក្រាស់ពេក ប៉ុន្តែអ្នកនៅបាតជើងរបស់អ្នក អាចដឹងថាកំពុងចុះជ្រៅទៅៗ រឺ ឡើងខ្ពង់ទៅៗ តាមរយៈជម្រោលចោត ។ Gradient descent ធ្វើរការដូចគ្នានេះដែរ: វាធ្វើម្តងមួយជំហានតូចៗចុះក្រោម តាមផ្លូវចម្រោងបំផុត 
រហូតដល់វាទៅដល់ចំណុចអប្បបរមា ទាបបំផុត។",{"id":11140,"title":11141,"titles":11142,"content":7496,"level":7520},"\u002Fkm\u002Frooms\u002Fgradient-descent#គណិតវិទ្យា","គណិតវិទ្យា",[10457],{"id":11144,"title":11145,"titles":11146,"content":11147,"level":7527},"\u002Fkm\u002Frooms\u002Fgradient-descent#រូបមន្តមូលដ្ឋាន","រូបមន្តមូលដ្ឋាន",[10457,11141],"Gradient descent ធ្វើបច្ចុប្បន្នភាព parameters ដោយប្រើរូបមន្តសាមញ្ញនេះ: θnew=θold−α∇J(θ)\\theta_{new} = \\theta_{old} - \\alpha \\nabla J(\\theta)θnew​=θold​−α∇J(θ) ដែល: θ\\thetaθ តំណាងឱ្យ parameters ដែលយើងកំពុង optimize (តើតម្លៃ Parameter ណាមួយដែលយើងកំពុងស្វែងរក ដែលធ្វើឲ្យអនុគមន៍ JJJ មានតម្លៃតិចបំផុត)α\\alphaα គឺ learning rate (អត្រាបោះជំហាន ឬ ទំហំជំហាន)J(θ)J(\\theta)J(θ) គឺ cost function ឬ objective function ដែលយើងចង់ រកតម្លៃ θ\\thetaθ ណាដែលធ្វើឲ្យ JJJ មានតម្លៃតូចបំផុត∇J(θ)\\nabla J(\\theta)∇J(θ) គឺ Gradient(ដេរីវេ | Derivative) នៃ JJJ ជាអនុគមន៍នៃ θ\\thetaθ",{"id":11149,"title":11150,"titles":11151,"content":11152,"level":7527},"\u002Fkm\u002Frooms\u002Fgradient-descent#ការយល់ដឹងពី-gradient-ពីសាមញ្ញទៅ-កំរិតខ្ពស់","ការយល់ដឹងពី Gradient: ពីសាមញ្ញទៅ កំរិតខ្ពស់",[10457,11141],"តោះបកស្រាយនិមិត្តសញ្ញា gradient ∇\\nabla∇ (ហៅថា \"nabla\" ឬ \"del\") ដោយបង្កើតពីករណីសាមញ្ញបំផុត។",{"id":11154,"title":11155,"titles":11156,"content":11157,"level":7601},"\u002Fkm\u002Frooms\u002Fgradient-descent#ករណី-1-អថេរតែមួយ-parameter-មួយ","ករណី 1: អថេរតែមួយ (Parameter មួយ)",[10457,11141,11150],"នៅពេលយើងមាន parameter តែមួយ, gradient គឺគ្រាន់តែជា ដេរីវេ: ∇J(θ)=dJdθ\\nabla J(\\theta) = \\frac{dJ}{d\\theta}∇J(θ)=dθdJ​ Derivative ប្រាប់យើងថា: \"ប្រសិនបើខ្ញុំបង្កើន θ\\thetaθ បន្តិចបន្តួច តើ JJJ ផ្លាស់ប្តូរប៉ុន្មាន?\" ឧទាហរណ៍: សម្រាប់ J(θ)=θ2J(\\theta) = \\theta^2J(θ)=θ2: ∇J(θ)=dJdθ=2θ\\nabla J(\\theta) = \\frac{dJ}{d\\theta} = 2\\theta∇J(θ)=dθdJ​=2θ ប្រសិនបើ θ=5\\theta = 5θ=5, នោះ ∇J(5)=10\\nabla J(5) = 10∇J(5)=10 → អនុគមន៍កំពុងកើនឡើង, ទៅខាងឆ្វេង (បន្ថយ θ\\thetaθ)ប្រសិនបើ θ=−3\\theta = -3θ=−3, នោះ ∇J(−3)=−6\\nabla J(-3) = -6∇J(−3)=−6 → 
អនុគមន៍កំពុងថយចុះ, ទៅខាងស្តាំ (បង្កើន θ\\thetaθ)ប្រសិនបើ θ=0\\theta = 0θ=0, នោះ ∇J(0)=0\\nabla J(0) = 0∇J(0)=0 → យើងស្ថិតនៅចំណុចអប្បបរមា!",{"id":11159,"title":11160,"titles":11161,"content":11162,"level":7601},"\u002Fkm\u002Frooms\u002Fgradient-descent#ករណី-2-អថេរពីរ-parameters-ពីរ","ករណី 2: អថេរពីរ (Parameters ពីរ)",[10457,11141,11150],"នៅពេលយើងមាន parameters ពីរ θ1\\theta_1θ1​ និង θ2\\theta_2θ2​, gradient ក្លាយជា vector មួយមានធាតុផ្សំពីរ: ∇J(θ1,θ2)=[∂J∂θ1∂J∂θ2]\\nabla J(\\theta_1, \\theta_2) = \\begin{bmatrix}\n\\frac{\\partial J}{\\partial \\theta_1} \\\\\n\\frac{\\partial J}{\\partial \\theta_2}\n\\end{bmatrix}∇J(θ1​,θ2​)=[∂θ1​∂J​∂θ2​∂J​​] Partial derivative នីមួយៗ ∂J∂θi\\frac{\\partial J}{\\partial \\theta_i}∂θi​∂J​ សួរថា: \"ប្រសិនបើខ្ញុំផ្លាស់ប្តូរតែ θi\\theta_iθi​ (រក្សា អថេរផ្សេងទៀតថេរ), តើ JJJ ផ្លាស់ប្តូរប៉ុន្មាន?\" ឧទាហរណ៍: សម្រាប់ J(θ1,θ2)=θ12+θ22J(\\theta_1, \\theta_2) = \\theta_1^2 + \\theta_2^2J(θ1​,θ2​)=θ12​+θ22​: ∇J=[2θ12θ2]\\nabla J = \\begin{bmatrix}\n2\\theta_1 \\\\\n2\\theta_2\n\\end{bmatrix}∇J=[2θ1​2θ2​​] នៅចំណុច (θ1=3,θ2=4)(\\theta_1=3, \\theta_2=4)(θ1​=3,θ2​=4): ∇J=[68]\\nabla J = \\begin{bmatrix}\n6 \\\\\n8\n\\end{bmatrix}∇J=[68​] Vector នេះចង្អុលទៅទិសនៃការឡើងចម្រោងបំផុត។ យើងទៅក្នុងទិសផ្ទុយ (ដក វា) ដើម្បីចុះក្រោម!",{"id":11164,"title":11165,"titles":11166,"content":11167,"level":7601},"\u002Fkm\u002Frooms\u002Fgradient-descent#ករណី-3-អថេរច្រើន-ករណីទូទៅ","ករណី 3: អថេរច្រើន (ករណីទូទៅ)",[10457,11141,11150],"សម្រាប់ n parameters θ1,θ2,…,θn\\theta_1, \\theta_2, \\ldots, \\theta_nθ1​,θ2​,…,θn​, gradient គឺ n-dimensional vector: ∇J(θ)=[∂J∂θ1∂J∂θ2⋮∂J∂θn]\\nabla J(\\theta) = \\begin{bmatrix}\n\\frac{\\partial J}{\\partial \\theta_1} \\\\\n\\frac{\\partial J}{\\partial \\theta_2} \\\\\n\\vdots \\\\\n\\frac{\\partial J}{\\partial \\theta_n}\n\\end{bmatrix}∇J(θ)=​∂θ1​∂J​∂θ2​∂J​⋮∂θn​∂J​​​ ធាតុផ្សំនីមួយៗប្រាប់យើងថា តើ JJJ ប្រែប្រួលប៉ុន្មាន ចំពោះការផ្លាស់ប្តូរនៃ parameter ក្នុងចំណោមណាមួយនោះ។ នេះគឺជាអ្វីដែលយើងត្រូវដឹង ដើម្បីកំណត់ថា តើយើងគួរកែ parameter 
នីមួយៗទៅទិសណា! ចំណុចសំខាន់: មិនថាអ្នកមាន parameter 1 ឬ 1 លាន, គំនិតគឺដូចគ្នាតេ: គណនាថា តើ parameter នីមួយៗប៉ះពាល់ដល់ cost ប៉ុន្មាន, បន្ទាប់មកកែសម្រួលវាក្នុងទិសផ្ទុយ។",{"id":11169,"title":11170,"titles":11171,"content":11172,"level":7520},"\u002Fkm\u002Frooms\u002Fgradient-descent#ឧទាហរណ៍ពេញលេញ","ឧទាហរណ៍ពេញលេញ",[10457],"តោះមើល gradient descent ក្នុងការដំណើរការជាមួយករណីសាមញ្ញបំផុត: អថេរតែមួយ។ សូមគិតរកការបន្ថយទៅប្រកដដែលអប្បបរមាសម្រាប់អនុគមន៍ quadratic: J(θ)=θ2J(\\theta) = \\theta^2J(θ)=θ2 Gradient (ដេរីវេ) គឺ: ∇J(θ)=dJdθ=2θ\\nabla J(\\theta) = \\frac{dJ}{d\\theta} = 2\\theta∇J(θ)=dθdJ​=2θ យើងអាចសរសេរ Gradient descent algorithm ជា: θnew=θold−α⋅2θold\\theta_{new} = \\theta_{old} - \\alpha \\cdot 2\\theta_{old}θnew​=θold​−α⋅2θold​ ចាប់ផ្ដើមនៅ θ0=10\\theta_0 = 10θ0​=10 ជាមួយ learning rate α=0.1\\alpha = 0.1α=0.1: Iteration 1 (ជំហានទី 1): θ1=10−0.1×(2×10)=10−2=8\\theta_1 = 10 - 0.1 \\times (2 \\times 10) = 10 - 2 = 8θ1​=10−0.1×(2×10)=10−2=8 Gradient វិជ្ជមាន (10 ជំរាលឡើងខាងលើ), ដូច្នេះយើងបានធ្វើចលនាទៅខាងឆ្វេង (បន្ថយ θ\\thetaθ) Iteration 2 (ជំហានទី 2): θ2=8−0.1×(2×8)=8−1.6=6.4\\theta_2 = 8 - 0.1 \\times (2 \\times 8) = 8 - 1.6 = 6.4θ2​=8−0.1×(2×8)=8−1.6=6.4 នៅតែជា gradient វិជ្ជមាន, កំពុងតូចទៅ, ដូច្នេះជំហានតូចជាង Iteration 3 (ជំហានទី 3): θ3=6.4−0.1×(2×6.4)=6.4−1.28=5.12\\theta_3 = 6.4 - 0.1 \\times (2 \\times 6.4) = 6.4 - 1.28 = 5.12θ3​=6.4−0.1×(2×6.4)=6.4−1.28=5.12 Pattern បន្ត: នៅពេលយើងចូលទៅកាន់ចំណុចអប្បបរមា, gradient កាន់តែតូចទៅៗ, ដូច្នេះជំហានរបស់យើងតូចជាងដោយស្វ័យប្រវត្តិ! 
ជាមួយនឹងជំហាននីមួយៗ, យើងចូលទៅកាន់ជិតនូវចំណុចអប្បបរមានៅ θ=0\\theta = 0θ=0។ សូមកត់សម្គាល់ថា ជំហានតូចជាងដោយធម្មជាតិ នៅពេល gradient ថយចុះក្បែរនូវចំណុចអប្បបរមា!",{"id":11174,"title":11175,"titles":11176,"content":7496,"level":7520},"\u002Fkm\u002Frooms\u002Fgradient-descent#គំនិតសំខាន់ៗ","គំនិតសំខាន់ៗ",[10457],{"id":11178,"title":11179,"titles":11180,"content":11181,"level":7527},"\u002Fkm\u002Frooms\u002Fgradient-descent#learning-rate-អត្រាបោះជំហាន-ឬ-ទំហំជំហាន","Learning Rate | អត្រាបោះជំហាន ឬ ទំហំជំហាន",[10457,11175],"Learning rate α\\alphaα មានសំខាន់សំខាន់: ទំហំពេក: យើងអាចរំលងចំណុចអប្បបរមា ឬ បង្កើតការវិលជុំមិនចប់ (មិនដល់គោលដៅ)ទំហំតូច: ចំណាយពេលច្រើនហើយ កម្រដល់គោលដៅសមស្រប: ទៅដល់គោលដៅបានយ៉ាងមានប្រសិទ្ធភាព",{"id":11183,"title":11184,"titles":11185,"content":7496,"level":7527},"\u002Fkm\u002Frooms\u002Fgradient-descent#តោះធ្វើតេស្តជាមួយ-learning-rates-ផ្សេងៗ-ហើយមើលថាវាអាចប៉ះពាល់ដល់ការចូលរួមគ្នា-convergence-យ៉ាងដូចម្តេច","តោះធ្វើតេស្តជាមួយ learning rates ផ្សេងៗ ហើយមើលថាវាអាចប៉ះពាល់ដល់ការចូលរួមគ្នា (convergence) យ៉ាងដូចម្តេច!",[10457,11175],{"id":11187,"title":11188,"titles":11189,"content":7496,"level":7527},"\u002Fkm\u002Frooms\u002Fgradient-descent#ប្រភេទនៃ-gradient-descent","ប្រភេទនៃ Gradient Descent",[10457,11175],{"id":11191,"title":10806,"titles":11192,"content":11193,"level":7601},"\u002Fkm\u002Frooms\u002Fgradient-descent#_1-batch-gradient-descent",[10457,11175,11188],"ប្រើទិន្នន័យទាំងអស់ ដើម្បីគណនា gradient: θ=θ−α∇θJ(θ)\\theta = \\theta - \\alpha \\nabla_\\theta J(\\theta)θ=θ−α∇θ​J(θ) ដែល J(θ)J(\\theta)J(θ) ត្រូវបានគណនាលើឧទាហរណ៍បន្តុបកយសិក្សាទាំងអស់។",{"id":11195,"title":10811,"titles":11196,"content":11197,"level":7601},"\u002Fkm\u002Frooms\u002Fgradient-descent#_2-stochastic-gradient-descent-sgd",[10457,11175,11188],"Update parameters ដោយប្រើឧទាហរណ៍បន្តុបកយសិក្សាមួយម្តងមួយ ក្នុងមួយពេល: θ=θ−α∇θJ(θ;x(i),y(i))\\theta = \\theta - \\alpha \\nabla_\\theta J(\\theta; x^{(i)}, 
y^{(i)})θ=θ−α∇θ​J(θ;x(i),y(i))",{"id":11199,"title":10816,"titles":11200,"content":11201,"level":7601},"\u002Fkm\u002Frooms\u002Fgradient-descent#_3-mini-batch-gradient-descent",[10457,11175,11188],"ជាការប្រទាក់ចូលគ្នា: ប្រើbatch តូចមួយ នៃឧទាហរណ៍: θ=θ−α∇θJ(θ;x(i:i+b),y(i:i+b))\\theta = \\theta - \\alpha \\nabla_\\theta J(\\theta; x^{(i:i+b)}, y^{(i:i+b)})θ=θ−α∇θ​J(θ;x(i:i+b),y(i:i+b)) ដែល bbb គឺ batch size។",{"id":11203,"title":11204,"titles":11205,"content":11206,"level":7520},"\u002Fkm\u002Frooms\u002Fgradient-descent#ចំណុចរួមតូច-convergence","ចំណុចរួមតូច (Convergence)",[10457],"Gradient descent ចូលរួមគ្នា នៅពេល gradient ក្លាយជាតូចបំផុត: ∣∇J(θ)∣\u003Cϵ|\\nabla J(\\theta)| \u003C \\epsilon∣∇J(θ)∣\u003Cϵ ដែល ϵ\\epsilonϵ គឺតម្លៃ threshold តូចមួយ។",{"id":11208,"title":11209,"titles":11210,"content":11211,"level":7520},"\u002Fkm\u002Frooms\u002Fgradient-descent#បញ្ហាប្រឈម","បញ្ហាប្រឈម",[10457],"Local Minima: Algorithm អាចជាប់គាំងនៅ local minima ជំនួសឱ្យការស្វែងរក global minimumSaddle Points: ចំណុចដែល gradient រកឃើញថាសូន្យ ប៉ុន្តែមិនមែនជាចំណុចអប្បបរមាPlateau Regions: តំបន់ដែល gradient មានទំហំតូចបំផុត ធ្វើឱ្យការសិក្សាយឺត",{"id":11213,"title":11214,"titles":11215,"content":11216,"level":7520},"\u002Fkm\u002Frooms\u002Fgradient-descent#ការអនុវត្តន៍ក្នុងពិភពលោកជាក់ស្តែង","ការអនុវត្តន៍ក្នុងពិភពលោកជាក់ស្តែង",[10457],"Gradient descent ត្រូវបានប្រើដើម្បីបណ្តុះបណ្តាល: Neural Networks: ការ Optimize parameters រាប់លានLinear Regression: ការស្វែងរក best-fit lineLogistic Regression: បញ្ហា classificationSupport Vector Machines: ការស្វែងរក optimal hyperplanes",{"id":11218,"title":11219,"titles":11220,"content":11221,"level":7527},"\u002Fkm\u002Frooms\u002Fgradient-descent#gradient-descent-ក្នុង-deep-learning","Gradient Descent ក្នុង Deep Learning",[10457,11214],"Deep Neural Network ប្រើ Gradient Descent ដើម្បីបណ្តុះបណ្តាលទម្ងន់ (weights) ក្នុង layers ផ្សេងៗ រូបភាពខាងលើបង្ហាញពី Deep Neural Network — ប្រភេទ Neural Network ពេញនិយមមួយ ដែលប្រើ Gradient Descent ដើម្បី optimize cost 
function។ ក្នុង Deep Learning: Input Layer ទទួល data (ឧ. pixels នៃរូបភាព, ពាក្យ,  លេខ)Hidden Layers ជ្រើរើសធ្វើ feature extraction — ស្វែងរកគំរូ (patterns) ស្មុគស្មាញ ពីទិន្នន័យOutput Layer ផ្តល់ការទស្សន៍ទាយ (prediction) ចុងក្រោយWeights www (ទម្ងន់) ក្នុង connections នីមួយៗ គឺជា parameters θ\\thetaθ ដែល Gradient Descent ត្រូវ optimize ក្នុងអំឡុងពេល Training: Forward Pass→Compute Loss J(θ)→Backpropagation→Gradient Descent Update\\begin{aligned}\n&\\text{Forward Pass} \\\\\n&\\rightarrow \\text{Compute Loss } J(\\theta) \\\\\n&\\rightarrow \\text{Backpropagation} \\\\\n&\\rightarrow \\text{Gradient Descent Update}\n\\end{aligned}​Forward Pass→Compute Loss J(θ)→Backpropagation→Gradient Descent Update​ Network មួយ មាន neurons រាប់លាន → weights រាប់លាន → gradient vector មាន រាប់លាន dimensions — ប៉ុន្តែ Gradient Descent ដំណើរការដូចគ្នានឹង 1D ដែរ: update ក្នុងទិស opposite នៃ gradient!",{"id":11223,"title":11224,"titles":11225,"content":11226,"level":7520},"\u002Fkm\u002Frooms\u002Fgradient-descent#អនុវត្តក្នុង-python","អនុវត្តក្នុង Python",[10457],"ខាងក្រោមជាការសរសេរកូដ Python ដោយមិនប្រើ ML libraries ណាមួយ។ Code block នីមួយៗ ត្រូវតទៅនឹងរូបមន្ត math ខាងលើ — ជួរ highlighted ជា formula ចម្បង។",{"id":11228,"title":11229,"titles":11230,"content":11231,"level":7527},"\u002Fkm\u002Frooms\u002Fgradient-descent#ជំហានទី-1-cost-function-និង-gradient","ជំហានទី 1 — Cost Function និង Gradient",[10457,11224],"J(θ)=θ2,∇J(θ)=2θJ(\\theta) = \\theta^2, \\qquad \\nabla J(\\theta) = 2\\thetaJ(θ)=θ2,∇J(θ)=2θ # J(θ) = θ²  →  អនុគមន៍ដែលយើងចង់ minimize\ndef cost(theta):\n    return theta ** 2\n\n# ∇J(θ) = dJ\u002Fdθ = 2θ  →  derivative (gradient) របស់វា\ndef gradient(theta):\n    return 2 * theta",{"id":11233,"title":11234,"titles":11235,"content":11236,"level":7527},"\u002Fkm\u002Frooms\u002Fgradient-descent#ជំហានទី-2-update-rule","ជំហានទី 2 — Update Rule",[10457,11224],"θnew=θold−α⋅∇J(θ)\\theta_{new} = \\theta_{old} - \\alpha \\cdot \\nabla J(\\theta)θnew​=θold​−α⋅∇J(θ) def update(theta, 
alpha):\n    grad = gradient(theta)           # ① គណនា  ∇J(θ)\n    return theta - alpha * grad      # ② អនុវត្ត  θ_new = θ_old − α·∇J(θ) ជួរទី 3 គឺ formula update rule ខាងលើ សរសេរដោយផ្ទាល់ជា Python។",{"id":11238,"title":11239,"titles":11240,"content":11241,"level":7527},"\u002Fkm\u002Frooms\u002Fgradient-descent#ជំហានទី-3-loop-រហូតដល់-convergence","ជំហានទី 3 — Loop រហូតដល់ Convergence",[10457,11224],"Run updates រហូតដល់ ∣∇J(θ)∣\u003Cε|\\nabla J(\\theta)| \u003C \\varepsilon∣∇J(θ)∣\u003Cε — នៅពេល gradient ស្ទើររកឃើញថាសូន្យ: def gradient_descent(theta_init, alpha, epsilon=1e-6, max_iters=1000):\n    theta = theta_init                           # θ₀ — ចំណុចចាប់ផ្ដើម\n    for i in range(max_iters):\n        grad = gradient(theta)                   # ∇J(θ) = 2θ\n        if abs(grad) \u003C epsilon:                  # ឈប់: |∇J(θ)| \u003C ε\n            print(f\"Converged at iteration {i}\")\n            break\n        theta = theta - alpha * grad             # θ_new = θ_old − α·∇J(θ)\n        if i \u003C 5:\n            print(f\"  iter {i+1:2d}: θ={theta:.5f}  J={cost(theta):.5f}  ∇J={grad:.5f}\")\n    return theta\n# ស្របតាម ការគណនា manual ខាងលើ: θ₀ = 10, α = 0.1\ntheta_min = gradient_descent(theta_init=10.0, alpha=0.1)\nprint(f\"\\nMinimum at θ = {theta_min:.8f}\") Output — ស្របតាម iterations manual ខាងលើ: iter  1: θ= 8.00000  J=64.00000  ∇J=20.00000\n  iter  2: θ= 6.40000  J=40.96000  ∇J=16.00000\n  iter  3: θ= 5.12000  J=26.21440  ∇J=12.80000\n  iter  4: θ= 4.09600  J=16.77722  ∇J=10.24000\n  iter  5: θ= 3.27680  J=10.73742  ∇J= 8.19200\nMinimum at θ = 0.00000001",{"id":11243,"title":11244,"titles":11245,"content":11246,"level":7527},"\u002Fkm\u002Frooms\u002Fgradient-descent#ជំហានទី-4-linear-regression-parameters-ពីរ","ជំហានទី 4 — Linear Regression: Parameters ពីរ",[10457,11224],"សម្រាប់ model y^=wX+b\\hat{y} = wX + by^​=wX+b, cost function ប្រើ mean squared error: J(w,b)=1m∑i=1m(y^(i)−y(i))2J(w, b) = \\frac{1}{m} \\sum_{i=1}^{m} \\left(\\hat{y}^{(i)} - 
y^{(i)}\\right)^2J(w,b)=m1​∑i=1m​(y^​(i)−y(i))2 ជាមួយ partial derivatives: ∂J∂w=2m∑i=1m(y^(i)−y(i))x(i),∂J∂b=2m∑i=1m(y^(i)−y(i))\\frac{\\partial J}{\\partial w} = \\frac{2}{m} \\sum_{i=1}^{m} \\left(\\hat{y}^{(i)} - y^{(i)}\\right) x^{(i)}, \\qquad \\frac{\\partial J}{\\partial b} = \\frac{2}{m} \\sum_{i=1}^{m} \\left(\\hat{y}^{(i)} - y^{(i)}\\right)∂w∂J​=m2​∑i=1m​(y^​(i)−y(i))x(i),∂b∂J​=m2​∑i=1m​(y^​(i)−y(i)) import numpy as np\n\ndef linear_regression_gd(X, y, alpha=0.01, epochs=500):\n    m = len(y)\n    w, b = 0.0, 0.0                        # θ = [w, b] — initialize ទៅ zero\n    for epoch in range(epochs):\n        y_pred = w * X + b                 # forward pass:  ŷ = w·X + b\n        error  = y_pred - y                # residuals:     ŷ − y\n        dw = (2 \u002F m) * np.dot(error, X)   # ∂J\u002F∂w = (2\u002Fm) Σ (ŷ−y)·x\n        db = (2 \u002F m) * np.sum(error)       # ∂J\u002F∂b = (2\u002Fm) Σ (ŷ−y)\n        w = w - alpha * dw                 # w_new = w_old − α·∂J\u002F∂w\n        b = b - alpha * db                 # b_new = b_old − α·∂J\u002F∂b\n        if epoch % 100 == 0:\n            loss = np.mean(error ** 2)     # J(w,b) = (1\u002Fm) Σ (ŷ−y)²\n            print(f\"Epoch {epoch:4d}: loss={loss:.4f}  w={w:.4f}  b={b:.4f}\")\n    return w, b\n\n# y = 2·x  →  model គួរ converge ទៅ w≈2, b≈0\nX = np.array([1.0, 2.0, 3.0, 4.0, 5.0])\ny = np.array([2.0, 4.0, 6.0, 8.0, 10.0])\nw, b = linear_regression_gd(X, y)\nprint(f\"\\nFitted:  ŷ = {w:.4f}·x + {b:.4f}\") ជួរ highlighted 7–12 ទំនាក់ទំនងដោយផ្ទាល់ទៅ formulas: ជួរ 7–8: forward pass y^=wX+b\\hat{y} = wX + by^​=wX+b និង residualsជួរ 9–10: partial derivatives ∂J∂w\\frac{\\partial J}{\\partial w}∂w∂J​ និង ∂J∂b\\frac{\\partial J}{\\partial b}∂b∂J​ជួរ 11–12: gradient descent update rule θnew=θold−α∇J\\theta_{new} = \\theta_{old} - \\alpha \\nabla 
Jθnew​=θold​−α∇J",{"id":11248,"title":11249,"titles":11250,"content":11251,"level":7520},"\u002Fkm\u002Frooms\u002Fgradient-descent#ជំហានបន្ទាប់","ជំហានបន្ទាប់",[10457],"នៅពេលអ្នកយល់ដឹង gradient descent, អ្នកអាចស្រាវជ្រាវ Algorithms បន្ថែមដូចជា: Momentum: បន្ថែមល្បឿនទៅក្នុងការ updateAdam: Adaptive learning rates សម្រាប់ parameter នីមួយៗRMSprop: គ្រប់គ្រង sparse gradients បានល្អជាង html pre.shiki code .sHwdD, html code.shiki .sHwdD{--shiki-light:#90A4AE;--shiki-light-font-style:italic;--shiki-default:#546E7A;--shiki-default-font-style:italic;--shiki-dark:#676E95;--shiki-dark-font-style:italic}html pre.shiki code .spNyl, html code.shiki .spNyl{--shiki-light:#9C3EDA;--shiki-default:#C792EA;--shiki-dark:#C792EA}html pre.shiki code .s2Zo4, html code.shiki .s2Zo4{--shiki-light:#6182B8;--shiki-default:#82AAFF;--shiki-dark:#82AAFF}html pre.shiki code .sMK4o, html code.shiki .sMK4o{--shiki-light:#39ADB5;--shiki-default:#89DDFF;--shiki-dark:#89DDFF}html pre.shiki code .sHdIc, html code.shiki .sHdIc{--shiki-light:#90A4AE;--shiki-light-font-style:italic;--shiki-default:#EEFFFF;--shiki-default-font-style:italic;--shiki-dark:#BABED8;--shiki-dark-font-style:italic}html pre.shiki code .s7zQu, html code.shiki .s7zQu{--shiki-light:#39ADB5;--shiki-light-font-style:italic;--shiki-default:#89DDFF;--shiki-default-font-style:italic;--shiki-dark:#89DDFF;--shiki-dark-font-style:italic}html pre.shiki code .sTEyZ, html code.shiki .sTEyZ{--shiki-light:#90A4AE;--shiki-default:#EEFFFF;--shiki-dark:#BABED8}html pre.shiki code .sbssI, html code.shiki .sbssI{--shiki-light:#F76D47;--shiki-default:#F78C6C;--shiki-dark:#F78C6C}html .light .shiki span {color: var(--shiki-light);background: var(--shiki-light-bg);font-style: var(--shiki-light-font-style);font-weight: var(--shiki-light-font-weight);text-decoration: var(--shiki-light-text-decoration);}html.light .shiki span {color: var(--shiki-light);background: var(--shiki-light-bg);font-style: var(--shiki-light-font-style);font-weight: 
var(--shiki-light-font-weight);text-decoration: var(--shiki-light-text-decoration);}html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html.dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html pre.shiki code .sfazB, html code.shiki .sfazB{--shiki-light:#91B859;--shiki-default:#C3E88D;--shiki-dark:#C3E88D}html pre.shiki code .sBMFI, html code.shiki .sBMFI{--shiki-light:#E2931D;--shiki-default:#FFCB6B;--shiki-dark:#FFCB6B}",1776142916652]