# SQL 血淚史 ## 將*.video的原始資料讀入Table-RawData ```sql --Create table-#FileContents to save all info from *.video CREATE TABLE #FileName (FileName nvarchar(max)) --read all file name from file_list.txt BULK INSERT #FileName FROM N'C:\Users\clay0\workshop\dbs\database\file_list.txt' WITH (rowterminator = '0x0A') --add FileID to each FileName to MyFileList SELECT ROW_NUMBER() OVER(ORDER BY FileName ASC) AS FileID, FileName INTO #MyFileList FROM #FileName --Save metadata from each *.video --into #TEMPRAW CREATE TABLE #TEMPRAW ( [YoutubeID] [nvarchar](MAX), [TITLE] [nvarchar](MAX), [THUMBNAIL] [nvarchar](MAX), [LOCATION] [nvarchar](MAX), [TAGS] [nvarchar](MAX) ) --save each file content to table declare @file_id int, @SQL VARCHAR(MAX) select @file_id = min( FileID ) from #MyFileList WHILE @file_id <= (SELECT COUNT(*) FROM #MyFileList) BEGIN DECLARE @FilePath NVARCHAR(1000) SELECT @FilePath = N'C:\Users\clay0\workshop\dbs\database\Datas\'+SubString(FileName, 1, 17) FROM #MyFileList WHERE FileID = @file_id print @FilePath SET @SQL = 'BULK INSERT #TEMPRAW FROM ''' + @FilePath + ''' WITH (FIELDTERMINATOR = ''0x0A'');' EXEC(@SQL) select @file_id = min( FileID ) from #MyFileList where FileID > @file_id end SELECT * FROM #TEMPRAW --string format & save into RawData SELECT REPLACE(REPLACE(REPLACE(YoutubeID, '{"VideoId":"', ''), '"}', ''),'0x0A','') AS YoutubeID, REPLACE(REPLACE(REPLACE(TITLE, '{"Title":"', ''), '"}', ''),'0x0A','') AS Title, REPLACE(REPLACE(REPLACE(REPLACE(TAGS, '{"Tags":"', ''), '"}', ''), '"', ''),'0x0A','') AS Tags INTO RawData FROM #TEMPRAW SELECT * FROM RawData ``` ![結果](https://imgur.com/Y1xOgno.jpg) ## 利用Table RawData 1NF(?)出Video Table ```sql SELECT YoutubeID AS YoutubeID, Title AS Title INTO Video FROM RawData SELECT * FROM Video --testing escape characters SELECT '123'+YoutubeID+'456' FROM Video SELECT '123'+Title+'456' FROM Video ``` ## 再次利用BULK將\*tag讀入並建Table ```sql --Build Tag Table from file --read all tag name from tag_list.txt CREATE TABLE #Tag (TagName nvarchar(max)) BULK INSERT #Tag FROM N'C:\Users\clay0\workshop\dbs\database\tag_list.txt' WITH (rowterminator = '0x0A') --add TagID to each Tag SELECT ROW_NUMBER() OVER(ORDER BY TagName ASC) AS TagID, REPLACE(REPLACE(TagName,'\n',''),' ', '') AS TagName INTO VideoTag FROM #Tag UPDATE VideoTag SET TagName = REPLACE(TagName,'0x0A','') UPDATE VideoTag SET TagName = REPLACE(TagName,RIGHT(TagName,1),'') --testing escape characters SELECT '123'+TagName+'456' FROM VideoTag ``` ## 將RAW DATA TAGS資料分開(1NF?)儲存於另外一個TABLE ```sql CREATE TABLE VideosTags ( [YoutubeID] varchar(11) , [TagID] int ) --save each video-tag to VideosTags declare @tag_id int, @TagName nvarchar(100) select @tag_id = min( TagID ) from VideoTag WHILE @tag_id <= (SELECT COUNT(*) FROM VideoTag) BEGIN SELECT @TagName = TagName FROM VideoTag WHERE TagID = @tag_id print @TagName INSERT INTO VideosTags SELECT YoutubeID AS YoutubeID, @tag_id AS TagID FROM RawData AS RD WHERE RD.Tags LIKE ('%'+@TagName+'%') select @tag_id = min( TagID ) from VideoTag where TagID > @tag_id end SELECT * FROM VideosTags ``` ## 利用GROUP BY找出Tag完全符合的YoutubeID ```sql --set TagName of Tag must be contained SELECT * INTO #ContainTag FROM VideoTag WHERE TagName IN ('anime', 'cover') --choose the YoutubeID match with tag in #ContainTag SELECT CT.TagID AS TagID, VTs.YoutubeID AS YoutubeID INTO #MatchVideo FROM #ContainTag AS CT INNER JOIN VideosTags AS VTs ON CT.TagID = VTs.TagID --statistic the frequency of YoutubeID SELECT COUNT(YoutubeID) AS Frequency, YoutubeID AS YoutubeID INTO #TagCount FROM #MatchVideo GROUP BY YoutubeID --choose the frequncy = # of tag in #ContainTag DECLARE @num_of_tag INT = (SELECT COUNT(*) FROM #ContainTag) SELECT YoutubeID AS YoutubeID INTO #SearchResult FROM #TagCount WHERE Frequency = @num_of_tag SELECT * FROM #SearchResult ``` ## 其他: ### 好用的擴增TABLE方式 ```sql=1 SELECT col1 AS col_name1, col2 AS col_name2 ... INTO #NewTable FROM #OldTable ``` line3 的 NewTable不可事先CREATE。 ### 檢查是否混入奇怪的字元導致字串比對/處理出錯 ```sql --檢查最後一個字元 ASCII(RIGHT(ColumnName, 1)) ```