# SQL 血淚史
## 將*.video的原始資料讀入Table-RawData
```sql
--Create table-#FileContents to save all info from *.video
CREATE TABLE #FileName (FileName nvarchar(max))
--read all file name from file_list.txt
BULK INSERT #FileName
FROM N'C:\Users\clay0\workshop\dbs\database\file_list.txt'
WITH (rowterminator = '0x0A')
--add FileID to each FileName to MyFileList
SELECT
ROW_NUMBER() OVER(ORDER BY FileName ASC) AS FileID,
FileName
INTO #MyFileList
FROM #FileName
--Save metadata from each *.video
--into #TEMPRAW
CREATE TABLE #TEMPRAW
(
[YoutubeID] [nvarchar](MAX),
[TITLE] [nvarchar](MAX),
[THUMBNAIL] [nvarchar](MAX),
[LOCATION] [nvarchar](MAX),
[TAGS] [nvarchar](MAX)
)
--save each file content to table
declare @file_id int, @SQL VARCHAR(MAX)
select @file_id = min( FileID ) from #MyFileList
WHILE @file_id <= (SELECT COUNT(*) FROM #MyFileList)
BEGIN
DECLARE @FilePath NVARCHAR(1000)
SELECT @FilePath = N'C:\Users\clay0\workshop\dbs\database\Datas\'+SubString(FileName, 1, 17)
FROM #MyFileList
WHERE FileID = @file_id
print @FilePath
SET @SQL = 'BULK INSERT #TEMPRAW FROM ''' + @FilePath + '''
WITH (FIELDTERMINATOR = ''0x0A'');'
EXEC(@SQL)
select @file_id = min( FileID ) from #MyFileList where FileID > @file_id
end
SELECT * FROM #TEMPRAW
--string format & save into RawData
SELECT
REPLACE(REPLACE(REPLACE(YoutubeID, '{"VideoId":"', ''), '"}', ''),'0x0A','') AS YoutubeID,
REPLACE(REPLACE(REPLACE(TITLE, '{"Title":"', ''), '"}', ''),'0x0A','') AS Title,
REPLACE(REPLACE(REPLACE(REPLACE(TAGS, '{"Tags":"', ''), '"}', ''), '"', ''),'0x0A','') AS Tags
INTO RawData
FROM #TEMPRAW
SELECT * FROM RawData
```

## 利用Table RawData 1NF(?)出Video Table
```sql
SELECT
YoutubeID AS YoutubeID,
Title AS Title
INTO Video
FROM RawData
SELECT * FROM Video
--testing escape characters
SELECT '123'+YoutubeID+'456' FROM Video
SELECT '123'+Title+'456' FROM Video
```
## 再次利用BULK將\*tag讀入並建Table
```sql
--Build Tag Table from file
--read all tag name from tag_list.txt
CREATE TABLE #Tag (TagName nvarchar(max))
BULK INSERT #Tag
FROM N'C:\Users\clay0\workshop\dbs\database\tag_list.txt'
WITH (rowterminator = '0x0A')
--add TagID to each Tag
SELECT
ROW_NUMBER() OVER(ORDER BY TagName ASC) AS TagID,
REPLACE(REPLACE(TagName,'\n',''),' ', '') AS TagName
INTO VideoTag
FROM #Tag
UPDATE VideoTag
SET TagName = REPLACE(TagName,'0x0A','')
UPDATE VideoTag
SET TagName = REPLACE(TagName,RIGHT(TagName,1),'')
--testing escape characters
SELECT '123'+TagName+'456' FROM VideoTag
```
## 將RAW DATA TAGS資料分開(1NF?)儲存於另外一個TABLE
```sql
CREATE TABLE VideosTags
(
[YoutubeID] varchar(11) ,
[TagID] int
)
--save each video-tag to VideosTags
declare @tag_id int, @TagName nvarchar(100)
select @tag_id = min( TagID ) from VideoTag
WHILE @tag_id <= (SELECT COUNT(*) FROM VideoTag)
BEGIN
SELECT @TagName = TagName
FROM VideoTag
WHERE TagID = @tag_id
print @TagName
INSERT INTO VideosTags
SELECT YoutubeID AS YoutubeID, @tag_id AS TagID
FROM RawData AS RD
WHERE RD.Tags LIKE ('%'+@TagName+'%')
select @tag_id = min( TagID ) from VideoTag where TagID > @tag_id
end
SELECT * FROM VideosTags
```
## 利用GROUP BY找出Tag完全符合的YoutubeID
```sql
--set TagName of Tag must be contained
SELECT *
INTO #ContainTag
FROM VideoTag
WHERE TagName IN ('anime', 'cover')
--choose the YoutubeID match with tag in #ContainTag
SELECT CT.TagID AS TagID, VTs.YoutubeID AS YoutubeID
INTO #MatchVideo
FROM #ContainTag AS CT
INNER JOIN VideosTags AS VTs
ON CT.TagID = VTs.TagID
--statistic the frequency of YoutubeID
SELECT COUNT(YoutubeID) AS Frequency, YoutubeID AS YoutubeID
INTO #TagCount
FROM #MatchVideo
GROUP BY YoutubeID
--choose the frequncy = # of tag in #ContainTag
DECLARE @num_of_tag INT = (SELECT COUNT(*) FROM #ContainTag)
SELECT YoutubeID AS YoutubeID
INTO #SearchResult
FROM #TagCount
WHERE Frequency = @num_of_tag
SELECT * FROM #SearchResult
```
## 其他:
### 好用的擴增TABLE方式
```sql=1
SELECT
col1 AS col_name1, col2 AS col_name2 ...
INTO #NewTable
FROM #OldTable
```
line3 的 NewTable不可事先CREATE。
### 檢查是否混入奇怪的字元導致字串比對/處理出錯
```sql
--檢查最後一個字元
ASCII(RIGHT(ColumnName, 1))
```