본문 바로가기
자료/Python

[한국기술교육대학교] 스크립트프로그래밍 과제2

by cjw.git 2021. 10. 7.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
class Solve:
    """Solutions to the six exercises of scripting-programming assignment 2.

    Every ``problemN`` method is a static method that prints a banner line
    followed by the answer for that exercise.  The ``_``-prefixed static
    methods are pure string helpers used by the two HTML exercises.
    """

    @staticmethod
    def problem1():
        """Print six ``and``/``or`` expressions that show short-circuiting."""
        print('------------------ 1번 문제 ------------------')
        # `and` must look at its right operand whenever the left one is
        # truthy, so the chain is evaluated all the way to 4 and 4 is the
        # value of the expression.
        a1 = 1 and 2 and 3 and 4
        print(a1)
        # `or` stops at the first truthy operand, so only 1 is evaluated.
        a2 = 1 or 2 or 3 or 4
        print(a2)
        # `and` binds tighter than `or`: `1 and 2` yields 2, which is truthy,
        # so the right-hand side of `or` is never evaluated.
        a3 = 1 and 2 or 3 and 4
        print(a3)
        # Same grouping as above, written with explicit parentheses; the
        # `(3 and 4)` part is skipped because `(1 and 2)` is already truthy.
        a4 = (1 and 2) or (3 and 4)
        print(a4)
        # The leftmost operand of the `or` chain is truthy, so evaluation
        # stops at 1.
        a5 = 1 or 2 and 3 or 4
        print(a5)
        # `(1 or 2)` yields 1, so `and` goes on to `(3 or 4)`, which stops at
        # its first truthy operand, 3.
        a6 = (1 or 2) and (3 or 4)
        print(a6)

    @staticmethod
    def problem2():
        """Print a path split into its directory part and its file name."""
        print('------------------ 2번 문제 ------------------')

        def div_path(path):
            """Return ``[directory, filename]`` split at the last ``'/'``."""
            directory, _, filename = path.rpartition('/')
            return [directory, filename]

        print(div_path('/home/chulsoo/test.txt'))

    @staticmethod
    def problem3():
        """Print the sorted union of three pairs of lists."""
        print('------------------ 3번 문제 ------------------')

        def list_union(list1, list2):
            """Return the sorted, duplicate-free union of two lists."""
            return sorted(set(list1) | set(list2))

        print(list_union([1, 2, 3], [1, 2, 4]))
        print(list_union([-10, -5, 0, -1], [100, 9, 0, 9]))
        print(list_union([0, 1, 2], [0, 1, 2]))

    @staticmethod
    def problem4():
        """Print the sorted intersection and difference of three list pairs."""
        print('------------------ 4번 문제 ------------------')

        def list_intersection(list1, list2):
            """Return the sorted items present in both lists, without duplicates."""
            return sorted(set(list1) & set(list2))

        def list_difference(list1, list2):
            """Return the sorted items of ``list1`` absent from ``list2``."""
            return sorted(set(list1) - set(list2))

        print(list_intersection([1, 2, 3], [1, 2, 4]))
        print(list_intersection([-10, -5, 0, -1], [100, 9, 0, 9]))
        print(list_intersection([0, 1, 2], [0, 1, 2]))

        print(list_difference([1, 2, 3], [1, 2, 4]))
        print(list_difference([-10, -5, 0, -1], [100, 9, 0, 9]))
        print(list_difference([0, 1, 2], [0, 1, 2]))

    @staticmethod
    def problem5():
        """Print the prime numbers between 2 and 10 inclusive."""
        print('------------------ 5번 문제 ------------------')
        from math import isqrt

        def print_primenumber(a, b):
            """Print the list of primes ``p`` with ``a <= p <= b``."""
            print('%d 이상 %d 이하 정수 중 소수 리스트: ' % (a, b), end='')

            # Numbers below 2 are not prime, so the scan never starts lower.
            # Trial division only needs divisors up to the integer square root.
            result = [
                val for val in range(max(a, 2), b + 1)
                if all(val % div for div in range(2, isqrt(val) + 1))
            ]
            print(result)

        print_primenumber(2, 10)

    @staticmethod
    def _remove_blocks(source, opener, closer):
        """Return ``source`` without the spans running from ``opener`` to ``closer``.

        Each span starts at an occurrence of ``opener`` and ends with the
        first ``closer`` found at or after that position.  When an ``opener``
        has no ``closer`` after it, the rest of the string is dropped, which
        also guarantees that the scan terminates.
        """
        kept = []
        pos = 0
        while True:
            start = source.find(opener, pos)
            if start == -1:
                kept.append(source[pos:])
                break
            kept.append(source[pos:start])
            end = source.find(closer, start)
            if end == -1:
                break
            pos = end + len(closer)
        return ''.join(kept)

    @staticmethod
    def _tag_name_at(src, start):
        """Return the tag name following the ``'<'`` at ``src[start]``.

        The name ends at the first whitespace character, ``'>'``, ``'='`` or
        ``'/>'``.  An empty string is returned when the ``'<'`` is directly
        followed by one of those delimiters (e.g. ``"1 < 2"``) or when no
        delimiter occurs before the end of ``src``.
        """
        idx = start + 1
        while idx < len(src):
            ch = src[idx]
            if ch.isspace() or ch in '>=' or src.startswith('/>', idx):
                return src[start + 1:idx]
            idx += 1
        return ''

    @staticmethod
    def _collect_tags(src):
        """Return the set of opening-tag names that occur in ``src``.

        Closing tags (``'</...'``) are skipped.  Script, style and comment
        blocks should be removed beforehand, otherwise a ``'<'`` inside them
        is examined like any other.
        """
        tags = set()
        pos = src.find('<')
        while pos != -1:
            if not src.startswith('/', pos + 1):
                name = Solve._tag_name_at(src, pos)
                if name:
                    tags.add(name)
            # Resume after the '>' that ends this tag; when there is none,
            # step past the '<' only.
            close = src.find('>', pos + 1)
            pos = src.find('<', close + 1 if close != -1 else pos + 1)
        return tags

    @staticmethod
    def _text_chunks(source, tags):
        """Return the text runs of ``source`` that precede a known tag.

        ``source`` is scanned from ``'<'`` to ``'<'``.  The text between the
        previous ``'>'`` and the current ``'<'`` is kept when the name after
        the ``'<'`` is in ``tags``; otherwise it is discarded.  Text that
        follows the last ``'<...>'`` is kept as a final chunk.  Chunks are
        left-stripped and empty chunks are omitted.
        """
        chunks = []
        pos = 0
        while True:
            start = source.find('<', pos)
            if start == -1:
                break
            if Solve._tag_name_at(source, start) in tags:
                text = source[pos:start].lstrip()
                if text:
                    chunks.append(text)
            close = source.find('>', start + 1)
            pos = close + 1 if close != -1 else start + 1
        tail = source[pos:].lstrip()
        if tail:
            chunks.append(tail)
        return chunks

    @staticmethod
    def _count_words(source):
        """Return the number of whitespace-separated words of text in ``source``.

        Script, style and comment blocks are removed, closing tags of every
        tag name found in the document are deleted, the text between the
        remaining tags is collected, HTML entities are decoded and the
        result is split on whitespace.
        """
        from html import unescape

        for opener, closer in (('<script', '</script>'),
                               ('<style', '</style>'),
                               ('<!--', '-->')):
            source = Solve._remove_blocks(source, opener, closer)

        tags = Solve._collect_tags(source)
        for tag in tags:
            source = source.replace('</' + tag + '>', '')

        # NOTE(review): chunks are concatenated without a separator, so text
        # on either side of a tag counts as one word unless whitespace
        # separates it in the markup; confirm this is the intended rule.
        text = ''.join(Solve._text_chunks(source, tags))
        # Entities are decoded only after the tags are gone, so '&lt;' can
        # never be mistaken for the start of a tag.
        return len(unescape(text).split())

    @staticmethod
    def _simple_text_fragments(source):
        """Return the text that directly follows each ``'>'`` in ``source``.

        Line breaks, tabs and ``'</html>'`` are deleted, everything up to and
        including ``'</head>'`` is dropped when present, and script and style
        blocks are removed.  The remainder is split on ``'>'``; a piece that
        is empty or starts with ``'<'`` or ``'`<'`` is skipped, and for every
        other piece the part before its first ``'<'`` is returned.
        """
        for junk in ('\r', '\n', '\t', '</html>'):
            source = source.replace(junk, '')

        head_end = source.find('</head>')
        if head_end != -1:
            source = source[head_end + len('</head>'):]

        source = Solve._remove_blocks(source, '<script', '</script>')
        source = Solve._remove_blocks(source, '<style', '</style>')

        fragments = []
        for piece in source.split('>'):
            piece = piece.strip()
            if not piece or piece.startswith(('<', '`<')):
                continue
            fragments.append(piece.split('<')[0])
        return fragments

    @staticmethod
    def problem6(url='https://cjw-git.tistory.com/176'):
        """Download ``url`` and print the text found after each ``'>'``.

        This is the simple approach, which the author marked as discontinued;
        ``problem6_1`` is the one run by the script.
        """
        print('------------------ 6번 문제 ------------------')
        import requests

        # Without a timeout the call can block forever on a stalled server.
        req = requests.get(url, timeout=10)
        for fragment in Solve._simple_text_fragments(req.text):
            print(fragment)

    @staticmethod
    def problem6_1(url='https://cjw-git.tistory.com/176'):
        """Download ``url`` and print how many words of text it contains.

        This is the tag-based approach: every tag name in the document is
        collected first and the text between known tags is then counted.
        """
        print('------------------ 6번 문제 ------------------')
        import requests

        # Without a timeout the call can block forever on a stalled server.
        req = requests.get(url, timeout=10)
        print(Solve._count_words(req.text))
 
# Run every exercise in order; problem6_1 downloads a web page, so it needs
# network access and the third-party `requests` package.
s = Solve()
s.problem1()
s.problem2()
s.problem3()
s.problem4()
s.problem5()
s.problem6_1()
cs

 

 

/Users/jinwoo/PycharmProjects/pythonProject/venv/bin/python /Users/jinwoo/PycharmProjects/pythonProject/problem1.py
------------------ 1번 문제 ------------------
4
1
2
2
1
3
------------------ 2번 문제 ------------------
['/home/chulsoo', 'test.txt']
------------------ 3번 문제 ------------------
[1, 2, 3, 4]
[-10, -5, -1, 0, 9, 100]
[0, 1, 2]
------------------ 4번 문제 ------------------
[1, 2]
[0]
[0, 1, 2]
[3]
[-10, -5, -1]
[]
------------------ 5번 문제 ------------------
2 이상 10 이하 정수 중 소수 리스트: [2, 3, 5, 7]
------------------ 6번 문제 ------------------
157

Process finished with exit code 0

댓글